From 1ca79e948aa4bf923b92757ea68a98b7bcd0767d Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Fri, 13 Feb 2015 12:56:33 +0100 Subject: Drop support for atomic ops and bucket formats Atomic ops are slower than locks, probably going to switch buckets to uint64_t later. --- flam3.c | 231 +--------------------------------------------------------------- rect.c | 26 -------- wscript | 2 - 3 files changed, 2 insertions(+), 257 deletions(-) diff --git a/flam3.c b/flam3.c index 2b4976f..f1b72be 100644 --- a/flam3.c +++ b/flam3.c @@ -1832,11 +1832,7 @@ void flam3_print(FILE *f, flam3_genome *cp, char *extra_attributes, int print_ed if (getenv("intpalette")) fprintf(f, "", i, (int)rint(r), (int)rint(g), (int)rint(b)); else { -#ifdef USE_FLOAT_INDICES - fprintf(f, "", cp->palette[i].index, r, g, b); -#else fprintf(f, "", i, r, g, b); -#endif } } else { if (getenv("intpalette")) @@ -3593,180 +3589,6 @@ typedef unsigned int abucket_int[4]; typedef float bucket_float[5]; typedef float abucket_float[4]; -#ifdef HAVE_GCC_64BIT_ATOMIC_OPS -static inline void -double_atomic_add(double *dest, double delta) -{ - uint64_t *int_ptr = (uint64_t *)dest; - union { - double dblval; - uint64_t intval; - } old_val, new_val; - int success; - - do { - old_val.dblval = *dest; - new_val.dblval = old_val.dblval + delta; - success = __sync_bool_compare_and_swap( - int_ptr, old_val.intval, new_val.intval); - } while (!success); -} -#endif /* HAVE_GCC_64BIT_ATOMIC_OPS */ - -#ifdef HAVE_GCC_ATOMIC_OPS -static inline void -float_atomic_add(float *dest, float delta) -{ - uint32_t *int_ptr = (uint32_t *)dest; - union { - float fltval; - uint32_t intval; - } old_val, new_val; - int success; - - do { - old_val.fltval = *dest; - new_val.fltval = old_val.fltval + delta; - success = __sync_bool_compare_and_swap( - int_ptr, old_val.intval, new_val.intval); - } while (!success); -} - -static inline void -uint_atomic_add(unsigned int *dest, unsigned int delta) -{ - unsigned int old_val, new_val; - int success; - - do { - old_val = *dest; - if (UINT_MAX - old_val > delta) - new_val = old_val + delta; - else - new_val = UINT_MAX; - success = __sync_bool_compare_and_swap( - dest, old_val, new_val); - } while (!success); -} - -static inline void -ushort_atomic_add(unsigned short *dest, unsigned short delta) -{ - unsigned short old_val, new_val; - int success; - - do { - old_val = *dest; - if (USHRT_MAX - old_val > delta) - new_val = old_val + delta; - else - new_val = USHRT_MAX; - success = __sync_bool_compare_and_swap( - dest, old_val, new_val); - } while (!success); -} -#endif /* HAVE_GCC_ATOMIC_OPS */ - -/* 64-bit datatypes */ -#define bucket bucket_double -#define abucket abucket_double -#define abump_no_overflow(dest, delta) do {dest += delta;} while (0) -#define add_c_to_accum(acc,i,ii,j,jj,wid,hgt,c) do { \ - if ( (j) + (jj) >=0 && (j) + (jj) < (hgt) && (i) + (ii) >=0 && (i) + (ii) < (wid)) { \ - abucket *a = (acc) + ( (i) + (ii) ) + ( (j) + (jj) ) * (wid); \ - abump_no_overflow(a[0][0],(c)[0]); \ - abump_no_overflow(a[0][1],(c)[1]); \ - abump_no_overflow(a[0][2],(c)[2]); \ - abump_no_overflow(a[0][3],(c)[3]); \ - } \ -} while (0) -/* single-threaded */ -#define USE_LOCKS -#define bump_no_overflow(dest, delta) do {dest += delta;} while (0) -#define render_rectangle render_rectangle_double -#define iter_thread iter_thread_double -#define de_thread_helper de_thread_helper_64 -#define de_thread de_thread_64 -#include "rect.c" -#ifdef HAVE_GCC_64BIT_ATOMIC_OPS - /* multi-threaded */ - #undef USE_LOCKS - #undef bump_no_overflow - #undef render_rectangle - #undef iter_thread - #undef de_thread_helper - #undef de_thread - #define bump_no_overflow(dest, delta) double_atomic_add(&dest, delta) - #define render_rectangle render_rectangle_double_mt - #define iter_thread iter_thread_double_mt - #define de_thread_helper de_thread_helper_64_mt - #define de_thread de_thread_64_mt - #include "rect.c" -#else /* !HAVE_GCC_64BIT_ATOMIC_OPS */ - #define render_rectangle_double_mt render_rectangle_double -#endif /* HAVE_GCC_64BIT_ATOMIC_OPS */ -#undef render_rectangle -#undef iter_thread -#undef add_c_to_accum -#undef bucket -#undef abucket -#undef bump_no_overflow -#undef abump_no_overflow -#undef de_thread_helper -#undef de_thread - -/* 32-bit datatypes */ -#define bucket bucket_int -#define abucket abucket_int -#define abump_no_overflow(dest, delta) do { \ - if (UINT_MAX - dest > delta) dest += delta; else dest = UINT_MAX; \ -} while (0) -#define add_c_to_accum(acc,i,ii,j,jj,wid,hgt,c) do { \ - if ( (j) + (jj) >=0 && (j) + (jj) < (hgt) && (i) + (ii) >=0 && (i) + (ii) < (wid)) { \ - abucket *a = (acc) + ( (i) + (ii) ) + ( (j) + (jj) ) * (wid); \ - abump_no_overflow(a[0][0],(c)[0]); \ - abump_no_overflow(a[0][1],(c)[1]); \ - abump_no_overflow(a[0][2],(c)[2]); \ - abump_no_overflow(a[0][3],(c)[3]); \ - } \ -} while (0) -/* single-threaded */ -#define USE_LOCKS -#define bump_no_overflow(dest, delta) do { \ - if (UINT_MAX - dest > delta) dest += delta; else dest = UINT_MAX; \ -} while (0) -#define render_rectangle render_rectangle_int -#define iter_thread iter_thread_int -#define de_thread_helper de_thread_helper_32 -#define de_thread de_thread_32 -#include "rect.c" -#ifdef HAVE_GCC_ATOMIC_OPS - /* multi-threaded */ - #undef USE_LOCKS - #undef bump_no_overflow - #undef render_rectangle - #undef iter_thread - #undef de_thread_helper - #undef de_thread - #define bump_no_overflow(dest, delta) uint_atomic_add(&dest, delta) - #define render_rectangle render_rectangle_int_mt - #define iter_thread iter_thread_int_mt - #define de_thread_helper de_thread_helper_32_mt - #define de_thread de_thread_32_mt - #include "rect.c" -#else /* !HAVE_GCC_ATOMIC_OPS */ - #define render_rectangle_int_mt render_rectangle_int -#endif /* HAVE_GCC_ATOMIC_OPS */ -#undef iter_thread -#undef render_rectangle -#undef add_c_to_accum -#undef bucket -#undef abucket -#undef bump_no_overflow -#undef abump_no_overflow -#undef de_thread_helper -#undef de_thread - /* experimental 32-bit datatypes (called 33) */ #define bucket bucket_int #define abucket abucket_float @@ -3785,28 +3607,10 @@ ushort_atomic_add(unsigned short *dest, unsigned short delta) #define bump_no_overflow(dest, delta) do { \ if (UINT_MAX - dest > delta) dest += delta; else dest = UINT_MAX; \ } while (0) -#define render_rectangle render_rectangle_float #define iter_thread iter_thread_float #define de_thread_helper de_thread_helper_33 #define de_thread de_thread_33 #include "rect.c" -#ifdef HAVE_GCC_ATOMIC_OPS - /* multi-threaded */ - #undef USE_LOCKS - #undef bump_no_overflow - #undef render_rectangle - #undef iter_thread - #undef de_thread_helper - #undef de_thread - #define bump_no_overflow(dest, delta) uint_atomic_add(&dest, delta) - #define render_rectangle render_rectangle_float_mt - #define iter_thread iter_thread_float_mt - #define de_thread_helper de_thread_helper_33_mt - #define de_thread de_thread_33_mt - #include "rect.c" -#else /* !HAVE_GCC_ATOMIC_OPS */ - #define render_rectangle_float_mt render_rectangle_float -#endif /* HAVE_GCC_ATOMIC_OPS */ #undef iter_thread #undef render_rectangle #undef add_c_to_accum @@ -3843,39 +3647,8 @@ int flam3_render(flam3_frame *spec, void *out, int retval; - if (spec->nthreads <= 2) { - /* single-threaded or 2 threads without atomic operations */ - switch (spec->bits) { - case 32: - retval = render_rectangle_int(spec, out, field, nchan, trans, stats); - return(retval); - case 33: - retval = render_rectangle_float(spec, out, field, nchan, trans, stats); - return(retval); - case 64: - retval = render_rectangle_double(spec, out, field, nchan, trans, stats); - return(retval); - default: - bits_error(spec); - return(1); - } - } else { - /* 3+ threads using atomic ops if available */ - switch (spec->bits) { - case 32: - retval = render_rectangle_int_mt(spec, out, field, nchan, trans, stats); - return(retval); - case 33: - retval = render_rectangle_float_mt(spec, out, field, nchan, trans, stats); - return(retval); - case 64: - retval = render_rectangle_double_mt(spec, out, field, nchan, trans, stats); - return(retval); - default: - bits_error(spec); - return(1); - } - } + retval = render_rectangle (spec, out, field, nchan, trans, stats); + return(retval); } diff --git a/rect.c b/rect.c index 30d76fd..0c99e39 100644 --- a/rect.c +++ b/rect.c @@ -441,31 +441,6 @@ static void iter_thread(void *fth) { b = buckets + (int)(ficp->ws0 * p0 - ficp->wb0s0) + ficp->width * (int)(ficp->hs1 * p1 - ficp->hb1s1); -#ifdef USE_FLOAT_INDICES - color_index0 = 0; - - //fprintf(stdout,"%.16f\n",p[2]*256.0); - - while(color_index0 < cmap_size_m1) { - if (ficp->dmap[color_index0+1].index > p[2]) - break; - else - color_index0++; - } - - if (p[3]==1.0) { - bump_no_overflow(b[0][0], ficp->dmap[color_index0].color[0]); - bump_no_overflow(b[0][1], ficp->dmap[color_index0].color[1]); - bump_no_overflow(b[0][2], ficp->dmap[color_index0].color[2]); - bump_no_overflow(b[0][3], ficp->dmap[color_index0].color[3]); - bump_no_overflow(b[0][4], 255.0); - } else { - bump_no_overflow(b[0][0], logvis*ficp->dmap[color_index0].color[0]); - bump_no_overflow(b[0][1], logvis*ficp->dmap[color_index0].color[1]); - bump_no_overflow(b[0][2], logvis*ficp->dmap[color_index0].color[2]); - bump_no_overflow(b[0][3], logvis*ficp->dmap[color_index0].color[3]); - bump_no_overflow(b[0][4], logvis*255.0); -#else dbl_index0 = p[2] * cmap_size; color_index0 = (int) (dbl_index0); @@ -511,7 +486,6 @@ static void iter_thread(void *fth) { bump_no_overflow(b[0][3], logvis*interpcolor[3]); bump_no_overflow(b[0][4], logvis*255.0); } -#endif } } diff --git a/wscript b/wscript index 07c9deb..1f97094 100644 --- a/wscript +++ b/wscript @@ -14,8 +14,6 @@ def configure(conf): conf.check_cc (lib='jpeg', uselib_store='jpeg') conf.check_cfg (package='libpng', uselib_store='png', args=['--cflags', '--libs'], msg='Checking for library png') conf.check_cc (lib='amdlibm', header_name='amdlibm.h', mandatory=False, define_name='HAVE_AMDLIBM', uselib_store='amdlibm') - conf.check_cc (fragment='#include \nint main() { uint32_t a = 4; __sync_bool_compare_and_swap(&a, 4, 5); }', define_name='HAVE_GCC_ATOMIC_OPS', msg='Checking for atomic CAS') - conf.check_cc (fragment='#include \nint main() { uint64_t a = 4; __sync_bool_compare_and_swap(&a, 4, 5); }', define_name='HAVE_GCC_64BIT_ATOMIC_OPS', msg='Checking for 64 bit atomic CAS') # does not work #conf.check_cc (function_name='__builtin_ia32_rdrand64_step', define_name='HAVE_RDRAND64') -- cgit v1.2.3