diff options
| -rw-r--r-- | flam3.c | 35 | ||||
| -rw-r--r-- | flam3.h | 4 | ||||
| -rw-r--r-- | interpolation.c | 31 | ||||
| -rw-r--r-- | palettes.c | 63 | ||||
| -rw-r--r-- | palettes.h | 7 | ||||
| -rw-r--r-- | private.h | 2 | ||||
| -rw-r--r-- | rect.c | 131 | ||||
| -rw-r--r-- | wscript | 2 | 
8 files changed, 97 insertions, 178 deletions
@@ -3312,41 +3312,6 @@ int flam3_estimate_bounding_box(flam3_genome *cp, double eps, int nsamples,     return(bv);  } - -typedef double bucket_double[5]; -typedef double abucket_double[4]; -typedef unsigned int bucket_int[5]; -typedef unsigned int abucket_int[4]; -typedef float bucket_float[5]; -typedef float abucket_float[4]; - -/* experimental 32-bit datatypes (called 33) */ -#define bucket bucket_int -#define abucket abucket_float -#define abump_no_overflow(dest, delta) do {dest += delta;} while (0) -#define add_c_to_accum(acc,i,ii,j,jj,wid,hgt,c) do { \ -   if ( (j) + (jj) >=0 && (j) + (jj) < (hgt) && (i) + (ii) >=0 && (i) + (ii) < (wid)) { \ -   abucket *a = (acc) + ( (i) + (ii) ) + ( (j) + (jj) ) * (wid); \ -   abump_no_overflow(a[0][0],(c)[0]); \ -   abump_no_overflow(a[0][1],(c)[1]); \ -   abump_no_overflow(a[0][2],(c)[2]); \ -   abump_no_overflow(a[0][3],(c)[3]); \ -   } \ -} while (0) -/* single-threaded */ -#define bump_no_overflow(dest, delta) do { \ -   if (UINT_MAX - dest > delta) dest += delta; else dest = UINT_MAX; \ -} while (0) -#define iter_thread iter_thread_float -#include "rect.c" -#undef iter_thread -#undef render_rectangle -#undef add_c_to_accum -#undef bucket -#undef abucket -#undef bump_no_overflow -#undef abump_no_overflow -  int flam3_render(flam3_frame *spec, void *out,          int field, int nchan, int trans, stat_struct *stats) { @@ -36,10 +36,12 @@ char *flam3_version();  #define flam3_print_edits  (1)  #define flam3_dont_print_edits  (0) +#include "vector.h" +  //typedef double flam3_palette[256][3];  typedef struct {  	double index; -	double color[4]; +	double4 color;  } flam3_palette_entry;  typedef flam3_palette_entry flam3_palette[256]; diff --git a/interpolation.c b/interpolation.c index 528e312..78b798f 100644 --- a/interpolation.c +++ b/interpolation.c @@ -156,23 +156,25 @@ void interpolate_cmap(flam3_palette cmap, double blend,        fprintf(stderr,"unable to retrieve palette %d, setting to white\n", index1);     for (i = 0; i < 256; i++) { -      double t[5], s[5]; +      double4 t, s; +	  double t4, s4; -      rgb2hsv(p0[i].color, s); -      rgb2hsv(p1[i].color, t); +      s = rgb2hsv(p0[i].color); +      t = rgb2hsv(p1[i].color);        s[3] = p0[i].color[3];        t[3] = p1[i].color[3]; -      s[4] = p0[i].index; -      t[4] = p1[i].index; +      s4 = p0[i].index; +      t4 = p1[i].index; -      for (j = 0; j < 5; j++) +      for (j = 0; j < 4; j++)           t[j] = ((1.0-blend) * s[j]) + (blend * t[j]); +      t4 = ((1.0-blend) * s4) + (blend * t4); -      hsv2rgb(t, cmap[i].color); +      cmap[i].color = hsv2rgb(t);        cmap[i].color[3] = t[3]; -      cmap[i].index = t[4];       +      cmap[i].index = t4;     }  } @@ -362,29 +364,30 @@ void flam3_interpolate_n(flam3_genome *result, int ncp,     if (flam3_palette_interpolation_hsv == cpi[0].palette_interpolation) {        for (i = 0; i < 256; i++) { -         double t[3], s[5]; +         double s4; +		 double4 s, t;           int alpha1 = 1; -         s[0] = s[1] = s[2] = s[3] = s[4] = 0.0; +         s[0] = s[1] = s[2] = s[3] = s4 = 0.0;           for (k = 0; k < ncp; k++) { -            rgb2hsv(cpi[k].palette[i].color, t); +            t = rgb2hsv(cpi[k].palette[i].color);              for (j = 0; j < 3; j++)                 s[j] += c[k] * t[j];              s[3] += c[k] * cpi[k].palette[i].color[3];              if (cpi[k].palette[i].color[3] != 1.0)                 alpha1 = 0; -            s[4] += c[k] * cpi[k].palette[i].index; +            s4 += c[k] * cpi[k].palette[i].index;           }           if (alpha1 == 1)              s[3] = 1.0; -         hsv2rgb(s, result->palette[i].color); +         result->palette[i].color = hsv2rgb(s);           result->palette[i].color[3] = s[3]; -         result->palette[i].index = s[4]; +         result->palette[i].index = s4;           for (j = 0; j < 4; j++) {              if (result->palette[i].color[j] < 0.0) @@ -167,22 +167,24 @@ int flam3_get_palette(int n, flam3_palette c, double hue_rotation, randctx * con        	/* Loop over elements of cmap */  	      for (i = 0; i < cmap_len; i++) {              int ii = (i * 256) / cmap_len; -            double rgb[3], hsv[3]; +            double4 rgb, hsv; -            /* Colors are in 0-1 space */ -            for (j = 0; j < 3; j++) -               rgb[j] = the_palettes[idx].colors[ii][j] / 255.0; +             rgb = (double4) { +			       the_palettes[idx].colors[ii][0] / 255.0, +				   the_palettes[idx].colors[ii][1] / 255.0, +				   the_palettes[idx].colors[ii][2] / 255.0, +				   1.0, +				   }; -	         rgb2hsv(rgb, hsv); +	         hsv = rgb2hsv(rgb);  	         hsv[0] += hue_rotation * 6.0; -	         hsv2rgb(hsv, rgb); +	         rgb = hsv2rgb(hsv);  	         c[i].index = i; -	         for (j = 0; j < 3; j++) -		         c[i].color[j] = rgb[j]; +		     c[i].color = rgb; -		      c[i].color[3] = 1.0; +		     c[i].color[3] = 1.0;           }  	      return n; @@ -196,8 +198,7 @@ int flam3_get_palette(int n, flam3_palette c, double hue_rotation, randctx * con  /* rgb 0 - 1,     h 0 - 6, s 0 - 1, v 0 - 1 */ -void rgb2hsv(rgb, hsv) -   double *rgb; double *hsv; +double4 rgb2hsv(double4 rgb)   {    double rd, gd, bd, h, s, v, max, min, del, rc, gc, bc; @@ -231,17 +232,13 @@ void rgb2hsv(rgb, hsv)      if (h<0) h += 6;    } -  hsv[0] = h; -  hsv[1] = s; -  hsv[2] = v; +  return (double4) { h, s, v, rgb[3] };  }  /* h 0 - 6, s 0 - 1, v 0 - 1     rgb 0 - 1 */ -void hsv2rgb(hsv, rgb) -   double *hsv; -   double *rgb; +double4 hsv2rgb(double4 hsv)  {     double h = hsv[0], s = hsv[1], v = hsv[2];    int    j; @@ -266,9 +263,7 @@ void hsv2rgb(hsv, rgb)      default: rd = v;  gd = t;  bd = p;  break;     } -   rgb[0] = rd; -   rgb[1] = gd; -   rgb[2] = bd; +   return (double4) { rd, gd, bd, hsv[3] };  }  double flam3_calc_alpha(double density, double gamma, double linrange) { @@ -289,19 +284,15 @@ double flam3_calc_alpha(double density, double gamma, double linrange) {     return(alpha);  } -void flam3_calc_newrgb(double *cbuf, double ls, double highpow, double *newrgb) { +double4 flam3_calc_newrgb(double4 cbuf, double ls, double highpow) {     int rgbi;     double newls,lsratio; -   double newhsv[3];     double a, maxa=-1.0, maxc=0;     double adjhlp;     if (ls==0.0 || (cbuf[0]==0.0 && cbuf[1]==0.0 && cbuf[2]==0.0)) { -      newrgb[0] = 0.0; -      newrgb[1] = 0.0; -      newrgb[2] = 0.0; -      return; +      return (double4) { 0, 0, 0, 0 };     }     /* Identify the most saturated channel */ @@ -320,17 +311,13 @@ void flam3_calc_newrgb(double *cbuf, double ls, double highpow, double *newrgb)        lsratio = pow(newls/ls,highpow);        /* Calculate the max-value color (ranged 0 - 1) */ -      for (rgbi=0;rgbi<3;rgbi++) -         newrgb[rgbi] = newls*(cbuf[rgbi]/PREFILTER_WHITE)/255.0; +	  double4 newrgb = newls*(cbuf/PREFILTER_WHITE)/255.0;        /* Reduce saturation by the lsratio */ -      rgb2hsv(newrgb,newhsv); +      double4 newhsv = rgb2hsv(newrgb);        newhsv[1] *= lsratio; -      hsv2rgb(newhsv,newrgb); -      for (rgbi=0;rgbi<3;rgbi++) -         newrgb[rgbi] *= 255.0; -       +      return hsv2rgb(newhsv) * 255.0;     } else {        newls = 255.0/maxc;        adjhlp = -highpow; @@ -339,12 +326,10 @@ void flam3_calc_newrgb(double *cbuf, double ls, double highpow, double *newrgb)        if (maxa<=255)           adjhlp=1.0; -      /* Calculate the max-value color (ranged 0 - 1) interpolated with the old behaviour */ -      for (rgbi=0;rgbi<3;rgbi++) -         newrgb[rgbi] = ((1.0-adjhlp)*newls + adjhlp*ls)*(cbuf[rgbi]/PREFILTER_WHITE); - -//     for (rgbi=0;rgbi<3;rgbi++) -//        newrgb[rgbi] = ls*(cbuf[rgbi]/PREFILTER_WHITE); +	  /* Calculate the max-value color (ranged 0 - 1) interpolated with the old +	   * behaviour */ +		 +      return ((1.0-adjhlp)*newls + adjhlp*ls)*(cbuf/PREFILTER_WHITE);     }  } @@ -26,12 +26,13 @@ typedef struct {      unsigned char colors[256][3];  } lib_palette; +#include "vector.h" -void rgb2hsv(double *rgb, double *hsv); -void hsv2rgb(double *hsv, double *rgb); +double4 rgb2hsv(double4); +double4 hsv2rgb(double4);  double flam3_calc_alpha(double density, double gamma, double linrange); -void flam3_calc_newrgb(double *cbuf, double ls, double highpow, double *newrgb); +double4 flam3_calc_newrgb(double4 cbuf, double ls, double highpow);  #endif @@ -62,7 +62,7 @@ typedef struct {     double ws0, wb0s0, hs1, hb1s1; /* shortcuts for indexing */     flam3_palette_entry *dmap; /* palette */     double color_scalar; /* <1.0 if non-uniform motion blur is set */ -   void *buckets; /* Points to the first accumulator */ +   double4 *buckets; /* Points to the first accumulator */     double badvals; /* accumulates all badvalue resets */     double batch_size;     int temporal_sample_num,ntemporal_samples; @@ -16,7 +16,12 @@      along with this program.  If not, see <http://www.gnu.org/licenses/>.  */ -/* this file is included into flam3.c once for each buffer bit-width */ +#include <assert.h> + +#include "private.h" +#include "filters.h" +#include "variations.h" +#include "palettes.h"  /*   * for batch @@ -181,10 +186,9 @@ static void iter_thread(void *fth) {        for (j = 0; j < sub_batch_size; j++) {           double p0, p1, p00, p11;           double dbl_index0,dbl_frac; -         double interpcolor[4]; -         int ci, color_index0; +         double4 interpcolor; +         int color_index0;           const double4 p = fthp->iter_storage[j]; -         bucket *b;           if (fthp->cp.rotate != 0.0) {              p00 = p[0] - fthp->cp.rot_center[0]; @@ -199,7 +203,6 @@ static void iter_thread(void *fth) {           if (p0 >= ficp->bounds[0] && p1 >= ficp->bounds[1] && p0 <= ficp->bounds[2] && p1 <= ficp->bounds[3]) {              double logvis=1.0; -            bucket *buckets = (bucket *)(ficp->buckets);              /* Skip if invisible */              if (p[3]==0) @@ -207,9 +210,6 @@ static void iter_thread(void *fth) {              else                 logvis = p[3]; -            b = buckets + (int)(ficp->ws0 * p0 - ficp->wb0s0) + -                ficp->width * (int)(ficp->hs1 * p1 - ficp->hb1s1); -              dbl_index0 = p[2] * cmap_size;              color_index0 = (int) (dbl_index0); @@ -225,11 +225,8 @@ static void iter_thread(void *fth) {                    dbl_frac = dbl_index0 - (double)color_index0;                 } -               for (ci=0;ci<4;ci++) { -                  interpcolor[ci] = ficp->dmap[color_index0].color[ci] * (1.0-dbl_frac) +  -                                    ficp->dmap[color_index0+1].color[ci] * dbl_frac;                -               } -                +               interpcolor = ficp->dmap[color_index0].color * (1.0-dbl_frac) +  +                                    ficp->dmap[color_index0+1].color * dbl_frac;              } else { /* Palette mode step */                 if (color_index0 < 0) { @@ -238,24 +235,15 @@ static void iter_thread(void *fth) {                    color_index0 = cmap_size_m1;                 } -               for (ci=0;ci<4;ci++) -                  interpcolor[ci] = ficp->dmap[color_index0].color[ci];                +               interpcolor = ficp->dmap[color_index0].color;              } -            if (p[3]==1.0) { -               bump_no_overflow(b[0][0], interpcolor[0]); -               bump_no_overflow(b[0][1], interpcolor[1]); -               bump_no_overflow(b[0][2], interpcolor[2]); -               bump_no_overflow(b[0][3], interpcolor[3]); -               bump_no_overflow(b[0][4], 255.0); -            } else { -               bump_no_overflow(b[0][0], logvis*interpcolor[0]); -               bump_no_overflow(b[0][1], logvis*interpcolor[1]); -               bump_no_overflow(b[0][2], logvis*interpcolor[2]); -               bump_no_overflow(b[0][3], logvis*interpcolor[3]); -               bump_no_overflow(b[0][4], logvis*255.0); +            if (p[3]!=1.0) { +			   interpcolor *= logvis;              } +            ficp->buckets[(int)(ficp->ws0 * p0 - ficp->wb0s0) + ficp->width * (int)(ficp->hs1 * p1 - ficp->hb1s1)] += interpcolor; +           }        } @@ -266,14 +254,11 @@ static void iter_thread(void *fth) {       pthread_exit((void *)0);  } -static int render_rectangle(flam3_frame *spec, void *out, +int render_rectangle(flam3_frame *spec, void *out,  			     int field, int nchan, int transp, stat_struct *stats) {     long nbuckets;     int i, j, k, batch_num, temporal_sample_num;     double nsamples, batch_size; -   bucket  *buckets; -   abucket *accumulate; -   double4 *points;     double *filter, *temporal_filter, *temporal_deltas, *batch_filter;     double ppux=0, ppuy=0;     int image_width, image_height;    /* size of the image to produce */ @@ -304,9 +289,6 @@ static int render_rectangle(flam3_frame *spec, void *out,     char *ai;     int cmap_size; -   char *last_block; -   size_t memory_rqd; -     /* Per-render progress timers */     time_t progress_timer=0;     time_t progress_timer_history[64]; @@ -397,19 +379,16 @@ static int render_rectangle(flam3_frame *spec, void *out,     fic.width  = oversample * image_width  + 2 * gutter_width;     nbuckets = (long)fic.width * (long)fic.height; -   memory_rqd = (sizeof(bucket) * nbuckets + sizeof(abucket) * nbuckets + -                 4 * sizeof(double) * (size_t)(spec->sub_batch_size) * spec->nthreads); -   last_block = (char *) malloc(memory_rqd); -   if (NULL == last_block) { -      fprintf(stderr, "render_rectangle: cannot malloc %g bytes.\n", (double)memory_rqd); -      fprintf(stderr, "render_rectangle: w=%d h=%d nb=%ld.\n", fic.width, fic.height, nbuckets); -      return(1); -   } -   /* Just free buckets at the end */    -   buckets = (bucket *) last_block; -   accumulate = (abucket *) (last_block + sizeof(bucket) * nbuckets); -   points = (double4 *)  (last_block + (sizeof(bucket) + sizeof(abucket)) * nbuckets); +   double4 * const buckets = malloc (nbuckets * sizeof (*buckets)); +   assert (buckets != NULL); +   double4 * const accumulate = malloc (nbuckets * sizeof (*accumulate)); +   assert (accumulate != NULL); +   double4 ** const iter_storage = malloc (spec->nthreads * sizeof (*iter_storage)); +   assert (iter_storage != NULL); +   for (size_t i = 0; i < spec->nthreads; i++) { +      iter_storage[i] = malloc (spec->sub_batch_size * sizeof (*iter_storage[i])); +   }     if (verbose) {        fprintf(stderr, "chaos: "); @@ -417,7 +396,7 @@ static int render_rectangle(flam3_frame *spec, void *out,     }     background[0] = background[1] = background[2] = 0.0; -   memset((char *) accumulate, 0, sizeof(abucket) * nbuckets); +   memset(accumulate, 0, sizeof(*accumulate) * nbuckets);     /* Batch loop - outermost */ @@ -425,7 +404,7 @@ static int render_rectangle(flam3_frame *spec, void *out,        double sample_density=0.0;        double k1, area, k2; -      memset((char *) buckets, 0, sizeof(bucket) * nbuckets); +      memset(buckets, 0, sizeof(*buckets) * nbuckets);        /* Temporal sample loop */        for (temporal_sample_num = 0; temporal_sample_num < ntemporal_samples; temporal_sample_num++) { @@ -550,7 +529,7 @@ static int render_rectangle(flam3_frame *spec, void *out,  	         	fth[thi].timer_initialize = 0;              } -            fth[thi].iter_storage = &(points[thi*spec->sub_batch_size]); +            fth[thi].iter_storage = iter_storage[thi];              fth[thi].fic = &fic;              flam3_copy(&(fth[thi].cp),&cp); @@ -610,28 +589,14 @@ static int render_rectangle(flam3_frame *spec, void *out,        for (j = 0; j < fic.height; j++) {           for (i = 0; i < fic.width; i++) { +			const double4 c = buckets[i + j * fic.width]; -            abucket *a = accumulate + i + j * fic.width; -            bucket *b = buckets + i + j * fic.width; -            double c[4], ls; - -            c[0] = (double) b[0][0]; -            c[1] = (double) b[0][1]; -            c[2] = (double) b[0][2]; -            c[3] = (double) b[0][3];              if (0.0 == c[3])                 continue; -            ls = (k1 * log(1.0 + c[3] * k2))/c[3]; -            c[0] *= ls; -            c[1] *= ls; -            c[2] *= ls; -            c[3] *= ls; +            const double ls = (k1 * log(1.0 + c[3] * k2))/c[3]; -            abump_no_overflow(a[0][0], c[0]); -            abump_no_overflow(a[0][1], c[1]); -            abump_no_overflow(a[0][2], c[2]); -            abump_no_overflow(a[0][3], c[3]); +            accumulate[i + j * fic.width] += c * ls;           }        } @@ -646,7 +611,7 @@ static int render_rectangle(flam3_frame *spec, void *out,     /* filter the accumulation buffer down into the image */     if (1) {        int x, y; -      double t[4],newrgb[3]; +      double4 t,newrgb;        double g = 1.0 / (gamma / vib_gam_n);        double tmp,a;        double alpha,ls; @@ -666,26 +631,22 @@ static int render_rectangle(flam3_frame *spec, void *out,           for (j = 0; j < fic.height; j++) {              for (i = 0; i < fic.width; i++) { - -               abucket *ac = accumulate + i + j*fic.width; +               double4 ac = accumulate[i + j*fic.width]; -               if (ac[0][3]<=0) { +               if (ac[3]<=0) {                    alpha = 0.0;                    ls = 0.0;                 } else { -                  tmp=ac[0][3]/PREFILTER_WHITE; +                  tmp=ac[3]/PREFILTER_WHITE;                    alpha = flam3_calc_alpha(tmp,g,linrange);                    ls = vibrancy * 256.0 * alpha / tmp;                    if (alpha<0.0) alpha = 0.0;                    if (alpha>1.0) alpha = 1.0;                 } -               t[0] = (double)ac[0][0]; -               t[1] = (double)ac[0][1]; -               t[2] = (double)ac[0][2]; -               t[3] = (double)ac[0][3]; +			   t = ac; -               flam3_calc_newrgb(t, ls, highpow, newrgb); +               newrgb = flam3_calc_newrgb(t, ls, highpow);                 for (rgbi=0;rgbi<3;rgbi++) {                    a = newrgb[rgbi]; @@ -704,10 +665,12 @@ static int render_rectangle(flam3_frame *spec, void *out,                    if (a<0) a = 0;                    /* Replace values in accumulation buffer with these new ones */ -                  ac[0][rgbi] = a; +                  ac[rgbi] = a;                 } -               ac[0][3] = alpha; +               ac[3] = alpha; + +               accumulate[i + j*fic.width] = ac;              }           } @@ -726,13 +689,13 @@ static int render_rectangle(flam3_frame *spec, void *out,              for (ii = 0; ii < filter_width; ii++) {                 for (jj = 0; jj < filter_width; jj++) {                    double k = filter[ii + jj * filter_width]; -                  abucket *ac = accumulate + x+ii + (y+jj)*fic.width; +                  double4 ac = accumulate[x+ii + (y+jj)*fic.width]; -                  t[0] += k * ac[0][0]; -                  t[1] += k * ac[0][1]; -                  t[2] += k * ac[0][2]; -                  t[3] += k * ac[0][3]; +                  t[0] += k * ac[0]; +                  t[1] += k * ac[1]; +                  t[2] += k * ac[2]; +                  t[3] += k * ac[3];                 } @@ -757,7 +720,7 @@ static int render_rectangle(flam3_frame *spec, void *out,                    if (alpha>1.0) alpha = 1.0;                 } -               flam3_calc_newrgb(t, ls, highpow, newrgb); +               newrgb = flam3_calc_newrgb(t, ls, highpow);                 for (rgbi=0;rgbi<3;rgbi++) {                    a = newrgb[rgbi]; @@ -19,7 +19,7 @@ def configure(conf):      conf.write_config_header ('config.h')  def build(bld): -    bld.stlib (features='c cstlib', source='flam3.c filters.c parser.c variations.c interpolation.c palettes.c png.c random.c docstring.c', target='libflam3', use='xml2 png pthread', includes='.') +    bld.stlib (features='c cstlib', source='flam3.c filters.c parser.c variations.c interpolation.c palettes.c png.c random.c docstring.c rect.c', target='libflam3', use='xml2 png pthread', includes='.')      bld.program (features='c cprogram', source='flam3-render.c', target='flam3-render', use='libflam3 xml2 png amdlibm pthread', includes='.')      bld.program (features='c cprogram', source='flam3-genome.c', target='flam3-genome', use='libflam3 xml2 png amdlibm pthread', includes='.')      bld.program (features='c cprogram', source='flam3-animate.c', target='flam3-animate', use='libflam3 xml2 png amdlibm pthread', includes='.')  | 
