Vectorize color clipping

Replaces redundant code with one function. Oddly this fixes rendering with earlyclip – not sure why. Drop transparency and channel settings (always transparent, always four channels).
author: Lars-Dominik Braun <lars@6xq.net> 2015-02-16 17:35:10 +0100
committer: Lars-Dominik Braun <lars@6xq.net> 2015-05-02 21:36:45 +0200
commit: 6123a81aecc4e3cd6c47c908fb7e9010d3d64798 (patch)
tree: c5db975b3789fd984fc51b7c3a426e7f56ff68bb
parent: 215dcd3d466303b39f8912602be039a7a3aefe5c (diff)
download: pucket-6123a81aecc4e3cd6c47c908fb7e9010d3d64798.tar.gz
pucket-6123a81aecc4e3cd6c47c908fb7e9010d3d64798.tar.bz2
pucket-6123a81aecc4e3cd6c47c908fb7e9010d3d64798.zip
11 files changed, 143 insertions, 166 deletions
diff --git a/flam3.c b/flam3.c
index c4464ed..694868f 100644
--- a/flam3.c
+++ b/flam3.c
@@ -17,8 +17,9 @@
 */
 
 #include "private.h"
+#include "rect.h"
 #include "img.h"
-#include "config.h"
+#include "build/config.h"
 #include "variations.h"
 #include "interpolation.h"
 #include "parser.h"
@@ -3164,11 +3165,11 @@ int flam3_estimate_bounding_box(flam3_genome *cp, double eps, int nsamples,
 }
 
 int flam3_render(flam3_frame *spec, void *out,
-        int field, int nchan, int trans, stat_struct *stats) {
+        int field, stat_struct *stats) {
          
   int retval;
   
-  retval = render_rectangle (spec, out, field, nchan, trans, stats);
+  retval = render_rectangle (spec, out, field, stats);
   return(retval);
 }
 
diff --git a/flam3.h b/flam3.h
index 5fe28f9..2434a78 100644
--- a/flam3.h
+++ b/flam3.h
@@ -601,9 +601,9 @@ typedef struct {
 #define flam3_field_even  1
 #define flam3_field_odd   2
 
-/* out is pixel array.
-   pixels are rgb or rgba if nchan is 3 or 4. */
-int flam3_render(flam3_frame *f, void *out, int field, int nchan, int transp, stat_struct *stats);
+/* out is pixel array.  pixels are rgba */
+int flam3_render(flam3_frame *spec, void *out,
+        int field, stat_struct *stats);
 
 void rotate_by(double *p, double *center, double by);
 
diff --git a/main.c b/main.c
index ec70e49..d30d833 100644
--- a/main.c
+++ b/main.c
@@ -35,7 +35,6 @@ typedef struct {
 	bool verbose;
 	unsigned int threads, bpc, quality, oversample;
 	float scale;
-	bool transparent;
 } render_arguments;
 
 static error_t parse_render_opt (int key, char *arg,
@@ -147,8 +146,7 @@ static void do_render (const render_arguments * const arguments) {
 	void *image = (void *) calloc(this_size, sizeof(char));
 
 	stat_struct stats;
-	if (flam3_render (&f, image, flam3_field_both, channels,
-			arguments->transparent, &stats)) {
+	if (flam3_render (&f, image, flam3_field_both, &stats)) {
 		fprintf(stderr,"error rendering image: aborting.\n");
 		exit(1);
 	}
@@ -423,15 +421,16 @@ int main (int argc, char **argv) {
 		const char doc[] = "vlame3-render -- a fractal flame renderer";
 		const struct argp argp = {
 				.options = options, .parser = parse_render_opt,
-				.args_doc = NULL, .doc = doc, .children = NULL
+				.args_doc = NULL, .doc = doc, .children = NULL,
 				};
 
 		render_arguments arguments = {
 				.threads = flam3_count_nthreads(),
 				.bpc = 8,
 				.scale = 1.0,
-				.transparent = false,
 				.quality = 100,
+				.verbose = true,
+				.oversample = 1,
 				};
 
 		argp_parse (&argp, argc, argv, 0, NULL, &arguments);
diff --git a/math.h b/math.h
new file mode 100644
index 0000000..defc7c6
--- /dev/null
+++ b/math.h
@@ -0,0 +1,61 @@
+/*
+    FLAM3 - cosmic recursive fractal flames
+    Copyright (C) 1992-2009 Spotworks LLC
+    Copyright (C) 2015 vlam3 contributors
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#include <math.h>
+
+#include "build/config.h"
+
+#ifdef HAVE_AMDLIBM
+#define REPLACE_WITH_AMDLIBM
+#include <amdlibm.h>
+#endif
+
+/* Clamp a to [min, max]; arguments may be evaluated more than once. */
+#define clamp(a,min,max) ((a) > (max) ? (max) : ((a) < (min) ? (min) : (a)))
+/*	Element-wise double4 wrappers around scalar operations; could be replaced
+ *	by true vector functions later */
+
+/* Clamp each lane of in to [min, max].  static inline: this header is
+ * included by several .c files, each of which needs its own copy. */
+static inline double4 clamp_d4 (const double4 in, const double min, const double max) {
+	return (double4) {
+			clamp (in[0], min, max), clamp (in[1], min, max),
+			clamp (in[2], min, max), clamp (in[3], min, max),
+			};
+}
+
+/* Raise each lane of in to the power exp.  static inline: this header is
+ * included by several .c files, each of which needs its own copy. */
+static inline double4 pow_d4 (const double4 in, double exp) {
+	return (double4) {
+			pow (in[0], exp), pow (in[1], exp),
+			pow (in[2], exp), pow (in[3], exp),
+			};
+}
+
+/* Round each lane of in with nearbyint().  static inline: this header is
+ * included by several .c files, each of which needs its own copy. */
+static inline double4 nearbyint_d4 (const double4 in) {
+	return (double4) {
+			nearbyint (in[0]), nearbyint (in[1]),
+			nearbyint (in[2]), nearbyint (in[3]),
+			};
+}
diff --git a/palettes.c b/palettes.c
index 9251e93..53e99c1 100644
--- a/palettes.c
+++ b/palettes.c
@@ -386,7 +386,7 @@ static double try_colors(flam3_genome *g, int color_resolution) {
     f.sub_batch_size = 10000;
         
     image = (unsigned char *) calloc(g->width * g->height, 3);
-    if (flam3_render(&f, image, flam3_field_both, 3, 0, &stats)) {
+    if (flam3_render(&f, image, flam3_field_both, &stats)) {
        fprintf(stderr,"Error rendering test image for trycolors.  Aborting.\n");
        return(-1);
     }
diff --git a/png.c b/png.c
index 1c28172..f28452b 100644
--- a/png.c
+++ b/png.c
@@ -22,7 +22,7 @@
 #include <png.h>
 #include <setjmp.h>
 
-#include "config.h"
+#include "build/config.h"
 #include "img.h"
 #include "flam3.h"
 #include "private.h"
diff --git a/private.h b/private.h
index b568d8e..1f27600 100644
--- a/private.h
+++ b/private.h
@@ -20,7 +20,7 @@
 #define private_included
 
 #include "flam3.h"
-#include "config.h"
+#include "build/config.h"
 #include <stdlib.h>
 
 #include <ctype.h>
diff --git a/rect.c b/rect.c
index 5aee1d5..57234c5 100644
--- a/rect.c
+++ b/rect.c
@@ -23,6 +23,7 @@
 #include "filters.h"
 #include "variations.h"
 #include "palettes.h"
+#include "math.h"
 
 /*
  * for batch
@@ -254,8 +255,36 @@ static void iter_thread(void *fth) {
      pthread_exit((void *)0);
 }
 
+/*	Gamma- and vibrancy-correct one RGBA sample: in[3] drives the alpha, the
+ *	colors are divided by it and every lane is clamped to [0, 1] */
+static double4 clip (const double4 in, const double g, const double linrange,
+		const double highpow, const double vibrancy) {
+	/* in[3] <= 0 leaves both at zero; anything else (NaN included) is mapped */
+	double opacity = 0.0, scale = 0.0;
+	if (!(in[3] <= 0.0)) {
+		const double raw = flam3_calc_alpha (in[3], g, linrange);
+		/* the color scale is derived from the unclamped alpha */
+		scale = vibrancy * raw / in[3];
+		opacity = clamp (raw, 0.0, 1.0);
+	}
+
+	/* helper color plus the (1 - vibrancy)-weighted per-lane power curve */
+	const double4 base = flam3_calc_newrgb (in, scale, highpow);
+	const double4 mixed = base + (1.0-vibrancy) * pow_d4 (in, g);
+
+	/* an alpha that is not positive yields all-zero colors */
+	double4 out = (double4) {0, 0, 0, 0};
+	if (opacity > 0.0) {
+		/* lane 3 is divided too, but replaced right below */
+		out = mixed / opacity;
+	}
+	out[3] = opacity;
+
+	return clamp_d4 (out, 0.0, 1.0);
+}
+
 int render_rectangle(flam3_frame *spec, void *out,
-			     int field, int nchan, int transp, stat_struct *stats) {
+			     int field, stat_struct *stats) {
    long nbuckets;
    int i, j, k, batch_num, temporal_sample_num;
    double nsamples, batch_size;
@@ -273,7 +302,6 @@ int render_rectangle(flam3_frame *spec, void *out,
    int gutter_width;
    double vibrancy = 0.0;
    double gamma = 0.0;
-   double background[3];
    int vib_gam_n = 0;
    time_t progress_began=0;
    int verbose = spec->verbose;
@@ -329,13 +357,14 @@ int render_rectangle(flam3_frame *spec, void *out,
       fth[i].cp.final_xform_index=-1;
       
    /* Set up the output image dimensions, adjusted for scanline */   
+   const unsigned int channels = 4;
    image_width = cp.width;
    out_width = image_width;
    if (field) {
       image_height = cp.height / 2;
       
       if (field == flam3_field_odd)
-         out = (unsigned char *)out + nchan * bytes_per_channel * out_width;
+         out = (unsigned char *)out + channels * bytes_per_channel * out_width;
          
       out_width *= 2;
    } else
@@ -404,7 +433,6 @@ int render_rectangle(flam3_frame *spec, void *out,
       progress_began = time(NULL);
    }
 
-   background[0] = background[1] = background[2] = 0.0;
    memset(accumulate, 0, sizeof(*accumulate) * nbuckets);
 
 
@@ -575,9 +603,6 @@ int render_rectangle(flam3_frame *spec, void *out,
 
          vibrancy += cp.vibrancy;
          gamma += cp.gamma;
-         background[0] += cp.background[0];
-         background[1] += cp.background[1];
-         background[2] += cp.background[2];
          vib_gam_n++;
 
       }
@@ -620,67 +645,21 @@ int render_rectangle(flam3_frame *spec, void *out,
    /* filter the accumulation buffer down into the image */
    if (1) {
       int x, y;
-      double4 t,newrgb;
-      double g = 1.0 / (gamma / vib_gam_n);
-      double tmp,a;
-      double alpha,ls;
-      int rgbi;
+      const double g = 1.0 / (gamma / vib_gam_n);
 
       double linrange = cp.gam_lin_thresh;
 
       vibrancy /= vib_gam_n;
-      background[0] /= vib_gam_n;
-      background[1] /= vib_gam_n;
-      background[2] /= vib_gam_n;
       
       /* If we're in the early clip mode, perform this first step to  */
       /* apply the gamma correction and clipping before the spat filt */
       
       if (spec->earlyclip) {
-
          for (j = 0; j < fic.height; j++) {
             for (i = 0; i < fic.width; i++) {
-               double4 ac = accumulate[i + j*fic.width];
-               
-               if (ac[3]<=0) {
-                  alpha = 0.0;
-                  ls = 0.0;
-               } else {
-                  tmp=ac[3];
-                  alpha = flam3_calc_alpha(tmp,g,linrange);
-                  ls = vibrancy * alpha / tmp;
-                  if (alpha<0.0) alpha = 0.0;
-                  if (alpha>1.0) alpha = 1.0;
-               }
-            
-			   t = ac;
-            
-               newrgb = flam3_calc_newrgb(t, ls, highpow);
-                  
-               for (rgbi=0;rgbi<3;rgbi++) {
-                  a = newrgb[rgbi];
-                  a += (1.0-vibrancy) * pow( t[rgbi], g);
-                  if (nchan<=3 || transp==0)
-                     a += ((1.0 - alpha) * background[rgbi]);
-                  else {
-                     if (alpha>0)
-                        a /= alpha;
-                     else
-                        a = 0;
-                  }
-
-                  /* Clamp here to ensure proper filter functionality */
-                  if (a>1.0) a = 1.0;
-                  if (a<0) a = 0;
-               
-                  /* Replace values in accumulation buffer with these new ones */
-                  ac[rgbi] = a;
-               }
-
-               ac[3] = alpha;
-
-               accumulate[i + j*fic.width] = ac;
-
+               const double4 in = accumulate[i + j*fic.width];
+			   accumulate[i + j*fic.width] = clip (in, g, linrange, highpow,
+					   vibrancy);
             }
          }
       }
@@ -690,105 +669,41 @@ int render_rectangle(flam3_frame *spec, void *out,
       for (j = 0; j < image_height; j++) {
          x = 0;
          for (i = 0; i < image_width; i++) {
-            int ii, jj,rgbi;
-            void *p;
-            unsigned short *p16;
-            unsigned char *p8;
-            t[0] = t[1] = t[2] = t[3] = 0.0;
+            int ii, jj;
+			double4 t = (double4) { 0.0, 0.0, 0.0, 0.0 };
+
             for (ii = 0; ii < filter_width; ii++) {
                for (jj = 0; jj < filter_width; jj++) {
-                  double k = filter[ii + jj * filter_width];
-                  double4 ac = accumulate[x+ii + (y+jj)*fic.width];
+                  const double k = filter[ii + jj * filter_width];
+                  const double4 ac = accumulate[x+ii + (y+jj)*fic.width];
                   
-
-                  t[0] += k * ac[0];
-                  t[1] += k * ac[1];
-                  t[2] += k * ac[2];
-                  t[3] += k * ac[3];
-
-
+				  t += k * ac;
                }
             }
 
-            p = (unsigned char *)out + nchan * bytes_per_channel * (i + j * out_width);
-            p8 = (unsigned char *)p;
-            p16 = (unsigned short *)p;
-            
             /* The old way, spatial filter first and then clip after gamma */
             if (!spec->earlyclip) {
-            
-               tmp=t[3];
-               
-               if (t[3]<=0) {
-                  alpha = 0.0;
-                  ls = 0.0;
-               } else { 
-                  alpha = flam3_calc_alpha(tmp,g,linrange);
-                  ls = vibrancy * alpha / tmp;
-                  if (alpha<0.0) alpha = 0.0;
-                  if (alpha>1.0) alpha = 1.0;
-               }
-              
-               newrgb = flam3_calc_newrgb(t, ls, highpow);
-
-               for (rgbi=0;rgbi<3;rgbi++) {
-                  a = newrgb[rgbi];
-                  a += (1.0-vibrancy) * pow( t[rgbi], g);
-                  if (nchan<=3 || transp==0)
-                     a += ((1.0 - alpha) * background[rgbi]);
-                  else {
-                     if (alpha>0)
-                        a /= alpha;
-                     else
-                        a = 0;
-                  }
-
-                  /* Clamp here to ensure proper filter functionality */
-                  if (a>1.0) a = 1.0;
-                  if (a<0) a = 0;
-               
-                  /* Replace values in accumulation buffer with these new ones */
-                  t[rgbi] = a;
-               }
-               t[3] = alpha;
-            }
-
-            for (rgbi=0;rgbi<3;rgbi++) {
-
-               a = t[rgbi];
-
-               if (a > 1.0)
-                  a = 1.0;
-               if (a < 0)
-                  a = 0;
-               
-               if (2==bytes_per_channel) {
-                  p16[rgbi] = nearbyint (a * 65535.0);
-               } else {
-                  p8[rgbi] = nearbyint (a * 255.0);
-               }
+			   t = clip (t, g, linrange, highpow, vibrancy);
             }
 
-
-            if (t[3]>1)
-               t[3]=1;
-            if (t[3]<0)
-               t[3]=0;
-
-            /* alpha */
-            if (nchan>3) {
-               if (transp==1) {
-                  if (2==bytes_per_channel)
-                     p16[3] = nearbyint (t[3] * 65535.0);
-                  else
-                     p8[3] = nearbyint (t[3] * 255);
-               } else {
-                  if (2==bytes_per_channel)
-                     p16[3] = 65535;
-                  else
-                     p8[3] = 255;
-               }
-            }
+			const double maxval = (1 << (bytes_per_channel*8)) - 1;
+			t = nearbyint_d4 (t * maxval);
+
+			if (bytes_per_channel == 2) {
+				uint16_t * const p = &((uint16_t *) out)[channels * (i + j * out_width)];
+				p[0] = t[0];
+				p[1] = t[1];
+				p[2] = t[2];
+				p[3] = t[3];
+			} else if (bytes_per_channel == 1) {
+				uint8_t * const p = &((uint8_t *) out)[channels * (i + j * out_width)];
+				p[0] = t[0];
+				p[1] = t[1];
+				p[2] = t[2];
+				p[3] = t[3];
+			} else {
+				assert (0);
+			}
 
             x += oversample;
          }
diff --git a/rect.h b/rect.h
new file mode 100644
index 0000000..2d40713
--- /dev/null
+++ b/rect.h
@@ -0,0 +1,5 @@
+#pragma once
+
+int render_rectangle(flam3_frame *spec, void *out,
+			     int field, stat_struct *stats);
+
diff --git a/variations.c b/variations.c
index fe4f6b9..44e4eb6 100644
--- a/variations.c
+++ b/variations.c
@@ -18,11 +18,7 @@
 
 #include "variations.h"
 #include "interpolation.h" 
-
-#ifdef HAVE_AMDLIBM
-#define REPLACE_WITH_AMDLIBM
-#include <amdlibm.h>
-#endif
+#include "math.h" 
 
 #define badvalue(x) (((x)!=(x))||((x)>1e10)||((x)<-1e10))
 
diff --git a/wscript b/wscript
index 40dc819..6ba36c4 100644
--- a/wscript
+++ b/wscript
@@ -19,5 +19,5 @@ def configure(conf):
     conf.write_config_header ('config.h')
 
 def build(bld):
-    bld.program (features='c cprogram', source='flam3.c filters.c parser.c variations.c interpolation.c palettes.c png.c random.c rect.c main.c', target='vlam3', use='xml2 png amdlibm pthread', includes='.')
+    bld.program (features='c cprogram', source='flam3.c filters.c parser.c variations.c interpolation.c palettes.c png.c random.c rect.c main.c', target='vlam3', use='xml2 png amdlibm pthread')
author	Lars-Dominik Braun <lars@6xq.net>	2015-02-16 17:35:10 +0100
committer	Lars-Dominik Braun <lars@6xq.net>	2015-05-02 21:36:45 +0200
commit	6123a81aecc4e3cd6c47c908fb7e9010d3d64798 (patch)
tree	c5db975b3789fd984fc51b7c3a426e7f56ff68bb
parent	215dcd3d466303b39f8912602be039a7a3aefe5c (diff)
download	pucket-6123a81aecc4e3cd6c47c908fb7e9010d3d64798.tar.gz pucket-6123a81aecc4e3cd6c47c908fb7e9010d3d64798.tar.bz2 pucket-6123a81aecc4e3cd6c47c908fb7e9010d3d64798.zip