summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lars-Dominik Braun <lars@6xq.net>, 2015-02-16 17:35:10 +0100
committer: Lars-Dominik Braun <lars@6xq.net>, 2015-05-02 21:36:45 +0200
commit: 6123a81aecc4e3cd6c47c908fb7e9010d3d64798 (patch)
tree: c5db975b3789fd984fc51b7c3a426e7f56ff68bb
parent: 215dcd3d466303b39f8912602be039a7a3aefe5c (diff)
download: pucket-6123a81aecc4e3cd6c47c908fb7e9010d3d64798.tar.gz
pucket-6123a81aecc4e3cd6c47c908fb7e9010d3d64798.tar.bz2
pucket-6123a81aecc4e3cd6c47c908fb7e9010d3d64798.zip
Vectorize color clipping
Replace redundant code with one function. Oddly, this fixes rendering with earlyclip – not sure why. Drop the transparency and channel settings (output is now always transparent and always has four channels).
-rw-r--r-- flam3.c | 7
-rw-r--r-- flam3.h | 6
-rw-r--r-- main.c | 9
-rw-r--r-- math.h | 61
-rw-r--r-- palettes.c | 2
-rw-r--r-- png.c | 2
-rw-r--r-- private.h | 2
-rw-r--r-- rect.c | 207
-rw-r--r-- rect.h | 5
-rw-r--r-- variations.c | 6
-rw-r--r-- wscript | 2
11 files changed, 143 insertions, 166 deletions
diff --git a/flam3.c b/flam3.c
index c4464ed..694868f 100644
--- a/flam3.c
+++ b/flam3.c
@@ -17,8 +17,9 @@
*/
#include "private.h"
+#include "rect.h"
#include "img.h"
-#include "config.h"
+#include "build/config.h"
#include "variations.h"
#include "interpolation.h"
#include "parser.h"
@@ -3164,11 +3165,11 @@ int flam3_estimate_bounding_box(flam3_genome *cp, double eps, int nsamples,
}
int flam3_render(flam3_frame *spec, void *out,
- int field, int nchan, int trans, stat_struct *stats) {
+ int field, stat_struct *stats) {
int retval;
- retval = render_rectangle (spec, out, field, nchan, trans, stats);
+ retval = render_rectangle (spec, out, field, stats);
return(retval);
}
diff --git a/flam3.h b/flam3.h
index 5fe28f9..2434a78 100644
--- a/flam3.h
+++ b/flam3.h
@@ -601,9 +601,9 @@ typedef struct {
#define flam3_field_even 1
#define flam3_field_odd 2
-/* out is pixel array.
- pixels are rgb or rgba if nchan is 3 or 4. */
-int flam3_render(flam3_frame *f, void *out, int field, int nchan, int transp, stat_struct *stats);
+/* out is pixel array. pixels are rgba */
+int flam3_render(flam3_frame *spec, void *out,
+ int field, stat_struct *stats);
void rotate_by(double *p, double *center, double by);
diff --git a/main.c b/main.c
index ec70e49..d30d833 100644
--- a/main.c
+++ b/main.c
@@ -35,7 +35,6 @@ typedef struct {
bool verbose;
unsigned int threads, bpc, quality, oversample;
float scale;
- bool transparent;
} render_arguments;
static error_t parse_render_opt (int key, char *arg,
@@ -147,8 +146,7 @@ static void do_render (const render_arguments * const arguments) {
void *image = (void *) calloc(this_size, sizeof(char));
stat_struct stats;
- if (flam3_render (&f, image, flam3_field_both, channels,
- arguments->transparent, &stats)) {
+ if (flam3_render (&f, image, flam3_field_both, &stats)) {
fprintf(stderr,"error rendering image: aborting.\n");
exit(1);
}
@@ -423,15 +421,16 @@ int main (int argc, char **argv) {
const char doc[] = "vlame3-render -- a fractal flame renderer";
const struct argp argp = {
.options = options, .parser = parse_render_opt,
- .args_doc = NULL, .doc = doc, .children = NULL
+ .args_doc = NULL, .doc = doc, .children = NULL,
};
render_arguments arguments = {
.threads = flam3_count_nthreads(),
.bpc = 8,
.scale = 1.0,
- .transparent = false,
.quality = 100,
+ .verbose = true,
+ .oversample = 1,
};
argp_parse (&argp, argc, argv, 0, NULL, &arguments);
diff --git a/math.h b/math.h
new file mode 100644
index 0000000..defc7c6
--- /dev/null
+++ b/math.h
@@ -0,0 +1,61 @@
+/*
+ FLAM3 - cosmic recursive fractal flames
+ Copyright (C) 1992-2009 Spotworks LLC
+ Copyright (C) 2015 vlam3 contributors
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#include <math.h>
+
+#include "build/config.h"
+
+#ifdef HAVE_AMDLIBM
+#define REPLACE_WITH_AMDLIBM
+#include <amdlibm.h>
+#endif
+
+#define clamp(a,min,max) (a > max ? max : (a < min ? min : a))
+
+/* Vector wrapping function, could be replaced by true vector functions later
+ */
+
+inline double4 clamp_d4 (const double4 in, const double min, const double max) {
+ return (double4) {
+ clamp (in[0], min, max),
+ clamp (in[1], min, max),
+ clamp (in[2], min, max),
+ clamp (in[3], min, max),
+ };
+}
+
+inline double4 pow_d4 (const double4 in, double exp) {
+ return (double4) {
+ pow (in[0], exp),
+ pow (in[1], exp),
+ pow (in[2], exp),
+ pow (in[3], exp),
+ };
+}
+
+inline double4 nearbyint_d4 (const double4 in) {
+ return (double4) {
+ nearbyint (in[0]),
+ nearbyint (in[1]),
+ nearbyint (in[2]),
+ nearbyint (in[3]),
+ };
+}
diff --git a/palettes.c b/palettes.c
index 9251e93..53e99c1 100644
--- a/palettes.c
+++ b/palettes.c
@@ -386,7 +386,7 @@ static double try_colors(flam3_genome *g, int color_resolution) {
f.sub_batch_size = 10000;
image = (unsigned char *) calloc(g->width * g->height, 3);
- if (flam3_render(&f, image, flam3_field_both, 3, 0, &stats)) {
+ if (flam3_render(&f, image, flam3_field_both, &stats)) {
fprintf(stderr,"Error rendering test image for trycolors. Aborting.\n");
return(-1);
}
diff --git a/png.c b/png.c
index 1c28172..f28452b 100644
--- a/png.c
+++ b/png.c
@@ -22,7 +22,7 @@
#include <png.h>
#include <setjmp.h>
-#include "config.h"
+#include "build/config.h"
#include "img.h"
#include "flam3.h"
#include "private.h"
diff --git a/private.h b/private.h
index b568d8e..1f27600 100644
--- a/private.h
+++ b/private.h
@@ -20,7 +20,7 @@
#define private_included
#include "flam3.h"
-#include "config.h"
+#include "build/config.h"
#include <stdlib.h>
#include <ctype.h>
diff --git a/rect.c b/rect.c
index 5aee1d5..57234c5 100644
--- a/rect.c
+++ b/rect.c
@@ -23,6 +23,7 @@
#include "filters.h"
#include "variations.h"
#include "palettes.h"
+#include "math.h"
/*
* for batch
@@ -254,8 +255,36 @@ static void iter_thread(void *fth) {
pthread_exit((void *)0);
}
+/* Perform clipping
+ */
+static double4 clip (const double4 in, const double g, const double linrange,
+ const double highpow, const double vibrancy) {
+ double alpha, ls;
+
+ if (in[3] <= 0.0) {
+ alpha = 0.0;
+ ls = 0.0;
+ } else {
+ alpha = flam3_calc_alpha (in[3], g, linrange);
+ ls = vibrancy * alpha / in[3];
+ alpha = clamp (alpha, 0.0, 1.0);
+ }
+
+ double4 newrgb = flam3_calc_newrgb (in, ls, highpow);
+ newrgb += (1.0-vibrancy) * pow_d4 (in, g);
+ if (alpha > 0.0) {
+ newrgb /= alpha;
+ } else {
+ newrgb = (double4) {0, 0, 0, 0};
+ }
+ newrgb[3] = alpha;
+ newrgb = clamp_d4 (newrgb, 0.0, 1.0);
+
+ return newrgb;
+}
+
int render_rectangle(flam3_frame *spec, void *out,
- int field, int nchan, int transp, stat_struct *stats) {
+ int field, stat_struct *stats) {
long nbuckets;
int i, j, k, batch_num, temporal_sample_num;
double nsamples, batch_size;
@@ -273,7 +302,6 @@ int render_rectangle(flam3_frame *spec, void *out,
int gutter_width;
double vibrancy = 0.0;
double gamma = 0.0;
- double background[3];
int vib_gam_n = 0;
time_t progress_began=0;
int verbose = spec->verbose;
@@ -329,13 +357,14 @@ int render_rectangle(flam3_frame *spec, void *out,
fth[i].cp.final_xform_index=-1;
/* Set up the output image dimensions, adjusted for scanline */
+ const unsigned int channels = 4;
image_width = cp.width;
out_width = image_width;
if (field) {
image_height = cp.height / 2;
if (field == flam3_field_odd)
- out = (unsigned char *)out + nchan * bytes_per_channel * out_width;
+ out = (unsigned char *)out + channels * bytes_per_channel * out_width;
out_width *= 2;
} else
@@ -404,7 +433,6 @@ int render_rectangle(flam3_frame *spec, void *out,
progress_began = time(NULL);
}
- background[0] = background[1] = background[2] = 0.0;
memset(accumulate, 0, sizeof(*accumulate) * nbuckets);
@@ -575,9 +603,6 @@ int render_rectangle(flam3_frame *spec, void *out,
vibrancy += cp.vibrancy;
gamma += cp.gamma;
- background[0] += cp.background[0];
- background[1] += cp.background[1];
- background[2] += cp.background[2];
vib_gam_n++;
}
@@ -620,67 +645,21 @@ int render_rectangle(flam3_frame *spec, void *out,
/* filter the accumulation buffer down into the image */
if (1) {
int x, y;
- double4 t,newrgb;
- double g = 1.0 / (gamma / vib_gam_n);
- double tmp,a;
- double alpha,ls;
- int rgbi;
+ const double g = 1.0 / (gamma / vib_gam_n);
double linrange = cp.gam_lin_thresh;
vibrancy /= vib_gam_n;
- background[0] /= vib_gam_n;
- background[1] /= vib_gam_n;
- background[2] /= vib_gam_n;
/* If we're in the early clip mode, perform this first step to */
/* apply the gamma correction and clipping before the spat filt */
if (spec->earlyclip) {
-
for (j = 0; j < fic.height; j++) {
for (i = 0; i < fic.width; i++) {
- double4 ac = accumulate[i + j*fic.width];
-
- if (ac[3]<=0) {
- alpha = 0.0;
- ls = 0.0;
- } else {
- tmp=ac[3];
- alpha = flam3_calc_alpha(tmp,g,linrange);
- ls = vibrancy * alpha / tmp;
- if (alpha<0.0) alpha = 0.0;
- if (alpha>1.0) alpha = 1.0;
- }
-
- t = ac;
-
- newrgb = flam3_calc_newrgb(t, ls, highpow);
-
- for (rgbi=0;rgbi<3;rgbi++) {
- a = newrgb[rgbi];
- a += (1.0-vibrancy) * pow( t[rgbi], g);
- if (nchan<=3 || transp==0)
- a += ((1.0 - alpha) * background[rgbi]);
- else {
- if (alpha>0)
- a /= alpha;
- else
- a = 0;
- }
-
- /* Clamp here to ensure proper filter functionality */
- if (a>1.0) a = 1.0;
- if (a<0) a = 0;
-
- /* Replace values in accumulation buffer with these new ones */
- ac[rgbi] = a;
- }
-
- ac[3] = alpha;
-
- accumulate[i + j*fic.width] = ac;
-
+ const double4 in = accumulate[i + j*fic.width];
+ accumulate[i + j*fic.width] = clip (in, g, linrange, highpow,
+ vibrancy);
}
}
}
@@ -690,105 +669,41 @@ int render_rectangle(flam3_frame *spec, void *out,
for (j = 0; j < image_height; j++) {
x = 0;
for (i = 0; i < image_width; i++) {
- int ii, jj,rgbi;
- void *p;
- unsigned short *p16;
- unsigned char *p8;
- t[0] = t[1] = t[2] = t[3] = 0.0;
+ int ii, jj;
+ double4 t = (double4) { 0.0, 0.0, 0.0, 0.0 };
+
for (ii = 0; ii < filter_width; ii++) {
for (jj = 0; jj < filter_width; jj++) {
- double k = filter[ii + jj * filter_width];
- double4 ac = accumulate[x+ii + (y+jj)*fic.width];
+ const double k = filter[ii + jj * filter_width];
+ const double4 ac = accumulate[x+ii + (y+jj)*fic.width];
-
- t[0] += k * ac[0];
- t[1] += k * ac[1];
- t[2] += k * ac[2];
- t[3] += k * ac[3];
-
-
+ t += k * ac;
}
}
- p = (unsigned char *)out + nchan * bytes_per_channel * (i + j * out_width);
- p8 = (unsigned char *)p;
- p16 = (unsigned short *)p;
-
/* The old way, spatial filter first and then clip after gamma */
if (!spec->earlyclip) {
-
- tmp=t[3];
-
- if (t[3]<=0) {
- alpha = 0.0;
- ls = 0.0;
- } else {
- alpha = flam3_calc_alpha(tmp,g,linrange);
- ls = vibrancy * alpha / tmp;
- if (alpha<0.0) alpha = 0.0;
- if (alpha>1.0) alpha = 1.0;
- }
-
- newrgb = flam3_calc_newrgb(t, ls, highpow);
-
- for (rgbi=0;rgbi<3;rgbi++) {
- a = newrgb[rgbi];
- a += (1.0-vibrancy) * pow( t[rgbi], g);
- if (nchan<=3 || transp==0)
- a += ((1.0 - alpha) * background[rgbi]);
- else {
- if (alpha>0)
- a /= alpha;
- else
- a = 0;
- }
-
- /* Clamp here to ensure proper filter functionality */
- if (a>1.0) a = 1.0;
- if (a<0) a = 0;
-
- /* Replace values in accumulation buffer with these new ones */
- t[rgbi] = a;
- }
- t[3] = alpha;
- }
-
- for (rgbi=0;rgbi<3;rgbi++) {
-
- a = t[rgbi];
-
- if (a > 1.0)
- a = 1.0;
- if (a < 0)
- a = 0;
-
- if (2==bytes_per_channel) {
- p16[rgbi] = nearbyint (a * 65535.0);
- } else {
- p8[rgbi] = nearbyint (a * 255.0);
- }
+ t = clip (t, g, linrange, highpow, vibrancy);
}
-
- if (t[3]>1)
- t[3]=1;
- if (t[3]<0)
- t[3]=0;
-
- /* alpha */
- if (nchan>3) {
- if (transp==1) {
- if (2==bytes_per_channel)
- p16[3] = nearbyint (t[3] * 65535.0);
- else
- p8[3] = nearbyint (t[3] * 255);
- } else {
- if (2==bytes_per_channel)
- p16[3] = 65535;
- else
- p8[3] = 255;
- }
- }
+ const double maxval = (1 << (bytes_per_channel*8)) - 1;
+ t = nearbyint_d4 (t * maxval);
+
+ if (bytes_per_channel == 2) {
+ uint16_t * const p = &((uint16_t *) out)[channels * (i + j * out_width)];
+ p[0] = t[0];
+ p[1] = t[1];
+ p[2] = t[2];
+ p[3] = t[3];
+ } else if (bytes_per_channel == 1) {
+ uint8_t * const p = &((uint8_t *) out)[channels * (i + j * out_width)];
+ p[0] = t[0];
+ p[1] = t[1];
+ p[2] = t[2];
+ p[3] = t[3];
+ } else {
+ assert (0);
+ }
x += oversample;
}
diff --git a/rect.h b/rect.h
new file mode 100644
index 0000000..2d40713
--- /dev/null
+++ b/rect.h
@@ -0,0 +1,5 @@
+#pragma once
+
+int render_rectangle(flam3_frame *spec, void *out,
+ int field, stat_struct *stats);
+
diff --git a/variations.c b/variations.c
index fe4f6b9..44e4eb6 100644
--- a/variations.c
+++ b/variations.c
@@ -18,11 +18,7 @@
#include "variations.h"
#include "interpolation.h"
-
-#ifdef HAVE_AMDLIBM
-#define REPLACE_WITH_AMDLIBM
-#include <amdlibm.h>
-#endif
+#include "math.h"
#define badvalue(x) (((x)!=(x))||((x)>1e10)||((x)<-1e10))
diff --git a/wscript b/wscript
index 40dc819..6ba36c4 100644
--- a/wscript
+++ b/wscript
@@ -19,5 +19,5 @@ def configure(conf):
conf.write_config_header ('config.h')
def build(bld):
- bld.program (features='c cprogram', source='flam3.c filters.c parser.c variations.c interpolation.c palettes.c png.c random.c rect.c main.c', target='vlam3', use='xml2 png amdlibm pthread', includes='.')
+ bld.program (features='c cprogram', source='flam3.c filters.c parser.c variations.c interpolation.c palettes.c png.c random.c rect.c main.c', target='vlam3', use='xml2 png amdlibm pthread')