From 6123a81aecc4e3cd6c47c908fb7e9010d3d64798 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Mon, 16 Feb 2015 17:35:10 +0100 Subject: Vectorize color clipping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces redundant code with one function. Oddly this fixes rendering with earlyclip – not sure why. Drop transparency and channel settings (always transparent, always four channels). --- flam3.c | 7 +- flam3.h | 6 +- main.c | 9 ++- math.h | 61 ++++++++++++++++++ palettes.c | 2 +- png.c | 2 +- private.h | 2 +- rect.c | 207 ++++++++++++++++++----------------------------------------- rect.h | 5 ++ variations.c | 6 +- wscript | 2 +- 11 files changed, 143 insertions(+), 166 deletions(-) create mode 100644 math.h create mode 100644 rect.h diff --git a/flam3.c b/flam3.c index c4464ed..694868f 100644 --- a/flam3.c +++ b/flam3.c @@ -17,8 +17,9 @@ */ #include "private.h" +#include "rect.h" #include "img.h" -#include "config.h" +#include "build/config.h" #include "variations.h" #include "interpolation.h" #include "parser.h" @@ -3164,11 +3165,11 @@ int flam3_estimate_bounding_box(flam3_genome *cp, double eps, int nsamples, } int flam3_render(flam3_frame *spec, void *out, - int field, int nchan, int trans, stat_struct *stats) { + int field, stat_struct *stats) { int retval; - retval = render_rectangle (spec, out, field, nchan, trans, stats); + retval = render_rectangle (spec, out, field, stats); return(retval); } diff --git a/flam3.h b/flam3.h index 5fe28f9..2434a78 100644 --- a/flam3.h +++ b/flam3.h @@ -601,9 +601,9 @@ typedef struct { #define flam3_field_even 1 #define flam3_field_odd 2 -/* out is pixel array. - pixels are rgb or rgba if nchan is 3 or 4. */ -int flam3_render(flam3_frame *f, void *out, int field, int nchan, int transp, stat_struct *stats); +/* out is pixel array. 
pixels are rgba */ +int flam3_render(flam3_frame *spec, void *out, + int field, stat_struct *stats); void rotate_by(double *p, double *center, double by); diff --git a/main.c b/main.c index ec70e49..d30d833 100644 --- a/main.c +++ b/main.c @@ -35,7 +35,6 @@ typedef struct { bool verbose; unsigned int threads, bpc, quality, oversample; float scale; - bool transparent; } render_arguments; static error_t parse_render_opt (int key, char *arg, @@ -147,8 +146,7 @@ static void do_render (const render_arguments * const arguments) { void *image = (void *) calloc(this_size, sizeof(char)); stat_struct stats; - if (flam3_render (&f, image, flam3_field_both, channels, - arguments->transparent, &stats)) { + if (flam3_render (&f, image, flam3_field_both, &stats)) { fprintf(stderr,"error rendering image: aborting.\n"); exit(1); } @@ -423,15 +421,16 @@ int main (int argc, char **argv) { const char doc[] = "vlame3-render -- a fractal flame renderer"; const struct argp argp = { .options = options, .parser = parse_render_opt, - .args_doc = NULL, .doc = doc, .children = NULL + .args_doc = NULL, .doc = doc, .children = NULL, }; render_arguments arguments = { .threads = flam3_count_nthreads(), .bpc = 8, .scale = 1.0, - .transparent = false, .quality = 100, + .verbose = true, + .oversample = 1, }; argp_parse (&argp, argc, argv, 0, NULL, &arguments); diff --git a/math.h b/math.h new file mode 100644 index 0000000..defc7c6 --- /dev/null +++ b/math.h @@ -0,0 +1,61 @@ +/* + FLAM3 - cosmic recursive fractal flames + Copyright (C) 1992-2009 Spotworks LLC + Copyright (C) 2015 vlam3 contributors + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#pragma once + +#include <math.h> + +#include "build/config.h" + +#ifdef HAVE_AMDLIBM +#define REPLACE_WITH_AMDLIBM +#include <amdlibm.h> +#endif + +#define clamp(a,min,max) ((a) > (max) ? (max) : ((a) < (min) ? (min) : (a))) /* limit a to [min,max]; every argument is parenthesized but may be evaluated twice, so pass side-effect-free expressions only */ + +/* Vector wrapping functions: each one applies a scalar operation to the four lanes of a double4 and returns the result. They are static inline because a plain inline definition in a header emits no external symbol under C99/C11 rules, which breaks the link whenever a call is not inlined. Could be replaced by true vector functions later + */ + +static inline double4 clamp_d4 (const double4 in, const double min, const double max) { + return (double4) { + clamp (in[0], min, max), + clamp (in[1], min, max), + clamp (in[2], min, max), + clamp (in[3], min, max), + }; +} + +static inline double4 pow_d4 (const double4 in, double exp) { + return (double4) { + pow (in[0], exp), + pow (in[1], exp), + pow (in[2], exp), + pow (in[3], exp), + }; +} + +static inline double4 nearbyint_d4 (const double4 in) { + return (double4) { + nearbyint (in[0]), + nearbyint (in[1]), + nearbyint (in[2]), + nearbyint (in[3]), + }; +} diff --git a/palettes.c b/palettes.c index 9251e93..53e99c1 100644 --- a/palettes.c +++ b/palettes.c @@ -386,7 +386,7 @@ static double try_colors(flam3_genome *g, int color_resolution) { f.sub_batch_size = 10000; - image = (unsigned char *) calloc(g->width * g->height, 3); - if (flam3_render(&f, image, flam3_field_both, 3, 0, &stats)) { + image = (unsigned char *) calloc(g->width * g->height, 4); /* flam3_render now always writes four channels per pixel */ + if (flam3_render(&f, image, flam3_field_both, &stats)) { fprintf(stderr,"Error rendering test image for trycolors. 
Aborting.\n"); return(-1); } diff --git a/png.c b/png.c index 1c28172..f28452b 100644 --- a/png.c +++ b/png.c @@ -22,7 +22,7 @@ #include #include -#include "config.h" +#include "build/config.h" #include "img.h" #include "flam3.h" #include "private.h" diff --git a/private.h b/private.h index b568d8e..1f27600 100644 --- a/private.h +++ b/private.h @@ -20,7 +20,7 @@ #define private_included #include "flam3.h" -#include "config.h" +#include "build/config.h" #include #include diff --git a/rect.c b/rect.c index 5aee1d5..57234c5 100644 --- a/rect.c +++ b/rect.c @@ -23,6 +23,7 @@ #include "filters.h" #include "variations.h" #include "palettes.h" +#include "math.h" /* * for batch @@ -254,8 +255,36 @@ static void iter_thread(void *fth) { pthread_exit((void *)0); } +/* Perform clipping of one four-lane bucket. Lane 3 of the input is the value handed to flam3_calc_alpha (in[3], g, linrange); when it is not positive, alpha and ls are both 0. Otherwise ls is vibrancy * alpha / in[3], computed from the unclamped alpha, and alpha is then clamped to [0,1]. The colour is flam3_calc_newrgb (in, ls, highpow) plus (1.0-vibrancy) times each lane raised to the power g, divided by alpha, or all zero when alpha is 0. Lane 3 of the result is overwritten with alpha and finally every lane is clamped to [0,1]. Callers in render_rectangle pass g = 1.0 / (gamma / vib_gam_n) and apply this either to each accumulation bucket (earlyclip) or to the spatially filtered sum. + */ +static double4 clip (const double4 in, const double g, const double linrange, + const double highpow, const double vibrancy) { + double alpha, ls; + + if (in[3] <= 0.0) { + alpha = 0.0; + ls = 0.0; + } else { + alpha = flam3_calc_alpha (in[3], g, linrange); + ls = vibrancy * alpha / in[3]; + alpha = clamp (alpha, 0.0, 1.0); + } + + double4 newrgb = flam3_calc_newrgb (in, ls, highpow); + newrgb += (1.0-vibrancy) * pow_d4 (in, g); + if (alpha > 0.0) { + newrgb /= alpha; + } else { + newrgb = (double4) {0, 0, 0, 0}; + } + newrgb[3] = alpha; + newrgb = clamp_d4 (newrgb, 0.0, 1.0); + + return newrgb; +} + int render_rectangle(flam3_frame *spec, void *out, - int field, int nchan, int transp, stat_struct *stats) { + int field, stat_struct *stats) { long nbuckets; int i, j, k, batch_num, temporal_sample_num; double nsamples, batch_size; @@ -273,7 +302,6 @@ int render_rectangle(flam3_frame *spec, void *out, int gutter_width; double vibrancy = 0.0; double gamma = 0.0; - double background[3]; int vib_gam_n = 0; time_t progress_began=0; int verbose = spec->verbose; @@ -329,13 +357,14 @@ int render_rectangle(flam3_frame *spec, void *out, fth[i].cp.final_xform_index=-1; /* Set up the output image 
dimensions, adjusted for scanline */ + const unsigned int channels = 4; image_width = cp.width; out_width = image_width; if (field) { image_height = cp.height / 2; if (field == flam3_field_odd) - out = (unsigned char *)out + nchan * bytes_per_channel * out_width; + out = (unsigned char *)out + channels * bytes_per_channel * out_width; out_width *= 2; } else @@ -404,7 +433,6 @@ int render_rectangle(flam3_frame *spec, void *out, progress_began = time(NULL); } - background[0] = background[1] = background[2] = 0.0; memset(accumulate, 0, sizeof(*accumulate) * nbuckets); @@ -575,9 +603,6 @@ int render_rectangle(flam3_frame *spec, void *out, vibrancy += cp.vibrancy; gamma += cp.gamma; - background[0] += cp.background[0]; - background[1] += cp.background[1]; - background[2] += cp.background[2]; vib_gam_n++; } @@ -620,67 +645,21 @@ int render_rectangle(flam3_frame *spec, void *out, /* filter the accumulation buffer down into the image */ if (1) { int x, y; - double4 t,newrgb; - double g = 1.0 / (gamma / vib_gam_n); - double tmp,a; - double alpha,ls; - int rgbi; + const double g = 1.0 / (gamma / vib_gam_n); double linrange = cp.gam_lin_thresh; vibrancy /= vib_gam_n; - background[0] /= vib_gam_n; - background[1] /= vib_gam_n; - background[2] /= vib_gam_n; /* If we're in the early clip mode, perform this first step to */ /* apply the gamma correction and clipping before the spat filt */ if (spec->earlyclip) { - for (j = 0; j < fic.height; j++) { for (i = 0; i < fic.width; i++) { - double4 ac = accumulate[i + j*fic.width]; - - if (ac[3]<=0) { - alpha = 0.0; - ls = 0.0; - } else { - tmp=ac[3]; - alpha = flam3_calc_alpha(tmp,g,linrange); - ls = vibrancy * alpha / tmp; - if (alpha<0.0) alpha = 0.0; - if (alpha>1.0) alpha = 1.0; - } - - t = ac; - - newrgb = flam3_calc_newrgb(t, ls, highpow); - - for (rgbi=0;rgbi<3;rgbi++) { - a = newrgb[rgbi]; - a += (1.0-vibrancy) * pow( t[rgbi], g); - if (nchan<=3 || transp==0) - a += ((1.0 - alpha) * background[rgbi]); - else { - if (alpha>0) - 
a /= alpha; - else - a = 0; - } - - /* Clamp here to ensure proper filter functionality */ - if (a>1.0) a = 1.0; - if (a<0) a = 0; - - /* Replace values in accumulation buffer with these new ones */ - ac[rgbi] = a; - } - - ac[3] = alpha; - - accumulate[i + j*fic.width] = ac; - + const double4 in = accumulate[i + j*fic.width]; + accumulate[i + j*fic.width] = clip (in, g, linrange, highpow, + vibrancy); } } } @@ -690,105 +669,41 @@ int render_rectangle(flam3_frame *spec, void *out, for (j = 0; j < image_height; j++) { x = 0; for (i = 0; i < image_width; i++) { - int ii, jj,rgbi; - void *p; - unsigned short *p16; - unsigned char *p8; - t[0] = t[1] = t[2] = t[3] = 0.0; + int ii, jj; + double4 t = (double4) { 0.0, 0.0, 0.0, 0.0 }; + for (ii = 0; ii < filter_width; ii++) { for (jj = 0; jj < filter_width; jj++) { - double k = filter[ii + jj * filter_width]; - double4 ac = accumulate[x+ii + (y+jj)*fic.width]; + const double k = filter[ii + jj * filter_width]; + const double4 ac = accumulate[x+ii + (y+jj)*fic.width]; - - t[0] += k * ac[0]; - t[1] += k * ac[1]; - t[2] += k * ac[2]; - t[3] += k * ac[3]; - - + t += k * ac; } } - p = (unsigned char *)out + nchan * bytes_per_channel * (i + j * out_width); - p8 = (unsigned char *)p; - p16 = (unsigned short *)p; - /* The old way, spatial filter first and then clip after gamma */ if (!spec->earlyclip) { - - tmp=t[3]; - - if (t[3]<=0) { - alpha = 0.0; - ls = 0.0; - } else { - alpha = flam3_calc_alpha(tmp,g,linrange); - ls = vibrancy * alpha / tmp; - if (alpha<0.0) alpha = 0.0; - if (alpha>1.0) alpha = 1.0; - } - - newrgb = flam3_calc_newrgb(t, ls, highpow); - - for (rgbi=0;rgbi<3;rgbi++) { - a = newrgb[rgbi]; - a += (1.0-vibrancy) * pow( t[rgbi], g); - if (nchan<=3 || transp==0) - a += ((1.0 - alpha) * background[rgbi]); - else { - if (alpha>0) - a /= alpha; - else - a = 0; - } - - /* Clamp here to ensure proper filter functionality */ - if (a>1.0) a = 1.0; - if (a<0) a = 0; - - /* Replace values in accumulation buffer with these 
new ones */ - t[rgbi] = a; - } - t[3] = alpha; - } - - for (rgbi=0;rgbi<3;rgbi++) { - - a = t[rgbi]; - - if (a > 1.0) - a = 1.0; - if (a < 0) - a = 0; - - if (2==bytes_per_channel) { - p16[rgbi] = nearbyint (a * 65535.0); - } else { - p8[rgbi] = nearbyint (a * 255.0); - } + t = clip (t, g, linrange, highpow, vibrancy); } - - if (t[3]>1) - t[3]=1; - if (t[3]<0) - t[3]=0; - - /* alpha */ - if (nchan>3) { - if (transp==1) { - if (2==bytes_per_channel) - p16[3] = nearbyint (t[3] * 65535.0); - else - p8[3] = nearbyint (t[3] * 255); - } else { - if (2==bytes_per_channel) - p16[3] = 65535; - else - p8[3] = 255; - } - } + const double maxval = (1 << (bytes_per_channel*8)) - 1; + t = nearbyint_d4 (t * maxval); + + if (bytes_per_channel == 2) { + uint16_t * const p = &((uint16_t *) out)[channels * (i + j * out_width)]; + p[0] = t[0]; + p[1] = t[1]; + p[2] = t[2]; + p[3] = t[3]; + } else if (bytes_per_channel == 1) { + uint8_t * const p = &((uint8_t *) out)[channels * (i + j * out_width)]; + p[0] = t[0]; + p[1] = t[1]; + p[2] = t[2]; + p[3] = t[3]; + } else { + assert (0); + } x += oversample; } diff --git a/rect.h b/rect.h new file mode 100644 index 0000000..2d40713 --- /dev/null +++ b/rect.h @@ -0,0 +1,5 @@ +#pragma once + +int render_rectangle(flam3_frame *spec, void *out, + int field, stat_struct *stats); + diff --git a/variations.c b/variations.c index fe4f6b9..44e4eb6 100644 --- a/variations.c +++ b/variations.c @@ -18,11 +18,7 @@ #include "variations.h" #include "interpolation.h" - -#ifdef HAVE_AMDLIBM -#define REPLACE_WITH_AMDLIBM -#include -#endif +#include "math.h" #define badvalue(x) (((x)!=(x))||((x)>1e10)||((x)<-1e10)) diff --git a/wscript b/wscript index 40dc819..6ba36c4 100644 --- a/wscript +++ b/wscript @@ -19,5 +19,5 @@ def configure(conf): conf.write_config_header ('config.h') def build(bld): - bld.program (features='c cprogram', source='flam3.c filters.c parser.c variations.c interpolation.c palettes.c png.c random.c rect.c main.c', target='vlam3', 
use='xml2 png amdlibm pthread', includes='.') + bld.program (features='c cprogram', source='flam3.c filters.c parser.c variations.c interpolation.c palettes.c png.c random.c rect.c main.c', target='vlam3', use='xml2 png amdlibm pthread') -- cgit v1.2.3