summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--flam3.c35
-rw-r--r--flam3.h4
-rw-r--r--interpolation.c31
-rw-r--r--palettes.c63
-rw-r--r--palettes.h7
-rw-r--r--private.h2
-rw-r--r--rect.c131
-rw-r--r--wscript2
8 files changed, 97 insertions, 178 deletions
diff --git a/flam3.c b/flam3.c
index 2380c6f..4564b3f 100644
--- a/flam3.c
+++ b/flam3.c
@@ -3312,41 +3312,6 @@ int flam3_estimate_bounding_box(flam3_genome *cp, double eps, int nsamples,
return(bv);
}
-
-typedef double bucket_double[5];
-typedef double abucket_double[4];
-typedef unsigned int bucket_int[5];
-typedef unsigned int abucket_int[4];
-typedef float bucket_float[5];
-typedef float abucket_float[4];
-
-/* experimental 32-bit datatypes (called 33) */
-#define bucket bucket_int
-#define abucket abucket_float
-#define abump_no_overflow(dest, delta) do {dest += delta;} while (0)
-#define add_c_to_accum(acc,i,ii,j,jj,wid,hgt,c) do { \
- if ( (j) + (jj) >=0 && (j) + (jj) < (hgt) && (i) + (ii) >=0 && (i) + (ii) < (wid)) { \
- abucket *a = (acc) + ( (i) + (ii) ) + ( (j) + (jj) ) * (wid); \
- abump_no_overflow(a[0][0],(c)[0]); \
- abump_no_overflow(a[0][1],(c)[1]); \
- abump_no_overflow(a[0][2],(c)[2]); \
- abump_no_overflow(a[0][3],(c)[3]); \
- } \
-} while (0)
-/* single-threaded */
-#define bump_no_overflow(dest, delta) do { \
- if (UINT_MAX - dest > delta) dest += delta; else dest = UINT_MAX; \
-} while (0)
-#define iter_thread iter_thread_float
-#include "rect.c"
-#undef iter_thread
-#undef render_rectangle
-#undef add_c_to_accum
-#undef bucket
-#undef abucket
-#undef bump_no_overflow
-#undef abump_no_overflow
-
int flam3_render(flam3_frame *spec, void *out,
int field, int nchan, int trans, stat_struct *stats) {
diff --git a/flam3.h b/flam3.h
index 0107672..a92c203 100644
--- a/flam3.h
+++ b/flam3.h
@@ -36,10 +36,12 @@ char *flam3_version();
#define flam3_print_edits (1)
#define flam3_dont_print_edits (0)
+#include "vector.h"
+
//typedef double flam3_palette[256][3];
typedef struct {
double index;
- double color[4];
+ double4 color;
} flam3_palette_entry;
typedef flam3_palette_entry flam3_palette[256];
diff --git a/interpolation.c b/interpolation.c
index 528e312..78b798f 100644
--- a/interpolation.c
+++ b/interpolation.c
@@ -156,23 +156,25 @@ void interpolate_cmap(flam3_palette cmap, double blend,
fprintf(stderr,"unable to retrieve palette %d, setting to white\n", index1);
for (i = 0; i < 256; i++) {
- double t[5], s[5];
+ double4 t, s;
+ double t4, s4;
- rgb2hsv(p0[i].color, s);
- rgb2hsv(p1[i].color, t);
+ s = rgb2hsv(p0[i].color);
+ t = rgb2hsv(p1[i].color);
s[3] = p0[i].color[3];
t[3] = p1[i].color[3];
- s[4] = p0[i].index;
- t[4] = p1[i].index;
+ s4 = p0[i].index;
+ t4 = p1[i].index;
- for (j = 0; j < 5; j++)
+ for (j = 0; j < 4; j++)
t[j] = ((1.0-blend) * s[j]) + (blend * t[j]);
+ t4 = ((1.0-blend) * s4) + (blend * t4);
- hsv2rgb(t, cmap[i].color);
+ cmap[i].color = hsv2rgb(t);
cmap[i].color[3] = t[3];
- cmap[i].index = t[4];
+ cmap[i].index = t4;
}
}
@@ -362,29 +364,30 @@ void flam3_interpolate_n(flam3_genome *result, int ncp,
if (flam3_palette_interpolation_hsv == cpi[0].palette_interpolation) {
for (i = 0; i < 256; i++) {
- double t[3], s[5];
+ double s4;
+ double4 s, t;
int alpha1 = 1;
- s[0] = s[1] = s[2] = s[3] = s[4] = 0.0;
+ s[0] = s[1] = s[2] = s[3] = s4 = 0.0;
for (k = 0; k < ncp; k++) {
- rgb2hsv(cpi[k].palette[i].color, t);
+ t = rgb2hsv(cpi[k].palette[i].color);
for (j = 0; j < 3; j++)
s[j] += c[k] * t[j];
s[3] += c[k] * cpi[k].palette[i].color[3];
if (cpi[k].palette[i].color[3] != 1.0)
alpha1 = 0;
- s[4] += c[k] * cpi[k].palette[i].index;
+ s4 += c[k] * cpi[k].palette[i].index;
}
if (alpha1 == 1)
s[3] = 1.0;
- hsv2rgb(s, result->palette[i].color);
+ result->palette[i].color = hsv2rgb(s);
result->palette[i].color[3] = s[3];
- result->palette[i].index = s[4];
+ result->palette[i].index = s4;
for (j = 0; j < 4; j++) {
if (result->palette[i].color[j] < 0.0)
diff --git a/palettes.c b/palettes.c
index 816f3a2..980a465 100644
--- a/palettes.c
+++ b/palettes.c
@@ -167,22 +167,24 @@ int flam3_get_palette(int n, flam3_palette c, double hue_rotation, randctx * con
/* Loop over elements of cmap */
for (i = 0; i < cmap_len; i++) {
int ii = (i * 256) / cmap_len;
- double rgb[3], hsv[3];
+ double4 rgb, hsv;
- /* Colors are in 0-1 space */
- for (j = 0; j < 3; j++)
- rgb[j] = the_palettes[idx].colors[ii][j] / 255.0;
+ rgb = (double4) {
+ the_palettes[idx].colors[ii][0] / 255.0,
+ the_palettes[idx].colors[ii][1] / 255.0,
+ the_palettes[idx].colors[ii][2] / 255.0,
+ 1.0,
+ };
- rgb2hsv(rgb, hsv);
+ hsv = rgb2hsv(rgb);
hsv[0] += hue_rotation * 6.0;
- hsv2rgb(hsv, rgb);
+ rgb = hsv2rgb(hsv);
c[i].index = i;
- for (j = 0; j < 3; j++)
- c[i].color[j] = rgb[j];
+ c[i].color = rgb;
- c[i].color[3] = 1.0;
+ c[i].color[3] = 1.0;
}
return n;
@@ -196,8 +198,7 @@ int flam3_get_palette(int n, flam3_palette c, double hue_rotation, randctx * con
/* rgb 0 - 1,
h 0 - 6, s 0 - 1, v 0 - 1 */
-void rgb2hsv(rgb, hsv)
- double *rgb; double *hsv;
+double4 rgb2hsv(double4 rgb)
{
double rd, gd, bd, h, s, v, max, min, del, rc, gc, bc;
@@ -231,17 +232,13 @@ void rgb2hsv(rgb, hsv)
if (h<0) h += 6;
}
- hsv[0] = h;
- hsv[1] = s;
- hsv[2] = v;
+ return (double4) { h, s, v, rgb[3] };
}
/* h 0 - 6, s 0 - 1, v 0 - 1
rgb 0 - 1 */
-void hsv2rgb(hsv, rgb)
- double *hsv;
- double *rgb;
+double4 hsv2rgb(double4 hsv)
{
double h = hsv[0], s = hsv[1], v = hsv[2];
int j;
@@ -266,9 +263,7 @@ void hsv2rgb(hsv, rgb)
default: rd = v; gd = t; bd = p; break;
}
- rgb[0] = rd;
- rgb[1] = gd;
- rgb[2] = bd;
+ return (double4) { rd, gd, bd, hsv[3] };
}
double flam3_calc_alpha(double density, double gamma, double linrange) {
@@ -289,19 +284,15 @@ double flam3_calc_alpha(double density, double gamma, double linrange) {
return(alpha);
}
-void flam3_calc_newrgb(double *cbuf, double ls, double highpow, double *newrgb) {
+double4 flam3_calc_newrgb(double4 cbuf, double ls, double highpow) {
int rgbi;
double newls,lsratio;
- double newhsv[3];
double a, maxa=-1.0, maxc=0;
double adjhlp;
if (ls==0.0 || (cbuf[0]==0.0 && cbuf[1]==0.0 && cbuf[2]==0.0)) {
- newrgb[0] = 0.0;
- newrgb[1] = 0.0;
- newrgb[2] = 0.0;
- return;
+ return (double4) { 0, 0, 0, 0 };
}
/* Identify the most saturated channel */
@@ -320,17 +311,13 @@ void flam3_calc_newrgb(double *cbuf, double ls, double highpow, double *newrgb)
lsratio = pow(newls/ls,highpow);
/* Calculate the max-value color (ranged 0 - 1) */
- for (rgbi=0;rgbi<3;rgbi++)
- newrgb[rgbi] = newls*(cbuf[rgbi]/PREFILTER_WHITE)/255.0;
+ double4 newrgb = newls*(cbuf/PREFILTER_WHITE)/255.0;
/* Reduce saturation by the lsratio */
- rgb2hsv(newrgb,newhsv);
+ double4 newhsv = rgb2hsv(newrgb);
newhsv[1] *= lsratio;
- hsv2rgb(newhsv,newrgb);
- for (rgbi=0;rgbi<3;rgbi++)
- newrgb[rgbi] *= 255.0;
-
+ return hsv2rgb(newhsv) * 255.0;
} else {
newls = 255.0/maxc;
adjhlp = -highpow;
@@ -339,12 +326,10 @@ void flam3_calc_newrgb(double *cbuf, double ls, double highpow, double *newrgb)
if (maxa<=255)
adjhlp=1.0;
- /* Calculate the max-value color (ranged 0 - 1) interpolated with the old behaviour */
- for (rgbi=0;rgbi<3;rgbi++)
- newrgb[rgbi] = ((1.0-adjhlp)*newls + adjhlp*ls)*(cbuf[rgbi]/PREFILTER_WHITE);
-
-// for (rgbi=0;rgbi<3;rgbi++)
-// newrgb[rgbi] = ls*(cbuf[rgbi]/PREFILTER_WHITE);
+ /* Calculate the max-value color (ranged 0 - 1) interpolated with the old
+ * behaviour */
+
+ return ((1.0-adjhlp)*newls + adjhlp*ls)*(cbuf/PREFILTER_WHITE);
}
}
diff --git a/palettes.h b/palettes.h
index 5d7b617..97b9e94 100644
--- a/palettes.h
+++ b/palettes.h
@@ -26,12 +26,13 @@ typedef struct {
unsigned char colors[256][3];
} lib_palette;
+#include "vector.h"
-void rgb2hsv(double *rgb, double *hsv);
-void hsv2rgb(double *hsv, double *rgb);
+double4 rgb2hsv(double4);
+double4 hsv2rgb(double4);
double flam3_calc_alpha(double density, double gamma, double linrange);
-void flam3_calc_newrgb(double *cbuf, double ls, double highpow, double *newrgb);
+double4 flam3_calc_newrgb(double4 cbuf, double ls, double highpow);
#endif
diff --git a/private.h b/private.h
index 62cdb71..ae026f5 100644
--- a/private.h
+++ b/private.h
@@ -62,7 +62,7 @@ typedef struct {
double ws0, wb0s0, hs1, hb1s1; /* shortcuts for indexing */
flam3_palette_entry *dmap; /* palette */
double color_scalar; /* <1.0 if non-uniform motion blur is set */
- void *buckets; /* Points to the first accumulator */
+ double4 *buckets; /* Points to the first accumulator */
double badvals; /* accumulates all badvalue resets */
double batch_size;
int temporal_sample_num,ntemporal_samples;
diff --git a/rect.c b/rect.c
index 7a64cbd..7e45e16 100644
--- a/rect.c
+++ b/rect.c
@@ -16,7 +16,12 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-/* this file is included into flam3.c once for each buffer bit-width */
+#include <assert.h>
+
+#include "private.h"
+#include "filters.h"
+#include "variations.h"
+#include "palettes.h"
/*
* for batch
@@ -181,10 +186,9 @@ static void iter_thread(void *fth) {
for (j = 0; j < sub_batch_size; j++) {
double p0, p1, p00, p11;
double dbl_index0,dbl_frac;
- double interpcolor[4];
- int ci, color_index0;
+ double4 interpcolor;
+ int color_index0;
const double4 p = fthp->iter_storage[j];
- bucket *b;
if (fthp->cp.rotate != 0.0) {
p00 = p[0] - fthp->cp.rot_center[0];
@@ -199,7 +203,6 @@ static void iter_thread(void *fth) {
if (p0 >= ficp->bounds[0] && p1 >= ficp->bounds[1] && p0 <= ficp->bounds[2] && p1 <= ficp->bounds[3]) {
double logvis=1.0;
- bucket *buckets = (bucket *)(ficp->buckets);
/* Skip if invisible */
if (p[3]==0)
@@ -207,9 +210,6 @@ static void iter_thread(void *fth) {
else
logvis = p[3];
- b = buckets + (int)(ficp->ws0 * p0 - ficp->wb0s0) +
- ficp->width * (int)(ficp->hs1 * p1 - ficp->hb1s1);
-
dbl_index0 = p[2] * cmap_size;
color_index0 = (int) (dbl_index0);
@@ -225,11 +225,8 @@ static void iter_thread(void *fth) {
dbl_frac = dbl_index0 - (double)color_index0;
}
- for (ci=0;ci<4;ci++) {
- interpcolor[ci] = ficp->dmap[color_index0].color[ci] * (1.0-dbl_frac) +
- ficp->dmap[color_index0+1].color[ci] * dbl_frac;
- }
-
+ interpcolor = ficp->dmap[color_index0].color * (1.0-dbl_frac) +
+ ficp->dmap[color_index0+1].color * dbl_frac;
} else { /* Palette mode step */
if (color_index0 < 0) {
@@ -238,24 +235,15 @@ static void iter_thread(void *fth) {
color_index0 = cmap_size_m1;
}
- for (ci=0;ci<4;ci++)
- interpcolor[ci] = ficp->dmap[color_index0].color[ci];
+ interpcolor = ficp->dmap[color_index0].color;
}
- if (p[3]==1.0) {
- bump_no_overflow(b[0][0], interpcolor[0]);
- bump_no_overflow(b[0][1], interpcolor[1]);
- bump_no_overflow(b[0][2], interpcolor[2]);
- bump_no_overflow(b[0][3], interpcolor[3]);
- bump_no_overflow(b[0][4], 255.0);
- } else {
- bump_no_overflow(b[0][0], logvis*interpcolor[0]);
- bump_no_overflow(b[0][1], logvis*interpcolor[1]);
- bump_no_overflow(b[0][2], logvis*interpcolor[2]);
- bump_no_overflow(b[0][3], logvis*interpcolor[3]);
- bump_no_overflow(b[0][4], logvis*255.0);
+ if (p[3]!=1.0) {
+ interpcolor *= logvis;
}
+ ficp->buckets[(int)(ficp->ws0 * p0 - ficp->wb0s0) + ficp->width * (int)(ficp->hs1 * p1 - ficp->hb1s1)] += interpcolor;
+
}
}
@@ -266,14 +254,11 @@ static void iter_thread(void *fth) {
pthread_exit((void *)0);
}
-static int render_rectangle(flam3_frame *spec, void *out,
+int render_rectangle(flam3_frame *spec, void *out,
int field, int nchan, int transp, stat_struct *stats) {
long nbuckets;
int i, j, k, batch_num, temporal_sample_num;
double nsamples, batch_size;
- bucket *buckets;
- abucket *accumulate;
- double4 *points;
double *filter, *temporal_filter, *temporal_deltas, *batch_filter;
double ppux=0, ppuy=0;
int image_width, image_height; /* size of the image to produce */
@@ -304,9 +289,6 @@ static int render_rectangle(flam3_frame *spec, void *out,
char *ai;
int cmap_size;
- char *last_block;
- size_t memory_rqd;
-
/* Per-render progress timers */
time_t progress_timer=0;
time_t progress_timer_history[64];
@@ -397,19 +379,16 @@ static int render_rectangle(flam3_frame *spec, void *out,
fic.width = oversample * image_width + 2 * gutter_width;
nbuckets = (long)fic.width * (long)fic.height;
- memory_rqd = (sizeof(bucket) * nbuckets + sizeof(abucket) * nbuckets +
- 4 * sizeof(double) * (size_t)(spec->sub_batch_size) * spec->nthreads);
- last_block = (char *) malloc(memory_rqd);
- if (NULL == last_block) {
- fprintf(stderr, "render_rectangle: cannot malloc %g bytes.\n", (double)memory_rqd);
- fprintf(stderr, "render_rectangle: w=%d h=%d nb=%ld.\n", fic.width, fic.height, nbuckets);
- return(1);
- }
- /* Just free buckets at the end */
- buckets = (bucket *) last_block;
- accumulate = (abucket *) (last_block + sizeof(bucket) * nbuckets);
- points = (double4 *) (last_block + (sizeof(bucket) + sizeof(abucket)) * nbuckets);
+ double4 * const buckets = malloc (nbuckets * sizeof (*buckets));
+ assert (buckets != NULL);
+ double4 * const accumulate = malloc (nbuckets * sizeof (*accumulate));
+ assert (accumulate != NULL);
+ double4 ** const iter_storage = malloc (spec->nthreads * sizeof (*iter_storage));
+ assert (iter_storage != NULL);
+ for (size_t i = 0; i < spec->nthreads; i++) {
+ iter_storage[i] = malloc (spec->sub_batch_size * sizeof (*iter_storage[i]));
+ }
if (verbose) {
fprintf(stderr, "chaos: ");
@@ -417,7 +396,7 @@ static int render_rectangle(flam3_frame *spec, void *out,
}
background[0] = background[1] = background[2] = 0.0;
- memset((char *) accumulate, 0, sizeof(abucket) * nbuckets);
+ memset(accumulate, 0, sizeof(*accumulate) * nbuckets);
/* Batch loop - outermost */
@@ -425,7 +404,7 @@ static int render_rectangle(flam3_frame *spec, void *out,
double sample_density=0.0;
double k1, area, k2;
- memset((char *) buckets, 0, sizeof(bucket) * nbuckets);
+ memset(buckets, 0, sizeof(*buckets) * nbuckets);
/* Temporal sample loop */
for (temporal_sample_num = 0; temporal_sample_num < ntemporal_samples; temporal_sample_num++) {
@@ -550,7 +529,7 @@ static int render_rectangle(flam3_frame *spec, void *out,
fth[thi].timer_initialize = 0;
}
- fth[thi].iter_storage = &(points[thi*spec->sub_batch_size]);
+ fth[thi].iter_storage = iter_storage[thi];
fth[thi].fic = &fic;
flam3_copy(&(fth[thi].cp),&cp);
@@ -610,28 +589,14 @@ static int render_rectangle(flam3_frame *spec, void *out,
for (j = 0; j < fic.height; j++) {
for (i = 0; i < fic.width; i++) {
+ const double4 c = buckets[i + j * fic.width];
- abucket *a = accumulate + i + j * fic.width;
- bucket *b = buckets + i + j * fic.width;
- double c[4], ls;
-
- c[0] = (double) b[0][0];
- c[1] = (double) b[0][1];
- c[2] = (double) b[0][2];
- c[3] = (double) b[0][3];
if (0.0 == c[3])
continue;
- ls = (k1 * log(1.0 + c[3] * k2))/c[3];
- c[0] *= ls;
- c[1] *= ls;
- c[2] *= ls;
- c[3] *= ls;
+ const double ls = (k1 * log(1.0 + c[3] * k2))/c[3];
- abump_no_overflow(a[0][0], c[0]);
- abump_no_overflow(a[0][1], c[1]);
- abump_no_overflow(a[0][2], c[2]);
- abump_no_overflow(a[0][3], c[3]);
+ accumulate[i + j * fic.width] += c * ls;
}
}
@@ -646,7 +611,7 @@ static int render_rectangle(flam3_frame *spec, void *out,
/* filter the accumulation buffer down into the image */
if (1) {
int x, y;
- double t[4],newrgb[3];
+ double4 t,newrgb;
double g = 1.0 / (gamma / vib_gam_n);
double tmp,a;
double alpha,ls;
@@ -666,26 +631,22 @@ static int render_rectangle(flam3_frame *spec, void *out,
for (j = 0; j < fic.height; j++) {
for (i = 0; i < fic.width; i++) {
-
- abucket *ac = accumulate + i + j*fic.width;
+ double4 ac = accumulate[i + j*fic.width];
- if (ac[0][3]<=0) {
+ if (ac[3]<=0) {
alpha = 0.0;
ls = 0.0;
} else {
- tmp=ac[0][3]/PREFILTER_WHITE;
+ tmp=ac[3]/PREFILTER_WHITE;
alpha = flam3_calc_alpha(tmp,g,linrange);
ls = vibrancy * 256.0 * alpha / tmp;
if (alpha<0.0) alpha = 0.0;
if (alpha>1.0) alpha = 1.0;
}
- t[0] = (double)ac[0][0];
- t[1] = (double)ac[0][1];
- t[2] = (double)ac[0][2];
- t[3] = (double)ac[0][3];
+ t = ac;
- flam3_calc_newrgb(t, ls, highpow, newrgb);
+ newrgb = flam3_calc_newrgb(t, ls, highpow);
for (rgbi=0;rgbi<3;rgbi++) {
a = newrgb[rgbi];
@@ -704,10 +665,12 @@ static int render_rectangle(flam3_frame *spec, void *out,
if (a<0) a = 0;
/* Replace values in accumulation buffer with these new ones */
- ac[0][rgbi] = a;
+ ac[rgbi] = a;
}
- ac[0][3] = alpha;
+ ac[3] = alpha;
+
+ accumulate[i + j*fic.width] = ac;
}
}
@@ -726,13 +689,13 @@ static int render_rectangle(flam3_frame *spec, void *out,
for (ii = 0; ii < filter_width; ii++) {
for (jj = 0; jj < filter_width; jj++) {
double k = filter[ii + jj * filter_width];
- abucket *ac = accumulate + x+ii + (y+jj)*fic.width;
+ double4 ac = accumulate[x+ii + (y+jj)*fic.width];
- t[0] += k * ac[0][0];
- t[1] += k * ac[0][1];
- t[2] += k * ac[0][2];
- t[3] += k * ac[0][3];
+ t[0] += k * ac[0];
+ t[1] += k * ac[1];
+ t[2] += k * ac[2];
+ t[3] += k * ac[3];
}
@@ -757,7 +720,7 @@ static int render_rectangle(flam3_frame *spec, void *out,
if (alpha>1.0) alpha = 1.0;
}
- flam3_calc_newrgb(t, ls, highpow, newrgb);
+ newrgb = flam3_calc_newrgb(t, ls, highpow);
for (rgbi=0;rgbi<3;rgbi++) {
a = newrgb[rgbi];
diff --git a/wscript b/wscript
index 61459f7..8eb7748 100644
--- a/wscript
+++ b/wscript
@@ -19,7 +19,7 @@ def configure(conf):
conf.write_config_header ('config.h')
def build(bld):
- bld.stlib (features='c cstlib', source='flam3.c filters.c parser.c variations.c interpolation.c palettes.c png.c random.c docstring.c', target='libflam3', use='xml2 png pthread', includes='.')
+ bld.stlib (features='c cstlib', source='flam3.c filters.c parser.c variations.c interpolation.c palettes.c png.c random.c docstring.c rect.c', target='libflam3', use='xml2 png pthread', includes='.')
bld.program (features='c cprogram', source='flam3-render.c', target='flam3-render', use='libflam3 xml2 png amdlibm pthread', includes='.')
bld.program (features='c cprogram', source='flam3-genome.c', target='flam3-genome', use='libflam3 xml2 png amdlibm pthread', includes='.')
bld.program (features='c cprogram', source='flam3-animate.c', target='flam3-animate', use='libflam3 xml2 png amdlibm pthread', includes='.')