/*
FLAM3 - cosmic recursive fractal flames
Copyright (C) 1992-2009 Spotworks LLC
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see .
*/
#include
#include
#include "private.h"
#include "variations.h"
#include "palettes.h"
#include "math.h"
#include "rect.h"
/* allow this many iterations for settling into attractor */
#define FUSE_27 15
#define FUSE_28 100
/* Structures for passing parameters to iteration threads */
typedef struct {
unsigned short *xform_distrib; /* Distribution of xforms based on weights */
flam3_frame *spec; /* Frame contains timing information */
double bounds[4]; /* Corner coords of viewable area */
double2 rot[3]; /* Rotation transformation */
double size[2];
int width, height; /* buffer width/height */
double ws0, wb0s0, hs1, hb1s1; /* shortcuts for indexing */
flam3_palette_entry *dmap; /* palette */
double color_scalar; /* <1.0 if non-uniform motion blur is set */
double4 *buckets; /* Points to the first accumulator */
double badvals; /* accumulates all badvalue resets */
double batch_size;
int aborted, cmap_size;
/* mutex for bucket accumulator */
pthread_mutex_t bucket_mutex;
} flam3_iter_constants;
typedef struct {
flam3_genome cp; /* Full copy of genome for use by the thread */
flam3_iter_constants *fic; /* Constants for render */
/* thread number */
size_t i;
} flam3_thread_helper;
/* Lookup color [0,1]
*/
static double4 color_palette_lookup (const double color,
const color_palette_mode mode, const flam3_palette map,
const unsigned int map_count) {
assert (color >= 0.0 && color <= 1.0);
switch (mode) {
case PALETTE_MODE_LINEAR: {
const double ix = color * map_count;
const double bottomix = floor (ix);
const double frac = ix - bottomix;
const unsigned int intix = bottomix;
if (intix == map_count-1) {
return map[intix].color;
} else {
return map[intix].color * (1.0-frac) +
map[intix+1].color * frac;
}
break;
}
case PALETTE_MODE_STEP: {
const unsigned int intix = nearbyint (color * map_count);
return map[intix].color;
break;
}
default:
assert (0);
break;
}
}
static void *iter_thread(void *fth) {
double sub_batch;
int j;
flam3_thread_helper *fthp = (flam3_thread_helper *)fth;
flam3_iter_constants *ficp = fthp->fic;
int SBS = ficp->spec->sub_batch_size;
int fuse;
int cmap_size = ficp->cmap_size;
double4 *iter_storage;
randctx rc;
rand_seed (&rc);
int ret = posix_memalign ((void **) &iter_storage, sizeof (*iter_storage),
SBS * sizeof (*iter_storage));
assert (ret == 0);
assert (iter_storage != NULL);
fuse = (ficp->spec->earlyclip) ? FUSE_28 : FUSE_27;
for (sub_batch = 0; sub_batch < ficp->batch_size; sub_batch+=SBS) {
int sub_batch_size, badcount;
/* sub_batch is double so this is sketchy */
sub_batch_size = (sub_batch + SBS > ficp->batch_size) ?
(ficp->batch_size - sub_batch) : SBS;
/* Seed iterations */
const double4 start = (double4) {
rand_d11(&rc),
rand_d11(&rc),
rand_d01(&rc),
rand_d01(&rc),
};
/* Execute iterations */
badcount = flam3_iterate(&(fthp->cp), sub_batch_size, fuse, start, iter_storage, ficp->xform_distrib, &rc);
/* Lock mutex for access to accumulator */
pthread_mutex_lock(&ficp->bucket_mutex);
/* Add the badcount to the counter */
ficp->badvals += badcount;
/* Put them in the bucket accumulator */
for (j = 0; j < sub_batch_size; j++) {
double4 p = iter_storage[j];
if (fthp->cp.rotate != 0.0) {
const double2 p01 = (double2) { p[0], p[1] };
const double2 rotatedp = apply_affine (p01, ficp->rot);
p[0] = rotatedp[0];
p[1] = rotatedp[1];
}
/* Skip if out of bounding box or invisible */
if (p[0] >= ficp->bounds[0] && p[1] >= ficp->bounds[1] &&
p[0] <= ficp->bounds[2] && p[1] <= ficp->bounds[3] &&
p[3] > 0) {
const size_t ix = (int)(ficp->ws0 * p[0] - ficp->wb0s0) + ficp->width * (int)(ficp->hs1 * p[1] - ficp->hb1s1);
#if HAVE_BUILTIN_PREFETCH
/* prefetch for reading (0) with no locality (0). This (partially)
* hides the load latency for the += operation at the end of this
* block */
__builtin_prefetch (&ficp->buckets[ix], 0, 0);
#endif
double4 interpcolor = color_palette_lookup (p[2],
fthp->cp.palette_mode, ficp->dmap, cmap_size);
const double logvis = p[3];
if (logvis != 1.0) {
interpcolor *= logvis;
}
ficp->buckets[ix] += interpcolor;
}
}
/* Release mutex */
pthread_mutex_unlock(&ficp->bucket_mutex);
}
free (iter_storage);
return NULL;
}
/* Perform clipping
*/
static double4 clip (const double4 in, const double g, const double linrange,
const double highpow, const double vibrancy) {
double alpha, ls;
if (in[3] <= 0.0) {
alpha = 0.0;
ls = 0.0;
} else {
alpha = flam3_calc_alpha (in[3], g, linrange);
ls = vibrancy * alpha / in[3];
alpha = clamp (alpha, 0.0, 1.0);
}
double4 newrgb = flam3_calc_newrgb (in, ls, highpow);
newrgb += (1.0-vibrancy) * pow_d4 (in, g);
if (alpha > 0.0) {
newrgb /= alpha;
} else {
newrgb = (double4) {0, 0, 0, 0};
}
newrgb[3] = alpha;
newrgb = clamp_d4 (newrgb, 0.0, 1.0);
return newrgb;
}
int render_parallel (flam3_frame *spec, void *out, stat_struct *stats) {
long nbuckets;
double ppux=0, ppuy=0;
int image_width, image_height; /* size of the image to produce */
int out_width;
int bytes_per_channel = spec->bytes_per_channel;
double highpow;
flam3_palette dmap;
double vibrancy = 0.0;
double gamma = 0.0;
int vib_gam_n = 0;
flam3_genome cp;
unsigned short *xform_distrib;
flam3_iter_constants fic;
flam3_thread_helper *fth;
pthread_attr_t pt_attr;
pthread_t *myThreads=NULL;
int thi;
int cmap_size;
fic.badvals = 0;
fic.aborted = 0;
stats->num_iters = 0;
/* correct for apophysis's use of 255 colors in the palette rather than all 256 */
cmap_size = 256;
memset(&cp,0, sizeof(flam3_genome));
/* interpolate and get a control point */
flam3_interpolate(spec->genomes, spec->ngenomes, spec->time, 0, &cp);
highpow = cp.highlight_power;
/* Initialize the thread helper structures */
fth = (flam3_thread_helper *)calloc(spec->nthreads,sizeof(flam3_thread_helper));
for (unsigned int i=0;inthreads;i++)
fth[i].cp.final_xform_index=-1;
/* Set up the output image dimensions, adjusted for scanline */
const unsigned int channels = 4;
image_width = cp.width;
out_width = image_width;
image_height = cp.height;
/* Allocate the space required to render the image */
fic.height = image_height;
fic.width = image_width;
nbuckets = (long)fic.width * (long)fic.height;
double4 *buckets;
int ret = posix_memalign ((void **) &buckets, sizeof (*buckets),
nbuckets * sizeof (*buckets));
assert (ret == 0);
assert (buckets != NULL);
memset (buckets, 0, nbuckets * sizeof (*buckets));
double sample_density=0.0;
/* Batch loop - outermost */
{
{
/* Get the xforms ready to render */
if (prepare_precalc_flags(&cp)) {
fprintf(stderr,"prepare xform pointers returned error: aborting.\n");
return(1);
}
xform_distrib = flam3_create_xform_distrib(&cp);
if (xform_distrib==NULL) {
fprintf(stderr,"create xform distrib returned error: aborting.\n");
return(1);
}
/* compute the colormap entries. */
/* the input colormap is 256 long with entries from 0 to 1.0 */
for (unsigned int j = 0; j < CMAP_SIZE; j++) {
dmap[j].index = cp.palette[(j * 256) / CMAP_SIZE].index / 256.0;
for (unsigned int k = 0; k < 4; k++)
dmap[j].color[k] = cp.palette[(j * 256) / CMAP_SIZE].color[k];
}
/* compute camera */
{
double corner0, corner1;
double scale;
if (cp.sample_density <= 0.0) {
fprintf(stderr,
"sample density (quality) must be greater than zero,"
" not %g.\n", cp.sample_density);
return(1);
}
scale = pow(2.0, cp.zoom);
sample_density = cp.sample_density * scale * scale;
ppux = cp.pixels_per_unit * scale;
ppuy = ppux;
ppux /= spec->pixel_aspect_ratio;
corner0 = cp.center[0] - image_width / ppux / 2.0;
corner1 = cp.center[1] - image_height / ppuy / 2.0;
fic.bounds[0] = corner0;
fic.bounds[1] = corner1;
fic.bounds[2] = corner0 + image_width / ppux;
fic.bounds[3] = corner1 + image_height / ppuy;
fic.size[0] = 1.0 / (fic.bounds[2] - fic.bounds[0]);
fic.size[1] = 1.0 / (fic.bounds[3] - fic.bounds[1]);
rotate_center ((double2) { cp.rot_center[0], cp.rot_center[1] },
cp.rotate, fic.rot);
fic.ws0 = fic.width * fic.size[0];
fic.wb0s0 = fic.ws0 * fic.bounds[0];
fic.hs1 = fic.height * fic.size[1];
fic.hb1s1 = fic.hs1 * fic.bounds[1];
}
/* number of samples is based only on the output image size */
double nsamples = sample_density * image_width * image_height;
/* how many of these samples are rendered in this loop? */
double batch_size = nsamples;
stats->num_iters += batch_size;
/* Fill in the iter constants */
fic.xform_distrib = xform_distrib;
fic.spec = spec;
fic.batch_size = batch_size / (double)spec->nthreads;
fic.cmap_size = cmap_size;
fic.dmap = (flam3_palette_entry *)dmap;
fic.buckets = (void *)buckets;
/* Initialize the thread helper structures */
for (thi = 0; thi < spec->nthreads; thi++) {
fth[thi].fic = &fic;
fth[thi].i = thi;
flam3_copy(&(fth[thi].cp),&cp);
}
/* Let's make some threads */
myThreads = (pthread_t *)malloc(spec->nthreads * sizeof(pthread_t));
pthread_mutex_init(&fic.bucket_mutex, NULL);
pthread_attr_init(&pt_attr);
pthread_attr_setdetachstate(&pt_attr,PTHREAD_CREATE_JOINABLE);
for (thi=0; thi nthreads; thi ++)
pthread_create(&myThreads[thi], &pt_attr, (void *)iter_thread, (void *)(&(fth[thi])));
pthread_attr_destroy(&pt_attr);
/* Wait for them to return */
for (thi=0; thi < spec->nthreads; thi++)
pthread_join(myThreads[thi], NULL);
pthread_mutex_destroy(&fic.bucket_mutex);
free(myThreads);
/* Free the xform_distrib array */
free(xform_distrib);
if (fic.aborted) {
goto done;
}
vibrancy += cp.vibrancy;
gamma += cp.gamma;
vib_gam_n++;
}
#if 0
printf("iw=%d,ih=%d,ppux=%f,ppuy=%f\n",image_width,image_height,ppux,ppuy);
printf("contrast=%f, brightness=%f, PREFILTER=%d\n",
cp.contrast, cp.brightness, PREFILTER_WHITE);
printf("area = %f, WHITE_LEVEL=%d, sample_density=%f\n",
area, WHITE_LEVEL, sample_density);
printf("k1=%f,k2=%15.12f\n",k1,k2);
#endif
}
/* filter the accumulation buffer down into the image */
if (1) {
const double g = 1.0 / (gamma / vib_gam_n);
double linrange = cp.gam_lin_thresh;
vibrancy /= vib_gam_n;
/* XXX: the original formula has a factor 268/256 in here, not sure why */
const double k1 = cp.contrast * cp.brightness;
const double area = image_width * image_height / (ppux * ppuy);
const double k2 = 1.0 / (cp.contrast * area * sample_density);
for (unsigned int y = 0; y < image_height; y++) {
for (unsigned int x = 0; x < image_width; x++) {
double4 t = buckets[x + y * fic.width];
const double ls = (k1 * log(1.0 + t[3] * k2))/t[3];
t = t * ls;
t = clip (t, g, linrange, highpow, vibrancy);
const double maxval = (1 << (bytes_per_channel*8)) - 1;
t = nearbyint_d4 (t * maxval);
if (bytes_per_channel == 2) {
uint16_t * const p = &((uint16_t *) out)[channels * (x + y * out_width)];
p[0] = t[0];
p[1] = t[1];
p[2] = t[2];
p[3] = t[3];
} else if (bytes_per_channel == 1) {
uint8_t * const p = &((uint8_t *) out)[channels * (x + y * out_width)];
p[0] = t[0];
p[1] = t[1];
p[2] = t[2];
p[3] = t[3];
} else {
assert (0);
}
}
}
}
done:
stats->badvals = fic.badvals;
free(buckets);
/* We have to clear the cps in fth first */
for (thi = 0; thi < spec->nthreads; thi++) {
clear_cp(&(fth[thi].cp),0);
}
free(fth);
clear_cp(&cp,0);
return(0);
}