summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Lars-Dominik Braun <lars@6xq.net> 2015-03-01 13:02:50 +0100
committer Lars-Dominik Braun <lars@6xq.net> 2015-05-02 21:36:45 +0200
commit 4d2d896e28446928d820bf1353abcf40e3f66ed8 (patch)
tree 1aa8c49b5faec5f96a80f2257bb97d1c123d36b6
parent 11f46f4bf230a22559110f64d01ee9c05749de38 (diff)
download pucket-4d2d896e28446928d820bf1353abcf40e3f66ed8.tar.gz
pucket-4d2d896e28446928d820bf1353abcf40e3f66ed8.tar.bz2
pucket-4d2d896e28446928d820bf1353abcf40e3f66ed8.zip
Switch to OpenMP
Replaces the quality (target density) parameter with a time limit, in preparation for rendering resumption.
-rw-r--r-- flam3.c 32
-rw-r--r-- flam3.h 6
-rw-r--r-- main.c 59
-rw-r--r-- palettes.c 12
-rw-r--r-- private.h 2
-rw-r--r-- rect.c 545
-rw-r--r-- rect.h 14
-rw-r--r-- vector.h 2
-rw-r--r-- wscript 5
9 files changed, 256 insertions, 421 deletions
diff --git a/flam3.c b/flam3.c
index 3bb89cd..873efbd 100644
--- a/flam3.c
+++ b/flam3.c
@@ -37,8 +37,6 @@
#include <errno.h>
#include <assert.h>
-#include <pthread.h>
-
char *flam3_version() {
return VERSION;
}
@@ -182,7 +180,7 @@ int flam3_create_chaos_distrib(flam3_genome *cp, int xi, unsigned short *xform_d
*/
-int flam3_iterate(flam3_genome *cp, int n, int fuse, const double4 in, double4 *samples, unsigned short *xform_distrib, randctx *rc) {
+int flam3_iterate(flam3_genome *cp, int n, int fuse, const double4 in, double4 *samples, const unsigned short *xform_distrib, randctx *rc) {
int i;
double4 p, q;
int consec = 0;
@@ -933,7 +931,7 @@ void flam3_copy_xform(flam3_xform *dest, flam3_xform *src) {
}
/* Copy one control point to another */
-void flam3_copy(flam3_genome *dest, flam3_genome *src) {
+void flam3_copy(flam3_genome *dest, const flam3_genome * const src) {
int i,ii;
int numstd;
@@ -1107,32 +1105,6 @@ void clear_cp(flam3_genome *cp, int default_flag) {
}
-
-int flam3_count_nthreads(void) {
- int nthreads;
-
-#ifndef _SC_NPROCESSORS_ONLN
- char line[MAXBUF];
- FILE *f = fopen("/proc/cpuinfo", "r");
- if (NULL == f) goto def;
- nthreads = 0;
- while (fgets(line, MAXBUF, f)) {
- if (!strncmp("processor\t:", line, 11))
- nthreads++;
- }
- fclose(f);
- if (nthreads < 1) goto def;
- return (nthreads);
-def:
- fprintf(stderr, "could not read /proc/cpuinfo, using one render thread.\n");
- nthreads = 1;
-#else
- nthreads = sysconf(_SC_NPROCESSORS_ONLN);
- if (nthreads < 1) nthreads = 1;
-#endif
- return (nthreads);
-}
-
flam3_genome *flam3_parse_xml2(char *xmldata, char *xmlfilename, int default_flag, int *ncps, randctx * const rc) {
xmlDocPtr doc; /* Parsed XML document tree */
diff --git a/flam3.h b/flam3.h
index 3c17ffd..ac8278d 100644
--- a/flam3.h
+++ b/flam3.h
@@ -508,7 +508,7 @@ void flam3_add_motion_element(flam3_xform *xf);
void flam3_add_xforms(flam3_genome *cp, int num_to_add, int interp_padding, int final_flag);
void flam3_delete_xform(flam3_genome *thiscp, int idx_to_delete);
void flam3_copy_xform(flam3_xform *dest, flam3_xform *src);
-void flam3_copy(flam3_genome *dest, flam3_genome *src);
+void flam3_copy(flam3_genome *dest, const flam3_genome * const src);
void flam3_copyx(flam3_genome *dest, flam3_genome *src, int num_std, int num_final);
void flam3_copy_params(flam3_xform *dest, flam3_xform *src, int varn);
void flam3_delete_motion_elements(flam3_xform *xf);
@@ -523,7 +523,7 @@ void clear_cp(flam3_genome *cp, int def_flag);
(samples[2], samples[3]) as starting color coordinate,
perform fuse iterations and throw them away, then perform
nsamples iterations and save them in the samples array */
-int flam3_iterate(flam3_genome *cp, int n, int fuse, const double4 in, double4 *samples, unsigned short *xform_distrib, randctx *rc);
+int flam3_iterate(flam3_genome *cp, int n, int fuse, const double4 in, double4 *samples, const unsigned short *xform_distrib, randctx *rc);
void apply_motion_parameters(flam3_xform *xf, flam3_xform *addto, double blend);
@@ -564,8 +564,6 @@ double flam3_lyapunov(flam3_genome *g, int ntries);
void flam3_apply_template(flam3_genome *cp, flam3_genome *templ);
-int flam3_count_nthreads(void);
-
typedef struct {
double pixel_aspect_ratio; /* width over height of each pixel */
flam3_genome *genomes;
diff --git a/main.c b/main.c
index 931fee0..7a699c3 100644
--- a/main.c
+++ b/main.c
@@ -33,8 +33,8 @@ const char *argp_program_version =
"vlam3-pre";
typedef struct {
- unsigned int threads, bpc, quality;
- float scale;
+ unsigned int bpc;
+ float scale, time;
} render_arguments;
static error_t parse_render_opt (int key, char *arg,
@@ -51,12 +51,12 @@ static error_t parse_render_opt (int key, char *arg,
break;
}
- case 'q': {
- int i = atoi (arg);
- if (i < 1) {
- argp_error (state, "Quality must be >= 1");
+ case 't': {
+ float i = atof (arg);
+ if (i <= 0) {
+ argp_error (state, "Time must be > 0");
} else {
- arguments->quality = i;
+ arguments->time = i;
}
break;
}
@@ -68,16 +68,6 @@ static error_t parse_render_opt (int key, char *arg,
}
break;
- case 't': {
- int i = atoi (arg);
- if (i <= 0) {
- argp_error (state, "Threads must be > 0");
- } else {
- arguments->threads = i;
- }
- break;
- }
-
case ARGP_KEY_ARG:
if (state->arg_num > 0) {
return ARGP_ERR_UNKNOWN;
@@ -110,36 +100,27 @@ static void do_render (const render_arguments * const arguments) {
flam3_genome * const genome = &cps[0];
- genome->sample_density = arguments->quality;
genome->height *= arguments->scale;
genome->width *= arguments->scale;
genome->pixels_per_unit *= arguments->scale;
- flam3_frame f;
- f.genomes = genome;
- f.ngenomes = 1;
- f.time = 0.0;
- f.pixel_aspect_ratio = 1.0;
- f.progress = 0;
- f.nthreads = arguments->threads;
- f.earlyclip = 0;
- f.sub_batch_size = 10000;
- f.bytes_per_channel = arguments->bpc / 8;
-
+ const unsigned int bytes_per_channel = arguments->bpc/8;
const unsigned int channels = 4;
const size_t this_size = channels * genome->width * genome->height *
- f.bytes_per_channel;
+ bytes_per_channel;
void *image = (void *) calloc(this_size, sizeof(char));
- stat_struct stats;
- if (render_parallel (&f, image, &stats)) {
- fprintf(stderr,"error rendering image: aborting.\n");
- exit(1);
- }
+ bucket bucket;
+ bucket_init (&bucket, (uint2) { genome->width, genome->height });
+
+ render_bucket (genome, &bucket, arguments->time);
+ fprintf (stderr, "%lu samples, %lu bad\n",
+ bucket.samples, bucket.badvals);
+ render_image (genome, &bucket, image, bytes_per_channel);
flam3_img_comments fpc;
write_png (stdout, image, genome->width, genome->height, &fpc,
- f.bytes_per_channel);
+ bytes_per_channel);
}
static void print_genome (flam3_genome * const genome) {
@@ -492,10 +473,9 @@ int main (int argc, char **argv) {
} else if (streq (command, "render")) {
/* render flame to image file */
const struct argp_option options[] = {
- {"threads", 't', "num", 0, "Number of threads (auto)" },
{"scale", 's', "factor", 0, "Scale image dimensions by factor (1.0)" },
{"bpc", 'b', "8|16", 0, "Bits per channel of output image (8)" },
- {"quality", 'q', "num", 0, "Average samples per pixel (100)" },
+ {"time", 't', "seconds", 0, "Rendering time" },
{"width", 'w', "pixels", 0, "Output image width" },
{"height", 'h', "pixels", 0, "Output image height" },
{ 0 },
@@ -507,10 +487,9 @@ int main (int argc, char **argv) {
};
render_arguments arguments = {
- .threads = flam3_count_nthreads(),
.bpc = 8,
.scale = 1.0,
- .quality = 100,
+ .time = 1.0,
};
argp_parse (&argp, argc, argv, 0, NULL, &arguments);
diff --git a/palettes.c b/palettes.c
index f73bd24..cf76003 100644
--- a/palettes.c
+++ b/palettes.c
@@ -16,6 +16,8 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <assert.h>
+
#include "private.h"
#include "palettes.h"
#include "rect.h"
@@ -353,7 +355,6 @@ static double try_colors(flam3_genome *g, int color_resolution) {
flam3_genome saved;
double scalar;
int pixtotal;
- stat_struct stats;
memset(&saved, 0, sizeof(flam3_genome));
@@ -382,10 +383,11 @@ static double try_colors(flam3_genome *g, int color_resolution) {
f.sub_batch_size = 10000;
image = (unsigned char *) calloc(g->width * g->height, 3);
- if (render_parallel (&f, image, &stats)) {
- fprintf(stderr,"Error rendering test image for trycolors. Aborting.\n");
- return(-1);
- }
+
+ bucket bucket;
+ bucket_init (&bucket, (uint2) { g->width, g->height });
+ render_bucket (g, &bucket, 0.2);
+ render_image (g, &bucket, image, f.bytes_per_channel);
hist = calloc(sizeof(int), res3);
p = image;
diff --git a/private.h b/private.h
index ff0d1c1..3f2b044 100644
--- a/private.h
+++ b/private.h
@@ -33,8 +33,6 @@
#include <math.h>
-#include <pthread.h>
-
#define EPS (1e-10)
#define CMAP_SIZE 256
#define CMAP_SIZE_M1 255
diff --git a/rect.c b/rect.c
index 172bd4e..0d9a9db 100644
--- a/rect.c
+++ b/rect.c
@@ -18,6 +18,7 @@
#include <assert.h>
#include <stdlib.h>
+#include <omp.h>
#include "private.h"
#include "variations.h"
@@ -25,35 +26,20 @@
#include "math.h"
#include "rect.h"
-/* allow this many iterations for settling into attractor */
-#define FUSE_27 15
-#define FUSE_28 100
-
-/* Structures for passing parameters to iteration threads */
typedef struct {
- unsigned short *xform_distrib; /* Distribution of xforms based on weights */
- flam3_frame *spec; /* Frame contains timing information */
- double bounds[4]; /* Corner coords of viewable area */
- double2 rot[3]; /* Rotation transformation */
- double size[2];
- int width, height; /* buffer width/height */
- double ws0, wb0s0, hs1, hb1s1; /* shortcuts for indexing */
- flam3_palette_entry *dmap; /* palette */
- double color_scalar; /* <1.0 if non-uniform motion blur is set */
- double4 *buckets; /* Points to the first accumulator */
- double badvals; /* accumulates all badvalue resets */
- double batch_size;
- int aborted, cmap_size;
- /* mutex for bucket accumulator */
- pthread_mutex_t bucket_mutex;
-} flam3_iter_constants;
+ double timelimit;
+ unsigned int sub_batch_size, fuse;
+ unsigned short *xform_distrib;
-typedef struct {
- flam3_genome cp; /* Full copy of genome for use by the thread */
- flam3_iter_constants *fic; /* Constants for render */
- /* thread number */
- size_t i;
-} flam3_thread_helper;
+ flam3_palette dmap;
+ unsigned int cmap_size;
+
+ /* camera stuff */
+ double ws0, wb0s0, hs1, hb1s1; /* shortcuts for indexing */
+ double bounds[4]; /* Corner coords of viewable area */
+ double2 rot[3]; /* Rotation transformation */
+ double ppux, ppuy;
+} render_constants;
/* Lookup color [0,1]
*/
@@ -90,92 +76,87 @@ static double4 color_palette_lookup (const double color,
}
}
-static void *iter_thread(void *fth) {
- double sub_batch;
- int j;
- flam3_thread_helper *fthp = (flam3_thread_helper *)fth;
- flam3_iter_constants *ficp = fthp->fic;
- int SBS = ficp->spec->sub_batch_size;
- int fuse;
- int cmap_size = ficp->cmap_size;
- double4 *iter_storage;
- randctx rc;
-
- rand_seed (&rc);
-
- int ret = posix_memalign ((void **) &iter_storage, sizeof (*iter_storage),
- SBS * sizeof (*iter_storage));
- assert (ret == 0);
- assert (iter_storage != NULL);
-
- fuse = (ficp->spec->earlyclip) ? FUSE_28 : FUSE_27;
-
- for (sub_batch = 0; sub_batch < ficp->batch_size; sub_batch+=SBS) {
- int sub_batch_size, badcount;
- /* sub_batch is double so this is sketchy */
- sub_batch_size = (sub_batch + SBS > ficp->batch_size) ?
- (ficp->batch_size - sub_batch) : SBS;
-
- /* Seed iterations */
- const double4 start = (double4) {
- rand_d11(&rc),
- rand_d11(&rc),
- rand_d01(&rc),
- rand_d01(&rc),
- };
-
- /* Execute iterations */
- badcount = flam3_iterate(&(fthp->cp), sub_batch_size, fuse, start, iter_storage, ficp->xform_distrib, &rc);
-
- /* Lock mutex for access to accumulator */
- pthread_mutex_lock(&ficp->bucket_mutex);
-
- /* Add the badcount to the counter */
- ficp->badvals += badcount;
-
- /* Put them in the bucket accumulator */
- for (j = 0; j < sub_batch_size; j++) {
- double4 p = iter_storage[j];
-
- if (fthp->cp.rotate != 0.0) {
- const double2 p01 = (double2) { p[0], p[1] };
- const double2 rotatedp = apply_affine (p01, ficp->rot);
- p[0] = rotatedp[0];
- p[1] = rotatedp[1];
- }
-
- /* Skip if out of bounding box or invisible */
- if (p[0] >= ficp->bounds[0] && p[1] >= ficp->bounds[1] &&
- p[0] <= ficp->bounds[2] && p[1] <= ficp->bounds[3] &&
- p[3] > 0) {
- const size_t ix = (int)(ficp->ws0 * p[0] - ficp->wb0s0) + ficp->width * (int)(ficp->hs1 * p[1] - ficp->hb1s1);
+static void iter_thread (flam3_genome * const input_genome,
+ bucket * const bucket, const render_constants * const c,
+ volatile bool * const stopped) {
+ randctx rc;
+ rand_seed (&rc);
+
+ flam3_genome genome;
+ flam3_copy (&genome, input_genome);
+
+ double4 *iter_storage;
+ int ret = posix_memalign ((void **) &iter_storage, sizeof (*iter_storage),
+ c->sub_batch_size * sizeof (*iter_storage));
+ assert (ret == 0);
+ assert (iter_storage != NULL);
+
+ const double starttime = omp_get_wtime ();
+
+ do {
+ /* Seed iterations */
+ const double4 start = (double4) {
+ rand_d11(&rc),
+ rand_d11(&rc),
+ rand_d01(&rc),
+ rand_d01(&rc),
+ };
+
+ /* Execute iterations */
+ const unsigned long badcount = flam3_iterate(&genome,
+ c->sub_batch_size, c->fuse, start, iter_storage,
+ c->xform_distrib, &rc);
+
+#pragma omp critical
+ {
+ /* Add the badcount to the counter */
+ bucket->badvals += badcount;
+ bucket->samples += c->sub_batch_size;
+
+ /* Put them in the bucket accumulator */
+ for (unsigned int j = 0; j < c->sub_batch_size; j++) {
+ double4 p = iter_storage[j];
+
+ if (genome.rotate != 0.0) {
+ const double2 p01 = (double2) { p[0], p[1] };
+ const double2 rotatedp = apply_affine (p01, c->rot);
+ p[0] = rotatedp[0];
+ p[1] = rotatedp[1];
+ }
+
+ /* Skip if out of bounding box or invisible */
+ if (p[0] >= c->bounds[0] && p[1] >= c->bounds[1] &&
+ p[0] <= c->bounds[2] && p[1] <= c->bounds[3] &&
+ p[3] > 0) {
+ const size_t ix = (int)(c->ws0 * p[0] - c->wb0s0) + bucket->dim[0] * (int)(c->hs1 * p[1] - c->hb1s1);
#if HAVE_BUILTIN_PREFETCH
- /* prefetch for reading (0) with no locality (0). This (partially)
- * hides the load latency for the += operation at the end of this
- * block */
- __builtin_prefetch (&ficp->buckets[ix], 0, 0);
+ /* prefetch for reading (0) with no locality (0). This (partially)
+ * hides the load latency for the += operation at the end of this
+ * block */
+ __builtin_prefetch (&bucket->data[ix], 0, 0);
#endif
- double4 interpcolor = color_palette_lookup (p[2],
- fthp->cp.palette_mode, ficp->dmap, cmap_size);
-
- const double logvis = p[3];
- if (logvis != 1.0) {
- interpcolor *= logvis;
- }
+ double4 interpcolor = color_palette_lookup (p[2],
+ genome.palette_mode, c->dmap, c->cmap_size);
- ficp->buckets[ix] += interpcolor;
+ const double logvis = p[3];
+ if (logvis != 1.0) {
+ interpcolor *= logvis;
+ }
- }
- }
-
- /* Release mutex */
- pthread_mutex_unlock(&ficp->bucket_mutex);
-
- }
+ bucket->data[ix] += interpcolor;
+ }
+ }
+ }
+#pragma omp master
+ {
+ if (omp_get_wtime () - starttime > c->timelimit) {
+ *stopped = true;
+ }
+ }
+ } while (!(*stopped));
- free (iter_storage);
- return NULL;
+ free (iter_storage);
}
/* Perform clipping
@@ -206,252 +187,142 @@ static double4 clip (const double4 in, const double g, const double linrange,
return newrgb;
}
-int render_parallel (flam3_frame *spec, void *out, stat_struct *stats) {
- long nbuckets;
- double ppux=0, ppuy=0;
- int image_width, image_height; /* size of the image to produce */
- int out_width;
- int bytes_per_channel = spec->bytes_per_channel;
- double highpow;
- flam3_palette dmap;
- double vibrancy = 0.0;
- double gamma = 0.0;
- int vib_gam_n = 0;
- flam3_genome cp;
- unsigned short *xform_distrib;
- flam3_iter_constants fic;
- flam3_thread_helper *fth;
- pthread_attr_t pt_attr;
- pthread_t *myThreads=NULL;
- int thi;
- int cmap_size;
-
- fic.badvals = 0;
- fic.aborted = 0;
-
- stats->num_iters = 0;
-
- /* correct for apophysis's use of 255 colors in the palette rather than all 256 */
- cmap_size = 256;
-
- memset(&cp,0, sizeof(flam3_genome));
-
- /* interpolate and get a control point */
- flam3_interpolate(spec->genomes, spec->ngenomes, spec->time, 0, &cp);
- highpow = cp.highlight_power;
-
- /* Initialize the thread helper structures */
- fth = (flam3_thread_helper *)calloc(spec->nthreads,sizeof(flam3_thread_helper));
- for (unsigned int i=0;i<spec->nthreads;i++)
- fth[i].cp.final_xform_index=-1;
-
- /* Set up the output image dimensions, adjusted for scanline */
- const unsigned int channels = 4;
- image_width = cp.width;
- out_width = image_width;
- image_height = cp.height;
-
- /* Allocate the space required to render the image */
- fic.height = image_height;
- fic.width = image_width;
-
- nbuckets = (long)fic.width * (long)fic.height;
-
- double4 *buckets;
- int ret = posix_memalign ((void **) &buckets, sizeof (*buckets),
- nbuckets * sizeof (*buckets));
- assert (ret == 0);
- assert (buckets != NULL);
- memset (buckets, 0, nbuckets * sizeof (*buckets));
-
- double sample_density=0.0;
-
- /* Batch loop - outermost */
- {
-
-
- {
-
- /* Get the xforms ready to render */
- if (prepare_precalc_flags(&cp)) {
- fprintf(stderr,"prepare xform pointers returned error: aborting.\n");
- return(1);
- }
- xform_distrib = flam3_create_xform_distrib(&cp);
- if (xform_distrib==NULL) {
- fprintf(stderr,"create xform distrib returned error: aborting.\n");
- return(1);
- }
-
- /* compute the colormap entries. */
- /* the input colormap is 256 long with entries from 0 to 1.0 */
- for (unsigned int j = 0; j < CMAP_SIZE; j++) {
- dmap[j].index = cp.palette[(j * 256) / CMAP_SIZE].index / 256.0;
- for (unsigned int k = 0; k < 4; k++)
- dmap[j].color[k] = cp.palette[(j * 256) / CMAP_SIZE].color[k];
- }
-
- /* compute camera */
- {
- double corner0, corner1;
- double scale;
-
- if (cp.sample_density <= 0.0) {
- fprintf(stderr,
- "sample density (quality) must be greater than zero,"
- " not %g.\n", cp.sample_density);
- return(1);
- }
-
- scale = pow(2.0, cp.zoom);
- sample_density = cp.sample_density * scale * scale;
-
- ppux = cp.pixels_per_unit * scale;
- ppuy = ppux;
- ppux /= spec->pixel_aspect_ratio;
- corner0 = cp.center[0] - image_width / ppux / 2.0;
- corner1 = cp.center[1] - image_height / ppuy / 2.0;
- fic.bounds[0] = corner0;
- fic.bounds[1] = corner1;
- fic.bounds[2] = corner0 + image_width / ppux;
- fic.bounds[3] = corner1 + image_height / ppuy;
- fic.size[0] = 1.0 / (fic.bounds[2] - fic.bounds[0]);
- fic.size[1] = 1.0 / (fic.bounds[3] - fic.bounds[1]);
- rotate_center ((double2) { cp.rot_center[0], cp.rot_center[1] },
- cp.rotate, fic.rot);
- fic.ws0 = fic.width * fic.size[0];
- fic.wb0s0 = fic.ws0 * fic.bounds[0];
- fic.hs1 = fic.height * fic.size[1];
- fic.hb1s1 = fic.hs1 * fic.bounds[1];
-
- }
-
- /* number of samples is based only on the output image size */
- double nsamples = sample_density * image_width * image_height;
-
- /* how many of these samples are rendered in this loop? */
- double batch_size = nsamples;
-
- stats->num_iters += batch_size;
-
- /* Fill in the iter constants */
- fic.xform_distrib = xform_distrib;
- fic.spec = spec;
- fic.batch_size = batch_size / (double)spec->nthreads;
- fic.cmap_size = cmap_size;
-
- fic.dmap = (flam3_palette_entry *)dmap;
- fic.buckets = (void *)buckets;
-
- /* Initialize the thread helper structures */
- for (thi = 0; thi < spec->nthreads; thi++) {
- fth[thi].fic = &fic;
- fth[thi].i = thi;
- flam3_copy(&(fth[thi].cp),&cp);
- }
-
- /* Let's make some threads */
- myThreads = (pthread_t *)malloc(spec->nthreads * sizeof(pthread_t));
-
- pthread_mutex_init(&fic.bucket_mutex, NULL);
-
- pthread_attr_init(&pt_attr);
- pthread_attr_setdetachstate(&pt_attr,PTHREAD_CREATE_JOINABLE);
-
- for (thi=0; thi <spec->nthreads; thi ++)
- pthread_create(&myThreads[thi], &pt_attr, (void *)iter_thread, (void *)(&(fth[thi])));
-
- pthread_attr_destroy(&pt_attr);
-
- /* Wait for them to return */
- for (thi=0; thi < spec->nthreads; thi++)
- pthread_join(myThreads[thi], NULL);
-
- pthread_mutex_destroy(&fic.bucket_mutex);
-
- free(myThreads);
-
- /* Free the xform_distrib array */
- free(xform_distrib);
-
- if (fic.aborted) {
- goto done;
- }
-
- vibrancy += cp.vibrancy;
- gamma += cp.gamma;
- vib_gam_n++;
-
- }
-
-
-#if 0
- printf("iw=%d,ih=%d,ppux=%f,ppuy=%f\n",image_width,image_height,ppux,ppuy);
- printf("contrast=%f, brightness=%f, PREFILTER=%d\n",
- cp.contrast, cp.brightness, PREFILTER_WHITE);
- printf("area = %f, WHITE_LEVEL=%d, sample_density=%f\n",
- area, WHITE_LEVEL, sample_density);
- printf("k1=%f,k2=%15.12f\n",k1,k2);
-#endif
-
- }
+void bucket_init (bucket * const b, const uint2 dim) {
+ memset (b, 0, sizeof (*b));
+ b->dim = dim;
- /* filter the accumulation buffer down into the image */
- if (1) {
- const double g = 1.0 / (gamma / vib_gam_n);
+ size_t size = dim[0] * dim[1] * sizeof (*b->data);
+ int ret = posix_memalign ((void **) &b->data, sizeof (*b->data), size);
+ assert (ret == 0);
+ assert (b->data != NULL);
+ memset (b->data, 0, size);
+}
- double linrange = cp.gam_lin_thresh;
+static void compute_camera (const flam3_genome * const genome,
+ const bucket * const bucket, render_constants * const c) {
+ assert (genome != NULL);
+ assert (bucket != NULL);
+ assert (c != NULL);
+
+ double corner0, corner1;
+
+ const double scale = pow(2.0, genome->zoom);
+
+ c->ppux = genome->pixels_per_unit * scale;
+ c->ppuy = c->ppux;
+ //ppux /= spec->pixel_aspect_ratio;
+ corner0 = genome->center[0] - bucket->dim[0] / c->ppux / 2.0;
+ corner1 = genome->center[1] - bucket->dim[1] / c->ppuy / 2.0;
+ c->bounds[0] = corner0;
+ c->bounds[1] = corner1;
+ c->bounds[2] = corner0 + bucket->dim[0] / c->ppux;
+ c->bounds[3] = corner1 + bucket->dim[1] / c->ppuy;
+ const double size[2] = {1.0 / (c->bounds[2] - c->bounds[0]),
+ 1.0 / (c->bounds[3] - c->bounds[1])};
+ rotate_center ((double2) { genome->rot_center[0], genome->rot_center[1] },
+ genome->rotate, c->rot);
+ c->ws0 = bucket->dim[0] * size[0];
+ c->wb0s0 = c->ws0 * c->bounds[0];
+ c->hs1 = bucket->dim[1] * size[1];
+ c->hb1s1 = c->hs1 * c->bounds[1];
+}
- vibrancy /= vib_gam_n;
-
- /* XXX: the original formula has a factor 268/256 in here, not sure why */
- const double k1 = cp.contrast * cp.brightness;
- const double area = image_width * image_height / (ppux * ppuy);
- const double k2 = 1.0 / (cp.contrast * area * sample_density);
+bool render_bucket (flam3_genome * const genome, bucket * const bucket,
+ const double timelimit) {
+ assert (bucket != NULL);
+ assert (genome != NULL);
+
+ int ret = prepare_precalc_flags(genome);
+ assert (ret == 0);
+
+ render_constants c = {
+ .fuse = 100,
+ .sub_batch_size = 10000,
+ .xform_distrib = flam3_create_xform_distrib(genome),
+ .timelimit = timelimit,
+ };
+ assert (c.xform_distrib != NULL);
+
+ /* compute the colormap entries. */
+ /* the input colormap is 256 long with entries from 0 to 1.0 */
+ for (unsigned int j = 0; j < CMAP_SIZE; j++) {
+ c.dmap[j].index = genome->palette[(j * 256) / CMAP_SIZE].index / 256.0;
+ for (unsigned int k = 0; k < 4; k++) {
+ c.dmap[j].color[k] = genome->palette[(j * 256) / CMAP_SIZE].color[k];
+ }
+ }
+ c.cmap_size = 256;
- for (unsigned int y = 0; y < image_height; y++) {
- for (unsigned int x = 0; x < image_width; x++) {
- double4 t = buckets[x + y * fic.width];
+ /* compute camera */
+ compute_camera (genome, bucket, &c);
- const double ls = (k1 * log(1.0 + t[3] * k2))/t[3];
+ bool stopped = false;
+#pragma omp parallel shared(stopped)
+ iter_thread (genome, bucket, &c, &stopped);
- t = t * ls;
- t = clip (t, g, linrange, highpow, vibrancy);
+ free (c.xform_distrib);
- const double maxval = (1 << (bytes_per_channel*8)) - 1;
- t = nearbyint_d4 (t * maxval);
+ return true;
+}
- if (bytes_per_channel == 2) {
- uint16_t * const p = &((uint16_t *) out)[channels * (x + y * out_width)];
+void render_image (const flam3_genome * const genome, const bucket * const bucket,
+ void * const out, const unsigned int bytes_per_channel) {
+ assert (genome != NULL);
+ assert (bucket != NULL);
+ assert (bucket->data != NULL);
+
+ const unsigned int pixels = bucket->dim[0] * bucket->dim[1];
+ const unsigned int channels = 4;
+
+ /* XXX: copied from above */
+ const double scale = pow(2.0, genome->zoom);
+ const double ppux = genome->pixels_per_unit * scale;
+ const double ppuy = ppux;
+
+ const double sample_density = (double) bucket->samples / (double) pixels;
+ const double g = 1.0 / genome->gamma;
+ const double linrange = genome->gam_lin_thresh;
+ const double vibrancy = genome->vibrancy;
+ /* XXX: the original formula has a factor 268/256 in here, not sure why */
+ const double k1 = genome->contrast * genome->brightness;
+ const double area = (double) pixels / (ppux * ppuy);
+ const double k2 = 1.0 / (genome->contrast * area * sample_density);
+ const double highpow = genome->highlight_power;
+
+#pragma omp parallel for
+ for (unsigned int i = 0; i < pixels; i++) {
+ double4 t = bucket->data[i];
+
+ const double ls = (k1 * log(1.0 + t[3] * k2))/t[3];
+
+ t = t * ls;
+ t = clip (t, g, linrange, highpow, vibrancy);
+
+ const double maxval = (1 << (bytes_per_channel*8)) - 1;
+ t = nearbyint_d4 (t * maxval);
+
+ switch (bytes_per_channel) {
+ case 2: {
+ uint16_t * const p = &((uint16_t *) out)[channels * i];
p[0] = t[0];
p[1] = t[1];
p[2] = t[2];
p[3] = t[3];
- } else if (bytes_per_channel == 1) {
- uint8_t * const p = &((uint8_t *) out)[channels * (x + y * out_width)];
+ break;
+ }
+
+ case 1: {
+ uint8_t * const p = &((uint8_t *) out)[channels * i];
p[0] = t[0];
p[1] = t[1];
p[2] = t[2];
p[3] = t[3];
- } else {
- assert (0);
+ break;
}
- }
- }
- }
-
- done:
-
- stats->badvals = fic.badvals;
-
- free(buckets);
- /* We have to clear the cps in fth first */
- for (thi = 0; thi < spec->nthreads; thi++) {
- clear_cp(&(fth[thi].cp),0);
- }
- free(fth);
- clear_cp(&cp,0);
-
- return(0);
+ default:
+ assert (0);
+ break;
+ }
+ }
}
diff --git a/rect.h b/rect.h
index 3def002..39c76cb 100644
--- a/rect.h
+++ b/rect.h
@@ -1,6 +1,18 @@
#pragma once
#include <stdbool.h>
-int render_parallel (flam3_frame *spec, void *out, stat_struct *stats);
+#include "vector.h"
+typedef struct {
+ /* bucket width/height */
+ uint2 dim;
+ double4 *data;
+ unsigned long int badvals, samples;
+} bucket;
+
+void bucket_init (bucket * const b, const uint2 dim);
+bool render_bucket (flam3_genome * const genome, bucket * const bucket,
+ const double timelimit);
+void render_image (const flam3_genome * const genome, const bucket * const b,
+ void * const out, const unsigned int bytes_per_channel);
diff --git a/vector.h b/vector.h
index 6e65186..2c4ff52 100644
--- a/vector.h
+++ b/vector.h
@@ -5,10 +5,12 @@
/* LLVM/clang */
typedef double double2 __attribute__ ((ext_vector_type (2)));
typedef double double4 __attribute__ ((ext_vector_type (4)));
+typedef unsigned int uint2 __attribute__ ((ext_vector_type (2)));
#endif
#else
/* GCC */
typedef double double2 __attribute__ ((vector_size (sizeof (double)*2)));
typedef double double4 __attribute__ ((vector_size (sizeof (double)*4)));
+typedef unsigned int uint2 __attribute__ ((vector_size (sizeof (unsigned int)*2)));
#endif
diff --git a/wscript b/wscript
index 732086e..c6ed83f 100644
--- a/wscript
+++ b/wscript
@@ -7,10 +7,11 @@ def configure(conf):
conf.env.append_unique ('CFLAGS', '-std=gnu99')
conf.env.append_unique ('CFLAGS', '-D_GNU_SOURCE')
+ conf.env.append_unique ('CFLAGS', '-fopenmp')
+ conf.env.append_unique ('LINKFLAGS', '-fopenmp')
conf.check_cfg (path='xml2-config', args='--cflags --libs', package='', uselib_store='xml2')
conf.check_cc (lib='xml2', header_name='libxml/parser.h', function_name='xmlParseFile', use='xml2')
- conf.check_cc (lib='pthread', uselib_store='pthread')
conf.check_cfg (package='libpng', uselib_store='png', args=['--cflags', '--libs'], msg='Checking for library png')
conf.check_cc (lib='amdlibm', header_name='amdlibm.h', mandatory=False, define_name='HAVE_AMDLIBM', uselib_store='amdlibm')
@@ -19,5 +20,5 @@ def configure(conf):
conf.write_config_header ('config.h')
def build(bld):
- bld.program (features='c cprogram', source='flam3.c parser.c variations.c interpolation.c palettes.c png.c random.c rect.c main.c', target='vlam3', use='xml2 png amdlibm pthread')
+ bld.program (features='c cprogram', source='flam3.c parser.c variations.c interpolation.c palettes.c png.c random.c rect.c main.c', target='vlam3', use='xml2 png amdlibm')