/* FLAM3 - cosmic recursive fractal flames Copyright (C) 1992-2009 Spotworks LLC This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include "private.h" #include "filters.h" #include "variations.h" #include "palettes.h" #include "math.h" /* * for batch * generate de filters * for temporal_sample_batch * interpolate * compute colormap * for subbatch * compute samples * buckets += cmap[samples] * accum += time_filter[temporal_sample_batch] * log[buckets] * de_filter * image = filter(accum) */ /* allow this many iterations for settling into attractor */ #define FUSE_27 15 #define FUSE_28 100 /* Lookup color [0,1] */ static double4 color_palette_lookup (const double color, const color_palette_mode mode, const flam3_palette map, const unsigned int map_count) { assert (color >= 0.0 && color <= 1.0); switch (mode) { case PALETTE_MODE_LINEAR: { const double ix = color * map_count; const double bottomix = floor (ix); const double frac = ix - bottomix; const unsigned int intix = bottomix; if (intix == map_count-1) { return map[intix].color; } else { return map[intix].color * (1.0-frac) + map[intix+1].color * frac; } break; } case PALETTE_MODE_STEP: { const unsigned int intix = nearbyint (color * map_count); return map[intix].color; break; } default: assert (0); break; } } static void *iter_thread(void *fth) { double sub_batch; int j; flam3_thread_helper *fthp = (flam3_thread_helper *)fth; flam3_iter_constants *ficp = fthp->fic; struct timespec pauset; int SBS = ficp->spec->sub_batch_size; int fuse; int cmap_size = ficp->cmap_size; double eta = 0.0; double4 *iter_storage; randctx rc; rand_seed (&rc); int ret = posix_memalign ((void **) &iter_storage, sizeof (*iter_storage), SBS * sizeof (*iter_storage)); assert (ret == 0); assert (iter_storage != NULL); fuse = (ficp->spec->earlyclip) ? FUSE_28 : FUSE_27; pauset.tv_sec = 0; pauset.tv_nsec = 100000000; if (fthp->timer_initialize) { *(ficp->progress_timer) = 0; memset(ficp->progress_timer_history,0,64*sizeof(time_t)); memset(ficp->progress_history,0,64*sizeof(double)); *(ficp->progress_history_mark) = 0; } for (sub_batch = 0; sub_batch < ficp->batch_size; sub_batch+=SBS) { int sub_batch_size, badcount; time_t newt = time(NULL); /* sub_batch is double so this is sketchy */ sub_batch_size = (sub_batch + SBS > ficp->batch_size) ? (ficp->batch_size - sub_batch) : SBS; if (fthp->first_thread && newt != *(ficp->progress_timer)) { double percent = 100.0 * ((((sub_batch / (double) ficp->batch_size) + ficp->temporal_sample_num) / ficp->ntemporal_samples) + ficp->batch_num)/ficp->nbatches; int old_mark = 0; int ticker; if (ficp->spec->verbose) fprintf(stderr, "\rchaos: %5.1f%%", percent); *(ficp->progress_timer) = newt; if (ficp->progress_timer_history[*(ficp->progress_history_mark)] && ficp->progress_history[*(ficp->progress_history_mark)] < percent) old_mark = *(ficp->progress_history_mark); if (percent > 0) { eta = (100 - percent) * (*(ficp->progress_timer) - ficp->progress_timer_history[old_mark]) / (percent - ficp->progress_history[old_mark]); if (ficp->spec->verbose) { ticker = (*(ficp->progress_timer)&1)?':':'.'; if (eta < 1000) ticker = ':'; if (eta > 100) fprintf(stderr, " ETA%c %.1f minutes", ticker, eta / 60); else fprintf(stderr, " ETA%c %ld seconds ", ticker, (long) ceil(eta)); fprintf(stderr, " \r"); fflush(stderr); } } ficp->progress_timer_history[*(ficp->progress_history_mark)] = *(ficp->progress_timer); ficp->progress_history[*(ficp->progress_history_mark)] = percent; *(ficp->progress_history_mark) = (*(ficp->progress_history_mark) + 1) % 64; } /* Custom progress function */ if (ficp->spec->progress) { if (fthp->first_thread) { int rv; /* Recalculate % done, as the other calculation only updates once per second */ double percent = 100.0 * ((((sub_batch / (double) ficp->batch_size) + ficp->temporal_sample_num) / ficp->ntemporal_samples) + ficp->batch_num)/ficp->nbatches; rv = (*ficp->spec->progress)(ficp->spec->progress_parameter, percent, 0, eta); if (rv==2) { /* PAUSE */ time_t tnow = time(NULL); time_t tend; int lastpt; ficp->aborted = -1; do { nanosleep(&pauset,NULL); rv = (*ficp->spec->progress)(ficp->spec->progress_parameter, percent, 0, eta); } while (rv==2); /* modify the timer history to compensate for the pause */ tend = time(NULL)-tnow; ficp->aborted = 0; for (lastpt=0;lastpt<64;lastpt++) { if (ficp->progress_timer_history[lastpt]) { ficp->progress_timer_history[lastpt] += tend; } } } if (rv==1) { /* ABORT */ ficp->aborted = 1; goto done; } } else { if (ficp->aborted<0) { do { nanosleep(&pauset,NULL); } while (ficp->aborted==-1); } if (ficp->aborted>0) goto done; } } /* Seed iterations */ const double4 start = (double4) { rand_d11(&rc), rand_d11(&rc), rand_d01(&rc), rand_d01(&rc), }; /* Execute iterations */ badcount = flam3_iterate(&(fthp->cp), sub_batch_size, fuse, start, iter_storage, ficp->xform_distrib, &rc); /* Lock mutex for access to accumulator */ pthread_mutex_lock(&ficp->bucket_mutex); /* Add the badcount to the counter */ ficp->badvals += badcount; /* Put them in the bucket accumulator */ for (j = 0; j < sub_batch_size; j++) { double4 p = iter_storage[j]; if (fthp->cp.rotate != 0.0) { const double2 p01 = (double2) { p[0], p[1] }; const double2 rotatedp = apply_affine (p01, ficp->rot); p[0] = rotatedp[0]; p[1] = rotatedp[1]; } /* Skip if out of bounding box or invisible */ if (p[0] >= ficp->bounds[0] && p[1] >= ficp->bounds[1] && p[0] <= ficp->bounds[2] && p[1] <= ficp->bounds[3] && p[3] > 0) { const size_t ix = (int)(ficp->ws0 * p[0] - ficp->wb0s0) + ficp->width * (int)(ficp->hs1 * p[1] - ficp->hb1s1); #if HAVE_BUILTIN_PREFETCH /* prefetch for reading (0) with no locality (0). This (partially) * hides the load latency for the += operation at the end of this * block */ __builtin_prefetch (&ficp->buckets[ix], 0, 0); #endif double4 interpcolor = color_palette_lookup (p[2], fthp->cp.palette_mode, ficp->dmap, cmap_size); const double logvis = p[3]; if (logvis != 1.0) { interpcolor *= logvis; } ficp->buckets[ix] += interpcolor; } } /* Release mutex */ pthread_mutex_unlock(&ficp->bucket_mutex); } done: free (iter_storage); return NULL; } /* Perform clipping */ static double4 clip (const double4 in, const double g, const double linrange, const double highpow, const double vibrancy) { double alpha, ls; if (in[3] <= 0.0) { alpha = 0.0; ls = 0.0; } else { alpha = flam3_calc_alpha (in[3], g, linrange); ls = vibrancy * alpha / in[3]; alpha = clamp (alpha, 0.0, 1.0); } double4 newrgb = flam3_calc_newrgb (in, ls, highpow); newrgb += (1.0-vibrancy) * pow_d4 (in, g); if (alpha > 0.0) { newrgb /= alpha; } else { newrgb = (double4) {0, 0, 0, 0}; } newrgb[3] = alpha; newrgb = clamp_d4 (newrgb, 0.0, 1.0); return newrgb; } int render_rectangle(flam3_frame *spec, void *out, int field, stat_struct *stats) { long nbuckets; int i, j, k, batch_num, temporal_sample_num; double nsamples, batch_size; double *filter, *temporal_filter, *temporal_deltas, *batch_filter; double ppux=0, ppuy=0; int image_width, image_height; /* size of the image to produce */ int out_width; int filter_width=0; int bytes_per_channel = spec->bytes_per_channel; int oversample; double highpow; int nbatches; int ntemporal_samples; flam3_palette dmap; int gutter_width; double vibrancy = 0.0; double gamma = 0.0; int vib_gam_n = 0; time_t progress_began=0; int verbose = spec->verbose; flam3_genome cp; unsigned short *xform_distrib; flam3_iter_constants fic; flam3_thread_helper *fth; pthread_attr_t pt_attr; pthread_t *myThreads=NULL; int thi; time_t tstart,tend; double sumfilt; int cmap_size; /* Per-render progress timers */ time_t progress_timer=0; time_t progress_timer_history[64]; double progress_history[64]; int progress_history_mark = 0; tstart = time(NULL); fic.badvals = 0; fic.aborted = 0; stats->num_iters = 0; /* correct for apophysis's use of 255 colors in the palette rather than all 256 */ cmap_size = 256; memset(&cp,0, sizeof(flam3_genome)); /* interpolate and get a control point */ flam3_interpolate(spec->genomes, spec->ngenomes, spec->time, 0, &cp); oversample = cp.spatial_oversample; highpow = cp.highlight_power; nbatches = cp.nbatches; ntemporal_samples = cp.ntemporal_samples; if (nbatches < 1) { fprintf(stderr, "nbatches must be positive, not %d.\n", nbatches); return(1); } if (oversample < 1) { fprintf(stderr, "oversample must be positive, not %d.\n", oversample); return(1); } /* Initialize the thread helper structures */ fth = (flam3_thread_helper *)calloc(spec->nthreads,sizeof(flam3_thread_helper)); for (i=0;inthreads;i++) fth[i].cp.final_xform_index=-1; /* Set up the output image dimensions, adjusted for scanline */ const unsigned int channels = 4; image_width = cp.width; out_width = image_width; if (field) { image_height = cp.height / 2; if (field == flam3_field_odd) out = (unsigned char *)out + channels * bytes_per_channel * out_width; out_width *= 2; } else image_height = cp.height; /* Spatial Filter kernel creation */ filter_width = flam3_create_spatial_filter(spec, field, &filter); /* handle error */ if (filter_width<0) { fprintf(stderr,"flam3_create_spatial_filter returned error: aborting\n"); return(1); } /* note we must free 'filter' at the end */ /* batch filter */ /* may want to revisit this at some point */ batch_filter = (double *) malloc(sizeof(double) * nbatches); for (i=0; itime + temporal_deltas[batch_num*ntemporal_samples + temporal_sample_num]; /* Interpolate and get a control point */ flam3_interpolate(spec->genomes, spec->ngenomes, temporal_sample_time, 0, &cp); /* Get the xforms ready to render */ if (prepare_precalc_flags(&cp)) { fprintf(stderr,"prepare xform pointers returned error: aborting.\n"); return(1); } xform_distrib = flam3_create_xform_distrib(&cp); if (xform_distrib==NULL) { fprintf(stderr,"create xform distrib returned error: aborting.\n"); return(1); } /* compute the colormap entries. */ /* the input colormap is 256 long with entries from 0 to 1.0 */ for (j = 0; j < CMAP_SIZE; j++) { dmap[j].index = cp.palette[(j * 256) / CMAP_SIZE].index / 256.0; for (k = 0; k < 4; k++) dmap[j].color[k] = cp.palette[(j * 256) / CMAP_SIZE].color[k] * color_scalar; } /* compute camera */ if (1) { double t0, t1, shift=0.0, corner0, corner1; double scale; if (cp.sample_density <= 0.0) { fprintf(stderr, "sample density (quality) must be greater than zero," " not %g.\n", cp.sample_density); return(1); } scale = pow(2.0, cp.zoom); sample_density = cp.sample_density * scale * scale; ppux = cp.pixels_per_unit * scale; ppuy = field ? (ppux / 2.0) : ppux; ppux /= spec->pixel_aspect_ratio; switch (field) { case flam3_field_both: shift = 0.0; break; case flam3_field_even: shift = -0.5; break; case flam3_field_odd: shift = 0.5; break; } shift = shift / ppux; t0 = (double) gutter_width / (oversample * ppux); t1 = (double) gutter_width / (oversample * ppuy); corner0 = cp.center[0] - image_width / ppux / 2.0; corner1 = cp.center[1] - image_height / ppuy / 2.0; fic.bounds[0] = corner0 - t0; fic.bounds[1] = corner1 - t1 + shift; fic.bounds[2] = corner0 + image_width / ppux + t0; fic.bounds[3] = corner1 + image_height / ppuy + t1 + shift; fic.size[0] = 1.0 / (fic.bounds[2] - fic.bounds[0]); fic.size[1] = 1.0 / (fic.bounds[3] - fic.bounds[1]); rotate_center ((double2) { cp.rot_center[0], cp.rot_center[1] }, cp.rotate, fic.rot); fic.ws0 = fic.width * fic.size[0]; fic.wb0s0 = fic.ws0 * fic.bounds[0]; fic.hs1 = fic.height * fic.size[1]; fic.hb1s1 = fic.hs1 * fic.bounds[1]; } /* number of samples is based only on the output image size */ nsamples = sample_density * image_width * image_height; /* how many of these samples are rendered in this loop? */ batch_size = nsamples / (nbatches * ntemporal_samples); stats->num_iters += batch_size; /* Fill in the iter constants */ fic.xform_distrib = xform_distrib; fic.spec = spec; fic.batch_size = batch_size / (double)spec->nthreads; fic.temporal_sample_num = temporal_sample_num; fic.ntemporal_samples = ntemporal_samples; fic.batch_num = batch_num; fic.nbatches = nbatches; fic.cmap_size = cmap_size; fic.dmap = (flam3_palette_entry *)dmap; fic.color_scalar = color_scalar; fic.buckets = (void *)buckets; /* Need a timer per job */ fic.progress_timer = &progress_timer; fic.progress_timer_history = &(progress_timer_history[0]); fic.progress_history = &(progress_history[0]); fic.progress_history_mark = &progress_history_mark; /* Initialize the thread helper structures */ for (thi = 0; thi < spec->nthreads; thi++) { if (0==thi) { fth[thi].first_thread=1; if (0==batch_num && 0==temporal_sample_num) fth[thi].timer_initialize = 1; else fth[thi].timer_initialize = 0; } else { fth[thi].first_thread=0; fth[thi].timer_initialize = 0; } fth[thi].fic = &fic; flam3_copy(&(fth[thi].cp),&cp); } /* Let's make some threads */ myThreads = (pthread_t *)malloc(spec->nthreads * sizeof(pthread_t)); pthread_mutex_init(&fic.bucket_mutex, NULL); pthread_attr_init(&pt_attr); pthread_attr_setdetachstate(&pt_attr,PTHREAD_CREATE_JOINABLE); for (thi=0; thi nthreads; thi ++) pthread_create(&myThreads[thi], &pt_attr, (void *)iter_thread, (void *)(&(fth[thi]))); pthread_attr_destroy(&pt_attr); /* Wait for them to return */ for (thi=0; thi < spec->nthreads; thi++) pthread_join(myThreads[thi], NULL); pthread_mutex_destroy(&fic.bucket_mutex); free(myThreads); /* Free the xform_distrib array */ free(xform_distrib); if (fic.aborted) { if (verbose) fprintf(stderr, "\naborted!\n"); goto done; } vibrancy += cp.vibrancy; gamma += cp.gamma; vib_gam_n++; } /* XXX: the original formula has a factor 268/256 in here, not sure why */ k1 = cp.contrast * cp.brightness * batch_filter[batch_num]; area = image_width * image_height / (ppux * ppuy); k2 = (oversample * oversample * nbatches) / (cp.contrast * area * sample_density * sumfilt); #if 0 printf("iw=%d,ih=%d,ppux=%f,ppuy=%f\n",image_width,image_height,ppux,ppuy); printf("contrast=%f, brightness=%f, PREFILTER=%d, temporal_filter=%f\n", cp.contrast, cp.brightness, PREFILTER_WHITE, temporal_filter[batch_num]); printf("oversample=%d, nbatches=%d, area = %f, WHITE_LEVEL=%d, sample_density=%f\n", oversample, nbatches, area, WHITE_LEVEL, sample_density); printf("k1=%f,k2=%15.12f\n",k1,k2); #endif for (j = 0; j < fic.height; j++) { for (i = 0; i < fic.width; i++) { const double4 c = buckets[i + j * fic.width]; if (0.0 == c[3]) continue; const double ls = (k1 * log(1.0 + c[3] * k2))/c[3]; accumulate[i + j * fic.width] += c * ls; } } } if (verbose) { fprintf(stderr, "\rchaos: 100.0%% took: %ld seconds \n", time(NULL) - progress_began); fprintf(stderr, "filtering..."); } /* filter the accumulation buffer down into the image */ if (1) { int x, y; const double g = 1.0 / (gamma / vib_gam_n); double linrange = cp.gam_lin_thresh; vibrancy /= vib_gam_n; /* If we're in the early clip mode, perform this first step to */ /* apply the gamma correction and clipping before the spat filt */ if (spec->earlyclip) { for (j = 0; j < fic.height; j++) { for (i = 0; i < fic.width; i++) { const double4 in = accumulate[i + j*fic.width]; accumulate[i + j*fic.width] = clip (in, g, linrange, highpow, vibrancy); } } } /* Apply the spatial filter */ y = 0; for (j = 0; j < image_height; j++) { x = 0; for (i = 0; i < image_width; i++) { int ii, jj; double4 t = (double4) { 0.0, 0.0, 0.0, 0.0 }; for (ii = 0; ii < filter_width; ii++) { for (jj = 0; jj < filter_width; jj++) { const double k = filter[ii + jj * filter_width]; const double4 ac = accumulate[x+ii + (y+jj)*fic.width]; t += k * ac; } } /* The old way, spatial filter first and then clip after gamma */ if (!spec->earlyclip) { t = clip (t, g, linrange, highpow, vibrancy); } const double maxval = (1 << (bytes_per_channel*8)) - 1; t = nearbyint_d4 (t * maxval); if (bytes_per_channel == 2) { uint16_t * const p = &((uint16_t *) out)[channels * (i + j * out_width)]; p[0] = t[0]; p[1] = t[1]; p[2] = t[2]; p[3] = t[3]; } else if (bytes_per_channel == 1) { uint8_t * const p = &((uint8_t *) out)[channels * (i + j * out_width)]; p[0] = t[0]; p[1] = t[1]; p[2] = t[2]; p[3] = t[3]; } else { assert (0); } x += oversample; } y += oversample; } } done: stats->badvals = fic.badvals; free(temporal_filter); free(temporal_deltas); free(batch_filter); free(filter); free(buckets); free(accumulate); /* We have to clear the cps in fth first */ for (thi = 0; thi < spec->nthreads; thi++) { clear_cp(&(fth[thi].cp),0); } free(fth); clear_cp(&cp,0); tend = time(NULL); stats->render_seconds = (int)(tend-tstart); return(0); }