From b2dfbdf4d9644c684c938cb2730deab66aa06d9b Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 2 May 2015 21:36:31 +0200 Subject: Move out of subdir --- rect.c | 1287 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1287 insertions(+) create mode 100644 rect.c (limited to 'rect.c') diff --git a/rect.c b/rect.c new file mode 100644 index 0000000..ddea9cf --- /dev/null +++ b/rect.c @@ -0,0 +1,1287 @@ +/* + FLAM3 - cosmic recursive fractal flames + Copyright (C) 1992-2009 Spotworks LLC + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . +*/ + +/* this file is included into flam3.c once for each buffer bit-width */ + +/* + * for batch + * generate de filters + * for temporal_sample_batch + * interpolate + * compute colormap + * for subbatch + * compute samples + * buckets += cmap[samples] + * accum += time_filter[temporal_sample_batch] * log[buckets] * de_filter + * image = filter(accum) + */ + + +/* allow this many iterations for settling into attractor */ +#define FUSE_27 15 +#define FUSE_28 100 +#define WHITE_LEVEL 255 + + +typedef struct { + + bucket *b; + abucket *accumulate; + int width, height, oversample; + flam3_de_helper *de; + double k1,k2; + double curve; + int last_thread; + int start_row, end_row; + flam3_frame *spec; + int *aborted; + int progress_size; + +} de_thread_helper; + +static void de_thread(void *dth) { + + de_thread_helper *dthp = (de_thread_helper *)dth; + int oversample = dthp->oversample; + int ss = (int)floor(oversample / 2.0); + int scf = !(oversample & 1); + double scfact = pow(oversample/(oversample+1.0), 2.0); + int wid=dthp->width; + int hig=dthp->height; + int ps =dthp->progress_size; + int str = (oversample-1)+dthp->start_row; + int enr = (oversample-1)+dthp->end_row; + int i,j; + time_t progress_timer=0; + struct timespec pauset; + int progress_count = 0; + int pixel_num; + + pauset.tv_sec = 0; + pauset.tv_nsec = 100000000; + + /* Density estimation code */ + for (j = str; j < enr; j++) { + for (i = oversample-1; i < wid-(oversample-1); i++) { + + int ii,jj; + double f_select=0.0; + int f_select_int,f_coef_idx; + int arr_filt_width; + bucket *b; + double c[4],ls; + + b = dthp->b + i + j*wid; + + /* Don't do anything if there's no hits here */ + if (b[0][4] == 0 || b[0][3] == 0) + continue; + + /* Count density in ssxss area */ + /* Scale if OS>1 for equal iters */ + for (ii=-ss; ii<=ss; ii++) { + for (jj=-ss; jj<=ss; jj++) { + b = dthp->b + (i + ii) + (j + jj)*wid; + f_select += b[0][4]/255.0; + } + } + + if (scf) + f_select *= scfact; + + if (f_select > dthp->de->max_filtered_counts) + f_select_int = dthp->de->max_filter_index; + else if (f_select<=DE_THRESH) + f_select_int = (int)ceil(f_select)-1; + else + f_select_int = (int)DE_THRESH + + (int)floor(pow(f_select-DE_THRESH,dthp->curve)); + + /* If the filter selected below the min specified clamp it to the min */ + if (f_select_int > dthp->de->max_filter_index) + f_select_int = dthp->de->max_filter_index; + + /* We only have to calculate the values for ~1/8 of the square */ + f_coef_idx = f_select_int*dthp->de->kernel_size; + + arr_filt_width = (int)ceil(dthp->de->filter_widths[f_select_int])-1; + + b = dthp->b + i + j*wid; + + for (jj=0; jj<=arr_filt_width; jj++) { + for (ii=0; ii<=jj; ii++) { + + /* Skip if coef is 0 */ + if (dthp->de->filter_coefs[f_coef_idx]==0.0) { + f_coef_idx++; + continue; + } + + c[0] = (double) b[0][0]; + c[1] = (double) b[0][1]; + c[2] = (double) b[0][2]; + c[3] = (double) b[0][3]; + + ls = dthp->de->filter_coefs[f_coef_idx]*(dthp->k1 * log(1.0 + c[3] * dthp->k2))/c[3]; + + c[0] *= ls; + c[1] *= ls; + c[2] *= ls; + c[3] *= ls; + + if (jj==0 && ii==0) { + add_c_to_accum(dthp->accumulate,i,ii,j,jj,wid,hig,c); + } + else if (ii==0) { + add_c_to_accum(dthp->accumulate,i,jj,j,0,wid,hig,c); + add_c_to_accum(dthp->accumulate,i,-jj,j,0,wid,hig,c); + add_c_to_accum(dthp->accumulate,i,0,j,jj,wid,hig,c); + add_c_to_accum(dthp->accumulate,i,0,j,-jj,wid,hig,c); + } else if (jj==ii) { + add_c_to_accum(dthp->accumulate,i,ii,j,jj,wid,hig,c); + add_c_to_accum(dthp->accumulate,i,-ii,j,jj,wid,hig,c); + add_c_to_accum(dthp->accumulate,i,ii,j,-jj,wid,hig,c); + add_c_to_accum(dthp->accumulate,i,-ii,j,-jj,wid,hig,c); + } else { + add_c_to_accum(dthp->accumulate,i,ii,j,jj,wid,hig,c); + add_c_to_accum(dthp->accumulate,i,-ii,j,jj,wid,hig,c); + add_c_to_accum(dthp->accumulate,i,ii,j,-jj,wid,hig,c); + add_c_to_accum(dthp->accumulate,i,-ii,j,-jj,wid,hig,c); + add_c_to_accum(dthp->accumulate,i,jj,j,ii,wid,hig,c); + add_c_to_accum(dthp->accumulate,i,-jj,j,ii,wid,hig,c); + add_c_to_accum(dthp->accumulate,i,jj,j,-ii,wid,hig,c); + add_c_to_accum(dthp->accumulate,i,-jj,j,-ii,wid,hig,c); + } + + f_coef_idx++; + + } + } + } + + pixel_num = (j-str+1)*wid; + + if (dthp->last_thread) { + /* Standard progress function */ + if (dthp->spec->verbose && time(NULL) != progress_timer) { + progress_timer = time(NULL); + fprintf(stderr, "\rdensity estimation: %d/%d ", j-str, enr-str); + fflush(stderr); + } + + } + /* Custom progress function */ + if (dthp->spec->progress && pixel_num > progress_count + ps) { + + progress_count = ps * floor(pixel_num/(double)ps); + + if (dthp->last_thread) { + + int rv = (*dthp->spec->progress)(dthp->spec->progress_parameter, + 100*(j-str)/(double)(enr-str), 1, 0); + + if (rv==2) { /* PAUSE */ + + *(dthp->aborted) = -1; + + do { +#if defined(_WIN32) /* mingw or msvc */ + Sleep(100); +#else + nanosleep(&pauset,NULL); +#endif + rv = (*dthp->spec->progress)(dthp->spec->progress_parameter, + 100*(j-str)/(double)(enr-str), 1, 0); + } while (rv==2); + + *(dthp->aborted) = 0; + + } + + if (rv==1) { + *(dthp->aborted) = 1; +#ifdef HAVE_LIBPTHREAD + pthread_exit((void *)0); +#else + return; +#endif + } + } else { +#ifdef HAVE_LIBPTHREAD + + if (*(dthp->aborted)<0) { + + do { +#if defined(_WIN32) /* mingw or msvc */ + Sleep(100); +#else + nanosleep(&pauset,NULL); +#endif + } while (*(dthp->aborted)<0); + } + + if (*(dthp->aborted)>0) pthread_exit((void *)0); +#else + if (*(dthp->aborted)>0) return; +#endif + } + } + + } + + #ifdef HAVE_LIBPTHREAD + pthread_exit((void *)0); + #endif + +} + +static void iter_thread(void *fth) { + double sub_batch; + int j; + flam3_thread_helper *fthp = (flam3_thread_helper *)fth; + flam3_iter_constants *ficp = fthp->fic; + struct timespec pauset; + int SBS = ficp->spec->sub_batch_size; + int fuse; + int cmap_size = ficp->cmap_size; + int cmap_size_m1 = ficp->cmap_size-1; + + double eta = 0.0; + + fuse = (ficp->spec->earlyclip) ? FUSE_28 : FUSE_27; + + pauset.tv_sec = 0; + pauset.tv_nsec = 100000000; + + + if (fthp->timer_initialize) { + *(ficp->progress_timer) = 0; + memset(ficp->progress_timer_history,0,64*sizeof(time_t)); + memset(ficp->progress_history,0,64*sizeof(double)); + *(ficp->progress_history_mark) = 0; + } + + for (sub_batch = 0; sub_batch < ficp->batch_size; sub_batch+=SBS) { + int sub_batch_size, badcount; + time_t newt = time(NULL); + /* sub_batch is double so this is sketchy */ + sub_batch_size = (sub_batch + SBS > ficp->batch_size) ? + (ficp->batch_size - sub_batch) : SBS; + + if (fthp->first_thread && newt != *(ficp->progress_timer)) { + double percent = 100.0 * + ((((sub_batch / (double) ficp->batch_size) + ficp->temporal_sample_num) + / ficp->ntemporal_samples) + ficp->batch_num)/ficp->nbatches; + int old_mark = 0; + int ticker; + + if (ficp->spec->verbose) + fprintf(stderr, "\rchaos: %5.1f%%", percent); + + *(ficp->progress_timer) = newt; + if (ficp->progress_timer_history[*(ficp->progress_history_mark)] && + ficp->progress_history[*(ficp->progress_history_mark)] < percent) + old_mark = *(ficp->progress_history_mark); + + if (percent > 0) { + eta = (100 - percent) * (*(ficp->progress_timer) - ficp->progress_timer_history[old_mark]) + / (percent - ficp->progress_history[old_mark]); + + if (ficp->spec->verbose) { + ticker = (*(ficp->progress_timer)&1)?':':'.'; + if (eta < 1000) + ticker = ':'; + if (eta > 100) + fprintf(stderr, " ETA%c %.1f minutes", ticker, eta / 60); + else + fprintf(stderr, " ETA%c %ld seconds ", ticker, (long) ceil(eta)); + fprintf(stderr, " \r"); + fflush(stderr); + } + } + + ficp->progress_timer_history[*(ficp->progress_history_mark)] = *(ficp->progress_timer); + ficp->progress_history[*(ficp->progress_history_mark)] = percent; + *(ficp->progress_history_mark) = (*(ficp->progress_history_mark) + 1) % 64; + } + + /* Custom progress function */ + if (ficp->spec->progress) { + if (fthp->first_thread) { + + int rv; + + /* Recalculate % done, as the other calculation only updates once per second */ + double percent = 100.0 * + ((((sub_batch / (double) ficp->batch_size) + ficp->temporal_sample_num) + / ficp->ntemporal_samples) + ficp->batch_num)/ficp->nbatches; + + rv = (*ficp->spec->progress)(ficp->spec->progress_parameter, percent, 0, eta); + + if (rv==2) { /* PAUSE */ + + time_t tnow = time(NULL); + time_t tend; + int lastpt; + + ficp->aborted = -1; + + do { +#if defined(_WIN32) /* mingw or msvc */ + Sleep(100); +#else + nanosleep(&pauset,NULL); +#endif + rv = (*ficp->spec->progress)(ficp->spec->progress_parameter, percent, 0, eta); + } while (rv==2); + + /* modify the timer history to compensate for the pause */ + tend = time(NULL)-tnow; + + ficp->aborted = 0; + + for (lastpt=0;lastpt<64;lastpt++) { + if (ficp->progress_timer_history[lastpt]) { + ficp->progress_timer_history[lastpt] += tend; + } + } + + } + + if (rv==1) { /* ABORT */ + ficp->aborted = 1; +#ifdef HAVE_LIBPTHREAD + pthread_exit((void *)0); +#else + return; +#endif + } + } else { + if (ficp->aborted<0) { + + do { +#if defined(_WIN32) /* mingw or msvc */ + Sleep(100); +#else + nanosleep(&pauset,NULL); +#endif + } while (ficp->aborted==-1); + } +#ifdef HAVE_LIBPTHREAD + if (ficp->aborted>0) pthread_exit((void *)0); +#else + if (ficp->aborted>0) return; +#endif + } + } + + /* Seed iterations */ + fthp->iter_storage[0] = flam3_random_isaac_11(&(fthp->rc)); + fthp->iter_storage[1] = flam3_random_isaac_11(&(fthp->rc)); + fthp->iter_storage[2] = flam3_random_isaac_01(&(fthp->rc)); + fthp->iter_storage[3] = flam3_random_isaac_01(&(fthp->rc)); + + /* Execute iterations */ + badcount = flam3_iterate(&(fthp->cp), sub_batch_size, fuse, fthp->iter_storage, ficp->xform_distrib, &(fthp->rc)); + + #if defined(HAVE_LIBPTHREAD) && defined(USE_LOCKS) + /* Lock mutex for access to accumulator */ + pthread_mutex_lock(&ficp->bucket_mutex); + #endif + + /* Add the badcount to the counter */ + ficp->badvals += badcount; + + /* Put them in the bucket accumulator */ + for (j = 0; j < sub_batch_size*4; j+=4) { + double p0, p1, p00, p11; + double dbl_index0,dbl_frac; + double interpcolor[4]; + int ci, color_index0; + double *p = &(fthp->iter_storage[j]); + bucket *b; + + if (fthp->cp.rotate != 0.0) { + p00 = p[0] - fthp->cp.rot_center[0]; + p11 = p[1] - fthp->cp.rot_center[1]; + p0 = p00 * ficp->rot[0][0] + p11 * ficp->rot[0][1] + fthp->cp.rot_center[0]; + p1 = p00 * ficp->rot[1][0] + p11 * ficp->rot[1][1] + fthp->cp.rot_center[1]; + } else { + p0 = p[0]; + p1 = p[1]; + } + + if (p0 >= ficp->bounds[0] && p1 >= ficp->bounds[1] && p0 <= ficp->bounds[2] && p1 <= ficp->bounds[3]) { + + double logvis=1.0; + bucket *buckets = (bucket *)(ficp->buckets); + + /* Skip if invisible */ + if (p[3]==0) + continue; + else + logvis = p[3]; + + b = buckets + (int)(ficp->ws0 * p0 - ficp->wb0s0) + + ficp->width * (int)(ficp->hs1 * p1 - ficp->hb1s1); + +#ifdef USE_FLOAT_INDICES + color_index0 = 0; + + //fprintf(stdout,"%.16f\n",p[2]*256.0); + + while(color_index0 < cmap_size_m1) { + if (ficp->dmap[color_index0+1].index > p[2]) + break; + else + color_index0++; + } + + if (p[3]==1.0) { + bump_no_overflow(b[0][0], ficp->dmap[color_index0].color[0]); + bump_no_overflow(b[0][1], ficp->dmap[color_index0].color[1]); + bump_no_overflow(b[0][2], ficp->dmap[color_index0].color[2]); + bump_no_overflow(b[0][3], ficp->dmap[color_index0].color[3]); + bump_no_overflow(b[0][4], 255.0); + } else { + bump_no_overflow(b[0][0], logvis*ficp->dmap[color_index0].color[0]); + bump_no_overflow(b[0][1], logvis*ficp->dmap[color_index0].color[1]); + bump_no_overflow(b[0][2], logvis*ficp->dmap[color_index0].color[2]); + bump_no_overflow(b[0][3], logvis*ficp->dmap[color_index0].color[3]); + bump_no_overflow(b[0][4], logvis*255.0); +#else + dbl_index0 = p[2] * cmap_size; + color_index0 = (int) (dbl_index0); + + if (flam3_palette_mode_linear == fthp->cp.palette_mode) { + if (color_index0 < 0) { + color_index0 = 0; + dbl_frac = 0; + } else if (color_index0 >= cmap_size_m1) { + color_index0 = cmap_size_m1-1; + dbl_frac = 1.0; + } else { + /* interpolate between color_index0 and color_index0+1 */ + dbl_frac = dbl_index0 - (double)color_index0; + } + + for (ci=0;ci<4;ci++) { + interpcolor[ci] = ficp->dmap[color_index0].color[ci] * (1.0-dbl_frac) + + ficp->dmap[color_index0+1].color[ci] * dbl_frac; + } + + } else { /* Palette mode step */ + + if (color_index0 < 0) { + color_index0 = 0; + } else if (color_index0 >= cmap_size_m1) { + color_index0 = cmap_size_m1; + } + + for (ci=0;ci<4;ci++) + interpcolor[ci] = ficp->dmap[color_index0].color[ci]; + } + + if (p[3]==1.0) { + bump_no_overflow(b[0][0], interpcolor[0]); + bump_no_overflow(b[0][1], interpcolor[1]); + bump_no_overflow(b[0][2], interpcolor[2]); + bump_no_overflow(b[0][3], interpcolor[3]); + bump_no_overflow(b[0][4], 255.0); + } else { + bump_no_overflow(b[0][0], logvis*interpcolor[0]); + bump_no_overflow(b[0][1], logvis*interpcolor[1]); + bump_no_overflow(b[0][2], logvis*interpcolor[2]); + bump_no_overflow(b[0][3], logvis*interpcolor[3]); + bump_no_overflow(b[0][4], logvis*255.0); + } +#endif + + } + } + + #if defined(HAVE_LIBPTHREAD) && defined(USE_LOCKS) + /* Release mutex */ + pthread_mutex_unlock(&ficp->bucket_mutex); + #endif + + } + #ifdef HAVE_LIBPTHREAD + pthread_exit((void *)0); + #endif +} + +static int render_rectangle(flam3_frame *spec, void *out, + int field, int nchan, int transp, stat_struct *stats) { + long nbuckets; + int i, j, k, batch_num, temporal_sample_num; + double nsamples, batch_size; + bucket *buckets; + abucket *accumulate; + double *points; + double *filter, *temporal_filter, *temporal_deltas, *batch_filter; + double ppux=0, ppuy=0; + int image_width, image_height; /* size of the image to produce */ + int out_width; + int filter_width=0; + int bytes_per_channel = spec->bytes_per_channel; + int oversample; + double highpow; + int nbatches; + int ntemporal_samples; + flam3_palette dmap; + int gutter_width; + double vibrancy = 0.0; + double gamma = 0.0; + double background[3]; + int vib_gam_n = 0; + time_t progress_began=0; + int verbose = spec->verbose; + int gnm_idx,max_gnm_de_fw,de_offset; + flam3_genome cp; + unsigned short *xform_distrib; + flam3_iter_constants fic; + flam3_thread_helper *fth; +#ifdef HAVE_LIBPTHREAD + pthread_attr_t pt_attr; + pthread_t *myThreads=NULL; +#endif + int thread_status; + int thi; + time_t tstart,tend; + double sumfilt; + char *ai; + int cmap_size; + + char *last_block; + size_t memory_rqd; + + /* Per-render progress timers */ + time_t progress_timer=0; + time_t progress_timer_history[64]; + double progress_history[64]; + int progress_history_mark = 0; + + tstart = time(NULL); + + fic.badvals = 0; + fic.aborted = 0; + + stats->num_iters = 0; + + /* correct for apophysis's use of 255 colors in the palette rather than all 256 */ + cmap_size = 256 - argi("apo_palette",0); + + memset(&cp,0, sizeof(flam3_genome)); + + /* interpolate and get a control point */ + flam3_interpolate(spec->genomes, spec->ngenomes, spec->time, 0, &cp); + oversample = cp.spatial_oversample; + highpow = cp.highlight_power; + nbatches = cp.nbatches; + ntemporal_samples = cp.ntemporal_samples; + + if (nbatches < 1) { + fprintf(stderr, "nbatches must be positive, not %d.\n", nbatches); + return(1); + } + + if (oversample < 1) { + fprintf(stderr, "oversample must be positive, not %d.\n", oversample); + return(1); + } + + /* Initialize the thread helper structures */ + fth = (flam3_thread_helper *)calloc(spec->nthreads,sizeof(flam3_thread_helper)); + for (i=0;inthreads;i++) + fth[i].cp.final_xform_index=-1; + + /* Set up the output image dimensions, adjusted for scanline */ + image_width = cp.width; + out_width = image_width; + if (field) { + image_height = cp.height / 2; + + if (field == flam3_field_odd) + out = (unsigned char *)out + nchan * bytes_per_channel * out_width; + + out_width *= 2; + } else + image_height = cp.height; + + + /* Spatial Filter kernel creation */ + filter_width = flam3_create_spatial_filter(spec, field, &filter); + + /* handle error */ + if (filter_width<0) { + fprintf(stderr,"flam3_create_spatial_filter returned error: aborting\n"); + return(1); + } + + /* note we must free 'filter' at the end */ + + /* batch filter */ + /* may want to revisit this at some point */ + batch_filter = (double *) malloc(sizeof(double) * nbatches); + for (i=0; ingenomes; gnm_idx++) { + int this_width = (int)ceil(spec->genomes[gnm_idx].estimator) * oversample; + if (this_width > max_gnm_de_fw) + max_gnm_de_fw = this_width; + } + + /* Add OS-1 for the averaging at the edges, if it's > 0 already */ + if (max_gnm_de_fw>0) + max_gnm_de_fw += (oversample-1); + + /* max_gnm_de_fw is now the number of pixels of additional gutter */ + /* necessary to appropriately perform the density estimation filtering */ + /* Check to see if it's greater than the gutter_width */ + if (max_gnm_de_fw > gutter_width) { + de_offset = max_gnm_de_fw - gutter_width; + gutter_width = max_gnm_de_fw; + } else + de_offset = 0; + + + /* Allocate the space required to render the image */ + fic.height = oversample * image_height + 2 * gutter_width; + fic.width = oversample * image_width + 2 * gutter_width; + + nbuckets = (long)fic.width * (long)fic.height; + memory_rqd = (sizeof(bucket) * nbuckets + sizeof(abucket) * nbuckets + + 4 * sizeof(double) * (size_t)(spec->sub_batch_size) * spec->nthreads); + last_block = (char *) malloc(memory_rqd); + if (NULL == last_block) { + fprintf(stderr, "render_rectangle: cannot malloc %g bytes.\n", (double)memory_rqd); + fprintf(stderr, "render_rectangle: w=%d h=%d nb=%ld.\n", fic.width, fic.height, nbuckets); + return(1); + } + + /* Just free buckets at the end */ + buckets = (bucket *) last_block; + accumulate = (abucket *) (last_block + sizeof(bucket) * nbuckets); + points = (double *) (last_block + (sizeof(bucket) + sizeof(abucket)) * nbuckets); + + if (verbose) { + fprintf(stderr, "chaos: "); + progress_began = time(NULL); + } + + background[0] = background[1] = background[2] = 0.0; + memset((char *) accumulate, 0, sizeof(abucket) * nbuckets); + + + /* Batch loop - outermost */ + for (batch_num = 0; batch_num < nbatches; batch_num++) { + double de_time; + double sample_density=0.0; + double k1, area, k2; + flam3_de_helper de; + + de_time = spec->time + temporal_deltas[batch_num*ntemporal_samples]; + + memset((char *) buckets, 0, sizeof(bucket) * nbuckets); + + /* interpolate and get a control point */ + /* ONLY FOR DENSITY FILTER WIDTH PURPOSES */ + /* additional interpolation will be done in the temporal_sample loop */ + flam3_interpolate(spec->genomes, spec->ngenomes, de_time, 0, &cp); + + /* if instructed to by the genome, create the density estimation */ + /* filter kernels. Check boundary conditions as well. */ + if (cp.estimator < 0.0 || cp.estimator_minimum < 0.0) { + fprintf(stderr,"density estimator filter widths must be >= 0\n"); + return(1); + } + + if (spec->bits <= 32) { + if (cp.estimator > 0.0) { + fprintf(stderr, "warning: density estimation disabled with %d bit buffers.\n", spec->bits); + cp.estimator = 0.0; + } + } + + /* Create DE filters */ + if (cp.estimator > 0.0) { + de = flam3_create_de_filters(cp.estimator,cp.estimator_minimum, + cp.estimator_curve,oversample); + if (de.kernel_size<0) { + fprintf(stderr,"de.kernel_size returned 0 - aborting.\n"); + return(1); + } + } else + de.max_filter_index = 0; + + /* Temporal sample loop */ + for (temporal_sample_num = 0; temporal_sample_num < ntemporal_samples; temporal_sample_num++) { + + double temporal_sample_time; + double color_scalar = temporal_filter[batch_num*ntemporal_samples + temporal_sample_num]; + + temporal_sample_time = spec->time + + temporal_deltas[batch_num*ntemporal_samples + temporal_sample_num]; + + /* Interpolate and get a control point */ + flam3_interpolate(spec->genomes, spec->ngenomes, temporal_sample_time, 0, &cp); + + /* Get the xforms ready to render */ + if (prepare_precalc_flags(&cp)) { + fprintf(stderr,"prepare xform pointers returned error: aborting.\n"); + return(1); + } + xform_distrib = flam3_create_xform_distrib(&cp); + if (xform_distrib==NULL) { + fprintf(stderr,"create xform distrib returned error: aborting.\n"); + return(1); + } + + /* compute the colormap entries. */ + /* the input colormap is 256 long with entries from 0 to 1.0 */ + for (j = 0; j < CMAP_SIZE; j++) { + dmap[j].index = cp.palette[(j * 256) / CMAP_SIZE].index / 256.0; + for (k = 0; k < 4; k++) + dmap[j].color[k] = (cp.palette[(j * 256) / CMAP_SIZE].color[k] * WHITE_LEVEL) * color_scalar; + } + + /* compute camera */ + if (1) { + double t0, t1, shift=0.0, corner0, corner1; + double scale; + + if (cp.sample_density <= 0.0) { + fprintf(stderr, + "sample density (quality) must be greater than zero," + " not %g.\n", cp.sample_density); + return(1); + } + + scale = pow(2.0, cp.zoom); + sample_density = cp.sample_density * scale * scale; + + ppux = cp.pixels_per_unit * scale; + ppuy = field ? (ppux / 2.0) : ppux; + ppux /= spec->pixel_aspect_ratio; + switch (field) { + case flam3_field_both: shift = 0.0; break; + case flam3_field_even: shift = -0.5; break; + case flam3_field_odd: shift = 0.5; break; + } + shift = shift / ppux; + t0 = (double) gutter_width / (oversample * ppux); + t1 = (double) gutter_width / (oversample * ppuy); + corner0 = cp.center[0] - image_width / ppux / 2.0; + corner1 = cp.center[1] - image_height / ppuy / 2.0; + fic.bounds[0] = corner0 - t0; + fic.bounds[1] = corner1 - t1 + shift; + fic.bounds[2] = corner0 + image_width / ppux + t0; + fic.bounds[3] = corner1 + image_height / ppuy + t1 + shift; + fic.size[0] = 1.0 / (fic.bounds[2] - fic.bounds[0]); + fic.size[1] = 1.0 / (fic.bounds[3] - fic.bounds[1]); + fic.rot[0][0] = cos(cp.rotate * 2 * M_PI / 360.0); + fic.rot[0][1] = -sin(cp.rotate * 2 * M_PI / 360.0); + fic.rot[1][0] = -fic.rot[0][1]; + fic.rot[1][1] = fic.rot[0][0]; + fic.ws0 = fic.width * fic.size[0]; + fic.wb0s0 = fic.ws0 * fic.bounds[0]; + fic.hs1 = fic.height * fic.size[1]; + fic.hb1s1 = fic.hs1 * fic.bounds[1]; + + } + + /* number of samples is based only on the output image size */ + nsamples = sample_density * image_width * image_height; + + /* how many of these samples are rendered in this loop? */ + batch_size = nsamples / (nbatches * ntemporal_samples); + + stats->num_iters += batch_size; + + /* Fill in the iter constants */ + fic.xform_distrib = xform_distrib; + fic.spec = spec; + fic.batch_size = batch_size / (double)spec->nthreads; + fic.temporal_sample_num = temporal_sample_num; + fic.ntemporal_samples = ntemporal_samples; + fic.batch_num = batch_num; + fic.nbatches = nbatches; + fic.cmap_size = cmap_size; + + fic.dmap = (flam3_palette_entry *)dmap; + fic.color_scalar = color_scalar; + fic.buckets = (void *)buckets; + + /* Need a timer per job */ + fic.progress_timer = &progress_timer; + fic.progress_timer_history = &(progress_timer_history[0]); + fic.progress_history = &(progress_history[0]); + fic.progress_history_mark = &progress_history_mark; + + /* Initialize the thread helper structures */ + for (thi = 0; thi < spec->nthreads; thi++) { + + int rk; + /* Create a new isaac state for this thread */ + for (rk = 0; rk < RANDSIZ; rk++) + fth[thi].rc.randrsl[rk] = irand(&spec->rc); + + irandinit(&(fth[thi].rc),1); + + if (0==thi) { + + fth[thi].first_thread=1; + if (0==batch_num && 0==temporal_sample_num) + fth[thi].timer_initialize = 1; + else + fth[thi].timer_initialize = 0; + + } else { + fth[thi].first_thread=0; + fth[thi].timer_initialize = 0; + } + + fth[thi].iter_storage = &(points[thi*(spec->sub_batch_size)*4]); + fth[thi].fic = &fic; + flam3_copy(&(fth[thi].cp),&cp); + + } + +#ifdef HAVE_LIBPTHREAD + /* Let's make some threads */ + myThreads = (pthread_t *)malloc(spec->nthreads * sizeof(pthread_t)); + + #if defined(USE_LOCKS) + pthread_mutex_init(&fic.bucket_mutex, NULL); + #endif + + pthread_attr_init(&pt_attr); + pthread_attr_setdetachstate(&pt_attr,PTHREAD_CREATE_JOINABLE); + + for (thi=0; thi nthreads; thi ++) + pthread_create(&myThreads[thi], &pt_attr, (void *)iter_thread, (void *)(&(fth[thi]))); + + pthread_attr_destroy(&pt_attr); + + /* Wait for them to return */ + for (thi=0; thi < spec->nthreads; thi++) + pthread_join(myThreads[thi], (void **)&thread_status); + + #if defined(USE_LOCKS) + pthread_mutex_destroy(&fic.bucket_mutex); + #endif + + free(myThreads); +#else + for (thi=0; thi < spec->nthreads; thi++) + iter_thread( (void *)(&(fth[thi])) ); +#endif + + /* Free the xform_distrib array */ + free(xform_distrib); + + if (fic.aborted) { + if (verbose) fprintf(stderr, "\naborted!\n"); + goto done; + } + + vibrancy += cp.vibrancy; + gamma += cp.gamma; + background[0] += cp.background[0]; + background[1] += cp.background[1]; + background[2] += cp.background[2]; + vib_gam_n++; + + } + + k1 =(cp.contrast * cp.brightness * + PREFILTER_WHITE * 268.0 * batch_filter[batch_num]) / 256; + area = image_width * image_height / (ppux * ppuy); + k2 = (oversample * oversample * nbatches) / + (cp.contrast * area * WHITE_LEVEL * sample_density * sumfilt); +#if 0 + printf("iw=%d,ih=%d,ppux=%f,ppuy=%f\n",image_width,image_height,ppux,ppuy); + printf("contrast=%f, brightness=%f, PREFILTER=%d, temporal_filter=%f\n", + cp.contrast, cp.brightness, PREFILTER_WHITE, temporal_filter[batch_num]); + printf("oversample=%d, nbatches=%d, area = %f, WHITE_LEVEL=%d, sample_density=%f\n", + oversample, nbatches, area, WHITE_LEVEL, sample_density); + printf("k1=%f,k2=%15.12f\n",k1,k2); +#endif + + if (de.max_filter_index == 0) { + + for (j = 0; j < fic.height; j++) { + for (i = 0; i < fic.width; i++) { + + abucket *a = accumulate + i + j * fic.width; + bucket *b = buckets + i + j * fic.width; + double c[4], ls; + + c[0] = (double) b[0][0]; + c[1] = (double) b[0][1]; + c[2] = (double) b[0][2]; + c[3] = (double) b[0][3]; + if (0.0 == c[3]) + continue; + + ls = (k1 * log(1.0 + c[3] * k2))/c[3]; + c[0] *= ls; + c[1] *= ls; + c[2] *= ls; + c[3] *= ls; + + abump_no_overflow(a[0][0], c[0]); + abump_no_overflow(a[0][1], c[1]); + abump_no_overflow(a[0][2], c[2]); + abump_no_overflow(a[0][3], c[3]); + } + } + } else { + + de_thread_helper *deth; + int de_aborted=0; + int myspan = (fic.height-2*(oversample-1)+1); + int swath = myspan/(spec->nthreads); + + /* Create the de helper structures */ + deth = (de_thread_helper *)calloc(spec->nthreads,sizeof(de_thread_helper)); + + for (thi=0;thi<(spec->nthreads);thi++) { + + /* Set up the contents of the helper structure */ + deth[thi].b = buckets; + deth[thi].accumulate = accumulate; + deth[thi].width = fic.width; + deth[thi].height = fic.height; + deth[thi].oversample = oversample; + deth[thi].progress_size = spec->sub_batch_size/10; + deth[thi].de = &de; + deth[thi].k1 = k1; + deth[thi].k2 = k2; + deth[thi].curve = cp.estimator_curve; + deth[thi].spec = spec; + deth[thi].aborted = &de_aborted; + if ( (spec->nthreads)>myspan) { /* More threads than rows */ + deth[thi].start_row=0; + if (thi==spec->nthreads-1) { + deth[thi].end_row=myspan; + deth[thi].last_thread=1; + } else { + deth[thi].end_row=-1; + deth[thi].last_thread=0; + } + } else { /* Normal case */ + deth[thi].start_row=thi*swath; + deth[thi].end_row=(thi+1)*swath; + if (thi==spec->nthreads-1) { + deth[thi].end_row=myspan; + deth[thi].last_thread=1; + } else { + deth[thi].last_thread=0; + } + } + } + +#ifdef HAVE_LIBPTHREAD + /* Let's make some threads */ + myThreads = (pthread_t *)malloc(spec->nthreads * sizeof(pthread_t)); + + pthread_attr_init(&pt_attr); + pthread_attr_setdetachstate(&pt_attr,PTHREAD_CREATE_JOINABLE); + + for (thi=0; thi nthreads; thi ++) + pthread_create(&myThreads[thi], &pt_attr, (void *)de_thread, (void *)(&(deth[thi]))); + + pthread_attr_destroy(&pt_attr); + + /* Wait for them to return */ + for (thi=0; thi < spec->nthreads; thi++) + pthread_join(myThreads[thi], (void **)&thread_status); + + free(myThreads); +#else + for (thi=0; thi nthreads; thi ++) + de_thread((void *)(&(deth[thi]))); +#endif + + free(deth); + + if (de_aborted) { + if (verbose) fprintf(stderr, "\naborted!\n"); + goto done; + } + + } /* End density estimation loop */ + + + /* If allocated, free the de filter memory for the next batch */ + if (de.max_filter_index > 0) { + free(de.filter_coefs); + free(de.filter_widths); + } + + } + + if (verbose) { + fprintf(stderr, "\rchaos: 100.0%% took: %ld seconds \n", time(NULL) - progress_began); + fprintf(stderr, "filtering..."); + } + + + /* filter the accumulation buffer down into the image */ + if (1) { + int x, y; + double t[4],newrgb[3]; + double g = 1.0 / (gamma / vib_gam_n); + double tmp,a; + double alpha,ls; + int rgbi; + + double linrange = cp.gam_lin_thresh; + + vibrancy /= vib_gam_n; + background[0] /= vib_gam_n/256.0; + background[1] /= vib_gam_n/256.0; + background[2] /= vib_gam_n/256.0; + + /* If we're in the early clip mode, perform this first step to */ + /* apply the gamma correction and clipping before the spat filt */ + + if (spec->earlyclip) { + + for (j = 0; j < fic.height; j++) { + for (i = 0; i < fic.width; i++) { + + abucket *ac = accumulate + i + j*fic.width; + + if (ac[0][3]<=0) { + alpha = 0.0; + ls = 0.0; + } else { + tmp=ac[0][3]/PREFILTER_WHITE; + alpha = flam3_calc_alpha(tmp,g,linrange); + ls = vibrancy * 256.0 * alpha / tmp; + if (alpha<0.0) alpha = 0.0; + if (alpha>1.0) alpha = 1.0; + } + + t[0] = (double)ac[0][0]; + t[1] = (double)ac[0][1]; + t[2] = (double)ac[0][2]; + t[3] = (double)ac[0][3]; + + flam3_calc_newrgb(t, ls, highpow, newrgb); + + for (rgbi=0;rgbi<3;rgbi++) { + a = newrgb[rgbi]; + a += (1.0-vibrancy) * 256.0 * pow( t[rgbi] / PREFILTER_WHITE, g); + if (nchan<=3 || transp==0) + a += ((1.0 - alpha) * background[rgbi]); + else { + if (alpha>0) + a /= alpha; + else + a = 0; + } + + /* Clamp here to ensure proper filter functionality */ + if (a>255) a = 255; + if (a<0) a = 0; + + /* Replace values in accumulation buffer with these new ones */ + ac[0][rgbi] = a; + } + + ac[0][3] = alpha; + + } + } + } + + /* Apply the spatial filter */ + y = de_offset; + for (j = 0; j < image_height; j++) { + x = de_offset; + for (i = 0; i < image_width; i++) { + int ii, jj,rgbi; + void *p; + unsigned short *p16; + unsigned char *p8; + t[0] = t[1] = t[2] = t[3] = 0.0; + for (ii = 0; ii < filter_width; ii++) { + for (jj = 0; jj < filter_width; jj++) { + double k = filter[ii + jj * filter_width]; + abucket *ac = accumulate + x+ii + (y+jj)*fic.width; + + + t[0] += k * ac[0][0]; + t[1] += k * ac[0][1]; + t[2] += k * ac[0][2]; + t[3] += k * ac[0][3]; + + + } + } + + p = (unsigned char *)out + nchan * bytes_per_channel * (i + j * out_width); + p8 = (unsigned char *)p; + p16 = (unsigned short *)p; + + /* The old way, spatial filter first and then clip after gamma */ + if (!spec->earlyclip) { + + tmp=t[3]/PREFILTER_WHITE; + + if (t[3]<=0) { + alpha = 0.0; + ls = 0.0; + } else { + alpha = flam3_calc_alpha(tmp,g,linrange); + ls = vibrancy * 256.0 * alpha / tmp; + if (alpha<0.0) alpha = 0.0; + if (alpha>1.0) alpha = 1.0; + } + + flam3_calc_newrgb(t, ls, highpow, newrgb); + + for (rgbi=0;rgbi<3;rgbi++) { + a = newrgb[rgbi]; + a += (1.0-vibrancy) * 256.0 * pow( t[rgbi] / PREFILTER_WHITE, g); + if (nchan<=3 || transp==0) + a += ((1.0 - alpha) * background[rgbi]); + else { + if (alpha>0) + a /= alpha; + else + a = 0; + } + + /* Clamp here to ensure proper filter functionality */ + if (a>255) a = 255; + if (a<0) a = 0; + + /* Replace values in accumulation buffer with these new ones */ + t[rgbi] = a; + } + t[3] = alpha; + } + + for (rgbi=0;rgbi<3;rgbi++) { + + a = t[rgbi]; + + if (a > 255) + a = 255; + if (a < 0) + a = 0; + + if (2==bytes_per_channel) { + a *= 256.0; /* Scales to [0-65280] */ + p16[rgbi] = (unsigned short) a; + } else { + p8[rgbi] = (unsigned char) a; + } + } + + + if (t[3]>1) + t[3]=1; + if (t[3]<0) + t[3]=0; + + /* alpha */ + if (nchan>3) { + if (transp==1) { + if (2==bytes_per_channel) + p16[3] = (unsigned short) (t[3] * 65535); + else + p8[3] = (unsigned char) (t[3] * 255); + } else { + if (2==bytes_per_channel) + p16[3] = 65535; + else + p8[3] = 255; + } + } + + x += oversample; + } + y += oversample; + } + } + + done: + + stats->badvals = fic.badvals; + + free(temporal_filter); + free(temporal_deltas); + free(batch_filter); + free(filter); + free(buckets); +// free(accumulate); +// free(points); + /* We have to clear the cps in fth first */ + for (thi = 0; thi < spec->nthreads; thi++) { + clear_cp(&(fth[thi].cp),0); + } + free(fth); + clear_cp(&cp,0); + + if (getenv("insert_palette")) { + int ph = 100; + if (ph >= image_height) ph = image_height; + /* insert the palette into the image */ + for (j = 0; j < ph; j++) { + for (i = 0; i < image_width; i++) { + unsigned char *p = (unsigned char *)out + nchan * (i + j * out_width); + p[0] = (unsigned char)dmap[i * 256 / image_width].color[0]; + p[1] = (unsigned char)dmap[i * 256 / image_width].color[1]; + p[2] = (unsigned char)dmap[i * 256 / image_width].color[2]; + } + } + } + + tend = time(NULL); + stats->render_seconds = (int)(tend-tstart); + + return(0); + +} -- cgit v1.2.3