/* FLAM3 - cosmic recursive fractal flames Copyright (C) 1992-2009 Spotworks LLC This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ /* this file is included into flam3.c once for each buffer bit-width */ /* * for batch * generate de filters * for temporal_sample_batch * interpolate * compute colormap * for subbatch * compute samples * buckets += cmap[samples] * accum += time_filter[temporal_sample_batch] * log[buckets] * de_filter * image = filter(accum) */ /* allow this many iterations for settling into attractor */ #define FUSE 100 #define WHITE_LEVEL 255 typedef struct { bucket *b; abucket *accumulate; int width, height, oversample; flam3_de_helper *de; double k1,k2; double curve; int last_thread; int start_row, end_row; flam3_frame *spec; int *aborted; int progress_size; } de_thread_helper; static void de_thread(void *dth) { de_thread_helper *dthp = (de_thread_helper *)dth; int oversample = dthp->oversample; int ss = (int)floor(oversample / 2.0); int scf = !(oversample & 1); double scfact = pow(oversample/(oversample+1.0), 2.0); int wid=dthp->width; int hig=dthp->height; int ps =dthp->progress_size; int str = (oversample-1)+dthp->start_row; int enr = (oversample-1)+dthp->end_row; int i,j; time_t progress_timer=0; struct timespec pauset; int progress_count = 0; int pixel_num; pauset.tv_sec = 0; pauset.tv_nsec = 100000000; /* Density estimation code */ for (j = str; j < enr; j++) { for (i = oversample-1; i < wid-(oversample-1); i++) { int ii,jj; double f_select=0.0; int f_select_int,f_coef_idx; int arr_filt_width; bucket *b; double c[4],ls; b = dthp->b + i + j*wid; /* Don't do anything if there's no hits here */ if (b[0][4] == 0 || b[0][3] == 0) continue; /* Count density in ssxss area */ /* Scale if OS>1 for equal iters */ for (ii=-ss; ii<=ss; ii++) { for (jj=-ss; jj<=ss; jj++) { b = dthp->b + (i + ii) + (j + jj)*wid; f_select += b[0][4]/255.0; } } if (scf) f_select *= scfact; if (f_select > dthp->de->max_filtered_counts) f_select_int = dthp->de->max_filter_index; else if (f_select<=DE_THRESH) f_select_int = (int)ceil(f_select)-1; else f_select_int = (int)DE_THRESH + (int)floor(pow(f_select-DE_THRESH,dthp->curve)); /* If the filter selected below the min specified clamp it to the min */ if (f_select_int > dthp->de->max_filter_index) f_select_int = dthp->de->max_filter_index; /* We only have to calculate the values for ~1/8 of the square */ f_coef_idx = f_select_int*dthp->de->kernel_size; arr_filt_width = (int)ceil(dthp->de->filter_widths[f_select_int])-1; b = dthp->b + i + j*wid; for (jj=0; jj<=arr_filt_width; jj++) { for (ii=0; ii<=jj; ii++) { /* Skip if coef is 0 */ if (dthp->de->filter_coefs[f_coef_idx]==0.0) { f_coef_idx++; continue; } c[0] = (double) b[0][0]; c[1] = (double) b[0][1]; c[2] = (double) b[0][2]; c[3] = (double) b[0][3]; ls = dthp->de->filter_coefs[f_coef_idx]*(dthp->k1 * log(1.0 + c[3] * dthp->k2))/c[3]; c[0] *= ls; c[1] *= ls; c[2] *= ls; c[3] *= ls; if (jj==0 && ii==0) { add_c_to_accum(dthp->accumulate,i,ii,j,jj,wid,hig,c); } else if (ii==0) { add_c_to_accum(dthp->accumulate,i,jj,j,0,wid,hig,c); add_c_to_accum(dthp->accumulate,i,-jj,j,0,wid,hig,c); add_c_to_accum(dthp->accumulate,i,0,j,jj,wid,hig,c); add_c_to_accum(dthp->accumulate,i,0,j,-jj,wid,hig,c); } else if (jj==ii) { add_c_to_accum(dthp->accumulate,i,ii,j,jj,wid,hig,c); add_c_to_accum(dthp->accumulate,i,-ii,j,jj,wid,hig,c); add_c_to_accum(dthp->accumulate,i,ii,j,-jj,wid,hig,c); add_c_to_accum(dthp->accumulate,i,-ii,j,-jj,wid,hig,c); } else { add_c_to_accum(dthp->accumulate,i,ii,j,jj,wid,hig,c); add_c_to_accum(dthp->accumulate,i,-ii,j,jj,wid,hig,c); add_c_to_accum(dthp->accumulate,i,ii,j,-jj,wid,hig,c); add_c_to_accum(dthp->accumulate,i,-ii,j,-jj,wid,hig,c); add_c_to_accum(dthp->accumulate,i,jj,j,ii,wid,hig,c); add_c_to_accum(dthp->accumulate,i,-jj,j,ii,wid,hig,c); add_c_to_accum(dthp->accumulate,i,jj,j,-ii,wid,hig,c); add_c_to_accum(dthp->accumulate,i,-jj,j,-ii,wid,hig,c); } f_coef_idx++; } } } pixel_num = (j-str+1)*wid; if (dthp->last_thread) { /* Standard progress function */ if (dthp->spec->verbose && time(NULL) != progress_timer) { progress_timer = time(NULL); fprintf(stderr, "\rdensity estimation: %d/%d ", j-str, enr-str); fflush(stderr); } } /* Custom progress function */ if (dthp->spec->progress && pixel_num > progress_count + ps) { progress_count = ps * floor(pixel_num/(double)ps); if (dthp->last_thread) { int rv = (*dthp->spec->progress)(dthp->spec->progress_parameter, 100*(j-str)/(double)(enr-str), 1, 0); if (rv==2) { /* PAUSE */ *(dthp->aborted) = -1; do { #if defined(_WIN32) /* mingw or msvc */ Sleep(100); #else nanosleep(&pauset,NULL); #endif rv = (*dthp->spec->progress)(dthp->spec->progress_parameter, 100*(j-str)/(double)(enr-str), 1, 0); } while (rv==2); *(dthp->aborted) = 0; } if (rv==1) { *(dthp->aborted) = 1; #ifdef HAVE_LIBPTHREAD pthread_exit((void *)0); #else return; #endif } } else { #ifdef HAVE_LIBPTHREAD if (*(dthp->aborted)<0) { do { #if defined(_WIN32) /* mingw or msvc */ Sleep(100); #else nanosleep(&pauset,NULL); #endif } while (*(dthp->aborted)<0); } if (*(dthp->aborted)>0) pthread_exit((void *)0); #else if (*(dthp->aborted)>0) return; #endif } } } #ifdef HAVE_LIBPTHREAD pthread_exit((void *)0); #endif } static void iter_thread(void *fth) { double sub_batch; int j; flam3_thread_helper *fthp = (flam3_thread_helper *)fth; flam3_iter_constants *ficp = fthp->fic; struct timespec pauset; int SBS = ficp->spec->sub_batch_size; double eta = 0.0; pauset.tv_sec = 0; pauset.tv_nsec = 100000000; if (fthp->timer_initialize) { *(ficp->progress_timer) = 0; memset(ficp->progress_timer_history,0,64*sizeof(time_t)); memset(ficp->progress_history,0,64*sizeof(double)); *(ficp->progress_history_mark) = 0; } for (sub_batch = 0; sub_batch < ficp->batch_size; sub_batch+=SBS) { int sub_batch_size, badcount; time_t newt = time(NULL); /* sub_batch is double so this is sketchy */ sub_batch_size = (sub_batch + SBS > ficp->batch_size) ? (ficp->batch_size - sub_batch) : SBS; if (fthp->first_thread && newt != *(ficp->progress_timer)) { double percent = 100.0 * ((((sub_batch / (double) ficp->batch_size) + ficp->temporal_sample_num) / ficp->ntemporal_samples) + ficp->batch_num)/ficp->nbatches; int old_mark = 0; int ticker; if (ficp->spec->verbose) fprintf(stderr, "\rchaos: %5.1f%%", percent); *(ficp->progress_timer) = newt; if (ficp->progress_timer_history[*(ficp->progress_history_mark)] && ficp->progress_history[*(ficp->progress_history_mark)] < percent) old_mark = *(ficp->progress_history_mark); if (percent > 0) { eta = (100 - percent) * (*(ficp->progress_timer) - ficp->progress_timer_history[old_mark]) / (percent - ficp->progress_history[old_mark]); if (ficp->spec->verbose) { ticker = (*(ficp->progress_timer)&1)?':':'.'; if (eta < 1000) ticker = ':'; if (eta > 100) fprintf(stderr, " ETA%c %.1f minutes", ticker, eta / 60); else fprintf(stderr, " ETA%c %ld seconds ", ticker, (long) ceil(eta)); fprintf(stderr, " \r"); fflush(stderr); } } ficp->progress_timer_history[*(ficp->progress_history_mark)] = *(ficp->progress_timer); ficp->progress_history[*(ficp->progress_history_mark)] = percent; *(ficp->progress_history_mark) = (*(ficp->progress_history_mark) + 1) % 64; } /* Custom progress function */ if (ficp->spec->progress) { if (fthp->first_thread) { int rv; /* Recalculate % done, as the other calculation only updates once per second */ double percent = 100.0 * ((((sub_batch / (double) ficp->batch_size) + ficp->temporal_sample_num) / ficp->ntemporal_samples) + ficp->batch_num)/ficp->nbatches; rv = (*ficp->spec->progress)(ficp->spec->progress_parameter, percent, 0, eta); if (rv==2) { /* PAUSE */ time_t tnow = time(NULL); time_t tend; int lastpt; ficp->aborted = -1; do { #if defined(_WIN32) /* mingw or msvc */ Sleep(100); #else nanosleep(&pauset,NULL); #endif rv = (*ficp->spec->progress)(ficp->spec->progress_parameter, percent, 0, eta); } while (rv==2); /* modify the timer history to compensate for the pause */ tend = time(NULL)-tnow; ficp->aborted = 0; for (lastpt=0;lastpt<64;lastpt++) { if (ficp->progress_timer_history[lastpt]) { ficp->progress_timer_history[lastpt] += tend; } } } if (rv==1) { /* ABORT */ ficp->aborted = 1; #ifdef HAVE_LIBPTHREAD pthread_exit((void *)0); #else return; #endif } } else { if (ficp->aborted<0) { do { #if defined(_WIN32) /* mingw or msvc */ Sleep(100); #else nanosleep(&pauset,NULL); #endif } while (ficp->aborted==-1); } #ifdef HAVE_LIBPTHREAD if (ficp->aborted>0) pthread_exit((void *)0); #else if (ficp->aborted>0) return; #endif } } /* Seed iterations */ fthp->iter_storage[0] = flam3_random_isaac_11(&(fthp->rc)); fthp->iter_storage[1] = flam3_random_isaac_11(&(fthp->rc)); fthp->iter_storage[2] = flam3_random_isaac_01(&(fthp->rc)); fthp->iter_storage[3] = flam3_random_isaac_01(&(fthp->rc)); /* Execute iterations */ badcount = flam3_iterate(&(fthp->cp), sub_batch_size, FUSE, fthp->iter_storage, ficp->xform_distrib, &(fthp->rc)); #if defined(HAVE_LIBPTHREAD) && defined(USE_LOCKS) /* Lock mutex for access to accumulator */ pthread_mutex_lock(&ficp->bucket_mutex); #endif /* Add the badcount to the counter */ ficp->badvals += badcount; /* Put them in the bucket accumulator */ for (j = 0; j < sub_batch_size*4; j+=4) { double p0, p1, p00, p11; double dbl_index0,dbl_frac; double interpcolor[4]; int ci, color_index0; double *p = &(fthp->iter_storage[j]); bucket *b; if (fthp->cp.rotate != 0.0) { p00 = p[0] - fthp->cp.rot_center[0]; p11 = p[1] - fthp->cp.rot_center[1]; p0 = p00 * ficp->rot[0][0] + p11 * ficp->rot[0][1] + fthp->cp.rot_center[0]; p1 = p00 * ficp->rot[1][0] + p11 * ficp->rot[1][1] + fthp->cp.rot_center[1]; } else { p0 = p[0]; p1 = p[1]; } if (p0 >= ficp->bounds[0] && p1 >= ficp->bounds[1] && p0 <= ficp->bounds[2] && p1 <= ficp->bounds[3]) { double logvis=1.0; bucket *buckets = (bucket *)(ficp->buckets); /* Skip if invisible */ if (p[3]==0) continue; else logvis = p[3]; b = buckets + (int)(ficp->ws0 * p0 - ficp->wb0s0) + ficp->width * (int)(ficp->hs1 * p1 - ficp->hb1s1); #ifdef USE_FLOAT_INDICES color_index0 = 0; //fprintf(stdout,"%.16f\n",p[2]*256.0); while(color_index0 < CMAP_SIZE_M1) { if (ficp->dmap[color_index0+1].index > p[2]) break; else color_index0++; } if (p[3]==1.0) { bump_no_overflow(b[0][0], ficp->dmap[color_index0].color[0]); bump_no_overflow(b[0][1], ficp->dmap[color_index0].color[1]); bump_no_overflow(b[0][2], ficp->dmap[color_index0].color[2]); bump_no_overflow(b[0][3], ficp->dmap[color_index0].color[3]); bump_no_overflow(b[0][4], 255.0); } else { bump_no_overflow(b[0][0], logvis*ficp->dmap[color_index0].color[0]); bump_no_overflow(b[0][1], logvis*ficp->dmap[color_index0].color[1]); bump_no_overflow(b[0][2], logvis*ficp->dmap[color_index0].color[2]); bump_no_overflow(b[0][3], logvis*ficp->dmap[color_index0].color[3]); bump_no_overflow(b[0][4], 255.0); #else dbl_index0 = p[2] * CMAP_SIZE; color_index0 = (int) (dbl_index0); if (flam3_palette_mode_linear == fthp->cp.palette_mode) { if (color_index0 < 0) { color_index0 = 0; dbl_frac = 0; } else if (color_index0 >= CMAP_SIZE_M1) { color_index0 = CMAP_SIZE_M1-1; dbl_frac = 1.0; } else { /* interpolate between color_index0 and color_index0+1 */ dbl_frac = dbl_index0 - (double)color_index0; } for (ci=0;ci<4;ci++) { interpcolor[ci] = ficp->dmap[color_index0].color[ci] * (1.0-dbl_frac) + ficp->dmap[color_index0+1].color[ci] * dbl_frac; } } else { /* Palette mode step */ if (color_index0 < 0) { color_index0 = 0; } else if (color_index0 >= CMAP_SIZE_M1) { color_index0 = CMAP_SIZE_M1; } for (ci=0;ci<4;ci++) interpcolor[ci] = ficp->dmap[color_index0].color[ci]; } if (p[3]==1.0) { bump_no_overflow(b[0][0], interpcolor[0]); bump_no_overflow(b[0][1], interpcolor[1]); bump_no_overflow(b[0][2], interpcolor[2]); bump_no_overflow(b[0][3], interpcolor[3]); bump_no_overflow(b[0][4], 255.0); } else { bump_no_overflow(b[0][0], logvis*interpcolor[0]); bump_no_overflow(b[0][1], logvis*interpcolor[1]); bump_no_overflow(b[0][2], logvis*interpcolor[2]); bump_no_overflow(b[0][3], logvis*interpcolor[3]); bump_no_overflow(b[0][4], 255.0); } #endif } } #if defined(HAVE_LIBPTHREAD) && defined(USE_LOCKS) /* Release mutex */ pthread_mutex_unlock(&ficp->bucket_mutex); #endif } #ifdef HAVE_LIBPTHREAD pthread_exit((void *)0); #endif } static int render_rectangle(flam3_frame *spec, void *out, int field, int nchan, int transp, stat_struct *stats) { long nbuckets; int i, j, k, batch_num, temporal_sample_num; double nsamples, batch_size; bucket *buckets; abucket *accumulate; double *points; double *filter, *temporal_filter, *temporal_deltas, *batch_filter; double ppux=0, ppuy=0; int image_width, image_height; /* size of the image to produce */ int out_width; int filter_width=0; int bytes_per_channel = spec->bytes_per_channel; int oversample = spec->genomes[0].spatial_oversample; double highpow = spec->genomes[0].highlight_power; int nbatches = spec->genomes[0].nbatches; int ntemporal_samples = spec->genomes[0].ntemporal_samples; flam3_palette dmap; int gutter_width; double vibrancy = 0.0; double gamma = 0.0; double background[3]; int vib_gam_n = 0; time_t progress_began=0; int verbose = spec->verbose; int gnm_idx,max_gnm_de_fw,de_offset; flam3_genome cp; unsigned short *xform_distrib; flam3_iter_constants fic; flam3_thread_helper *fth; #ifdef HAVE_LIBPTHREAD pthread_attr_t pt_attr; pthread_t *myThreads=NULL; #endif int thread_status; int thi; time_t tstart,tend; double sumfilt; char *last_block; size_t memory_rqd; /* Per-render progress timers */ time_t progress_timer=0; time_t progress_timer_history[64]; double progress_history[64]; int progress_history_mark = 0; tstart = time(NULL); fic.badvals = 0; fic.aborted = 0; stats->num_iters = 0; memset(&cp,0, sizeof(flam3_genome)); if (nbatches < 1) { fprintf(stderr, "nbatches must be positive, not %d.\n", nbatches); return(1); } if (oversample < 1) { fprintf(stderr, "oversample must be positive, not %d.\n", oversample); return(1); } /* Initialize the thread helper structures */ fth = (flam3_thread_helper *)calloc(spec->nthreads,sizeof(flam3_thread_helper)); for (i=0;inthreads;i++) fth[i].cp.final_xform_index=-1; /* Set up the output image dimensions, adjusted for scanline */ image_width = spec->genomes[0].width; out_width = image_width; if (field) { image_height = spec->genomes[0].height / 2; if (field == flam3_field_odd) out = (unsigned char *)out + nchan * bytes_per_channel * out_width; out_width *= 2; } else image_height = spec->genomes[0].height; /* Spatial Filter kernel creation */ filter_width = flam3_create_spatial_filter(spec, field, &filter); /* handle error */ if (filter_width<0) { fprintf(stderr,"flam3_create_spatial_filter returned error: aborting\n"); return(1); } /* note we must free 'filter' at the end */ /* batch filter */ /* may want to revisit this at some point */ batch_filter = (double *) malloc(sizeof(double) * nbatches); for (i=0; igenomes[0].temporal_filter_type, spec->genomes[0].temporal_filter_exp, spec->genomes[0].temporal_filter_width, &temporal_filter, &temporal_deltas); /* the number of additional rows of buckets we put at the edge so that the filter doesn't go off the edge */ gutter_width = (filter_width - oversample) / 2; /* Check the size of the density estimation filter. If the 'radius' of the density estimation filter is greater than the gutter width, we have to pad with more. Otherwise, we can use the same value. */ max_gnm_de_fw=0; for (gnm_idx = 0; gnm_idx < spec->ngenomes; gnm_idx++) { int this_width = (int)ceil(spec->genomes[gnm_idx].estimator) * oversample; if (this_width > max_gnm_de_fw) max_gnm_de_fw = this_width; } /* Add OS-1 for the averaging at the edges, if it's > 0 already */ if (max_gnm_de_fw>0) max_gnm_de_fw += (oversample-1); /* max_gnm_de_fw is now the number of pixels of additional gutter */ /* necessary to appropriately perform the density estimation filtering */ /* Check to see if it's greater than the gutter_width */ if (max_gnm_de_fw > gutter_width) { de_offset = max_gnm_de_fw - gutter_width; gutter_width = max_gnm_de_fw; } else de_offset = 0; /* Allocate the space required to render the image */ fic.height = oversample * image_height + 2 * gutter_width; fic.width = oversample * image_width + 2 * gutter_width; nbuckets = (long)fic.width * (long)fic.height; memory_rqd = (sizeof(bucket) * nbuckets + sizeof(abucket) * nbuckets + 4 * sizeof(double) * (size_t)(spec->sub_batch_size) * spec->nthreads); last_block = (char *) malloc(memory_rqd); if (NULL == last_block) { fprintf(stderr, "render_rectangle: cannot malloc %g bytes.\n", (double)memory_rqd); fprintf(stderr, "render_rectangle: w=%d h=%d nb=%ld.\n", fic.width, fic.height, nbuckets); return(1); } /* Just free buckets at the end */ buckets = (bucket *) last_block; accumulate = (abucket *) (last_block + sizeof(bucket) * nbuckets); points = (double *) (last_block + (sizeof(bucket) + sizeof(abucket)) * nbuckets); if (verbose) { fprintf(stderr, "chaos: "); progress_began = time(NULL); } background[0] = background[1] = background[2] = 0.0; memset((char *) accumulate, 0, sizeof(abucket) * nbuckets); /* Batch loop - outermost */ for (batch_num = 0; batch_num < nbatches; batch_num++) { double de_time; double sample_density=0.0; double k1, area, k2; flam3_de_helper de; de_time = spec->time + temporal_deltas[batch_num*ntemporal_samples]; memset((char *) buckets, 0, sizeof(bucket) * nbuckets); /* interpolate and get a control point */ /* ONLY FOR DENSITY FILTER WIDTH PURPOSES */ /* additional interpolation will be done in the temporal_sample loop */ flam3_interpolate(spec->genomes, spec->ngenomes, de_time, 0, &cp); /* if instructed to by the genome, create the density estimation */ /* filter kernels. Check boundary conditions as well. */ if (cp.estimator < 0.0 || cp.estimator_minimum < 0.0) { fprintf(stderr,"density estimator filter widths must be >= 0\n"); return(1); } if (spec->bits <= 32) { if (cp.estimator > 0.0) { fprintf(stderr, "warning: density estimation disabled with %d bit buffers.\n", spec->bits); cp.estimator = 0.0; } } /* Create DE filters */ if (cp.estimator > 0.0) { de = flam3_create_de_filters(cp.estimator,cp.estimator_minimum, cp.estimator_curve,oversample); if (de.kernel_size<0) { fprintf(stderr,"de.kernel_size returned 0 - aborting.\n"); return(1); } } else de.max_filter_index = 0; /* Temporal sample loop */ for (temporal_sample_num = 0; temporal_sample_num < ntemporal_samples; temporal_sample_num++) { double temporal_sample_time; double color_scalar = temporal_filter[batch_num*ntemporal_samples + temporal_sample_num]; temporal_sample_time = spec->time + temporal_deltas[batch_num*ntemporal_samples + temporal_sample_num]; /* Interpolate and get a control point */ flam3_interpolate(spec->genomes, spec->ngenomes, temporal_sample_time, 0, &cp); /* Get the xforms ready to render */ if (prepare_precalc_flags(&cp)) { fprintf(stderr,"prepare xform pointers returned error: aborting.\n"); return(1); } xform_distrib = flam3_create_xform_distrib(&cp); if (xform_distrib==NULL) { fprintf(stderr,"create xform distrib returned error: aborting.\n"); return(1); } /* compute the colormap entries. */ /* the input colormap is 256 long with entries from 0 to 1.0 */ for (j = 0; j < CMAP_SIZE; j++) { dmap[j].index = cp.palette[(j * 256) / CMAP_SIZE].index / 256.0; for (k = 0; k < 4; k++) dmap[j].color[k] = (cp.palette[(j * 256) / CMAP_SIZE].color[k] * WHITE_LEVEL) * color_scalar; } /* compute camera */ if (1) { double t0, t1, shift=0.0, corner0, corner1; double scale; if (cp.sample_density <= 0.0) { fprintf(stderr, "sample density (quality) must be greater than zero," " not %g.\n", cp.sample_density); return(1); } scale = pow(2.0, cp.zoom); sample_density = cp.sample_density * scale * scale; ppux = cp.pixels_per_unit * scale; ppuy = field ? (ppux / 2.0) : ppux; ppux /= spec->pixel_aspect_ratio; switch (field) { case flam3_field_both: shift = 0.0; break; case flam3_field_even: shift = -0.5; break; case flam3_field_odd: shift = 0.5; break; } shift = shift / ppux; t0 = (double) gutter_width / (oversample * ppux); t1 = (double) gutter_width / (oversample * ppuy); corner0 = cp.center[0] - image_width / ppux / 2.0; corner1 = cp.center[1] - image_height / ppuy / 2.0; fic.bounds[0] = corner0 - t0; fic.bounds[1] = corner1 - t1 + shift; fic.bounds[2] = corner0 + image_width / ppux + t0; fic.bounds[3] = corner1 + image_height / ppuy + t1 + shift; fic.size[0] = 1.0 / (fic.bounds[2] - fic.bounds[0]); fic.size[1] = 1.0 / (fic.bounds[3] - fic.bounds[1]); fic.rot[0][0] = cos(cp.rotate * 2 * M_PI / 360.0); fic.rot[0][1] = -sin(cp.rotate * 2 * M_PI / 360.0); fic.rot[1][0] = -fic.rot[0][1]; fic.rot[1][1] = fic.rot[0][0]; fic.ws0 = fic.width * fic.size[0]; fic.wb0s0 = fic.ws0 * fic.bounds[0]; fic.hs1 = fic.height * fic.size[1]; fic.hb1s1 = fic.hs1 * fic.bounds[1]; } /* number of samples is based only on the output image size */ nsamples = sample_density * image_width * image_height; /* how many of these samples are rendered in this loop? */ batch_size = nsamples / (nbatches * ntemporal_samples); stats->num_iters += batch_size; /* Fill in the iter constants */ fic.xform_distrib = xform_distrib; fic.spec = spec; fic.batch_size = batch_size / (double)spec->nthreads; fic.temporal_sample_num = temporal_sample_num; fic.ntemporal_samples = ntemporal_samples; fic.batch_num = batch_num; fic.nbatches = nbatches; fic.dmap = (flam3_palette_entry *)dmap; fic.color_scalar = color_scalar; fic.buckets = (void *)buckets; /* Need a timer per job */ fic.progress_timer = &progress_timer; fic.progress_timer_history = &(progress_timer_history[0]); fic.progress_history = &(progress_history[0]); fic.progress_history_mark = &progress_history_mark; /* Initialize the thread helper structures */ for (thi = 0; thi < spec->nthreads; thi++) { int rk; /* Create a new isaac state for this thread */ for (rk = 0; rk < RANDSIZ; rk++) fth[thi].rc.randrsl[rk] = irand(&spec->rc); irandinit(&(fth[thi].rc),1); if (0==thi) { fth[thi].first_thread=1; if (0==batch_num && 0==temporal_sample_num) fth[thi].timer_initialize = 1; else fth[thi].timer_initialize = 0; } else { fth[thi].first_thread=0; fth[thi].timer_initialize = 0; } fth[thi].iter_storage = &(points[thi*(spec->sub_batch_size)*4]); fth[thi].fic = &fic; flam3_copy(&(fth[thi].cp),&cp); } #ifdef HAVE_LIBPTHREAD /* Let's make some threads */ myThreads = (pthread_t *)malloc(spec->nthreads * sizeof(pthread_t)); #if defined(USE_LOCKS) pthread_mutex_init(&fic.bucket_mutex, NULL); #endif pthread_attr_init(&pt_attr); pthread_attr_setdetachstate(&pt_attr,PTHREAD_CREATE_JOINABLE); for (thi=0; thi nthreads; thi ++) pthread_create(&myThreads[thi], &pt_attr, (void *)iter_thread, (void *)(&(fth[thi]))); pthread_attr_destroy(&pt_attr); /* Wait for them to return */ for (thi=0; thi < spec->nthreads; thi++) pthread_join(myThreads[thi], (void **)&thread_status); #if defined(USE_LOCKS) pthread_mutex_destroy(&fic.bucket_mutex); #endif free(myThreads); #else for (thi=0; thi < spec->nthreads; thi++) iter_thread( (void *)(&(fth[thi])) ); #endif /* Free the xform_distrib array */ free(xform_distrib); if (fic.aborted) { if (verbose) fprintf(stderr, "\naborted!\n"); goto done; } vibrancy += cp.vibrancy; gamma += cp.gamma; background[0] += cp.background[0]; background[1] += cp.background[1]; background[2] += cp.background[2]; vib_gam_n++; } k1 =(cp.contrast * cp.brightness * PREFILTER_WHITE * 268.0 * batch_filter[batch_num]) / 256; area = image_width * image_height / (ppux * ppuy); k2 = (oversample * oversample * nbatches) / (cp.contrast * area * WHITE_LEVEL * sample_density * sumfilt); #if 0 printf("iw=%d,ih=%d,ppux=%f,ppuy=%f\n",image_width,image_height,ppux,ppuy); printf("contrast=%f, brightness=%f, PREFILTER=%d, temporal_filter=%f\n", cp.contrast, cp.brightness, PREFILTER_WHITE, temporal_filter[batch_num]); printf("oversample=%d, nbatches=%d, area = %f, WHITE_LEVEL=%d, sample_density=%f\n", oversample, nbatches, area, WHITE_LEVEL, sample_density); printf("k1=%f,k2=%15.12f\n",k1,k2); #endif if (de.max_filter_index == 0) { for (j = 0; j < fic.height; j++) { for (i = 0; i < fic.width; i++) { abucket *a = accumulate + i + j * fic.width; bucket *b = buckets + i + j * fic.width; double c[4], ls; c[0] = (double) b[0][0]; c[1] = (double) b[0][1]; c[2] = (double) b[0][2]; c[3] = (double) b[0][3]; if (0.0 == c[3]) continue; ls = (k1 * log(1.0 + c[3] * k2))/c[3]; c[0] *= ls; c[1] *= ls; c[2] *= ls; c[3] *= ls; abump_no_overflow(a[0][0], c[0]); abump_no_overflow(a[0][1], c[1]); abump_no_overflow(a[0][2], c[2]); abump_no_overflow(a[0][3], c[3]); } } } else { de_thread_helper *deth; int de_aborted=0; int myspan = (fic.height-2*(oversample-1)+1); int swath = myspan/(spec->nthreads); /* Create the de helper structures */ deth = (de_thread_helper *)calloc(spec->nthreads,sizeof(de_thread_helper)); for (thi=0;thi<(spec->nthreads);thi++) { /* Set up the contents of the helper structure */ deth[thi].b = buckets; deth[thi].accumulate = accumulate; deth[thi].width = fic.width; deth[thi].height = fic.height; deth[thi].oversample = oversample; deth[thi].progress_size = spec->sub_batch_size/10; deth[thi].de = &de; deth[thi].k1 = k1; deth[thi].k2 = k2; deth[thi].curve = cp.estimator_curve; deth[thi].spec = spec; deth[thi].aborted = &de_aborted; if ( (spec->nthreads)>myspan) { /* More threads than rows */ deth[thi].start_row=0; if (thi==spec->nthreads-1) { deth[thi].end_row=myspan; deth[thi].last_thread=1; } else { deth[thi].end_row=-1; deth[thi].last_thread=0; } } else { /* Normal case */ deth[thi].start_row=thi*swath; deth[thi].end_row=(thi+1)*swath; if (thi==spec->nthreads-1) { deth[thi].end_row=myspan; deth[thi].last_thread=1; } else { deth[thi].last_thread=0; } } } #ifdef HAVE_LIBPTHREAD /* Let's make some threads */ myThreads = (pthread_t *)malloc(spec->nthreads * sizeof(pthread_t)); pthread_attr_init(&pt_attr); pthread_attr_setdetachstate(&pt_attr,PTHREAD_CREATE_JOINABLE); for (thi=0; thi nthreads; thi ++) pthread_create(&myThreads[thi], &pt_attr, (void *)de_thread, (void *)(&(deth[thi]))); pthread_attr_destroy(&pt_attr); /* Wait for them to return */ for (thi=0; thi < spec->nthreads; thi++) pthread_join(myThreads[thi], (void **)&thread_status); free(myThreads); #else for (thi=0; thi nthreads; thi ++) de_thread((void *)(&(deth[thi]))); #endif free(deth); if (de_aborted) { if (verbose) fprintf(stderr, "\naborted!\n"); goto done; } } /* End density estimation loop */ /* If allocated, free the de filter memory for the next batch */ if (de.max_filter_index > 0) { free(de.filter_coefs); free(de.filter_widths); } } if (verbose) { fprintf(stderr, "\rchaos: 100.0%% took: %ld seconds \n", time(NULL) - progress_began); fprintf(stderr, "filtering..."); } /* filter the accumulation buffer down into the image */ if (1) { int x, y; double t[4],newrgb[3]; double g = 1.0 / (gamma / vib_gam_n); double tmp,a; double alpha,ls; int rgbi; double linrange = cp.gam_lin_thresh; vibrancy /= vib_gam_n; background[0] /= vib_gam_n/256.0; background[1] /= vib_gam_n/256.0; background[2] /= vib_gam_n/256.0; /* If we're in the early clip mode, perform this first step to */ /* apply the gamma correction and clipping before the spat filt */ if (spec->earlyclip) { for (j = 0; j < fic.height; j++) { for (i = 0; i < fic.width; i++) { abucket *ac = accumulate + i + j*fic.width; if (ac[0][3]<=0) { alpha = 0.0; ls = 0.0; } else { tmp=ac[0][3]/PREFILTER_WHITE; alpha = flam3_calc_alpha(tmp,g,linrange); ls = vibrancy * 256.0 * alpha / tmp; if (alpha<0.0) alpha = 0.0; if (alpha>1.0) alpha = 1.0; } t[0] = (double)ac[0][0]; t[1] = (double)ac[0][1]; t[2] = (double)ac[0][2]; t[3] = (double)ac[0][3]; flam3_calc_newrgb(t, ls, highpow, newrgb); for (rgbi=0;rgbi<3;rgbi++) { a = newrgb[rgbi]; a += (1.0-vibrancy) * 256.0 * pow( t[rgbi] / PREFILTER_WHITE, g); if (nchan<=3 || transp==0) a += ((1.0 - alpha) * background[rgbi]); else { if (alpha>0) a /= alpha; else a = 0; } /* Clamp here to ensure proper filter functionality */ if (a>255) a = 255; if (a<0) a = 0; /* Replace values in accumulation buffer with these new ones */ ac[0][rgbi] = a; } ac[0][3] = alpha; } } } /* Apply the spatial filter */ y = de_offset; for (j = 0; j < image_height; j++) { x = de_offset; for (i = 0; i < image_width; i++) { int ii, jj,rgbi; void *p; unsigned short *p16; unsigned char *p8; t[0] = t[1] = t[2] = t[3] = 0.0; for (ii = 0; ii < filter_width; ii++) { for (jj = 0; jj < filter_width; jj++) { double k = filter[ii + jj * filter_width]; abucket *ac = accumulate + x+ii + (y+jj)*fic.width; t[0] += k * ac[0][0]; t[1] += k * ac[0][1]; t[2] += k * ac[0][2]; t[3] += k * ac[0][3]; } } p = (unsigned char *)out + nchan * bytes_per_channel * (i + j * out_width); p8 = (unsigned char *)p; p16 = (unsigned short *)p; /* The old way, spatial filter first and then clip after gamma */ if (!spec->earlyclip) { tmp=t[3]/PREFILTER_WHITE; if (t[3]<=0) { alpha = 0.0; ls = 0.0; } else { alpha = flam3_calc_alpha(tmp,g,linrange); ls = vibrancy * 256.0 * alpha / tmp; if (alpha<0.0) alpha = 0.0; if (alpha>1.0) alpha = 1.0; } flam3_calc_newrgb(t, ls, highpow, newrgb); for (rgbi=0;rgbi<3;rgbi++) { a = newrgb[rgbi]; a += (1.0-vibrancy) * 256.0 * pow( t[rgbi] / PREFILTER_WHITE, g); if (nchan<=3 || transp==0) a += ((1.0 - alpha) * background[rgbi]); else { if (alpha>0) a /= alpha; else a = 0; } /* Clamp here to ensure proper filter functionality */ if (a>255) a = 255; if (a<0) a = 0; /* Replace values in accumulation buffer with these new ones */ t[rgbi] = a; } t[3] = alpha; } for (rgbi=0;rgbi<3;rgbi++) { a = t[rgbi]; if (a > 255) a = 255; if (a < 0) a = 0; if (2==bytes_per_channel) { a *= 256.0; /* Scales to [0-65280] */ p16[rgbi] = (unsigned short) a; } else { p8[rgbi] = (unsigned char) a; } } if (t[3]>1) t[3]=1; if (t[3]<0) t[3]=0; /* alpha */ if (nchan>3) { if (transp==1) { if (2==bytes_per_channel) p16[3] = (unsigned short) (t[3] * 65535); else p8[3] = (unsigned char) (t[3] * 255); } else { if (2==bytes_per_channel) p16[3] = 65535; else p8[3] = 255; } } x += oversample; } y += oversample; } } done: stats->badvals = fic.badvals; free(temporal_filter); free(temporal_deltas); free(batch_filter); free(filter); free(buckets); // free(accumulate); // free(points); /* We have to clear the cps in fth first */ for (thi = 0; thi < spec->nthreads; thi++) { clear_cp(&(fth[thi].cp),0); } free(fth); clear_cp(&cp,0); if (getenv("insert_palette")) { int ph = 100; if (ph >= image_height) ph = image_height; /* insert the palette into the image */ for (j = 0; j < ph; j++) { for (i = 0; i < image_width; i++) { unsigned char *p = (unsigned char *)out + nchan * (i + j * out_width); p[0] = (unsigned char)dmap[i * 256 / image_width].color[0]; p[1] = (unsigned char)dmap[i * 256 / image_width].color[1]; p[2] = (unsigned char)dmap[i * 256 / image_width].color[2]; } } } tend = time(NULL); stats->render_seconds = (int)(tend-tstart); return(0); }