summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Lars-Dominik Braun <lars@6xq.net> 2015-02-12 16:36:05 +0100
committer Lars-Dominik Braun <lars@6xq.net> 2015-05-02 21:36:44 +0200
commit cb40d04fa6c5d6105adc42278b13954ee003d0e9 (patch)
tree e085c3ad3a900ae163dd2aff97d59966a3233c08
parent 334e82a0a23db8a5c0816756021611bfffe2fa26 (diff)
download pucket-cb40d04fa6c5d6105adc42278b13954ee003d0e9.tar.gz
pucket-cb40d04fa6c5d6105adc42278b13954ee003d0e9.tar.bz2
pucket-cb40d04fa6c5d6105adc42278b13954ee003d0e9.zip
Vectorize flam3_iterate
-rw-r--r-- flam3.c 76
-rw-r--r-- flam3.h 3
-rw-r--r-- private.h 2
-rw-r--r-- rect.c 22
4 files changed, 51 insertions, 52 deletions
diff --git a/flam3.c b/flam3.c
index 6eaedcb..2b4976f 100644
--- a/flam3.c
+++ b/flam3.c
@@ -227,7 +227,7 @@ int flam3_create_chaos_distrib(flam3_genome *cp, int xi, unsigned short *xform_d
*/
-int flam3_iterate(flam3_genome *cp, int n, int fuse, double *samples, unsigned short *xform_distrib, randctx *rc) {
+int flam3_iterate(flam3_genome *cp, int n, int fuse, const double4 in, double4 *samples, unsigned short *xform_distrib, randctx *rc) {
int i;
double4 p, q;
int consec = 0;
@@ -235,13 +235,13 @@ int flam3_iterate(flam3_genome *cp, int n, int fuse, double *samples, unsigned
int lastxf=0;
int fn;
- p = (double4) { samples[0], samples[1], samples[2], samples[3] };
+ p = in;
/* Perform precalculations */
for (i=0;i<cp->num_xforms;i++)
xform_precalc(cp,i);
- for (i = -4*fuse; i < 4*n; i+=4) {
+ for (i = fuse; i < n; i++) {
// fn = xform_distrib[ lastxf*CHOOSE_XFORM_GRAIN + (((unsigned)irand(rc)) % CHOOSE_XFORM_GRAIN)];
if (cp->chaos_enable)
@@ -254,7 +254,7 @@ int flam3_iterate(flam3_genome *cp, int n, int fuse, double *samples, unsigned
badvals ++;
if (consec<5) {
p = q;
- i -= 4;
+ --i;
continue;
} else
consec = 0;
@@ -277,10 +277,7 @@ int flam3_iterate(flam3_genome *cp, int n, int fuse, double *samples, unsigned
/* if fuse over, store it */
if (i >= 0) {
- samples[i] = q[0];
- samples[i+1] = q[1];
- samples[i+2] = q[2];
- samples[i+3] = q[3];
+ samples[i] = q;
}
}
@@ -339,13 +336,16 @@ int flam3_xform_preview(flam3_genome *cp, int xi, double range, int numvals, int
}
#endif
+#if 0
int flam3_colorhist(flam3_genome *cp, int num_batches, randctx *rc, double *hist) {
int lp,plp;
int mycolor;
unsigned short *xform_distrib;
int sbs = 10000;
- double sub_batch[4*10000];
+ double4 *sub_batch;
+
+ sub_batch = malloc (sizeof (*sub_batch) * sbs);
memset(hist,0,256*sizeof(double));
@@ -357,15 +357,12 @@ int flam3_colorhist(flam3_genome *cp, int num_batches, randctx *rc, double *hist
for (lp=0;lp<num_batches;lp++) {
- sub_batch[0] = flam3_random_isaac_11(rc);
- sub_batch[1] = flam3_random_isaac_11(rc);
- sub_batch[2] = 0;
- sub_batch[3] = 0;
+ double4 start = (double4) { flam3_random_isaac_11(rc), flam3_random_isaac_11(rc), 0, 0 };
if (xform_distrib==NULL)
return(1);
- flam3_iterate(cp, sbs, 20, sub_batch, xform_distrib, rc);
+ flam3_iterate(cp, sbs, 20, start, sub_batch, xform_distrib, rc);
// histogram the colors in the sub_batch array
for (plp=0;plp<4*sbs;plp+=4) {
@@ -378,11 +375,13 @@ int flam3_colorhist(flam3_genome *cp, int num_batches, randctx *rc, double *hist
}
free(xform_distrib);
+ free(sub_batch);
for (plp=0;plp<256;plp++)
hist[plp] /= (float)(num_batches*sbs);
return(0);
}
+#endif
flam3_genome *sheep_loop(flam3_genome *cp, double blend) {
@@ -3485,16 +3484,16 @@ void flam3_random(flam3_genome *cp, int *ivars, int ivars_n, int sym, int spec_x
static int sort_by_x(const void *av, const void *bv) {
- double *a = (double *) av;
- double *b = (double *) bv;
+ double4 a = *((double4 *) av);
+ double4 b = *((double4 *) bv);
if (a[0] < b[0]) return -1;
if (a[0] > b[0]) return 1;
return 0;
}
static int sort_by_y(const void *av, const void *bv) {
- double *a = (double *) av;
- double *b = (double *) bv;
+ double4 a = *((double4 *) av);
+ double4 b = *((double4 *) bv);
if (a[1] < b[1]) return -1;
if (a[1] > b[1]) return 1;
return 0;
@@ -3527,24 +3526,21 @@ int flam3_estimate_bounding_box(flam3_genome *cp, double eps, int nsamples,
int i;
int low_target, high_target;
double min[2], max[2];
- double *points;
+ double4 *points;
int bv;
unsigned short *xform_distrib;
if (nsamples <= 0) nsamples = 10000;
- points = (double *) malloc(sizeof(double) * 4 * nsamples);
- points[0] = flam3_random_isaac_11(rc);
- points[1] = flam3_random_isaac_11(rc);
- points[2] = 0.0;
- points[3] = 0.0;
+ points = (double4 *) malloc(sizeof(double4) * nsamples);
+ const double4 start = (double4) { flam3_random_isaac_11(rc), flam3_random_isaac_11(rc), 0.0, 0.0 };
if (prepare_precalc_flags(cp))
return(-1);
xform_distrib = flam3_create_xform_distrib(cp);
if (xform_distrib==NULL)
return(-1);
- bv=flam3_iterate(cp, nsamples, 20, points, xform_distrib, rc);
+ bv=flam3_iterate(cp, nsamples, 20, start, points, xform_distrib, rc);
free(xform_distrib);
if ( bv/(double)nsamples > eps )
@@ -3561,7 +3557,7 @@ int flam3_estimate_bounding_box(flam3_genome *cp, double eps, int nsamples,
max[0] = max[1] = -1e10;
for (i = 0; i < nsamples; i++) {
- double *p = &points[4*i];
+ const double4 p = points[i];
if (p[0] < min[0]) min[0] = p[0];
if (p[1] < min[1]) min[1] = p[1];
if (p[0] > max[0]) max[0] = p[0];
@@ -3577,13 +3573,13 @@ int flam3_estimate_bounding_box(flam3_genome *cp, double eps, int nsamples,
return(bv);
}
- qsort(points, nsamples, sizeof(double) * 4, sort_by_x);
- bmin[0] = points[4 * low_target];
- bmax[0] = points[4 * high_target];
+ qsort(points, nsamples, sizeof(double4), sort_by_x);
+ bmin[0] = points[low_target][0];
+ bmax[0] = points[high_target][0];
- qsort(points, nsamples, sizeof(double) * 4, sort_by_y);
- bmin[1] = points[4 * low_target + 1];
- bmax[1] = points[4 * high_target + 1];
+ qsort(points, nsamples, sizeof(double4), sort_by_y);
+ bmin[1] = points[low_target][1];
+ bmax[1] = points[high_target][1];
free(points);
return(bv);
@@ -3895,7 +3891,7 @@ void flam3_srandom() {
srandom(seed);
}
-
+#if 0
/* correlation dimension, after clint sprott.
computes slope of the correlation sum at a size scale
the order of 2% the size of the attractor or the camera. */
@@ -3941,20 +3937,17 @@ double flam3_dimension(flam3_genome *cp, int ntries, int clip_to_camera) {
got = 0;
nclipped = 0;
+ double4 *subb = malloc (sizeof (*subb) * SBS);
while (got < 2*ntries) {
- double subb[40000];
int i4, clipped;
unsigned short *xform_distrib;
- subb[0] = flam3_random_isaac_11(&rc);
- subb[1] = flam3_random_isaac_11(&rc);
- subb[2] = 0.0;
- subb[3] = 0.0;
+ const double4 start = (double4) { flam3_random_isaac_11(&rc), flam3_random_isaac_11(&rc), 0.0, 0.0 };
if (prepare_precalc_flags(cp))
return(-1.0);
xform_distrib = flam3_create_xform_distrib(cp);
if (xform_distrib==NULL)
return(-1.0);
- flam3_iterate(cp, SBS, 20, subb, xform_distrib, &rc);
+ flam3_iterate(cp, SBS, 20, start, subb, xform_distrib, &rc);
free(xform_distrib);
i4 = 0;
for (i = 0; i < SBS; i++) {
@@ -3973,12 +3966,14 @@ double flam3_dimension(flam3_genome *cp, int ntries, int clip_to_camera) {
if (nclipped > 10 * ntries) {
fprintf(stderr, "warning: too much clipping, "
"flam3_dimension giving up.\n");
+ free (subb);
return sqrt(-1.0);
}
}
i4 += 4;
}
}
+ free (subb);
if (0)
fprintf(stderr, "cliprate=%g\n", nclipped/(ntries+(double)nclipped));
@@ -4009,7 +4004,9 @@ double flam3_dimension(flam3_genome *cp, int ntries, int clip_to_camera) {
free(hist);
return fd;
}
+#endif
+#if 0
double flam3_lyapunov(flam3_genome *cp, int ntries) {
double p[4];
double x, y;
@@ -4100,4 +4097,5 @@ double flam3_lyapunov(flam3_genome *cp, int ntries) {
}
return sum/(log(2.0)*ntries);
}
+#endif
diff --git a/flam3.h b/flam3.h
index 4135701..611cb22 100644
--- a/flam3.h
+++ b/flam3.h
@@ -549,8 +549,7 @@ void clear_cp(flam3_genome *cp, int def_flag);
(samples[2], samples[3]) as starting color coordinate,
perform fuse iterations and throw them away, then perform
nsamples iterations and save them in the samples array */
-EXPORT int flam3_iterate(flam3_genome *g, int nsamples, int fuse, double *samples,
- unsigned short *xform_distrib, randctx *rc);
+EXPORT int flam3_iterate(flam3_genome *cp, int n, int fuse, const double4 in, double4 *samples, unsigned short *xform_distrib, randctx *rc);
void apply_motion_parameters(flam3_xform *xf, flam3_xform *addto, double blend);
diff --git a/private.h b/private.h
index f34599b..c33a099 100644
--- a/private.h
+++ b/private.h
@@ -102,7 +102,7 @@ typedef struct {
typedef struct {
- double *iter_storage; /* Storage for iteration coordinates */
+ double4 *iter_storage; /* Storage for iteration coordinates */
randctx rc; /* Thread-unique ISAAC seed */
flam3_genome cp; /* Full copy of genome for use by the thread */
int first_thread;
diff --git a/rect.c b/rect.c
index 0b18993..30d76fd 100644
--- a/rect.c
+++ b/rect.c
@@ -390,13 +390,15 @@ static void iter_thread(void *fth) {
}
/* Seed iterations */
- fthp->iter_storage[0] = flam3_random_isaac_11(&(fthp->rc));
- fthp->iter_storage[1] = flam3_random_isaac_11(&(fthp->rc));
- fthp->iter_storage[2] = flam3_random_isaac_01(&(fthp->rc));
- fthp->iter_storage[3] = flam3_random_isaac_01(&(fthp->rc));
+ const double4 start = (double4) {
+ flam3_random_isaac_11(&(fthp->rc)),
+ flam3_random_isaac_11(&(fthp->rc)),
+ flam3_random_isaac_01(&(fthp->rc)),
+ flam3_random_isaac_01(&(fthp->rc)),
+ };
/* Execute iterations */
- badcount = flam3_iterate(&(fthp->cp), sub_batch_size, fuse, fthp->iter_storage, ficp->xform_distrib, &(fthp->rc));
+ badcount = flam3_iterate(&(fthp->cp), sub_batch_size, fuse, start, fthp->iter_storage, ficp->xform_distrib, &(fthp->rc));
#if defined(HAVE_LIBPTHREAD) && defined(USE_LOCKS)
/* Lock mutex for access to accumulator */
@@ -407,12 +409,12 @@ static void iter_thread(void *fth) {
ficp->badvals += badcount;
/* Put them in the bucket accumulator */
- for (j = 0; j < sub_batch_size*4; j+=4) {
+ for (j = 0; j < sub_batch_size; j++) {
double p0, p1, p00, p11;
double dbl_index0,dbl_frac;
double interpcolor[4];
int ci, color_index0;
- double *p = &(fthp->iter_storage[j]);
+ const double4 p = fthp->iter_storage[j];
bucket *b;
if (fthp->cp.rotate != 0.0) {
@@ -532,7 +534,7 @@ static int render_rectangle(flam3_frame *spec, void *out,
double nsamples, batch_size;
bucket *buckets;
abucket *accumulate;
- double *points;
+ double4 *points;
double *filter, *temporal_filter, *temporal_deltas, *batch_filter;
double ppux=0, ppuy=0;
int image_width, image_height; /* size of the image to produce */
@@ -698,7 +700,7 @@ static int render_rectangle(flam3_frame *spec, void *out,
/* Just free buckets at the end */
buckets = (bucket *) last_block;
accumulate = (abucket *) (last_block + sizeof(bucket) * nbuckets);
- points = (double *) (last_block + (sizeof(bucket) + sizeof(abucket)) * nbuckets);
+ points = (double4 *) (last_block + (sizeof(bucket) + sizeof(abucket)) * nbuckets);
if (verbose) {
fprintf(stderr, "chaos: ");
@@ -874,7 +876,7 @@ static int render_rectangle(flam3_frame *spec, void *out,
fth[thi].timer_initialize = 0;
}
- fth[thi].iter_storage = &(points[thi*(spec->sub_batch_size)*4]);
+ fth[thi].iter_storage = &(points[thi*spec->sub_batch_size]);
fth[thi].fic = &fic;
flam3_copy(&(fth[thi].cp),&cp);