From 334e82a0a23db8a5c0816756021611bfffe2fa26 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Thu, 12 Feb 2015 15:43:26 +0100 Subject: Vectorize c and post matrices as well apply_xform --- flam3.c | 25 +++++++++-------------- flam3.h | 8 ++++---- interpolation.c | 27 ++++++++++--------------- interpolation.h | 8 ++++---- variations.c | 63 ++++++++++++++++++++++++++++----------------------------- variations.h | 2 +- vector.h | 5 +++++ wscript | 2 +- 8 files changed, 66 insertions(+), 74 deletions(-) create mode 100644 vector.h diff --git a/flam3.c b/flam3.c index 928aed0..6eaedcb 100644 --- a/flam3.c +++ b/flam3.c @@ -229,16 +229,13 @@ int flam3_create_chaos_distrib(flam3_genome *cp, int xi, unsigned short *xform_d int flam3_iterate(flam3_genome *cp, int n, int fuse, double *samples, unsigned short *xform_distrib, randctx *rc) { int i; - double p[4], q[4]; + double4 p, q; int consec = 0; int badvals = 0; int lastxf=0; int fn; - p[0] = samples[0]; - p[1] = samples[1]; - p[2] = samples[2]; - p[3] = samples[3]; + p = (double4) { samples[0], samples[1], samples[2], samples[3] }; /* Perform precalculations */ for (i=0;i<cp->num_xforms;i++) @@ -252,14 +249,11 @@ int flam3_iterate(flam3_genome *cp, int n, int fuse, double *samples, unsigned else fn = xform_distrib[ xorshift_step(rc) & CHOOSE_XFORM_GRAIN_M1 ]; - if (apply_xform(cp, fn, p, q, rc)>0) { + if (apply_xform(cp, fn, p, &q, rc)>0) { consec ++; badvals ++; if (consec<5) { - p[0] = q[0]; - p[1] = q[1]; - p[2] = q[2]; - p[3] = q[3]; + p = q; i -= 4; continue; } else @@ -270,17 +264,14 @@ int flam3_iterate(flam3_genome *cp, int n, int fuse, double *samples, unsigned /* Store the last used transform */ lastxf = fn+1; - p[0] = q[0]; - p[1] = q[1]; - p[2] = q[2]; - p[3] = q[3]; + p = q; if (cp->final_xform_enable == 1) { if (cp->xform[cp->final_xform_index].opacity==1 || flam3_random_isaac_01(rc)<cp->xform[cp->final_xform_index].opacity) { - apply_xform(cp, cp->final_xform_index, p, q, rc); + apply_xform(cp, 
cp->final_xform_index, p, &q, rc); /* Keep the opacity from the original xform */ - q[3] = p[3]; + q = (double4) { q[0], q[1], q[2], p[3] }; } } @@ -296,6 +287,7 @@ int flam3_iterate(flam3_genome *cp, int n, int fuse, double *samples, unsigned return(badvals); } +#if 0 int flam3_xform_preview(flam3_genome *cp, int xi, double range, int numvals, int depth, double *result, randctx *rc) { /* We will evaluate the 'xi'th xform 'depth' times, over the following values: */ @@ -345,6 +337,7 @@ int flam3_xform_preview(flam3_genome *cp, int xi, double range, int numvals, int return(0); } +#endif int flam3_colorhist(flam3_genome *cp, int num_batches, randctx *rc, double *hist) { diff --git a/flam3.h b/flam3.h index a66e40e..4135701 100644 --- a/flam3.h +++ b/flam3.h @@ -207,11 +207,12 @@ typedef struct { } flam3_image_store; +#include "vector.h" typedef struct xform { double var[flam3_nvariations]; /* interp coefs between variations */ - double c[3][2]; /* the coefs to the affine part of the function */ - double post[3][2]; /* the post transform */ + double2 c[3]; /* the coefs to the affine part of the function */ + double2 post[3]; /* the post transform */ double density; /* probability that this function is chosen. 0 - 1 */ double color; /* color coords for this function. 
0 - 1 */ double color_speed; /* scaling factor on color added to current iteration */ @@ -420,8 +421,7 @@ typedef struct xform { double radialBlur_zoomvar; /* Precalculate these values for waves */ - double waves_dx2; - double waves_dy2; + double2 waves_d2; /* If disc2 is used, precalculate these values */ double disc2_sinadd; diff --git a/interpolation.c b/interpolation.c index 0ab1c54..4bef70f 100644 --- a/interpolation.c +++ b/interpolation.c @@ -57,7 +57,7 @@ double det_matrix(double s[2][2]) { return s[0][0] * s[1][1] - s[0][1] * s[1][0]; } -int id_matrix(double s[3][2]) { +int id_matrix(double2 s[3]) { return (s[0][0] == 1.0) && (s[0][1] == 0.0) && @@ -67,7 +67,7 @@ int id_matrix(double s[3][2]) { (s[2][1] == 0.0); } -int zero_matrix(double s[3][2]) { +int zero_matrix(double2 s[3]) { return (s[0][0] == 0.0) && (s[0][1] == 0.0) && @@ -88,23 +88,18 @@ void copy_matrix(double to[3][2], double from[3][2]) { } -void clear_matrix(double m[3][2]) { - m[0][0] = 0.0; - m[0][1] = 0.0; - m[1][0] = 0.0; - m[1][1] = 0.0; - m[2][0] = 0.0; - m[2][1] = 0.0; +void clear_matrix(double2 m[3]) { + const double2 zero = (double2) { 0.0, 0.0 }; + m[0] = zero; + m[1] = zero; + m[2] = zero; } -void sum_matrix(double s, double m1[3][2], double m2[3][2]) { +void sum_matrix(double s, const double2 m1[3], double2 m2[3]) { - m2[0][0] += s * m1[0][0]; - m2[0][1] += s * m1[0][1]; - m2[1][0] += s * m1[1][0]; - m2[1][1] += s * m1[1][1]; - m2[2][0] += s * m1[2][0]; - m2[2][1] += s * m1[2][1]; + m2[0] += s * m1[0]; + m2[1] += s * m1[1]; + m2[2] += s * m1[2]; } void mult_matrix(double s1[2][2], double s2[2][2], double d[2][2]) { diff --git a/interpolation.h b/interpolation.h index 82d3133..da3dfd1 100644 --- a/interpolation.h +++ b/interpolation.h @@ -35,11 +35,11 @@ double smoother(double t); double get_stagger_coef(double t, double stagger_prc, int num_xforms, int this_xform); double det_matrix(double s[2][2]); -int id_matrix(double s[3][2]); -int zero_matrix(double s[3][2]); +int 
id_matrix(double2 s[3]); +int zero_matrix(double2 s[3]); void copy_matrix(double to[3][2], double from[3][2]); -void clear_matrix(double m[3][2]); -void sum_matrix(double s, double m1[3][2], double m2[3][2]); +void clear_matrix(double2 m[3]); +void sum_matrix(double s, const double2 m1[3], double2 m2[3]); void mult_matrix(double s1[2][2], double s2[2][2], double d[2][2]); int compare_xforms(const void *av, const void *bv); diff --git a/variations.c b/variations.c index 27bd52c..fd7eddb 100644 --- a/variations.c +++ b/variations.c @@ -37,8 +37,6 @@ extern void sincos(double x, double *s, double *c); #define trunc (int) #endif -typedef double double2 __attribute__ ((vector_size (sizeof (double)*2))); - typedef struct { double precalc_atan, precalc_sina; /* Precalculated, if needed */ double precalc_cosa, precalc_sqrt; @@ -405,12 +403,11 @@ static double2 var15_waves (const double2 in, const flam3_iter_helper * const f, p[0] += v * nx; p[1] += v * ny; */ - const double2 c1 = (double2) {f->xform->c[1][0], f->xform->c[1][1] }; + const double2 c1 = f->xform->c[1]; + const double2 inswap = (double2) { in[1], in[0] }; + const double2 a = inswap * f->xform->waves_d2; - const double2 n = in + c1 * (double2) { - sin( in[1] * f->xform->waves_dx2 ), - sin( in[0] * f->xform->waves_dy2 ), - }; + const double2 n = in + c1 * (double2) { sin(a[0]), sin(a[1]), }; return weight * n; } @@ -447,10 +444,7 @@ static double2 var17_popcorn (const double2 in, const flam3_iter_helper * const const double dx = tan(3.0*in[1]); const double dy = tan(3.0*in[0]); - const double2 n = in + (double2) { - f->xform->c[2][0] * sin(dx), - f->xform->c[2][1] * sin(dy) - }; + const double2 n = in + f->xform->c[2] * (double2) { sin(dx), sin(dy) }; return weight * n; } @@ -1893,11 +1887,9 @@ static void radial_blur_precalc(flam3_xform *xf) { } static void waves_precalc(flam3_xform *xf) { - double dx = xf->c[2][0]; - double dy = xf->c[2][1]; + const double2 d = xf->c[2]; - xf->waves_dx2 = 1.0/(dx * dx + EPS); 
- xf->waves_dy2 = 1.0/(dy * dy + EPS); + xf->waves_d2 = 1.0/(d * d + EPS); } static void disc2_precalc(flam3_xform *xf) { @@ -2051,8 +2043,17 @@ int prepare_precalc_flags(flam3_genome *cp) { return(0); } +/* Apply affine coordinate transformation + */ +static double2 apply_affine (const double2 in, const double2 matrix[3]) { + return matrix[0] * in[0] + matrix[1] * in[1] + matrix[2]; +} + +static double sum(const double2 in) { + return in[0] + in[1]; +} -int apply_xform(flam3_genome *cp, int fn, double *p, double *q, randctx *rc) +int apply_xform(flam3_genome *cp, int fn, const double4 p, double4 *q_ret, randctx *rc) { flam3_iter_helper f; int var_n; @@ -2063,22 +2064,21 @@ int apply_xform(flam3_genome *cp, int fn, double *p, double *q, randctx *rc) s1 = cp->xform[fn].color_speed; - q[2] = s1 * cp->xform[fn].color + (1.0-s1) * p[2]; - q[3] = cp->xform[fn].vis_adjusted; + const double2 q23 = (double2) { + s1 * cp->xform[fn].color + (1.0-s1) * p[2], + cp->xform[fn].vis_adjusted, + }; //fprintf(stderr,"%d : %f %f %f\n",fn,cp->xform[fn].c[0][0],cp->xform[fn].c[1][0],cp->xform[fn].c[2][0]); - const double2 t = (double2) { - cp->xform[fn].c[0][0] * p[0] + cp->xform[fn].c[1][0] * p[1] + cp->xform[fn].c[2][0], - cp->xform[fn].c[0][1] * p[0] + cp->xform[fn].c[1][1] * p[1] + cp->xform[fn].c[2][1] - }; + const double2 t = apply_affine ((double2) { p[0], p[1] }, cp->xform[fn].c); /* Pre-xforms go here, and modify the f.tx and f.ty values */ if (cp->xform[fn].has_preblur!=0.0) var67_pre_blur(t, &f, cp->xform[fn].has_preblur); /* Always calculate sumsq and sqrt */ - f.precalc_sumsq = t[0]*t[0] + t[1]*t[1]; + f.precalc_sumsq = sum(t*t); f.precalc_sqrt = sqrt(f.precalc_sumsq); /* Check to see if we can precalculate any parts */ @@ -2099,7 +2099,6 @@ int apply_xform(flam3_genome *cp, int fn, double *p, double *q, randctx *rc) f.xform = &(cp->xform[fn]); - double2 accum = (double2) {0.0, 0.0}; for (var_n=0; var_n < cp->xform[fn].num_active_vars; var_n++) { @@ -2306,22 +2305,22 @@ 
int apply_xform(flam3_genome *cp, int fn, double *p, double *q, randctx *rc) } } + double2 q01; /* apply the post transform */ if (cp->xform[fn].has_post) { - q[0] = cp->xform[fn].post[0][0] * accum[0] + cp->xform[fn].post[1][0] * accum[1] + cp->xform[fn].post[2][0]; - q[1] = cp->xform[fn].post[0][1] * accum[0] + cp->xform[fn].post[1][1] * accum[1] + cp->xform[fn].post[2][1]; + q01 = apply_affine (accum, cp->xform[fn].post); } else { - q[0] = accum[0]; - q[1] = accum[1]; + q01 = accum; } /* Check for badvalues and return randoms if bad */ - if (badvalue(q[0]) || badvalue(q[1])) { - q[0] = flam3_random_isaac_11(rc); - q[1] = flam3_random_isaac_11(rc); + if (badvalue(q01[0]) || badvalue(q01[1])) { + *q_ret = (double4) { flam3_random_isaac_11(rc), flam3_random_isaac_11(rc), q23[0], q23[1] }; return(1); - } else + } else { + *q_ret = (double4) { q01[0], q01[1], q23[0], q23[1] }; return(0); + } } diff --git a/variations.h b/variations.h index c606484..bc5f29e 100644 --- a/variations.h +++ b/variations.h @@ -24,6 +24,6 @@ void xform_precalc(flam3_genome *cp, int xi); int prepare_precalc_flags(flam3_genome *); -int apply_xform(flam3_genome *cp, int fn, double *p, double *q, randctx *rc); +int apply_xform(flam3_genome *cp, int fn, const double4 p, double4 *, randctx *rc); void initialize_xforms(flam3_genome *thiscp, int start_here); #endif diff --git a/vector.h b/vector.h new file mode 100644 index 0000000..7633c15 --- /dev/null +++ b/vector.h @@ -0,0 +1,5 @@ +#pragma once + +typedef double double2 __attribute__ ((vector_size (sizeof (double)*2))); +typedef double double4 __attribute__ ((vector_size (sizeof (double)*4))); + diff --git a/wscript b/wscript index 68b76f7..07c9deb 100644 --- a/wscript +++ b/wscript @@ -26,5 +26,5 @@ def build(bld): bld.program (features='c cprogram', source='flam3-render.c', target='flam3-render', use='libflam3 xml2 jpeg png amdlibm pthread', includes='.') bld.program (features='c cprogram', source='flam3-genome.c', target='flam3-genome', 
use='libflam3 xml2 png amdlibm pthread', includes='.') bld.program (features='c cprogram', source='flam3-animate.c', target='flam3-animate', use='libflam3 xml2 png amdlibm pthread', includes='.') - bld.program (features='c cprogram', source='flam3-convert.c', target='flam3-convert', use='libflam3 xml2 png amdlibm pthread', includes='.') + #bld.program (features='c cprogram', source='flam3-convert.c', target='flam3-convert', use='libflam3 xml2 png amdlibm pthread', includes='.') -- cgit v1.2.3