summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lars-Dominik Braun <lars@6xq.net>, 2015-03-22 11:17:44 +0100
committer: Lars-Dominik Braun <lars@6xq.net>, 2015-05-02 21:36:45 +0200
commit: 2b86c2ba0074afe2eff0cfeb5cb1b18ca9984834 (patch)
tree: 9da16060a396070e32e728544005568e992d9038
parent: a093dd0ee969be5150fdff459db385fbc5613452 (diff)
download: pucket-2b86c2ba0074afe2eff0cfeb5cb1b18ca9984834.tar.gz
pucket-2b86c2ba0074afe2eff0cfeb5cb1b18ca9984834.tar.bz2
pucket-2b86c2ba0074afe2eff0cfeb5cb1b18ca9984834.zip
Use camera transform matrix
Rotation and camera transformation are a single matrix now. Speedup is negligible (~1%, depending on #xforms).
-rw-r--r-- math.h 26
-rw-r--r-- rect.c 63
2 files changed, 52 insertions, 37 deletions
diff --git a/math.h b/math.h
index fa49d09..d6e3211 100644
--- a/math.h
+++ b/math.h
@@ -27,6 +27,7 @@
#define REPLACE_WITH_AMDLIBM
#include <amdlibm.h>
#undef nearbyint
+#undef floor
#endif
#define clamp(a,min,max) (a > max ? max : (a < min ? min : a))
@@ -55,6 +56,14 @@ inline void translate (const double2 xy, double2 matrix[3]) {
matrix[2] = xy;
}
+/* Fill matrix with an affine transform that stretches x by xy[0] and y by
+ * xy[1]. The translation part (matrix[2]) is zero.
+ */
+inline void scale (const double2 xy, double2 matrix[3]) {
+	const double sx = xy[0], sy = xy[1];
+	/* no translation */
+	matrix[2] = (double2) { 0.0, 0.0 };
+	/* diagonal linear part */
+	matrix[0] = (double2) { sx, 0.0 };
+	matrix[1] = (double2) { 0.0, sy };
+}
+
/* Multiply two affine matrices a, b and store the result in c.
*
* The last row of each matrix is assumed to be 0, 0, 1.
@@ -65,6 +74,23 @@ inline void matrixmul (const double2 a[3], const double2 b[3], double2 c[3]) {
c[2] = a[0] * b[2][0] + a[1] * b[2][1] + a[2];
}
+/* Affine matrix that maps rect from (x1, y1, x2, y2) onto rect to, i.e.
+ * p -> to_edge + factor * (p - from_edge), independently for each axis.
+ *
+ * from must have nonzero width and height, since both extents are used as
+ * divisors below.
+ */
+inline void translate_rect (const double4 from, const double4 to,
+		double2 matrix[3]) {
+	const double2 from_edge = (double2) { from[0], from[1] },
+			to_edge = (double2) { to[0], to[1] };
+	/* per-axis ratio of the two rects' extents */
+	const double2 factor = (double2) { (to[2] - to[0])/(from[2] - from[0]),
+			(to[3] - to[1])/(from[3] - from[1]) };
+	/* linear part: pure scaling (this also zeroes matrix[2]) */
+	scale (factor, matrix);
+	/* translation part: from_edge has to land exactly on to_edge. Scaling a
+	 * translation by (to_edge - from_edge) instead would map from_edge to
+	 * factor * to_edge, which is only correct if to_edge is the origin. */
+	matrix[2] = to_edge - factor * from_edge;
+}
+
/* Create rotation around center. Note that matrix multiplication is
* right-associative, thus A*B*C == A*(B*C) */
inline void rotate_center (const double2 center, const double angle, double2 out[3]) {
diff --git a/rect.c b/rect.c
index b964e45..1dc47b0 100644
--- a/rect.c
+++ b/rect.c
@@ -29,12 +29,7 @@ typedef struct {
double timelimit;
unsigned int sub_batch_size, fuse;
unsigned short *xform_distrib;
-
- /* camera stuff */
- double ws0, wb0s0, hs1, hb1s1; /* shortcuts for indexing */
- double bounds[4]; /* Corner coords of viewable area */
- double2 rot[3]; /* Rotation transformation */
- double ppux, ppuy;
+ double2 camera[3];
} render_constants;
/* Lookup color [0,1]
@@ -112,20 +107,18 @@ static void iter_thread (flam3_genome * const input_genome,
/* Put them in the bucket accumulator */
for (unsigned int j = 0; j < c->sub_batch_size; j++) {
- double4 p = iter_storage[j];
+ const double4 p = iter_storage[j];
- if (genome.rotate != 0.0) {
- const double2 p01 = (double2) { p[0], p[1] };
- const double2 rotatedp = apply_affine (p01, c->rot);
- p[0] = rotatedp[0];
- p[1] = rotatedp[1];
- }
+ const double2 origpos = (double2) { p[0], p[1] };
+ const double2 transpos = apply_affine (origpos, c->camera);
+ const unsigned int x = floor (transpos[0]);
+ const unsigned int y = floor (transpos[1]);
/* Skip if out of bounding box or invisible */
- if (p[0] >= c->bounds[0] && p[1] >= c->bounds[1] &&
- p[0] <= c->bounds[2] && p[1] <= c->bounds[3] &&
+ if (x >= 0 && x < bucket->dim[0] &&
+ y >= 0 && y < bucket->dim[1] &&
p[3] > 0) {
- const size_t ix = (int)(c->ws0 * p[0] - c->wb0s0) + bucket->dim[0] * (int)(c->hs1 * p[1] - c->hb1s1);
+ const size_t ix = x + bucket->dim[0] * y;
#if HAVE_BUILTIN_PREFETCH
/* prefetch for reading (0) with no locality (0). This (partially)
* hides the load latency for the += operation at the end of this
@@ -137,9 +130,7 @@ static void iter_thread (flam3_genome * const input_genome,
genome.palette_mode, &input_genome->palette);
const double logvis = p[3];
- if (logvis != 1.0) {
- interpcolor *= logvis;
- }
+ interpcolor *= logvis;
bucket->data[ix] += interpcolor;
}
@@ -264,27 +255,25 @@ static void compute_camera (const flam3_genome * const genome,
assert (bucket != NULL);
assert (c != NULL);
- double corner0, corner1;
-
const double scale = pow(2.0, genome->zoom);
- c->ppux = genome->pixels_per_unit * scale;
- c->ppuy = c->ppux;
- //ppux /= spec->pixel_aspect_ratio;
- corner0 = genome->center[0] - bucket->dim[0] / c->ppux / 2.0;
- corner1 = genome->center[1] - bucket->dim[1] / c->ppuy / 2.0;
- c->bounds[0] = corner0;
- c->bounds[1] = corner1;
- c->bounds[2] = corner0 + bucket->dim[0] / c->ppux;
- c->bounds[3] = corner1 + bucket->dim[1] / c->ppuy;
- const double size[2] = {1.0 / (c->bounds[2] - c->bounds[0]),
- 1.0 / (c->bounds[3] - c->bounds[1])};
+ const double ppux = genome->pixels_per_unit * scale;
+ const double ppuy = ppux;
+ const double corner0 = genome->center[0] - bucket->dim[0] / ppux / 2.0;
+ const double corner1 = genome->center[1] - bucket->dim[1] / ppuy / 2.0;
+
+ double2 rot_matrix[3];
rotate_center ((double2) { genome->rot_center[0], genome->rot_center[1] },
- genome->rotate, c->rot);
- c->ws0 = bucket->dim[0] * size[0];
- c->wb0s0 = c->ws0 * c->bounds[0];
- c->hs1 = bucket->dim[1] * size[1];
- c->hb1s1 = c->hs1 * c->bounds[1];
+ genome->rotate, rot_matrix);
+
+ const double4 from_rect = (double4) { corner0, corner1,
+ corner0 + bucket->dim[0] / ppux,
+ corner1 + bucket->dim[1] / ppuy };
+ const double4 to_rect = (double4) { 0, 0, bucket->dim[0], bucket->dim[1] };
+ double2 transform_matrix[3];
+ translate_rect (from_rect, to_rect, transform_matrix);
+
+ matrixmul (transform_matrix, rot_matrix, c->camera);
}
bool render_bucket (flam3_genome * const genome, bucket * const bucket,