summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- rect.c | 24
-rw-r--r-- wscript | 2
2 files changed, 14 insertions, 12 deletions
diff --git a/rect.c b/rect.c
index 0cea6ec..fb8c550 100644
--- a/rect.c
+++ b/rect.c
@@ -228,25 +228,27 @@ static void iter_thread(void *fth) {
p[1] = rotatedp[1];
}
+ /* Skip if out of bounding box or invisible */
if (p[0] >= ficp->bounds[0] && p[1] >= ficp->bounds[1] &&
- p[0] <= ficp->bounds[2] && p[1] <= ficp->bounds[3]) {
-
- double logvis=1.0;
-
- /* Skip if invisible */
- if (p[3]==0)
- continue;
- else
- logvis = p[3];
+ p[0] <= ficp->bounds[2] && p[1] <= ficp->bounds[3] &&
+ p[3] > 0) {
+ const size_t ix = (int)(ficp->ws0 * p[0] - ficp->wb0s0) + ficp->width * (int)(ficp->hs1 * p[1] - ficp->hb1s1);
+#if HAVE_BUILTIN_PREFETCH
+ /* prefetch for reading (0) with no locality (0). This (partially)
+ * hides the load latency for the += operation at the end of this
+ * block */
+ __builtin_prefetch (&ficp->buckets[ix], 0, 0);
+#endif
double4 interpcolor = color_palette_lookup (p[2],
fthp->cp.palette_mode, ficp->dmap, cmap_size);
- if (p[3]!=1.0) {
+ const double logvis = p[3];
+ if (logvis != 1.0) {
interpcolor *= logvis;
}
- ficp->buckets[(int)(ficp->ws0 * p[0] - ficp->wb0s0) + ficp->width * (int)(ficp->hs1 * p[1] - ficp->hb1s1)] += interpcolor;
+ ficp->buckets[ix] += interpcolor;
}
}
diff --git a/wscript b/wscript
index 6ba36c4..d804ab1 100644
--- a/wscript
+++ b/wscript
@@ -15,7 +15,7 @@ def configure(conf):
conf.check_cc (lib='amdlibm', header_name='amdlibm.h', mandatory=False, define_name='HAVE_AMDLIBM', uselib_store='amdlibm')
# does not work
- #conf.check_cc (function_name='__builtin_ia32_rdrand64_step', define_name='HAVE_RDRAND64')
+ conf.check_cc (function_name='__builtin_prefetch', define_name='HAVE_BUILTIN_PREFETCH')
conf.write_config_header ('config.h')
def build(bld):