summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Lars-Dominik Braun <lars@6xq.net> 2015-02-22 15:17:24 +0100
committer Lars-Dominik Braun <lars@6xq.net> 2015-05-02 21:36:45 +0200
commit 9e3c0bf4512865b7b5cd0ef2f46ac79fa12b4e86 (patch)
tree 6b5c1d13f05a37bed0f8d39e8c3bf5c21126a178
parent 0d9a396fa7d6e72cfd2157e0e47f00d08813f64f (diff)
download pucket-9e3c0bf4512865b7b5cd0ef2f46ac79fa12b4e86.tar.gz
pucket-9e3c0bf4512865b7b5cd0ef2f46ac79fa12b4e86.tar.bz2
pucket-9e3c0bf4512865b7b5cd0ef2f46ac79fa12b4e86.zip
Prefetch bucket accumulator
The bucket add is a hotspot.
-rw-r--r-- rect.c 24
-rw-r--r-- wscript 2
2 files changed, 14 insertions, 12 deletions
diff --git a/rect.c b/rect.c
index 0cea6ec..fb8c550 100644
--- a/rect.c
+++ b/rect.c
@@ -228,25 +228,27 @@ static void iter_thread(void *fth) {
p[1] = rotatedp[1];
}
+ /* Skip if out of bounding box or invisible */
if (p[0] >= ficp->bounds[0] && p[1] >= ficp->bounds[1] &&
- p[0] <= ficp->bounds[2] && p[1] <= ficp->bounds[3]) {
-
- double logvis=1.0;
-
- /* Skip if invisible */
- if (p[3]==0)
- continue;
- else
- logvis = p[3];
+ p[0] <= ficp->bounds[2] && p[1] <= ficp->bounds[3] &&
+ p[3] > 0) {
+ const size_t ix = (int)(ficp->ws0 * p[0] - ficp->wb0s0) + ficp->width * (int)(ficp->hs1 * p[1] - ficp->hb1s1);
+#if HAVE_BUILTIN_PREFETCH
+ /* prefetch for reading (0) with no locality (0). This (partially)
+ * hides the load latency for the += operation at the end of this
+ * block */
+ __builtin_prefetch (&ficp->buckets[ix], 0, 0);
+#endif
double4 interpcolor = color_palette_lookup (p[2],
fthp->cp.palette_mode, ficp->dmap, cmap_size);
- if (p[3]!=1.0) {
+ const double logvis = p[3];
+ if (logvis != 1.0) {
interpcolor *= logvis;
}
- ficp->buckets[(int)(ficp->ws0 * p[0] - ficp->wb0s0) + ficp->width * (int)(ficp->hs1 * p[1] - ficp->hb1s1)] += interpcolor;
+ ficp->buckets[ix] += interpcolor;
}
}
diff --git a/wscript b/wscript
index 6ba36c4..d804ab1 100644
--- a/wscript
+++ b/wscript
@@ -15,7 +15,7 @@ def configure(conf):
conf.check_cc (lib='amdlibm', header_name='amdlibm.h', mandatory=False, define_name='HAVE_AMDLIBM', uselib_store='amdlibm')
# does not work
- #conf.check_cc (function_name='__builtin_ia32_rdrand64_step', define_name='HAVE_RDRAND64')
+ conf.check_cc (function_name='__builtin_prefetch', define_name='HAVE_BUILTIN_PREFETCH')
conf.write_config_header ('config.h')
def build(bld):