diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2015-02-22 15:17:24 +0100 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2015-05-02 21:36:45 +0200 |
commit | 9e3c0bf4512865b7b5cd0ef2f46ac79fa12b4e86 (patch) | |
tree | 6b5c1d13f05a37bed0f8d39e8c3bf5c21126a178 | |
parent | 0d9a396fa7d6e72cfd2157e0e47f00d08813f64f (diff) | |
download | pucket-9e3c0bf4512865b7b5cd0ef2f46ac79fa12b4e86.tar.gz pucket-9e3c0bf4512865b7b5cd0ef2f46ac79fa12b4e86.tar.bz2 pucket-9e3c0bf4512865b7b5cd0ef2f46ac79fa12b4e86.zip |
Prefetch bucket accumulator
The bucket add is a hotspot.
-rw-r--r-- | rect.c | 24 | ||||
-rw-r--r-- | wscript | 2 |
2 files changed, 14 insertions, 12 deletions
```diff
@@ -228,25 +228,27 @@ static void iter_thread(void *fth) {
 		p[1] = rotatedp[1];
 	}
+	/* Skip if out of bounding box or invisible */
 	if (p[0] >= ficp->bounds[0] && p[1] >= ficp->bounds[1] &&
-	    p[0] <= ficp->bounds[2] && p[1] <= ficp->bounds[3]) {
-
-		double logvis=1.0;
-
-		/* Skip if invisible */
-		if (p[3]==0)
-			continue;
-		else
-			logvis = p[3];
+	    p[0] <= ficp->bounds[2] && p[1] <= ficp->bounds[3] &&
+	    p[3] > 0) {
+		const size_t ix = (int)(ficp->ws0 * p[0] - ficp->wb0s0) + ficp->width * (int)(ficp->hs1 * p[1] - ficp->hb1s1);
+#if HAVE_BUILTIN_PREFETCH
+		/* prefetch for reading (0) with no locality (0). This (partially)
+		 * hides the load latency for the += operation at the end of this
+		 * block */
+		__builtin_prefetch (&ficp->buckets[ix], 0, 0);
+#endif
 		double4 interpcolor = color_palette_lookup (p[2], fthp->cp.palette_mode, ficp->dmap, cmap_size);
-		if (p[3]!=1.0) {
+		const double logvis = p[3];
+		if (logvis != 1.0) {
 			interpcolor *= logvis;
 		}
-		ficp->buckets[(int)(ficp->ws0 * p[0] - ficp->wb0s0) + ficp->width * (int)(ficp->hs1 * p[1] - ficp->hb1s1)] += interpcolor;
+		ficp->buckets[ix] += interpcolor;
 	}
 }
@@ -15,7 +15,7 @@ def configure(conf):
 	conf.check_cc (lib='amdlibm', header_name='amdlibm.h', mandatory=False, define_name='HAVE_AMDLIBM', uselib_store='amdlibm')
 	# does not work
-	#conf.check_cc (function_name='__builtin_ia32_rdrand64_step', define_name='HAVE_RDRAND64')
+	conf.check_cc (function_name='__builtin_prefetch', define_name='HAVE_BUILTIN_PREFETCH')
 	conf.write_config_header ('config.h')
 def build(bld):
```