From 9e3c0bf4512865b7b5cd0ef2f46ac79fa12b4e86 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sun, 22 Feb 2015 15:17:24 +0100 Subject: Prefetch bucket accumulator The bucket add is a hotspot. --- rect.c | 24 +++++++++++++----------- wscript | 2 +- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/rect.c b/rect.c index 0cea6ec..fb8c550 100644 --- a/rect.c +++ b/rect.c @@ -228,25 +228,27 @@ static void iter_thread(void *fth) { p[1] = rotatedp[1]; } + /* Skip if out of bounding box or invisible */ if (p[0] >= ficp->bounds[0] && p[1] >= ficp->bounds[1] && - p[0] <= ficp->bounds[2] && p[1] <= ficp->bounds[3]) { - - double logvis=1.0; - - /* Skip if invisible */ - if (p[3]==0) - continue; - else - logvis = p[3]; + p[0] <= ficp->bounds[2] && p[1] <= ficp->bounds[3] && + p[3] > 0) { + const size_t ix = (int)(ficp->ws0 * p[0] - ficp->wb0s0) + ficp->width * (int)(ficp->hs1 * p[1] - ficp->hb1s1); +#if HAVE_BUILTIN_PREFETCH + /* prefetch for reading (0) with no locality (0). This (partially) + * hides the load latency for the += operation at the end of this + * block */ + __builtin_prefetch (&ficp->buckets[ix], 0, 0); +#endif double4 interpcolor = color_palette_lookup (p[2], fthp->cp.palette_mode, ficp->dmap, cmap_size); - if (p[3]!=1.0) { + const double logvis = p[3]; + if (logvis != 1.0) { interpcolor *= logvis; } - ficp->buckets[(int)(ficp->ws0 * p[0] - ficp->wb0s0) + ficp->width * (int)(ficp->hs1 * p[1] - ficp->hb1s1)] += interpcolor; + ficp->buckets[ix] += interpcolor; } } diff --git a/wscript b/wscript index 6ba36c4..d804ab1 100644 --- a/wscript +++ b/wscript @@ -15,7 +15,7 @@ def configure(conf): conf.check_cc (lib='amdlibm', header_name='amdlibm.h', mandatory=False, define_name='HAVE_AMDLIBM', uselib_store='amdlibm') # does not work - #conf.check_cc (function_name='__builtin_ia32_rdrand64_step', define_name='HAVE_RDRAND64') + conf.check_cc (function_name='__builtin_prefetch', define_name='HAVE_BUILTIN_PREFETCH') conf.write_config_header ('config.h') def build(bld): -- cgit v1.2.3