7 files changed, 166 insertions, 91 deletions
diff --git a/gen.sh b/gen.sh
index 3d83839..340bfe1 100755
--- a/gen.sh
+++ b/gen.sh
@@ -9,6 +9,7 @@ cat <<EOF
 ### auto-generated by gen.sh. Do not edit. ###
 
 ### settings ###
+datadir=lulua/data
 corpusdir=corpus
 statsdir=stats
 docdir=doc
@@ -83,20 +84,17 @@ rule mkdir
 rule letterfreq
     command = lulua-analyze -l ar-lulua letterfreq < \$in > \$out
 
-rule analyze-fingerhand
-    command = lulua-analyze -l \$layout fingerhand < \$in > \$out
+rule analyze-layoutstats
+    command = lulua-analyze -l \$layout layoutstats < \$in > \$out
 
 rule analyze-corpusstats
     command = lulua-analyze -l ar-lulua corpusstats \$metadata < \$stats > \$out
 
-rule analyze-corpushtml
-    command = cat \$in | lulua-analyze -l ar-lulua corpushtml > \$out
-
 rule wordlist
     command = lulua-analyze -l ar-lulua latinime < \$in > \$out
 
-rule html
-    command = m4 -I \$docdir/_temp \$template > \$out
+rule report
+    command = lulua-report -c \$corpus -l \$layoutstats > \$out
 
 rule cp
     command = cp \$in \$out
@@ -121,8 +119,8 @@ build \$docdir/_build: mkdir
 build \$docdir/_build/fonts: mkdir
 build \$docdir/_temp: mkdir
 build \$docdir/_build/letterfreq.json: letterfreq \$statsdir/ar-lulua/all.pickle || \$docdir/_build
-build \$docdir/_build/style.css: cp \$docdir/style.css || \$docdir/_build
-build \$docdir/_build/lulua-logo.svg: cp \$docdir/lulua-logo.svg || \$docdir/_build
+build \$docdir/_build/style.css: cp \$datadir/report/style.css || \$docdir/_build
+build \$docdir/_build/lulua-logo.svg: cp \$datadir/report/lulua-logo.svg || \$docdir/_build
 # wordlist
 build \$docdir/_temp/lulua.combined: wordlist \$statsdir/ar-lulua/all.pickle || \$docdir/_temp
 build \$docdir/_build/lulua.combined.gz: gz \$docdir/_temp/lulua.combined || \$docdir/_build
@@ -191,12 +189,12 @@ build \$docdir/_temp/${l}-heat.yaml: analyze-heat \$statsdir/${l}/all.pickle ||
 build \$docdir/_build/${l}-heat.svg: render-svg-heat \$docdir/_temp/${l}-heat.yaml || \$docdir/_build
     layout = ${l}
 
-build \$docdir/_temp/${l}-fingerhand.html: analyze-fingerhand \$statsdir/${l}/all.pickle || \$docdir/_temp
+build \$docdir/_temp/${l}-layoutstats.pickle: analyze-layoutstats \$statsdir/${l}/all.pickle || \$docdir/_temp
     layout = ${l}
 
 EOF
 # included by index.html and thus must be its dependencies
-fingerhandfiles+=" \$docdir/_temp/${l}-fingerhand.html"
+layoutstatsfiles+=" \$docdir/_temp/${l}-layoutstats.pickle"
 done
 
 # layouts with xmodmap support
@@ -209,7 +207,7 @@ EOF
 done
 
 # statistics for each corpus (ar-lulua) and html rendering
-outfiles=""
+metafiles=""
 for c in $corpora; do
 cat <<EOF
 build \$docdir/_temp/metadata-$c.yaml: analyze-corpusstats \$statsdir/ar-lulua/$c.pickle \$corpusdir/$c/metadata.yaml || \$docdir/_temp \$corpusdir/$c/metadata.yaml
@@ -217,18 +215,14 @@ build \$docdir/_temp/metadata-$c.yaml: analyze-corpusstats \$statsdir/ar-lulua/$
     stats = \$statsdir/ar-lulua/$c.pickle
 
 EOF
-outfiles+=" \$docdir/_temp/metadata-$c.yaml"
+metafiles+=" \$docdir/_temp/metadata-$c.yaml"
 done
 
+# the report's inputs are listed as order-only dependencies only (i.e. not properly modeled), so always rebuild it via the phony target
 cat <<EOF
-build \$docdir/_temp/corpus.html: analyze-corpushtml $outfiles || \$docdir/_temp
-
-EOF
-
-# html, which depends on several other files generated above
-cat <<EOF
-build \$docdir/_build/index.html: html \$docdir/index.html \$docdir/_temp/corpus.html $fingerhandfiles || \$docdir/_build
-    template = \$docdir/index.html
-
+build always: phony
+build \$docdir/_build/index.html: report | always || \$docdir/_build $metafiles $layoutstatsfiles
+    corpus = $metafiles
+    layoutstats = $layoutstatsfiles
 EOF
 
diff --git a/doc/index.html b/lulua/data/report/index.html
index cc5c69f..5649fab 100644
--- a/doc/index.html
+++ b/lulua/data/report/index.html
@@ -9,7 +9,13 @@
 	<link href="https://fonts.googleapis.com/css?family=IBM+Plex+Mono|IBM+Plex+Sans:100,400&display=swap" rel="stylesheet">
 	<link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/pure-min.css" integrity="sha384-oAOxQR6DkCoMliIh8yFnu25d7Eq/PHS21PClpwjOTeU2jRSq11vu66rf90/cZr47" crossorigin="anonymous">
 	<link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/grids-responsive-min.css">
-	<script src="https://cdn.pydata.org/bokeh/release/bokeh-1.3.4.min.js"></script>
+	{# bokeh #}
+	{% for f in bokehres.js_files -%}
+		<script src="{{ f }}"></script>
+	{%- endfor %}
+	{% for f in bokehres.css_files -%}
+		<link rel="stylesheet" href="{{ f }}">
+	{%- endfor %}
 	<link rel="stylesheet" href="style.css">
 </head>
 <body>
@@ -126,7 +132,32 @@
 		The corpus used for the following analysis consists of
 		</p>
 
-		include(`corpus.html')
+		<table class="pure-table"><thead><tr><th>Source</th><th></th><th>Words</th><th>Characters</th></thead><tbody>
+		{% for c in corpus|sort(attribute='source.name') %}
+			<tr>
+			<td><a href="{{ c.source.url }}">{{ c.source.name }}</a></td>
+			{% set count = c.get ('count') %}
+			{% if count %}
+				{# use new style formatting, for some reason %7,d does not work #}
+				<td>{{ '{:7,d}'.format(count[0])|numspace }}&#x202f;{{ count[1] }}</td>
+			{% else %}
+				<td></td>
+			{% endif %}
+
+			{% set stats = c.get ('stats') %}
+			{% for k in ('words', 'characters') %}
+				{% set i = stats[k]|approx %}
+				<td>{{ '%5.1f'|format(i[0])|numspace }}&#x202f;{{ i[1] }}</td>
+			{% endfor %}
+			</tr>
+		{% endfor %}
+		<tr><td>Total</td><td></td>
+		{% for k in ('words', 'characters') %}
+			{% set i = corpustotal[k]|approx %}
+			<td>{{ '%5.1f'|format(i[0])|numspace }}&#x202f;{{ i[1] }}</td>
+		{% endfor %}
+		</tr>
+		</tbody></table>
 
 		<p>
 		The plot below shows <bdo dir="ltr" lang="ar">ا ل ي م و ن</bdo> can be
@@ -175,14 +206,39 @@
 			<dt class="finger thumb">cyan</dt>
 			<dd>thumb</dd>
 		</dl>
+		<p>Asymmetry is defined as the difference between left and right hand usage.</p>
 	</div>
 	</div>
 </div>
 
+{% macro fingerhandstats(stats) %}{# render hand and finger usage of one layoutstats entry as nested divs whose widths are percentages #}
+{% set hands = stats.hands %}
+{% set fingers = stats.fingers %}
+<div class="fingerhandstats" dir="ltr" lang="en">
+{% for hand in Direction %}
+	{% set handpct = hands[hand]/stats.buttonPresses*100 %}
+	<div class="{{ hand.name.lower() }}" style="width: {{ '%.3f'|format(handpct) }}%;">
+	<div class="hand">{{ '%.2f'|format(handpct) }}%</div>
+	<div class="fingers">
+	{% for finger in fingerOrder[hand] %}
+		{% set fingerpct = fingers[(hand, finger)]/stats.buttonPresses*100 %}
+		{# the label (fingerpct) is the share of all button presses, but the width is relative to the parent (i.e. hand) div #}
+		{% set fingerwidth = fingers[(hand, finger)]/hands[hand]*100 %}
+		<div class="{{ finger.name.lower() }}" style="width: {{ '%.3f'|format(fingerwidth) }}%;">{{ '%.2f'|format(fingerpct) }}</div>
+	{% endfor %}
+	</div>
+	</div>
+	{% if loop.first %}{# loop is the hand loop here: emit the asymmetry note only once, right after the first hand #}
+		<div class="asymm"><small>Asymmetry: {{ '%.3f'|format(stats.asymmetry) }}</small></div>
+	{% endif %}
+{% endfor %}
+</div>
+{% endmacro %}
+
 <figure id="ar-lulua-heat">
 <div class="lbox" lang="en">
 	<img src="ar-lulua-heat.svg">
-	include(`ar-lulua-fingerhand.html')
+	{{ fingerhandstats(layoutstats['ar-lulua']) }}
 </div>
 </figure>
 </section>
@@ -201,7 +257,7 @@
 	<figure id="ar-asmo663">
 	<div class="lbox">
 		<img src="ar-asmo663-heat.svg">
-		include(`ar-asmo663-fingerhand.html')
+		{{ fingerhandstats(layoutstats['ar-asmo663']) }}
 	</div>
 	<figcaption class="pure-g flexreverse">
 		<div class="pure-u-1 pure-u-md-1-2">
@@ -226,7 +282,7 @@
 	<figure id="ar-linux">
 	<div class="lbox">
 		<img src="ar-linux-heat.svg">
-		include(`ar-linux-fingerhand.html')
+		{{ fingerhandstats(layoutstats['ar-linux']) }}
 	</div>
 	<figcaption class="pure-g flexreverse">
 		<div class="pure-u-1 pure-u-md-1-2">
@@ -251,7 +307,7 @@
 	<figure id="ar-malas">
 	<div class="lbox">
 		<img src="ar-malas-heat.svg">
-		include(`ar-malas-fingerhand.html')
+		{{ fingerhandstats(layoutstats['ar-malas']) }}
 	</div>
 	<figcaption class="pure-g flexreverse">
 		<div class="pure-u-1 pure-u-md-1-2">
@@ -282,7 +338,7 @@
 	<figure id="ar-osman">
 	<div class="lbox">
 		<img src="ar-osman-heat.svg">
-		include(`ar-osman-fingerhand.html')
+		{{ fingerhandstats(layoutstats['ar-osman']) }}
 	</div>
 	<figcaption class="pure-g flexreverse">
 		<div class="pure-u-1 pure-u-md-1-2">
@@ -315,7 +371,7 @@
 	<figure>
 	<div class="lbox">
 		<img src="ar-khorshid-heat.svg">
-		include(`ar-khorshid-fingerhand.html')
+		{{ fingerhandstats(layoutstats['ar-khorshid']) }}
 	</div>
 	<figcaption class="pure-g flexreverse">
 		<div class="pure-u-1 pure-u-md-1-2">
@@ -345,7 +401,7 @@
 	<figure>
 	<div class="lbox">
 		<img src="ar-phonetic-heat.svg">
-		include(`ar-phonetic-fingerhand.html')
+		{{ fingerhandstats(layoutstats['ar-phonetic']) }}
 	</div>
 	<figcaption class="pure-g flexreverse">
 		<div class="pure-u-1 pure-u-md-1-2">
diff --git a/doc/lulua-logo.svg b/lulua/data/report/lulua-logo.svg
index 20136c0..20136c0 100644
--- a/doc/lulua-logo.svg
+++ b/lulua/data/report/lulua-logo.svg
diff --git a/doc/style.css b/lulua/data/report/style.css
index 3d8e482..26b2e96 100644
--- a/doc/style.css
+++ b/lulua/data/report/style.css
@@ -147,6 +147,9 @@ div.fingerhandstats .fingers .thumb {
 	border: 0.1em solid var(--finger-thumb);
 }
 
+table {
+	font-variant-numeric: tabular-nums; /* equal-width digits, so numbers line up across table rows */
+}
 .pure-table td.numint {
 	text-align: right;
 	padding-right: 0;
diff --git a/lulua/report.py b/lulua/report.py
new file mode 100644
index 0000000..200bb9b
--- /dev/null
+++ b/lulua/report.py
@@ -0,0 +1,87 @@
+import sys, argparse, logging, pickle
+from gettext import GNUTranslations, NullTranslations
+from decimal import Decimal
+
+import yaml
+from jinja2 import Environment, PackageLoader
+from bokeh.resources import CDN as bokehres
+
+from .layout import LEFT, RIGHT, Direction, FingerType
+
+def approx (i):
+    """
+    Scale a large number down to a short human-readable form
+
+    Returns a tuple (value, unit): value is a Decimal rounded to one decimal
+    place, unit is one of '', 'thousand', 'million' or 'billion'. Numbers
+    beyond the largest unit stay expressed in billions.
+    """
+
+    units = ['', 'thousand', 'million', 'billion']
+    base = Decimal (1000)
+    i = Decimal (i)
+    unit = 0
+    while i >= base and unit < len (units)-1:
+        i /= base
+        unit += 1
+    i = round (i, 1)
+    # rounding can carry into the next unit, i.e. 999.96 thousand must
+    # become 1.0 million instead of 1000.0 thousand
+    if i >= base and unit < len (units)-1:
+        i = round (i/base, 1)
+        unit += 1
+    return i, units[unit]
+
+def numspace (s):
+    """ Replace ordinary spaces with unicode FIGURE SPACE """
+    return s.replace (' ', '\u2007')
+
+def render ():
+    """
+    Entry point of lulua-report: render the index.html template to stdout
+
+    Corpus metadata is read from the YAML files given with -c, per-layout
+    statistics from the pickle files given with -l.
+    """
+    parser = argparse.ArgumentParser(description='Create lulua report.')
+    # both options are required: their values are iterated unconditionally below
+    parser.add_argument('-c', '--corpus', nargs='+', required=True, metavar='FILE', help='Corpus metadata files')
+    parser.add_argument('-l', '--layoutstats', nargs='+', required=True, metavar='FILE', help='Layout statistics files')
+    logging.basicConfig (level=logging.INFO)
+    args = parser.parse_args()
+
+    env = Environment (
+            loader=PackageLoader (__package__, 'data/report'),
+            )
+    env.filters['approx'] = approx
+    env.filters['numspace'] = numspace
+
+    corpus = []
+    for path in args.corpus:
+        with open (path) as fd:
+            # a file may hold several YAML documents; skip the empty ones (None)
+            corpus.extend (doc for doc in yaml.safe_load_all (fd) if doc is not None)
+    layoutstats = {}
+    for path in args.layoutstats:
+        with open (path, 'rb') as fd:
+            # NOTE: unpickling can execute arbitrary code, only pass files
+            # that were generated by `lulua-analyze layoutstats`
+            d = pickle.load (fd)
+            layoutstats[d['layout']] = d
+
+    corpustotal = {}
+    for k in ('words', 'characters'):
+        corpustotal[k] = sum (c['stats'][k] for c in corpus)
+
+    tpl = env.get_template('index.html')
+
+    tpl.stream (
+            corpus=corpus,
+            corpustotal=corpustotal,
+            layoutstats=layoutstats,
+            bokehres=bokehres,
+            # XXX: not sure how to expose these properly to the template
+            fingerOrder={LEFT: list (FingerType), RIGHT: list (reversed (FingerType))},
+            Direction=Direction,
+            ).dump (sys.stdout)
+
diff --git a/lulua/stats.py b/lulua/stats.py
index 80c269b..13d878b 100644
--- a/lulua/stats.py
+++ b/lulua/stats.py
@@ -22,7 +22,6 @@ import sys, operator, pickle, argparse, logging, yaml, math, time
 from operator import itemgetter
 from itertools import chain, groupby, product
 from collections import defaultdict
-from decimal import Decimal
 
 from .layout import *
 from .keyboard import defaultKeyboards
@@ -313,7 +312,7 @@ def keyHeatmap (args):
         buttons[k.name] = v
     yaml.dump (data, sys.stdout)
 
-def fingerHand (args):
+def layoutstats (args):
     stats = pickle.load (sys.stdin.buffer)
 
     keyboard = defaultKeyboards[args.keyboard]
@@ -328,19 +327,14 @@ def fingerHand (args):
         hands[hand] += count
         fingers[(hand, finger)] += count
 
-    print ('<div class="fingerhandstats" dir="ltr" lang="en">')
-    fingerOrder = {LEFT: list (FingerType), RIGHT: reversed (FingerType)}
-    for hand in Direction:
-        handpct = hands[hand]/buttonPresses*100
-        print (f'<div class="{hand.name.lower()}" style="width: {handpct:.3f}%;">\n\t<div class="hand">{handpct:.2f}%</div>')
-        print ('\t<div class="fingers">')
-        for finger in fingerOrder[hand]:
-            fingerpct = fingers[(hand, finger)]/buttonPresses*100
-            # finger width is relative to parent (i.e. hand)
-            fingerwidth = fingers[(hand, finger)]/hands[hand]*100
-            print (f'\t\t<div class="{finger.name.lower()}" style="width: {fingerwidth:.3f}%;">{fingerpct:.2f}</div>')
-        print ('\t</div>\n\t</div>')
-    print ('</div>')
+    asymmetry = hands[LEFT]/buttonPresses - hands[RIGHT]/buttonPresses
+    pickle.dump (dict (
+            layout=args.layout,
+            hands=dict (hands),
+            fingers=dict (fingers),
+            buttonPresses=buttonPresses,
+            asymmetry=asymmetry,
+            ), sys.stdout.buffer)
 
 def latinImeDict (args):
     """
@@ -379,46 +373,6 @@ def corpusStats (args):
     # make document concatable
     print ('---')
 
-def approx (i):
-    """ Get approximate human-readable string for large number """
-
-    units = ['', 'thousand', 'million', 'billion']
-    base = Decimal (1000)
-    i = Decimal (i)
-    while i >= base and len (units) > 1:
-        i /= base
-        units.pop (0)
-    i = round (i, 1)
-    return int (i), int (i%1*10), units[0]
-
-def corpusHtml (args):
-    meta = list (filter (lambda x: x is not None, yaml.safe_load_all (sys.stdin)))
-    total = {'words': 0, 'characters': 0}
-    print ('<table class="pure-table"><thead><tr><th>Source</th><th colspan="2"></th><th colspan="2">Words</th><th colspan="2">Characters</th></thead><tbody>')
-    for c in sorted (meta, key=lambda x: x['source']['name'].lower ()):
-        print ('<tr>')
-        print (f'<td><a href="{c["source"]["url"]}">{c["source"]["name"]}</a></td>')
-        count = c.get ('count')
-        if count:
-            print (f'<td class="numint">{count[0]//1000:d},</td><td class="numfrac">{count[0]%1000:03d}\u202f{count[1]}</td>')
-        else:
-            print ('<td class="numint"></td><td class="numfrac"></td>')
-
-        stats = c.get ('stats')
-        for k in ('words', 'characters'):
-            i = approx (stats[k])
-            print (f'<td class="numint">{i[0]}.</td><td class="numfrac">{i[1]}\u202f{i[2]}</td>')
-        print ('</tr>')
-
-        for k in ('words', 'characters'):
-            total[k] += c['stats'][k]
-    print ('<tr><td>Total</td><td class="numint"></td><td class="numfrac"></td>')
-    for k in ('words', 'characters'):
-        i = approx (total[k])
-        print (f'<td class="numint">{i[0]}.</td><td class="numfrac">{i[1]}\u202f{i[2]}</td>')
-    print ('</tr>')
-    print ('</tbody></table>')
-
 def main ():
     parser = argparse.ArgumentParser(description='Process statistics files.')
     parser.add_argument('-l', '--layout', metavar='LAYOUT', help='Keyboard layout name')
@@ -439,15 +393,13 @@ def main ():
     sp.set_defaults (func=triadfreq)
     sp = subparsers.add_parser('keyheatmap')
     sp.set_defaults (func=keyHeatmap)
-    sp = subparsers.add_parser('fingerhand')
-    sp.set_defaults (func=fingerHand)
+    sp = subparsers.add_parser('layoutstats')
+    sp.set_defaults (func=layoutstats)
     sp = subparsers.add_parser('latinime')
     sp.set_defaults (func=latinImeDict)
     sp = subparsers.add_parser('corpusstats')
     sp.add_argument('metadata', type=argparse.FileType ('r'))
     sp.set_defaults (func=corpusStats)
-    sp = subparsers.add_parser('corpushtml')
-    sp.set_defaults (func=corpusHtml)
 
     logging.basicConfig (level=logging.INFO)
     args = parser.parse_args()
diff --git a/setup.py b/setup.py
index 2e20067..4134766 100644
--- a/setup.py
+++ b/setup.py
@@ -25,7 +25,7 @@ setup(
     version='0.1dev0',
     author='Lars-Dominik Braun',
     author_email='lars+lulua@6xq.net',
-    #url='https://6xq.net/crocoite/',
+    url='https://6xq.net/lulua/',
     packages=['lulua'],
     license='LICENSE.txt',
     description='Keyboard layout optimization',
@@ -39,16 +39,22 @@ setup(
         'tqdm',
         'html5lib',
         'ebooklib',
+        'jinja2',
     ],
     entry_points={
     'console_scripts': [
             'lulua-analyze = lulua.stats:main',
             'lulua-render = lulua.render:render',
+            'lulua-report = lulua.report:render',
             'lulua-import = lulua.layout:importFrom',
             'lulua-optimize = lulua.optimize:optimize',
             'lulua-write = lulua.text:write',
             ],
     },
+    package_data = {
+        'lulua': ['data/*', 'data/keyboards/*.yaml', 'data/layouts/*.yaml',
+                'data/report/*', 'data/winkbd/*'],
+        },
     setup_requires=['pytest-runner'],
     tests_require=["pytest", 'pytest-cov'],
     python_requires='>=3.6',