diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2020-02-22 13:20:31 +0100 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2020-02-22 13:23:26 +0100 |
commit | 0f8643954fd9507aec85bab46046e71a497bfffe (patch) | |
tree | 28708a991bd136fd255282326e93f7588120a2e3 | |
parent | a91fc5e945b841ae54f67ed331409ad857178f13 (diff) | |
download | lulua-0f8643954fd9507aec85bab46046e71a497bfffe.tar.gz lulua-0f8643954fd9507aec85bab46046e71a497bfffe.tar.bz2 lulua-0f8643954fd9507aec85bab46046e71a497bfffe.zip |
doc: Switch to jinja2-based rendering
Pre-rendering HTML was not the best idea. Instead, pre-process the data,
cache it in data files, and do the HTML rendering only as the final
step.
Also adds a hand-usage asymmetry value to the layout statistics and uses
tabular numerals and figure spaces instead of ugly table hacks to align numbers.
-rwxr-xr-x | gen.sh | 38 | ||||
-rw-r--r-- | lulua/data/report/index.html (renamed from doc/index.html) | 74 | ||||
-rw-r--r-- | lulua/data/report/lulua-logo.svg (renamed from doc/lulua-logo.svg) | 0 | ||||
-rw-r--r-- | lulua/data/report/style.css (renamed from doc/style.css) | 3 | ||||
-rw-r--r-- | lulua/report.py | 64 | ||||
-rw-r--r-- | lulua/stats.py | 70 | ||||
-rw-r--r-- | setup.py | 8 |
7 files changed, 166 insertions, 91 deletions
@@ -9,6 +9,7 @@ cat <<EOF ### auto-generated by gen.sh. Do not edit. ### ### settings ### +datadir=lulua/data corpusdir=corpus statsdir=stats docdir=doc @@ -83,20 +84,17 @@ rule mkdir rule letterfreq command = lulua-analyze -l ar-lulua letterfreq < \$in > \$out -rule analyze-fingerhand - command = lulua-analyze -l \$layout fingerhand < \$in > \$out +rule analyze-layoutstats + command = lulua-analyze -l \$layout layoutstats < \$in > \$out rule analyze-corpusstats command = lulua-analyze -l ar-lulua corpusstats \$metadata < \$stats > \$out -rule analyze-corpushtml - command = cat \$in | lulua-analyze -l ar-lulua corpushtml > \$out - rule wordlist command = lulua-analyze -l ar-lulua latinime < \$in > \$out -rule html - command = m4 -I \$docdir/_temp \$template > \$out +rule report + command = lulua-report -c \$corpus -l \$layoutstats > \$out rule cp command = cp \$in \$out @@ -121,8 +119,8 @@ build \$docdir/_build: mkdir build \$docdir/_build/fonts: mkdir build \$docdir/_temp: mkdir build \$docdir/_build/letterfreq.json: letterfreq \$statsdir/ar-lulua/all.pickle || \$docdir/_build -build \$docdir/_build/style.css: cp \$docdir/style.css || \$docdir/_build -build \$docdir/_build/lulua-logo.svg: cp \$docdir/lulua-logo.svg || \$docdir/_build +build \$docdir/_build/style.css: cp \$datadir/report/style.css || \$docdir/_build +build \$docdir/_build/lulua-logo.svg: cp \$datadir/report/lulua-logo.svg || \$docdir/_build # wordlist build \$docdir/_temp/lulua.combined: wordlist \$statsdir/ar-lulua/all.pickle || \$docdir/_temp build \$docdir/_build/lulua.combined.gz: gz \$docdir/_temp/lulua.combined || \$docdir/_build @@ -191,12 +189,12 @@ build \$docdir/_temp/${l}-heat.yaml: analyze-heat \$statsdir/${l}/all.pickle || build \$docdir/_build/${l}-heat.svg: render-svg-heat \$docdir/_temp/${l}-heat.yaml || \$docdir/_build layout = ${l} -build \$docdir/_temp/${l}-fingerhand.html: analyze-fingerhand \$statsdir/${l}/all.pickle || \$docdir/_temp +build 
\$docdir/_temp/${l}-layoutstats.pickle: analyze-layoutstats \$statsdir/${l}/all.pickle || \$docdir/_temp layout = ${l} EOF # included by index.html and thus must be its dependencies -fingerhandfiles+=" \$docdir/_temp/${l}-fingerhand.html" +layoutstatsfiles+=" \$docdir/_temp/${l}-layoutstats.pickle" done # layouts with xmodmap support @@ -209,7 +207,7 @@ EOF done # statistics for each corpus (ar-lulua) and html rendering -outfiles="" +metafiles="" for c in $corpora; do cat <<EOF build \$docdir/_temp/metadata-$c.yaml: analyze-corpusstats \$statsdir/ar-lulua/$c.pickle \$corpusdir/$c/metadata.yaml || \$docdir/_temp \$corpusdir/$c/metadata.yaml @@ -217,18 +215,14 @@ build \$docdir/_temp/metadata-$c.yaml: analyze-corpusstats \$statsdir/ar-lulua/$ stats = \$statsdir/ar-lulua/$c.pickle EOF -outfiles+=" \$docdir/_temp/metadata-$c.yaml" +metafiles+=" \$docdir/_temp/metadata-$c.yaml" done +# dependencies are not properly modeled, always rebuild cat <<EOF -build \$docdir/_temp/corpus.html: analyze-corpushtml $outfiles || \$docdir/_temp - -EOF - -# html, which depends on several other files generated above -cat <<EOF -build \$docdir/_build/index.html: html \$docdir/index.html \$docdir/_temp/corpus.html $fingerhandfiles || \$docdir/_build - template = \$docdir/index.html - +build always: phony +build \$docdir/_build/index.html: report | always || \$docdir/_build $metafiles $layoutstatsfiles + corpus = $metafiles + layoutstats = $layoutstatsfiles EOF diff --git a/doc/index.html b/lulua/data/report/index.html index cc5c69f..5649fab 100644 --- a/doc/index.html +++ b/lulua/data/report/index.html @@ -9,7 +9,13 @@ <link href="https://fonts.googleapis.com/css?family=IBM+Plex+Mono|IBM+Plex+Sans:100,400&display=swap" rel="stylesheet"> <link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/pure-min.css" integrity="sha384-oAOxQR6DkCoMliIh8yFnu25d7Eq/PHS21PClpwjOTeU2jRSq11vu66rf90/cZr47" crossorigin="anonymous"> <link rel="stylesheet" 
href="https://unpkg.com/purecss@1.0.1/build/grids-responsive-min.css"> - <script src="https://cdn.pydata.org/bokeh/release/bokeh-1.3.4.min.js"></script> + {# bokeh #} + {% for f in bokehres.js_files -%} + <script src="{{ f }}"></script> + {%- endfor %} + {% for f in bokehres.css_files -%} + <link rel="stylesheet" href="{{ f }}"> + {%- endfor %} <link rel="stylesheet" href="style.css"> </head> <body> @@ -126,7 +132,32 @@ The corpus used for the following analysis consists of </p> - include(`corpus.html') + <table class="pure-table"><thead><tr><th>Source</th><th></th><th>Words</th><th>Characters</th></thead><tbody> + {% for c in corpus|sort(attribute='source.name') %} + <tr> + <td><a href="{{ c.source.url }}">{{ c.source.name }}</a></td> + {% set count = c.get ('count') %} + {% if count %} + {# use new style formatting, for some reason %7,d does not work #} + <td>{{ '{:7,d}'.format(count[0])|numspace }} {{ count[1] }}</td> + {% else %} + <td></td> + {% endif %} + + {% set stats = c.get ('stats') %} + {% for k in ('words', 'characters') %} + {% set i = stats[k]|approx %} + <td>{{ '%5.1f'|format(i[0])|numspace }} {{ i[1] }}</td> + {% endfor %} + </tr> + {% endfor %} + <tr><td>Total</td><td></td> + {% for k in ('words', 'characters') %} + {% set i = corpustotal[k]|approx %} + <td>{{ '%5.1f'|format(i[0])|numspace }} {{ i[1] }}</td> + {% endfor %} + </tr> + </tbody></table> <p> The plot below shows <bdo dir="ltr" lang="ar">ا ل ي م و ن</bdo> can be @@ -175,14 +206,39 @@ <dt class="finger thumb">cyan</dt> <dd>thumb</dd> </dl> + <p>Asymmetry is defined as the difference between left and right hand usage.</p> </div> </div> </div> +{% macro fingerhandstats(stats) %} +{% set hands = stats.hands %} +{% set fingers = stats.fingers %} +<div class="fingerhandstats" dir="ltr" lang="en"> +{% for hand in Direction %} + {% set handpct = hands[hand]/stats.buttonPresses*100 %} + <div class="{{ hand.name.lower() }}" style="width: {{ '%.3f'|format(handpct) }}%;"> + <div class="hand">{{ 
'%.2f'|format(handpct) }}%</div> + <div class="fingers"> + {% for finger in fingerOrder[hand] %} + {% set fingerpct = fingers[(hand, finger)]/stats.buttonPresses*100 %} + {# finger width is relative to parent (i.e. hand) #} + {% set fingerwidth = fingers[(hand, finger)]/hands[hand]*100 %} + <div class="{{ finger.name.lower() }}" style="width: {{ '%.3f'|format(fingerwidth) }}%;">{{ '%.2f'|format(fingerpct) }}</div> + {% endfor %} + </div> + </div> + {% if loop.first %} + <div class="asymm"><small>Asymmetry: {{ '%.3f'|format(stats.asymmetry) }}</small></div> + {% endif %} +{% endfor %} +</div> +{% endmacro %} + <figure id="ar-lulua-heat"> <div class="lbox" lang="en"> <img src="ar-lulua-heat.svg"> - include(`ar-lulua-fingerhand.html') + {{ fingerhandstats(layoutstats['ar-lulua']) }} </div> </figure> </section> @@ -201,7 +257,7 @@ <figure id="ar-asmo663"> <div class="lbox"> <img src="ar-asmo663-heat.svg"> - include(`ar-asmo663-fingerhand.html') + {{ fingerhandstats(layoutstats['ar-asmo663']) }} </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -226,7 +282,7 @@ <figure id="ar-linux"> <div class="lbox"> <img src="ar-linux-heat.svg"> - include(`ar-linux-fingerhand.html') + {{ fingerhandstats(layoutstats['ar-linux']) }} </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -251,7 +307,7 @@ <figure id="ar-malas"> <div class="lbox"> <img src="ar-malas-heat.svg"> - include(`ar-malas-fingerhand.html') + {{ fingerhandstats(layoutstats['ar-malas']) }} </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -282,7 +338,7 @@ <figure id="ar-osman"> <div class="lbox"> <img src="ar-osman-heat.svg"> - include(`ar-osman-fingerhand.html') + {{ fingerhandstats(layoutstats['ar-osman']) }} </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -315,7 +371,7 @@ <figure> <div class="lbox"> <img src="ar-khorshid-heat.svg"> - 
include(`ar-khorshid-fingerhand.html') + {{ fingerhandstats(layoutstats['ar-khorshid']) }} </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -345,7 +401,7 @@ <figure> <div class="lbox"> <img src="ar-phonetic-heat.svg"> - include(`ar-phonetic-fingerhand.html') + {{ fingerhandstats(layoutstats['ar-phonetic']) }} </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> diff --git a/doc/lulua-logo.svg b/lulua/data/report/lulua-logo.svg index 20136c0..20136c0 100644 --- a/doc/lulua-logo.svg +++ b/lulua/data/report/lulua-logo.svg diff --git a/doc/style.css b/lulua/data/report/style.css index 3d8e482..26b2e96 100644 --- a/doc/style.css +++ b/lulua/data/report/style.css @@ -147,6 +147,9 @@ div.fingerhandstats .fingers .thumb { border: 0.1em solid var(--finger-thumb); } +table { + font-variant-numeric: tabular-nums; +} .pure-table td.numint { text-align: right; padding-right: 0; diff --git a/lulua/report.py b/lulua/report.py new file mode 100644 index 0000000..200bb9b --- /dev/null +++ b/lulua/report.py @@ -0,0 +1,64 @@ +import sys, argparse, logging, pickle +from gettext import GNUTranslations, NullTranslations +from decimal import Decimal + +import yaml +from jinja2 import Environment, PackageLoader +from bokeh.resources import CDN as bokehres + +from .layout import LEFT, RIGHT, Direction, FingerType + +def approx (i): + """ Get approximate human-readable string for large number """ + + units = ['', 'thousand', 'million', 'billion'] + base = Decimal (1000) + i = Decimal (i) + while i >= base and len (units) > 1: + i /= base + units.pop (0) + return round (i, 1), units[0] + +def numspace (s): + """ Replace ordinary spaces with unicode FIGURE SPACE """ + return s.replace (' ', '\u2007') + +def render (): + parser = argparse.ArgumentParser(description='Create lulua report.') + parser.add_argument('-c', '--corpus', nargs='+', metavar='FILE', help='Corpus metadata files') + parser.add_argument('-l', 
'--layoutstats', nargs='+', metavar='FILE', help='Layout statistics files') + logging.basicConfig (level=logging.INFO) + args = parser.parse_args() + + env = Environment ( + loader=PackageLoader (__package__, 'data/report'), + ) + env.filters['approx'] = approx + env.filters['numspace'] = numspace + + corpus = [] + for x in args.corpus: + with open (x) as fd: + corpus.extend (filter (lambda x: x is not None, yaml.safe_load_all (fd))) + layoutstats = {} + for x in args.layoutstats: + with open (x, 'rb') as fd: + d = pickle.load (fd) + layoutstats[d['layout']] = d + + corpustotal = {} + for k in ('words', 'characters'): + corpustotal[k] = sum (map (lambda x: x['stats'][k], corpus)) + + tpl = env.get_template('index.html') + + tpl.stream ( + corpus=corpus, + corpustotal=corpustotal, + layoutstats=layoutstats, + bokehres=bokehres, + # XXX: not sure how to expose these properly to the template + fingerOrder={LEFT: list (FingerType), RIGHT: list (reversed (FingerType))}, + Direction=Direction, + ).dump (sys.stdout) + diff --git a/lulua/stats.py b/lulua/stats.py index 80c269b..13d878b 100644 --- a/lulua/stats.py +++ b/lulua/stats.py @@ -22,7 +22,6 @@ import sys, operator, pickle, argparse, logging, yaml, math, time from operator import itemgetter from itertools import chain, groupby, product from collections import defaultdict -from decimal import Decimal from .layout import * from .keyboard import defaultKeyboards @@ -313,7 +312,7 @@ def keyHeatmap (args): buttons[k.name] = v yaml.dump (data, sys.stdout) -def fingerHand (args): +def layoutstats (args): stats = pickle.load (sys.stdin.buffer) keyboard = defaultKeyboards[args.keyboard] @@ -328,19 +327,14 @@ def fingerHand (args): hands[hand] += count fingers[(hand, finger)] += count - print ('<div class="fingerhandstats" dir="ltr" lang="en">') - fingerOrder = {LEFT: list (FingerType), RIGHT: reversed (FingerType)} - for hand in Direction: - handpct = hands[hand]/buttonPresses*100 - print (f'<div class="{hand.name.lower()}" 
style="width: {handpct:.3f}%;">\n\t<div class="hand">{handpct:.2f}%</div>') - print ('\t<div class="fingers">') - for finger in fingerOrder[hand]: - fingerpct = fingers[(hand, finger)]/buttonPresses*100 - # finger width is relative to parent (i.e. hand) - fingerwidth = fingers[(hand, finger)]/hands[hand]*100 - print (f'\t\t<div class="{finger.name.lower()}" style="width: {fingerwidth:.3f}%;">{fingerpct:.2f}</div>') - print ('\t</div>\n\t</div>') - print ('</div>') + asymmetry = hands[LEFT]/buttonPresses - hands[RIGHT]/buttonPresses + pickle.dump (dict ( + layout=args.layout, + hands=dict (hands), + fingers=dict (fingers), + buttonPresses=buttonPresses, + asymmetry=asymmetry, + ), sys.stdout.buffer) def latinImeDict (args): """ @@ -379,46 +373,6 @@ def corpusStats (args): # make document concatable print ('---') -def approx (i): - """ Get approximate human-readable string for large number """ - - units = ['', 'thousand', 'million', 'billion'] - base = Decimal (1000) - i = Decimal (i) - while i >= base and len (units) > 1: - i /= base - units.pop (0) - i = round (i, 1) - return int (i), int (i%1*10), units[0] - -def corpusHtml (args): - meta = list (filter (lambda x: x is not None, yaml.safe_load_all (sys.stdin))) - total = {'words': 0, 'characters': 0} - print ('<table class="pure-table"><thead><tr><th>Source</th><th colspan="2"></th><th colspan="2">Words</th><th colspan="2">Characters</th></thead><tbody>') - for c in sorted (meta, key=lambda x: x['source']['name'].lower ()): - print ('<tr>') - print (f'<td><a href="{c["source"]["url"]}">{c["source"]["name"]}</a></td>') - count = c.get ('count') - if count: - print (f'<td class="numint">{count[0]//1000:d},</td><td class="numfrac">{count[0]%1000:03d}\u202f{count[1]}</td>') - else: - print ('<td class="numint"></td><td class="numfrac"></td>') - - stats = c.get ('stats') - for k in ('words', 'characters'): - i = approx (stats[k]) - print (f'<td class="numint">{i[0]}.</td><td class="numfrac">{i[1]}\u202f{i[2]}</td>') - 
print ('</tr>') - - for k in ('words', 'characters'): - total[k] += c['stats'][k] - print ('<tr><td>Total</td><td class="numint"></td><td class="numfrac"></td>') - for k in ('words', 'characters'): - i = approx (total[k]) - print (f'<td class="numint">{i[0]}.</td><td class="numfrac">{i[1]}\u202f{i[2]}</td>') - print ('</tr>') - print ('</tbody></table>') - def main (): parser = argparse.ArgumentParser(description='Process statistics files.') parser.add_argument('-l', '--layout', metavar='LAYOUT', help='Keyboard layout name') @@ -439,15 +393,13 @@ def main (): sp.set_defaults (func=triadfreq) sp = subparsers.add_parser('keyheatmap') sp.set_defaults (func=keyHeatmap) - sp = subparsers.add_parser('fingerhand') - sp.set_defaults (func=fingerHand) + sp = subparsers.add_parser('layoutstats') + sp.set_defaults (func=layoutstats) sp = subparsers.add_parser('latinime') sp.set_defaults (func=latinImeDict) sp = subparsers.add_parser('corpusstats') sp.add_argument('metadata', type=argparse.FileType ('r')) sp.set_defaults (func=corpusStats) - sp = subparsers.add_parser('corpushtml') - sp.set_defaults (func=corpusHtml) logging.basicConfig (level=logging.INFO) args = parser.parse_args() @@ -25,7 +25,7 @@ setup( version='0.1dev0', author='Lars-Dominik Braun', author_email='lars+lulua@6xq.net', - #url='https://6xq.net/crocoite/', + url='https://6xq.net/lulua/', packages=['lulua'], license='LICENSE.txt', description='Keyboard layout optimization', @@ -39,16 +39,22 @@ setup( 'tqdm', 'html5lib', 'ebooklib', + 'jinja2', ], entry_points={ 'console_scripts': [ 'lulua-analyze = lulua.stats:main', 'lulua-render = lulua.render:render', + 'lulua-report = lulua.report:render', 'lulua-import = lulua.layout:importFrom', 'lulua-optimize = lulua.optimize:optimize', 'lulua-write = lulua.text:write', ], }, + package_data = { + 'lulua': ['data/*', 'data/keyboards/*.yaml', 'data/layouts/*.yaml', + 'data/report/*', 'data/winkbd/*'], + }, setup_requires=['pytest-runner'], tests_require=["pytest", 
'pytest-cov'], python_requires='>=3.6', |