diff options
-rwxr-xr-x | gen.sh | 38 | ||||
-rw-r--r-- | lulua/data/report/index.html (renamed from doc/index.html) | 74 | ||||
-rw-r--r-- | lulua/data/report/lulua-logo.svg (renamed from doc/lulua-logo.svg) | 0 | ||||
-rw-r--r-- | lulua/data/report/style.css (renamed from doc/style.css) | 3 | ||||
-rw-r--r-- | lulua/report.py | 64 | ||||
-rw-r--r-- | lulua/stats.py | 70 | ||||
-rw-r--r-- | setup.py | 8 |
7 files changed, 166 insertions, 91 deletions
@@ -9,6 +9,7 @@ cat <<EOF ### auto-generated by gen.sh. Do not edit. ### ### settings ### +datadir=lulua/data corpusdir=corpus statsdir=stats docdir=doc @@ -83,20 +84,17 @@ rule mkdir rule letterfreq command = lulua-analyze -l ar-lulua letterfreq < \$in > \$out -rule analyze-fingerhand - command = lulua-analyze -l \$layout fingerhand < \$in > \$out +rule analyze-layoutstats + command = lulua-analyze -l \$layout layoutstats < \$in > \$out rule analyze-corpusstats command = lulua-analyze -l ar-lulua corpusstats \$metadata < \$stats > \$out -rule analyze-corpushtml - command = cat \$in | lulua-analyze -l ar-lulua corpushtml > \$out - rule wordlist command = lulua-analyze -l ar-lulua latinime < \$in > \$out -rule html - command = m4 -I \$docdir/_temp \$template > \$out +rule report + command = lulua-report -c \$corpus -l \$layoutstats > \$out rule cp command = cp \$in \$out @@ -121,8 +119,8 @@ build \$docdir/_build: mkdir build \$docdir/_build/fonts: mkdir build \$docdir/_temp: mkdir build \$docdir/_build/letterfreq.json: letterfreq \$statsdir/ar-lulua/all.pickle || \$docdir/_build -build \$docdir/_build/style.css: cp \$docdir/style.css || \$docdir/_build -build \$docdir/_build/lulua-logo.svg: cp \$docdir/lulua-logo.svg || \$docdir/_build +build \$docdir/_build/style.css: cp \$datadir/report/style.css || \$docdir/_build +build \$docdir/_build/lulua-logo.svg: cp \$datadir/report/lulua-logo.svg || \$docdir/_build # wordlist build \$docdir/_temp/lulua.combined: wordlist \$statsdir/ar-lulua/all.pickle || \$docdir/_temp build \$docdir/_build/lulua.combined.gz: gz \$docdir/_temp/lulua.combined || \$docdir/_build @@ -191,12 +189,12 @@ build \$docdir/_temp/${l}-heat.yaml: analyze-heat \$statsdir/${l}/all.pickle || build \$docdir/_build/${l}-heat.svg: render-svg-heat \$docdir/_temp/${l}-heat.yaml || \$docdir/_build layout = ${l} -build \$docdir/_temp/${l}-fingerhand.html: analyze-fingerhand \$statsdir/${l}/all.pickle || \$docdir/_temp +build \$docdir/_temp/${l}-layoutstats.pickle: analyze-layoutstats \$statsdir/${l}/all.pickle || \$docdir/_temp layout = ${l} EOF # included by index.html and thus must be its dependencies -fingerhandfiles+=" \$docdir/_temp/${l}-fingerhand.html" +layoutstatsfiles+=" \$docdir/_temp/${l}-layoutstats.pickle" done # layouts with xmodmap support @@ -209,7 +207,7 @@ EOF done # statistics for each corpus (ar-lulua) and html rendering -outfiles="" +metafiles="" for c in $corpora; do cat <<EOF build \$docdir/_temp/metadata-$c.yaml: analyze-corpusstats \$statsdir/ar-lulua/$c.pickle \$corpusdir/$c/metadata.yaml || \$docdir/_temp \$corpusdir/$c/metadata.yaml @@ -217,18 +215,14 @@ build \$docdir/_temp/metadata-$c.yaml: analyze-corpusstats \$statsdir/ar-lulua/$ stats = \$statsdir/ar-lulua/$c.pickle EOF -outfiles+=" \$docdir/_temp/metadata-$c.yaml" +metafiles+=" \$docdir/_temp/metadata-$c.yaml" done +# dependencies are not properly modeled, always rebuild cat <<EOF -build \$docdir/_temp/corpus.html: analyze-corpushtml $outfiles || \$docdir/_temp - -EOF - -# html, which depends on several other files generated above -cat <<EOF -build \$docdir/_build/index.html: html \$docdir/index.html \$docdir/_temp/corpus.html $fingerhandfiles || \$docdir/_build - template = \$docdir/index.html - +build always: phony +build \$docdir/_build/index.html: report | always || \$docdir/_build $metafiles $layoutstatsfiles + corpus = $metafiles + layoutstats = $layoutstatsfiles EOF diff --git a/doc/index.html b/lulua/data/report/index.html index cc5c69f..5649fab 100644 --- a/doc/index.html +++ b/lulua/data/report/index.html @@ -9,7 +9,13 @@ <link href="https://fonts.googleapis.com/css?family=IBM+Plex+Mono|IBM+Plex+Sans:100,400&display=swap" rel="stylesheet"> <link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/pure-min.css" integrity="sha384-oAOxQR6DkCoMliIh8yFnu25d7Eq/PHS21PClpwjOTeU2jRSq11vu66rf90/cZr47" crossorigin="anonymous"> <link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/grids-responsive-min.css"> - <script src="https://cdn.pydata.org/bokeh/release/bokeh-1.3.4.min.js"></script> + {# bokeh #} + {% for f in bokehres.js_files -%} + <script src="{{ f }}"></script> + {%- endfor %} + {% for f in bokehres.css_files -%} + <link rel="stylesheet" href="{{ f }}"> + {%- endfor %} <link rel="stylesheet" href="style.css"> </head> <body> @@ -126,7 +132,32 @@ The corpus used for the following analysis consists of </p> - include(`corpus.html') + <table class="pure-table"><thead><tr><th>Source</th><th></th><th>Words</th><th>Characters</th></thead><tbody> + {% for c in corpus|sort(attribute='source.name') %} + <tr> + <td><a href="{{ c.source.url }}">{{ c.source.name }}</a></td> + {% set count = c.get ('count') %} + {% if count %} + {# use new style formatting, for some reason %7,d does not work #} + <td>{{ '{:7,d}'.format(count[0])|numspace }} {{ count[1] }}</td> + {% else %} + <td></td> + {% endif %} + + {% set stats = c.get ('stats') %} + {% for k in ('words', 'characters') %} + {% set i = stats[k]|approx %} + <td>{{ '%5.1f'|format(i[0])|numspace }} {{ i[1] }}</td> + {% endfor %} + </tr> + {% endfor %} + <tr><td>Total</td><td></td> + {% for k in ('words', 'characters') %} + {% set i = corpustotal[k]|approx %} + <td>{{ '%5.1f'|format(i[0])|numspace }} {{ i[1] }}</td> + {% endfor %} + </tr> + </tbody></table> <p> The plot below shows <bdo dir="ltr" lang="ar">ا ل ي م و ن</bdo> can be @@ -175,14 +206,39 @@ <dt class="finger thumb">cyan</dt> <dd>thumb</dd> </dl> + <p>Asymmetry is defined as the difference between left and right hand usage.</p> </div> </div> </div> +{% macro fingerhandstats(stats) %} +{% set hands = stats.hands %} +{% set fingers = stats.fingers %} +<div class="fingerhandstats" dir="ltr" lang="en"> +{% for hand in Direction %} + {% set handpct = hands[hand]/stats.buttonPresses*100 %} + <div class="{{ hand.name.lower() }}" style="width: {{ '%.3f'|format(handpct) }}%;"> + <div class="hand">{{ '%.2f'|format(handpct) }}%</div> + <div class="fingers"> + {% for finger in fingerOrder[hand] %} + {% set fingerpct = fingers[(hand, finger)]/stats.buttonPresses*100 %} + {# finger width is relative to parent (i.e. hand) #} + {% set fingerwidth = fingers[(hand, finger)]/hands[hand]*100 %} + <div class="{{ finger.name.lower() }}" style="width: {{ '%.3f'|format(fingerwidth) }}%;">{{ '%.2f'|format(fingerpct) }}</div> + {% endfor %} + </div> + </div> + {% if loop.first %} + <div class="asymm"><small>Asymmetry: {{ '%.3f'|format(stats.asymmetry) }}</small></div> + {% endif %} +{% endfor %} +</div> +{% endmacro %} + <figure id="ar-lulua-heat"> <div class="lbox" lang="en"> <img src="ar-lulua-heat.svg"> - include(`ar-lulua-fingerhand.html') + {{ fingerhandstats(layoutstats['ar-lulua']) }} </div> </figure> </section> @@ -201,7 +257,7 @@ <figure id="ar-asmo663"> <div class="lbox"> <img src="ar-asmo663-heat.svg"> - include(`ar-asmo663-fingerhand.html') + {{ fingerhandstats(layoutstats['ar-asmo663']) }} </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -226,7 +282,7 @@ <figure id="ar-linux"> <div class="lbox"> <img src="ar-linux-heat.svg"> - include(`ar-linux-fingerhand.html') + {{ fingerhandstats(layoutstats['ar-linux']) }} </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -251,7 +307,7 @@ <figure id="ar-malas"> <div class="lbox"> <img src="ar-malas-heat.svg"> - include(`ar-malas-fingerhand.html') + {{ fingerhandstats(layoutstats['ar-malas']) }} </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -282,7 +338,7 @@ <figure id="ar-osman"> <div class="lbox"> <img src="ar-osman-heat.svg"> - include(`ar-osman-fingerhand.html') + {{ fingerhandstats(layoutstats['ar-osman']) }} </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -315,7 +371,7 @@ <figure> <div class="lbox"> <img src="ar-khorshid-heat.svg"> - include(`ar-khorshid-fingerhand.html') + {{ fingerhandstats(layoutstats['ar-khorshid']) }} </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -345,7 +401,7 @@ <figure> <div class="lbox"> <img src="ar-phonetic-heat.svg"> - include(`ar-phonetic-fingerhand.html') + {{ fingerhandstats(layoutstats['ar-phonetic']) }} </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> diff --git a/doc/lulua-logo.svg b/lulua/data/report/lulua-logo.svg index 20136c0..20136c0 100644 --- a/doc/lulua-logo.svg +++ b/lulua/data/report/lulua-logo.svg diff --git a/doc/style.css b/lulua/data/report/style.css index 3d8e482..26b2e96 100644 --- a/doc/style.css +++ b/lulua/data/report/style.css @@ -147,6 +147,9 @@ div.fingerhandstats .fingers .thumb { border: 0.1em solid var(--finger-thumb); } +table { + font-variant-numeric: tabular-nums; +} .pure-table td.numint { text-align: right; padding-right: 0; diff --git a/lulua/report.py b/lulua/report.py new file mode 100644 index 0000000..200bb9b --- /dev/null +++ b/lulua/report.py @@ -0,0 +1,64 @@ +import sys, argparse, logging, pickle +from gettext import GNUTranslations, NullTranslations +from decimal import Decimal + +import yaml +from jinja2 import Environment, PackageLoader +from bokeh.resources import CDN as bokehres + +from .layout import LEFT, RIGHT, Direction, FingerType + +def approx (i): + """ Get approximate human-readable string for large number """ + + units = ['', 'thousand', 'million', 'billion'] + base = Decimal (1000) + i = Decimal (i) + while i >= base and len (units) > 1: + i /= base + units.pop (0) + return round (i, 1), units[0] + +def numspace (s): + """ Replace ordinary spaces with unicode FIGURE SPACE """ + return s.replace (' ', '\u2007') + +def render (): + parser = argparse.ArgumentParser(description='Create lulua report.') + parser.add_argument('-c', '--corpus', nargs='+', metavar='FILE', help='Corpus metadata files') + parser.add_argument('-l', '--layoutstats', nargs='+', metavar='FILE', help='Layout statistics files') + logging.basicConfig (level=logging.INFO) + args = parser.parse_args() + + env = Environment ( + loader=PackageLoader (__package__, 'data/report'), + ) + env.filters['approx'] = approx + env.filters['numspace'] = numspace + + corpus = [] + for x in args.corpus: + with open (x) as fd: + corpus.extend (filter (lambda x: x is not None, yaml.safe_load_all (fd))) + layoutstats = {} + for x in args.layoutstats: + with open (x, 'rb') as fd: + d = pickle.load (fd) + layoutstats[d['layout']] = d + + corpustotal = {} + for k in ('words', 'characters'): + corpustotal[k] = sum (map (lambda x: x['stats'][k], corpus)) + + tpl = env.get_template('index.html') + + tpl.stream ( + corpus=corpus, + corpustotal=corpustotal, + layoutstats=layoutstats, + bokehres=bokehres, + # XXX: not sure how to expose these properly to the template + fingerOrder={LEFT: list (FingerType), RIGHT: list (reversed (FingerType))}, + Direction=Direction, + ).dump (sys.stdout) + diff --git a/lulua/stats.py b/lulua/stats.py index 80c269b..13d878b 100644 --- a/lulua/stats.py +++ b/lulua/stats.py @@ -22,7 +22,6 @@ import sys, operator, pickle, argparse, logging, yaml, math, time from operator import itemgetter from itertools import chain, groupby, product from collections import defaultdict -from decimal import Decimal from .layout import * from .keyboard import defaultKeyboards @@ -313,7 +312,7 @@ def keyHeatmap (args): buttons[k.name] = v yaml.dump (data, sys.stdout) -def fingerHand (args): +def layoutstats (args): stats = pickle.load (sys.stdin.buffer) keyboard = defaultKeyboards[args.keyboard] @@ -328,19 +327,14 @@ def fingerHand (args): hands[hand] += count fingers[(hand, finger)] += count - print ('<div class="fingerhandstats" dir="ltr" lang="en">') - fingerOrder = {LEFT: list (FingerType), RIGHT: reversed (FingerType)} - for hand in Direction: - handpct = hands[hand]/buttonPresses*100 - print (f'<div class="{hand.name.lower()}" style="width: {handpct:.3f}%;">\n\t<div class="hand">{handpct:.2f}%</div>') - print ('\t<div class="fingers">') - for finger in fingerOrder[hand]: - fingerpct = fingers[(hand, finger)]/buttonPresses*100 - # finger width is relative to parent (i.e. hand) - fingerwidth = fingers[(hand, finger)]/hands[hand]*100 - print (f'\t\t<div class="{finger.name.lower()}" style="width: {fingerwidth:.3f}%;">{fingerpct:.2f}</div>') - print ('\t</div>\n\t</div>') - print ('</div>') + asymmetry = hands[LEFT]/buttonPresses - hands[RIGHT]/buttonPresses + pickle.dump (dict ( + layout=args.layout, + hands=dict (hands), + fingers=dict (fingers), + buttonPresses=buttonPresses, + asymmetry=asymmetry, + ), sys.stdout.buffer) def latinImeDict (args): """ @@ -379,46 +373,6 @@ def corpusStats (args): # make document concatable print ('---') -def approx (i): - """ Get approximate human-readable string for large number """ - - units = ['', 'thousand', 'million', 'billion'] - base = Decimal (1000) - i = Decimal (i) - while i >= base and len (units) > 1: - i /= base - units.pop (0) - i = round (i, 1) - return int (i), int (i%1*10), units[0] - -def corpusHtml (args): - meta = list (filter (lambda x: x is not None, yaml.safe_load_all (sys.stdin))) - total = {'words': 0, 'characters': 0} - print ('<table class="pure-table"><thead><tr><th>Source</th><th colspan="2"></th><th colspan="2">Words</th><th colspan="2">Characters</th></thead><tbody>') - for c in sorted (meta, key=lambda x: x['source']['name'].lower ()): - print ('<tr>') - print (f'<td><a href="{c["source"]["url"]}">{c["source"]["name"]}</a></td>') - count = c.get ('count') - if count: - print (f'<td class="numint">{count[0]//1000:d},</td><td class="numfrac">{count[0]%1000:03d}\u202f{count[1]}</td>') - else: - print ('<td class="numint"></td><td class="numfrac"></td>') - - stats = c.get ('stats') - for k in ('words', 'characters'): - i = approx (stats[k]) - print (f'<td class="numint">{i[0]}.</td><td class="numfrac">{i[1]}\u202f{i[2]}</td>') - print ('</tr>') - - for k in ('words', 'characters'): - total[k] += c['stats'][k] - print ('<tr><td>Total</td><td class="numint"></td><td class="numfrac"></td>') - for k in ('words', 'characters'): - i = approx (total[k]) - print (f'<td class="numint">{i[0]}.</td><td class="numfrac">{i[1]}\u202f{i[2]}</td>') - print ('</tr>') - print ('</tbody></table>') - def main (): parser = argparse.ArgumentParser(description='Process statistics files.') parser.add_argument('-l', '--layout', metavar='LAYOUT', help='Keyboard layout name') @@ -439,15 +393,13 @@ def main (): sp.set_defaults (func=triadfreq) sp = subparsers.add_parser('keyheatmap') sp.set_defaults (func=keyHeatmap) - sp = subparsers.add_parser('fingerhand') - sp.set_defaults (func=fingerHand) + sp = subparsers.add_parser('layoutstats') + sp.set_defaults (func=layoutstats) sp = subparsers.add_parser('latinime') sp.set_defaults (func=latinImeDict) sp = subparsers.add_parser('corpusstats') sp.add_argument('metadata', type=argparse.FileType ('r')) sp.set_defaults (func=corpusStats) - sp = subparsers.add_parser('corpushtml') - sp.set_defaults (func=corpusHtml) logging.basicConfig (level=logging.INFO) args = parser.parse_args() @@ -25,7 +25,7 @@ setup( version='0.1dev0', author='Lars-Dominik Braun', author_email='lars+lulua@6xq.net', - #url='https://6xq.net/crocoite/', + url='https://6xq.net/lulua/', packages=['lulua'], license='LICENSE.txt', description='Keyboard layout optimization', @@ -39,16 +39,22 @@ setup( 'tqdm', 'html5lib', 'ebooklib', + 'jinja2', ], entry_points={ 'console_scripts': [ 'lulua-analyze = lulua.stats:main', 'lulua-render = lulua.render:render', + 'lulua-report = lulua.report:render', 'lulua-import = lulua.layout:importFrom', 'lulua-optimize = lulua.optimize:optimize', 'lulua-write = lulua.text:write', ], }, + package_data = { + 'lulua': ['data/*', 'data/keyboards/*.yaml', 'data/layouts/*.yaml', + 'data/report/*', 'data/winkbd/*'], + }, setup_requires=['pytest-runner'], tests_require=["pytest", 'pytest-cov'], python_requires='>=3.6', |