summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lars-Dominik Braun <lars@6xq.net>, 2020-02-22 13:20:31 +0100
committer: Lars-Dominik Braun <lars@6xq.net>, 2020-02-22 13:23:26 +0100
commit: 0f8643954fd9507aec85bab46046e71a497bfffe (patch)
tree: 28708a991bd136fd255282326e93f7588120a2e3
parent: a91fc5e945b841ae54f67ed331409ad857178f13 (diff)
download: lulua-0f8643954fd9507aec85bab46046e71a497bfffe.tar.gz
lulua-0f8643954fd9507aec85bab46046e71a497bfffe.tar.bz2
lulua-0f8643954fd9507aec85bab46046e71a497bfffe.zip
doc: Switch to jinja2-based rendering
Pre-rendering HTML was not the best idea. Instead, pre-process the data, cache it in data files, and do the HTML rendering only as the final step. Also adds an asymmetry value to the layout analysis and uses tabular numbers and figure spaces instead of ugly table hacks to align numbers.
-rwxr-xr-x gen.sh 38
-rw-r--r-- lulua/data/report/index.html (renamed from doc/index.html) 74
-rw-r--r-- lulua/data/report/lulua-logo.svg (renamed from doc/lulua-logo.svg) 0
-rw-r--r-- lulua/data/report/style.css (renamed from doc/style.css) 3
-rw-r--r-- lulua/report.py 64
-rw-r--r-- lulua/stats.py 70
-rw-r--r-- setup.py 8
7 files changed, 166 insertions, 91 deletions
diff --git a/gen.sh b/gen.sh
index 3d83839..340bfe1 100755
--- a/gen.sh
+++ b/gen.sh
@@ -9,6 +9,7 @@ cat <<EOF
### auto-generated by gen.sh. Do not edit. ###
### settings ###
+datadir=lulua/data
corpusdir=corpus
statsdir=stats
docdir=doc
@@ -83,20 +84,17 @@ rule mkdir
rule letterfreq
command = lulua-analyze -l ar-lulua letterfreq < \$in > \$out
-rule analyze-fingerhand
- command = lulua-analyze -l \$layout fingerhand < \$in > \$out
+rule analyze-layoutstats
+ command = lulua-analyze -l \$layout layoutstats < \$in > \$out
rule analyze-corpusstats
command = lulua-analyze -l ar-lulua corpusstats \$metadata < \$stats > \$out
-rule analyze-corpushtml
- command = cat \$in | lulua-analyze -l ar-lulua corpushtml > \$out
-
rule wordlist
command = lulua-analyze -l ar-lulua latinime < \$in > \$out
-rule html
- command = m4 -I \$docdir/_temp \$template > \$out
+rule report
+ command = lulua-report -c \$corpus -l \$layoutstats > \$out
rule cp
command = cp \$in \$out
@@ -121,8 +119,8 @@ build \$docdir/_build: mkdir
build \$docdir/_build/fonts: mkdir
build \$docdir/_temp: mkdir
build \$docdir/_build/letterfreq.json: letterfreq \$statsdir/ar-lulua/all.pickle || \$docdir/_build
-build \$docdir/_build/style.css: cp \$docdir/style.css || \$docdir/_build
-build \$docdir/_build/lulua-logo.svg: cp \$docdir/lulua-logo.svg || \$docdir/_build
+build \$docdir/_build/style.css: cp \$datadir/report/style.css || \$docdir/_build
+build \$docdir/_build/lulua-logo.svg: cp \$datadir/report/lulua-logo.svg || \$docdir/_build
# wordlist
build \$docdir/_temp/lulua.combined: wordlist \$statsdir/ar-lulua/all.pickle || \$docdir/_temp
build \$docdir/_build/lulua.combined.gz: gz \$docdir/_temp/lulua.combined || \$docdir/_build
@@ -191,12 +189,12 @@ build \$docdir/_temp/${l}-heat.yaml: analyze-heat \$statsdir/${l}/all.pickle ||
build \$docdir/_build/${l}-heat.svg: render-svg-heat \$docdir/_temp/${l}-heat.yaml || \$docdir/_build
layout = ${l}
-build \$docdir/_temp/${l}-fingerhand.html: analyze-fingerhand \$statsdir/${l}/all.pickle || \$docdir/_temp
+build \$docdir/_temp/${l}-layoutstats.pickle: analyze-layoutstats \$statsdir/${l}/all.pickle || \$docdir/_temp
layout = ${l}
EOF
# included by index.html and thus must be its dependencies
-fingerhandfiles+=" \$docdir/_temp/${l}-fingerhand.html"
+layoutstatsfiles+=" \$docdir/_temp/${l}-layoutstats.pickle"
done
# layouts with xmodmap support
@@ -209,7 +207,7 @@ EOF
done
# statistics for each corpus (ar-lulua) and html rendering
-outfiles=""
+metafiles=""
for c in $corpora; do
cat <<EOF
build \$docdir/_temp/metadata-$c.yaml: analyze-corpusstats \$statsdir/ar-lulua/$c.pickle \$corpusdir/$c/metadata.yaml || \$docdir/_temp \$corpusdir/$c/metadata.yaml
@@ -217,18 +215,14 @@ build \$docdir/_temp/metadata-$c.yaml: analyze-corpusstats \$statsdir/ar-lulua/$
stats = \$statsdir/ar-lulua/$c.pickle
EOF
-outfiles+=" \$docdir/_temp/metadata-$c.yaml"
+metafiles+=" \$docdir/_temp/metadata-$c.yaml"
done
+# dependencies are not properly modeled, always rebuild
cat <<EOF
-build \$docdir/_temp/corpus.html: analyze-corpushtml $outfiles || \$docdir/_temp
-
-EOF
-
-# html, which depends on several other files generated above
-cat <<EOF
-build \$docdir/_build/index.html: html \$docdir/index.html \$docdir/_temp/corpus.html $fingerhandfiles || \$docdir/_build
- template = \$docdir/index.html
-
+build always: phony
+build \$docdir/_build/index.html: report | always || \$docdir/_build $metafiles $layoutstatsfiles
+ corpus = $metafiles
+ layoutstats = $layoutstatsfiles
EOF
diff --git a/doc/index.html b/lulua/data/report/index.html
index cc5c69f..5649fab 100644
--- a/doc/index.html
+++ b/lulua/data/report/index.html
@@ -9,7 +9,13 @@
<link href="https://fonts.googleapis.com/css?family=IBM+Plex+Mono|IBM+Plex+Sans:100,400&display=swap" rel="stylesheet">
<link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/pure-min.css" integrity="sha384-oAOxQR6DkCoMliIh8yFnu25d7Eq/PHS21PClpwjOTeU2jRSq11vu66rf90/cZr47" crossorigin="anonymous">
<link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/grids-responsive-min.css">
- <script src="https://cdn.pydata.org/bokeh/release/bokeh-1.3.4.min.js"></script>
+ {# bokeh #}
+ {% for f in bokehres.js_files -%}
+ <script src="{{ f }}"></script>
+ {%- endfor %}
+ {% for f in bokehres.css_files -%}
+ <link rel="stylesheet" href="{{ f }}">
+ {%- endfor %}
<link rel="stylesheet" href="style.css">
</head>
<body>
@@ -126,7 +132,32 @@
The corpus used for the following analysis consists of
</p>
- include(`corpus.html')
+ <table class="pure-table"><thead><tr><th>Source</th><th></th><th>Words</th><th>Characters</th></thead><tbody>
+ {% for c in corpus|sort(attribute='source.name') %}
+ <tr>
+ <td><a href="{{ c.source.url }}">{{ c.source.name }}</a></td>
+ {% set count = c.get ('count') %}
+ {% if count %}
+ {# use new style formatting, for some reason %7,d does not work #}
+ <td>{{ '{:7,d}'.format(count[0])|numspace }}&#x202f;{{ count[1] }}</td>
+ {% else %}
+ <td></td>
+ {% endif %}
+
+ {% set stats = c.get ('stats') %}
+ {% for k in ('words', 'characters') %}
+ {% set i = stats[k]|approx %}
+ <td>{{ '%5.1f'|format(i[0])|numspace }}&#x202f;{{ i[1] }}</td>
+ {% endfor %}
+ </tr>
+ {% endfor %}
+ <tr><td>Total</td><td></td>
+ {% for k in ('words', 'characters') %}
+ {% set i = corpustotal[k]|approx %}
+ <td>{{ '%5.1f'|format(i[0])|numspace }}&#x202f;{{ i[1] }}</td>
+ {% endfor %}
+ </tr>
+ </tbody></table>
<p>
The plot below shows <bdo dir="ltr" lang="ar">ا ل ي م و ن</bdo> can be
@@ -175,14 +206,39 @@
<dt class="finger thumb">cyan</dt>
<dd>thumb</dd>
</dl>
+ <p>Asymmetry is defined as the difference between left and right hand usage.</p>
</div>
</div>
</div>
+{% macro fingerhandstats(stats) %}
+{% set hands = stats.hands %}
+{% set fingers = stats.fingers %}
+<div class="fingerhandstats" dir="ltr" lang="en">
+{% for hand in Direction %}
+ {% set handpct = hands[hand]/stats.buttonPresses*100 %}
+ <div class="{{ hand.name.lower() }}" style="width: {{ '%.3f'|format(handpct) }}%;">
+ <div class="hand">{{ '%.2f'|format(handpct) }}%</div>
+ <div class="fingers">
+ {% for finger in fingerOrder[hand] %}
+ {% set fingerpct = fingers[(hand, finger)]/stats.buttonPresses*100 %}
+ {# finger width is relative to parent (i.e. hand) #}
+ {% set fingerwidth = fingers[(hand, finger)]/hands[hand]*100 %}
+ <div class="{{ finger.name.lower() }}" style="width: {{ '%.3f'|format(fingerwidth) }}%;">{{ '%.2f'|format(fingerpct) }}</div>
+ {% endfor %}
+ </div>
+ </div>
+ {% if loop.first %}
+ <div class="asymm"><small>Asymmetry: {{ '%.3f'|format(stats.asymmetry) }}</small></div>
+ {% endif %}
+{% endfor %}
+</div>
+{% endmacro %}
+
<figure id="ar-lulua-heat">
<div class="lbox" lang="en">
<img src="ar-lulua-heat.svg">
- include(`ar-lulua-fingerhand.html')
+ {{ fingerhandstats(layoutstats['ar-lulua']) }}
</div>
</figure>
</section>
@@ -201,7 +257,7 @@
<figure id="ar-asmo663">
<div class="lbox">
<img src="ar-asmo663-heat.svg">
- include(`ar-asmo663-fingerhand.html')
+ {{ fingerhandstats(layoutstats['ar-asmo663']) }}
</div>
<figcaption class="pure-g flexreverse">
<div class="pure-u-1 pure-u-md-1-2">
@@ -226,7 +282,7 @@
<figure id="ar-linux">
<div class="lbox">
<img src="ar-linux-heat.svg">
- include(`ar-linux-fingerhand.html')
+ {{ fingerhandstats(layoutstats['ar-linux']) }}
</div>
<figcaption class="pure-g flexreverse">
<div class="pure-u-1 pure-u-md-1-2">
@@ -251,7 +307,7 @@
<figure id="ar-malas">
<div class="lbox">
<img src="ar-malas-heat.svg">
- include(`ar-malas-fingerhand.html')
+ {{ fingerhandstats(layoutstats['ar-malas']) }}
</div>
<figcaption class="pure-g flexreverse">
<div class="pure-u-1 pure-u-md-1-2">
@@ -282,7 +338,7 @@
<figure id="ar-osman">
<div class="lbox">
<img src="ar-osman-heat.svg">
- include(`ar-osman-fingerhand.html')
+ {{ fingerhandstats(layoutstats['ar-osman']) }}
</div>
<figcaption class="pure-g flexreverse">
<div class="pure-u-1 pure-u-md-1-2">
@@ -315,7 +371,7 @@
<figure>
<div class="lbox">
<img src="ar-khorshid-heat.svg">
- include(`ar-khorshid-fingerhand.html')
+ {{ fingerhandstats(layoutstats['ar-khorshid']) }}
</div>
<figcaption class="pure-g flexreverse">
<div class="pure-u-1 pure-u-md-1-2">
@@ -345,7 +401,7 @@
<figure>
<div class="lbox">
<img src="ar-phonetic-heat.svg">
- include(`ar-phonetic-fingerhand.html')
+ {{ fingerhandstats(layoutstats['ar-phonetic']) }}
</div>
<figcaption class="pure-g flexreverse">
<div class="pure-u-1 pure-u-md-1-2">
diff --git a/doc/lulua-logo.svg b/lulua/data/report/lulua-logo.svg
index 20136c0..20136c0 100644
--- a/doc/lulua-logo.svg
+++ b/lulua/data/report/lulua-logo.svg
diff --git a/doc/style.css b/lulua/data/report/style.css
index 3d8e482..26b2e96 100644
--- a/doc/style.css
+++ b/lulua/data/report/style.css
@@ -147,6 +147,9 @@ div.fingerhandstats .fingers .thumb {
border: 0.1em solid var(--finger-thumb);
}
+table {
+ font-variant-numeric: tabular-nums;
+}
.pure-table td.numint {
text-align: right;
padding-right: 0;
diff --git a/lulua/report.py b/lulua/report.py
new file mode 100644
index 0000000..200bb9b
--- /dev/null
+++ b/lulua/report.py
@@ -0,0 +1,64 @@
+import sys, argparse, logging, pickle
+from gettext import GNUTranslations, NullTranslations
+from decimal import Decimal
+
+import yaml
+from jinja2 import Environment, PackageLoader
+from bokeh.resources import CDN as bokehres
+
+from .layout import LEFT, RIGHT, Direction, FingerType
+
+def approx (i):
+ """ Get approximate human-readable string for large number """
+
+ units = ['', 'thousand', 'million', 'billion']
+ base = Decimal (1000)
+ i = Decimal (i)
+ while i >= base and len (units) > 1:
+ i /= base
+ units.pop (0)
+ return round (i, 1), units[0]
+
+def numspace (s):
+ """ Replace ordinary spaces with unicode FIGURE SPACE """
+ return s.replace (' ', '\u2007')
+
+def render ():
+ parser = argparse.ArgumentParser(description='Create lulua report.')
+ parser.add_argument('-c', '--corpus', nargs='+', metavar='FILE', help='Corpus metadata files')
+ parser.add_argument('-l', '--layoutstats', nargs='+', metavar='FILE', help='Layout statistics files')
+ logging.basicConfig (level=logging.INFO)
+ args = parser.parse_args()
+
+ env = Environment (
+ loader=PackageLoader (__package__, 'data/report'),
+ )
+ env.filters['approx'] = approx
+ env.filters['numspace'] = numspace
+
+ corpus = []
+ for x in args.corpus:
+ with open (x) as fd:
+ corpus.extend (filter (lambda x: x is not None, yaml.safe_load_all (fd)))
+ layoutstats = {}
+ for x in args.layoutstats:
+ with open (x, 'rb') as fd:
+ d = pickle.load (fd)
+ layoutstats[d['layout']] = d
+
+ corpustotal = {}
+ for k in ('words', 'characters'):
+ corpustotal[k] = sum (map (lambda x: x['stats'][k], corpus))
+
+ tpl = env.get_template('index.html')
+
+ tpl.stream (
+ corpus=corpus,
+ corpustotal=corpustotal,
+ layoutstats=layoutstats,
+ bokehres=bokehres,
+ # XXX: not sure how to expose these properly to the template
+ fingerOrder={LEFT: list (FingerType), RIGHT: list (reversed (FingerType))},
+ Direction=Direction,
+ ).dump (sys.stdout)
+
diff --git a/lulua/stats.py b/lulua/stats.py
index 80c269b..13d878b 100644
--- a/lulua/stats.py
+++ b/lulua/stats.py
@@ -22,7 +22,6 @@ import sys, operator, pickle, argparse, logging, yaml, math, time
from operator import itemgetter
from itertools import chain, groupby, product
from collections import defaultdict
-from decimal import Decimal
from .layout import *
from .keyboard import defaultKeyboards
@@ -313,7 +312,7 @@ def keyHeatmap (args):
buttons[k.name] = v
yaml.dump (data, sys.stdout)
-def fingerHand (args):
+def layoutstats (args):
stats = pickle.load (sys.stdin.buffer)
keyboard = defaultKeyboards[args.keyboard]
@@ -328,19 +327,14 @@ def fingerHand (args):
hands[hand] += count
fingers[(hand, finger)] += count
- print ('<div class="fingerhandstats" dir="ltr" lang="en">')
- fingerOrder = {LEFT: list (FingerType), RIGHT: reversed (FingerType)}
- for hand in Direction:
- handpct = hands[hand]/buttonPresses*100
- print (f'<div class="{hand.name.lower()}" style="width: {handpct:.3f}%;">\n\t<div class="hand">{handpct:.2f}%</div>')
- print ('\t<div class="fingers">')
- for finger in fingerOrder[hand]:
- fingerpct = fingers[(hand, finger)]/buttonPresses*100
- # finger width is relative to parent (i.e. hand)
- fingerwidth = fingers[(hand, finger)]/hands[hand]*100
- print (f'\t\t<div class="{finger.name.lower()}" style="width: {fingerwidth:.3f}%;">{fingerpct:.2f}</div>')
- print ('\t</div>\n\t</div>')
- print ('</div>')
+ asymmetry = hands[LEFT]/buttonPresses - hands[RIGHT]/buttonPresses
+ pickle.dump (dict (
+ layout=args.layout,
+ hands=dict (hands),
+ fingers=dict (fingers),
+ buttonPresses=buttonPresses,
+ asymmetry=asymmetry,
+ ), sys.stdout.buffer)
def latinImeDict (args):
"""
@@ -379,46 +373,6 @@ def corpusStats (args):
# make document concatable
print ('---')
-def approx (i):
- """ Get approximate human-readable string for large number """
-
- units = ['', 'thousand', 'million', 'billion']
- base = Decimal (1000)
- i = Decimal (i)
- while i >= base and len (units) > 1:
- i /= base
- units.pop (0)
- i = round (i, 1)
- return int (i), int (i%1*10), units[0]
-
-def corpusHtml (args):
- meta = list (filter (lambda x: x is not None, yaml.safe_load_all (sys.stdin)))
- total = {'words': 0, 'characters': 0}
- print ('<table class="pure-table"><thead><tr><th>Source</th><th colspan="2"></th><th colspan="2">Words</th><th colspan="2">Characters</th></thead><tbody>')
- for c in sorted (meta, key=lambda x: x['source']['name'].lower ()):
- print ('<tr>')
- print (f'<td><a href="{c["source"]["url"]}">{c["source"]["name"]}</a></td>')
- count = c.get ('count')
- if count:
- print (f'<td class="numint">{count[0]//1000:d},</td><td class="numfrac">{count[0]%1000:03d}\u202f{count[1]}</td>')
- else:
- print ('<td class="numint"></td><td class="numfrac"></td>')
-
- stats = c.get ('stats')
- for k in ('words', 'characters'):
- i = approx (stats[k])
- print (f'<td class="numint">{i[0]}.</td><td class="numfrac">{i[1]}\u202f{i[2]}</td>')
- print ('</tr>')
-
- for k in ('words', 'characters'):
- total[k] += c['stats'][k]
- print ('<tr><td>Total</td><td class="numint"></td><td class="numfrac"></td>')
- for k in ('words', 'characters'):
- i = approx (total[k])
- print (f'<td class="numint">{i[0]}.</td><td class="numfrac">{i[1]}\u202f{i[2]}</td>')
- print ('</tr>')
- print ('</tbody></table>')
-
def main ():
parser = argparse.ArgumentParser(description='Process statistics files.')
parser.add_argument('-l', '--layout', metavar='LAYOUT', help='Keyboard layout name')
@@ -439,15 +393,13 @@ def main ():
sp.set_defaults (func=triadfreq)
sp = subparsers.add_parser('keyheatmap')
sp.set_defaults (func=keyHeatmap)
- sp = subparsers.add_parser('fingerhand')
- sp.set_defaults (func=fingerHand)
+ sp = subparsers.add_parser('layoutstats')
+ sp.set_defaults (func=layoutstats)
sp = subparsers.add_parser('latinime')
sp.set_defaults (func=latinImeDict)
sp = subparsers.add_parser('corpusstats')
sp.add_argument('metadata', type=argparse.FileType ('r'))
sp.set_defaults (func=corpusStats)
- sp = subparsers.add_parser('corpushtml')
- sp.set_defaults (func=corpusHtml)
logging.basicConfig (level=logging.INFO)
args = parser.parse_args()
diff --git a/setup.py b/setup.py
index 2e20067..4134766 100644
--- a/setup.py
+++ b/setup.py
@@ -25,7 +25,7 @@ setup(
version='0.1dev0',
author='Lars-Dominik Braun',
author_email='lars+lulua@6xq.net',
- #url='https://6xq.net/crocoite/',
+ url='https://6xq.net/lulua/',
packages=['lulua'],
license='LICENSE.txt',
description='Keyboard layout optimization',
@@ -39,16 +39,22 @@ setup(
'tqdm',
'html5lib',
'ebooklib',
+ 'jinja2',
],
entry_points={
'console_scripts': [
'lulua-analyze = lulua.stats:main',
'lulua-render = lulua.render:render',
+ 'lulua-report = lulua.report:render',
'lulua-import = lulua.layout:importFrom',
'lulua-optimize = lulua.optimize:optimize',
'lulua-write = lulua.text:write',
],
},
+ package_data = {
+ 'lulua': ['data/*', 'data/keyboards/*.yaml', 'data/layouts/*.yaml',
+ 'data/report/*', 'data/winkbd/*'],
+ },
setup_requires=['pytest-runner'],
tests_require=["pytest", 'pytest-cov'],
python_requires='>=3.6',