diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2020-02-22 13:20:31 +0100 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2020-02-22 13:23:26 +0100 |
commit | 0f8643954fd9507aec85bab46046e71a497bfffe (patch) | |
tree | 28708a991bd136fd255282326e93f7588120a2e3 /lulua/report.py | |
parent | a91fc5e945b841ae54f67ed331409ad857178f13 (diff) | |
download | lulua-0f8643954fd9507aec85bab46046e71a497bfffe.tar.gz lulua-0f8643954fd9507aec85bab46046e71a497bfffe.tar.bz2 lulua-0f8643954fd9507aec85bab46046e71a497bfffe.zip |
doc: Switch to jinja2-based rendering
Pre-rendering HTML was not the best idea. Instead pre-process the data,
cache it into data files and do the HTML rendering only as the final
step.
Also adds asymmetry to analysis and uses tabular numbers and spaces
instead of ugly table hacks to align numbers.
Diffstat (limited to 'lulua/report.py')
-rw-r--r-- | lulua/report.py | 64 |
1 files changed, 64 insertions, 0 deletions
# diff --git a/lulua/report.py b/lulua/report.py
# new file mode 100644, index 0000000..200bb9b

import sys
import argparse
import logging
import pickle
from gettext import GNUTranslations, NullTranslations
from decimal import Decimal

import yaml
from jinja2 import Environment, PackageLoader
from bokeh.resources import CDN as bokehres

from .layout import LEFT, RIGHT, Direction, FingerType

def approx (i):
    """
    Get approximate human-readable string for large number

    Returns a tuple (value, unit): i is divided by 1000 until it drops
    below 1000 or the largest unit is reached, then rounded to one decimal
    place. unit is '' for values that were never scaled down.
    """

    units = ['', 'thousand', 'million', 'billion']
    base = Decimal (1000)
    i = Decimal (i)
    # Keep at least one unit in the list, so very large numbers are
    # expressed in the largest unit instead of running off the end.
    while i >= base and len (units) > 1:
        i /= base
        units.pop (0)
    return round (i, 1), units[0]

def numspace (s):
    """ Replace ordinary spaces with unicode FIGURE SPACE """
    return s.replace (' ', '\u2007')

def render ():
    """
    Command-line entry point: render the report to stdout

    Parses the command line, loads corpus metadata (YAML documents) and
    layout statistics (pickle files) and streams the rendered index.html
    template to sys.stdout.
    """
    parser = argparse.ArgumentParser(description='Create lulua report.')
    # Both inputs are iterated unconditionally below. Requiring them turns a
    # missing option into a usage error instead of a TypeError on None.
    parser.add_argument('-c', '--corpus', nargs='+', metavar='FILE',
            required=True, help='Corpus metadata files')
    parser.add_argument('-l', '--layoutstats', nargs='+', metavar='FILE',
            required=True, help='Layout statistics files')
    logging.basicConfig (level=logging.INFO)
    args = parser.parse_args()

    env = Environment (
            loader=PackageLoader (__package__, 'data/report'),
            )
    env.filters['approx'] = approx
    env.filters['numspace'] = numspace

    corpus = []
    for path in args.corpus:
        with open (path) as fd:
            # Skip empty YAML documents, which load as None.
            corpus.extend (doc for doc in yaml.safe_load_all (fd) if doc is not None)

    layoutstats = {}
    for path in args.layoutstats:
        with open (path, 'rb') as fd:
            # NOTE: unpickling can execute arbitrary code; only pass
            # statistics files that come from a trusted source.
            d = pickle.load (fd)
        layoutstats[d['layout']] = d

    corpustotal = {
            k: sum (entry['stats'][k] for entry in corpus)
            for k in ('words', 'characters')
            }

    tpl = env.get_template('index.html')

    tpl.stream (
            corpus=corpus,
            corpustotal=corpustotal,
            layoutstats=layoutstats,
            bokehres=bokehres,
            # XXX: not sure how to expose these properly to the template
            fingerOrder={LEFT: list (FingerType), RIGHT: list (reversed (FingerType))},
            Direction=Direction,
            ).dump (sys.stdout)