# Origin: lulua/report.py, created by commit
# 0f8643954fd9507aec85bab46046e71a497bfffe
# (Lars-Dominik Braun, Sat, 22 Feb 2020 13:20:31 +0100),
# "doc: Switch to jinja2-based rendering".
#
# Commit message: Pre-rendering HTML was not the best idea. Instead
# pre-process the data, cache it into data files and do the HTML rendering
# only as the final step. Also adds asymmetry to analysis and uses tabular
# numbers and spaces instead of ugly table hacks to align numbers.

"""Render the lulua report from pre-processed data files.

Corpus metadata (YAML) and layout statistics (pickle) are loaded from the
files named on the command line and handed to the ``index.html`` jinja2
template found under ``data/report`` in this package. The rendered result is
written to standard output.
"""

import argparse
import logging
import pickle
import sys
from gettext import GNUTranslations, NullTranslations
from decimal import Decimal

import yaml
from jinja2 import Environment, PackageLoader
from bokeh.resources import CDN as bokehres

from .layout import LEFT, RIGHT, Direction, FingerType


def approx(i):
    """Get an approximate, human-readable representation of a large number.

    The value is divided by 1000 until its magnitude drops below 1000 or the
    largest known unit ("billion") is reached.

    :param i: Any value accepted by the ``Decimal`` constructor.
    :return: Tuple ``(value, unit)`` where *value* is a ``Decimal`` rounded
        to one decimal place and *unit* is one of ``''``, ``'thousand'``,
        ``'million'`` or ``'billion'``.
    """
    units = ('', 'thousand', 'million', 'billion')
    base = Decimal(1000)
    value = Decimal(i)
    index = 0
    # Compare the magnitude so negative numbers are scaled as well.
    while abs(value) >= base and index < len(units) - 1:
        value /= base
        index += 1
    return round(value, 1), units[index]


def numspace(s):
    """Replace ordinary spaces in *s* with unicode FIGURE SPACE (U+2007)."""
    return s.replace(' ', '\u2007')


def render():
    """Command line entry point: render the report to standard output.

    Parses ``sys.argv``, loads every corpus metadata file (``-c``) and every
    layout statistics file (``-l``), then streams the ``index.html``
    template to ``sys.stdout``. Omitting either option is treated as an
    empty list of files.
    """
    parser = argparse.ArgumentParser(description='Create lulua report.')
    # default=[] keeps the loops below working when an option is omitted;
    # argparse would otherwise leave the attribute as None.
    parser.add_argument('-c', '--corpus', nargs='+', metavar='FILE',
                        default=[], help='Corpus metadata files')
    parser.add_argument('-l', '--layoutstats', nargs='+', metavar='FILE',
                        default=[], help='Layout statistics files')
    logging.basicConfig(level=logging.INFO)
    args = parser.parse_args()

    env = Environment(
        loader=PackageLoader(__package__, 'data/report'),
    )
    env.filters['approx'] = approx
    env.filters['numspace'] = numspace

    corpus = []
    for path in args.corpus:
        # Explicit encoding: do not depend on the locale's default codec.
        with open(path, encoding='utf-8') as fd:
            # A YAML stream may contain empty documents, which load as None.
            corpus.extend(
                doc for doc in yaml.safe_load_all(fd) if doc is not None
            )

    layoutstats = {}
    for path in args.layoutstats:
        with open(path, 'rb') as fd:
            # NOTE(review): pickle.load can execute arbitrary code. Only pass
            # statistics files produced by a trusted source.
            stats = pickle.load(fd)
        layoutstats[stats['layout']] = stats

    # Sum the per-corpus counters over all loaded corpora.
    corpustotal = {
        key: sum(entry['stats'][key] for entry in corpus)
        for key in ('words', 'characters')
    }

    tpl = env.get_template('index.html')

    tpl.stream(
        corpus=corpus,
        corpustotal=corpustotal,
        layoutstats=layoutstats,
        bokehres=bokehres,
        # XXX: not sure how to expose these properly to the template
        fingerOrder={
            LEFT: list(FingerType),
            RIGHT: list(reversed(FingerType)),
        },
        Direction=Direction,
    ).dump(sys.stdout)