summaryrefslogtreecommitdiff
path: root/lulua/report.py
blob: b6c6f7ead2de10cad889d0c5ca7e09d6ab7694bb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import sys, argparse, logging, pickle
from gettext import GNUTranslations, NullTranslations
from decimal import Decimal

import yaml
from jinja2 import Environment, PackageLoader
from bokeh.resources import CDN as bokehres

from .layout import LEFT, RIGHT, Direction, FingerType

def approx (i):
    """
    Get approximate human-readable string for large number

    Scales i down by factors of 1000 until the rounded magnitude is below
    1000 or the largest known unit (billion) is reached.

    Returns a tuple (value, unit): value is a Decimal rounded to one
    decimal place, unit is one of '', 'thousand', 'million', 'billion'.
    """

    units = ('', 'thousand', 'million', 'billion')
    base = Decimal (1000)
    value = Decimal (i)
    idx = 0
    # Compare the *rounded* magnitude, so that e.g. 999950 becomes
    # “1.0 million” and not “1000.0 thousand”. Using the magnitude (abs)
    # makes sure negative numbers are scaled the same way as positive ones.
    while abs (round (value, 1)) >= base and idx < len (units) - 1:
        value /= base
        idx += 1
    return round (value, 1), units[idx]

def numspace (s):
    """
    Swap every plain space in s for a unicode FIGURE SPACE (U+2007) and
    return the resulting string
    """
    plainSpace = ' '
    figureSpace = '\u2007'
    return s.replace (plainSpace, figureSpace)

def render ():
    """
    Command line entry point: render the HTML report to stdout

    Reads corpus metadata (YAML, multiple documents per file allowed) and
    layout statistics (pickle) from the files given on the command line,
    sums up the corpus totals and streams the rendered index.html template
    to standard output.
    """
    parser = argparse.ArgumentParser(description='Create lulua report.')
    # default=[]: without it an omitted option is None and the loops below
    # would fail with a TypeError
    parser.add_argument('-c', '--corpus', nargs='+', metavar='FILE', default=[], help='Corpus metadata files')
    parser.add_argument('-l', '--layoutstats', nargs='+', metavar='FILE', default=[], help='Layout statistics files')
    logging.basicConfig (level=logging.INFO)
    args = parser.parse_args()

    env = Environment (
            loader=PackageLoader (__package__, 'data/report'),
            )
    env.filters['approx'] = approx
    env.filters['numspace'] = numspace

    # Collect all non-empty YAML documents from every corpus file
    corpus = []
    for path in args.corpus:
        with open (path) as fd:
            corpus.extend (doc for doc in yaml.safe_load_all (fd) if doc is not None)

    # Layout statistics, indexed by the value of their 'layout' entry.
    # NOTE: unpickling can execute arbitrary code, only pass trusted files.
    layoutstats = {}
    for path in args.layoutstats:
        with open (path, 'rb') as fd:
            d = pickle.load (fd)
            layoutstats[d['layout']] = d

    # Totals over all corpora
    corpustotal = {}
    for k in ('words', 'characters'):
        corpustotal[k] = sum (c['stats'][k] for c in corpus)

    tpl = env.get_template('index.html')

    tpl.stream (
            corpus=corpus,
            corpustotal=corpustotal,
            layoutstats=layoutstats,
            bokehres=bokehres,
            # XXX: not sure how to expose these properly to the template
            fingerOrder={LEFT: list (FingerType), RIGHT: list (reversed (FingerType))},
            Direction=Direction,
            ).dump (sys.stdout)