summaryrefslogtreecommitdiff
path: root/lulua
diff options
context:
space:
mode:
authorLars-Dominik Braun <lars@6xq.net>2020-02-22 13:20:31 +0100
committerLars-Dominik Braun <lars@6xq.net>2020-02-22 13:23:26 +0100
commit0f8643954fd9507aec85bab46046e71a497bfffe (patch)
tree28708a991bd136fd255282326e93f7588120a2e3 /lulua
parenta91fc5e945b841ae54f67ed331409ad857178f13 (diff)
downloadlulua-0f8643954fd9507aec85bab46046e71a497bfffe.tar.gz
lulua-0f8643954fd9507aec85bab46046e71a497bfffe.tar.bz2
lulua-0f8643954fd9507aec85bab46046e71a497bfffe.zip
doc: Switch to jinja2-based rendering
Pre-rendering HTML was not the best idea. Instead pre-process the data, cache it into data files and do the HTML rendering only as the final step. Also adds asymmetry to analysis and uses tabular numbers and spaces instead of ugly table hacks to align numbers.
Diffstat (limited to 'lulua')
-rw-r--r--lulua/data/report/index.html474
-rw-r--r--lulua/data/report/lulua-logo.svg17
-rw-r--r--lulua/data/report/style.css202
-rw-r--r--lulua/report.py64
-rw-r--r--lulua/stats.py70
5 files changed, 768 insertions, 59 deletions
diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html
new file mode 100644
index 0000000..5649fab
--- /dev/null
+++ b/lulua/data/report/index.html
@@ -0,0 +1,474 @@
+<!doctype html>
+<html lang="ar">
+<head>
+ <meta charset="utf-8">
+ <title>لؤلؤة</title>
+ <!--<meta name="description" content="">-->
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+
+ <link href="https://fonts.googleapis.com/css?family=IBM+Plex+Mono|IBM+Plex+Sans:100,400&display=swap" rel="stylesheet">
+ <link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/pure-min.css" integrity="sha384-oAOxQR6DkCoMliIh8yFnu25d7Eq/PHS21PClpwjOTeU2jRSq11vu66rf90/cZr47" crossorigin="anonymous">
+ <link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/grids-responsive-min.css">
+ {# bokeh #}
+ {% for f in bokehres.js_files -%}
+ <script src="{{ f }}"></script>
+ {%- endfor %}
+ {% for f in bokehres.css_files -%}
+ <link rel="stylesheet" href="{{ f }}">
+ {%- endfor %}
+ <link rel="stylesheet" href="style.css">
+</head>
+<body>
+
+<div class="title-card pure-g">
+ <div class="pure-u-1 pure-u-lg-1-3">
+ <div class="lbox">
+ <h1 class="title"><img class="logo" src="lulua-logo.svg" alt="لؤلؤة"></h1>
+ <div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-sm-1-2">
+ <!--<h2 class="subtitle">لوحة مفاتيح العربية المريحة</h2>-->
+ </div>
+ <div class="pure-u-1 pure-u-sm-1-2">
+ <h2 class="subtitle" lang="en">Ergonomic Arabic Keyboard Layout</h2>
+ </div>
+ </div>
+ </div>
+ </div>
+ <div class="pure-u-1 pure-u-lg-2-3">
+ <div class="lbox">
+ <div class="layout">
+ <img src="ar-lulua.svg" alt="لؤلؤة">
+ </div>
+ </div>
+ </div>
+</div>
+
+<div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>This is work in progress and contributions are welcome. Head over to
+ <a href="https://github.com/PromyLOPh/lulua">GitHub</a> to see where
+ you can help.</p>
+ </div>
+ </div>
+</div>
+
+<div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h2>Goals</h2>
+ <ul>
+ <li>Ergonomic typing of unvocalized and vocalized text with 10 fingers</li>
+ <li>Modern Standard Arabic and Quranic Arabic</li>
+ <li>Localized numbers (European/Arabic-Indic)</li>
+ <li>Usable as primary or secondary keyboard</li>
+ <li>Compose-based</li>
+ <li>Support for Markdown, RST, Wikitext and similar markup</li>
+ </ul>
+
+ <p>Other languages using the Arabic alphabet (regional dialects, Urdu,
+ Persian) are explicitly not supported.</p>
+ </div>
+ </div>
+</div>
+
+<div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h2>Usage</h2>
+ <dl>
+ <dt>Windows</dt>
+ <dd>Download <a href="ar-lulua-w64.zip">driver</a> and follow instructions in <kbd>INSTALL.txt</kbd></dd>
+ <dt>Android</dt>
+ <dd>Install
+ <a href="https://play.google.com/store/apps/details?id=com.menny.android.anysoftkeyboard">AnySoftKeyboard</a>
+ and
+ <a href="https://play.google.com/store/apps/details?id=com.anysoftkeyboard.languagepack.arabic">Arabic for AnySoftKeyboard</a></dd>
+ <dt>Linux</dt>
+ <dd>Run: <code>xmodmap <a href="ar-lulua.xmodmap">ar-lulua.xmodmap</a></code></dd>
+ </dl>
+ </div>
+ </div>
+</div>
+
+<div class="indepth-card">
+<div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h1>Learn more</h1>
+ </div>
+ </div>
+</div>
+</div>
+
+<section>
+<div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ <div class="lbox">
+ <h2>الأبجدية العربية</h2>
+ </div>
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h2>The Arabic Alphabet</h2>
+ <p>
+ There are 28 letters in the Arabic alphabet, plus quite a few extra
+ symbols required for proper text input, like the hamza in its different
+ shapes <bdo dir="ltr" lang="ar">أ إ آ ء ئ ؤ</bdo>, ta marbutah <bdo
+ dir="ltr" lang="ar">ة</bdo>, alif maqsurah <bdo dir="ltr"
+ lang="ar">ى</bdo> and various diacritics for vowelized texts.
+ <!-- -->
+ Since the usability of a keyboard layout depends on the text entered
+ it is necessary to study letter and letter combination frequencies first.
+ <!-- -->
+ The corpus used for the following analysis consists of
+ </p>
+
+ <table class="pure-table"><thead><tr><th>Source</th><th></th><th>Words</th><th>Characters</th></tr></thead><tbody>
+ {% for c in corpus|sort(attribute='source.name') %}
+ <tr>
+ <td><a href="{{ c.source.url }}">{{ c.source.name }}</a></td>
+ {% set count = c.get ('count') %}
+ {% if count %}
+ {# use new style formatting, for some reason %7,d does not work #}
+ <td>{{ '{:7,d}'.format(count[0])|numspace }}&#x202f;{{ count[1] }}</td>
+ {% else %}
+ <td></td>
+ {% endif %}
+
+ {% set stats = c.get ('stats') %}
+ {% for k in ('words', 'characters') %}
+ {% set i = stats[k]|approx %}
+ <td>{{ '%5.1f'|format(i[0])|numspace }}&#x202f;{{ i[1] }}</td>
+ {% endfor %}
+ </tr>
+ {% endfor %}
+ <tr><td>Total</td><td></td>
+ {% for k in ('words', 'characters') %}
+ {% set i = corpustotal[k]|approx %}
+ <td>{{ '%5.1f'|format(i[0])|numspace }}&#x202f;{{ i[1] }}</td>
+ {% endfor %}
+ </tr>
+ </tbody></table>
+
+ <p>
+ The plot below shows <bdo dir="ltr" lang="ar">ا ل ي م و ن</bdo> can be
+ considered the most frequently used letters in the Arabic language.
+ <!-- -->
+ Together they account for more than 55% of all letters in the corpus.
+ </p>
+ </div>
+ </div>
+</div>
+
+<figure id="letterfreq">
+<div class="lbox" lang="en">
+ <div id="letterfreq-div"></div>
+</div>
+<figcaption class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>Arabic letter frequency distribution</p>
+ </div>
+ </div>
+</figcaption>
+</figure>
+</section>
+
+<section>
+<div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h2>Layout properties</h2>
+
+ <p>The following evaluation uses color coding to identify fingers:</p>
+ <dl class="colorcodes">
+ <dt class="finger little">red</dt>
+ <dd>little finger</dd>
+ <dt class="finger ring">blue</dt>
+ <dd>ring finger</dd>
+ <dt class="finger middle">magenta</dt>
+ <dd>middle finger</dd>
+ <dt class="finger index">violet</dt>
+ <dd>index finger</dd>
+ <dt class="finger thumb">cyan</dt>
+ <dd>thumb</dd>
+ </dl>
+ <p>Asymmetry is defined as the difference between left and right hand usage.</p>
+ </div>
+ </div>
+</div>
+
+{% macro fingerhandstats(stats) %}
+{#
+ Render the hand/finger usage bar for one layout.
+
+ stats is expected to provide the keys hands, fingers, buttonPresses and
+ asymmetry. Direction and fingerOrder are not parameters, they are read from
+ the enclosing template context.
+
+ Every hand becomes a box whose width is its share of all button presses; the
+ finger boxes inside it show the finger’s share of all button presses as text.
+ The asymmetry value is emitted once, after the first hand.
+#}
+{% set hands = stats.hands %}
+{% set fingers = stats.fingers %}
+<div class="fingerhandstats" dir="ltr" lang="en">
+{% for hand in Direction %}
+ {% set handpct = hands[hand]/stats.buttonPresses*100 %}
+ <div class="{{ hand.name.lower() }}" style="width: {{ '%.3f'|format(handpct) }}%;">
+ <div class="hand">{{ '%.2f'|format(handpct) }}%</div>
+ <div class="fingers">
+ {% for finger in fingerOrder[hand] %}
+ {% set fingerpct = fingers[(hand, finger)]/stats.buttonPresses*100 %}
+ {# finger width is relative to parent (i.e. hand) #}
+ {% set fingerwidth = fingers[(hand, finger)]/hands[hand]*100 %}
+ <div class="{{ finger.name.lower() }}" style="width: {{ '%.3f'|format(fingerwidth) }}%;">{{ '%.2f'|format(fingerpct) }}</div>
+ {% endfor %}
+ </div>
+ </div>
+ {% if loop.first %}
+ <div class="asymm"><small>Asymmetry: {{ '%.3f'|format(stats.asymmetry) }}</small></div>
+ {% endif %}
+{% endfor %}
+</div>
+{% endmacro %}
+
+<figure id="ar-lulua-heat">
+<div class="lbox" lang="en">
+ <img src="ar-lulua-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-lulua']) }}
+</div>
+</figure>
+</section>
+
+<section class="layoutgallery">
+ <div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h2>Related work</h2>
+ </div>
+ </div>
+ </div>
+
+ <figure id="ar-asmo663">
+ <div class="lbox">
+ <img src="ar-asmo663-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-asmo663']) }}
+ </div>
+ <figcaption class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>
+ Trying to unify existing layouts, the Arab Standardization and
+ Metrology Organization (ASMO), now part of
+ <a href="https://www.aidmo.org/">AIDMO</a>, published an Arabic
+ keyboard layout in 1987 as
+ <a href="https://www.aidmo.org/smcacc/ar/index.php?option=com_sobi2&Itemid=2&limitstart=2150">standard 663</a>.
+ <!-- -->
+ This, however, turned out to be a failure, due to lack of adoption by
+ the typewriter industry.
+ </p>
+ </div>
+ </div>
+ </figcaption>
+ </figure>
+
+ <figure id="ar-linux">
+ <div class="lbox">
+ <img src="ar-linux-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-linux']) }}
+ </div>
+ <figcaption class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>
+ Instead we’re currently using this layout (on Linux), which is
+ similar, but not quite the same.
+ <!-- -->
+ Most notably this layout arranges letters by their visual similarity.
+ <!-- -->
+ Thus it allocates suboptimal or even awkward positions to frequently
+ used letters like <bdo dir="ltr" lang="ar">ا ل</bdo> and
+ <bdo dir="ltr" lang="ar">ذ</bdo>.
+ </p>
+ </div>
+ </div>
+ </figcaption>
+ </figure>
+
+ <figure id="ar-malas">
+ <div class="lbox">
+ <img src="ar-malas-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-malas']) }}
+ </div>
+ <figcaption class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>
+ The work by Malas et al. (2008),
+ <a href="https://pdfs.semanticscholar.org/1bf8/74dcaa7f21c2cc3c6c5e526b61a9ee352bba.pdf">Toward Optimal Arabic Keyboard Layout Using Genetic Algorithm</a>,
+ presents an alternative layout generated by a genetic algorithm.
+ <!-- -->
+ They used a snapshot of the Arabic Wikipedia probably from around 2008 and
+ optimized for typing speed only, claiming 35% faster typing compared
+ to the <a href="#ar-linux">currently used layouts</a>.
+ <!-- -->
+ However the decision to put <bdo dir="ltr" lang="ar">ي</bdo> in the top
+ row seems odd.
+ <!-- -->
+ Assigning the same left index finger to <bdo dir="ltr" lang="ar">ا
+ ي و</bdo>, which are three of the most frequent letters, heavily
+ strains this particular finger.
+ </p>
+ </div>
+ </div>
+ </figcaption>
+ </figure>
+
+ <figure id="ar-osman">
+ <div class="lbox">
+ <img src="ar-osman-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-osman']) }}
+ </div>
+ <figcaption class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>
+ In 2015 patent
+ <a href="https://patents.google.com/patent/US9041657B2/en">9,041,657 B2</a>
+ was filed in the US, presenting yet another computer-generated layout.
+ <!-- -->
+ Its genetic algorithm was seeded with just 54 Arabic e-books consisting
+ of 7 million characters in total.
+ <!-- -->
+ Overall it claims to be 9% faster than default layouts.
+ <!-- -->
+ This layout rips off most of the standard layout’s second layer,
+ but amusingly fails to include a question mark, while it does
+ provide <em>three</em> single-quote marks ’ and <em>two</em> Arabic
+ semicolons <bdo dir="ltr" lang="ar">؛</bdo>.
+ <!-- -->
+ Additionally it places <bdo dir="ltr" lang="ar">ي</bdo> in an even
+ worse position than Malas’ layout.
+ </p>
+ </div>
+ </div>
+ </figcaption>
+ </figure>
+
+ <figure>
+ <div class="lbox">
+ <img src="ar-khorshid-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-khorshid']) }}
+ </div>
+ <figcaption class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>
+ In the paper
+ <a href="https://www.researchgate.net/publication/264837659_A_new_optimal_Arabic_keyboard_layout_using_genetic_algorithm">A new optimal Arabic keyboard layout using genetic algorithm</a>
+ Khorshid et al. present yet another
+ layout.
+ <!-- -->
+ They claim a 36% improvement over the standard keyboard based on
+ their criteria for ergonomic layouts.
+ <!-- -->
+ However in their layout from figure 8 the letters <bdo dir="ltr"
+ lang="ar">ل ب ر</bdo> are in suboptimal positions.
+ <!-- -->
+ Also it seems their algorithm favors the bottom row instead of the
+ easier to use top row.
+ </p>
+ </div>
+ </div>
+ </figcaption>
+ </figure>
+
+ <figure>
+ <div class="lbox">
+ <img src="ar-phonetic-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-phonetic']) }}
+ </div>
+ <figcaption class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>
+ The <a href="http://arabic.omaralzabir.com/home">Arabic Phonetic Keyboard</a>
+ simply maps the QWERTY layout to Arabic letters, based on their sound.
+ Thus Q becomes <bdo dir="ltr" lang="ar">ق</bdo>, Y becomes <bdo dir="ltr" lang="ar">ي</bdo> and so on.
+ It claims to be optimized for writing vowelized texts, especially
+ Quranic Arabic, and thus includes quite a few combining characters and
+ special symbols.
+ Although it claims to make frequently used letters easily available –
+ based on the work of Intellaren – it makes no effort to arrange letters
+ according to their usage frequency.
+ </p>
+ </div>
+ </div>
+ </figcaption>
+ </figure>
+
+ <div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>
+ While technically speaking not a layout but an alternative input
+ method, <a href="http://www.intellaren.com/intellark">Intellark</a> by
+ Intellaren is worth mentioning.
+ <!-- -->
+ It is based on repeatedly pressing the same button to modify the
+ current character.
+ <!-- -->
+ For example pressing A on the QWERTY keyboard cycles through the
+ alternatives <bdo dir="ltr" lang="ar">ا أ إ آ</bdo> and <bdo dir="ltr" lang="ar">ء</bdo>.
+ <!-- -->
+ Obviously this is slow, error-prone and violates Dvorak’s guidelines
+ for keyboard layout designs.
+ </p>
+ </div>
+ </div>
+ </div>
+</section>
+
+<section>
+<div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h2>Acknowledgements</h2>
+
+ <p>This work would not have been possible without Martin Krzywinski’s
+ work on <a
+ href="http://mkweb.bcgsc.ca/carpalx/?typing_effort">carpalx</a>.</p>
+ </div>
+ </div>
+</div>
+</section>
+
+<script>
+fetch('letterfreq.json')
+ .then(function(response) { return response.json(); })
+ .then(function(item) { Bokeh.embed.embed_item(item, 'letterfreq-div'); })
+</script>
+
+</body>
+</html>
diff --git a/lulua/data/report/lulua-logo.svg b/lulua/data/report/lulua-logo.svg
new file mode 100644
index 0000000..20136c0
--- /dev/null
+++ b/lulua/data/report/lulua-logo.svg
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" width="25.36014mm" height="13.828889mm" viewBox="0 0 25.36014 13.828889" version="1.1">
+ <g style="fill:#EAE0C8">
+ <g transform="matrix(0.26458333,0,0,0.26458333,-10.208781,-21.74248)">
+ <path d="m 125.36706,120.94964 0.53333,-0.53334 h 0.21333 q 3.73334,0 5.49334,-1.65333 1.76,-1.70667 1.76,-5.38667 V 82.176302 h 1.06666 V 113.3763 q 0,4.26667 -2.02666,6.29334 -1.97334,1.97333 -6.29334,1.97333 h -0.74666 z"/>
+ <path d="m 104.07539,133.2163 h 4.05334 q 6.34666,0 9.70666,-2.88 3.41334,-2.88 4.16,-8.69333 h -8.21333 q -4.21333,0 -6.45333,-2.24 -2.18667,-2.24 -2.18667,-6.29333 0,-2.4 0.64,-4.53334 0.69333,-2.13333 1.86667,-3.68 1.22666,-1.6 2.88,-2.50666 1.65333,-0.96 3.62666,-0.96 4.32,0 6.72,4.10666 2.4,4.05334 2.4,11.36 0,1.92 -0.16,3.52 h 2.82667 v 0.69334 l -0.53333,0.53333 h -2.45334 q -0.90666,6.61333 -4.53333,9.70667 -3.57333,3.09333 -10.29333,3.09333 h -4.05334 z m 2.13334,-20.10666 q 0,3.52 1.81333,5.44 1.86667,1.86666 5.38667,1.86666 h 8.69333 q 0.0533,-0.58666 0.0533,-1.17333 0.0533,-0.64 0.0533,-1.28 v -2.13333 q 0,-2.88 -0.58667,-5.28 -0.53333,-2.45334 -1.6,-4.21334 -1.01333,-1.76 -2.56,-2.72 -1.49333,-0.96 -3.41333,-0.96 -1.65334,0 -3.09334,0.8 -1.44,0.8 -2.50666,2.24 -1.01334,1.38667 -1.65334,3.30667 -0.58666,1.86667 -0.58666,4.10667 z m 3.09333,-16.90667 h 2.66667 l 0.0533,-0.16 q -0.85333,-0.746667 -1.33333,-1.706667 -0.42667,-0.96 -0.42667,-1.973334 0,-1.866666 1.22667,-3.04 1.22666,-1.226667 3.09333,-1.226667 2.24,0 3.68,1.866667 l -0.69333,0.746667 q -1.01334,-1.546667 -2.98667,-1.546667 -1.44,0 -2.4,0.906667 -0.96,0.906667 -0.96,2.293333 0,1.6 1.28,2.666667 1.28,1.066667 3.25333,1.066667 h 3.2 v 1.066667 h -9.65333 z"/>
+ <path d="m 88.075391,120.94964 0.533334,-0.53334 h 0.213333 q 3.733333,0 5.493334,-1.65333 1.76,-1.70667 1.76,-5.38667 V 82.176302 h 1.066666 V 113.3763 q 0,4.26667 -2.026666,6.29334 -1.973334,1.97333 -6.293334,1.97333 h -0.746667 z"/>
+ <path d="m 66.783725,133.2163 h 4.053334 q 6.346667,0 9.706667,-2.88 3.413334,-2.88 4.16,-8.69333 h -8.213333 q -4.213334,0 -6.453334,-2.24 -2.186667,-2.24 -2.186667,-6.29333 0,-2.4 0.64,-4.53334 0.693334,-2.13333 1.866667,-3.68 1.226667,-1.6 2.88,-2.50666 1.653334,-0.96 3.626667,-0.96 4.32,0 6.72,4.10666 2.4,4.05334 2.4,11.36 0,1.92 -0.16,3.52 h 2.826667 v 0.69334 l -0.533333,0.53333 h -2.453334 q -0.906666,6.61333 -4.533333,9.70667 -3.573334,3.09333 -10.293334,3.09333 h -4.053334 z m 2.133334,-20.10666 q 0,3.52 1.813333,5.44 1.866667,1.86666 5.386667,1.86666 h 8.693334 q 0.05333,-0.58666 0.05333,-1.17333 0.05333,-0.64 0.05333,-1.28 v -2.13333 q 0,-2.88 -0.586667,-5.28 -0.533333,-2.45334 -1.6,-4.21334 -1.013333,-1.76 -2.56,-2.72 -1.493334,-0.96 -3.413334,-0.96 -1.653333,0 -3.093333,0.8 -1.44,0.8 -2.506667,2.24 -1.013333,1.38667 -1.653333,3.30667 -0.586667,1.86667 -0.586667,4.10667 z m 3.093333,-16.90667 h 2.666667 l 0.05333,-0.16 q -0.853334,-0.746667 -1.333334,-1.706667 -0.426667,-0.96 -0.426667,-1.973334 0,-1.866666 1.226667,-3.04 1.226667,-1.226667 3.093334,-1.226667 2.24,0 3.68,1.866667 l -0.693334,0.746667 q -1.013333,-1.546667 -2.986666,-1.546667 -1.44,0 -2.4,0.906667 -0.960001,0.906667 -0.960001,2.293333 0,1.6 1.280001,2.666667 1.28,1.066667 3.253333,1.066667 h 3.2 v 1.066667 h -9.653334 z"/>
+ </g>
+ <g>
+ <g transform="translate(-16.828094,-21.74248)">
+ <path d="m 17.96875,26.736328 c -0.629138,0 -1.140625,0.511487 -1.140625,1.140625 v 3.339844 c 0,0.629138 0.511487,1.140625 1.140625,1.140625 h 3.339844 c 0.629138,0 1.138672,-0.511487 1.138672,-1.140625 v -3.339844 c 0,-0.629138 -0.509534,-1.140625 -1.138672,-1.140625 z m 0,0.28125 h 3.339844 c 0.478861,0 0.859375,0.380514 0.859375,0.859375 v 3.339844 c 0,0.478861 -0.380514,0.859375 -0.859375,0.859375 H 17.96875 c -0.478862,0 -0.861328,-0.380514 -0.861328,-0.859375 v -3.339844 c 0,-0.478861 0.382466,-0.859375 0.861328,-0.859375 z"/>
+ </g>
+ <rect y="9.2311716" x="1.6189767" height="0.32543749" width="2.3812499"/>
+ </g>
+ </g>
+</svg>
diff --git a/lulua/data/report/style.css b/lulua/data/report/style.css
new file mode 100644
index 0000000..26b2e96
--- /dev/null
+++ b/lulua/data/report/style.css
@@ -0,0 +1,202 @@
+/*
+colorscheme derived from #EAE0C8, see https://en.wikipedia.org/wiki/Pearl_(color)
+using http://colormind.io/bootstrap/
+*/
+:root {
+ --light-shades: #EAE0C8;
+ --light-accent: #689CA9;
+ --main-brand: #A48A4E;
+ --dark-accent: #79796D;
+ --dark-shades: #1D251E;
+
+ --finger-little: #dc322f; /* red */
+ --finger-ring: #268bd2; /* blue */
+ --finger-middle: #d33682; /* magenta */
+ --finger-index: #6c71c4; /* violet */
+ --finger-thumb: #2aa198; /* cyan */
+}
+
+@font-face {
+ font-family: 'IBM Plex Arabic';
+ font-style: normal;
+ font-weight: 100;
+ src: local('IBM Plex Arabic Thin'), local('IBMPlexArabic-Thin'), url('fonts/IBMPlexArabic-Thin.woff2') format('woff2');
+}
+
+@font-face {
+ font-family: 'IBM Plex Arabic';
+ font-style: normal;
+ font-weight: 400;
+ src: local('IBM Plex Arabic Regular'), local('IBMPlexArabic-Regular'), url('fonts/IBMPlexArabic-Regular.woff2') format('woff2');
+}
+
+body {
+ font-size: 14pt;
+ background-color: var(--light-shades);
+ color: var(--dark-shades);
+}
+/* is there a better way to select _both_ fonts at the same time? */
+:lang(ar) {
+ direction: rtl;
+ font-family: "IBM Plex Arabic";
+}
+/* inside ltr text */
+:lang(ar)[dir=ltr] {
+ direction: ltr;
+}
+:lang(en) {
+ direction: ltr;
+ font-family: "IBM Plex Sans";
+}
+h1, h2, h3 {
+ font-weight: 100;
+}
+h1 {
+ font-size: 4em;
+}
+h2 {
+ font-size: 2.5em;
+}
+figure {
+ max-width: 70em;
+ margin: 1.3em auto;
+}
+img {
+ width: 100%;
+}
+code {
+ font-family: "IBM Plex Mono", monospace !important; /* override :lang(*) font setting */
+}
+a:link, a:visited {
+ color: var(--light-accent);
+ text-decoration: none;
+}
+a:focus, a:hover {
+ background-color: var(--light-accent);
+ color: var(--light-shades);
+ border-radius: 0.1em;
+}
+div.title-card {
+ background-color: var(--dark-accent);
+ color: var(--light-shades);
+}
+div.title-card .lbox {
+ margin: 2vw;
+}
+div.title-card h1 {
+ margin: 0;
+ padding: 0.2em;
+}
+div.title-card img.logo {
+ max-height: 35vh;
+ display: block;
+ margin: 0 auto;
+}
+div.title-card .subtitle {
+ padding: 0 0.5em;
+ font-size: 1.5em;
+}
+div.title-card .layout img {
+ display: block;
+ margin: 0 auto;
+}
+div.indepth-card {
+ padding: 10vh 0;
+ margin: 1em 0;
+ background-color: var(--dark-accent);
+ color: var(--light-shades);
+}
+.flexreverse {
+ flex-direction: row-reverse;
+}
+.lbox {
+ margin: 0 5vw;
+}
+/* for hand/finger stats */
+div.fingerhandstats {
+ text-align: center;
+ display: flex;
+}
+div.fingerhandstats div.fingers {
+ display: flex;
+}
+div.fingerhandstats div.fingers div {
+ margin: 0.1em;
+ overflow: hidden;
+}
+div.fingerhandstats .left {
+ margin-right: 0.5em;
+}
+div.fingerhandstats .right {
+ margin-left: 0.5em;
+}
+/* keep in sync with render-svg.css */
+div.fingerhandstats .fingers .little {
+ border: 0.1em solid var(--finger-little);
+}
+div.fingerhandstats .fingers .ring {
+ border: 0.1em solid var(--finger-ring);
+}
+div.fingerhandstats .fingers .middle {
+ border: 0.1em solid var(--finger-middle);
+}
+div.fingerhandstats .fingers .index {
+ border: 0.1em solid var(--finger-index);
+}
+div.fingerhandstats .fingers .thumb {
+ border: 0.1em solid var(--finger-thumb);
+}
+
+table {
+ font-variant-numeric: tabular-nums;
+}
+.pure-table td.numint {
+ text-align: right;
+ padding-right: 0;
+}
+
+.pure-table td.numfrac {
+ border-left: none;
+ text-align: left;
+ padding-left: 0;
+}
+
+dl.colorcodes dt, dl.colorcodes dd {
+ display: inline;
+ padding: 0;
+ margin: 0;
+}
+
+dl.colorcodes dt:after {
+ content: ":";
+}
+
+dl.colorcodes .finger:before {
+ width: 0.7em;
+ height: 0.7em;
+ display: inline-block;
+ content: " ";
+ margin-right: 0.3em;
+ vertical-align: middle;
+}
+
+dl.colorcodes .finger.little::before {
+ background-color: var(--finger-little);
+}
+
+dl.colorcodes .finger.ring::before {
+ background-color: var(--finger-ring);
+}
+
+dl.colorcodes .finger.middle::before {
+ background-color: var(--finger-middle);
+}
+
+dl.colorcodes .finger.index::before {
+ background-color: var(--finger-index);
+}
+
+dl.colorcodes .finger.thumb::before {
+ background-color: var(--finger-thumb);
+}
+
diff --git a/lulua/report.py b/lulua/report.py
new file mode 100644
index 0000000..200bb9b
--- /dev/null
+++ b/lulua/report.py
@@ -0,0 +1,64 @@
+import sys, argparse, logging, pickle
+from gettext import GNUTranslations, NullTranslations
+from decimal import Decimal
+
+import yaml
+from jinja2 import Environment, PackageLoader
+from bokeh.resources import CDN as bokehres
+
+from .layout import LEFT, RIGHT, Direction, FingerType
+
+def approx (i):
+    """
+    Get approximate human-readable representation of a large number.
+
+    The value is divided by 1000 until its magnitude drops below 1000 or the
+    largest known unit is reached, so anything beyond billions is expressed
+    as a (large) number of billions.
+
+    :param i: Number to approximate, anything Decimal() accepts
+    :return: Tuple (value, unit); value is a Decimal rounded to one decimal
+        place, unit is one of '', 'thousand', 'million', 'billion'
+    """
+
+    units = ('', 'thousand', 'million', 'billion')
+    base = Decimal (1000)
+    value = Decimal (i)
+    unit = 0
+    # compare the magnitude, so negative numbers are scaled as well
+    while abs (value) >= base and unit < len (units) - 1:
+        value /= base
+        unit += 1
+    return round (value, 1), units[unit]
+
+def numspace (s):
+    """
+    Return s with every ordinary space swapped for U+2007 FIGURE SPACE, which
+    keeps space-padded numbers aligned when rendered with tabular digits.
+    """
+    figureSpace = '\u2007'
+    return s.replace (' ', figureSpace)
+
+def render ():
+    """
+    Command line entry point: render the report template to stdout.
+
+    Loads the corpus metadata (YAML, possibly several documents per file) and
+    the layout statistics (pickle, one dict per file, keyed by its 'layout'
+    entry) named on the command line, sums up the corpus word and character
+    counts and streams the rendered index.html template to sys.stdout.
+    """
+
+    parser = argparse.ArgumentParser(description='Create lulua report.')
+    # Both lists are iterated unconditionally below. Let argparse reject a
+    # missing option instead of failing later with a TypeError on None.
+    parser.add_argument('-c', '--corpus', nargs='+', metavar='FILE', required=True, help='Corpus metadata files')
+    parser.add_argument('-l', '--layoutstats', nargs='+', metavar='FILE', required=True, help='Layout statistics files')
+    logging.basicConfig (level=logging.INFO)
+    args = parser.parse_args()
+
+    env = Environment (
+        loader=PackageLoader (__package__, 'data/report'),
+        )
+    env.filters['approx'] = approx
+    env.filters['numspace'] = numspace
+
+    corpus = []
+    for path in args.corpus:
+        with open (path) as fd:
+            # skip documents that load as None (e.g. an empty document
+            # following a trailing '---' separator)
+            corpus.extend (doc for doc in yaml.safe_load_all (fd) if doc is not None)
+    layoutstats = {}
+    for path in args.layoutstats:
+        # NOTE: unpickling can execute arbitrary code, only pass files
+        # written by the stats tool itself
+        with open (path, 'rb') as fd:
+            d = pickle.load (fd)
+        layoutstats[d['layout']] = d
+
+    corpustotal = {
+        k: sum (doc['stats'][k] for doc in corpus)
+        for k in ('words', 'characters')
+        }
+
+    tpl = env.get_template('index.html')
+
+    tpl.stream (
+        corpus=corpus,
+        corpustotal=corpustotal,
+        layoutstats=layoutstats,
+        bokehres=bokehres,
+        # XXX: not sure how to expose these properly to the template
+        fingerOrder={LEFT: list (FingerType), RIGHT: list (reversed (FingerType))},
+        Direction=Direction,
+        ).dump (sys.stdout)
+
diff --git a/lulua/stats.py b/lulua/stats.py
index 80c269b..13d878b 100644
--- a/lulua/stats.py
+++ b/lulua/stats.py
@@ -22,7 +22,6 @@ import sys, operator, pickle, argparse, logging, yaml, math, time
from operator import itemgetter
from itertools import chain, groupby, product
from collections import defaultdict
-from decimal import Decimal
from .layout import *
from .keyboard import defaultKeyboards
@@ -313,7 +312,7 @@ def keyHeatmap (args):
buttons[k.name] = v
yaml.dump (data, sys.stdout)
-def fingerHand (args):
+def layoutstats (args):
stats = pickle.load (sys.stdin.buffer)
keyboard = defaultKeyboards[args.keyboard]
@@ -328,19 +327,14 @@ def fingerHand (args):
hands[hand] += count
fingers[(hand, finger)] += count
- print ('<div class="fingerhandstats" dir="ltr" lang="en">')
- fingerOrder = {LEFT: list (FingerType), RIGHT: reversed (FingerType)}
- for hand in Direction:
- handpct = hands[hand]/buttonPresses*100
- print (f'<div class="{hand.name.lower()}" style="width: {handpct:.3f}%;">\n\t<div class="hand">{handpct:.2f}%</div>')
- print ('\t<div class="fingers">')
- for finger in fingerOrder[hand]:
- fingerpct = fingers[(hand, finger)]/buttonPresses*100
- # finger width is relative to parent (i.e. hand)
- fingerwidth = fingers[(hand, finger)]/hands[hand]*100
- print (f'\t\t<div class="{finger.name.lower()}" style="width: {fingerwidth:.3f}%;">{fingerpct:.2f}</div>')
- print ('\t</div>\n\t</div>')
- print ('</div>')
+ asymmetry = hands[LEFT]/buttonPresses - hands[RIGHT]/buttonPresses
+ pickle.dump (dict (
+ layout=args.layout,
+ hands=dict (hands),
+ fingers=dict (fingers),
+ buttonPresses=buttonPresses,
+ asymmetry=asymmetry,
+ ), sys.stdout.buffer)
def latinImeDict (args):
"""
@@ -379,46 +373,6 @@ def corpusStats (args):
# make document concatable
print ('---')
-def approx (i):
- """ Get approximate human-readable string for large number """
-
- units = ['', 'thousand', 'million', 'billion']
- base = Decimal (1000)
- i = Decimal (i)
- while i >= base and len (units) > 1:
- i /= base
- units.pop (0)
- i = round (i, 1)
- return int (i), int (i%1*10), units[0]
-
-def corpusHtml (args):
- meta = list (filter (lambda x: x is not None, yaml.safe_load_all (sys.stdin)))
- total = {'words': 0, 'characters': 0}
- print ('<table class="pure-table"><thead><tr><th>Source</th><th colspan="2"></th><th colspan="2">Words</th><th colspan="2">Characters</th></thead><tbody>')
- for c in sorted (meta, key=lambda x: x['source']['name'].lower ()):
- print ('<tr>')
- print (f'<td><a href="{c["source"]["url"]}">{c["source"]["name"]}</a></td>')
- count = c.get ('count')
- if count:
- print (f'<td class="numint">{count[0]//1000:d},</td><td class="numfrac">{count[0]%1000:03d}\u202f{count[1]}</td>')
- else:
- print ('<td class="numint"></td><td class="numfrac"></td>')
-
- stats = c.get ('stats')
- for k in ('words', 'characters'):
- i = approx (stats[k])
- print (f'<td class="numint">{i[0]}.</td><td class="numfrac">{i[1]}\u202f{i[2]}</td>')
- print ('</tr>')
-
- for k in ('words', 'characters'):
- total[k] += c['stats'][k]
- print ('<tr><td>Total</td><td class="numint"></td><td class="numfrac"></td>')
- for k in ('words', 'characters'):
- i = approx (total[k])
- print (f'<td class="numint">{i[0]}.</td><td class="numfrac">{i[1]}\u202f{i[2]}</td>')
- print ('</tr>')
- print ('</tbody></table>')
-
def main ():
parser = argparse.ArgumentParser(description='Process statistics files.')
parser.add_argument('-l', '--layout', metavar='LAYOUT', help='Keyboard layout name')
@@ -439,15 +393,13 @@ def main ():
sp.set_defaults (func=triadfreq)
sp = subparsers.add_parser('keyheatmap')
sp.set_defaults (func=keyHeatmap)
- sp = subparsers.add_parser('fingerhand')
- sp.set_defaults (func=fingerHand)
+ sp = subparsers.add_parser('layoutstats')
+ sp.set_defaults (func=layoutstats)
sp = subparsers.add_parser('latinime')
sp.set_defaults (func=latinImeDict)
sp = subparsers.add_parser('corpusstats')
sp.add_argument('metadata', type=argparse.FileType ('r'))
sp.set_defaults (func=corpusStats)
- sp = subparsers.add_parser('corpushtml')
- sp.set_defaults (func=corpusHtml)
logging.basicConfig (level=logging.INFO)
args = parser.parse_args()