summaryrefslogtreecommitdiff
path: root/lulua
diff options
context:
space:
mode:
Diffstat (limited to 'lulua')
-rw-r--r--lulua/data/report/index.html474
-rw-r--r--lulua/data/report/lulua-logo.svg17
-rw-r--r--lulua/data/report/style.css202
-rw-r--r--lulua/report.py64
-rw-r--r--lulua/stats.py70
5 files changed, 768 insertions, 59 deletions
diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html
new file mode 100644
index 0000000..5649fab
--- /dev/null
+++ b/lulua/data/report/index.html
@@ -0,0 +1,474 @@
+<!doctype html>
+<html lang="ar">
+<head>
+ <meta charset="utf-8">
+ <title>لؤلؤة</title>
+ <!--<meta name="description" content="">-->
+ <meta name="viewport" content="width=device-width, initial-scale=1">
+
+ <link href="https://fonts.googleapis.com/css?family=IBM+Plex+Mono|IBM+Plex+Sans:100,400&display=swap" rel="stylesheet">
+ <link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/pure-min.css" integrity="sha384-oAOxQR6DkCoMliIh8yFnu25d7Eq/PHS21PClpwjOTeU2jRSq11vu66rf90/cZr47" crossorigin="anonymous">
+ <link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/grids-responsive-min.css">
+ {# bokeh #}
+ {% for f in bokehres.js_files -%}
+ <script src="{{ f }}"></script>
+ {%- endfor %}
+ {% for f in bokehres.css_files -%}
+ <link rel="stylesheet" href="{{ f }}">
+ {%- endfor %}
+ <link rel="stylesheet" href="style.css">
+</head>
+<body>
+
+<div class="title-card pure-g">
+ <div class="pure-u-1 pure-u-lg-1-3">
+ <div class="lbox">
+ <h1 class="title"><img class="logo" src="lulua-logo.svg" alt="لؤلؤة"></h1>
+ <div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-sm-1-2">
+ <!--<h2 class="subtitle">لوحة مفاتيح العربية المريحة</h2>-->
+ </div>
+ <div class="pure-u-1 pure-u-sm-1-2">
+ <h2 class="subtitle" lang="en">Ergonomic Arabic Keyboard Layout</h2>
+ </div>
+ </div>
+ </div>
+ </div>
+ <div class="pure-u-1 pure-u-lg-2-3">
+ <div class="lbox">
+ <div class="layout">
+ <img src="ar-lulua.svg" alt="لؤلؤة">
+ </div>
+ </div>
+ </div>
+</div>
+
+<div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>This is work in progress and contributions are welcome. Head over to
+ <a href="https://github.com/PromyLOPh/lulua">GitHub</a> to see where
+ you can help.</p>
+ </div>
+ </div>
+</div>
+
+<div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h2>Goals</h2>
+ <ul>
+ <li>Ergonomic typing of unvocalized and vocalized text with 10 fingers</li>
+ <li>Modern Standard Arabic and Quranic Arabic</li>
+ <li>Localized numbers (European/Arabic-Indic)</li>
+ <li>Usable as primary or secondary keyboard</li>
+ <li>Compose-based</li>
+ <li>Support for Markdown, RST, Wikitext and similar markup</li>
+ </ul>
+
+ <p>Other languages using the Arabic alphabet (regional dialects, Urdu,
+ Persian) are explicitly not supported.</p>
+ </div>
+ </div>
+</div>
+
+<div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h2>Usage</h2>
+ <dl>
+ <dt>Windows</dt>
+ <dd>Download <a href="ar-lulua-w64.zip">driver</a> and follow instructions in <kbd>INSTALL.txt</kbd></dd>
+ <dt>Android</dt>
+ <dd>Install
+ <a href="https://play.google.com/store/apps/details?id=com.menny.android.anysoftkeyboard">AnySoftKeyboard</a>
+ and
+ <a href="https://play.google.com/store/apps/details?id=com.anysoftkeyboard.languagepack.arabic">Arabic for AnySoftKeyboard</a></dd>
+ <dt>Linux</dt>
+ <dd>Run: <code>xmodmap <a href="ar-lulua.xmodmap">ar-lulua.xmodmap</a></code></dd>
+ </dl>
+ </div>
+ </div>
+</div>
+
+<div class="indepth-card">
+<div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h1>Learn more</h1>
+ </div>
+ </div>
+</div>
+</div>
+
+<section>
+<div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ <div class="lbox">
+ <h2>الأبجدية العربية</h2>
+ </div>
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h2>The Arabic Alphabet</h2>
+ <p>
+ There are 28 letters in the Arabic alphabet, plus quite a few extra
+ symbols required for proper text input, like the hamza in its different
+ shapes <bdo dir="ltr" lang="ar">أ إ آ ء ئ ؤ</bdo>, ta marbutah <bdo
+ dir="ltr" lang="ar">ة</bdo>, alif maqsurah <bdo dir="ltr"
+ lang="ar">ى</bdo> and various diacritics for vowelized texts.
+ <!-- -->
+ Since the usability of a keyboard layout depends on the text entered
+ it is necessary to study letter and letter combination frequencies first.
+ <!-- -->
+ The corpus used for the following analysis consists of
+ </p>
+
+ <table class="pure-table"><thead><tr><th>Source</th><th></th><th>Words</th><th>Characters</th></tr></thead><tbody>
+ {% for c in corpus|sort(attribute='source.name') %}
+ <tr>
+ <td><a href="{{ c.source.url }}">{{ c.source.name }}</a></td>
+ {% set count = c.get ('count') %}
+ {% if count %}
+ {# use new style formatting, for some reason %7,d does not work #}
+ <td>{{ '{:7,d}'.format(count[0])|numspace }}&#x202f;{{ count[1] }}</td>
+ {% else %}
+ <td></td>
+ {% endif %}
+
+ {% set stats = c.get ('stats') %}
+ {% for k in ('words', 'characters') %}
+ {% set i = stats[k]|approx %}
+ <td>{{ '%5.1f'|format(i[0])|numspace }}&#x202f;{{ i[1] }}</td>
+ {% endfor %}
+ </tr>
+ {% endfor %}
+ <tr><td>Total</td><td></td>
+ {% for k in ('words', 'characters') %}
+ {% set i = corpustotal[k]|approx %}
+ <td>{{ '%5.1f'|format(i[0])|numspace }}&#x202f;{{ i[1] }}</td>
+ {% endfor %}
+ </tr>
+ </tbody></table>
+
+ <p>
+ The plot below shows <bdo dir="ltr" lang="ar">ا ل ي م و ن</bdo> can be
+ considered the most frequently used letters in the Arabic language.
+ <!-- -->
+ Together they account for more than 55% of all letters in the corpus.
+ </p>
+ </div>
+ </div>
+</div>
+
+<figure id="letterfreq">
+<div class="lbox" lang="en">
+ <div id="letterfreq-div"></div>
+</div>
+<figcaption class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>Arabic letter frequency distribution</p>
+ </div>
+ </div>
+</figcaption>
+</figure>
+</section>
+
+<section>
+<div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h2>Layout properties</h2>
+
+ <p>The following evaluation uses color coding to identify fingers:</p>
+ <dl class="colorcodes">
+ <dt class="finger little">red</dt>
+ <dd>little finger</dd>
+ <dt class="finger ring">blue</dt>
+ <dd>ring finger</dd>
+ <dt class="finger middle">magenta</dt>
+ <dd>middle finger</dd>
+ <dt class="finger index">violet</dt>
+ <dd>index finger</dd>
+ <dt class="finger thumb">cyan</dt>
+ <dd>thumb</dd>
+ </dl>
+ <p>Asymmetry is defined as the difference between left and right hand usage.</p>
+ </div>
+ </div>
+</div>
+
+{% macro fingerhandstats(stats) %}
+{% set hands = stats.hands %}
+{% set fingers = stats.fingers %}
+<div class="fingerhandstats" dir="ltr" lang="en">
+{% for hand in Direction %}
+ {% set handpct = hands[hand]/stats.buttonPresses*100 %}
+ <div class="{{ hand.name.lower() }}" style="width: {{ '%.3f'|format(handpct) }}%;">
+ <div class="hand">{{ '%.2f'|format(handpct) }}%</div>
+ <div class="fingers">
+ {% for finger in fingerOrder[hand] %}
+ {% set fingerpct = fingers[(hand, finger)]/stats.buttonPresses*100 %}
+ {# finger width is relative to parent (i.e. hand) #}
+ {% set fingerwidth = fingers[(hand, finger)]/hands[hand]*100 %}
+ <div class="{{ finger.name.lower() }}" style="width: {{ '%.3f'|format(fingerwidth) }}%;">{{ '%.2f'|format(fingerpct) }}</div>
+ {% endfor %}
+ </div>
+ </div>
+ {% if loop.first %}
+ <div class="asymm"><small>Asymmetry: {{ '%.3f'|format(stats.asymmetry) }}</small></div>
+ {% endif %}
+{% endfor %}
+</div>
+{% endmacro %}
+
+<figure id="ar-lulua-heat">
+<div class="lbox" lang="en">
+ <img src="ar-lulua-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-lulua']) }}
+</div>
+</figure>
+</section>
+
+<section class="layoutgallery">
+ <div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h2>Related work</h2>
+ </div>
+ </div>
+ </div>
+
+ <figure id="ar-asmo663">
+ <div class="lbox">
+ <img src="ar-asmo663-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-asmo663']) }}
+ </div>
+ <figcaption class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>
+ Trying to unify existing layouts, the Arab Standardization and
+ Metrology Organization (ASMO), now part of
+ <a href="https://www.aidmo.org/">AIDMO</a>, published an Arabic
+ keyboard layout in 1987 as
+ <a href="https://www.aidmo.org/smcacc/ar/index.php?option=com_sobi2&Itemid=2&limitstart=2150">standard 663</a>.
+ <!-- -->
+ This, however, turned out to be a failure, due to lack of adoption by
+ the typewriter industry.
+ </p>
+ </div>
+ </div>
+ </figcaption>
+ </figure>
+
+ <figure id="ar-linux">
+ <div class="lbox">
+ <img src="ar-linux-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-linux']) }}
+ </div>
+ <figcaption class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>
+ Instead we’re currently using this layout (on Linux), which is
+ similar, but not quite the same.
+ <!-- -->
+ Most notably this layout arranges letters by their visual similarity.
+ <!-- -->
+ Thus it allocates suboptimal or even awkward positions to frequently
+ used letters like <bdo dir="ltr" lang="ar">ا ل</bdo> and
+ <bdo dir="ltr" lang="ar">ذ</bdo>.
+ </p>
+ </div>
+ </div>
+ </figcaption>
+ </figure>
+
+ <figure id="ar-malas">
+ <div class="lbox">
+ <img src="ar-malas-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-malas']) }}
+ </div>
+ <figcaption class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>
+ The work by Malas et al. (2008),
+ <a href="https://pdfs.semanticscholar.org/1bf8/74dcaa7f21c2cc3c6c5e526b61a9ee352bba.pdf">Toward Optimal Arabic Keyboard Layout Using Genetic Algorithm</a>,
+ presents an alternative layout generated by a genetic algorithm.
+ <!-- -->
+ They used a snapshot of the Arabic Wikipedia probably from around 2008 and
+ optimized for typing speed only, claiming 35% faster typing compared
+ to the <a href="#ar-linux">currently used layouts</a>.
+ <!-- -->
+ However the decision to put <bdo dir="ltr" lang="ar">ي</bdo> in the top
+ row seems odd.
+ <!-- -->
+ Assigning the same left index finger to <bdo dir="ltr" lang="ar">ا
+ ي و</bdo>, which are three of the most frequent letters, heavily
+ strains this particular finger.
+ </p>
+ </div>
+ </div>
+ </figcaption>
+ </figure>
+
+ <figure id="ar-osman">
+ <div class="lbox">
+ <img src="ar-osman-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-osman']) }}
+ </div>
+ <figcaption class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>
+ In 2015 patent
+ <a href="https://patents.google.com/patent/US9041657B2/en">9,041,657 B2</a>
+ was filed in the US, presenting yet another computer-generated layout.
+ <!-- -->
+ Its genetic algorithm was seeded with just 54 Arabic e-books consisting
+ of 7 million characters in total.
+ <!-- -->
+ Overall it claims to be 9% faster than default layouts.
+ <!-- -->
+ This layout rips off most of the standard layout’s second layer,
+ but amusingly fails to include a question mark, while it does
+ provide <em>three</em> single-quote marks ’ and <em>two</em> Arabic
+ semicolon <bdo dir="ltr" lang="ar">؛</bdo>.
+ <!-- -->
+ Additionally it places <bdo dir="ltr" lang="ar">ي</bdo> in an even
+ worse position than Malas’ layout.
+ </p>
+ </div>
+ </div>
+ </figcaption>
+ </figure>
+
+ <figure>
+ <div class="lbox">
+ <img src="ar-khorshid-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-khorshid']) }}
+ </div>
+ <figcaption class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>
+ In the paper
+ <a href="https://www.researchgate.net/publication/264837659_A_new_optimal_Arabic_keyboard_layout_using_genetic_algorithm">A new optimal Arabic keyboard layout using genetic algorithm</a>
+ Khorshid et al. present yet another
+ layout.
+ <!-- -->
+ They claim a 36% improvement over the standard keyboard based on
+ their criteria for ergonomic layouts.
+ <!-- -->
+ However in their layout from figure 8 the letters <bdo dir="ltr"
+ lang="ar">ل ب ر</bdo> are in suboptimal positions.
+ <!-- -->
+ Also it seems their algorithm favors the bottom row instead of the
+ easier to use top row.
+ </p>
+ </div>
+ </div>
+ </figcaption>
+ </figure>
+
+ <figure>
+ <div class="lbox">
+ <img src="ar-phonetic-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-phonetic']) }}
+ </div>
+ <figcaption class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>
+ The <a href="http://arabic.omaralzabir.com/home">Arabic Phonetic Keyboard</a>
+ simply maps the QWERTY layout to Arabic letters, based on their sound.
+ Thus Q becomes <bdo dir="ltr" lang="ar">ق</bdo>, Y becomes <bdo dir="ltr" lang="ar">ي</bdo> and so on.
+ It claims to be optimized for writing vowelized texts, especially
+ Quranic Arabic, and thus includes quite a few combining characters and
+ special symbols.
+ Although it claims to make frequently used letters easily available –
+ based on the work of Intellaren – it makes no effort to arrange letters
+ according to their usage frequency.
+ </p>
+ </div>
+ </div>
+ </figcaption>
+ </figure>
+
+ <div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>
+ While technically speaking not a layout but an alternative input
+ method, <a href="http://www.intellaren.com/intellark">Intellark</a> by
+ Intellaren is worth mentioning.
+ <!-- -->
+ It is based on repeatedly pressing the same button to modify the
+ current character.
+ <!-- -->
+ For example pressing A on the QWERTY keyboard cycles through the
+ alternatives <bdo dir="ltr" lang="ar">ا أ إ آ</bdo> and <bdo dir="ltr" lang="ar">ء</bdo>.
+ <!-- -->
+ Obviously this is slow, error-prone and violates Dvorak’s guidelines
+ for keyboard layout designs.
+ </p>
+ </div>
+ </div>
+ </div>
+</section>
+
+<section>
+<div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h2>Acknowledgements</h2>
+
+ <p>This work would not have been possible without Martin Krzywinski’s
+ work on <a
+ href="http://mkweb.bcgsc.ca/carpalx/?typing_effort">carpalx</a>.</p>
+ </div>
+ </div>
+</div>
+</section>
+
+<script>
+fetch('letterfreq.json')
+ .then(function(response) { return response.json(); })
+ .then(function(item) { Bokeh.embed.embed_item(item, 'letterfreq-div'); })
+</script>
+
+</body>
+</html>
diff --git a/lulua/data/report/lulua-logo.svg b/lulua/data/report/lulua-logo.svg
new file mode 100644
index 0000000..20136c0
--- /dev/null
+++ b/lulua/data/report/lulua-logo.svg
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<svg xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" width="25.36014mm" height="13.828889mm" viewBox="0 0 25.36014 13.828889" version="1.1">
+ <g style="fill:#EAE0C8">
+ <g transform="matrix(0.26458333,0,0,0.26458333,-10.208781,-21.74248)">
+ <path d="m 125.36706,120.94964 0.53333,-0.53334 h 0.21333 q 3.73334,0 5.49334,-1.65333 1.76,-1.70667 1.76,-5.38667 V 82.176302 h 1.06666 V 113.3763 q 0,4.26667 -2.02666,6.29334 -1.97334,1.97333 -6.29334,1.97333 h -0.74666 z"/>
+ <path d="m 104.07539,133.2163 h 4.05334 q 6.34666,0 9.70666,-2.88 3.41334,-2.88 4.16,-8.69333 h -8.21333 q -4.21333,0 -6.45333,-2.24 -2.18667,-2.24 -2.18667,-6.29333 0,-2.4 0.64,-4.53334 0.69333,-2.13333 1.86667,-3.68 1.22666,-1.6 2.88,-2.50666 1.65333,-0.96 3.62666,-0.96 4.32,0 6.72,4.10666 2.4,4.05334 2.4,11.36 0,1.92 -0.16,3.52 h 2.82667 v 0.69334 l -0.53333,0.53333 h -2.45334 q -0.90666,6.61333 -4.53333,9.70667 -3.57333,3.09333 -10.29333,3.09333 h -4.05334 z m 2.13334,-20.10666 q 0,3.52 1.81333,5.44 1.86667,1.86666 5.38667,1.86666 h 8.69333 q 0.0533,-0.58666 0.0533,-1.17333 0.0533,-0.64 0.0533,-1.28 v -2.13333 q 0,-2.88 -0.58667,-5.28 -0.53333,-2.45334 -1.6,-4.21334 -1.01333,-1.76 -2.56,-2.72 -1.49333,-0.96 -3.41333,-0.96 -1.65334,0 -3.09334,0.8 -1.44,0.8 -2.50666,2.24 -1.01334,1.38667 -1.65334,3.30667 -0.58666,1.86667 -0.58666,4.10667 z m 3.09333,-16.90667 h 2.66667 l 0.0533,-0.16 q -0.85333,-0.746667 -1.33333,-1.706667 -0.42667,-0.96 -0.42667,-1.973334 0,-1.866666 1.22667,-3.04 1.22666,-1.226667 3.09333,-1.226667 2.24,0 3.68,1.866667 l -0.69333,0.746667 q -1.01334,-1.546667 -2.98667,-1.546667 -1.44,0 -2.4,0.906667 -0.96,0.906667 -0.96,2.293333 0,1.6 1.28,2.666667 1.28,1.066667 3.25333,1.066667 h 3.2 v 1.066667 h -9.65333 z"/>
+ <path d="m 88.075391,120.94964 0.533334,-0.53334 h 0.213333 q 3.733333,0 5.493334,-1.65333 1.76,-1.70667 1.76,-5.38667 V 82.176302 h 1.066666 V 113.3763 q 0,4.26667 -2.026666,6.29334 -1.973334,1.97333 -6.293334,1.97333 h -0.746667 z"/>
+ <path d="m 66.783725,133.2163 h 4.053334 q 6.346667,0 9.706667,-2.88 3.413334,-2.88 4.16,-8.69333 h -8.213333 q -4.213334,0 -6.453334,-2.24 -2.186667,-2.24 -2.186667,-6.29333 0,-2.4 0.64,-4.53334 0.693334,-2.13333 1.866667,-3.68 1.226667,-1.6 2.88,-2.50666 1.653334,-0.96 3.626667,-0.96 4.32,0 6.72,4.10666 2.4,4.05334 2.4,11.36 0,1.92 -0.16,3.52 h 2.826667 v 0.69334 l -0.533333,0.53333 h -2.453334 q -0.906666,6.61333 -4.533333,9.70667 -3.573334,3.09333 -10.293334,3.09333 h -4.053334 z m 2.133334,-20.10666 q 0,3.52 1.813333,5.44 1.866667,1.86666 5.386667,1.86666 h 8.693334 q 0.05333,-0.58666 0.05333,-1.17333 0.05333,-0.64 0.05333,-1.28 v -2.13333 q 0,-2.88 -0.586667,-5.28 -0.533333,-2.45334 -1.6,-4.21334 -1.013333,-1.76 -2.56,-2.72 -1.493334,-0.96 -3.413334,-0.96 -1.653333,0 -3.093333,0.8 -1.44,0.8 -2.506667,2.24 -1.013333,1.38667 -1.653333,3.30667 -0.586667,1.86667 -0.586667,4.10667 z m 3.093333,-16.90667 h 2.666667 l 0.05333,-0.16 q -0.853334,-0.746667 -1.333334,-1.706667 -0.426667,-0.96 -0.426667,-1.973334 0,-1.866666 1.226667,-3.04 1.226667,-1.226667 3.093334,-1.226667 2.24,0 3.68,1.866667 l -0.693334,0.746667 q -1.013333,-1.546667 -2.986666,-1.546667 -1.44,0 -2.4,0.906667 -0.960001,0.906667 -0.960001,2.293333 0,1.6 1.280001,2.666667 1.28,1.066667 3.253333,1.066667 h 3.2 v 1.066667 h -9.653334 z"/>
+ </g>
+ <g>
+ <g transform="translate(-16.828094,-21.74248)">
+ <path d="m 17.96875,26.736328 c -0.629138,0 -1.140625,0.511487 -1.140625,1.140625 v 3.339844 c 0,0.629138 0.511487,1.140625 1.140625,1.140625 h 3.339844 c 0.629138,0 1.138672,-0.511487 1.138672,-1.140625 v -3.339844 c 0,-0.629138 -0.509534,-1.140625 -1.138672,-1.140625 z m 0,0.28125 h 3.339844 c 0.478861,0 0.859375,0.380514 0.859375,0.859375 v 3.339844 c 0,0.478861 -0.380514,0.859375 -0.859375,0.859375 H 17.96875 c -0.478862,0 -0.861328,-0.380514 -0.861328,-0.859375 v -3.339844 c 0,-0.478861 0.382466,-0.859375 0.861328,-0.859375 z"/>
+ </g>
+ <rect y="9.2311716" x="1.6189767" height="0.32543749" width="2.3812499"/>
+ </g>
+ </g>
+</svg>
diff --git a/lulua/data/report/style.css b/lulua/data/report/style.css
new file mode 100644
index 0000000..26b2e96
--- /dev/null
+++ b/lulua/data/report/style.css
@@ -0,0 +1,202 @@
+/*
+colorscheme derived from #EAE0C8, see https://en.wikipedia.org/wiki/Pearl_(color)
+using http://colormind.io/bootstrap/
+*/
+:root {
+ --light-shades: #EAE0C8;
+ --light-accent: #689CA9;
+ --main-brand: #A48A4E;
+ --dark-accent: #79796D;
+ --dark-shades: #1D251E;
+
+ --finger-little: #dc322f; /* red */
+ --finger-ring: #268bd2; /* blue */
+ --finger-middle: #d33682; /* magenta */
+ --finger-index: #6c71c4; /* violet */
+ --finger-thumb: #2aa198; /* cyan */
+}
+
+@font-face {
+ font-family: 'IBM Plex Arabic';
+ font-style: normal;
+ font-weight: 100;
+ src: local('IBM Plex Arabic Thin'), local('IBMPlexArabic-Thin'), url('fonts/IBMPlexArabic-Thin.woff2') format('woff2');
+}
+
+@font-face {
+ font-family: 'IBM Plex Arabic';
+ font-style: normal;
+ font-weight: 400;
+ src: local('IBM Plex Arabic Regular'), local('IBMPlexArabic-Regular'), url('fonts/IBMPlexArabic-Regular.woff2') format('woff2');
+}
+
+body {
+ font-size: 14pt;
+ background-color: var(--light-shades);
+ color: var(--dark-shades);
+}
+/* is there a better way to select _both_ fonts at the same time? */
+:lang(ar) {
+ direction: rtl;
+ font-family: "IBM Plex Arabic";
+}
+/* inside ltr text */
+:lang(ar)[dir=ltr] {
+ direction: ltr;
+}
+:lang(en) {
+ direction: ltr;
+ font-family: "IBM Plex Sans";
+}
+h1, h2, h3 {
+ font-weight: 100;
+}
+h1 {
+ font-size: 4em;
+}
+h2 {
+ font-size: 2.5em;
+}
+figure {
+ max-width: 70em;
+ margin: 1.3em auto;
+}
+img {
+ width: 100%;
+}
+code {
+ font-family: "IBM Plex Mono", monospace !important; /* override :lang(*) font setting */
+}
+a:link, a:visited {
+ color: var(--light-accent);
+ text-decoration: none;
+}
+a:focus, a:hover {
+ background-color: var(--light-accent);
+ color: var(--light-shades);
+ border-radius: 0.1em;
+}
+div.title-card {
+ background-color: var(--dark-accent);
+ color: var(--light-shades);
+}
+div.title-card .lbox {
+ margin: 2vw;
+}
+div.title-card h1 {
+ margin: 0;
+ padding: 0.2em;
+}
+div.title-card img.logo {
+ max-height: 35vh;
+ display: block;
+ margin: 0 auto;
+}
+div.title-card .subtitle {
+ padding: 0 0.5em;
+ font-size: 1.5em;
+}
+div.title-card .layout img {
+ display: block;
+ margin: 0 auto;
+}
+div.indepth-card {
+ padding: 10vh 0;
+ margin: 1em 0;
+ background-color: var(--dark-accent);
+ color: var(--light-shades);
+}
+.flexreverse {
+ flex-direction: row-reverse;
+}
+.lbox {
+ margin: 0 5vw;
+}
+/* for hand/finger stats */
+div.fingerhandstats {
+ text-align: center;
+ display: flex;
+}
+div.fingerhandstats div.fingers {
+ display: flex;
+}
+div.fingerhandstats div.fingers div {
+ margin: 0.1em;
+ overflow: hidden;
+}
+div.fingerhandstats .left {
+ margin-right: 0.5em;
+}
+div.fingerhandstats .right {
+ margin-left: 0.5em;
+}
+/* keep in sync with render-svg.css */
+div.fingerhandstats .fingers .little {
+ border: 0.1em solid var(--finger-little);
+}
+div.fingerhandstats .fingers .ring {
+ border: 0.1em solid var(--finger-ring);
+}
+div.fingerhandstats .fingers .middle {
+ border: 0.1em solid var(--finger-middle);
+}
+div.fingerhandstats .fingers .index {
+ border: 0.1em solid var(--finger-index);
+}
+div.fingerhandstats .fingers .thumb {
+ border: 0.1em solid var(--finger-thumb);
+}
+
+table {
+ font-variant-numeric: tabular-nums;
+}
+.pure-table td.numint {
+ text-align: right;
+ padding-right: 0;
+}
+
+.pure-table td.numfrac {
+ border-left: none;
+ text-align: left;
+ padding-left: 0;
+}
+
+dl.colorcodes dt, dl.colorcodes dd {
+ display: inline;
+ padding: 0;
+ margin: 0;
+}
+
+dl.colorcodes dt:after {
+ content: ":";
+}
+
+dl.colorcodes .finger:before {
+ width: 0.7em;
+ height: 0.7em;
+ display: inline-block;
+ content: " ";
+ margin-right: 0.3em;
+ vertical-align: middle;
+}
+
+dl.colorcodes .finger.little::before {
+ background-color: var(--finger-little);
+}
+
+dl.colorcodes .finger.ring::before {
+ background-color: var(--finger-ring);
+}
+
+dl.colorcodes .finger.middle::before {
+ background-color: var(--finger-middle);
+}
+
+dl.colorcodes .finger.index::before {
+ background-color: var(--finger-index);
+}
+
+dl.colorcodes .finger.thumb::before {
+ background-color: var(--finger-thumb);
+}
+
diff --git a/lulua/report.py b/lulua/report.py
new file mode 100644
index 0000000..200bb9b
--- /dev/null
+++ b/lulua/report.py
@@ -0,0 +1,64 @@
+import sys, argparse, logging, pickle
+from gettext import GNUTranslations, NullTranslations
+from decimal import Decimal
+
+import yaml
+from jinja2 import Environment, PackageLoader
+from bokeh.resources import CDN as bokehres
+
+from .layout import LEFT, RIGHT, Direction, FingerType
+
def approx (i):
    """
    Get approximate human-readable representation of a large number.

    The value is scaled down by powers of 1000 until it is below 1000 or
    the largest known unit ('billion') is reached, then rounded to one
    decimal place.

    :param i: Number to approximate; anything accepted by ``Decimal ()``.
    :return: Tuple ``(value, unit)`` where ``value`` is a ``Decimal``
        rounded to one decimal place and ``unit`` is one of ``''``,
        ``'thousand'``, ``'million'`` or ``'billion'``.
    """

    units = ('', 'thousand', 'million', 'billion')
    base = Decimal (1000)
    last = len (units) - 1

    value = Decimal (i)
    idx = 0
    while value >= base and idx < last:
        value /= base
        idx += 1

    rounded = round (value, 1)
    # Rounding can push the value up to the base (999.95 -> 1000.0). Move
    # on to the next unit in that case, so the result reads "1.0 million"
    # rather than "1000.0 thousand".
    if rounded >= base and idx < last:
        rounded = round (value / base, 1)
        idx += 1
    return rounded, units[idx]
+
def numspace (s):
    """
    Return a copy of s in which every ordinary space character is
    substituted by U+2007 FIGURE SPACE.
    """
    pieces = s.split (' ')
    return '\u2007'.join (pieces)
+
def render ():
    """
    Command-line entry point: render the HTML report to standard output.

    Reads corpus metadata from the YAML files given with ``--corpus`` and
    layout statistics from the pickle files given with ``--layoutstats``,
    then streams the ``index.html`` template found in the package's
    ``data/report`` directory to ``sys.stdout``.
    """
    parser = argparse.ArgumentParser(description='Create lulua report.')
    # Both options are iterated unconditionally below, so make argparse
    # reject a missing one with a usage error instead of failing later with
    # a TypeError on None.
    parser.add_argument('-c', '--corpus', nargs='+', metavar='FILE', required=True, help='Corpus metadata files')
    parser.add_argument('-l', '--layoutstats', nargs='+', metavar='FILE', required=True, help='Layout statistics files')
    logging.basicConfig (level=logging.INFO)
    args = parser.parse_args()

    env = Environment (
        loader=PackageLoader (__package__, 'data/report'),
        )
    env.filters['approx'] = approx
    env.filters['numspace'] = numspace

    corpus = []
    for path in args.corpus:
        # Decode explicitly as UTF-8 rather than relying on the locale's
        # default encoding.
        with open (path, encoding='utf-8') as fd:
            # A file may hold several YAML documents; empty ones load as
            # None and are dropped.
            corpus.extend (doc for doc in yaml.safe_load_all (fd) if doc is not None)

    layoutstats = {}
    for path in args.layoutstats:
        with open (path, 'rb') as fd:
            # NOTE: unpickling can execute arbitrary code. Only pass files
            # from a trusted source here.
            d = pickle.load (fd)
        layoutstats[d['layout']] = d

    # Sum word and character counts over all corpus entries
    corpustotal = {}
    for k in ('words', 'characters'):
        corpustotal[k] = sum (c['stats'][k] for c in corpus)

    tpl = env.get_template('index.html')

    tpl.stream (
        corpus=corpus,
        corpustotal=corpustotal,
        layoutstats=layoutstats,
        bokehres=bokehres,
        # XXX: not sure how to expose these properly to the template
        # The right hand uses the reversed FingerType order.
        fingerOrder={LEFT: list (FingerType), RIGHT: list (reversed (FingerType))},
        Direction=Direction,
        ).dump (sys.stdout)
+
diff --git a/lulua/stats.py b/lulua/stats.py
index 80c269b..13d878b 100644
--- a/lulua/stats.py
+++ b/lulua/stats.py
@@ -22,7 +22,6 @@ import sys, operator, pickle, argparse, logging, yaml, math, time
from operator import itemgetter
from itertools import chain, groupby, product
from collections import defaultdict
-from decimal import Decimal
from .layout import *
from .keyboard import defaultKeyboards
@@ -313,7 +312,7 @@ def keyHeatmap (args):
buttons[k.name] = v
yaml.dump (data, sys.stdout)
-def fingerHand (args):
+def layoutstats (args):
stats = pickle.load (sys.stdin.buffer)
keyboard = defaultKeyboards[args.keyboard]
@@ -328,19 +327,14 @@ def fingerHand (args):
hands[hand] += count
fingers[(hand, finger)] += count
- print ('<div class="fingerhandstats" dir="ltr" lang="en">')
- fingerOrder = {LEFT: list (FingerType), RIGHT: reversed (FingerType)}
- for hand in Direction:
- handpct = hands[hand]/buttonPresses*100
- print (f'<div class="{hand.name.lower()}" style="width: {handpct:.3f}%;">\n\t<div class="hand">{handpct:.2f}%</div>')
- print ('\t<div class="fingers">')
- for finger in fingerOrder[hand]:
- fingerpct = fingers[(hand, finger)]/buttonPresses*100
- # finger width is relative to parent (i.e. hand)
- fingerwidth = fingers[(hand, finger)]/hands[hand]*100
- print (f'\t\t<div class="{finger.name.lower()}" style="width: {fingerwidth:.3f}%;">{fingerpct:.2f}</div>')
- print ('\t</div>\n\t</div>')
- print ('</div>')
+ asymmetry = hands[LEFT]/buttonPresses - hands[RIGHT]/buttonPresses
+ pickle.dump (dict (
+ layout=args.layout,
+ hands=dict (hands),
+ fingers=dict (fingers),
+ buttonPresses=buttonPresses,
+ asymmetry=asymmetry,
+ ), sys.stdout.buffer)
def latinImeDict (args):
"""
@@ -379,46 +373,6 @@ def corpusStats (args):
# make document concatable
print ('---')
-def approx (i):
- """ Get approximate human-readable string for large number """
-
- units = ['', 'thousand', 'million', 'billion']
- base = Decimal (1000)
- i = Decimal (i)
- while i >= base and len (units) > 1:
- i /= base
- units.pop (0)
- i = round (i, 1)
- return int (i), int (i%1*10), units[0]
-
-def corpusHtml (args):
- meta = list (filter (lambda x: x is not None, yaml.safe_load_all (sys.stdin)))
- total = {'words': 0, 'characters': 0}
- print ('<table class="pure-table"><thead><tr><th>Source</th><th colspan="2"></th><th colspan="2">Words</th><th colspan="2">Characters</th></thead><tbody>')
- for c in sorted (meta, key=lambda x: x['source']['name'].lower ()):
- print ('<tr>')
- print (f'<td><a href="{c["source"]["url"]}">{c["source"]["name"]}</a></td>')
- count = c.get ('count')
- if count:
- print (f'<td class="numint">{count[0]//1000:d},</td><td class="numfrac">{count[0]%1000:03d}\u202f{count[1]}</td>')
- else:
- print ('<td class="numint"></td><td class="numfrac"></td>')
-
- stats = c.get ('stats')
- for k in ('words', 'characters'):
- i = approx (stats[k])
- print (f'<td class="numint">{i[0]}.</td><td class="numfrac">{i[1]}\u202f{i[2]}</td>')
- print ('</tr>')
-
- for k in ('words', 'characters'):
- total[k] += c['stats'][k]
- print ('<tr><td>Total</td><td class="numint"></td><td class="numfrac"></td>')
- for k in ('words', 'characters'):
- i = approx (total[k])
- print (f'<td class="numint">{i[0]}.</td><td class="numfrac">{i[1]}\u202f{i[2]}</td>')
- print ('</tr>')
- print ('</tbody></table>')
-
def main ():
parser = argparse.ArgumentParser(description='Process statistics files.')
parser.add_argument('-l', '--layout', metavar='LAYOUT', help='Keyboard layout name')
@@ -439,15 +393,13 @@ def main ():
sp.set_defaults (func=triadfreq)
sp = subparsers.add_parser('keyheatmap')
sp.set_defaults (func=keyHeatmap)
- sp = subparsers.add_parser('fingerhand')
- sp.set_defaults (func=fingerHand)
+ sp = subparsers.add_parser('layoutstats')
+ sp.set_defaults (func=layoutstats)
sp = subparsers.add_parser('latinime')
sp.set_defaults (func=latinImeDict)
sp = subparsers.add_parser('corpusstats')
sp.add_argument('metadata', type=argparse.FileType ('r'))
sp.set_defaults (func=corpusStats)
- sp = subparsers.add_parser('corpushtml')
- sp.set_defaults (func=corpusHtml)
logging.basicConfig (level=logging.INFO)
args = parser.parse_args()