From 0f8643954fd9507aec85bab46046e71a497bfffe Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 22 Feb 2020 13:20:31 +0100 Subject: doc: Switch to jinja2-based rendering Pre-rendering HTML was not the best idea. Instead pre-process the data, cache it into data files and do the HTML rendering only as the final step. Also adds asymmetry to analysis and uses tabular numbers and spaces instead of ugly table hacks to align numbers. --- lulua/data/report/index.html | 474 +++++++++++++++++++++++++++++++++++++++ lulua/data/report/lulua-logo.svg | 17 ++ lulua/data/report/style.css | 202 +++++++++++++++++ lulua/report.py | 64 ++++++ lulua/stats.py | 70 +----- 5 files changed, 768 insertions(+), 59 deletions(-) create mode 100644 lulua/data/report/index.html create mode 100644 lulua/data/report/lulua-logo.svg create mode 100644 lulua/data/report/style.css create mode 100644 lulua/report.py (limited to 'lulua') diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html new file mode 100644 index 0000000..5649fab --- /dev/null +++ b/lulua/data/report/index.html @@ -0,0 +1,474 @@ + + + + + لؤلؤة + + + + + + + {# bokeh #} + {% for f in bokehres.js_files -%} + + {%- endfor %} + {% for f in bokehres.css_files -%} + + {%- endfor %} + + + + +
+
+
+

+
+
+ +
+
+

Ergonomic Arabic Keyboard Layout

+
+
+
+
+
+
+
+ لؤلؤة +
+
+
+
+ +
+
+
+
+
+

This is work in progress and contributions are welcome. Head over to + GitHub to see where + you can help.

+
+
+
+ +
+
+
+
+
+

Goals

+
    +
  • Ergonomic typing of unvocalized and vocalized text with 10 fingers
  • +
  • Modern Standard Arabic and Quranic Arabic
  • +
  • Localized numbers (European/Arabic-Indic)
  • +
  • Usable as primary or secondary keyboard
  • +
  • Compose-based
  • +
  • Support for Markdown, RST, Wikitext and similar markup
  • +
+ +

Other languages using the Arabic alphabet (regional dialects, Urdu, + Persian) are explicitly not supported.

+
+
+
+ +
+
+
+
+
+

Usage

+
+
Windows
+
Download driver and follow instructions in INSTALL.txt
+
Android
+
Install + AnySoftKeyboard + and + Arabic for AnySoftKeyboard +
Linux
+
Run: xmodmap ar-lulua.xmodmap
+
+
+
+
+ +
+
+
+
+
+
+

Learn more

+
+
+
+
+ +
+
+
+
+

الأبجدية العربية

+
+
+
+
+

The Arabic Alphabet

+

+ There are 28 letters in the Arabic alphabet, plus quite a few extra + symbols required for proper text input, like the hamza in its different + shapes أ إ آ ء ئ ؤ, ta marbutah ة, alif maqsurah ى and various diacritics for vowelized texts. + + Since the usability of a keyboard layout depends on the text entered + it is necessary to study letter and letter combination frequencies first. + + The corpus used for the following analysis consists of +

+ + + {% for c in corpus|sort(attribute='source.name') %} + + + {% set count = c.get ('count') %} + {% if count %} + {# use new style formatting, for some reason %7,d does not work #} + + {% else %} + + {% endif %} + + {% set stats = c.get ('stats') %} + {% for k in ('words', 'characters') %} + {% set i = stats[k]|approx %} + + {% endfor %} + + {% endfor %} + + {% for k in ('words', 'characters') %} + {% set i = corpustotal[k]|approx %} + + {% endfor %} + +
SourceWordsCharacters
{{ c.source.name }}{{ '{:7,d}'.format(count[0])|numspace }} {{ count[1] }}{{ '%5.1f'|format(i[0])|numspace }} {{ i[1] }}
Total{{ '%5.1f'|format(i[0])|numspace }} {{ i[1] }}
+ +

+ The plot below shows ا ل ي م و ن can be + considered the most frequently used letters in the Arabic language. + + Together they account for more than 55% of all letters in the corpus. +

+
+
+
+ +
+
+
+
+
+
+
+
+
+

Arabic letter frequency distribution

+
+
+
+
+
+ +
+
+
+
+
+
+

Layout properties

+ +

The following evaluation uses color coding to identify fingers:

+
+
red
+
little finger
+
blue
+
ring finger
+
magenta
+
middle finger +
violet
+
index finger
+
cyan
+
thumb
+
+

Asymmetry is defined as the difference between left and right hand usage.

+
+
+
+ +{% macro fingerhandstats(stats) %} +{% set hands = stats.hands %} +{% set fingers = stats.fingers %} +
+{% for hand in Direction %} + {% set handpct = hands[hand]/stats.buttonPresses*100 %} +
+
{{ '%.2f'|format(handpct) }}%
+
+ {% for finger in fingerOrder[hand] %} + {% set fingerpct = fingers[(hand, finger)]/stats.buttonPresses*100 %} + {# finger width is relative to parent (i.e. hand) #} + {% set fingerwidth = fingers[(hand, finger)]/hands[hand]*100 %} +
{{ '%.2f'|format(fingerpct) }}
+ {% endfor %} +
+
+ {% if loop.first %} +
Asymmetry: {{ '%.3f'|format(stats.asymmetry) }}
+ {% endif %} +{% endfor %} +
+{% endmacro %} + +
+
+ + {{ fingerhandstats(layoutstats['ar-lulua']) }} +
+
+
+ +
+
+
+
+
+
+

Related work

+
+
+
+ +
+
+ + {{ fingerhandstats(layoutstats['ar-asmo663']) }} +
+
+
+
+
+
+

+ Trying to unify existing layouts, the Arab Standardization and + Metrology Organization (ASMO), now part of + AIDMO, published an Arabic + keyboard layout in 1987 as + standard 663. + + This, however, turned out to be a failure, due to lack of adoption by + the typewriter industry. +

+
+
+
+
+ +
+
+ + {{ fingerhandstats(layoutstats['ar-linux']) }} +
+
+
+
+
+
+

+ Instead we’re currently using this layout (on Linux), which is + similar, but not quite the same. + + Most notably this layout arranges letters by their visual similarity. + + Thus it allocates suboptimal or even awkward positions to frequently + used letters like ا ل and + ذ. +

+
+
+
+
+ +
+
+ + {{ fingerhandstats(layoutstats['ar-malas']) }} +
+
+
+
+
+
+

+ The work by Malas et al. (2008), + Toward Optimal Arabic Keyboard Layout Using Genetic Algorithm, + presents an alternative layout generated by a genetic algorithm. + + They used a snapshot of the Arabic Wikipedia probably from around 2008 and + optimized for typing speed only, claiming 35% faster typing compared + to the currently used layouts. + + However the decision to put ي in the top + row seems odd. + + Assigning the same left index finger to ا + ي و, which are three of the most frequent letters, heavily + strains this particular finger. +

+
+
+
+
+ +
+
+ + {{ fingerhandstats(layoutstats['ar-osman']) }} +
+
+
+
+
+
+

+ In 2015 patent + 9,041,657 B2 + was filed in the US, presenting yet another computer-generated layout. + + Its genetic algorithm was seeded with just 54 Arabic e-books consisting + of 7 million characters in total. + + Overall it claims to be 9% faster than default layouts. + + This layout rips off most of the standard layout’s second layer, + but amusingly fails to include a question mark, while it does + provide three single-quote marks ’ and two Arabic + semicolons ؛. + + Additionally it places ي in an even + worse position than Malas’ layout. +

+
+
+
+
+ +
+
+ + {{ fingerhandstats(layoutstats['ar-khorshid']) }} +
+
+
+
+
+
+

+ In the paper + A new optimal Arabic keyboard layout using genetic algorithm + Khorshid et al. present yet another + layout. + + They claim a 36% improvement over the standard keyboard based on + their criteria for ergonomic layouts. + + However in their layout from figure 8 the letters ل ب ر are in suboptimal positions. + + Also it seems their algorithm favors the bottom row instead of the + easier to use top row. +

+
+
+
+
+ +
+
+ + {{ fingerhandstats(layoutstats['ar-phonetic']) }} +
+
+
+
+
+
+

+ The Arabic Phonetic Keyboard + simply maps the QWERTY layout to Arabic letters, based on their sound. + Thus Q becomes ق, Y becomes ي and so on. + It claims to be optimized for writing vowelized texts, especially + Quranic Arabic, and thus includes quite a few combining characters and + special symbols. + Although it claims to make frequently used letters easily available – + based on the work of Intellaren – it makes no effort to arrange letters + according to their usage frequency. +

+
+
+
+
+ +
+
+
+
+
+

+ While technically speaking not a layout but an alternative input + method, Intellark by + Intellaren is worth mentioning. + + It is based on repeatedly pressing the same button to modify the + current character. + + For example pressing A on the QWERTY keyboard cycles through the + alternatives ا أ إ آ and ء. + + Obviously this is slow, error-prone and violates Dvorak’s guidelines + for keyboard layout designs. +

+
+
+
+
+ +
+
+
+
+
+
+

Acknowledgements

+ +

This work would not have been possible without Martin Krzywinski’s + work on carpalx.

+
+
+
+
+ + + + + diff --git a/lulua/data/report/lulua-logo.svg b/lulua/data/report/lulua-logo.svg new file mode 100644 index 0000000..20136c0 --- /dev/null +++ b/lulua/data/report/lulua-logo.svg @@ -0,0 +1,17 @@ + + + + + + + + + + + + + + + + + diff --git a/lulua/data/report/style.css b/lulua/data/report/style.css new file mode 100644 index 0000000..26b2e96 --- /dev/null +++ b/lulua/data/report/style.css @@ -0,0 +1,202 @@ +/* +colorscheme derived from #EAE0C8, see https://en.wikipedia.org/wiki/Pearl_(color) +using http://colormind.io/bootstrap/ +*/ +:root { + --light-shades: #EAE0C8; + --light-accent: #689CA9; + --main-brand: #A48A4E; + --dark-accent: #79796D; + --dark-shades: #1D251E; + + --finger-little: #dc322f; /* red */ + --finger-ring: #268bd2; /* blue */ + --finger-middle: #d33682; /* magenta */ + --finger-index: #6c71c4; /* violet */ + --finger-thumb: #2aa198; /* cyan */ +} + +@font-face { + font-family: 'IBM Plex Arabic'; + font-style: normal; + font-weight: 100; + src: local('IBM Plex Arabic Thin'), local('IBMPlexArabic-Thin'), url('fonts/IBMPlexArabic-Thin.woff2') format('woff2'); +} + +@font-face { + font-family: 'IBM Plex Arabic'; + font-style: normal; + font-weight: 400; + src: local('IBM Plex Arabic Regular'), local('IBMPlexArabic-Regular'), url('fonts/IBMPlexArabic-Regular.woff2') format('woff2'); +} + +body { + font-size: 14pt; + background-color: var(--light-shades); + color: var(--dark-shades); +} +/* is there a better way to select _both_ fonts at the same time? 
*/ +:lang(ar) { + direction: rtl; + font-family: "IBM Plex Arabic"; +} +/* inside ltr text */ +:lang(ar)[dir=ltr] { + direction: ltr; +} +:lang(en) { + direction: ltr; + font-family: "IBM Plex Sans"; +} +h1, h2, h3 { + font-weight: 100; +} +h1 { + font-size: 4em; +} +h2 { + font-size: 2.5em; +} +figure { + max-width: 70em; + margin: 1.3em auto; +} +img { + width: 100%; +} +code { + font-family: "IBM Plex Mono", monospace !important; /* override :lang(*) font setting */ +} +a:link, a:visited { + color: var(--light-accent); + text-decoration: none; +} +a:focus, a:hover { + background-color: var(--light-accent); + color: var(--light-shades); + border-radius: 0.1em; +} +div.title-card { + background-color: var(--dark-accent); + color: var(--light-shades); +} +div.title-card .lbox { + margin: 2vw; +} +div.title-card h1 { + margin: 0; + padding: 0.2em; +} +div.title-card img.logo { + max-height: 35vh; + display: block; + margin: 0 auto; +} +div.title-card .subtitle { + padding: 0 0.5em; + font-size: 1.5em; +} +div.title-card .layout img { + display: block; + margin: 0 auto; +} +div.indepth-card { + padding: 10vh 0; + margin: 1em 0; + background-color: var(--dark-accent); + color: var(--light-shades); +} +.flexreverse { + flex-direction: row-reverse; +} +.lbox { + margin: 0 5vw; +} +/* for hand/finger stats */ +div.fingerhandstats { + text-align: center; + display: flex; +} +div.fingerhandstats div.fingers { + display: flex; +} +div.fingerhandstats div.fingers div { + margin: 0.1em; + overflow: hidden; +} +div.fingerhandstats .left { + margin-right: 0.5em; +} +div.fingerhandstats .right { + margin-left: 0.5em; +} +/* keep in sync with render-svg.css */ +div.fingerhandstats .fingers .little { + border: 0.1em solid var(--finger-little); +} +div.fingerhandstats .fingers .ring { + border: 0.1em solid var(--finger-ring); +} +div.fingerhandstats .fingers .middle { + border: 0.1em solid var(--finger-middle); +} +div.fingerhandstats .fingers .index { + border: 0.1em solid 
var(--finger-index); +} +div.fingerhandstats .fingers .thumb { + border: 0.1em solid var(--finger-thumb); +} + +table { + font-variant-numeric: tabular-nums; +} +.pure-table td.numint { + text-align: right; + padding-right: 0; +} + +.pure-table td.numfrac { + border-left: none; + text-align: left; + padding-left: 0; +} + +dl.colorcodes dt, dl.colorcodes dd { + display: inline; + padding: 0; + margin: 0; +} + +dl.colorcodes dt:after { + content: ":"; +} + +dl.colorcodes .finger:before { + width: 0.7em; + height: 0.7em; + display: inline-block; + content: " "; + margin-right: 0.3em; + vertical-align: middle; +} + +dl.colorcodes .finger.little::before { + background-color: var(--finger-little); +} + +dl.colorcodes .finger.ring::before { + background-color: var(--finger-ring); +} + +dl.colorcodes .finger.middle::before { + background-color: var(--finger-middle); +} + +dl.colorcodes .finger.index::before { + background-color: var(--finger-index); +} + +dl.colorcodes .finger.thumb::before { + background-color: var(--finger-thumb); +} + diff --git a/lulua/report.py b/lulua/report.py new file mode 100644 index 0000000..200bb9b --- /dev/null +++ b/lulua/report.py @@ -0,0 +1,64 @@ +import sys, argparse, logging, pickle +from gettext import GNUTranslations, NullTranslations +from decimal import Decimal + +import yaml +from jinja2 import Environment, PackageLoader +from bokeh.resources import CDN as bokehres + +from .layout import LEFT, RIGHT, Direction, FingerType + +def approx (i): + """ Get approximate human-readable string for large number """ + + units = ['', 'thousand', 'million', 'billion'] + base = Decimal (1000) + i = Decimal (i) + while i >= base and len (units) > 1: + i /= base + units.pop (0) + return round (i, 1), units[0] + +def numspace (s): + """ Replace ordinary spaces with unicode FIGURE SPACE """ + return s.replace (' ', '\u2007') + +def render (): + parser = argparse.ArgumentParser(description='Create lulua report.') + parser.add_argument('-c', '--corpus', 
nargs='+', metavar='FILE', help='Corpus metadata files') + parser.add_argument('-l', '--layoutstats', nargs='+', metavar='FILE', help='Layout statistics files') + logging.basicConfig (level=logging.INFO) + args = parser.parse_args() + + env = Environment ( + loader=PackageLoader (__package__, 'data/report'), + ) + env.filters['approx'] = approx + env.filters['numspace'] = numspace + + corpus = [] + for x in args.corpus: + with open (x) as fd: + corpus.extend (filter (lambda x: x is not None, yaml.safe_load_all (fd))) + layoutstats = {} + for x in args.layoutstats: + with open (x, 'rb') as fd: + d = pickle.load (fd) + layoutstats[d['layout']] = d + + corpustotal = {} + for k in ('words', 'characters'): + corpustotal[k] = sum (map (lambda x: x['stats'][k], corpus)) + + tpl = env.get_template('index.html') + + tpl.stream ( + corpus=corpus, + corpustotal=corpustotal, + layoutstats=layoutstats, + bokehres=bokehres, + # XXX: not sure how to expose these properly to the template + fingerOrder={LEFT: list (FingerType), RIGHT: list (reversed (FingerType))}, + Direction=Direction, + ).dump (sys.stdout) + diff --git a/lulua/stats.py b/lulua/stats.py index 80c269b..13d878b 100644 --- a/lulua/stats.py +++ b/lulua/stats.py @@ -22,7 +22,6 @@ import sys, operator, pickle, argparse, logging, yaml, math, time from operator import itemgetter from itertools import chain, groupby, product from collections import defaultdict -from decimal import Decimal from .layout import * from .keyboard import defaultKeyboards @@ -313,7 +312,7 @@ def keyHeatmap (args): buttons[k.name] = v yaml.dump (data, sys.stdout) -def fingerHand (args): +def layoutstats (args): stats = pickle.load (sys.stdin.buffer) keyboard = defaultKeyboards[args.keyboard] @@ -328,19 +327,14 @@ def fingerHand (args): hands[hand] += count fingers[(hand, finger)] += count - print ('
') - fingerOrder = {LEFT: list (FingerType), RIGHT: reversed (FingerType)} - for hand in Direction: - handpct = hands[hand]/buttonPresses*100 - print (f'
\n\t
{handpct:.2f}%
') - print ('\t
') - for finger in fingerOrder[hand]: - fingerpct = fingers[(hand, finger)]/buttonPresses*100 - # finger width is relative to parent (i.e. hand) - fingerwidth = fingers[(hand, finger)]/hands[hand]*100 - print (f'\t\t
{fingerpct:.2f}
') - print ('\t
\n\t
') - print ('
') + asymmetry = hands[LEFT]/buttonPresses - hands[RIGHT]/buttonPresses + pickle.dump (dict ( + layout=args.layout, + hands=dict (hands), + fingers=dict (fingers), + buttonPresses=buttonPresses, + asymmetry=asymmetry, + ), sys.stdout.buffer) def latinImeDict (args): """ @@ -379,46 +373,6 @@ def corpusStats (args): # make document concatable print ('---') -def approx (i): - """ Get approximate human-readable string for large number """ - - units = ['', 'thousand', 'million', 'billion'] - base = Decimal (1000) - i = Decimal (i) - while i >= base and len (units) > 1: - i /= base - units.pop (0) - i = round (i, 1) - return int (i), int (i%1*10), units[0] - -def corpusHtml (args): - meta = list (filter (lambda x: x is not None, yaml.safe_load_all (sys.stdin))) - total = {'words': 0, 'characters': 0} - print ('') - for c in sorted (meta, key=lambda x: x['source']['name'].lower ()): - print ('') - print (f'') - count = c.get ('count') - if count: - print (f'') - else: - print ('') - - stats = c.get ('stats') - for k in ('words', 'characters'): - i = approx (stats[k]) - print (f'') - print ('') - - for k in ('words', 'characters'): - total[k] += c['stats'][k] - print ('') - for k in ('words', 'characters'): - i = approx (total[k]) - print (f'') - print ('') - print ('
SourceWordsCharacters
{c["source"]["name"]}{count[0]//1000:d},{count[0]%1000:03d}\u202f{count[1]}{i[0]}.{i[1]}\u202f{i[2]}
Total{i[0]}.{i[1]}\u202f{i[2]}
') - def main (): parser = argparse.ArgumentParser(description='Process statistics files.') parser.add_argument('-l', '--layout', metavar='LAYOUT', help='Keyboard layout name') @@ -439,15 +393,13 @@ def main (): sp.set_defaults (func=triadfreq) sp = subparsers.add_parser('keyheatmap') sp.set_defaults (func=keyHeatmap) - sp = subparsers.add_parser('fingerhand') - sp.set_defaults (func=fingerHand) + sp = subparsers.add_parser('layoutstats') + sp.set_defaults (func=layoutstats) sp = subparsers.add_parser('latinime') sp.set_defaults (func=latinImeDict) sp = subparsers.add_parser('corpusstats') sp.add_argument('metadata', type=argparse.FileType ('r')) sp.set_defaults (func=corpusStats) - sp = subparsers.add_parser('corpushtml') - sp.set_defaults (func=corpusHtml) logging.basicConfig (level=logging.INFO) args = parser.parse_args() -- cgit v1.2.3