diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2020-11-01 17:00:45 +0100 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2020-11-01 17:00:45 +0100 |
commit | d6c5dbc925e63bebc048356c4a54a1ad6b364014 (patch) | |
tree | 31313bb87d7ec4ab256758404ee9e3d48857858f /lulua | |
parent | 6a2262a7f9bf2ab16ecfdfc52e548ef010c76e00 (diff) | |
download | lulua-d6c5dbc925e63bebc048356c4a54a1ad6b364014.tar.gz lulua-d6c5dbc925e63bebc048356c4a54a1ad6b364014.tar.bz2 lulua-d6c5dbc925e63bebc048356c4a54a1ad6b364014.zip |
Explain design decisions
Fixes #10.
Diffstat (limited to 'lulua')
-rw-r--r-- | lulua/data/report/index.html | 77 | ||||
-rw-r--r-- | lulua/data/report/style.css | 8 | ||||
-rw-r--r-- | lulua/report.py | 7 | ||||
-rw-r--r-- | lulua/stats.py | 25 |
4 files changed, 110 insertions, 7 deletions
diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html index a18dcb7..cae6024 100644 --- a/lulua/data/report/index.html +++ b/lulua/data/report/index.html @@ -268,13 +268,18 @@ $$a = \frac{b_{left}}{b_{total}} - \frac{b_{right}}{b_{total}}$$ <p> - The layout proposed uses four layers and assumes a 102/105 key ISO - keyboard common in Europe to accomodate the shift keys necessary. + The layout proposed uses four shift layers in a way inspired by <a + href="https://neo-layout.org/">Neo2</a>. + <!-- --> + Thus it assumes a <a + href="https://en.wikipedia.org/wiki/IBM_PC_keyboard#Keyboard_layouts">102/105 + key ISO keyboard</a> common in Europe – but also available in Arab + countries – to accommodate for the necessary shift keys. <!-- --> These are in order: <span class="layer second">Shift on the left and right</span>, <span class="layer third">caps lock on the left and the rightmost key in the middle row</span>, <span class="layer fourth">the - key right of the left shift key and the key labeled <em>Alt Gr</em> to + key right to the left shift key and the key labeled <em>Alt Gr</em> to the right of the spacebar</span>. <!-- --> Symbols are assigned to the four layers by their function: <span @@ -282,6 +287,15 @@ second">punctuation</span>, <span class="layer third">diacritics</span>, <span class="layer fourth">other</span>. </p> + <details class="remarks"> + <summary></summary> + <p>Apple, for instance, provides an <a + href="https://www.apple.com/shop/product/MLA22AC/A/magic-keyboard-arabic">Arabic + hardware keyboard</a> with this physical layout. But both variants, + 101/104 key and 102/105 key devices, seem to exist in the Arab + world.</p> + </details> + <p> The first layer was optimized using an extended reimplementation of <a href="http://mkweb.bcgsc.ca/carpalx/?typing_effort">carpalx</a>. 
@@ -299,10 +313,10 @@ </p> <details class="remarks"> <summary></summary> - <p>(This is <a + <p>This is <a href="https://github.com/mw8/white_keyboard_layout/blob/master/README.md#finding-the-optimal-layout">a common way</a> of arranging brackets, because most algorithms ignore - human desire for symmetry.)</p> + human desire for symmetry.</p> </details> <p> @@ -317,6 +331,59 @@ typing load, but naturally the left middle finger is used more frequently due to its assignment to the letter alif. </p> + + <p> + The layout targets Quranic and Modern Standard Arabic (MSA), also called Fusha + (<bdo lang="ar">الفصحى</bdo>), only. + <!-- --> + Dialectical Arabic (<bdo lang="ar">العامية</bdo>) is mainly a spoken + language, although with the rise of social media sites like Twitter and + Facebook this is changing. + <!-- --> + For now however it’s not an optimization target due to the lack of a + good, representative corpus. + </p> + + <p> + Designing the layout to be compose-based has both benefits and + disadvantages. + <!-- --> + Compose-based mainly means the hamza <bdo lang="ar" dir="ltr">ء</bdo> + is treated like an optional diacritic for Alef, Waw and Yah instead of + viewing Alef-Hamza, Waw-Hamza and Yah-Hamza as precombined, atomic + units. + <!-- --> + Although <bdo lang="ar" dir="ltr">أ</bdo> and <bdo lang="ar" + dir="ltr">ا</bdo> are not the same, the hamza can be dropped if the + writer’s intention is unambiguously inferable from context. + <!-- --> + Thus it makes sense to provide hamza as a combining character on the + keyboard. + <!-- --> + Additionally it uses two keys less than precombining it with its stems, + allowing the entire alphabet plus hamza diacritic to fit on a single + keyboard layer. + <!-- --> + However, there is a cost to this approach: + All hamza variants account for {{ + '%.1f'|format(layoutstats['ar-osx'].hamzaImpact*100) }}% of button + combinations. 
+ <!-- --> + Splitting hamza from its stem means doubling the total number of + button combinations and thus button presses, decreasing scores like + words per minute (WPM) slightly. + <!-- --> + Splitting Alef and Alef-Hamza could also reduce pressure on left middle + finger and allow for more even distribution, since {{ + layoutstats['ar-osx'].hamzaOnAlef|fraction }}<sup>th</sup> of all Alef + uses are with Hamza. + </p> + <details class="remarks"> + <summary></summary> + <p>See for example section 3.3 of <a + href="https://doi.org/10.1007/978-1-4020-6046-5_3">Buckwalter’s <em>Issues in Arabic Morphological Analysis</em></a>. + </p> + </details> </div> </div> </div> diff --git a/lulua/data/report/style.css b/lulua/data/report/style.css index f62fcef..ed0d32d 100644 --- a/lulua/data/report/style.css +++ b/lulua/data/report/style.css @@ -240,6 +240,12 @@ details[open].remarks { margin-bottom: 1em; } details.remarks summary { - list-style-type: "+"; + list-style-type: "↳ Remarks"; cursor: pointer; + opacity: 0.4; } + +details.remarks p { + margin-top: 0; +} + diff --git a/lulua/report.py b/lulua/report.py index 8a50681..b25201d 100644 --- a/lulua/report.py +++ b/lulua/report.py @@ -21,6 +21,7 @@ import sys, argparse, logging, pickle, math from gettext import GNUTranslations, NullTranslations from decimal import Decimal +from fractions import Fraction import yaml from jinja2 import Environment, PackageLoader @@ -40,6 +41,11 @@ def approx (i, lang='en'): units.pop (0) return round (i, 1), units[0] +def fraction (n, maxdenom=10): + """ Turn floating number n into a human-digestable fraction """ + f = Fraction (n).limit_denominator (maxdenom) + return f'{f.numerator}\u2044{f.denominator}' + def numspace (s): """ Replace ordinary spaces with unicode FIGURE SPACE """ return s.replace (' ', '\u2007') @@ -83,6 +89,7 @@ def render (): env.filters['numspace'] = numspace env.filters['arabnum'] = arabnum env.filters['blendn'] = blendn + env.filters['fraction'] = fraction 
corpus = [] for x in args.corpus: diff --git a/lulua/stats.py b/lulua/stats.py index 9c11d10..1d051b3 100644 --- a/lulua/stats.py +++ b/lulua/stats.py @@ -360,7 +360,7 @@ from .text import mapChars, charMap def layoutstats (args): """ - Statistics for the report + Various statistics for the report """ stats = pickle.load (sys.stdin.buffer) @@ -384,6 +384,27 @@ def layoutstats (args): ] sentences = [sentenceStats (keyboard, layout, mapChars (s, charMap).replace ('\r\n', '\n')) for s in sentences] + # Impact of hamza + yah = '\u064a' + waw = '\u0648' + alef = 'ا' + hamzaAbove = '\u0654' + hamzaBelow = '\u0655' + # list of combination counts for each match + combPerGroup = defaultdict (int) + combStats = stats['simple'].combinations + letterWithHamza = [alef+hamzaAbove, alef+hamzaBelow, yah+hamzaAbove, waw+hamzaAbove] + for letter in [alef] + letterWithHamza: + match, combinations = layout (letter) + if match != letter: + # not a single key or single combination + continue + combPerGroup[letter] += sum (map (lambda x: combStats[x], combinations)) + combinations = sum (stats['simple'].combinations.values ()) + hamzaImpact = sum (map (lambda x: combPerGroup[x], letterWithHamza))/combinations + x = combPerGroup[alef+hamzaAbove] + combPerGroup[alef+hamzaBelow] + hamzaOnAlef = x/(x+combPerGroup[alef]) + pickle.dump (dict ( layout=args.layout, hands=dict (hands), @@ -391,6 +412,8 @@ def layoutstats (args): buttonPresses=buttonPresses, asymmetry=asymmetry, sentences=sentences, + hamzaImpact=hamzaImpact, + hamzaOnAlef=hamzaOnAlef, ), sys.stdout.buffer) def latinImeDict (args): |