From d6c5dbc925e63bebc048356c4a54a1ad6b364014 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sun, 1 Nov 2020 17:00:45 +0100 Subject: Explain design decisions Fixes #10. --- lulua/data/report/index.html | 77 +++++++++++++++++++++++++++++++++++++++++--- lulua/data/report/style.css | 8 ++++- lulua/report.py | 7 ++++ lulua/stats.py | 25 +++++++++++++- 4 files changed, 110 insertions(+), 7 deletions(-) diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html index a18dcb7..cae6024 100644 --- a/lulua/data/report/index.html +++ b/lulua/data/report/index.html @@ -268,13 +268,18 @@ $$a = \frac{b_{left}}{b_{total}} - \frac{b_{right}}{b_{total}}$$

- The layout proposed uses four layers and assumes a 102/105 key ISO - keyboard common in Europe to accomodate the shift keys necessary. + The layout proposed uses four shift layers in a way inspired by Neo2. + + Thus it assumes a 102/105 + key ISO keyboard common in Europe – but also available in Arab + countries – to accommodate the necessary shift keys. These are in order: Shift on the left and right, caps lock on the left and the rightmost key in the middle row, the - key right of the left shift key and the key labeled Alt Gr to + key to the right of the left shift key and the key labeled Alt Gr to the right of the spacebar. Symbols are assigned to the four layers by their function: punctuation, diacritics, other.

+
+ +

Apple, for instance, provides an Arabic + hardware keyboard with this physical layout. But both variants, + 101/104 key and 102/105 key devices, seem to exist in the Arab + world.

+
+

The first layer was optimized using an extended reimplementation of carpalx. @@ -299,10 +313,10 @@

-

(This is This is a common way of arranging brackets, because most algorithms ignore - human desire for symmetry.)

+ human desire for symmetry.

@@ -317,6 +331,59 @@ typing load, but naturally the left middle finger is used more frequently due to its assignment to the letter alif.

+ +

+ The layout targets Quranic and Modern Standard Arabic (MSA), also called Fusha + (الفصحى), only. + + Dialectal Arabic (العامية) is mainly a spoken + language, although with the rise of social media sites like Twitter and + Facebook this is changing. + + For now, however, it’s not an optimization target due to the lack of a + good, representative corpus. +

+ +

+ Designing the layout to be compose-based has both benefits and + disadvantages. + + Compose-based mainly means the hamza ء + is treated like an optional diacritic for Alef, Waw and Yah instead of + viewing Alef-Hamza, Waw-Hamza and Yah-Hamza as precombined, atomic + units. + + Although أ and ا are not the same, the hamza can be dropped if the + writer’s intention is unambiguously inferable from context. + + Thus it makes sense to provide hamza as a combining character on the + keyboard. + + Additionally it uses two fewer keys than precombining it with its stems, + allowing the entire alphabet plus hamza diacritic to fit on a single + keyboard layer. + + However, there is a cost to this approach: + All hamza variants account for {{ + '%.1f'|format(layoutstats['ar-osx'].hamzaImpact*100) }}% of button + combinations. + + Splitting hamza from its stem means doubling the total number of + button combinations and thus button presses, decreasing scores like + words per minute (WPM) slightly. + + Splitting Alef and Alef-Hamza could also reduce pressure on the left middle + finger and allow for a more even distribution, since {{ + layoutstats['ar-osx'].hamzaOnAlef|fraction }}th of all Alef + uses are with Hamza. +

+
+ +

See for example section 3.3 of Buckwalter’s Issues in Arabic Morphological Analysis. +

+
diff --git a/lulua/data/report/style.css b/lulua/data/report/style.css index f62fcef..ed0d32d 100644 --- a/lulua/data/report/style.css +++ b/lulua/data/report/style.css @@ -240,6 +240,12 @@ details[open].remarks { margin-bottom: 1em; } details.remarks summary { - list-style-type: "+"; + list-style-type: "↳ Remarks"; cursor: pointer; + opacity: 0.4; } + +details.remarks p { + margin-top: 0; +} + diff --git a/lulua/report.py b/lulua/report.py index 8a50681..b25201d 100644 --- a/lulua/report.py +++ b/lulua/report.py @@ -21,6 +21,7 @@ import sys, argparse, logging, pickle, math from gettext import GNUTranslations, NullTranslations from decimal import Decimal +from fractions import Fraction import yaml from jinja2 import Environment, PackageLoader @@ -40,6 +41,11 @@ def approx (i, lang='en'): units.pop (0) return round (i, 1), units[0] +def fraction (n, maxdenom=10): + """ Turn floating number n into a human-digestable fraction """ + f = Fraction (n).limit_denominator (maxdenom) + return f'{f.numerator}\u2044{f.denominator}' + def numspace (s): """ Replace ordinary spaces with unicode FIGURE SPACE """ return s.replace (' ', '\u2007') @@ -83,6 +89,7 @@ def render (): env.filters['numspace'] = numspace env.filters['arabnum'] = arabnum env.filters['blendn'] = blendn + env.filters['fraction'] = fraction corpus = [] for x in args.corpus: diff --git a/lulua/stats.py b/lulua/stats.py index 9c11d10..1d051b3 100644 --- a/lulua/stats.py +++ b/lulua/stats.py @@ -360,7 +360,7 @@ from .text import mapChars, charMap def layoutstats (args): """ - Statistics for the report + Various statistics for the report """ stats = pickle.load (sys.stdin.buffer) @@ -384,6 +384,27 @@ def layoutstats (args): ] sentences = [sentenceStats (keyboard, layout, mapChars (s, charMap).replace ('\r\n', '\n')) for s in sentences] + # Impact of hamza + yah = '\u064a' + waw = '\u0648' + alef = 'ا' + hamzaAbove = '\u0654' + hamzaBelow = '\u0655' + # list of combination counts for each match + combPerGroup = 
defaultdict (int) + combStats = stats['simple'].combinations + letterWithHamza = [alef+hamzaAbove, alef+hamzaBelow, yah+hamzaAbove, waw+hamzaAbove] + for letter in [alef] + letterWithHamza: + match, combinations = layout (letter) + if match != letter: + # not a single key or single combination + continue + combPerGroup[letter] += sum (map (lambda x: combStats[x], combinations)) + combinations = sum (stats['simple'].combinations.values ()) + hamzaImpact = sum (map (lambda x: combPerGroup[x], letterWithHamza))/combinations + x = combPerGroup[alef+hamzaAbove] + combPerGroup[alef+hamzaBelow] + hamzaOnAlef = x/(x+combPerGroup[alef]) + pickle.dump (dict ( layout=args.layout, hands=dict (hands), @@ -391,6 +412,8 @@ def layoutstats (args): buttonPresses=buttonPresses, asymmetry=asymmetry, sentences=sentences, + hamzaImpact=hamzaImpact, + hamzaOnAlef=hamzaOnAlef, ), sys.stdout.buffer) def latinImeDict (args): -- cgit v1.2.3