diff options
Diffstat (limited to 'lulua')
-rw-r--r-- | lulua/data/keyboards/ibmpc105.yaml | 436 | ||||
-rw-r--r-- | lulua/data/layouts/ar-ergoarabic.yaml | 168 | ||||
-rw-r--r-- | lulua/data/layouts/ar-qtaish.yaml | 64 | ||||
-rw-r--r-- | lulua/data/report/index.html | 286 | ||||
-rw-r--r-- | lulua/data/report/style.css | 11 | ||||
-rw-r--r-- | lulua/keyboard.py | 360 | ||||
-rw-r--r-- | lulua/plot.py | 158 | ||||
-rw-r--r-- | lulua/render.py | 22 | ||||
-rw-r--r-- | lulua/report.py | 51 | ||||
-rw-r--r-- | lulua/stats.py | 56 | ||||
-rw-r--r-- | lulua/test_keyboard.py | 18 | ||||
-rw-r--r-- | lulua/test_layout.py | 2 | ||||
-rw-r--r-- | lulua/test_report.py | 12 | ||||
-rw-r--r-- | lulua/text.py | 4 | ||||
-rw-r--r-- | lulua/util.py | 18 |
15 files changed, 1174 insertions, 492 deletions
diff --git a/lulua/data/keyboards/ibmpc105.yaml b/lulua/data/keyboards/ibmpc105.yaml index d9dcb76..0938262 100644 --- a/lulua/data/keyboards/ibmpc105.yaml +++ b/lulua/data/keyboards/ibmpc105.yaml @@ -3,130 +3,558 @@ description: Standard IBM PC 105 key layout (European) rows: - - - kind: letter name: Bl1 + scancode: + keyman: K_BKSLASH + macos: 50 + windows: + - 41 + xorg: 49 + width: 1 - kind: letter name: Bl2 + scancode: + keyman: K_1 + macos: 18 + windows: + - 2 + xorg: 10 + width: 1 - kind: letter name: Bl3 + scancode: + keyman: K_2 + macos: 19 + windows: + - 3 + xorg: 11 + width: 1 - kind: letter name: Bl4 + scancode: + keyman: K_3 + macos: 20 + windows: + - 4 + xorg: 12 + width: 1 - kind: letter name: Bl5 + scancode: + keyman: K_4 + macos: 21 + windows: + - 5 + xorg: 13 + width: 1 - kind: letter name: Bl6 + scancode: + keyman: K_5 + macos: 23 + windows: + - 6 + xorg: 14 + width: 1 - kind: letter name: Bl7 + scancode: + keyman: K_6 + macos: 22 + windows: + - 7 + xorg: 15 + width: 1 - - kind: letter name: Br6 + scancode: + keyman: K_7 + macos: 26 + windows: + - 8 + xorg: 16 + width: 1 - kind: letter name: Br5 + scancode: + keyman: K_8 + macos: 28 + windows: + - 9 + xorg: 17 + width: 1 - kind: letter name: Br4 + scancode: + keyman: K_9 + macos: 25 + windows: + - 10 + xorg: 18 + width: 1 - kind: letter name: Br3 + scancode: + keyman: K_0 + macos: 29 + windows: + - 11 + xorg: 19 + width: 1 - kind: letter name: Br2 + scancode: + keyman: K_LBRKT + macos: 27 + windows: + - 12 + xorg: 20 + width: 1 - kind: letter name: Br1 + scancode: + keyman: K_RBRKT + macos: 24 + windows: + - 13 + xorg: 21 + width: 1 - name: Br_bs + scancode: + keyman: K_BKSP + macos: 51 + windows: + - 14 + xorg: 22 width: 1.75 - - - name: Cl_tab + scancode: + keyman: K_TAB + macos: 48 + windows: + - 15 + xorg: 23 width: 1.75 - kind: letter name: Cl1 + scancode: + keyman: K_Q + macos: 12 + windows: + - 16 + xorg: 24 + width: 1 - kind: letter name: Cl2 + scancode: + keyman: K_W + macos: 13 + 
windows: + - 17 + xorg: 25 + width: 1 - kind: letter name: Cl3 + scancode: + keyman: K_E + macos: 14 + windows: + - 18 + xorg: 26 + width: 1 - kind: letter name: Cl4 + scancode: + keyman: K_R + macos: 15 + windows: + - 19 + xorg: 27 + width: 1 - kind: letter name: Cl5 + scancode: + keyman: K_T + macos: 17 + windows: + - 20 + xorg: 28 + width: 1 - - kind: letter name: Cr7 + scancode: + keyman: K_Z + macos: 16 + windows: + - 21 + xorg: 29 + width: 1 - kind: letter name: Cr6 + scancode: + keyman: K_U + macos: 32 + windows: + - 22 + xorg: 30 + width: 1 - kind: letter name: Cr5 + scancode: + keyman: K_I + macos: 34 + windows: + - 23 + xorg: 31 + width: 1 - kind: letter name: Cr4 + scancode: + keyman: K_O + macos: 31 + windows: + - 24 + xorg: 32 + width: 1 - kind: letter name: Cr3 + scancode: + keyman: K_P + macos: 35 + windows: + - 25 + xorg: 33 + width: 1 - kind: letter name: Cr2 + scancode: + keyman: K_COLON + macos: 33 + windows: + - 26 + xorg: 34 + width: 1 - kind: letter name: Cr1 + scancode: + keyman: K_EQUAL + macos: 30 + windows: + - 27 + xorg: 35 + width: 1 - kind: multi name: CD_ret + scancode: + keyman: K_ENTER + macos: 36 + windows: + - 28 + xorg: 36 span: 2 + width: 1 - - - name: Dl_caps + scancode: + keyman: CAPS + macos: 57 + windows: + - 58 + xorg: 66 width: 2 - kind: letter name: Dl1 + scancode: + keyman: K_A + macos: 0 + windows: + - 30 + xorg: 38 + width: 1 - kind: letter name: Dl2 + scancode: + keyman: K_S + macos: 1 + windows: + - 31 + xorg: 39 + width: 1 - kind: letter name: Dl3 - - kind: letter - isMarked: true + scancode: + keyman: K_D + macos: 2 + windows: + - 32 + xorg: 40 + width: 1 + - isMarked: true + kind: letter name: Dl4 + scancode: + keyman: K_F + macos: 3 + windows: + - 33 + xorg: 41 + width: 1 - kind: letter name: Dl5 + scancode: + keyman: K_G + macos: 5 + windows: + - 34 + xorg: 42 + width: 1 - - kind: letter name: Dr7 - - kind: letter - isMarked: true + scancode: + keyman: K_H + macos: 4 + windows: + - 35 + xorg: 43 + width: 1 + - 
isMarked: true + kind: letter name: Dr6 + scancode: + keyman: K_J + macos: 38 + windows: + - 36 + xorg: 44 + width: 1 - kind: letter name: Dr5 + scancode: + keyman: K_K + macos: 40 + windows: + - 37 + xorg: 45 + width: 1 - kind: letter name: Dr4 + scancode: + keyman: K_L + macos: 37 + windows: + - 38 + xorg: 46 + width: 1 - kind: letter name: Dr3 + scancode: + keyman: K_BKQUOTE + macos: 41 + windows: + - 39 + xorg: 47 + width: 1 - kind: letter name: Dr2 + scancode: + keyman: K_QUOTE + macos: 39 + windows: + - 40 + xorg: 48 + width: 1 - kind: letter name: Dr1 + scancode: + keyman: K_SLASH + windows: + - 43 + xorg: 51 + width: 1 - - - name: El_shift + scancode: + keyman: SHIFT + macos: 57 + windows: + - 42 + xorg: 50 width: 1.5 - kind: letter name: El1 + scancode: + keyman: K_oE2 + windows: + - 86 + xorg: 94 + width: 1 - kind: letter name: El2 + scancode: + keyman: K_Y + macos: 6 + windows: + - 44 + xorg: 52 + width: 1 - kind: letter name: El3 + scancode: + keyman: K_X + macos: 7 + windows: + - 45 + xorg: 53 + width: 1 - kind: letter name: El4 + scancode: + keyman: K_C + macos: 8 + windows: + - 46 + xorg: 54 + width: 1 - kind: letter name: El5 + scancode: + keyman: K_V + macos: 9 + windows: + - 47 + xorg: 55 + width: 1 - kind: letter name: El6 + scancode: + keyman: K_B + macos: 11 + windows: + - 48 + xorg: 56 + width: 1 - - kind: letter name: Er5 + scancode: + keyman: K_N + macos: 45 + windows: + - 49 + xorg: 57 + width: 1 - kind: letter name: Er4 + scancode: + keyman: K_M + macos: 46 + windows: + - 50 + xorg: 58 + width: 1 - kind: letter name: Er3 + scancode: + keyman: K_COMMA + macos: 43 + windows: + - 51 + xorg: 59 + width: 1 - kind: letter name: Er2 + scancode: + keyman: K_PERIOD + macos: 47 + windows: + - 52 + xorg: 60 + width: 1 - kind: letter name: Er1 + scancode: + keyman: K_HYPHEN + macos: 44 + windows: + - 53 + xorg: 61 + width: 1 - name: Er_shift + scancode: + keyman: SHIFT + macos: 60 + windows: + - 54 + xorg: 62 width: 2.35 - - - name: Fl_ctrl + 
scancode: + keyman: LCTRL + macos: 59 + windows: + - 29 + xorg: 37 width: 1.75 - name: Fl_win + scancode: + keyman: K_?5B + macos: 55 + windows: + - 224 + - 91 + xorg: 133 width: 1.25 - name: Fl_alt + scancode: + keyman: LALT + macos: 58 + windows: + - 56 + xorg: 64 width: 1.25 - name: Fl_space + scancode: + keyman: K_SPACE + macos: 49 + windows: + - 57 + xorg: 65 width: 3 - - name: Fr_space + scancode: + keyman: K_SPACE + macos: 49 + windows: + - 57 + xorg: 65 width: 3 - name: Fr_altgr + scancode: + keyman: RALT + macos: 61 + windows: + - 224 + - 56 + xorg: 108 width: 1.25 - name: Fr_win + scancode: + keyman: K_?5C + macos: 55 + windows: + - 224 + - 92 + xorg: 105 width: 1.25 - name: Fr_menu + scancode: + keyman: K_?5D + windows: + - 224 + - 93 + xorg: 135 width: 1.25 - name: Fr_ctrl + scancode: + keyman: RCTRL + windows: + - 224 + - 29 + xorg: 105 width: 1.25 diff --git a/lulua/data/layouts/ar-ergoarabic.yaml b/lulua/data/layouts/ar-ergoarabic.yaml new file mode 100644 index 0000000..620ead1 --- /dev/null +++ b/lulua/data/layouts/ar-ergoarabic.yaml @@ -0,0 +1,168 @@ +name: ar-ergoarabic +layout: +- layer: + Bl1: "`" + Bl2: "1" + Bl3: "2" + Bl4: "3" + Bl5: "4" + Bl6: "5" + Bl7: "6" + Br6: "7" + Br5: "8" + Br4: "9" + Br3: "0" + Br2: "-" + Br1: "=" + Br_bs: "\b" + + Cl_tab: "\t" + Cl1: "د" + Cl2: "ص" + Cl3: "ث" + Cl4: "ق" + Cl5: "ف" + Cr7: "ء" + Cr6: "ع" + Cr5: "ه" + Cr4: "و" + Cr3: "ة" + Cr2: "]" + Cr1: "[" + + CD_ret: "\n" + + Dl1: "ش" + Dl2: "س" + Dl3: "ي" + Dl4: "ب" + Dl5: "ل" + Dr7: "ا" + Dr6: "ت" + Dr5: "ن" + Dr4: "م" + Dr3: "ك" + Dr2: "'" + Dr1: "\\" + + El2: "\u064a\u0654" # composed: ئ + El3: "ى" + El4: "ط" + El5: "ر" + El6: "خ" + Er5: "ح" + Er4: "ج" + Er3: "،" + Er2: "." + Er1: "/" + + Fl_space: " " + Fr_space: " " + modifier: + - [] +- layer: + Bl1: "~" # shadda + Bl2: "!" 
+ Bl3: "@" + Bl4: "#" + Bl5: "$" + Bl6: "%" + Bl7: "^" + Br6: "&" + Br5: "*" + Br4: ")" + Br3: "(" + Br2: "_" + Br1: "+" + + Cl1: "ذ" + Cl2: "ض" + Cl3: "\u064f" # damma + Cl4: "\u064c" # dammatan + Cl5: "\u0651" # shadda + Cr7: "ا\u0655" # composed: إ + Cr6: "غ" + Cr5: "÷" + Cr4: "\u0648\u0654" # composed: ؤ + Cr3: "؛" + Cr2: "}" + Cr1: "{" + + Dl1: "\u0650" # kasra + Dl2: "\u064d" # kasratan + Dl3: "\u064e" # fatha + Dl4: "\u064b" # fathatan + Dl5: "ا\u0653" # composed: آ + Dr7: "ا\u0654" # composed: أ + Dr6: "\u0640" # tatweel + Dr5: "×" + Dr4: "," + Dr3: ":" + Dr2: '"' + + El3: "\u0652" # sukun + El4: "ظ" + El5: "ز" + El6: "…" + Er5: "»" + Er4: "«" + Er3: ">" + Er2: "<" + Er1: "؟" + modifier: + - [El_shift] + - [Er_shift] +- layer: + Bl2: "١" + Bl3: "٢" + Bl4: "٣" + Bl5: "٤" + Bl6: "٥" + Bl7: "٦" + Br6: "٧" + Br5: "٨" + Br4: "٩" + Br3: "٠" + Br1: "≠" + + Cl5: "ڤ" + + Dl4: "پ" + Dl5: "\u0670" # superscript alef + Dr7: "\u0671" # alef wasla + Dr5: "\u066b" # decimal separator + Dr3: "گ" + Dr2: '⟩' + Dr1: "⟨" + + El2: "›" + El3: "‹" + El5: "ژ" + Er4: "چ" + Er1: "\u066d" # five pointed star + modifier: + - [Fr_altgr] +- layer: + Bl6: "‰" + Br2: "\u2011" # non-breaking hyphen + Br1: "≈" + + Cl1: "\u2066" # LRI + Cl2: "\u2067" # RLI + Cl3: "\u2068" # first strong isolate + Cl4: "\u2069" # PDI + Cr7: "\u202a" # LRE + Cr6: "\u202b" # RLE + Cr5: "\u202c" # PDF + Cr3: "\u200e" # LRM + Cr2: "\u200f" # RLM + Cr1: "\u061c" # ALM + + Dr2: "\u200d" # ZWJ + Dr1: "\u202f" # NNBSP + + Er1: "\u200c" # ZWNJ + modifier: + - [Fr_altgr, El_shift] + - [Fr_altgr, Er_shift] + diff --git a/lulua/data/layouts/ar-qtaish.yaml b/lulua/data/layouts/ar-qtaish.yaml new file mode 100644 index 0000000..2acd0d6 --- /dev/null +++ b/lulua/data/layouts/ar-qtaish.yaml @@ -0,0 +1,64 @@ +name: ar-qtaish +layout: +- layer: + Bl2: "١" + Bl3: "٢" + Bl4: "٣" + Bl5: "٤" + Bl6: "٥" + Bl7: "٦" + Br6: "٧" + Br5: "٨" + Br4: "٩" + Br3: "٠" + + Cl_tab: "\t" + Cl1: "ق" + Cl2: "ى" + Cl3: "ة" + Cl4: "د" + Cl5: "ف" + 
Cr7: "ك" + Cr6: "ص" + Cr5: "س" + Cr4: "ح" + Cr3: "ذ" + Cr2: "ش" + Cr1: "ج" + + CD_ret: "\n" + + Dl1: "ر" + Dl2: "و" + Dl3: "ن" + Dl4: "ل" + Dl5: "ب" + Dr7: "ت" + Dr6: "ا" + Dr5: "ع" + Dr4: "ي" + Dr3: "م" + Dr2: "ه" + + El2: "ض" + El3: "خ" + El4: "ز" + El5: "ظ" + El6: "غ" + Er5: "ا\u0654" # composed: أ + Er4: "ء" + Er3: "ط" + Er2: "ث" + Er1: "\u064a\u0654" # composed: ئ + + Fl_space: " " + Fr_space: " " + modifier: + - [] +- layer: + Dl2: "\u0648\u0654" # composed: ؤ + Dr6: "ا\u0653" # composed: آ + Er5: "ا\u0655" # composed: إ + modifier: + - [El_shift] + - [Er_shift] diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html index e35e08f..e2108cd 100644 --- a/lulua/data/report/index.html +++ b/lulua/data/report/index.html @@ -5,15 +5,23 @@ <title>لؤلؤة: لوحة مفاتيح عربية</title> <!--<meta name="description" content="">--> <meta name="viewport" content="width=device-width, initial-scale=1"> - + {# Fonts #} <link href="https://fonts.googleapis.com/css?family=IBM+Plex+Mono|IBM+Plex+Sans:100,400&display=swap" rel="stylesheet"> + + {# Pure.css #} <link rel="stylesheet" href="https://unpkg.com/purecss@2.0.3/build/pure-min.css" integrity="sha384-cg6SkqEOCV1NbJoCu11+bm0NvBRc8IYLRGXkmNrqUBfTjmMYwNKPWBTIKyw9mHNJ" crossorigin="anonymous"> <link rel="stylesheet" href="https://unpkg.com/purecss@2.0.3/build/grids-responsive-min.css" integrity="sha384-+lKLtMyKzY/ZdqRXlhw2whazop7duDxgP+oWPB+EGry5wK+vJnZwSsgMnAmDS/39" crossorigin="anonymous"> - <script src="https://polyfill.io/v3/polyfill.min.js?features=es6" crossorigin="anonymous"></script> - <script id="MathJax-script" async src="https://www.unpkg.com/mathjax@3.1.0/es5/tex-mml-chtml.js" integrity="sha384-ynpCZozLxgqK3wrgBu8qH8qPG3eD8mME8z0zugAX26UMb5HfLp2PtvtDH4vdmgkm" crossorigin="anonymous"></script> + + {# TeX support #} + <link rel="stylesheet" href="https://unpkg.com/katex@0.13.18/dist/katex.min.css" integrity="sha384-zTROYFVGOfTw7JV7KUu8udsvW2fx4lWOsCEDqhBreBwlHI4ioVRtmIvEThzJHGET" crossorigin="anonymous"> 
+ <script defer src="https://unpkg.com/katex@0.13.18/dist/katex.min.js" integrity="sha384-GxNFqL3r9uRJQhR+47eDxuPoNE7yLftQM8LcxzgS4HT73tp970WS/wV5p8UzCOmb" crossorigin="anonymous"></script> + + <script defer src="https://unpkg.com/katex@0.13.18/dist/contrib/auto-render.min.js" integrity="sha384-vZTG03m+2yp6N6BNi5iM4rW4oIwk5DfcNdFfxkk9ZWpDriOkXX8voJBFrAO7MpVl" crossorigin="anonymous" onload="renderMathInElement(document.body);"></script> + {# bokeh #} + {# Defer loading bokeh, so it does not block rendering #} {% for f in bokehres.js_files -%} - <script src="{{ f }}"></script> + <script defer crossorigin="anonymous" src="{{ f }}"></script> {%- endfor %} {% for f in bokehres.css_files -%} <link rel="stylesheet" href="{{ f }}"> @@ -129,11 +137,10 @@ <div class="lbox"> <h2>The Arabic alphabet</h2> <p> - 28 letters make up the Arabic alphabet and quite a few extra - symbols are required for proper text input, like the hamza in its different - shapes <bdo dir="ltr" lang="ar">أ إ آ ء ئ ؤ</bdo>, ta marbutah <bdo - dir="ltr" lang="ar">ة</bdo>, alif maqsurah <bdo dir="ltr" - lang="ar">ى</bdo> and various diacritics for vowelized texts. + 28 letters make up the Arabic alphabet and quite a few extra symbols are + required for proper text input, like the {{ hamzah }} in its different + shapes <bdo dir="ltr" lang="ar">أ إ آ ء ئ ؤ</bdo>, {{ tamarbutah + }}, {{ alifmaqsurah }} and various diacritics for vowelized texts. <!-- --> Since the performance of a keyboard layout depends on the text entered it is necessary to study its mono-, di- and trigraph frequencies first. @@ -222,8 +229,9 @@ </details> <p> - The plot below shows <bdo dir="ltr" lang="ar">ا ل ي م و ن</bdo> can be - considered the most frequently used letters in the Arabic language. + The plot below shows {{ alif }}, {{ lam }}, {{ ya }}, {{ mim }}, {{ + waw }} and {{ nun }} can be considered the most frequently used letters + in the Arabic language. 
<!-- --> Together they account for more than 55% of all letters in the corpus. </p> @@ -328,17 +336,17 @@ The most frequent letters have all been assigned to the home row, which makes them easily accessible. <!-- --> - <bdo lang="ar" dir="ltr">ا</bdo> and <bdo lang="ar" dir="ltr">ل</bdo> + {{ Alif }} and {{ lam }} are typed with different hands, balancing the load on hands almost evenly. <!-- --> The index and middle finger of both hands share the majority of the typing load, but naturally the left middle finger is used more - frequently due to its assignment to the letter alif. + frequently due to its assignment to the letter {{ alif }}. </p> <p> - The layout targets Quaranic and Modern Standard Arabic (MSA), also called Fusha + The layout targets Quranic and Modern Standard Arabic (MSA), also called Fuṣḥa (<bdo lang="ar">الفصحى</bdo>), only. <!-- --> Dialectical Arabic (<bdo lang="ar">العامية</bdo>) is mainly a spoken @@ -353,35 +361,35 @@ Designing the layout to be compose-based has both benefits and disadvantages. <!-- --> - Compose-based mainly means the hamza <bdo lang="ar" dir="ltr">ء</bdo> - is treated like an optional diacritic for Alef, Waw and Yah instead of - viewing Alef-Hamza, Waw-Hamza and Yah-Hamza as precombined, atomic - units. + Compose-based mainly means the {{ hamzah }} is treated like an optional + diacritic for {{ alif }}, {{ waw }} and {{ ya }} instead of viewing + {{ alifhamzah }}, {{ wawhamzah }} and {{ yahamzah }} as precombined, + atomic units. <!-- --> - Although <bdo lang="ar" dir="ltr">أ</bdo> and <bdo lang="ar" - dir="ltr">ا</bdo> are not the same, the hamza can be dropped if the - writer’s intention is unambigiously inferable from context. + Although {{ alifhamzah_ }} and {{ alif_ }} are not the same, the {{ + hamzah_ }} can be dropped if the writer’s intention is unambiguously + inferable from context. <!-- --> - Thus it makes sense to provide hamza as a combining character on the - keyboard. 
+ Thus it makes sense to provide {{ hamzah_ }} as a combining character + on the keyboard. <!-- --> Additionally it uses two keys less than precombining it with its stems, - allowing the entire alphabet plus hamza diacritic to fit on a single + allowing the entire alphabet plus hamzah diacritic to fit on a single keyboard layer. <!-- --> However, there is a cost to this approach: - All hamza variants account for {{ + All {{ hamzah_ }} variants account for {{ '%.1f'|format(layoutstats['ar-osx'].hamzaImpact*100) }}% of button combinations. <!-- --> - Splitting hamza and from its stem means doubling the total number of - button combinations and thus button presses, decreasing scores like + Splitting {{ hamzah_ }} from its stem means doubling the total number + of button combinations and thus button presses, decreasing scores like words per minute (WPM) slightly. <!-- --> - Splitting Alef and Alef-Hamza could also reduce pressure on left middle - finger and allow for more even distribution, since {{ - layoutstats['ar-osx'].hamzaOnAlef|fraction }}<sup>th</sup> of all Alef - uses are with Hamza. + Splitting {{ alif }} and {{ alifhamzah }} could also reduce pressure + on left middle finger and allow for more even distribution, since {{ + layoutstats['ar-osx'].hamzaOnAlef|fraction }}<sup>th</sup> of all {{ + alif }} uses are with {{ hamzah }}. 
</p> <details class="remarks"> <summary></summary> @@ -415,23 +423,6 @@ {% endif %} {% endfor %} </div> -<div class="sentencestats"> -<p lang="en">Examples:</p> -<ul lang="ar"> -{% for sentence in stats.sentences %} - <li> - {% for match, weight in sentence[0] -%} - {%- if weight is none -%} - <span>{{ match }}</span> - {%- else -%} - {%- set c = weight|blendn((38, 139, 210), (108, 113, 196), (211, 54, 130), (220, 50, 47)) -%} - <span style="color: rgb({{ c[0] }}, {{ c[1] }}, {{ c[2] }});" title="{{ '%5.5f'|format(weight) }}">{{ match }}</span> - {%- endif -%} - {%- endfor %} - </li> -{% endfor %} -</ul> -</div> {% endmacro %} <figure id="ar-lulua-heat"> @@ -450,7 +441,62 @@ <div class="lbox"> <h2><a href="#related">Related work</a></h2> <p>This section explores existing keyboard layouts made for the - Arabic language and analyzes their usability.</p> + Arabic language and analyzes their usability. + <!-- --> + Comparing them with the proposed layout above is difficult at best, + because the layouts presented below cover different character sets. + <!-- --> + Some lack numbers, some do not include short vowels and others provide + no way to type symbols. + <!-- --> + Therefore no individual score is assigned to each layout, but an analysis + of each layout’s features is given. + </p> + </div> + </div> + </div> + + <figure id="triadeffort"> + <div class="lbox" lang="en"> + <div id="triadeffort-div"></div> + </div> + <figcaption class="pure-g flexreverse"> + </figcaption> + </figure> + + <div class="pure-g flexreverse"> + <div class="pure-u-1 pure-u-xl-1-2"> + </div> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> + <div class="lbox"> + <p> + Carpalx tries to minimize the effort of typing in blocks of three + consecutive keystrokes, triads, and thus a good layout in that sense + should make typing frequent triads easy. + <!-- --> + The figure above plots cumulative triad frequency on the x-axis and + weighted cumulative effort on the y-axis. 
+ <!-- --> + Thus, at an x value of 0.5 the y-axis is the sum of triad frequencies + multiplied by their effort for all triads responsible for 50% of the + typing process. + <!-- --> + Standard layouts are the layouts from <a href="#ar-asmo663">ASMO</a>, + <a href="#ar-linux">Linux</a> and <a href="#ar-osx">OSX</a> whereas + <em>usable</em> lists only those which are actually relevant for typing. + <!-- --> + As we can see the layout presented above meets the optimization goal. + <!-- --> + Only the top 5% of all triads are “easier” to type with <a + href="#ar-malas">Malas’ layout</a>, because lulua splits {{ hamzah }} + from its {{ alif }} stem. + <!-- --> + As expected the <a href="#ar-phonetic">phonetic layout</a> is one of the + worst ones, because QWERTY is not optimized for Arabic letter frequencies. + </p> + <p> + The following sections provide details about these layouts. + </p> </div> </div> </div> @@ -474,8 +520,8 @@ dir="ltr" lang="ar">ض ص، س ش، ح ج خ</bdo>) and not frequency. <!-- --> Also it overuses the right index finger by assigning the four - high-frequency letters <bdo lang="ar" dir="ltr">ا ت و ة</bdo> to - it. + high-frequency letters {{ alif }}, {{ ta }}, {{ waw }} and {{ tamarbutah + }} to it. </p> </div> </div> @@ -497,14 +543,14 @@ <h3><a href="#ar-osx">Mac OS X</a></h3> <p> Mac OS X’s Arabic keyboard layout makes a few small changes to ASMO - 663 by moving the <bdo lang="ar" dir="ltr">ة</bdo> to a hard to + 663 by moving the {{ tamarbutah }} to a hard to reach spot on the right of the top row. <!-- --> It also moves the short vowels from the first to the top row of the second layer and replaces them with symbols. <!-- --> The bottom row keys are aditionally shifted to the right, beginning - with <bdo lang="ar" dir="ltr">ر</bdo>. + with {{ ra }}. </p> </div> </div> @@ -528,15 +574,14 @@ A more common layout is the one used on Linux, which also exists on Windows with minor changes to the first layer. 
<!-- --> - While its top and center row barely differ from ASMO 663 the - bottom row now contains a separate key for the ligature <bdo - lang="ar" dir="ltr">ﻻ</bdo>, likely inherited from <a + While its top and center row barely differ from + ASMO 663 the bottom row now contains a separate key + for the ligature {{ lamalif }} , likely inherited from <a href="https://oztypewriter.blogspot.com/2014/10/the-arabic-typewriter-keyboard-and.html">early typewriter layouts</a>. <!-- --> But at the cost of pushing punctuation characters to the second - layer, <bdo dir="ltr" lang="ar">د</bdo> into the top and <bdo - dir="ltr" lang="ar">ذ</bdo> even further into the number row. + layer, {{ dal }} into the top and {{ dhal }} even further into the number row. </p> </div> </div> @@ -591,10 +636,10 @@ </p> <p> While the layout distributes load between fingers quite well it - favors the left hand by assigning <bdo dir="ltr" lang="ar">ا</bdo> - and <bdo dir="ltr" lang="ar">ل</bdo> to it. + favors the left hand by assigning {{ alif }} + and {{ lam }} to it. <!-- --> - The decision to place <bdo dir="ltr" lang="ar">ث</bdo> in a very + The decision to place {{ tha }} in a very prominent spot seems weird, given it only accounts for 0.5% of all symbols, even in their own analysis. </p> @@ -636,15 +681,13 @@ <!-- --> Probably due to their unusual assumption that middle- and ring-finger rest in the top row their results are suboptimal, - placing both <bdo dir="ltr" lang="ar">ا</bdo> and <bdo dir="ltr" - lang="ar">ي</bdo> in the top row. + placing both {{ alif }} and {{ ya }} in the top row. <!-- --> Their analysis notices this and suggests improved positions for both characters, but these are not actually implemented. <!-- --> - The big asymmetry is caused by placing <bdo dir="ltr" lang="ar">ا - ل ي</bdo> and <bdo dir="ltr" lang="ar">و</bdo>, four of the five - most frequent letters, on the right hand side. 
+ The big asymmetry is caused by placing {{ alif }}, {{ lam }}, {{ ya }} and + {{ waw }}, four of the five most frequent letters, on the right hand side. </p> </div></div> </div> @@ -672,11 +715,11 @@ optimized for typing speed only, claiming 35% faster typing compared to the <a href="#ar-linux">currently used layouts</a>. <!-- --> - However the decision to put <bdo dir="ltr" lang="ar">ي</bdo> in the top + However the decision to put {{ ya }} in the top row seems odd. <!-- --> - Assigning the same left index finger to <bdo dir="ltr" lang="ar">ا - ي و</bdo>, which are three of the most frequent letters, heavily + Assigning the same left index finger to {{ alif }}, + {{ ya }} and {{ waw }}, which are three of the most frequent letters, heavily strains this particular finger. </p> </div> @@ -711,8 +754,8 @@ well. <!-- --> However their algorithm seems to favor the bottom row instead of the - easier to use top row since it places the letters <bdo dir="ltr" - lang="ar">ب ت ر</bdo> there. + easier to use top row since it places the letters {{ ba }}, {{ ta }} + and {{ ra }} there. </p> </div> </div> @@ -746,20 +789,102 @@ provide <em>three</em> single-quote marks ’ and <em>two</em> Arabic semicolon <bdo dir="ltr" lang="ar">؛</bdo>. <!-- --> - Additionally it places <bdo dir="ltr" lang="ar">ي</bdo> in an even + Additionally it places {{ ya }} in an even worse position than Malas’ layout. </p> </div> </div> </div> - <figure id="ar-osman"> + <figure> <div class="lbox"> <img src="ar-osman-heat.svg"> {{ fingerhandstats(layoutstats['ar-osman']) }} </div> </figure> + <div id="ar-qtaish" class="pure-g flexreverse"> + <div class="pure-u-1 pure-u-xl-1-2"> + </div> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> + <div class="lbox"> + <h3><a href="#ar-qtaish">Qtaish et al</a></h3> + <p> + Qtaish et al presented this layout in + <a href="http://www.sci-int.com/pdf/637456047563529791.pdf">An Improved Arabic Keyboard Layout</a> in 2021. 
+ <!-- --> + They use a novel corpus of 5 million words or roughly 66 million letters + <!-- 65713689 adding up the numbers in their paper --> consisting of + newspapers, (now defunct) social networks and blogs, as well as six + dictionaries (see remarks). + <!-- --> + Then letters were classified into three categories based on their + frequency, which apparently were used to populate home, top and bottom + row (in this order). + <!-- --> + Additionally bigrams were somehow used to arrange letters and avoid + placing them on the same or adjacent fingers, making rolling finger + movements incentivized by carpalx impossible. + <!-- --> + Ultimately it looks like the layout was designed by hand and not through + an automated process. <!-- see section 5 of the paper --> + </p> + + <details class="remarks"> + <summary></summary> + <em>Alfarahindi</em> and <em>Alein Dictionary</em> are most likely the same 8th century book <a href="https://en.wikipedia.org/wiki/Kitab_al-%27Ayn">Kitab al-'Ayn</a>. + <!-- --> + There are also <a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%A7%D8%AC_%D8%A7%D9%84%D8%B9%D8%B1%D9%88%D8%B3_%D9%85%D9%86_%D8%AC%D9%88%D8%A7%D9%87%D8%B1_%D8%A7%D9%84%D9%82%D8%A7%D9%85%D9%88%D8%B3">Taj-Alaroos</a>, <a href="https://www.lesanarab.com/letter/">Lesan Alarab</a>, <a href="https://ar.wikipedia.org/wiki/%D8%A7%D9%84%D9%85%D8%B9%D8%AC%D9%85_%D8%A7%D9%84%D9%88%D8%B3%D9%8A%D8%B7">Almujam Alwaseet</a> and <a href="https://ar.wikipedia.org/wiki/%D8%A7%D9%84%D9%85%D9%86%D8%AC%D8%AF_(%D9%84%D9%88%D9%8A%D8%B3_%D9%85%D8%B9%D9%84%D9%88%D9%81)">Almunjed</a>. + <!-- --> + All of them are lexica, not dictionaries. + </details> + + <p> + Although the authors do not provide a number row, it has been added for + fair comparison. + <!-- --> + However to be actually usable the layout would need punctuation symbols and diacritics. 
+ </p> + </div> + </div> + </div> + + <figure id="ar-qtaish"> + <div class="lbox"> + <img src="ar-qtaish-heat.svg"> + {{ fingerhandstats(layoutstats['ar-qtaish']) }} + </div> + </figure> + + <div id="ar-ergoarabic" class="pure-g flexreverse"> + <div class="pure-u-1 pure-u-xl-1-2"> + </div> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> + <div class="lbox"> + <h3><a href="#ar-ergoarabic">Ergoarabic</a></h3> + <p> + Being first released in 2021 <a + href="https://github.com/darkstego/ergoarabic">Ergoarabic</a> is the + most recent optimized layout for Arabic. + <!-- --> + On the first layer it combines the positions of 17 keys from the <a + href="#ar-linux">standard PC layout</a> with brackets and punctuation + symbols from QWERTY, aiming to retain compatibility with both. + <!-- --> + The remaining keys have been re-arranged or moved to the shift layer by + hand for improved ergonomics. + </p> + </div> + </div> + </div> + + <figure id="ar-ergoarabic"> + <div class="lbox"> + <img src="ar-ergoarabic-heat.svg"> + {{ fingerhandstats(layoutstats['ar-ergoarabic']) }} + </div> + </figure> + <div id="ar-phonetic" class="pure-g flexreverse"> <div class="pure-u-1 pure-u-xl-1-2"> </div> @@ -769,7 +894,7 @@ <p> The <a href="http://arabic.omaralzabir.com/home">Arabic Phonetic Keyboard</a> simply maps the QWERTY layout to Arabic letters, based on their sound. - Thus Q becomes <bdo dir="ltr" lang="ar">ق</bdo>, Y becomes <bdo dir="ltr" lang="ar">ي</bdo> and so on. + Thus Q becomes {{ qaf }}, Y becomes {{ ya }} and so on. It claims to be optimized for writing vowelized texts, especially Quranic Arabic, and thus includes quite a few combining characters and special symbols. 
@@ -836,9 +961,16 @@ </section> <script> -fetch('letterfreq.json') - .then(function(response) { return response.json(); }) - .then(function(item) { Bokeh.embed.embed_item(item, 'letterfreq-div'); }) +function loadPlots () { + const plots = ['letterfreq', 'triadeffort']; + for (const p of plots) { + fetch(p + '.json') + .then(function(response) { return response.json(); }) + .then(function(item) { Bokeh.embed.embed_item(item, p + '-div'); }); + } +} +if (document.readyState != "loading") loadPlots (); +else document.addEventListener("DOMContentLoaded", loadPlots); </script> </body> diff --git a/lulua/data/report/style.css b/lulua/data/report/style.css index ed0d32d..2cffb03 100644 --- a/lulua/data/report/style.css +++ b/lulua/data/report/style.css @@ -30,6 +30,12 @@ using http://colormind.io/bootstrap/ src: local('IBM Plex Sans Arabic Regular'), local('IBMPlexSansArabic-Regular'), url('fonts/IBMPlexSansArabic-Regular.woff2') format('woff2'); } +html { + /* Increase line-height, because Arabic font is taller than Latin and + * we need some space between lines for descenders/ascenders */ + line-height: 1.33; +} + body { font-size: 14pt; background-color: var(--light-shades); @@ -157,11 +163,6 @@ div.fingerhandstats .fingers .index { div.fingerhandstats .fingers .thumb { border: 0.1em solid var(--finger-thumb); } -div.sentencestats ul { - list-style-type: none; - margin: 1em 0; - padding: 0; -} .table-overflow { overflow-x: auto; diff --git a/lulua/keyboard.py b/lulua/keyboard.py index cf96efc..eed58a7 100644 --- a/lulua/keyboard.py +++ b/lulua/keyboard.py @@ -24,281 +24,16 @@ from typing import Text, Dict, Iterator, List from .util import YamlLoader -# XXX move this to keyboard.yaml? 
-_buttonToXorgKeycode = { - 'Bl1': 49, - 'Bl2': 10, - 'Bl3': 11, - 'Bl4': 12, - 'Bl5': 13, - 'Bl6': 14, - 'Bl7': 15, - 'Br6': 16, - 'Br5': 17, - 'Br4': 18, - 'Br3': 19, - 'Br2': 20, - 'Br1': 21, - 'Br_bs': 22, - 'Cl_tab': 23, - 'Cl1': 24, - 'Cl2': 25, - 'Cl3': 26, - 'Cl4': 27, - 'Cl5': 28, - 'Cr7': 29, - 'Cr6': 30, - 'Cr5': 31, - 'Cr4': 32, - 'Cr3': 33, - 'Cr2': 34, - 'Cr1': 35, - 'CD_ret': 36, - 'Dl_caps': 66, - 'Dl1': 38, - 'Dl2': 39, - 'Dl3': 40, - 'Dl4': 41, - 'Dl5': 42, - 'Dr7': 43, - 'Dr6': 44, - 'Dr5': 45, - 'Dr4': 46, - 'Dr3': 47, - 'Dr2': 48, - 'Dr1': 51, - 'El_shift': 50, - 'El1': 94, - 'El2': 52, - 'El3': 53, - 'El4': 54, - 'El5': 55, - 'El6': 56, - 'Er5': 57, - 'Er4': 58, - 'Er3': 59, - 'Er2': 60, - 'Er1': 61, - 'Er_shift': 62, - 'Fl_ctrl': 37, - 'Fl_win': 133, - 'Fl_alt': 64, - 'Fl_space': 65, - 'Fr_space': 65, - 'Fr_altgr': 108, - 'Fr_win': 105, - 'Fr_menu': 135, - 'Fr_ctrl': 105, - } - -_buttonToKeyman = { - 'Bl1': 'K_BKSLASH', - 'Bl2': 'K_1', - 'Bl3': 'K_2', - 'Bl4': 'K_3', - 'Bl5': 'K_4', - 'Bl6': 'K_5', - 'Bl7': 'K_6', - 'Br6': 'K_7', - 'Br5': 'K_8', - 'Br4': 'K_9', - 'Br3': 'K_0', - 'Br2': 'K_LBRKT', - 'Br1': 'K_RBRKT', - 'Br_bs': 'K_BKSP', - 'Cl_tab': 'K_TAB', - 'Cl1': 'K_Q', - 'Cl2': 'K_W', - 'Cl3': 'K_E', - 'Cl4': 'K_R', - 'Cl5': 'K_T', - 'Cr7': 'K_Z', - 'Cr6': 'K_U', - 'Cr5': 'K_I', - 'Cr4': 'K_O', - 'Cr3': 'K_P', - 'Cr2': 'K_COLON', - 'Cr1': 'K_EQUAL', - 'CD_ret': 'K_ENTER', - 'Dl_caps': 'CAPS', - 'Dl1': 'K_A', - 'Dl2': 'K_S', - 'Dl3': 'K_D', - 'Dl4': 'K_F', - 'Dl5': 'K_G', - 'Dr7': 'K_H', - 'Dr6': 'K_J', - 'Dr5': 'K_K', - 'Dr4': 'K_L', - 'Dr3': 'K_BKQUOTE', - 'Dr2': 'K_QUOTE', - 'Dr1': 'K_SLASH', - 'El_shift': 'SHIFT', # XXX: there is no distinction between left/right - 'El1': 'K_oE2', - 'El2': 'K_Y', - 'El3': 'K_X', - 'El4': 'K_C', - 'El5': 'K_V', - 'El6': 'K_B', - 'Er5': 'K_N', - 'Er4': 'K_M', - 'Er3': 'K_COMMA', - 'Er2': 'K_PERIOD', - 'Er1': 'K_HYPHEN', - 'Er_shift': 'SHIFT', - 'Fl_ctrl': 'LCTRL', - 'Fl_win': 'K_?5B', - 'Fl_alt': 'LALT', 
- 'Fl_space': 'K_SPACE', - 'Fr_space': 'K_SPACE', - 'Fr_altgr': 'RALT', - 'Fr_win': 'K_?5C', - 'Fr_menu': 'K_?5D', - 'Fr_ctrl': 'RCTRL', - } - -# button windows scancode. See Keyboard Scan Code Specification Revision 1.3a -# (published in 2000) from the Windows Platform Design Notes for example. -_buttonToWinScancode = { - 'Bl1': (0x29, ), - 'Bl2': (0x02, ), - 'Bl3': (0x03, ), - 'Bl4': (0x04, ), - 'Bl5': (0x05, ), - 'Bl6': (0x06, ), - 'Bl7': (0x07, ), - 'Br6': (0x08, ), - 'Br5': (0x09, ), - 'Br4': (0x0A, ), - 'Br3': (0x0B, ), - 'Br2': (0x0C, ), - 'Br1': (0x0D, ), - 'Br_bs': (0x0E, ), - 'Cl_tab': (0x0F, ), - 'Cl1': (0x10, ), - 'Cl2': (0x11, ), - 'Cl3': (0x12, ), - 'Cl4': (0x13, ), - 'Cl5': (0x14, ), - 'Cr7': (0x15, ), - 'Cr6': (0x16, ), - 'Cr5': (0x17, ), - 'Cr4': (0x18, ), - 'Cr3': (0x19, ), - 'Cr2': (0x1A, ), - 'Cr1': (0x1B, ), - 'CD_ret': (0x1C, ), - 'Dl_caps': (0x3A, ), - 'Dl1': (0x1E, ), - 'Dl2': (0x1F, ), - 'Dl3': (0x20, ), - 'Dl4': (0x21, ), - 'Dl5': (0x22, ), - 'Dr7': (0x23, ), - 'Dr6': (0x24, ), - 'Dr5': (0x25, ), - 'Dr4': (0x26, ), - 'Dr3': (0x27, ), - 'Dr2': (0x28, ), - 'Dr1': (0x2B, ), - 'El_shift': (0x2A, ), - 'El1': (0x56, ), - 'El2': (0x2C, ), - 'El3': (0x2D, ), - 'El4': (0x2E, ), - 'El5': (0x2F, ), - 'El6': (0x30, ), - 'Er5': (0x31, ), - 'Er4': (0x32, ), - 'Er3': (0x33, ), - 'Er2': (0x34, ), - 'Er1': (0x35, ), - 'Er_shift': (0x36, ), - 'Fl_ctrl': (0x1D, ), - 'Fl_win': (0xe0, 0x5B, ), - 'Fl_alt': (0x38, ), - 'Fl_space': (0x39, ), - 'Fr_space': (0x39, ), - 'Fr_altgr': (0xe0, 0x38, ), - 'Fr_win': (0xe0, 0x5C, ), - 'Fr_menu': (0xe0, 0x5D, ), - 'Fr_ctrl': (0xe0, 0x1D, ), - } - -# see https://eastmanreference.com/complete-list-of-applescript-key-codes -_buttonToOsxKeycode = { - 'Bl1': 50, - 'Bl2': 18, - 'Bl3': 19, - 'Bl4': 20, - 'Bl5': 21, - 'Bl6': 23, - 'Bl7': 22, - 'Br6': 26, - 'Br5': 28, - 'Br4': 25, - 'Br3': 29, - 'Br2': 27, - 'Br1': 24, - 'Br_bs': 51, - 'Cl_tab': 48, - 'Cl1': 12, - 'Cl2': 13, - 'Cl3': 14, - 'Cl4': 15, - 'Cl5': 17, - 'Cr7': 16, - 
'Cr6': 32, - 'Cr5': 34, - 'Cr4': 31, - 'Cr3': 35, - 'Cr2': 33, - 'Cr1': 30, - 'CD_ret': 36, - 'Dl_caps': 57, - 'Dl1': 0, - 'Dl2': 1, - 'Dl3': 2, - 'Dl4': 3, - 'Dl5': 5, - 'Dr7': 4, - 'Dr6': 38, - 'Dr5': 40, - 'Dr4': 37, - 'Dr3': 41, - 'Dr2': 39, - #'Dr1': 51, - 'El_shift': 57, - #'El1': 6, - 'El2': 6, - 'El3': 7, - 'El4': 8, - 'El5': 9, - 'El6': 11, - 'Er5': 45, - 'Er4': 46, - 'Er3': 43, - 'Er2': 47, - 'Er1': 44, - 'Er_shift': 60, - 'Fl_ctrl': 59, - 'Fl_win': 55, - 'Fl_alt': 58, - 'Fl_space': 49, - 'Fr_space': 49, - 'Fr_altgr': 61, - 'Fr_win': 55, - #'Fr_menu': , - #'Fr_ctrl': 105, - } - class Button: - __slots__ = ('width', 'isMarked', 'i') + """ A single physical button on the keyboard """ + + __slots__ = ('width', 'isMarked', 'i', 'scancode') _idToName : Dict[int, Text] = {} _nameToId : Dict[Text, int] = {} _nextNameId = 0 + serializedName = 'standard' - def __init__ (self, name: Text, width: float = 1, isMarked: bool = False): + def __init__ (self, name: Text, width: float = 1, isMarked: bool = False, scancode = None): # map names to integers for fast comparison/hashing i = Button._nameToId.get (name) if i is None: @@ -310,8 +45,14 @@ class Button: self.width = width # marked with an haptic line, for better orientation self.isMarked = isMarked - - def __repr__ (self): + # scancode map, although they are not all technically scancodes, they + # are some low-level representation of the physical key + self.scancode = scancode + # special case for windows + if self.scancode and 'windows' in self.scancode: + self.scancode['windows'] = tuple (self.scancode['windows']) + + def __repr__ (self): # pragma: no cover return f'Button({self.name!r}, {self.width}, {self.isMarked})' def __eq__ (self, other): @@ -326,25 +67,10 @@ class Button: def name (self): return Button._idToName[self.i] - @property - def xorgKeycode (self): - return _buttonToXorgKeycode[self.name] - - @property - def keymanCode (self): - return _buttonToKeyman[self.name] - - @property - def windowsScancode 
(self): - return _buttonToWinScancode[self.name] - - @property - def osxKeycode (self): - return _buttonToOsxKeycode[self.name] - @classmethod def deserialize (self, data: Dict): - kindMap = {'standard': Button, 'letter': LetterButton, 'multi': MultiRowButton} + kindMap = dict (map (lambda x: (x.serializedName, x), + (Button, LetterButton, MultiRowButton))) try: kind = data['kind'] del data['kind'] @@ -352,15 +78,28 @@ class Button: kind = 'standard' return kindMap[kind] (**data) + def serialize (self): + d = dict (name=self.name, width=self.width, scancode=self.scancode) + if self.__class__ is not Button: + d['kind'] = self.serializedName + if self.isMarked: + d['isMarked'] = self.isMarked + # turn the tuple back into a list + if d['scancode'] and 'windows' in d['scancode']: + d['scancode']['windows'] = list (d['scancode']['windows']) + return d + class LetterButton (Button): """ A letter, number or symbol button, but not special keys like modifier, tab, … """ - def __init__ (self, name, isMarked=False): - super().__init__ (name, width=1, isMarked=isMarked) + serializedName = 'letter' - def __repr__ (self): + def __init__ (self, name, width=1, isMarked=False, scancode=None): + super().__init__ (name, width=width, isMarked=isMarked, scancode=scancode) + + def __repr__ (self): # pragma: no cover return f'LetterButton({self.name!r}, {self.isMarked})' class MultiRowButton (Button): @@ -370,19 +109,26 @@ class MultiRowButton (Button): """ __slots__ = ('span', ) + serializedName = 'multi' - def __init__ (self, name, span, isMarked=False): - super ().__init__ (name, width=1, isMarked=isMarked) + def __init__ (self, name, span, width=1, isMarked=False, scancode=None): + super ().__init__ (name, width=width, isMarked=isMarked, scancode=scancode) self.span = span - def __repr__ (self): + def __repr__ (self): # pragma: no cover return f'MultiRowButton({self.name!r}, {self.span!r}, {self.isMarked!r})' + def serialize (self): + d = super ().serialize () + d['span'] = self.span 
+ return d + class PhysicalKeyboard: - __slots__ = ('name', 'rows', '_buttonToRow') + __slots__ = ('name', 'description', 'rows', '_buttonToRow') - def __init__ (self, name: Text, rows): + def __init__ (self, name: Text, description: Text, rows): self.name = name + self.description = description self.rows = rows self._buttonToRow = dict () @@ -393,7 +139,7 @@ class PhysicalKeyboard: def __iter__ (self): return iter (self.rows) - def __repr__ (self): + def __repr__ (self): # pragma: no cover return f'<PhysicalKeyboard {self.name} with {len (self)} keys>' def __len__ (self): @@ -405,7 +151,7 @@ class PhysicalKeyboard: for k in self.keys (): if k.name == name: return k - raise AttributeError (f'{name} is not a valid button name') + raise KeyError (f'{name} is not a valid button name') def keys (self) -> Iterator[Button]: """ Iterate over all keys """ @@ -428,7 +174,19 @@ class PhysicalKeyboard: for btn in r: row[1].append (Button.deserialize (btn)) rows.append (row) - return cls (data['name'], rows) + return cls (data['name'], data['description'], rows) + + def serialize (self): + rows = [] + for l, r in self.rows: + newRow = [[], []] + for btn in l: + newRow[0].append (btn.serialize ()) + for btn in r: + newRow[1].append (btn.serialize ()) + rows.append (newRow) + return dict (name=self.name, description=self.description, rows=rows) -defaultKeyboards = YamlLoader ('data/keyboards', PhysicalKeyboard.deserialize) +dataDirectory = 'data/keyboards' +defaultKeyboards = YamlLoader (dataDirectory, PhysicalKeyboard.deserialize) diff --git a/lulua/plot.py b/lulua/plot.py index fdfc16c..9fb5cf1 100644 --- a/lulua/plot.py +++ b/lulua/plot.py @@ -20,9 +20,6 @@ import sys, argparse, json, unicodedata, pickle, logging, math from operator import itemgetter -from bokeh.plotting import figure -from bokeh.models import ColumnDataSource, LinearAxis, Range1d -from bokeh.embed import json_item from .layout import * from .keyboard import defaultKeyboards @@ -30,9 +27,31 @@ from .util 
import limit, displayText from .writer import Writer from .carpalx import Carpalx, models +def setPlotStyle (p): + """ Set common plot styles """ + + # Suppress warnings from bokeh if the legend is empty. + if p.legend: + p.legend.location = "top_left" + # Hide glyph on click on legend + p.legend.click_policy = "hide" + p.legend.label_text_font = 'IBM Plex Sans Arabic' + p.legend.border_line_color = None + p.legend.background_fill_color = None + p.legend.inactive_fill_color = 'black' + p.legend.inactive_fill_alpha = 0.1 + + # no border fill + p.border_fill_color = None + p.background_fill_alpha = 0.5 + def letterfreq (args): """ Map key combinations to their text, bin it and plot sorted distribution """ + from bokeh.plotting import figure + from bokeh.models import ColumnDataSource, LinearAxis, Range1d + from bokeh.embed import json_item + # show unicode class "letters other" only whitelistCategory = {'Lo'} @@ -89,15 +108,12 @@ def letterfreq (args): p.vbar(x='letters', width=0.5, top='rel', color="#dc322f", source=source, y_range_name='single') p.add_layout(LinearAxis(y_range_name="single"), 'right') + setPlotStyle (p) # styling p.xgrid.grid_line_color = None - p.xaxis.major_label_text_font_size = "1.5em" - p.xaxis.major_label_text_font_size = "1.5em" - p.xaxis.major_label_text_font = 'IBM Plex Sans Arabic' - p.yaxis.major_label_text_font = 'IBM Plex Sans Arabic' - # no border fill - p.border_fill_color = None - p.background_fill_alpha = 0.5 + for axis, size, font in ((p.xaxis, '1.5em', 'IBM Plex Sans Arabic'), (p.yaxis, '1em', 'IBM Plex Sans')): + axis.major_label_text_font_size = size + axis.major_label_text_font = font json.dump (json_item (p), sys.stdout) @@ -153,3 +169,125 @@ def triadfreq (args): return 0 +def triadEffortData (args): + """ + Plot cumulated triad frequency vs cumulative effort. + + More frequent triads should be easier to type and thus we expect an + exponential distribution for optimized layouts and linear distribution + for everything else. 
+ """ + + import numpy as np + + stats = pickle.load (sys.stdin.buffer) + + # XXX: add layout to stats? + keyboard = defaultKeyboards['ibmpc105'] + layout = defaultLayouts[args.layout].specialize (keyboard) + writer = Writer (layout) + + # letter-based binning, in case multiple buttons are mapped to the same + # letter. + binned = defaultdict (lambda: dict (weight=0, effort=Carpalx (models['mod01'], writer), textTriad=None)) + weightSum = 0 + for triad, weight in stats['triads'].triads.items (): + textTriad = tuple (layout.getText (t) for t in triad) + data = binned[textTriad] + data['weight'] += weight + data['effort'].addTriad (triad, weight) + data['textTriad'] = textTriad + data['layers'] = tuple (layout.modifierToLayer (x.modifier)[0] for x in triad) + weightSum += weight + + # Now bin into equally-sized buckets to reduce amount of data + nBins = 200 + binWidth = weightSum//nBins + cumulativeWeight = 0 + cumulativeEffort = 0 + x = [] + y = [] + for data in sorted (binned.values (), key=lambda x: x['weight'], reverse=True): + cumulativeWeight += data['weight'] + cumulativeEffort += data['effort'].effort * data['weight'] + if not x or x[-1] + binWidth <= cumulativeWeight: + x.append (cumulativeWeight) + y.append (cumulativeEffort) + x.append (cumulativeWeight) + y.append (cumulativeEffort) + + x = np.true_divide (x, cumulativeWeight) + y = np.true_divide (y, cumulativeEffort) + + pickle.dump (dict (x=x, y=y, layout=layout), sys.stdout.buffer, pickle.HIGHEST_PROTOCOL) + +def triadEffortPlot (args): + """ Plot concatenated pickled data from triadEffortData """ + + from .stats import unpickleAll + # Initializing bokeh is an expensive operation and this module is imported + # alot, so only do it when necessary. 
+ from bokeh.palettes import Set3 + from bokeh.plotting import figure + from bokeh.models import RadioButtonGroup, CustomJS, Slope + from bokeh.embed import json_item + from bokeh.layouts import column + + p = figure( + plot_width=1000, + plot_height=500, + sizing_mode='scale_both', + x_range=(0, 1), + y_range=(0, 1), + output_backend="webgl", + ) + data = list (unpickleAll (sys.stdin.buffer)) + colors = Set3[len(data)] + lines = dict () + for o, color in zip (data, colors): + name = o['layout'].name + assert name not in lines + lines[name] = p.line (o['x'], o['y'], line_width=1, color=color, + legend_label=name, name=name) + + # color: base1 + slope = Slope(gradient=1, y_intercept=0, + line_color='#93a1a1', line_dash='dashed', line_width=1) + p.add_layout(slope) + + setPlotStyle (p) + for axis, size, font in ((p.xaxis, '1em', 'IBM Plex Sans'), (p.yaxis, '1em', 'IBM Plex Sans')): + axis.major_label_text_font_size = size + axis.major_label_text_font = font + + LABELS = ["All", "Standard", "Usable"] + visible = { + 0: list (lines.keys ()), + 1: ['ar-asmo663', 'ar-linux', 'ar-osx'], + 2: ['ar-lulua', 'ar-ergoarabic', 'ar-malas', 'ar-linux', 'ar-osx'], + } + ranges = { + 0: [(0, 1), (0, 1)], + 1: [(0, 0.5), (0, 0.4)], + 2: [(0, 0.5), (0, 0.4)], + } + presets = RadioButtonGroup (labels=LABELS, active=0) + # Set visibility and x/yranges on click. Not sure if there’s a more pythonic way. 
+ presets.js_on_click(CustomJS( + args=dict(lines=lines, plot=p, visible=visible, ranges=ranges), + code=""" + for (const [k, line] of Object.entries (lines)) { + line.visible = visible[this.active].includes (k); + } + const xrange = plot.x_range; + xrange.start = ranges[this.active][0][0]; + xrange.end = ranges[this.active][0][1]; + const yrange = plot.y_range; + yrange.start = ranges[this.active][1][0]; + yrange.end = ranges[this.active][1][1]; + """)) + + json.dump (json_item (column (p, presets)), sys.stdout) + + return 0 + diff --git a/lulua/render.py b/lulua/render.py index 41a6bd5..bc09e4c 100644 --- a/lulua/render.py +++ b/lulua/render.py @@ -107,7 +107,8 @@ class Renderer: gCap.add (self._drawMarker (btnWidth, btnPos)) highlight = self.keyHighlight.get (btn.name, 0) - gHighlight.add (self._drawHighlight (highlight, btnWidth, btnPos)) + if highlight > 0: + gHighlight.add (self._drawHighlight (highlight, btnWidth, btnPos)) l = self._drawLabel (buttonText, btnWidth, btnPos) if isModifier: @@ -302,6 +303,8 @@ def renderXmodmap (args): keyboard = defaultKeyboards[args.keyboard] layout = defaultLayouts[args.layout].specialize (keyboard) + xorgGetter = lambda x: x.scancode['xorg'] + with open (args.output, 'w') as fd: # inspired by https://neo-layout.org/neo_de.xmodmap fd.write ('\n'.join ([ @@ -321,12 +324,12 @@ def renderXmodmap (args): # layers: 1, 2, 3, 5, 4, None, 6, 7 for i in (0, 1, 2, 4, 3, 99999, 5, 6): if i >= len (layout.layers): - for btn in unique (keyboard.keys (), key=attrgetter ('xorgKeycode')): + for btn in unique (keyboard.keys (), key=xorgGetter): keycodeMap[btn].append ('NoSymbol') continue l = layout.layers[i] # space button shares the same keycode and must be removed - for btn in unique (keyboard.keys (), key=attrgetter ('xorgKeycode')): + for btn in unique (keyboard.keys (), key=xorgGetter): if not layout.isModifier (frozenset ([btn])): text = l.layout.get (btn) if not text: @@ -359,7 +362,7 @@ def renderXmodmap (args): for btn, v in 
keycodeMap.items (): v = '\t'.join (v) - fd.write (f'!! {btn.name}\nkeycode {btn.xorgKeycode} = {v}\n') + fd.write (f'!! {btn.name}\nkeycode {xorgGetter (btn)} = {v}\n') fd.write ('\n'.join (['add Mod3 = ISO_First_Group', 'add Mod5 = ISO_Level3_Shift', ''])) def renderKeyman (args): @@ -388,11 +391,12 @@ def renderKeyman (args): for i, l in enumerate (layout.layers): for m in l.modifier: for x in m: - if x.keymanCode.startswith ('K_') or x.keymanCode == 'CAPS': + keymanCode = x.scancode['keyman'] + if keymanCode.startswith ('K_') or keymanCode == 'CAPS': logging.error (f'Keyman does not support custom modifier like {m}. Your layout will not work correctly.') break for btn, text in l.layout.items (): - comb = ' '.join ([x.keymanCode for x in m] + [btn.keymanCode]) + comb = ' '.join ([x.scancode['keyman'] for x in m] + [btn.scancode['keyman']]) text = ' '.join ([f'U+{ord (x):04X}' for x in text]) fd.write (f'+ [{comb}] > {text}\n') @@ -494,7 +498,7 @@ def renderWinKbd (args): s = '\r' return s wcharMap = [] - for btn in unique (keyboard.keys (), key=attrgetter ('windowsScancode')): + for btn in unique (keyboard.keys (), key=lambda x: x.scancode['windows']): text = list (layout.getButtonText (btn)) # skip unused keys @@ -502,7 +506,7 @@ def renderWinKbd (args): continue mappedText = [toWindows (s) for s in text] - vk = next (filter (lambda x: isinstance (x, VirtualKey), scancodeToVk[btn.windowsScancode])) + vk = next (filter (lambda x: isinstance (x, VirtualKey), scancodeToVk[btn.scancode['windows']])) wcharMap.append ((vk, 0, mappedText)) fd.write (makeDriverSources (scancodeToVk, wcharMap)) @@ -539,7 +543,7 @@ def renderKeylayout (args): for i, l in enumerate (layout.layers): keymap = ET.SubElement (keymapSet, 'keyMap', index=str (i)) for btn, text in l.layout.items (): - ET.SubElement (keymap, 'key', code=str (btn.osxKeycode), output=text) + ET.SubElement (keymap, 'key', code=str (btn.scancode['macos']), output=text) layouts = ET.SubElement (docroot, 'layouts') 
layout = ET.SubElement (layouts, 'layout', first='0', last='0', modifiers=str (modmapId), mapSet=str (keymapSetId)) diff --git a/lulua/report.py b/lulua/report.py index b25201d..0e5ec00 100644 --- a/lulua/report.py +++ b/lulua/report.py @@ -18,7 +18,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. -import sys, argparse, logging, pickle, math +import sys, argparse, logging, pickle, math, unicodedata from gettext import GNUTranslations, NullTranslations from decimal import Decimal from fractions import Fraction @@ -60,21 +60,6 @@ def arabnum (s): m = {'0': '٠', '1': '١', '2': '٢', '3': '٣', '4': '٤', '5': '٥', '6': '٦', '7': '٧', '8': '٨', '9': '٩', ',': '٬', '.': '٫'} return ''.join (map (lambda x: m.get (x, x), s)) -def clamp (v, lower, upper): - return max (min (v, upper), lower) - -def blend (v, a, b): - v = clamp (v, 0, 1) - return (b-a)*v+a - -def blendn (v, *l): - assert 0 <= v <= 1 - n = len (l) - step = 1/(n-1) - i = min (int (math.floor (v/step)), n-2) - stretchedv = (v-i*step)/step - return [blend (stretchedv, x, y) for x, y in zip (l[i], l[i+1])] - def render (): parser = argparse.ArgumentParser(description='Create lulua report.') parser.add_argument('-c', '--corpus', nargs='+', metavar='FILE', help='Corpus metadata files') @@ -88,9 +73,41 @@ def render (): env.filters['approx'] = approx env.filters['numspace'] = numspace env.filters['arabnum'] = arabnum - env.filters['blendn'] = blendn env.filters['fraction'] = fraction + # Map global variables to Arabic letter romanizations, so we can use + # them easily in text. + # Taken from Abu-Chacra’s Arabic – An Essential Grammar. It’s + # too difficult for now to write a general-purpose romanization + # function, because it would need a dictionary. 
+ letterNames = { + 'Hamzah': ('Hamzah', 'ء'), + 'Alif': ('ᵓAlif', 'ا'), + 'Alifhamzah': ('ᵓAlif-hamzah', 'أ'), + 'Wawhamzah': ('Wa\u0304w-hamzah', 'ؤ'), + 'Yahamzah': ('Ya\u0304ᵓ-hamzah', 'ئ'), + 'Ba': ('Baᵓ', 'ب'), + 'Ta': ('Taᵓ', 'ت'), + 'Tha': ('T\u0331aᵓ', 'ث'), + 'Ra': ('Raᵓ', 'ر'), + 'Dal': ('Da\u0304l', 'د'), + 'Dhal': ('D\u0331a\u0304l', 'ذ'), + 'Qaf': ('Qa\u0304f', 'ق'), + 'Lam': ('La\u0304m', 'ل'), + 'Lamalif': ('La\u0304m-ᵓalif', 'لا'), + 'Mim': ('Mi\u0304m', 'م'), + 'Nun': ('Nu\u0304n', 'ن'), + 'Waw': ('Wa\u0304w', 'و'), + 'Ya': ('Ya\u0304ᵓ', 'ي'), + 'Tamarbutah': ('Ta\u0304ᵓ marbu\u0304t\u0323ah', 'ة'), + 'Alifmaqsurah': ('ᵓAlif maqs\u0323u\u0304rah', 'ى'), + } + for k, (romanized, arabic) in letterNames.items (): + env.globals[k] = f'{romanized} <bdo lang="ar">({arabic})</bdo>' + env.globals[k.lower ()] = env.globals[k].lower () + env.globals[k + '_'] = romanized + env.globals[k.lower () + '_'] = romanized.lower () + corpus = [] for x in args.corpus: with open (x) as fd: diff --git a/lulua/stats.py b/lulua/stats.py index 1d051b3..9d6c537 100644 --- a/lulua/stats.py +++ b/lulua/stats.py @@ -28,7 +28,7 @@ from .layout import * from .keyboard import defaultKeyboards from .writer import SkipEvent, Writer from .carpalx import Carpalx, models -from .plot import letterfreq, triadfreq +from .plot import letterfreq, triadfreq, triadEffortPlot, triadEffortData from .util import displayText def updateDictOp (a, b, op): @@ -315,47 +315,6 @@ def keyHeatmap (args): buttons[k.name] = v yaml.dump (data, sys.stdout) -def sentenceStats (keyboard, layout, text): - """ - Calculate effort for every character (button) in a text - """ - - writer = Writer (layout) - - effort = Carpalx (models['mod01'], writer) - _ignored = frozenset (keyboard[x] for x in ('Fl_space', 'Fr_space', 'CD_ret', 'Cl_tab')) - writtenText = [] - skipped = 0 - for match, event in writer.type (StringIO (text)): - if isinstance (event, SkipEvent): - skipped += 1 - writtenText.append ([event.char, None, 
0]) - if not isinstance (event, ButtonCombination): - continue - - writtenText.append ([match, event, 0]) - - triad = list (filter (lambda x: x[1] is not None and first (x[1].buttons) not in _ignored, writtenText))[-3:] - if len (triad) == 3: - matchTriad, buttonTriad, _ = zip (*triad) - triadEffort = effort._triadEffort (tuple (buttonTriad)) - - # now walk the existing text backwards to find the original matches and add the computed effort - writtenTextIt = iter (reversed (writtenText)) - matchTriad = list (matchTriad) - while matchTriad: - t = next (writtenTextIt) - if t[0] == matchTriad[-1]: - matchTriad.pop () - t[2] += triadEffort - - effort.addTriad (buttonTriad, 1) - - # normalize efforts to [0, 1] - s = max (map (lambda x: x[2], writtenText)) - writtenText = list (map (lambda x: (x[0], x[2]/s if x[1] is not None else None), writtenText)) - return (writtenText, effort.effort, skipped) - from .text import mapChars, charMap def layoutstats (args): @@ -378,12 +337,6 @@ def layoutstats (args): asymmetry = hands[LEFT]/buttonPresses - hands[RIGHT]/buttonPresses - sentences = [ - 'أَوْ كَصَيِّبٍ مِّنَ السَّمَاءِ فِيهِ ظُلُمَاتٌ وَرَعْدٌ وَبَرْقٌ يَجْعَلُونَ أَصَابِعَهُمْ فِي آذَانِهِم مِّنَ الصَّوَاعِقِ حَذَرَ الْمَوْتِ وَاللَّهُ مُحِيطٌ بِالْكَافِرِينَ', - 'اللغة العربية هي أكثرُ اللغاتِ السامية تحدثاً، وإحدى أكثر اللغات انتشاراً في العالم، يتحدثُها أكثرُ من 467 مليون نسمة.', - ] - sentences = [sentenceStats (keyboard, layout, mapChars (s, charMap).replace ('\r\n', '\n')) for s in sentences] - # Impact of hamza yah = '\u064a' waw = '\u0648' @@ -411,7 +364,6 @@ def layoutstats (args): fingers=dict (fingers), buttonPresses=buttonPresses, asymmetry=asymmetry, - sentences=sentences, hamzaImpact=hamzaImpact, hamzaOnAlef=hamzaOnAlef, ), sys.stdout.buffer) @@ -472,6 +424,12 @@ def main (): sp.add_argument('-s', '--sort', choices={'weight', 'effort', 'combined'}, default='weight', help='Sorter') sp.add_argument('-n', '--limit', type=int, default=0, help='Sorter') 
sp.set_defaults (func=triadfreq) + + sp = subparsers.add_parser('triadeffortdata') + sp.set_defaults (func=triadEffortData) + sp = subparsers.add_parser('triadeffortplot') + sp.set_defaults (func=triadEffortPlot) + sp = subparsers.add_parser('keyheatmap') sp.set_defaults (func=keyHeatmap) sp = subparsers.add_parser('layoutstats') diff --git a/lulua/test_keyboard.py b/lulua/test_keyboard.py index 7537266..d08f6d1 100644 --- a/lulua/test_keyboard.py +++ b/lulua/test_keyboard.py @@ -20,7 +20,8 @@ import pytest -from .keyboard import defaultKeyboards, Button +from .keyboard import defaultKeyboards, Button, dataDirectory +from .util import YamlLoader def test_defaults (): k = defaultKeyboards['ibmpc105'] @@ -54,6 +55,9 @@ def test_keyboard_getattr (): assert k['CD_ret'] == k.find ('CD_ret') assert k['Cr1'] != k.find ('El1') + with pytest.raises (KeyError): + k['nonexistent_button'] + def test_button_uniqname (): a = Button ('a') assert a.name == 'a' @@ -77,3 +81,15 @@ def test_button_uniqname (): d[b] = 2 assert b in d + # make sure we can only compare to Buttons + assert a != 'hello' + assert a != 1 + assert a != dict () + +def test_serialize (): + """ Make sure serialize (deserialize (x)) of keyboards is identity """ + + rawKeyboards = YamlLoader (dataDirectory, lambda x: x) + name = 'ibmpc105' + assert defaultKeyboards[name].serialize () == rawKeyboards[name] + diff --git a/lulua/test_layout.py b/lulua/test_layout.py index 45141ae..8e911da 100644 --- a/lulua/test_layout.py +++ b/lulua/test_layout.py @@ -35,7 +35,7 @@ def test_atomic (layout): for char in text: d = unicodedata.decomposition (char) # allow compat decompositions like … -> ... 
- if not d.startswith ('<compat> ') and not d.startswith ('<isolated> ') and not d.startswith ('<medial> ') and not d.startswith ('<initial> '): + if d.split (' ', 1)[0] not in {'<compat>', '<isolated>', '<medial>', '<initial>', '<noBreak>'}: assert d == '', (char, btn) @pytest.mark.parametrize("layout", defaultLayouts, ids=[l.name for l in defaultLayouts]) diff --git a/lulua/test_report.py b/lulua/test_report.py index 448d796..751684c 100644 --- a/lulua/test_report.py +++ b/lulua/test_report.py @@ -20,7 +20,7 @@ from decimal import Decimal -from .report import approx, blend, blendn +from .report import approx def test_approx (): assert approx (0) == (Decimal ('0'), '') @@ -37,13 +37,3 @@ def test_approx (): assert approx (10**9) == (Decimal ('1'), 'billion') assert approx (10**12) == (Decimal ('1000'), 'billion') -def test_blend (): - assert blend (0.5, 0, 1) == 0.5 - assert blend (0.5, 0, 2) == 1 - - assert blend (0.1, 0, 1) == 0.1 - assert blend (0.9, 0, 1) == 0.9 - - assert blendn (0.5, (0, ), (1, )) == [0.5] - assert blendn (0.5, (0, ), (0.7, ), (1, )) == [0.7] - diff --git a/lulua/text.py b/lulua/text.py index 18e4dbf..ea91139 100644 --- a/lulua/text.py +++ b/lulua/text.py @@ -193,6 +193,10 @@ def filterEpub (item): stream = walker (document) s = HTMLSerializer() yield ''.join (s.serialize (stream)) + # It looks like ebooklib is leaking ZipFile instances somewhere, which + # can be prevented by resetting the book before the GC grabs it. 
+ book.reset () + del book def filterText (fd): yield fd.read ().decode ('utf-8') diff --git a/lulua/util.py b/lulua/util.py index 5d7ea1b..0245275 100644 --- a/lulua/util.py +++ b/lulua/util.py @@ -22,7 +22,7 @@ Misc utilities """ -import os, yaml, pkg_resources, unicodedata +import os, yaml, pkg_resources, unicodedata, re first = lambda x: next (iter (x)) @@ -79,16 +79,20 @@ def displayText (text): if all (map (lambda x: unicodedata.combining (x) != 0, text)): # add circle if combining return '\u25cc' + text + if len (text) == 1 and unicodedata.category (text) == 'Cf': + stopwords = re.compile('\WTO\W', re.I) + try: + cleanName = unicodedata.name (text).replace ('-', ' ') + short = ''.join (map (lambda x: x[0], stopwords.sub(' ', cleanName).split (' '))) + return f'[{short}]' + except ValueError: + # No such name. + pass invMap = { '\t': '⭾', '\n': '↳', ' ': '\u2423', '\b': '⌦', - '\u200e': '[LRM]', # left to right mark - '\u061c': '[ALM]', # arabic letter mark - '\u202c': '[PDF]', # pop directional formatting - "\u2066": '[LRI]', # left-to-right isolate (lri) - "\u2067": '[RLI]', # right-to-left isolate (rli) - "\u2069": '[PDI]', # pop directional isolate (pdi) + '\u202f': '[NNBSP]', } return invMap.get (text, text) |