summaryrefslogtreecommitdiff
path: root/lulua
diff options
context:
space:
mode:
Diffstat (limited to 'lulua')
-rw-r--r--lulua/data/keyboards/ibmpc105.yaml436
-rw-r--r--lulua/data/layouts/ar-ergoarabic.yaml168
-rw-r--r--lulua/data/layouts/ar-qtaish.yaml64
-rw-r--r--lulua/data/report/index.html286
-rw-r--r--lulua/data/report/style.css11
-rw-r--r--lulua/keyboard.py360
-rw-r--r--lulua/plot.py158
-rw-r--r--lulua/render.py22
-rw-r--r--lulua/report.py51
-rw-r--r--lulua/stats.py56
-rw-r--r--lulua/test_keyboard.py18
-rw-r--r--lulua/test_layout.py2
-rw-r--r--lulua/test_report.py12
-rw-r--r--lulua/text.py4
-rw-r--r--lulua/util.py18
15 files changed, 1174 insertions, 492 deletions
diff --git a/lulua/data/keyboards/ibmpc105.yaml b/lulua/data/keyboards/ibmpc105.yaml
index d9dcb76..0938262 100644
--- a/lulua/data/keyboards/ibmpc105.yaml
+++ b/lulua/data/keyboards/ibmpc105.yaml
@@ -3,130 +3,558 @@ description: Standard IBM PC 105 key layout (European)
rows:
- - - kind: letter
name: Bl1
+ scancode:
+ keyman: K_BKSLASH
+ macos: 50
+ windows:
+ - 41
+ xorg: 49
+ width: 1
- kind: letter
name: Bl2
+ scancode:
+ keyman: K_1
+ macos: 18
+ windows:
+ - 2
+ xorg: 10
+ width: 1
- kind: letter
name: Bl3
+ scancode:
+ keyman: K_2
+ macos: 19
+ windows:
+ - 3
+ xorg: 11
+ width: 1
- kind: letter
name: Bl4
+ scancode:
+ keyman: K_3
+ macos: 20
+ windows:
+ - 4
+ xorg: 12
+ width: 1
- kind: letter
name: Bl5
+ scancode:
+ keyman: K_4
+ macos: 21
+ windows:
+ - 5
+ xorg: 13
+ width: 1
- kind: letter
name: Bl6
+ scancode:
+ keyman: K_5
+ macos: 23
+ windows:
+ - 6
+ xorg: 14
+ width: 1
- kind: letter
name: Bl7
+ scancode:
+ keyman: K_6
+ macos: 22
+ windows:
+ - 7
+ xorg: 15
+ width: 1
- - kind: letter
name: Br6
+ scancode:
+ keyman: K_7
+ macos: 26
+ windows:
+ - 8
+ xorg: 16
+ width: 1
- kind: letter
name: Br5
+ scancode:
+ keyman: K_8
+ macos: 28
+ windows:
+ - 9
+ xorg: 17
+ width: 1
- kind: letter
name: Br4
+ scancode:
+ keyman: K_9
+ macos: 25
+ windows:
+ - 10
+ xorg: 18
+ width: 1
- kind: letter
name: Br3
+ scancode:
+ keyman: K_0
+ macos: 29
+ windows:
+ - 11
+ xorg: 19
+ width: 1
- kind: letter
name: Br2
+ scancode:
+ keyman: K_LBRKT
+ macos: 27
+ windows:
+ - 12
+ xorg: 20
+ width: 1
- kind: letter
name: Br1
+ scancode:
+ keyman: K_RBRKT
+ macos: 24
+ windows:
+ - 13
+ xorg: 21
+ width: 1
- name: Br_bs
+ scancode:
+ keyman: K_BKSP
+ macos: 51
+ windows:
+ - 14
+ xorg: 22
width: 1.75
- - - name: Cl_tab
+ scancode:
+ keyman: K_TAB
+ macos: 48
+ windows:
+ - 15
+ xorg: 23
width: 1.75
- kind: letter
name: Cl1
+ scancode:
+ keyman: K_Q
+ macos: 12
+ windows:
+ - 16
+ xorg: 24
+ width: 1
- kind: letter
name: Cl2
+ scancode:
+ keyman: K_W
+ macos: 13
+ windows:
+ - 17
+ xorg: 25
+ width: 1
- kind: letter
name: Cl3
+ scancode:
+ keyman: K_E
+ macos: 14
+ windows:
+ - 18
+ xorg: 26
+ width: 1
- kind: letter
name: Cl4
+ scancode:
+ keyman: K_R
+ macos: 15
+ windows:
+ - 19
+ xorg: 27
+ width: 1
- kind: letter
name: Cl5
+ scancode:
+ keyman: K_T
+ macos: 17
+ windows:
+ - 20
+ xorg: 28
+ width: 1
- - kind: letter
name: Cr7
+ scancode:
+ keyman: K_Z
+ macos: 16
+ windows:
+ - 21
+ xorg: 29
+ width: 1
- kind: letter
name: Cr6
+ scancode:
+ keyman: K_U
+ macos: 32
+ windows:
+ - 22
+ xorg: 30
+ width: 1
- kind: letter
name: Cr5
+ scancode:
+ keyman: K_I
+ macos: 34
+ windows:
+ - 23
+ xorg: 31
+ width: 1
- kind: letter
name: Cr4
+ scancode:
+ keyman: K_O
+ macos: 31
+ windows:
+ - 24
+ xorg: 32
+ width: 1
- kind: letter
name: Cr3
+ scancode:
+ keyman: K_P
+ macos: 35
+ windows:
+ - 25
+ xorg: 33
+ width: 1
- kind: letter
name: Cr2
+ scancode:
+ keyman: K_COLON
+ macos: 33
+ windows:
+ - 26
+ xorg: 34
+ width: 1
- kind: letter
name: Cr1
+ scancode:
+ keyman: K_EQUAL
+ macos: 30
+ windows:
+ - 27
+ xorg: 35
+ width: 1
- kind: multi
name: CD_ret
+ scancode:
+ keyman: K_ENTER
+ macos: 36
+ windows:
+ - 28
+ xorg: 36
span: 2
+ width: 1
- - - name: Dl_caps
+ scancode:
+ keyman: CAPS
+ macos: 57
+ windows:
+ - 58
+ xorg: 66
width: 2
- kind: letter
name: Dl1
+ scancode:
+ keyman: K_A
+ macos: 0
+ windows:
+ - 30
+ xorg: 38
+ width: 1
- kind: letter
name: Dl2
+ scancode:
+ keyman: K_S
+ macos: 1
+ windows:
+ - 31
+ xorg: 39
+ width: 1
- kind: letter
name: Dl3
- - kind: letter
- isMarked: true
+ scancode:
+ keyman: K_D
+ macos: 2
+ windows:
+ - 32
+ xorg: 40
+ width: 1
+ - isMarked: true
+ kind: letter
name: Dl4
+ scancode:
+ keyman: K_F
+ macos: 3
+ windows:
+ - 33
+ xorg: 41
+ width: 1
- kind: letter
name: Dl5
+ scancode:
+ keyman: K_G
+ macos: 5
+ windows:
+ - 34
+ xorg: 42
+ width: 1
- - kind: letter
name: Dr7
- - kind: letter
- isMarked: true
+ scancode:
+ keyman: K_H
+ macos: 4
+ windows:
+ - 35
+ xorg: 43
+ width: 1
+ - isMarked: true
+ kind: letter
name: Dr6
+ scancode:
+ keyman: K_J
+ macos: 38
+ windows:
+ - 36
+ xorg: 44
+ width: 1
- kind: letter
name: Dr5
+ scancode:
+ keyman: K_K
+ macos: 40
+ windows:
+ - 37
+ xorg: 45
+ width: 1
- kind: letter
name: Dr4
+ scancode:
+ keyman: K_L
+ macos: 37
+ windows:
+ - 38
+ xorg: 46
+ width: 1
- kind: letter
name: Dr3
+ scancode:
+ keyman: K_BKQUOTE
+ macos: 41
+ windows:
+ - 39
+ xorg: 47
+ width: 1
- kind: letter
name: Dr2
+ scancode:
+ keyman: K_QUOTE
+ macos: 39
+ windows:
+ - 40
+ xorg: 48
+ width: 1
- kind: letter
name: Dr1
+ scancode:
+ keyman: K_SLASH
+ windows:
+ - 43
+ xorg: 51
+ width: 1
- - - name: El_shift
+ scancode:
+ keyman: SHIFT
+ macos: 57
+ windows:
+ - 42
+ xorg: 50
width: 1.5
- kind: letter
name: El1
+ scancode:
+ keyman: K_oE2
+ windows:
+ - 86
+ xorg: 94
+ width: 1
- kind: letter
name: El2
+ scancode:
+ keyman: K_Y
+ macos: 6
+ windows:
+ - 44
+ xorg: 52
+ width: 1
- kind: letter
name: El3
+ scancode:
+ keyman: K_X
+ macos: 7
+ windows:
+ - 45
+ xorg: 53
+ width: 1
- kind: letter
name: El4
+ scancode:
+ keyman: K_C
+ macos: 8
+ windows:
+ - 46
+ xorg: 54
+ width: 1
- kind: letter
name: El5
+ scancode:
+ keyman: K_V
+ macos: 9
+ windows:
+ - 47
+ xorg: 55
+ width: 1
- kind: letter
name: El6
+ scancode:
+ keyman: K_B
+ macos: 11
+ windows:
+ - 48
+ xorg: 56
+ width: 1
- - kind: letter
name: Er5
+ scancode:
+ keyman: K_N
+ macos: 45
+ windows:
+ - 49
+ xorg: 57
+ width: 1
- kind: letter
name: Er4
+ scancode:
+ keyman: K_M
+ macos: 46
+ windows:
+ - 50
+ xorg: 58
+ width: 1
- kind: letter
name: Er3
+ scancode:
+ keyman: K_COMMA
+ macos: 43
+ windows:
+ - 51
+ xorg: 59
+ width: 1
- kind: letter
name: Er2
+ scancode:
+ keyman: K_PERIOD
+ macos: 47
+ windows:
+ - 52
+ xorg: 60
+ width: 1
- kind: letter
name: Er1
+ scancode:
+ keyman: K_HYPHEN
+ macos: 44
+ windows:
+ - 53
+ xorg: 61
+ width: 1
- name: Er_shift
+ scancode:
+ keyman: SHIFT
+ macos: 60
+ windows:
+ - 54
+ xorg: 62
width: 2.35
- - - name: Fl_ctrl
+ scancode:
+ keyman: LCTRL
+ macos: 59
+ windows:
+ - 29
+ xorg: 37
width: 1.75
- name: Fl_win
+ scancode:
+ keyman: K_?5B
+ macos: 55
+ windows:
+ - 224
+ - 91
+ xorg: 133
width: 1.25
- name: Fl_alt
+ scancode:
+ keyman: LALT
+ macos: 58
+ windows:
+ - 56
+ xorg: 64
width: 1.25
- name: Fl_space
+ scancode:
+ keyman: K_SPACE
+ macos: 49
+ windows:
+ - 57
+ xorg: 65
width: 3
- - name: Fr_space
+ scancode:
+ keyman: K_SPACE
+ macos: 49
+ windows:
+ - 57
+ xorg: 65
width: 3
- name: Fr_altgr
+ scancode:
+ keyman: RALT
+ macos: 61
+ windows:
+ - 224
+ - 56
+ xorg: 108
width: 1.25
- name: Fr_win
+ scancode:
+ keyman: K_?5C
+ macos: 55
+ windows:
+ - 224
+ - 92
+ xorg: 105
width: 1.25
- name: Fr_menu
+ scancode:
+ keyman: K_?5D
+ windows:
+ - 224
+ - 93
+ xorg: 135
width: 1.25
- name: Fr_ctrl
+ scancode:
+ keyman: RCTRL
+ windows:
+ - 224
+ - 29
+ xorg: 105
width: 1.25
diff --git a/lulua/data/layouts/ar-ergoarabic.yaml b/lulua/data/layouts/ar-ergoarabic.yaml
new file mode 100644
index 0000000..620ead1
--- /dev/null
+++ b/lulua/data/layouts/ar-ergoarabic.yaml
@@ -0,0 +1,168 @@
+name: ar-ergoarabic
+layout:
+- layer:
+ Bl1: "`"
+ Bl2: "1"
+ Bl3: "2"
+ Bl4: "3"
+ Bl5: "4"
+ Bl6: "5"
+ Bl7: "6"
+ Br6: "7"
+ Br5: "8"
+ Br4: "9"
+ Br3: "0"
+ Br2: "-"
+ Br1: "="
+ Br_bs: "\b"
+
+ Cl_tab: "\t"
+ Cl1: "د"
+ Cl2: "ص"
+ Cl3: "ث"
+ Cl4: "ق"
+ Cl5: "ف"
+ Cr7: "ء"
+ Cr6: "ع"
+ Cr5: "ه"
+ Cr4: "و"
+ Cr3: "ة"
+ Cr2: "]"
+ Cr1: "["
+
+ CD_ret: "\n"
+
+ Dl1: "ش"
+ Dl2: "س"
+ Dl3: "ي"
+ Dl4: "ب"
+ Dl5: "ل"
+ Dr7: "ا"
+ Dr6: "ت"
+ Dr5: "ن"
+ Dr4: "م"
+ Dr3: "ك"
+ Dr2: "'"
+ Dr1: "\\"
+
+ El2: "\u064a\u0654" # composed: ئ
+ El3: "ى"
+ El4: "ط"
+ El5: "ر"
+ El6: "خ"
+ Er5: "ح"
+ Er4: "ج"
+ Er3: "،"
+ Er2: "."
+ Er1: "/"
+
+ Fl_space: " "
+ Fr_space: " "
+ modifier:
+ - []
+- layer:
+ Bl1: "~" # shadda
+ Bl2: "!"
+ Bl3: "@"
+ Bl4: "#"
+ Bl5: "$"
+ Bl6: "%"
+ Bl7: "^"
+ Br6: "&"
+ Br5: "*"
+ Br4: ")"
+ Br3: "("
+ Br2: "_"
+ Br1: "+"
+
+ Cl1: "ذ"
+ Cl2: "ض"
+ Cl3: "\u064f" # damma
+ Cl4: "\u064c" # dammatan
+ Cl5: "\u0651" # shadda
+ Cr7: "ا\u0655" # composed: إ
+ Cr6: "غ"
+ Cr5: "÷"
+ Cr4: "\u0648\u0654" # composed: ؤ
+ Cr3: "؛"
+ Cr2: "}"
+ Cr1: "{"
+
+ Dl1: "\u0650" # kasra
+ Dl2: "\u064d" # kasratan
+ Dl3: "\u064e" # fatha
+ Dl4: "\u064b" # fathatan
+ Dl5: "ا\u0653" # composed: آ
+ Dr7: "ا\u0654" # composed: أ
+ Dr6: "\u0640" # tatweel
+ Dr5: "×"
+ Dr4: ","
+ Dr3: ":"
+ Dr2: '"'
+
+ El3: "\u0652" # sukun
+ El4: "ظ"
+ El5: "ز"
+ El6: "…"
+ Er5: "»"
+ Er4: "«"
+ Er3: ">"
+ Er2: "<"
+ Er1: "؟"
+ modifier:
+ - [El_shift]
+ - [Er_shift]
+- layer:
+ Bl2: "١"
+ Bl3: "٢"
+ Bl4: "٣"
+ Bl5: "٤"
+ Bl6: "٥"
+ Bl7: "٦"
+ Br6: "٧"
+ Br5: "٨"
+ Br4: "٩"
+ Br3: "٠"
+ Br1: "≠"
+
+ Cl5: "ڤ"
+
+ Dl4: "پ"
+ Dl5: "\u0670" # superscript alef
+ Dr7: "\u0671" # alef wasla
+ Dr5: "\u066b" # decimal separator
+ Dr3: "گ‎"
+ Dr2: '⟩'
+ Dr1: "⟨"
+
+ El2: "›"
+ El3: "‹"
+ El5: "ژ"
+ Er4: "چ"
+ Er1: "\u066d" # five pointed star
+ modifier:
+ - [Fr_altgr]
+- layer:
+ Bl6: "‰"
+ Br2: "\u2011" # non-breaking hyphen
+ Br1: "≈"
+
+ Cl1: "\u2066" # LRI
+ Cl2: "\u2067" # RLI
+ Cl3: "\u2068" # first strong isolate
+ Cl4: "\u2069" # PDI
+ Cr7: "\u202a" # LRE
+ Cr6: "\u202b" # RLE
+ Cr5: "\u202c" # PDF
+ Cr3: "\u200e" # LRM
+ Cr2: "\u200f" # RLM
+ Cr1: "\u061c" # ALM
+
+ Dr2: "\u200d" # ZWJ
+ Dr1: "\u202f" # NNBSP
+
+ Er1: "\u200c" # ZWNJ
+ modifier:
+ - [Fr_altgr, El_shift]
+ - [Fr_altgr, Er_shift]
+
diff --git a/lulua/data/layouts/ar-qtaish.yaml b/lulua/data/layouts/ar-qtaish.yaml
new file mode 100644
index 0000000..2acd0d6
--- /dev/null
+++ b/lulua/data/layouts/ar-qtaish.yaml
@@ -0,0 +1,64 @@
+name: ar-qtaish
+layout:
+- layer:
+ Bl2: "١"
+ Bl3: "٢"
+ Bl4: "٣"
+ Bl5: "٤"
+ Bl6: "٥"
+ Bl7: "٦"
+ Br6: "٧"
+ Br5: "٨"
+ Br4: "٩"
+ Br3: "٠"
+
+ Cl_tab: "\t"
+ Cl1: "ق"
+ Cl2: "ى"
+ Cl3: "ة"
+ Cl4: "د"
+ Cl5: "ف"
+ Cr7: "ك"
+ Cr6: "ص"
+ Cr5: "س"
+ Cr4: "ح"
+ Cr3: "ذ"
+ Cr2: "ش"
+ Cr1: "ج"
+
+ CD_ret: "\n"
+
+ Dl1: "ر"
+ Dl2: "و"
+ Dl3: "ن"
+ Dl4: "ل"
+ Dl5: "ب"
+ Dr7: "ت"
+ Dr6: "ا"
+ Dr5: "ع"
+ Dr4: "ي"
+ Dr3: "م"
+ Dr2: "ه"
+
+ El2: "ض"
+ El3: "خ"
+ El4: "ز"
+ El5: "ظ"
+ El6: "غ"
+ Er5: "ا\u0654" # composed: أ
+ Er4: "ء"
+ Er3: "ط"
+ Er2: "ث"
+ Er1: "\u064a\u0654" # composed: ئ
+
+ Fl_space: " "
+ Fr_space: " "
+ modifier:
+ - []
+- layer:
+ Dl2: "\u0648\u0654" # composed: ؤ
+ Dr6: "ا\u0653" # composed: آ
+ Er5: "ا\u0655" # composed: إ
+ modifier:
+ - [El_shift]
+ - [Er_shift]
diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html
index e35e08f..e2108cd 100644
--- a/lulua/data/report/index.html
+++ b/lulua/data/report/index.html
@@ -5,15 +5,23 @@
<title>لؤلؤة: لوحة مفاتيح عربية</title>
<!--<meta name="description" content="">-->
<meta name="viewport" content="width=device-width, initial-scale=1">
-
+ {# Fonts #}
<link href="https://fonts.googleapis.com/css?family=IBM+Plex+Mono|IBM+Plex+Sans:100,400&display=swap" rel="stylesheet">
+
+ {# Pure.css #}
<link rel="stylesheet" href="https://unpkg.com/purecss@2.0.3/build/pure-min.css" integrity="sha384-cg6SkqEOCV1NbJoCu11+bm0NvBRc8IYLRGXkmNrqUBfTjmMYwNKPWBTIKyw9mHNJ" crossorigin="anonymous">
<link rel="stylesheet" href="https://unpkg.com/purecss@2.0.3/build/grids-responsive-min.css" integrity="sha384-+lKLtMyKzY/ZdqRXlhw2whazop7duDxgP+oWPB+EGry5wK+vJnZwSsgMnAmDS/39" crossorigin="anonymous">
- <script src="https://polyfill.io/v3/polyfill.min.js?features=es6" crossorigin="anonymous"></script>
- <script id="MathJax-script" async src="https://www.unpkg.com/mathjax@3.1.0/es5/tex-mml-chtml.js" integrity="sha384-ynpCZozLxgqK3wrgBu8qH8qPG3eD8mME8z0zugAX26UMb5HfLp2PtvtDH4vdmgkm" crossorigin="anonymous"></script>
+
+ {# TeX support #}
+ <link rel="stylesheet" href="https://unpkg.com/katex@0.13.18/dist/katex.min.css" integrity="sha384-zTROYFVGOfTw7JV7KUu8udsvW2fx4lWOsCEDqhBreBwlHI4ioVRtmIvEThzJHGET" crossorigin="anonymous">
+ <script defer src="https://unpkg.com/katex@0.13.18/dist/katex.min.js" integrity="sha384-GxNFqL3r9uRJQhR+47eDxuPoNE7yLftQM8LcxzgS4HT73tp970WS/wV5p8UzCOmb" crossorigin="anonymous"></script>
+
+ <script defer src="https://unpkg.com/katex@0.13.18/dist/contrib/auto-render.min.js" integrity="sha384-vZTG03m+2yp6N6BNi5iM4rW4oIwk5DfcNdFfxkk9ZWpDriOkXX8voJBFrAO7MpVl" crossorigin="anonymous" onload="renderMathInElement(document.body);"></script>
+
{# bokeh #}
+ {# Defer loading bokeh, so it does not block rendering #}
{% for f in bokehres.js_files -%}
- <script src="{{ f }}"></script>
+ <script defer crossorigin="anonymous" src="{{ f }}"></script>
{%- endfor %}
{% for f in bokehres.css_files -%}
<link rel="stylesheet" href="{{ f }}">
@@ -129,11 +137,10 @@
<div class="lbox">
<h2>The Arabic alphabet</h2>
<p>
- 28 letters make up the Arabic alphabet and quite a few extra
- symbols are required for proper text input, like the hamza in its different
- shapes <bdo dir="ltr" lang="ar">أ إ آ ء ئ ؤ</bdo>, ta marbutah <bdo
- dir="ltr" lang="ar">ة</bdo>, alif maqsurah <bdo dir="ltr"
- lang="ar">ى</bdo> and various diacritics for vowelized texts.
+ 28 letters make up the Arabic alphabet and quite a few extra symbols are
+ required for proper text input, like the {{ hamzah }} in its different
+ shapes <bdo dir="ltr" lang="ar">أ إ آ ء ئ ؤ</bdo>, {{ tamarbutah
+ }}, {{ alifmaqsurah }} and various diacritics for vowelized texts.
<!-- -->
Since the performance of a keyboard layout depends on the text entered
it is necessary to study its mono-, di- and trigraph frequencies first.
@@ -222,8 +229,9 @@
</details>
<p>
- The plot below shows <bdo dir="ltr" lang="ar">ا ل ي م و ن</bdo> can be
- considered the most frequently used letters in the Arabic language.
+ The plot below shows {{ alif }}, {{ lam }}, {{ ya }}, {{ mim }}, {{
+ waw }} and {{ nun }} can be considered the most frequently used letters
+ in the Arabic language.
<!-- -->
Together they account for more than 55% of all letters in the corpus.
</p>
@@ -328,17 +336,17 @@
The most frequent letters have all been assigned to the home row, which
makes them easily accessible.
<!-- -->
- <bdo lang="ar" dir="ltr">ا</bdo> and <bdo lang="ar" dir="ltr">ل</bdo>
+ {{ Alif }} and {{ lam }}
are typed with different hands, balancing the load on hands almost
evenly.
<!-- -->
The index and middle finger of both hands share the majority of the
typing load, but naturally the left middle finger is used more
- frequently due to its assignment to the letter alif.
+ frequently due to its assignment to the letter {{ alif }}.
</p>
<p>
- The layout targets Quaranic and Modern Standard Arabic (MSA), also called Fusha
+ The layout targets Quranic and Modern Standard Arabic (MSA), also called Fuṣḥa
(<bdo lang="ar">الفصحى</bdo>), only.
<!-- -->
Dialectical Arabic (<bdo lang="ar">العامية</bdo>) is mainly a spoken
@@ -353,35 +361,35 @@
Designing the layout to be compose-based has both benefits and
disadvantages.
<!-- -->
- Compose-based mainly means the hamza <bdo lang="ar" dir="ltr">ء</bdo>
- is treated like an optional diacritic for Alef, Waw and Yah instead of
- viewing Alef-Hamza, Waw-Hamza and Yah-Hamza as precombined, atomic
- units.
+ Compose-based mainly means the {{ hamzah }} is treated like an optional
+ diacritic for {{ alif }}, {{ waw }} and {{ ya }} instead of viewing
+ {{ alifhamzah }}, {{ wawhamzah }} and {{ yahamzah }} as precombined,
+ atomic units.
<!-- -->
- Although <bdo lang="ar" dir="ltr">أ</bdo> and <bdo lang="ar"
- dir="ltr">ا</bdo> are not the same, the hamza can be dropped if the
- writer’s intention is unambigiously inferable from context.
+ Although {{ alifhamzah_ }} and {{ alif_ }} are not the same, the {{
+ hamzah_ }} can be dropped if the writer’s intention is unambiguously
+ inferable from context.
<!-- -->
- Thus it makes sense to provide hamza as a combining character on the
- keyboard.
+ Thus it makes sense to provide {{ hamzah_ }} as a combining character
+ on the keyboard.
<!-- -->
Additionally it uses two keys less than precombining it with its stems,
- allowing the entire alphabet plus hamza diacritic to fit on a single
+ allowing the entire alphabet plus hamzah diacritic to fit on a single
keyboard layer.
<!-- -->
However, there is a cost to this approach:
- All hamza variants account for {{
+ All {{ hamzah_ }} variants account for {{
'%.1f'|format(layoutstats['ar-osx'].hamzaImpact*100) }}% of button
combinations.
<!-- -->
- Splitting hamza and from its stem means doubling the total number of
- button combinations and thus button presses, decreasing scores like
+ Splitting {{ hamzah_ }} from its stem means doubling the total number
+ of button combinations and thus button presses, decreasing scores like
words per minute (WPM) slightly.
<!-- -->
- Splitting Alef and Alef-Hamza could also reduce pressure on left middle
- finger and allow for more even distribution, since {{
- layoutstats['ar-osx'].hamzaOnAlef|fraction }}<sup>th</sup> of all Alef
- uses are with Hamza.
+ Splitting {{ alif }} and {{ alifhamzah }} could also reduce pressure
+ on left middle finger and allow for more even distribution, since {{
+ layoutstats['ar-osx'].hamzaOnAlef|fraction }}<sup>th</sup> of all {{
+ alif }} uses are with {{ hamzah }}.
</p>
<details class="remarks">
<summary></summary>
@@ -415,23 +423,6 @@
{% endif %}
{% endfor %}
</div>
-<div class="sentencestats">
-<p lang="en">Examples:</p>
-<ul lang="ar">
-{% for sentence in stats.sentences %}
- <li>
- {% for match, weight in sentence[0] -%}
- {%- if weight is none -%}
- <span>{{ match }}</span>
- {%- else -%}
- {%- set c = weight|blendn((38, 139, 210), (108, 113, 196), (211, 54, 130), (220, 50, 47)) -%}
- <span style="color: rgb({{ c[0] }}, {{ c[1] }}, {{ c[2] }});" title="{{ '%5.5f'|format(weight) }}">{{ match }}</span>
- {%- endif -%}
- {%- endfor %}
- </li>
-{% endfor %}
-</ul>
-</div>
{% endmacro %}
<figure id="ar-lulua-heat">
@@ -450,7 +441,62 @@
<div class="lbox">
<h2><a href="#related">Related work</a></h2>
<p>This section explores existing keyboard layouts made for the
- Arabic language and analyzes their usability.</p>
+ Arabic language and analyzes their usability.
+ <!-- -->
+ Comparing them with the proposed layout above is difficult at best,
+ because the layouts presented below cover different character sets.
+ <!-- -->
+ Some lack numbers, some do not include short vowels and others provide
+ no way to type symbols.
+ <!-- -->
+ Therefore no individual score is assigned to each layout, but an analysis
+ of each layout’s features is given.
+ </p>
+ </div>
+ </div>
+ </div>
+
+ <figure id="triadeffort">
+ <div class="lbox" lang="en">
+ <div id="triadeffort-div"></div>
+ </div>
+ <figcaption class="pure-g flexreverse">
+ </figcaption>
+ </figure>
+
+ <div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-xl-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
+ <div class="lbox">
+ <p>
+ Carpalx tries to minimize the effort of typing in blocks of three
+ consecutive keystrokes, triads, and thus a good layout in that sense
+ should make typing frequent triads easy.
+ <!-- -->
+ The figure above plots cumulative triad frequency on the x-axis and
+ weighted cumulative effort on the y-axis.
+ <!-- -->
+ Thus, at an x value of 0.5 the y-axis is the sum of triad frequencies
+ multiplied by their effort for all triads responsible for 50% of the
+ typing process.
+ <!-- -->
+ Standard layouts are the layouts from <a href="#ar-asmo663">ASMO</a>,
+ <a href="#ar-linux">Linux</a> and <a href="#ar-osx">OSX</a> whereas
+ <em>usable</em> lists only those which are actually relevant for typing.
+ <!-- -->
+ As we can see the layout presented above meets the optimization goal.
+ <!-- -->
+ Only the top 5% of all triads are “easier” to type with <a
+ href="#ar-malas">Malas’ layout</a>, because lulua splits {{ hamzah }}
+ from its {{ alif }} stem.
+ <!-- -->
+ As expected the <a href="#ar-phonetic">phonetic layout</a> is one of the
+ worst ones, because QWERTY is not optimized for Arabic letter frequencies.
+ </p>
+ <p>
+ The following sections provide details about these layouts.
+ </p>
</div>
</div>
</div>
@@ -474,8 +520,8 @@
dir="ltr" lang="ar">ض ص، س ش، ح ج خ</bdo>) and not frequency.
<!-- -->
Also it overuses the right index finger by assigning the four
- high-frequency letters <bdo lang="ar" dir="ltr">ا ت و ة</bdo> to
- it.
+ high-frequency letters {{ alif }}, {{ ta }}, {{ waw }} and {{ tamarbutah
+ }} to it.
</p>
</div>
</div>
@@ -497,14 +543,14 @@
<h3><a href="#ar-osx">Mac OS X</a></h3>
<p>
Mac OS X’s Arabic keyboard layout makes a few small changes to ASMO
- 663 by moving the <bdo lang="ar" dir="ltr">ة</bdo> to a hard to
+ 663 by moving the {{ tamarbutah }} to a hard to
reach spot on the right of the top row.
<!-- -->
It also moves the short vowels from the first to the top row of the
second layer and replaces them with symbols.
<!-- -->
The bottom row keys are aditionally shifted to the right, beginning
- with <bdo lang="ar" dir="ltr">ر</bdo>.
+ with {{ ra }}.
</p>
</div>
</div>
@@ -528,15 +574,14 @@
A more common layout is the one used on Linux, which also exists on
Windows with minor changes to the first layer.
<!-- -->
- While its top and center row barely differ from ASMO 663 the
- bottom row now contains a separate key for the ligature <bdo
- lang="ar" dir="ltr">ﻻ</bdo>, likely inherited from <a
+ While its top and center row barely differ from
+ ASMO 663 the bottom row now contains a separate key
+ for the ligature {{ lamalif }} , likely inherited from <a
href="https://oztypewriter.blogspot.com/2014/10/the-arabic-typewriter-keyboard-and.html">early
typewriter layouts</a>.
<!-- -->
But at the cost of pushing punctuation characters to the second
- layer, <bdo dir="ltr" lang="ar">د</bdo> into the top and <bdo
- dir="ltr" lang="ar">ذ</bdo> even further into the number row.
+ layer, {{ dal }} into the top and {{ dhal }} even further into the number row.
</p>
</div>
</div>
@@ -591,10 +636,10 @@
</p>
<p>
While the layout distributes load between fingers quite well it
- favors the left hand by assigning <bdo dir="ltr" lang="ar">ا</bdo>
- and <bdo dir="ltr" lang="ar">ل</bdo> to it.
+ favors the left hand by assigning {{ alif }}
+ and {{ lam }} to it.
<!-- -->
- The decision to place <bdo dir="ltr" lang="ar">ث</bdo> in a very
+ The decision to place {{ tha }} in a very
prominent spot seems weird, given it only accounts for 0.5% of all
symbols, even in their own analysis.
</p>
@@ -636,15 +681,13 @@
<!-- -->
Probably due to their unusual assumption that middle- and
ring-finger rest in the top row their results are suboptimal,
- placing both <bdo dir="ltr" lang="ar">ا</bdo> and <bdo dir="ltr"
- lang="ar">ي</bdo> in the top row.
+ placing both {{ alif }} and {{ ya }} in the top row.
<!-- -->
Their analysis notices this and suggests improved positions for
both characters, but these are not actually implemented.
<!-- -->
- The big asymmetry is caused by placing <bdo dir="ltr" lang="ar">ا
- ل ي</bdo> and <bdo dir="ltr" lang="ar">و</bdo>, four of the five
- most frequent letters, on the right hand side.
+ The big asymmetry is caused by placing {{ alif }}, {{ lam }}, {{ ya }} and
+ {{ waw }}, four of the five most frequent letters, on the right hand side.
</p>
</div></div>
</div>
@@ -672,11 +715,11 @@
optimized for typing speed only, claiming 35% faster typing compared
to the <a href="#ar-linux">currently used layouts</a>.
<!-- -->
- However the decision to put <bdo dir="ltr" lang="ar">ي</bdo> in the top
+ However the decision to put {{ ya }} in the top
row seems odd.
<!-- -->
- Assigning the same left index finger to <bdo dir="ltr" lang="ar">ا
- ي و</bdo>, which are three of the most frequent letters, heavily
+ Assigning the same left index finger to {{ alif }},
+ {{ ya }} and {{ waw }}, which are three of the most frequent letters, heavily
strains this particular finger.
</p>
</div>
@@ -711,8 +754,8 @@
well.
<!-- -->
However their algorithm seems to favor the bottom row instead of the
- easier to use top row since it places the letters <bdo dir="ltr"
- lang="ar">ب ت ر</bdo> there.
+ easier to use top row since it places the letters {{ ba }}, {{ ta }}
+ and {{ ra }} there.
</p>
</div>
</div>
@@ -746,20 +789,102 @@
provide <em>three</em> single-quote marks ’ and <em>two</em> Arabic
semicolon <bdo dir="ltr" lang="ar">؛</bdo>.
<!-- -->
- Additionally it places <bdo dir="ltr" lang="ar">ي</bdo> in an even
+ Additionally it places {{ ya }} in an even
worse position than Malas’ layout.
</p>
</div>
</div>
</div>
- <figure id="ar-osman">
+ <figure>
<div class="lbox">
<img src="ar-osman-heat.svg">
{{ fingerhandstats(layoutstats['ar-osman']) }}
</div>
</figure>
+ <div id="ar-qtaish" class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-xl-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
+ <div class="lbox">
+ <h3><a href="#ar-qtaish">Qtaish et al</a></h3>
+ <p>
+ Qtaish et al presented this layout in
+ <a href="http://www.sci-int.com/pdf/637456047563529791.pdf">An Improved Arabic Keyboard Layout</a> in 2021.
+ <!-- -->
+ They use a novel corpus of 5 million words or roughly 66 million letters
+ <!-- 65713689 adding up the numbers in their paper --> consisting of
+ newspapers, (now defunct) social networks and blogs, as well as six
+ dictionaries (see remarks).
+ <!-- -->
+ Then letters were classified into three categories based on their
+ frequency, which apparently were used to populate home, top and bottom
+ row (in this order).
+ <!-- -->
+ Additionally bigrams were somehow used to arrange letters and avoid
+ placing them on the same or adjacent fingers, making rolling finger
+ movements incentivized by carpalx impossible.
+ <!-- -->
+ Ultimately it looks like the layout was designed by hand and not through
+ an automated process. <!-- see section 5 of the paper -->
+ </p>
+
+ <details class="remarks">
+ <summary></summary>
+ <em>Alfarahindi</em> and <em>Alein Dictionary</em> are most likely the same 8th century book <a href="https://en.wikipedia.org/wiki/Kitab_al-%27Ayn">Kitab al-'Ayn</a>.
+ <!-- -->
+ There are also <a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%A7%D8%AC_%D8%A7%D9%84%D8%B9%D8%B1%D9%88%D8%B3_%D9%85%D9%86_%D8%AC%D9%88%D8%A7%D9%87%D8%B1_%D8%A7%D9%84%D9%82%D8%A7%D9%85%D9%88%D8%B3">Taj-Alaroos</a>, <a href="https://www.lesanarab.com/letter/">Lesan Alarab</a>, <a href="https://ar.wikipedia.org/wiki/%D8%A7%D9%84%D9%85%D8%B9%D8%AC%D9%85_%D8%A7%D9%84%D9%88%D8%B3%D9%8A%D8%B7">Almujam Alwaseet</a> and <a href="https://ar.wikipedia.org/wiki/%D8%A7%D9%84%D9%85%D9%86%D8%AC%D8%AF_(%D9%84%D9%88%D9%8A%D8%B3_%D9%85%D8%B9%D9%84%D9%88%D9%81)">Almunjed</a>.
+ <!-- -->
+ All of them are lexica, not dictionaries.
+ </details>
+
+ <p>
+ Although the authors do not provide a number row, it has been added for
+ fair comparison.
+ <!-- -->
+ However to be actually usable the layout would need punctuation symbols and diacritics.
+ </p>
+ </div>
+ </div>
+ </div>
+
+ <figure>
+ <div class="lbox">
+ <img src="ar-qtaish-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-qtaish']) }}
+ </div>
+ </figure>
+
+ <div id="ar-ergoarabic" class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-xl-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
+ <div class="lbox">
+ <h3><a href="#ar-ergoarabic">Ergoarabic</a></h3>
+ <p>
+ Being first released in 2021 <a
+ href="https://github.com/darkstego/ergoarabic">Ergoarabic</a> is the
+ most recent optimized layout for Arabic.
+ <!-- -->
+ On the first layer it combines the positions of 17 keys from the <a
+ href="#ar-linux">standard PC layout</a> with brackets and punctuation
+ symbols from QWERTY, aiming to retain compatibility with both.
+ <!-- -->
+ The remaining keys have been re-arranged or moved to the shift layer by
+ hand for improved ergonomics.
+ </p>
+ </div>
+ </div>
+ </div>
+
+ <figure>
+ <div class="lbox">
+ <img src="ar-ergoarabic-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-ergoarabic']) }}
+ </div>
+ </figure>
+
<div id="ar-phonetic" class="pure-g flexreverse">
<div class="pure-u-1 pure-u-xl-1-2">
</div>
@@ -769,7 +894,7 @@
<p>
The <a href="http://arabic.omaralzabir.com/home">Arabic Phonetic Keyboard</a>
simply maps the QWERTY layout to Arabic letters, based on their sound.
- Thus Q becomes <bdo dir="ltr" lang="ar">ق</bdo>, Y becomes <bdo dir="ltr" lang="ar">ي</bdo> and so on.
+ Thus Q becomes {{ qaf }}, Y becomes {{ ya }} and so on.
It claims to be optimized for writing vowelized texts, especially
Quranic Arabic, and thus includes quite a few combining characters and
special symbols.
@@ -836,9 +961,16 @@
</section>
<script>
-fetch('letterfreq.json')
- .then(function(response) { return response.json(); })
- .then(function(item) { Bokeh.embed.embed_item(item, 'letterfreq-div'); })
+function loadPlots () {
+ const plots = ['letterfreq', 'triadeffort'];
+ for (const p of plots) {
+ fetch(p + '.json')
+ .then(function(response) { return response.json(); })
+ .then(function(item) { Bokeh.embed.embed_item(item, p + '-div'); });
+ }
+}
+if (document.readyState != "loading") loadPlots ();
+else document.addEventListener("DOMContentLoaded", loadPlots);
</script>
</body>
diff --git a/lulua/data/report/style.css b/lulua/data/report/style.css
index ed0d32d..2cffb03 100644
--- a/lulua/data/report/style.css
+++ b/lulua/data/report/style.css
@@ -30,6 +30,12 @@ using http://colormind.io/bootstrap/
src: local('IBM Plex Sans Arabic Regular'), local('IBMPlexSansArabic-Regular'), url('fonts/IBMPlexSansArabic-Regular.woff2') format('woff2');
}
+html {
+ /* Increase line-height, because Arabic font is taller than Latin and
+ * we need some space between lines for descenders/ascenders */
+ line-height: 1.33;
+}
+
body {
font-size: 14pt;
background-color: var(--light-shades);
@@ -157,11 +163,6 @@ div.fingerhandstats .fingers .index {
div.fingerhandstats .fingers .thumb {
border: 0.1em solid var(--finger-thumb);
}
-div.sentencestats ul {
- list-style-type: none;
- margin: 1em 0;
- padding: 0;
-}
.table-overflow {
overflow-x: auto;
diff --git a/lulua/keyboard.py b/lulua/keyboard.py
index cf96efc..eed58a7 100644
--- a/lulua/keyboard.py
+++ b/lulua/keyboard.py
@@ -24,281 +24,16 @@ from typing import Text, Dict, Iterator, List
from .util import YamlLoader
-# XXX move this to keyboard.yaml?
-_buttonToXorgKeycode = {
- 'Bl1': 49,
- 'Bl2': 10,
- 'Bl3': 11,
- 'Bl4': 12,
- 'Bl5': 13,
- 'Bl6': 14,
- 'Bl7': 15,
- 'Br6': 16,
- 'Br5': 17,
- 'Br4': 18,
- 'Br3': 19,
- 'Br2': 20,
- 'Br1': 21,
- 'Br_bs': 22,
- 'Cl_tab': 23,
- 'Cl1': 24,
- 'Cl2': 25,
- 'Cl3': 26,
- 'Cl4': 27,
- 'Cl5': 28,
- 'Cr7': 29,
- 'Cr6': 30,
- 'Cr5': 31,
- 'Cr4': 32,
- 'Cr3': 33,
- 'Cr2': 34,
- 'Cr1': 35,
- 'CD_ret': 36,
- 'Dl_caps': 66,
- 'Dl1': 38,
- 'Dl2': 39,
- 'Dl3': 40,
- 'Dl4': 41,
- 'Dl5': 42,
- 'Dr7': 43,
- 'Dr6': 44,
- 'Dr5': 45,
- 'Dr4': 46,
- 'Dr3': 47,
- 'Dr2': 48,
- 'Dr1': 51,
- 'El_shift': 50,
- 'El1': 94,
- 'El2': 52,
- 'El3': 53,
- 'El4': 54,
- 'El5': 55,
- 'El6': 56,
- 'Er5': 57,
- 'Er4': 58,
- 'Er3': 59,
- 'Er2': 60,
- 'Er1': 61,
- 'Er_shift': 62,
- 'Fl_ctrl': 37,
- 'Fl_win': 133,
- 'Fl_alt': 64,
- 'Fl_space': 65,
- 'Fr_space': 65,
- 'Fr_altgr': 108,
- 'Fr_win': 105,
- 'Fr_menu': 135,
- 'Fr_ctrl': 105,
- }
-
-_buttonToKeyman = {
- 'Bl1': 'K_BKSLASH',
- 'Bl2': 'K_1',
- 'Bl3': 'K_2',
- 'Bl4': 'K_3',
- 'Bl5': 'K_4',
- 'Bl6': 'K_5',
- 'Bl7': 'K_6',
- 'Br6': 'K_7',
- 'Br5': 'K_8',
- 'Br4': 'K_9',
- 'Br3': 'K_0',
- 'Br2': 'K_LBRKT',
- 'Br1': 'K_RBRKT',
- 'Br_bs': 'K_BKSP',
- 'Cl_tab': 'K_TAB',
- 'Cl1': 'K_Q',
- 'Cl2': 'K_W',
- 'Cl3': 'K_E',
- 'Cl4': 'K_R',
- 'Cl5': 'K_T',
- 'Cr7': 'K_Z',
- 'Cr6': 'K_U',
- 'Cr5': 'K_I',
- 'Cr4': 'K_O',
- 'Cr3': 'K_P',
- 'Cr2': 'K_COLON',
- 'Cr1': 'K_EQUAL',
- 'CD_ret': 'K_ENTER',
- 'Dl_caps': 'CAPS',
- 'Dl1': 'K_A',
- 'Dl2': 'K_S',
- 'Dl3': 'K_D',
- 'Dl4': 'K_F',
- 'Dl5': 'K_G',
- 'Dr7': 'K_H',
- 'Dr6': 'K_J',
- 'Dr5': 'K_K',
- 'Dr4': 'K_L',
- 'Dr3': 'K_BKQUOTE',
- 'Dr2': 'K_QUOTE',
- 'Dr1': 'K_SLASH',
- 'El_shift': 'SHIFT', # XXX: there is no distinction between left/right
- 'El1': 'K_oE2',
- 'El2': 'K_Y',
- 'El3': 'K_X',
- 'El4': 'K_C',
- 'El5': 'K_V',
- 'El6': 'K_B',
- 'Er5': 'K_N',
- 'Er4': 'K_M',
- 'Er3': 'K_COMMA',
- 'Er2': 'K_PERIOD',
- 'Er1': 'K_HYPHEN',
- 'Er_shift': 'SHIFT',
- 'Fl_ctrl': 'LCTRL',
- 'Fl_win': 'K_?5B',
- 'Fl_alt': 'LALT',
- 'Fl_space': 'K_SPACE',
- 'Fr_space': 'K_SPACE',
- 'Fr_altgr': 'RALT',
- 'Fr_win': 'K_?5C',
- 'Fr_menu': 'K_?5D',
- 'Fr_ctrl': 'RCTRL',
- }
-
-# button windows scancode. See Keyboard Scan Code Specification Revision 1.3a
-# (published in 2000) from the Windows Platform Design Notes for example.
-_buttonToWinScancode = {
- 'Bl1': (0x29, ),
- 'Bl2': (0x02, ),
- 'Bl3': (0x03, ),
- 'Bl4': (0x04, ),
- 'Bl5': (0x05, ),
- 'Bl6': (0x06, ),
- 'Bl7': (0x07, ),
- 'Br6': (0x08, ),
- 'Br5': (0x09, ),
- 'Br4': (0x0A, ),
- 'Br3': (0x0B, ),
- 'Br2': (0x0C, ),
- 'Br1': (0x0D, ),
- 'Br_bs': (0x0E, ),
- 'Cl_tab': (0x0F, ),
- 'Cl1': (0x10, ),
- 'Cl2': (0x11, ),
- 'Cl3': (0x12, ),
- 'Cl4': (0x13, ),
- 'Cl5': (0x14, ),
- 'Cr7': (0x15, ),
- 'Cr6': (0x16, ),
- 'Cr5': (0x17, ),
- 'Cr4': (0x18, ),
- 'Cr3': (0x19, ),
- 'Cr2': (0x1A, ),
- 'Cr1': (0x1B, ),
- 'CD_ret': (0x1C, ),
- 'Dl_caps': (0x3A, ),
- 'Dl1': (0x1E, ),
- 'Dl2': (0x1F, ),
- 'Dl3': (0x20, ),
- 'Dl4': (0x21, ),
- 'Dl5': (0x22, ),
- 'Dr7': (0x23, ),
- 'Dr6': (0x24, ),
- 'Dr5': (0x25, ),
- 'Dr4': (0x26, ),
- 'Dr3': (0x27, ),
- 'Dr2': (0x28, ),
- 'Dr1': (0x2B, ),
- 'El_shift': (0x2A, ),
- 'El1': (0x56, ),
- 'El2': (0x2C, ),
- 'El3': (0x2D, ),
- 'El4': (0x2E, ),
- 'El5': (0x2F, ),
- 'El6': (0x30, ),
- 'Er5': (0x31, ),
- 'Er4': (0x32, ),
- 'Er3': (0x33, ),
- 'Er2': (0x34, ),
- 'Er1': (0x35, ),
- 'Er_shift': (0x36, ),
- 'Fl_ctrl': (0x1D, ),
- 'Fl_win': (0xe0, 0x5B, ),
- 'Fl_alt': (0x38, ),
- 'Fl_space': (0x39, ),
- 'Fr_space': (0x39, ),
- 'Fr_altgr': (0xe0, 0x38, ),
- 'Fr_win': (0xe0, 0x5C, ),
- 'Fr_menu': (0xe0, 0x5D, ),
- 'Fr_ctrl': (0xe0, 0x1D, ),
- }
-
-# see https://eastmanreference.com/complete-list-of-applescript-key-codes
-_buttonToOsxKeycode = {
- 'Bl1': 50,
- 'Bl2': 18,
- 'Bl3': 19,
- 'Bl4': 20,
- 'Bl5': 21,
- 'Bl6': 23,
- 'Bl7': 22,
- 'Br6': 26,
- 'Br5': 28,
- 'Br4': 25,
- 'Br3': 29,
- 'Br2': 27,
- 'Br1': 24,
- 'Br_bs': 51,
- 'Cl_tab': 48,
- 'Cl1': 12,
- 'Cl2': 13,
- 'Cl3': 14,
- 'Cl4': 15,
- 'Cl5': 17,
- 'Cr7': 16,
- 'Cr6': 32,
- 'Cr5': 34,
- 'Cr4': 31,
- 'Cr3': 35,
- 'Cr2': 33,
- 'Cr1': 30,
- 'CD_ret': 36,
- 'Dl_caps': 57,
- 'Dl1': 0,
- 'Dl2': 1,
- 'Dl3': 2,
- 'Dl4': 3,
- 'Dl5': 5,
- 'Dr7': 4,
- 'Dr6': 38,
- 'Dr5': 40,
- 'Dr4': 37,
- 'Dr3': 41,
- 'Dr2': 39,
- #'Dr1': 51,
- 'El_shift': 57,
- #'El1': 6,
- 'El2': 6,
- 'El3': 7,
- 'El4': 8,
- 'El5': 9,
- 'El6': 11,
- 'Er5': 45,
- 'Er4': 46,
- 'Er3': 43,
- 'Er2': 47,
- 'Er1': 44,
- 'Er_shift': 60,
- 'Fl_ctrl': 59,
- 'Fl_win': 55,
- 'Fl_alt': 58,
- 'Fl_space': 49,
- 'Fr_space': 49,
- 'Fr_altgr': 61,
- 'Fr_win': 55,
- #'Fr_menu': ,
- #'Fr_ctrl': 105,
- }
-
class Button:
- __slots__ = ('width', 'isMarked', 'i')
+ """ A single physical button on the keyboard """
+
+ __slots__ = ('width', 'isMarked', 'i', 'scancode')
_idToName : Dict[int, Text] = {}
_nameToId : Dict[Text, int] = {}
_nextNameId = 0
+ serializedName = 'standard'
- def __init__ (self, name: Text, width: float = 1, isMarked: bool = False):
+ def __init__ (self, name: Text, width: float = 1, isMarked: bool = False, scancode = None):
# map names to integers for fast comparison/hashing
i = Button._nameToId.get (name)
if i is None:
@@ -310,8 +45,14 @@ class Button:
self.width = width
# marked with an haptic line, for better orientation
self.isMarked = isMarked
-
- def __repr__ (self):
+ # scancode map, although they are not all technically scancodes, they
+ # are some low-level representation of the physical key
+ self.scancode = scancode
+ # special case for windows
+ if self.scancode and 'windows' in self.scancode:
+ self.scancode['windows'] = tuple (self.scancode['windows'])
+
+ def __repr__ (self): # pragma: no cover
return f'Button({self.name!r}, {self.width}, {self.isMarked})'
def __eq__ (self, other):
@@ -326,25 +67,10 @@ class Button:
def name (self):
return Button._idToName[self.i]
- @property
- def xorgKeycode (self):
- return _buttonToXorgKeycode[self.name]
-
- @property
- def keymanCode (self):
- return _buttonToKeyman[self.name]
-
- @property
- def windowsScancode (self):
- return _buttonToWinScancode[self.name]
-
- @property
- def osxKeycode (self):
- return _buttonToOsxKeycode[self.name]
-
@classmethod
def deserialize (self, data: Dict):
- kindMap = {'standard': Button, 'letter': LetterButton, 'multi': MultiRowButton}
+ kindMap = dict (map (lambda x: (x.serializedName, x),
+ (Button, LetterButton, MultiRowButton)))
try:
kind = data['kind']
del data['kind']
@@ -352,15 +78,28 @@ class Button:
kind = 'standard'
return kindMap[kind] (**data)
+ def serialize (self):
+ d = dict (name=self.name, width=self.width, scancode=self.scancode)
+ if self.__class__ is not Button:
+ d['kind'] = self.serializedName
+ if self.isMarked:
+ d['isMarked'] = self.isMarked
+ # turn the tuple back into a list
+ if d['scancode'] and 'windows' in d['scancode']:
+ d['scancode']['windows'] = list (d['scancode']['windows'])
+ return d
+
class LetterButton (Button):
"""
A letter, number or symbol button, but not special keys like modifier, tab,
"""
- def __init__ (self, name, isMarked=False):
- super().__init__ (name, width=1, isMarked=isMarked)
+ serializedName = 'letter'
- def __repr__ (self):
+ def __init__ (self, name, width=1, isMarked=False, scancode=None):
+ super().__init__ (name, width=width, isMarked=isMarked, scancode=scancode)
+
+ def __repr__ (self): # pragma: no cover
return f'LetterButton({self.name!r}, {self.isMarked})'
class MultiRowButton (Button):
@@ -370,19 +109,26 @@ class MultiRowButton (Button):
"""
__slots__ = ('span', )
+ serializedName = 'multi'
- def __init__ (self, name, span, isMarked=False):
- super ().__init__ (name, width=1, isMarked=isMarked)
+ def __init__ (self, name, span, width=1, isMarked=False, scancode=None):
+ super ().__init__ (name, width=width, isMarked=isMarked, scancode=scancode)
self.span = span
- def __repr__ (self):
+ def __repr__ (self): # pragma: no cover
return f'MultiRowButton({self.name!r}, {self.span!r}, {self.isMarked!r})'
+ def serialize (self):
+ d = super ().serialize ()
+ d['span'] = self.span
+ return d
+
class PhysicalKeyboard:
- __slots__ = ('name', 'rows', '_buttonToRow')
+ __slots__ = ('name', 'description', 'rows', '_buttonToRow')
- def __init__ (self, name: Text, rows):
+ def __init__ (self, name: Text, description: Text, rows):
self.name = name
+ self.description = description
self.rows = rows
self._buttonToRow = dict ()
@@ -393,7 +139,7 @@ class PhysicalKeyboard:
def __iter__ (self):
return iter (self.rows)
- def __repr__ (self):
+ def __repr__ (self): # pragma: no cover
return f'<PhysicalKeyboard {self.name} with {len (self)} keys>'
def __len__ (self):
@@ -405,7 +151,7 @@ class PhysicalKeyboard:
for k in self.keys ():
if k.name == name:
return k
- raise AttributeError (f'{name} is not a valid button name')
+ raise KeyError (f'{name} is not a valid button name')
def keys (self) -> Iterator[Button]:
""" Iterate over all keys """
@@ -428,7 +174,19 @@ class PhysicalKeyboard:
for btn in r:
row[1].append (Button.deserialize (btn))
rows.append (row)
- return cls (data['name'], rows)
+ return cls (data['name'], data['description'], rows)
+
+ def serialize (self):
+ rows = []
+ for l, r in self.rows:
+ newRow = [[], []]
+ for btn in l:
+ newRow[0].append (btn.serialize ())
+ for btn in r:
+ newRow[1].append (btn.serialize ())
+ rows.append (newRow)
+ return dict (name=self.name, description=self.description, rows=rows)
-defaultKeyboards = YamlLoader ('data/keyboards', PhysicalKeyboard.deserialize)
+dataDirectory = 'data/keyboards'
+defaultKeyboards = YamlLoader (dataDirectory, PhysicalKeyboard.deserialize)
diff --git a/lulua/plot.py b/lulua/plot.py
index fdfc16c..9fb5cf1 100644
--- a/lulua/plot.py
+++ b/lulua/plot.py
@@ -20,9 +20,6 @@
import sys, argparse, json, unicodedata, pickle, logging, math
from operator import itemgetter
-from bokeh.plotting import figure
-from bokeh.models import ColumnDataSource, LinearAxis, Range1d
-from bokeh.embed import json_item
from .layout import *
from .keyboard import defaultKeyboards
@@ -30,9 +27,31 @@ from .util import limit, displayText
from .writer import Writer
from .carpalx import Carpalx, models
+def setPlotStyle (p):
+ """
+ Set common plot styles
+
+ Modifies the plot p in place: styles its legend (if there is one),
+ removes the border fill and sets the background fill alpha. Must be
+ called after the glyphs have been added, otherwise the legend is still
+ empty and will not be styled.
+ """
+
+ # Suppress warnings from bokeh if the legend is empty.
+ if p.legend:
+ p.legend.location = "top_left"
+ # Hide glyph on click on legend
+ p.legend.click_policy = "hide"
+ p.legend.label_text_font = 'IBM Plex Sans Arabic'
+ # no frame and no background for the legend box
+ p.legend.border_line_color = None
+ p.legend.background_fill_color = None
+ # fill used for legend entries, whose glyph is inactive
+ p.legend.inactive_fill_color = 'black'
+ p.legend.inactive_fill_alpha = 0.1
+
+ # no border fill
+ p.border_fill_color = None
+ p.background_fill_alpha = 0.5
+
def letterfreq (args):
""" Map key combinations to their text, bin it and plot sorted distribution """
+ from bokeh.plotting import figure
+ from bokeh.models import ColumnDataSource, LinearAxis, Range1d
+ from bokeh.embed import json_item
+
# show unicode class "letters other" only
whitelistCategory = {'Lo'}
@@ -89,15 +108,12 @@ def letterfreq (args):
p.vbar(x='letters', width=0.5, top='rel', color="#dc322f", source=source, y_range_name='single')
p.add_layout(LinearAxis(y_range_name="single"), 'right')
+ setPlotStyle (p)
# styling
p.xgrid.grid_line_color = None
- p.xaxis.major_label_text_font_size = "1.5em"
- p.xaxis.major_label_text_font_size = "1.5em"
- p.xaxis.major_label_text_font = 'IBM Plex Sans Arabic'
- p.yaxis.major_label_text_font = 'IBM Plex Sans Arabic'
- # no border fill
- p.border_fill_color = None
- p.background_fill_alpha = 0.5
+ for axis, size, font in ((p.xaxis, '1.5em', 'IBM Plex Sans Arabic'), (p.yaxis, '1em', 'IBM Plex Sans')):
+ axis.major_label_text_font_size = size
+ axis.major_label_text_font = font
json.dump (json_item (p), sys.stdout)
@@ -153,3 +169,125 @@ def triadfreq (args):
return 0
+def triadEffortData (args):
+ """
+ Compute cumulated triad frequency vs cumulative effort for a layout.
+
+ Reads pickled stats from stdin and writes a pickled dict with the keys
+ x (cumulative weight), y (cumulative effort) and layout to stdout. Both
+ x and y are divided by their final cumulative value. The layout is
+ selected by args.layout. The result is meant to be consumed by
+ triadEffortPlot.
+
+ More frequent triads should be easier to type and thus we expect an
+ exponential distribution for optimized layouts and linear distribution
+ for everything else.
+ """
+
+ import numpy as np
+
+ # NOTE(review): pickle.load can execute arbitrary code, only feed stats
+ # from a trusted source into this command.
+ stats = pickle.load (sys.stdin.buffer)
+
+ # XXX: add layout to stats?
+ keyboard = defaultKeyboards['ibmpc105']
+ layout = defaultLayouts[args.layout].specialize (keyboard)
+ writer = Writer (layout)
+
+ # letter-based binning, in case multiple buttons are mapped to the same
+ # letter.
+ # Every bin gets its own Carpalx instance, accumulating the effort of all
+ # button triads producing the same text.
+ binned = defaultdict (lambda: dict (weight=0, effort=Carpalx (models['mod01'], writer), textTriad=None))
+ weightSum = 0
+ for triad, weight in stats['triads'].triads.items ():
+ textTriad = tuple (layout.getText (t) for t in triad)
+ data = binned[textTriad]
+ data['weight'] += weight
+ data['effort'].addTriad (triad, weight)
+ data['textTriad'] = textTriad
+ data['layers'] = tuple (layout.modifierToLayer (x.modifier)[0] for x in triad)
+ weightSum += weight
+
+ # Now bin into equally-sized buckets to reduce amount of data
+ nBins = 200
+ binWidth = weightSum//nBins
+ cumulativeWeight = 0
+ cumulativeEffort = 0
+ x = []
+ y = []
+ # most frequent text triads first
+ for data in sorted (binned.values (), key=lambda x: x['weight'], reverse=True):
+ cumulativeWeight += data['weight']
+ cumulativeEffort += data['effort'].effort * data['weight']
+ # only record a point if at least binWidth was added since the last one
+ if not x or x[-1] + binWidth <= cumulativeWeight:
+ x.append (cumulativeWeight)
+ y.append (cumulativeEffort)
+ x.append (cumulativeWeight)
+ y.append (cumulativeEffort)
+
+ # normalize both axes by their final cumulative value
+ x = np.true_divide (x, cumulativeWeight)
+ y = np.true_divide (y, cumulativeEffort)
+
+ pickle.dump (dict (x=x, y=y, layout=layout), sys.stdout.buffer, pickle.HIGHEST_PROTOCOL)
+
+def triadEffortPlot (args):
+ """ Plot concatenated pickled data from triadEffortData """
+
+ from .stats import unpickleAll
+ # Initializing bokeh is an expensive operation and this module is imported
+ # alot, so only do it when necessary.
+ from bokeh.palettes import Set3
+ from bokeh.plotting import figure
+ from bokeh.models import RadioButtonGroup, CustomJS, Slope
+ from bokeh.embed import json_item
+ from bokeh.layouts import column
+
+ p = figure(
+ plot_width=1000,
+ plot_height=500,
+ sizing_mode='scale_both',
+ x_range=(0, 1),
+ y_range=(0, 1),
+ output_backend="webgl",
+ )
+ data = list (unpickleAll (sys.stdin.buffer))
+ colors = Set3[len(data)]
+ lines = dict ()
+ for o, color in zip (data, colors):
+ name = o['layout'].name
+ assert name not in lines
+ lines[name] = p.line (o['x'], o['y'], line_width=1, color=color,
+ legend_label=name, name=name)
+
+ # color: base1
+ slope = Slope(gradient=1, y_intercept=0,
+ line_color='#93a1a1', line_dash='dashed', line_width=1)
+ p.add_layout(slope)
+
+ setPlotStyle (p)
+ for axis, size, font in ((p.xaxis, '1em', 'IBM Plex Sans'), (p.yaxis, '1em', 'IBM Plex Sans')):
+ axis.major_label_text_font_size = size
+ axis.major_label_text_font = font
+
+ LABELS = ["All", "Standard", "Usable"]
+ visible = {
+ 0: list (lines.keys ()),
+ 1: ['ar-asmo663', 'ar-linux', 'ar-osx'],
+ 2: ['ar-lulua', 'ar-ergoarabic', 'ar-malas', 'ar-linux', 'ar-osx'],
+ }
+ ranges = {
+ 0: [(0, 1), (0, 1)],
+ 1: [(0, 0.5), (0, 0.4)],
+ 2: [(0, 0.5), (0, 0.4)],
+ }
+ presets = RadioButtonGroup (labels=LABELS, active=0)
+ # Set visibility and x/yranges on click. Not sure if there’s a more pythonic way.
+ presets.js_on_click(CustomJS(
+ args=dict(lines=lines, plot=p, visible=visible, ranges=ranges),
+ code="""
+ for (const [k, line] of Object.entries (lines)) {
+ line.visible = visible[this.active].includes (k);
+ }
+ const xrange = plot.x_range;
+ xrange.start = ranges[this.active][0][0];
+ xrange.end = ranges[this.active][0][1];
+ const yrange = plot.y_range;
+ yrange.start = ranges[this.active][1][0];
+ yrange.end = ranges[this.active][1][1];
+ """))
+
+ json.dump (json_item (column (p, presets)), sys.stdout)
+
+ return 0
+
diff --git a/lulua/render.py b/lulua/render.py
index 41a6bd5..bc09e4c 100644
--- a/lulua/render.py
+++ b/lulua/render.py
@@ -107,7 +107,8 @@ class Renderer:
gCap.add (self._drawMarker (btnWidth, btnPos))
highlight = self.keyHighlight.get (btn.name, 0)
- gHighlight.add (self._drawHighlight (highlight, btnWidth, btnPos))
+ if highlight > 0:
+ gHighlight.add (self._drawHighlight (highlight, btnWidth, btnPos))
l = self._drawLabel (buttonText, btnWidth, btnPos)
if isModifier:
@@ -302,6 +303,8 @@ def renderXmodmap (args):
keyboard = defaultKeyboards[args.keyboard]
layout = defaultLayouts[args.layout].specialize (keyboard)
+ xorgGetter = lambda x: x.scancode['xorg']
+
with open (args.output, 'w') as fd:
# inspired by https://neo-layout.org/neo_de.xmodmap
fd.write ('\n'.join ([
@@ -321,12 +324,12 @@ def renderXmodmap (args):
# layers: 1, 2, 3, 5, 4, None, 6, 7
for i in (0, 1, 2, 4, 3, 99999, 5, 6):
if i >= len (layout.layers):
- for btn in unique (keyboard.keys (), key=attrgetter ('xorgKeycode')):
+ for btn in unique (keyboard.keys (), key=xorgGetter):
keycodeMap[btn].append ('NoSymbol')
continue
l = layout.layers[i]
# space button shares the same keycode and must be removed
- for btn in unique (keyboard.keys (), key=attrgetter ('xorgKeycode')):
+ for btn in unique (keyboard.keys (), key=xorgGetter):
if not layout.isModifier (frozenset ([btn])):
text = l.layout.get (btn)
if not text:
@@ -359,7 +362,7 @@ def renderXmodmap (args):
for btn, v in keycodeMap.items ():
v = '\t'.join (v)
- fd.write (f'!! {btn.name}\nkeycode {btn.xorgKeycode} = {v}\n')
+ fd.write (f'!! {btn.name}\nkeycode {xorgGetter (btn)} = {v}\n')
fd.write ('\n'.join (['add Mod3 = ISO_First_Group', 'add Mod5 = ISO_Level3_Shift', '']))
def renderKeyman (args):
@@ -388,11 +391,12 @@ def renderKeyman (args):
for i, l in enumerate (layout.layers):
for m in l.modifier:
for x in m:
- if x.keymanCode.startswith ('K_') or x.keymanCode == 'CAPS':
+ keymanCode = x.scancode['keyman']
+ if keymanCode.startswith ('K_') or keymanCode == 'CAPS':
logging.error (f'Keyman does not support custom modifier like {m}. Your layout will not work correctly.')
break
for btn, text in l.layout.items ():
- comb = ' '.join ([x.keymanCode for x in m] + [btn.keymanCode])
+ comb = ' '.join ([x.scancode['keyman'] for x in m] + [btn.scancode['keyman']])
text = ' '.join ([f'U+{ord (x):04X}' for x in text])
fd.write (f'+ [{comb}] > {text}\n')
@@ -494,7 +498,7 @@ def renderWinKbd (args):
s = '\r'
return s
wcharMap = []
- for btn in unique (keyboard.keys (), key=attrgetter ('windowsScancode')):
+ for btn in unique (keyboard.keys (), key=lambda x: x.scancode['windows']):
text = list (layout.getButtonText (btn))
# skip unused keys
@@ -502,7 +506,7 @@ def renderWinKbd (args):
continue
mappedText = [toWindows (s) for s in text]
- vk = next (filter (lambda x: isinstance (x, VirtualKey), scancodeToVk[btn.windowsScancode]))
+ vk = next (filter (lambda x: isinstance (x, VirtualKey), scancodeToVk[btn.scancode['windows']]))
wcharMap.append ((vk, 0, mappedText))
fd.write (makeDriverSources (scancodeToVk, wcharMap))
@@ -539,7 +543,7 @@ def renderKeylayout (args):
for i, l in enumerate (layout.layers):
keymap = ET.SubElement (keymapSet, 'keyMap', index=str (i))
for btn, text in l.layout.items ():
- ET.SubElement (keymap, 'key', code=str (btn.osxKeycode), output=text)
+ ET.SubElement (keymap, 'key', code=str (btn.scancode['macos']), output=text)
layouts = ET.SubElement (docroot, 'layouts')
layout = ET.SubElement (layouts, 'layout', first='0', last='0', modifiers=str (modmapId), mapSet=str (keymapSetId))
diff --git a/lulua/report.py b/lulua/report.py
index b25201d..0e5ec00 100644
--- a/lulua/report.py
+++ b/lulua/report.py
@@ -18,7 +18,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
-import sys, argparse, logging, pickle, math
+import sys, argparse, logging, pickle, math, unicodedata
from gettext import GNUTranslations, NullTranslations
from decimal import Decimal
from fractions import Fraction
@@ -60,21 +60,6 @@ def arabnum (s):
m = {'0': '٠', '1': '١', '2': '٢', '3': '٣', '4': '٤', '5': '٥', '6': '٦', '7': '٧', '8': '٨', '9': '٩', ',': '٬', '.': '٫'}
return ''.join (map (lambda x: m.get (x, x), s))
-def clamp (v, lower, upper):
- return max (min (v, upper), lower)
-
-def blend (v, a, b):
- v = clamp (v, 0, 1)
- return (b-a)*v+a
-
-def blendn (v, *l):
- assert 0 <= v <= 1
- n = len (l)
- step = 1/(n-1)
- i = min (int (math.floor (v/step)), n-2)
- stretchedv = (v-i*step)/step
- return [blend (stretchedv, x, y) for x, y in zip (l[i], l[i+1])]
-
def render ():
parser = argparse.ArgumentParser(description='Create lulua report.')
parser.add_argument('-c', '--corpus', nargs='+', metavar='FILE', help='Corpus metadata files')
@@ -88,9 +73,41 @@ def render ():
env.filters['approx'] = approx
env.filters['numspace'] = numspace
env.filters['arabnum'] = arabnum
- env.filters['blendn'] = blendn
env.filters['fraction'] = fraction
+ # Map global variables to Arabic letter romanizations, so we can use
+ # them easily in text.
+ # Taken from Abu-Chacra’s Arabic – An Essential Grammar. It’s
+ # too difficult for now to write a general-purpose romanization
+ # function, because it would need a dictionary.
+ letterNames = {
+ 'Hamzah': ('Hamzah', 'ء'),
+ 'Alif': ('ᵓAlif', 'ا'),
+ 'Alifhamzah': ('ᵓAlif-hamzah', 'أ'),
+ 'Wawhamzah': ('Wa\u0304w-hamzah', 'ؤ'),
+ 'Yahamzah': ('Ya\u0304ᵓ-hamzah', 'ئ'),
+ 'Ba': ('Baᵓ', 'ب'),
+ 'Ta': ('Taᵓ', 'ت'),
+ 'Tha': ('T\u0331aᵓ', 'ث'),
+ 'Ra': ('Raᵓ', 'ر'),
+ 'Dal': ('Da\u0304l', 'د'),
+ 'Dhal': ('D\u0331a\u0304l', 'ذ'),
+ 'Qaf': ('Qa\u0304f', 'ق'),
+ 'Lam': ('La\u0304m', 'ل'),
+ 'Lamalif': ('La\u0304m-ᵓalif', 'لا'),
+ 'Mim': ('Mi\u0304m', 'م'),
+ 'Nun': ('Nu\u0304n', 'ن'),
+ 'Waw': ('Wa\u0304w', 'و'),
+ 'Ya': ('Ya\u0304ᵓ', 'ي'),
+ 'Tamarbutah': ('Ta\u0304ᵓ marbu\u0304t\u0323ah', 'ة'),
+ 'Alifmaqsurah': ('ᵓAlif maqs\u0323u\u0304rah', 'ى'),
+ }
+ for k, (romanized, arabic) in letterNames.items ():
+ env.globals[k] = f'{romanized} <bdo lang="ar">({arabic})</bdo>'
+ env.globals[k.lower ()] = env.globals[k].lower ()
+ env.globals[k + '_'] = romanized
+ env.globals[k.lower () + '_'] = romanized.lower ()
+
corpus = []
for x in args.corpus:
with open (x) as fd:
diff --git a/lulua/stats.py b/lulua/stats.py
index 1d051b3..9d6c537 100644
--- a/lulua/stats.py
+++ b/lulua/stats.py
@@ -28,7 +28,7 @@ from .layout import *
from .keyboard import defaultKeyboards
from .writer import SkipEvent, Writer
from .carpalx import Carpalx, models
-from .plot import letterfreq, triadfreq
+from .plot import letterfreq, triadfreq, triadEffortPlot, triadEffortData
from .util import displayText
def updateDictOp (a, b, op):
@@ -315,47 +315,6 @@ def keyHeatmap (args):
buttons[k.name] = v
yaml.dump (data, sys.stdout)
-def sentenceStats (keyboard, layout, text):
- """
- Calculate effort for every character (button) in a text
- """
-
- writer = Writer (layout)
-
- effort = Carpalx (models['mod01'], writer)
- _ignored = frozenset (keyboard[x] for x in ('Fl_space', 'Fr_space', 'CD_ret', 'Cl_tab'))
- writtenText = []
- skipped = 0
- for match, event in writer.type (StringIO (text)):
- if isinstance (event, SkipEvent):
- skipped += 1
- writtenText.append ([event.char, None, 0])
- if not isinstance (event, ButtonCombination):
- continue
-
- writtenText.append ([match, event, 0])
-
- triad = list (filter (lambda x: x[1] is not None and first (x[1].buttons) not in _ignored, writtenText))[-3:]
- if len (triad) == 3:
- matchTriad, buttonTriad, _ = zip (*triad)
- triadEffort = effort._triadEffort (tuple (buttonTriad))
-
- # now walk the existing text backwards to find the original matches and add the computed effort
- writtenTextIt = iter (reversed (writtenText))
- matchTriad = list (matchTriad)
- while matchTriad:
- t = next (writtenTextIt)
- if t[0] == matchTriad[-1]:
- matchTriad.pop ()
- t[2] += triadEffort
-
- effort.addTriad (buttonTriad, 1)
-
- # normalize efforts to [0, 1]
- s = max (map (lambda x: x[2], writtenText))
- writtenText = list (map (lambda x: (x[0], x[2]/s if x[1] is not None else None), writtenText))
- return (writtenText, effort.effort, skipped)
-
from .text import mapChars, charMap
def layoutstats (args):
@@ -378,12 +337,6 @@ def layoutstats (args):
asymmetry = hands[LEFT]/buttonPresses - hands[RIGHT]/buttonPresses
- sentences = [
- 'أَوْ كَصَيِّبٍ مِّنَ السَّمَاءِ فِيهِ ظُلُمَاتٌ وَرَعْدٌ وَبَرْقٌ يَجْعَلُونَ أَصَابِعَهُمْ فِي آذَانِهِم مِّنَ الصَّوَاعِقِ حَذَرَ الْمَوْتِ وَاللَّهُ مُحِيطٌ بِالْكَافِرِينَ',
- 'اللغة العربية هي أكثرُ اللغاتِ السامية تحدثاً، وإحدى أكثر اللغات انتشاراً في العالم، يتحدثُها أكثرُ من 467 مليون نسمة.',
- ]
- sentences = [sentenceStats (keyboard, layout, mapChars (s, charMap).replace ('\r\n', '\n')) for s in sentences]
-
# Impact of hamza
yah = '\u064a'
waw = '\u0648'
@@ -411,7 +364,6 @@ def layoutstats (args):
fingers=dict (fingers),
buttonPresses=buttonPresses,
asymmetry=asymmetry,
- sentences=sentences,
hamzaImpact=hamzaImpact,
hamzaOnAlef=hamzaOnAlef,
), sys.stdout.buffer)
@@ -472,6 +424,12 @@ def main ():
sp.add_argument('-s', '--sort', choices={'weight', 'effort', 'combined'}, default='weight', help='Sorter')
sp.add_argument('-n', '--limit', type=int, default=0, help='Sorter')
sp.set_defaults (func=triadfreq)
+
+ sp = subparsers.add_parser('triadeffortdata')
+ sp.set_defaults (func=triadEffortData)
+ sp = subparsers.add_parser('triadeffortplot')
+ sp.set_defaults (func=triadEffortPlot)
+
sp = subparsers.add_parser('keyheatmap')
sp.set_defaults (func=keyHeatmap)
sp = subparsers.add_parser('layoutstats')
diff --git a/lulua/test_keyboard.py b/lulua/test_keyboard.py
index 7537266..d08f6d1 100644
--- a/lulua/test_keyboard.py
+++ b/lulua/test_keyboard.py
@@ -20,7 +20,8 @@
import pytest
-from .keyboard import defaultKeyboards, Button
+from .keyboard import defaultKeyboards, Button, dataDirectory
+from .util import YamlLoader
def test_defaults ():
k = defaultKeyboards['ibmpc105']
@@ -54,6 +55,9 @@ def test_keyboard_getattr ():
assert k['CD_ret'] == k.find ('CD_ret')
assert k['Cr1'] != k.find ('El1')
+ with pytest.raises (KeyError):
+ k['nonexistent_button']
+
def test_button_uniqname ():
a = Button ('a')
assert a.name == 'a'
@@ -77,3 +81,15 @@ def test_button_uniqname ():
d[b] = 2
assert b in d
+ # make sure we can only compare to Buttons
+ assert a != 'hello'
+ assert a != 1
+ assert a != dict ()
+
+def test_serialize ():
+ """ Make sure serialize (deserialize (x)) of keyboards is identity """
+
+ rawKeyboards = YamlLoader (dataDirectory, lambda x: x)
+ name = 'ibmpc105'
+ assert defaultKeyboards[name].serialize () == rawKeyboards[name]
+
diff --git a/lulua/test_layout.py b/lulua/test_layout.py
index 45141ae..8e911da 100644
--- a/lulua/test_layout.py
+++ b/lulua/test_layout.py
@@ -35,7 +35,7 @@ def test_atomic (layout):
for char in text:
d = unicodedata.decomposition (char)
# allow compat decompositions like … -> ...
- if not d.startswith ('<compat> ') and not d.startswith ('<isolated> ') and not d.startswith ('<medial> ') and not d.startswith ('<initial> '):
+ if d.split (' ', 1)[0] not in {'<compat>', '<isolated>', '<medial>', '<initial>', '<noBreak>'}:
assert d == '', (char, btn)
@pytest.mark.parametrize("layout", defaultLayouts, ids=[l.name for l in defaultLayouts])
diff --git a/lulua/test_report.py b/lulua/test_report.py
index 448d796..751684c 100644
--- a/lulua/test_report.py
+++ b/lulua/test_report.py
@@ -20,7 +20,7 @@
from decimal import Decimal
-from .report import approx, blend, blendn
+from .report import approx
def test_approx ():
assert approx (0) == (Decimal ('0'), '')
@@ -37,13 +37,3 @@ def test_approx ():
assert approx (10**9) == (Decimal ('1'), 'billion')
assert approx (10**12) == (Decimal ('1000'), 'billion')
-def test_blend ():
- assert blend (0.5, 0, 1) == 0.5
- assert blend (0.5, 0, 2) == 1
-
- assert blend (0.1, 0, 1) == 0.1
- assert blend (0.9, 0, 1) == 0.9
-
- assert blendn (0.5, (0, ), (1, )) == [0.5]
- assert blendn (0.5, (0, ), (0.7, ), (1, )) == [0.7]
-
diff --git a/lulua/text.py b/lulua/text.py
index 18e4dbf..ea91139 100644
--- a/lulua/text.py
+++ b/lulua/text.py
@@ -193,6 +193,10 @@ def filterEpub (item):
stream = walker (document)
s = HTMLSerializer()
yield ''.join (s.serialize (stream))
+ # It looks like ebooklib is leaking ZipFile instances somewhere, which
+ # can be prevented by resetting the book before the GC grabs it.
+ book.reset ()
+ del book
def filterText (fd):
yield fd.read ().decode ('utf-8')
diff --git a/lulua/util.py b/lulua/util.py
index 5d7ea1b..0245275 100644
--- a/lulua/util.py
+++ b/lulua/util.py
@@ -22,7 +22,7 @@
Misc utilities
"""
-import os, yaml, pkg_resources, unicodedata
+import os, yaml, pkg_resources, unicodedata, re
first = lambda x: next (iter (x))
@@ -79,16 +79,20 @@ def displayText (text):
if all (map (lambda x: unicodedata.combining (x) != 0, text)):
# add circle if combining
return '\u25cc' + text
+ if len (text) == 1 and unicodedata.category (text) == 'Cf':
+ stopwords = re.compile('\WTO\W', re.I)
+ try:
+ cleanName = unicodedata.name (text).replace ('-', ' ')
+ short = ''.join (map (lambda x: x[0], stopwords.sub(' ', cleanName).split (' ')))
+ return f'[{short}]'
+ except ValueError:
+ # No such name.
+ pass
invMap = {
'\t': '⭾',
'\n': '↳',
' ': '\u2423',
'\b': '⌦',
- '\u200e': '[LRM]', # left to right mark
- '\u061c': '[ALM]', # arabic letter mark
- '\u202c': '[PDF]', # pop directional formatting
- "\u2066": '[LRI]', # left-to-right isolate (lri)
- "\u2067": '[RLI]', # right-to-left isolate (rli)
- "\u2069": '[PDI]', # pop directional isolate (pdi)
+ '\u202f': '[NNBSP]',
}
return invMap.get (text, text)