summaryrefslogtreecommitdiff
path: root/lulua
diff options
context:
space:
mode:
authorLars-Dominik Braun <lars@6xq.net>2020-03-28 11:44:58 +0100
committerLars-Dominik Braun <lars@6xq.net>2020-03-28 11:44:58 +0100
commit306fa70fb3ae1ee4c1b35c22b6c222982479bd6f (patch)
tree515f5175d2c3ff84e2a3126963675bdf8fe65b41 /lulua
parentd2126a193cf9240f633d26240eba703bc11df018 (diff)
downloadlulua-306fa70fb3ae1ee4c1b35c22b6c222982479bd6f.tar.gz
lulua-306fa70fb3ae1ee4c1b35c22b6c222982479bd6f.tar.bz2
lulua-306fa70fb3ae1ee4c1b35c22b6c222982479bd6f.zip
Add analysis for ar-idlebi and ar-alramly
Diffstat (limited to 'lulua')
-rw-r--r--lulua/data/layouts/ar-idlebi.yaml123
-rw-r--r--lulua/data/report/index.html104
-rw-r--r--lulua/test_util.py35
-rw-r--r--lulua/util.py2
4 files changed, 259 insertions, 5 deletions
diff --git a/lulua/data/layouts/ar-idlebi.yaml b/lulua/data/layouts/ar-idlebi.yaml
new file mode 100644
index 0000000..52cdc80
--- /dev/null
+++ b/lulua/data/layouts/ar-idlebi.yaml
@@ -0,0 +1,123 @@
+# Figure 3 of the article Design of Arabic Keyboard Layout Based on Statistical
+# Properties of Arabic Characters by Idlebi et al.
+name: ar-idlebi
+layout:
+- layer:
+ Bl1: "@"
+ Bl2: "١"
+ Bl3: "٢"
+ Bl4: "٣"
+ Bl5: "٤"
+ Bl6: "٥"
+ Bl7: "٦"
+ Br6: "٧"
+ Br5: "٨"
+ Br4: "٩"
+ Br3: "٠"
+ Br2: "_" # XXX: not sure
+ #Br1: "="
+
+ Cl_tab: "\t"
+ Cl1: "\u0651\u064e" # shadda+fatha
+ Cl2: "\u0650" # kasra
+ Cl3: "\u064e" # fatha
+ Cl4: "\u064f" # damma
+ Cl5: "ط"
+ Cr7: "ص"
+ Cr6: "ف"
+ Cr5: "ا"
+ Cr4: "ي"
+ Cr3: "ش"
+ Cr2: "س"
+ Cr1: ":"
+
+ CD_ret: "\n"
+
+ Dl1: "ن"
+ Dl2: "ت"
+ Dl3: "ع"
+ Dl4: "م"
+ Dl5: "ب"
+ Dr7: "ا\u0655" # hamza below
+ Dr6: "ل"
+ Dr5: "ه"
+ Dr4: "ر"
+ Dr3: "و"
+ Dr2: "ى"
+ Dr1: "."
+
+ El1: "\u0652" # sukun
+ El2: "ق"
+ El3: "ا\u0654" # hamza above
+ El4: "ج"
+ El5: "ح"
+ El6: "خ"
+ Er5: "ذ"
+ Er4: "ك"
+ Er3: "د"
+ Er2: "ة"
+ Er1: "غ"
+ #Er0: "،"
+
+ Fl_space: " "
+ Fr_space: " "
+ modifier:
+ - []
+- layer:
+ Bl1: "^"
+ Bl2: "!"
+ Bl3: "\""
+ Bl4: "#"
+ #Bl5: "" # unknown symbol
+ Bl6: "٪"
+ Bl7: "&"
+ Br6: "'"
+ Br5: "("
+ Br4: ")"
+ #Br3: "" # unknown symbol
+ Br2: "="
+ #Br1: ""
+
+ Cl1: "ز"
+ Cl2: "\u0651\u064d" # shadda+kasratan
+ Cl3: "\u064b" # fathatan
+ Cl4: "\u064c" # dammatan
+ Cl5: "ظ"
+ Cr7: "ض"
+ #Cr6: ""
+ Cr5: "\u0651\u0650" # shadda+kasra
+ #Cr4: ""
+ Cr3: "["
+ Cr2: "]"
+ Cr1: "<"
+
+ CD_ret: "\n"
+
+ Dl1: "ء"
+ Dl2: "ث"
+ Dl3: "\u0651\u064c" # shadda+dammatan
+ #Dl4: ""
+ #Dl5: ""
+ Dr7: "\u0648\u0654" # composed: ؤ
+ Dr6: "{"
+ Dr5: "}"
+ Dr4: "\\"
+ Dr3: "/"
+ Dr2: "\u064d" # kasratan
+ Dr1: ">"
+
+ El1: "\u0651\u064f" # shadda+damma
+ #El2: ""
+ El3: "\u064a\u0654" # composed: ئ
+ #El4: ""
+ #El5: ""
+ #El6: ""
+ #Er5: ""
+ Er4: "؛"
+ Er3: "ا\u0653" # composed: آ
+ Er2: "|"
+ Er1: "*"
+ #Er0: "؟"
+ modifier:
+ - [El_shift]
+ - [Er_shift]
diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html
index d240ba7..db80c58 100644
--- a/lulua/data/report/index.html
+++ b/lulua/data/report/index.html
@@ -398,6 +398,101 @@
</div>
</div>
+ <div id="ar-alramly" class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h3><a href="#ar-alramly">Al-Ramly et al</a></h3>
+ <p>
+ One of the earliest accounts can be found in the article <a
+ href="https://doi.org/10.1016/S1474-6670(17)64475-6">Statistical
+ Distribution of Arabic Letters Aids to the Design of a New
+ Keyboard</a> by Al-Ramly et al. published in 1980.
+ <!-- -->
+ The Arabic half of the proposed bi-lingual layout seems to be
+ hand-optimized based on several metrics including character
+ frequencies – without mentioning a source for them though – visual
+ similarity (“letter groups”) and their position on previous
+ layouts.
+ <!-- -->
+ It tries to balance load between hands, assign more work to index
+ and middle fingers and place common letters in the home row.
+ <!-- -->
+ However, the asymmetry given in the article, 0.032, cannot be
+ reproduced here.
+ <!-- -->
+ For the most part the layout lacks combining and pre-combined
+ characters, a task that is left to “machine intelligence” making it
+ hard to use nowadays.
+ </p>
+ <p>
+ While the layout distributes load between fingers quite well it
+ favors the left hand by assigning <bdo dir="ltr" lang="ar">ا</bdo>
+ and <bdo dir="ltr" lang="ar">ل</bdo> to it.
+ <!-- -->
+ The decision to place <bdo dir="ltr" lang="ar">ث</bdo> in a very
+ prominent spot seems weird, given it only accounts for 0.5% of all
+ symbols, even in their own analysis.
+ </p>
+ </div></div>
+ </div>
+
+ <figure>
+ <div class="lbox">
+ <img src="ar-alramly-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-alramly']) }}
+ </div>
+ </figure>
+
+ <div id="ar-idlebi" class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h3><a href="#ar-idlebi">Idlebi et al</a></h3>
+ <p>
+ Another article from the early days of computers published in 1990
+ is <a
+ href="https://archive.org/details/computers_and_the_arabic_language_1990/page/n112/mode/2up">Design
+ of Arabic Keyboard Layout Based on Statistical Properties of Arabic
+ Characters</a> by Idlebi et al.
+ <!-- -->
+ They present two examples of programmatically optimized layouts and
+ account for character and bigram frequencies based on a corpus of
+ 100,000 characters, finger movement time of unknown origin and finger
+ load.
+ </p>
+ <p>
+ Unfortunately the results use 12 keys per row and are not suitable
+ for use with current European keyboards, which usually feature only 11 keys
+ in the bottom row.
+ <!-- -->
+ Thus the layout displayed below lacks the Arabic question mark and
+ comma in the bottom right.
+ <!-- -->
+ Probably due to their unusual assumption that middle- and
+ ring-finger rest in the top row their results are suboptimal,
+ placing both <bdo dir="ltr" lang="ar">ا</bdo> and <bdo dir="ltr"
+ lang="ar">ي</bdo> in the top row.
+ <!-- -->
+ Their analysis notices this and suggests improved positions for
+ both characters, but these are not actually implemented.
+ <!-- -->
+ The big asymmetry is caused by placing <bdo dir="ltr" lang="ar">ا
+ ل ي</bdo> and <bdo dir="ltr" lang="ar">و</bdo>, four of the five
+ most frequent letters, on the right hand side.
+ </p>
+ </div></div>
+ </div>
+
+ <figure>
+ <div class="lbox">
+ <img src="ar-idlebi-heat.svg">
+ {{ fingerhandstats(layoutstats['ar-idlebi']) }}
+ </div>
+ </figure>
+
<div id="ar-malas" class="pure-g flexreverse">
<div class="pure-u-1 pure-u-md-1-2">
</div>
@@ -405,9 +500,10 @@
<div class="lbox">
<h3><a href="#ar-malas">Malas et al</a></h3>
<p>
- The work by Malas et al. (2008),
- <a href="https://pdfs.semanticscholar.org/1bf8/74dcaa7f21c2cc3c6c5e526b61a9ee352bba.pdf">Toward Optimal Arabic Keyboard Layout Using Genetic Algorithm</a>,
- presents an alternative layout generated by a genetic algorithm.
+ About 20 years later (2008) Malas et al. presented an alternative
+ layout generated by a genetic algorithm in their article <a
+ href="https://pdfs.semanticscholar.org/1bf8/74dcaa7f21c2cc3c6c5e526b61a9ee352bba.pdf">Toward
+ Optimal Arabic Keyboard Layout Using Genetic Algorithm</a>.
<!-- -->
They used a snapshot of the Arabic Wikipedia probably from around 2008 and
optimized for typing speed only, claiming 35% faster typing compared
@@ -424,7 +520,7 @@
</div>
</div>
- <figure id="ar-malas">
+ <figure>
<div class="lbox">
<img src="ar-malas-heat.svg">
{{ fingerhandstats(layoutstats['ar-malas']) }}
diff --git a/lulua/test_util.py b/lulua/test_util.py
new file mode 100644
index 0000000..1c321d9
--- /dev/null
+++ b/lulua/test_util.py
@@ -0,0 +1,35 @@
+# Copyright (c) 2019 lulua contributors
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+import pytest
+
+from .util import displayText
+
+@pytest.mark.parametrize("s,expected", [
+ ('foobar', False),
+ # lonely shadda
+ ('\u0651', True),
+ # shadda+fatha
+ ("\u0651\u064e", True),
+ ('يّ', False),
+ ])
+def test_displayTextCombining (s, expected):
+ assert displayText (s).startswith ('\u25cc') == expected
+
diff --git a/lulua/util.py b/lulua/util.py
index ce6e887..c5634c2 100644
--- a/lulua/util.py
+++ b/lulua/util.py
@@ -73,7 +73,7 @@ def displayText (text):
control or invisible characters """
if text is None:
return text
- if len (text) == 1 and unicodedata.combining (text) != 0:
+ if all (map (lambda x: unicodedata.combining (x) != 0, text)):
# add circle if combining
return '\u25cc' + text
invMap = {