From 306fa70fb3ae1ee4c1b35c22b6c222982479bd6f Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 28 Mar 2020 11:44:58 +0100 Subject: Add analysis for ar-idlebi and ar-alramly --- gen.sh | 2 +- lulua/data/layouts/ar-idlebi.yaml | 123 ++++++++++++++++++++++++++++++++++++++ lulua/data/report/index.html | 104 ++++++++++++++++++++++++++++++-- lulua/test_util.py | 35 +++++++++++ lulua/util.py | 2 +- 5 files changed, 260 insertions(+), 6 deletions(-) create mode 100644 lulua/data/layouts/ar-idlebi.yaml create mode 100644 lulua/test_util.py diff --git a/gen.sh b/gen.sh index 9ea7493..966afe9 100755 --- a/gen.sh +++ b/gen.sh @@ -2,7 +2,7 @@ # Generate build.ninja that builds the docs/stats/… corpusdir=corpus -layouts="ar-lulua ar-asmo663 ar-linux ar-malas ar-phonetic ar-osman ar-khorshid ar-osx" +layouts="ar-lulua ar-asmo663 ar-linux ar-malas ar-phonetic ar-osman ar-khorshid ar-osx ar-idlebi ar-alramly" layoutsXmodmap="ar-lulua" corpora="`ls ${corpusdir}`" diff --git a/lulua/data/layouts/ar-idlebi.yaml b/lulua/data/layouts/ar-idlebi.yaml new file mode 100644 index 0000000..52cdc80 --- /dev/null +++ b/lulua/data/layouts/ar-idlebi.yaml @@ -0,0 +1,123 @@ +# Figure 3 of the article Design of Arabic Keyboard Layout Based on Statistical +# Properties of Arabic Characters by Idlebi et al. +name: ar-idlebi +layout: +- layer: + Bl1: "@" + Bl2: "١" + Bl3: "٢" + Bl4: "٣" + Bl5: "٤" + Bl6: "٥" + Bl7: "٦" + Br6: "٧" + Br5: "٨" + Br4: "٩" + Br3: "٠" + Br2: "_" # XXX: not sure + #Br1: "=" + + Cl_tab: "\t" + Cl1: "\u0651\u064e" # shadda+fatha + Cl2: "\u0650" # kasra + Cl3: "\u064e" # fatha + Cl4: "\u064f" # damma + Cl5: "ط" + Cr7: "ص" + Cr6: "ف" + Cr5: "ا" + Cr4: "ي" + Cr3: "ش" + Cr2: "س" + Cr1: ":" + + CD_ret: "\n" + + Dl1: "ن" + Dl2: "ت" + Dl3: "ع" + Dl4: "م" + Dl5: "ب" + Dr7: "ا\u0655" # hamza below + Dr6: "ل" + Dr5: "ه" + Dr4: "ر" + Dr3: "و" + Dr2: "ى" + Dr1: "." 
+ + El1: "\u0652" # sukun + El2: "ق" + El3: "ا\u0654" # hamza above + El4: "ج" + El5: "ح" + El6: "خ" + Er5: "ذ" + Er4: "ك" + Er3: "د" + Er2: "ة" + Er1: "غ" + #Er0: "،" + + Fl_space: " " + Fr_space: " " + modifier: + - [] +- layer: + Bl1: "^" + Bl2: "!" + Bl3: "\"" + Bl4: "#" + #Bl5: "" # unknown symbol + Bl6: "٪" + Bl7: "&" + Br6: "'" + Br5: "(" + Br4: ")" + #Br3: "" # unknown symbol + Br2: "=" + #Br1: "" + + Cl1: "ز" + Cl2: "\u0651\u064d" # shadda+kasratan + Cl3: "\u064b" # fathatan + Cl4: "\u064c" # dammatan + Cl5: "ظ" + Cr7: "ض" + #Cr6: "" + Cr5: "\u0651\u0650" # shadda+kasra + #Cr4: "" + Cr3: "[" + Cr2: "]" + Cr1: "<" + + CD_ret: "\n" + + Dl1: "ء" + Dl2: "ث" + Dl3: "\u0651\u064c" # shadda+dammatan + #Dl4: "" + #Dl5: "" + Dr7: "\u0648\u0654" # composed: ؤ + Dr6: "{" + Dr5: "}" + Dr4: "\\" + Dr3: "/" + Dr2: "\u064d" # kasratan + Dr1: ">" + + El1: "\u0651\u064f" # shadda+damma + #El2: "" + El3: "\u064a\u0654" # composed: ئ + #El4: "" + #El5: "" + #El6: "" + #Er5: "" + Er4: "؛" + Er3: "ا\u0653" # composed: آ + Er2: "|" + Er1: "*" + #Er0: "؟" + modifier: + - [El_shift] + - [Er_shift] diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html index d240ba7..db80c58 100644 --- a/lulua/data/report/index.html +++ b/lulua/data/report/index.html @@ -398,6 +398,101 @@ +
+
+
+
+
+

Al-Ramly et al

+

+ One of the earliest accounts can be found in the article Statistical + Distribution of Arabic Letters Aids to the Design of a New + Keyboard by Al-Ramly et al published in 1980. + + The Arabic half of the proposed bi-lingual layout seems to be + hand-optimized based on several metrics including character + frequencies – without mentioning a source for them though – visual + similarity (“letter groups”) and their position on previous + layouts. + + It tries to balance load between hands, assign more work to index + and middle fingers and place common letters in the home row. + + However the asymmetry given in the article, 0.032, cannot be + reproduced here. + + For the most part the layout lacks combining and pre-combined + characters, a task that is left to “machine intelligence” making it + hard to use nowadays. +

+

+ While the layout distributes load between fingers quite well it + favors the left hand by assigning ا + and ل to it. + + The decision to place ث in a very + prominent spot seems weird, given it only accounts for 0.5% of all + symbols, even in their own analysis. +

+
+
+ +
+
+ + {{ fingerhandstats(layoutstats['ar-alramly']) }} +
+
+ +
+
+
+
+
+

Idlebi et al

+

+ Another article from the early days of computers published in 1990 + is Design + of Arabic Keyboard Layout Based on Statistical Properties of Arabic + Characters by Idlebi et al. + + They present two examples of programmatically optimized layouts and + account for character and bigram frequencies based on a corpus of + 100,000 characters, finger movement time of unknown origin and finger + load. +

+

+ Unfortunately the results use 12 keys per row and are not suitable + for use with current European keyboards, which usually feature only 11 keys + in the bottom row. + + Thus the layout displayed below lacks the Arabic question mark and + comma in the bottom right. + + Probably due to their unusual assumption that middle- and + ring-finger rest in the top row their results are suboptimal, + placing both ا and ي in the top row. + + Their analysis notices this and suggests improved positions for + both characters, but these are not actually implemented. + + The big asymmetry is caused by placing ا + ل ي and و, four of the five + most frequent letters, on the right hand side. +

+
+
+ +
+
+ + {{ fingerhandstats(layoutstats['ar-idlebi']) }} +
+
+
@@ -405,9 +500,10 @@

Malas et al

- The work by Malas et al. (2008), - Toward Optimal Arabic Keyboard Layout Using Genetic Algorithm, - presents an alternative layout generated by a genetic algorithm. + About 20 years later (2008) Malas et al. presented an alternative + layout generated by a genetic algorithm in their article Toward + Optimal Arabic Keyboard Layout Using Genetic Algorithm. They used a snapshot of the Arabic Wikipedia probably from around 2008 and optimized for typing speed only, claiming 35% faster typing compared @@ -424,7 +520,7 @@

-
+
{{ fingerhandstats(layoutstats['ar-malas']) }} diff --git a/lulua/test_util.py b/lulua/test_util.py new file mode 100644 index 0000000..1c321d9 --- /dev/null +++ b/lulua/test_util.py @@ -0,0 +1,35 @@ +# Copyright (c) 2019 lulua contributors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +import pytest + +from .util import displayText + +@pytest.mark.parametrize("s,expected", [ + ('foobar', False), + # lonely shadda + ('\u0651', True), + # shadda+fatha + ("\u0651\u064e", True), + ('يّ', False), + ]) +def test_displayTextCombining (s, expected): + assert displayText (s).startswith ('\u25cc') == expected + diff --git a/lulua/util.py b/lulua/util.py index ce6e887..c5634c2 100644 --- a/lulua/util.py +++ b/lulua/util.py @@ -73,7 +73,7 @@ def displayText (text): control or invisible characters """ if text is None: return text - if len (text) == 1 and unicodedata.combining (text) != 0: + if all (map (lambda x: unicodedata.combining (x) != 0, text)): # add circle if combining return '\u25cc' + text invMap = { -- cgit v1.2.3