diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2020-03-28 11:44:58 +0100 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2020-03-28 11:44:58 +0100 |
commit | 306fa70fb3ae1ee4c1b35c22b6c222982479bd6f (patch) | |
tree | 515f5175d2c3ff84e2a3126963675bdf8fe65b41 /lulua | |
parent | d2126a193cf9240f633d26240eba703bc11df018 (diff) | |
download | lulua-306fa70fb3ae1ee4c1b35c22b6c222982479bd6f.tar.gz lulua-306fa70fb3ae1ee4c1b35c22b6c222982479bd6f.tar.bz2 lulua-306fa70fb3ae1ee4c1b35c22b6c222982479bd6f.zip |
Add analysis for ar-idlebi and ar-alramly
Diffstat (limited to 'lulua')
-rw-r--r-- | lulua/data/layouts/ar-idlebi.yaml | 123 | ||||
-rw-r--r-- | lulua/data/report/index.html | 104 | ||||
-rw-r--r-- | lulua/test_util.py | 35 | ||||
-rw-r--r-- | lulua/util.py | 2 |
4 files changed, 259 insertions, 5 deletions
diff --git a/lulua/data/layouts/ar-idlebi.yaml b/lulua/data/layouts/ar-idlebi.yaml new file mode 100644 index 0000000..52cdc80 --- /dev/null +++ b/lulua/data/layouts/ar-idlebi.yaml @@ -0,0 +1,123 @@ +# Figure 3 of the article Design of Arabic Keyboard Layout Based on Statistical +# Properties of Arabic Characters by Idlebi et al. +name: ar-idlebi +layout: +- layer: + Bl1: "@" + Bl2: "١" + Bl3: "٢" + Bl4: "٣" + Bl5: "٤" + Bl6: "٥" + Bl7: "٦" + Br6: "٧" + Br5: "٨" + Br4: "٩" + Br3: "٠" + Br2: "_" # XXX: not sure + #Br1: "=" + + Cl_tab: "\t" + Cl1: "\u0651\u064e" # shadda+fatha + Cl2: "\u0650" # kasra + Cl3: "\u064e" # fatha + Cl4: "\u064f" # damma + Cl5: "ط" + Cr7: "ص" + Cr6: "ف" + Cr5: "ا" + Cr4: "ي" + Cr3: "ش" + Cr2: "س" + Cr1: ":" + + CD_ret: "\n" + + Dl1: "ن" + Dl2: "ت" + Dl3: "ع" + Dl4: "م" + Dl5: "ب" + Dr7: "ا\u0655" # hamza below + Dr6: "ل" + Dr5: "ه" + Dr4: "ر" + Dr3: "و" + Dr2: "ى" + Dr1: "." + + El1: "\u0652" # sukun + El2: "ق" + El3: "ا\u0654" # hamza above + El4: "ج" + El5: "ح" + El6: "خ" + Er5: "ذ" + Er4: "ك" + Er3: "د" + Er2: "ة" + Er1: "غ" + #Er0: "،" + + Fl_space: " " + Fr_space: " " + modifier: + - [] +- layer: + Bl1: "^" + Bl2: "!" 
+ Bl3: "\"" + Bl4: "#" + #Bl5: "" # unknown symbol + Bl6: "٪" + Bl7: "&" + Br6: "'" + Br5: "(" + Br4: ")" + #Br3: "" # unknown symbol + Br2: "=" + #Br1: "" + + Cl1: "ز" + Cl2: "\u0651\u064d" # shadda+kasratan + Cl3: "\u064b" # fathatan + Cl4: "\u064c" # dammatan + Cl5: "ظ" + Cr7: "ض" + #Cr6: "" + Cr5: "\u0651\u0650" # shadda+kasra + #Cr4: "" + Cr3: "[" + Cr2: "]" + Cr1: "<" + + CD_ret: "\n" + + Dl1: "ء" + Dl2: "ث" + Dl3: "\u0651\u064c" # shadda+dammatan + #Dl4: "" + #Dl5: "" + Dr7: "\u0648\u0654" # composed: ؤ + Dr6: "{" + Dr5: "}" + Dr4: "\\" + Dr3: "/" + Dr2: "\u064d" # kasratan + Dr1: ">" + + El1: "\u0651\u064f" # shadda+damma + #El2: "" + El3: "\u064a\u0654" # composed: ئ + #El4: "" + #El5: "" + #El6: "" + #Er5: "" + Er4: "؛" + Er3: "ا\u0653" # composed: آ + Er2: "|" + Er1: "*" + #Er0: "؟" + modifier: + - [El_shift] + - [Er_shift] diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html index d240ba7..db80c58 100644 --- a/lulua/data/report/index.html +++ b/lulua/data/report/index.html @@ -398,6 +398,101 @@ </div> </div> + <div id="ar-alramly" class="pure-g flexreverse"> + <div class="pure-u-1 pure-u-md-1-2"> + </div> + <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="lbox"> + <h3><a href="#ar-alramly">Al-Ramly et al</a></h3> + <p> + One of the earliest accounts can be found in the article <a + href="https://doi.org/10.1016/S1474-6670(17)64475-6">Statistical + Distribution of Arabic Letters Aids to the Design of a New + Keyboard</a> by Al-Ramly et al. published in 1980. + <!-- --> + The Arabic half of the proposed bi-lingual layout seems to be + hand-optimized based on several metrics including character + frequencies – without mentioning a source for them though – visual + similarity (“letter groups”) and their position on previous + layouts. + <!-- --> + It tries to balance load between hands, assign more work to index + and middle fingers and place common letters in the home row. 
+ <!-- --> + However the asymmetry given in the article, 0.032, cannot be + reproduced here. + <!-- --> + For the most part the layout lacks combining and pre-combined + characters, a task that is left to “machine intelligence” making it + hard to use nowadays. + </p> + <p> + While the layout distributes load between fingers quite well it + favors the left hand by assigning <bdo dir="ltr" lang="ar">ا</bdo> + and <bdo dir="ltr" lang="ar">ل</bdo> to it. + <!-- --> + The decision to place <bdo dir="ltr" lang="ar">ث</bdo> in a very + prominent spot seems weird, given it only accounts for 0.5% of all + symbols, even in their own analysis. + </p> + </div></div> + </div> + + <figure> + <div class="lbox"> + <img src="ar-alramly-heat.svg"> + {{ fingerhandstats(layoutstats['ar-alramly']) }} + </div> + </figure> + + <div id="ar-idlebi" class="pure-g flexreverse"> + <div class="pure-u-1 pure-u-md-1-2"> + </div> + <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="lbox"> + <h3><a href="#ar-idlebi">Idlebi et al</a></h3> + <p> + Another article from the early days of computers published in 1990 + is <a + href="https://archive.org/details/computers_and_the_arabic_language_1990/page/n112/mode/2up">Design + of Arabic Keyboard Layout Based on Statistical Properties of Arabic + Characters</a> by Idlebi et al. + <!-- --> + They present two examples of programmatically optimized layouts and + account for character and bigram frequencies based on a corpus of + 100,000 characters, finger movement time of unknown origin and finger + load. + </p> + <p> + Unfortunately the results use 12 keys per row and are not suitable + for use with current European keyboards, which usually feature only 11 keys + in the bottom row. + <!-- --> + Thus the layout displayed below lacks the Arabic question mark and + comma in the bottom right. 
+ <!-- --> + Probably due to their unusual assumption that middle- and + ring-finger rest in the top row their results are suboptimal, + placing both <bdo dir="ltr" lang="ar">ا</bdo> and <bdo dir="ltr" + lang="ar">ي</bdo> in the top row. + <!-- --> + Their analysis notices this and suggests improved positions for + both characters, but these are not actually implemented. + <!-- --> + The big asymmetry is caused by placing <bdo dir="ltr" lang="ar">ا + ل ي</bdo> and <bdo dir="ltr" lang="ar">و</bdo>, four of the five + most frequent letters, on the right hand side. + </p> + </div></div> + </div> + + <figure> + <div class="lbox"> + <img src="ar-idlebi-heat.svg"> + {{ fingerhandstats(layoutstats['ar-idlebi']) }} + </div> + </figure> + <div id="ar-malas" class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> </div> @@ -405,9 +500,10 @@ <div class="lbox"> <h3><a href="#ar-malas">Malas et al</a></h3> <p> - The work by Malas et al. (2008), - <a href="https://pdfs.semanticscholar.org/1bf8/74dcaa7f21c2cc3c6c5e526b61a9ee352bba.pdf">Toward Optimal Arabic Keyboard Layout Using Genetic Algorithm</a>, - presents an alternative layout generated by a genetic algorithm. + About 20 years later (2008) Malas et al. presented an alternative + layout generated by a genetic algorithm in their article <a + href="https://pdfs.semanticscholar.org/1bf8/74dcaa7f21c2cc3c6c5e526b61a9ee352bba.pdf">Toward + Optimal Arabic Keyboard Layout Using Genetic Algorithm</a>. 
<!-- --> They used a snapshot of the Arabic Wikipedia probably from around 2008 and optimized for typing speed only, claiming 35% faster typing compared @@ -424,7 +520,7 @@ </div> </div> - <figure id="ar-malas"> + <figure> <div class="lbox"> <img src="ar-malas-heat.svg"> {{ fingerhandstats(layoutstats['ar-malas']) }} diff --git a/lulua/test_util.py b/lulua/test_util.py new file mode 100644 index 0000000..1c321d9 --- /dev/null +++ b/lulua/test_util.py @@ -0,0 +1,35 @@ +# Copyright (c) 2019 lulua contributors +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +import pytest + +from .util import displayText + +@pytest.mark.parametrize("s,expected", [ + ('foobar', False), + # lonely shadda + ('\u0651', True), + # shadda+fatha + ("\u0651\u064e", True), + ('يّ', False), + ]) +def test_displayTextCombining (s, expected): + assert displayText (s).startswith ('\u25cc') == expected + diff --git a/lulua/util.py b/lulua/util.py index ce6e887..c5634c2 100644 --- a/lulua/util.py +++ b/lulua/util.py @@ -73,7 +73,7 @@ def displayText (text): control or invisible characters """ if text is None: return text - if len (text) == 1 and unicodedata.combining (text) != 0: + if all (map (lambda x: unicodedata.combining (x) != 0, text)): # add circle if combining return '\u25cc' + text invMap = { |