From 2ed45cd9ff6c786a8d3415520830f52dc81b5041 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Thu, 26 Sep 2019 08:36:42 +0200 Subject: doc: Add related work i.e. Arabic keyboard layouts --- .gitignore | 3 + doc/Makefile | 24 ++- doc/index.html | 288 +++++++++++++++++++++++++++++++++++- doc/style.css | 15 +- lulua/data/layouts/ar-khorshid.yaml | 79 ++++++++++ 5 files changed, 399 insertions(+), 10 deletions(-) create mode 100644 lulua/data/layouts/ar-khorshid.yaml diff --git a/.gitignore b/.gitignore index 79d437c..053db80 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,6 @@ __pycache__ .eggs/ .coverage .mypy_cache/ +doc/*.svg +doc/*.xmodmap +doc/letterfreq.json diff --git a/doc/Makefile b/doc/Makefile index 037a78f..8ecf093 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -1,7 +1,7 @@ -all: ar-lulua.xmodmap ar-lulua.svg +all: ar-lulua.xmodmap ar-lulua.svg ar-asmo663.svg ar-linux.svg ar-malas.svg ar-phonetic.svg ar-osman.svg letterfreq.json ar-khorshid.svg -#letterfreq.json: ../stats.pickle -# lulua-analyze -l ar-lulua letterfreq < $< > $@ +letterfreq.json: ../stats.pickle + lulua-analyze -l ar-lulua letterfreq < $< > $@ ar-lulua.xmodmap: lulua-render xmodmap -l ar-lulua $@ @@ -9,3 +9,21 @@ ar-lulua.xmodmap: ar-lulua.svg: lulua-render svg -l ar-lulua $@ +ar-asmo663.svg: + lulua-render svg -l ar-asmo663 $@ + +ar-linux.svg: + lulua-render svg -l ar-linux $@ + +ar-malas.svg: + lulua-render svg -l ar-malas $@ + +ar-phonetic.svg: + lulua-render svg -l ar-phonetic $@ + +ar-osman.svg: + lulua-render svg -l ar-osman $@ + +ar-khorshid.svg: + lulua-render svg -l ar-khorshid $@ + diff --git a/doc/index.html b/doc/index.html index 815b2a4..4e14658 100644 --- a/doc/index.html +++ b/doc/index.html @@ -16,9 +16,9 @@
-
-

+
+

@@ -28,7 +28,9 @@
-
+
+
+
لؤلؤة
@@ -77,11 +79,289 @@

Usage

Linux
-
xmodmap ar-lulua.xmodmap
+
Run: xmodmap ar-lulua.xmodmap
+
+
+
+
+
+
+

Learn more

+
+
+
+
+ +
+
+
+
+

الأبجدية العربية

+
+
+
+
+

The Arabic Alphabet

+

+ There are 28 letters in the Arabic alphabet, plus quite a few extra + symbols required for proper text input, like the hamza in its different + shapes أ إ آ ء ئ ؤ, ta marbutah ة, alif maqsurah ى and various diacritics for vowelized texts. + + Since the usability of a keyboard layout depends on the text entered + it is necessary to study letter and letter combination frequencies first. + + The corpus used for the following analysis consists of +

+
    +
  • 547,110 articles from + aljazeera.net, an + Arabic-language news site
  • +
  • 149,901 articles from BBC + Arabic, another Arabic-language news site
  • +
  • a + dump of the Arabic + Wikipedia as of July 2019, extracted using + wikiextractor + containing 857,386 articles
  • +
  • and a plain-text copy of the Quran from tanzil.net using the + options Simple Enhanced and Text (for inclusion of diacritics)
  • +
+

+ summing up to roughly 1.5 billion characters. + + The plot below shows ا ل ي و م ن can be + considered the most frequently used letters in the Arabic language. +

+
+
+
+ +
+
+
+
+
+
+
+
+
+

Arabic letter frequency distribution

+
+
+
+
+
+ +
+
+
+
+
+
+

Related work

+
+
+
+ +
+
+ +
+
+
+
+
+
+

+ Trying to unify existing layouts, the Arab Standardization and + Metrology Organization (ASMO), now part of + AIDMO, published an Arabic + keyboard layout in 1987 as + standard 663. + + This, however, turned out to be a failure, due to lack of adoption by + the typewriter industry. +

+
+
+
+
+ +
+
+ +
+
+
+
+
+
+

+ Instead we’re currently using this layout (on Linux), which is + similar, but not quite the same. + + Most notably this layout arranges letters by their visual similarity. + + Thus it allocates suboptimal or even awkward positions to frequently + used letters like ا ل and + ذ. +

+
+
+
+
+ +
+
+ +
+
+
+
+
+
+

+ The work by Malas et al. (2008), + Toward Optimal Arabic Keyboard Layout Using Genetic Algorithm, + presents an alternative layout generated by a genetic algorithm. + + They used a snapshot of the Arabic Wikipedia probably from around 2008 and + optimized for typing speed only, claiming 35% faster typing compared + to the currently used layouts. + + However the choice to put ي in the top + row seems odd and suggests the authors did not take the time to review + the layout manually, given this letter is the third most frequent one + even in their own research. +

+
+
+
+
+ +
+
+ +
+
+
+
+
+
+

+ In 2015 patent + 9,041,657 B2 + was filed in the US, presenting yet another computer-generated layout. + + Its genetic algorithm was seeded with just 54 Arabic e-books consisting + of 7 million characters in total. + + Overall it claims to be 9% faster than default layouts. + + This layout rips off most of the standard layout’s second layer, + but amusingly fails to include a question mark, while it does + provide three single-quote marks ’ and two Arabic + semicolons ؛. + + Additionally it places ي in an even + worse position than Malas’ layout. +

+
+
+
+
+ +
+
+ +
+
+
+
+
+
+

+ In the paper + A new optimal Arabic keyboard layout using genetic algorithm + Khorshid et al. present yet another + layout. + + They claim a 36% improvement over the standard keyboard based on + their criteria for ergonomic layouts. + + However in their layout from figure 8 both letters ب ر are in suboptimal positions. +

+
+
+
+
+ +
+
+ +
+
+
+
+
+
+

+ The Arabic Phonetic Keyboard + simply maps the QWERTY layout to Arabic letters, based on their sound. + Thus Q becomes ق, Y becomes ي and so on. + It claims to be optimized for writing vowelized texts, especially + Quranic Arabic, and thus includes quite a few combining characters and + special symbols. + Although it claims to make frequently used letters easily available – + based on the work of Intellaren – it makes no effort to arrange letters + according to their usage frequency. +

+
+
+
+
+ +
+
+
+
+
+

+ While technically speaking not a layout but an alternative input + method, Intellark by + Intellaren is worth mentioning. + + It is based on repeatedly pressing the same button to modify the + current character. + + For example pressing A on the QWERTY keyboard cycles through the + alternatives ا أ إ آ and ء. + + Obviously this is slow, error-prone and violates Dvorak’s guidelines + for keyboard layout designs. +

+
+
+
+
+ + + +
+ + diff --git a/doc/style.css b/doc/style.css index 995be7a..a61170b 100644 --- a/doc/style.css +++ b/doc/style.css @@ -33,8 +33,11 @@ body { h1, h2, h3 { font-weight: 100; } +h1 { + font-size: 4em; +} h2 { - font-size: 3em; + font-size: 2.5em; } figure { max-width: 70em; @@ -61,11 +64,11 @@ div.title-card { border-top: 1em solid #888a85; } div.title-card .lbox { - margin: 1em; + margin: 2vw; } div.title-card h1 { margin: 0; - padding: 1em; + padding: 0.2em; } div.title-card img.logo { max-width: 20em; @@ -80,6 +83,12 @@ div.title-card .layout img { display: block; margin: 0 auto; } +div.indepth-card { + padding: 10vh 0; + margin: 1em 0; + background-color: #555753; + color: #eeeeec; +} .flexreverse { flex-direction: row-reverse; } diff --git a/lulua/data/layouts/ar-khorshid.yaml b/lulua/data/layouts/ar-khorshid.yaml new file mode 100644 index 0000000..04a457e --- /dev/null +++ b/lulua/data/layouts/ar-khorshid.yaml @@ -0,0 +1,79 @@ +name: ar-khorshid +layout: +- layer: + Bl1: "ء" + Bl2: "1" + Bl3: "2" + Bl4: "3" + Bl5: "4" + Bl6: "5" + Bl7: "6" + Br6: "7" + Br5: "8" + Br4: "9" + Br3: "0" + Br2: "-" + Br1: "=" + + Cl1: "ف" + Cl2: "ئ" + Cl3: "ش" + Cl4: "لا" + Cl5: "ط" + Cr7: "ؤ" + Cr6: "ث" + Cr5: "س" + Cr4: "ص" + Cr3: "ج" + Cr2: "خ" + Cr1: "غ" + #Cr0: "\\" + + CD_ret: "\n" + + Dl1: "ز" + Dl2: "ل" + Dl3: "م" + Dl4: "ن" + Dl5: "ذ" + Dr7: "ه" + Dr6: "ا" + Dr5: "و" + Dr4: "ي" + Dr3: "ع" + Dr2: "ق" + #Dr1: "" + + El1: "" + El2: "ب" + El3: "ح" + El4: "ت" + El5: "د" + El6: "ر" + Er5: "ى" + Er4: "ة" + Er3: "ك" + Er2: "ض" + Er1: "ظ" + + Fl_space: " " + Fr_space: " " + modifier: + - [] +- layer: + #Bl1: "!" + Bl2: "!" + Bl3: "@" + Bl4: "#" + Bl5: "$" + Bl6: "%" + Bl7: "^" + Br6: "&" + Br5: "*" + Br4: "(" + Br3: ")" + Br2: "_" + Br1: "+" + modifier: + - [El_shift] + - [Er_shift] -- cgit v1.2.3