From 4679f89e8fe2541e10eb1c834eb9f56a68b0e3ee Mon Sep 17 00:00:00 2001
From: Lars-Dominik Braun
Date: Sat, 25 Apr 2020 21:07:11 +0200
Subject: ar-lulua: Optimize layer two and three
Take another stab at the symbol layers and call it v0.3.
---
lulua/data/layouts/ar-lulua.yaml | 105 +++++++++++++++++++--------------------
lulua/data/report/index.html | 8 ++-
2 files changed, 59 insertions(+), 54 deletions(-)
(limited to 'lulua')
diff --git a/lulua/data/layouts/ar-lulua.yaml b/lulua/data/layouts/ar-lulua.yaml
index 419df9a..60f0c10 100644
--- a/lulua/data/layouts/ar-lulua.yaml
+++ b/lulua/data/layouts/ar-lulua.yaml
@@ -54,42 +54,44 @@ layout:
modifier:
- []
- layer:
- #Bl2: "›" # SINGLE RIGHT-POINTING ANGLE QUOTATIONMARK
- #Bl7: '$'
- #Br4: "‹" # SINGLE LEFT-POINTING ANGLE QUOTATIONMARK
- #Br6: '%'
+ Bl4: '%'
+ Bl6: $
+ Br6: "\u2026" # HORIZONTAL ELLIPSIS
+ Br5: '@'
+ Br4: ^
- #Cl2: '+'
- #Cl3: ']'
- Cl4: '-'
- #Cl5: '*'
- #Cr6: '['
- Cr6: '!'
- #Cr4: '}'
- Cr3: "…" # HORIZONTAL ELLIPSIS
- #Cr2: '&'
+ Cl2: +
+ Cl3: ']'
+ Cl4: )
+ Cl5: '}'
+ Cr7: '{'
+ Cr6: (
+ Cr5: '['
+ Cr4: _
+ Cr3: ;
+ Cr1: '?'
- Dl1: ':'
- Dl2: '('
- Dl3: "؛" # ARABIC SEMICOLON
- Dl4: "؟" # ARABIC QUESTION MARK
- #Dl3: '"'
- #Dl5: '_'
- Dl5: "«" # LEFT-POINTING DOUBLE ANGLE QUOTATIONMARK
- Dr7: "»" # RIGHT-POINTING DOUBLE ANGLE QUOTATIONMARK
- Dr6: '.'
- Dr5: "،" # ARABIC COMMA
- Dr4: ')'
- #Dr3: '/'
- #Dr2: '@'
+ Dl1: '"'
+ Dl2: "\u061B" # ARABIC SEMICOLON
+ Dl3: ':'
+ Dl4: "\u061F" # ARABIC QUESTION MARK
+ Dl5: "\xBB"
+ Dr7: "\xAB"
+ Dr6: .
+ Dr5: "\u060C" # ARABIC COMMA
+ Dr4: '!'
+ Dr3: '|'
+ Dr2: \
- #El3: '~'
- #El4: '>'
- #El5: '='
- #El6: '{'
- #Er5: '#'
- #Er4: '<'
- #Er2: '^'
+ El2: '`'
+ El3: '~'
+ El4: '>'
+ El5: ''''
+ El6: "\u203A" # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ Er5: "\u2039" # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ Er4: '='
+ Er3: <
+ Er2: '&'
modifier:
- [El_shift]
- [Er_shift]
@@ -107,27 +109,10 @@ layout:
#Br1: "\u06E2" # ARABIC SMALL HIGH MEEM ISOLATED FORM
#Cl2: "\u06D9" # ARABIC SMALL HIGH LAM ALEF
- # above ordinary ALEF
- Cl3: "\u0671" # ARABIC LETTER ALEF WASLA
- # same key as the dash
- Cl4: "\u0640" # ARABIC TATWEEL
#Cr4: "\u06DD" # ARABIC END OF AYAH
#Cr2: "\u06DA" # ARABIC SMALL HIGH JEEM
#Cr1: "\u06E5" # ARABIC SMALL WAW
- # above damma, fatha and kasra
- Cr6: "\u064C" # ARABIC DAMMATAN
- Cr5: "\u064B" # ARABIC FATHATAN
- Cr4: "\u064D" # ARABIC KASRATAN
-
- Dl3: "\u0652" # ARABIC SUKUN
- Dl4: "\u0651" # ARABIC SHADDA
- Dl5: "\u0670" # ARABIC LETTER SUPERSCRIPT ALEF
- Dr7: "\u0653" # ARABIC MADDAH ABOVE
- Dr6: "\u064F" # ARABIC DAMMA
- Dr5: "\u064E" # ARABIC FATHA
- Dr4: "\u0650" # ARABIC KASRA
-
#Dl1: "\u06DC" # ARABIC SMALL HIGH SEEN
#Dr3: "\u06D7" # ARABIC SMALL HIGH LIGATURE QAF WITH LAM WITH ALEF MAKSURA
@@ -138,6 +123,20 @@ layout:
#Er5: "\u06ED" # ARABIC SMALL LOW MEEM
#Er4: "\u06E9" # ARABIC PLACE OF SAJDAH
#Er1: "\u06D6" # ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA
+
+ Cl3: "\u0670" # ARABIC LETTER SUPERSCRIPT ALEF
+ Cl4: "\u0671" # ARABIC LETTER ALEF WASLA
+ Cr6: "\u0640" # ARABIC TATWEEL
+ Cr5: "\u064C" # ARABIC DAMMATAN
+
+ Dl2: "\u064D" # ARABIC KASRATAN
+ Dl3: "\u0651" # ARABIC SHADDA
+ Dl4: "\u0650" # ARABIC KASRA
+ Dl5: "\u0653" # ARABIC MADDAH ABOVE
+ Dr7: "\u064B" # ARABIC FATHATAN
+ Dr6: "\u064F" # ARABIC DAMMA
+ Dr5: "\u064E" # ARABIC FATHA
+ Dr4: "\u0652" # ARABIC SUKUN
modifier:
- [Dl_caps]
- [Dr1]
@@ -151,7 +150,7 @@ layout:
Dl2: "٦"
Dl3: "٥"
Dl4: "٤"
- Dl5: "\u2212"
+ Dl5: "\u2212" # MINUS SIGN
El2: "٬" # ARABIC THOUSANDS SEPARATOR
El3: "٫" # ARABIC DECIMAL SEPARATOR
El4: "٩"
@@ -165,5 +164,5 @@ layout:
- [Fr_altgr]
- [El1]
name: ar-lulua
-version: 0.2
-date: 2019-10-06
+version: 0.3
+date: 2020-04-25
diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html
index 96725b7..0e4c779 100644
--- a/lulua/data/report/index.html
+++ b/lulua/data/report/index.html
@@ -230,7 +230,13 @@
From several runs with 100.000 iterations each the layout which had
good scores and looked reasonable to the human eye was picked.
- Optimal arrengement of layers two and up are still under investigation.
+ Afterwards, the second layer was optimized using the same process, but
+ using only data from the Hindawi corpus, because it is the only one
+ that contains at least some fully diacriticised texts.
+
+ Finally, the different brackets were arranged by hand, and the remaining
+ symbols were distributed algorithmically on the third layer using the raw
+ Wikitext from the Arabic Wikipedia dataset.
--
cgit v1.2.3