summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lars-Dominik Braun <lars@6xq.net>, 2019-09-27 09:12:57 +0200
committer: Lars-Dominik Braun <lars@6xq.net>, 2019-09-27 09:12:57 +0200
commit: e6b0d57967a05fb8073cc7f071c66aa5bc4a5713 (patch)
tree: ebd7d484f8528a645c627c0cca9835431a6b026c
parent: cabbb5b4e8790fe55e10949bdebce08d0a35127c (diff)
download: lulua-e6b0d57967a05fb8073cc7f071c66aa5bc4a5713.tar.gz
          lulua-e6b0d57967a05fb8073cc7f071c66aa5bc4a5713.tar.bz2
          lulua-e6b0d57967a05fb8073cc7f071c66aa5bc4a5713.zip
Improve docs
Document how stats were generated in doc/Makefile
-rw-r--r-- README.rst 10
-rw-r--r-- doc/Makefile 31
2 files changed, 39 insertions, 2 deletions
diff --git a/README.rst b/README.rst
index a930e22..cb333e3 100644
--- a/README.rst
+++ b/README.rst
@@ -1,13 +1,15 @@
لؤلؤة
=====
-Ergonomic Arabic Keyboard layout. See https://6xq.net/لؤلؤة/ for details.
+Ergonomic Arabic Keyboard layout. See website_ for details.
+
+.. _website: https://6xq.net/لؤلؤة/
Creating layouts
----------------
Although optimized for the Arabic language it should be possible to create
-layouts for other (non-RTL) languages as well. Here’s how to proceed. First,
+layouts for other (non-RTL) languages as well. Here’s how to proceed: First,
create a data file ``my-layout.yaml`` that contains all key to character
mappings the new layout should have. Look at ``lulua/data/layouts`` for
examples. Then create statistics for a lzip-compressed corpus of your
@@ -29,6 +31,10 @@ To get a pretty picture (SVG) of your layout render it:
lulua-render -l evolved.yaml svg evolved.svg
+It is highly recommended to use pypy3_ instead of CPython.
+
+.. _pypy3: http://pypy.org/
+
Acknowledgements
----------------
diff --git a/doc/Makefile b/doc/Makefile
index 8ecf093..0cd56a4 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -1,3 +1,11 @@
+### settings ###
+CORPUSDIR:=../corpus
+STATSDIR:=..
+WIKIEXTRACTOR:=../3rdparty/wikiextractor/WikiExtractor.py
+OPTROUNDS=100000
+# pin layers, keep hand-optimized numbers, keep top row free
+OPTPINS='0;1;2;0,Bl1;0,Bl2;0,Bl3;0,Bl4;0,Bl5;0,Bl6;0,Bl7;0,Br6;0,Br5;0,Br4;0,Br3;0,Br2;0,Br1;3,Cl4;3,Cl3;3,Cl2;3,Cl1;3,Dl4;3,Dl3;3,Dl2;3,Dl1;3,El5;3,El4;3,El3;3,El2;3,Dl5;3,Cl5;3,El6'
+
all: ar-lulua.xmodmap ar-lulua.svg ar-asmo663.svg ar-linux.svg ar-malas.svg ar-phonetic.svg ar-osman.svg letterfreq.json ar-khorshid.svg
letterfreq.json: ../stats.pickle
@@ -27,3 +35,26 @@ ar-osman.svg:
ar-khorshid.svg:
lulua-render svg -l ar-khorshid $@
+### corpora to stats ###
+# Sorry, but I can’t provide corpus data due to copyright issues
+$(STATSDIR)/stats-bbcarabic.pickle: $(CORPUSDIR)/bbcarabic/raw
+ time find $< -type f | lulua-write bbcarabic ar-lulua > $@
+
+$(STATSDIR)/stats-aljazeera.pickle: $(CORPUSDIR)/aljazeera/raw
+ time find $< -type f | lulua-write aljazeera ar-lulua > $@
+
+$(STATSDIR)/stats-tanzil.pickle: $(CORPUSDIR)/tanzil-quaran/plain.txt.lz
+ echo $< | lulua-write text ar-lulua | lulua-combine-stats > $@
+
+$(STATSDIR)/stats-arwiki.pickle: $(CORPUSDIR)/arwiki/arwiki-20190701-pages-articles.xml.bz2
+ $(WIKIEXTRACTOR) -ns 0 --json -o - $< 2>/dev/null | jq .text | lulua-write json ar-lulua | lulua-combine-stats > $@
+
+$(STATSDIR)/stats.pickle: $(STATSDIR)/stats-arwiki.pickle \
+ $(STATSDIR)/stats-tanzil.pickle $(STATSDIR)/stats-aljazeera.pickle \
+ $(STATSDIR)/stats-bbcarabic.pickle
+ cat $^ | lulua-combine-stats > $@
+
+### optimization ###
+optimized.yaml: $(STATSDIR)/stats.pickle
+ lulua-optimize -n $(OPTROUNDS) -r -p $(OPTPINS) -l ar-lulua < $< > $@
+