summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore5
-rw-r--r--README.rst17
-rw-r--r--doc/Makefile61
-rw-r--r--doc/index.html41
-rwxr-xr-xgen.sh109
5 files changed, 165 insertions, 68 deletions
diff --git a/.gitignore b/.gitignore
index 053db80..62a43bc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,5 +4,10 @@ __pycache__
.coverage
.mypy_cache/
doc/*.svg
+doc/*.yaml
doc/*.xmodmap
doc/letterfreq.json
+.ninja_*
+build.ninja
+stats/
+corpus/
diff --git a/README.rst b/README.rst
index cb333e3..e26a49b 100644
--- a/README.rst
+++ b/README.rst
@@ -17,7 +17,7 @@ choosing:
.. code:: bash
- ls corpus/*.txt.lz | lulua-write text my-layout.yaml | lulua-stats combine > stats.pickle
+ find corpus/*.txt.lz | lulua-write text my-layout.yaml | lulua-analyze combine > stats.pickle
Now you can optimize your layout using:
@@ -35,6 +35,21 @@ It is highly recommended to use pypy3_ instead of CPython.
.. _pypy3: http://pypy.org/
+Building documentation
+----------------------
+
+This essentially means building the website_ and reproducing my results. You’ll
+need to obtain the corpora from me_, which are not public due to copyright
+issues. Then simply run
+
+.. code:: bash
+
+ ./gen.sh > build.ninja && ninja
+
+to analyze them and create pretty pictures as well as statistics in ``doc/``.
+
+.. _me: lars+lulua@6xq.net
+
Acknowledgements
----------------
diff --git a/doc/Makefile b/doc/Makefile
deleted file mode 100644
index e280a53..0000000
--- a/doc/Makefile
+++ /dev/null
@@ -1,61 +0,0 @@
-### settings ###
-CORPUSDIR:=../corpus
-STATSDIR:=..
-WIKIEXTRACTOR:=../3rdparty/wikiextractor/WikiExtractor.py
-OPTROUNDS=100000
-# pin layers, keep hand-optimized numbers, keep top row free
-OPTPINS='0;1;2;0,Bl1;0,Bl2;0,Bl3;0,Bl4;0,Bl5;0,Bl6;0,Bl7;0,Br6;0,Br5;0,Br4;0,Br3;0,Br2;0,Br1;3,Cl4;3,Cl3;3,Cl2;3,Cl1;3,Dl4;3,Dl3;3,Dl2;3,Dl1;3,El5;3,El4;3,El3;3,El2;3,Dl5;3,Cl5;3,El6'
-OPTMODEL=mod01
-
-all: ar-lulua.xmodmap ar-lulua.svg ar-asmo663.svg ar-linux.svg ar-malas.svg ar-phonetic.svg ar-osman.svg letterfreq.json ar-khorshid.svg
-
-letterfreq.json: ../stats.pickle
- lulua-analyze -l ar-lulua letterfreq < $< > $@
-
-ar-lulua.xmodmap:
- lulua-render xmodmap -l ar-lulua $@
-
-ar-lulua.svg:
- lulua-render svg -l ar-lulua $@
-
-ar-asmo663.svg:
- lulua-render svg -l ar-asmo663 $@
-
-ar-linux.svg:
- lulua-render svg -l ar-linux $@
-
-ar-malas.svg:
- lulua-render svg -l ar-malas $@
-
-ar-phonetic.svg:
- lulua-render svg -l ar-phonetic $@
-
-ar-osman.svg:
- lulua-render svg -l ar-osman $@
-
-ar-khorshid.svg:
- lulua-render svg -l ar-khorshid $@
-
-### corpora to stats ###
-# Sorry, but I can’t provide corpus data due to copyright issues
-$(STATSDIR)/stats-bbcarabic.pickle: $(CORPUSDIR)/bbcarabic/raw
- time find $< -type f | lulua-write bbcarabic ar-lulua > $@
-
-$(STATSDIR)/stats-aljazeera.pickle: $(CORPUSDIR)/aljazeera/raw
- time find $< -type f | lulua-write aljazeera ar-lulua > $@
-
-$(STATSDIR)/stats-tanzil.pickle: $(CORPUSDIR)/tanzil-quaran/plain.txt.lz
- echo $< | lulua-write text ar-lulua | lulua-combine-stats > $@
-
-$(STATSDIR)/stats-arwiki.pickle: $(CORPUSDIR)/arwiki/arwiki-20190701-pages-articles.xml.bz2
- $(WIKIEXTRACTOR) -ns 0 --json -o - $< 2>/dev/null | jq .text | lulua-write json ar-lulua | lulua-combine-stats > $@
-
-$(STATSDIR)/stats.pickle: $(STATSDIR)/stats-arwiki.pickle \
- $(STATSDIR)/stats-tanzil.pickle $(STATSDIR)/stats-aljazeera.pickle \
- $(STATSDIR)/stats-bbcarabic.pickle
- cat $^ | lulua-combine-stats > $@
-
-### optimization ###
-optimized.yaml: $(STATSDIR)/stats.pickle
- lulua-optimize -n $(OPTROUNDS) -r -p $(OPTPINS) -l ar-lulua -m $(OPTMODEL) < $< > $@
-
diff --git a/doc/index.html b/doc/index.html
index d4610af..e5f7973 100644
--- a/doc/index.html
+++ b/doc/index.html
@@ -162,6 +162,35 @@
</figure>
</section>
+<section>
+<div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <h2>Layout properties</h2>
+
+ <p>Below are statistics for the proposed layout.</p>
+ </div>
+ </div>
+</div>
+
+<figure id="ar-lulua-heat">
+<div class="lbox" lang="en">
+ <img src="ar-lulua-heat.svg">
+</div>
+<figcaption class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-md-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="lbox">
+ <p>Button heatmap</p>
+ </div>
+ </div>
+</figcaption>
+</figure>
+</section>
+
<section class="layoutgallery">
<div class="pure-g flexreverse">
<div class="pure-u-1 pure-u-md-1-2">
@@ -175,7 +204,7 @@
<figure id="ar-asmo663">
<div class="lbox">
- <img src="ar-asmo663.svg">
+ <img src="ar-asmo663-heat.svg">
</div>
<figcaption class="pure-g flexreverse">
<div class="pure-u-1 pure-u-md-1-2">
@@ -199,7 +228,7 @@
<figure id="ar-linux">
<div class="lbox">
- <img src="ar-linux.svg">
+ <img src="ar-linux-heat.svg">
</div>
<figcaption class="pure-g flexreverse">
<div class="pure-u-1 pure-u-md-1-2">
@@ -223,7 +252,7 @@
<figure id="ar-malas">
<div class="lbox">
- <img src="ar-malas.svg">
+ <img src="ar-malas-heat.svg">
</div>
<figcaption class="pure-g flexreverse">
<div class="pure-u-1 pure-u-md-1-2">
@@ -251,7 +280,7 @@
<figure id="ar-osman">
<div class="lbox">
- <img src="ar-osman.svg">
+ <img src="ar-osman-heat.svg">
</div>
<figcaption class="pure-g flexreverse">
<div class="pure-u-1 pure-u-md-1-2">
@@ -283,7 +312,7 @@
<figure>
<div class="lbox">
- <img src="ar-khorshid.svg">
+ <img src="ar-khorshid-heat.svg">
</div>
<figcaption class="pure-g flexreverse">
<div class="pure-u-1 pure-u-md-1-2">
@@ -309,7 +338,7 @@
<figure>
<div class="lbox">
- <img src="ar-phonetic.svg">
+ <img src="ar-phonetic-heat.svg">
</div>
<figcaption class="pure-g flexreverse">
<div class="pure-u-1 pure-u-md-1-2">
diff --git a/gen.sh b/gen.sh
new file mode 100755
index 0000000..b9b9028
--- /dev/null
+++ b/gen.sh
@@ -0,0 +1,109 @@
+#!/bin/sh
+# Generate build.ninja that builds the docs/stats/…
+
+layouts="ar-lulua ar-asmo663 ar-linux ar-malas ar-phonetic ar-osman ar-khorshid"
+layoutsXmodmap="ar-lulua"
+
+cat <<EOF
+### auto-generated by gen.sh. Do not edit. ###
+
+### settings ###
+corpusdir=corpus
+statsdir=stats
+docdir=doc
+wikiextractor=3rdparty/wikiextractor/WikiExtractor.py
+optrounds=100000
+# pin layers, keep hand-optimized numbers, keep top row free; the value contains semicolons, so rules must single-quote it for the shell
+optpins=0;1;2;0,Bl1;0,Bl2;0,Bl3;0,Bl4;0,Bl5;0,Bl6;0,Bl7;0,Br6;0,Br5;0,Br4;0,Br3;0,Br2;0,Br1;3,Cl4;3,Cl3;3,Cl2;3,Cl1;3,Dl4;3,Dl3;3,Dl2;3,Dl1;3,El5;3,El4;3,El3;3,El2;3,Dl5;3,Cl5;3,El6
+optmodel=mod01
+
+### pools ###
+# lulua-write uses internal parallelization and should not be run more than
+# once concurrently. It also uses a lot of memory, so…
+pool write
+ depth = 1
+
+### rules ###
+rule opt
+ command = lulua-optimize -n \$optrounds -r -p '\$optpins' -l ar-lulua -m \$optmodel < \$in > \$out
+
+rule render-svg
+ command = lulua-render -l \$layout svg \$out
+
+rule render-svg-heat
+ command = lulua-render -l \$layout svg --heatmap=\$in \$out
+
+rule render-xmodmap
+ command = lulua-render -l \$layout xmodmap \$out
+
+rule analyze-heat
+ command = lulua-analyze -l \$layout keyheatmap < \$in > \$out
+
+rule write-bbcarabic
+ command = find \$in -type f | lulua-write bbcarabic \$layout > \$out
+ pool = write
+
+rule write-aljazeera
+ command = find \$in -type f | lulua-write aljazeera \$layout > \$out
+ pool = write
+
+rule write-tanzil
+ command = echo \$in | lulua-write text \$layout | lulua-analyze combine > \$out
+ pool = write
+
+rule write-arwiki
+ command = \$wikiextractor -ns 0 --json -o - \$in 2>/dev/null | jq .text | lulua-write json \$layout | lulua-analyze combine > \$out
+ pool = write
+
+rule combine
+ command = cat \$in | lulua-analyze combine > \$out
+
+rule mkdir
+ command = mkdir -p \$out
+
+rule letterfreq
+ command = lulua-analyze -l ar-lulua letterfreq < \$in > \$out
+
+### build targets ###
+build \$docdir/letterfreq.json: letterfreq \$statsdir/ar-lulua/all.pickle
+
+EOF
+
+for l in $layouts; do
+cat <<EOF
+build \$statsdir/${l}: mkdir
+
+build \$statsdir/${l}/bbcarabic.pickle: write-bbcarabic \$corpusdir/bbcarabic/raw || \$statsdir/${l}
+ layout = ${l}
+
+build \$statsdir/${l}/aljazeera.pickle: write-aljazeera \$corpusdir/aljazeera/raw || \$statsdir/${l}
+ layout = ${l}
+
+build \$statsdir/${l}/tanzil.pickle: write-tanzil \$corpusdir/tanzil-quaran/plain.txt.lz || \$statsdir/${l}
+ layout = ${l}
+
+build \$statsdir/${l}/arwiki.pickle: write-arwiki \$corpusdir/arwiki/arwiki-20190701-pages-articles.xml.bz2 || \$statsdir/${l}
+ layout = ${l}
+
+build \$statsdir/${l}/all.pickle: combine \$statsdir/${l}/bbcarabic.pickle \$statsdir/${l}/aljazeera.pickle \$statsdir/${l}/tanzil.pickle \$statsdir/${l}/arwiki.pickle || \$statsdir/${l}
+
+build \$docdir/${l}.svg: render-svg
+ layout = ${l}
+
+build \$docdir/${l}-heat.yaml: analyze-heat \$statsdir/${l}/all.pickle
+ layout = ${l}
+
+build \$docdir/${l}-heat.svg: render-svg-heat \$docdir/${l}-heat.yaml
+ layout = ${l}
+
+EOF
+done
+
+for l in $layoutsXmodmap; do
+cat <<EOF
+build \$docdir/${l}.xmodmap: render-xmodmap
+ layout = ${l}
+
+EOF
+done
+