diff options
-rw-r--r-- | .gitignore | 5 | ||||
-rw-r--r-- | README.rst | 17 | ||||
-rw-r--r-- | doc/Makefile | 61 | ||||
-rw-r--r-- | doc/index.html | 41 | ||||
-rwxr-xr-x | gen.sh | 109 |
5 files changed, 165 insertions, 68 deletions
@@ -4,5 +4,10 @@ __pycache__ .coverage .mypy_cache/ doc/*.svg +doc/*.yaml doc/*.xmodmap doc/letterfreq.json +.ninja_* +build.ninja +stats/ +corpus/ @@ -17,7 +17,7 @@ choosing: .. code:: bash - ls corpus/*.txt.lz | lulua-write text my-layout.yaml | lulua-stats combine > stats.pickle + find corpus/*.txt.lz | lulua-write text my-layout.yaml | lulua-analyze combine > stats.pickle Now you can optimize your layout using: @@ -35,6 +35,21 @@ It is highly recommended to use pypy3_ instead of CPython. .. _pypy3: http://pypy.org/ +Building documentation +---------------------- + +This essentially means building the website_ and reproducing my results. You’ll +need to obtain the corpora from me_, which are not public due to copyright +issues. Then simply run + +.. code:: bash + + ./gen.sh > build.ninja && ninja + +to analyze them and create pretty pictures as well as statistics in ``doc/``. + +.. _me: lars+lulua@6xq.net + Acknowledgements ---------------- diff --git a/doc/Makefile b/doc/Makefile deleted file mode 100644 index e280a53..0000000 --- a/doc/Makefile +++ /dev/null @@ -1,61 +0,0 @@ -### settings ### -CORPUSDIR:=../corpus -STATSDIR:=.. -WIKIEXTRACTOR:=../3rdparty/wikiextractor/WikiExtractor.py -OPTROUNDS=100000 -# pin layers, keep hand-optimized numbers, keep top row free -OPTPINS='0;1;2;0,Bl1;0,Bl2;0,Bl3;0,Bl4;0,Bl5;0,Bl6;0,Bl7;0,Br6;0,Br5;0,Br4;0,Br3;0,Br2;0,Br1;3,Cl4;3,Cl3;3,Cl2;3,Cl1;3,Dl4;3,Dl3;3,Dl2;3,Dl1;3,El5;3,El4;3,El3;3,El2;3,Dl5;3,Cl5;3,El6' -OPTMODEL=mod01 - -all: ar-lulua.xmodmap ar-lulua.svg ar-asmo663.svg ar-linux.svg ar-malas.svg ar-phonetic.svg ar-osman.svg letterfreq.json ar-khorshid.svg - -letterfreq.json: ../stats.pickle - lulua-analyze -l ar-lulua letterfreq < $< > $@ - -ar-lulua.xmodmap: - lulua-render xmodmap -l ar-lulua $@ - -ar-lulua.svg: - lulua-render svg -l ar-lulua $@ - -ar-asmo663.svg: - lulua-render svg -l ar-asmo663 $@ - -ar-linux.svg: - lulua-render svg -l ar-linux $@ - -ar-malas.svg: - lulua-render svg -l ar-malas $@ - -ar-phonetic.svg: - lulua-render svg -l ar-phonetic $@ - -ar-osman.svg: - lulua-render svg -l ar-osman $@ - -ar-khorshid.svg: - lulua-render svg -l ar-khorshid $@ - -### corpora to stats ### -# Sorry, but I can’t provide corpus data due to copyright issues -$(STATSDIR)/stats-bbcarabic.pickle: $(CORPUSDIR)/bbcarabic/raw - time find $< -type f | lulua-write bbcarabic ar-lulua > $@ - -$(STATSDIR)/stats-aljazeera.pickle: $(CORPUSDIR)/aljazeera/raw - time find $< -type f | lulua-write aljazeera ar-lulua > $@ - -$(STATSDIR)/stats-tanzil.pickle: $(CORPUSDIR)/tanzil-quaran/plain.txt.lz - echo $< | lulua-write text ar-lulua | lulua-combine-stats > $@ - -$(STATSDIR)/stats-arwiki.pickle: $(CORPUSDIR)/arwiki/arwiki-20190701-pages-articles.xml.bz2 - $(WIKIEXTRACTOR) -ns 0 --json -o - $< 2>/dev/null | jq .text | lulua-write json ar-lulua | lulua-combine-stats > $@ - -$(STATSDIR)/stats.pickle: $(STATSDIR)/stats-arwiki.pickle \ - $(STATSDIR)/stats-tanzil.pickle $(STATSDIR)/stats-aljazeera.pickle \ - $(STATSDIR)/stats-bbcarabic.pickle - cat $^ | lulua-combine-stats > $@ - -### optimization ### -optimized.yaml: $(STATSDIR)/stats.pickle - lulua-optimize -n $(OPTROUNDS) -r -p $(OPTPINS) -l ar-lulua -m $(OPTMODEL) < $< > $@ - diff --git a/doc/index.html b/doc/index.html index d4610af..e5f7973 100644 --- a/doc/index.html +++ b/doc/index.html @@ -162,6 +162,35 @@ </figure> </section> +<section> +<div class="pure-g flexreverse"> + <div class="pure-u-1 pure-u-md-1-2"> + </div> + <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="lbox"> + <h2>Layout properties</h2> + + <p>Below are statistics for the proposed layout.</p> + </div> + </div> +</div> + +<figure id="ar-lulua-heat"> +<div class="lbox" lang="en"> + <img src="ar-lulua-heat.svg"> +</div> +<figcaption class="pure-g flexreverse"> + <div class="pure-u-1 pure-u-md-1-2"> + </div> + <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="lbox"> + <p>Button heatmap</p> + </div> + </div> +</figcaption> +</figure> +</section> + <section class="layoutgallery"> <div class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -175,7 +204,7 @@ <figure id="ar-asmo663"> <div class="lbox"> - <img src="ar-asmo663.svg"> + <img src="ar-asmo663-heat.svg"> </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -199,7 +228,7 @@ <figure id="ar-linux"> <div class="lbox"> - <img src="ar-linux.svg"> + <img src="ar-linux-heat.svg"> </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -223,7 +252,7 @@ <figure id="ar-malas"> <div class="lbox"> - <img src="ar-malas.svg"> + <img src="ar-malas-heat.svg"> </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -251,7 +280,7 @@ <figure id="ar-osman"> <div class="lbox"> - <img src="ar-osman.svg"> + <img src="ar-osman-heat.svg"> </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -283,7 +312,7 @@ <figure> <div class="lbox"> - <img src="ar-khorshid.svg"> + <img src="ar-khorshid-heat.svg"> </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -309,7 +338,7 @@ <figure> <div class="lbox"> - <img src="ar-phonetic.svg"> + <img src="ar-phonetic-heat.svg"> </div> <figcaption class="pure-g flexreverse"> <div class="pure-u-1 pure-u-md-1-2"> @@ -0,0 +1,109 @@ +#!/bin/sh +# Generate build.ninja that builds the docs/stats/… + +layouts="ar-lulua ar-asmo663 ar-linux ar-malas ar-phonetic ar-osman ar-khorshid" +layoutsXmodmap="ar-lulua" + +cat <<EOF +### auto-generated by gen.sh. Do not edit. ### + +### settings ### +corpusdir=corpus +statsdir=stats +docdir=doc +wikiextractor=3rdparty/wikiextractor/WikiExtractor.py +optrounds=100000 +# pin layers, keep hand-optimized numbers, keep top row free +optpins=0;1;2;0,Bl1;0,Bl2;0,Bl3;0,Bl4;0,Bl5;0,Bl6;0,Bl7;0,Br6;0,Br5;0,Br4;0,Br3;0,Br2;0,Br1;3,Cl4;3,Cl3;3,Cl2;3,Cl1;3,Dl4;3,Dl3;3,Dl2;3,Dl1;3,El5;3,El4;3,El3;3,El2;3,Dl5;3,Cl5;3,El6 +optmodel=mod01 + +### pools ### +# lulua-write uses internal parallelization and should not be run more than +# once concurrently. It also uses alot of memory, so… +pool write + depth = 1 + +### rules ### +rule opt + command = lulua-optimize -n \$optrounds -r -p \$optpins -l ar-lulua -m \$optmodel < \$in > \$out + +rule render-svg + command = lulua-render -l \$layout svg \$out + +rule render-svg-heat + command = lulua-render -l \$layout svg --heatmap=\$in \$out + +rule render-xmodmap + command = lulua-render -l \$layout xmodmap \$out + +rule analyze-heat + command = lulua-analyze -l \$layout keyheatmap < \$in > \$out + +rule write-bbcarabic + command = find \$in -type f | lulua-write bbcarabic \$layout > \$out + pool = write + +rule write-aljazeera + command = find \$in -type f | lulua-write aljazeera \$layout > \$out + pool = write + +rule write-tanzil + command = echo \$in | lulua-write text \$layout | lulua-analyze combine > \$out + pool = write + +rule write-arwiki + command = \$wikiextractor -ns 0 --json -o - \$in 2>/dev/null | jq .text | lulua-write json \$layout | lulua-analyze combine > \$out + pool = write + +rule combine + command = cat \$in | lulua-analyze combine > \$out + +rule mkdir + command = mkdir -p \$out + +rule letterfreq + command = lulua-analyze -l ar-lulua letterfreq < \$in > \$out + +### build targets ### +build \$docdir/letterfreq.json: letterfreq \$statsdir/ar-lulua/all.pickle + +EOF + +for l in $layouts; do +cat <<EOF +build \$statsdir/${l}: mkdir + +build \$statsdir/${l}/bbcarabic.pickle: write-bbcarabic \$corpusdir/bbcarabic/raw || \$statsdir/${l} + layout = ${l} + +build \$statsdir/${l}/aljazeera.pickle: write-aljazeera \$corpusdir/aljazeera/raw || \$statsdir/${l} + layout = ${l} + +build \$statsdir/${l}/tanzil.pickle: write-tanzil \$corpusdir/tanzil-quaran/plain.txt.lz || \$statsdir/${l} + layout = ${l} + +build \$statsdir/${l}/arwiki.pickle: write-arwiki \$corpusdir/arwiki/arwiki-20190701-pages-articles.xml.bz2 || \$statsdir/${l} + layout = ${l} + +build \$statsdir/${l}/all.pickle: combine \$statsdir/${l}/bbcarabic.pickle \$statsdir/${l}/aljazeera.pickle \$statsdir/${l}/tanzil.pickle \$statsdir/${l}/arwiki.pickle || \$statsdir/${l} + +build \$docdir/${l}.svg: render-svg + layout = ${l} + +build \$docdir/${l}-heat.yaml: analyze-heat \$statsdir/${l}/all.pickle + layout = ${l} + +build \$docdir/${l}-heat.svg: render-svg-heat \$docdir/${l}-heat.yaml + layout = ${l} + +EOF +done + +for l in $layoutsXmodmap; do +cat <<EOF +build \$docdir/${l}.xmodmap: render-xmodmap + layout = ${l} + +EOF +done + |