diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2019-09-27 09:12:57 +0200 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2019-09-27 09:12:57 +0200 |
commit | e6b0d57967a05fb8073cc7f071c66aa5bc4a5713 (patch) | |
tree | ebd7d484f8528a645c627c0cca9835431a6b026c /doc | |
parent | cabbb5b4e8790fe55e10949bdebce08d0a35127c (diff) | |
download | lulua-e6b0d57967a05fb8073cc7f071c66aa5bc4a5713.tar.gz lulua-e6b0d57967a05fb8073cc7f071c66aa5bc4a5713.tar.bz2 lulua-e6b0d57967a05fb8073cc7f071c66aa5bc4a5713.zip |
Improve docs
Document how stats were generated in doc/Makefile
Diffstat (limited to 'doc')
-rw-r--r-- | doc/Makefile | 31 |
1 file changed, 31 insertions, 0 deletions
diff --git a/doc/Makefile b/doc/Makefile
index 8ecf093..0cd56a4 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -1,3 +1,11 @@
+### settings ###
+CORPUSDIR:=../corpus
+STATSDIR:=..
+WIKIEXTRACTOR:=../3rdparty/wikiextractor/WikiExtractor.py
+OPTROUNDS=100000
+# pin layers, keep hand-optimized numbers, keep top row free
+OPTPINS='0;1;2;0,Bl1;0,Bl2;0,Bl3;0,Bl4;0,Bl5;0,Bl6;0,Bl7;0,Br6;0,Br5;0,Br4;0,Br3;0,Br2;0,Br1;3,Cl4;3,Cl3;3,Cl2;3,Cl1;3,Dl4;3,Dl3;3,Dl2;3,Dl1;3,El5;3,El4;3,El3;3,El2;3,Dl5;3,Cl5;3,El6'
+
 all: ar-lulua.xmodmap ar-lulua.svg ar-asmo663.svg ar-linux.svg ar-malas.svg ar-phonetic.svg ar-osman.svg letterfreq.json ar-khorshid.svg
 
 letterfreq.json: ../stats.pickle
@@ -27,3 +35,26 @@ ar-osman.svg:
 ar-khorshid.svg:
 	lulua-render svg -l ar-khorshid $@
 
+### corpora to stats ###
+# Sorry, but I can’t provide corpus data due to copyright issues
+$(STATSDIR)/stats-bbcarabic.pickle: $(CORPUSDIR)/bbcarabic/raw
+	time find $< -type f | lulua-write bbcarabic ar-lulua > $@
+
+$(STATSDIR)/stats-aljazeera.pickle: $(CORPUSDIR)/aljazeera/raw
+	time find $< -type f | lulua-write aljazeera ar-lulua > $@
+
+$(STATSDIR)/stats-tanzil.pickle: $(CORPUSDIR)/tanzil-quaran/plain.txt.lz
+	echo $< | lulua-write text ar-lulua | lulua-combine-stats > $@
+
+$(STATSDIR)/stats-arwiki.pickle: $(CORPUSDIR)/arwiki/arwiki-20190701-pages-articles.xml.bz2
+	$(WIKIEXTRACTOR) -ns 0 --json -o - $< 2>/dev/null | jq .text | lulua-write json ar-lulua | lulua-combine-stats > $@
+
+$(STATSDIR)/stats.pickle: $(STATSDIR)/stats-arwiki.pickle \
+	$(STATSDIR)/stats-tanzil.pickle $(STATSDIR)/stats-aljazeera.pickle \
+	$(STATSDIR)/stats-bbcarabic.pickle
+	cat $^ | lulua-combine-stats > $@
+
+### optimization ###
+optimized.yaml: $(STATSDIR)/stats.pickle
+	lulua-optimize -n $(OPTROUNDS) -r -p $(OPTPINS) -l ar-lulua < $< > $@
+