blob: 3500b226a24ff9468936097160c9ec78e074db10 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
|
#!/bin/sh
# Generate build.ninja that builds the docs/stats/…
#
# The manifest is written to stdout. Inside the here-documents an escaped
# dollar sign (\$name) is passed through literally and becomes a ninja
# variable reference, while an unescaped ${name} is expanded by this script
# at generation time.

# Stop on the first failed command or unset variable instead of emitting a
# truncated manifest.
set -eu

# Layouts for which statistics, renderings and heatmaps are generated.
layouts="ar-lulua ar-asmo663 ar-linux ar-malas ar-phonetic ar-osman ar-khorshid"
# Layouts for which an xmodmap file is rendered as well.
layoutsXmodmap="ar-lulua"

# Print the layout-independent part of the manifest: settings, pools, rules
# and the build statements for the documentation directory.
#
# Bindings that belong to a "rule", "pool" or "build" statement are indented,
# because ninja only scopes a binding to the preceding statement when it is
# indented; an unindented "command = …" would be a global variable and leave
# the rule without a command.
#
# \$optpins is wrapped in single quotes in the "opt" rule: ninja substitutes
# the value before handing the command line to the shell, and the value
# contains ";" and "*", which the shell would otherwise treat as a command
# separator and a glob.
#
# Arguments: none
# Outputs:   ninja manifest text on stdout
emit_static_manifest() {
    cat <<EOF
### auto-generated by gen.sh. Do not edit. ###
### settings ###
corpusdir=corpus
statsdir=stats
docdir=doc
wikiextractor=3rdparty/wikiextractor/WikiExtractor.py
fontdir=3rdparty/plex/IBM-Plex-Arabic/fonts/complete/woff2/
optrounds=100000
# pin layers, keep hand-optimized numbers, keep top row free
optpins=0;1;2;0,B*;3,*
optmodel=mod01
### pools ###
# lulua-write uses internal parallelization and should not be run more than
# once concurrently. It also uses alot of memory, so…
pool write
    depth = 1
### rules ###
rule opt
    command = lulua-optimize -n \$optrounds -r -p '\$optpins' -l ar-lulua -m \$optmodel < \$in > \$out
rule render-svg
    command = lulua-render -l \$layout svg \$out
rule render-svg-heat
    command = lulua-render -l \$layout svg --heatmap=\$in \$out
rule render-xmodmap
    command = lulua-render -l \$layout xmodmap \$out
rule analyze-heat
    command = lulua-analyze -l \$layout keyheatmap < \$in > \$out
rule write-bbcarabic
    command = find \$in -type f | lulua-write bbcarabic \$layout | lulua-analyze combine > \$out
    pool = write
rule write-aljazeera
    command = find \$in -type f | lulua-write aljazeera \$layout | lulua-analyze combine > \$out
    pool = write
rule write-epub
    command = find \$in -type f | lulua-write epub \$layout | lulua-analyze combine > \$out
    pool = write
rule write-tanzil
    command = echo \$in | lulua-write text \$layout | lulua-analyze combine > \$out
    pool = write
rule write-arwiki
    command = \$wikiextractor -ns 0 --json -o - \$in 2>/dev/null | jq .text | lulua-write json \$layout | lulua-analyze combine > \$out
    pool = write
rule combine
    command = cat \$in | lulua-analyze combine > \$out
rule mkdir
    command = mkdir -p \$out
rule letterfreq
    command = lulua-analyze -l ar-lulua letterfreq < \$in > \$out
rule analyze-fingerhand
    command = lulua-analyze -l \$layout fingerhand < \$in > \$out
rule wordlist
    command = lulua-analyze -l ar-lulua latinime < \$in > \$out
rule cpp
    command = gcc -E -x c -nostdinc -MMD -MF \$out.d -C -P -I \$docdir/_temp \$in -o \$out
    depfile = \$out.d
    deps = gcc
rule cp
    command = cp \$in \$out
rule gz
    command = gzip -c \$in > \$out
### build targets ###
build \$docdir/_build: mkdir
build \$docdir/_build/fonts: mkdir
build \$docdir/_temp: mkdir
build \$docdir/_build/index.html: cpp \$docdir/index.html || \$docdir/_build
build \$docdir/_build/letterfreq.json: letterfreq \$statsdir/ar-lulua/all.pickle || \$docdir/_build
build \$docdir/_build/style.css: cp \$docdir/style.css || \$docdir/_build
build \$docdir/_build/lulua-logo.svg: cp \$docdir/lulua-logo.svg || \$docdir/_build
# wordlist
build \$docdir/_temp/lulua.combined: wordlist \$statsdir/ar-lulua/all.pickle || \$docdir/_temp
build \$docdir/_build/lulua.combined.gz: gz \$docdir/_temp/lulua.combined || \$docdir/_build
build \$docdir/_build/fonts/IBMPlexArabic-Regular.woff2: cp \$fontdir/IBMPlexArabic-Regular.woff2 || \$docdir/_build/fonts
build \$docdir/_build/fonts/IBMPlexArabic-Thin.woff2: cp \$fontdir/IBMPlexArabic-Thin.woff2 || \$docdir/_build/fonts
EOF
}

emit_static_manifest
# Print the build statements for one keyboard layout: the statistics
# directory, one pickle per corpus, the combined all.pickle, and the SVG,
# heatmap and finger/hand outputs derived from it.
#
# Each "layout = …" binding is indented so that ninja scopes it to the build
# statement directly above it; unindented it would be a global assignment
# and every rule's \$layout would see only the last value written.
#
# Arguments: $1 - layout name (e.g. ar-lulua)
# Outputs:   ninja build statements on stdout
emit_layout_targets() {
    cat <<EOF
build \$statsdir/${1}: mkdir
build \$statsdir/${1}/bbcarabic.pickle: write-bbcarabic \$corpusdir/bbcarabic/raw || \$statsdir/${1}
    layout = ${1}
build \$statsdir/${1}/aljazeera.pickle: write-aljazeera \$corpusdir/aljazeera/raw || \$statsdir/${1}
    layout = ${1}
build \$statsdir/${1}/hindawi.pickle: write-epub \$corpusdir/hindawi/raw || \$statsdir/${1}
    layout = ${1}
build \$statsdir/${1}/tanzil.pickle: write-tanzil \$corpusdir/tanzil-quaran/plain.txt.lz || \$statsdir/${1}
    layout = ${1}
build \$statsdir/${1}/arwiki.pickle: write-arwiki \$corpusdir/arwiki/arwiki-20190701-pages-articles.xml.bz2 || \$statsdir/${1}
    layout = ${1}
build \$statsdir/${1}/all.pickle: combine \$statsdir/${1}/bbcarabic.pickle \$statsdir/${1}/aljazeera.pickle \$statsdir/${1}/tanzil.pickle \$statsdir/${1}/arwiki.pickle \$statsdir/${1}/hindawi.pickle || \$statsdir/${1}
build \$docdir/_build/${1}.svg: render-svg || \$docdir/_build
    layout = ${1}
build \$docdir/_temp/${1}-heat.yaml: analyze-heat \$statsdir/${1}/all.pickle || \$docdir/_temp
    layout = ${1}
build \$docdir/_build/${1}-heat.svg: render-svg-heat \$docdir/_temp/${1}-heat.yaml || \$docdir/_build
    layout = ${1}
build \$docdir/_temp/${1}-fingerhand.html: analyze-fingerhand \$statsdir/${1}/all.pickle || \$docdir/_temp
    layout = ${1}
EOF
}

# $layouts is a space-separated list, so it is left unquoted on purpose to
# get one iteration per layout name.
for l in $layouts; do
    emit_layout_targets "$l"
done
# Print the build statement that renders the xmodmap file for one layout.
#
# The "layout = …" binding is indented so that ninja scopes it to this build
# statement rather than treating it as a global assignment.
#
# Arguments: $1 - layout name (e.g. ar-lulua)
# Outputs:   ninja build statement on stdout
emit_xmodmap_target() {
    cat <<EOF
build \$docdir/_build/${1}.xmodmap: render-xmodmap || \$docdir/_build
    layout = ${1}
EOF
}

# $layoutsXmodmap is a space-separated list, so it is left unquoted on
# purpose to get one iteration per layout name.
for l in $layoutsXmodmap; do
    emit_xmodmap_target "$l"
done
|