summary | refs | log | tree | commit | diff
path: root/gen.sh
diff options
context:
space:
mode:
author: Lars-Dominik Braun <lars@6xq.net>, 2019-11-10 09:44:35 +0100
committer: Lars-Dominik Braun <lars@6xq.net>, 2019-11-16 10:17:26 +0100
commit: 14daa5644598836fd6321038c6b0a496c7874374 (patch)
tree: 2309443ffec0acea7b8eda095dba17436c0deb0a /gen.sh
parent: 38c9ed5b042ae488ee12287bf8c19457189889aa (diff)
download: lulua-14daa5644598836fd6321038c6b0a496c7874374.tar.gz
lulua-14daa5644598836fd6321038c6b0a496c7874374.tar.bz2
lulua-14daa5644598836fd6321038c6b0a496c7874374.zip
doc: Auto-generate corpus table
Diffstat (limited to 'gen.sh')
-rwxr-xr-x gen.sh 28
1 files changed, 26 insertions, 2 deletions
diff --git a/gen.sh b/gen.sh
index 762eecc..de1fed0 100755
--- a/gen.sh
+++ b/gen.sh
@@ -3,6 +3,7 @@
layouts="ar-lulua ar-asmo663 ar-linux ar-malas ar-phonetic ar-osman ar-khorshid"
layoutsXmodmap="ar-lulua"
+corpora="`ls corpus`"
cat <<EOF
### auto-generated by gen.sh. Do not edit. ###
@@ -80,6 +81,12 @@ rule letterfreq
rule analyze-fingerhand
command = lulua-analyze -l \$layout fingerhand < \$in > \$out
+rule analyze-corpusstats
+ command = lulua-analyze -l ar-lulua corpusstats \$metadata < \$stats > \$out
+
+rule analyze-corpushtml
+ command = cat \$in | lulua-analyze -l ar-lulua corpushtml > \$out
+
rule wordlist
command = lulua-analyze -l ar-lulua latinime < \$in > \$out
@@ -111,6 +118,7 @@ build \$docdir/_build/fonts/IBMPlexArabic-Regular.woff2: cp \$fontdir/IBMPlexAra
build \$docdir/_build/fonts/IBMPlexArabic-Thin.woff2: cp \$fontdir/IBMPlexArabic-Thin.woff2 || \$docdir/_build/fonts
EOF
+# targets for every layout
for l in $layouts; do
cat <<EOF
build \$statsdir/${l}: mkdir
@@ -124,7 +132,7 @@ build \$statsdir/${l}/aljazeera.pickle: write-aljazeera \$corpusdir/aljazeera/ra
build \$statsdir/${l}/hindawi.pickle: write-epub \$corpusdir/hindawi/raw || \$statsdir/${l}
layout = ${l}
-build \$statsdir/${l}/tanzil.pickle: write-tanzil \$corpusdir/tanzil-quaran/plain.txt.lz || \$statsdir/${l}
+build \$statsdir/${l}/tanzil-quaran.pickle: write-tanzil \$corpusdir/tanzil-quaran/plain.txt.lz || \$statsdir/${l}
layout = ${l}
build \$statsdir/${l}/arwiki.pickle: write-arwiki \$corpusdir/arwiki/arwiki-20190701-pages-articles.xml.bz2 || \$statsdir/${l}
@@ -136,7 +144,7 @@ build \$statsdir/${l}/un-v1.0-tei.pickle: write-tei2 \$corpusdir/un-v1.0-tei ||
build \$statsdir/${l}/opensubtitles-2018.pickle: write-opensubtitles \$corpusdir/opensubtitles-2018 || \$statsdir/${l}
layout = ${l}
-build \$statsdir/${l}/all.pickle: combine \$statsdir/${l}/bbcarabic.pickle \$statsdir/${l}/aljazeera.pickle \$statsdir/${l}/tanzil.pickle \$statsdir/${l}/arwiki.pickle \$statsdir/${l}/hindawi.pickle \$statsdir/${l}/un-v1.0-tei.pickle \$statsdir/${l}/opensubtitles-2018.pickle || \$statsdir/${l}
+build \$statsdir/${l}/all.pickle: combine \$statsdir/${l}/bbcarabic.pickle \$statsdir/${l}/aljazeera.pickle \$statsdir/${l}/tanzil-quaran.pickle \$statsdir/${l}/arwiki.pickle \$statsdir/${l}/hindawi.pickle \$statsdir/${l}/un-v1.0-tei.pickle \$statsdir/${l}/opensubtitles-2018.pickle || \$statsdir/${l}
build \$docdir/_build/${l}.svg: render-svg || \$docdir/_build
layout = ${l}
@@ -153,6 +161,7 @@ build \$docdir/_temp/${l}-fingerhand.html: analyze-fingerhand \$statsdir/${l}/al
EOF
done
+# layouts with xmodmap support
for l in $layoutsXmodmap; do
cat <<EOF
build \$docdir/_build/${l}.xmodmap: render-xmodmap || \$docdir/_build
@@ -161,3 +170,18 @@ build \$docdir/_build/${l}.xmodmap: render-xmodmap || \$docdir/_build
EOF
done
+# statistics for each corpus (ar-lulua) and html rendering
+outfiles=""
+for c in $corpora; do
+cat <<EOF
+build \$docdir/_temp/metadata-$c.yaml: analyze-corpusstats \$statsdir/ar-lulua/$c.pickle \$corpusdir/$c/metadata.yaml || \$docdir/_temp \$corpusdir/$c/metadata.yaml
+ metadata = \$corpusdir/$c/metadata.yaml
+ stats = \$statsdir/ar-lulua/$c.pickle
+EOF
+outfiles+=" \$docdir/_temp/metadata-$c.yaml"
+done
+
+cat <<EOF
+build \$docdir/_temp/corpus.html: analyze-corpushtml $outfiles || \$docdir/_temp
+EOF
+