summary | refs | log | tree | commit | diff
path: root/gen.sh
diff options
context:
space:
mode:
author: Lars-Dominik Braun <lars@6xq.net>, 2019-10-03 17:23:53 +0200
committer: Lars-Dominik Braun <lars@6xq.net>, 2019-10-03 17:23:53 +0200
commit: 2d45ef655f8791037373ab83174fc6c3596227b0 (patch)
tree: a05d506928fcc16f8dfdddb860c6ce4c5193bfc4 /gen.sh
parent: 8048f6351fb4611134c2f6e2d9129ec025376914 (diff)
download: lulua-2d45ef655f8791037373ab83174fc6c3596227b0.tar.gz
lulua-2d45ef655f8791037373ab83174fc6c3596227b0.tar.bz2
lulua-2d45ef655f8791037373ab83174fc6c3596227b0.zip
text: Add epub reader and hindawi corpus
See issue #5.
Diffstat (limited to 'gen.sh')
-rwxr-xr-x gen.sh 10
1 files changed, 9 insertions, 1 deletions
diff --git a/gen.sh b/gen.sh
index 77fbf81..0d7a066 100755
--- a/gen.sh
+++ b/gen.sh
@@ -39,6 +39,7 @@ rule render-xmodmap
rule analyze-heat
command = lulua-analyze -l \$layout keyheatmap < \$in > \$out
+# XXX: add lulua-analyze combine here
rule write-bbcarabic
command = find \$in -type f | lulua-write bbcarabic \$layout > \$out
pool = write
@@ -47,6 +48,10 @@ rule write-aljazeera
command = find \$in -type f | lulua-write aljazeera \$layout > \$out
pool = write
+rule write-epub
+ command = find \$in -type f | lulua-write epub \$layout | lulua-analyze combine > \$out
+ pool = write
+
rule write-tanzil
command = echo \$in | lulua-write text \$layout | lulua-analyze combine > \$out
pool = write
@@ -98,13 +103,16 @@ build \$statsdir/${l}/bbcarabic.pickle: write-bbcarabic \$corpusdir/bbcarabic/ra
build \$statsdir/${l}/aljazeera.pickle: write-aljazeera \$corpusdir/aljazeera/raw || \$statsdir/${l}
layout = ${l}
+build \$statsdir/${l}/hindawi.pickle: write-epub \$corpusdir/hindawi/raw || \$statsdir/${l}
+ layout = ${l}
+
build \$statsdir/${l}/tanzil.pickle: write-tanzil \$corpusdir/tanzil-quaran/plain.txt.lz || \$statsdir/${l}
layout = ${l}
build \$statsdir/${l}/arwiki.pickle: write-arwiki \$corpusdir/arwiki/arwiki-20190701-pages-articles.xml.bz2 || \$statsdir/${l}
layout = ${l}
-build \$statsdir/${l}/all.pickle: combine \$statsdir/${l}/bbcarabic.pickle \$statsdir/${l}/aljazeera.pickle \$statsdir/${l}/tanzil.pickle \$statsdir/${l}/arwiki.pickle || \$statsdir/${l}
+build \$statsdir/${l}/all.pickle: combine \$statsdir/${l}/bbcarabic.pickle \$statsdir/${l}/aljazeera.pickle \$statsdir/${l}/tanzil.pickle \$statsdir/${l}/arwiki.pickle \$statsdir/${l}/hindawi.pickle || \$statsdir/${l}
build \$docdir/_build/${l}.svg: render-svg || \$docdir/_build
layout = ${l}