From 2d45ef655f8791037373ab83174fc6c3596227b0 Mon Sep 17 00:00:00 2001
From: Lars-Dominik Braun
Date: Thu, 3 Oct 2019 17:23:53 +0200
Subject: text: Add epub reader and hindawi corpus

See issue #5.
---
 gen.sh | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'gen.sh')

NOTE(review): line breaks were reconstructed from a whitespace-collapsed copy
of this patch; the indentation width inside the hunks below is assumed (four
spaces) -- confirm against the repository before applying.

diff --git a/gen.sh b/gen.sh
index 77fbf81..0d7a066 100755
--- a/gen.sh
+++ b/gen.sh
@@ -39,6 +39,7 @@ rule render-xmodmap
 rule analyze-heat
     command = lulua-analyze -l \$layout keyheatmap < \$in > \$out
 
+# XXX: add lulua-analyze combine here
 rule write-bbcarabic
     command = find \$in -type f | lulua-write bbcarabic \$layout > \$out
     pool = write
@@ -47,6 +48,10 @@ rule write-aljazeera
     command = find \$in -type f | lulua-write aljazeera \$layout > \$out
     pool = write
 
+rule write-epub
+    command = find \$in -type f | lulua-write epub \$layout | lulua-analyze combine > \$out
+    pool = write
+
 rule write-tanzil
     command = echo \$in | lulua-write text \$layout | lulua-analyze combine > \$out
     pool = write
@@ -98,13 +103,16 @@ build \$statsdir/${l}/bbcarabic.pickle: write-bbcarabic \$corpusdir/bbcarabic/ra
 build \$statsdir/${l}/aljazeera.pickle: write-aljazeera \$corpusdir/aljazeera/raw || \$statsdir/${l}
     layout = ${l}
 
+build \$statsdir/${l}/hindawi.pickle: write-epub \$corpusdir/hindawi/raw || \$statsdir/${l}
+    layout = ${l}
+
 build \$statsdir/${l}/tanzil.pickle: write-tanzil \$corpusdir/tanzil-quaran/plain.txt.lz || \$statsdir/${l}
     layout = ${l}
 
 build \$statsdir/${l}/arwiki.pickle: write-arwiki \$corpusdir/arwiki/arwiki-20190701-pages-articles.xml.bz2 || \$statsdir/${l}
     layout = ${l}
 
-build \$statsdir/${l}/all.pickle: combine \$statsdir/${l}/bbcarabic.pickle \$statsdir/${l}/aljazeera.pickle \$statsdir/${l}/tanzil.pickle \$statsdir/${l}/arwiki.pickle || \$statsdir/${l}
+build \$statsdir/${l}/all.pickle: combine \$statsdir/${l}/bbcarabic.pickle \$statsdir/${l}/aljazeera.pickle \$statsdir/${l}/tanzil.pickle \$statsdir/${l}/arwiki.pickle \$statsdir/${l}/hindawi.pickle || \$statsdir/${l}
 
 build \$docdir/_build/${l}.svg: render-svg || \$docdir/_build
     layout = ${l}
-- 
cgit v1.2.3