diff options
-rw-r--r-- | corpus/aljazeera/metadata.yaml | 10 | ||||
-rw-r--r-- | corpus/arwiki/metadata.yaml | 10 | ||||
-rw-r--r-- | corpus/bbcarabic/metadata.yaml | 10 | ||||
-rw-r--r-- | corpus/hindawi/metadata.yaml | 10 | ||||
-rw-r--r-- | corpus/opensubtitles-2018/metadata.yaml | 10 | ||||
-rw-r--r-- | corpus/osm/metadata.yaml | 10 | ||||
-rw-r--r-- | corpus/tanzil-quaran/metadata.yaml | 4 | ||||
-rw-r--r-- | corpus/un-v1.0-tei/metadata.yaml | 10 | ||||
-rw-r--r-- | lulua/data/report/index.html | 156 | ||||
-rw-r--r-- | lulua/data/report/style.css | 27 | ||||
-rw-r--r-- | lulua/report.py | 16 |
11 files changed, 197 insertions, 76 deletions
diff --git a/corpus/aljazeera/metadata.yaml b/corpus/aljazeera/metadata.yaml index f5908c6..623cf91 100644 --- a/corpus/aljazeera/metadata.yaml +++ b/corpus/aljazeera/metadata.yaml @@ -1,7 +1,13 @@ source: - name: Al-Jazeera + name: + en: Al-Jazeera + ar: الجزيرة url: https://www.aljazeera.net/ extractor: name: Custom scripts date: 2019-07 -count: [547110, articles] +count: + num: 547110 + kind: + en: articles + ar: مقالة diff --git a/corpus/arwiki/metadata.yaml b/corpus/arwiki/metadata.yaml index 2a1ff72..b033919 100644 --- a/corpus/arwiki/metadata.yaml +++ b/corpus/arwiki/metadata.yaml @@ -1,8 +1,14 @@ source: - name: Arabic Wikipedia + name: + en: Arabic Wikipedia + ar: ويكيبيديا العربية url: https://ar.wikipedia.org/ extractor: name: wikiextractor url: https://github.com/attardi/wikiextractor/tree/3162bb6c3c9ebd2d15be507aa11d6fa818a454ac date: 2019-07-01 -count: [857386, articles] +count: + num: 857386 + kind: + en: articles + ar: مقالة diff --git a/corpus/bbcarabic/metadata.yaml b/corpus/bbcarabic/metadata.yaml index d1c06a5..085b80f 100644 --- a/corpus/bbcarabic/metadata.yaml +++ b/corpus/bbcarabic/metadata.yaml @@ -1,7 +1,13 @@ source: - name: BBC Arabic + name: + en: BBC Arabic + ar: بي بي سي العربية url: http://www.bbc.com/arabic extractor: name: Custom scripts date: 2019-07 -count: [149901, articles] +count: + num: 149901 + kind: + en: articles + ar: مقالة diff --git a/corpus/hindawi/metadata.yaml b/corpus/hindawi/metadata.yaml index c92e428..5cadffc 100644 --- a/corpus/hindawi/metadata.yaml +++ b/corpus/hindawi/metadata.yaml @@ -1,5 +1,11 @@ source: - name: hindawi.org + name: + en: hindawi.org + ar: هنداوي url: https://www.hindawi.org/books date: 2019-10-02 -count: [1709, books] +count: + num: 1709 + kind: + en: books + ar: كتاب diff --git a/corpus/opensubtitles-2018/metadata.yaml b/corpus/opensubtitles-2018/metadata.yaml index 310ae82..32454db 100644 --- a/corpus/opensubtitles-2018/metadata.yaml +++ b/corpus/opensubtitles-2018/metadata.yaml @@ -1,5 
+1,11 @@ source: - name: ORPUS OpenSubtitles 2018 + name: + en: OPUS OpenSubtitles 2018 + ar: OPUS OpenSubtitles 2018 url: http://opus.nlpl.eu/OpenSubtitles-v2018.php date: 2018 -count: [94093, movies] +count: + num: 94093 + kind: + en: movies + ar: فيلم diff --git a/corpus/osm/metadata.yaml b/corpus/osm/metadata.yaml index e3aff73..1759531 100644 --- a/corpus/osm/metadata.yaml +++ b/corpus/osm/metadata.yaml @@ -1,5 +1,11 @@ source: - name: OpenStreetMap Arabic Labels + name: + en: OpenStreetMap Arabic Labels + ar: خريطة الشارع المفتوحة Arabic Labels url: https://planet.openstreetmap.org/ date: 2019-11-04 -count: [376148, labels] +count: + num: 376148 + kind: + en: labels + ar: labels diff --git a/corpus/tanzil-quaran/metadata.yaml b/corpus/tanzil-quaran/metadata.yaml index 0207da4..9932aa5 100644 --- a/corpus/tanzil-quaran/metadata.yaml +++ b/corpus/tanzil-quaran/metadata.yaml @@ -1,5 +1,7 @@ source: - name: tanzil.net Quran + name: + en: tanzil.net Quran + ar: القرآن (بن tanzil.net) url: http://tanzil.net/docs/download # notes: options Simple Enhanced and Text (for inclusion of diacritics) date: 2019-10-02 diff --git a/corpus/un-v1.0-tei/metadata.yaml b/corpus/un-v1.0-tei/metadata.yaml index 36eaa6d..224da94 100644 --- a/corpus/un-v1.0-tei/metadata.yaml +++ b/corpus/un-v1.0-tei/metadata.yaml @@ -1,5 +1,11 @@ source: - name: United Nations Parallel Corpus v1.0 + name: + en: United Nations Parallel Corpus v1.0 + ar: الأمم المتحدة Parallel Corpus v1.0 url: https://conferences.unite.un.org/UNCorpus/en/DownloadOverview date: 2016 -count: [116754, documents] +count: + num: 116754 + kind: + en: documents + ar: ملف diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html index 0e4c779..cda1c9d 100644 --- a/lulua/data/report/index.html +++ b/lulua/data/report/index.html @@ -2,13 +2,15 @@ <html lang="ar"> <head> <meta charset="utf-8"> - <title>لؤلؤة</title> + <title>لؤلؤة: لوحة مفاتيح عربية</title> <!--<meta name="description" content="">--> <meta 
name="viewport" content="width=device-width, initial-scale=1"> <link href="https://fonts.googleapis.com/css?family=IBM+Plex+Mono|IBM+Plex+Sans:100,400&display=swap" rel="stylesheet"> <link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/pure-min.css" integrity="sha384-oAOxQR6DkCoMliIh8yFnu25d7Eq/PHS21PClpwjOTeU2jRSq11vu66rf90/cZr47" crossorigin="anonymous"> - <link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/grids-responsive-min.css"> + <link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/grids-responsive-min.css" integrity="sha384-e+NM0rMilIXo+lz6+dXhoHMjd2iTSxNsCHpqkvuSBsAhwMDRF/Wn2QRRNaLxTcN/" crossorigin="anonymous"> + <script src="https://polyfill.io/v3/polyfill.min.js?features=es6" crossorigin="anonymous"></script> + <script id="MathJax-script" async src="https://www.unpkg.com/mathjax@3.0.5/es5/tex-mml-chtml.js" integrity="sha384-L+g9M+CMLhBJNIlTx7C1IQRQV7IL/wRmGNxXVP6CPEJjVhE5LE1EBMVJtmmdueyg" crossorigin="anonymous"></script> {# bokeh #} {% for f in bokehres.js_files -%} <script src="{{ f }}"></script> @@ -26,7 +28,7 @@ <h1 class="title"><img class="logo" src="lulua-logo.svg" alt="لؤلؤة"></h1> <div class="pure-g flexreverse"> <div class="pure-u-1 pure-u-sm-1-2"> - <!--<h2 class="subtitle">لوحة مفاتيح العربية المريحة</h1>--> + <h2 class="subtitle">لوحة مفاتيح عربية</h2> </div> <div class="pure-u-1 pure-u-sm-1-2"> <h2 class="subtitle" lang="en">Ergonomic Arabic Keyboard Layout</h2> @@ -44,9 +46,9 @@ </div> <div class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <p>This is work in progress and contributions are welcome. 
Head over to <a href="https://github.com/PromyLOPh/lulua">GitHub</a> to see where @@ -56,9 +58,9 @@ </div> <div class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h2>Goals</h2> <ul> @@ -77,9 +79,9 @@ </div> <div class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h2>Usage</h2> <dl> @@ -99,9 +101,9 @@ <div class="indepth-card"> <div class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h1>Learn more</h1> </div> @@ -111,12 +113,12 @@ <section> <div class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> <div class="lbox"> <h2>الأبجدية العربية</h2> </div> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h2>The Arabic alphabet</h2> <p> @@ -131,15 +133,56 @@ <!-- --> The novel corpus built for the following analysis consists of </p> + </div> + </div> +</div> + +<div class="pure-g flexreverse"> + <div class="pure-u-1 pure-u-xl-1-2"> + <div class="lbox"> + <figure class="table-overflow"> + <table class="pure-table pure-table-horizontal"><thead><tr><th>مصدر</th><th></th><th>كلام</th><th>حروف</th></thead><tbody> + {% for c in corpus|sort(attribute='source.name.en') %} + <tr> + <td><a href="{{ c.source.url }}">{{ c.source.name.ar }}</a></td> + {% set count = c.get ('count') %} + {% if count %} + {# use new style formatting, for some reason %7,d does not work #} + <td>{{ '{:7,d}'.format(count.num)|arabnum }} {{ count.kind.ar 
}}</td> + {% else %} + <td></td> + {% endif %} - <table class="pure-table"><thead><tr><th>Source</th><th></th><th>Words</th><th>Characters</th></thead><tbody> - {% for c in corpus|sort(attribute='source.name') %} + {% set stats = c.get ('stats') %} + {% for k in ('words', 'characters') %} + {% set i = stats[k]|approx('ar') %} + <td>{{ '%5.1f'|format(i[0])|arabnum }} {{ i[1] }}</td> + {% endfor %} + </tr> + {% endfor %} + <tr><td>مجموع</td><td></td> + {% for k in ('words', 'characters') %} + {% set i = corpustotal[k]|approx('ar') %} + <td>{{ '%5.1f'|format(i[0])|arabnum }} {{ i[1] }}</td> + {% endfor %} + </tr> + </tbody></table> + <figcaption> + </figcaption> + </figure> + </div> + </div> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> + <div class="lbox"> + <figure class="table-overflow"> + <table class="pure-table pure-table-horizontal"><thead><tr><th>Source</th><th></th><th>Words</th><th>Characters</th></thead><tbody> + {% for c in corpus|sort(attribute='source.name.en') %} <tr> - <td><a href="{{ c.source.url }}">{{ c.source.name }}</a></td> + <td><a href="{{ c.source.url }}">{{ c.source.name.en }}</a></td> {% set count = c.get ('count') %} {% if count %} {# use new style formatting, for some reason %7,d does not work #} - <td>{{ '{:7,d}'.format(count[0])|numspace }} {{ count[1] }}</td> + <td>{{ '{:7,d}'.format(count.num)|numspace }} {{ count.kind.en }}</td> {% else %} <td></td> {% endif %} @@ -147,17 +190,28 @@ {% set stats = c.get ('stats') %} {% for k in ('words', 'characters') %} {% set i = stats[k]|approx %} - <td>{{ '%5.1f'|format(i[0])|numspace }} {{ i[1] }}</td> + <td>{{ '%5.1f'|format(i[0])|numspace }} {{ i[1] }}</td> {% endfor %} </tr> {% endfor %} <tr><td>Total</td><td></td> {% for k in ('words', 'characters') %} {% set i = corpustotal[k]|approx %} - <td>{{ '%5.1f'|format(i[0])|numspace }} {{ i[1] }}</td> + <td>{{ '%5.1f'|format(i[0])|numspace }} {{ i[1] }}</td> {% endfor %} </tr> </tbody></table> + <figcaption> + <p> + The chosen Quran 
representation does not include all quranic + diacritization symbols, like <a + href="https://github.com/khaledhosny/quran-data">other datasets</a>. + <!-- --> + This makes comparison fairer, since most keyboards presented below do + not include any of them. + </p> + </figcaption> + </figure> <p> The plot below shows <bdo dir="ltr" lang="ar">ا ل ي م و ن</bdo> can be @@ -174,9 +228,9 @@ <div id="letterfreq-div"></div> </div> <figcaption class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <p>Arabic letter frequency distribution</p> </div> @@ -187,9 +241,9 @@ <section> <div class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h2>Layout properties</h2> @@ -206,7 +260,9 @@ <dt class="finger thumb">cyan</dt> <dd>thumb</dd> </dl> - <p>Asymmetry is defined as the difference between left and right hand usage.</p> + <p>Asymmetry is defined as the difference between left and right hand button + usage \(b_{left/right}\) and includes the thumb:</p> + $$a = \frac{b_{left}}{b_{total}} - \frac{b_{right}}{b_{total}}$$ <p> The layout proposed uses four layers and assumes a 102/105 key ISO @@ -289,9 +345,9 @@ <section id="related"> <div class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h2><a href="#related">Related work</a></h2> <p>This section explores existing keyboard layouts made for the @@ -301,9 +357,9 @@ </div> <div id="ar-asmo663" class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div 
class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h3><a href="#ar-asmo663">ASMO 663</a></h3> <p> @@ -335,9 +391,9 @@ </figure> <div id="ar-osx" class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h3><a href="#ar-osx">Mac OS X</a></h3> <p> @@ -364,9 +420,9 @@ <div id="ar-linux" class="pure-g flexreverse"> <figcaption class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h3><a href="#ar-linux">Linux</a></h3> <p> @@ -395,9 +451,9 @@ </figure> <div class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <p>In contrast to the layouts presented so far the following layouts claim to be optimized for the Arabic language and were, at @@ -407,9 +463,9 @@ </div> <div id="ar-alramly" class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h3><a href="#ar-alramly">Al-Ramly et al</a></h3> <p> @@ -454,9 +510,9 @@ </figure> <div id="ar-idlebi" class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h3><a href="#ar-idlebi">Idlebi et al</a></h3> <p> @@ -502,9 +558,9 @@ </figure> <div id="ar-malas" class="pure-g flexreverse"> - <div class="pure-u-1 
pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h3><a href="#ar-malas">Malas et al</a></h3> <p> @@ -536,9 +592,9 @@ </figure> <div id="ar-khorshid" class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h3><a href="#ar-khorshid">Khorshid et al</a></h3> <p> @@ -571,9 +627,9 @@ </figure> <div id="ar-osman" class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h3><a href="#ar-osman">Osman et al</a></h3> <p> @@ -606,9 +662,9 @@ </figure> <div id="ar-phonetic" class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h3><a href="#ar-phonetic">Phonetic</a></h3> <p> @@ -634,9 +690,9 @@ </figure> <div id="intellark" class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h3><a href="#intellark">Intellark</a></h3> <p> @@ -660,9 +716,9 @@ <section> <div class="pure-g flexreverse"> - <div class="pure-u-1 pure-u-md-1-2"> + <div class="pure-u-1 pure-u-xl-1-2"> </div> - <div class="pure-u-1 pure-u-md-1-2" lang="en"> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> <div class="lbox"> <h2>Acknowledgements</h2> diff --git a/lulua/data/report/style.css b/lulua/data/report/style.css index d829aa4..e734a0e 100644 --- a/lulua/data/report/style.css +++ 
b/lulua/data/report/style.css @@ -39,6 +39,7 @@ body { :lang(ar) { direction: rtl; font-family: "IBM Plex Sans Arabic"; + text-align: right; } /* inside ltr text */ :lang(ar)[dir=ltr] { @@ -47,8 +48,9 @@ body { :lang(en) { direction: ltr; font-family: "IBM Plex Sans"; + text-align: left; } -h1, h2, h3 { +h1, h2, h3, th { font-weight: 100; } h1 { @@ -121,7 +123,6 @@ div.indepth-card { } /* for hand/finger stats */ div.fingerhandstats { - text-align: center; display: flex; } div.fingerhandstats div.fingers { @@ -131,6 +132,9 @@ div.fingerhandstats div.fingers div { margin: 0.1em; overflow: hidden; } +div.fingerhandstats div.fingers div, div.fingerhandstats div.asymm, div.fingerhandstats div.hand { + text-align: center; +} div.fingerhandstats .left { margin-right: 0.5em; } @@ -154,18 +158,20 @@ div.fingerhandstats .fingers .thumb { border: 0.1em solid var(--finger-thumb); } +.table-overflow { + overflow-x: auto; +} + table { font-variant-numeric: tabular-nums; } -.pure-table td.numint { - text-align: right; - padding-right: 0; + +table thead { + background-color: inherit !important; } -.pure-table td.numfrac { - border-left: none; - text-align: left; - padding-left: 0; +table.pure-table { + border: none; } dl.colorcodes dt, dl.colorcodes dd { @@ -221,3 +227,6 @@ dl.colorcodes .finger.thumb::before { .layer.fourth:before { content: "⭨"; } +p.remark { + font-size: 0.9em; +} diff --git a/lulua/report.py b/lulua/report.py index 9a08068..06bb724 100644 --- a/lulua/report.py +++ b/lulua/report.py @@ -28,10 +28,11 @@ from bokeh.resources import CDN as bokehres from .layout import LEFT, RIGHT, Direction, FingerType -def approx (i): +def approx (i, lang='en'): """ Get approximate human-readable string for large number """ - units = ['', 'thousand', 'million', 'billion'] + units = {'en': ['', 'thousand', 'million', 'billion'], + 'ar': ['', 'ألف', 'مليون', 'مليار']}[lang] base = Decimal (1000) i = Decimal (i) while round (i, 1) >= base and len (units) > 1: @@ -43,6 +44,16 @@ def 
numspace (s): """ Replace ordinary spaces with unicode FIGURE SPACE """ return s.replace (' ', '\u2007') +def arabnum (s): + """ + Convert number to Arabic-Indic digits. + + Granted, we could use setlocale and do proper formatting, but who has an + arabic locale installed…? + """ + m = {'0': '٠', '1': '١', '2': '٢', '3': '٣', '4': '٤', '5': '٥', '6': '٦', '7': '٧', '8': '٨', '9': '٩', ',': '٬', '.': '٫'} + return ''.join (map (lambda x: m.get (x, x), s)) + def render (): parser = argparse.ArgumentParser(description='Create lulua report.') parser.add_argument('-c', '--corpus', nargs='+', metavar='FILE', help='Corpus metadata files') @@ -55,6 +66,7 @@ def render (): ) env.filters['approx'] = approx env.filters['numspace'] = numspace + env.filters['arabnum'] = arabnum corpus = [] for x in args.corpus: |