summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--corpus/aljazeera/metadata.yaml10
-rw-r--r--corpus/arwiki/metadata.yaml10
-rw-r--r--corpus/bbcarabic/metadata.yaml10
-rw-r--r--corpus/hindawi/metadata.yaml10
-rw-r--r--corpus/opensubtitles-2018/metadata.yaml10
-rw-r--r--corpus/osm/metadata.yaml10
-rw-r--r--corpus/tanzil-quaran/metadata.yaml4
-rw-r--r--corpus/un-v1.0-tei/metadata.yaml10
-rw-r--r--lulua/data/report/index.html156
-rw-r--r--lulua/data/report/style.css27
-rw-r--r--lulua/report.py16
11 files changed, 197 insertions, 76 deletions
diff --git a/corpus/aljazeera/metadata.yaml b/corpus/aljazeera/metadata.yaml
index f5908c6..623cf91 100644
--- a/corpus/aljazeera/metadata.yaml
+++ b/corpus/aljazeera/metadata.yaml
@@ -1,7 +1,13 @@
source:
- name: Al-Jazeera
+ name:
+ en: Al-Jazeera
+ ar: الجزيرة
url: https://www.aljazeera.net/
extractor:
name: Custom scripts
date: 2019-07
-count: [547110, articles]
+count:
+ num: 547110
+ kind:
+ en: articles
+ ar: مقالة
diff --git a/corpus/arwiki/metadata.yaml b/corpus/arwiki/metadata.yaml
index 2a1ff72..b033919 100644
--- a/corpus/arwiki/metadata.yaml
+++ b/corpus/arwiki/metadata.yaml
@@ -1,8 +1,14 @@
source:
- name: Arabic Wikipedia
+ name:
+ en: Arabic Wikipedia
+ ar: ويكيبيديا العربية
url: https://ar.wikipedia.org/
extractor:
name: wikiextractor
url: https://github.com/attardi/wikiextractor/tree/3162bb6c3c9ebd2d15be507aa11d6fa818a454ac
date: 2019-07-01
-count: [857386, articles]
+count:
+ num: 857386
+ kind:
+ en: articles
+ ar: مقالة
diff --git a/corpus/bbcarabic/metadata.yaml b/corpus/bbcarabic/metadata.yaml
index d1c06a5..085b80f 100644
--- a/corpus/bbcarabic/metadata.yaml
+++ b/corpus/bbcarabic/metadata.yaml
@@ -1,7 +1,13 @@
source:
- name: BBC Arabic
+ name:
+ en: BBC Arabic
+ ar: بي بي سي العربية
url: http://www.bbc.com/arabic
extractor:
name: Custom scripts
date: 2019-07
-count: [149901, articles]
+count:
+ num: 149901
+ kind:
+ en: articles
+ ar: مقالة
diff --git a/corpus/hindawi/metadata.yaml b/corpus/hindawi/metadata.yaml
index c92e428..5cadffc 100644
--- a/corpus/hindawi/metadata.yaml
+++ b/corpus/hindawi/metadata.yaml
@@ -1,5 +1,11 @@
source:
- name: hindawi.org
+ name:
+ en: hindawi.org
+ ar: هنداوي
url: https://www.hindawi.org/books
date: 2019-10-02
-count: [1709, books]
+count:
+ num: 1709
+ kind:
+ en: books
+ ar: كتاب
diff --git a/corpus/opensubtitles-2018/metadata.yaml b/corpus/opensubtitles-2018/metadata.yaml
index 310ae82..32454db 100644
--- a/corpus/opensubtitles-2018/metadata.yaml
+++ b/corpus/opensubtitles-2018/metadata.yaml
@@ -1,5 +1,11 @@
source:
- name: ORPUS OpenSubtitles 2018
+ name:
+ en: OPUS OpenSubtitles 2018
+ ar: OPUS OpenSubtitles 2018
url: http://opus.nlpl.eu/OpenSubtitles-v2018.php
date: 2018
-count: [94093, movies]
+count:
+ num: 94093
+ kind:
+ en: movies
+ ar: فيلم
diff --git a/corpus/osm/metadata.yaml b/corpus/osm/metadata.yaml
index e3aff73..1759531 100644
--- a/corpus/osm/metadata.yaml
+++ b/corpus/osm/metadata.yaml
@@ -1,5 +1,11 @@
source:
- name: OpenStreetMap Arabic Labels
+ name:
+ en: OpenStreetMap Arabic Labels
+ ar: خريطة الشارع المفتوحة Arabic Labels
url: https://planet.openstreetmap.org/
date: 2019-11-04
-count: [376148, labels]
+count:
+ num: 376148
+ kind:
+ en: labels
+ ar: labels
diff --git a/corpus/tanzil-quaran/metadata.yaml b/corpus/tanzil-quaran/metadata.yaml
index 0207da4..9932aa5 100644
--- a/corpus/tanzil-quaran/metadata.yaml
+++ b/corpus/tanzil-quaran/metadata.yaml
@@ -1,5 +1,7 @@
source:
- name: tanzil.net Quran
+ name:
+ en: tanzil.net Quran
+ ar: القرآن (من tanzil.net)
url: http://tanzil.net/docs/download
# notes: options Simple Enhanced and Text (for inclusion of diacritics)
date: 2019-10-02
diff --git a/corpus/un-v1.0-tei/metadata.yaml b/corpus/un-v1.0-tei/metadata.yaml
index 36eaa6d..224da94 100644
--- a/corpus/un-v1.0-tei/metadata.yaml
+++ b/corpus/un-v1.0-tei/metadata.yaml
@@ -1,5 +1,11 @@
source:
- name: United Nations Parallel Corpus v1.0
+ name:
+ en: United Nations Parallel Corpus v1.0
+ ar: الأمم المتحدة Parallel Corpus v1.0
url: https://conferences.unite.un.org/UNCorpus/en/DownloadOverview
date: 2016
-count: [116754, documents]
+count:
+ num: 116754
+ kind:
+ en: documents
+ ar: ملف
diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html
index 0e4c779..cda1c9d 100644
--- a/lulua/data/report/index.html
+++ b/lulua/data/report/index.html
@@ -2,13 +2,15 @@
<html lang="ar">
<head>
<meta charset="utf-8">
- <title>لؤلؤة</title>
+ <title>لؤلؤة: لوحة مفاتيح عربية</title>
<!--<meta name="description" content="">-->
<meta name="viewport" content="width=device-width, initial-scale=1">
<link href="https://fonts.googleapis.com/css?family=IBM+Plex+Mono|IBM+Plex+Sans:100,400&display=swap" rel="stylesheet">
<link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/pure-min.css" integrity="sha384-oAOxQR6DkCoMliIh8yFnu25d7Eq/PHS21PClpwjOTeU2jRSq11vu66rf90/cZr47" crossorigin="anonymous">
- <link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/grids-responsive-min.css">
+ <link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/grids-responsive-min.css" integrity="sha384-e+NM0rMilIXo+lz6+dXhoHMjd2iTSxNsCHpqkvuSBsAhwMDRF/Wn2QRRNaLxTcN/" crossorigin="anonymous">
+ <script src="https://polyfill.io/v3/polyfill.min.js?features=es6" crossorigin="anonymous"></script>
+ <script id="MathJax-script" async src="https://www.unpkg.com/mathjax@3.0.5/es5/tex-mml-chtml.js" integrity="sha384-L+g9M+CMLhBJNIlTx7C1IQRQV7IL/wRmGNxXVP6CPEJjVhE5LE1EBMVJtmmdueyg" crossorigin="anonymous"></script>
{# bokeh #}
{% for f in bokehres.js_files -%}
<script src="{{ f }}"></script>
@@ -26,7 +28,7 @@
<h1 class="title"><img class="logo" src="lulua-logo.svg" alt="لؤلؤة"></h1>
<div class="pure-g flexreverse">
<div class="pure-u-1 pure-u-sm-1-2">
- <!--<h2 class="subtitle">لوحة مفاتيح العربية المريحة</h1>-->
+ <h2 class="subtitle">لوحة مفاتيح عربية</h2>
</div>
<div class="pure-u-1 pure-u-sm-1-2">
<h2 class="subtitle" lang="en">Ergonomic Arabic Keyboard Layout</h2>
@@ -44,9 +46,9 @@
</div>
<div class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<p>This is work in progress and contributions are welcome. Head over to
<a href="https://github.com/PromyLOPh/lulua">GitHub</a> to see where
@@ -56,9 +58,9 @@
</div>
<div class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h2>Goals</h2>
<ul>
@@ -77,9 +79,9 @@
</div>
<div class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h2>Usage</h2>
<dl>
@@ -99,9 +101,9 @@
<div class="indepth-card">
<div class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h1>Learn more</h1>
</div>
@@ -111,12 +113,12 @@
<section>
<div class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
<div class="lbox">
<h2>الأبجدية العربية</h2>
</div>
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h2>The Arabic alphabet</h2>
<p>
@@ -131,15 +133,56 @@
<!-- -->
The novel corpus built for the following analysis consists of
</p>
+ </div>
+ </div>
+</div>
+
+<div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-xl-1-2">
+ <div class="lbox">
+ <figure class="table-overflow">
+ <table class="pure-table pure-table-horizontal"><thead><tr><th>مصدر</th><th></th><th>كلام</th><th>حروف</th></tr></thead><tbody>
+ {% for c in corpus|sort(attribute='source.name.en') %}
+ <tr>
+ <td><a href="{{ c.source.url }}">{{ c.source.name.ar }}</a></td>
+ {% set count = c.get ('count') %}
+ {% if count %}
+ {# use str.format(): printf-style % formatting has no ',' thousands-separator flag, so '%7,d' fails #}
+ <td>{{ '{:7,d}'.format(count.num)|arabnum }} {{ count.kind.ar }}</td>
+ {% else %}
+ <td></td>
+ {% endif %}
- <table class="pure-table"><thead><tr><th>Source</th><th></th><th>Words</th><th>Characters</th></thead><tbody>
- {% for c in corpus|sort(attribute='source.name') %}
+ {% set stats = c.get ('stats') %}
+ {% for k in ('words', 'characters') %}
+ {% set i = stats[k]|approx('ar') %}
+ <td>{{ '%5.1f'|format(i[0])|arabnum }} {{ i[1] }}</td>
+ {% endfor %}
+ </tr>
+ {% endfor %}
+ <tr><td>مجموع</td><td></td>
+ {% for k in ('words', 'characters') %}
+ {% set i = corpustotal[k]|approx('ar') %}
+ <td>{{ '%5.1f'|format(i[0])|arabnum }} {{ i[1] }}</td>
+ {% endfor %}
+ </tr>
+ </tbody></table>
+ <figcaption>
+ </figcaption>
+ </figure>
+ </div>
+ </div>
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
+ <div class="lbox">
+ <figure class="table-overflow">
+ <table class="pure-table pure-table-horizontal"><thead><tr><th>Source</th><th></th><th>Words</th><th>Characters</th></tr></thead><tbody>
+ {% for c in corpus|sort(attribute='source.name.en') %}
<tr>
- <td><a href="{{ c.source.url }}">{{ c.source.name }}</a></td>
+ <td><a href="{{ c.source.url }}">{{ c.source.name.en }}</a></td>
{% set count = c.get ('count') %}
{% if count %}
{# use new style formatting, for some reason %7,d does not work #}
- <td>{{ '{:7,d}'.format(count[0])|numspace }}&#x202f;{{ count[1] }}</td>
+ <td>{{ '{:7,d}'.format(count.num)|numspace }}&#x2008;{{ count.kind.en }}</td>
{% else %}
<td></td>
{% endif %}
@@ -147,17 +190,28 @@
{% set stats = c.get ('stats') %}
{% for k in ('words', 'characters') %}
{% set i = stats[k]|approx %}
- <td>{{ '%5.1f'|format(i[0])|numspace }}&#x202f;{{ i[1] }}</td>
+ <td>{{ '%5.1f'|format(i[0])|numspace }}&#x2008;{{ i[1] }}</td>
{% endfor %}
</tr>
{% endfor %}
<tr><td>Total</td><td></td>
{% for k in ('words', 'characters') %}
{% set i = corpustotal[k]|approx %}
- <td>{{ '%5.1f'|format(i[0])|numspace }}&#x202f;{{ i[1] }}</td>
+ <td>{{ '%5.1f'|format(i[0])|numspace }}&#x2008;{{ i[1] }}</td>
{% endfor %}
</tr>
</tbody></table>
+ <figcaption>
+ <p>
+ The chosen Quran representation, unlike <a
+ href="https://github.com/khaledhosny/quran-data">other datasets</a>,
+ does not include all Quranic diacritization symbols.
+ <!-- -->
+ This makes comparison fairer, since most keyboards presented below do
+ not include any of them.
+ </p>
+ </figcaption>
+ </figure>
<p>
The plot below shows <bdo dir="ltr" lang="ar">ا ل ي م و ن</bdo> can be
@@ -174,9 +228,9 @@
<div id="letterfreq-div"></div>
</div>
<figcaption class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<p>Arabic letter frequency distribution</p>
</div>
@@ -187,9 +241,9 @@
<section>
<div class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h2>Layout properties</h2>
@@ -206,7 +260,9 @@
<dt class="finger thumb">cyan</dt>
<dd>thumb</dd>
</dl>
- <p>Asymmetry is defined as the difference between left and right hand usage.</p>
+ <p>Asymmetry is defined as the difference between left and right hand button
+ usage \(b_{left/right}\) and includes the thumb:</p>
+ $$a = \frac{b_{left}}{b_{total}} - \frac{b_{right}}{b_{total}}$$
<p>
The layout proposed uses four layers and assumes a 102/105 key ISO
@@ -289,9 +345,9 @@
<section id="related">
<div class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h2><a href="#related">Related work</a></h2>
<p>This section explores existing keyboard layouts made for the
@@ -301,9 +357,9 @@
</div>
<div id="ar-asmo663" class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h3><a href="#ar-asmo663">ASMO 663</a></h3>
<p>
@@ -335,9 +391,9 @@
</figure>
<div id="ar-osx" class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h3><a href="#ar-osx">Mac OS X</a></h3>
<p>
@@ -364,9 +420,9 @@
<div id="ar-linux" class="pure-g flexreverse">
<figcaption class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h3><a href="#ar-linux">Linux</a></h3>
<p>
@@ -395,9 +451,9 @@
</figure>
<div class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<p>In contrast to the layouts presented so far the following
layouts claim to be optimized for the Arabic language and were, at
@@ -407,9 +463,9 @@
</div>
<div id="ar-alramly" class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h3><a href="#ar-alramly">Al-Ramly et al</a></h3>
<p>
@@ -454,9 +510,9 @@
</figure>
<div id="ar-idlebi" class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h3><a href="#ar-idlebi">Idlebi et al</a></h3>
<p>
@@ -502,9 +558,9 @@
</figure>
<div id="ar-malas" class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h3><a href="#ar-malas">Malas et al</a></h3>
<p>
@@ -536,9 +592,9 @@
</figure>
<div id="ar-khorshid" class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h3><a href="#ar-khorshid">Khorshid et al</a></h3>
<p>
@@ -571,9 +627,9 @@
</figure>
<div id="ar-osman" class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h3><a href="#ar-osman">Osman et al</a></h3>
<p>
@@ -606,9 +662,9 @@
</figure>
<div id="ar-phonetic" class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h3><a href="#ar-phonetic">Phonetic</a></h3>
<p>
@@ -634,9 +690,9 @@
</figure>
<div id="intellark" class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h3><a href="#intellark">Intellark</a></h3>
<p>
@@ -660,9 +716,9 @@
<section>
<div class="pure-g flexreverse">
- <div class="pure-u-1 pure-u-md-1-2">
+ <div class="pure-u-1 pure-u-xl-1-2">
</div>
- <div class="pure-u-1 pure-u-md-1-2" lang="en">
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
<div class="lbox">
<h2>Acknowledgements</h2>
diff --git a/lulua/data/report/style.css b/lulua/data/report/style.css
index d829aa4..e734a0e 100644
--- a/lulua/data/report/style.css
+++ b/lulua/data/report/style.css
@@ -39,6 +39,7 @@ body {
:lang(ar) {
direction: rtl;
font-family: "IBM Plex Sans Arabic";
+ text-align: right;
}
/* inside ltr text */
:lang(ar)[dir=ltr] {
@@ -47,8 +48,9 @@ body {
:lang(en) {
direction: ltr;
font-family: "IBM Plex Sans";
+ text-align: left;
}
-h1, h2, h3 {
+h1, h2, h3, th {
font-weight: 100;
}
h1 {
@@ -121,7 +123,6 @@ div.indepth-card {
}
/* for hand/finger stats */
div.fingerhandstats {
- text-align: center;
display: flex;
}
div.fingerhandstats div.fingers {
@@ -131,6 +132,9 @@ div.fingerhandstats div.fingers div {
margin: 0.1em;
overflow: hidden;
}
+div.fingerhandstats div.fingers div, div.fingerhandstats div.asymm, div.fingerhandstats div.hand {
+ text-align: center;
+}
div.fingerhandstats .left {
margin-right: 0.5em;
}
@@ -154,18 +158,20 @@ div.fingerhandstats .fingers .thumb {
border: 0.1em solid var(--finger-thumb);
}
+.table-overflow {
+ overflow-x: auto;
+}
+
table {
font-variant-numeric: tabular-nums;
}
-.pure-table td.numint {
- text-align: right;
- padding-right: 0;
+
+table thead {
+ background-color: inherit !important;
}
-.pure-table td.numfrac {
- border-left: none;
- text-align: left;
- padding-left: 0;
+table.pure-table {
+ border: none;
}
dl.colorcodes dt, dl.colorcodes dd {
@@ -221,3 +227,6 @@ dl.colorcodes .finger.thumb::before {
.layer.fourth:before {
content: "⭨";
}
+p.remark {
+ font-size: 0.9em;
+}
diff --git a/lulua/report.py b/lulua/report.py
index 9a08068..06bb724 100644
--- a/lulua/report.py
+++ b/lulua/report.py
@@ -28,10 +28,11 @@ from bokeh.resources import CDN as bokehres
from .layout import LEFT, RIGHT, Direction, FingerType
-def approx (i):
+def approx (i, lang='en'):
""" Get approximate human-readable string for large number """
- units = ['', 'thousand', 'million', 'billion']
+ units = {'en': ['', 'thousand', 'million', 'billion'],
+ 'ar': ['', 'ألف', 'مليون', 'مليار']}[lang]
base = Decimal (1000)
i = Decimal (i)
while round (i, 1) >= base and len (units) > 1:
@@ -43,6 +44,16 @@ def numspace (s):
""" Replace ordinary spaces with unicode FIGURE SPACE """
return s.replace (' ', '\u2007')
+def arabnum (s):
+ """
+ Convert the digits and separators of a formatted number to Arabic-Indic numerals.
+
+ Granted, we could use setlocale and do proper formatting, but who has an
+ arabic locale installed…?
+ """
+ m = {'0': '٠', '1': '١', '2': '٢', '3': '٣', '4': '٤', '5': '٥', '6': '٦', '7': '٧', '8': '٨', '9': '٩', ',': '٬', '.': '٫'}
+ return ''.join (map (lambda x: m.get (x, x), s))
+
def render ():
parser = argparse.ArgumentParser(description='Create lulua report.')
parser.add_argument('-c', '--corpus', nargs='+', metavar='FILE', help='Corpus metadata files')
@@ -55,6 +66,7 @@ def render ():
)
env.filters['approx'] = approx
env.filters['numspace'] = numspace
+ env.filters['arabnum'] = arabnum
corpus = []
for x in args.corpus: