diff options
Diffstat (limited to 'lulua')
| -rw-r--r-- | lulua/data/report/index.html | 474 | ||||
| -rw-r--r-- | lulua/data/report/lulua-logo.svg | 17 | ||||
| -rw-r--r-- | lulua/data/report/style.css | 202 | ||||
| -rw-r--r-- | lulua/report.py | 64 | ||||
| -rw-r--r-- | lulua/stats.py | 70 | 
5 files changed, 768 insertions, 59 deletions
| diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html new file mode 100644 index 0000000..5649fab --- /dev/null +++ b/lulua/data/report/index.html @@ -0,0 +1,474 @@ +<!doctype html> +<html lang="ar"> +<head> +  <meta charset="utf-8"> +  <title>لؤلؤة</title> +  <!--<meta name="description" content="">--> +  <meta name="viewport" content="width=device-width, initial-scale=1"> + +	<link href="https://fonts.googleapis.com/css?family=IBM+Plex+Mono|IBM+Plex+Sans:100,400&display=swap" rel="stylesheet"> +	<link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/pure-min.css" integrity="sha384-oAOxQR6DkCoMliIh8yFnu25d7Eq/PHS21PClpwjOTeU2jRSq11vu66rf90/cZr47" crossorigin="anonymous"> +	<link rel="stylesheet" href="https://unpkg.com/purecss@1.0.1/build/grids-responsive-min.css"> +	{# bokeh #} +	{% for f in bokehres.js_files -%} +		<script src="{{ f }}"></script> +	{%- endfor %} +	{% for f in bokehres.css_files -%} +		<link rel="stylesheet" href="{{ f }}"> +	{%- endfor %} +	<link rel="stylesheet" href="style.css"> +</head> +<body> + +<div class="title-card pure-g"> +	<div class="pure-u-1 pure-u-lg-1-3"> +	<div class="lbox"> +		<h1 class="title"><img class="logo" src="lulua-logo.svg" alt="لؤلؤة"></h1> +		<div class="pure-g flexreverse"> +			<div class="pure-u-1 pure-u-sm-1-2"> +				<!--<h2 class="subtitle">لوحة مفاتيح العربية المريحة</h1>--> +			</div> +			<div class="pure-u-1 pure-u-sm-1-2"> +				<h2 class="subtitle" lang="en">Ergonomic Arabic Keyboard Layout</h2> +			</div> +		</div> +	</div> +	</div> +	<div class="pure-u-1 pure-u-lg-2-3"> +	<div class="lbox"> +		<div class="layout"> +			<img src="ar-lulua.svg" alt="لؤلؤة"> +		</div> +	</div> +	</div> +</div> + +<div class="pure-g flexreverse"> +	<div class="pure-u-1 pure-u-md-1-2"> +	</div> +	<div class="pure-u-1 pure-u-md-1-2" lang="en"> +	<div class="lbox"> +		<p>This is work in progress and contributions are welcome. 
Head over to +		<a href="https://github.com/PromyLOPh/lulua">GitHub</a> to see where +		you can help.</p> +	</div> +	</div> +</div> + +<div class="pure-g flexreverse"> +	<div class="pure-u-1 pure-u-md-1-2"> +	</div> +	<div class="pure-u-1 pure-u-md-1-2" lang="en"> +	<div class="lbox"> +		<h2>Goals</h2> +		<ul> +			<li>Ergonomic typing of unvocalized and vocalized text with 10 fingers</li> +			<li>Modern Standard Arabic and Quranic Arabic</li> +			<li>Localized numbers (European/Arabic-Indic)</li> +			<li>Usable as primary or secondary keyboard</li> +			<li>Compose-based</li> +			<li>Support for Markdown, RST, Wikitext and similar markup</li> +		</ul> + +		<p>Other languages using the Arabic alphabet (regional dialects, Urdu, +		Persian) are explicitly not supported.</p> +	</div> +	</div> +</div> + +<div class="pure-g flexreverse"> +	<div class="pure-u-1 pure-u-md-1-2"> +	</div> +	<div class="pure-u-1 pure-u-md-1-2" lang="en"> +	<div class="lbox"> +		<h2>Usage</h2> +		<dl> +			<dt>Windows</dt> +			<dd>Download <a href="ar-lulua-w64.zip">driver</a> and follow instructions in <kbd>INSTALL.txt</kbd></dd> +			<dt>Android</dt> +			<dd>Install +			<a href="https://play.google.com/store/apps/details?id=com.menny.android.anysoftkeyboard">AnySoftKeyboard</a> +			and +			<a href="https://play.google.com/store/apps/details?id=com.anysoftkeyboard.languagepack.arabic">Arabic for AnySoftKeyboard</a> +			<dt>Linux</dt> +			<dd>Run: <code>xmodmap <a href="ar-lulua.xmodmap">ar-lulua.xmodmap</a></code></dd> +		</dl> +	</div> +	</div> +</div> + +<div class="indepth-card"> +<div class="pure-g flexreverse"> +	<div class="pure-u-1 pure-u-md-1-2"> +	</div> +	<div class="pure-u-1 pure-u-md-1-2" lang="en"> +	<div class="lbox"> +		<h1>Learn more</h1> +	</div> +	</div> +</div> +</div> + +<section> +<div class="pure-g flexreverse"> +	<div class="pure-u-1 pure-u-md-1-2"> +	<div class="lbox"> +		<h2>الأبجدية العربية</h2> +	</div> +	</div> +	<div class="pure-u-1 pure-u-md-1-2" lang="en"> +	<div 
class="lbox"> +		<h2>The Arabic Alphabet</h2> +		<p> +		There are 28 letters in the Arabic alphabet, plus quite a few extra +		symbols required for proper text input, like the hamza in its different +		shapes <bdo dir="ltr" lang="ar">أ إ آ ء ئ ؤ</bdo>, ta marbutah <bdo +		dir="ltr" lang="ar">ة</bdo>, alif maqsurah <bdo dir="ltr" +		lang="ar">ى</bdo> and various diacritics for vowelized texts. +		<!-- --> +		Since the usability of a keyboard layout depends on the text entered +		it is necessary to study letter and letter combination frequencies first. +		<!-- --> +		The corpus used for the following analysis consists of +		</p> + +		<table class="pure-table"><thead><tr><th>Source</th><th></th><th>Words</th><th>Characters</th></thead><tbody> +		{% for c in corpus|sort(attribute='source.name') %} +			<tr> +			<td><a href="{{ c.source.url }}">{{ c.source.name }}</a></td> +			{% set count = c.get ('count') %} +			{% if count %} +				{# use new style formatting, for some reason %7,d does not work #} +				<td>{{ '{:7,d}'.format(count[0])|numspace }} {{ count[1] }}</td> +			{% else %} +				<td></td> +			{% endif %} + +			{% set stats = c.get ('stats') %} +			{% for k in ('words', 'characters') %} +				{% set i = stats[k]|approx %} +				<td>{{ '%5.1f'|format(i[0])|numspace }} {{ i[1] }}</td> +			{% endfor %} +			</tr> +		{% endfor %} +		<tr><td>Total</td><td></td> +		{% for k in ('words', 'characters') %} +			{% set i = corpustotal[k]|approx %} +			<td>{{ '%5.1f'|format(i[0])|numspace }} {{ i[1] }}</td> +		{% endfor %} +		</tr> +		</tbody></table> + +		<p> +		The plot below shows <bdo dir="ltr" lang="ar">ا ل ي م و ن</bdo> can be +		considered the most frequently used letters in the Arabic language. +		<!-- --> +		Together they account for more than 55% of all letters in the corpus. 
+		</p> +	</div> +	</div> +</div> + +<figure id="letterfreq"> +<div class="lbox" lang="en"> +	<div id="letterfreq-div"></div> +</div> +<figcaption class="pure-g flexreverse"> +	<div class="pure-u-1 pure-u-md-1-2"> +	</div> +	<div class="pure-u-1 pure-u-md-1-2" lang="en"> +	<div class="lbox"> +		<p>Arabic letter frequency distribution</p> +	</div> +	</div> +</figcaption> +</figure> +</section> + +<section> +<div class="pure-g flexreverse"> +	<div class="pure-u-1 pure-u-md-1-2"> +	</div> +	<div class="pure-u-1 pure-u-md-1-2" lang="en"> +	<div class="lbox"> +		<h2>Layout properties</h2> + +		<p>The following evaluation uses color coding to identify fingers:</p> +		<dl class="colorcodes"> +			<dt class="finger little">red</dt> +			<dd>little finger</dd> +			<dt class="finger ring">blue</dt> +			<dd>ring finger</dd> +			<dt class="finger middle">magenta</dt> +			<dd>middle finger +			<dt class="finger index">violet</dt> +			<dd>index finger</dd> +			<dt class="finger thumb">cyan</dt> +			<dd>thumb</dd> +		</dl> +		<p>Asymmetry is defined as the difference between left and right hand usage.</p> +	</div> +	</div> +</div> + +{% macro fingerhandstats(stats) %} +{% set hands = stats.hands %} +{% set fingers = stats.fingers %} +<div class="fingerhandstats" dir="ltr" lang="en"> +{% for hand in Direction %} +	{% set handpct = hands[hand]/stats.buttonPresses*100 %} +	<div class="{{ hand.name.lower() }}" style="width: {{ '%.3f'|format(handpct) }}%;"> +	<div class="hand">{{ '%.2f'|format(handpct) }}%</div> +	<div class="fingers"> +	{% for finger in fingerOrder[hand] %} +		{% set fingerpct = fingers[(hand, finger)]/stats.buttonPresses*100 %} +		{# finger width is relative to parent (i.e. 
hand) #} +		{% set fingerwidth = fingers[(hand, finger)]/hands[hand]*100 %} +		<div class="{{ finger.name.lower() }}" style="width: {{ '%.3f'|format(fingerwidth) }}%;">{{ '%.2f'|format(fingerpct) }}</div> +	{% endfor %} +	</div> +	</div> +	{% if loop.first %} +		<div class="asymm"><small>Asymmetry: {{ '%.3f'|format(stats.asymmetry) }}</small></div> +	{% endif %} +{% endfor %} +</div> +{% endmacro %} + +<figure id="ar-lulua-heat"> +<div class="lbox" lang="en"> +	<img src="ar-lulua-heat.svg"> +	{{ fingerhandstats(layoutstats['ar-lulua']) }} +</div> +</figure> +</section> + +<section class="layoutgallery"> +	<div class="pure-g flexreverse"> +		<div class="pure-u-1 pure-u-md-1-2"> +		</div> +		<div class="pure-u-1 pure-u-md-1-2" lang="en"> +		<div class="lbox"> +			<h2>Related work</h2> +		</div> +		</div> +	</div> + +	<figure id="ar-asmo663"> +	<div class="lbox"> +		<img src="ar-asmo663-heat.svg"> +		{{ fingerhandstats(layoutstats['ar-asmo663']) }} +	</div> +	<figcaption class="pure-g flexreverse"> +		<div class="pure-u-1 pure-u-md-1-2"> +		</div> +		<div class="pure-u-1 pure-u-md-1-2" lang="en"> +		<div class="lbox"> +			<p> +			Trying to unify existing layouts, the Arab Standardization and +			Metrology Organization (ASMO), now part of +			<a href="https://www.aidmo.org/">AIDMO</a>, published an Arabic +			keyboard layout in 1987 as +			<a href="https://www.aidmo.org/smcacc/ar/index.php?option=com_sobi2&Itemid=2&limitstart=2150">standard 663</a>. +			<!-- --> +			This, however, turned out to be a failure, due to lack of adoption by +			the typewriter industry. 
+			</p> +		</div> +		</div> +	</figcaption> +	</figure> + +	<figure id="ar-linux"> +	<div class="lbox"> +		<img src="ar-linux-heat.svg"> +		{{ fingerhandstats(layoutstats['ar-linux']) }} +	</div> +	<figcaption class="pure-g flexreverse"> +		<div class="pure-u-1 pure-u-md-1-2"> +		</div> +		<div class="pure-u-1 pure-u-md-1-2" lang="en"> +		<div class="lbox"> +			<p> +			Instead we’re currently using this layout (on Linux), which is +			similar, but not quite the same. +			<!-- --> +			Most notably this layout arranges letters by their visual similarity. +			<!-- --> +			Thus it allocates suboptimal or even awkward positions to frequently +			used letters like <bdo dir="ltr" lang="ar">ا ل</bdo> and +			<bdo dir="ltr" lang="ar">ذ</bdo>. +			</p> +		</div> +		</div> +	</figcaption> +	</figure> + +	<figure id="ar-malas"> +	<div class="lbox"> +		<img src="ar-malas-heat.svg"> +		{{ fingerhandstats(layoutstats['ar-malas']) }} +	</div> +	<figcaption class="pure-g flexreverse"> +		<div class="pure-u-1 pure-u-md-1-2"> +		</div> +		<div class="pure-u-1 pure-u-md-1-2" lang="en"> +		<div class="lbox"> +			<p> +			The work by Malas et al. (2008), +			<a href="https://pdfs.semanticscholar.org/1bf8/74dcaa7f21c2cc3c6c5e526b61a9ee352bba.pdf">Toward Optimal Arabic Keyboard Layout Using Genetic Algorithm</a>, +			presents an alternative layout generated by a genetic algorithm. +			<!-- --> +			They used a snapshot of the Arabic Wikipedia probably from around 2008 and +			optimized for typing speed only, claiming 35% faster typing compared +			to the <a href="#ar-linux">currently used layouts</a>. +			<!-- --> +			However the decision to put <bdo dir="ltr" lang="ar">ي</bdo> in the top +			row seems odd. +			<!-- --> +			Assigning the same left index finger to <bdo dir="ltr" lang="ar">ا +			ي و</bdo>, which are three of the most frequent letters, heavily +			strains this particular finger. 
+			</p> +		</div> +		</div> +	</figcaption> +	</figure> + +	<figure id="ar-osman"> +	<div class="lbox"> +		<img src="ar-osman-heat.svg"> +		{{ fingerhandstats(layoutstats['ar-osman']) }} +	</div> +	<figcaption class="pure-g flexreverse"> +		<div class="pure-u-1 pure-u-md-1-2"> +		</div> +		<div class="pure-u-1 pure-u-md-1-2" lang="en"> +		<div class="lbox"> +			<p> +			In 2015 patent +			<a href="https://patents.google.com/patent/US9041657B2/en">9,041,657 B2</a> +			was filed in the US, presenting yet another computer-generated layout. +			<!-- --> +			Its genetic algorithm was seeded with just 54 Arabic e-books consisting +			of 7 million characters in total. +			<!-- --> +			Overall it claims to be 9% faster than default layouts. +			<!-- --> +			This layout rips off most of the standard layout’s second layer, +			but amusingly fails to include a question mark, while it does +			provide <em>three</em> single-quote marks ’ and <em>two</em> Arabic +			semicolon <bdo dir="ltr" lang="ar">؛</bdo>. +			<!-- --> +			Additionally it places <bdo dir="ltr" lang="ar">ي</bdo> in an even +			worse position than Malas’ layout. +			</p> +		</div> +		</div> +	</figcaption> +	</figure> + +	<figure> +	<div class="lbox"> +		<img src="ar-khorshid-heat.svg"> +		{{ fingerhandstats(layoutstats['ar-khorshid']) }} +	</div> +	<figcaption class="pure-g flexreverse"> +		<div class="pure-u-1 pure-u-md-1-2"> +		</div> +		<div class="pure-u-1 pure-u-md-1-2" lang="en"> +		<div class="lbox"> +			<p> +			In the paper +			<a href="https://www.researchgate.net/publication/264837659_A_new_optimal_Arabic_keyboard_layout_using_genetic_algorithm">A new optimal Arabic keyboard layout using genetic algorithm</a> +			Khorshid et al. present yet another +			layout. +			<!-- --> +			They claim a 36% improvement over the standard keyboard based on +			their criteria for ergonomic layouts. 
+			<!-- --> +			However in their layout from figure 8 the letters <bdo dir="ltr" +			lang="ar">ل ب ر</bdo> are in suboptimal positions. +			<!-- --> +			Also it seems their algorithm favors the bottom row instead of the +			easier to use top row. +			</p> +		</div> +		</div> +	</figcaption> +	</figure> + +	<figure> +	<div class="lbox"> +		<img src="ar-phonetic-heat.svg"> +		{{ fingerhandstats(layoutstats['ar-phonetic']) }} +	</div> +	<figcaption class="pure-g flexreverse"> +		<div class="pure-u-1 pure-u-md-1-2"> +		</div> +		<div class="pure-u-1 pure-u-md-1-2" lang="en"> +		<div class="lbox"> +			<p> +			The <a href="http://arabic.omaralzabir.com/home">Arabic Phonetic Keyboard</a> +			simply maps the QWERTY layout to Arabic letters, based on their sound. +			Thus Q becomes <bdo dir="ltr" lang="ar">ق</bdo>, Y becomes <bdo dir="ltr" lang="ar">ي</bdo> and so on. +			It claims to be optimized for writing vowelized texts, especially +			Quranic Arabic, and thus includes quite a few combining characters and +			special symbols. +			Although it claims to make frequently used letters easily available – +			based on the work of Intellaren – it makes no effort to arrange letters +			according to their usage frequency. +			</p> +		</div> +		</div> +	</figcaption> +	</figure> + +	<div class="pure-g flexreverse"> +		<div class="pure-u-1 pure-u-md-1-2"> +		</div> +		<div class="pure-u-1 pure-u-md-1-2" lang="en"> +		<div class="lbox"> +			<p> +			While technically speaking not a layout but alternative input +			method, <a href="http://www.intellaren.com/intellark">Intellark</a> by +			Intellaren is worth mentioning. +			<!-- --> +			It is based on repeatedly pressing the same button to modify the +			current character. +			<!-- --> +			For example pressing A on the QWERTY keyboard cycles through the +			alternatives <bdo dir="ltr" lang="ar">ا أ إ آ</bdo> and <bdo dir="ltr" lang="ar">ء</bdo>. 
+			<!-- --> +			Obviously this is slow, error-prone and violates Dvorak’s guidelines +			for keyboard layout designs. +			</p> +		</div> +		</div> +	</div> +</section> + +<section> +<div class="pure-g flexreverse"> +	<div class="pure-u-1 pure-u-md-1-2"> +	</div> +	<div class="pure-u-1 pure-u-md-1-2" lang="en"> +	<div class="lbox"> +		<h2>Acknowledgements</h2> + +		<p>This work would not have been possible without Martin Krzywinski’s +		work on <a +		href="http://mkweb.bcgsc.ca/carpalx/?typing_effort">carpalx</a>.</p> +	</div> +	</div> +</div> +</section> + +<script> +fetch('letterfreq.json') +    .then(function(response) { return response.json(); }) +    .then(function(item) { Bokeh.embed.embed_item(item, 'letterfreq-div'); }) +</script> + +</body> +</html> diff --git a/lulua/data/report/lulua-logo.svg b/lulua/data/report/lulua-logo.svg new file mode 100644 index 0000000..20136c0 --- /dev/null +++ b/lulua/data/report/lulua-logo.svg @@ -0,0 +1,17 @@ +<?xml version="1.0" encoding="UTF-8" standalone="no"?> +<svg xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" width="25.36014mm" height="13.828889mm" viewBox="0 0 25.36014 13.828889" version="1.1"> +  <g style="fill:#EAE0C8"> +    <g transform="matrix(0.26458333,0,0,0.26458333,-10.208781,-21.74248)"> +      <path d="m 125.36706,120.94964 0.53333,-0.53334 h 0.21333 q 3.73334,0 5.49334,-1.65333 1.76,-1.70667 1.76,-5.38667 V 82.176302 h 1.06666 V 113.3763 q 0,4.26667 -2.02666,6.29334 -1.97334,1.97333 -6.29334,1.97333 h -0.74666 z"/> +      <path d="m 104.07539,133.2163 h 4.05334 q 6.34666,0 9.70666,-2.88 3.41334,-2.88 4.16,-8.69333 h -8.21333 q -4.21333,0 -6.45333,-2.24 -2.18667,-2.24 -2.18667,-6.29333 0,-2.4 0.64,-4.53334 0.69333,-2.13333 1.86667,-3.68 1.22666,-1.6 2.88,-2.50666 1.65333,-0.96 3.62666,-0.96 4.32,0 6.72,4.10666 2.4,4.05334 2.4,11.36 0,1.92 -0.16,3.52 h 2.82667 v 0.69334 l -0.53333,0.53333 h -2.45334 q -0.90666,6.61333 -4.53333,9.70667 -3.57333,3.09333 -10.29333,3.09333 h -4.05334 z 
m 2.13334,-20.10666 q 0,3.52 1.81333,5.44 1.86667,1.86666 5.38667,1.86666 h 8.69333 q 0.0533,-0.58666 0.0533,-1.17333 0.0533,-0.64 0.0533,-1.28 v -2.13333 q 0,-2.88 -0.58667,-5.28 -0.53333,-2.45334 -1.6,-4.21334 -1.01333,-1.76 -2.56,-2.72 -1.49333,-0.96 -3.41333,-0.96 -1.65334,0 -3.09334,0.8 -1.44,0.8 -2.50666,2.24 -1.01334,1.38667 -1.65334,3.30667 -0.58666,1.86667 -0.58666,4.10667 z m 3.09333,-16.90667 h 2.66667 l 0.0533,-0.16 q -0.85333,-0.746667 -1.33333,-1.706667 -0.42667,-0.96 -0.42667,-1.973334 0,-1.866666 1.22667,-3.04 1.22666,-1.226667 3.09333,-1.226667 2.24,0 3.68,1.866667 l -0.69333,0.746667 q -1.01334,-1.546667 -2.98667,-1.546667 -1.44,0 -2.4,0.906667 -0.96,0.906667 -0.96,2.293333 0,1.6 1.28,2.666667 1.28,1.066667 3.25333,1.066667 h 3.2 v 1.066667 h -9.65333 z"/> +      <path d="m 88.075391,120.94964 0.533334,-0.53334 h 0.213333 q 3.733333,0 5.493334,-1.65333 1.76,-1.70667 1.76,-5.38667 V 82.176302 h 1.066666 V 113.3763 q 0,4.26667 -2.026666,6.29334 -1.973334,1.97333 -6.293334,1.97333 h -0.746667 z"/> +      <path d="m 66.783725,133.2163 h 4.053334 q 6.346667,0 9.706667,-2.88 3.413334,-2.88 4.16,-8.69333 h -8.213333 q -4.213334,0 -6.453334,-2.24 -2.186667,-2.24 -2.186667,-6.29333 0,-2.4 0.64,-4.53334 0.693334,-2.13333 1.866667,-3.68 1.226667,-1.6 2.88,-2.50666 1.653334,-0.96 3.626667,-0.96 4.32,0 6.72,4.10666 2.4,4.05334 2.4,11.36 0,1.92 -0.16,3.52 h 2.826667 v 0.69334 l -0.533333,0.53333 h -2.453334 q -0.906666,6.61333 -4.533333,9.70667 -3.573334,3.09333 -10.293334,3.09333 h -4.053334 z m 2.133334,-20.10666 q 0,3.52 1.813333,5.44 1.866667,1.86666 5.386667,1.86666 h 8.693334 q 0.05333,-0.58666 0.05333,-1.17333 0.05333,-0.64 0.05333,-1.28 v -2.13333 q 0,-2.88 -0.586667,-5.28 -0.533333,-2.45334 -1.6,-4.21334 -1.013333,-1.76 -2.56,-2.72 -1.493334,-0.96 -3.413334,-0.96 -1.653333,0 -3.093333,0.8 -1.44,0.8 -2.506667,2.24 -1.013333,1.38667 -1.653333,3.30667 -0.586667,1.86667 -0.586667,4.10667 z m 3.093333,-16.90667 h 2.666667 l 0.05333,-0.16 q 
-0.853334,-0.746667 -1.333334,-1.706667 -0.426667,-0.96 -0.426667,-1.973334 0,-1.866666 1.226667,-3.04 1.226667,-1.226667 3.093334,-1.226667 2.24,0 3.68,1.866667 l -0.693334,0.746667 q -1.013333,-1.546667 -2.986666,-1.546667 -1.44,0 -2.4,0.906667 -0.960001,0.906667 -0.960001,2.293333 0,1.6 1.280001,2.666667 1.28,1.066667 3.253333,1.066667 h 3.2 v 1.066667 h -9.653334 z"/> +    </g> +    <g> +      <g transform="translate(-16.828094,-21.74248)"> +        <path d="m 17.96875,26.736328 c -0.629138,0 -1.140625,0.511487 -1.140625,1.140625 v 3.339844 c 0,0.629138 0.511487,1.140625 1.140625,1.140625 h 3.339844 c 0.629138,0 1.138672,-0.511487 1.138672,-1.140625 v -3.339844 c 0,-0.629138 -0.509534,-1.140625 -1.138672,-1.140625 z m 0,0.28125 h 3.339844 c 0.478861,0 0.859375,0.380514 0.859375,0.859375 v 3.339844 c 0,0.478861 -0.380514,0.859375 -0.859375,0.859375 H 17.96875 c -0.478862,0 -0.861328,-0.380514 -0.861328,-0.859375 v -3.339844 c 0,-0.478861 0.382466,-0.859375 0.861328,-0.859375 z"/> +      </g> +      <rect y="9.2311716" x="1.6189767" height="0.32543749" width="2.3812499"/> +    </g> +  </g> +</svg> diff --git a/lulua/data/report/style.css b/lulua/data/report/style.css new file mode 100644 index 0000000..26b2e96 --- /dev/null +++ b/lulua/data/report/style.css @@ -0,0 +1,202 @@ +/* +colorscheme derived from #EAE0C8, see https://en.wikipedia.org/wiki/Pearl_(color) +using http://colormind.io/bootstrap/ +*/ +:root { +	--light-shades: #EAE0C8; +	--light-accent: #689CA9; +	--main-brand: #A48A4E; +	--dark-accent: #79796D; +	--dark-shades: #1D251E; + +	--finger-little: #dc322f; /* red */ +	--finger-ring: #268bd2; /* blue */ +	--finger-middle: #d33682; /* magenta */ +	--finger-index: #6c71c4; /* violet */ +	--finger-thumb: #2aa198; /* cyan */ +} + +@font-face { +	font-family: 'IBM Plex Arabic'; +	font-style: normal; +	font-weight: 100; +	src: local('IBM Plex Arabic Thin'), local('IBMPlexArabic-Thin'), url('fonts/IBMPlexArabic-Thin.woff2') format('woff2'); +} + +@font-face { 
+	font-family: 'IBM Plex Arabic'; +	font-style: normal; +	font-weight: 400; +	src: local('IBM Plex Arabic Regular'), local('IBMPlexArabic-Regular'), url('fonts/IBMPlexArabic-Regular.woff2') format('woff2'); +} + +body { +	font-size: 14pt; +	background-color: var(--light-shades); +	color: var(--dark-shades); +} +/* is there a better way to select _both_ fonts at the same time? */ +:lang(ar) { +	direction: rtl; +	font-family: "IBM Plex Arabic"; +} +/* inside ltr text */ +:lang(ar)[dir=ltr] { +	direction: ltr; +} +:lang(en) { +	direction: ltr; +	font-family: "IBM Plex Sans"; +} +h1, h2, h3 { +	font-weight: 100; +} +h1 { +	font-size: 4em; +} +h2 { +	font-size: 2.5em; +} +figure { +	max-width: 70em; +	margin: 1.3em auto; +} +img { +	width: 100%; +} +code { +	font-family: "IBM Plex Mono", monospace !important; /* override :lang(*) font setting */ +} +a:link, a:visited { +	color: var(--light-accent); +	text-decoration: none; +} +a:focus, a:hover { +	background-color: var(--light-accent); +	color: var(--light-shades); +	border-radius: 0.1em; +} +div.title-card { +	background-color: var(--dark-accent); +	color: var(--light-shades); +} +div.title-card .lbox { +	margin: 2vw; +} +div.title-card h1 { +	margin: 0; +	padding: 0.2em; +} +div.title-card img.logo { +	max-height: 35vh; +	display: block; +	margin: 0 auto; +} +div.title-card .subtitle { +	padding: 0 0.5em; +	font-size: 1.5em; +} +div.title-card .layout img { +	display: block; +	margin: 0 auto; +} +div.indepth-card { +	padding: 10vh 0; +	margin: 1em 0; +	background-color: var(--dark-accent); +	color: var(--light-shades); +} +.flexreverse { +	flex-direction: row-reverse; +} +.lbox { +	margin: 0 5vw; +} +/* for hand/finger stats */ +div.fingerhandstats { +	text-align: center; +	display: flex; +} +div.fingerhandstats div.fingers { +	display: flex; +} +div.fingerhandstats div.fingers div { +	margin: 0.1em; +	overflow: hidden; +} +div.fingerhandstats .left { +	margin-right: 0.5em; +} +div.fingerhandstats .right { +	
margin-left: 0.5em; +} +/* keep in sync with render-svg.css */ +div.fingerhandstats .fingers .little { +	border: 0.1em solid var(--finger-little); +} +div.fingerhandstats .fingers .ring { +	border: 0.1em solid var(--finger-ring); +} +div.fingerhandstats .fingers .middle  { +	border: 0.1em solid var(--finger-middle); +} +div.fingerhandstats .fingers .index { +	border: 0.1em solid var(--finger-index); +} +div.fingerhandstats .fingers .thumb { +	border: 0.1em solid var(--finger-thumb); +} + +table { +	font-variant-numeric: tabular-nums; +} +.pure-table td.numint { +	text-align: right; +	padding-right: 0; +} + +.pure-table td.numfrac { +	border-left: none; +	text-align: left; +	padding-left: 0; +} + +dl.colorcodes dt, dl.colorcodes dd { +	display: inline; +	padding: 0; +	margin: 0; +} + +dl.colorcodes dt:after { +	content: ":"; +} + +dl.colorcodes .finger:before { +	width: 0.7em; +	height: 0.7em; +	display: inline-block; +	content: " "; +	margin-right: 0.3em; +	vertical-align: middle; +} + +dl.colorcodes .finger.little::before { +	background-color: var(--finger-little); +} + +dl.colorcodes .finger.ring::before { +	background-color: var(--finger-ring); +} + +dl.colorcodes .finger.middle::before { +	background-color: var(--finger-middle); +} + +dl.colorcodes .finger.index::before { +	background-color: var(--finger-index); +} + +dl.colorcodes .finger.thumb::before { +	background-color: var(--finger-thumb); +} + diff --git a/lulua/report.py b/lulua/report.py new file mode 100644 index 0000000..200bb9b --- /dev/null +++ b/lulua/report.py @@ -0,0 +1,64 @@ +import sys, argparse, logging, pickle +from gettext import GNUTranslations, NullTranslations +from decimal import Decimal + +import yaml +from jinja2 import Environment, PackageLoader +from bokeh.resources import CDN as bokehres + +from .layout import LEFT, RIGHT, Direction, FingerType + +def approx (i): +    """ Get approximate human-readable string for large number """ + +    units = ['', 'thousand', 'million', 
'billion'] +    base = Decimal (1000) +    i = Decimal (i) +    while i >= base and len (units) > 1: +        i /= base +        units.pop (0) +    return round (i, 1), units[0] + +def numspace (s): +    """ Replace ordinary spaces with unicode FIGURE SPACE """ +    return s.replace (' ', '\u2007') + +def render (): +    parser = argparse.ArgumentParser(description='Create lulua report.') +    parser.add_argument('-c', '--corpus', nargs='+', metavar='FILE', help='Corpus metadata files') +    parser.add_argument('-l', '--layoutstats', nargs='+', metavar='FILE', help='Layout statistics files') +    logging.basicConfig (level=logging.INFO) +    args = parser.parse_args() + +    env = Environment ( +            loader=PackageLoader (__package__, 'data/report'), +            ) +    env.filters['approx'] = approx +    env.filters['numspace'] = numspace + +    corpus = [] +    for x in args.corpus: +        with open (x) as fd: +            corpus.extend (filter (lambda x: x is not None, yaml.safe_load_all (fd))) +    layoutstats = {} +    for x in args.layoutstats: +        with open (x, 'rb') as fd: +            d = pickle.load (fd) +            layoutstats[d['layout']] = d + +    corpustotal = {} +    for k in ('words', 'characters'): +        corpustotal[k] = sum (map (lambda x: x['stats'][k], corpus)) + +    tpl = env.get_template('index.html') + +    tpl.stream ( +            corpus=corpus, +            corpustotal=corpustotal, +            layoutstats=layoutstats, +            bokehres=bokehres, +            # XXX: not sure how to expose these properly to the template +            fingerOrder={LEFT: list (FingerType), RIGHT: list (reversed (FingerType))}, +            Direction=Direction, +            ).dump (sys.stdout) + diff --git a/lulua/stats.py b/lulua/stats.py index 80c269b..13d878b 100644 --- a/lulua/stats.py +++ b/lulua/stats.py @@ -22,7 +22,6 @@ import sys, operator, pickle, argparse, logging, yaml, math, time  from operator import itemgetter  from 
itertools import chain, groupby, product  from collections import defaultdict -from decimal import Decimal  from .layout import *  from .keyboard import defaultKeyboards @@ -313,7 +312,7 @@ def keyHeatmap (args):          buttons[k.name] = v      yaml.dump (data, sys.stdout) -def fingerHand (args): +def layoutstats (args):      stats = pickle.load (sys.stdin.buffer)      keyboard = defaultKeyboards[args.keyboard] @@ -328,19 +327,14 @@ def fingerHand (args):          hands[hand] += count          fingers[(hand, finger)] += count -    print ('<div class="fingerhandstats" dir="ltr" lang="en">') -    fingerOrder = {LEFT: list (FingerType), RIGHT: reversed (FingerType)} -    for hand in Direction: -        handpct = hands[hand]/buttonPresses*100 -        print (f'<div class="{hand.name.lower()}" style="width: {handpct:.3f}%;">\n\t<div class="hand">{handpct:.2f}%</div>') -        print ('\t<div class="fingers">') -        for finger in fingerOrder[hand]: -            fingerpct = fingers[(hand, finger)]/buttonPresses*100 -            # finger width is relative to parent (i.e. 
hand) -            fingerwidth = fingers[(hand, finger)]/hands[hand]*100 -            print (f'\t\t<div class="{finger.name.lower()}" style="width: {fingerwidth:.3f}%;">{fingerpct:.2f}</div>') -        print ('\t</div>\n\t</div>') -    print ('</div>') +    asymmetry = hands[LEFT]/buttonPresses - hands[RIGHT]/buttonPresses +    pickle.dump (dict ( +            layout=args.layout, +            hands=dict (hands), +            fingers=dict (fingers), +            buttonPresses=buttonPresses, +            asymmetry=asymmetry, +            ), sys.stdout.buffer)  def latinImeDict (args):      """ @@ -379,46 +373,6 @@ def corpusStats (args):      # make document concatable      print ('---') -def approx (i): -    """ Get approximate human-readable string for large number """ - -    units = ['', 'thousand', 'million', 'billion'] -    base = Decimal (1000) -    i = Decimal (i) -    while i >= base and len (units) > 1: -        i /= base -        units.pop (0) -    i = round (i, 1) -    return int (i), int (i%1*10), units[0] - -def corpusHtml (args): -    meta = list (filter (lambda x: x is not None, yaml.safe_load_all (sys.stdin))) -    total = {'words': 0, 'characters': 0} -    print ('<table class="pure-table"><thead><tr><th>Source</th><th colspan="2"></th><th colspan="2">Words</th><th colspan="2">Characters</th></thead><tbody>') -    for c in sorted (meta, key=lambda x: x['source']['name'].lower ()): -        print ('<tr>') -        print (f'<td><a href="{c["source"]["url"]}">{c["source"]["name"]}</a></td>') -        count = c.get ('count') -        if count: -            print (f'<td class="numint">{count[0]//1000:d},</td><td class="numfrac">{count[0]%1000:03d}\u202f{count[1]}</td>') -        else: -            print ('<td class="numint"></td><td class="numfrac"></td>') - -        stats = c.get ('stats') -        for k in ('words', 'characters'): -            i = approx (stats[k]) -            print (f'<td class="numint">{i[0]}.</td><td 
class="numfrac">{i[1]}\u202f{i[2]}</td>') -        print ('</tr>') - -        for k in ('words', 'characters'): -            total[k] += c['stats'][k] -    print ('<tr><td>Total</td><td class="numint"></td><td class="numfrac"></td>') -    for k in ('words', 'characters'): -        i = approx (total[k]) -        print (f'<td class="numint">{i[0]}.</td><td class="numfrac">{i[1]}\u202f{i[2]}</td>') -    print ('</tr>') -    print ('</tbody></table>') -  def main ():      parser = argparse.ArgumentParser(description='Process statistics files.')      parser.add_argument('-l', '--layout', metavar='LAYOUT', help='Keyboard layout name') @@ -439,15 +393,13 @@ def main ():      sp.set_defaults (func=triadfreq)      sp = subparsers.add_parser('keyheatmap')      sp.set_defaults (func=keyHeatmap) -    sp = subparsers.add_parser('fingerhand') -    sp.set_defaults (func=fingerHand) +    sp = subparsers.add_parser('layoutstats') +    sp.set_defaults (func=layoutstats)      sp = subparsers.add_parser('latinime')      sp.set_defaults (func=latinImeDict)      sp = subparsers.add_parser('corpusstats')      sp.add_argument('metadata', type=argparse.FileType ('r'))      sp.set_defaults (func=corpusStats) -    sp = subparsers.add_parser('corpushtml') -    sp.set_defaults (func=corpusHtml)      logging.basicConfig (level=logging.INFO)      args = parser.parse_args() | 
