summary refs log tree commit diff
diff options
context:
space:
mode:
author Lars-Dominik Braun <lars@6xq.net> 2021-10-24 09:47:25 +0200
committer Lars-Dominik Braun <lars@6xq.net> 2021-10-27 15:44:31 +0200
commit a2104773180458a9184466e32075f470f371207c (patch)
tree e5a8fe0448efe775c9c8aa6283108c24fc314b39
parent 5c494a2cfd17aaa92a15d907a47ac5172f6f458b (diff)
download lulua-a2104773180458a9184466e32075f470f371207c.tar.gz
lulua-a2104773180458a9184466e32075f470f371207c.tar.bz2
lulua-a2104773180458a9184466e32075f470f371207c.zip
report: Add triad analysis
Create and add a new plot that should indicate how difficult typing common triads is.
-rwxr-xr-x gen.sh 15
-rw-r--r-- lulua/data/report/index.html 67
-rw-r--r-- lulua/plot.py 158
-rw-r--r-- lulua/stats.py 8
4 files changed, 233 insertions, 15 deletions
diff --git a/gen.sh b/gen.sh
index fb2cf8a..cde2be4 100755
--- a/gen.sh
+++ b/gen.sh
@@ -102,6 +102,12 @@ rule analyze-corpusstats
rule wordlist
command = lulua-analyze -l ar-lulua latinime < \$in > \$out
+rule analyze-triadeffortdata
+ command = lulua-analyze -l \$layout triadeffortdata < \$in > \$out
+
+rule analyze-triadeffortplot
+ command = cat \$in | lulua-analyze -l ar-lulua triadeffortplot > \$out
+
rule report
command = lulua-report -c \$corpus -l \$layoutstats > \$out
@@ -218,6 +224,9 @@ build \$reportdir/${l}.svg: render-svg || \$reportdir
build \$tempdir/${l}-heat.yaml: analyze-heat \$statsdir/${l}/all.pickle || \$tempdir
layout = ${l}
+build \$tempdir/${l}-triadeffort.pickle: analyze-triadeffortdata \$statsdir/${l}/all.pickle || \$tempdir
+ layout = ${l}
+
build \$reportdir/${l}-heat.svg: render-svg-heat \$tempdir/${l}-heat.yaml || \$reportdir
layout = ${l}
@@ -255,6 +264,12 @@ EOF
metafiles+=" \$tempdir/metadata-$c.yaml"
done
+echo -n "build \$reportdir/triadeffort.json: analyze-triadeffortplot "
+for l in $layouts; do
+ echo -n "\$tempdir/${l}-triadeffort.pickle "
+done
+echo "|| \$reportdir"
+
# dependencies are not properly modeled, always rebuild
cat <<EOF
build always: phony
diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html
index 5d08c5b..ed9dc30 100644
--- a/lulua/data/report/index.html
+++ b/lulua/data/report/index.html
@@ -433,7 +433,63 @@
<div class="lbox">
<h2><a href="#related">Related work</a></h2>
<p>This section explores existing keyboard layouts made for the
- Arabic language and analyzes their usability.</p>
+ Arabic language and analyzes their usability.
+ <!-- -->
+ Comparing them with the proposed layout above is difficult at best,
+ because the layouts presented below cover different character sets.
+ <!-- -->
+ Some lack numbers, some do not include short vowels and others provide
+ no way to type symbols.
+ <!-- -->
+ Therefore no individual score is assigned to each layout, but an analysis
+ of each layout’s features is given.
+ </p>
+ </div>
+ </div>
+ </div>
+
+ <figure id="triadeffort">
+ <div class="lbox" lang="en">
+ <div id="triadeffort-div"></div>
+ </div>
+ <figcaption class="pure-g flexreverse">
+ </figcaption>
+ </figure>
+
+ <div class="pure-g flexreverse">
+ <div class="pure-u-1 pure-u-xl-1-2">
+ </div>
+ <div class="pure-u-1 pure-u-xl-1-2" lang="en">
+ <div class="lbox">
+ <p>
+ Carpalx tries to minimize the effort of typing in blocks of three
+ consecutive keystrokes, triads, and thus a good layout in that sense
+ should make typing frequent triads easy.
+ <!-- -->
+ The figure above plots cumulative triad frequency on the x-axis and
+ weighted cumulative effort on the y-axis.
+ <!-- -->
+ Thus, at an x value of 0.5 the y-axis is the sum of triad frequencies
+ multiplied by their effort for all triads responsible for 50% of the
+ typing process.
+ <!-- -->
+ Standard layouts are the layouts from <a href="#ar-asmo663">ASMO</a>,
+ <a href="#ar-linux">Linux</a> and <a href="#ar-osx">OSX</a> whereas
+ <em>usable</em> lists only those which are actually relevant for typing.
+ <!-- -->
+ As we can see the layout presented above meets the optimization goal.
+ <!-- -->
+ Only the top 5% of all triads are “easier” to type with <a
+ href="#ar-malas">Malas’ layout</a>, because lulua splits hamza
+ <bdo lang="ar" dir="rtl">(ء)</bdo> from its alef <bdo lang="ar"
+ dir="rtl">(ا)</bdo> stem.
+ <!-- -->
+ As expected the <a href="#ar-phonetic">phonetic layout</a> is one of the
+ worst ones, because QWERTY is not optimized for Arabic letter frequencies.
+ </p>
+ <p>
+ The following sections provide details about these layouts.
+ </p>
</div>
</div>
</div>
@@ -901,9 +957,12 @@
</section>
<script>
-fetch('letterfreq.json')
- .then(function(response) { return response.json(); })
- .then(function(item) { Bokeh.embed.embed_item(item, 'letterfreq-div'); })
+const plots = ['letterfreq', 'triadeffort'];
+for (const p of plots) {
+ fetch(p + '.json')
+ .then(function(response) { return response.json(); })
+ .then(function(item) { Bokeh.embed.embed_item(item, p + '-div'); });
+}
</script>
</body>
diff --git a/lulua/plot.py b/lulua/plot.py
index fdfc16c..9fb5cf1 100644
--- a/lulua/plot.py
+++ b/lulua/plot.py
@@ -20,9 +20,6 @@
import sys, argparse, json, unicodedata, pickle, logging, math
from operator import itemgetter
-from bokeh.plotting import figure
-from bokeh.models import ColumnDataSource, LinearAxis, Range1d
-from bokeh.embed import json_item
from .layout import *
from .keyboard import defaultKeyboards
@@ -30,9 +27,31 @@ from .util import limit, displayText
from .writer import Writer
from .carpalx import Carpalx, models
+def setPlotStyle (p):
+ """ Set common plot styles """
+
+ # Suppress warnings from bokeh if the legend is empty.
+ if p.legend:
+ p.legend.location = "top_left"
+ # Hide glyph on click on legend
+ p.legend.click_policy = "hide"
+ p.legend.label_text_font = 'IBM Plex Sans Arabic'
+ p.legend.border_line_color = None
+ p.legend.background_fill_color = None
+ p.legend.inactive_fill_color = 'black'
+ p.legend.inactive_fill_alpha = 0.1
+
+ # no border fill
+ p.border_fill_color = None
+ p.background_fill_alpha = 0.5
+
def letterfreq (args):
""" Map key combinations to their text, bin it and plot sorted distribution """
+ from bokeh.plotting import figure
+ from bokeh.models import ColumnDataSource, LinearAxis, Range1d
+ from bokeh.embed import json_item
+
# show unicode class "letters other" only
whitelistCategory = {'Lo'}
@@ -89,15 +108,12 @@ def letterfreq (args):
p.vbar(x='letters', width=0.5, top='rel', color="#dc322f", source=source, y_range_name='single')
p.add_layout(LinearAxis(y_range_name="single"), 'right')
+ setPlotStyle (p)
# styling
p.xgrid.grid_line_color = None
- p.xaxis.major_label_text_font_size = "1.5em"
- p.xaxis.major_label_text_font_size = "1.5em"
- p.xaxis.major_label_text_font = 'IBM Plex Sans Arabic'
- p.yaxis.major_label_text_font = 'IBM Plex Sans Arabic'
- # no border fill
- p.border_fill_color = None
- p.background_fill_alpha = 0.5
+ for axis, size, font in ((p.xaxis, '1.5em', 'IBM Plex Sans Arabic'), (p.yaxis, '1em', 'IBM Plex Sans')):
+ axis.major_label_text_font_size = size
+ axis.major_label_text_font = font
json.dump (json_item (p), sys.stdout)
@@ -153,3 +169,125 @@ def triadfreq (args):
return 0
+def triadEffortData (args):
+ """
+ Plot cumulative triad frequency vs cumulative effort.
+
+ More frequent triads should be easier to type and thus we expect an
+ exponential distribution for optimized layouts and linear distribution
+ for everything else.
+ """
+
+ import numpy as np
+
+ stats = pickle.load (sys.stdin.buffer)
+
+ # XXX: add layout to stats?
+ keyboard = defaultKeyboards['ibmpc105']
+ layout = defaultLayouts[args.layout].specialize (keyboard)
+ writer = Writer (layout)
+
+ # letter-based binning, in case multiple buttons are mapped to the same
+ # letter.
+ binned = defaultdict (lambda: dict (weight=0, effort=Carpalx (models['mod01'], writer), textTriad=None))
+ weightSum = 0
+ for triad, weight in stats['triads'].triads.items ():
+ textTriad = tuple (layout.getText (t) for t in triad)
+ data = binned[textTriad]
+ data['weight'] += weight
+ data['effort'].addTriad (triad, weight)
+ data['textTriad'] = textTriad
+ data['layers'] = tuple (layout.modifierToLayer (x.modifier)[0] for x in triad)
+ weightSum += weight
+
+ # Now bin into equally-sized buckets to reduce amount of data
+ nBins = 200
+ binWidth = weightSum//nBins
+ cumulativeWeight = 0
+ cumulativeEffort = 0
+ x = []
+ y = []
+ for data in sorted (binned.values (), key=lambda x: x['weight'], reverse=True):
+ cumulativeWeight += data['weight']
+ cumulativeEffort += data['effort'].effort * data['weight']
+ if not x or x[-1] + binWidth <= cumulativeWeight:
+ x.append (cumulativeWeight)
+ y.append (cumulativeEffort)
+ x.append (cumulativeWeight)
+ y.append (cumulativeEffort)
+
+ x = np.true_divide (x, cumulativeWeight)
+ y = np.true_divide (y, cumulativeEffort)
+
+ pickle.dump (dict (x=x, y=y, layout=layout), sys.stdout.buffer, pickle.HIGHEST_PROTOCOL)
+
+def triadEffortPlot (args):
+ """ Plot concatenated pickled data from triadEffortData """
+
+ from .stats import unpickleAll
+ # Initializing bokeh is an expensive operation and this module is imported
+ # a lot, so only do it when necessary.
+ from bokeh.palettes import Set3
+ from bokeh.plotting import figure
+ from bokeh.models import RadioButtonGroup, CustomJS, Slope
+ from bokeh.embed import json_item
+ from bokeh.layouts import column
+
+ p = figure(
+ plot_width=1000,
+ plot_height=500,
+ sizing_mode='scale_both',
+ x_range=(0, 1),
+ y_range=(0, 1),
+ output_backend="webgl",
+ )
+ data = list (unpickleAll (sys.stdin.buffer))
+ colors = Set3[len(data)]
+ lines = dict ()
+ for o, color in zip (data, colors):
+ name = o['layout'].name
+ assert name not in lines
+ lines[name] = p.line (o['x'], o['y'], line_width=1, color=color,
+ legend_label=name, name=name)
+
+ # color: base1
+ slope = Slope(gradient=1, y_intercept=0,
+ line_color='#93a1a1', line_dash='dashed', line_width=1)
+ p.add_layout(slope)
+
+ setPlotStyle (p)
+ for axis, size, font in ((p.xaxis, '1em', 'IBM Plex Sans'), (p.yaxis, '1em', 'IBM Plex Sans')):
+ axis.major_label_text_font_size = size
+ axis.major_label_text_font = font
+
+ LABELS = ["All", "Standard", "Usable"]
+ visible = {
+ 0: list (lines.keys ()),
+ 1: ['ar-asmo663', 'ar-linux', 'ar-osx'],
+ 2: ['ar-lulua', 'ar-ergoarabic', 'ar-malas', 'ar-linux', 'ar-osx'],
+ }
+ ranges = {
+ 0: [(0, 1), (0, 1)],
+ 1: [(0, 0.5), (0, 0.4)],
+ 2: [(0, 0.5), (0, 0.4)],
+ }
+ presets = RadioButtonGroup (labels=LABELS, active=0)
+ # Set visibility and x/yranges on click. Not sure if there’s a more pythonic way.
+ presets.js_on_click(CustomJS(
+ args=dict(lines=lines, plot=p, visible=visible, ranges=ranges),
+ code="""
+ for (const [k, line] of Object.entries (lines)) {
+ line.visible = visible[this.active].includes (k);
+ }
+ const xrange = plot.x_range;
+ xrange.start = ranges[this.active][0][0];
+ xrange.end = ranges[this.active][0][1];
+ const yrange = plot.y_range;
+ yrange.start = ranges[this.active][1][0];
+ yrange.end = ranges[this.active][1][1];
+ """))
+
+ json.dump (json_item (column (p, presets)), sys.stdout)
+
+ return 0
+
diff --git a/lulua/stats.py b/lulua/stats.py
index 0925c0d..9d6c537 100644
--- a/lulua/stats.py
+++ b/lulua/stats.py
@@ -28,7 +28,7 @@ from .layout import *
from .keyboard import defaultKeyboards
from .writer import SkipEvent, Writer
from .carpalx import Carpalx, models
-from .plot import letterfreq, triadfreq
+from .plot import letterfreq, triadfreq, triadEffortPlot, triadEffortData
from .util import displayText
def updateDictOp (a, b, op):
@@ -424,6 +424,12 @@ def main ():
sp.add_argument('-s', '--sort', choices={'weight', 'effort', 'combined'}, default='weight', help='Sorter')
sp.add_argument('-n', '--limit', type=int, default=0, help='Sorter')
sp.set_defaults (func=triadfreq)
+
+ sp = subparsers.add_parser('triadeffortdata')
+ sp.set_defaults (func=triadEffortData)
+ sp = subparsers.add_parser('triadeffortplot')
+ sp.set_defaults (func=triadEffortPlot)
+
sp = subparsers.add_parser('keyheatmap')
sp.set_defaults (func=keyHeatmap)
sp = subparsers.add_parser('layoutstats')