diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2021-10-24 09:47:25 +0200 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2021-10-27 15:44:31 +0200 |
commit | a2104773180458a9184466e32075f470f371207c (patch) | |
tree | e5a8fe0448efe775c9c8aa6283108c24fc314b39 | |
parent | 5c494a2cfd17aaa92a15d907a47ac5172f6f458b (diff) | |
download | lulua-a2104773180458a9184466e32075f470f371207c.tar.gz lulua-a2104773180458a9184466e32075f470f371207c.tar.bz2 lulua-a2104773180458a9184466e32075f470f371207c.zip |
report: Add triad analysis
Add a new plot to the report that indicates how difficult it is to type
common triads.
-rwxr-xr-x | gen.sh | 15 |
-rw-r--r-- | lulua/data/report/index.html | 67 |
-rw-r--r-- | lulua/plot.py | 158 |
-rw-r--r-- | lulua/stats.py | 8 |
4 files changed, 233 insertions, 15 deletions
@@ -102,6 +102,12 @@ rule analyze-corpusstats rule wordlist command = lulua-analyze -l ar-lulua latinime < \$in > \$out +rule analyze-triadeffortdata + command = lulua-analyze -l \$layout triadeffortdata < \$in > \$out + +rule analyze-triadeffortplot + command = cat \$in | lulua-analyze -l ar-lulua triadeffortplot > \$out + rule report command = lulua-report -c \$corpus -l \$layoutstats > \$out @@ -218,6 +224,9 @@ build \$reportdir/${l}.svg: render-svg || \$reportdir build \$tempdir/${l}-heat.yaml: analyze-heat \$statsdir/${l}/all.pickle || \$tempdir layout = ${l} +build \$tempdir/${l}-triadeffort.pickle: analyze-triadeffortdata \$statsdir/${l}/all.pickle || \$tempdir + layout = ${l} + build \$reportdir/${l}-heat.svg: render-svg-heat \$tempdir/${l}-heat.yaml || \$reportdir layout = ${l} @@ -255,6 +264,12 @@ EOF metafiles+=" \$tempdir/metadata-$c.yaml" done +echo -n "build \$reportdir/triadeffort.json: analyze-triadeffortplot " +for l in $layouts; do + echo -n "\$tempdir/${l}-triadeffort.pickle " +done +echo "|| \$reportdir" + # dependencies are not properly modeled, always rebuild cat <<EOF build always: phony diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html index 5d08c5b..ed9dc30 100644 --- a/lulua/data/report/index.html +++ b/lulua/data/report/index.html @@ -433,7 +433,63 @@ <div class="lbox"> <h2><a href="#related">Related work</a></h2> <p>This section explores existing keyboard layouts made for the - Arabic language and analyzes their usability.</p> + Arabic language and analyzes their usability. + <!-- --> + Comparing them with the proposed layout above is difficult at best, + because the layouts presented below cover different character sets. + <!-- --> + Some lack numbers, some do not include short vowels and others provide + no way to type symbols. + <!-- --> + Therefore no individual score is assigned to each layout, but an analysis + of each layout’s features is given. 
+ </p> + </div> + </div> + </div> + + <figure id="triadeffort"> + <div class="lbox" lang="en"> + <div id="triadeffort-div"></div> + </div> + <figcaption class="pure-g flexreverse"> + </figcaption> + </figure> + + <div class="pure-g flexreverse"> + <div class="pure-u-1 pure-u-xl-1-2"> + </div> + <div class="pure-u-1 pure-u-xl-1-2" lang="en"> + <div class="lbox"> + <p> + Carpalx tries to minimize the effort of typing in blocks of three + consecutive keystrokes, triads, and thus a good layout in that sense + should make typing frequent triads easy. + <!-- --> + The figure above plots cumulative triad frequency on the x-axis and + weighted cumulative effort on the y-axis. + <!-- --> + Thus, at an x value of 0.5 the y-axis is the sum of triad frequencies + multiplied by their effort for all triads responsible for 50% of the + typing process. + <!-- --> + Standard layouts are the layouts from <a href="#ar-asmo663">ASMO</a>, + <a href="#ar-linux">Linux</a> and <a href="#ar-osx">OSX</a> whereas + <em>usable</em> lists only those which are actually relevant for typing. + <!-- --> + As we can see the layout presented above meets the optimization goal. + <!-- --> + Only the top 5% of all triads are “easier” to type with <a + href="#ar-malas">Malas’ layout</a>, because lulua splits hamza + <bdo lang="ar" dir="rtl">(ء)</bdo> from its alef <bdo lang="ar" + dir="rtl">(ا)</bdo> stem. + <!-- --> + As expected the <a href="#ar-phonetic">phonetic layout</a> is one of the + worst ones, because QWERTY is not optimized for Arabic letter frequencies. + </p> + <p> + The following sections provide details about these layouts. 
+ </p> </div> </div> </div> @@ -901,9 +957,12 @@ </section> <script> -fetch('letterfreq.json') - .then(function(response) { return response.json(); }) - .then(function(item) { Bokeh.embed.embed_item(item, 'letterfreq-div'); }) +const plots = ['letterfreq', 'triadeffort']; +for (const p of plots) { + fetch(p + '.json') + .then(function(response) { return response.json(); }) + .then(function(item) { Bokeh.embed.embed_item(item, p + '-div'); }); +} </script> </body> diff --git a/lulua/plot.py b/lulua/plot.py index fdfc16c..9fb5cf1 100644 --- a/lulua/plot.py +++ b/lulua/plot.py @@ -20,9 +20,6 @@ import sys, argparse, json, unicodedata, pickle, logging, math from operator import itemgetter -from bokeh.plotting import figure -from bokeh.models import ColumnDataSource, LinearAxis, Range1d -from bokeh.embed import json_item from .layout import * from .keyboard import defaultKeyboards @@ -30,9 +27,31 @@ from .util import limit, displayText from .writer import Writer from .carpalx import Carpalx, models +def setPlotStyle (p): + """ Set common plot styles """ + + # Suppress warnings from bokeh if the legend is empty. 
+ if p.legend: + p.legend.location = "top_left" + # Hide glyph on click on legend + p.legend.click_policy = "hide" + p.legend.label_text_font = 'IBM Plex Sans Arabic' + p.legend.border_line_color = None + p.legend.background_fill_color = None + p.legend.inactive_fill_color = 'black' + p.legend.inactive_fill_alpha = 0.1 + + # no border fill + p.border_fill_color = None + p.background_fill_alpha = 0.5 + def letterfreq (args): """ Map key combinations to their text, bin it and plot sorted distribution """ + from bokeh.plotting import figure + from bokeh.models import ColumnDataSource, LinearAxis, Range1d + from bokeh.embed import json_item + # show unicode class "letters other" only whitelistCategory = {'Lo'} @@ -89,15 +108,12 @@ def letterfreq (args): p.vbar(x='letters', width=0.5, top='rel', color="#dc322f", source=source, y_range_name='single') p.add_layout(LinearAxis(y_range_name="single"), 'right') + setPlotStyle (p) # styling p.xgrid.grid_line_color = None - p.xaxis.major_label_text_font_size = "1.5em" - p.xaxis.major_label_text_font_size = "1.5em" - p.xaxis.major_label_text_font = 'IBM Plex Sans Arabic' - p.yaxis.major_label_text_font = 'IBM Plex Sans Arabic' - # no border fill - p.border_fill_color = None - p.background_fill_alpha = 0.5 + for axis, size, font in ((p.xaxis, '1.5em', 'IBM Plex Sans Arabic'), (p.yaxis, '1em', 'IBM Plex Sans')): + axis.major_label_text_font_size = size + axis.major_label_text_font = font json.dump (json_item (p), sys.stdout) @@ -153,3 +169,125 @@ def triadfreq (args): return 0 +def triadEffortData (args): + """ + Plot cumulated triad frequency vs cumulative effort. + + More frequent triads should be easier to type and thus we expect an + exponential distribution for optimized layouts and linear distribution + for everything else. + """ + + import numpy as np + + stats = pickle.load (sys.stdin.buffer) + + # XXX: add layout to stats? 
+ keyboard = defaultKeyboards['ibmpc105'] + layout = defaultLayouts[args.layout].specialize (keyboard) + writer = Writer (layout) + + # letter-based binning, in case multiple buttons are mapped to the same + # letter. + binned = defaultdict (lambda: dict (weight=0, effort=Carpalx (models['mod01'], writer), textTriad=None)) + weightSum = 0 + for triad, weight in stats['triads'].triads.items (): + textTriad = tuple (layout.getText (t) for t in triad) + data = binned[textTriad] + data['weight'] += weight + data['effort'].addTriad (triad, weight) + data['textTriad'] = textTriad + data['layers'] = tuple (layout.modifierToLayer (x.modifier)[0] for x in triad) + weightSum += weight + + # Now bin into equally-sized buckets to reduce amount of data + nBins = 200 + binWidth = weightSum//nBins + cumulativeWeight = 0 + cumulativeEffort = 0 + x = [] + y = [] + for data in sorted (binned.values (), key=lambda x: x['weight'], reverse=True): + cumulativeWeight += data['weight'] + cumulativeEffort += data['effort'].effort * data['weight'] + if not x or x[-1] + binWidth <= cumulativeWeight: + x.append (cumulativeWeight) + y.append (cumulativeEffort) + x.append (cumulativeWeight) + y.append (cumulativeEffort) + + x = np.true_divide (x, cumulativeWeight) + y = np.true_divide (y, cumulativeEffort) + + pickle.dump (dict (x=x, y=y, layout=layout), sys.stdout.buffer, pickle.HIGHEST_PROTOCOL) + +def triadEffortPlot (args): + """ Plot concatenated pickled data from triadEffortData """ + + from .stats import unpickleAll + # Initializing bokeh is an expensive operation and this module is imported + # alot, so only do it when necessary. 
+ from bokeh.palettes import Set3 + from bokeh.plotting import figure + from bokeh.models import RadioButtonGroup, CustomJS, Slope + from bokeh.embed import json_item + from bokeh.layouts import column + + p = figure( + plot_width=1000, + plot_height=500, + sizing_mode='scale_both', + x_range=(0, 1), + y_range=(0, 1), + output_backend="webgl", + ) + data = list (unpickleAll (sys.stdin.buffer)) + colors = Set3[len(data)] + lines = dict () + for o, color in zip (data, colors): + name = o['layout'].name + assert name not in lines + lines[name] = p.line (o['x'], o['y'], line_width=1, color=color, + legend_label=name, name=name) + + # color: base1 + slope = Slope(gradient=1, y_intercept=0, + line_color='#93a1a1', line_dash='dashed', line_width=1) + p.add_layout(slope) + + setPlotStyle (p) + for axis, size, font in ((p.xaxis, '1em', 'IBM Plex Sans'), (p.yaxis, '1em', 'IBM Plex Sans')): + axis.major_label_text_font_size = size + axis.major_label_text_font = font + + LABELS = ["All", "Standard", "Usable"] + visible = { + 0: list (lines.keys ()), + 1: ['ar-asmo663', 'ar-linux', 'ar-osx'], + 2: ['ar-lulua', 'ar-ergoarabic', 'ar-malas', 'ar-linux', 'ar-osx'], + } + ranges = { + 0: [(0, 1), (0, 1)], + 1: [(0, 0.5), (0, 0.4)], + 2: [(0, 0.5), (0, 0.4)], + } + presets = RadioButtonGroup (labels=LABELS, active=0) + # Set visibility and x/yranges on click. Not sure if there’s a more pythonic way. 
+ presets.js_on_click(CustomJS( + args=dict(lines=lines, plot=p, visible=visible, ranges=ranges), + code=""" + for (const [k, line] of Object.entries (lines)) { + line.visible = visible[this.active].includes (k); + } + const xrange = plot.x_range; + xrange.start = ranges[this.active][0][0]; + xrange.end = ranges[this.active][0][1]; + const yrange = plot.y_range; + yrange.start = ranges[this.active][1][0]; + yrange.end = ranges[this.active][1][1]; + """)) + + json.dump (json_item (column (p, presets)), sys.stdout) + + return 0 + diff --git a/lulua/stats.py b/lulua/stats.py index 0925c0d..9d6c537 100644 --- a/lulua/stats.py +++ b/lulua/stats.py @@ -28,7 +28,7 @@ from .layout import * from .keyboard import defaultKeyboards from .writer import SkipEvent, Writer from .carpalx import Carpalx, models -from .plot import letterfreq, triadfreq +from .plot import letterfreq, triadfreq, triadEffortPlot, triadEffortData from .util import displayText def updateDictOp (a, b, op): @@ -424,6 +424,12 @@ def main (): sp.add_argument('-s', '--sort', choices={'weight', 'effort', 'combined'}, default='weight', help='Sorter') sp.add_argument('-n', '--limit', type=int, default=0, help='Sorter') sp.set_defaults (func=triadfreq) + + sp = subparsers.add_parser('triadeffortdata') + sp.set_defaults (func=triadEffortData) + sp = subparsers.add_parser('triadeffortplot') + sp.set_defaults (func=triadEffortPlot) + sp = subparsers.add_parser('keyheatmap') sp.set_defaults (func=keyHeatmap) sp = subparsers.add_parser('layoutstats') |