From a2104773180458a9184466e32075f470f371207c Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sun, 24 Oct 2021 09:47:25 +0200 Subject: report: Add triad analysis Create and add a new plot that should indicate how difficult typing common triads is. --- gen.sh | 15 ++++ lulua/data/report/index.html | 67 ++++++++++++++++-- lulua/plot.py | 158 ++++++++++++++++++++++++++++++++++++++++--- lulua/stats.py | 8 ++- 4 files changed, 233 insertions(+), 15 deletions(-) diff --git a/gen.sh b/gen.sh index fb2cf8a..cde2be4 100755 --- a/gen.sh +++ b/gen.sh @@ -102,6 +102,12 @@ rule analyze-corpusstats rule wordlist command = lulua-analyze -l ar-lulua latinime < \$in > \$out +rule analyze-triadeffortdata + command = lulua-analyze -l \$layout triadeffortdata < \$in > \$out + +rule analyze-triadeffortplot + command = cat \$in | lulua-analyze -l ar-lulua triadeffortplot > \$out + rule report command = lulua-report -c \$corpus -l \$layoutstats > \$out @@ -218,6 +224,9 @@ build \$reportdir/${l}.svg: render-svg || \$reportdir build \$tempdir/${l}-heat.yaml: analyze-heat \$statsdir/${l}/all.pickle || \$tempdir layout = ${l} +build \$tempdir/${l}-triadeffort.pickle: analyze-triadeffortdata \$statsdir/${l}/all.pickle || \$tempdir + layout = ${l} + build \$reportdir/${l}-heat.svg: render-svg-heat \$tempdir/${l}-heat.yaml || \$reportdir layout = ${l} @@ -255,6 +264,12 @@ EOF metafiles+=" \$tempdir/metadata-$c.yaml" done +echo -n "build \$reportdir/triadeffort.json: analyze-triadeffortplot " +for l in $layouts; do + echo -n "\$tempdir/${l}-triadeffort.pickle " +done +echo "|| \$reportdir" + # dependencies are not properly modeled, always rebuild cat <

Related work

This section explores existing keyboard layouts made for the - Arabic language and analyzes their usability.

+ Arabic language and analyzes their usability. + + Comparing them with the proposed layout above is difficult at best, + because the layouts presented below cover different character sets. + + Some lack numbers, some do not include short vowels and others provide + no way to type symbols. + + Therefore no individual score is assigned to each layout, but an analysis + of each layout’s features is given. +

+ + + + +
+
+
+
+
+
+
+ +
+
+
+
+
+

+ Carpalx tries to minimize the effort of typing in blocks of three + consecutive keystrokes, called triads, and thus a good layout in that sense + should make typing frequent triads easy. + + The figure above plots cumulative triad frequency on the x-axis and + weighted cumulative effort on the y-axis, both normalized to the range from 0 to 1. + + Thus, at an x value of 0.5 the y value is the sum of triad frequencies + multiplied by their effort, taken over the most frequent triads that together are responsible for 50% of the + typing process and expressed as a fraction of the total. + + The Standard preset shows the layouts from ASMO, + Linux and OSX, whereas + Usable shows only those layouts which are actually relevant for typing. + + As we can see, the layout presented above meets the optimization goal. + + Only the top 5% of all triads are “easier” to type with Malas’ layout, because lulua splits hamza + (ء) from its alef (ا) stem. + + As expected, the phonetic layout is one of the + worst ones, because QWERTY is not optimized for Arabic letter frequencies. +

+

+ The following sections provide details about these layouts. +

@@ -901,9 +957,12 @@ diff --git a/lulua/plot.py b/lulua/plot.py index fdfc16c..9fb5cf1 100644 --- a/lulua/plot.py +++ b/lulua/plot.py @@ -20,9 +20,6 @@ import sys, argparse, json, unicodedata, pickle, logging, math from operator import itemgetter -from bokeh.plotting import figure -from bokeh.models import ColumnDataSource, LinearAxis, Range1d -from bokeh.embed import json_item from .layout import * from .keyboard import defaultKeyboards @@ -30,9 +27,31 @@ from .util import limit, displayText from .writer import Writer from .carpalx import Carpalx, models +def setPlotStyle (p): + """ Set common plot styles """ + + # Suppress warnings from bokeh if the legend is empty. + if p.legend: + p.legend.location = "top_left" + # Hide glyph on click on legend + p.legend.click_policy = "hide" + p.legend.label_text_font = 'IBM Plex Sans Arabic' + p.legend.border_line_color = None + p.legend.background_fill_color = None + p.legend.inactive_fill_color = 'black' + p.legend.inactive_fill_alpha = 0.1 + + # no border fill + p.border_fill_color = None + p.background_fill_alpha = 0.5 + def letterfreq (args): """ Map key combinations to their text, bin it and plot sorted distribution """ + from bokeh.plotting import figure + from bokeh.models import ColumnDataSource, LinearAxis, Range1d + from bokeh.embed import json_item + # show unicode class "letters other" only whitelistCategory = {'Lo'} @@ -89,15 +108,12 @@ def letterfreq (args): p.vbar(x='letters', width=0.5, top='rel', color="#dc322f", source=source, y_range_name='single') p.add_layout(LinearAxis(y_range_name="single"), 'right') + setPlotStyle (p) # styling p.xgrid.grid_line_color = None - p.xaxis.major_label_text_font_size = "1.5em" - p.xaxis.major_label_text_font_size = "1.5em" - p.xaxis.major_label_text_font = 'IBM Plex Sans Arabic' - p.yaxis.major_label_text_font = 'IBM Plex Sans Arabic' - # no border fill - p.border_fill_color = None - p.background_fill_alpha = 0.5 + for axis, size, font in ((p.xaxis, '1.5em', 'IBM Plex Sans 
Arabic'), (p.yaxis, '1em', 'IBM Plex Sans')): + axis.major_label_text_font_size = size + axis.major_label_text_font = font json.dump (json_item (p), sys.stdout) @@ -153,3 +169,125 @@ def triadfreq (args): return 0 +def triadEffortData (args): + """ + Plot cumulated triad frequency vs cumulative effort. + + More frequent triads should be easier to type and thus we expect an + exponential distribution for optimized layouts and linear distribution + for everything else. + """ + + import numpy as np + + stats = pickle.load (sys.stdin.buffer) + + # XXX: add layout to stats? + keyboard = defaultKeyboards['ibmpc105'] + layout = defaultLayouts[args.layout].specialize (keyboard) + writer = Writer (layout) + + # letter-based binning, in case multiple buttons are mapped to the same + # letter. + binned = defaultdict (lambda: dict (weight=0, effort=Carpalx (models['mod01'], writer), textTriad=None)) + weightSum = 0 + for triad, weight in stats['triads'].triads.items (): + textTriad = tuple (layout.getText (t) for t in triad) + data = binned[textTriad] + data['weight'] += weight + data['effort'].addTriad (triad, weight) + data['textTriad'] = textTriad + data['layers'] = tuple (layout.modifierToLayer (x.modifier)[0] for x in triad) + weightSum += weight + + # Now bin into equally-sized buckets to reduce amount of data + nBins = 200 + binWidth = weightSum//nBins + cumulativeWeight = 0 + cumulativeEffort = 0 + x = [] + y = [] + for data in sorted (binned.values (), key=lambda x: x['weight'], reverse=True): + cumulativeWeight += data['weight'] + cumulativeEffort += data['effort'].effort * data['weight'] + if not x or x[-1] + binWidth <= cumulativeWeight: + x.append (cumulativeWeight) + y.append (cumulativeEffort) + x.append (cumulativeWeight) + y.append (cumulativeEffort) + + x = np.true_divide (x, cumulativeWeight) + y = np.true_divide (y, cumulativeEffort) + + pickle.dump (dict (x=x, y=y, layout=layout), sys.stdout.buffer, pickle.HIGHEST_PROTOCOL) + +def triadEffortPlot (args): + 
""" Plot concatenated pickled data from triadEffortData """ + + from .stats import unpickleAll + # Initializing bokeh is an expensive operation and this module is imported + # alot, so only do it when necessary. + from bokeh.palettes import Set3 + from bokeh.plotting import figure + from bokeh.models import RadioButtonGroup, CustomJS, Slope + from bokeh.embed import json_item + from bokeh.layouts import column + + p = figure( + plot_width=1000, + plot_height=500, + sizing_mode='scale_both', + x_range=(0, 1), + y_range=(0, 1), + output_backend="webgl", + ) + data = list (unpickleAll (sys.stdin.buffer)) + colors = Set3[len(data)] + lines = dict () + for o, color in zip (data, colors): + name = o['layout'].name + assert name not in lines + lines[name] = p.line (o['x'], o['y'], line_width=1, color=color, + legend_label=name, name=name) + + # color: base1 + slope = Slope(gradient=1, y_intercept=0, + line_color='#93a1a1', line_dash='dashed', line_width=1) + p.add_layout(slope) + + setPlotStyle (p) + for axis, size, font in ((p.xaxis, '1em', 'IBM Plex Sans'), (p.yaxis, '1em', 'IBM Plex Sans')): + axis.major_label_text_font_size = size + axis.major_label_text_font = font + + LABELS = ["All", "Standard", "Usable"] + visible = { + 0: list (lines.keys ()), + 1: ['ar-asmo663', 'ar-linux', 'ar-osx'], + 2: ['ar-lulua', 'ar-ergoarabic', 'ar-malas', 'ar-linux', 'ar-osx'], + } + ranges = { + 0: [(0, 1), (0, 1)], + 1: [(0, 0.5), (0, 0.4)], + 2: [(0, 0.5), (0, 0.4)], + } + presets = RadioButtonGroup (labels=LABELS, active=0) + # Set visibility and x/yranges on click. Not sure if there’s a more pythonic way. 
+ presets.js_on_click(CustomJS( + args=dict(lines=lines, plot=p, visible=visible, ranges=ranges), + code=""" + for (const [k, line] of Object.entries (lines)) { + line.visible = visible[this.active].includes (k); + } + const xrange = plot.x_range; + xrange.start = ranges[this.active][0][0]; + xrange.end = ranges[this.active][0][1]; + const yrange = plot.y_range; + yrange.start = ranges[this.active][1][0]; + yrange.end = ranges[this.active][1][1]; + """)) + + json.dump (json_item (column (p, presets)), sys.stdout) + + return 0 + diff --git a/lulua/stats.py b/lulua/stats.py index 0925c0d..9d6c537 100644 --- a/lulua/stats.py +++ b/lulua/stats.py @@ -28,7 +28,7 @@ from .layout import * from .keyboard import defaultKeyboards from .writer import SkipEvent, Writer from .carpalx import Carpalx, models -from .plot import letterfreq, triadfreq +from .plot import letterfreq, triadfreq, triadEffortPlot, triadEffortData from .util import displayText def updateDictOp (a, b, op): @@ -424,6 +424,12 @@ def main (): sp.add_argument('-s', '--sort', choices={'weight', 'effort', 'combined'}, default='weight', help='Sorter') sp.add_argument('-n', '--limit', type=int, default=0, help='Sorter') sp.set_defaults (func=triadfreq) + + sp = subparsers.add_parser('triadeffortdata') + sp.set_defaults (func=triadEffortData) + sp = subparsers.add_parser('triadeffortplot') + sp.set_defaults (func=triadEffortPlot) + sp = subparsers.add_parser('keyheatmap') sp.set_defaults (func=keyHeatmap) sp = subparsers.add_parser('layoutstats') -- cgit v1.2.3