From 862d0462e56f1472b0bc9119286618b2127427d4 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 16 May 2020 15:16:01 +0200 Subject: report: Add simple sentence effort visualization Trying to give the reader a sense for how difficult carpalx thinks certain parts of a sentence are. Conclusions are only valid for a single layout and comparison between them is not possible. --- lulua/data/report/index.html | 17 ++++++++++++++ lulua/data/report/style.css | 5 ++++ lulua/report.py | 18 +++++++++++++- lulua/stats.py | 56 ++++++++++++++++++++++++++++++++++++++++++++ lulua/test_report.py | 11 ++++++++- 5 files changed, 105 insertions(+), 2 deletions(-) (limited to 'lulua') diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html index cda1c9d..749c5e9 100644 --- a/lulua/data/report/index.html +++ b/lulua/data/report/index.html @@ -333,6 +333,23 @@ {% endif %} {% endfor %} +
+

Examples:

+ +
{% endmacro %}
def clamp (v, lower, upper):
    """
    Limit v to the closed interval [lower, upper]
    """
    return max (min (v, upper), lower)

def blend (v, a, b):
    """
    Linearly interpolate between a (at v=0) and b (at v=1)

    v is clamped to [0, 1] first, so the result never leaves the range
    spanned by a and b.
    """
    v = clamp (v, 0, 1)
    return (b-a)*v+a

def blendn (v, *l):
    """
    Interpolate component-wise along a gradient of evenly spaced stops

    Every stop in l is a sequence of numbers (for instance a colour
    triple). The first stop sits at v=0, the last one at v=1 and the
    remaining ones are spread evenly in between.

    Returns a list with one interpolated value per component. A single stop
    is a constant gradient and is returned as a list unchanged.

    Raises ValueError if v is outside of [0, 1] or no stop is given.
    """
    # Explicit check instead of assert: asserts vanish under python -O and a
    # negative v would then silently index the stops from the end.
    if not 0 <= v <= 1:
        raise ValueError ('v must be within [0, 1], got {!r}'.format (v))
    n = len (l)
    if n == 0:
        raise ValueError ('at least one gradient stop is required')
    if n == 1:
        # Nothing to interpolate; also avoids dividing by zero below.
        return list (l[0])
    step = 1/(n-1)
    # v is non-negative here, so truncation equals floor. Cap the index so
    # v=1 falls into the last segment instead of past it.
    i = min (int (v/step), n-2)
    # Position of v within segment i, rescaled to [0, 1].
    stretchedv = (v-i*step)/step
    return [blend (stretchedv, x, y) for x, y in zip (l[i], l[i+1])]
def sentenceStats (keyboard, layout, text):
    """
    Calculate effort for every character (button) in a text

    The text is typed with a Writer for the given layout. Each run of three
    consecutive countable button combinations is scored with the mod01
    Carpalx model and the score is added to all three of its members.

    Returns a tuple (writtenText, effort, skipped):
    writtenText is a list of (match, value) pairs in typing order, where
    value is the accumulated effort scaled so that the largest one is 1, or
    None for characters the writer skipped;
    effort is the effort attribute of the Carpalx instance after all triads
    were added;
    skipped is the number of SkipEvents seen.
    """

    writer = Writer (layout)

    effort = Carpalx (models['mod01'], writer)
    _ignored = frozenset (keyboard[x] for x in ('Fl_space', 'Fr_space', 'CD_ret', 'Cl_tab'))
    writtenText = []
    # The last (up to) three entries of writtenText whose first button is
    # not ignored. Tracked incrementally, so the whole text is not filtered
    # again for every event.
    recent = []
    skipped = 0
    for match, event in writer.type (StringIO (text)):
        if isinstance (event, SkipEvent):
            skipped += 1
            writtenText.append ([event.char, None, 0])
        if not isinstance (event, ButtonCombination):
            continue

        entry = [match, event, 0]
        writtenText.append (entry)
        if first (event.buttons) not in _ignored:
            recent = recent[-2:] + [entry]

        # NOTE(review): as before, the triad is scored after every button
        # combination, including ignored ones, so the triad preceding e.g. a
        # space is counted twice. Confirm whether that is intended.
        if len (recent) < 3:
            continue

        buttonTriad = tuple (e[1] for e in recent)
        triadEffort = effort._triadEffort (buttonTriad)
        # The entries are shared with writtenText, so updating them here
        # updates the text as well.
        for e in recent:
            e[2] += triadEffort

        effort.addTriad (buttonTriad, 1)

    # normalize efforts to [0, 1]; texts with less than three countable
    # buttons have no effort at all and must not divide by zero
    peak = max ((e[2] for e in writtenText), default=0)
    normalized = []
    for m, ev, value in writtenText:
        if ev is None:
            normalized.append ((m, None))
        else:
            normalized.append ((m, value/peak if peak else 0.0))
    return (normalized, effort.effort, skipped)
مِّنَ السَّمَاءِ فِيهِ ظُلُمَاتٌ وَرَعْدٌ وَبَرْقٌ يَجْعَلُونَ أَصَابِعَهُمْ فِي آذَانِهِم مِّنَ الصَّوَاعِقِ حَذَرَ الْمَوْتِ وَاللَّهُ مُحِيطٌ بِالْكَافِرِينَ', + 'اللغة العربية هي أكثرُ اللغاتِ السامية تحدثاً، وإحدى أكثر اللغات انتشاراً في العالم، يتحدثُها أكثرُ من 467 مليون نسمة.', + ] + sentences = [sentenceStats (keyboard, layout, mapChars (s, charMap).replace ('\r\n', '\n')) for s in sentences] + pickle.dump (dict ( layout=args.layout, hands=dict (hands), fingers=dict (fingers), buttonPresses=buttonPresses, asymmetry=asymmetry, + sentences=sentences, ), sys.stdout.buffer) def latinImeDict (args): @@ -375,6 +430,7 @@ def corpusStats (args): # make document concatable print ('---') + def main (): parser = argparse.ArgumentParser(description='Process statistics files.') parser.add_argument('-l', '--layout', metavar='LAYOUT', help='Keyboard layout name') diff --git a/lulua/test_report.py b/lulua/test_report.py index 16a751a..448d796 100644 --- a/lulua/test_report.py +++ b/lulua/test_report.py @@ -20,7 +20,7 @@ from decimal import Decimal -from .report import approx +from .report import approx, blend, blendn def test_approx (): assert approx (0) == (Decimal ('0'), '') @@ -37,4 +37,13 @@ def test_approx (): assert approx (10**9) == (Decimal ('1'), 'billion') assert approx (10**12) == (Decimal ('1000'), 'billion') +def test_blend (): + assert blend (0.5, 0, 1) == 0.5 + assert blend (0.5, 0, 2) == 1 + + assert blend (0.1, 0, 1) == 0.1 + assert blend (0.9, 0, 1) == 0.9 + + assert blendn (0.5, (0, ), (1, )) == [0.5] + assert blendn (0.5, (0, ), (0.7, ), (1, )) == [0.7] -- cgit v1.2.3