From 862d0462e56f1472b0bc9119286618b2127427d4 Mon Sep 17 00:00:00 2001
From: Lars-Dominik Braun <lars@6xq.net>
Date: Sat, 16 May 2020 15:16:01 +0200
Subject: report: Add simple sentence effort visualization

Try to give the reader a sense of how difficult carpalx considers
certain parts of a sentence to be. Conclusions are only valid within a
single layout; comparing different layouts this way is not possible.
---
 lulua/data/report/index.html | 17 ++++++++++++++
 lulua/data/report/style.css  |  5 ++++
 lulua/report.py              | 18 +++++++++++++-
 lulua/stats.py               | 56 ++++++++++++++++++++++++++++++++++++++++++++
 lulua/test_report.py         | 11 ++++++++-
 5 files changed, 105 insertions(+), 2 deletions(-)
diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html
index cda1c9d..749c5e9 100644
--- a/lulua/data/report/index.html
+++ b/lulua/data/report/index.html
@@ -333,6 +333,23 @@
 	{% endif %}
 {% endfor %}
 </div>
+<div class="sentencestats">
+<p lang="en">Examples:</p>
+<ul lang="ar">
+{% for sentence in stats.sentences %}
+	<li>
+	{% for match, weight in sentence[0] -%}
+		{%- if weight is none -%}
+			<span>{{ match }}</span>
+		{%- else -%}
+			{%- set c = weight|blendn((38, 139, 210), (108, 113, 196), (211, 54, 130), (220, 50, 47)) -%}
+			<span style="color: rgb({{ c[0] }}, {{ c[1] }}, {{ c[2] }});" title="{{ '%5.5f'|format(weight) }}">{{ match }}</span>
+		{%- endif -%}
+	{%- endfor %}
+	</li>
+{% endfor %}
+</ul>
+</div>
 {% endmacro %}
 
 <figure id="ar-lulua-heat">
diff --git a/lulua/data/report/style.css b/lulua/data/report/style.css
index e734a0e..da2787f 100644
--- a/lulua/data/report/style.css
+++ b/lulua/data/report/style.css
@@ -157,6 +157,11 @@ div.fingerhandstats .fingers .index {
 div.fingerhandstats .fingers .thumb {
 	border: 0.1em solid var(--finger-thumb);
 }
+div.sentencestats ul { /* example sentences: plain list without bullets or padding */
+	list-style-type: none;
+	margin: 1em 0;
+	padding: 0;
+}
 
 .table-overflow {
 	overflow-x: auto;
diff --git a/lulua/report.py b/lulua/report.py
index 06bb724..8a50681 100644
--- a/lulua/report.py
+++ b/lulua/report.py
@@ -18,7 +18,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 
-import sys, argparse, logging, pickle
+import sys, argparse, logging, pickle, math
 from gettext import GNUTranslations, NullTranslations
 from decimal import Decimal
 
@@ -54,6 +54,21 @@ def arabnum (s):
     m = {'0': '٠', '1': '١', '2': '٢', '3': '٣', '4': '٤', '5': '٥', '6': '٦', '7': '٧', '8': '٨', '9': '٩', ',': '٬', '.': '٫'}
     return ''.join (map (lambda x: m.get (x, x), s))
 
+def clamp (v, lower, upper): # limit v to the range [lower, upper]
+    return max (min (v, upper), lower)
+
+def blend (v, a, b): # linear interpolation from a (v <= 0) to b (v >= 1)
+    t = clamp (v, 0, 1)
+    return a+(b-a)*t
+
+def blendn (v, *l):
+    """ Blend v in [0, 1] piecewise-linearly across two or more equally spaced tuples l, returns a list """
+    if not 0 <= v <= 1 or len (l) < 2:
+        raise ValueError ('blendn needs 0 <= v <= 1 and at least two stops')
+    step = 1/(len (l)-1)
+    i = min (int (math.floor (v/step)), len (l)-2) # v == 1 belongs to the last segment
+    return [blend ((v-i*step)/step, x, y) for x, y in zip (l[i], l[i+1])]
+
 def render ():
     parser = argparse.ArgumentParser(description='Create lulua report.')
     parser.add_argument('-c', '--corpus', nargs='+', metavar='FILE', help='Corpus metadata files')
@@ -67,6 +82,7 @@ def render ():
     env.filters['approx'] = approx
     env.filters['numspace'] = numspace
     env.filters['arabnum'] = arabnum
+    env.filters['blendn'] = blendn
 
     corpus = []
     for x in args.corpus:
diff --git a/lulua/stats.py b/lulua/stats.py
index 550f85a..9c11d10 100644
--- a/lulua/stats.py
+++ b/lulua/stats.py
@@ -22,6 +22,7 @@ import sys, operator, pickle, argparse, logging, yaml, math, time
 from operator import itemgetter
 from itertools import chain, groupby, product
 from collections import defaultdict
+from io import StringIO
 
 from .layout import *
 from .keyboard import defaultKeyboards
@@ -314,7 +315,53 @@ def keyHeatmap (args):
         buttons[k.name] = v
     yaml.dump (data, sys.stdout)
 
+def sentenceStats (keyboard, layout, text):
+    """
+    Calculate the effort for every character (button) in a text. Returns a tuple
+    (list of (match, effort/max effort or None if skipped), effort.effort, number of skipped characters).
+    """
+    writer = Writer (layout)
+    # scores triads of consecutive non-ignored buttons (carpalx model mod01)
+    effort = Carpalx (models['mod01'], writer)
+    _ignored = frozenset (keyboard[x] for x in ('Fl_space', 'Fr_space', 'CD_ret', 'Cl_tab'))
+    writtenText = []
+    skipped = 0
+    for match, event in writer.type (StringIO (text)):
+        if isinstance (event, SkipEvent):
+            skipped += 1
+            writtenText.append ([event.char, None, 0])
+        if not isinstance (event, ButtonCombination):
+            continue
+
+        writtenText.append ([match, event, 0])
+
+        triad = list (filter (lambda x: x[1] is not None and first (x[1].buttons) not in _ignored, writtenText))[-3:]
+        if len (triad) == 3:
+            matchTriad, buttonTriad, _ = zip (*triad)
+            triadEffort = effort._triadEffort (tuple (buttonTriad))
+
+            # now walk the existing text backwards to find the original matches and add the computed effort
+            writtenTextIt = iter (reversed (writtenText))
+            matchTriad = list (matchTriad)
+            while matchTriad:
+                t = next (writtenTextIt)
+                if t[0] == matchTriad[-1]:
+                    matchTriad.pop ()
+                    t[2] += triadEffort
+
+            effort.addTriad (buttonTriad, 1)
+
+    # normalize efforts to [0, 1]; divide by 1 if there is no effort at all (empty text, fewer than three buttons)
+    s = max (map (lambda x: x[2], writtenText), default=0) or 1
+    writtenText = list (map (lambda x: (x[0], x[2]/s if x[1] is not None else None), writtenText))
+    return (writtenText, effort.effort, skipped)
+
+from .text import mapChars, charMap
+
 def layoutstats (args):
+    """
+    Statistics for the report
+    """
     stats = pickle.load (sys.stdin.buffer)
 
     keyboard = defaultKeyboards[args.keyboard]
@@ -330,12 +377,20 @@ def layoutstats (args):
         fingers[(hand, finger)] += count
 
     asymmetry = hands[LEFT]/buttonPresses - hands[RIGHT]/buttonPresses
+
+    sentences = [
+        'أَوْ كَصَيِّبٍ مِّنَ السَّمَاءِ فِيهِ ظُلُمَاتٌ وَرَعْدٌ وَبَرْقٌ يَجْعَلُونَ أَصَابِعَهُمْ فِي آذَانِهِم مِّنَ الصَّوَاعِقِ حَذَرَ الْمَوْتِ وَاللَّهُ مُحِيطٌ بِالْكَافِرِينَ',
+        'اللغة العربية هي أكثرُ اللغاتِ السامية تحدثاً، وإحدى أكثر اللغات انتشاراً في العالم، يتحدثُها أكثرُ من 467 مليون نسمة.',
+        ]
+    sentences = [sentenceStats (keyboard, layout, mapChars (s, charMap).replace ('\r\n', '\n')) for s in sentences]
+
     pickle.dump (dict (
             layout=args.layout,
             hands=dict (hands),
             fingers=dict (fingers),
             buttonPresses=buttonPresses,
             asymmetry=asymmetry,
+            sentences=sentences,
             ), sys.stdout.buffer)
 
 def latinImeDict (args):
@@ -375,6 +430,7 @@ def corpusStats (args):
     # make document concatable
     print ('---')
 
+
 def main ():
     parser = argparse.ArgumentParser(description='Process statistics files.')
     parser.add_argument('-l', '--layout', metavar='LAYOUT', help='Keyboard layout name')
diff --git a/lulua/test_report.py b/lulua/test_report.py
index 16a751a..448d796 100644
--- a/lulua/test_report.py
+++ b/lulua/test_report.py
@@ -20,7 +20,7 @@
 
 from decimal import Decimal
 
-from .report import approx
+from .report import approx, blend, blendn
 
 def test_approx ():
     assert approx (0) == (Decimal ('0'), '')
@@ -37,4 +37,13 @@ def test_approx ():
     assert approx (10**9) == (Decimal ('1'), 'billion')
     assert approx (10**12) == (Decimal ('1000'), 'billion')
 
+def test_blend ():
+    """ blend interpolates between two values, blendn between neighbouring stops """
+    # (v, a, b) -> expected result
+    for args, expected in (((0.5, 0, 1), 0.5), ((0.5, 0, 2), 1), ((0.1, 0, 1), 0.1), ((0.9, 0, 1), 0.9)):
+        assert blend (*args) == expected
+
+    # two stops behave like blend; with three stops v=0.5 hits the middle stop exactly
+    assert blendn (0.5, (0, ), (1, )) == [0.5]
+    assert blendn (0.5, (0, ), (0.7, ), (1, )) == [0.7]
 
-- 
cgit v1.2.3