summary | refs | log | tree | commit | diff
path: root/lulua
diff options
context:
space:
mode:
author Lars-Dominik Braun <lars@6xq.net> 2020-05-16 15:16:01 +0200
committer Lars-Dominik Braun <lars@6xq.net> 2020-05-16 15:19:53 +0200
commit 862d0462e56f1472b0bc9119286618b2127427d4 (patch)
tree 1c23251f0e666dbae28ae4c3b42888533e8b7337 /lulua
parent 244197b5f8b1f4d73d4ab9ac838334860b55662c (diff)
download lulua-862d0462e56f1472b0bc9119286618b2127427d4.tar.gz
lulua-862d0462e56f1472b0bc9119286618b2127427d4.tar.bz2
lulua-862d0462e56f1472b0bc9119286618b2127427d4.zip
report: Add simple sentence effort visualization
Trying to give the reader a sense for how difficult carpalx thinks certain parts of a sentence are. Conclusions are only valid within a single layout; comparing the displayed efforts between layouts is not possible.
Diffstat (limited to 'lulua')
-rw-r--r-- lulua/data/report/index.html 17
-rw-r--r-- lulua/data/report/style.css 5
-rw-r--r-- lulua/report.py 18
-rw-r--r-- lulua/stats.py 56
-rw-r--r-- lulua/test_report.py 11
5 files changed, 105 insertions, 2 deletions
diff --git a/lulua/data/report/index.html b/lulua/data/report/index.html
index cda1c9d..749c5e9 100644
--- a/lulua/data/report/index.html
+++ b/lulua/data/report/index.html
@@ -333,6 +333,23 @@
{% endif %}
{% endfor %}
</div>
+<div class="sentencestats">
+<p lang="en">Examples:</p>
+<ul lang="ar">
+{% for sentence in stats.sentences %}
+ <li>
+ {% for match, weight in sentence[0] -%}
+ {%- if weight is none -%}
+ <span>{{ match }}</span>
+ {%- else -%}
+ {%- set c = weight|blendn((38, 139, 210), (108, 113, 196), (211, 54, 130), (220, 50, 47)) -%}
+ <span style="color: rgb({{ c[0] }}, {{ c[1] }}, {{ c[2] }});" title="{{ '%5.5f'|format(weight) }}">{{ match }}</span>
+ {%- endif -%}
+ {%- endfor %}
+ </li>
+{% endfor %}
+</ul>
+</div>
{% endmacro %}
<figure id="ar-lulua-heat">
diff --git a/lulua/data/report/style.css b/lulua/data/report/style.css
index e734a0e..da2787f 100644
--- a/lulua/data/report/style.css
+++ b/lulua/data/report/style.css
@@ -157,6 +157,11 @@ div.fingerhandstats .fingers .index {
div.fingerhandstats .fingers .thumb {
border: 0.1em solid var(--finger-thumb);
}
+div.sentencestats ul {
+ list-style-type: none;
+ margin: 1em 0;
+ padding: 0;
+}
.table-overflow {
overflow-x: auto;
diff --git a/lulua/report.py b/lulua/report.py
index 06bb724..8a50681 100644
--- a/lulua/report.py
+++ b/lulua/report.py
@@ -18,7 +18,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
-import sys, argparse, logging, pickle
+import sys, argparse, logging, pickle, math
from gettext import GNUTranslations, NullTranslations
from decimal import Decimal
@@ -54,6 +54,21 @@ def arabnum (s):
m = {'0': '٠', '1': '١', '2': '٢', '3': '٣', '4': '٤', '5': '٥', '6': '٦', '7': '٧', '8': '٨', '9': '٩', ',': '٬', '.': '٫'}
return ''.join (map (lambda x: m.get (x, x), s))
+def clamp (v, lower, upper):
+ return max (min (v, upper), lower)
+
+def blend (v, a, b):
+ v = clamp (v, 0, 1)
+ return (b-a)*v+a
+
+def blendn (v, *l):
+ assert 0 <= v <= 1
+ n = len (l)
+ step = 1/(n-1)
+ i = min (int (math.floor (v/step)), n-2)
+ stretchedv = (v-i*step)/step
+ return [blend (stretchedv, x, y) for x, y in zip (l[i], l[i+1])]
+
def render ():
parser = argparse.ArgumentParser(description='Create lulua report.')
parser.add_argument('-c', '--corpus', nargs='+', metavar='FILE', help='Corpus metadata files')
@@ -67,6 +82,7 @@ def render ():
env.filters['approx'] = approx
env.filters['numspace'] = numspace
env.filters['arabnum'] = arabnum
+ env.filters['blendn'] = blendn
corpus = []
for x in args.corpus:
diff --git a/lulua/stats.py b/lulua/stats.py
index 550f85a..9c11d10 100644
--- a/lulua/stats.py
+++ b/lulua/stats.py
@@ -22,6 +22,7 @@ import sys, operator, pickle, argparse, logging, yaml, math, time
from operator import itemgetter
from itertools import chain, groupby, product
from collections import defaultdict
+from io import StringIO
from .layout import *
from .keyboard import defaultKeyboards
@@ -314,7 +315,53 @@ def keyHeatmap (args):
buttons[k.name] = v
yaml.dump (data, sys.stdout)
+def sentenceStats (keyboard, layout, text):
+ """
+ Calculate effort for every character (button) in a text
+ """
+
+ writer = Writer (layout)
+
+ effort = Carpalx (models['mod01'], writer)
+ _ignored = frozenset (keyboard[x] for x in ('Fl_space', 'Fr_space', 'CD_ret', 'Cl_tab'))
+ writtenText = []
+ skipped = 0
+ for match, event in writer.type (StringIO (text)):
+ if isinstance (event, SkipEvent):
+ skipped += 1
+ writtenText.append ([event.char, None, 0])
+ if not isinstance (event, ButtonCombination):
+ continue
+
+ writtenText.append ([match, event, 0])
+
+ triad = list (filter (lambda x: x[1] is not None and first (x[1].buttons) not in _ignored, writtenText))[-3:]
+ if len (triad) == 3:
+ matchTriad, buttonTriad, _ = zip (*triad)
+ triadEffort = effort._triadEffort (tuple (buttonTriad))
+
+ # now walk the existing text backwards to find the original matches and add the computed effort
+ writtenTextIt = iter (reversed (writtenText))
+ matchTriad = list (matchTriad)
+ while matchTriad:
+ t = next (writtenTextIt)
+ if t[0] == matchTriad[-1]:
+ matchTriad.pop ()
+ t[2] += triadEffort
+
+ effort.addTriad (buttonTriad, 1)
+
+ # normalize efforts to [0, 1]
+ s = max (map (lambda x: x[2], writtenText))
+ writtenText = list (map (lambda x: (x[0], x[2]/s if x[1] is not None else None), writtenText))
+ return (writtenText, effort.effort, skipped)
+
+from .text import mapChars, charMap
+
def layoutstats (args):
+ """
+ Statistics for the report
+ """
stats = pickle.load (sys.stdin.buffer)
keyboard = defaultKeyboards[args.keyboard]
@@ -330,12 +377,20 @@ def layoutstats (args):
fingers[(hand, finger)] += count
asymmetry = hands[LEFT]/buttonPresses - hands[RIGHT]/buttonPresses
+
+ sentences = [
+ 'أَوْ كَصَيِّبٍ مِّنَ السَّمَاءِ فِيهِ ظُلُمَاتٌ وَرَعْدٌ وَبَرْقٌ يَجْعَلُونَ أَصَابِعَهُمْ فِي آذَانِهِم مِّنَ الصَّوَاعِقِ حَذَرَ الْمَوْتِ وَاللَّهُ مُحِيطٌ بِالْكَافِرِينَ',
+ 'اللغة العربية هي أكثرُ اللغاتِ السامية تحدثاً، وإحدى أكثر اللغات انتشاراً في العالم، يتحدثُها أكثرُ من 467 مليون نسمة.',
+ ]
+ sentences = [sentenceStats (keyboard, layout, mapChars (s, charMap).replace ('\r\n', '\n')) for s in sentences]
+
pickle.dump (dict (
layout=args.layout,
hands=dict (hands),
fingers=dict (fingers),
buttonPresses=buttonPresses,
asymmetry=asymmetry,
+ sentences=sentences,
), sys.stdout.buffer)
def latinImeDict (args):
@@ -375,6 +430,7 @@ def corpusStats (args):
# make document concatable
print ('---')
+
def main ():
parser = argparse.ArgumentParser(description='Process statistics files.')
parser.add_argument('-l', '--layout', metavar='LAYOUT', help='Keyboard layout name')
diff --git a/lulua/test_report.py b/lulua/test_report.py
index 16a751a..448d796 100644
--- a/lulua/test_report.py
+++ b/lulua/test_report.py
@@ -20,7 +20,7 @@
from decimal import Decimal
-from .report import approx
+from .report import approx, blend, blendn
def test_approx ():
assert approx (0) == (Decimal ('0'), '')
@@ -37,4 +37,13 @@ def test_approx ():
assert approx (10**9) == (Decimal ('1'), 'billion')
assert approx (10**12) == (Decimal ('1000'), 'billion')
+def test_blend ():
+ assert blend (0.5, 0, 1) == 0.5
+ assert blend (0.5, 0, 2) == 1
+
+ assert blend (0.1, 0, 1) == 0.1
+ assert blend (0.9, 0, 1) == 0.9
+
+ assert blendn (0.5, (0, ), (1, )) == [0.5]
+ assert blendn (0.5, (0, ), (0.7, ), (1, )) == [0.7]