diff options
Diffstat (limited to 'lulua')
| -rw-r--r-- | lulua/__init__.py | 0 | ||||
| -rw-r--r-- | lulua/carpalx.py | 325 | ||||
| -rw-r--r-- | lulua/data/keyboards/ibmpc105.yaml | 132 | ||||
| -rw-r--r-- | lulua/data/layouts/ar-asmo663.yaml | 113 | ||||
| -rw-r--r-- | lulua/data/layouts/ar-linux.yaml | 119 | ||||
| -rw-r--r-- | lulua/data/layouts/ar-lulua.yaml | 37 | ||||
| -rw-r--r-- | lulua/data/layouts/ar-malas.yaml | 123 | ||||
| -rw-r--r-- | lulua/data/layouts/ar-osman.yaml | 121 | ||||
| -rw-r--r-- | lulua/data/layouts/ar-phonetic.yaml | 147 | ||||
| -rw-r--r-- | lulua/data/layouts/null.yaml | 3 | ||||
| -rw-r--r-- | lulua/keyboard.py | 221 | ||||
| -rw-r--r-- | lulua/layout.py | 351 | ||||
| -rw-r--r-- | lulua/optimize.py | 341 | ||||
| -rw-r--r-- | lulua/plot.py | 146 | ||||
| -rw-r--r-- | lulua/render.py | 353 | ||||
| -rw-r--r-- | lulua/stats.py | 222 | ||||
| -rw-r--r-- | lulua/test_carpalx.py | 201 | ||||
| -rw-r--r-- | lulua/test_keyboard.py | 59 | ||||
| -rw-r--r-- | lulua/test_layout.py | 75 | ||||
| -rw-r--r-- | lulua/test_optimize.py | 39 | ||||
| -rw-r--r-- | lulua/test_stats.py | 39 | ||||
| -rw-r--r-- | lulua/test_writer.py | 118 | ||||
| -rw-r--r-- | lulua/text.py | 260 | ||||
| -rw-r--r-- | lulua/util.py | 67 | ||||
| -rw-r--r-- | lulua/writer.py | 202 | 
25 files changed, 3814 insertions, 0 deletions
| diff --git a/lulua/__init__.py b/lulua/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/lulua/__init__.py diff --git a/lulua/carpalx.py b/lulua/carpalx.py new file mode 100644 index 0000000..3e104bb --- /dev/null +++ b/lulua/carpalx.py @@ -0,0 +1,325 @@ +# Copyright (c) 2019 lulua contributors +#  +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +#  +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +#  +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +""" +Compute typing effort for triads according to +http://mkweb.bcgsc.ca/carpalx/?typing_effort + +Extended by support for multiple layers/multiple key presses based on +suggestion by Martin Krzywinski. b_ix and p_ix with x in {1, 2, 3} are now a +sum of all key’s effort/penalty plus a multi-key penalty weighted by model +parameter k_s. Additionally the stroke path is evaluated for all triple +combinations (see code of _triadEffort). 
+ +Optimized for pypy, not cpython +""" + +from collections import defaultdict, namedtuple +from itertools import chain, product +from typing import List, Tuple, Callable, Mapping, Dict + +from .layout import LEFT, RIGHT, THUMB, INDEX, MIDDLE, RING, LITTLE, ButtonCombination +from .writer import Writer +from .util import first +from .keyboard import Button + +ModelParams = namedtuple ('ModelParams', ['kBPS', 'k123S', +        'w0HRF', 'pHand', 'pRow', 'pFinger', 'fHRF', 'baselineEffort']) + +# model parameters mod_01 from http://mkweb.bcgsc.ca/carpalx/?model_parameters +model01 = ModelParams ( +    # k_b, k_p, k_s +    kBPS = (0.3555, 0.6423, 0.4268), +    # k_1, k_2, k_3 plus extension k_S (weight for simultaneous key presses) +    k123S = (1.0, 0.367, 0.235, 1.0), +    # w0, wHand, wRow, wFinger +    w0HRF = (0.0, 1.0, 1.3088, 2.5948), +    pHand = {LEFT: 0.0, RIGHT: 0.0}, +    # numbers, top, base, bottom, control (XXX not part of original model) +    pRow = (1.5, 0.5, 0.0, 1.0, 1.5), +    # symmetric penalties +    pFinger = { +        LEFT: { +            THUMB: 0.0, # XXX: not part of the original model +            INDEX: 0.0, +            MIDDLE: 0.0, +            RING: 0.5, +            LITTLE: 1.0, +            }, +        RIGHT: { +            THUMB: 0.0, # XXX: not part of the original model +            INDEX: 0.0, +            MIDDLE: 0.0, +            RING: 0.5, +            LITTLE: 1.0, +            }, +        }, +    # fHand, fRow, fFinger +    fHRF = (1.0, 0.3, 0.3), +    # baseline key effort +    baselineEffort = { +        'Bl1': 5.0, +        'Bl2': 5.0, +        'Bl3': 4.0, +        'Bl4': 4.0, +        'Bl5': 4.0, +        'Bl6': 3.5, +        'Bl7': 4.5, +        'Br6': 4.0, +        'Br5': 4.0, +        'Br4': 4.0, +        'Br3': 4.0, +        'Br2': 4.0, +        'Br1': 4.5, + +        'Cl1': 2.0, +        'Cl2': 2.0, +        'Cl3': 2.0, +        'Cl4': 2.0, +        'Cl5': 2.5, +        'Cr7': 3.0, +        'Cr6': 2.0, +        'Cr5': 
2.0, +        'Cr4': 2.0, +        'Cr3': 2.5, +        'Cr2': 4.0, +        'Cr1': 6.0, + +        'Dl_caps': 2.0, # XXX: dito +        'Dl1': 0.0, +        'Dl2': 0.0, +        'Dl3': 0.0, +        'Dl4': 0.0, +        'Dl5': 2.0, +        'Dr7': 2.0, +        'Dr6': 0.0, +        'Dr5': 0.0, +        'Dr4': 0.0, +        'Dr3': 0.0, +        'Dr2': 2.0, +        'Dr1': 4.0, # XXX: not in the original model + +        'El_shift': 4.0,  # XXX: dito +        'El1': 4.0, # XXX: dito +        'El2': 2.0, +        'El3': 2.0, +        'El4': 2.0, +        'El5': 2.0, +        'El6': 3.5, +        'Er5': 2.0, +        'Er4': 2.0, +        'Er3': 2.0, +        'Er2': 2.0, +        'Er1': 2.0, +        'Er_shift': 4.0, # XXX: dito + +        'Fr_altgr': 4.0, # XXX: dito +        }, +    ) + +def madd (a, b): +    """ Given indexables a and b, computes a[0]*b[0]+a[1]*b[1]+… """ +    s = 0 +    for i in range (len (a)): +        s += a[i] * b[i] +    return s + +class Carpalx: +    __slots__ = ('absEffort', 'N', 'params', '_cache', 'writer') + +    def __init__ (self, params: ModelParams, writer: Writer): +        self.params = params +        self.writer = writer +        # reset should not reset the cache +        self._cache : Dict[Tuple[ButtonCombination], float] = dict () +        self.reset () + +        # some runtime tests +        keyboard = writer.layout.keyboard +        assert keyboard.getRow (keyboard['Bl1']) == 0 +        assert keyboard.getRow (keyboard['Cl1']) == 1 +        assert keyboard.getRow (keyboard['Dl1']) == 2 +        assert keyboard.getRow (keyboard['El1']) == 3 + +    def addTriad (self, triad : Tuple[ButtonCombination], n: float): +        self.absEffort += n*self._triadEffort (triad) +        self.N += n + +    def removeTriad (self, triad: Tuple[ButtonCombination], n: float): +        self.absEffort -= n*self._triadEffort (triad) +        self.N -= n + +    def addTriads (self, triads: Mapping[Tuple[ButtonCombination], float]) -> None: +      
  for t, n in triads.items (): +            self.addTriad (t, n) + +    def reset (self) -> None: +        self.absEffort = 0.0 +        self.N = 0.0 + +    def copy (self): +        """ Create a copy of this instance, sharing the cache """ +        c = Carpalx (self.params, self.writer) +        c._cache = self._cache +        c.absEffort = self.absEffort +        c.N = self.N +        return c + +    @property +    def effort (self) -> float: +        if self.N == 0: +            return 0 +        else: +            return self.absEffort/self.N + +    @staticmethod +    def _strokePathHand (hands) -> int: +        same = hands[0] == hands[1] and hands[1] == hands[2] +        alternating = hands[0] == hands[2] and hands[0] != hands[1] +        if alternating: +            return 1 +        elif same: +            return 2 +        else: +            # both hands, but not alternating +            return 0 + +    @staticmethod +    def _strokePathRow (rows: List[int]) -> int: +        # d will be positive for upward row changes and negative for downward +        d = (rows[0]-rows[1], rows[1]-rows[2], rows[0]-rows[2]) +        #print ('rows', t, rows, d) +        if d[0] == 0 and d[1] == 0: +            # same row +            return 0 +        elif (rows[0] == rows[1] and rows[2] > rows[1]) or (rows[1] > rows[0] and rows[1] == rows[2]): +            # downward progression, with repetition +            return 1 +        elif (rows[0] == rows[1] and rows[2] < rows[1]) or (rows[1] < rows[0] and rows[1] == rows[2]): +            # upward progression, with repetition +            return 2 +        elif max (map (abs, d)) <= 1: +            # some different, not monotonic, max row change 1 +            return 3 +        elif d[0] < 0 and d[1] < 0: +            # downward progression +            return 4 +        elif d[0] > 0 and d[1] > 0: +            # upward progression +            # needs to be before 5 +            return 6 +        elif min (d[0], d[1]) < -1: +    
        # some different, not monotonic, max row change downward >1 +            return 5 +        elif max (d[0], d[1]) > 1: +            # some different, not monotonic, max row change upward >1 +            return 7 +        else: +            assert False, (rows, d) + +    @staticmethod +    def _strokePathFinger (fingers, t) -> int: +        fingers = [int (f[1]) if f[0] == LEFT else 6+(5-f[1]) for f in fingers] +        same = fingers[0] == fingers[1] == fingers[2] +        allDifferent = fingers[0] != fingers[1] and fingers[1] != fingers[2] and fingers[0] != fingers[2] +        someDifferent = not same and not allDifferent +        if same: +            keyRepeat = t[0] == t[1] or t[1] == t[2] or t[0] == t[2] +            if keyRepeat: +                return 5 +            else: # not keyRepeat +                return 7 +        elif fingers[0] > fingers[2] > fingers[1] or fingers[0] < fingers[2] < fingers[1]: +            # rolling +            return 2 +        elif allDifferent: +            monotonic = fingers[0] <= fingers[1] <= fingers[2] or fingers[0] >= fingers[1] >= fingers[2] +            if monotonic: +                return 0 +            else: +                return 3 +        elif someDifferent: +            monotonic = fingers[0] <= fingers[1] <= fingers[2] or fingers[0] >= fingers[1] >= fingers[2] +            if monotonic: +                keyRepeat = t[0] == t[1] or t[1] == t[2] or t[0] == t[2] +                if keyRepeat: +                    return 1 +                else: +                    return 6 +            else: +                return 4 +        else: +            assert False + +    def _strokePath (self, t: Tuple[Button, Button, Button]) -> Tuple[int, int, int]: +        """ Compute stroke path s for triad t """ +        fingers = [self.writer.getHandFinger (x) for x in t] +        hands = [f[0] for f in fingers] +        keyboard = self.writer.layout.keyboard +        rows = [keyboard.getRow (key) for key in t] + +        
return self._strokePathHand (hands), self._strokePathRow (rows), self._strokePathFinger (fingers, t) + +    def _penalty (self, key): +        hand, finger = self.writer.getHandFinger (key) +        keyboard = self.writer.layout.keyboard +        row = keyboard.getRow (key) +        params = self.params +        return madd (self.params.w0HRF, (1, params.pHand[hand], params.pRow[row], params.pFinger[hand][finger])) + +    def _baseEffort (self, triad: Tuple[ButtonCombination], f: Callable[[Button], float]) -> float: +        """ +        Compute b_i or p_i, depending on function f +        """ + +        k1, k2, k3, kS = self.params.k123S +        b = [] +        for comb in triad: +            perButton = [f (btn) for btn in comb] +            numKeys = len (perButton) +            # extra effort for hitting multiple buttons, no extra effort for +            # just one button +            simultaneousPenalty = (numKeys-1)*kS +            b.append (sum (perButton) + simultaneousPenalty) +        return k1 * b[0] * (1 + k2 * b[1] * (1 + k3 * b[2])) + +    def _triadEffort (self, triad: Tuple[ButtonCombination]) -> float: +        """ Compute effort for a single triad t, e_i """ +        ret = self._cache.get (triad) +        if ret is not None: +            return ret +        #t = [first (x.buttons) for x in triad] +        params = self.params +        bmap = params.baselineEffort + +        b = self._baseEffort (triad, lambda x: bmap[x.name]) +        p = self._baseEffort (triad, self._penalty) + +        # calculate stroke path for all possible triad combinations, i.e. +        # (Mod1-a, b, c) -> (Mod1, b, c), (a, b, c) and use the smallest +        # value. Suggested by Martin Krzywinski XXX: why? 
+        s = [madd (params.fHRF, self._strokePath (singleBtnTriad)) \ +                for singleBtnTriad in product (*map (iter, triad))] +        s = min (s) + +        ret = madd (params.kBPS, (b, p, s)) +        self._cache[triad] = ret +        return ret + diff --git a/lulua/data/keyboards/ibmpc105.yaml b/lulua/data/keyboards/ibmpc105.yaml new file mode 100644 index 0000000..d9dcb76 --- /dev/null +++ b/lulua/data/keyboards/ibmpc105.yaml @@ -0,0 +1,132 @@ +name: ibmpc105 +description: Standard IBM PC 105 key layout (European) +rows: +- - - kind: letter +      name: Bl1 +    - kind: letter +      name: Bl2 +    - kind: letter +      name: Bl3 +    - kind: letter +      name: Bl4 +    - kind: letter +      name: Bl5 +    - kind: letter +      name: Bl6 +    - kind: letter +      name: Bl7 +  - - kind: letter +      name: Br6 +    - kind: letter +      name: Br5 +    - kind: letter +      name: Br4 +    - kind: letter +      name: Br3 +    - kind: letter +      name: Br2 +    - kind: letter +      name: Br1 +    - name: Br_bs +      width: 1.75 +- - - name: Cl_tab +      width: 1.75 +    - kind: letter +      name: Cl1 +    - kind: letter +      name: Cl2 +    - kind: letter +      name: Cl3 +    - kind: letter +      name: Cl4 +    - kind: letter +      name: Cl5 +  - - kind: letter +      name: Cr7 +    - kind: letter +      name: Cr6 +    - kind: letter +      name: Cr5 +    - kind: letter +      name: Cr4 +    - kind: letter +      name: Cr3 +    - kind: letter +      name: Cr2 +    - kind: letter +      name: Cr1 +    - kind: multi +      name: CD_ret +      span: 2 +- - - name: Dl_caps +      width: 2 +    - kind: letter +      name: Dl1 +    - kind: letter +      name: Dl2 +    - kind: letter +      name: Dl3 +    - kind: letter +      isMarked: true +      name: Dl4 +    - kind: letter +      name: Dl5 +  - - kind: letter +      name: Dr7 +    - kind: letter +      isMarked: true +      name: Dr6 +    - kind: letter +      name: Dr5 +    - kind: letter +  
    name: Dr4 +    - kind: letter +      name: Dr3 +    - kind: letter +      name: Dr2 +    - kind: letter +      name: Dr1 +- - - name: El_shift +      width: 1.5 +    - kind: letter +      name: El1 +    - kind: letter +      name: El2 +    - kind: letter +      name: El3 +    - kind: letter +      name: El4 +    - kind: letter +      name: El5 +    - kind: letter +      name: El6 +  - - kind: letter +      name: Er5 +    - kind: letter +      name: Er4 +    - kind: letter +      name: Er3 +    - kind: letter +      name: Er2 +    - kind: letter +      name: Er1 +    - name: Er_shift +      width: 2.35 +- - - name: Fl_ctrl +      width: 1.75 +    - name: Fl_win +      width: 1.25 +    - name: Fl_alt +      width: 1.25 +    - name: Fl_space +      width: 3 +  - - name: Fr_space +      width: 3 +    - name: Fr_altgr +      width: 1.25 +    - name: Fr_win +      width: 1.25 +    - name: Fr_menu +      width: 1.25 +    - name: Fr_ctrl +      width: 1.25 diff --git a/lulua/data/layouts/ar-asmo663.yaml b/lulua/data/layouts/ar-asmo663.yaml new file mode 100644 index 0000000..dcb2dfc --- /dev/null +++ b/lulua/data/layouts/ar-asmo663.yaml @@ -0,0 +1,113 @@ +name: ar-asmo663 +layout: +- layer: +    #Bl1: "ذ" # unknown symbol +    Bl2: "1" +    Bl3: "2" +    Bl4: "3" +    Bl5: "4" +    Bl6: "5" +    Bl7: "6" +    Br6: "7" +    Br5: "8" +    Br4: "9" +    Br3: "0" +    Br2: "-" +    Br1: "^" + +    Cl1: "ض" +    Cl2: "ص" +    Cl3: "ث" +    Cl4: "ق" +    Cl5: "ف" +    Cr7: "غ" +    Cr6: "ع" +    Cr5: "ه" +    Cr4: "خ" +    Cr3: "ح" +    Cr2: "ج" +    Cr1: "–" # not sure + +    CD_ret: "\n" + +    Dl1: "ش" +    Dl2: "س" +    Dl3: "ي" +    Dl4: "ب" +    Dl5: "ل" +    Dr7: "ا" +    Dr6: "ت" +    Dr5: "ن" +    Dr4: "م" +    Dr3: "ك" +    Dr2: "\u064f" # damma +    Dr1: "ا\u0654" # composed: أ + +    El1: "ظ" +    El2: "ط" +    El3: "ذ" +    El4: "د" +    El5: "ز" +    El6: "ر" +    Er5: "\u064e" # fatha +    Er4: "و" +    Er3: "،" +    Er2: "." 
+    Er1: "\u0650" # kasra + +    Fl_space: " " +    Fr_space: " " +  modifier: +  - [] +- layer: +    Bl1: "@" +    Bl2: "!" +    Bl3: "\"" +    Bl4: "#" +    #Bl5: "" # unknown symbol +    Bl6: "%" +    Bl7: "&" +    Br6: "'" +    Br5: "(" +    Br4: ")" +    #Br3: "" +    Br2: "=" +    #Br1: "" # unknown symbol + +    Cl1: "{" +    Cl2: "[" +    Cl3: "\u064b" # fathatan +    Cl4: "\u0651" # shadda +    Cl5: "\u0652" # sukun +    Cr2: "]" +    Cr1: "}" + +    CD_ret: "\n" + +    Dl1: "\\" +    Dl2: "\u064c" # dammatan +    Dl3: "\u064a\u0654" # composed: ئ +    #Dl4: +    Dl5: "ا\u0655" # composed: إ +    Dr7: "ء" +    Dr6: "ة" +    Dr5: "ى" +    Dr4: "/" +    Dr3: "+" +    Dr2: "*" +    Dr1: "ا\u0653" # composed: آ + +    El1: "|" +    El2: "\u064d" # kasratan +    #El3: "" +    #El4: "" +    #El5: "" +    El6: "؛" +    Er5: ":" +    Er4: "\u0648\u0654" # composed: ؤ +    Er3: "<" +    Er2: ">" +    Er1: "؟" +  modifier: +  - [El_shift] +  - [Er_shift] diff --git a/lulua/data/layouts/ar-linux.yaml b/lulua/data/layouts/ar-linux.yaml new file mode 100644 index 0000000..7e9130c --- /dev/null +++ b/lulua/data/layouts/ar-linux.yaml @@ -0,0 +1,119 @@ +name: ar-linux +layout: +- layer: +    Bl1: "ذ" +    Bl2: "١" +    Bl3: "٢" +    Bl4: "٣" +    Bl5: "٤" +    Bl6: "٥" +    Bl7: "٦" +    Br6: "٧" +    Br5: "٨" +    Br4: "٩" +    Br3: "٠" +    Br2: "-" +    Br1: "=" + +    Cl_tab: "\t" +    Cl1: "ض" +    Cl2: "ص" +    Cl3: "ث" +    Cl4: "ق" +    Cl5: "ف" +    Cr7: "غ" +    Cr6: "ع" +    Cr5: "ه" +    Cr4: "خ" +    Cr3: "ح" +    Cr2: "ج" +    Cr1: "د" + +    CD_ret: "\n" + +    Dl1: "ش" +    Dl2: "س" +    Dl3: "ي" +    Dl4: "ب" +    Dl5: "ل" +    Dr7: "ا" +    Dr6: "ت" +    Dr5: "ن" +    Dr4: "م" +    Dr3: "ك" +    Dr2: "ط" +    Dr1: "\\" + +    El1: "|" +    El2: "\u064a\u0654" # composed: ئ +    El3: "ء" +    El4: "\u0648\u0654" # composed: ؤ +    El5: "ر" +    El6: "لا" # composed: ﻻ +    Er5: "ى" +    Er4: "ة" +    Er3: "و" +    Er2: "ز" +    Er1: "ظ" + +    Fl_space: " 
" +    Fr_space: " " +  modifier: +  - [] +- layer: +    Bl1: "\u0651" # shadda +    Bl2: "!" +    Bl3: "@" +    Bl4: "#" +    Bl5: "$" +    Bl6: "٪" +    Bl7: "^" +    Br6: "&" +    Br5: "*" +    Br4: ")" +    Br3: "(" +    Br2: "_" +    Br1: "+" + +    Cl1: "\u064e" # fatha +    Cl2: "\u064b" # fathatan +    Cl3: "\u064f" # damma +    Cl4: "\u064c" # dammatan +    Cl5: "لا\u0655" # composed: ﻹ +    Cr7: "ا\u0655" # composed: إ +    Cr6: "`" +    Cr5: "÷" +    Cr4: "×" +    Cr3: "؛" +    Cr2: "<" +    Cr1: ">" + +    CD_ret: "\n" + +    Dl1: "\u0650" # kasra +    Dl2: "\u064d" # kasratan +    Dl3: "]" +    Dl4: "[" +    Dl5: "لا\u0654" # composed: ﻷ +    Dr7: "ا\u0654" # composed: أ +    Dr6: "ـ" +    Dr5: "،" +    Dr4: "/" +    Dr3: ":" +    Dr2: '"' +    Dr1: "…" + +    El1: "¦" +    El2: "~" +    El3: "\u0652" # sukun +    El4: "}" +    El5: "{" +    El6: "لا\u0653" # composed: ﻵ +    Er5: "ا\u0653" # composed: آ +    Er4: "'" +    Er3: "," +    Er2: "." +    Er1: "؟" +  modifier: +  - [El_shift] +  - [Er_shift] diff --git a/lulua/data/layouts/ar-lulua.yaml b/lulua/data/layouts/ar-lulua.yaml new file mode 100644 index 0000000..ca43f9b --- /dev/null +++ b/lulua/data/layouts/ar-lulua.yaml @@ -0,0 +1,37 @@ +layout: +- layer: {CD_ret: "\n", Cl1: "\u062B", Cl2: "\u0637", Cl3: "\u0641", Cl4: "\u0629", Cl5: "\u0654", +    Cl_tab: "\t", Cr1: "\u0638", Cr2: "\u0621", Cr3: "\u0636", Cr4: "\u062D", Cr5: "\u0639", +    Cr6: "\u062F", Cr7: "\u0642", Dl1: "\u0628", Dl2: "\u0645", Dl3: "\u0627", Dl4: "\u0648", +    Dl5: "\u062A", Dr2: "\u0635", Dr3: "\u0633", Dr4: "\u0646", Dr5: "\u064A", Dr6: "\u0644", +    Dr7: "\u0631", El2: "\u0630", El3: "\u0649", El4: "\u062C", El5: "\u0634", El6: "\u0655", +    Er1: "\u063A", Er2: "\u062E", Er3: "\u0643", Er4: "\u0632", Er5: "\u0647", Fl_space: ' ', +    Fr_space: ' '} +  modifier: +  - [] +- layer: {Bl2: "\u203A", Bl7: $, Br4: "\u2039", Br6: '%', Cl2: +, Cl3: ']', Cl4: '!', +    Cl5: '*', Cr2: '&', Cr3: "\u2026", Cr4: '}', Cr5: 
"\u061F", Cr6: '[', Cr7: "\xAB", +    Dl1: "\u061B", Dl2: ':', Dl3: '"', Dl4: '-', Dl5: _, Dr2: '@', Dr3: /, Dr4: ), +    Dr5: "\u060C", Dr6: ., Dr7: (, El3: '~', El4: '>', El5: '=', El6: '{', Er2: ^, +    Er3: "\xBB", Er4: <, Er5: '#'} +  modifier: +  - [El_shift] +  - [Er_shift] +- layer: {Bl1: "\u06E6", Bl2: "\u06D8", Bl4: "\u06E4", Bl6: "\u06E8", Bl7: "\u06DB", +    Br1: "\u06E2", Br2: "\u06DF", Br3: "\u06DE", Br4: "\u061C", Br6: "\u2067", Cl2: "\u06D9", +    Cl3: "\u2066", Cl4: "\u0671", Cr1: "\u06E5", Cr2: "\u06DA", Cr4: "\u06DD", Cr5: "\u0652", +    Cr6: "\u064C", Dl1: "\u06DC", Dl2: "\u064D", Dl3: "\u064E", Dl4: "\u0640", Dl5: "\u0650", +    Dr2: "\u2069", Dr3: "\u06D7", Dr4: "\u064F", Dr5: "\u0651", Dr6: "\u064B", Dr7: "\u0653", +    El2: "\u06E7", El3: "\u06E0", El4: "\u066D", El5: "\u06E3", Er1: "\u06D6", Er3: "\u0670", +    Er4: "\u06E9", Er5: "\u06ED"} +  modifier: +  - [Dl_caps] +  - [Dr1] +- layer: {Cl1: "\u0663", Cl2: "\u0662", Cl3: "\u0661", Cl4: "\u0660", Cl5: "\u066A", +    Dl1: "\u0667", Dl2: "\u0666", Dl3: "\u0665", Dl4: "\u0664", Dl5: "\u2212", El2: "\u066C", +    El3: "\u066B", El4: "\u0669", El5: "\u0668", El6: "\u0609"} +  modifier: +  - [Fr_altgr] +  - [El1] +name: ar-lulua +version: 0.1 +date: 2019-09-15 diff --git a/lulua/data/layouts/ar-malas.yaml b/lulua/data/layouts/ar-malas.yaml new file mode 100644 index 0000000..c2d9ef8 --- /dev/null +++ b/lulua/data/layouts/ar-malas.yaml @@ -0,0 +1,123 @@ +name: ar-malas +layout: +- layer: +    Bl2: "1" +    Bl3: "2" +    Bl4: "3" +    Bl5: "4" +    Bl6: "5" +    Bl7: "6" +    Br6: "7" +    Br5: "8" +    Br4: "9" +    Br3: "0" +    Br2: "-" +    Br1: "=" +    #Br0: "\\" # extra key? 
+ +    Cl_tab: "\t" +    Cl1: "ق" +    Cl2: "غ" +    Cl3: "ع" +    Cl4: "ي" +    Cl5: "ة" +    Cr7: "ف" +    Cr6: "ط" +    Cr5: "ر" +    Cr4: "ص" +    Cr3: "ب" +    Cr2: "ش" +    Cr1: "ض" + +    CD_ret: "\n" + +    Dl1: "ه" +    Dl2: "ج" +    Dl3: "ك" +    Dl4: "ا" +    Dl5: "و" +    Dr7: "ت" +    Dr6: "د" +    Dr5: "ل" +    Dr4: "ن" +    Dr3: "م" +    Dr2: "س" + +    El1: "\\" +    El2: "خ" +    El3: "ى" +    El4: "ا\u0655" # composed: إ +    El5: "ا\u0654" +    El6: "ح" +    Er5: "\u064a\u0654" +    Er4: "ز" +    Er3: "." +    Er2: "ث" +    Er1: "ذ" + +    Fl_space: " " +    Fr_space: " " + +  modifier: +  - [] +- layer: +    Bl2: "!" +    Bl3: "@" +    Bl4: "#" +    Bl5: "$" +    Bl6: "%" +    Bl7: "^" +    Br6: "&" +    Br5: "*" +    Br4: "(" +    Br3: ")" +    Br2: "_" +    Br1: "+" +    #Br0: "|" # extra key? + +    Cl_tab: "\t" +    Cl1: "\u064e" # fatha +    Cl2: "\u064b" # fathatan +    Cl3: "\u064f" # damma +    Cl4: "\u064c" # dammatan +    Cl5: "\u0651" # shadda +    #Cr7: "" +    Cr6: "ظ" +    Cr5: "÷" +    Cr4: "×" +    Cr3: "؛" +    Cr2: ">" +    Cr1: "<" + +    CD_ret: "\n" + +    Dl1: "\u0650" # kasra +    Dl2: "\u064d" # kasratan +    Dl3: "[" +    Dl4: "]" +    Dl5: "\u0648\u0654" # composed: ؤ +    #Dr7: "" +    Dr6: "ـ" +    Dr5: "،" +    Dr4: "/" +    Dr3: ":" +    Dr2: "\"" + +    El1: "|" +    El2: "~" +    El3: "\u0652" # sukun +    El4: "ا\u0653" # composed: آ +    El5: "ء" +    El6: "{" +    Er5: "}" +    Er4: "‘" +    Er3: "’" +    Er2: "," +    Er1: "؟" + +    Fl_space: " " +    Fr_space: " " + +  modifier: +  - [El_shift] +  - [Er_shift] diff --git a/lulua/data/layouts/ar-osman.yaml b/lulua/data/layouts/ar-osman.yaml new file mode 100644 index 0000000..bc0bb7a --- /dev/null +++ b/lulua/data/layouts/ar-osman.yaml @@ -0,0 +1,121 @@ +name: ar-osman +layout: +- layer: +    Bl1: "\u0648\u0654" # composed: ؤ +    Bl2: "1" +    Bl3: "2" +    Bl4: "3" +    Bl5: "4" +    Bl6: "5" +    Bl7: "6" +    Br6: "7" +    Br5: "8" +    Br4: "9" +    Br3: 
"0" +    Br2: "-" +    Br1: "=" + +    Cl1: "ظ" +    Cl2: "ض" +    Cl3: "ص" +    Cl4: "ق" +    Cl5: "ف" +    Cr7: "غ" +    Cr6: "ع" +    Cr5: "ه" +    Cr4: "ح" +    Cr3: "ج" +    Cr2: "خ" +    Cr1: "ء" +    #Cr0: "\\" + +    CD_ret: "\n" + +    Dl1: "ط" +    Dl2: "ث" +    Dl3: "ت" +    Dl4: "ب" +    Dl5: "ل" +    Dr7: "ا" +    Dr6: "ن" +    Dr5: "م" +    Dr4: "و" +    Dr3: "س" +    Dr2: "ش" +    #Dr1: "" + +    #El1: "" +    El2: "\u064a\u0654" # composed: ئ +    El3: "ذ" +    El4: "د" +    El5: "لا" +    El6: "ي" +    Er5: "ى" +    Er4: "ر" +    Er3: "ز" +    Er2: "ك" +    Er1: "ة" + +    Fl_space: " " +    Fr_space: " " +  modifier: +  - [] +- layer: +    #Bl1: "" +    Bl2: "!" +    Bl3: "@" +    Bl4: "#" +    Bl5: "$" +    Bl6: "%" +    Bl7: "^" +    Br6: "&" +    Br5: "*" +    Br4: "(" +    Br3: ")" +    Br2: "_" +    Br1: "+" + +    Cl1: "\u064e" # fatha +    Cl2: "\u064b" # fathatan +    Cl3: "\u064f" # damma +    Cl4: "\u064c" # dammatan +    Cl5: "لا\u0655" # composed: ﻹ +    Cr7: "ا\u0655" # composed: إ +    Cr6: "`" +    Cr5: "÷" +    Cr4: "×" +    Cr3: "؛" +    Cr2: ">" +    Cr1: "<" +    #Cr0: "|" + +    CD_ret: "\n" + +    Dl1: "\u0650" # kasra +    Dl2: "\u064d" # kasratan +    Dl3: "[" +    Dl4: "]" +    Dl5: "لا\u0654" # composed: ﻷ +    Dr7: "ا\u0654" # composed: أ +    Dr6: "ـ" +    Dr5: "،" +    Dr4: "/" +    Dr3: ":" +    Dr2: '"' +    #Dr1: "…" + +    #El1: "¦" +    El2: "~" +    El3: "\u0652" # sukun +    El4: "{" +    El5: "}" +    El6: "لا\u0653" # composed: ﻵ +    Er5: "ا\u0653" # composed: آ +    Er4: "'" +    Er3: "÷" +    Er2: "×" +    Er1: "؛" +  modifier: +  - [El_shift] +  - [Er_shift] + diff --git a/lulua/data/layouts/ar-phonetic.yaml b/lulua/data/layouts/ar-phonetic.yaml new file mode 100644 index 0000000..cb383b0 --- /dev/null +++ b/lulua/data/layouts/ar-phonetic.yaml @@ -0,0 +1,147 @@ +name: ar-phonetic +layout: +- layer: +    Bl2: '''' +    Bl3: "\u0662" +    Bl4: "\u0663" +    Bl5: "\u0664" +    Bl6: "\u0665" +    Bl7: "\u0666" 
+    Br1: '=' +    Br2: '-' +    Br3: "\u0660" +    Br4: "\u0669" +    Br5: "\u0668" +    Br6: "\u0667" +    Cl1: "\u0642" +    Cl2: "\u0648" +    Cl3: "\u0639" +    Cl4: "\u0631" +    Cl5: "\u062A" +    #Cr0: \ +    Cr1: ']' +    Cr2: '[' +    Cr3: "\u0671" +    Cr4: "\u064F" +    Cr5: "\u0650" +    Cr6: "\u064E" +    Cr7: "\u064A" +    Dl1: "\u0627" +    Dl2: "\u0633" +    Dl3: "\u062F" +    Dl4: "\u0641" +    Dl5: "\u0621" +    Dr2: '#' +    Dr3: "\u061B" +    Dr4: "\u0644" +    Dr5: "\u0643" +    Dr6: "\u0630" +    Dr7: "\u0647" +    El1: \ +    El2: "\u0632" +    El3: "\u062B" +    El4: "\u0635" +    El5: "\u0652" +    El6: "\u0628" +    Er1: / +    Er2: . +    Er3: "\u060C" +    Er4: "\u0645" +    Er5: "\u0646" +    Fl_space: ' ' +  modifier: +  - [] +- layer: +    Bl2: '"' +    Bl3: '@' +    Bl4: "\xA3" +    Bl5: $ +    Bl6: '%' +    Bl7: ^ +    Br1: + +    Br2: _ +    Br3: ) +    Br4: ( +    Br5: '*' +    Br6: '&' +    Cl1: "\u064A\u0654" +    Cl2: "\u0648\u0654" +    Cl3: "\u0670" +    Cl4: "\u0653" +    Cl5: "\u0637" +    #Cr0: '|' +    Cr1: '}' +    Cr2: '{' +    Cr3: "\u0627\u0653" +    Cr4: "\u064C" +    Cr5: "\u064D" +    Cr6: "\u064B" +    Cr7: "\u0649" +    Dl1: "\u0627\u0654" +    Dl2: "\u0634" +    Dl3: "\u0636" +    Dl4: "\u0642" +    Dl5: "\u063A" +    Dr2: '~' +    Dr3: ':' +    Dr4: "\u0627\u0655" +    Dr5: "\u062E" +    Dr6: "\u062C" +    Dr7: "\u062D" +    El1: '|' +    El2: "\u0638" +    El3: "\u0629" +    El4: "\u0654" +    El5: "\u0651" +    El6: "\u0640" +    Er1: "\u061F" +    Er2: '>' +    Er3: < +    Er4: "\u06E2" +    Er5: "\u0655" +    Fl_space: ' ' +    Fr_space: ' ' +  modifier: +  - - El_shift +  - - Er_shift +- layer: +    Bl2: "\u0627\u0655" +    Bl3: "\u274A" +    Bl4: "\u0610" +    Bl5: "\u0611" +    Bl6: "\u0613" +    Bl7: "\u0612" +    Br5: "\u0655" +    Br6: "\u0654" +    Cl1: "\u06D7" +    Cl2: "\u06E5" +    Cl3: "\u06D2" +    Cl4: "\u0698" +    Cl5: "\u0615" +    #Cr0: "\u06DE" +    Cr3: "\uFDFA" +    Cr5: "\uFE8C" +    
Cr7: "\u06E6" +    Dl1: "\u0627\u0655" +    Dl2: "\u06DC" +    Dl3: "\u0636" +    Dl4: "\u06A4" +    Dl5: "\u0639" +    Dr2: "\u06DD" +    Dr3: "\u061E" +    Dr4: "\u06D9" +    Dr5: "\u06AA" +    Dr6: "\u06DA" +    Dr7: "\uFBA9" +    El1: "\uFDFB" +    El2: "\uFDFB" +    El3: "\u06DB" +    El4: "\u06D6" +    El5: "\u06E8" +    El6: "\u067E" +    Er2: "\xAB" +    Er3: "\xBB" +    Er4: "\uFEE3" +    Er5: "\u06BD" +  modifier: +  - - Fr_altgr diff --git a/lulua/data/layouts/null.yaml b/lulua/data/layouts/null.yaml new file mode 100644 index 0000000..736e47a --- /dev/null +++ b/lulua/data/layouts/null.yaml @@ -0,0 +1,3 @@ +# empty layout +name: null +layout: [] diff --git a/lulua/keyboard.py b/lulua/keyboard.py new file mode 100644 index 0000000..8fb7913 --- /dev/null +++ b/lulua/keyboard.py @@ -0,0 +1,221 @@ +# Copyright (c) 2019 lulua contributors +#  +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +#  +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +#  +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +import pkg_resources +from itertools import chain +from typing import Text, Dict, Iterator, List + +from .util import YamlLoader + +# XXX move this to keyboard.yaml? +_buttonToXorgKeycode = { +    'Bl1': 49, +    'Bl2': 10, +    'Bl3': 11, +    'Bl4': 12, +    'Bl5': 13, +    'Bl6': 14, +    'Bl7': 15, +    'Br6': 16, +    'Br5': 17, +    'Br4': 18, +    'Br3': 19, +    'Br2': 20, +    'Br1': 21, +    'Br_bs': 22, +    'Cl_tab': 23, +    'Cl1': 24, +    'Cl2': 25, +    'Cl3': 26, +    'Cl4': 27, +    'Cl5': 28, +    'Cr7': 29, +    'Cr6': 30, +    'Cr5': 31, +    'Cr4': 32, +    'Cr3': 33, +    'Cr2': 34, +    'Cr1': 35, +    'CD_ret': 36, +    'Dl_caps': 66, +    'Dl1': 38, +    'Dl2': 39, +    'Dl3': 40, +    'Dl4': 41, +    'Dl5': 42, +    'Dr7': 43, +    'Dr6': 44, +    'Dr5': 45, +    'Dr4': 46, +    'Dr3': 47, +    'Dr2': 48, +    'Dr1': 51, +    'El_shift': 50, +    'El1': 94, +    'El2': 52, +    'El3': 53, +    'El4': 54, +    'El5': 55, +    'El6': 56, +    'Er5': 57, +    'Er4': 58, +    'Er3': 59, +    'Er2': 60, +    'Er1': 61, +    'Er_shift': 62, +    'Fl_ctrl': 37, +    'Fl_win': 133, +    'Fl_alt': 64, +    'Fl_space': 65, +    'Fr_space': 65, +    'Fr_altgr': 108, +    'Fr_win': 134, # Super_R; 105 is Control_R (Fr_ctrl) +    'Fr_menu': 135, +    'Fr_ctrl': 105, +    } + +class Button: +    __slots__ = ('width', 'isMarked', 'i') +    _idToName : Dict[int, Text] = {} +    _nameToId : Dict[Text, int] = {} +    _nextNameId = 0 + +    def __init__ (self, name: Text, width: float = 1, isMarked: bool = False): +        # map names to integers for fast comparison/hashing +        i = Button._nameToId.get (name) +        if i is None: +            i = Button._nextNameId +            Button._nextNameId += 1 +            Button._idToName[i] = name +            Button._nameToId[name] = i +        self.i = i +        self.width = width +        # marked with a haptic line, for better orientation +        self.isMarked = isMarked + +    def __repr__ (self): +        return 
f'Button({self.name!r}, {self.width}, {self.isMarked})' + +    def __eq__ (self, other): +        if not isinstance (other, Button): +            return NotImplemented +        return self.i == other.i + +    def __hash__ (self): +        return hash (self.i) + +    @property +    def name (self): +        return Button._idToName[self.i] + +    @property +    def xorgKeycode (self): +        return _buttonToXorgKeycode[self.name] + +    @classmethod +    def deserialize (self, data: Dict): +        kindMap = {'standard': Button, 'letter': LetterButton, 'multi': MultiRowButton} +        try: +            kind = data['kind'] +            del data['kind'] +        except KeyError: +            kind = 'standard' +        return kindMap[kind] (**data) + +class LetterButton (Button): +    """ +    A letter, number or symbol button, but not special keys like modifier, tab, +    … +    """ +    def __init__ (self, name, isMarked=False): +        super().__init__ (name, width=1, isMarked=isMarked) + +    def __repr__ (self): +        return f'LetterButton({self.name!r}, {self.isMarked})' + +class MultiRowButton (Button): +    """ +    A button spanning multiple rows, like the return button on european +    keyboards +    """ + +    __slots__ = ('span', ) + +    def __init__ (self, name, span, isMarked=False): +        super ().__init__ (name, width=1, isMarked=isMarked) +        self.span = span + +    def __repr__ (self): +        return f'MultiRowButton({self.name!r}, {self.span!r}, {self.isMarked!r})' + +class PhysicalKeyboard: +    __slots__ = ('name', 'rows', '_buttonToRow') + +    def __init__ (self, name: Text, rows): +        self.name = name +        self.rows = rows + +        self._buttonToRow = dict () +        for i, (l, r) in enumerate (rows): +            for btn in chain (l, r): +                self._buttonToRow[btn] = i + +    def __iter__ (self): +        return iter (self.rows) + +    def __repr__ (self): +        return f'<PhysicalKeyboard {self.name} 
with {len (self)} keys>' + +    def __len__ (self): +        return sum (map (lambda x: len(x[0])+len(x[1]), self)) + +    def __getitem__ (self, name: Text) -> Button: +        """ Find button by name """ +        # XXX: speed up +        for k in self.keys (): +            if k.name == name: +                return k +        raise AttributeError (f'{name} is not a valid button name') + +    def keys (self) -> Iterator[Button]: +        """ Iterate over all keys """ +        for row in self.rows: +            yield from chain.from_iterable (row) + +    def find (self, name: Text) -> Button: +        return self[name] + +    def getRow (self, btn: Button): +        return self._buttonToRow[btn] + +    @classmethod +    def deserialize (cls, data: Dict): +        rows = [] +        for l, r in data['rows']: +            row : List[List[Button]] = [[], []] +            for btn in l: +                row[0].append (Button.deserialize (btn)) +            for btn in r: +                row[1].append (Button.deserialize (btn)) +            rows.append (row) +        return cls (data['name'], rows) + +defaultKeyboards = YamlLoader ('data/keyboards', PhysicalKeyboard.deserialize) + diff --git a/lulua/layout.py b/lulua/layout.py new file mode 100644 index 0000000..05a6083 --- /dev/null +++ b/lulua/layout.py @@ -0,0 +1,351 @@ +# Copyright (c) 2019 lulua contributors +#  +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +#  +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import sys, re, unicodedata, copy
from enum import IntEnum, unique
from collections import defaultdict, namedtuple
from itertools import chain
from typing import Text, FrozenSet, Iterator, List, Dict, Any, Tuple

from pygtrie import CharTrie
import pkg_resources
import yaml

from .util import first, YamlLoader

@unique
class Direction(IntEnum):
    """ Side of a hand """
    LEFT = 1
    RIGHT = 2

# shortcut binds
LEFT = Direction.LEFT
RIGHT = Direction.RIGHT

@unique
class FingerType(IntEnum):
    """ Fingers of one hand """
    LITTLE = 1
    RING = 2
    MIDDLE = 3
    INDEX = 4
    THUMB = 5

# shortcut binds
LITTLE = FingerType.LITTLE
RING = FingerType.RING
MIDDLE = FingerType.MIDDLE
INDEX = FingerType.INDEX
THUMB = FingerType.THUMB

class Hand:
    """ A hand at a given position, owning a list of fingers """

    __slots__ = ('position', 'fingers')

    def __init__ (self, position, fingers=None):
        self.position = position
        self.fingers = []
        if fingers:
            for f in fingers:
                self.addFinger (f)

    def __repr__ (self):
        return f'Hand({self.position.name}, {self.fingers})'

    def __getitem__ (self, k):
        """
        Get the first finger whose number equals k. Raises StopIteration if
        this hand has no such finger.
        """
        return next (filter (lambda x: x.number == k, self.fingers))

    def addFinger (self, f):
        """ Append finger f and make this hand its owner """
        self.fingers.append (f)
        f.hand = self

class Finger:
    """ A single finger; .hand is None until it is added to a Hand """

    __slots__ = ('number', 'hand')

    def __init__ (self, number):
        self.number = number
        self.hand = None

    def __repr__ (self):
        # a finger not attached to a hand yet has no position to show
        if self.hand is None:
            return f'Finger({self.number.name})'
        return f'Finger({self.number.name}) # {self.hand.position.name}'

from .keyboard import Button

class ButtonCombination:
    """
    A set of modifier buttons plus a set of regular buttons pressed together.

    The hash is computed once in the constructor.
    """

    __slots__ = ('modifier', 'buttons', '_hash')

    def __init__ (self, modifier: FrozenSet[Button], buttons: FrozenSet[Button]):
        self.modifier = modifier
        self.buttons = buttons
        self._hash = hash ((self.modifier, self.buttons))

    def __len__ (self) -> int:
        return len (self.modifier) + len (self.buttons)

    def __iter__ (self) -> Iterator[Button]:
        return chain (self.modifier, self.buttons)

    def __repr__ (self):
        return f'ButtonCombination({self.modifier!r}, {self.buttons!r})'

    def __hash__ (self):
        return self._hash

    def __eq__ (self, other: Any) -> bool:
        if not isinstance (other, ButtonCombination):
            return NotImplemented
        return self.buttons == other.buttons and self.modifier == other.modifier

    def __getstate__ (self):
        # _hash is not stored, __setstate__ recomputes it
        return (self.modifier, self.buttons)

    def __setstate__ (self, state):
        self.__init__ (modifier=state[0], buttons=state[1])

Layer = namedtuple ('Layer', ['modifier', 'layout'])

from .keyboard import PhysicalKeyboard

class KeyboardLayout:
    """ Keyboard layout, i.e. physical button to character mapping """

    __slots__ = ('name', 'bufferLen', 't', 'layers', '_modifierToLayer', 'keyboard')

    def __init__ (self, name: Text, layers: List[Layer], keyboard: PhysicalKeyboard):
        # XXX: add sanity checks (i.e. modifier are not used elsewhere, no duplicates, …)
        self.name = name
        self.layers = layers
        self.keyboard = keyboard
        self._modifierToLayer : Dict[FrozenSet[Button], Tuple[int, Layer]] = dict ()
        # length of the longest text a single button can produce
        self.bufferLen = 0
        # text → list of all combinations producing it
        t = self.t = CharTrie ()
        for i, l in enumerate (layers):
            for m in l.modifier:
                self._modifierToLayer[m] = (i, l)
            for button, v in l.layout.items ():
                if isinstance (v, str):
                    t.setdefault (v, [])
                    for m in l.modifier:
                        comb = ButtonCombination (m, frozenset ([button]))
                        t[v].append (comb)
                    self.bufferLen = max (len (v), self.bufferLen)

    def __call__ (self, buf: Text):
        """ Lookup a string and find the key used to type it """
        p = self.t.longest_prefix (buf)
        if p.key is None:
            raise KeyError ()
        return (p.key, p.value)

    def __iter__ (self):
        return iter (self.t.items ())

    def __eq__ (self, other):
        # only layers are compared, name and keyboard are ignored
        if not isinstance (other, KeyboardLayout):
            return NotImplemented
        return self.layers == other.layers

    def __repr__ (self):
        return f'<KeyboardLayout {self.name}: {len (self.layers)} layers>'

    def copy (self):
        """ Copy with deep-copied layers, sharing the same keyboard """
        layers = copy.deepcopy (self.layers)
        # keyboard is a required constructor argument
        return self.__class__ (self.name[:], layers, self.keyboard)

    def getText (self, comb: ButtonCombination) -> Text:
        """ Get input text for combination """
        return self.modifierToLayer (comb.modifier)[1].layout[first (comb.buttons)]

    def getButtonText (self, button: Button) -> Iterator[Text]:
        """
        Get text from all layers for a single button. Yields None for layers
        that do not assign the button.
        """
        for l in self.layers:
            yield l.layout.get (button, None)

    def modifierToLayer (self, mod: FrozenSet[Button]) -> Tuple[int, Layer]:
        """
        Look up (layer number, layer) for a given modifier combination mod
        """
        return self._modifierToLayer[mod]

    def isModifier (self, mod: FrozenSet[Button]) -> bool:
        """ Check if a given set of buttons is a modifier key """
        return mod in self._modifierToLayer

class GenericLayout:
    """ Layout for _any_ kind of keyboard, i.e. not specialized """

    __slots__ = ('name', 'layers')

    def __init__ (self, name: Text, layers: List):
        self.name = name
        self.layers = layers

    def __eq__ (self, other):
        # only layers are compared, the name is ignored
        if not isinstance (other, GenericLayout):
            return NotImplemented
        return self.layers == other.layers

    def __len__ (self):
        """ Number of button assignments, summed over all layers """
        return sum (len (layer.layout) for layer in self.layers)

    def buttons (self) -> Iterator[Tuple[Button, Text]]:
        """ Iterate over all layers and buttons """
        for l in self.layers:
            yield from l.layout.items ()

    @classmethod
    def deserialize (cls, data: Dict):
        """
        Create layout from a dict with the keys 'name' and 'layout', the
        latter being a list of dicts with the keys 'modifier' and 'layer'
        """
        layout = []
        for layer in data['layout']:
            layout.append (Layer (modifier=[frozenset (x) for x in layer['modifier']], layout=layer['layer']))
        return cls (data['name'], layout)

    def serialize (self):
        """ Inverse of deserialize, returns a plain dict """
        def convertLayer (l):
            modifier = [list (x) for x in l.modifier]
            return dict (layer=l.layout, modifier=modifier)
        data = dict (name=self.name, layout=[convertLayer (x) for x in self.layers])
        return data

    def specialize (self, keyboard: PhysicalKeyboard) -> KeyboardLayout:
        """ Adapt this layout to an actual keyboard """
        def findButton (args):
            name, value = args
            return keyboard.find (name), value
        layers = []
        for l in self.layers:
            modifier = []
            for m in l.modifier:
                modifier.append (frozenset (keyboard.find (x) for x in m))
            layers.append (Layer (modifier=modifier, layout=dict (map (findButton, l.layout.items ()))))
        return KeyboardLayout (self.name, layers, keyboard=keyboard)

    @classmethod
    def fromKlc (cls, fd):
        """
        Parse Microsoft Keyboard Layout Creator project file

        fd is an open text file, which is closed by this function. Returns
        (layers, layerSwitches): a list of six dicts mapping button name to
        text and a dict mapping layer number to its modifier combinations.
        """
        def codeToText (c):
            # two symbols for NULL? Seriously Microsoft?
            if c == '%%':
                return None
            n = int (c, 16)
            if n == -1:
                return None
            return unicodedata.normalize ('NFD', chr (n))

        vkToButton = {
            # leftmost key of the number row; used to collide with '1' on Bl2
            'OEM_3': 'Bl1',
            '1': 'Bl2',
            '2': 'Bl3',
            '3': 'Bl4',
            '4': 'Bl5',
            '5': 'Bl6',
            '6': 'Bl7',
            '7': 'Br6',
            '8': 'Br5',
            '9': 'Br4',
            '0': 'Br3',
            'OEM_MINUS': 'Br2',
            'OEM_PLUS': 'Br1',

            'Q': 'Cl1',
            'W': 'Cl2',
            'E': 'Cl3',
            'R': 'Cl4',
            'T': 'Cl5',
            'Y': 'Cr7',
            'U': 'Cr6',
            'I': 'Cr5',
            'O': 'Cr4',
            'P': 'Cr3',
            'OEM_4': 'Cr2',
            'OEM_6': 'Cr1',
            # NOTE(review): Cr0 has no entry in the Xorg keycode table of
            # keyboard.py, confirm this is the intended button name
            'OEM_5': 'Cr0',

            'A': 'Dl1',
            'S': 'Dl2',
            'D': 'Dl3',
            'F': 'Dl4',
            'G': 'Dl5',
            'H': 'Dr7',
            'J': 'Dr6',
            'K': 'Dr5',
            'L': 'Dr4',
            'OEM_1': 'Dr3',
            'OEM_7': 'Dr2',
            #Dr1

            'OEM_102': 'El1',
            'Z': 'El2',
            'X': 'El3',
            'C': 'El4',
            'V': 'El5',
            'B': 'El6',
            'N': 'Er5',
            'M': 'Er4',
            'OEM_COMMA': 'Er3',
            'OEM_PERIOD': 'Er2',
            'OEM_2': 'Er1',

            'SPACE': 'Fl_space',
            }

        with fd:
            mode = None
            layers = [{} for i in range (6)]
            for line in fd:
                # strip comments. Both kinds are handled independently, so a
                # line without // still gets its ; comment removed
                for marker in ('//', ';'):
                    pos = line.find (marker)
                    if pos != -1:
                        line = line[:pos]
                line = line.strip ()
                if line.startswith ('LAYOUT'):
                    mode = 'layout'
                elif line == 'LIGATURE':
                    mode = None
                elif mode == 'layout':
                    try:
                        scancode, virtKey, cap, *code = re.split (r'\s+', line)
                    except ValueError:
                        # less than three columns, not a key definition
                        continue
                    code = list (map (codeToText, code))
                    try:
                        button = vkToButton[virtKey]
                        for i, c in enumerate (code):
                            if c is not None:
                                layers[i][button] = c
                    except KeyError:
                        # the only virtual key expected to be unmapped
                        assert virtKey == 'DECIMAL'
                        pass

            # the AltGr button is named Fr_altgr (bottom row), not Er_altgr
            layerSwitches = {
                0: [tuple ()],
                1: [('El_shift', ), ('Er_shift', )],
                2: [('Fl_ctrl', ), ('Fr_ctrl', )],
                3: [('El_shift', 'Fl_ctrl'), ('Er_shift', 'Fr_ctrl')],
                4: [('Fr_altgr', )],
                5: [('El_shift', 'Fr_altgr')],
                }
            return layers, layerSwitches

defaultLayouts = YamlLoader ('data/layouts', GenericLayout.deserialize)

def importKlc ():
    """
    Convert the .klc file (UTF-16) named by the first command line argument
    to YAML on stdout
    """
    with open (sys.argv[1], 'r', encoding='utf16') as fd:
        # fromKlc is a classmethod of GenericLayout; there is no class Layout
        layers, layerSwitches = GenericLayout.fromKlc (fd)
        data = {'name': None, 'layout': [{'layer': l, 'modifier': [list (x) for x in layerSwitches[i]]} for i, l in enumerate (layers)]}
        yaml.dump (data, sys.stdout)

# diff --git a/lulua/optimize.py b/lulua/optimize.py
# new file mode 100644 index 0000000..879f531 (341 lines)

# Copyright (c) 2019 lulua contributors
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of
# this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import pickle, sys, random, time, logging, argparse
from copy import deepcopy
from typing import List, Tuple, Optional, Text, FrozenSet
from abc import abstractmethod
from operator import itemgetter
from collections import defaultdict
from itertools import chain

from tqdm import tqdm
# work around pypy bug https://bitbucket.org/pypy/pypy/issues/2953/deadlock
tqdm.get_lock().locks = []
import yaml

from .layout import defaultLayouts, ButtonCombination, Layer, KeyboardLayout, GenericLayout
from .carpalx import Carpalx
from .carpalx import model01 as cmodel01
from .writer import Writer
from .util import first
from .keyboard import defaultKeyboards, LetterButton

class Annealer:
    """
    Simulated annealing.

    Override .mutate() to suit your needs. Uses exponential cooling (10^(-progress*factor))

    Inspired by https://github.com/perrygeo/simanneal
    """

    __slots__ = ('state', 'best', 'coolingFactor')

    def __init__ (self, state):
        self.state = state
        self.best = None
        self.coolingFactor = 6

    @abstractmethod
    def mutate (self):
        """ Modify current state, returns energy change """
        raise NotImplementedError ()

    def run (self, steps=10000):
        """
        Mutate the state steps times and return the best
        (state, relative energy) pair seen.

        The state must provide a .copy() method.
        """
        # this is not the absolute energy, but relative
        energy = 0
        energyMax = energy
        # figure out the max mutation impact, so we can gradually reduce the
        # amount of allowed changes (i.e. simulated annealing)
        energyDiffMax = 0

        self.best = (self.state.copy (), energy)
        # context manager makes sure the bar is closed, even if interrupted
        with tqdm (total=steps, unit='mut', smoothing=0.1) as bar:
            for i in range (steps):
                progress = i/steps
                acceptDiff = 10**-(progress*self.coolingFactor)

                # snapshot, so a rejected mutation can be undone
                prev = (self.state.copy (), energy)
                energyDiff = self.mutate ()
                newEnergy = energy+energyDiff
                energyMax = max (newEnergy, energyMax)
                energyDiffAbs = abs (energyDiff)
                energyDiffMax = max (energyDiffAbs, energyDiffMax)
                relDiff = energyDiffAbs/energyDiffMax if energyDiffMax != 0 else 1

                # accept if the energy is lower or the relative difference is small
                # (decreasing with temperature, avoids running into local minimum)
                if energyDiff < 0 or relDiff < acceptDiff:
                    # accept
                    if newEnergy < self.best[1]:
                        self.best = (self.state.copy (), newEnergy)
                    energy = newEnergy
                else:
                    # restore
                    self.state, energy = prev

                bar.set_description (desc=f'{energy:5.4f}{energyDiff:+5.4f}{relDiff:+5.4f}({acceptDiff:5.4f}) [{self.best[1]:5.4f},{energyMax:5.4f}{energyDiffMax:+5.4f}]', refresh=False)
                bar.update ()

        return self.best

def mapButton (layout, buttonMap, b : ButtonCombination) -> ButtonCombination:
    """
    Translate single-button combination b through buttonMap, using the first
    modifier of the target layer
    """
    (layerNum, _) = layout.modifierToLayer (b.modifier)
    assert len (b.buttons) == 1
    button = first (b.buttons)
    (newLayerNum, newButton) = buttonMap[(layerNum, button)]
    # XXX: this might not be correct for layer changes! use a Writer()
    # instead
    ret = ButtonCombination (layout.layers[newLayerNum].modifier[0], frozenset ([newButton]))
    return ret

class LayoutOptimizerState:
    """ Annealer state: carpalx instance plus button map """

    __slots__ = ('carpalx', 'buttonMap')

    def __init__ (self, carpalx, buttonMap):
        self.carpalx = carpalx
        self.buttonMap = buttonMap

    def copy (self):
        carpalx = self.carpalx.copy ()
        buttonMap = self.buttonMap.copy ()
        return LayoutOptimizerState (carpalx, buttonMap)

class LayoutOptimizer (Annealer):
    """
    Optimize a keyboard layout.

    The state here is
    a) a carpalx instance which knows the current state’s effort/energy
    b) a map (layerNumber: int, button: Button) → (layerNumber: int,
       button: Button)

    b can be used to map each ButtonCombination for each triad to the new
    layout. And these mapped triads can then be fed into carpalx again to
    compute a new effort/energy.

    Since the whole process is pretty slow with lots of triads (and we want to
    have alot) only those affected by a mutation (self.stateToTriad) are
    recomputed via carpalx. This gives a nice speedup of about 10x with 200k
    triads (“it takes a day” → “it takes one (long) coffee break”).
    """

    __slots__ = ('triads', 'allButtons', 'best', 'layout', 'pins', 'stateToTriad')

    def __init__ (self,
            buttonMap,
            triads: List[Tuple[ButtonCombination]],
            layout: KeyboardLayout,
            pins: FrozenSet[Tuple[int, Optional[Text]]],
            writer: Writer):
        carpalx = Carpalx (cmodel01, writer)
        super ().__init__ (LayoutOptimizerState (carpalx, buttonMap))

        self.triads = triads
        self.layout = layout
        self.pins = pins
        self.allButtons = list (buttonMap.keys ())

        # which triads are affected by which state?
        self.stateToTriad = defaultdict (set)
        for i, (t, v) in enumerate (self.triads):
            for comb in t:
                layer, _ = layout.modifierToLayer (comb.modifier)
                assert len (comb.buttons) == 1
                button = first (comb.buttons)
                self.stateToTriad[(layer, button)].add (i)

    def _acceptMutation (self, state, a, b) -> bool:
        """ Check whether swapping the mappings of a and b is permitted """
        if a == b:
            return False

        newa = state[b]
        newb = state[a]

        # respect pins: a pin (layer, button) fixes a single button, a pin
        # (layer, None) forbids moving anything out of that layer
        if a in self.pins or b in self.pins or \
                (a[0], None) in self.pins and newa[0] != a[0] or \
                (b[0], None) in self.pins and newb[0] != b[0]:
            return False

        return True

    def mutate (self, withEnergy=True):
        """
        Single step to find a neighbor

        Returns the effort change, or None if withEnergy is False (the
        carpalx instance is not updated then, see _resetEnergy).
        """
        buttonMap = self.state.buttonMap
        while True:
            a = random.choice (self.allButtons)
            b = random.choice (self.allButtons)
            if self._acceptMutation (self.state.buttonMap, a, b):
                break
        if not withEnergy:
            buttonMap[b], buttonMap[a] = buttonMap[a], buttonMap[b]
            return

        carpalx = self.state.carpalx
        oldEffort = carpalx.effort
        #logging.info (f'old effort is {oldEffort}')

        # see which *original* buttons are affected by the change, then map all
        # triads according to state, remove them and re-add them after the swap
        affected = set (chain (self.stateToTriad[a], self.stateToTriad[b]))
        for i in affected:
            t, v = self.triads[i]
            newTriad = tuple (mapButton (self.layout, buttonMap, x) for x in t)
            carpalx.removeTriad (newTriad, v)
            #logging.info (f'removing triad {newTriad} {v}')

        #logging.info (f'swapping {buttonMap[a]} and {buttonMap[b]}')
        buttonMap[b], buttonMap[a] = buttonMap[a], buttonMap[b]

        for i in affected:
            t, v = self.triads[i]
            newTriad = tuple (mapButton (self.layout, buttonMap, x) for x in t)
            carpalx.addTriad (newTriad, v)
        newEffort = carpalx.effort
        #logging.info (f'new effort is {newEffort}')

        return newEffort-oldEffort

    def energy (self):
        """ Current system energy """
        return self.state.carpalx.effort

    def _resetEnergy (self):
        # if the user calls mutate(withEnergy=False) (for speed) the initial
        # energy is wrong. thus, we need to recalculate it here.
        carpalx = self.state.carpalx
        buttonMap = self.state.buttonMap
        carpalx.reset ()
        for t, v in self.triads:
            newTriad = tuple (mapButton (self.layout, buttonMap, x) for x in t)
            carpalx.addTriad (newTriad, v)
        logging.info (f'initial effort is {carpalx.effort}')

    def run (self, steps=10000):
        """ Recompute the effort from scratch, then anneal """
        self._resetEnergy ()
        return super().run (steps)

def parsePin (s: Text):
    """
    Parse --pin argument

    Format is layer[,button] separated by semicolons. Returns a frozenset of
    (layer number, button name or None).
    """
    pins = []
    for p in s.split (';'):
        p = p.split (',', 1)
        layer = int (p[0])
        button = p[1] if len (p) > 1 else None
        pins.append ((layer, button))
    return frozenset (pins)

def optimize ():
    """
    Command line entry point: read pickled stats from stdin, optimize the
    layout and write the result as YAML to stdout. Returns the exit code.
    """
    parser = argparse.ArgumentParser(description='Optimize keyboard layout.')
    parser.add_argument('-l', '--layout', metavar='LAYOUT', help='Keyboard layout name')
    parser.add_argument('-k', '--keyboard', metavar='KEYBOARD',
            default='ibmpc105', help='Physical keyboard name')
    parser.add_argument('--triad-limit', dest='triadLimit', metavar='NUM',
            type=int, default=0, help='Limit number of triads to use')
    parser.add_argument('-n', '--steps', type=int, default=10000, help='Number of iterations')
    parser.add_argument('-r', '--randomize', action='store_true', help='Randomize layout before optimizing')
    parser.add_argument('-p', '--pin', type=parsePin, help='Pin these layers/buttons')

    args = parser.parse_args()

    logging.basicConfig (level=logging.INFO)

    # NOTE: unpickling executes arbitrary code, only feed trusted input
    stats = pickle.load (sys.stdin.buffer)

    keyboard = defaultKeyboards[args.keyboard]
    layout = defaultLayouts[args.layout].specialize (keyboard)
    writer = Writer (layout)
    triads = stats['triads'].triads

    logging.info (f'using keyboard {keyboard.name}, layout {layout.name} '
            f'and {args.triadLimit}/{len (triads)} triads')

    # limit number of triads to increase performance
    triads = list (sorted (triads.items (), key=itemgetter (1), reverse=True))
    if args.triadLimit > 0:
        triads = triads[:args.triadLimit]

    # map layer+button combinations, because a layer may have multiple modifier
    # keys (→ can’t use ButtonCombination)
    keys = []
    values = []
    for i, l in enumerate (layout.layers):
        # get all available keys from the keyboard instead the layout, so
        # currently unused keys are considered as well
        for k in keyboard.keys ():
            # ignore buttons that are not letter keys for now. Also do not
            # mutate modifier key positions.
            # XXX: only works for single-button-modifier
            if not isinstance (k, LetterButton) or layout.isModifier (frozenset ([k])):
                logging.info (f'ignoring {k}')
                continue
            keys.append ((i, k))
            values.append ((i, k))
    buttonMap = dict (zip (keys, values))

    # args.pin is None if --pin was not given
    pins = frozenset ((x, keyboard[y] if y else None) for x, y in (args.pin or ()))

    opt = LayoutOptimizer (buttonMap, triads, layout, pins, writer)
    if args.randomize:
        logging.info ('randomizing initial layout')
        for i in range (len (buttonMap)*2):
            opt.mutate (withEnergy=False)
    try:
        state, relEnergy = opt.run (steps=args.steps)
    except KeyboardInterrupt:
        logging.info ('interrupted')
        return 1

    # the annealer may have stopped on a state worse than the best one. Make
    # the best state current, so the energy reported and verified below
    # belongs to the layout that is actually written out.
    opt.state = state
    energy = opt.energy ()
    optimalButtonMap = state.buttonMap

    # plausibility checks: 1:1 mapping for every button
    assert set (optimalButtonMap.keys ()) == set (optimalButtonMap.values ())
    opt._resetEnergy ()
    expectEnergy = opt.energy ()
    # there may be some error due to floating point semantics
    assert abs (expectEnergy - energy) < 0.0001, (expectEnergy, energy)

    layers = [Layer (modifier=[], layout=dict ()) for l in layout.layers]
    for i, l in enumerate (layout.layers):
        for m in l.modifier:
            layers[i].modifier.append ([k.name for k in m])
        for k, v in l.layout.items ():
            try:
                (newLayer, newK) = optimalButtonMap[(i, k)]
            except KeyError:
                # not found, probably not used and thus not mapped
                print ('key', i, k, 'not in mapping table, assuming id()', file=sys.stderr)
                layers[i].layout[k.name] = v
            else:
                # the new layout is keyed by button name, not Button
                assert newK.name not in layers[newLayer].layout
                layers[newLayer].layout[newK.name] = v

    newLayout = GenericLayout (f'{layout.name}-new', layers)
    print (f'# steps: {args.steps}\n# keyboard: {args.keyboard}\n# layout: {args.layout}\n# triads: {len (triads)}\n# energy: {energy}')
    yaml.dump (newLayout.serialize (), sys.stdout)

    print (f'final energy {energy}', file=sys.stderr)

    return 0

# diff --git a/lulua/plot.py b/lulua/plot.py
# new file mode 100644 index 0000000..2cd7759 (146 lines)

# Copyright (c) 2019 lulua contributors
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +import sys, argparse, json, unicodedata, pickle, logging +from operator import itemgetter +from bokeh.plotting import figure +from bokeh.models import ColumnDataSource +from bokeh.embed import json_item + +from .layout import * +from .keyboard import defaultKeyboards +from .util import limit +from .writer import Writer +from .carpalx import Carpalx, model01 + +def letterfreq (args): +    """ Map key combinations to their text, bin it and plot sorted distribution """ + +    # show unicode class "letters other" only +    whitelistCategory = {'Lo'} + +    stats = pickle.load (sys.stdin.buffer) + +    # XXX: add layout to stats? +    keyboard = defaultKeyboards['ibmpc105'] +    layout = defaultLayouts[args.layout].specialize (keyboard) + +    xdata = [] +    xlabel = [] +    ydata = [] +    ydataAbs = [] + +    # letter-based binning, in case multiple buttons are mapped to the same +    # letter. 
+    binned = defaultdict (int) +    for k, v in stats['simple'].combinations.items (): +        # assuming multiple characters have the same category +        text = layout.getText (k) +        category = unicodedata.category (text[0]) +        if category in whitelistCategory: +            binned[text] += v +    combinationTotal = sum (binned.values ()) +    logging.info (f'total binned combinations {combinationTotal}') + +    for i, (k, v) in enumerate (sorted (binned.items (), key=itemgetter (1))): +        xdata.append (i) +        xlabel.append (k) +        ydata.append (v/combinationTotal*100) +        ydataAbs.append (v) + +    source = ColumnDataSource(data=dict(x=xdata, letters=xlabel, rel=ydata, abs=ydataAbs)) +    p = figure(plot_width=1000, plot_height=500, x_range=xlabel, sizing_mode='scale_both', tooltips=[('frequency', '@rel%'), ('count', '@abs')]) +    p.vbar(x='letters', width=0.5, top='rel', color="#dc322f", source=source) +    p.xgrid.grid_line_color = None +    p.xaxis.major_label_text_font_size = "2em" +    p.xaxis.major_label_text_font_size = "2em" + +    json.dump (json_item (p), sys.stdout) + +    return 0 + +def triadfreq (args): +    stats = pickle.load (sys.stdin.buffer) + +    # XXX: add layout to stats? +    keyboard = defaultKeyboards['ibmpc105'] +    layout = defaultLayouts[args.layout].specialize (keyboard) +    writer = Writer (layout) + +    # letter-based binning, in case multiple buttons are mapped to the same +    # letter. 
def plot ():
    """
    Command line entry point, runs the plot selected by the first
    positional argument.

    The positional argument is converted to the matching plot function
    while parsing, so args.kind is a callable. An unknown name is reported
    as a regular argparse usage error instead of an uncaught KeyError.

    :return: return value of the selected plot function
    """
    plotKinds = {
        'letterfreq': letterfreq,
        'triadfreq': triadfreq,
        }
    known = ', '.join (sorted (plotKinds))

    def plotKind (name):
        """ Map the name of a plot to the function creating it """
        try:
            return plotKinds[name]
        except KeyError:
            # argparse only turns ArgumentTypeError, TypeError and
            # ValueError into a usage message
            raise argparse.ArgumentTypeError (f'unknown plot kind {name!r}, choose from {known}') from None

    parser = argparse.ArgumentParser (description='Plot stuff')
    parser.add_argument('-l', '--layout', metavar='LAYOUT', help='Keyboard layout name')
    parser.add_argument('kind', type=plotKind, help=f'Plot kind, one of {known}')

    args = parser.parse_args()

    logging.basicConfig (level=logging.INFO)

    return args.kind (args)
def render (self):
    """
    Lay out all buttons of the keyboard row by row.

    The cursor is reset to the origin first. Returns a tuple of the SVG
    group containing every button and the (width, height) covered by the
    rows, expressed in the same unit as the renderer settings.
    """
    settings = self.settings
    margin = settings.buttonMargin
    self.cursor = [0, 0]

    # compute row widths so we can apply margin correction, balancing
    # out their widths (they are only logged for now)
    rowWidth = []
    for left, right in self.keyboard:
        total = 0
        for btn in left:
            total += self.buttonWidth (btn) + margin
        total += settings.middleGap
        for btn in right:
            total += self.buttonWidth (btn) + margin
        # there is no margin after the last button of a row
        rowWidth.append (total - margin)
    logging.info (f'row width {rowWidth}')

    group = svgwrite.container.Group ()

    def place (buttons):
        """ Add buttons to the group, moving the cursor to the right """
        for btn in buttons:
            element, width = self._addButton (btn)
            group.add (element)
            self.cursor[0] += width + margin

    maxWidth = 0
    for left, right in self.keyboard:
        place (left)
        self.cursor[0] += settings.middleGap
        place (right)
        # remember the widest row, then start the next one
        self.cursor[1] += settings.buttonWidth + margin
        maxWidth = max (self.cursor[0], maxWidth)
        self.cursor[0] = 0

    return group, (maxWidth, self.cursor[1])
(mod) +            buttonText = [layerToArrow[i]] +            gclass.append ('modifier') +        else: +            buttonText = list (map (toDisplayText, self.layout.getButtonText (btn))) + +        # background rect +        if any (buttonText): +            b = svgwrite.shapes.Rect ( +                    insert=((xoff+settings.shadowOffset)*em, (yoff+settings.shadowOffset)*em), +                    size=(width*em, settings.buttonWidth*em), +                    rx=settings.rounded*em, +                    ry=settings.rounded*em, +                    class_='shadow') +            g.add (b) +        else: +            gclass.append ('unused') +        b = svgwrite.shapes.Rect ( +                insert=(xoff*em, yoff*em), +                size=(width*em, settings.buttonWidth*em), +                rx=settings.rounded*em, +                ry=settings.rounded*em, +                class_='cap') +        g.add (b) + +        g.attribs['class'] = ' '.join (gclass) + +        # button marker +        if btn.isMarked: +            start = (xoff+width*0.3, yoff+settings.buttonWidth*0.9) +            end = (xoff+width*0.7, yoff+settings.buttonWidth*0.9) +            # its shadow +            l = svgwrite.shapes.Line ( +                    map (lambda x: (x+settings.shadowOffset)*em, start), +                    map (lambda x: (x+settings.shadowOffset)*em, end), +                    stroke_width=0.07*em, +                    class_='marker-shadow') +            g.add (l) +            # the marker itself +            l = svgwrite.shapes.Line ( +                    map (em, start), +                    map (em, end), +                    stroke_width=0.07*em, +                    class_='marker') +            g.add (l) + +        # clock-wise from bottom-left to bottom-right +        textParam = [ +            (-0.5, 0.6, 'layer-1'), +            (-0.5, -1/3, 'layer-2'), +            (0.5, -1/3, 'layer-3'), +            (0.5, 2/3, 'layer-4'), +            ] +        for text, 
def unique (l, key):
    """
    Drop items of l sharing the same key (v).

    For every distinct key the last item seen is kept, while the order
    follows the first appearance of each key. Returns a dict values view.
    """
    byKey = {}
    for item in l:
        byKey[key (item)] = item
    return byKey.values ()
'svg': +        style = """ +                svg { +                    font-family: "IBM Plex Arabic"; +                    font-size: 25pt; +                } +                .button.unused { +                    opacity: 0.6; +                } +                .button .label .layer-1 { +                } +                .button.modifier .label .layer-1 { +                    font-size: 80%; +                } +                .button .label .layer-2, .button .label .layer-3, .button .label .layer-4 { +                    font-size: 80%; +                    font-weight: 200; +                } +                .button .label .controlchar { +                font-size: 40%; font-family: sans-serif; +                } +                .button .cap { +                    fill: #eee8d5; +                } +                .button.finger-little .shadow { +                    fill: #dc322f; /* red */ +                } +                .button.finger-ring .shadow { +                    fill: #268bd2; /* blue */ +                } +                .button.finger-middle .shadow { +                    fill: #d33682; /* magenta */ +                } +                .button.finger-index .shadow { +                    fill: #6c71c4; /* violet */ +                } +                .button.finger-thumb .shadow { +                    fill: #2aa198; /* cyan */ +                } +                .button .label { +                    fill: #657b83; +                } +                .button .controllabel { +                    stroke: #657b83; +                    fill: none; +                } +                .button .marker-shadow { +                    stroke: #93a1a1; +                } +                .button .marker { +                    stroke: #fdf6e3; +                } +                """ +        r = Renderer (keyboard, layout=layout, writer=writer) +        rendered, (w, h) = r.render () +        d = svgwrite.Drawing(args.output, size=(w*em, h*em), 
profile='full') +        d.defs.add (d.style (style)) +        d.add (rendered) +        d.save() +    elif args.format == 'xmodmap': +        with open (args.output, 'w') as fd: +            # inspired by https://neo-layout.org/neo_de.xmodmap +            fd.write ('\n'.join ([ +                '!! auto-generated xmodmap', +                f'!! layout: {layout.name}', +                f'!! generated: {datetime.utcnow ()}', +                '', +                'clear Lock', +                'clear Mod2', +                'clear Mod3', +                'clear Mod5', +                '', +                ])) + +            keycodeMap = defaultdict (list) +            # XXX: this is an ugly quirk to get layer 4 working +            # layers: 1, 2, 3, 5, 4, None, 6, 7 +            for i in (0, 1, 2, 4, 3, 99999, 5, 6): +                if i >= len (layout.layers): +                    for btn in unique (keyboard.keys (), key=attrgetter ('xorgKeycode')): +                        keycodeMap[btn].append ('NoSymbol') +                    continue +                l = layout.layers[i] +                # space button shares the same keycode and must be removed +                for btn in unique (keyboard.keys (), key=attrgetter ('xorgKeycode')): +                    if not layout.isModifier (frozenset ([btn])): +                        text = l.layout.get (btn) +                        if not text: +                            if btn.name == 'Br_bs' and i == 0: +                                text = 'BackSpace' +                            else: +                                text = 'NoSymbol' +                        else: +                            # some keys cannot be represented by unicode +                            # characters and must be mapped +                            specialMap = { +                                '\t': 'Tab', +                                '\n': 'Return', +                                ' ': 'space', +                                
} +                            text = specialMap.get (text, f'U{ord (text):04X}') +                        keycodeMap[btn].append (text) +            # XXX layer modmap functionality is fixed for now +            layerMap = [ +                [], +                ['Shift_L', 'Shift_Lock'], +                ['ISO_Group_Shift', 'ISO_Group_Shift', 'ISO_First_Group', 'NoSymbol'], +                ['ISO_Level3_Shift', 'ISO_Level3_Shift', 'ISO_Group_Shift', 'ISO_Group_Shift', 'ISO_Level3_Lock', 'NoSymbol'], +                ] +            for i, l in enumerate (layout.layers): +                for m in l.modifier: +                    assert len (m) <= 1, ('multi-key modifier not supported', m) +                    if not m: +                        continue +                    btn = first (m) +                    keycodeMap[btn] = layerMap[i] + +            for btn, v in keycodeMap.items (): +                v = '\t'.join (v) +                fd.write (f'!! {btn.name}\nkeycode {btn.xorgKeycode} = {v}\n') +            fd.write ('\n'.join (['add Mod3 = ISO_First_Group', 'add Mod5 = ISO_Level3_Shift', ''])) + diff --git a/lulua/stats.py b/lulua/stats.py new file mode 100644 index 0000000..3efa1c0 --- /dev/null +++ b/lulua/stats.py @@ -0,0 +1,222 @@ +# Copyright (c) 2019 lulua contributors +#  +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +#  +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. 
def updateDictOp (a, b, op):
    """
    Update dict a in place by merging the items of dict b into it.

    Keys missing in a are copied over (the value object itself is stored,
    not a copy), nested dicts are merged recursively and all other values
    are combined with op (old, new).

    :raises AssertionError: if a list value has to be merged or a dict
        meets a non-dict value
    """
    for k, v in b.items ():
        if k not in a:
            # simple
            a[k] = v
        elif isinstance (v, dict):
            # recursive
            assert isinstance (a[k], dict)
            updateDictOp (a[k], v, op)
        elif isinstance (v, list):
            # raised explicitly, so lists are not silently ignored when
            # asserts are disabled
            raise AssertionError ('merging lists is not supported')
        else:
            a[k] = op (a[k], v)

class Stats:
    """ Base class of the statistics collectors, subclasses override name """
    name = 'invalid'

class RunlenStats (Stats):
    """
    Distribution of run lengths, i.e. the number of consecutive button
    presses made by the same hand and by the same (hand, finger) pair.

    A run is recorded when a different hand/finger takes over. SkipEvent
    discards the run currently in progress.
    """

    __slots__ = ('lastHand', 'perHandRunlenDist', 'curPerHandRunlen',
            'fingerRunlen', 'lastFinger', 'fingerRunlenDist', 'writer')

    name = 'runlen'

    def __init__ (self, writer):
        self.writer = writer

        self.lastHand = None
        # hand -> run length -> number of occurrences
        self.perHandRunlenDist = dict ((x, defaultdict (int)) for x in Direction)
        self.curPerHandRunlen = 0

        self.lastFinger = None
        # (hand, finger) -> run length -> number of occurrences
        self.fingerRunlenDist = dict (((x, y), defaultdict (int)) for x, y in product (iter (Direction), iter (FingerType)))
        self.fingerRunlen = 0

    def process (self, event):
        """ Account for a single event emitted by the writer """
        if isinstance (event, ButtonCombination):
            assert len (event.buttons) == 1
            thisHand, thisFinger = self.writer.getHandFinger (first (event.buttons))
            # compare against None, the hand itself may be falsy
            if self.lastHand is not None and thisHand != self.lastHand:
                self.perHandRunlenDist[self.lastHand][self.curPerHandRunlen] += 1
                self.curPerHandRunlen = 0
            self.curPerHandRunlen += 1
            self.lastHand = thisHand

            fingerKey = (thisHand, thisFinger)
            if self.lastFinger is not None and fingerKey != self.lastFinger:
                # the finished run was made by the previous finger, not by
                # the one taking over
                self.fingerRunlenDist[self.lastFinger][self.fingerRunlen] += 1
                self.fingerRunlen = 0
            self.fingerRunlen += 1
            self.lastFinger = fingerKey
        elif isinstance (event, SkipEvent):
            # reset state, we don’t know which button to press
            self.lastHand = None
            self.curPerHandRunlen = 0

            self.lastFinger = None
            self.fingerRunlen = 0

    def update (self, other):
        """ Add the distributions collected by other to this instance """
        updateDictOp (self.perHandRunlenDist, other.perHandRunlenDist, operator.add)
        updateDictOp (self.fingerRunlenDist, other.fingerRunlenDist, operator.add)
def unpickleAll (fd):
    """
    Generator yielding every object pickled into the binary file fd, one
    after another, until the end of the file is reached.
    """
    try:
        while True:
            yield pickle.load (fd)
    except EOFError:
        # raised by pickle once no more data is available
        return
buttonPresses = sum (stats['simple'].buttons.values ()) +    for k, v in sorted (stats['simple'].buttons.items (), key=itemgetter (1)): +        print (f'{k} {v:10d} {v/buttonPresses*100:5.1f}%') +    print ('combinations') +    combinationTotal = sum (stats['simple'].combinations.values ()) +    for k, v in sorted (stats['simple'].combinations.items (), key=itemgetter (1)): +        t = layout.getText (k) +        print (f'{t:4s} {k} {v:10d} {v/combinationTotal*100:5.1f}%') +    print ('unknown') +    for k, v in sorted (stats['simple'].unknown.items (), key=itemgetter (1)): +        print (f'{k!r} {v:10d}') + +    #print ('fingers') +    #for k, v in sorted (stats['simple'].fingers.items (), key=itemgetter (0)): +    #    print (f'{k[0].name:5s} {k[1].name:6s} {v:10d} {v/buttonPresses*100:5.1f}%') + +    #print ('hands') +    #for hand, fingers in groupby (sorted (stats['simple'].fingers.keys ()), key=itemgetter (0)): +    #    used = sum (map (lambda x: stats['simple'].fingers[x], fingers)) +    #    print (f'{hand.name:5s} {used:10d} {used/buttonPresses*100:5.1f}%') + +    combined = defaultdict (int) +    for hand, dist in stats['runlen'].perHandRunlenDist.items (): +        print (hand) +        total = sum (dist.values ()) +        for k, v in sorted (dist.items (), key=itemgetter (0)): +            print (f'{k:2d} {v:10d} {v/total*100:5.1f}%') +            combined[k] += v +    print ('combined') +    total = sum (combined.values ()) +    for k, v in combined.items (): +        print (f'{k:2d} {v:10d} {v/total*100:5.1f}%') + +    for triad, count in sorted (stats['triads'].triads.items (), key=itemgetter (1)): +        print (f'{triad} {count:10d}') +    effort = Carpalx (cmodel01, writer) +    effort.addTriads (stats['triads'].triads) +    print ('total effort (carpalx)', effort.effort) + diff --git a/lulua/test_carpalx.py b/lulua/test_carpalx.py new file mode 100644 index 0000000..ac72a14 --- /dev/null +++ b/lulua/test_carpalx.py @@ -0,0 +1,201 @@ +# 
Copyright (c) 2019 lulua contributors +#  +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +#  +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +#  +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +import pytest + +from .carpalx import Carpalx, model01, ModelParams +from .keyboard import defaultKeyboards +from .layout import defaultLayouts, LEFT, RIGHT, INDEX, MIDDLE, RING, LITTLE +from .writer import Writer + +strokePathData = [ +    # hands +    (('Dl1', 'Dl3', 'Dr7'), 0, 0), +    (('Dl1', 'Dr7', 'Cr7'), 0, 0), +    (('Dr1', 'Dl5', 'Cl1'), 0, 0), + +    (('Dl1', 'Dr7', 'Cl1'), 0, 1), +    (('Dr1', 'Bl1', 'Cr1'), 0, 1), + +    (('Dr1', 'Br1', 'Cr1'), 0, 2), +    (('Dl1', 'Bl1', 'Cl1'), 0, 2), + +    # rows +    (('Dl1', 'Dl3', 'Dr7'), 1, 0), + +    (('Dl3', 'Dl1', 'Er4'), 1, 1), +    (('Cl3', 'Dl1', 'Dr4'), 1, 1), +    (('Cl1', 'Cl2', 'El1'), 1, 1), + +    (('Dl1', 'Dl1', 'Cr5'), 1, 2), +    (('El1', 'El1', 'Cr5'), 1, 2), + +    (('El6', 'Dl1', 'Er4'), 1, 3), + +    (('Cl3', 'Dl3', 'Er4'), 1, 4), +    (('Bl3', 'Dl3', 'Er4'), 1, 4), + +    (('Dl1', 'Cl3', 'El6'), 1, 5), # aeb +    (('Dr7', 'Cl3', 'Er5'), 1, 5), # hen +    (('Bl1', 'Dl3', 'Bl1'), 1, 5), # XXX not sure about this one + +    (('El6', 'Dl3', 'Cl1'), 1, 6), # bdq +    (('El6', 'Cl3', 'Bl1'), 1, 6), # bdq + +    (('Dl1', 'El6', 'Cr6'), 1, 7), # abu +    (('Dl1', 'El3', 'Cl3'), 1, 7), # axe + +    # fingers +    (('Dl1', 'Dl2', 'Dl3'), 2, 0), # asd +    (('Cr3', 'Cr6', 'Dl1'), 2, 0), # pua + +    (('Dl1', 'Dl1', 'Dl3'), 2, 1), # aad +    (('Dl1', 'Dl2', 'Dl2'), 2, 1), # ass +    (('Cr3', 'Cr4', 'Cr4'), 2, 1), # poo +    (('Er4', 'Er4', 'Cl1'), 2, 1), # mmq + +    (('El6', 'Cr5', 'Dr7'), 2, 2), # bih +    (('Dl4', 'Dl1', 'Dl3'), 2, 2), # fad + +    (('Cr7', 'Dl1', 'Dr5'), 2, 3), # yak +    (('Er5', 'Cl3', 'Cr3'), 2, 3), # nep + +    (('Dr5', 'Cl4', 'Cr5'), 2, 4), # kri +    (('Er4', 'Dl1', 'Dr6'), 2, 4), # maj +    (('Dl1', 'El6', 'El2'), 2, 4), # abz +    (('Dl1', 'Dl2', 'Dl1'), 2, 4), # asa +    (('Dl1', 'Dl3', 'Dl1'), 2, 4), # ada + +    (('El4', 'Cl3', 'Cl3'), 2, 5), # cee +    (('Dr4', 'Dr4', 'Cr4'), 2, 5), # llo +    (('El6', 'Cl4', 'El6'), 2, 5), # brb + +    (('Dl1', 'El6', 'Cl4'), 2, 6), 
# abr +    (('El6', 'Dl3', 'Cl3'), 2, 6), # bde +    (('El6', 'El5', 'El2'), 2, 6), # bvz + +    (('Cl5', 'Dl4', 'El6'), 2, 7), # tfb +    (('Dl3', 'Cl3', 'El4'), 2, 7), # dec +    ] + +# Testing components, since they are independent +@pytest.mark.parametrize("t, i, expect", strokePathData) +def test_strokePath (t, i, expect): +    keyboard = defaultKeyboards['ibmpc105'] +    layout = defaultLayouts['ar-linux'].specialize (keyboard) +    writer = Writer (layout) +    c = Carpalx (model01, writer) +    t = tuple (map (keyboard.find, t)) +    assert c._strokePath (t)[i] == expect + +# null model: all parameters are zero +nullmodel = ModelParams ( +    kBPS = (0, 0, 0), +    k123S = (0, 0, 0, 0), +    # w0, wHand, wRow, wFinger +    w0HRF = (0, 0, 0, 0), +    pHand = {LEFT: 0, RIGHT: 0}, +    pRow = (0, 0), +    # symmetric penalties +    pFinger = { +        LEFT: { +            INDEX: 0, +            MIDDLE: 0, +            RING: 0, +            LITTLE: 0, +            }, +        RIGHT: { +            INDEX: 0, +            MIDDLE: 0, +            RING: 0, +            LITTLE: 0, +            }, +        }, +    # fHand, fRow, fFinger +    fHRF = (0, 0, 0), +    # baseline key effort +    baselineEffort = { +        'Bl1': 0, +        'Bl2': 0, +        'Bl3': 0, +        'Bl4': 0, +        'Bl5': 0, +        'Bl6': 0, +        'Bl7': 0, +        'Br6': 0, +        'Br5': 0, +        'Br4': 0, +        'Br3': 0, +        'Br2': 0, +        'Br1': 0, + +        'Cl1': 0, +        'Cl2': 0, +        'Cl3': 0, +        'Cl4': 0, +        'Cl5': 0, +        'Cr7': 0, +        'Cr6': 0, +        'Cr5': 0, +        'Cr4': 0, +        'Cr3': 0, +        'Cr2': 0, +        'Cr1': 0, + +        'Dl_caps': 0, # XXX: dito +        'Dl1': 0, +        'Dl2': 0, +        'Dl3': 0, +        'Dl4': 0, +        'Dl5': 0, +        'Dr7': 0, +        'Dr6': 0, +        'Dr5': 0, +        'Dr4': 0, +        'Dr3': 0, +        'Dr2': 0, +        'Dr1': 0, # XXX: not in the original 
model + +        'El_shift': 0,  # XXX: dito +        'El1': 0, # XXX: dito +        'El2': 0, +        'El3': 0, +        'El4': 0, +        'El5': 0, +        'El6': 0, +        'Er5': 0, +        'Er4': 0, +        'Er3': 0, +        'Er2': 0, +        'Er1': 0, +        'Er_shift': 0, # XXX: dito +        }, +    ) + +def test_carpalx (): +    keyboard = defaultKeyboards['ibmpc105'] +    layout = defaultLayouts['ar-linux'].specialize (keyboard) +    writer = Writer (layout) +    c = Carpalx (nullmodel, writer) + +    assert c.effort == 0.0 +    #c.addTriads (x) +    assert c.effort == 0.0 + diff --git a/lulua/test_keyboard.py b/lulua/test_keyboard.py new file mode 100644 index 0000000..b49a40e --- /dev/null +++ b/lulua/test_keyboard.py @@ -0,0 +1,59 @@ +import pytest + +from .keyboard import defaultKeyboards, Button + +def test_defaults (): +    k = defaultKeyboards['ibmpc105'] +    assert k.name == 'ibmpc105' + +    with pytest.raises (KeyError): +        k = defaultKeyboards['nonexistent'] + +    assert len (list (defaultKeyboards)) > 0 + +def test_keys_unique (): +    for kbd in defaultKeyboards: +        # both, ids and names must be unique +        havei = set () +        havename = set () +        for btn in kbd.keys (): +            assert btn.i not in havei +            havei.add (btn.i) + +            assert btn.name not in havename +            havename.add (btn.name) + +def test_keyboard_getRow (): +    k = defaultKeyboards['ibmpc105'] +    for btn, expect in [(k['Bl1'], 0), (k['Cr1'], 1), (k['Dr1'], 2)]: +        assert k.getRow (btn) == expect +     +def test_keyboard_getattr (): +    k = defaultKeyboards['ibmpc105'] +    assert k['Dr1'] == k.find ('Dr1') +    assert k['CD_ret'] == k.find ('CD_ret') +    assert k['Cr1'] != k.find ('El1') + +def test_button_uniqname (): +    a = Button ('a') +    assert a.name == 'a' + +    b = Button ('b') +    assert b.name == 'b' + +    assert a != b + +    c = Button ('a') +    assert c.name == 'a' + +    assert 
a == c +    assert b != c + +    d = dict () +    d[a] = 1 +    assert a in d +    assert b not in d +    assert c in d +    d[b] = 2 +    assert b in d + diff --git a/lulua/test_layout.py b/lulua/test_layout.py new file mode 100644 index 0000000..5c8bb7f --- /dev/null +++ b/lulua/test_layout.py @@ -0,0 +1,75 @@ +# Copyright (c) 2019 lulua contributors +#  +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +#  +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +#  +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +import unicodedata +from itertools import product + +import pytest + +from .layout import defaultLayouts, GenericLayout, ButtonCombination +from .keyboard import defaultKeyboards + +@pytest.mark.parametrize("layout", defaultLayouts, ids=[l.name for l in defaultLayouts]) +def test_atomic (layout): +    """ Make sure layout text strings are atomic (i.e. 
not decomposeable) """ +    for _, text in layout.buttons (): +        assert isinstance (text, str) +        for char in text: +            d = unicodedata.decomposition (char) +            # allow compat decompositions like … -> ... +            if not d.startswith ('<compat> ') and not d.startswith ('<isolated> ') and not d.startswith ('<medial> ') and not d.startswith ('<initial> '): +                assert d == '', char + +@pytest.mark.parametrize("layout", defaultLayouts, ids=[l.name for l in defaultLayouts]) +def test_genericlayout_len (layout): +    assert len (layout) == len (list (layout.buttons ())) + +@pytest.mark.parametrize("layout", defaultLayouts, ids=[l.name for l in defaultLayouts]) +def test_layout_serialize (layout): +    assert GenericLayout.deserialize (layout.serialize ()) == layout + +@pytest.mark.parametrize("a, b", product (defaultLayouts, defaultLayouts)) +def test_layout_equality (a, b): +    if a.name == b.name: +        # this is true for our default layouts only +        assert a == b +    else: +        assert a != b + +def test_layout_isModifier (): +    keyboard = defaultKeyboards['ibmpc105'] +    layout = defaultLayouts['ar-linux'].specialize (keyboard) +    assert layout.isModifier (frozenset ([keyboard['El_shift']])) +    assert layout.isModifier (frozenset ([keyboard['Er_shift']])) +    assert not layout.isModifier (frozenset ([keyboard['Dr1']])) + +def test_buttoncomb_eq (): +    a = ButtonCombination (frozenset (['a']), frozenset (['b'])) +    b = ButtonCombination (frozenset (['a']), frozenset (['b'])) +    c = ButtonCombination (frozenset (['a']), frozenset (['c'])) + +    assert a == b +    assert a != c and b != c + +    d = dict () +    d[a] = 'a' +    assert b in d +    assert c not in d + diff --git a/lulua/test_optimize.py b/lulua/test_optimize.py new file mode 100644 index 0000000..7c4c193 --- /dev/null +++ b/lulua/test_optimize.py @@ -0,0 +1,39 @@ +# Copyright (c) 2019 lulua contributors +#  +# Permission is hereby 
granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +#  +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +#  +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
class NullAnnealer (Annealer):
    """ Minimal annealer for the tests; its energy is the sum of the state """

    def energy (self):
        """ Return the sum of all entries of the current state """
        total = 0
        for value in self.state:
            total += value
        return total

    def mutate (self):
        """ Lower every state entry by one and return the resulting energy change """
        before = self.energy ()
        self.state = [value - 1 for value in self.state]
        after = self.energy ()
        return after - before

def test_null_annealer ():
    """ After run (1) every entry of the start state must have dropped by one """
    initial = [1, 2, 3]
    shifted = [0, 1, 2]

    # hand over a copy, so the expected sums below do not depend on the annealer
    dut = NullAnnealer (list (initial))
    optimal, energy = dut.run (1)

    assert optimal == shifted
    assert energy == sum (shifted) - sum (initial)
    assert dut.energy () == sum (shifted)
def test_updateDictOp ():
    """ updateDictOp must merge into its first argument and leave the second one alone """
    # flat dictionaries: common key 1 is added up, key 7 is taken over
    target = {1: 3}
    source = {1: 11, 7: 13}
    updateDictOp (target, source, operator.add)
    assert source == {1: 11, 7: 13}
    assert target == {1: 3+11, 7: 13}

    # one level of nesting: the operator is applied to the inner values
    target = {'foo': {1: 3}}
    source = {'foo': {1: 7}}
    updateDictOp (target, source, operator.add)
    assert source == {'foo': {1: 7}}
    assert target == {'foo': {1: 3+7}}
def toButtonComb (keyboard, data):
    """
    Build a ButtonCombination from sequences of button names.

    Every sequence in data is resolved through keyboard.find and turned into a
    frozenset; the frozensets are passed positionally to ButtonCombination.
    """
    groups = []
    for names in data:
        groups.append (frozenset (keyboard.find (name) for name in names))
    return ButtonCombination (*groups)

def test_writer ():
    """ Indexing a Writer by hand and then by finger returns that finger """
    kbd = defaultKeyboards['ibmpc105']
    writer = Writer (defaultLayouts['ar-linux'].specialize (kbd))

    finger = writer[LEFT][RING]
    assert finger.hand.position == LEFT
    assert finger.number == RING

# (input text, expected list of (matched text, button names or SkipEvent))
typeData = [
    (
        'شسضص',
        [
            ('ش', (tuple (), ('Dl1', ))),
            ('س', (tuple (), ('Dl2', ))),
            ('ض', (tuple (), ('Cl1', ))),
            ('ص', (tuple (), ('Cl2', ))),
        ],
    ),
    (
        'aصb',
        [
            (None, SkipEvent ('a')),
            ('ص', (tuple (), ('Cl2', ))),
            (None, SkipEvent ('b')),
        ],
    ),
    ]

@pytest.mark.parametrize("s, expect", typeData)
def test_writer_type (s, expect):
    """ Typing s must produce exactly the expected events """
    kbd = defaultKeyboards['ibmpc105']
    writer = Writer (defaultLayouts['ar-linux'].specialize (kbd))

    produced = list (writer.type (StringIO (s)))

    # button names are only resolved here, SkipEvents are compared as they are
    wanted = [
        (char, comb if isinstance (comb, SkipEvent) else toButtonComb (kbd, comb))
        for char, comb in expect
        ]
    assert produced == wanted

# (candidate combinations, index of the expected choice, previously pressed combination)
testCombs = [
    (
        [
            (('El_shift', ), ('Dr7', )),
            (('Er_shift', ), ('Dr7', )),
        ],
        0,
        None,
    ),
    (
        [
            (('El_shift', ), ('Dl5', )),
            (('Er_shift', ), ('Dl5', )),
        ],
        1,
        None,
    ),
    (
        [
            (tuple (), ('Fl_space', )),
            (tuple (), ('Fr_space', )),
        ],
        0,
        (tuple (), ('Dr7', )),
    ),
    (
        [
            (tuple (), ('Fl_space', )),
            (tuple (), ('Fr_space', )),
        ],
        1,
        (tuple (), ('Dl5', )),
    ),
    (
        [
            (tuple (), ('Fl_space', )),
            (tuple (), ('Fr_space', )),
        ],
        0,
        (('El_shift', ), ('Dr7', )),
    ),
    (
        [
            (tuple (), ('Fl_space', )),
            (tuple (), ('Fr_space', )),
        ],
        0,
        (('Er_shift', ), ('Dl5', )),
    ),
    (
        # choose the shortest combination if there’s two available
        [
            (tuple (), ('CD_ret', )),
            (('Er_shift', ), ('CD_ret', )),
            (('El_shift', ), ('CD_ret', )),
        ],
        0,
        None,
    ),
    ]

@pytest.mark.parametrize("combs, expect, prev", testCombs)
def test_writer_chooseComb (combs, expect, prev):
    """ chooseCombination must pick candidate number expect, optionally after pressing prev """
    kbd = defaultKeyboards['ibmpc105']
    writer = Writer (defaultLayouts['ar-linux'].specialize (kbd))

    if prev:
        previous = toButtonComb (kbd, prev)
        writer.press (previous)

    candidates = [toButtonComb (kbd, names) for names in combs]
    assert writer.chooseCombination (candidates) == candidates[expect]
def iterchar (fd):
    """
    Yield everything that can be read from fd, one element at a time.

    Data is fetched in batches so that not every element needs its own read
    call. Iteration ends as soon as a read returns something empty.
    """
    batchsize = 1*1024*1024
    while True:
        c = fd.read (batchsize)
        if not c:
            break
        yield from c

class Select (Filter):
    """
    html5lib token filter that only lets through tokens belonging to an
    element accepted by the predicate f. Tokens of script and style elements
    are dropped.
    """

    def __init__ (self, source, f):
        """
        :param source: token stream to filter
        :param f: callable taking a StartTag token, true if the element is wanted
        """
        Filter.__init__ (self, source)
        # nesting depth below the selected element, None while outside of it
        self.inside = None
        self.f = f

    def __iter__(self):
        # not None while the tokens of a script/style element are skipped
        isScript = None
        for token in Filter.__iter__(self):
            ttype = token['type']
            if ttype == 'StartTag':
                tname = token['name']
                if self.f (token):
                    self.inside = 0
                if tname in {'script', 'style'}:
                    isScript = 0

            if isScript is not None:
                if ttype == 'EndTag':
                    isScript -= 1
                    if isScript <= 0:
                        isScript = None
            elif self.inside is not None:
                if ttype == 'StartTag':
                    self.inside += 1
                if ttype == 'EndTag':
                    self.inside -= 1
                if self.inside <= 0:
                    self.inside = None

                # the token that closes the selection is still passed on
                yield token

class HTMLSerializer(object):
    """ Reduce a token stream to plain text """

    def serialize(self, treewalker):
        """
        Yield text fragments for the tokens of treewalker.

        Character data is passed through, whitespace runs become a single
        space, closing p and div tags become a blank line and entities are
        replaced by their value from html5lib.constants.entities. Doctype,
        opening/empty tags and comments produce no output.

        :raises ValueError: for an entity unknown to html5lib
        """
        for token in treewalker:
            ttype = token["type"]
            if ttype == "Characters":
                yield token['data']
            elif ttype == "SpaceCharacters":
                yield ' '
            elif ttype == "EndTag":
                if token["name"] in ('p', 'div'):
                    yield '\n\n'
            elif ttype == "Entity":
                name = token["name"]
                key = name + ";"
                # the lookup table lives in html5lib.constants; there is no
                # bare name `entities` and no serializeError method here
                if key not in html5lib.constants.entities:
                    raise ValueError ("Entity %s not recognized" % name)
                yield html5lib.constants.entities[key]
            elif ttype in ("Doctype", "StartTag", "EmptyTag", "Comment"):
                pass
            else:
                assert False, ttype

# predicates selecting the element that contains the article text, by source
f = dict(
    aljazeera=lambda x: x['name'] == 'div' and x['data'].get ((None, 'id')) == 'DynamicContentContainer',
    bbcarabic=lambda x: x['name'] == 'div' and x['data'].get ((None, 'property')) == 'articleBody',
    )

class LzipFile:
    """ Read-only, file-like access to an lzip file through an lzip subprocess """

    __slots__ = ('p', )

    def __init__ (self, path):
        self.p = Popen (['/usr/bin/lzip', '-c', '-d', path], stdout=PIPE)

    def __enter__ (self):
        return self

    def __exit__ (self, exc_type, exc_val, exc_tb):
        if exc_type is None:
            self.close ()
        else:
            # The consumer failed, so the output may not have been read to
            # the end. Stop lzip instead of waiting for it and do not judge
            # its exit status.
            self.p.kill ()
            self.p.stdout.close ()
            self.p.wait ()
        # never suppress the caller’s exception
        return False

    def read (self, num=None):
        """ Read up to num bytes of decompressed data, everything if num is None """
        return self.p.stdout.read (num)

    def close (self):
        """ Wait for lzip to finish and check that it exited successfully """
        self.p.wait ()
        self.p.stdout.close ()
        assert self.p.returncode == 0

def sourceHtml (selectFunc, item):
    """
    Extract the text of the element chosen by selectFunc from the
    lzip-compressed HTML file named by item.
    """
    with LzipFile (item.rstrip ()) as fd:
        document = html5lib.parse (fd)
        walker = html5lib.getTreeWalker("etree")
        stream = walker (document)
        s = HTMLSerializer()
        return ''.join (s.serialize(Select (stream, selectFunc)))

def sourceText (item):
    """ Return the contents of the lzip-compressed UTF-8 text file named by item """
    with LzipFile (item.rstrip ()) as fd:
        return fd.read ().decode ('utf-8')

def sourceJson (item):
    """ Decode item, which is a JSON document itself """
    return json.loads (item)

# extractor name (as given on the command line) to extractor function
sources = dict(
    aljazeera=partial(sourceHtml, f['aljazeera']),
    bbcarabic=partial(sourceHtml, f['bbcarabic']),
    text=sourceText,
    json=sourceJson,
    )

# replacements applied to the extracted text before it is typed
charMap = {
    'ﻻ': 'لا',
    'أ': 'أ',
    'إ': 'إ',
    'ئ': 'ئ',
    'ؤ': 'ؤ',
    ',': '،',
    'آ': 'آ',
    '%': '٪',
    '0': '٠',
    '1': '١',
    '2': '٢',
    '3': '٣',
    '4': '٤',
    '5': '٥',
    '6': '٦',
    '7': '٧',
    '8': '٨',
    '9': '٩',
    '?': '؟',
    ';': '؛',
    # nbsp
    '\u00a0': ' ',
    }

def writeWorker (args, inq, outq):
    """
    Worker process: type every item received and collect statistics.

    Items are taken from inq until None arrives. Each item is extracted with
    the source selected by args.source, mapped through charMap and typed on
    args.layout/args.keyboard. The statistics of all items are merged and put
    on outq exactly once, right before the worker returns.
    """
    keyboard = defaultKeyboards['ibmpc105']
    layout = defaultLayouts['null'].specialize (keyboard)
    w = Writer (layout)
    combined = dict ((cls.name, cls(w)) for cls in allStats)

    while True:
        # every item is typed by a fresh writer
        keyboard = defaultKeyboards[args.keyboard]
        layout = defaultLayouts[args.layout].specialize (keyboard)
        w = Writer (layout)

        item = inq.get ()
        if item is None:
            break

        # extract
        text = sources[args.source] (item)
        # NOTE(review): the lookup is done per code point, so a key longer
        # than one code point would never be replaced; confirm all keys of
        # charMap are single code points
        text = ''.join (map (lambda x: charMap.get (x, x), text))
        # XXX sanity checks, disable
        for c in charMap:
            if c in text:
                assert False, c

        # stats
        stats = [cls(w) for cls in allStats]
        for _, event in w.type (StringIO (text)):
            for s in stats:
                s.process (event)

        for s in stats:
            combined[s.name].update (s)

    outq.put (combined)

def write ():
    """ Extract corpus source file, convert to plain text, map chars and create stats """

    parser = argparse.ArgumentParser(description='Import text and create stats.')
    parser.add_argument('-k', '--keyboard', metavar='KEYBOARD',
            default='ibmpc105', help='Physical keyboard name')
    # without type=int a value given on the command line stays a string and
    # breaks the queue sizes and range () below
    parser.add_argument('-j', '--jobs', metavar='NUM', type=int,
            default=cpu_count (), help='Number of parallel jobs')
    parser.add_argument('source', metavar='SOURCE', choices=sources.keys(), help='Data source extractor name')
    parser.add_argument('layout', metavar='LAYOUT', help='Keyboard layout name')

    args = parser.parse_args()

    logging.basicConfig (level=logging.INFO)

    # limit queue sizes to limit memory usage
    inq = Queue (args.jobs*2)
    outq = Queue (args.jobs+1)

    logging.info (f'using {args.jobs} workers')
    workers = []
    for i in range (args.jobs):
        p = Process(target=writeWorker, args=(args, inq, outq), daemon=True, name=f'worker-{i}')
        p.start()
        workers.append (p)

    # one item per line of standard input
    try:
        with tqdm (unit='item') as bar:
            for l in sys.stdin:
                inq.put (l)
                bar.update (n=1)
    except KeyboardInterrupt:
        pass

    # exit workers
    # every one of them will consume exactly one item and write one in return
    for w in workers:
        inq.put (None)
        pickle.dump (outq.get (), sys.stdout.buffer, pickle.HIGHEST_PROTOCOL)
    assert outq.empty ()
    # and then we can kill them
    for w in workers:
        w.join ()
def first (x):
    """ Return the first item of iterable x, raises StopIteration if there is none """
    return next (iter (x))

def limit (l, n):
    """
    Yield at most the first n items of iterable l.

    If l has fewer than n items the generator simply ends. The StopIteration
    of next () must not leave the generator body, since Python 3.7 turns
    that into a RuntimeError (PEP 479).
    """
    it = iter (l)
    for _ in range (n):
        try:
            item = next (it)
        except StopIteration:
            return
        yield item

class YamlLoader:
    """
    Simple YAML loader that searches the current path and the package’s
    resources (for defaults)
    """

    __slots__ = ('defaultDir', 'deserialize')

    def __init__ (self, defaultDir, deserialize):
        """
        :param defaultDir: resource directory inside the package holding the defaults
        :param deserialize: callable turning the parsed YAML document into an object
        """
        self.defaultDir = defaultDir
        self.deserialize = deserialize

    def __getitem__ (self, k, onlyRes=False):
        """
        Load and deserialize the document called k.

        Unless onlyRes is true, k is first tried as a path to a file. After
        that the package resources are searched, first for k plus the
        extension .yaml, then for k as it is. Candidates that do not exist
        or cannot be read as YAML are skipped.

        :raises KeyError: if no candidate could be loaded
        """
        openfunc = []
        if not onlyRes:
            openfunc.append (lambda k: open (k, 'r'))
        # try with and without appending extension
        openfunc.append (lambda k: pkg_resources.resource_stream (__package__, os.path.join (self.defaultDir, k + '.yaml')))
        openfunc.append (lambda k: pkg_resources.resource_stream (__package__, os.path.join (self.defaultDir, k)))
        for f in openfunc:
            try:
                with f (k) as fd:
                    return self.deserialize (yaml.safe_load (fd))
            except FileNotFoundError:
                pass
            except yaml.reader.ReaderError:
                pass

        # name the missing key, so the error can be understood
        raise KeyError (k)

    def __iter__ (self):
        """ Yield the deserialized form of every resource in the default directory """
        for res in pkg_resources.resource_listdir (__package__, self.defaultDir):
            yield self.__getitem__ (res, onlyRes=True)
# Maps button names to the (hand, finger) pair pressing them
defaultFingermap = {
    # fingers: hand (L/R), finger (counting from left to right on left hand and right to left on right hand)
    # B: number row
    # number keys left side
    'Bl1': (LEFT, LITTLE),
    'Bl2': (LEFT, LITTLE),
    'Bl3': (LEFT, LITTLE),
    'Bl4': (LEFT, RING),
    'Bl5': (LEFT, MIDDLE),
    'Bl6': (LEFT, INDEX),
    'Bl7': (LEFT, INDEX),
    # number keys right side
    'Br6': (RIGHT, INDEX),
    'Br5': (RIGHT, INDEX),
    'Br4': (RIGHT, MIDDLE),
    'Br3': (RIGHT, RING),
    'Br2': (RIGHT, LITTLE),
    'Br1': (RIGHT, LITTLE),
    'Br_bs': (RIGHT, LITTLE),
    # C: top row
    'Cl_tab': (LEFT, LITTLE),
    # letter keys left side
    'Cl1': (LEFT, LITTLE),
    'Cl2': (LEFT, RING),
    'Cl3': (LEFT, MIDDLE),
    'Cl4': (LEFT, INDEX),
    'Cl5': (LEFT, INDEX),
    # letter keys right side
    'Cr7': (RIGHT, INDEX),
    'Cr6': (RIGHT, INDEX),
    'Cr5': (RIGHT, MIDDLE),
    'Cr4': (RIGHT, RING),
    'Cr3': (RIGHT, LITTLE),
    'Cr2': (RIGHT, LITTLE),
    'Cr1': (RIGHT, LITTLE),
    # return key
    'CD_ret': (RIGHT, LITTLE),
    # D: middle row
    'Dl_caps': (LEFT, LITTLE),
    # letter keys left side
    'Dl1': (LEFT, LITTLE),
    'Dl2': (LEFT, RING),
    'Dl3': (LEFT, MIDDLE),
    'Dl4': (LEFT, INDEX),
    'Dl5': (LEFT, INDEX),
    # letter keys right side
    'Dr7': (RIGHT, INDEX),
    'Dr6': (RIGHT, INDEX),
    'Dr5': (RIGHT, MIDDLE),
    'Dr4': (RIGHT, RING),
    'Dr3': (RIGHT, LITTLE),
    'Dr2': (RIGHT, LITTLE),
    'Dr1': (RIGHT, LITTLE),
    # E: bottom row
    'El_shift': (LEFT, LITTLE),
    # letter keys left side
    'El1': (LEFT, LITTLE),
    'El2': (LEFT, LITTLE),
    'El3': (LEFT, RING),
    'El4': (LEFT, MIDDLE),
    'El5': (LEFT, INDEX),
    'El6': (LEFT, INDEX),
    # letter keys right side
    'Er5': (RIGHT, INDEX),
    'Er4': (RIGHT, INDEX),
    'Er3': (RIGHT, MIDDLE),
    'Er2': (RIGHT, RING),
    'Er1': (RIGHT, LITTLE),
    'Er_shift': (RIGHT, LITTLE),
    # F: bottom control row
    'Fl_ctrl': (LEFT, LITTLE),
    'Fl_fn': (LEFT, LITTLE),
    'Fl_win': (LEFT, THUMB),
    'Fl_alt': (LEFT, THUMB),
    'Fl_space': (LEFT, THUMB),
    'Fr_space': (RIGHT, THUMB),
    'Fr_altgr': (RIGHT, THUMB),
    'Fr_win': (RIGHT, THUMB),
    'Fr_menu': (RIGHT, THUMB),
    'Fr_ctrl': (RIGHT, LITTLE),
    }

class SkipEvent:
    """ Emitted by Writer.type for a character that could not be typed """

    __slots__ = ('char', )

    def __init__ (self, char):
        self.char = char

    def __eq__ (self, other):
        if not isinstance (other, SkipEvent):
            return NotImplemented
        return self.char == other.char

    def __hash__ (self):
        # defining __eq__ alone makes instances unhashable; equal events
        # must hash alike
        return hash ((SkipEvent, self.char))

    def __repr__ (self):
        return f'SkipEvent({self.char!r})'

class Writer:
    """ The magical being whose commands the machine obeys """

    __slots__ = ('hands', 'lastCombination', 'layout')

    def __init__ (self, layout: KeyboardLayout):
        self.layout = layout
        # assuming 10 finger typing
        self.hands = {
                LEFT: Hand (LEFT, [Finger (x) for x in FingerType]),
                RIGHT: Hand (RIGHT, [Finger (x) for x in reversed (FingerType)]),
                }
        # most recent combination handed to press (), None before the first
        self.lastCombination = None

    def __getitem__ (self, k):
        """ Return the hand stored under k (LEFT or RIGHT) """
        return self.hands[k]

    def getHandFinger (self, button: Button):
        """
        Return the (hand, finger) pair responsible for button.

        :raises KeyError: if the button’s name is not part of defaultFingermap
        """
        return defaultFingermap[button.name]

    def chooseCombination (self, combinations):
        """
        Choose the best button combination from the ones given.

        Return the actual button combination used.

        For instance:
        - A key on the right is usually combined with the shift button on the
          left and vice versa.
        - The spacebar is usually hit by the thumb of the previously unused
          hand or the one on the left if two buttons were pressed at the same
          time.
        - The combination with the minimum amount of fingers required is chosen
          if multiple options are available

        If several combinations share the lowest effort the first of them is
        returned.
        """
        dirToScore = {LEFT: 1, RIGHT: -1}
        def calcEffort (comb):
            # start from the side used by the previous combination, every
            # button of comb then pulls towards its own side; a balanced
            # result (close to zero) is cheap
            prev = self.lastCombination
            if prev is None:
                e = 0
            elif len (prev) > 1:
                # prefer left side
                e = dirToScore[RIGHT]
            else:
                assert len (prev.buttons) == 1
                e = dirToScore[self.getHandFinger (first (prev.buttons))[0]]
            for b in comb:
                pos = self.getHandFinger (b)[0]
                e += dirToScore[pos]
            return abs (e) + len (comb)

        # min () returns the first item having the smallest key
        return min (combinations, key=calcEffort)

    def press (self, comb):
        """ Remember comb as the combination pressed last """
        self.lastCombination = comb

    def type (self, fd):
        """
        Type everything that can be read from fd.

        Generator yielding one pair per step: (matched text, chosen button
        combination) if the layout knows the beginning of the buffered text,
        or (None, SkipEvent (char)) for a single character dropped after a
        KeyError.
        """
        buf = ''
        while True:
            # top up the look-ahead buffer to the layout’s bufferLen
            buf += fd.read (self.layout.bufferLen-len (buf))
            if not buf:
                break

            try:
                match, combinations = self.layout (buf)
                assert len (match) > 0, match

                comb = self.chooseCombination (combinations)

                yield match, comb

                self.press (comb)
                buf = buf[len (match):]
            except KeyError:
                # ignore unknown characters
                yield None, SkipEvent (buf[0])
                buf = buf[1:]
