summaryrefslogtreecommitdiff
path: root/lulua/text.py
diff options
context:
space:
mode:
authorLars-Dominik Braun <lars@6xq.net>2019-11-17 10:09:37 +0100
committerLars-Dominik Braun <lars@6xq.net>2019-11-17 10:09:37 +0100
commit1e6ad5d702181bce6aeb3d0704c36f124417227d (patch)
tree62946bc5090b10d24a4cbbf7fe09dadc3b40c6c5 /lulua/text.py
parent41f342e12b975e785de9d755d38eb92cf38f5ec5 (diff)
downloadlulua-1e6ad5d702181bce6aeb3d0704c36f124417227d.tar.gz
lulua-1e6ad5d702181bce6aeb3d0704c36f124417227d.tar.bz2
lulua-1e6ad5d702181bce6aeb3d0704c36f124417227d.zip
Add more tests
Diffstat (limited to 'lulua/text.py')
-rw-r--r--lulua/text.py36
1 files changed, 19 insertions, 17 deletions
diff --git a/lulua/text.py b/lulua/text.py
index 2d8398d..1d46af8 100644
--- a/lulua/text.py
+++ b/lulua/text.py
@@ -37,7 +37,7 @@ from html5lib.filters.base import Filter
from .keyboard import defaultKeyboards
from .layout import defaultLayouts
from .writer import Writer
-from .stats import allStats
+from .stats import allStats, makeCombined
def iterchar (fd):
batchsize = 1*1024*1024
@@ -244,32 +244,28 @@ charMap = {
'\u00a0': ' ',
}
-def writeWorker (args, inq, outq):
+def mapChars (text, m):
+ """ For all characters in text, replace if found in map m or keep as-is """
+ return ''.join (map (lambda x: m.get (x, x), text))
+
+def writeWorker (layout, sourceFunc, inq, outq):
try:
keyboard = defaultKeyboards['ibmpc105']
- layout = defaultLayouts['null'].specialize (keyboard)
- w = Writer (layout)
- combined = dict ((cls.name, cls(w)) for cls in allStats)
+ combined = makeCombined (keyboard)
itemsProcessed = 0
while True:
- keyboard = defaultKeyboards[args.keyboard]
- layout = defaultLayouts[args.layout].specialize (keyboard)
- w = Writer (layout)
-
item = inq.get ()
if item is None:
break
# extract (can be multiple items per source)
- for text in sources[args.source] (item):
- text = ''.join (map (lambda x: charMap.get (x, x), text))
- # XXX sanity checks, disable
- for c in charMap.keys ():
- if c in text:
- #print (c, 'is in text', file=sys.stderr)
- assert False, c
+ for text in sourceFunc (item):
+ # map chars
+ text = mapChars (text, charMap)
+ # init a new writer for every item
+ w = Writer (layout)
# stats
stats = [cls(w) for cls in allStats]
for match, event in w.type (StringIO (text)):
@@ -309,6 +305,9 @@ def write ():
else:
logging.basicConfig (level=logging.INFO)
+ keyboard = defaultKeyboards[args.keyboard]
+ layout = defaultLayouts[args.layout].specialize (keyboard)
+
# limit queue sizes to limit memory usage
inq = Queue (args.jobs*2)
outq = Queue (args.jobs+1)
@@ -316,7 +315,10 @@ def write ():
logging.info (f'using {args.jobs} workers')
workers = []
for i in range (args.jobs):
- p = Process(target=writeWorker, args=(args, inq, outq), daemon=True, name=f'worker-{i}')
+ p = Process(target=writeWorker,
+ args=(layout, sources[args.source], inq, outq),
+ daemon=True,
+ name=f'worker-{i}')
p.start()
workers.append (p)