From a6d474471dddc2d7a187a66358aafcb86235ca69 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 21 Jan 2017 11:24:56 +0100 Subject: Restructure git Move tools into separate repo, split TTL file. --- tools/eumel.py | 327 --------------------------------------------------------- 1 file changed, 327 deletions(-) delete mode 100644 tools/eumel.py (limited to 'tools/eumel.py') diff --git a/tools/eumel.py b/tools/eumel.py deleted file mode 100644 index a421e0a..0000000 --- a/tools/eumel.py +++ /dev/null @@ -1,327 +0,0 @@ -""" -EUMEL utility functions, including: - -""" - -import logging -import codecs - -# EUMEL character map. See “Benutzerhandbuch 1.7”, page 107 and file -# ZEICHENSATZ from the archive disk std.graphik. -eumel2unicodemap = dict ([ - # standard newlines - (10, '\n'), - (13, '\r'), - # mark start. technically \15 and \14 would be a choice here, but they do - # different things on different systems and thus we’re just gonna strip - # them. - (15, ''), - (14, ''), # mark end - # same as ascii - (32, ' '), - (33, '!'), - (34, '"'), - (35, '#'), - (36, '$'), - (37, '%'), - (38, '&'), - (39, "'"), - (40, '('), - (41, ')'), - (42, '*'), - (43, '+'), - (44, ','), - (45, '-'), - (46, '.'), - (47, '/'), - (48, '0'), - (49, '1'), - (50, '2'), - (51, '3'), - (52, '4'), - (53, '5'), - (54, '6'), - (55, '7'), - (56, '8'), - (57, '9'), - (58, ':'), - (59, ';'), - (60, '<'), - (61, '='), - (62, '>'), - (63, '?'), - # then the paragraph symbol - (64, '§'), - # uppercase and lowercase letters from ascii - (65, 'A'), - (66, 'B'), - (67, 'C'), - (68, 'D'), - (69, 'E'), - (70, 'F'), - (71, 'G'), - (72, 'H'), - (73, 'I'), - (74, 'J'), - (75, 'K'), - (76, 'L'), - (77, 'M'), - (78, 'N'), - (79, 'O'), - (80, 'P'), - (81, 'Q'), - (82, 'R'), - (83, 'S'), - (84, 'T'), - (85, 'U'), - (86, 'V'), - (87, 'W'), - (88, 'X'), - (89, 'Y'), - (90, 'Z'), - (91, '['), - (92, '\\'), - (93, ']'), - (94, '^'), - (95, '_'), - (96, '`'), - (97, 'a'), - (98, 'b'), - (99, 'c'), - (100, 'd'), - (101, 'e'), - (102, 'f'), - (103, 'g'), - (104, 'h'), - (105, 'i'), - (106, 'j'), - (107, 'k'), - (108, 'l'), - (109, 'm'), - (110, 'n'), - (111, 'o'), - (112, 'p'), - (113, 'q'), - (114, 'r'), - (115, 's'), - (116, 't'), - (117, 'u'), - (118, 'v'), - (119, 'w'), - (120, 'x'), - (121, 'y'), - (122, 'z'), - (123, '{'), - (124, '|'), - (125, '}'), - (126, '~'), - # uppercase greek - (129, 'Α'), - (130, 'Β'), - (131, 'Γ'), - (132, 'Δ'), - (133, 'Ε'), - (134, 'Ζ'), - (135, 'Η'), - (136, 'Θ'), - (137, 'Ι'), - (138, 'Κ'), - (139, 'Λ'), - (140, 'Μ'), - (141, 'Ν'), - (142, 'Ξ'), - (143, 'Ο'), - (144, 'Π'), - (145, 'Ρ'), - (146, 'Σ'), - (147, 'Τ'), - (148, 'Υ'), - (149, 'Φ'), - (150, 'Χ'), - (151, 'Ψ'), - (152, 'Ω'), - # lowercase greek - (161, 'α'), - (162, 'β'), - (163, 'γ'), - (164, 'δ'), - (165, 'ε'), - (166, 'ζ'), - (167, 'η'), - (168, 'θ'), - (169, 'ι'), - (170, 'κ'), - (171, 'λ'), - (172, 'μ'), - (173, 'ν'), - (174, 'ξ'), - (175, 'ο'), - (176, 'π'), - (177, 'ρ'), - (178, 'ς'), - (179, 'σ'), - (180, 'τ'), - (181, 'υ'), - (182, 'φ'), - (183, 'χ'), - (184, 'ψ'), - (185, 'ω'), - # these seem to be combining diacritic, not sure how they work though - # 192 looks like a cross, dunno what it could be - (193, '\u0301'), # acute - (194, '\u0300'), # grave - (195, '\u0302'), # circumflex - (196, '\u0303'), # tilde - (197, '\u0304'), # macron - # 198: dunno - (199, '\u0307'), # dot above - (200, '\u0308'), # diaeresis - # 201: dunno - (202, '\u030a'), # ring above - (203, '\u0317'), # acute below - # 204: dunno - (205, '\u030a'), # ring above (again for small letters?) - # 206: dunno - (207, '\u030c'), # caron - # german umlauts - (214, 'Ä'), - (215, 'Ö'), - (216, 'Ü'), - (217, 'ä'), - (218, 'ö'), - (219, 'ü'), - (220, 'k'), # handbuch says: Trenn-'k' bei der Umwandlung von 'ck' in 'kk' - (221, '\u00ad'), # soft hyphen, inserted by eumel’s hyphenation program - (222, '\\#'), # printable hash (i.e. literal hash, not a printer/editor command) - (223, '\u00a0'), # protected space - (251, 'ß'), - ]) - -def decode (input, errors='strict'): - ret = [] - pos = 0 - for pos in range (len (input)): - c = input[pos] - m = eumel2unicodemap.get (c, None) - if m is not None: - ret.append (m) - else: - if errors == 'strict': - raise UnicodeError ('unknown char {}'.format (c)) - elif errors == 'ignore': - pass - elif errors == 'replace': - logging.debug ('replacing unknown symbol {} at position {}, context {}'.format (c, pos, input[pos-30:pos+30])) - ret.append ('\uFFFD') - else: - break - return (''.join (ret), pos) - -def lookup (name): - if name == 'eumel': - return codecs.CodecInfo(None, decode) - return None - -codecs.register (lookup) - -# Dataspace utilities -import struct, os - -class DataspaceTypeMismatch (ValueError): - pass - -class Dataspace: - # Expected type - TYPE = None - - def __init__ (self, fd): - self.fd = fd - self.lastaddr, self.firstaddr, self.type, _ = self._parseHeader () - if self.TYPE is not None and self.type != self.TYPE: - raise DataspaceTypeMismatch () - self.heap = {} - - def _parseHeader (self): - """ - :return: (last heap address, first heap address, dataspace type, unknown) - """ - buf = self.fd.read (8) - return struct.unpack (''.format (self.address, self.length) - - @property - def item (self): - if self._item: - return self._item - elif self.address in self.heap: - self._item = self.heap[self.address] - return self._item - else: - raise HeapReferenceUnresolved (self.address, self.length) - -class HeapReferenceUnresolved (Exception): - def __init__ (self, address, length): - Exception.__init__ (self, 'addr: {:x}, len: {}'.format (address, length)) - -# Machine constants -intsize = 2 -pagesize = 512 - -- cgit v1.2.3