From 903cb02e7235bbdcc5e77373836bc8eb36e5ec65 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 17 Sep 2016 11:06:16 +0200 Subject: Add disk extraction tools --- tools/eumel.py | 145 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 tools/eumel.py (limited to 'tools/eumel.py') diff --git a/tools/eumel.py b/tools/eumel.py new file mode 100644 index 0000000..0434b35 --- /dev/null +++ b/tools/eumel.py @@ -0,0 +1,145 @@ +""" +EUMEL utility functions, including: + +""" + +import logging +import codecs + +# EUMEL character map. See “Benutzerhandbuch 1.7”, page 107. +# map eumel character to unicode codepoint +eumel2unicodemap = dict ( + [(10, '\n'), (13, '\r')] + + # first part is same as ascii + [(i, chr (i)) for i in range (32, 126)] + + [(126, '~')] + + [(214, 'Ä'), (215, 'Ö'), (216, 'Ü'), (217, 'ä'), (218, 'ö'), (219, 'ü'), (220, 'k'), (221, '-'), (222, '#'), (223, ' ')] + + [(251, 'ß')]) + +def decode (input, errors='strict'): + ret = [] + pos = 0 + for pos in range (len (input)): + c = input[pos] + m = eumel2unicodemap.get (c, None) + if m: + ret.append (m) + else: + if errors == 'strict': + raise UnicodeError ('unknown char {}'.format (c)) + elif errors == 'ignore': + pass + elif errors == 'replace': + ret.append ('\uFFFD') + else: + break + return (''.join (ret), pos) + +def lookup (name): + if name == 'eumel': + return codecs.CodecInfo(None, decode) + return None + +codecs.register (lookup) + +# Dataspace utilities +import struct, os + +class DataspaceTypeMismatch (ValueError): + pass + +class Dataspace: + # Expected type + TYPE = None + + def __init__ (self, fd): + self.fd = fd + self.lastaddr, self.firstaddr, self.type, _ = self._parseHeader () + if self.TYPE is not None and self.type != self.TYPE: + raise DataspaceTypeMismatch () + self.heap = {} + + def _parseHeader (self): + """ + :return: (last heap address, first heap address, dataspace type, unknown) + """ + buf = self.fd.read (8) + return struct.unpack (''.format (self.address, self.length) + + @property + def item (self): + if self._item: + return self._item + elif self.address in self.heap: + self._item = self.heap[self.address] + return self._item + else: + raise HeapReferenceUnresolved (self.address, self.length) + +class HeapReferenceUnresolved (Exception): + def __init__ (self, address, length): + Exception.__init__ (self, 'addr: {:x}, len: {}'.format (address, length)) + +# Machine constants +intsize = 2 +pagesize = 512 + -- cgit v1.2.3