diff options
-rw-r--r-- | README.rst | 16 | ||||
-rwxr-xr-x | convertFileDs.py | 7 | ||||
-rw-r--r-- | elan.py | 135 | ||||
-rw-r--r-- | elan.vim | 51 | ||||
-rwxr-xr-x | extractAll.sh | 2 | ||||
-rwxr-xr-x | extractHintergrund.py | 244 |
6 files changed, 452 insertions, 3 deletions
diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..4aecec7 --- /dev/null +++ b/README.rst @@ -0,0 +1,16 @@ +EUMEL-python +============ + +Tools for dealing with EUMEL_ datastructures and files, mostly written in Python. + +.. _EUMEL: https://6xq.net/eumel/ + +``elan.py`` + is a lexer for pygments and used to highlight the packages found + `here <https://6xq.net/eumel/src/>`__. +``extractAll.sh`` + bulk-extracts all archive disk images whose paths are read from stdin. It + also converts text dataspaces to text files usable with modern computers. + + Calls ``convertCharset.py``, ``convertFileDs.py``, ``extractArchive.py`` + and ``linearizeDisk.py``. diff --git a/convertFileDs.py b/convertFileDs.py index 89e0cab..d5c234b 100755 --- a/convertFileDs.py +++ b/convertFileDs.py @@ -9,7 +9,7 @@ datastructure here. See EUMEL packet “file handling”. import struct, copy from collections import namedtuple -from eumel import Dataspace, DataspaceTypeMismatch, HeapReferenceUnresolved +from eumel import Dataspace, DataspaceTypeMismatch, HeapReferenceUnresolved, pagesize Segment = namedtuple ('Segment', ['succ', 'pred', 'end']) Sequence = namedtuple ('Sequence', ['index', 'segmentbegin', 'segmentend', 'lineno', 'lines']) @@ -77,12 +77,13 @@ class FileDataspace (Dataspace): Dataspace.__init__ (self, fd) # header of the BOUND LIST (aka TYPE FILE) + start = fd.tell () self.used = self.parseSequence () self.parseInt (2) self.parseSequence () self.parseSequence () self.parseInt (7) - assert self.fd.tell () == 0x38 + assert self.fd.tell ()-start == 0x30 rows = self.parseRows () @@ -150,6 +151,7 @@ if __name__ == '__main__': parser = argparse.ArgumentParser(description='Convert EUMEL FILE dataspace into plain text file.') parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true') + parser.add_argument ('-s', '--skip', metavar='PAGES', type=int, default=0, help='Skip pages at the beginning of the file') parser.add_argument ('file', help='Input file') args = parser.parse_args () @@ -160,6 +162,7 @@ if __name__ == '__main__': with open (args.file, 'rb') as fd: try: + fd.seek (args.skip*pagesize) ds = FileDataspace (fd) linecount = len (ds.text.splitlines ()) if linecount != ds.used.lines: @@ -0,0 +1,135 @@ +""" +pygments lexer for Elementary Language (ELAN) + +- Rainer Hahn, Peter Stock: ELAN Handbuch. 1979. +- Rainer Hahn, Dietmar Heinrichs, Peter Heyderhoff: EUMEL Benutzerhandbuch Version 1.7. 1984. +""" + +from pygments.lexer import RegexLexer, bygroups, include, words +from pygments.token import * + +__all__ = ['ElanLexer'] + +def uppercaseWords (l): + """ + Match only uppercase words provided in l. For example FOR should not match + FORMAT. + """ + return words (l, prefix=r'(?<![A-Z])', suffix=r'(?![A-Z])') + +class ElanLexer(RegexLexer): + name = 'ELAN' + aliases = ['elan'] + filenames = ['*.elan'] + + tokens = { + 'root': [ + include('comment'), + # strings + (r'"', String.Double, 'string'), + # numbers. lookbehind, because identifiers may contain numbers too + (r'([-+]|(?<![a-z]))\d+', Number.Integer), + (r'[-+]?\d+\.\d+(E[+-]?\d+)?', Number.Float), + # keywords + (uppercaseWords (( + # not sure + 'CONCR', + # if-then-else + 'IF', 'THEN', 'ELSE', 'ELIF', 'ENDIF', 'END IF', + # found in the wild: + 'FI', + # select statement + 'SELECT', 'OF', 'CASE', 'OTHERWISE', 'ENDSELECT', 'END SELECT', + # loops + 'FOR', 'FROM', 'DOWNTO', 'UPTO', 'WHILE', 'REPEAT', 'UNTIL', + 'ENDREPEAT', 'END REPEAT', + # found in the wild: + 'REP', 'PER', 'END REP', + # return statements + 'LEAVE', 'WITH', + )), Keyword.Reserved), + (uppercaseWords (( + # type declaration + 'TYPE', + # shorthand declaration + 'LET', + )), Keyword.Declaration), + (uppercaseWords (( + # proper packet + 'DEFINES', + )), Keyword.Namespace), + (uppercaseWords (('VAR', 'CONST', 'BOUND')), Name.Attribute), + (uppercaseWords (('BOOL', 'INT', 'REAL', 'TEXT', 'STRUCT', 'ROW', + 'DATASPACE')), Keyword.Type), + # thruth values + (uppercaseWords (('TRUE', 'FALSE')), Name.Builtin), + # semi-builtin functions/operators, see Benutzerhandbuch pp. 329 + # "Standartpakete" + (uppercaseWords (( + # boolean + 'NOT', 'AND', 'OR', 'XOR', + # text + 'CAT', 'LENGTH', 'TIMESOUT', + # math + 'DECR', 'DIV', 'INCR', 'MOD', 'SUB', + )), Operator), + # and the same with symbols + (words (( + # assignments + ':=', '::', + # comparison + '=', '<>', '<=', '>=', '<', '>', + # math + '**', '*','+', '-', '/', + ), prefix=r'(?<![:=<>*+-/])', suffix=r'(?![:=<>*+-/])'), + Operator), + # packets, function and operators + # no space required between keyword and identifier + # XXX comments may be allowed between keyword and name + (r'((?:END\s*)?PACKET)([^A-Za-z]*)([a-z][a-z0-9 ]+)', + bygroups (Keyword.Declaration, Text, Name.Namespace)), + (r'((?:END\s*)?PROC)([^A-Za-z]*)([a-z][a-z0-9 ]+)', + bygroups (Keyword.Declaration, Text, Name.Function)), + (r'((?:END\s*)?OP)([^A-Za-z]*)([^a-z0-9 (;]+)', + bygroups (Keyword.Declaration, Text, Name.Function)), + # Refinements + (r'\.(?![a-z])', Text, 'refinement'), + (r'.', Text), + ], + 'comment': [ + (r'\(\*', Comment, 'comment-inside1'), + (r'\{', Comment, 'comment-inside2'), + (r'#\(', Comment, 'comment-inside3'), + ], + 'comment-inside1': [ + # comment can be nested + include('comment'), + (r'\*\)', Comment, '#pop'), + (r'(.|\n)', Comment), + ], + 'comment-inside2': [ + # comment can be nested + include('comment'), + (r'\}', Comment, '#pop'), + (r'(.|\n)', Comment), + ], + 'comment-inside3': [ + # comment can be nested + include('comment'), + (r'#\)', Comment, '#pop'), + (r'(.|\n)', Comment), + ], + 'string': [ + # "" equals '\"', "12" is '\12' + (r'"[0-9]*"', String.Escape), + (r'"', String.Double, '#pop'), + (r'.', String.Double), + ], + 'refinement': [ + include('comment'), + (r'\s+', Text), + (r'([a-z][a-z0-9 ]*)(:\s+)', bygroups(Name.Label, Text), '#pop'), + (r'', Text, '#pop'), + ] + } + diff --git a/elan.vim b/elan.vim new file mode 100644 index 0000000..bd0f4c8 --- /dev/null +++ b/elan.vim @@ -0,0 +1,51 @@ +" Vim syntax file +" Copy to ~/.vim/syntax/ and enable with :set filetype=elan +" Language: ELAN +" Maintainer: Lars-Dominik Braun <lars+eumel@6xq.net> +" Latest Revision: 2019-02-07 + +if exists("b:current_syntax") + finish +endif + +syn keyword elanStatement PROC ENDPROC OP PACKET ENDPACKET LEAVE WITH END LET DEFINES +syn keyword elanConditional IF ELSE FI THEN SELECT OF ELIF +syn keyword elanRepeat FOR FROM UPTO REP PER WHILE UNTIL +syn keyword elanBoolean TRUE FALSE +syn keyword elanType DATASPACE INT TEXT BOOL THESAURUS FILE REAL +syn match elanOperator ":=" +syn match elanOperator "::" +syn match elanOperator "\*" +syn match elanOperator "<>" +syn keyword elanOperator AND OR CAND COR NOT XOR +syn keyword elanOperator DIV MUL ISUB INCR DECR MOD SUB LENGTH CAT LIKE CONTAINS +syn keyword elanStorageClass VAR CONST BOUND ROW +syn keyword elanStructure STRUCT TYPE +syn keyword elanLabel CASE OTHERWISE +syn match elanNumber "-\=\<\d\+\>" +syn match elanFloat "\d\+\.\d\+" + +syn region elanComment start=+(\*+ end=+\*)+ +" XXX: tried to fix strings containing numbers that are not escapes, like "2", +syn region elanString start=+"+rs=s+1 end=+"+re=e-1 contains=elanStringEscape +"syn match elanStringEscape contained +"[0-9]\+"+ + + +hi def link elanBoolean Boolean +hi def link elanConditional Conditional +hi def link elanRepeat Repeat +hi def link elanType Type +hi def link elanComment Comment +hi def link elanOperator Operator +hi def link elanString String +hi def link elanStringEscape Special +hi def link elanStorageClass StorageClass +hi def link elanStructure Structure +hi def link elanLabel Label +hi def link elanStatement Statement +hi def link elanNumber Number +hi def link elanFloat Float + +let b:current_syntax = "elan" + + diff --git a/extractAll.sh b/extractAll.sh index 8b8649f..5870e1f 100755 --- a/extractAll.sh +++ b/extractAll.sh @@ -13,7 +13,7 @@ while read -r F; do pushd "$destdir" || continue for G in ./*; do echo "Converting $G to ${G}.txt" - $root/convertFileDs.py "$G" > "${G}.txt" || rm "${G}.txt" + $root/convertFileDs.py "$G" > "${G}.txt" && touch -r "${G}" "${G}.txt" && rm "${G}" || rm "${G}.txt" done popd rm "$linear" diff --git a/extractHintergrund.py b/extractHintergrund.py new file mode 100755 index 0000000..5795d8d --- /dev/null +++ b/extractHintergrund.py @@ -0,0 +1,244 @@ +#!/usr/bin/env python3 + +""" +Extract EUMEL Hintergrund floppy disk image. Known to work only with version +1.8 images. +""" + +import os, logging +from enum import IntEnum, unique +from operator import attrgetter + +from eumel import pagesize + +from construct import Struct, Const, Padding, PaddedString, Int8ul, Int16ul, \ + Int24ul, Int32ul, Flag, Computed, this, Array, BitStruct, Bitwise, \ + BitsInteger, Embedded, Nibble, Sequence, Enum + +hgIdent = Struct( + "signature" / Const(b"EUMEL-"), + "version" / PaddedString(6, "ascii"), + Padding(1), + "isShutup" / Int8ul * "true if value is 0", # XXX + "bootCount" / Int16ul, + Padding(0x24) * "undocumented", + "_hgblocks2" / Int16ul, + Padding(0x50) * "unknown/undocumented", + "_hgblocks" / Int16ul, + "_plusident" / Int16ul, + "isPlus" / Computed(this._hgblocks == 1 and this._plusident == 0), + "blocks" / Computed(this._hgblocks if this.isPlus else this._hgblocks2), # XXX: this is not correct + ) * "First block of Hintergrund" + +blockref = Struct( + "value" / Int24ul, + "control" / Int8ul, + ) + +anchor = Struct( + Const(b"\xff"*4), + "akttab" / blockref, + "clorX" / blockref, + Const(b"\xff"*4*3), + "taskRoot" / blockref, + Const(b"\xff"*4), + ) * "System anchor block" + +assert pagesize//blockref.sizeof() == 128 +blockTable = Array(pagesize//blockref.sizeof(), blockref) + +# XXX: skip const +segmentTable = Sequence (Const (2*blockref.sizeof ()*b'\xff'), Array (14, blockref)) + +drinfo = Struct( + "count" / blockref * "Number of blocks/pages allocated", + "blocks" / Array(3, blockref) * "Direct block references for page 1, 2 and 3", + "blockTables" / Array (2, blockref) * "Block references to block tables", + "segmentTables" / Array (2, blockref) * "Block references to segment tables, which refer to block tables", + ) * "Dataspace descriptor" + +# see src/devel/misc/unknown/src/XSTATUS.ELA +# EUMEL’s pcb function returns the 16 bit word at position (0x1e+2*<id>)%0x40 +# i.e. module is pcb(23) → at offset 0x0c +pcb = Struct( + "wstate" / Int32ul, + "millis" / Int8ul, + "unknown" / BitStruct ( + "unused" / Flag, # bit 7 + Padding(6), + "comflag" / Flag, # bit 0 + ), + "status" / Int8ul, + "statusflags" / Int8ul * "unknown status flags", + "pricnt" / Int8ul, + "_icount" / Int16ul, + "flags" / BitStruct( # XXX: embedding BitStruct is not possible + "iserror" / Flag, # bit 7 + "disablestop" / Flag, # bit 6 + Padding(1), + "arith" / Flag, # bit 4 + Padding(2), + "_codesegment" / BitsInteger(2), # bits 0…1 + ), + "icount" / Computed(this._icount | (this.flags._codesegment<<16)), # XXX: byte-swapping 18 bit int is not possible? is codesegment low/high bits of icount? + "module" / Int16ul, + "pbase" / Int8ul, + "c8k" / Int8ul, + "lbase" / Int16ul, + "ltop" / Int16ul, + "lsTop" / Int16ul, + "heap" / BitStruct( # XXX: is this just a 16 bit pointer? + "top" / BitsInteger(12), # XXX: incorrect byte order + "segment" / Nibble, # bit 0…3 + ), + Padding(4), + "priclk" / Int8ul, + "priv" / Int8ul, + Padding(2), + "linenr" / Int16ul, # ↓ See library/entwurf-systemdokumentation-1982.djvu section 2.4.13 (page 29) + "errorline" / Int16ul, + "errorcode" / Int16ul, + "channel" / Int16ul, + Padding(2), # XXX: sure about this padding? + "prio" / Int16ul, + "msgcode" / Int16ul, + "msgds" / Int16ul, + "taskid" / Int16ul, + "version" / Int16ul, + "fromid" / Int32ul, + Padding(8) * "unknown", + Padding(64) * "usually ff", + ) * "Leitblock" +assert pcb.sizeof() == 4*drinfo.sizeof(), (pcb.sizeof(), drinfo.sizeof()) + +class CpuType (IntEnum): + Z80 = 1 + INTEL8088 = 3 + M68K = 1024 + +urladerlink = Struct ( + "signature" / Const(b'EUMEL' + b' '*11), + "blocks" / Int16ul, + "hgver" / Int16ul, + "cputype" / Enum (Int16ul, CpuType), + "urver" / Int16ul, + Padding (2), + "shdvermin" / Int16ul, + "shdvermax" / Int16ul, + ) * "Urlader Linkleiste" + +def copyblock (block, infd, outfd): + if block == 0xffffff: + logging.debug (f'copying empty block') + written = outfd.write (b'\xff'*pagesize) + assert written == pagesize + else: + logging.debug (f'copying block {block}@{block*pagesize:x}h') + infd.seek (block*pagesize, os.SEEK_SET) + buf = infd.read (pagesize) + assert len (buf) == pagesize + written = outfd.write (buf) + assert written == pagesize + +def copyBlockTable (block, infd, outfd, skip=0): + if block != 0xffffff: + logging.debug (f'copying block table {block}@{block*pagesize:x}h, skipping {skip}') + fd.seek (block*pagesize, os.SEEK_SET) + for i, refl2 in enumerate (blockTable.parse_stream (infd)): + if i >= skip: + copyblock (refl2.value, fd, outfd) + else: + logging.debug (f'copying empty block table') + entries = (blockTable.sizeof()//blockref.sizeof())-skip + outfd.write (b'\xff'*(pagesize*entries)) + +if __name__ == '__main__': + import argparse + + parser = argparse.ArgumentParser(description='Extract EUMEL Hintergrund.') + parser.add_argument('-v', '--verbose', action='store_true', help='Verbose debugging output') + parser.add_argument('input', metavar='FILE', type=argparse.FileType('rb'), help='Input file') + + args = parser.parse_args() + + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + else: + logging.basicConfig(level=logging.INFO) + + with args.input as fd: + # ident + logging.info (hgIdent.parse_stream (fd)) + fd.seek (0x1400, os.SEEK_SET) + logging.info (urladerlink.parse_stream (fd)) + + fd.seek (pagesize) + a = anchor.parse_stream (fd) + + # task root (level 1) + fd.seek (a.taskRoot.value*pagesize) + taskRoot = blockTable.parse_stream (fd) + + # task dataspaces(?) (level 2) + for taskid, taskref in enumerate (taskRoot): + if taskref.value == 0xffffff: + continue + logging.info (f'task {taskid} is at {taskref.value} 0x{taskref.value*pagesize:x}') + + fd.seek (taskref.value*pagesize) + dataspaces = blockTable.parse_stream (fd) + + for dsidhigh, dsref in enumerate (dataspaces): + if dsref.value == 0xffffff: + continue + logging.info (f'\ttaskid {taskid} dsid {dsidhigh<<4} is at {dsref.value} 0x{dsref.value*pagesize:x}') + + # pcb and drinfo (level 3) + fd.seek (dsref.value*pagesize) + drinfoStart = 0 + if dsidhigh == 0: + p = pcb.parse_stream (fd) + logging.info (f'\t+pcb taskid {p.taskid} version {p.version} icount {p.icount:x} arith {p.flags.arith} disablestop {p.flags.disablestop} iserror {p.flags.iserror} pbase {p.pbase:x} module {p.module}') + drinfoStart = 4 + logging.info (f'\t\tdrinfo starting at {fd.tell():x}') + for dsidlow in range (drinfoStart, 16): + dsid = dsidlow | dsidhigh << 4 + d = drinfo.parse_stream (fd) + if d.count.value != 0xffffff and d.count.value != 0: + # pbt (page block table) 1/2 contain block refs for pages 0…127 and 128…256 + # pst (page segment table) 1/2 contain block refs to page block tables for pages > 256 + logging.info (f'\t\tdrinfo {dsid} #{d.count.value} @ {[x.value for x in d.blocks]}, ind {[x.value for x in d.blockTables]}, ind2 {[x.value for x in d.segmentTables]}') + + pos = fd.tell () + with open (f'{taskid:04d}_{dsid:04d}.ds', 'wb') as outfd: + os.ftruncate (outfd.fileno(), 0) + + # the first page of a dataspace is used by the OS + # and not stored to the Hintergrund + outfd.seek (pagesize) + + # get the first three pages + for ref in d.blocks: + copyblock (ref.value, fd, outfd) + + # indirect block refs (level 4a) + assert len (d.blockTables) == 2 + # first four entries of first table are empty and must not be written! + copyBlockTable (d.blockTables[0].value, fd, outfd, 4) + copyBlockTable (d.blockTables[1].value, fd, outfd) + + # segment tables (level 4b) + for segref in d.segmentTables: + if segref.value != 0xffffff: + fd.seek (segref.value*pagesize, os.SEEK_SET) + segtbl = segmentTable.parse_stream (fd) + for ref in segtbl[1]: + copyBlockTable (ref.value, fd, outfd) + else: + outfd.write((14*128*pagesize)*b'\xff') + + # 2*128 pages through block table, 2 segment tables with 14 refs to block tables each + expectedSize = (2*128+2*14*128)*pagesize + assert outfd.tell() == expectedSize, (outfd.tell(), expectedSize) + fd.seek (pos, os.SEEK_SET) + |