summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.rst16
-rwxr-xr-xconvertFileDs.py7
-rw-r--r--elan.py135
-rw-r--r--elan.vim51
-rwxr-xr-xextractAll.sh2
-rwxr-xr-xextractHintergrund.py244
6 files changed, 452 insertions, 3 deletions
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..4aecec7
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,16 @@
+EUMEL-python
+============
+
+Tools for dealing with EUMEL_ data structures and files, mostly written in Python.
+
+.. _EUMEL: https://6xq.net/eumel/
+
+``elan.py``
+ is a lexer for pygments and used to highlight the packages found
+ `here <https://6xq.net/eumel/src/>`__.
+``extractAll.sh``
+ bulk-extracts all archive disk images whose paths are read from stdin. It
+ also converts text dataspaces to text files usable with modern computers.
+
+ Calls ``convertCharset.py``, ``convertFileDs.py``, ``extractArchive.py``
+ and ``linearizeDisk.py``.
diff --git a/convertFileDs.py b/convertFileDs.py
index 89e0cab..d5c234b 100755
--- a/convertFileDs.py
+++ b/convertFileDs.py
@@ -9,7 +9,7 @@ datastructure here. See EUMEL packet “file handling”.
import struct, copy
from collections import namedtuple
-from eumel import Dataspace, DataspaceTypeMismatch, HeapReferenceUnresolved
+from eumel import Dataspace, DataspaceTypeMismatch, HeapReferenceUnresolved, pagesize
Segment = namedtuple ('Segment', ['succ', 'pred', 'end'])
Sequence = namedtuple ('Sequence', ['index', 'segmentbegin', 'segmentend', 'lineno', 'lines'])
@@ -77,12 +77,13 @@ class FileDataspace (Dataspace):
Dataspace.__init__ (self, fd)
# header of the BOUND LIST (aka TYPE FILE)
+ start = fd.tell ()
self.used = self.parseSequence ()
self.parseInt (2)
self.parseSequence ()
self.parseSequence ()
self.parseInt (7)
- assert self.fd.tell () == 0x38
+ assert self.fd.tell ()-start == 0x30
rows = self.parseRows ()
@@ -150,6 +151,7 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Convert EUMEL FILE dataspace into plain text file.')
parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true')
+ parser.add_argument ('-s', '--skip', metavar='PAGES', type=int, default=0, help='Skip pages at the beginning of the file')
parser.add_argument ('file', help='Input file')
args = parser.parse_args ()
@@ -160,6 +162,7 @@ if __name__ == '__main__':
with open (args.file, 'rb') as fd:
try:
+ fd.seek (args.skip*pagesize)
ds = FileDataspace (fd)
linecount = len (ds.text.splitlines ())
if linecount != ds.used.lines:
diff --git a/elan.py b/elan.py
new file mode 100644
index 0000000..beeb9f8
--- /dev/null
+++ b/elan.py
@@ -0,0 +1,135 @@
+"""
+pygments lexer for Elementary Language (ELAN)
+
+- Rainer Hahn, Peter Stock: ELAN Handbuch. 1979.
+- Rainer Hahn, Dietmar Heinrichs, Peter Heyderhoff: EUMEL Benutzerhandbuch Version 1.7. 1984.
+"""
+
+from pygments.lexer import RegexLexer, bygroups, include, words
+from pygments.token import *
+
+__all__ = ['ElanLexer']
+
def uppercaseWords (l):
    """
    Build a pygments ``words`` matcher for the uppercase keywords in l.

    The matcher is wrapped in lookarounds that reject a hit directly preceded
    or followed by another uppercase letter, so that FOR is not found inside
    FORMAT.
    """
    notAfterUppercase = r'(?<![A-Z])'
    notBeforeUppercase = r'(?![A-Z])'
    return words (l, prefix=notAfterUppercase, suffix=notBeforeUppercase)
+
class ElanLexer(RegexLexer):
    """
    Regex based pygments lexer for ELAN source files (``*.elan``).

    States:

    root
        regular program text
    comment
        entry points for the three comment styles, included by other states
    comment-inside1/2/3
        body of a ``(* *)``, ``{ }`` or ``#( #)`` comment; comments nest
    string
        body of a double-quoted string
    refinement
        entered after a ``.`` that is not followed by a lowercase letter, to
        pick up an optional refinement label
    """
    name = 'ELAN'
    aliases = ['elan']
    filenames = ['*.elan']

    tokens = {
        'root': [
            include('comment'),
            # strings
            (r'"', String.Double, 'string'),
            # numbers. Rules are tried in order, so the float rule must come
            # before the integer rule; otherwise the integer rule consumes
            # the part before the decimal point and a float is never seen.
            (r'[-+]?\d+\.\d+(E[+-]?\d+)?', Number.Float),
            # lookbehind, because identifiers may contain numbers too. Digits
            # are part of the lookbehind so the second digit of an identifier
            # like x12 is not picked up as a number either.
            (r'([-+]|(?<![a-z0-9]))\d+', Number.Integer),
            # keywords
            (uppercaseWords ((
                # not sure
                'CONCR',
                # if-then-else
                'IF', 'THEN', 'ELSE', 'ELIF', 'ENDIF', 'END IF',
                # found in the wild:
                'FI',
                # select statement
                'SELECT', 'OF', 'CASE', 'OTHERWISE', 'ENDSELECT', 'END SELECT',
                # loops
                'FOR', 'FROM', 'DOWNTO', 'UPTO', 'WHILE', 'REPEAT', 'UNTIL',
                'ENDREPEAT', 'END REPEAT',
                # found in the wild:
                'REP', 'PER', 'END REP',
                # return statements
                'LEAVE', 'WITH',
                )), Keyword.Reserved),
            (uppercaseWords ((
                # type declaration
                'TYPE',
                # shorthand declaration
                'LET',
                )), Keyword.Declaration),
            (uppercaseWords ((
                # proper packet
                'DEFINES',
                )), Keyword.Namespace),
            (uppercaseWords (('VAR', 'CONST', 'BOUND')), Name.Attribute),
            (uppercaseWords (('BOOL', 'INT', 'REAL', 'TEXT', 'STRUCT', 'ROW',
                'DATASPACE')), Keyword.Type),
            # truth values
            (uppercaseWords (('TRUE', 'FALSE')), Name.Builtin),
            # semi-builtin functions/operators, see Benutzerhandbuch pp. 329
            # "Standardpakete"
            (uppercaseWords ((
                # boolean
                'NOT', 'AND', 'OR', 'XOR',
                # text
                'CAT', 'LENGTH', 'TIMESOUT',
                # math
                'DECR', 'DIV', 'INCR', 'MOD', 'SUB',
                )), Operator),
            # and the same with symbols. The minus sign is escaped inside the
            # character classes: an unescaped "+-/" is a range that also
            # covers "," and ".", which wrongly blocked operators next to
            # those characters.
            (words ((
                # assignments
                ':=', '::',
                # comparison
                '=', '<>', '<=', '>=', '<', '>',
                # math
                '**', '*', '+', '-', '/',
                ), prefix=r'(?<![:=<>*+\-/])', suffix=r'(?![:=<>*+\-/])'),
                Operator),
            # packets, function and operators
            # no space required between keyword and identifier
            # XXX comments may be allowed between keyword and name
            (r'((?:END\s*)?PACKET)([^A-Za-z]*)([a-z][a-z0-9 ]+)',
                bygroups (Keyword.Declaration, Text, Name.Namespace)),
            (r'((?:END\s*)?PROC)([^A-Za-z]*)([a-z][a-z0-9 ]+)',
                bygroups (Keyword.Declaration, Text, Name.Function)),
            (r'((?:END\s*)?OP)([^A-Za-z]*)([^a-z0-9 (;]+)',
                bygroups (Keyword.Declaration, Text, Name.Function)),
            # Refinements
            (r'\.(?![a-z])', Text, 'refinement'),
            # anything else is emitted one character at a time
            (r'.', Text),
            ],
        'comment': [
            (r'\(\*', Comment, 'comment-inside1'),
            (r'\{', Comment, 'comment-inside2'),
            (r'#\(', Comment, 'comment-inside3'),
            ],
        'comment-inside1': [
            # comment can be nested
            include('comment'),
            (r'\*\)', Comment, '#pop'),
            (r'(.|\n)', Comment),
            ],
        'comment-inside2': [
            # comment can be nested
            include('comment'),
            (r'\}', Comment, '#pop'),
            (r'(.|\n)', Comment),
            ],
        'comment-inside3': [
            # comment can be nested
            include('comment'),
            (r'#\)', Comment, '#pop'),
            (r'(.|\n)', Comment),
            ],
        'string': [
            # "" equals '\"', "12" is '\12'
            (r'"[0-9]*"', String.Escape),
            (r'"', String.Double, '#pop'),
            (r'.', String.Double),
            ],
        'refinement': [
            include('comment'),
            (r'\s+', Text),
            (r'([a-z][a-z0-9 ]*)(:\s+)', bygroups(Name.Label, Text), '#pop'),
            # no label follows: leave the state without consuming input
            (r'', Text, '#pop'),
            ]
        }
+
diff --git a/elan.vim b/elan.vim
new file mode 100644
index 0000000..bd0f4c8
--- /dev/null
+++ b/elan.vim
@@ -0,0 +1,51 @@
+" Vim syntax file
+" Copy to ~/.vim/syntax/ and enable with :set filetype=elan
+" Language: ELAN
+" Maintainer: Lars-Dominik Braun <lars+eumel@6xq.net>
+" Latest Revision: 2019-02-07
+
+if exists("b:current_syntax")
+ finish
+endif
+
+syn keyword elanStatement PROC ENDPROC OP PACKET ENDPACKET LEAVE WITH END LET DEFINES
+syn keyword elanConditional IF ELSE FI THEN SELECT OF ELIF
+syn keyword elanRepeat FOR FROM UPTO REP PER WHILE UNTIL
+syn keyword elanBoolean TRUE FALSE
+syn keyword elanType DATASPACE INT TEXT BOOL THESAURUS FILE REAL
+syn match elanOperator ":="
+syn match elanOperator "::"
+syn match elanOperator "\*"
+syn match elanOperator "<>"
+syn keyword elanOperator AND OR CAND COR NOT XOR
+syn keyword elanOperator DIV MUL ISUB INCR DECR MOD SUB LENGTH CAT LIKE CONTAINS
+syn keyword elanStorageClass VAR CONST BOUND ROW
+syn keyword elanStructure STRUCT TYPE
+syn keyword elanLabel CASE OTHERWISE
+syn match elanNumber "-\=\<\d\+\>"
+syn match elanFloat "\d\+\.\d\+"
+
+syn region elanComment start=+(\*+ end=+\*)+
+" XXX: tried to fix strings containing numbers that are not escapes, like "2",
+syn region elanString start=+"+rs=s+1 end=+"+re=e-1 contains=elanStringEscape
+"syn match elanStringEscape contained +"[0-9]\+"+
+
+
+hi def link elanBoolean Boolean
+hi def link elanConditional Conditional
+hi def link elanRepeat Repeat
+hi def link elanType Type
+hi def link elanComment Comment
+hi def link elanOperator Operator
+hi def link elanString String
+hi def link elanStringEscape Special
+hi def link elanStorageClass StorageClass
+hi def link elanStructure Structure
+hi def link elanLabel Label
+hi def link elanStatement Statement
+hi def link elanNumber Number
+hi def link elanFloat Float
+
+let b:current_syntax = "elan"
+
+
diff --git a/extractAll.sh b/extractAll.sh
index 8b8649f..5870e1f 100755
--- a/extractAll.sh
+++ b/extractAll.sh
@@ -13,7 +13,7 @@ while read -r F; do
pushd "$destdir" || continue
for G in ./*; do
echo "Converting $G to ${G}.txt"
- $root/convertFileDs.py "$G" > "${G}.txt" || rm "${G}.txt"
+ $root/convertFileDs.py "$G" > "${G}.txt" && touch -r "${G}" "${G}.txt" && rm "${G}" || rm "${G}.txt"
done
popd
rm "$linear"
diff --git a/extractHintergrund.py b/extractHintergrund.py
new file mode 100755
index 0000000..5795d8d
--- /dev/null
+++ b/extractHintergrund.py
@@ -0,0 +1,244 @@
+#!/usr/bin/env python3
+
+"""
+Extract EUMEL Hintergrund floppy disk image. Known to work only with version
+1.8 images.
+"""
+
+import os, logging
+from enum import IntEnum, unique
+from operator import attrgetter
+
+from eumel import pagesize
+
+from construct import Struct, Const, Padding, PaddedString, Int8ul, Int16ul, \
+ Int24ul, Int32ul, Flag, Computed, this, Array, BitStruct, Bitwise, \
+ BitsInteger, Embedded, Nibble, Sequence, Enum
+
# Layout of the first block of a Hintergrund image.
#
# The two computed fields use lambdas on purpose: Python's ``and`` and
# ``x if c else y`` cannot be overloaded, so applying them to construct's
# ``this`` expression objects evaluates the truthiness of the expression
# object itself (always true) instead of building a deferred expression.
# Written that way, isPlus silently ignored _hgblocks and blocks always
# resolved to _hgblocks.
hgIdent = Struct(
    "signature" / Const(b"EUMEL-"),
    "version" / PaddedString(6, "ascii"),
    Padding(1),
    "isShutup" / Int8ul * "true if value is 0", # XXX
    "bootCount" / Int16ul,
    Padding(0x24) * "undocumented",
    "_hgblocks2" / Int16ul,
    Padding(0x50) * "unknown/undocumented",
    "_hgblocks" / Int16ul,
    "_plusident" / Int16ul,
    "isPlus" / Computed(lambda ctx: ctx._hgblocks == 1 and ctx._plusident == 0),
    # XXX: the original author marked the choice of block count as
    # "not correct"; which of the two fields is authoritative is unverified
    "blocks" / Computed(lambda ctx: ctx._hgblocks if ctx.isPlus else ctx._hgblocks2),
    ) * "First block of Hintergrund"
+
# Reference to a block: 24 bit little-endian block number followed by one
# control byte, four bytes in total.
blockref = Struct(
    "value" / Int24ul,
    "control" / Int8ul,
)

# The anchor consists of three block references that are separated by runs
# of 0xff bytes (4, 12 and 4 bytes long).
anchor = Struct(
    Const(4 * b"\xff"),
    "akttab" / blockref,
    "clorX" / blockref,
    Const(12 * b"\xff"),
    "taskRoot" / blockref,
    Const(4 * b"\xff"),
) * "System anchor block"

# A block table is one page full of block references.
assert pagesize // blockref.sizeof() == 128
blockTable = Array(pagesize // blockref.sizeof(), blockref)

# Parses to a two element list: the constant prefix (the size of two block
# references, all 0xff) and the 14 block references that follow it.
# XXX: skip const
segmentTable = Sequence(
    Const(b'\xff' * (2 * blockref.sizeof())),
    Array(14, blockref),
)

# Eight block references per descriptor: count, three direct pages, two
# block tables and two segment tables.
drinfo = Struct(
    "count" / blockref * "Number of blocks/pages allocated",
    "blocks" / Array(3, blockref) * "Direct block references for page 1, 2 and 3",
    "blockTables" / Array(2, blockref) * "Block references to block tables",
    "segmentTables" / Array(2, blockref) * "Block references to segment tables, which refer to block tables",
) * "Dataspace descriptor"
+
# see src/devel/misc/unknown/src/XSTATUS.ELA
# EUMEL’s pcb function returns the 16 bit word at position (0x1e+2*<id>)%0x40
# i.e. module is pcb(23) → at offset 0x0c
#
# The hexadecimal numbers in the comments below are the byte offsets of the
# fields inside the 128 byte structure.
pcb = Struct(
    # 0x00
    "wstate" / Int32ul,
    # 0x04
    "millis" / Int8ul,
    # 0x05
    "unknown" / BitStruct(
        # bit 7
        "unused" / Flag,
        Padding(6),
        # bit 0
        "comflag" / Flag,
    ),
    # 0x06
    "status" / Int8ul,
    # 0x07
    "statusflags" / Int8ul * "unknown status flags",
    # 0x08
    "pricnt" / Int8ul,
    # 0x09
    "_icount" / Int16ul,
    # 0x0b
    # XXX: embedding BitStruct is not possible
    "flags" / BitStruct(
        # bit 7
        "iserror" / Flag,
        # bit 6
        "disablestop" / Flag,
        Padding(1),
        # bit 4
        "arith" / Flag,
        Padding(2),
        # bits 0…1
        "_codesegment" / BitsInteger(2),
    ),
    # no bytes of its own, combined from _icount and flags._codesegment
    # XXX: byte-swapping 18 bit int is not possible? is codesegment low/high bits of icount?
    "icount" / Computed(this._icount | (this.flags._codesegment<<16)),
    # 0x0c
    "module" / Int16ul,
    # 0x0e
    "pbase" / Int8ul,
    # 0x0f
    "c8k" / Int8ul,
    # 0x10
    "lbase" / Int16ul,
    # 0x12
    "ltop" / Int16ul,
    # 0x14
    "lsTop" / Int16ul,
    # 0x16
    # XXX: is this just a 16 bit pointer?
    "heap" / BitStruct(
        # XXX: incorrect byte order
        "top" / BitsInteger(12),
        # bit 0…3
        "segment" / Nibble,
    ),
    # 0x18
    Padding(4),
    # 0x1c
    "priclk" / Int8ul,
    # 0x1d
    "priv" / Int8ul,
    # 0x1e
    Padding(2),
    # 0x20
    # ↓ See library/entwurf-systemdokumentation-1982.djvu section 2.4.13 (page 29)
    "linenr" / Int16ul,
    # 0x22
    "errorline" / Int16ul,
    # 0x24
    "errorcode" / Int16ul,
    # 0x26
    "channel" / Int16ul,
    # 0x28
    # XXX: sure about this padding?
    Padding(2),
    # 0x2a
    "prio" / Int16ul,
    # 0x2c
    "msgcode" / Int16ul,
    # 0x2e
    "msgds" / Int16ul,
    # 0x30
    "taskid" / Int16ul,
    # 0x32
    "version" / Int16ul,
    # 0x34
    "fromid" / Int32ul,
    # 0x38
    Padding(8) * "unknown",
    # 0x40
    Padding(64) * "usually ff",
) * "Leitblock"
# the pcb takes up exactly the room of four dataspace descriptors
assert 4*drinfo.sizeof() == pcb.sizeof(), (pcb.sizeof(), drinfo.sizeof())
+
class CpuType (IntEnum):
    """
    Numeric CPU identifiers, used to decode the 16 bit ``cputype`` field of
    the Urlader link table.
    """
    Z80 = 0x0001
    INTEL8088 = 0x0003
    M68K = 0x0400
+
# Fixed size record that starts with the signature "EUMEL" padded with spaces
# to 16 bytes, followed by 16 bit little-endian fields.
urladerlink = Struct(
    "signature" / Const(b'EUMEL'.ljust(16)),
    "blocks" / Int16ul,
    "hgver" / Int16ul,
    # decoded into a CpuType member name
    "cputype" / Enum(Int16ul, CpuType),
    "urver" / Int16ul,
    Padding(2),
    "shdvermin" / Int16ul,
    "shdvermax" / Int16ul,
) * "Urlader Linkleiste"
+
def copyblock (block, infd, outfd):
    """
    Write one page to outfd at its current position.

    block is a block number of infd. The page is read from byte offset
    block*pagesize, which moves the file position of infd. The special
    reference 0xffffff stands for an empty block: a page filled with 0xff
    bytes is written instead and infd is not touched.

    Raises EOFError if infd ends before a whole page could be read and
    OSError if outfd accepted less than a whole page. These checks used to be
    assert statements, which are removed when Python runs with -O, so a
    truncated image could silently produce a short output file.
    """
    if block == 0xffffff:
        logging.debug ('copying empty block')
        buf = b'\xff'*pagesize
    else:
        logging.debug (f'copying block {block}@{block*pagesize:x}h')
        infd.seek (block*pagesize, os.SEEK_SET)
        buf = infd.read (pagesize)
        if len (buf) != pagesize:
            raise EOFError (f'short read for block {block}: got {len (buf)} of {pagesize} bytes')

    written = outfd.write (buf)
    if written != pagesize:
        raise OSError (f'short write: {written} of {pagesize} bytes')
+
def copyBlockTable (block, infd, outfd, skip=0):
    """
    Copy all pages referenced by one block table from infd to outfd.

    block is the block number of the table inside infd. The first skip
    entries of the table are ignored. The special reference 0xffffff stands
    for a missing table; in that case 0xff filled pages are written for every
    entry that would not have been skipped, so the output grows by the same
    amount either way.

    Only the infd argument is used for reading. Earlier the function
    accidentally went through a module level name fd, which exists only while
    the script's main block runs, so it could not be called with any other
    file object.
    """
    if block == 0xffffff:
        logging.debug ('copying empty block table')
        entries = (blockTable.sizeof()//blockref.sizeof())-skip
        outfd.write (b'\xff'*(pagesize*entries))
        return

    logging.debug (f'copying block table {block}@{block*pagesize:x}h, skipping {skip}')
    infd.seek (block*pagesize, os.SEEK_SET)
    # the table is parsed completely before copyblock moves the file position
    for i, refl2 in enumerate (blockTable.parse_stream (infd)):
        if i >= skip:
            copyblock (refl2.value, infd, outfd)
+
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Extract EUMEL Hintergrund.')
    parser.add_argument('-v', '--verbose', action='store_true', help='Verbose debugging output')
    parser.add_argument('input', metavar='FILE', type=argparse.FileType('rb'), help='Input file')
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.verbose else logging.INFO)

    with args.input as fd:
        # identification block at the very beginning of the image
        logging.info(hgIdent.parse_stream(fd))
        # Urlader link table at a fixed byte offset
        fd.seek(0x1400, os.SEEK_SET)
        logging.info(urladerlink.parse_stream(fd))

        # the anchor is stored in the second page
        fd.seek(pagesize)
        anchorBlock = anchor.parse_stream(fd)

        # level 1: task root, one block reference per task
        fd.seek(anchorBlock.taskRoot.value*pagesize)
        taskTable = blockTable.parse_stream(fd)

        # level 2: task dataspaces(?)
        for taskid, taskref in enumerate(taskTable):
            if taskref.value == 0xffffff:
                continue
            logging.info(f'task {taskid} is at {taskref.value} 0x{taskref.value*pagesize:x}')

            fd.seek(taskref.value*pagesize)
            dataspaceTable = blockTable.parse_stream(fd)

            for dsidhigh, dsref in enumerate(dataspaceTable):
                if dsref.value == 0xffffff:
                    continue
                logging.info(f'\ttaskid {taskid} dsid {dsidhigh<<4} is at {dsref.value} 0x{dsref.value*pagesize:x}')

                # level 3: a page of dataspace descriptors. In the first page
                # of a task the pcb takes the place of descriptors 0 to 3.
                fd.seek(dsref.value*pagesize)
                drinfoStart = 0
                if dsidhigh == 0:
                    p = pcb.parse_stream(fd)
                    logging.info(f'\t+pcb taskid {p.taskid} version {p.version} icount {p.icount:x} arith {p.flags.arith} disablestop {p.flags.disablestop} iserror {p.flags.iserror} pbase {p.pbase:x} module {p.module}')
                    drinfoStart = 4
                logging.info(f'\t\tdrinfo starting at {fd.tell():x}')

                for dsidlow in range(drinfoStart, 16):
                    dsid = dsidlow | dsidhigh << 4
                    d = drinfo.parse_stream(fd)
                    # descriptors without allocated pages are not extracted
                    if d.count.value in (0xffffff, 0):
                        continue

                    # pbt (page block table) 1/2 contain block refs for pages 0…127 and 128…256
                    # pst (page segment table) 1/2 contain block refs to page block tables for pages > 256
                    logging.info(f'\t\tdrinfo {dsid} #{d.count.value} @ {[x.value for x in d.blocks]}, ind {[x.value for x in d.blockTables]}, ind2 {[x.value for x in d.segmentTables]}')

                    # copying moves the read position, remember where the
                    # next descriptor starts
                    resumeAt = fd.tell()
                    with open(f'{taskid:04d}_{dsid:04d}.ds', 'wb') as outfd:
                        os.ftruncate(outfd.fileno(), 0)

                        # the first page of a dataspace is used by the OS
                        # and not stored to the Hintergrund
                        outfd.seek(pagesize)

                        # pages 1 to 3 are referenced directly
                        for pageRef in d.blocks:
                            copyblock(pageRef.value, fd, outfd)

                        # level 4a: indirect block refs
                        assert len(d.blockTables) == 2
                        firstTable, secondTable = d.blockTables
                        # first four entries of first table are empty and must not be written!
                        copyBlockTable(firstTable.value, fd, outfd, 4)
                        copyBlockTable(secondTable.value, fd, outfd)

                        # level 4b: segment tables, each refers to 14 block tables
                        for segref in d.segmentTables:
                            if segref.value == 0xffffff:
                                outfd.write((14*128*pagesize)*b'\xff')
                                continue
                            fd.seek(segref.value*pagesize, os.SEEK_SET)
                            # element 0 is the constant prefix, element 1 the references
                            tableRefs = segmentTable.parse_stream(fd)[1]
                            for tableRef in tableRefs:
                                copyBlockTable(tableRef.value, fd, outfd)

                        # 2*128 pages through block table, 2 segment tables with 14 refs to block tables each
                        expectedSize = (2*128+2*14*128)*pagesize
                        assert outfd.tell() == expectedSize, (outfd.tell(), expectedSize)
                    fd.seek(resumeAt, os.SEEK_SET)
+