Diffstat (limited to 'tools')
-rwxr-xr-x  tools/convertCharset.py  |  71
-rwxr-xr-x  tools/convertFileDs.py   | 167
-rw-r--r--  tools/eumel.py           | 327
-rwxr-xr-x  tools/extractAll.sh      |  21
-rwxr-xr-x  tools/extractArchive.py  | 110
-rwxr-xr-x  tools/formatRefs.py      |   2
-rwxr-xr-x  tools/formatSoftware.py  |  54
-rwxr-xr-x  tools/linearizeDisk.py   |  49
-rw-r--r--  tools/rdf.py             |  54
9 files changed, 57 insertions, 798 deletions
diff --git a/tools/convertCharset.py b/tools/convertCharset.py
deleted file mode 100755
index 59163aa..0000000
--- a/tools/convertCharset.py
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Convert file ZEICHENSATZ from graphics package to PNG files
-"""
-
-from eumel import *
-
-class ZeichensatzDataspace(Dataspace):
- TYPE = 0x44c
-
- def __init__ (self, fd):
- Dataspace.__init__ (self, fd)
-
- # just an array with 255 elements
- self.rows = []
- for i in range (255):
- self.rows.append (self.parseText ())
- self.parseHeap ()
-
-if __name__ == '__main__':
- import argparse, sys, cairo, math
-
- def transform (w, h, x, y):
- return ((2+x), (11-y))
-
- parser = argparse.ArgumentParser(description='Convert ZEICHENSATZ dataspace to PNG')
- parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true')
- parser.add_argument ('file', help='Input file')
- parser.add_argument ('prefix', help='Output prefix')
- args = parser.parse_args ()
-
- if args.verbose:
- logging.basicConfig (level=logging.DEBUG)
- else:
- logging.basicConfig (level=logging.WARNING)
-
- m = []
- with open (args.file, 'rb') as fd:
- ds = ZeichensatzDataspace (fd)
- # no character with code 0
- for (j, r) in zip (range (1, len (ds.rows)+1), ds.rows):
- if len (r) == 0:
- continue
-
- out = '{}{:03d}.png'.format (args.prefix, j)
- logging.info ('Converting character {} to {}'.format (j, out))
- w, h = 1024, 1024
- surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, w, h)
- ctx = cairo.Context(surface)
- ctx.scale (64, 64)
- ctx.set_line_width (0.1)
- ctx.set_source_rgb (1, 0, 0)
-
- r = bytes (r)
- lastxy = (0, 0)
- for i in range (0, len (r), 4):
- x0, y0, x1, y1 = struct.unpack ('<bbbb', r[i:i+4])
- m.extend ([x0, y0, x1, y1])
- if (x0, y0) != lastxy:
- ctx.move_to (*transform (w, h, x0, y0))
- if (x0, y0) != (x1, y1):
- ctx.line_to (*transform (w, h, x1, y1))
- else:
- x1, y1 = transform (w, h, x1, y1)
- ctx.arc (x1, y1, 0.1, 0, 2*math.pi)
- lastxy = (x1, y1)
- ctx.stroke ()
-
- surface.write_to_png (out)
-
diff --git a/tools/convertFileDs.py b/tools/convertFileDs.py
deleted file mode 100755
index c4037db..0000000
--- a/tools/convertFileDs.py
+++ /dev/null
@@ -1,167 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Convert EUMEL FILE dataspace into a plain text file.
-
-Since there are no “files” in EUMEL we’re dealing with the editor’s in-memory
-datastructure here. See EUMEL packet “file handling”.
-"""
-
-import struct, copy
-from collections import namedtuple
-from eumel import Dataspace, DataspaceTypeMismatch
-
-Segment = namedtuple ('Segment', ['succ', 'pred', 'end'])
-Sequence = namedtuple ('Sequence', ['index', 'segmentbegin', 'segmentend', 'lineno', 'lines'])
-Atom = namedtuple ('Atom', ['seg', 'type', 'line'])
-
-class Chain:
- """
- A chain is a cyclic datastructure, pointing to segments. Segments contain
- one or more rows, which in turn reference a single line’s text.
- """
- def __init__ (self, sequence, rows):
- self.lineno = sequence.lineno
- # current atom
- self.pos = sequence.index
- # current segment
- self.segpos = sequence.segmentbegin
- self.rows = rows
-
- def next (self):
- atom = self.rows[self.segpos]
- if self.pos == atom.seg.end:
- # move to next segment
- self.pos = atom.seg.succ
- self.segpos = atom.seg.succ
- else:
- # just use the next atom in this segment
- self.pos += 1
- self.lineno += 1
-
- def prev (self):
- # backwards is a little more involved: seg.pred points to the *first* segment row
- logging.debug ('prev at pos {} seg {} line {}'.format (self.pos, self.segpos, self.lineno))
- if self.pos == self.segpos:
- # get previous segment
- atom = self.rows[self.segpos]
- self.segpos = atom.seg.pred
- atom = self.rows[self.segpos]
- self.pos = atom.seg.end
- else:
- self.pos -= 1
- self.lineno -= 1
-
- def first (self):
- """
- Seek to first line
- """
- while self.lineno > 1:
- self.prev ()
-
- @property
- def atom (self):
- """
- Get atom at current position
- """
- return self.rows[self.pos]
-
-class FileDataspace (Dataspace):
- """
- EUMEL’s FILE datatype
- """
-
- TYPE = 1003
-
- def __init__ (self, fd):
- Dataspace.__init__ (self, fd)
-
- # header of the BOUND LIST (aka TYPE FILE)
- self.used = self.parseSequence ()
- self.parseInt (2)
- self.parseSequence ()
- self.parseSequence ()
- self.parseInt (7)
- assert self.fd.tell () == 0x38
-
- rows = self.parseRows ()
-
- self.parseHeap ()
-
- self.text = self.reconstructText (rows)
-
- def parseSegment (self):
- return Segment (*self.parseInt (3))
-
- def parseSequence (self):
- return Sequence (*self.parseInt (5))
-
- def parseRows (self):
- rows = []
- # read lines
- while True:
- # check data
- data = self.fd.read (24)
- if data == 24*b'\xff':
- break
- self.skip (-24)
- # and parse it
- seg = self.parseSegment ()
- rowtype = self.parseInt ()
- text = self.parseText ()
- rows.append (Atom (seg, rowtype, text))
- logging.debug ('got row {} {}'.format (len (rows)-1, rows[-1]))
- return rows
-
- def reconstructText (self, rows):
- # XXX: use
- logging.debug ('Used first {}, last {}, starts at line {}, {} lines in total'.format (self.used.segmentbegin, self.used.segmentend, self.used.lineno, self.used.lines))
- chain = Chain (self.used, rows)
- chain.first ()
- firstrow = chain.pos
- lines = []
- visited = set ()
- while True:
- if chain.pos in visited:
- logging.warning ('Row {} already has been used'.format (chain.pos))
- visited.add (chain.pos)
-
- r = chain.atom
- lbytes = bytes (r.line)
- lbytesStripped = lbytes.rstrip (b'\xff')
- if len (lbytes) != len (lbytesStripped):
- logging.warning ('Line {} length incorrect. Is {}, should be {}, fixing. {}'.format (chain.lineno, r.line.length, len (lbytesStripped), lbytes))
- lbytes = lbytesStripped
- lines.append (lbytes)
- chain.next ()
-
- # chains are cyclic
- if chain.pos == firstrow:
- break
- return codecs.decode (b'\n'.join (lines), 'eumel', 'replace')
-
-if __name__ == '__main__':
- import sys, os, codecs, logging
- import argparse, sys
-
- parser = argparse.ArgumentParser(description='Convert EUMEL FILE dataspace into plain text file.')
- parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true')
- parser.add_argument ('file', help='Input file')
- args = parser.parse_args ()
-
- if args.verbose:
- logging.basicConfig (level=logging.DEBUG)
- else:
- logging.basicConfig (level=logging.WARNING)
-
- with open (args.file, 'rb') as fd:
- try:
- ds = FileDataspace (fd)
- linecount = len (ds.text.splitlines ())
- if linecount != ds.used.lines:
- logging.warning ('Got {} lines, but should have been {}'.format (linecount, ds.used.lines))
- print (ds.text)
- except DataspaceTypeMismatch:
- logging.error ('Not a text file, cannot convert')
- sys.exit (1)
-
diff --git a/tools/eumel.py b/tools/eumel.py
deleted file mode 100644
index a421e0a..0000000
--- a/tools/eumel.py
+++ /dev/null
@@ -1,327 +0,0 @@
-"""
-EUMEL utility functions, including:
-
-"""
-
-import logging
-import codecs
-
-# EUMEL character map. See “Benutzerhandbuch 1.7”, page 107 and file
-# ZEICHENSATZ from the archive disk std.graphik.
-eumel2unicodemap = dict ([
- # standard newlines
- (10, '\n'),
- (13, '\r'),
- # mark start. technically \15 and \14 would be a choice here, but they do
- # different things on different systems and thus we’re just gonna strip
- # them.
- (15, ''),
- (14, ''), # mark end
- # same as ascii
- (32, ' '),
- (33, '!'),
- (34, '"'),
- (35, '#'),
- (36, '$'),
- (37, '%'),
- (38, '&'),
- (39, "'"),
- (40, '('),
- (41, ')'),
- (42, '*'),
- (43, '+'),
- (44, ','),
- (45, '-'),
- (46, '.'),
- (47, '/'),
- (48, '0'),
- (49, '1'),
- (50, '2'),
- (51, '3'),
- (52, '4'),
- (53, '5'),
- (54, '6'),
- (55, '7'),
- (56, '8'),
- (57, '9'),
- (58, ':'),
- (59, ';'),
- (60, '<'),
- (61, '='),
- (62, '>'),
- (63, '?'),
- # then the paragraph symbol
- (64, '§'),
- # uppercase and lowercase letters from ascii
- (65, 'A'),
- (66, 'B'),
- (67, 'C'),
- (68, 'D'),
- (69, 'E'),
- (70, 'F'),
- (71, 'G'),
- (72, 'H'),
- (73, 'I'),
- (74, 'J'),
- (75, 'K'),
- (76, 'L'),
- (77, 'M'),
- (78, 'N'),
- (79, 'O'),
- (80, 'P'),
- (81, 'Q'),
- (82, 'R'),
- (83, 'S'),
- (84, 'T'),
- (85, 'U'),
- (86, 'V'),
- (87, 'W'),
- (88, 'X'),
- (89, 'Y'),
- (90, 'Z'),
- (91, '['),
- (92, '\\'),
- (93, ']'),
- (94, '^'),
- (95, '_'),
- (96, '`'),
- (97, 'a'),
- (98, 'b'),
- (99, 'c'),
- (100, 'd'),
- (101, 'e'),
- (102, 'f'),
- (103, 'g'),
- (104, 'h'),
- (105, 'i'),
- (106, 'j'),
- (107, 'k'),
- (108, 'l'),
- (109, 'm'),
- (110, 'n'),
- (111, 'o'),
- (112, 'p'),
- (113, 'q'),
- (114, 'r'),
- (115, 's'),
- (116, 't'),
- (117, 'u'),
- (118, 'v'),
- (119, 'w'),
- (120, 'x'),
- (121, 'y'),
- (122, 'z'),
- (123, '{'),
- (124, '|'),
- (125, '}'),
- (126, '~'),
- # uppercase greek
- (129, 'Α'),
- (130, 'Β'),
- (131, 'Γ'),
- (132, 'Δ'),
- (133, 'Ε'),
- (134, 'Ζ'),
- (135, 'Η'),
- (136, 'Θ'),
- (137, 'Ι'),
- (138, 'Κ'),
- (139, 'Λ'),
- (140, 'Μ'),
- (141, 'Ν'),
- (142, 'Ξ'),
- (143, 'Ο'),
- (144, 'Π'),
- (145, 'Ρ'),
- (146, 'Σ'),
- (147, 'Τ'),
- (148, 'Υ'),
- (149, 'Φ'),
- (150, 'Χ'),
- (151, 'Ψ'),
- (152, 'Ω'),
- # lowercase greek
- (161, 'α'),
- (162, 'β'),
- (163, 'γ'),
- (164, 'δ'),
- (165, 'ε'),
- (166, 'ζ'),
- (167, 'η'),
- (168, 'θ'),
- (169, 'ι'),
- (170, 'κ'),
- (171, 'λ'),
- (172, 'μ'),
- (173, 'ν'),
- (174, 'ξ'),
- (175, 'ο'),
- (176, 'π'),
- (177, 'ρ'),
- (178, 'ς'),
- (179, 'σ'),
- (180, 'τ'),
- (181, 'υ'),
- (182, 'φ'),
- (183, 'χ'),
- (184, 'ψ'),
- (185, 'ω'),
- # these seem to be combining diacritic, not sure how they work though
- # 192 looks like a cross, dunno what it could be
- (193, '\u0301'), # acute
- (194, '\u0300'), # grave
- (195, '\u0302'), # circumflex
- (196, '\u0303'), # tilde
- (197, '\u0304'), # macron
- # 198: dunno
- (199, '\u0307'), # dot above
- (200, '\u0308'), # diaeresis
- # 201: dunno
- (202, '\u030a'), # ring above
- (203, '\u0317'), # acute below
- # 204: dunno
- (205, '\u030a'), # ring above (again for small letters?)
- # 206: dunno
- (207, '\u030c'), # caron
- # german umlauts
- (214, 'Ä'),
- (215, 'Ö'),
- (216, 'Ü'),
- (217, 'ä'),
- (218, 'ö'),
- (219, 'ü'),
- (220, 'k'), # handbuch says: Trenn-'k' bei der Umwandlung von 'ck' in 'kk'
- (221, '\u00ad'), # soft hyphen, inserted by eumel’s hyphenation program
- (222, '\\#'), # printable hash (i.e. literal hash, not a printer/editor command)
- (223, '\u00a0'), # protected space
- (251, 'ß'),
- ])
-
-def decode (input, errors='strict'):
- ret = []
- pos = 0
- for pos in range (len (input)):
- c = input[pos]
- m = eumel2unicodemap.get (c, None)
- if m is not None:
- ret.append (m)
- else:
- if errors == 'strict':
- raise UnicodeError ('unknown char {}'.format (c))
- elif errors == 'ignore':
- pass
- elif errors == 'replace':
- logging.debug ('replacing unknown symbol {} at position {}, context {}'.format (c, pos, input[pos-30:pos+30]))
- ret.append ('\uFFFD')
- else:
- break
- return (''.join (ret), pos)
-
-def lookup (name):
- if name == 'eumel':
- return codecs.CodecInfo(None, decode)
- return None
-
-codecs.register (lookup)
-
-# Dataspace utilities
-import struct, os
-
-class DataspaceTypeMismatch (ValueError):
- pass
-
-class Dataspace:
- # Expected type
- TYPE = None
-
- def __init__ (self, fd):
- self.fd = fd
- self.lastaddr, self.firstaddr, self.type, _ = self._parseHeader ()
- if self.TYPE is not None and self.type != self.TYPE:
- raise DataspaceTypeMismatch ()
- self.heap = {}
-
- def _parseHeader (self):
- """
- :return: (last heap address, first heap address, dataspace type, unknown)
- """
- buf = self.fd.read (8)
- return struct.unpack ('<HHHH', buf)
-
- def parseText (self):
- """
- Parse TEXT datatype, which can either be embedded (up to 13? chars) or in the heap (i.e. address)
- """
- buf = self.fd.read (16)
- address, length = struct.unpack ('<HB', buf[:3])
- if length <= 13:
- r = buf[3:3+length]
- else:
- length, = struct.unpack ('<H', buf[3:5])
- r = HeapReference (self.heap, address, length)
- return r
-
- def parseInt (self, count=1):
- if count == 1:
- return struct.unpack ('<H', self.fd.read (1*intsize))[0]
- else:
- return [self.parseInt () for i in range (count)]
-
- def parseHeap (self):
- heapaddr = self.firstaddr
- maxaddr = 2**(intsize*8)-1
- while True:
- head = self.fd.read (2)
- # XXX: not sure how to find its offset
- if head == b'\xff\xff':
- continue
- if not head or len (head) < 2:
- break
- length, = struct.unpack ('<H', head)
- self.heap[heapaddr] = self.fd.read (length)
- logging.debug ('got heap entry {:x} = ({}) {}'.format (heapaddr, length, self.heap[heapaddr]))
- heapaddr = (heapaddr+2+length) % maxaddr
-
- def skip (self, n):
- self.fd.seek (n, os.SEEK_CUR)
-
- def seek (self, pos):
- self.fd.seek (pos, os.SEEK_SET)
-
-class HeapReference:
- def __init__ (self, heap, address, length):
- self.heap = heap
- self.address = address
- self.length = length
- self._item = None
-
- def __bytes__ (self):
- return self.item[:self.length]
-
- def __len__ (self):
- return self.length
-
- def __getitem__ (self, key):
- return self.item[key]
-
- def __repr__ (self):
- return '<HeapReference to {:x} length {}>'.format (self.address, self.length)
-
- @property
- def item (self):
- if self._item:
- return self._item
- elif self.address in self.heap:
- self._item = self.heap[self.address]
- return self._item
- else:
- raise HeapReferenceUnresolved (self.address, self.length)
-
-class HeapReferenceUnresolved (Exception):
- def __init__ (self, address, length):
- Exception.__init__ (self, 'addr: {:x}, len: {}'.format (address, length))
-
-# Machine constants
-intsize = 2
-pagesize = 512
-
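(For reference: the deleted eumel.py registered a decode-only codec — the encode slot of the CodecInfo was None — which the other tools consumed through the standard codecs API. A minimal sketch of that call, with byte values taken from the character map above; the sample bytes themselves are an assumed example:)

    import codecs
    import eumel  # registering the 'eumel' codec is a side effect of the import

    # 0x40 -> '§', 0x20 -> ' ', 0xd9 -> 'ä' per the character map in eumel.py
    text = codecs.decode(b'\x40\x20\xd9', 'eumel', 'replace')
    assert text == '§ ä'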
diff --git a/tools/extractAll.sh b/tools/extractAll.sh
deleted file mode 100755
index 8b8649f..0000000
--- a/tools/extractAll.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/sh
-
-root=`dirname "$0"`
-root=`realpath "$root"`
-
-while read -r F; do
- base=`basename "$F"`
- linear=`mktemp`
- destdir="${base}.extracted"
- echo "Extracting $F to $destdir"
- $root/linearizeDisk.py "$F" "$linear"
- $root/extractArchive.py -n -o "$destdir" "$linear"
- pushd "$destdir" || continue
- for G in ./*; do
- echo "Converting $G to ${G}.txt"
- $root/convertFileDs.py "$G" > "${G}.txt" || rm "${G}.txt"
- done
- popd
- rm "$linear"
-done
-
diff --git a/tools/extractArchive.py b/tools/extractArchive.py
deleted file mode 100755
index f14a6b6..0000000
--- a/tools/extractArchive.py
+++ /dev/null
@@ -1,110 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Extract linearized (see linearizeDisk.py) EUMEL archive disk.
-"""
-
-import struct, sys, io, logging
-import codecs
-from eumel import Dataspace
-
-def take (it, n):
- for i in range (n):
- yield next (it)
-
-def parseEntry (blocks):
- while True:
- header = next (blocks)
- unknown1, unknown2, length, unknown3 = struct.unpack ('<HHHH', header[:8])
- logging.debug ('Got dataspace with {} blocks'.format (length))
- yield b''.join (take (blocks, length))
-
-def readBlocks (fd):
- while True:
- buf = fd.read (512)
- if not buf:
- break
- yield buf
-
-class FileHeaderDataspace (Dataspace):
- TYPE = 0
-
- def __init__ (self, fd):
- Dataspace.__init__ (self, fd)
- self.name = self.parseText ()
- self.mtime = self.parseText ()
- self.seek (0x40)
- self.parseHeap ()
-
-if __name__ == '__main__':
- import argparse, sys, codecs, os
- from datetime import datetime
- from io import BytesIO
- from eumel import pagesize
-
- parser = argparse.ArgumentParser(description='Extract EUMEL disk archive.')
- parser.add_argument ('-f', '--force', help='Overwrite existing files', action='store_true')
- parser.add_argument ('-o', '--output', help='Output directory, defaults to archive name')
- parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true')
- parser.add_argument ('-n', '--number', help='Number files based on their position in the archive',
- action='store_true')
- parser.add_argument ('file', help='Input file')
- args = parser.parse_args ()
-
- if args.verbose:
- logging.basicConfig (level=logging.DEBUG)
- else:
- logging.basicConfig (level=logging.INFO)
-
- with open (args.file, 'rb') as infd:
- entries = parseEntry (readBlocks (infd))
-
- # first entry is always disk info
- diskinfo = FileHeaderDataspace (BytesIO (next (entries)))
- if not args.output:
- args.output = codecs.decode (diskinfo.name, 'eumel', 'replace')
- logging.debug ('Using disk name {} as output directory'.format (args.output))
-
- # create output dir
- try:
- os.makedirs (args.output)
- except FileExistsError:
- pass
-
- i = 1
- while True:
- # file header dataspace
- fileheader = FileHeaderDataspace (BytesIO (next (entries)))
- filename = codecs.decode (fileheader.name, 'eumel', 'replace').replace ('/', '-')
- if len (filename) == 0:
- logging.debug ('Filename was empty, i.e. last item in archive. I’m done')
- break
- try:
- mtime = datetime.strptime (codecs.decode (fileheader.mtime, 'eumel', 'replace'), '%d.%m.%y')
- except ValueError as e:
- logging.warning ('Cannot parse date of file {}, {}'.format (filename, e))
- mtime = datetime.now ()
- logging.debug ('Got file {}, last modified {}'.format (filename, mtime))
-
- # actual file contents
- e = next (entries)
-
- # quirks: if the first page starts with a magic sequence, skip it.
- # Not sure what it is used for.
- if e.startswith (2*b'\x30\x00\x00\x00'):
- logging.debug ('skipping quirks')
- e = e[pagesize:]
-
- if args.number:
- filename = '{:03d}_{}'.format (i, filename)
- outfile = os.path.join (args.output, filename)
- if os.path.exists (outfile) and not args.force:
- logging.info ('File {} exists, skipping'.format (outfile))
- continue
- logging.info ('Extracting {} bytes to file {}'.format (len (e), outfile))
- with open (outfile, 'wb') as outfd:
- outfd.write (e)
- stamp = mtime.timestamp ()
- os.utime (outfile, (stamp, stamp))
- i += 1
-
diff --git a/tools/formatRefs.py b/tools/formatRefs.py
index 280a444..31e458a 100755
--- a/tools/formatRefs.py
+++ b/tools/formatRefs.py
@@ -115,7 +115,7 @@ def warnUnusedButDefined (graph, rootNode):
if __name__ == '__main__':
g = Graph()
- result = g.parse ("index.ttl", format='turtle')
+ result = g.parse (sys.stdin, format='turtle')
rootUri = sys.argv[1]
rootNode = URIRef (rootUri)
s = Namespace("https://schema.org/")
diff --git a/tools/formatSoftware.py b/tools/formatSoftware.py
index a54a740..756a247 100755
--- a/tools/formatSoftware.py
+++ b/tools/formatSoftware.py
@@ -7,57 +7,7 @@ import sys
from itertools import chain, groupby
from jinja2 import Environment
from formatRefs import first
-
-class RDFWalker:
- """
- Simple RDF graph walker
- """
-
- def __init__ (self, g, s, n, path=[]):
- """
- :param g: Graph
- :param s: Namespace
- :param n: Start node
- """
- self.g = g
- self.n = n
- self.s = s
- self._path = path
-
- def __getattr__ (self, k):
- """
- If k is underscore _, walk up tree one level, otherwise search for
- direct descendents and get first one.
- """
- if k == '_':
- return RDFWalker (self.g, self.s, self._path[0], self._path[1:])
- yieldall = False
- if k.endswith ('_'):
- yieldall = True
- k = k[:-1]
-
- if k == 'a':
- attr = RDF.type
- else:
- attr = getattr (self.s, k)
-
- ret = [RDFWalker (self.g, self.s, n, [self.n] + self._path) for n in self.g.objects (self.n, attr)]
-
- if yieldall:
- return ret
- elif not ret:
- return None
- else:
- return ret[0]
-
- def __eq__ (self, b):
- return self.n == b.n
-
- def __lt__ (self, b):
- return str (self) < str (b)
-
- def __str__ (self):
- return str (self.n)
+from rdf import RDFWalker
if __name__ == '__main__':
env = Environment ()
@@ -108,7 +58,7 @@ if __name__ == '__main__':
</div>
{% endfor %}""")
g = Graph()
- result = g.parse ("index.ttl", format='turtle')
+ result = g.parse (sys.stdin, format='turtle')
s = Namespace ("https://schema.org/")
items = []
diff --git a/tools/linearizeDisk.py b/tools/linearizeDisk.py
deleted file mode 100755
index 55f4b06..0000000
--- a/tools/linearizeDisk.py
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-For some reason blocks in the bitsavers images are not in linear order, but
-shuffled. Not sure why and if other disks are affected as well, but this script
-reorders them.
-"""
-
-import os, logging
-from itertools import chain
-
-def linearBlocks (fd):
- fd.seek (0, os.SEEK_END)
- size = fd.tell ()
- logging.debug ('File size is {} bytes'.format (size))
-
- blockSize = 512
- blocksPerChunk = 15
- chunkSize = blockSize*blocksPerChunk
- chunks = size//chunkSize
- skip = 1
- if size%chunkSize != 0:
- logging.warning ('File size {} is not multiple of chunk size {}'.format (size, chunkSize))
-
- # first even then odd chunks
- for j in chain (range (0, chunks, 2), range (1, chunks, 2)):
- pos = j*chunkSize
- logging.debug ('Seeking to {} for chunk {} and reading {} blocks @ {} bytes'.format (pos, j, blocksPerChunk, blockSize))
- fd.seek (pos, os.SEEK_SET)
- for i in range (blocksPerChunk):
- yield fd.read (blockSize)
-
-if __name__ == '__main__':
- import argparse, sys
-
- parser = argparse.ArgumentParser(description='Reorder EUMEL archive disk’s blocks.')
- parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true')
- parser.add_argument ('input', help='Input file')
- parser.add_argument ('output', help='Out file')
- args = parser.parse_args ()
- if args.verbose:
- logging.basicConfig (level=logging.DEBUG)
- else:
- logging.basicConfig (level=logging.WARNING)
-
- with open (args.input, 'rb') as infd, open (args.output, 'wb') as outfd:
- for b in linearBlocks (infd):
- outfd.write (b)
-
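(For reference, the chunk order the deleted linearizeDisk.py wrote out — a disk image of four 15-block chunks is an assumed example:)

    from itertools import chain

    chunks = 4
    # even-numbered 15-block chunks of the image first, then the odd-numbered ones
    order = list(chain(range(0, chunks, 2), range(1, chunks, 2)))
    assert order == [0, 2, 1, 3]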
diff --git a/tools/rdf.py b/tools/rdf.py
new file mode 100644
index 0000000..6aaa682
--- /dev/null
+++ b/tools/rdf.py
@@ -0,0 +1,54 @@
+from rdflib.namespace import RDF, NamespaceManager
+
+class RDFWalker:
+ """
+ Simple RDF graph walker
+ """
+
+ def __init__ (self, g, s, n, path=[]):
+ """
+ :param g: Graph
+ :param s: Namespace
+ :param n: Start node
+ """
+ self.g = g
+ self.n = n
+ self.s = s
+ self._path = path
+
+ def __getattr__ (self, k):
+ """
+ If k is underscore _, walk up tree one level, otherwise search for
+ direct descendents and get first one.
+ """
+ if k == '_':
+ return RDFWalker (self.g, self.s, self._path[0], self._path[1:])
+ yieldall = False
+ if k.endswith ('_'):
+ yieldall = True
+ k = k[:-1]
+
+ if k == 'a':
+ attr = RDF.type
+ else:
+ attr = getattr (self.s, k)
+
+ ret = [RDFWalker (self.g, self.s, n, [self.n] + self._path) for n in self.g.objects (self.n, attr)]
+
+ if yieldall:
+ return ret
+ elif not ret:
+ return None
+ else:
+ return ret[0]
+
+ def __eq__ (self, b):
+ return self.n == b.n
+
+ def __lt__ (self, b):
+ return str (self) < str (b)
+
+ def __str__ (self):
+ return str (self.n)
+
+
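(For context, a minimal usage sketch of the RDFWalker now living in tools/rdf.py, along the lines of its use in formatSoftware.py; the start node URI and the schema.org properties queried are assumptions:)

    import sys
    from rdflib import Graph, Namespace, URIRef
    from rdf import RDFWalker

    g = Graph()
    g.parse(sys.stdin, format='turtle')   # the scripts now read Turtle from stdin
    s = Namespace('https://schema.org/')

    item = RDFWalker(g, s, URIRef('https://example.com/software/foo'))  # hypothetical start node
    print(item.name)                 # first schema:name object, or None
    for author in item.author_:     # trailing underscore: all schema:author objects
        print(author.name)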