Restructure git

Move tools into separate repo, split TTL file.
author: Lars-Dominik Braun <lars@6xq.net> 2017-01-21 11:24:56 +0100
committer: Lars-Dominik Braun <lars@6xq.net> 2017-01-28 14:31:28 +0100
commit: a6d474471dddc2d7a187a66358aafcb86235ca69 (patch)
tree: 7242cbf0f0e645b2156e143dbebaa2d28b4fcdba /tools
parent: c7befe173ed2b0f5fd82228fa45c7a105ac44818 (diff)
download: eumel-a6d474471dddc2d7a187a66358aafcb86235ca69.tar.gz
eumel-a6d474471dddc2d7a187a66358aafcb86235ca69.tar.bz2
eumel-a6d474471dddc2d7a187a66358aafcb86235ca69.zip
9 files changed, 57 insertions, 798 deletions
diff --git a/tools/convertCharset.py b/tools/convertCharset.py
deleted file mode 100755
index 59163aa..0000000
--- a/tools/convertCharset.py
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Convert file ZEICHENSATZ from graphics package to PNG files
-"""
-
-from eumel import *
-
-class ZeichensatzDataspace(Dataspace):
-    TYPE = 0x44c
-
-    def __init__ (self, fd):
-        Dataspace.__init__ (self, fd)
-        
-        # just an array with 255 elements
-        self.rows = []
-        for i in range (255):
-            self.rows.append (self.parseText ())
-        self.parseHeap ()
-
-if __name__ == '__main__':
-    import argparse, sys, cairo, math
-
-    def transform (w, h, x, y):
-        return ((2+x), (11-y))
-
-    parser = argparse.ArgumentParser(description='Convert ZEICHENSATZ dataspace to PNG')
-    parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true')
-    parser.add_argument ('file', help='Input file')
-    parser.add_argument ('prefix', help='Output prefix')
-    args = parser.parse_args ()
-
-    if args.verbose:
-        logging.basicConfig (level=logging.DEBUG)
-    else:
-        logging.basicConfig (level=logging.WARNING)
-
-    m = []
-    with open (args.file, 'rb') as fd:
-        ds = ZeichensatzDataspace (fd)
-        # no character with code 0
-        for (j, r) in zip (range (1, len (ds.rows)+1), ds.rows):
-            if len (r) == 0:
-                continue
-
-            out = '{}{:03d}.png'.format (args.prefix, j)
-            logging.info ('Converting character {} to {}'.format (j, out))
-            w, h = 1024, 1024
-            surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, w, h)
-            ctx = cairo.Context(surface)
-            ctx.scale (64, 64)
-            ctx.set_line_width (0.1)
-            ctx.set_source_rgb (1, 0, 0)
-
-            r = bytes (r)
-            lastxy = (0, 0)
-            for i in range (0, len (r), 4):
-                x0, y0, x1, y1 = struct.unpack ('<bbbb', r[i:i+4])
-                m.extend ([x0, y0, x1, y1])
-                if (x0, y0) != lastxy:
-                    ctx.move_to (*transform (w, h, x0, y0))
-                if (x0, y0) != (x1, y1):
-                    ctx.line_to (*transform (w, h, x1, y1))
-                else:
-                    x1, y1 = transform (w, h, x1, y1)
-                    ctx.arc (x1, y1, 0.1, 0, 2*math.pi)
-                lastxy = (x1, y1)
-            ctx.stroke ()
-
-            surface.write_to_png (out)
-
diff --git a/tools/convertFileDs.py b/tools/convertFileDs.py
deleted file mode 100755
index c4037db..0000000
--- a/tools/convertFileDs.py
+++ /dev/null
@@ -1,167 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Convert EUMEL FILE dataspace into a plain text file.
-
-Since there are no “files” in EUMEL we’re dealing with the editor’s in-memory
-datastructure here. See EUMEL packet “file handling”.
-"""
-
-import struct, copy
-from collections import namedtuple
-from eumel import Dataspace, DataspaceTypeMismatch
-
-Segment = namedtuple ('Segment', ['succ', 'pred', 'end'])
-Sequence = namedtuple ('Sequence', ['index', 'segmentbegin', 'segmentend', 'lineno', 'lines'])
-Atom = namedtuple ('Atom', ['seg', 'type', 'line'])
-
-class Chain:
-    """
-    A chain is a cyclic datastructure, pointing to segments. Segments contain
-    one or more rows, which in turn reference a single line’s text.
-    """
-    def __init__ (self, sequence, rows):
-        self.lineno = sequence.lineno
-        # current atom
-        self.pos = sequence.index
-        # current segment
-        self.segpos = sequence.segmentbegin
-        self.rows = rows
-
-    def next (self):
-        atom = self.rows[self.segpos]
-        if self.pos == atom.seg.end:
-            # move to next segment
-            self.pos = atom.seg.succ
-            self.segpos = atom.seg.succ
-        else:
-            # just use the next atom in this segment
-            self.pos += 1
-        self.lineno += 1
-
-    def prev (self):
-        # backwards is a little more involved: seg.pred points to the *first* segment row
-        logging.debug ('prev at pos {} seg {} line {}'.format (self.pos, self.segpos, self.lineno))
-        if self.pos == self.segpos:
-            # get previous segment
-            atom = self.rows[self.segpos]
-            self.segpos = atom.seg.pred
-            atom = self.rows[self.segpos]
-            self.pos = atom.seg.end
-        else:
-            self.pos -= 1
-        self.lineno -= 1
-
-    def first (self):
-        """
-        Seek to first line
-        """
-        while self.lineno > 1:
-            self.prev ()
-
-    @property
-    def atom (self):
-        """
-        Get atom at current position
-        """
-        return self.rows[self.pos]
-
-class FileDataspace (Dataspace):
-    """
-    EUMEL’s FILE datatype
-    """
-
-    TYPE = 1003
-
-    def __init__ (self, fd):
-        Dataspace.__init__ (self, fd)
-
-        # header of the BOUND LIST (aka TYPE FILE)
-        self.used = self.parseSequence ()
-        self.parseInt (2)
-        self.parseSequence ()
-        self.parseSequence ()
-        self.parseInt (7)
-        assert self.fd.tell () == 0x38
-
-        rows = self.parseRows ()
-
-        self.parseHeap ()
-
-        self.text = self.reconstructText (rows)
-
-    def parseSegment (self):
-        return Segment (*self.parseInt (3))
-
-    def parseSequence (self):
-        return Sequence (*self.parseInt (5))
-
-    def parseRows (self):
-        rows = []
-        # read lines
-        while True:
-            # check data
-            data = self.fd.read (24)
-            if data == 24*b'\xff':
-                break
-            self.skip (-24)
-            # and parse it
-            seg = self.parseSegment ()
-            rowtype = self.parseInt ()
-            text = self.parseText ()
-            rows.append (Atom (seg, rowtype, text))
-            logging.debug ('got row {} {}'.format (len (rows)-1, rows[-1]))
-        return rows
-
-    def reconstructText (self, rows):
-        # XXX: use
-        logging.debug ('Used first {}, last {}, starts at line {}, {} lines in total'.format (self.used.segmentbegin, self.used.segmentend, self.used.lineno, self.used.lines))
-        chain = Chain (self.used, rows)
-        chain.first ()
-        firstrow = chain.pos
-        lines = []
-        visited = set ()
-        while True:
-            if chain.pos in visited:
-                logging.warning ('Row {} already has been used'.format (chain.pos))
-            visited.add (chain.pos)
-
-            r = chain.atom
-            lbytes = bytes (r.line)
-            lbytesStripped = lbytes.rstrip (b'\xff')
-            if len (lbytes) != len (lbytesStripped):
-                logging.warning ('Line {} length incorrect. Is {}, should be {}, fixing. {}'.format (chain.lineno, r.line.length, len (lbytesStripped), lbytes))
-                lbytes = lbytesStripped
-            lines.append (lbytes)
-            chain.next ()
-
-            # chains are cyclic
-            if chain.pos == firstrow:
-                break
-        return codecs.decode (b'\n'.join (lines), 'eumel', 'replace')
-
-if __name__ == '__main__':
-    import sys, os, codecs, logging
-    import argparse, sys
-    
-    parser = argparse.ArgumentParser(description='Convert EUMEL FILE dataspace into plain text file.')
-    parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true')
-    parser.add_argument ('file', help='Input file')
-    args = parser.parse_args ()
-
-    if args.verbose:
-        logging.basicConfig (level=logging.DEBUG)
-    else:
-        logging.basicConfig (level=logging.WARNING)
-
-    with open (args.file, 'rb') as fd:
-        try:
-            ds = FileDataspace (fd)
-            linecount = len (ds.text.splitlines ())
-            if linecount != ds.used.lines:
-                logging.warning ('Got {} lines, but should have been {}'.format (linecount, ds.used.lines))
-            print (ds.text)
-        except DataspaceTypeMismatch:
-            logging.error ('Not a text file, cannot convert')
-            sys.exit (1)
-
diff --git a/tools/eumel.py b/tools/eumel.py
deleted file mode 100644
index a421e0a..0000000
--- a/tools/eumel.py
+++ /dev/null
@@ -1,327 +0,0 @@
-"""
-EUMEL utility functions, including:
-
-"""
-
-import logging
-import codecs
-
-# EUMEL character map. See “Benutzerhandbuch 1.7”, page 107 and file
-# ZEICHENSATZ from the archive disk std.graphik.
-eumel2unicodemap = dict ([
-    # standard newlines
-    (10, '\n'),
-    (13, '\r'),
-    # mark start. technically \15 and \14 would be a choice here, but they do
-    # different things on different systems and thus we’re just gonna strip
-    # them.
-    (15, ''),
-    (14, ''), # mark end
-    # same as ascii
-    (32, ' '),
-    (33, '!'),
-    (34, '"'),
-    (35, '#'),
-    (36, '$'),
-    (37, '%'),
-    (38, '&'),
-    (39, "'"),
-    (40, '('),
-    (41, ')'),
-    (42, '*'),
-    (43, '+'),
-    (44, ','),
-    (45, '-'),
-    (46, '.'),
-    (47, '/'),
-    (48, '0'),
-    (49, '1'),
-    (50, '2'),
-    (51, '3'),
-    (52, '4'),
-    (53, '5'),
-    (54, '6'),
-    (55, '7'),
-    (56, '8'),
-    (57, '9'),
-    (58, ':'),
-    (59, ';'),
-    (60, '<'),
-    (61, '='),
-    (62, '>'),
-    (63, '?'),
-    # then the paragraph symbol
-    (64, '§'),
-    # uppercase and lowercase letters from ascii
-    (65, 'A'),
-    (66, 'B'),
-    (67, 'C'),
-    (68, 'D'),
-    (69, 'E'),
-    (70, 'F'),
-    (71, 'G'),
-    (72, 'H'),
-    (73, 'I'),
-    (74, 'J'),
-    (75, 'K'),
-    (76, 'L'),
-    (77, 'M'),
-    (78, 'N'),
-    (79, 'O'),
-    (80, 'P'),
-    (81, 'Q'),
-    (82, 'R'),
-    (83, 'S'),
-    (84, 'T'),
-    (85, 'U'),
-    (86, 'V'),
-    (87, 'W'),
-    (88, 'X'),
-    (89, 'Y'),
-    (90, 'Z'),
-    (91, '['),
-    (92, '\\'),
-    (93, ']'),
-    (94, '^'),
-    (95, '_'),
-    (96, '`'),
-    (97, 'a'),
-    (98, 'b'),
-    (99, 'c'),
-    (100, 'd'),
-    (101, 'e'),
-    (102, 'f'),
-    (103, 'g'),
-    (104, 'h'),
-    (105, 'i'),
-    (106, 'j'),
-    (107, 'k'),
-    (108, 'l'),
-    (109, 'm'),
-    (110, 'n'),
-    (111, 'o'),
-    (112, 'p'),
-    (113, 'q'),
-    (114, 'r'),
-    (115, 's'),
-    (116, 't'),
-    (117, 'u'),
-    (118, 'v'),
-    (119, 'w'),
-    (120, 'x'),
-    (121, 'y'),
-    (122, 'z'),
-    (123, '{'),
-    (124, '|'),
-    (125, '}'),
-    (126, '~'),
-    # uppercase greek
-    (129, 'Α'),
-    (130, 'Β'),
-    (131, 'Γ'),
-    (132, 'Δ'),
-    (133, 'Ε'),
-    (134, 'Ζ'),
-    (135, 'Η'),
-    (136, 'Θ'),
-    (137, 'Ι'),
-    (138, 'Κ'),
-    (139, 'Λ'),
-    (140, 'Μ'),
-    (141, 'Ν'),
-    (142, 'Ξ'),
-    (143, 'Ο'),
-    (144, 'Π'),
-    (145, 'Ρ'),
-    (146, 'Σ'),
-    (147, 'Τ'),
-    (148, 'Υ'),
-    (149, 'Φ'),
-    (150, 'Χ'),
-    (151, 'Ψ'),
-    (152, 'Ω'),
-    # lowercase greek
-    (161, 'α'),
-    (162, 'β'),
-    (163, 'γ'),
-    (164, 'δ'),
-    (165, 'ε'),
-    (166, 'ζ'),
-    (167, 'η'),
-    (168, 'θ'),
-    (169, 'ι'),
-    (170, 'κ'),
-    (171, 'λ'),
-    (172, 'μ'),
-    (173, 'ν'),
-    (174, 'ξ'),
-    (175, 'ο'),
-    (176, 'π'),
-    (177, 'ρ'),
-    (178, 'ς'),
-    (179, 'σ'),
-    (180, 'τ'),
-    (181, 'υ'),
-    (182, 'φ'),
-    (183, 'χ'),
-    (184, 'ψ'),
-    (185, 'ω'),
-    # these seem to be combining diacritic, not sure how they work though
-    # 192 looks like a cross, dunno what it could be
-    (193, '\u0301'), # acute
-    (194, '\u0300'), # grave
-    (195, '\u0302'), # circumflex
-    (196, '\u0303'), # tilde
-    (197, '\u0304'), # macron
-    # 198: dunno
-    (199, '\u0307'), # dot above
-    (200, '\u0308'), # diaeresis
-    # 201: dunno
-    (202, '\u030a'), # ring above
-    (203, '\u0317'), # acute below
-    # 204: dunno
-    (205, '\u030a'), # ring above (again for small letters?)
-    # 206: dunno
-    (207, '\u030c'), # caron
-    # german umlauts
-    (214, 'Ä'),
-    (215, 'Ö'),
-    (216, 'Ü'),
-    (217, 'ä'),
-    (218, 'ö'),
-    (219, 'ü'),
-    (220, 'k'), # handbuch says: Trenn-'k' bei der Umwandlung von 'ck' in 'kk'
-    (221, '\u00ad'), # soft hyphen, inserted by eumel’s hyphenation program
-    (222, '\\#'), # printable hash (i.e. literal hash, not a printer/editor command)
-    (223, '\u00a0'), # protected space
-    (251, 'ß'),
-    ])
-
-def decode (input, errors='strict'):
-    ret = []
-    pos = 0
-    for pos in range (len (input)):
-        c = input[pos]
-        m = eumel2unicodemap.get (c, None)
-        if m is not None:
-            ret.append (m)
-        else:
-            if errors == 'strict':
-                raise UnicodeError ('unknown char {}'.format (c))
-            elif errors == 'ignore':
-                pass
-            elif errors == 'replace':
-                logging.debug ('replacing unknown symbol {} at position {}, context {}'.format (c, pos, input[pos-30:pos+30]))
-                ret.append ('\uFFFD')
-            else:
-                break
-    return (''.join (ret), pos)
-
-def lookup (name):
-    if name == 'eumel':
-        return codecs.CodecInfo(None, decode)
-    return None
-
-codecs.register (lookup)
-
-# Dataspace utilities
-import struct, os
-
-class DataspaceTypeMismatch (ValueError):
-    pass
-
-class Dataspace:
-    # Expected type
-    TYPE = None
-
-    def __init__ (self, fd):
-        self.fd = fd
-        self.lastaddr, self.firstaddr, self.type, _ = self._parseHeader ()
-        if self.TYPE is not None and self.type != self.TYPE:
-            raise DataspaceTypeMismatch ()
-        self.heap = {}
-
-    def _parseHeader (self):
-        """
-        :return: (last heap address, first heap address, dataspace type, unknown)
-        """
-        buf = self.fd.read (8)
-        return struct.unpack ('<HHHH', buf)
-
-    def parseText (self):
-        """
-        Parse TEXT datatype, which can either be embedded (up to 13? chars) or in the heap (i.e. address)
-        """
-        buf = self.fd.read (16)
-        address, length = struct.unpack ('<HB', buf[:3])
-        if length <= 13:
-            r = buf[3:3+length]
-        else:
-            length, = struct.unpack ('<H', buf[3:5])
-            r = HeapReference (self.heap, address, length)
-        return r
-
-    def parseInt (self, count=1):
-        if count == 1:
-            return struct.unpack ('<H', self.fd.read (1*intsize))[0]
-        else:
-            return [self.parseInt () for i in range (count)]
-
-    def parseHeap (self):
-        heapaddr = self.firstaddr
-        maxaddr = 2**(intsize*8)-1
-        while True:
-            head = self.fd.read (2)
-            # XXX: not sure how to find its offset
-            if head == b'\xff\xff':
-                continue
-            if not head or len (head) < 2:
-                break
-            length, = struct.unpack ('<H', head)
-            self.heap[heapaddr] = self.fd.read (length)
-            logging.debug ('got heap entry {:x} = ({}) {}'.format (heapaddr, length, self.heap[heapaddr]))
-            heapaddr = (heapaddr+2+length) % maxaddr
-
-    def skip (self, n):
-        self.fd.seek (n, os.SEEK_CUR)
-
-    def seek (self, pos):
-        self.fd.seek (pos, os.SEEK_SET)
-
-class HeapReference:
-    def __init__ (self, heap, address, length):
-        self.heap = heap
-        self.address = address
-        self.length = length
-        self._item = None
-
-    def __bytes__ (self):
-        return self.item[:self.length]
-
-    def __len__ (self):
-        return self.length
-
-    def __getitem__ (self, key):
-        return self.item[key]
-
-    def __repr__ (self):
-        return '<HeapReference to {:x} length {}>'.format (self.address, self.length)
-    
-    @property
-    def item (self):
-        if self._item:
-            return self._item
-        elif self.address in self.heap:
-            self._item = self.heap[self.address]
-            return self._item
-        else:
-            raise HeapReferenceUnresolved (self.address, self.length)
-
-class HeapReferenceUnresolved (Exception):
-    def __init__ (self, address, length):
-        Exception.__init__ (self, 'addr: {:x}, len: {}'.format (address, length))
-
-# Machine constants
-intsize = 2
-pagesize = 512
-
diff --git a/tools/extractAll.sh b/tools/extractAll.sh
deleted file mode 100755
index 8b8649f..0000000
--- a/tools/extractAll.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/sh
-
-root=`dirname "$0"`
-root=`realpath "$root"`
-
-while read -r F; do
-	base=`basename "$F"`
-	linear=`mktemp`
-	destdir="${base}.extracted"
-	echo "Extracting $F to $destdir"
-	$root/linearizeDisk.py "$F" "$linear"
-	$root/extractArchive.py -n -o "$destdir" "$linear"
-	pushd "$destdir" || continue
-	for G in ./*; do
-		echo "Converting $G to ${G}.txt"
-		$root/convertFileDs.py "$G" > "${G}.txt" || rm "${G}.txt"
-	done
-	popd
-	rm "$linear"
-done
-
diff --git a/tools/extractArchive.py b/tools/extractArchive.py
deleted file mode 100755
index f14a6b6..0000000
--- a/tools/extractArchive.py
+++ /dev/null
@@ -1,110 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Extract linearized (see linearizeDisk.py) EUMEL archive disk.
-"""
-
-import struct, sys, io, logging
-import codecs
-from eumel import Dataspace
-
-def take (it, n):
-    for i in range (n):
-        yield next (it)
-
-def parseEntry (blocks):
-    while True:
-        header = next (blocks)
-        unknown1, unknown2, length, unknown3 = struct.unpack ('<HHHH', header[:8])
-        logging.debug ('Got dataspace with {} blocks'.format (length))
-        yield b''.join (take (blocks, length))
-
-def readBlocks (fd):
-    while True:
-        buf = fd.read (512)
-        if not buf:
-            break
-        yield buf
-
-class FileHeaderDataspace (Dataspace):
-    TYPE = 0
-
-    def __init__ (self, fd):
-        Dataspace.__init__ (self, fd)
-        self.name = self.parseText ()
-        self.mtime = self.parseText ()
-        self.seek (0x40)
-        self.parseHeap ()
-
-if __name__ == '__main__':
-    import argparse, sys, codecs, os
-    from datetime import datetime
-    from io import BytesIO
-    from eumel import pagesize
-    
-    parser = argparse.ArgumentParser(description='Extract EUMEL disk archive.')
-    parser.add_argument ('-f', '--force', help='Overwrite existing files', action='store_true')
-    parser.add_argument ('-o', '--output', help='Output directory, defaults to archive name')
-    parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true')
-    parser.add_argument ('-n', '--number', help='Number files based on their position in the archive',
-            action='store_true')
-    parser.add_argument ('file', help='Input file')
-    args = parser.parse_args ()
-
-    if args.verbose:
-        logging.basicConfig (level=logging.DEBUG)
-    else:
-        logging.basicConfig (level=logging.INFO)
-
-    with open (args.file, 'rb') as infd:
-        entries = parseEntry (readBlocks (infd))
-
-        # first entry is always disk info
-        diskinfo = FileHeaderDataspace (BytesIO (next (entries)))
-        if not args.output:
-            args.output = codecs.decode (diskinfo.name, 'eumel', 'replace')
-            logging.debug ('Using disk name {} as output directory'.format (args.output))
-
-        # create output dir
-        try:
-            os.makedirs (args.output)
-        except FileExistsError:
-            pass
-
-        i = 1
-        while True:
-            # file header dataspace
-            fileheader = FileHeaderDataspace (BytesIO (next (entries)))
-            filename = codecs.decode (fileheader.name, 'eumel', 'replace').replace ('/', '-')
-            if len (filename) == 0:
-                logging.debug ('Filename was empty, i.e. last item in archive. I’m done')
-                break
-            try:
-                mtime = datetime.strptime (codecs.decode (fileheader.mtime, 'eumel', 'replace'), '%d.%m.%y')
-            except ValueError as e:
-                logging.warning ('Cannot parse date of file {}, {}'.format (filename, e))
-                mtime = datetime.now ()
-            logging.debug ('Got file {}, last modified {}'.format (filename, mtime))
-
-            # actual file contents
-            e = next (entries)
-
-            # quirks: if the first page starts with a magic sequence, skip it.
-            # Not sure what it is used for.
-            if e.startswith (2*b'\x30\x00\x00\x00'):
-                logging.debug ('skipping quirks')
-                e = e[pagesize:]
-
-            if args.number:
-                filename = '{:03d}_{}'.format (i, filename)
-            outfile = os.path.join (args.output, filename)
-            if os.path.exists (outfile) and not args.force:
-                logging.info ('File {} exists, skipping'.format (outfile))
-                continue
-            logging.info ('Extracting {} bytes to file {}'.format (len (e), outfile))
-            with open (outfile, 'wb') as outfd:
-                outfd.write (e)
-            stamp = mtime.timestamp ()
-            os.utime (outfile, (stamp, stamp))
-            i += 1
-
diff --git a/tools/formatRefs.py b/tools/formatRefs.py
index 280a444..31e458a 100755
--- a/tools/formatRefs.py
+++ b/tools/formatRefs.py
@@ -115,7 +115,7 @@ def warnUnusedButDefined (graph, rootNode):
 
 if __name__ == '__main__':
     g = Graph()
-    result = g.parse ("index.ttl", format='turtle')
+    result = g.parse (sys.stdin, format='turtle')
     rootUri = sys.argv[1]
     rootNode = URIRef (rootUri)
     s = Namespace("https://schema.org/")
diff --git a/tools/formatSoftware.py b/tools/formatSoftware.py
index a54a740..756a247 100755
--- a/tools/formatSoftware.py
+++ b/tools/formatSoftware.py
@@ -7,57 +7,7 @@ import sys
 from itertools import chain, groupby
 from jinja2 import Environment
 from formatRefs import first
-
-class RDFWalker:
-    """
-    Simple RDF graph walker
-    """
-
-    def __init__ (self, g, s, n, path=[]):
-        """
-        :param g: Graph
-        :param s: Namespace
-        :param n: Start node
-        """
-        self.g = g
-        self.n = n
-        self.s = s
-        self._path = path
-
-    def __getattr__ (self, k):
-        """
-        If k is underscore _, walk up tree one level, otherwise search for
-        direct descendents and get first one.
-        """
-        if k == '_':
-            return RDFWalker (self.g, self.s, self._path[0], self._path[1:])
-        yieldall = False
-        if k.endswith ('_'):
-            yieldall = True
-            k = k[:-1]
-
-        if k == 'a':
-            attr = RDF.type
-        else:
-            attr = getattr (self.s, k)
-
-        ret = [RDFWalker (self.g, self.s, n, [self.n] + self._path) for n in self.g.objects (self.n, attr)]
-
-        if yieldall:
-            return ret
-        elif not ret:
-            return None
-        else:
-            return ret[0]
-
-    def __eq__ (self, b):
-        return self.n == b.n
-
-    def __lt__ (self, b):
-        return str (self) < str (b)
-
-    def __str__ (self):
-        return str (self.n)
+from rdf import RDFWalker
 
 if __name__ == '__main__':
     env = Environment ()
@@ -108,7 +58,7 @@ if __name__ == '__main__':
     </div>
 {% endfor %}""")
     g = Graph()
-    result = g.parse ("index.ttl", format='turtle')
+    result = g.parse (sys.stdin, format='turtle')
     s = Namespace ("https://schema.org/")
 
     items = []
diff --git a/tools/linearizeDisk.py b/tools/linearizeDisk.py
deleted file mode 100755
index 55f4b06..0000000
--- a/tools/linearizeDisk.py
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-For some reason blocks in the bitsavers images are not in linear order, but
-shuffled. Not sure why and if other disks are affected as well, but this script
-reorders them.
-"""
-
-import os, logging
-from itertools import chain
-
-def linearBlocks (fd):
-    fd.seek (0, os.SEEK_END)
-    size = fd.tell ()
-    logging.debug ('File size is {} bytes'.format (size))
-
-    blockSize = 512
-    blocksPerChunk = 15
-    chunkSize = blockSize*blocksPerChunk
-    chunks = size//chunkSize
-    skip = 1
-    if size%chunkSize != 0:
-        logging.warning ('File size {} is not multiple of chunk size {}'.format (size, chunkSize))
-
-    # first even then odd chunks
-    for j in chain (range (0, chunks, 2), range (1, chunks, 2)):
-        pos = j*chunkSize
-        logging.debug ('Seeking to {} for chunk {} and reading {} blocks @ {} bytes'.format (pos, j, blocksPerChunk, blockSize))
-        fd.seek (pos, os.SEEK_SET)
-        for i in range (blocksPerChunk):
-            yield fd.read (blockSize)
-
-if __name__ == '__main__':
-    import argparse, sys
-    
-    parser = argparse.ArgumentParser(description='Reorder EUMEL archive disk’s blocks.')
-    parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true')
-    parser.add_argument ('input', help='Input file')
-    parser.add_argument ('output', help='Out file')
-    args = parser.parse_args ()
-    if args.verbose:
-        logging.basicConfig (level=logging.DEBUG)
-    else:
-        logging.basicConfig (level=logging.WARNING)
-
-    with open (args.input, 'rb') as infd, open (args.output, 'wb') as outfd:
-        for b in linearBlocks (infd):
-            outfd.write (b)
-
diff --git a/tools/rdf.py b/tools/rdf.py
new file mode 100644
index 0000000..6aaa682
--- /dev/null
+++ b/tools/rdf.py
@@ -0,0 +1,54 @@
+from rdflib.namespace import RDF, NamespaceManager
+
+class RDFWalker:
+    """
+    Simple RDF graph walker
+    """
+
+    def __init__ (self, g, s, n, path=[]):
+        """
+        :param g: Graph
+        :param s: Namespace
+        :param n: Start node
+        """
+        self.g = g
+        self.n = n
+        self.s = s
+        self._path = path
+
+    def __getattr__ (self, k):
+        """
+        If k is underscore _, walk up tree one level, otherwise search for
+        direct descendents and get first one.
+        """
+        if k == '_':
+            return RDFWalker (self.g, self.s, self._path[0], self._path[1:])
+        yieldall = False
+        if k.endswith ('_'):
+            yieldall = True
+            k = k[:-1]
+
+        if k == 'a':
+            attr = RDF.type
+        else:
+            attr = getattr (self.s, k)
+
+        ret = [RDFWalker (self.g, self.s, n, [self.n] + self._path) for n in self.g.objects (self.n, attr)]
+
+        if yieldall:
+            return ret
+        elif not ret:
+            return None
+        else:
+            return ret[0]
+
+    def __eq__ (self, b):
+        return self.n == b.n
+
+    def __lt__ (self, b):
+        return str (self) < str (b)
+
+    def __str__ (self):
+        return str (self.n)
+
+
author	Lars-Dominik Braun <lars@6xq.net>	2017-01-21 11:24:56 +0100
committer	Lars-Dominik Braun <lars@6xq.net>	2017-01-28 14:31:28 +0100
commit	a6d474471dddc2d7a187a66358aafcb86235ca69 (patch)
tree	7242cbf0f0e645b2156e143dbebaa2d28b4fcdba /tools
parent	c7befe173ed2b0f5fd82228fa45c7a105ac44818 (diff)
download	eumel-a6d474471dddc2d7a187a66358aafcb86235ca69.tar.gz eumel-a6d474471dddc2d7a187a66358aafcb86235ca69.tar.bz2 eumel-a6d474471dddc2d7a187a66358aafcb86235ca69.zip