diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2017-01-21 11:24:56 +0100 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2017-01-28 14:31:28 +0100 |
commit | a6d474471dddc2d7a187a66358aafcb86235ca69 (patch) | |
tree | 7242cbf0f0e645b2156e143dbebaa2d28b4fcdba /tools | |
parent | c7befe173ed2b0f5fd82228fa45c7a105ac44818 (diff) | |
download | eumel-a6d474471dddc2d7a187a66358aafcb86235ca69.tar.gz eumel-a6d474471dddc2d7a187a66358aafcb86235ca69.tar.bz2 eumel-a6d474471dddc2d7a187a66358aafcb86235ca69.zip |
Restructure git
Move tools into separate repo, split TTL file.
Diffstat (limited to 'tools')
-rwxr-xr-x | tools/convertCharset.py | 71 | ||||
-rwxr-xr-x | tools/convertFileDs.py | 167 | ||||
-rw-r--r-- | tools/eumel.py | 327 | ||||
-rwxr-xr-x | tools/extractAll.sh | 21 | ||||
-rwxr-xr-x | tools/extractArchive.py | 110 | ||||
-rwxr-xr-x | tools/formatRefs.py | 2 | ||||
-rwxr-xr-x | tools/formatSoftware.py | 54 | ||||
-rwxr-xr-x | tools/linearizeDisk.py | 49 | ||||
-rw-r--r-- | tools/rdf.py | 54 |
9 files changed, 57 insertions, 798 deletions
diff --git a/tools/convertCharset.py b/tools/convertCharset.py deleted file mode 100755 index 59163aa..0000000 --- a/tools/convertCharset.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 - -""" -Convert file ZEICHENSATZ from graphics package to PNG files -""" - -from eumel import * - -class ZeichensatzDataspace(Dataspace): - TYPE = 0x44c - - def __init__ (self, fd): - Dataspace.__init__ (self, fd) - - # just an array with 255 elements - self.rows = [] - for i in range (255): - self.rows.append (self.parseText ()) - self.parseHeap () - -if __name__ == '__main__': - import argparse, sys, cairo, math - - def transform (w, h, x, y): - return ((2+x), (11-y)) - - parser = argparse.ArgumentParser(description='Convert ZEICHENSATZ dataspace to PNG') - parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true') - parser.add_argument ('file', help='Input file') - parser.add_argument ('prefix', help='Output prefix') - args = parser.parse_args () - - if args.verbose: - logging.basicConfig (level=logging.DEBUG) - else: - logging.basicConfig (level=logging.WARNING) - - m = [] - with open (args.file, 'rb') as fd: - ds = ZeichensatzDataspace (fd) - # no character with code 0 - for (j, r) in zip (range (1, len (ds.rows)+1), ds.rows): - if len (r) == 0: - continue - - out = '{}{:03d}.png'.format (args.prefix, j) - logging.info ('Converting character {} to {}'.format (j, out)) - w, h = 1024, 1024 - surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, w, h) - ctx = cairo.Context(surface) - ctx.scale (64, 64) - ctx.set_line_width (0.1) - ctx.set_source_rgb (1, 0, 0) - - r = bytes (r) - lastxy = (0, 0) - for i in range (0, len (r), 4): - x0, y0, x1, y1 = struct.unpack ('<bbbb', r[i:i+4]) - m.extend ([x0, y0, x1, y1]) - if (x0, y0) != lastxy: - ctx.move_to (*transform (w, h, x0, y0)) - if (x0, y0) != (x1, y1): - ctx.line_to (*transform (w, h, x1, y1)) - else: - x1, y1 = transform (w, h, x1, y1) - ctx.arc (x1, y1, 0.1, 0, 2*math.pi) - lastxy = (x1, y1) - ctx.stroke () - - surface.write_to_png (out) - diff --git a/tools/convertFileDs.py b/tools/convertFileDs.py deleted file mode 100755 index c4037db..0000000 --- a/tools/convertFileDs.py +++ /dev/null @@ -1,167 +0,0 @@ -#!/usr/bin/env python3 - -""" -Convert EUMEL FILE dataspace into a plain text file. - -Since there are no “files” in EUMEL we’re dealing with the editor’s in-memory -datastructure here. See EUMEL packet “file handling”. -""" - -import struct, copy -from collections import namedtuple -from eumel import Dataspace, DataspaceTypeMismatch - -Segment = namedtuple ('Segment', ['succ', 'pred', 'end']) -Sequence = namedtuple ('Sequence', ['index', 'segmentbegin', 'segmentend', 'lineno', 'lines']) -Atom = namedtuple ('Atom', ['seg', 'type', 'line']) - -class Chain: - """ - A chain is a cyclic datastructure, pointing to segments. Segments contain - one or more rows, which in turn reference a single line’s text. - """ - def __init__ (self, sequence, rows): - self.lineno = sequence.lineno - # current atom - self.pos = sequence.index - # current segment - self.segpos = sequence.segmentbegin - self.rows = rows - - def next (self): - atom = self.rows[self.segpos] - if self.pos == atom.seg.end: - # move to next segment - self.pos = atom.seg.succ - self.segpos = atom.seg.succ - else: - # just use the next atom in this segment - self.pos += 1 - self.lineno += 1 - - def prev (self): - # backwards is a little more involved: seg.pred points to the *first* segment row - logging.debug ('prev at pos {} seg {} line {}'.format (self.pos, self.segpos, self.lineno)) - if self.pos == self.segpos: - # get previous segment - atom = self.rows[self.segpos] - self.segpos = atom.seg.pred - atom = self.rows[self.segpos] - self.pos = atom.seg.end - else: - self.pos -= 1 - self.lineno -= 1 - - def first (self): - """ - Seek to first line - """ - while self.lineno > 1: - self.prev () - - @property - def atom (self): - """ - Get atom at current position - """ - return self.rows[self.pos] - -class FileDataspace (Dataspace): - """ - EUMEL’s FILE datatype - """ - - TYPE = 1003 - - def __init__ (self, fd): - Dataspace.__init__ (self, fd) - - # header of the BOUND LIST (aka TYPE FILE) - self.used = self.parseSequence () - self.parseInt (2) - self.parseSequence () - self.parseSequence () - self.parseInt (7) - assert self.fd.tell () == 0x38 - - rows = self.parseRows () - - self.parseHeap () - - self.text = self.reconstructText (rows) - - def parseSegment (self): - return Segment (*self.parseInt (3)) - - def parseSequence (self): - return Sequence (*self.parseInt (5)) - - def parseRows (self): - rows = [] - # read lines - while True: - # check data - data = self.fd.read (24) - if data == 24*b'\xff': - break - self.skip (-24) - # and parse it - seg = self.parseSegment () - rowtype = self.parseInt () - text = self.parseText () - rows.append (Atom (seg, rowtype, text)) - logging.debug ('got row {} {}'.format (len (rows)-1, rows[-1])) - return rows - - def reconstructText (self, rows): - # XXX: use - logging.debug ('Used first {}, last {}, starts at line {}, {} lines in total'.format (self.used.segmentbegin, self.used.segmentend, self.used.lineno, self.used.lines)) - chain = Chain (self.used, rows) - chain.first () - firstrow = chain.pos - lines = [] - visited = set () - while True: - if chain.pos in visited: - logging.warning ('Row {} already has been used'.format (chain.pos)) - visited.add (chain.pos) - - r = chain.atom - lbytes = bytes (r.line) - lbytesStripped = lbytes.rstrip (b'\xff') - if len (lbytes) != len (lbytesStripped): - logging.warning ('Line {} length incorrect. Is {}, should be {}, fixing. {}'.format (chain.lineno, r.line.length, len (lbytesStripped), lbytes)) - lbytes = lbytesStripped - lines.append (lbytes) - chain.next () - - # chains are cyclic - if chain.pos == firstrow: - break - return codecs.decode (b'\n'.join (lines), 'eumel', 'replace') - -if __name__ == '__main__': - import sys, os, codecs, logging - import argparse, sys - - parser = argparse.ArgumentParser(description='Convert EUMEL FILE dataspace into plain text file.') - parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true') - parser.add_argument ('file', help='Input file') - args = parser.parse_args () - - if args.verbose: - logging.basicConfig (level=logging.DEBUG) - else: - logging.basicConfig (level=logging.WARNING) - - with open (args.file, 'rb') as fd: - try: - ds = FileDataspace (fd) - linecount = len (ds.text.splitlines ()) - if linecount != ds.used.lines: - logging.warning ('Got {} lines, but should have been {}'.format (linecount, ds.used.lines)) - print (ds.text) - except DataspaceTypeMismatch: - logging.error ('Not a text file, cannot convert') - sys.exit (1) - diff --git a/tools/eumel.py b/tools/eumel.py deleted file mode 100644 index a421e0a..0000000 --- a/tools/eumel.py +++ /dev/null @@ -1,327 +0,0 @@ -""" -EUMEL utility functions, including: - -""" - -import logging -import codecs - -# EUMEL character map. See “Benutzerhandbuch 1.7”, page 107 and file -# ZEICHENSATZ from the archive disk std.graphik. -eumel2unicodemap = dict ([ - # standard newlines - (10, '\n'), - (13, '\r'), - # mark start. technically \15 and \14 would be a choice here, but they do - # different things on different systems and thus we’re just gonna strip - # them. - (15, ''), - (14, ''), # mark end - # same as ascii - (32, ' '), - (33, '!'), - (34, '"'), - (35, '#'), - (36, '$'), - (37, '%'), - (38, '&'), - (39, "'"), - (40, '('), - (41, ')'), - (42, '*'), - (43, '+'), - (44, ','), - (45, '-'), - (46, '.'), - (47, '/'), - (48, '0'), - (49, '1'), - (50, '2'), - (51, '3'), - (52, '4'), - (53, '5'), - (54, '6'), - (55, '7'), - (56, '8'), - (57, '9'), - (58, ':'), - (59, ';'), - (60, '<'), - (61, '='), - (62, '>'), - (63, '?'), - # then the paragraph symbol - (64, '§'), - # uppercase and lowercase letters from ascii - (65, 'A'), - (66, 'B'), - (67, 'C'), - (68, 'D'), - (69, 'E'), - (70, 'F'), - (71, 'G'), - (72, 'H'), - (73, 'I'), - (74, 'J'), - (75, 'K'), - (76, 'L'), - (77, 'M'), - (78, 'N'), - (79, 'O'), - (80, 'P'), - (81, 'Q'), - (82, 'R'), - (83, 'S'), - (84, 'T'), - (85, 'U'), - (86, 'V'), - (87, 'W'), - (88, 'X'), - (89, 'Y'), - (90, 'Z'), - (91, '['), - (92, '\\'), - (93, ']'), - (94, '^'), - (95, '_'), - (96, '`'), - (97, 'a'), - (98, 'b'), - (99, 'c'), - (100, 'd'), - (101, 'e'), - (102, 'f'), - (103, 'g'), - (104, 'h'), - (105, 'i'), - (106, 'j'), - (107, 'k'), - (108, 'l'), - (109, 'm'), - (110, 'n'), - (111, 'o'), - (112, 'p'), - (113, 'q'), - (114, 'r'), - (115, 's'), - (116, 't'), - (117, 'u'), - (118, 'v'), - (119, 'w'), - (120, 'x'), - (121, 'y'), - (122, 'z'), - (123, '{'), - (124, '|'), - (125, '}'), - (126, '~'), - # uppercase greek - (129, 'Α'), - (130, 'Β'), - (131, 'Γ'), - (132, 'Δ'), - (133, 'Ε'), - (134, 'Ζ'), - (135, 'Η'), - (136, 'Θ'), - (137, 'Ι'), - (138, 'Κ'), - (139, 'Λ'), - (140, 'Μ'), - (141, 'Ν'), - (142, 'Ξ'), - (143, 'Ο'), - (144, 'Π'), - (145, 'Ρ'), - (146, 'Σ'), - (147, 'Τ'), - (148, 'Υ'), - (149, 'Φ'), - (150, 'Χ'), - (151, 'Ψ'), - (152, 'Ω'), - # lowercase greek - (161, 'α'), - (162, 'β'), - (163, 'γ'), - (164, 'δ'), - (165, 'ε'), - (166, 'ζ'), - (167, 'η'), - (168, 'θ'), - (169, 'ι'), - (170, 'κ'), - (171, 'λ'), - (172, 'μ'), - (173, 'ν'), - (174, 'ξ'), - (175, 'ο'), - (176, 'π'), - (177, 'ρ'), - (178, 'ς'), - (179, 'σ'), - (180, 'τ'), - (181, 'υ'), - (182, 'φ'), - (183, 'χ'), - (184, 'ψ'), - (185, 'ω'), - # these seem to be combining diacritic, not sure how they work though - # 192 looks like a cross, dunno what it could be - (193, '\u0301'), # acute - (194, '\u0300'), # grave - (195, '\u0302'), # circumflex - (196, '\u0303'), # tilde - (197, '\u0304'), # macron - # 198: dunno - (199, '\u0307'), # dot above - (200, '\u0308'), # diaeresis - # 201: dunno - (202, '\u030a'), # ring above - (203, '\u0317'), # acute below - # 204: dunno - (205, '\u030a'), # ring above (again for small letters?) - # 206: dunno - (207, '\u030c'), # caron - # german umlauts - (214, 'Ä'), - (215, 'Ö'), - (216, 'Ü'), - (217, 'ä'), - (218, 'ö'), - (219, 'ü'), - (220, 'k'), # handbuch says: Trenn-'k' bei der Umwandlung von 'ck' in 'kk' - (221, '\u00ad'), # soft hyphen, inserted by eumel’s hyphenation program - (222, '\\#'), # printable hash (i.e. literal hash, not a printer/editor command) - (223, '\u00a0'), # protected space - (251, 'ß'), - ]) - -def decode (input, errors='strict'): - ret = [] - pos = 0 - for pos in range (len (input)): - c = input[pos] - m = eumel2unicodemap.get (c, None) - if m is not None: - ret.append (m) - else: - if errors == 'strict': - raise UnicodeError ('unknown char {}'.format (c)) - elif errors == 'ignore': - pass - elif errors == 'replace': - logging.debug ('replacing unknown symbol {} at position {}, context {}'.format (c, pos, input[pos-30:pos+30])) - ret.append ('\uFFFD') - else: - break - return (''.join (ret), pos) - -def lookup (name): - if name == 'eumel': - return codecs.CodecInfo(None, decode) - return None - -codecs.register (lookup) - -# Dataspace utilities -import struct, os - -class DataspaceTypeMismatch (ValueError): - pass - -class Dataspace: - # Expected type - TYPE = None - - def __init__ (self, fd): - self.fd = fd - self.lastaddr, self.firstaddr, self.type, _ = self._parseHeader () - if self.TYPE is not None and self.type != self.TYPE: - raise DataspaceTypeMismatch () - self.heap = {} - - def _parseHeader (self): - """ - :return: (last heap address, first heap address, dataspace type, unknown) - """ - buf = self.fd.read (8) - return struct.unpack ('<HHHH', buf) - - def parseText (self): - """ - Parse TEXT datatype, which can either be embedded (up to 13? chars) or in the heap (i.e. address) - """ - buf = self.fd.read (16) - address, length = struct.unpack ('<HB', buf[:3]) - if length <= 13: - r = buf[3:3+length] - else: - length, = struct.unpack ('<H', buf[3:5]) - r = HeapReference (self.heap, address, length) - return r - - def parseInt (self, count=1): - if count == 1: - return struct.unpack ('<H', self.fd.read (1*intsize))[0] - else: - return [self.parseInt () for i in range (count)] - - def parseHeap (self): - heapaddr = self.firstaddr - maxaddr = 2**(intsize*8)-1 - while True: - head = self.fd.read (2) - # XXX: not sure how to find its offset - if head == b'\xff\xff': - continue - if not head or len (head) < 2: - break - length, = struct.unpack ('<H', head) - self.heap[heapaddr] = self.fd.read (length) - logging.debug ('got heap entry {:x} = ({}) {}'.format (heapaddr, length, self.heap[heapaddr])) - heapaddr = (heapaddr+2+length) % maxaddr - - def skip (self, n): - self.fd.seek (n, os.SEEK_CUR) - - def seek (self, pos): - self.fd.seek (pos, os.SEEK_SET) - -class HeapReference: - def __init__ (self, heap, address, length): - self.heap = heap - self.address = address - self.length = length - self._item = None - - def __bytes__ (self): - return self.item[:self.length] - - def __len__ (self): - return self.length - - def __getitem__ (self, key): - return self.item[key] - - def __repr__ (self): - return '<HeapReference to {:x} length {}>'.format (self.address, self.length) - - @property - def item (self): - if self._item: - return self._item - elif self.address in self.heap: - self._item = self.heap[self.address] - return self._item - else: - raise HeapReferenceUnresolved (self.address, self.length) - -class HeapReferenceUnresolved (Exception): - def __init__ (self, address, length): - Exception.__init__ (self, 'addr: {:x}, len: {}'.format (address, length)) - -# Machine constants -intsize = 2 -pagesize = 512 - diff --git a/tools/extractAll.sh b/tools/extractAll.sh deleted file mode 100755 index 8b8649f..0000000 --- a/tools/extractAll.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/sh - -root=`dirname "$0"` -root=`realpath "$root"` - -while read -r F; do - base=`basename "$F"` - linear=`mktemp` - destdir="${base}.extracted" - echo "Extracting $F to $destdir" - $root/linearizeDisk.py "$F" "$linear" - $root/extractArchive.py -n -o "$destdir" "$linear" - pushd "$destdir" || continue - for G in ./*; do - echo "Converting $G to ${G}.txt" - $root/convertFileDs.py "$G" > "${G}.txt" || rm "${G}.txt" - done - popd - rm "$linear" -done - diff --git a/tools/extractArchive.py b/tools/extractArchive.py deleted file mode 100755 index f14a6b6..0000000 --- a/tools/extractArchive.py +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/env python3 - -""" -Extract linearized (see linearizeDisk.py) EUMEL archive disk. -""" - -import struct, sys, io, logging -import codecs -from eumel import Dataspace - -def take (it, n): - for i in range (n): - yield next (it) - -def parseEntry (blocks): - while True: - header = next (blocks) - unknown1, unknown2, length, unknown3 = struct.unpack ('<HHHH', header[:8]) - logging.debug ('Got dataspace with {} blocks'.format (length)) - yield b''.join (take (blocks, length)) - -def readBlocks (fd): - while True: - buf = fd.read (512) - if not buf: - break - yield buf - -class FileHeaderDataspace (Dataspace): - TYPE = 0 - - def __init__ (self, fd): - Dataspace.__init__ (self, fd) - self.name = self.parseText () - self.mtime = self.parseText () - self.seek (0x40) - self.parseHeap () - -if __name__ == '__main__': - import argparse, sys, codecs, os - from datetime import datetime - from io import BytesIO - from eumel import pagesize - - parser = argparse.ArgumentParser(description='Extract EUMEL disk archive.') - parser.add_argument ('-f', '--force', help='Overwrite existing files', action='store_true') - parser.add_argument ('-o', '--output', help='Output directory, defaults to archive name') - parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true') - parser.add_argument ('-n', '--number', help='Number files based on their position in the archive', - action='store_true') - parser.add_argument ('file', help='Input file') - args = parser.parse_args () - - if args.verbose: - logging.basicConfig (level=logging.DEBUG) - else: - logging.basicConfig (level=logging.INFO) - - with open (args.file, 'rb') as infd: - entries = parseEntry (readBlocks (infd)) - - # first entry is always disk info - diskinfo = FileHeaderDataspace (BytesIO (next (entries))) - if not args.output: - args.output = codecs.decode (diskinfo.name, 'eumel', 'replace') - logging.debug ('Using disk name {} as output directory'.format (args.output)) - - # create output dir - try: - os.makedirs (args.output) - except FileExistsError: - pass - - i = 1 - while True: - # file header dataspace - fileheader = FileHeaderDataspace (BytesIO (next (entries))) - filename = codecs.decode (fileheader.name, 'eumel', 'replace').replace ('/', '-') - if len (filename) == 0: - logging.debug ('Filename was empty, i.e. last item in archive. I’m done') - break - try: - mtime = datetime.strptime (codecs.decode (fileheader.mtime, 'eumel', 'replace'), '%d.%m.%y') - except ValueError as e: - logging.warning ('Cannot parse date of file {}, {}'.format (filename, e)) - mtime = datetime.now () - logging.debug ('Got file {}, last modified {}'.format (filename, mtime)) - - # actual file contents - e = next (entries) - - # quirks: if the first page starts with a magic sequence, skip it. - # Not sure what it is used for. - if e.startswith (2*b'\x30\x00\x00\x00'): - logging.debug ('skipping quirks') - e = e[pagesize:] - - if args.number: - filename = '{:03d}_{}'.format (i, filename) - outfile = os.path.join (args.output, filename) - if os.path.exists (outfile) and not args.force: - logging.info ('File {} exists, skipping'.format (outfile)) - continue - logging.info ('Extracting {} bytes to file {}'.format (len (e), outfile)) - with open (outfile, 'wb') as outfd: - outfd.write (e) - stamp = mtime.timestamp () - os.utime (outfile, (stamp, stamp)) - i += 1 - diff --git a/tools/formatRefs.py b/tools/formatRefs.py index 280a444..31e458a 100755 --- a/tools/formatRefs.py +++ b/tools/formatRefs.py @@ -115,7 +115,7 @@ def warnUnusedButDefined (graph, rootNode): if __name__ == '__main__': g = Graph() - result = g.parse ("index.ttl", format='turtle') + result = g.parse (sys.stdin, format='turtle') rootUri = sys.argv[1] rootNode = URIRef (rootUri) s = Namespace("https://schema.org/") diff --git a/tools/formatSoftware.py b/tools/formatSoftware.py index a54a740..756a247 100755 --- a/tools/formatSoftware.py +++ b/tools/formatSoftware.py @@ -7,57 +7,7 @@ import sys from itertools import chain, groupby from jinja2 import Environment from formatRefs import first - -class RDFWalker: - """ - Simple RDF graph walker - """ - - def __init__ (self, g, s, n, path=[]): - """ - :param g: Graph - :param s: Namespace - :param n: Start node - """ - self.g = g - self.n = n - self.s = s - self._path = path - - def __getattr__ (self, k): - """ - If k is underscore _, walk up tree one level, otherwise search for - direct descendents and get first one. - """ - if k == '_': - return RDFWalker (self.g, self.s, self._path[0], self._path[1:]) - yieldall = False - if k.endswith ('_'): - yieldall = True - k = k[:-1] - - if k == 'a': - attr = RDF.type - else: - attr = getattr (self.s, k) - - ret = [RDFWalker (self.g, self.s, n, [self.n] + self._path) for n in self.g.objects (self.n, attr)] - - if yieldall: - return ret - elif not ret: - return None - else: - return ret[0] - - def __eq__ (self, b): - return self.n == b.n - - def __lt__ (self, b): - return str (self) < str (b) - - def __str__ (self): - return str (self.n) +from rdf import RDFWalker if __name__ == '__main__': env = Environment () @@ -108,7 +58,7 @@ if __name__ == '__main__': </div> {% endfor %}""") g = Graph() - result = g.parse ("index.ttl", format='turtle') + result = g.parse (sys.stdin, format='turtle') s = Namespace ("https://schema.org/") items = [] diff --git a/tools/linearizeDisk.py b/tools/linearizeDisk.py deleted file mode 100755 index 55f4b06..0000000 --- a/tools/linearizeDisk.py +++ /dev/null @@ -1,49 +0,0 @@ -#!/usr/bin/env python3 - -""" -For some reason blocks in the bitsavers images are not in linear order, but -shuffled. Not sure why and if other disks are affected as well, but this script -reorders them. -""" - -import os, logging -from itertools import chain - -def linearBlocks (fd): - fd.seek (0, os.SEEK_END) - size = fd.tell () - logging.debug ('File size is {} bytes'.format (size)) - - blockSize = 512 - blocksPerChunk = 15 - chunkSize = blockSize*blocksPerChunk - chunks = size//chunkSize - skip = 1 - if size%chunkSize != 0: - logging.warning ('File size {} is not multiple of chunk size {}'.format (size, chunkSize)) - - # first even then odd chunks - for j in chain (range (0, chunks, 2), range (1, chunks, 2)): - pos = j*chunkSize - logging.debug ('Seeking to {} for chunk {} and reading {} blocks @ {} bytes'.format (pos, j, blocksPerChunk, blockSize)) - fd.seek (pos, os.SEEK_SET) - for i in range (blocksPerChunk): - yield fd.read (blockSize) - -if __name__ == '__main__': - import argparse, sys - - parser = argparse.ArgumentParser(description='Reorder EUMEL archive disk’s blocks.') - parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true') - parser.add_argument ('input', help='Input file') - parser.add_argument ('output', help='Out file') - args = parser.parse_args () - if args.verbose: - logging.basicConfig (level=logging.DEBUG) - else: - logging.basicConfig (level=logging.WARNING) - - with open (args.input, 'rb') as infd, open (args.output, 'wb') as outfd: - for b in linearBlocks (infd): - outfd.write (b) - diff --git a/tools/rdf.py b/tools/rdf.py new file mode 100644 index 0000000..6aaa682 --- /dev/null +++ b/tools/rdf.py @@ -0,0 +1,54 @@ +from rdflib.namespace import RDF, NamespaceManager + +class RDFWalker: + """ + Simple RDF graph walker + """ + + def __init__ (self, g, s, n, path=[]): + """ + :param g: Graph + :param s: Namespace + :param n: Start node + """ + self.g = g + self.n = n + self.s = s + self._path = path + + def __getattr__ (self, k): + """ + If k is underscore _, walk up tree one level, otherwise search for + direct descendents and get first one. + """ + if k == '_': + return RDFWalker (self.g, self.s, self._path[0], self._path[1:]) + yieldall = False + if k.endswith ('_'): + yieldall = True + k = k[:-1] + + if k == 'a': + attr = RDF.type + else: + attr = getattr (self.s, k) + + ret = [RDFWalker (self.g, self.s, n, [self.n] + self._path) for n in self.g.objects (self.n, attr)] + + if yieldall: + return ret + elif not ret: + return None + else: + return ret[0] + + def __eq__ (self, b): + return self.n == b.n + + def __lt__ (self, b): + return str (self) < str (b) + + def __str__ (self): + return str (self.n) + + |