summaryrefslogtreecommitdiff
path: root/extractArchive.py
diff options
context:
space:
mode:
author Lars-Dominik Braun <lars@6xq.net> 2016-09-17 11:06:16 +0200
committer Lars-Dominik Braun <lars@6xq.net> 2016-09-17 11:06:16 +0200
commit 12989393311cdca62f376bea6883ee36e8fa43ac (patch)
tree adeb4f42250bfaa887b08539d98c27b26935bcef /extractArchive.py
download eumel-tools-12989393311cdca62f376bea6883ee36e8fa43ac.tar.gz
eumel-tools-12989393311cdca62f376bea6883ee36e8fa43ac.tar.bz2
eumel-tools-12989393311cdca62f376bea6883ee36e8fa43ac.zip
Add disk extraction tools
Diffstat (limited to 'extractArchive.py')
-rwxr-xr-x extractArchive.py 104
1 files changed, 104 insertions, 0 deletions
diff --git a/extractArchive.py b/extractArchive.py
new file mode 100755
index 0000000..2e66879
--- /dev/null
+++ b/extractArchive.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python3
+
+"""
+Extract linearized (see linearizeDisk.py) EUMEL archive disk.
+"""
+
+import struct, sys, io, logging
+import codecs
+from eumel import Dataspace
+
+def take (it, n):
+ for i in range (n):
+ yield next (it)
+
+def parseEntry (blocks):
+ while True:
+ header = next (blocks)
+ unknown1, unknown2, length, unknown3 = struct.unpack ('<HHHH', header[:8])
+ logging.debug ('Got dataspace with {} blocks'.format (length))
+ yield b''.join (take (blocks, length))
+
+def readBlocks (fd):
+ while True:
+ buf = fd.read (512)
+ if not buf:
+ break
+ yield buf
+
+class FileHeaderDataspace (Dataspace):
+ TYPE = 0
+
+ def __init__ (self, fd):
+ Dataspace.__init__ (self, fd)
+ self.name = self.parseText ()
+ self.mtime = self.parseText ()
+ self.seek (0x40)
+ self.parseHeap ()
+
+if __name__ == '__main__':
+ import argparse, sys, codecs, os
+ from datetime import datetime
+ from io import BytesIO
+ from eumel import pagesize
+
+ parser = argparse.ArgumentParser(description='Extract EUMEL disk archive.')
+ parser.add_argument ('-f', '--force', help='Overwrite existing files', action='store_true')
+ parser.add_argument ('-o', '--output', help='Output directory, defaults to archive name')
+ parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true')
+ parser.add_argument ('file', help='Input file')
+ args = parser.parse_args ()
+
+ if args.verbose:
+ logging.basicConfig (level=logging.DEBUG)
+ else:
+ logging.basicConfig (level=logging.INFO)
+
+ with open (args.file, 'rb') as infd:
+ entries = parseEntry (readBlocks (infd))
+
+ # first entry is always disk info
+ diskinfo = FileHeaderDataspace (BytesIO (next (entries)))
+ if not args.output:
+ args.output = codecs.decode (diskinfo.name, 'eumel', 'replace')
+ logging.debug ('Using disk name {} as output directory'.format (args.output))
+
+ # create output dir
+ try:
+ os.makedirs (args.output)
+ except FileExistsError:
+ pass
+
+ while True:
+ # file header dataspace
+ fileheader = FileHeaderDataspace (BytesIO (next (entries)))
+ filename = codecs.decode (fileheader.name, 'eumel', 'replace').replace ('/', '-')
+ if len (filename) == 0:
+ logging.debug ('Filename was empty, i.e. last item in archive. I’m done')
+ break
+ try:
+ mtime = datetime.strptime (codecs.decode (fileheader.mtime, 'eumel', 'replace'), '%d.%m.%y')
+ except ValueError as e:
+ logging.warning ('Cannot parse date of file {}, {}'.format (filename, e))
+ mtime = datetime.now ()
+ logging.debug ('Got file {}, last modified {}'.format (filename, mtime))
+
+ # actual file contents
+ e = next (entries)
+
+ # quirks: if the first page starts with a magic sequence, skip it.
+ # Not sure what it is used for.
+ if e.startswith (2*b'\x30\x00\x00\x00'):
+ logging.debug ('skipping quirks')
+ e = e[pagesize:]
+
+ outfile = os.path.join (args.output, filename)
+ if os.path.exists (outfile) and not args.force:
+ logging.info ('File {} exists, skipping'.format (outfile))
+ continue
+ logging.info ('Extracting {} bytes to file {}'.format (len (e), outfile))
+ with open (outfile, 'wb') as outfd:
+ outfd.write (e)
+ stamp = mtime.timestamp ()
+ os.utime (outfile, (stamp, stamp))
+