summaryrefslogtreecommitdiff
path: root/tools/extractArchive.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/extractArchive.py')
-rwxr-xr-xtools/extractArchive.py110
1 files changed, 0 insertions, 110 deletions
diff --git a/tools/extractArchive.py b/tools/extractArchive.py
deleted file mode 100755
index f14a6b6..0000000
--- a/tools/extractArchive.py
+++ /dev/null
@@ -1,110 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Extract linearized (see linearizeDisk.py) EUMEL archive disk.
-"""
-
-import struct, sys, io, logging
-import codecs
-from eumel import Dataspace
-
-def take (it, n):
- for i in range (n):
- yield next (it)
-
-def parseEntry (blocks):
- while True:
- header = next (blocks)
- unknown1, unknown2, length, unknown3 = struct.unpack ('<HHHH', header[:8])
- logging.debug ('Got dataspace with {} blocks'.format (length))
- yield b''.join (take (blocks, length))
-
-def readBlocks (fd):
- while True:
- buf = fd.read (512)
- if not buf:
- break
- yield buf
-
-class FileHeaderDataspace (Dataspace):
- TYPE = 0
-
- def __init__ (self, fd):
- Dataspace.__init__ (self, fd)
- self.name = self.parseText ()
- self.mtime = self.parseText ()
- self.seek (0x40)
- self.parseHeap ()
-
-if __name__ == '__main__':
- import argparse, sys, codecs, os
- from datetime import datetime
- from io import BytesIO
- from eumel import pagesize
-
- parser = argparse.ArgumentParser(description='Extract EUMEL disk archive.')
- parser.add_argument ('-f', '--force', help='Overwrite existing files', action='store_true')
- parser.add_argument ('-o', '--output', help='Output directory, defaults to archive name')
- parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true')
- parser.add_argument ('-n', '--number', help='Number files based on their position in the archive',
- action='store_true')
- parser.add_argument ('file', help='Input file')
- args = parser.parse_args ()
-
- if args.verbose:
- logging.basicConfig (level=logging.DEBUG)
- else:
- logging.basicConfig (level=logging.INFO)
-
- with open (args.file, 'rb') as infd:
- entries = parseEntry (readBlocks (infd))
-
- # first entry is always disk info
- diskinfo = FileHeaderDataspace (BytesIO (next (entries)))
- if not args.output:
- args.output = codecs.decode (diskinfo.name, 'eumel', 'replace')
- logging.debug ('Using disk name {} as output directory'.format (args.output))
-
- # create output dir
- try:
- os.makedirs (args.output)
- except FileExistsError:
- pass
-
- i = 1
- while True:
- # file header dataspace
- fileheader = FileHeaderDataspace (BytesIO (next (entries)))
- filename = codecs.decode (fileheader.name, 'eumel', 'replace').replace ('/', '-')
- if len (filename) == 0:
- logging.debug ('Filename was empty, i.e. last item in archive. I’m done')
- break
- try:
- mtime = datetime.strptime (codecs.decode (fileheader.mtime, 'eumel', 'replace'), '%d.%m.%y')
- except ValueError as e:
- logging.warning ('Cannot parse date of file {}, {}'.format (filename, e))
- mtime = datetime.now ()
- logging.debug ('Got file {}, last modified {}'.format (filename, mtime))
-
- # actual file contents
- e = next (entries)
-
- # quirks: if the first page starts with a magic sequence, skip it.
- # Not sure what it is used for.
- if e.startswith (2*b'\x30\x00\x00\x00'):
- logging.debug ('skipping quirks')
- e = e[pagesize:]
-
- if args.number:
- filename = '{:03d}_{}'.format (i, filename)
- outfile = os.path.join (args.output, filename)
- if os.path.exists (outfile) and not args.force:
- logging.info ('File {} exists, skipping'.format (outfile))
- continue
- logging.info ('Extracting {} bytes to file {}'.format (len (e), outfile))
- with open (outfile, 'wb') as outfd:
- outfd.write (e)
- stamp = mtime.timestamp ()
- os.utime (outfile, (stamp, stamp))
- i += 1
-