1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
#!/usr/bin/env python3
"""
Extract linearized (see linearizeDisk.py) EUMEL archive disk.
"""
import struct, sys, io, logging
import codecs
from eumel import Dataspace
def take (it, n):
    """
    Yield up to n items from the iterator it.

    Stops early, without raising, if it runs dry before n items were
    produced. The StopIteration of next() must not escape from a generator
    body: since PEP 479 it would be turned into a RuntimeError.
    """
    for _ in range (n):
        try:
            item = next (it)
        except StopIteration:
            return
        yield item

def parseEntry (blocks):
    """
    Split a stream of blocks into dataspaces.

    blocks must be an iterator (not just an iterable) of bytes objects,
    because header and payload blocks are pulled from the same stream. Every
    entry starts with a header block whose third little-endian 16 bit word
    holds the number of payload blocks that follow; the meaning of the other
    three header words is unknown.

    Yields the concatenated payload of each entry as bytes and ends cleanly
    when the stream is exhausted. A payload that is cut short is still
    yielded, after logging a warning.
    """
    for header in blocks:
        if len (header) < 8:
            # cannot even read the block count, nothing sensible can follow
            logging.warning ('Header block too short ({} bytes), stopping'.format (len (header)))
            return
        _, _, length, _ = struct.unpack ('<HHHH', header[:8])
        logging.debug ('Got dataspace with {} blocks'.format (length))
        payload = list (take (blocks, length))
        if len (payload) < length:
            logging.warning ('Dataspace truncated, expected {} blocks, got {}'.format (length, len (payload)))
        yield b''.join (payload)
def readBlocks (fd):
    """
    Yield successive chunks obtained from fd.read (512).

    Iteration ends as soon as a read returns something falsy (for a binary
    file: the empty bytes object at end of file). The last chunk may be
    shorter than 512 bytes.
    """
    chunk = fd.read (512)
    while chunk:
        yield chunk
        chunk = fd.read (512)
class FileHeaderDataspace (Dataspace):
    """
    Dataspace carrying the name and date of an archive entry.

    All parsing helpers used here (parseText, seek, parseHeap) are inherited
    from Dataspace; their exact semantics are not visible in this file.
    """

    # dataspace type id; presumably evaluated by the Dataspace base class --
    # TODO confirm
    TYPE = 0

    def __init__ (self, fd):
        """
        Parse the header from fd, which is handed on to Dataspace unchanged.

        Afterwards name and mtime hold the two text fields in the form
        returned by parseText.
        """
        super ().__init__ (fd)
        # two consecutive text fields, order matters: name first, then date
        self.name = self.parseText ()
        self.mtime = self.parseText ()
        # continue with the heap at offset 0x40; presumably that is where the
        # text contents are stored -- TODO confirm against eumel.Dataspace
        self.seek (0x40)
        self.parseHeap ()
if __name__ == '__main__':
    # Command line entry point: unpack every file stored in a linearized
    # archive into a directory, restoring the modification date where it can
    # be parsed.
    import argparse, sys, codecs, os
    from datetime import datetime
    from io import BytesIO
    from eumel import pagesize

    parser = argparse.ArgumentParser(description='Extract EUMEL disk archive.')
    parser.add_argument ('-f', '--force', help='Overwrite existing files', action='store_true')
    parser.add_argument ('-o', '--output', help='Output directory, defaults to archive name')
    parser.add_argument ('-v', '--verbose', help='Enable debugging messages', action='store_true')
    parser.add_argument ('-n', '--number', help='Number files based on their position in the archive',
            action='store_true')
    parser.add_argument ('file', help='Input file')
    args = parser.parse_args ()

    logging.basicConfig (level=logging.DEBUG if args.verbose else logging.INFO)

    with open (args.file, 'rb') as infd:
        entries = parseEntry (readBlocks (infd))

        # first entry is always disk info
        rawinfo = next (entries, None)
        if rawinfo is None:
            logging.error ('Archive {} does not contain any dataspace'.format (args.file))
            sys.exit (1)
        diskinfo = FileHeaderDataspace (BytesIO (rawinfo))
        if not args.output:
            # 'eumel' is not a stdlib codec; presumably it is registered by
            # importing the eumel module -- TODO confirm
            args.output = codecs.decode (diskinfo.name, 'eumel', 'replace')
            logging.debug ('Using disk name {} as output directory'.format (args.output))

        # create output dir, an already existing directory is fine
        os.makedirs (args.output, exist_ok=True)

        # 1-based position of the current file within the archive
        position = 0
        while True:
            # every file consists of a header dataspace followed by a
            # dataspace with its contents
            rawheader = next (entries, None)
            if rawheader is None:
                logging.warning ('Archive ended without an empty terminating entry')
                break
            fileheader = FileHeaderDataspace (BytesIO (rawheader))
            # a slash would be interpreted as directory separator
            filename = codecs.decode (fileheader.name, 'eumel', 'replace').replace ('/', '-')
            if not filename:
                logging.debug ('Filename was empty, i.e. last item in archive. I’m done')
                break
            # Count every archive member, including the ones skipped below.
            # Otherwise the numbers assigned by --number would drift away
            # from the real archive position after the first skipped file.
            position += 1

            try:
                mtime = datetime.strptime (codecs.decode (fileheader.mtime, 'eumel', 'replace'), '%d.%m.%y')
            except ValueError as err:
                logging.warning ('Cannot parse date of file {}, {}'.format (filename, err))
                # fall back to the time of extraction
                mtime = datetime.now ()
            logging.debug ('Got file {}, last modified {}'.format (filename, mtime))

            # actual file contents
            contents = next (entries, None)
            if contents is None:
                logging.error ('Archive is truncated, contents of file {} are missing'.format (filename))
                break
            # quirks: if the first page starts with a magic sequence, skip it.
            # Not sure what it is used for.
            if contents.startswith (2*b'\x30\x00\x00\x00'):
                logging.debug ('skipping quirks')
                contents = contents[pagesize:]

            if args.number:
                filename = '{:03d}_{}'.format (position, filename)
            outfile = os.path.join (args.output, filename)
            if os.path.exists (outfile) and not args.force:
                logging.info ('File {} exists, skipping'.format (outfile))
                continue

            logging.info ('Extracting {} bytes to file {}'.format (len (contents), outfile))
            with open (outfile, 'wb') as outfd:
                outfd.write (contents)
            # set both access and modification time to the archived date
            stamp = mtime.timestamp ()
            os.utime (outfile, (stamp, stamp))
|