diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2018-12-08 09:05:12 +0100 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2018-12-08 09:05:45 +0100 |
commit | 6ccd72ab96cfba36c217a77641b3b8a91906c512 (patch) | |
tree | 0a96f837e8ef4776af1b87aa7dd43edc6c55be3a /crocoite/test_tools.py | |
parent | aec7a8c583c8228e9538c923d39ef80862bafdde (diff) | |
download | crocoite-6ccd72ab96cfba36c217a77641b3b8a91906c512.tar.gz crocoite-6ccd72ab96cfba36c217a77641b3b8a91906c512.tar.bz2 crocoite-6ccd72ab96cfba36c217a77641b3b8a91906c512.zip |
tools: Add version info to merged WARCs
In preparation for #9.
I was hoping to reuse one of schema.org’s microdata schemas, but
neither Action (archival action) nor SoftwareApplication (version
information) seems to be suitable.
Diffstat (limited to 'crocoite/test_tools.py')
-rw-r--r-- | crocoite/test_tools.py | 18 |
1 files changed, 14 insertions, 4 deletions
diff --git a/crocoite/test_tools.py b/crocoite/test_tools.py index 5e980d0..947d020 100644 --- a/crocoite/test_tools.py +++ b/crocoite/test_tools.py @@ -27,6 +27,7 @@ from warcio.warcwriter import WARCWriter from warcio.statusandheaders import StatusAndHeaders from .tools import mergeWarc +from .util import packageUrl @pytest.fixture def writer(): @@ -45,12 +46,21 @@ def recordsEqual(golden, underTest): assert aheader == bheader assert a.http_headers == b.http_headers +def makeGolden(writer, records): + # additional warcinfo is written. Content does not matter. + record = writer.create_warc_record (packageUrl ('warcinfo'), 'warcinfo', + payload=b'', + warc_headers_dict={'Content-Type': 'text/plain; encoding=utf-8'}) + records.insert (0, record) + return records + def test_unmodified(writer): """ Single request/response pair, no revisits """ records = [] + httpHeaders = StatusAndHeaders('GET / HTTP/1.1', {}, is_http_request=True) warcHeaders = {} record = writer.create_warc_record ('http://example.com/', 'request', payload=BytesIO(b'foobar'), @@ -69,7 +79,7 @@ def test_unmodified(writer): mergeWarc ([writer.out.name], output) output.seek(0) - recordsEqual (records, ArchiveIterator (output)) + recordsEqual (makeGolden (writer, records), ArchiveIterator (output)) def test_different_payload(writer): """ @@ -97,7 +107,7 @@ def test_different_payload(writer): mergeWarc ([writer.out.name], output) output.seek(0) - recordsEqual (records, ArchiveIterator (output)) + recordsEqual (makeGolden (writer, records), ArchiveIterator (output)) def makeRevisit(writer, ref, dup): """ Make revisit record for reference """ @@ -141,7 +151,7 @@ def test_resp_revisit_same_url(writer): mergeWarc ([writer.out.name], output) output.seek(0) - recordsEqual (records, ArchiveIterator (output)) + recordsEqual (makeGolden (writer, records), ArchiveIterator (output)) def test_resp_revisit_other_url(writer): """ @@ -183,5 +193,5 @@ def test_resp_revisit_other_url(writer): mergeWarc 
([writer.out.name], output) output.seek(0) - recordsEqual (records, ArchiveIterator (output)) + recordsEqual (makeGolden (writer, records), ArchiveIterator (output)) |