From 6ccd72ab96cfba36c217a77641b3b8a91906c512 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 8 Dec 2018 09:05:12 +0100 Subject: tools: Add version info to merged WARCs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for #9. I was hoping to reuse one of schema.org’s microdata schemas, but neither Action (archival action) nor SoftwareApplication (version information) seems to be suitable. --- crocoite/test_tools.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'crocoite/test_tools.py') diff --git a/crocoite/test_tools.py b/crocoite/test_tools.py index 5e980d0..947d020 100644 --- a/crocoite/test_tools.py +++ b/crocoite/test_tools.py @@ -27,6 +27,7 @@ from warcio.warcwriter import WARCWriter from warcio.statusandheaders import StatusAndHeaders from .tools import mergeWarc +from .util import packageUrl @pytest.fixture def writer(): @@ -45,12 +46,21 @@ def recordsEqual(golden, underTest): assert aheader == bheader assert a.http_headers == b.http_headers +def makeGolden(writer, records): + # additional warcinfo is written. Content does not matter. 
+ record = writer.create_warc_record (packageUrl ('warcinfo'), 'warcinfo', + payload=b'', + warc_headers_dict={'Content-Type': 'text/plain; encoding=utf-8'}) + records.insert (0, record) + return records + def test_unmodified(writer): """ Single request/response pair, no revisits """ records = [] + httpHeaders = StatusAndHeaders('GET / HTTP/1.1', {}, is_http_request=True) warcHeaders = {} record = writer.create_warc_record ('http://example.com/', 'request', payload=BytesIO(b'foobar'), @@ -69,7 +79,7 @@ def test_unmodified(writer): mergeWarc ([writer.out.name], output) output.seek(0) - recordsEqual (records, ArchiveIterator (output)) + recordsEqual (makeGolden (writer, records), ArchiveIterator (output)) def test_different_payload(writer): """ @@ -97,7 +107,7 @@ def test_different_payload(writer): mergeWarc ([writer.out.name], output) output.seek(0) - recordsEqual (records, ArchiveIterator (output)) + recordsEqual (makeGolden (writer, records), ArchiveIterator (output)) def makeRevisit(writer, ref, dup): """ Make revisit record for reference """ @@ -141,7 +151,7 @@ def test_resp_revisit_same_url(writer): mergeWarc ([writer.out.name], output) output.seek(0) - recordsEqual (records, ArchiveIterator (output)) + recordsEqual (makeGolden (writer, records), ArchiveIterator (output)) def test_resp_revisit_other_url(writer): """ @@ -183,5 +193,5 @@ def test_resp_revisit_other_url(writer): mergeWarc ([writer.out.name], output) output.seek(0) - recordsEqual (records, ArchiveIterator (output)) + recordsEqual (makeGolden (writer, records), ArchiveIterator (output)) -- cgit v1.2.3