From 9ff793e96139ed40090ab9d8c3cae99b284858e5 Mon Sep 17 00:00:00 2001
From: Lars-Dominik Braun
Date: Tue, 2 Jul 2019 09:14:55 +0200
Subject: Stabilize WARC headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation for 1.0 release:

- Correct mime types
- Add X-Crocoite-Type, so logs, scripts, dom-snapshots and screenshots can be identified easily
- Remove random WARC headers like X-Chrome-Initiator. We don’t want to maintain those.
- Remove non-standard urn-based package URLs. Can’t use them without a urn-registration
---
 crocoite/tools.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'crocoite/tools.py')

diff --git a/crocoite/tools.py b/crocoite/tools.py
index bc14f84..42ced35 100644
--- a/crocoite/tools.py
+++ b/crocoite/tools.py
@@ -31,7 +31,7 @@ from yarl import URL
 
 from pkg_resources import parse_version, parse_requirements
 
-from .util import packageUrl, getSoftwareInfo, StrJsonEncoder
+from .util import getSoftwareInfo, StrJsonEncoder
 
 def mergeWarc (files, output):
     # stats
@@ -57,7 +57,7 @@ def mergeWarc (files, output):
             'parameters': {'inputs': files},
             }
     payload = BytesIO (json.dumps (warcinfo, indent=2).encode ('utf-8'))
-    record = writer.create_warc_record (packageUrl ('warcinfo'), 'warcinfo',
+    record = writer.create_warc_record ('', 'warcinfo',
             payload=payload,
             warc_headers_dict={'Content-Type': 'text/plain; encoding=utf-8'})
     writer.write_record (record)
-- 
cgit v1.2.3