summaryrefslogtreecommitdiff
path: root/crocoite/tools.py
diff options
context:
space:
mode:
author: Lars-Dominik Braun <lars@6xq.net>, 2019-07-02 09:14:55 +0200
committer: Lars-Dominik Braun <lars@6xq.net>, 2019-07-02 09:16:55 +0200
commit9ff793e96139ed40090ab9d8c3cae99b284858e5 (patch)
treee1b568fc77c0600a767fea1f541de1d5e85d87a5 /crocoite/tools.py
parent9d8d48358bf44d7a3e4918bcdac3f4ef1348541b (diff)
downloadcrocoite-9ff793e96139ed40090ab9d8c3cae99b284858e5.tar.gz
crocoite-9ff793e96139ed40090ab9d8c3cae99b284858e5.tar.bz2
crocoite-9ff793e96139ed40090ab9d8c3cae99b284858e5.zip
Stabilize WARC headers
In preparation for 1.0 release:
- Correct mime types
- Add X-Crocoite-Type, so logs, scripts, dom-snapshots and screenshots can be identified easily
- Remove random WARC headers like X-Chrome-Initiator. We don’t want to maintain those.
- Remove non-standard urn-based package URLs. Can’t use them without a urn-registration
Diffstat (limited to 'crocoite/tools.py')
-rw-r--r-- crocoite/tools.py 4
1 files changed, 2 insertions, 2 deletions
diff --git a/crocoite/tools.py b/crocoite/tools.py
index bc14f84..42ced35 100644
--- a/crocoite/tools.py
+++ b/crocoite/tools.py
@@ -31,7 +31,7 @@ from yarl import URL
from pkg_resources import parse_version, parse_requirements
-from .util import packageUrl, getSoftwareInfo, StrJsonEncoder
+from .util import getSoftwareInfo, StrJsonEncoder
def mergeWarc (files, output):
# stats
@@ -57,7 +57,7 @@ def mergeWarc (files, output):
'parameters': {'inputs': files},
}
payload = BytesIO (json.dumps (warcinfo, indent=2).encode ('utf-8'))
- record = writer.create_warc_record (packageUrl ('warcinfo'), 'warcinfo',
+ record = writer.create_warc_record ('', 'warcinfo',
payload=payload,
warc_headers_dict={'Content-Type': 'text/plain; encoding=utf-8'})
writer.write_record (record)