diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2019-07-02 09:14:55 +0200 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2019-07-02 09:16:55 +0200 |
commit | 9ff793e96139ed40090ab9d8c3cae99b284858e5 (patch) | |
tree | e1b568fc77c0600a767fea1f541de1d5e85d87a5 /crocoite/tools.py | |
parent | 9d8d48358bf44d7a3e4918bcdac3f4ef1348541b (diff) | |
download | crocoite-9ff793e96139ed40090ab9d8c3cae99b284858e5.tar.gz crocoite-9ff793e96139ed40090ab9d8c3cae99b284858e5.tar.bz2 crocoite-9ff793e96139ed40090ab9d8c3cae99b284858e5.zip |
Stabilize WARC headers
In preparation for 1.0 release:
- Correct MIME types
- Add X-Crocoite-Type, so logs, scripts, DOM snapshots and screenshots
can be identified easily
- Remove random WARC headers like X-Chrome-Initiator. We don’t want to
maintain those.
- Remove non-standard URN-based package URLs. Can’t use them without a
URN registration
Diffstat (limited to 'crocoite/tools.py')
-rw-r--r-- | crocoite/tools.py | 4 |
1 files changed, 2 insertions, 2 deletions
diff --git a/crocoite/tools.py b/crocoite/tools.py index bc14f84..42ced35 100644 --- a/crocoite/tools.py +++ b/crocoite/tools.py @@ -31,7 +31,7 @@ from yarl import URL from pkg_resources import parse_version, parse_requirements -from .util import packageUrl, getSoftwareInfo, StrJsonEncoder +from .util import getSoftwareInfo, StrJsonEncoder def mergeWarc (files, output): # stats @@ -57,7 +57,7 @@ def mergeWarc (files, output): 'parameters': {'inputs': files}, } payload = BytesIO (json.dumps (warcinfo, indent=2).encode ('utf-8')) - record = writer.create_warc_record (packageUrl ('warcinfo'), 'warcinfo', + record = writer.create_warc_record ('', 'warcinfo', payload=payload, warc_headers_dict={'Content-Type': 'text/plain; encoding=utf-8'}) writer.write_record (record) |