From 9ff793e96139ed40090ab9d8c3cae99b284858e5 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Tue, 2 Jul 2019 09:14:55 +0200 Subject: Stabilize WARC headers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for the 1.0 release: - Correct MIME types - Add X-Crocoite-Type, so logs, scripts, dom-snapshots and screenshots can be identified easily - Remove random WARC headers like X-Chrome-Initiator. We don’t want to maintain those. - Remove non-standard URN-based package URLs. Can’t use them without a URN registration --- crocoite/behavior.py | 10 ++++++-- crocoite/test_tools.py | 5 ++-- crocoite/test_warc.py | 28 +++++++++++++--------- crocoite/tools.py | 4 ++-- crocoite/util.py | 8 ++----- crocoite/warc.py | 64 ++++++++++++++++++++++++++++++++------------------ 6 files changed, 73 insertions(+), 46 deletions(-) diff --git a/crocoite/behavior.py b/crocoite/behavior.py index fd4d066..efb2ced 100644 --- a/crocoite/behavior.py +++ b/crocoite/behavior.py @@ -52,11 +52,12 @@ class Script: """ A JavaScript resource """ __slots__ = ('path', 'data') + datadir = 'data' def __init__ (self, path=None, encoding='utf-8'): self.path = path if path: - self.data = pkg_resources.resource_string (__name__, os.path.join ('data', path)).decode (encoding) + self.data = pkg_resources.resource_string (__name__, os.path.join (self.datadir, path)).decode (encoding) def __repr__ (self): return f'