from urllib.parse import urlsplit, urlunsplit


def removeFragment (u):
    """
    Return url *u* without its fragment part (the trailing ``#hashvalue``)

    The url is split into its five components and reassembled with an empty
    fragment; scheme, network location, path and query are passed through
    unchanged.
    """
    # The fragment is unpacked only to be thrown away.
    scheme, netloc, path, query, _fragment = urlsplit (u)
    withoutFragment = (scheme, netloc, path, query, '')
    return urlunsplit (withoutFragment)