diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2018-06-25 19:55:48 +0200 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2018-06-25 19:55:48 +0200 |
commit | 785ef19736cc9a21746e00a022b76fd756c162de (patch) | |
tree | 041a8696c852294fe9573485831398933e26ee13 /crocoite/controller.py | |
parent | 344a6b449075a8fb42054801144c40760f791366 (diff) | |
download | crocoite-785ef19736cc9a21746e00a022b76fd756c162de.tar.gz crocoite-785ef19736cc9a21746e00a022b76fd756c162de.tar.bz2 crocoite-785ef19736cc9a21746e00a022b76fd756c162de.zip |
warc: Save DOM-/image screenshot as WARC conversion
Judging from the docs this is the proper way to store these resources.
Enable both for the IRC bot by default, since they won’t interfere with
IA’s wayback machine.
Diffstat (limited to 'crocoite/controller.py')
-rw-r--r-- | crocoite/controller.py | 8 |
1 files changed, 1 insertions, 7 deletions
```diff
diff --git a/crocoite/controller.py b/crocoite/controller.py
index 84001b7..ef042cc 100644
--- a/crocoite/controller.py
+++ b/crocoite/controller.py
@@ -71,11 +71,10 @@ class StatsHandler (EventHandler):
             self.stats['crashed'] += 1
 
 import logging, time
-from urllib.parse import urlsplit, urlunsplit
 
 from . import behavior as cbehavior
 from .browser import ChromeService, SiteLoader, Item
-from .util import getFormattedViewportMetrics
+from .util import getFormattedViewportMetrics, removeFragment
 
 class ControllerStart:
     __slots__ = ('payload')
@@ -238,11 +237,6 @@ class PrefixLimit (RecursionPolicy):
     def __call__ (self, urls):
         return set (filter (lambda u: u.startswith (self.prefix), urls))
 
-def removeFragment (u):
-    """ Remove fragment from url (i.e. #hashvalue) """
-    s = urlsplit (u)
-    return urlunsplit ((s.scheme, s.netloc, s.path, s.query, ''))
-
 from .behavior import ExtractLinksEvent
 
 class RecursiveController (EventHandler):
```