summaryrefslogtreecommitdiff
path: root/crocoite/controller.py
diff options
context:
space:
mode:
author: Lars-Dominik Braun <lars@6xq.net> 2018-06-25 19:55:48 +0200
committer: Lars-Dominik Braun <lars@6xq.net> 2018-06-25 19:55:48 +0200
commit: 785ef19736cc9a21746e00a022b76fd756c162de (patch)
tree: 041a8696c852294fe9573485831398933e26ee13 /crocoite/controller.py
parent: 344a6b449075a8fb42054801144c40760f791366 (diff)
downloadcrocoite-785ef19736cc9a21746e00a022b76fd756c162de.tar.gz
crocoite-785ef19736cc9a21746e00a022b76fd756c162de.tar.bz2
crocoite-785ef19736cc9a21746e00a022b76fd756c162de.zip
warc: Save DOM-/image screenshot as WARC conversion
Judging from the docs this is the proper way to store these resources. Enable both for the IRC bot by default, since they won’t interfere with IA’s wayback machine.
Diffstat (limited to 'crocoite/controller.py')
-rw-r--r-- crocoite/controller.py 8
1 files changed, 1 insertions, 7 deletions
diff --git a/crocoite/controller.py b/crocoite/controller.py
index 84001b7..ef042cc 100644
--- a/crocoite/controller.py
+++ b/crocoite/controller.py
@@ -71,11 +71,10 @@ class StatsHandler (EventHandler):
self.stats['crashed'] += 1
import logging, time
-from urllib.parse import urlsplit, urlunsplit
from . import behavior as cbehavior
from .browser import ChromeService, SiteLoader, Item
-from .util import getFormattedViewportMetrics
+from .util import getFormattedViewportMetrics, removeFragment
class ControllerStart:
__slots__ = ('payload')
@@ -238,11 +237,6 @@ class PrefixLimit (RecursionPolicy):
def __call__ (self, urls):
return set (filter (lambda u: u.startswith (self.prefix), urls))
-def removeFragment (u):
- """ Remove fragment from url (i.e. #hashvalue) """
- s = urlsplit (u)
- return urlunsplit ((s.scheme, s.netloc, s.path, s.query, ''))
-
from .behavior import ExtractLinksEvent
class RecursiveController (EventHandler):