| | | |
|---|---|---|
| author | Lars-Dominik Braun <lars@6xq.net> | 2018-11-19 15:49:28 +0100 |
| committer | Lars-Dominik Braun <lars@6xq.net> | 2018-11-19 15:50:16 +0100 |
| commit | 3c009f3ff45d89a703bc833c900e06a79b32f1c1 (patch) | |
| tree | 16f398f4c2adf1d9d072281983a4b7f27d77820f | |
| parent | a5561785fb66f49c2b76557c6f6745eedb4d8b73 (diff) | |
| download | crocoite-3c009f3ff45d89a703bc833c900e06a79b32f1c1.tar.gz crocoite-3c009f3ff45d89a703bc833c900e06a79b32f1c1.tar.bz2 crocoite-3c009f3ff45d89a703bc833c900e06a79b32f1c1.zip | |
Coding style
Fix a few random issues pointed out by pylint, mainly unused imports.
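For context: unused imports are what pylint reports as `unused-import` (W0611). A minimal sketch of running just that check programmatically (the invocation and the target package path are illustrative assumptions, not part of this commit):

```python
# Hypothetical: run only pylint's unused-import check over the package,
# equivalent to `pylint --disable=all --enable=unused-import crocoite`.
from pylint.lint import Run

# Run() exits the interpreter with pylint's status code once the check is done.
Run(["--disable=all", "--enable=unused-import", "crocoite"])
```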
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | crocoite/browser.py | 6 |
| -rw-r--r-- | crocoite/cli.py | 13 |
| -rw-r--r-- | crocoite/controller.py | 36 |
| -rw-r--r-- | crocoite/devtools.py | 14 |
| -rw-r--r-- | crocoite/html.py | 8 |
| -rw-r--r-- | crocoite/irc.py | 8 |
| -rw-r--r-- | crocoite/logger.py | 2 |
| -rw-r--r-- | crocoite/test_browser.py | 5 |
| -rw-r--r-- | crocoite/test_devtools.py | 2 |
| -rw-r--r-- | crocoite/tools.py | 2 |
| -rw-r--r-- | crocoite/util.py | 2 |
| -rw-r--r-- | crocoite/warc.py | 4 |
12 files changed, 44 insertions, 58 deletions
```diff
diff --git a/crocoite/browser.py b/crocoite/browser.py
index 91f0a0a..44b94e1 100644
--- a/crocoite/browser.py
+++ b/crocoite/browser.py
@@ -25,8 +25,6 @@ Chrome browser interactions.
 import asyncio
 from urllib.parse import urlsplit
 from base64 import b64decode
-from collections import deque
-from threading import Event
 from http.server import BaseHTTPRequestHandler
 
 from .logger import Level
@@ -40,7 +38,7 @@ class Item:
     __slots__ = ('chromeRequest', 'chromeResponse', 'chromeFinished',
             'isRedirect', 'failed', 'body', 'requestBody')
 
-    def __init__ (self, tab):
+    def __init__ (self):
         self.chromeRequest = {}
         self.chromeResponse = {}
         self.chromeFinished = {}
@@ -274,7 +272,7 @@ class SiteLoader:
             else:
                 logger.warning ('request exists', uuid='2c989142-ba00-4791-bb03-c2a14e91a56b')
 
-        item = Item (self.tab)
+        item = Item ()
         item.setRequest (kwargs)
         self.requests[reqId] = item
         logger.debug ('request', uuid='55c17564-1bd0-4499-8724-fa7aad65478f')
diff --git a/crocoite/cli.py b/crocoite/cli.py
index 8ebf557..e4a46ee 100644
--- a/crocoite/cli.py
+++ b/crocoite/cli.py
@@ -22,12 +22,13 @@
 Command line interface
 """
 
-import argparse, json, sys, signal
+import argparse, sys, signal, asyncio, os
 from enum import IntEnum
 
 from . import behavior
-from .controller import SinglePageController, defaultSettings, \
-        ControllerSettings, StatsHandler, LogHandler
+from .controller import SinglePageController, \
+        ControllerSettings, StatsHandler, LogHandler, \
+        RecursiveController, DepthLimit, PrefixLimit
 from .devtools import Passthrough, Process
 from .warc import WarcHandler
 from .logger import Logger, JsonPrintConsumer, DatetimeConsumer, WarcHandlerConsumer
@@ -79,9 +80,6 @@ def single ():
     return ret
 
-import asyncio, os
-from .controller import RecursiveController, DepthLimit, PrefixLimit
-
 def parsePolicy (recursive, url):
     if recursive is None:
         return DepthLimit (0)
     elif recursive.isdigit ():
         return DepthLimit (int (recursive))
     elif recursive == 'prefix':
         return PrefixLimit (url)
-    else:
-        raise ValueError ('Unsupported')
+    raise ValueError ('Unsupported')
 
 def recursive ():
     logger = Logger (consumer=[DatetimeConsumer (), JsonPrintConsumer ()])
diff --git a/crocoite/controller.py b/crocoite/controller.py
index 3acbf26..1a41117 100644
--- a/crocoite/controller.py
+++ b/crocoite/controller.py
@@ -22,6 +22,17 @@
 Controller classes, handling actions required for archival
 """
 
+import time, platform
+import tempfile, asyncio, json, os
+from itertools import islice
+from datetime import datetime
+from urllib.parse import urlparse
+
+from . import behavior as cbehavior
+from .browser import SiteLoader, Item
+from .util import getFormattedViewportMetrics, getRequirements, removeFragment
+from .behavior import ExtractLinksEvent
+
 class ControllerSettings:
     __slots__ = ('idleTimeout', 'timeout')
 
@@ -47,7 +58,7 @@ class EventHandler:
         raise NotImplementedError ()
 
 class StatsHandler (EventHandler):
-    __slots__ = ('stats')
+    __slots__ = ('stats', )
 
     acceptException = True
 
@@ -63,13 +74,10 @@
             self.stats['finished'] += 1
             self.stats['bytesRcv'] += item.encodedDataLength
 
-from .behavior import ExtractLinksEvent
-from itertools import islice
-
 class LogHandler (EventHandler):
     """ Handle items by logging information about them """
 
-    __slots__ = ('logger')
+    __slots__ = ('logger', )
 
     def __init__ (self, logger):
         self.logger = logger.bind (context=type (self).__name__)
@@ -86,15 +94,9 @@
             self.logger.info ('extracted links', context=type (item).__name__,
                     uuid='8ee5e9c9-1130-4c5c-88ff-718508546e0c', links=limitlinks)
 
-import time, platform
-
-from . import behavior as cbehavior
-from .browser import SiteLoader, Item
-from .devtools import Process
-from .util import getFormattedViewportMetrics, getRequirements
 
 class ControllerStart:
-    __slots__ = ('payload')
+    __slots__ = ('payload', )
 
     def __init__ (self, payload):
         self.payload = payload
@@ -214,7 +216,7 @@
     depth==0 means no recursion, depth==1 is the page and outgoing links
     """
 
-    __slots__ = ('maxdepth')
+    __slots__ = ('maxdepth', )
 
     def __init__ (self, maxdepth=0):
         if maxdepth < 0 or maxdepth > 1:
@@ -240,7 +242,7 @@
     accepted: http://example.com/foobar http://example.com/foo/bar
     """
 
-    __slots__ = ('prefix')
+    __slots__ = ('prefix', )
 
     def __init__ (self, prefix):
         self.prefix = prefix
@@ -248,12 +250,6 @@
     def __call__ (self, urls):
         return set (filter (lambda u: u.startswith (self.prefix), urls))
 
-import tempfile, asyncio, json, os
-from datetime import datetime
-from urllib.parse import urlparse
-from .behavior import ExtractLinksEvent
-from .util import removeFragment
-
 class RecursiveController:
     """
     Simple recursive controller
diff --git a/crocoite/devtools.py b/crocoite/devtools.py
index 0bf2255..b071d2e 100644
--- a/crocoite/devtools.py
+++ b/crocoite/devtools.py
@@ -22,7 +22,10 @@
 Communication with Google Chrome through its DevTools protocol.
 """
 
-import aiohttp, websockets, json, asyncio, logging
+import json, asyncio, logging, os
+from tempfile import mkdtemp
+import shutil
+import aiohttp, websockets
 
 logger = logging.getLogger (__name__)
 
@@ -228,8 +231,7 @@ class Tab:
             if '.' in name:
                 n, ext = name.split ('.', 1)
                 return getattrRecursive (getattr (obj, n), ext)
-            else:
-                return getattr (obj, name)
+            return getattr (obj, name)
 
         if self.crashed:
             raise Crashed ()
@@ -252,10 +254,6 @@
         await ret.run ()
         return ret
 
-import os, time
-from tempfile import mkdtemp
-import shutil
-
 class Process:
     """ Start Google Chrome listening on a random port """
 
@@ -327,7 +325,7 @@
         return False
 
 class Passthrough:
-    __slots__ = ('url')
+    __slots__ = ('url', )
 
     def __init__ (self, url):
         self.url = url
diff --git a/crocoite/html.py b/crocoite/html.py
index c929a10..fec9760 100644
--- a/crocoite/html.py
+++ b/crocoite/html.py
@@ -22,6 +22,10 @@
 HTML helper
 """
 
+from html5lib.treewalkers.base import TreeWalker
+from html5lib.filters.base import Filter
+from html5lib import constants
+
 # HTML void tags, see https://html.spec.whatwg.org/multipage/syntax.html#void-elements
 voidTags = {'area',
         'base',
@@ -103,10 +107,6 @@
         'onvolumechange',
         'onwaiting'}
 
-from html5lib.treewalkers.base import TreeWalker
-from html5lib.filters.base import Filter
-from html5lib import constants
-
 class ChromeTreeWalker (TreeWalker):
     """
     Recursive html5lib TreeWalker for Google Chrome method DOM.getDocument
diff --git a/crocoite/irc.py b/crocoite/irc.py
index 095c55f..99485e4 100644
--- a/crocoite/irc.py
+++ b/crocoite/irc.py
@@ -29,8 +29,8 @@ from enum import IntEnum, Enum
 from collections import defaultdict
 from abc import abstractmethod
 from functools import wraps
-from io import BytesIO
 import bottom
+import websockets
 
 ### helper functions ###
 def prettyTimeDelta (seconds):
@@ -333,11 +333,11 @@
             with self._quit:
                 await args.func (user=user, args=args, reply=reply)
 
-    async def onDisconnect (**kwargs):
+    async def onDisconnect (self, **kwargs):
         """ Auto-reconnect """
         self.logger.info ('disconnect', uuid='4c74b2c8-2403-4921-879d-2279ad85db72')
         if not self._quit.armed:
-            await asynio.sleep (10, loop=self.loop)
+            await asyncio.sleep (10, loop=self.loop)
             self.logger.info ('reconnect', uuid='c53555cb-e1a4-4b69-b1c9-3320269c19d7')
             await self.connect ()
 
@@ -492,8 +492,6 @@
         if job.process and job.process.returncode is None:
             job.process.terminate ()
 
-import websockets
-
 class Dashboard:
     __slots__ = ('fd', 'clients', 'loop', 'log', 'maxLog', 'pingInterval', 'pingTimeout')
     # these messages will not be forwarded to the browser
diff --git a/crocoite/logger.py b/crocoite/logger.py
index e69df5e..cddc42d 100644
--- a/crocoite/logger.py
+++ b/crocoite/logger.py
@@ -85,7 +85,7 @@ class Logger:
         self.consumer.remove (consumer)
 
 class Consumer:
-    def __call__ (self, level, *args, **kwargs): # pragma: no cover
+    def __call__ (self, **kwargs): # pragma: no cover
         raise NotImplementedError ()
 
 class NullConsumer (Consumer):
diff --git a/crocoite/test_browser.py b/crocoite/test_browser.py
index f72d899..8adf0cd 100644
--- a/crocoite/test_browser.py
+++ b/crocoite/test_browser.py
@@ -18,7 +18,6 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 
-import logging
 import asyncio
 import pytest
 from operator import itemgetter
@@ -26,7 +25,7 @@ from aiohttp import web
 from http.server import BaseHTTPRequestHandler
 
 from .browser import Item, SiteLoader
-from .logger import Logger, Consumer, JsonPrintConsumer
+from .logger import Logger, Consumer
 from .devtools import Crashed, Process
 
 # if you want to know what’s going on:
@@ -39,7 +38,7 @@ class TItem (Item):
     base = 'http://localhost:8000/'
 
     def __init__ (self, path, status, headers, bodyReceive, bodySend=None, requestBody=None, failed=False, isRedirect=False):
-        super ().__init__ (tab=None)
+        super ().__init__ ()
         self.chromeResponse = {'response': {'headers': headers, 'status': status, 'url': self.base + path}}
         self.body = bodyReceive, False
         self.bodySend = bodyReceive if not bodySend else bodySend
diff --git a/crocoite/test_devtools.py b/crocoite/test_devtools.py
index 8676e6c..74d223f 100644
--- a/crocoite/test_devtools.py
+++ b/crocoite/test_devtools.py
@@ -103,7 +103,7 @@ async def test_tab_crash (tab):
     # caling anything else now should fail as well
     with pytest.raises (Crashed):
-        version = await tab.Browser.getVersion ()
+        await tab.Browser.getVersion ()
 
 
 @pytest.mark.asyncio
 async def test_load (tab, server):
diff --git a/crocoite/tools.py b/crocoite/tools.py
index da32f85..843270e 100644
--- a/crocoite/tools.py
+++ b/crocoite/tools.py
@@ -22,7 +22,7 @@
 Misc tools
 """
 
-import shutil, sys, re, os, logging, argparse
+import shutil, sys, os, logging, argparse
 
 from warcio.archiveiterator import ArchiveIterator
 from warcio.warcwriter import WARCWriter
diff --git a/crocoite/util.py b/crocoite/util.py
index daa60db..18a051a 100644
--- a/crocoite/util.py
+++ b/crocoite/util.py
@@ -23,7 +23,7 @@ Random utility functions
 """
 
 import random, sys
-import hashlib, os, pkg_resources
+import hashlib, pkg_resources
 from urllib.parse import urlsplit, urlunsplit
 
 def randomString (length=None, chars='abcdefghijklmnopqrstuvwxyz'):
diff --git a/crocoite/warc.py b/crocoite/warc.py
index c1cbff2..ebc460d 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -24,15 +24,15 @@ Classes writing data to WARC files
 
 import json, threading
 from io import BytesIO
-from warcio.statusandheaders import StatusAndHeaders
 from urllib.parse import urlsplit
 from datetime import datetime
 
 from warcio.timeutils import datetime_to_iso_date
 from warcio.warcwriter import WARCWriter
+from warcio.statusandheaders import StatusAndHeaders
 
 from .util import packageUrl
-from .controller import defaultSettings, EventHandler, ControllerStart
+from .controller import EventHandler, ControllerStart
 from .behavior import Script, DomSnapshotEvent, ScreenshotEvent
 from .browser import Item
```
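A side note on the recurring `('stats')` → `('stats', )` changes in the diff above: for a single name this is purely a style fix, because CPython also accepts a bare string for `__slots__` and treats it as one slot name. The trailing comma simply makes the one-element tuple explicit, which matters as soon as `__slots__` is inspected as a sequence. A small standalone sketch with made-up class names:

```python
# Both classes end up with exactly one slot named 'stats'; the difference is
# only in what the __slots__ attribute itself contains afterwards.
class Implicit:
    __slots__ = ('stats')      # parentheses do nothing here: this is the str 'stats'

class Explicit:
    __slots__ = ('stats', )    # a genuine one-element tuple

print(list(Implicit.__slots__))  # ['s', 't', 'a', 't', 's'] -- iterates the string
print(list(Explicit.__slots__))  # ['stats']
```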