diff options
Diffstat (limited to 'crocoite')
-rw-r--r-- | crocoite/behavior.py | 10 | ||||
-rw-r--r-- | crocoite/browser.py | 2 | ||||
-rw-r--r-- | crocoite/controller.py | 2 | ||||
-rw-r--r-- | crocoite/devtools.py | 23 | ||||
-rw-r--r-- | crocoite/irc.py | 32 | ||||
-rw-r--r-- | crocoite/test_browser.py | 2 | ||||
-rw-r--r-- | crocoite/test_devtools.py | 14 | ||||
-rw-r--r-- | crocoite/test_tools.py | 2 | ||||
-rw-r--r-- | crocoite/tools.py | 13 | ||||
-rw-r--r-- | crocoite/util.py | 4 | ||||
-rw-r--r-- | crocoite/warc.py | 19 |
11 files changed, 63 insertions, 60 deletions
diff --git a/crocoite/behavior.py b/crocoite/behavior.py index 321b65c..7f3a3a0 100644 --- a/crocoite/behavior.py +++ b/crocoite/behavior.py @@ -59,7 +59,7 @@ class Script: self.data = pkg_resources.resource_string (__name__, os.path.join ('data', path)).decode (encoding) def __repr__ (self): - return '<Script {}>'.format (self.path) + return f'<Script {self.path}>' def __str__ (self): return self.data @@ -89,7 +89,7 @@ class Behavior: return True def __repr__ (self): - return '<Behavior {}>'.format (self.name) + return f'<Behavior {self.name}>' async def onload (self): """ After loading the page started """ @@ -138,7 +138,7 @@ class JsOnload (Behavior): constructor = result['objectId'] if self.options: - yield Script.fromStr (json.dumps (self.options, indent=2), '{}/options'.format (self.script.path)) + yield Script.fromStr (json.dumps (self.options, indent=2), f'{self.script.path}/options') result = await tab.Runtime.callFunctionOn ( functionDeclaration='function(options){return new this(options);}', objectId=constructor, @@ -231,9 +231,9 @@ class DomSnapshot (Behavior): if url in haveUrls: # ignore duplicate URLs. they are usually caused by # javascript-injected iframes (advertising) with no(?) 
src - self.logger.warning ('have DOM snapshot for URL {}, ignoring'.format (url)) + self.logger.warning (f'have DOM snapshot for URL {url}, ignoring') elif url.scheme in ('http', 'https'): - self.logger.debug ('saving DOM snapshot for url {}, base {}'.format (doc['documentURL'], doc['baseURL'])) + self.logger.debug (f'saving DOM snapshot for url {url}, base {doc["baseURL"]}') haveUrls.add (url) walker = ChromeTreeWalker (doc) # remove script, to make the page static and noscript, because at the diff --git a/crocoite/browser.py b/crocoite/browser.py index 1c7ac3b..3de61f0 100644 --- a/crocoite/browser.py +++ b/crocoite/browser.py @@ -48,7 +48,7 @@ class Item: self.requestBody = None def __repr__ (self): - return '<Item {}>'.format (self.url) + return f'<Item {self.url}>' @property def request (self): diff --git a/crocoite/controller.py b/crocoite/controller.py index 53cb08d..504fa23 100644 --- a/crocoite/controller.py +++ b/crocoite/controller.py @@ -248,7 +248,7 @@ class DepthLimit (RecursionPolicy): return urls def __repr__ (self): - return '<DepthLimit {}>'.format (self.maxdepth) + return f'<DepthLimit {self.maxdepth}>' class PrefixLimit (RecursionPolicy): """ diff --git a/crocoite/devtools.py b/crocoite/devtools.py index e62d4e0..fd56896 100644 --- a/crocoite/devtools.py +++ b/crocoite/devtools.py @@ -26,6 +26,7 @@ import json, asyncio, logging, os from tempfile import mkdtemp import shutil import aiohttp, websockets +from yarl import URL from .util import StrJsonEncoder @@ -42,7 +43,7 @@ class Browser: __slots__ = ('session', 'url', 'tab', 'loop') def __init__ (self, url, loop=None): - self.url = url + self.url = URL (url) self.session = None self.tab = None self.loop = loop @@ -50,7 +51,7 @@ class Browser: async def __aiter__ (self): """ List all tabs """ async with aiohttp.ClientSession (loop=self.loop) as session: - async with session.get ('{}/json/list'.format (self.url)) as r: + async with session.get (self.url.with_path ('/json/list')) as r: resp = await 
r.json () for tab in resp: if tab['type'] == 'page': @@ -61,7 +62,7 @@ class Browser: assert self.tab is None assert self.session is None self.session = aiohttp.ClientSession (loop=self.loop) - async with self.session.get ('{}/json/new'.format (self.url)) as r: + async with self.session.get (self.url.with_path ('/json/new')) as r: resp = await r.json () self.tab = await Tab.create (**resp) return self.tab @@ -70,7 +71,7 @@ class Browser: assert self.tab is not None assert self.session is not None await self.tab.close () - async with self.session.get ('{}/json/close/{}'.format (self.url, self.tab.id)) as r: + async with self.session.get (self.url.with_path (f'/json/close/{self.tab.id}')) as r: resp = await r.text () assert resp == 'Target is closing' self.tab = None @@ -103,13 +104,13 @@ class TabFunction: return hash (self.name) def __getattr__ (self, k): - return TabFunction ('{}.{}'.format (self.name, k), self.tab) + return TabFunction (f'{self.name}.{k}', self.tab) async def __call__ (self, **kwargs): return await self.tab (self.name, **kwargs) def __repr__ (self): - return '<TabFunction {}>'.format (self.name) + return f'<TabFunction {self.name}>' class TabException (Exception): pass @@ -156,7 +157,7 @@ class Tab: self.msgid += 1 message = {'method': method, 'params': kwargs, 'id': msgid} t = self.transactions[msgid] = {'event': asyncio.Event (), 'result': None} - logger.debug ('← {}'.format (message)) + logger.debug (f'← {message}') await self.ws.send (json.dumps (message, cls=StrJsonEncoder)) await t['event'].wait () ret = t['result'] @@ -191,7 +192,7 @@ class Tab: # right now we cannot recover from this await markCrashed (e) break - logger.debug ('→ {}'.format (msg)) + logger.debug (f'→ {msg}') if 'id' in msg: msgid = msg['id'] t = self.transactions.get (msgid, None) @@ -272,7 +273,7 @@ class Process: # see https://github.com/GoogleChrome/chrome-launcher/blob/master/docs/chrome-flags-for-tools.md args = [self.binary, '--window-size={},{}'.format 
(*self.windowSize), - '--user-data-dir={}'.format (self.userDataDir), # use temporory user dir + f'--user-data-dir={self.userDataDir}', # use temporory user dir '--no-default-browser-check', '--no-first-run', # don’t show first run screen '--disable-breakpad', # no error reports @@ -317,7 +318,7 @@ class Process: if port is None: raise Exception ('Chrome died on us.') - return 'http://localhost:{}'.format (port) + return URL.build(scheme='http', host='localhost', port=port) async def __aexit__ (self, *exc): self.p.terminate () @@ -330,7 +331,7 @@ class Passthrough: __slots__ = ('url', ) def __init__ (self, url): - self.url = url + self.url = URL (url) async def __aenter__ (self): return self.url diff --git a/crocoite/irc.py b/crocoite/irc.py index 99485e4..96d0e3e 100644 --- a/crocoite/irc.py +++ b/crocoite/irc.py @@ -53,7 +53,7 @@ def prettyBytes (b): while b >= 1024 and len (prefixes) > 1: b /= 1024 prefixes.pop (0) - return '{:.1f} {}'.format (b, prefixes[0]) + return f'{b:.1f} {prefixes[0]}' def isValidUrl (s): url = urlsplit (s) @@ -104,16 +104,13 @@ class Job: def formatStatus (self): stats = self.stats rstats = self.rstats - return '{} ({}) {}. {} pages finished, {} pending; {} crashed, {} requests, {} failed, {} received.'.format ( - self.url, - self.id, - self.status.name, - rstats.get ('have', 0), - rstats.get ('pending', 0), - stats.get ('crashed', 0), - stats.get ('requests', 0), - stats.get ('failed', 0), - prettyBytes (stats.get ('bytesRcv', 0))) + return (f"{self.url} ({self.id}) {self.status.name}. 
" f"{rstats.get ('have', 0)} pages finished, " f"{rstats.get ('pending', 0)} pending; " f"{stats.get ('crashed', 0)} crashed, " f"{stats.get ('requests', 0)} requests, " f"{stats.get ('failed', 0)} failed, " f"{prettyBytes (stats.get ('bytesRcv', 0))} received.") class NickMode(Enum): operator = '@' @@ -138,7 +135,7 @@ class User: return hash (self.name) def __repr__ (self): - return '<User {} {}>'.format (self.name, self.modes) + return f'<User {self.name} {self.modes}>' @classmethod def fromName (cls, name): @@ -159,7 +156,8 @@ class ReplyContext: self.user = user def __call__ (self, message): - self.client.send ('PRIVMSG', target=self.target, message='{}: {}'.format (self.user.name, message)) + self.client.send ('PRIVMSG', target=self.target, + message=f'{self.user.name}: {message}') class RefCountEvent: """ @@ -321,10 +319,10 @@ class ArgparseBot (bottom.Client): try: args = self.parser.parse_args (command) except Exception as e: - reply ('{} -- {}'.format (e.args[1], e.args[0].format_usage ())) + reply (f'{e.args[1]} -- {e.args[0].format_usage ()}') return if not args: - reply ('Sorry, I don’t understand {}'.format (command)) + reply (f'Sorry, I don’t understand {command}') return if self._quit.armed and not getattr (args, 'allowOnShutdown', False): @@ -363,7 +361,7 @@ def jobExists (func): reply = kwargs.get ('reply') j = self.jobs.get (args.id, None) if not j: - reply ('Job {} is unknown'.format (args.id)) + reply (f'Job {args.id} is unknown') else: ret = await func (self, job=j, **kwargs) return ret @@ -426,7 +424,7 @@ class Chromebot (ArgparseBot): 'concurrency': args.concurrency, } strargs = ', '.join (map (lambda x: '{}={}'.format (*x), showargs.items ())) - reply ('{} has been queued as {} with {}'.format (args.url, j.id, strargs)) + reply (f'{args.url} has been queued as {j.id} with {strargs}') logger.info ('queue', user=user.name, url=args.url, cmdline=cmdline, uuid='36cc34a6-061b-4cc5-84a9-4ab6552c8d75') diff --git a/crocoite/test_browser.py 
b/crocoite/test_browser.py index 8a8d81f..6015a2f 100644 --- a/crocoite/test_browser.py +++ b/crocoite/test_browser.py @@ -142,7 +142,7 @@ async def itemsLoaded (l, items): assert item.chromeResponse is not None golden = items.pop (item.url.path) if not golden: - assert False, 'url {} not supposed to be fetched'.format (item.url) + assert False, f'url {item.url} not supposed to be fetched' assert item.failed == golden.failed if item.failed: # response will be invalid if request failed diff --git a/crocoite/test_devtools.py b/crocoite/test_devtools.py index 74d223f..3993edd 100644 --- a/crocoite/test_devtools.py +++ b/crocoite/test_devtools.py @@ -24,7 +24,8 @@ import pytest from aiohttp import web import websockets -from .devtools import Browser, Tab, MethodNotFound, Crashed, InvalidParameter, Process, Passthrough +from .devtools import Browser, Tab, MethodNotFound, Crashed, \ + InvalidParameter, Process, Passthrough @pytest.fixture async def browser (): @@ -73,8 +74,10 @@ async def test_tab_close (browser): @pytest.mark.asyncio async def test_tab_notify_enable_disable (tab): - """ Make sure enabling/disabling notifications works for all known namespaces """ - for name in ('Debugger', 'DOM', 'Log', 'Network', 'Page', 'Performance', 'Profiler', 'Runtime', 'Security'): + """ Make sure enabling/disabling notifications works for all known + namespaces """ + for name in ('Debugger', 'DOM', 'Log', 'Network', 'Page', 'Performance', + 'Profiler', 'Runtime', 'Security'): f = getattr (tab, name) await f.enable () await f.disable () @@ -149,7 +152,8 @@ async def test_tab_function (tab): @pytest.mark.asyncio async def test_tab_function_hash (tab): - d = {tab.Network.enable: 1, tab.Network.disable: 2, tab.Page: 3, tab.Page.enable: 4} + d = {tab.Network.enable: 1, tab.Network.disable: 2, tab.Page: 3, + tab.Page.enable: 4} assert len (d) == 4 @pytest.mark.asyncio @@ -168,5 +172,5 @@ async def test_passthrough (): url = 'http://localhost:12345' async with Passthrough (url) as u: - 
assert u == url + assert str (u) == url diff --git a/crocoite/test_tools.py b/crocoite/test_tools.py index c320ad9..106aa59 100644 --- a/crocoite/test_tools.py +++ b/crocoite/test_tools.py @@ -97,7 +97,7 @@ def test_different_payload(writer): httpHeaders = StatusAndHeaders('200 OK', {}, protocol='HTTP/1.1') record = writer.create_warc_record ('http://example.com/', 'response', - payload=BytesIO('data{}'.format(i).encode ('utf8')), + payload=BytesIO(f'data{i}'.encode ('utf8')), warc_headers_dict=warcHeaders, http_headers=httpHeaders) records.append (record) diff --git a/crocoite/tools.py b/crocoite/tools.py index 84c6f44..9c5d836 100644 --- a/crocoite/tools.py +++ b/crocoite/tools.py @@ -67,7 +67,7 @@ def mergeWarc (files, output): 'id': rid, 'date': headers.get_header('WARC-Date')} unique += 1 else: - logging.debug ('Record {} is duplicate of {}'.format (rid, dup['id'])) + logging.debug (f'Record {rid} is duplicate of {dup["id"]}') # Payload may be identical, but HTTP headers are # (probably) not. Include them. 
record = writer.create_revisit_record ( @@ -80,7 +80,7 @@ def mergeWarc (files, output): else: unique += 1 writer.write_record (record) - logging.info ('Wrote {} unique records, {} revisits'.format (unique, revisit)) + logging.info (f'Wrote {unique} unique records, {revisit} revisits') def mergeWarcCli(): parser = argparse.ArgumentParser(description='Merge WARCs, reads filenames from stdin.') @@ -116,12 +116,12 @@ def extractScreenshot (): urlSanitized = headers.get_header('WARC-Target-URI').replace ('/', '_') xoff = 0 yoff = int (headers.get_header ('X-Crocoite-Screenshot-Y-Offset')) - outpath = '{}-{}-{}-{}.png'.format (args.prefix, urlSanitized, xoff, yoff) + outpath = f'{args.prefix}-{urlSanitized}-{xoff}-{yoff}.png' if args.force or not os.path.exists (outpath): with open (outpath, 'wb') as out: shutil.copyfileobj (record.raw_stream, out) else: - print ('not overwriting {}'.format (outpath)) + print (f'not overwriting {outpath}') class Errata: __slots__ = ('uuid', 'description', 'affects') @@ -145,8 +145,7 @@ class Errata: return all (matchedAll) def __repr__ (self): - return '{}({!r}, {!r}, {!r})'.format (self.__class__.__name__, - self.uuid, self.description, self.affects) + return f'{self.__class__.__name__}({self.uuid!r}, {self.description!r}, {self.affects!r})' @property def fixable (self): @@ -180,7 +179,7 @@ def makeReport (fd): pass def errata (): - parser = argparse.ArgumentParser(description='Show/fix erratas for WARCs generated by {}.'.format (__package__)) + parser = argparse.ArgumentParser(description=f'Show/fix erratas for WARCs generated by {__package__}.') parser.add_argument('input', type=argparse.FileType ('rb'), help='Input WARC') args = parser.parse_args() diff --git a/crocoite/util.py b/crocoite/util.py index eebe909..5bced53 100644 --- a/crocoite/util.py +++ b/crocoite/util.py @@ -48,8 +48,8 @@ def packageUrl (path): async def getFormattedViewportMetrics (tab): layoutMetrics = await tab.Page.getLayoutMetrics () # XXX: I’m not entirely 
sure which one we should use here - return '{}x{}'.format (layoutMetrics['layoutViewport']['clientWidth'], - layoutMetrics['layoutViewport']['clientHeight']) + viewport = layoutMetrics['layoutViewport'] + return f"{viewport['clientWidth']}x{viewport['clientHeight']}" def getSoftwareInfo (): """ Get software info for inclusion into warcinfo """ diff --git a/crocoite/warc.py b/crocoite/warc.py index 21a99aa..04dd871 100644 --- a/crocoite/warc.py +++ b/crocoite/warc.py @@ -87,7 +87,7 @@ class WarcHandler (EventHandler): url = item.url path = url.relative().with_fragment(None) - httpHeaders = StatusAndHeaders('{} {} HTTP/1.1'.format (req['method'], path), + httpHeaders = StatusAndHeaders(f'{req["method"]} {path} HTTP/1.1', item.requestHeaders, protocol='HTTP/1.1', is_http_request=True) initiator = item.initiator warcHeaders = { @@ -144,8 +144,8 @@ class WarcHandler (EventHandler): else: warcHeaders['X-Chrome-Base64Body'] = str (base64Encoded) - httpHeaders = StatusAndHeaders('{} {}'.format (resp['status'], - item.statusText), item.responseHeaders, + httpHeaders = StatusAndHeaders(f'{resp["status"]} {item.statusText}', + item.responseHeaders, protocol='HTTP/1.1') # Content is saved decompressed and decoded, remove these headers @@ -163,7 +163,7 @@ class WarcHandler (EventHandler): httpHeaders.replace_header ('content-type', contentType) if rawBody is not None: - httpHeaders.replace_header ('content-length', '{:d}'.format (len (rawBody))) + httpHeaders.replace_header ('content-length', str (len (rawBody))) bodyIo = BytesIO (rawBody) else: bodyIo = BytesIO () @@ -178,9 +178,10 @@ class WarcHandler (EventHandler): def _writeScript (self, item): writer = self.writer encoding = 'utf-8' - self.writeRecord (packageUrl ('script/{}'.format (item.path)), 'metadata', + self.writeRecord (packageUrl (f'script/{item.path}'), 'metadata', payload=BytesIO (str (item).encode (encoding)), - warc_headers_dict={'Content-Type': 'application/javascript; charset={}'.format (encoding)}) + 
warc_headers_dict={'Content-Type': + f'application/javascript; charset={encoding}'}) def _writeItem (self, item): if item.failed: @@ -195,7 +196,7 @@ class WarcHandler (EventHandler): if refersTo: headers['WARC-Refers-To'] = refersTo else: - self.logger.error ('No document record found for {}'.format (url)) + self.logger.error (f'No document record found for {url}') return headers def _writeDomSnapshot (self, item): @@ -234,7 +235,7 @@ class WarcHandler (EventHandler): self.log.seek (0) # XXX: we should use the type continuation here self.writeRecord (packageUrl ('log'), 'resource', payload=self.log, - warc_headers_dict={'Content-Type': 'text/plain; encoding={}'.format (self.logEncoding)}) + warc_headers_dict={'Content-Type': f'text/plain; encoding={self.logEncoding}'}) self.log = BytesIO () def _writeLog (self, item): @@ -262,5 +263,5 @@ class WarcHandler (EventHandler): break if not processed: - self.logger.debug ('unknown event {}'.format (repr (item))) + self.logger.debug (f'unknown event {item!r}') |