summaryrefslogtreecommitdiff
path: root/crocoite
diff options
context:
space:
mode:
Diffstat (limited to 'crocoite')
-rw-r--r-- crocoite/behavior.py 10
-rw-r--r-- crocoite/browser.py 2
-rw-r--r-- crocoite/controller.py 2
-rw-r--r-- crocoite/devtools.py 23
-rw-r--r-- crocoite/irc.py 32
-rw-r--r-- crocoite/test_browser.py 2
-rw-r--r-- crocoite/test_devtools.py 14
-rw-r--r-- crocoite/test_tools.py 2
-rw-r--r-- crocoite/tools.py 13
-rw-r--r-- crocoite/util.py 4
-rw-r--r-- crocoite/warc.py 19
11 files changed, 63 insertions, 60 deletions
diff --git a/crocoite/behavior.py b/crocoite/behavior.py
index 321b65c..7f3a3a0 100644
--- a/crocoite/behavior.py
+++ b/crocoite/behavior.py
@@ -59,7 +59,7 @@ class Script:
self.data = pkg_resources.resource_string (__name__, os.path.join ('data', path)).decode (encoding)
def __repr__ (self):
- return '<Script {}>'.format (self.path)
+ return f'<Script {self.path}>'
def __str__ (self):
return self.data
@@ -89,7 +89,7 @@ class Behavior:
return True
def __repr__ (self):
- return '<Behavior {}>'.format (self.name)
+ return f'<Behavior {self.name}>'
async def onload (self):
""" After loading the page started """
@@ -138,7 +138,7 @@ class JsOnload (Behavior):
constructor = result['objectId']
if self.options:
- yield Script.fromStr (json.dumps (self.options, indent=2), '{}/options'.format (self.script.path))
+ yield Script.fromStr (json.dumps (self.options, indent=2), f'{self.script.path}/options')
result = await tab.Runtime.callFunctionOn (
functionDeclaration='function(options){return new this(options);}',
objectId=constructor,
@@ -231,9 +231,9 @@ class DomSnapshot (Behavior):
if url in haveUrls:
# ignore duplicate URLs. they are usually caused by
# javascript-injected iframes (advertising) with no(?) src
- self.logger.warning ('have DOM snapshot for URL {}, ignoring'.format (url))
+ self.logger.warning (f'have DOM snapshot for URL {url}, ignoring')
elif url.scheme in ('http', 'https'):
- self.logger.debug ('saving DOM snapshot for url {}, base {}'.format (doc['documentURL'], doc['baseURL']))
+ self.logger.debug (f'saving DOM snapshot for url {url}, base {doc["baseURL"]}')
haveUrls.add (url)
walker = ChromeTreeWalker (doc)
# remove script, to make the page static and noscript, because at the
diff --git a/crocoite/browser.py b/crocoite/browser.py
index 1c7ac3b..3de61f0 100644
--- a/crocoite/browser.py
+++ b/crocoite/browser.py
@@ -48,7 +48,7 @@ class Item:
self.requestBody = None
def __repr__ (self):
- return '<Item {}>'.format (self.url)
+ return f'<Item {self.url}>'
@property
def request (self):
diff --git a/crocoite/controller.py b/crocoite/controller.py
index 53cb08d..504fa23 100644
--- a/crocoite/controller.py
+++ b/crocoite/controller.py
@@ -248,7 +248,7 @@ class DepthLimit (RecursionPolicy):
return urls
def __repr__ (self):
- return '<DepthLimit {}>'.format (self.maxdepth)
+ return f'<DepthLimit {self.maxdepth}>'
class PrefixLimit (RecursionPolicy):
"""
diff --git a/crocoite/devtools.py b/crocoite/devtools.py
index e62d4e0..fd56896 100644
--- a/crocoite/devtools.py
+++ b/crocoite/devtools.py
@@ -26,6 +26,7 @@ import json, asyncio, logging, os
from tempfile import mkdtemp
import shutil
import aiohttp, websockets
+from yarl import URL
from .util import StrJsonEncoder
@@ -42,7 +43,7 @@ class Browser:
__slots__ = ('session', 'url', 'tab', 'loop')
def __init__ (self, url, loop=None):
- self.url = url
+ self.url = URL (url)
self.session = None
self.tab = None
self.loop = loop
@@ -50,7 +51,7 @@ class Browser:
async def __aiter__ (self):
""" List all tabs """
async with aiohttp.ClientSession (loop=self.loop) as session:
- async with session.get ('{}/json/list'.format (self.url)) as r:
+ async with session.get (self.url.with_path ('/json/list')) as r:
resp = await r.json ()
for tab in resp:
if tab['type'] == 'page':
@@ -61,7 +62,7 @@ class Browser:
assert self.tab is None
assert self.session is None
self.session = aiohttp.ClientSession (loop=self.loop)
- async with self.session.get ('{}/json/new'.format (self.url)) as r:
+ async with self.session.get (self.url.with_path ('/json/new')) as r:
resp = await r.json ()
self.tab = await Tab.create (**resp)
return self.tab
@@ -70,7 +71,7 @@ class Browser:
assert self.tab is not None
assert self.session is not None
await self.tab.close ()
- async with self.session.get ('{}/json/close/{}'.format (self.url, self.tab.id)) as r:
+ async with self.session.get (self.url.with_path (f'/json/close/{self.tab.id}')) as r:
resp = await r.text ()
assert resp == 'Target is closing'
self.tab = None
@@ -103,13 +104,13 @@ class TabFunction:
return hash (self.name)
def __getattr__ (self, k):
- return TabFunction ('{}.{}'.format (self.name, k), self.tab)
+ return TabFunction (f'{self.name}.{k}', self.tab)
async def __call__ (self, **kwargs):
return await self.tab (self.name, **kwargs)
def __repr__ (self):
- return '<TabFunction {}>'.format (self.name)
+ return f'<TabFunction {self.name}>'
class TabException (Exception):
pass
@@ -156,7 +157,7 @@ class Tab:
self.msgid += 1
message = {'method': method, 'params': kwargs, 'id': msgid}
t = self.transactions[msgid] = {'event': asyncio.Event (), 'result': None}
- logger.debug ('← {}'.format (message))
+ logger.debug (f'← {message}')
await self.ws.send (json.dumps (message, cls=StrJsonEncoder))
await t['event'].wait ()
ret = t['result']
@@ -191,7 +192,7 @@ class Tab:
# right now we cannot recover from this
await markCrashed (e)
break
- logger.debug ('→ {}'.format (msg))
+ logger.debug (f'→ {msg}')
if 'id' in msg:
msgid = msg['id']
t = self.transactions.get (msgid, None)
@@ -272,7 +273,7 @@ class Process:
# see https://github.com/GoogleChrome/chrome-launcher/blob/master/docs/chrome-flags-for-tools.md
args = [self.binary,
'--window-size={},{}'.format (*self.windowSize),
- '--user-data-dir={}'.format (self.userDataDir), # use temporory user dir
+ f'--user-data-dir={self.userDataDir}', # use temporary user dir
'--no-default-browser-check',
'--no-first-run', # don’t show first run screen
'--disable-breakpad', # no error reports
@@ -317,7 +318,7 @@ class Process:
if port is None:
raise Exception ('Chrome died on us.')
- return 'http://localhost:{}'.format (port)
+ return URL.build(scheme='http', host='localhost', port=port)
async def __aexit__ (self, *exc):
self.p.terminate ()
@@ -330,7 +331,7 @@ class Passthrough:
__slots__ = ('url', )
def __init__ (self, url):
- self.url = url
+ self.url = URL (url)
async def __aenter__ (self):
return self.url
diff --git a/crocoite/irc.py b/crocoite/irc.py
index 99485e4..96d0e3e 100644
--- a/crocoite/irc.py
+++ b/crocoite/irc.py
@@ -53,7 +53,7 @@ def prettyBytes (b):
while b >= 1024 and len (prefixes) > 1:
b /= 1024
prefixes.pop (0)
- return '{:.1f} {}'.format (b, prefixes[0])
+ return f'{b:.1f} {prefixes[0]}'
def isValidUrl (s):
url = urlsplit (s)
@@ -104,16 +104,13 @@ class Job:
def formatStatus (self):
stats = self.stats
rstats = self.rstats
- return '{} ({}) {}. {} pages finished, {} pending; {} crashed, {} requests, {} failed, {} received.'.format (
- self.url,
- self.id,
- self.status.name,
- rstats.get ('have', 0),
- rstats.get ('pending', 0),
- stats.get ('crashed', 0),
- stats.get ('requests', 0),
- stats.get ('failed', 0),
- prettyBytes (stats.get ('bytesRcv', 0)))
+ return (f"{self.url} ({self.id}) {self.status.name}. "
+ f"{rstats.get ('have', 0)} pages finished, "
+ f"{rstats.get ('pending', 0)} pending; "
+ f"{stats.get ('crashed', 0)} crashed, "
+ f"{stats.get ('requests', 0)} requests, "
+ f"{stats.get ('failed', 0)} failed, "
+ f"{prettyBytes (stats.get ('bytesRcv', 0))} received.")
class NickMode(Enum):
operator = '@'
@@ -138,7 +135,7 @@ class User:
return hash (self.name)
def __repr__ (self):
- return '<User {} {}>'.format (self.name, self.modes)
+ return f'<User {self.name} {self.modes}>'
@classmethod
def fromName (cls, name):
@@ -159,7 +156,8 @@ class ReplyContext:
self.user = user
def __call__ (self, message):
- self.client.send ('PRIVMSG', target=self.target, message='{}: {}'.format (self.user.name, message))
+ self.client.send ('PRIVMSG', target=self.target,
+ message=f'{self.user.name}: {message}')
class RefCountEvent:
"""
@@ -321,10 +319,10 @@ class ArgparseBot (bottom.Client):
try:
args = self.parser.parse_args (command)
except Exception as e:
- reply ('{} -- {}'.format (e.args[1], e.args[0].format_usage ()))
+ reply (f'{e.args[1]} -- {e.args[0].format_usage ()}')
return
if not args:
- reply ('Sorry, I don’t understand {}'.format (command))
+ reply (f'Sorry, I don’t understand {command}')
return
if self._quit.armed and not getattr (args, 'allowOnShutdown', False):
@@ -363,7 +361,7 @@ def jobExists (func):
reply = kwargs.get ('reply')
j = self.jobs.get (args.id, None)
if not j:
- reply ('Job {} is unknown'.format (args.id))
+ reply (f'Job {args.id} is unknown')
else:
ret = await func (self, job=j, **kwargs)
return ret
@@ -426,7 +424,7 @@ class Chromebot (ArgparseBot):
'concurrency': args.concurrency,
}
strargs = ', '.join (map (lambda x: '{}={}'.format (*x), showargs.items ()))
- reply ('{} has been queued as {} with {}'.format (args.url, j.id, strargs))
+ reply (f'{args.url} has been queued as {j.id} with {strargs}')
logger.info ('queue', user=user.name, url=args.url, cmdline=cmdline,
uuid='36cc34a6-061b-4cc5-84a9-4ab6552c8d75')
diff --git a/crocoite/test_browser.py b/crocoite/test_browser.py
index 8a8d81f..6015a2f 100644
--- a/crocoite/test_browser.py
+++ b/crocoite/test_browser.py
@@ -142,7 +142,7 @@ async def itemsLoaded (l, items):
assert item.chromeResponse is not None
golden = items.pop (item.url.path)
if not golden:
- assert False, 'url {} not supposed to be fetched'.format (item.url)
+ assert False, f'url {item.url} not supposed to be fetched'
assert item.failed == golden.failed
if item.failed:
# response will be invalid if request failed
diff --git a/crocoite/test_devtools.py b/crocoite/test_devtools.py
index 74d223f..3993edd 100644
--- a/crocoite/test_devtools.py
+++ b/crocoite/test_devtools.py
@@ -24,7 +24,8 @@ import pytest
from aiohttp import web
import websockets
-from .devtools import Browser, Tab, MethodNotFound, Crashed, InvalidParameter, Process, Passthrough
+from .devtools import Browser, Tab, MethodNotFound, Crashed, \
+ InvalidParameter, Process, Passthrough
@pytest.fixture
async def browser ():
@@ -73,8 +74,10 @@ async def test_tab_close (browser):
@pytest.mark.asyncio
async def test_tab_notify_enable_disable (tab):
- """ Make sure enabling/disabling notifications works for all known namespaces """
- for name in ('Debugger', 'DOM', 'Log', 'Network', 'Page', 'Performance', 'Profiler', 'Runtime', 'Security'):
+ """ Make sure enabling/disabling notifications works for all known
+ namespaces """
+ for name in ('Debugger', 'DOM', 'Log', 'Network', 'Page', 'Performance',
+ 'Profiler', 'Runtime', 'Security'):
f = getattr (tab, name)
await f.enable ()
await f.disable ()
@@ -149,7 +152,8 @@ async def test_tab_function (tab):
@pytest.mark.asyncio
async def test_tab_function_hash (tab):
- d = {tab.Network.enable: 1, tab.Network.disable: 2, tab.Page: 3, tab.Page.enable: 4}
+ d = {tab.Network.enable: 1, tab.Network.disable: 2, tab.Page: 3,
+ tab.Page.enable: 4}
assert len (d) == 4
@pytest.mark.asyncio
@@ -168,5 +172,5 @@ async def test_passthrough ():
url = 'http://localhost:12345'
async with Passthrough (url) as u:
- assert u == url
+ assert str (u) == url
diff --git a/crocoite/test_tools.py b/crocoite/test_tools.py
index c320ad9..106aa59 100644
--- a/crocoite/test_tools.py
+++ b/crocoite/test_tools.py
@@ -97,7 +97,7 @@ def test_different_payload(writer):
httpHeaders = StatusAndHeaders('200 OK', {}, protocol='HTTP/1.1')
record = writer.create_warc_record ('http://example.com/', 'response',
- payload=BytesIO('data{}'.format(i).encode ('utf8')),
+ payload=BytesIO(f'data{i}'.encode ('utf8')),
warc_headers_dict=warcHeaders, http_headers=httpHeaders)
records.append (record)
diff --git a/crocoite/tools.py b/crocoite/tools.py
index 84c6f44..9c5d836 100644
--- a/crocoite/tools.py
+++ b/crocoite/tools.py
@@ -67,7 +67,7 @@ def mergeWarc (files, output):
'id': rid, 'date': headers.get_header('WARC-Date')}
unique += 1
else:
- logging.debug ('Record {} is duplicate of {}'.format (rid, dup['id']))
+ logging.debug (f'Record {rid} is duplicate of {dup["id"]}')
# Payload may be identical, but HTTP headers are
# (probably) not. Include them.
record = writer.create_revisit_record (
@@ -80,7 +80,7 @@ def mergeWarc (files, output):
else:
unique += 1
writer.write_record (record)
- logging.info ('Wrote {} unique records, {} revisits'.format (unique, revisit))
+ logging.info (f'Wrote {unique} unique records, {revisit} revisits')
def mergeWarcCli():
parser = argparse.ArgumentParser(description='Merge WARCs, reads filenames from stdin.')
@@ -116,12 +116,12 @@ def extractScreenshot ():
urlSanitized = headers.get_header('WARC-Target-URI').replace ('/', '_')
xoff = 0
yoff = int (headers.get_header ('X-Crocoite-Screenshot-Y-Offset'))
- outpath = '{}-{}-{}-{}.png'.format (args.prefix, urlSanitized, xoff, yoff)
+ outpath = f'{args.prefix}-{urlSanitized}-{xoff}-{yoff}.png'
if args.force or not os.path.exists (outpath):
with open (outpath, 'wb') as out:
shutil.copyfileobj (record.raw_stream, out)
else:
- print ('not overwriting {}'.format (outpath))
+ print (f'not overwriting {outpath}')
class Errata:
__slots__ = ('uuid', 'description', 'affects')
@@ -145,8 +145,7 @@ class Errata:
return all (matchedAll)
def __repr__ (self):
- return '{}({!r}, {!r}, {!r})'.format (self.__class__.__name__,
- self.uuid, self.description, self.affects)
+ return f'{self.__class__.__name__}({self.uuid!r}, {self.description!r}, {self.affects!r})'
@property
def fixable (self):
@@ -180,7 +179,7 @@ def makeReport (fd):
pass
def errata ():
- parser = argparse.ArgumentParser(description='Show/fix erratas for WARCs generated by {}.'.format (__package__))
+ parser = argparse.ArgumentParser(description=f'Show/fix erratas for WARCs generated by {__package__}.')
parser.add_argument('input', type=argparse.FileType ('rb'), help='Input WARC')
args = parser.parse_args()
diff --git a/crocoite/util.py b/crocoite/util.py
index eebe909..5bced53 100644
--- a/crocoite/util.py
+++ b/crocoite/util.py
@@ -48,8 +48,8 @@ def packageUrl (path):
async def getFormattedViewportMetrics (tab):
layoutMetrics = await tab.Page.getLayoutMetrics ()
# XXX: I’m not entirely sure which one we should use here
- return '{}x{}'.format (layoutMetrics['layoutViewport']['clientWidth'],
- layoutMetrics['layoutViewport']['clientHeight'])
+ viewport = layoutMetrics['layoutViewport']
+ return f"{viewport['clientWidth']}x{viewport['clientHeight']}"
def getSoftwareInfo ():
""" Get software info for inclusion into warcinfo """
diff --git a/crocoite/warc.py b/crocoite/warc.py
index 21a99aa..04dd871 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -87,7 +87,7 @@ class WarcHandler (EventHandler):
url = item.url
path = url.relative().with_fragment(None)
- httpHeaders = StatusAndHeaders('{} {} HTTP/1.1'.format (req['method'], path),
+ httpHeaders = StatusAndHeaders(f'{req["method"]} {path} HTTP/1.1',
item.requestHeaders, protocol='HTTP/1.1', is_http_request=True)
initiator = item.initiator
warcHeaders = {
@@ -144,8 +144,8 @@ class WarcHandler (EventHandler):
else:
warcHeaders['X-Chrome-Base64Body'] = str (base64Encoded)
- httpHeaders = StatusAndHeaders('{} {}'.format (resp['status'],
- item.statusText), item.responseHeaders,
+ httpHeaders = StatusAndHeaders(f'{resp["status"]} {item.statusText}',
+ item.responseHeaders,
protocol='HTTP/1.1')
# Content is saved decompressed and decoded, remove these headers
@@ -163,7 +163,7 @@ class WarcHandler (EventHandler):
httpHeaders.replace_header ('content-type', contentType)
if rawBody is not None:
- httpHeaders.replace_header ('content-length', '{:d}'.format (len (rawBody)))
+ httpHeaders.replace_header ('content-length', str (len (rawBody)))
bodyIo = BytesIO (rawBody)
else:
bodyIo = BytesIO ()
@@ -178,9 +178,10 @@ class WarcHandler (EventHandler):
def _writeScript (self, item):
writer = self.writer
encoding = 'utf-8'
- self.writeRecord (packageUrl ('script/{}'.format (item.path)), 'metadata',
+ self.writeRecord (packageUrl (f'script/{item.path}'), 'metadata',
payload=BytesIO (str (item).encode (encoding)),
- warc_headers_dict={'Content-Type': 'application/javascript; charset={}'.format (encoding)})
+ warc_headers_dict={'Content-Type':
+ f'application/javascript; charset={encoding}'})
def _writeItem (self, item):
if item.failed:
@@ -195,7 +196,7 @@ class WarcHandler (EventHandler):
if refersTo:
headers['WARC-Refers-To'] = refersTo
else:
- self.logger.error ('No document record found for {}'.format (url))
+ self.logger.error (f'No document record found for {url}')
return headers
def _writeDomSnapshot (self, item):
@@ -234,7 +235,7 @@ class WarcHandler (EventHandler):
self.log.seek (0)
# XXX: we should use the type continuation here
self.writeRecord (packageUrl ('log'), 'resource', payload=self.log,
- warc_headers_dict={'Content-Type': 'text/plain; encoding={}'.format (self.logEncoding)})
+ warc_headers_dict={'Content-Type': f'text/plain; encoding={self.logEncoding}'})
self.log = BytesIO ()
def _writeLog (self, item):
@@ -262,5 +263,5 @@ class WarcHandler (EventHandler):
break
if not processed:
- self.logger.debug ('unknown event {}'.format (repr (item)))
+ self.logger.debug (f'unknown event {item!r}')