diff options
Diffstat (limited to 'crocoite')
| -rw-r--r-- | crocoite/behavior.py | 10 | ||||
| -rw-r--r-- | crocoite/browser.py | 2 | ||||
| -rw-r--r-- | crocoite/controller.py | 2 | ||||
| -rw-r--r-- | crocoite/devtools.py | 23 | ||||
| -rw-r--r-- | crocoite/irc.py | 32 | ||||
| -rw-r--r-- | crocoite/test_browser.py | 2 | ||||
| -rw-r--r-- | crocoite/test_devtools.py | 14 | ||||
| -rw-r--r-- | crocoite/test_tools.py | 2 | ||||
| -rw-r--r-- | crocoite/tools.py | 13 | ||||
| -rw-r--r-- | crocoite/util.py | 4 | ||||
| -rw-r--r-- | crocoite/warc.py | 19 | 
11 files changed, 63 insertions, 60 deletions
| diff --git a/crocoite/behavior.py b/crocoite/behavior.py index 321b65c..7f3a3a0 100644 --- a/crocoite/behavior.py +++ b/crocoite/behavior.py @@ -59,7 +59,7 @@ class Script:              self.data = pkg_resources.resource_string (__name__, os.path.join ('data', path)).decode (encoding)      def __repr__ (self): -        return '<Script {}>'.format (self.path) +        return f'<Script {self.path}>'      def __str__ (self):          return self.data @@ -89,7 +89,7 @@ class Behavior:          return True      def __repr__ (self): -        return '<Behavior {}>'.format (self.name) +        return f'<Behavior {self.name}>'      async def onload (self):          """ After loading the page started """ @@ -138,7 +138,7 @@ class JsOnload (Behavior):          constructor = result['objectId']          if self.options: -            yield Script.fromStr (json.dumps (self.options, indent=2), '{}/options'.format (self.script.path)) +            yield Script.fromStr (json.dumps (self.options, indent=2), f'{self.script.path}/options')          result = await tab.Runtime.callFunctionOn (                  functionDeclaration='function(options){return new this(options);}',                  objectId=constructor, @@ -231,9 +231,9 @@ class DomSnapshot (Behavior):              if url in haveUrls:                  # ignore duplicate URLs. they are usually caused by                  # javascript-injected iframes (advertising) with no(?) src -                self.logger.warning ('have DOM snapshot for URL {}, ignoring'.format (url)) +                self.logger.warning (f'have DOM snapshot for URL {url}, ignoring')              elif url.scheme in ('http', 'https'): -                self.logger.debug ('saving DOM snapshot for url {}, base {}'.format (doc['documentURL'], doc['baseURL'])) +                self.logger.debug (f'saving DOM snapshot for url {url}, base {doc["baseURL"]}')                  haveUrls.add (url)                  walker = ChromeTreeWalker (doc)                  # remove script, to make the page static and noscript, because at the diff --git a/crocoite/browser.py b/crocoite/browser.py index 1c7ac3b..3de61f0 100644 --- a/crocoite/browser.py +++ b/crocoite/browser.py @@ -48,7 +48,7 @@ class Item:          self.requestBody = None      def __repr__ (self): -        return '<Item {}>'.format (self.url) +        return f'<Item {self.url}>'      @property      def request (self): diff --git a/crocoite/controller.py b/crocoite/controller.py index 53cb08d..504fa23 100644 --- a/crocoite/controller.py +++ b/crocoite/controller.py @@ -248,7 +248,7 @@ class DepthLimit (RecursionPolicy):              return urls      def __repr__ (self): -        return '<DepthLimit {}>'.format (self.maxdepth) +        return f'<DepthLimit {self.maxdepth}>'  class PrefixLimit (RecursionPolicy):      """ diff --git a/crocoite/devtools.py b/crocoite/devtools.py index e62d4e0..fd56896 100644 --- a/crocoite/devtools.py +++ b/crocoite/devtools.py @@ -26,6 +26,7 @@ import json, asyncio, logging, os  from tempfile import mkdtemp  import shutil  import aiohttp, websockets +from yarl import URL  from .util import StrJsonEncoder @@ -42,7 +43,7 @@ class Browser:      __slots__ = ('session', 'url', 'tab', 'loop')      def __init__ (self, url, loop=None): -        self.url = url +        self.url = URL (url)          self.session = None          self.tab = None          self.loop = loop @@ -50,7 +51,7 @@ class Browser:      async def __aiter__ (self):          """ List all tabs """          async with aiohttp.ClientSession (loop=self.loop) as session: -            async with session.get ('{}/json/list'.format (self.url)) as r: +            async with session.get (self.url.with_path ('/json/list')) as r:                  resp = await r.json ()                  for tab in resp:                      if tab['type'] == 'page': @@ -61,7 +62,7 @@ class Browser:          assert self.tab is None          assert self.session is None          self.session = aiohttp.ClientSession (loop=self.loop) -        async with self.session.get ('{}/json/new'.format (self.url)) as r: +        async with self.session.get (self.url.with_path ('/json/new')) as r:              resp = await r.json ()              self.tab = await Tab.create (**resp)              return self.tab @@ -70,7 +71,7 @@ class Browser:          assert self.tab is not None          assert self.session is not None          await self.tab.close () -        async with self.session.get ('{}/json/close/{}'.format (self.url, self.tab.id)) as r: +        async with self.session.get (self.url.with_path (f'/json/close/{self.tab.id}')) as r:              resp = await r.text ()              assert resp == 'Target is closing'          self.tab = None @@ -103,13 +104,13 @@ class TabFunction:          return hash (self.name)      def __getattr__ (self, k): -        return TabFunction ('{}.{}'.format (self.name, k), self.tab) +        return TabFunction (f'{self.name}.{k}', self.tab)      async def __call__ (self, **kwargs):          return await self.tab (self.name, **kwargs)      def __repr__ (self): -        return '<TabFunction {}>'.format (self.name) +        return f'<TabFunction {self.name}>'  class TabException (Exception):      pass @@ -156,7 +157,7 @@ class Tab:          self.msgid += 1          message = {'method': method, 'params': kwargs, 'id': msgid}          t = self.transactions[msgid] = {'event': asyncio.Event (), 'result': None} -        logger.debug ('← {}'.format (message)) +        logger.debug (f'← {message}')          await self.ws.send (json.dumps (message, cls=StrJsonEncoder))          await t['event'].wait ()          ret = t['result'] @@ -191,7 +192,7 @@ class Tab:                  # right now we cannot recover from this                  await markCrashed (e)                  break -            logger.debug ('→ {}'.format (msg)) +            logger.debug (f'→ {msg}')              if 'id' in msg:                  msgid = msg['id']                  t = self.transactions.get (msgid, None) @@ -272,7 +273,7 @@ class Process:          # see https://github.com/GoogleChrome/chrome-launcher/blob/master/docs/chrome-flags-for-tools.md          args = [self.binary,                  '--window-size={},{}'.format (*self.windowSize), -                '--user-data-dir={}'.format (self.userDataDir), # use temporory user dir +                f'--user-data-dir={self.userDataDir}', # use temporory user dir                  '--no-default-browser-check',                  '--no-first-run', # don’t show first run screen                  '--disable-breakpad', # no error reports @@ -317,7 +318,7 @@ class Process:          if port is None:              raise Exception ('Chrome died on us.') -        return 'http://localhost:{}'.format (port) +        return URL.build(scheme='http', host='localhost', port=port)      async def __aexit__ (self, *exc):          self.p.terminate () @@ -330,7 +331,7 @@ class Passthrough:      __slots__ = ('url', )      def __init__ (self, url): -        self.url = url +        self.url = URL (url)      async def __aenter__ (self):          return self.url diff --git a/crocoite/irc.py b/crocoite/irc.py index 99485e4..96d0e3e 100644 --- a/crocoite/irc.py +++ b/crocoite/irc.py @@ -53,7 +53,7 @@ def prettyBytes (b):      while b >= 1024 and len (prefixes) > 1:          b /= 1024          prefixes.pop (0) -    return '{:.1f} {}'.format (b, prefixes[0]) +    return f'{b:.1f} {prefixes[0]}'  def isValidUrl (s):      url = urlsplit (s) @@ -104,16 +104,13 @@ class Job:      def formatStatus (self):          stats = self.stats          rstats = self.rstats -        return '{} ({}) {}. {} pages finished, {} pending; {} crashed, {} requests, {} failed, {} received.'.format ( -                self.url, -                self.id, -                self.status.name, -                rstats.get ('have', 0), -                rstats.get ('pending', 0), -                stats.get ('crashed', 0), -                stats.get ('requests', 0), -                stats.get ('failed', 0), -                prettyBytes (stats.get ('bytesRcv', 0))) +        return (f"{self.url} ({self.id}) {self.status.name}. " +                "{rstats.get ('have', 0)} pages finished, " +                "{rstats.get ('pending', 0)} pending; " +                "{stats.get ('crashed', 0)} crashed, " +                "{stats.get ('requests', 0)} requests, " +                "{stats.get ('failed', 0)} failed, " +                "{prettyBytes (stats.get ('bytesRcv', 0))} received.")  class NickMode(Enum):      operator = '@' @@ -138,7 +135,7 @@ class User:          return hash (self.name)      def __repr__ (self): -        return '<User {} {}>'.format (self.name, self.modes) +        return f'<User {self.name} {self.modes}>'      @classmethod      def fromName (cls, name): @@ -159,7 +156,8 @@ class ReplyContext:          self.user = user      def __call__ (self, message): -        self.client.send ('PRIVMSG', target=self.target, message='{}: {}'.format (self.user.name, message)) +        self.client.send ('PRIVMSG', target=self.target, +                message=f'{self.user.name}: {message}')  class RefCountEvent:      """ @@ -321,10 +319,10 @@ class ArgparseBot (bottom.Client):              try:                  args = self.parser.parse_args (command)              except Exception as e: -                reply ('{} -- {}'.format (e.args[1], e.args[0].format_usage ())) +                reply (f'{e.args[1]} -- {e.args[0].format_usage ()}')                  return              if not args: -                reply ('Sorry, I don’t understand {}'.format (command)) +                reply (f'Sorry, I don’t understand {command}')                  return              if self._quit.armed and not getattr (args, 'allowOnShutdown', False): @@ -363,7 +361,7 @@ def jobExists (func):          reply = kwargs.get ('reply')          j = self.jobs.get (args.id, None)          if not j: -            reply ('Job {} is unknown'.format (args.id)) +            reply (f'Job {args.id} is unknown')          else:              ret = await func (self, job=j, **kwargs)              return ret @@ -426,7 +424,7 @@ class Chromebot (ArgparseBot):                  'concurrency': args.concurrency,                  }          strargs = ', '.join (map (lambda x: '{}={}'.format (*x), showargs.items ())) -        reply ('{} has been queued as {} with {}'.format (args.url, j.id, strargs)) +        reply (f'{args.url} has been queued as {j.id} with {strargs}')          logger.info ('queue', user=user.name, url=args.url, cmdline=cmdline,                  uuid='36cc34a6-061b-4cc5-84a9-4ab6552c8d75') diff --git a/crocoite/test_browser.py b/crocoite/test_browser.py index 8a8d81f..6015a2f 100644 --- a/crocoite/test_browser.py +++ b/crocoite/test_browser.py @@ -142,7 +142,7 @@ async def itemsLoaded (l, items):          assert item.chromeResponse is not None          golden = items.pop (item.url.path)          if not golden: -            assert False, 'url {} not supposed to be fetched'.format (item.url) +            assert False, f'url {item.url} not supposed to be fetched'          assert item.failed == golden.failed          if item.failed:              # response will be invalid if request failed diff --git a/crocoite/test_devtools.py b/crocoite/test_devtools.py index 74d223f..3993edd 100644 --- a/crocoite/test_devtools.py +++ b/crocoite/test_devtools.py @@ -24,7 +24,8 @@ import pytest  from aiohttp import web  import websockets -from .devtools import Browser, Tab, MethodNotFound, Crashed, InvalidParameter, Process, Passthrough +from .devtools import Browser, Tab, MethodNotFound, Crashed, \ +        InvalidParameter, Process, Passthrough  @pytest.fixture  async def browser (): @@ -73,8 +74,10 @@ async def test_tab_close (browser):  @pytest.mark.asyncio  async def test_tab_notify_enable_disable (tab): -    """ Make sure enabling/disabling notifications works for all known namespaces """ -    for name in ('Debugger', 'DOM', 'Log', 'Network', 'Page', 'Performance', 'Profiler', 'Runtime', 'Security'): +    """ Make sure enabling/disabling notifications works for all known +    namespaces """ +    for name in ('Debugger', 'DOM', 'Log', 'Network', 'Page', 'Performance', +            'Profiler', 'Runtime', 'Security'):          f = getattr (tab, name)          await f.enable ()          await f.disable () @@ -149,7 +152,8 @@ async def test_tab_function (tab):  @pytest.mark.asyncio  async def test_tab_function_hash (tab): -    d = {tab.Network.enable: 1, tab.Network.disable: 2, tab.Page: 3, tab.Page.enable: 4} +    d = {tab.Network.enable: 1, tab.Network.disable: 2, tab.Page: 3, +            tab.Page.enable: 4}      assert len (d) == 4  @pytest.mark.asyncio @@ -168,5 +172,5 @@ async def test_passthrough ():      url = 'http://localhost:12345'      async with Passthrough (url) as u: -        assert u == url +        assert str (u) == url diff --git a/crocoite/test_tools.py b/crocoite/test_tools.py index c320ad9..106aa59 100644 --- a/crocoite/test_tools.py +++ b/crocoite/test_tools.py @@ -97,7 +97,7 @@ def test_different_payload(writer):          httpHeaders = StatusAndHeaders('200 OK', {}, protocol='HTTP/1.1')          record = writer.create_warc_record ('http://example.com/', 'response', -                payload=BytesIO('data{}'.format(i).encode ('utf8')), +                payload=BytesIO(f'data{i}'.encode ('utf8')),                  warc_headers_dict=warcHeaders, http_headers=httpHeaders)          records.append (record) diff --git a/crocoite/tools.py b/crocoite/tools.py index 84c6f44..9c5d836 100644 --- a/crocoite/tools.py +++ b/crocoite/tools.py @@ -67,7 +67,7 @@ def mergeWarc (files, output):                                  'id': rid, 'date': headers.get_header('WARC-Date')}                          unique += 1                      else: -                        logging.debug ('Record {} is duplicate of {}'.format (rid, dup['id'])) +                        logging.debug (f'Record {rid} is duplicate of {dup["id"]}')                          # Payload may be identical, but HTTP headers are                          # (probably) not. Include them.                          record = writer.create_revisit_record ( @@ -80,7 +80,7 @@ def mergeWarc (files, output):                  else:                      unique += 1                  writer.write_record (record) -    logging.info ('Wrote {} unique records, {} revisits'.format (unique, revisit)) +    logging.info (f'Wrote {unique} unique records, {revisit} revisits')  def mergeWarcCli():      parser = argparse.ArgumentParser(description='Merge WARCs, reads filenames from stdin.') @@ -116,12 +116,12 @@ def extractScreenshot ():              urlSanitized = headers.get_header('WARC-Target-URI').replace ('/', '_')              xoff = 0              yoff = int (headers.get_header ('X-Crocoite-Screenshot-Y-Offset')) -            outpath = '{}-{}-{}-{}.png'.format (args.prefix, urlSanitized, xoff, yoff) +            outpath = f'{args.prefix}-{urlSanitized}-{xoff}-{yoff}.png'              if args.force or not os.path.exists (outpath):                  with open (outpath, 'wb') as out:                      shutil.copyfileobj (record.raw_stream, out)              else: -                print ('not overwriting {}'.format (outpath)) +                print (f'not overwriting {outputh}')  class Errata:      __slots__ = ('uuid', 'description', 'affects') @@ -145,8 +145,7 @@ class Errata:          return all (matchedAll)      def __repr__ (self): -        return '{}({!r}, {!r}, {!r})'.format (self.__class__.__name__, -                self.uuid, self.description, self.affects) +        return f'{self.__class__.__name__}({self.uuid!r}, {self.description!r}, {self.affects!r})'      @property      def fixable (self): @@ -180,7 +179,7 @@ def makeReport (fd):                  pass  def errata (): -    parser = argparse.ArgumentParser(description='Show/fix erratas for WARCs generated by {}.'.format (__package__)) +    parser = argparse.ArgumentParser(description=f'Show/fix erratas for WARCs generated by {__package__}.')      parser.add_argument('input', type=argparse.FileType ('rb'), help='Input WARC')      args = parser.parse_args() diff --git a/crocoite/util.py b/crocoite/util.py index eebe909..5bced53 100644 --- a/crocoite/util.py +++ b/crocoite/util.py @@ -48,8 +48,8 @@ def packageUrl (path):  async def getFormattedViewportMetrics (tab):      layoutMetrics = await tab.Page.getLayoutMetrics ()      # XXX: I’m not entirely sure which one we should use here -    return '{}x{}'.format (layoutMetrics['layoutViewport']['clientWidth'], -                layoutMetrics['layoutViewport']['clientHeight']) +    viewport = layoutMetrics['layoutViewport'] +    return f"{viewport['clientWidth']}x{viewport['clientHeight']}"  def getSoftwareInfo ():      """ Get software info for inclusion into warcinfo """ diff --git a/crocoite/warc.py b/crocoite/warc.py index 21a99aa..04dd871 100644 --- a/crocoite/warc.py +++ b/crocoite/warc.py @@ -87,7 +87,7 @@ class WarcHandler (EventHandler):          url = item.url          path = url.relative().with_fragment(None) -        httpHeaders = StatusAndHeaders('{} {} HTTP/1.1'.format (req['method'], path), +        httpHeaders = StatusAndHeaders(f'{req["method"]} {path} HTTP/1.1',                  item.requestHeaders, protocol='HTTP/1.1', is_http_request=True)          initiator = item.initiator          warcHeaders = { @@ -144,8 +144,8 @@ class WarcHandler (EventHandler):          else:              warcHeaders['X-Chrome-Base64Body'] = str (base64Encoded) -        httpHeaders = StatusAndHeaders('{} {}'.format (resp['status'], -                item.statusText), item.responseHeaders, +        httpHeaders = StatusAndHeaders(f'{resp["status"]} {item.statusText}', +                item.responseHeaders,                  protocol='HTTP/1.1')          # Content is saved decompressed and decoded, remove these headers @@ -163,7 +163,7 @@ class WarcHandler (EventHandler):              httpHeaders.replace_header ('content-type', contentType)          if rawBody is not None: -            httpHeaders.replace_header ('content-length', '{:d}'.format (len (rawBody))) +            httpHeaders.replace_header ('content-length', str (len (rawBody)))              bodyIo = BytesIO (rawBody)          else:              bodyIo = BytesIO () @@ -178,9 +178,10 @@ class WarcHandler (EventHandler):      def _writeScript (self, item):          writer = self.writer          encoding = 'utf-8' -        self.writeRecord (packageUrl ('script/{}'.format (item.path)), 'metadata', +        self.writeRecord (packageUrl (f'script/{item.path}'), 'metadata',                  payload=BytesIO (str (item).encode (encoding)), -                warc_headers_dict={'Content-Type': 'application/javascript; charset={}'.format (encoding)}) +                warc_headers_dict={'Content-Type': +                f'application/javascript; charset={encoding}'})      def _writeItem (self, item):          if item.failed: @@ -195,7 +196,7 @@ class WarcHandler (EventHandler):          if refersTo:              headers['WARC-Refers-To'] = refersTo          else: -            self.logger.error ('No document record found for {}'.format (url)) +            self.logger.error (f'No document record found for {url}')          return headers      def _writeDomSnapshot (self, item): @@ -234,7 +235,7 @@ class WarcHandler (EventHandler):          self.log.seek (0)          # XXX: we should use the type continuation here          self.writeRecord (packageUrl ('log'), 'resource', payload=self.log, -                warc_headers_dict={'Content-Type': 'text/plain; encoding={}'.format (self.logEncoding)}) +                warc_headers_dict={'Content-Type': f'text/plain; encoding={self.logEncoding}'})          self.log = BytesIO ()      def _writeLog (self, item): @@ -262,5 +263,5 @@ class WarcHandler (EventHandler):                  break          if not processed: -            self.logger.debug ('unknown event {}'.format (repr (item))) +            self.logger.debug (f'unknown event {item!r}') | 
