summaryrefslogtreecommitdiff
path: root/crocoite/devtools.py
diff options
context:
space:
mode:
Diffstat (limited to 'crocoite/devtools.py')
-rw-r--r--crocoite/devtools.py392
1 files changed, 392 insertions, 0 deletions
diff --git a/crocoite/devtools.py b/crocoite/devtools.py
new file mode 100644
index 0000000..8b5c69d
--- /dev/null
+++ b/crocoite/devtools.py
@@ -0,0 +1,392 @@
+# Copyright (c) 2017 crocoite contributors
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+"""
+Communication with Google Chrome through its DevTools protocol.
+"""
+
+import json, asyncio, logging, os
+from tempfile import mkdtemp
+import shutil
+from http.cookies import Morsel
+
+import aiohttp, websockets
+from yarl import URL
+
+from .util import StrJsonEncoder
+
+logger = logging.getLogger (__name__)
+
class Browser:
    """
    Communicate with Google Chrome through its DevTools protocol.

    Asynchronous context manager that asks the browser's HTTP endpoint for a
    new tab when entering and returns the connected Tab object. The tab is
    closed again upon exit. Iterating asynchronously over an instance lists
    the browser's existing tabs of type "page".

    :param url: Base URL of the DevTools HTTP endpoint, anything accepted by
        yarl.URL
    """

    __slots__ = ('session', 'url', 'tab')

    def __init__ (self, url):
        self.url = URL (url)
        # both are only set while the context manager is entered
        self.session = None
        self.tab = None

    async def __aiter__ (self):
        """ List all tabs """
        # uses its own short-lived session, so listing works without
        # entering the context manager
        async with aiohttp.ClientSession () as session:
            async with session.get (self.url.with_path ('/json/list')) as r:
                resp = await r.json ()
                for tab in resp:
                    if tab['type'] == 'page':
                        yield tab

    async def __aenter__ (self):
        """
        Create tab

        :return: Tab connected to the newly created browser tab
        """
        assert self.tab is None
        assert self.session is None
        self.session = aiohttp.ClientSession ()
        try:
            async with self.session.get (self.url.with_path ('/json/new')) as r:
                resp = await r.json ()
            self.tab = await Tab.create (**resp)
        except BaseException:
            # __aexit__ is not called if entering fails, so the session has
            # to be released right here
            await self.session.close ()
            self.session = None
            raise
        return self.tab

    async def __aexit__ (self, excType, excValue, traceback):
        """
        Close the tab and release the HTTP session.

        Never suppresses the exception that caused the exit (returns False).
        """
        assert self.tab is not None
        assert self.session is not None

        try:
            await self.tab.close ()

            try:
                async with self.session.get (self.url.with_path (f'/json/close/{self.tab.id}')) as r:
                    resp = await r.text ()
                    assert resp == 'Target is closing'
            except aiohttp.client_exceptions.ClientConnectorError:
                # oh boy, the whole browser crashed instead
                if excType is not Crashed:
                    # this one is more important
                    raise
                # otherwise the Crashed exception is reraised by
                # `return False`
        finally:
            # release the session no matter how closing the tab went, so a
            # failed shutdown does not leak the connection pool
            self.tab = None
            await self.session.close ()
            self.session = None

        return False
+
class TabFunction:
    """
    Helper class for infinite-depth tab functions.

    A method usually consists of namespace (Page, Network, …) and function name
    (getFoobar) separated by a dot. This class creates these function names
    while providing an intuitive Python interface (tab.Network.getFoobar).

    Instances compare equal and hash by their dotted name only; the tab they
    are bound to is not taken into account.

    This was inspired by pychrome.

    :param name: Dotted method name accumulated so far
    :param tab: Callable invoked as ``tab (name, **kwargs)`` when this object
        is called
    """

    __slots__ = ('name', 'tab')

    def __init__ (self, name, tab):
        self.name = name
        self.tab = tab

    def __eq__ (self, b):
        # let Python fall back to its default handling for foreign types
        # instead of blowing up on `fn == None` or `fn in mixedList`
        if not isinstance (b, TabFunction):
            return NotImplemented
        return self.name == b.name

    def __hash__ (self):
        return hash (self.name)

    def __getattr__ (self, k):
        """ Extend the dotted name by one component """
        # __getattr__ only runs when regular lookup failed. For our own slots
        # that means the instance is not initialised yet (copy, unpickle) and
        # building a new name from self.name would recurse forever. Dunder
        # names are protocol probes (hasattr (x, '__setstate__')) and never
        # part of a DevTools method name.
        if k in self.__slots__ or (k.startswith ('__') and k.endswith ('__')):
            raise AttributeError (k)
        return TabFunction (f'{self.name}.{k}', self.tab)

    async def __call__ (self, **kwargs):
        """ Invoke the method on the tab and return its result """
        return await self.tab (self.name, **kwargs)

    def __repr__ (self):
        return f'<TabFunction {self.name}>'
+
class TabException (Exception):
    """ Base class for all errors reported while talking to a tab """

class Crashed (TabException):
    """ The tab (or the connection to it) is gone """

class MethodNotFound (TabException):
    """ The browser does not know the requested method """

class InvalidParameter (TabException):
    """ The browser rejected the parameters of a request """

# map error codes to native exceptions
errorMap = {
    -32601: MethodNotFound,
    -32602: InvalidParameter,
    }
+
class Tab:
    """
    Communicate with a single Google Chrome browser tab.

    Requests are sent by calling the tab (directly or through attribute
    access, i.e. ``await tab.Page.enable ()``). Events sent by the browser
    are collected in a queue and can be retrieved with get().
    """
    __slots__ = ('id', 'wsUrl', 'ws', 'msgid', 'transactions', 'queue', '_recvHandle', 'crashed')

    def __init__ (self, tabid, ws):
        """ Do not use this method, use Browser context manager. """
        self.id = tabid
        self.ws = ws
        # id of the next request
        self.msgid = 1
        self.crashed = False
        # pending requests, message id → {'event': …, 'result': …}
        self.transactions = {}
        # events received from the browser, plus Crashed on failure
        self.queue = asyncio.Queue ()
        # set by run(). Initialised here, because a missing attribute would
        # be answered by __getattr__ with a TabFunction instead of failing.
        self._recvHandle = None

    def __getattr__ (self, k):
        return TabFunction (k, self)

    async def __call__ (self, method, **kwargs):
        """
        Actually call browser method with kwargs

        :param method: Full method name, for instance Page.navigate
        :param kwargs: Parameters of the method
        :return: The result member of the browser's response
        :raises Crashed: If the tab crashed or the receiver is not running
        :raises TabException: (or a subclass from errorMap) if the browser
            responds with an error
        """

        if self.crashed or self._recvHandle is None or self._recvHandle.done ():
            raise Crashed ()

        msgid = self.msgid
        self.msgid += 1
        message = {'method': method, 'params': kwargs, 'id': msgid}
        t = self.transactions[msgid] = {'event': asyncio.Event (), 'result': None}
        try:
            # lazy formatting, messages can be large
            logger.debug ('← %s', message)
            await self.ws.send (json.dumps (message, cls=StrJsonEncoder))
            # _recvProcess stores the result and sets the event
            await t['event'].wait ()
        finally:
            # also forget the transaction if sending failed or we were
            # cancelled while waiting, otherwise it would stay around forever
            self.transactions.pop (msgid, None)
        ret = t['result']
        if isinstance (ret, Exception):
            raise ret
        return ret

    async def _recvProcess (self):
        """
        Receive process that dispatches received websocket frames

        These are either events which will be put into a queue or request
        responses which unblock a __call__.
        """

        async def markCrashed (reason):
            # all pending requests can be considered failed since the
            # browser state is lost
            for v in self.transactions.values ():
                v['result'] = Crashed (reason)
                v['event'].set ()
            # and all future requests will fail as well until reloaded
            self.crashed = True
            await self.queue.put (Crashed (reason))

        while True:
            try:
                msg = await self.ws.recv ()
                msg = json.loads (msg)
            except Exception as e:
                # right now we cannot recover from this
                await markCrashed (e)
                break
            # lazy formatting: frames may carry whole screenshots and must
            # not be turned into a string unless debug logging is enabled
            logger.debug ('→ %s', msg)
            if 'id' in msg:
                # response to a request
                msgid = msg['id']
                t = self.transactions.get (msgid, None)
                if t is not None:
                    if 'error' in msg:
                        e = msg['error']
                        t['result'] = errorMap.get (e['code'], TabException) (e['code'], e['message'])
                    else:
                        t['result'] = msg['result']
                    t['event'].set ()
                else:
                    # ignore stale result
                    pass # pragma: no cover
            elif 'method' in msg:
                # event; special treatment
                if msg['method'] == 'Inspector.targetCrashed':
                    await markCrashed ('target')
                else:
                    await self.queue.put (msg)
            else:
                assert False # pragma: no cover

    async def run (self):
        """ Start the receive process in the background """
        self._recvHandle = asyncio.ensure_future (self._recvProcess ())

    async def close (self):
        """ Stop the receive process and close the websocket """
        if self._recvHandle is not None:
            self._recvHandle.cancel ()
        await self.ws.close ()
        # no join, throw away the queue. There will be nobody listening on the
        # other end.
        #await self.queue.join ()

    @property
    def pending (self):
        """ Number of queued events not yet retrieved by get() """
        return self.queue.qsize ()

    async def get (self):
        """
        Wait for the next event

        :return: Tuple of the event's method as TabFunction and its params
        :raises Crashed: If the tab crashed
        """
        def getattrRecursive (obj, name):
            if '.' in name:
                n, ext = name.split ('.', 1)
                return getattrRecursive (getattr (obj, n), ext)
            return getattr (obj, name)

        if self.crashed:
            raise Crashed ()

        ret = await self.queue.get ()
        # the receive process reports a crash through the queue as well
        if isinstance (ret, Exception):
            raise ret
        return getattrRecursive (self, ret['method']), ret['params']

    @classmethod
    async def create (cls, **kwargs):
        """
        Async init

        :param kwargs: Tab description; the keys webSocketDebuggerUrl and id
            are used
        :return: Connected Tab with its receive process running
        """
        # increase size limit of a single frame to something ridiculously high,
        # so we can safely grab screenshots
        maxSize = 100*1024*1024 # 100 MB
        # chrome does not like pings and kills the connection, disable them
        ws = await websockets.connect(kwargs['webSocketDebuggerUrl'],
                max_size=maxSize, ping_interval=None)
        ret = cls (kwargs['id'], ws)
        await ret.run ()
        return ret
+
class Process:
    """
    Start Google Chrome listening on a random port

    Asynchronous context manager. Entering launches a headless browser with a
    fresh temporary user data directory and returns the URL of its DevTools
    endpoint, exiting terminates the browser and removes the directory.

    :param binary: Name of or path to the browser executable
    :param windowSize: Tuple (width, height) passed as --window-size
    """

    __slots__ = ('binary', 'windowSize', 'p', 'userDataDir')

    def __init__ (self, binary='google-chrome-stable', windowSize=(1920, 1080)):
        self.binary = binary
        self.windowSize = windowSize
        self.p = None
        self.userDataDir = None

    async def __aenter__ (self):
        """
        Launch the browser

        :return: yarl.URL of the DevTools HTTP endpoint on localhost
        :raises Exception: If the browser did not announce its port in time
        """
        assert self.p is None
        self.userDataDir = mkdtemp (prefix=__package__ + '-chrome-userdata-')
        try:
            return await self._launch ()
        except BaseException:
            # __aexit__ is not called if entering fails; do not leave the
            # browser process or its directory behind
            await self._cleanup ()
            raise

    async def _launch (self):
        """ Start the browser process and wait for its DevTools port """
        # see https://github.com/GoogleChrome/chrome-launcher/blob/master/docs/chrome-flags-for-tools.md
        args = [self.binary,
                '--window-size={},{}'.format (*self.windowSize),
                f'--user-data-dir={self.userDataDir}', # use temporary user dir
                '--no-default-browser-check',
                '--no-first-run', # don’t show first run screen
                '--disable-breakpad', # no error reports
                '--disable-extensions',
                '--disable-infobars',
                '--disable-notifications', # no libnotify
                '--disable-background-networking', # disable background services (updating, safe browsing, …)
                '--safebrowsing-disable-auto-update',
                '--disable-sync', # no google account syncing
                '--metrics-recording-only', # do not submit metrics
                '--disable-default-apps',
                '--disable-background-timer-throttling',
                '--disable-client-side-phishing-detection',
                '--disable-popup-blocking',
                '--disable-prompt-on-repost',
                '--enable-automation', # enable various automation-related things
                '--password-store=basic',
                '--headless',
                '--disable-gpu',
                '--hide-scrollbars', # hide scrollbars on screenshots
                '--mute-audio', # don’t play any audio
                '--remote-debugging-port=0', # pick a port. XXX: we may want to use --remote-debugging-pipe instead
                '--homepage=about:blank',
                'about:blank']
        # start new session, so ^C does not affect subprocess
        self.p = await asyncio.create_subprocess_exec (*args,
                stdout=asyncio.subprocess.DEVNULL,
                stderr=asyncio.subprocess.DEVNULL,
                stdin=asyncio.subprocess.DEVNULL,
                start_new_session=True)
        port = None
        portFile = os.path.join (self.userDataDir, 'DevToolsActivePort')
        # chrome writes its current active devtools port to a file. due to the
        # sleep() this is rather ugly, but should work with all versions of the
        # browser.
        for _ in range (100):
            try:
                with open (portFile, 'r') as fd:
                    port = int (fd.readline ().strip ())
                break
            except (FileNotFoundError, ValueError):
                # ValueError: the file exists already, but the port has not
                # been written to it yet. Try again, just like for a missing
                # file.
                await asyncio.sleep (0.2)
        if port is None:
            raise Exception ('Chrome died on us.')

        return URL.build(scheme='http', host='localhost', port=port)

    async def _cleanup (self):
        """ Terminate the browser and remove its user data directory """
        if self.p is not None:
            try:
                self.p.terminate ()
                await self.p.wait ()
            except ProcessLookupError:
                # ok, fine, dead already
                pass
            self.p = None

        if self.userDataDir is None:
            return

        # Try to delete the temporary directory multiple times. It looks like
        # Chrome will change files in there even after it exited (i.e. .wait()
        # returned). Very strange.
        for _ in range (5):
            try:
                shutil.rmtree (self.userDataDir)
                break
            except FileNotFoundError:
                # nothing left to delete
                break
            except OSError:
                # only retry on file system errors, cancellation and ^C must
                # get through
                await asyncio.sleep (0.2)

    async def __aexit__ (self, *exc):
        """ Stop the browser; never suppresses exceptions """
        await self._cleanup ()
        return False
+
class Passthrough:
    """
    Asynchronous context manager that hands out a fixed URL.

    Entering returns the URL given to the constructor, exiting does nothing
    and never suppresses exceptions.
    """

    __slots__ = ('url',)

    def __init__ (self, url):
        self.url = URL (url)

    async def __aenter__ (self):
        # nothing to set up
        return self.url

    async def __aexit__ (self, *exc):
        # nothing to tear down, let exceptions propagate
        return False
+
def toCookieParam (m):
    """
    Build a Chrome CookieParam dict from a http.cookies.Morsel, see
    https://chromedevtools.github.io/devtools-protocol/1-3/Network#type-CookieParam

    Name and value are always present. The attributes expires, path, domain,
    secure and httponly (as httpOnly) are copied if they are set.

    :param m: Morsel to convert
    :return: dict with CookieParam fields
    :raises ValueError: If max-age, comment or version is set, since those
        cannot be converted
    """

    assert isinstance (m, Morsel)

    # unsupported by chrome, complain about the first one that is set
    rejected = [attr for attr in ('max-age', 'comment', 'version') if m[attr]]
    if rejected:
        k = rejected[0]
        raise ValueError (f'Unsupported cookie attribute {k} set, cannot convert')

    # Morsel attribute → CookieParam field
    fields = (
        ('expires', 'expires'),
        ('path', 'path'),
        ('domain', 'domain'),
        ('secure', 'secure'),
        ('httponly', 'httpOnly'),
        )

    param = {'name': m.key, 'value': m.value}
    # unset (empty) attributes are left out
    param.update ((cname, m[mname]) for mname, cname in fields if m[mname])
    return param
+