diff options
| author | Lars-Dominik Braun <lars@6xq.net> | 2018-11-14 18:40:28 +0100 | 
|---|---|---|
| committer | Lars-Dominik Braun <lars@6xq.net> | 2018-11-14 18:40:28 +0100 | 
| commit | 20634f87124e0529f45db4e5e801f1bb5c6de32c (patch) | |
| tree | 27f5307865f3a5188a71a7d14f1790bb021034a2 /crocoite | |
| parent | f273341d6486f139eed073e4664b985209567e96 (diff) | |
| download | crocoite-20634f87124e0529f45db4e5e801f1bb5c6de32c.tar.gz crocoite-20634f87124e0529f45db4e5e801f1bb5c6de32c.tar.bz2 crocoite-20634f87124e0529f45db4e5e801f1bb5c6de32c.zip | |
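Async chrome process startup
Move the Chrome service classes from .browser to .devtools (ChromeService becomes Process, NullService becomes Passthrough) and turn them into async context managers. Seems more fitting.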
Diffstat (limited to 'crocoite')
| -rw-r--r-- | crocoite/browser.py | 72 | ||||
| -rw-r--r-- | crocoite/cli.py | 6 | ||||
| -rw-r--r-- | crocoite/controller.py | 132 | ||||
| -rw-r--r-- | crocoite/devtools.py | 74 | ||||
| -rw-r--r-- | crocoite/test_browser.py | 15 | ||||
| -rw-r--r-- | crocoite/test_devtools.py | 19 | 
6 files changed, 161 insertions, 157 deletions
| diff --git a/crocoite/browser.py b/crocoite/browser.py index 515d06b..1b6debf 100644 --- a/crocoite/browser.py +++ b/crocoite/browser.py @@ -357,75 +357,3 @@ class SiteLoader:              self.logger.warning ('js dialog unknown',                      uuid='3ef7292e-8595-4e89-b834-0cc6bc40ee38', **kwargs) -import subprocess, os, time -from tempfile import mkdtemp -import shutil - -class ChromeService: -    """ Start Google Chrome listening on a random port """ - -    __slots__ = ('binary', 'windowSize', 'p', 'userDataDir') - -    def __init__ (self, binary='google-chrome-stable', windowSize=(1920, 1080)): -        self.binary = binary -        self.windowSize = windowSize -        self.p = None - -    def __enter__ (self): -        assert self.p is None -        self.userDataDir = mkdtemp () -        args = [self.binary, -                '--window-size={},{}'.format (*self.windowSize), -                '--user-data-dir={}'.format (self.userDataDir), # use temporory user dir -                '--no-default-browser-check', -                '--no-first-run', # don’t show first run screen -                '--disable-breakpad', # no error reports -                '--disable-extensions', -                '--disable-infobars', -                '--disable-notifications', # no libnotify -                '--headless', -                '--disable-gpu', -                '--hide-scrollbars', # hide scrollbars on screenshots -                '--mute-audio', # don’t play any audio -                '--remote-debugging-port=0', # pick a port. 
XXX: we may want to use --remote-debugging-pipe instead -                '--homepage=about:blank', -                'about:blank'] -        # start new session, so ^C does not affect subprocess -        self.p = subprocess.Popen (args, start_new_session=True, -                stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, -                stderr=subprocess.DEVNULL) -        port = None -        # chrome writes its current active devtools port to a file. due to the -        # sleep() this is rather ugly, but should work with all versions of the -        # browser. -        for i in range (100): -            try: -                with open (os.path.join (self.userDataDir, 'DevToolsActivePort'), 'r') as fd: -                    port = int (fd.readline ().strip ()) -                    break -            except FileNotFoundError: -                time.sleep (0.2) -        if port is None: -            raise Exception ('Chrome died on us.') - -        return 'http://localhost:{}'.format (port) - -    def __exit__ (self, *exc): -        self.p.terminate () -        self.p.wait () -        shutil.rmtree (self.userDataDir) -        self.p = None -        return False - -class NullService: -    __slots__ = ('url') - -    def __init__ (self, url): -        self.url = url - -    def __enter__ (self): -        return self.url - -    def __exit__ (self, *exc): -        return False - diff --git a/crocoite/cli.py b/crocoite/cli.py index c5dee35..8ebf557 100644 --- a/crocoite/cli.py +++ b/crocoite/cli.py @@ -28,7 +28,7 @@ from enum import IntEnum  from . 
import behavior  from .controller import SinglePageController, defaultSettings, \          ControllerSettings, StatsHandler, LogHandler -from .browser import NullService, ChromeService +from .devtools import Passthrough, Process  from .warc import WarcHandler  from .logger import Logger, JsonPrintConsumer, DatetimeConsumer, WarcHandlerConsumer  from .devtools import Crashed @@ -56,9 +56,9 @@ def single ():      logger = Logger (consumer=[DatetimeConsumer (), JsonPrintConsumer ()])      ret = SingleExitStatus.Fail -    service = ChromeService () +    service = Process ()      if args.browser: -        service = NullService (args.browser) +        service = Passthrough (args.browser)      settings = ControllerSettings (idleTimeout=args.idleTimeout, timeout=args.timeout)      with open (args.output, 'wb') as fd, WarcHandler (fd, logger) as warcHandler:          logger.connect (WarcHandlerConsumer (warcHandler)) diff --git a/crocoite/controller.py b/crocoite/controller.py index dd32331..3acbf26 100644 --- a/crocoite/controller.py +++ b/crocoite/controller.py @@ -89,7 +89,8 @@ class LogHandler (EventHandler):  import time, platform  from . 
import behavior as cbehavior -from .browser import ChromeService, SiteLoader, Item +from .browser import SiteLoader, Item +from .devtools import Process  from .util import getFormattedViewportMetrics, getRequirements  class ControllerStart: @@ -109,7 +110,7 @@ class SinglePageController:      __slots__ = ('url', 'output', 'service', 'behavior', 'settings', 'logger', 'handler')      def __init__ (self, url, output, logger, \ -            service=ChromeService (), behavior=cbehavior.available, \ +            service, behavior=cbehavior.available, \              settings=defaultSettings, handler=[]):          self.url = url          self.output = output @@ -129,75 +130,74 @@ class SinglePageController:              async for item in l:                  self.processItem (item) -        with self.service as browser: -            async with SiteLoader (browser, self.url, logger=logger) as l: -                handle = asyncio.ensure_future (processQueue ()) - -                start = time.time () - -                version = await l.tab.Browser.getVersion () -                payload = { -                        'software': { -                            'platform': platform.platform (), -                            'python': { -                                'implementation': platform.python_implementation(), -                                'version': platform.python_version (), -                                'build': platform.python_build () -                                }, -                            'self': getRequirements (__package__) -                            }, -                        'browser': { -                            'product': version['product'], -                            'useragent': version['userAgent'], -                            'viewport': await getFormattedViewportMetrics (l.tab), -                            }, -                        } -                self.processItem (ControllerStart (payload)) - -                # not all 
behavior scripts are allowed for every URL, filter them -                enabledBehavior = list (filter (lambda x: self.url in x, -                        map (lambda x: x (l, logger), self.behavior))) - -                for b in enabledBehavior: -                    async for item in b.onload (): -                        self.processItem (item) -                await l.start () - -                # XXX: this does not detect idle changes properly -                idleSince = None -                while True: -                    now = time.time() -                    runtime = now-start -                    if runtime >= self.settings.timeout or (idleSince and now-idleSince > self.settings.idleTimeout): -                        break -                    if len (l) == 0: -                        if idleSince is None: -                            idleSince = time.time () -                    else: -                        idleSince = None -                    await asyncio.sleep (1) -                await l.tab.Page.stopLoading () - -                for b in enabledBehavior: -                    async for item in b.onstop (): -                        self.processItem (item) +        async with self.service as browser, SiteLoader (browser, self.url, logger=logger) as l: +            handle = asyncio.ensure_future (processQueue ()) + +            start = time.time () +            version = await l.tab.Browser.getVersion () +            payload = { +                    'software': { +                        'platform': platform.platform (), +                        'python': { +                            'implementation': platform.python_implementation(), +                            'version': platform.python_version (), +                            'build': platform.python_build () +                            }, +                        'self': getRequirements (__package__) +                        }, +                    'browser': { +                        
'product': version['product'], +                        'useragent': version['userAgent'], +                        'viewport': await getFormattedViewportMetrics (l.tab), +                        }, +                    } +            self.processItem (ControllerStart (payload)) + +            # not all behavior scripts are allowed for every URL, filter them +            enabledBehavior = list (filter (lambda x: self.url in x, +                    map (lambda x: x (l, logger), self.behavior))) + +            for b in enabledBehavior: +                async for item in b.onload (): +                    self.processItem (item) +            await l.start () + +            # XXX: this does not detect idle changes properly +            idleSince = None +            while True: +                now = time.time() +                runtime = now-start +                if runtime >= self.settings.timeout or (idleSince and now-idleSince > self.settings.idleTimeout): +                    break +                if len (l) == 0: +                    if idleSince is None: +                        idleSince = time.time () +                else: +                    idleSince = None                  await asyncio.sleep (1) +            await l.tab.Page.stopLoading () -                for b in enabledBehavior: -                    async for item in b.onfinish (): -                        self.processItem (item) +            for b in enabledBehavior: +                async for item in b.onstop (): +                    self.processItem (item) -                # drain the queue XXX detect idle properly -                i = 0 -                while len (l) and i < 20: -                    i += 1 -                    await asyncio.sleep (1) +            await asyncio.sleep (1) -                if handle.done (): -                    handle.result () -                else: -                    handle.cancel () +            for b in enabledBehavior: +                async for item in 
b.onfinish (): +                    self.processItem (item) + +            # drain the queue XXX detect idle properly +            i = 0 +            while len (l) and i < 20: +                i += 1 +                await asyncio.sleep (1) + +            if handle.done (): +                handle.result () +            else: +                handle.cancel ()  class RecursionPolicy:      """ Abstract recursion policy """ diff --git a/crocoite/devtools.py b/crocoite/devtools.py index 6e97ca3..9ce4333 100644 --- a/crocoite/devtools.py +++ b/crocoite/devtools.py @@ -252,3 +252,77 @@ class Tab:          await ret.run ()          return ret +import os, time +from tempfile import mkdtemp +import shutil + +class Process: +    """ Start Google Chrome listening on a random port """ + +    __slots__ = ('binary', 'windowSize', 'p', 'userDataDir') + +    def __init__ (self, binary='google-chrome-stable', windowSize=(1920, 1080)): +        self.binary = binary +        self.windowSize = windowSize +        self.p = None + +    async def __aenter__ (self): +        assert self.p is None +        self.userDataDir = mkdtemp () +        args = [self.binary, +                '--window-size={},{}'.format (*self.windowSize), +                '--user-data-dir={}'.format (self.userDataDir), # use temporory user dir +                '--no-default-browser-check', +                '--no-first-run', # don’t show first run screen +                '--disable-breakpad', # no error reports +                '--disable-extensions', +                '--disable-infobars', +                '--disable-notifications', # no libnotify +                '--headless', +                '--disable-gpu', +                '--hide-scrollbars', # hide scrollbars on screenshots +                '--mute-audio', # don’t play any audio +                '--remote-debugging-port=0', # pick a port. 
XXX: we may want to use --remote-debugging-pipe instead +                '--homepage=about:blank', +                'about:blank'] +        # start new session, so ^C does not affect subprocess +        self.p = await asyncio.create_subprocess_exec (*args, +                stdout=asyncio.subprocess.DEVNULL, +                stderr=asyncio.subprocess.DEVNULL, +                stdin=asyncio.subprocess.DEVNULL, +                start_new_session=True) +        port = None +        # chrome writes its current active devtools port to a file. due to the +        # sleep() this is rather ugly, but should work with all versions of the +        # browser. +        for i in range (100): +            try: +                with open (os.path.join (self.userDataDir, 'DevToolsActivePort'), 'r') as fd: +                    port = int (fd.readline ().strip ()) +                    break +            except FileNotFoundError: +                await asyncio.sleep (0.2) +        if port is None: +            raise Exception ('Chrome died on us.') + +        return 'http://localhost:{}'.format (port) + +    async def __aexit__ (self, *exc): +        self.p.terminate () +        await self.p.wait () +        shutil.rmtree (self.userDataDir) +        self.p = None +        return False + +class Passthrough: +    __slots__ = ('url') + +    def __init__ (self, url): +        self.url = url + +    async def __aenter__ (self): +        return self.url + +    async def __aexit__ (self, *exc): +        return False + diff --git a/crocoite/test_browser.py b/crocoite/test_browser.py index 030ffb1..331fa49 100644 --- a/crocoite/test_browser.py +++ b/crocoite/test_browser.py @@ -25,9 +25,9 @@ from operator import itemgetter  from aiohttp import web  from http.server import BaseHTTPRequestHandler -from .browser import Item, SiteLoader, ChromeService, NullService +from .browser import Item, SiteLoader  from .logger import Logger, Consumer, JsonPrintConsumer -from .devtools import Crashed +from 
.devtools import Crashed, Process  # if you want to know what’s going on:  #logging.basicConfig(level=logging.DEBUG) @@ -122,12 +122,12 @@ def logger ():      return Logger (consumer=[AssertConsumer ()])  @pytest.fixture -def loader (server, logger): +async def loader (server, logger):      def f (path):          if path.startswith ('/'):              path = 'http://localhost:8080{}'.format (path)          return SiteLoader (browser, path, logger) -    with ChromeService () as browser: +    async with Process () as browser:          yield f  async def itemsLoaded (l, items): @@ -228,10 +228,3 @@ async def test_invalidurl (loader):              assert it.failed              break -def test_nullservice (): -    """ Null service returns the url as is """ - -    url = 'http://localhost:12345' -    with NullService (url) as u: -        assert u == url - diff --git a/crocoite/test_devtools.py b/crocoite/test_devtools.py index 4ffbbf8..8676e6c 100644 --- a/crocoite/test_devtools.py +++ b/crocoite/test_devtools.py @@ -24,12 +24,11 @@ import pytest  from aiohttp import web  import websockets -from .browser import ChromeService, NullService -from .devtools import Browser, Tab, MethodNotFound, Crashed, InvalidParameter +from .devtools import Browser, Tab, MethodNotFound, Crashed, InvalidParameter, Process, Passthrough  @pytest.fixture  async def browser (): -    with ChromeService () as url: +    async with Process () as url:          yield Browser (url)  @pytest.fixture @@ -138,7 +137,8 @@ async def test_recv_failure(browser):          with pytest.raises (Crashed):              await handle -def test_tab_function (tab): +@pytest.mark.asyncio +async def test_tab_function (tab):      assert tab.Network.enable.name == 'Network.enable'      assert tab.Network.disable == tab.Network.disable      assert tab.Network.enable != tab.Network.disable @@ -147,7 +147,8 @@ def test_tab_function (tab):      assert not callable (tab.Network.enable.name)      assert 'Network.enable' in repr 
(tab.Network.enable) -def test_tab_function_hash (tab): +@pytest.mark.asyncio +async def test_tab_function_hash (tab):      d = {tab.Network.enable: 1, tab.Network.disable: 2, tab.Page: 3, tab.Page.enable: 4}      assert len (d) == 4 @@ -161,3 +162,11 @@ async def test_ws_ping(tab):          await tab.ws.ping ()          await tab.Browser.getVersion () +@pytest.mark.asyncio +async def test_passthrough (): +    """ Null service returns the url as is """ + +    url = 'http://localhost:12345' +    async with Passthrough (url) as u: +        assert u == url + | 
