diff options
Diffstat (limited to 'crocoite')
-rw-r--r-- | crocoite/browser.py | 72 | ||||
-rw-r--r-- | crocoite/cli.py | 6 | ||||
-rw-r--r-- | crocoite/controller.py | 132 | ||||
-rw-r--r-- | crocoite/devtools.py | 74 | ||||
-rw-r--r-- | crocoite/test_browser.py | 15 | ||||
-rw-r--r-- | crocoite/test_devtools.py | 19 |
6 files changed, 161 insertions, 157 deletions
diff --git a/crocoite/browser.py b/crocoite/browser.py index 515d06b..1b6debf 100644 --- a/crocoite/browser.py +++ b/crocoite/browser.py @@ -357,75 +357,3 @@ class SiteLoader: self.logger.warning ('js dialog unknown', uuid='3ef7292e-8595-4e89-b834-0cc6bc40ee38', **kwargs) -import subprocess, os, time -from tempfile import mkdtemp -import shutil - -class ChromeService: - """ Start Google Chrome listening on a random port """ - - __slots__ = ('binary', 'windowSize', 'p', 'userDataDir') - - def __init__ (self, binary='google-chrome-stable', windowSize=(1920, 1080)): - self.binary = binary - self.windowSize = windowSize - self.p = None - - def __enter__ (self): - assert self.p is None - self.userDataDir = mkdtemp () - args = [self.binary, - '--window-size={},{}'.format (*self.windowSize), - '--user-data-dir={}'.format (self.userDataDir), # use temporory user dir - '--no-default-browser-check', - '--no-first-run', # don’t show first run screen - '--disable-breakpad', # no error reports - '--disable-extensions', - '--disable-infobars', - '--disable-notifications', # no libnotify - '--headless', - '--disable-gpu', - '--hide-scrollbars', # hide scrollbars on screenshots - '--mute-audio', # don’t play any audio - '--remote-debugging-port=0', # pick a port. XXX: we may want to use --remote-debugging-pipe instead - '--homepage=about:blank', - 'about:blank'] - # start new session, so ^C does not affect subprocess - self.p = subprocess.Popen (args, start_new_session=True, - stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) - port = None - # chrome writes its current active devtools port to a file. due to the - # sleep() this is rather ugly, but should work with all versions of the - # browser. - for i in range (100): - try: - with open (os.path.join (self.userDataDir, 'DevToolsActivePort'), 'r') as fd: - port = int (fd.readline ().strip ()) - break - except FileNotFoundError: - time.sleep (0.2) - if port is None: - raise Exception ('Chrome died on us.') - - return 'http://localhost:{}'.format (port) - - def __exit__ (self, *exc): - self.p.terminate () - self.p.wait () - shutil.rmtree (self.userDataDir) - self.p = None - return False - -class NullService: - __slots__ = ('url') - - def __init__ (self, url): - self.url = url - - def __enter__ (self): - return self.url - - def __exit__ (self, *exc): - return False - diff --git a/crocoite/cli.py b/crocoite/cli.py index c5dee35..8ebf557 100644 --- a/crocoite/cli.py +++ b/crocoite/cli.py @@ -28,7 +28,7 @@ from enum import IntEnum from . import behavior from .controller import SinglePageController, defaultSettings, \ ControllerSettings, StatsHandler, LogHandler -from .browser import NullService, ChromeService +from .devtools import Passthrough, Process from .warc import WarcHandler from .logger import Logger, JsonPrintConsumer, DatetimeConsumer, WarcHandlerConsumer from .devtools import Crashed @@ -56,9 +56,9 @@ def single (): logger = Logger (consumer=[DatetimeConsumer (), JsonPrintConsumer ()]) ret = SingleExitStatus.Fail - service = ChromeService () + service = Process () if args.browser: - service = NullService (args.browser) + service = Passthrough (args.browser) settings = ControllerSettings (idleTimeout=args.idleTimeout, timeout=args.timeout) with open (args.output, 'wb') as fd, WarcHandler (fd, logger) as warcHandler: logger.connect (WarcHandlerConsumer (warcHandler)) diff --git a/crocoite/controller.py b/crocoite/controller.py index dd32331..3acbf26 100644 --- a/crocoite/controller.py +++ b/crocoite/controller.py @@ -89,7 +89,8 @@ class LogHandler (EventHandler): import time, platform from . import behavior as cbehavior -from .browser import ChromeService, SiteLoader, Item +from .browser import SiteLoader, Item +from .devtools import Process from .util import getFormattedViewportMetrics, getRequirements class ControllerStart: @@ -109,7 +110,7 @@ class SinglePageController: __slots__ = ('url', 'output', 'service', 'behavior', 'settings', 'logger', 'handler') def __init__ (self, url, output, logger, \ - service=ChromeService (), behavior=cbehavior.available, \ + service, behavior=cbehavior.available, \ settings=defaultSettings, handler=[]): self.url = url self.output = output @@ -129,75 +130,74 @@ class SinglePageController: async for item in l: self.processItem (item) - with self.service as browser: - async with SiteLoader (browser, self.url, logger=logger) as l: - handle = asyncio.ensure_future (processQueue ()) - - start = time.time () - - version = await l.tab.Browser.getVersion () - payload = { - 'software': { - 'platform': platform.platform (), - 'python': { - 'implementation': platform.python_implementation(), - 'version': platform.python_version (), - 'build': platform.python_build () - }, - 'self': getRequirements (__package__) - }, - 'browser': { - 'product': version['product'], - 'useragent': version['userAgent'], - 'viewport': await getFormattedViewportMetrics (l.tab), - }, - } - self.processItem (ControllerStart (payload)) - - # not all behavior scripts are allowed for every URL, filter them - enabledBehavior = list (filter (lambda x: self.url in x, - map (lambda x: x (l, logger), self.behavior))) - - for b in enabledBehavior: - async for item in b.onload (): - self.processItem (item) - await l.start () - - # XXX: this does not detect idle changes properly - idleSince = None - while True: - now = time.time() - runtime = now-start - if runtime >= self.settings.timeout or (idleSince and now-idleSince > self.settings.idleTimeout): - break - if len (l) == 0: - if idleSince is None: - idleSince = time.time () - else: - idleSince = None - await asyncio.sleep (1) - await l.tab.Page.stopLoading () - - for b in enabledBehavior: - async for item in b.onstop (): - self.processItem (item) + async with self.service as browser, SiteLoader (browser, self.url, logger=logger) as l: + handle = asyncio.ensure_future (processQueue ()) + + start = time.time () + version = await l.tab.Browser.getVersion () + payload = { + 'software': { + 'platform': platform.platform (), + 'python': { + 'implementation': platform.python_implementation(), + 'version': platform.python_version (), + 'build': platform.python_build () + }, + 'self': getRequirements (__package__) + }, + 'browser': { + 'product': version['product'], + 'useragent': version['userAgent'], + 'viewport': await getFormattedViewportMetrics (l.tab), + }, + } + self.processItem (ControllerStart (payload)) + + # not all behavior scripts are allowed for every URL, filter them + enabledBehavior = list (filter (lambda x: self.url in x, + map (lambda x: x (l, logger), self.behavior))) + + for b in enabledBehavior: + async for item in b.onload (): + self.processItem (item) + await l.start () + + # XXX: this does not detect idle changes properly + idleSince = None + while True: + now = time.time() + runtime = now-start + if runtime >= self.settings.timeout or (idleSince and now-idleSince > self.settings.idleTimeout): + break + if len (l) == 0: + if idleSince is None: + idleSince = time.time () + else: + idleSince = None await asyncio.sleep (1) + await l.tab.Page.stopLoading () - for b in enabledBehavior: - async for item in b.onfinish (): - self.processItem (item) + for b in enabledBehavior: + async for item in b.onstop (): + self.processItem (item) - # drain the queue XXX detect idle properly - i = 0 - while len (l) and i < 20: - i += 1 - await asyncio.sleep (1) + await asyncio.sleep (1) - if handle.done (): - handle.result () - else: - handle.cancel () + for b in enabledBehavior: + async for item in b.onfinish (): + self.processItem (item) + + # drain the queue XXX detect idle properly + i = 0 + while len (l) and i < 20: + i += 1 + await asyncio.sleep (1) + + if handle.done (): + handle.result () + else: + handle.cancel () class RecursionPolicy: """ Abstract recursion policy """ diff --git a/crocoite/devtools.py b/crocoite/devtools.py index 6e97ca3..9ce4333 100644 --- a/crocoite/devtools.py +++ b/crocoite/devtools.py @@ -252,3 +252,77 @@ class Tab: await ret.run () return ret +import os, time +from tempfile import mkdtemp +import shutil + +class Process: + """ Start Google Chrome listening on a random port """ + + __slots__ = ('binary', 'windowSize', 'p', 'userDataDir') + + def __init__ (self, binary='google-chrome-stable', windowSize=(1920, 1080)): + self.binary = binary + self.windowSize = windowSize + self.p = None + + async def __aenter__ (self): + assert self.p is None + self.userDataDir = mkdtemp () + args = [self.binary, + '--window-size={},{}'.format (*self.windowSize), + '--user-data-dir={}'.format (self.userDataDir), # use temporory user dir + '--no-default-browser-check', + '--no-first-run', # don’t show first run screen + '--disable-breakpad', # no error reports + '--disable-extensions', + '--disable-infobars', + '--disable-notifications', # no libnotify + '--headless', + '--disable-gpu', + '--hide-scrollbars', # hide scrollbars on screenshots + '--mute-audio', # don’t play any audio + '--remote-debugging-port=0', # pick a port. XXX: we may want to use --remote-debugging-pipe instead + '--homepage=about:blank', + 'about:blank'] + # start new session, so ^C does not affect subprocess + self.p = await asyncio.create_subprocess_exec (*args, + stdout=asyncio.subprocess.DEVNULL, + stderr=asyncio.subprocess.DEVNULL, + stdin=asyncio.subprocess.DEVNULL, + start_new_session=True) + port = None + # chrome writes its current active devtools port to a file. due to the + # sleep() this is rather ugly, but should work with all versions of the + # browser. + for i in range (100): + try: + with open (os.path.join (self.userDataDir, 'DevToolsActivePort'), 'r') as fd: + port = int (fd.readline ().strip ()) + break + except FileNotFoundError: + await asyncio.sleep (0.2) + if port is None: + raise Exception ('Chrome died on us.') + + return 'http://localhost:{}'.format (port) + + async def __aexit__ (self, *exc): + self.p.terminate () + await self.p.wait () + shutil.rmtree (self.userDataDir) + self.p = None + return False + +class Passthrough: + __slots__ = ('url') + + def __init__ (self, url): + self.url = url + + async def __aenter__ (self): + return self.url + + async def __aexit__ (self, *exc): + return False + diff --git a/crocoite/test_browser.py b/crocoite/test_browser.py index 030ffb1..331fa49 100644 --- a/crocoite/test_browser.py +++ b/crocoite/test_browser.py @@ -25,9 +25,9 @@ from operator import itemgetter from aiohttp import web from http.server import BaseHTTPRequestHandler -from .browser import Item, SiteLoader, ChromeService, NullService +from .browser import Item, SiteLoader from .logger import Logger, Consumer, JsonPrintConsumer -from .devtools import Crashed +from .devtools import Crashed, Process # if you want to know what’s going on: #logging.basicConfig(level=logging.DEBUG) @@ -122,12 +122,12 @@ def logger (): return Logger (consumer=[AssertConsumer ()]) @pytest.fixture -def loader (server, logger): +async def loader (server, logger): def f (path): if path.startswith ('/'): path = 'http://localhost:8080{}'.format (path) return SiteLoader (browser, path, logger) - with ChromeService () as browser: + async with Process () as browser: yield f async def itemsLoaded (l, items): @@ -228,10 +228,3 @@ async def test_invalidurl (loader): assert it.failed break -def test_nullservice (): - """ Null service returns the url as is """ - - url = 'http://localhost:12345' - with NullService (url) as u: - assert u == url - diff --git a/crocoite/test_devtools.py b/crocoite/test_devtools.py index 4ffbbf8..8676e6c 100644 --- a/crocoite/test_devtools.py +++ b/crocoite/test_devtools.py @@ -24,12 +24,11 @@ import pytest from aiohttp import web import websockets -from .browser import ChromeService, NullService -from .devtools import Browser, Tab, MethodNotFound, Crashed, InvalidParameter +from .devtools import Browser, Tab, MethodNotFound, Crashed, InvalidParameter, Process, Passthrough @pytest.fixture async def browser (): - with ChromeService () as url: + async with Process () as url: yield Browser (url) @pytest.fixture @@ -138,7 +137,8 @@ async def test_recv_failure(browser): with pytest.raises (Crashed): await handle -def test_tab_function (tab): +@pytest.mark.asyncio +async def test_tab_function (tab): assert tab.Network.enable.name == 'Network.enable' assert tab.Network.disable == tab.Network.disable assert tab.Network.enable != tab.Network.disable @@ -147,7 +147,8 @@ def test_tab_function (tab): assert not callable (tab.Network.enable.name) assert 'Network.enable' in repr (tab.Network.enable) -def test_tab_function_hash (tab): +@pytest.mark.asyncio +async def test_tab_function_hash (tab): d = {tab.Network.enable: 1, tab.Network.disable: 2, tab.Page: 3, tab.Page.enable: 4} assert len (d) == 4 @@ -161,3 +162,11 @@ async def test_ws_ping(tab): await tab.ws.ping () await tab.Browser.getVersion () +@pytest.mark.asyncio +async def test_passthrough (): + """ Null service returns the url as is """ + + url = 'http://localhost:12345' + async with Passthrough (url) as u: + assert u == url + |