summaryrefslogtreecommitdiff
path: root/crocoite
diff options
context:
space:
mode:
Diffstat (limited to 'crocoite')
-rw-r--r--crocoite/browser.py72
-rw-r--r--crocoite/cli.py6
-rw-r--r--crocoite/controller.py132
-rw-r--r--crocoite/devtools.py74
-rw-r--r--crocoite/test_browser.py15
-rw-r--r--crocoite/test_devtools.py19
6 files changed, 161 insertions, 157 deletions
diff --git a/crocoite/browser.py b/crocoite/browser.py
index 515d06b..1b6debf 100644
--- a/crocoite/browser.py
+++ b/crocoite/browser.py
@@ -357,75 +357,3 @@ class SiteLoader:
self.logger.warning ('js dialog unknown',
uuid='3ef7292e-8595-4e89-b834-0cc6bc40ee38', **kwargs)
-import subprocess, os, time
-from tempfile import mkdtemp
-import shutil
-
-class ChromeService:
- """ Start Google Chrome listening on a random port """
-
- __slots__ = ('binary', 'windowSize', 'p', 'userDataDir')
-
- def __init__ (self, binary='google-chrome-stable', windowSize=(1920, 1080)):
- self.binary = binary
- self.windowSize = windowSize
- self.p = None
-
- def __enter__ (self):
- assert self.p is None
- self.userDataDir = mkdtemp ()
- args = [self.binary,
- '--window-size={},{}'.format (*self.windowSize),
- '--user-data-dir={}'.format (self.userDataDir), # use temporory user dir
- '--no-default-browser-check',
- '--no-first-run', # don’t show first run screen
- '--disable-breakpad', # no error reports
- '--disable-extensions',
- '--disable-infobars',
- '--disable-notifications', # no libnotify
- '--headless',
- '--disable-gpu',
- '--hide-scrollbars', # hide scrollbars on screenshots
- '--mute-audio', # don’t play any audio
- '--remote-debugging-port=0', # pick a port. XXX: we may want to use --remote-debugging-pipe instead
- '--homepage=about:blank',
- 'about:blank']
- # start new session, so ^C does not affect subprocess
- self.p = subprocess.Popen (args, start_new_session=True,
- stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL,
- stderr=subprocess.DEVNULL)
- port = None
- # chrome writes its current active devtools port to a file. due to the
- # sleep() this is rather ugly, but should work with all versions of the
- # browser.
- for i in range (100):
- try:
- with open (os.path.join (self.userDataDir, 'DevToolsActivePort'), 'r') as fd:
- port = int (fd.readline ().strip ())
- break
- except FileNotFoundError:
- time.sleep (0.2)
- if port is None:
- raise Exception ('Chrome died on us.')
-
- return 'http://localhost:{}'.format (port)
-
- def __exit__ (self, *exc):
- self.p.terminate ()
- self.p.wait ()
- shutil.rmtree (self.userDataDir)
- self.p = None
- return False
-
-class NullService:
- __slots__ = ('url')
-
- def __init__ (self, url):
- self.url = url
-
- def __enter__ (self):
- return self.url
-
- def __exit__ (self, *exc):
- return False
-
diff --git a/crocoite/cli.py b/crocoite/cli.py
index c5dee35..8ebf557 100644
--- a/crocoite/cli.py
+++ b/crocoite/cli.py
@@ -28,7 +28,7 @@ from enum import IntEnum
from . import behavior
from .controller import SinglePageController, defaultSettings, \
ControllerSettings, StatsHandler, LogHandler
-from .browser import NullService, ChromeService
+from .devtools import Passthrough, Process
from .warc import WarcHandler
from .logger import Logger, JsonPrintConsumer, DatetimeConsumer, WarcHandlerConsumer
from .devtools import Crashed
@@ -56,9 +56,9 @@ def single ():
logger = Logger (consumer=[DatetimeConsumer (), JsonPrintConsumer ()])
ret = SingleExitStatus.Fail
- service = ChromeService ()
+ service = Process ()
if args.browser:
- service = NullService (args.browser)
+ service = Passthrough (args.browser)
settings = ControllerSettings (idleTimeout=args.idleTimeout, timeout=args.timeout)
with open (args.output, 'wb') as fd, WarcHandler (fd, logger) as warcHandler:
logger.connect (WarcHandlerConsumer (warcHandler))
diff --git a/crocoite/controller.py b/crocoite/controller.py
index dd32331..3acbf26 100644
--- a/crocoite/controller.py
+++ b/crocoite/controller.py
@@ -89,7 +89,8 @@ class LogHandler (EventHandler):
import time, platform
from . import behavior as cbehavior
-from .browser import ChromeService, SiteLoader, Item
+from .browser import SiteLoader, Item
+from .devtools import Process
from .util import getFormattedViewportMetrics, getRequirements
class ControllerStart:
@@ -109,7 +110,7 @@ class SinglePageController:
__slots__ = ('url', 'output', 'service', 'behavior', 'settings', 'logger', 'handler')
def __init__ (self, url, output, logger, \
- service=ChromeService (), behavior=cbehavior.available, \
+ service, behavior=cbehavior.available, \
settings=defaultSettings, handler=[]):
self.url = url
self.output = output
@@ -129,75 +130,74 @@ class SinglePageController:
async for item in l:
self.processItem (item)
- with self.service as browser:
- async with SiteLoader (browser, self.url, logger=logger) as l:
- handle = asyncio.ensure_future (processQueue ())
-
- start = time.time ()
-
- version = await l.tab.Browser.getVersion ()
- payload = {
- 'software': {
- 'platform': platform.platform (),
- 'python': {
- 'implementation': platform.python_implementation(),
- 'version': platform.python_version (),
- 'build': platform.python_build ()
- },
- 'self': getRequirements (__package__)
- },
- 'browser': {
- 'product': version['product'],
- 'useragent': version['userAgent'],
- 'viewport': await getFormattedViewportMetrics (l.tab),
- },
- }
- self.processItem (ControllerStart (payload))
-
- # not all behavior scripts are allowed for every URL, filter them
- enabledBehavior = list (filter (lambda x: self.url in x,
- map (lambda x: x (l, logger), self.behavior)))
-
- for b in enabledBehavior:
- async for item in b.onload ():
- self.processItem (item)
- await l.start ()
-
- # XXX: this does not detect idle changes properly
- idleSince = None
- while True:
- now = time.time()
- runtime = now-start
- if runtime >= self.settings.timeout or (idleSince and now-idleSince > self.settings.idleTimeout):
- break
- if len (l) == 0:
- if idleSince is None:
- idleSince = time.time ()
- else:
- idleSince = None
- await asyncio.sleep (1)
- await l.tab.Page.stopLoading ()
-
- for b in enabledBehavior:
- async for item in b.onstop ():
- self.processItem (item)
+ async with self.service as browser, SiteLoader (browser, self.url, logger=logger) as l:
+ handle = asyncio.ensure_future (processQueue ())
+
+ start = time.time ()
+ version = await l.tab.Browser.getVersion ()
+ payload = {
+ 'software': {
+ 'platform': platform.platform (),
+ 'python': {
+ 'implementation': platform.python_implementation(),
+ 'version': platform.python_version (),
+ 'build': platform.python_build ()
+ },
+ 'self': getRequirements (__package__)
+ },
+ 'browser': {
+ 'product': version['product'],
+ 'useragent': version['userAgent'],
+ 'viewport': await getFormattedViewportMetrics (l.tab),
+ },
+ }
+ self.processItem (ControllerStart (payload))
+
+ # not all behavior scripts are allowed for every URL, filter them
+ enabledBehavior = list (filter (lambda x: self.url in x,
+ map (lambda x: x (l, logger), self.behavior)))
+
+ for b in enabledBehavior:
+ async for item in b.onload ():
+ self.processItem (item)
+ await l.start ()
+
+ # XXX: this does not detect idle changes properly
+ idleSince = None
+ while True:
+ now = time.time()
+ runtime = now-start
+ if runtime >= self.settings.timeout or (idleSince and now-idleSince > self.settings.idleTimeout):
+ break
+ if len (l) == 0:
+ if idleSince is None:
+ idleSince = time.time ()
+ else:
+ idleSince = None
await asyncio.sleep (1)
+ await l.tab.Page.stopLoading ()
- for b in enabledBehavior:
- async for item in b.onfinish ():
- self.processItem (item)
+ for b in enabledBehavior:
+ async for item in b.onstop ():
+ self.processItem (item)
- # drain the queue XXX detect idle properly
- i = 0
- while len (l) and i < 20:
- i += 1
- await asyncio.sleep (1)
+ await asyncio.sleep (1)
- if handle.done ():
- handle.result ()
- else:
- handle.cancel ()
+ for b in enabledBehavior:
+ async for item in b.onfinish ():
+ self.processItem (item)
+
+ # drain the queue XXX detect idle properly
+ i = 0
+ while len (l) and i < 20:
+ i += 1
+ await asyncio.sleep (1)
+
+ if handle.done ():
+ handle.result ()
+ else:
+ handle.cancel ()
class RecursionPolicy:
""" Abstract recursion policy """
diff --git a/crocoite/devtools.py b/crocoite/devtools.py
index 6e97ca3..9ce4333 100644
--- a/crocoite/devtools.py
+++ b/crocoite/devtools.py
@@ -252,3 +252,77 @@ class Tab:
await ret.run ()
return ret
+import os, time
+from tempfile import mkdtemp
+import shutil
+
+class Process:
+ """ Start Google Chrome listening on a random port """
+
+ __slots__ = ('binary', 'windowSize', 'p', 'userDataDir')
+
+ def __init__ (self, binary='google-chrome-stable', windowSize=(1920, 1080)):
+ self.binary = binary
+ self.windowSize = windowSize
+ self.p = None
+
+ async def __aenter__ (self):
+ assert self.p is None
+ self.userDataDir = mkdtemp ()
+ args = [self.binary,
+ '--window-size={},{}'.format (*self.windowSize),
+ '--user-data-dir={}'.format (self.userDataDir), # use temporory user dir
+ '--no-default-browser-check',
+ '--no-first-run', # don’t show first run screen
+ '--disable-breakpad', # no error reports
+ '--disable-extensions',
+ '--disable-infobars',
+ '--disable-notifications', # no libnotify
+ '--headless',
+ '--disable-gpu',
+ '--hide-scrollbars', # hide scrollbars on screenshots
+ '--mute-audio', # don’t play any audio
+ '--remote-debugging-port=0', # pick a port. XXX: we may want to use --remote-debugging-pipe instead
+ '--homepage=about:blank',
+ 'about:blank']
+ # start new session, so ^C does not affect subprocess
+ self.p = await asyncio.create_subprocess_exec (*args,
+ stdout=asyncio.subprocess.DEVNULL,
+ stderr=asyncio.subprocess.DEVNULL,
+ stdin=asyncio.subprocess.DEVNULL,
+ start_new_session=True)
+ port = None
+ # chrome writes its current active devtools port to a file. due to the
+ # sleep() this is rather ugly, but should work with all versions of the
+ # browser.
+ for i in range (100):
+ try:
+ with open (os.path.join (self.userDataDir, 'DevToolsActivePort'), 'r') as fd:
+ port = int (fd.readline ().strip ())
+ break
+ except FileNotFoundError:
+ await asyncio.sleep (0.2)
+ if port is None:
+ raise Exception ('Chrome died on us.')
+
+ return 'http://localhost:{}'.format (port)
+
+ async def __aexit__ (self, *exc):
+ self.p.terminate ()
+ await self.p.wait ()
+ shutil.rmtree (self.userDataDir)
+ self.p = None
+ return False
+
+class Passthrough:
+ __slots__ = ('url')
+
+ def __init__ (self, url):
+ self.url = url
+
+ async def __aenter__ (self):
+ return self.url
+
+ async def __aexit__ (self, *exc):
+ return False
+
diff --git a/crocoite/test_browser.py b/crocoite/test_browser.py
index 030ffb1..331fa49 100644
--- a/crocoite/test_browser.py
+++ b/crocoite/test_browser.py
@@ -25,9 +25,9 @@ from operator import itemgetter
from aiohttp import web
from http.server import BaseHTTPRequestHandler
-from .browser import Item, SiteLoader, ChromeService, NullService
+from .browser import Item, SiteLoader
from .logger import Logger, Consumer, JsonPrintConsumer
-from .devtools import Crashed
+from .devtools import Crashed, Process
# if you want to know what’s going on:
#logging.basicConfig(level=logging.DEBUG)
@@ -122,12 +122,12 @@ def logger ():
return Logger (consumer=[AssertConsumer ()])
@pytest.fixture
-def loader (server, logger):
+async def loader (server, logger):
def f (path):
if path.startswith ('/'):
path = 'http://localhost:8080{}'.format (path)
return SiteLoader (browser, path, logger)
- with ChromeService () as browser:
+ async with Process () as browser:
yield f
async def itemsLoaded (l, items):
@@ -228,10 +228,3 @@ async def test_invalidurl (loader):
assert it.failed
break
-def test_nullservice ():
- """ Null service returns the url as is """
-
- url = 'http://localhost:12345'
- with NullService (url) as u:
- assert u == url
-
diff --git a/crocoite/test_devtools.py b/crocoite/test_devtools.py
index 4ffbbf8..8676e6c 100644
--- a/crocoite/test_devtools.py
+++ b/crocoite/test_devtools.py
@@ -24,12 +24,11 @@ import pytest
from aiohttp import web
import websockets
-from .browser import ChromeService, NullService
-from .devtools import Browser, Tab, MethodNotFound, Crashed, InvalidParameter
+from .devtools import Browser, Tab, MethodNotFound, Crashed, InvalidParameter, Process, Passthrough
@pytest.fixture
async def browser ():
- with ChromeService () as url:
+ async with Process () as url:
yield Browser (url)
@pytest.fixture
@@ -138,7 +137,8 @@ async def test_recv_failure(browser):
with pytest.raises (Crashed):
await handle
-def test_tab_function (tab):
+@pytest.mark.asyncio
+async def test_tab_function (tab):
assert tab.Network.enable.name == 'Network.enable'
assert tab.Network.disable == tab.Network.disable
assert tab.Network.enable != tab.Network.disable
@@ -147,7 +147,8 @@ def test_tab_function (tab):
assert not callable (tab.Network.enable.name)
assert 'Network.enable' in repr (tab.Network.enable)
-def test_tab_function_hash (tab):
+@pytest.mark.asyncio
+async def test_tab_function_hash (tab):
d = {tab.Network.enable: 1, tab.Network.disable: 2, tab.Page: 3, tab.Page.enable: 4}
assert len (d) == 4
@@ -161,3 +162,11 @@ async def test_ws_ping(tab):
await tab.ws.ping ()
await tab.Browser.getVersion ()
+@pytest.mark.asyncio
+async def test_passthrough ():
+ """ Null service returns the url as is """
+
+ url = 'http://localhost:12345'
+ async with Passthrough (url) as u:
+ assert u == url
+