From 28c991d1f622046fcd22e9d471b3a817f706f0bb Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Thu, 22 Nov 2018 16:40:50 +0100 Subject: controller: Improve idle waiting --- crocoite/browser.py | 40 ++++++++++++++++++++++++++++++++++++++-- crocoite/controller.py | 43 +++++++++++++++++++++++++++---------------- crocoite/test_browser.py | 25 ++++++++++++++++++++++++- 3 files changed, 89 insertions(+), 19 deletions(-) (limited to 'crocoite') diff --git a/crocoite/browser.py b/crocoite/browser.py index 44b94e1..93d9228 100644 --- a/crocoite/browser.py +++ b/crocoite/browser.py @@ -153,6 +153,29 @@ class Item: except TabException: self.body = None +class VarChangeEvent: + """ Notify when variable is changed """ + + __slots__ = ('_value', 'event') + + def __init__ (self, value): + self._value = value + self.event = asyncio.Event() + + def set (self, value): + if value != self._value: + self._value = value + # unblock waiting threads + self.event.set () + self.event.clear () + + def get (self): + return self._value + + async def wait (self): + await self.event.wait () + return self._value + class SiteLoader: """ Load site in Chrome and monitor network requests @@ -160,7 +183,7 @@ class SiteLoader: XXX: track popup windows/new tabs and close them """ - __slots__ = ('requests', 'browser', 'url', 'logger', 'tab', '_iterRunning') + __slots__ = ('requests', 'browser', 'url', 'logger', 'tab', '_iterRunning', 'idle', '_framesLoading') allowedSchemes = {'http', 'https'} def __init__ (self, browser, url, logger): @@ -170,6 +193,9 @@ class SiteLoader: self.logger = logger.bind (context=type (self).__name__, url=url) self._iterRunning = [] + self.idle = VarChangeEvent (True) + self._framesLoading = set () + async def __aenter__ (self): tab = self.tab = await self.browser.__aenter__ () @@ -208,7 +234,8 @@ class SiteLoader: tab.Network.loadingFailed: self._loadingFailed, tab.Log.entryAdded: self._entryAdded, tab.Page.javascriptDialogOpening: self._javascriptDialogOpening, - #tab.Inspector.targetCrashed: self._targetCrashed, + tab.Page.frameStartedLoading: self._frameStartedLoading, + tab.Page.frameStoppedLoading: self._frameStoppedLoading, } # The implementation is a little advanced. Why? The goal here is to @@ -352,3 +379,12 @@ class SiteLoader: self.logger.warning ('js dialog unknown', uuid='3ef7292e-8595-4e89-b834-0cc6bc40ee38', **kwargs) + async def _frameStartedLoading (self, **kwargs): + self._framesLoading.add (kwargs['frameId']) + self.idle.set (False) + + async def _frameStoppedLoading (self, **kwargs): + self._framesLoading.remove (kwargs['frameId']) + if not self._framesLoading: + self.idle.set (True) + diff --git a/crocoite/controller.py b/crocoite/controller.py index 62676ea..dd47776 100644 --- a/crocoite/controller.py +++ b/crocoite/controller.py @@ -172,19 +172,31 @@ class SinglePageController: self.processItem (item) await l.start () - # XXX: this does not detect idle changes properly - idleSince = None + # wait until the browser has a) been idle for at least + # settings.idleTimeout or b) settings.timeout is exceeded + timeoutProc = asyncio.ensure_future (asyncio.sleep (self.settings.timeout)) + idleTimeout = None while True: - now = time.time() - runtime = now-start - if runtime >= self.settings.timeout or (idleSince and now-idleSince > self.settings.idleTimeout): + idleProc = asyncio.ensure_future (l.idle.wait ()) + finished, pending = await asyncio.wait([idleProc, timeoutProc], return_when=asyncio.FIRST_COMPLETED, timeout=idleTimeout) + if not finished: + # idle timeout + idleProc.cancel () + timeoutProc.cancel () break - if len (l) == 0: - if idleSince is None: - idleSince = time.time () - else: - idleSince = None - await asyncio.sleep (1) + elif timeoutProc in finished: + # global timeout + idleProc.cancel () + timeoutProc.result () + break + elif idleProc in finished: + # idle state change + isIdle = idleProc.result () + if isIdle: + # browser is idle, start the clock + idleTimeout = self.settings.idleTimeout + else: + idleTimeout = None await l.tab.Page.stopLoading () for b in enabledBehavior: @@ -197,11 +209,10 @@ class SinglePageController: async for item in b.onfinish (): self.processItem (item) - # drain the queue XXX detect idle properly - i = 0 - while len (l) and i < 20: - i += 1 - await asyncio.sleep (1) + # wait until loads from behavior scripts are done + await asyncio.sleep (1) + if not l.idle.get (): + while not await l.idle.wait (): pass if handle.done (): handle.result () diff --git a/crocoite/test_browser.py b/crocoite/test_browser.py index 8adf0cd..06492b1 100644 --- a/crocoite/test_browser.py +++ b/crocoite/test_browser.py @@ -24,7 +24,7 @@ from operator import itemgetter from aiohttp import web from http.server import BaseHTTPRequestHandler -from .browser import Item, SiteLoader +from .browser import Item, SiteLoader, VarChangeEvent from .logger import Logger, Consumer from .devtools import Crashed, Process @@ -266,3 +266,26 @@ async def test_invalidurl (loader): assert it.failed break +@pytest.mark.asyncio +async def test_varchangeevent (): + e = VarChangeEvent (True) + assert e.get () == True + + # no change at all + w = asyncio.ensure_future (e.wait ()) + finished, pending = await asyncio.wait ([w], timeout=0.1) + assert not finished and pending + + # no change + e.set (True) + finished, pending = await asyncio.wait ([w], timeout=0.1) + assert not finished and pending + + # changed + e.set (False) + await asyncio.sleep (0.1) # XXX: is there a yield() ? + assert w.done () + ret = w.result () + assert ret == False + assert e.get () == ret + -- cgit v1.2.3