From 070bf5b2196955e6447869c23147422b4c64ffd6 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 24 Nov 2018 12:57:03 +0100 Subject: behavior: Fix scrolling - Introduce stop() method callable from Python. Looks like the old method (global variable) was not working (any more?). This is much better anyway. - Restore state of scrolled elements (not window). Fixes weird screenshots of twitter.com. --- crocoite/behavior.py | 34 +++++++++++++--------------------- crocoite/controller.py | 4 ++-- crocoite/data/click.js | 9 +++++++-- crocoite/data/scroll.js | 44 +++++++++++++++++++++++++++----------------- 4 files changed, 49 insertions(+), 42 deletions(-) diff --git a/crocoite/behavior.py b/crocoite/behavior.py index 1a4aac1..d5c82a0 100644 --- a/crocoite/behavior.py +++ b/crocoite/behavior.py @@ -79,7 +79,7 @@ class Behavior: return ''.format (self.name) async def onload (self): - """ Before loading the page """ + """ After loading the page started """ # this is a dirty hack to make this function an async generator return yield @@ -107,23 +107,29 @@ class HostnameFilter: class JsOnload (Behavior): """ Execute JavaScript on page load """ - __slots__ = ('script', 'scriptHandle') + __slots__ = ('script', 'context') scriptPath = None def __init__ (self, loader, logger): super ().__init__ (loader, logger) self.script = Script (self.scriptPath) - self.scriptHandle = None + self.context = None async def onload (self): + tab = self.loader.tab yield self.script - result = await self.loader.tab.Page.addScriptToEvaluateOnNewDocument (source=str (self.script)) - self.scriptHandle = result['identifier'] + result = await tab.Runtime.evaluate (expression=str (self.script)) + result = result['result'] + assert result['type'] == 'object' + assert result.get ('subtype') != 'error' + self.context = result['objectId'] async def onstop (self): - if self.scriptHandle: - await self.loader.tab.Page.removeScriptToEvaluateOnNewDocument (identifier=self.scriptHandle) + tab = self.loader.tab + assert self.context is not None + await tab.Runtime.callFunctionOn (functionDeclaration='function(){return this.stop();}', objectId=self.context) + await tab.Runtime.releaseObject (objectId=self.context) return yield @@ -135,20 +141,6 @@ class Scroll (JsOnload): name = 'scroll' scriptPath = 'scroll.js' - def __init__ (self, loader, logger): - super ().__init__ (loader, logger) - stopVarname = '__' + __package__ + '_stop__' - newStopVarname = randomString () - self.script.data = self.script.data.replace (stopVarname, newStopVarname) - self.stopVarname = newStopVarname - - async def onstop (self): - super ().onstop () - # removing the script does not stop it if running - script = Script.fromStr ('{} = true; window.scrollTo (0, 0);'.format (self.stopVarname)) - yield script - await self.loader.tab.Runtime.evaluate (expression=str (script), returnByValue=True) - class EmulateScreenMetrics (Behavior): name = 'emulateScreenMetrics' diff --git a/crocoite/controller.py b/crocoite/controller.py index dd47776..4d95b09 100644 --- a/crocoite/controller.py +++ b/crocoite/controller.py @@ -167,10 +167,10 @@ class SinglePageController: enabledBehavior = list (filter (lambda x: self.url in x, map (lambda x: x (l, logger), self.behavior))) + await l.start () for b in enabledBehavior: async for item in b.onload (): self.processItem (item) - await l.start () # wait until the browser has a) been idle for at least # settings.idleTimeout or b) settings.timeout is exceeded @@ -197,11 +197,11 @@ class SinglePageController: idleTimeout = self.settings.idleTimeout else: idleTimeout = None - await l.tab.Page.stopLoading () for b in enabledBehavior: async for item in b.onstop (): self.processItem (item) + await l.tab.Page.stopLoading () await asyncio.sleep (1) diff --git a/crocoite/data/click.js b/crocoite/data/click.js index 88c8f24..b098810 100644 --- a/crocoite/data/click.js +++ b/crocoite/data/click.js @@ -165,5 +165,10 @@ function discover () { } /* XXX: can we use a mutation observer instead? */ -window.setInterval (discover, discoverInterval); -}()); +let interval = window.setInterval (discover, discoverInterval); + +function stop() { + window.clearInterval (interval); +} +return {'stop': stop}; +}()) diff --git a/crocoite/data/scroll.js b/crocoite/data/scroll.js index 13e856d..e1fbbcb 100644 --- a/crocoite/data/scroll.js +++ b/crocoite/data/scroll.js @@ -1,23 +1,33 @@ /* Continuously scrolls the page */ -var __crocoite_stop__ = false; (function(){ -function scroll (event) { - if (__crocoite_stop__) { - return false; - } else { - window.scrollBy (0, window.innerHeight/2); - document.querySelectorAll ('*').forEach ( - function (d) { - if (d.clientHeight < d.scrollHeight) { - d.scrollBy (0, d.clientHeight/2); - } - }); - return true; +let scrolled = new Map (); +let interval = null; +function stop() { + window.clearInterval (interval); + window.scrollTo (0, 0); + scrolled.forEach (function (value, key, map) { + key.scrollTop = value; + }); +} +/* save initial scroll state */ +function save(obj) { + if (!scrolled.has (obj)) { + scrolled.set (obj, obj.scrollTop); } } -function onload (event) { - window.setInterval (scroll, 200); +/* perform a single scroll step */ +function scroll (event) { + window.scrollBy (0, window.innerHeight/2); + document.querySelectorAll ('*').forEach ( + function (d) { + if (d.scrollHeight-d.scrollTop > d.clientHeight) { + save (d); + d.scrollBy (0, d.clientHeight/2); + } + }); + return true; } -document.addEventListener("DOMContentLoaded", onload); -}()); +interval = window.setInterval (scroll, 200); +return {'stop': stop}; +}()) -- cgit v1.2.3