# Reconstructed from commit eb818f0c6eb86461a0db1845876f2a0b39b99b7f
# ("behavior: Add link extraction script", Lars-Dominik Braun, 2018-05-01),
# a patch against crocoite/behavior.py. The hunk follows the tail of
# ``class Click (JsOnload)`` (name = 'click', scriptPath = 'click.js'), which
# is defined outside this excerpt, and replaces the former
# "### Site-specific scripts ###" marker.

class ExtractLinks (Behavior):
    """
    Extract links from a page using JavaScript

    We could retrieve a HTML snapshot and extract links here, but we’d have to
    manually resolve relative links.

    The script ``extract-links.js`` is loaded on construction and evaluated in
    the loader's tab when ``onfinish`` is called. Afterwards ``links`` holds
    the de-duplicated values the script returned (order is not preserved);
    before that it is ``None``.
    """

    name = 'extractLinks'

    def __init__ (self, loader):
        """
        Load the extraction script; no links are available yet.

        :param loader: passed through to ``Behavior.__init__``; ``onfinish``
            reads its ``tab`` attribute.
        """
        super ().__init__ (loader)
        self.script = self.loadScript ('extract-links.js')
        # stays None until onfinish has run
        self.links = None

    def onfinish (self):
        """
        Evaluate the script in the tab and store the unique results in
        ``self.links``.
        """
        tab = self.loader.tab
        self.useScript (self.script)
        response = tab.Runtime.evaluate (expression=self.script,
                returnByValue=True)
        # NOTE(review): presumably 'value' is missing when the script throws
        # and null when it returns nothing -- confirm against the DevTools
        # protocol. Either case is treated as "no links" rather than raising
        # KeyError/TypeError from the finish hook.
        found = response.get ('result', {}).get ('value') or []
        self.links = list (set (found))

# available behavior scripts. Order matters, move those modifying the page
# towards the end of available
generic = [Scroll, EmulateScreenMetrics, Click, ExtractLinks]
perSite = []
available = generic + perSite + [Screenshot, DomSnapshot]
availableNames = set (map (lambda x: x.name, available))