summaryrefslogtreecommitdiff
path: root/crocoite/behavior.py
diff options
context:
space:
mode:
author: Lars-Dominik Braun <lars@6xq.net> 2018-05-01 10:37:08 +0200
committer: Lars-Dominik Braun <lars@6xq.net> 2018-05-04 16:00:05 +0200
commit: eb818f0c6eb86461a0db1845876f2a0b39b99b7f (patch)
tree: 2b25cb248e4a2aca0577a336f48ae112c7e54408 /crocoite/behavior.py
parent: d8193765df85293f825abc486ac5cb1f5ac0a028 (diff)
download: crocoite-eb818f0c6eb86461a0db1845876f2a0b39b99b7f.tar.gz
crocoite-eb818f0c6eb86461a0db1845876f2a0b39b99b7f.tar.bz2
crocoite-eb818f0c6eb86461a0db1845876f2a0b39b99b7f.zip
behavior: Add link extraction script
Diffstat (limited to 'crocoite/behavior.py')
-rw-r--r-- crocoite/behavior.py 22
1 files changed, 20 insertions, 2 deletions
diff --git a/crocoite/behavior.py b/crocoite/behavior.py
index f6dfd3f..c658699 100644
--- a/crocoite/behavior.py
+++ b/crocoite/behavior.py
@@ -238,11 +238,29 @@ class Click (JsOnload):
     name = 'click'
     scriptPath = 'click.js'
 
-### Site-specific scripts ###
+class ExtractLinks (Behavior):
+    """
+    Extract links from a page using JavaScript
+
+    We could retrieve a HTML snapshot and extract links here, but we’d have to
+    manually resolve relative links.
+    """
+
+    name = 'extractLinks'
+
+    def __init__ (self, loader):
+        super ().__init__ (loader)
+        self.script = self.loadScript ('extract-links.js')
+        self.links = None
+
+    def onfinish (self):
+        tab = self.loader.tab
+        self.useScript (self.script)
+        self.links = list (set (tab.Runtime.evaluate (expression=self.script, returnByValue=True)['result']['value']))
 
 # available behavior scripts. Order matters, move those modifying the page
 # towards the end of available
-generic = [Scroll, EmulateScreenMetrics, Click]
+generic = [Scroll, EmulateScreenMetrics, Click, ExtractLinks]
 perSite = []
 available = generic + perSite + [Screenshot, DomSnapshot]
 availableNames = set (map (lambda x: x.name, available))