From fd442b1f8ddc6f9f91aae902cf7018567e818a61 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 5 May 2018 10:42:44 +0200 Subject: Extract only visible and clickable links --- crocoite/controller.py | 2 +- crocoite/data/extract-links.js | 31 ++++++++++++++++++++++++++++--- 2 files changed, 29 insertions(+), 4 deletions(-) (limited to 'crocoite') diff --git a/crocoite/controller.py b/crocoite/controller.py index 113c139..bc6f948 100644 --- a/crocoite/controller.py +++ b/crocoite/controller.py @@ -67,7 +67,7 @@ class SinglePageController: self.logger = logger def run (self): - ret = {'stats': None, 'links': None} + ret = {'stats': None, 'links': []} with self.service as browser: browser = pychrome.Browser (url=browser) diff --git a/crocoite/data/extract-links.js b/crocoite/data/extract-links.js index f2a37aa..5199a63 100644 --- a/crocoite/data/extract-links.js +++ b/crocoite/data/extract-links.js @@ -1,9 +1,34 @@ /* Extract links from a page */ + +/* --- copy&paste from click.js --- */ +/* Element is visible if itself and all of its parents are + */ +function isVisible (o) { + if (o === null || !(o instanceof Element)) { + return true; + } + let style = window.getComputedStyle (o); + if ('parentNode' in o) { + return style.display !== 'none' && isVisible (o.parentNode); + } else { + return style.display !== 'none'; + } +} + +/* Elements are considered clickable if they are a) visible and b) not + * disabled + */ +function isClickable (o) { + return !o.hasAttribute ('disabled') && isVisible (o); +} +/* --- end copy&paste */ + let x = document.body.querySelectorAll('a[href]'); let ret = []; -let index = 0; -for( index=0; index < x.length; index++ ) { - ret.push (x[index].href); +for (let i=0; i < x.length; i++) { + if (isClickable (x[i])) { + ret.push (x[i].href); + } } ret; /* immediately return results, for use with Runtime.evaluate() */ -- cgit v1.2.3