diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2018-05-05 10:42:44 +0200 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2018-05-05 10:42:44 +0200 |
commit | fd442b1f8ddc6f9f91aae902cf7018567e818a61 (patch) | |
tree | 7d64a3d2602cdabc51dfa1596eb8f3f65b351659 /crocoite | |
parent | f3a7115648be5d60b6afb74ca62370f347e10df2 (diff) | |
download | crocoite-fd442b1f8ddc6f9f91aae902cf7018567e818a61.tar.gz crocoite-fd442b1f8ddc6f9f91aae902cf7018567e818a61.tar.bz2 crocoite-fd442b1f8ddc6f9f91aae902cf7018567e818a61.zip |
Extract only visible and clickable links
Diffstat (limited to 'crocoite')
-rw-r--r-- | crocoite/controller.py | 2 | ||||
-rw-r--r-- | crocoite/data/extract-links.js | 31 |
2 files changed, 29 insertions, 4 deletions
diff --git a/crocoite/controller.py b/crocoite/controller.py
index 113c139..bc6f948 100644
--- a/crocoite/controller.py
+++ b/crocoite/controller.py
@@ -67,7 +67,7 @@ class SinglePageController:
         self.logger = logger
 
     def run (self):
-        ret = {'stats': None, 'links': None}
+        ret = {'stats': None, 'links': []}
 
         with self.service as browser:
             browser = pychrome.Browser (url=browser)
diff --git a/crocoite/data/extract-links.js b/crocoite/data/extract-links.js
index f2a37aa..5199a63 100644
--- a/crocoite/data/extract-links.js
+++ b/crocoite/data/extract-links.js
@@ -1,9 +1,34 @@
 /* Extract links from a page
  */
+
+/* --- copy&paste from click.js --- */
+/* Element is visible if itself and all of its parents are
+ */
+function isVisible (o) {
+    if (o === null || !(o instanceof Element)) {
+        return true;
+    }
+    let style = window.getComputedStyle (o);
+    if ('parentNode' in o) {
+        return style.display !== 'none' && isVisible (o.parentNode);
+    } else {
+        return style.display !== 'none';
+    }
+}
+
+/* Elements are considered clickable if they are a) visible and b) not
+ * disabled
+ */
+function isClickable (o) {
+    return !o.hasAttribute ('disabled') && isVisible (o);
+}
+/* --- end copy&paste */
+
 let x = document.body.querySelectorAll('a[href]');
 let ret = [];
-let index = 0;
-for( index=0; index < x.length; index++ ) {
-    ret.push (x[index].href);
+for (let i=0; i < x.length; i++) {
+    if (isClickable (x[i])) {
+        ret.push (x[i].href);
+    }
 }
 ret; /* immediately return results, for use with Runtime.evaluate() */