diff options
Diffstat (limited to 'crocoite/data/extract-links.js')
-rw-r--r-- | crocoite/data/extract-links.js | 21 |
1 files changed, 18 insertions, 3 deletions
diff --git a/crocoite/data/extract-links.js b/crocoite/data/extract-links.js
index 4d1a3d0..5a4f9f0 100644
--- a/crocoite/data/extract-links.js
+++ b/crocoite/data/extract-links.js
@@ -25,11 +25,26 @@ function isClickable (o) {
 }
 /* --- end copy&paste */
 
-let x = document.body.querySelectorAll('a[href]');
 let ret = [];
+['a[href]', 'area[href]'].forEach (function (s) {
+	let x = document.querySelectorAll(s);
+	for (let i=0; i < x.length; i++) {
+		if (isClickable (x[i])) {
+			ret.push (x[i].href);
+		}
+	}
+});
+
+/* If Chrome loads plain-text documents it’ll wrap them into <pre>. Check those
+ * for links as well, assuming the whole line is a link (i.e. list of links). */
+let x = document.querySelectorAll ('body > pre');
 for (let i=0; i < x.length; i++) {
-	if (isClickable (x[i])) {
-		ret.push (x[i].href);
+	if (isVisible (x[i])) {
+		x[i].innerText.split ('\n').forEach (function (s) {
+			if (s.match ('^https?://')) {
+				ret.push (s);
+			}
+		});
 	}
 }
 return ret; /* immediately return results, for use with Runtime.evaluate() */