summaryrefslogtreecommitdiff
path: root/crocoite
diff options
context:
space:
mode:
Diffstat (limited to 'crocoite')
-rw-r--r--crocoite/data/extract-links.js12
-rw-r--r--crocoite/test_behavior.py71
2 files changed, 76 insertions, 7 deletions
diff --git a/crocoite/data/extract-links.js b/crocoite/data/extract-links.js
index 4d1a3d0..483a35f 100644
--- a/crocoite/data/extract-links.js
+++ b/crocoite/data/extract-links.js
@@ -25,12 +25,14 @@ function isClickable (o) {
}
/* --- end copy&paste */
-let x = document.body.querySelectorAll('a[href]');
let ret = [];
-for (let i=0; i < x.length; i++) {
- if (isClickable (x[i])) {
- ret.push (x[i].href);
+['a[href]', 'area[href]'].forEach (function (s) {
+ let x = document.body.querySelectorAll(s);
+ for (let i=0; i < x.length; i++) {
+ if (isClickable (x[i])) {
+ ret.push (x[i].href);
+ }
}
-}
+});
return ret; /* immediately return results, for use with Runtime.evaluate() */
})();
diff --git a/crocoite/test_behavior.py b/crocoite/test_behavior.py
index 0433918..a1eef79 100644
--- a/crocoite/test_behavior.py
+++ b/crocoite/test_behavior.py
@@ -20,14 +20,16 @@
import asyncio, os, yaml, re
from functools import partial
+
import pytest
from yarl import URL
+from aiohttp import web
import pkg_resources
from .logger import Logger
from .devtools import Process
-from .behavior import Scroll, Behavior
-from .controller import SinglePageController
+from .behavior import Scroll, Behavior, ExtractLinks, ExtractLinksEvent
+from .controller import SinglePageController, EventHandler
with pkg_resources.resource_stream (__name__, os.path.join ('data', 'click.yaml')) as fd:
sites = list (yaml.load_all (fd))
@@ -96,3 +98,68 @@ async def test_click_match (match, url):
# keep this aligned with click.js
assert re.match (match, url.host, re.I)
+class ExtractLinksCheck(EventHandler):
+ """ Test adapter that accumulates all incoming links from ExtractLinks """
+ __slots__ = ('links')
+
+ def __init__ (self):
+ super().__init__ ()
+ self.links = []
+
+ def push (self, item):
+ if isinstance (item, ExtractLinksEvent):
+ self.links.extend (item.links)
+
+@pytest.mark.asyncio
+async def test_extract_links ():
+ """
+ Make sure the CSS selector exists on an example url
+ """
+ async def f (req):
+ return web.Response (body="""<html><head></head>
+ <body>
+ <div>
+ <a href="/relative">foo</a>
+ <a href="http://example.com/absolute/">foo</a>
+ <a href="https://example.com/absolute/secure">foo</a>
+ <a href="#anchor">foo</a>
+
+ <a href="/hidden/visibility" style="visibility: hidden">foo</a>
+ <a href="/hidden/display" style="display: none">foo</a>
+ <div style="display: none">
+ <a href="/hidden/display/insidediv">foo</a>
+ </div>
+ <!--<a href="/hidden/comment">foo</a>-->
+
+ <p><img src="shapes.png" usemap="#shapes">
+ <map name="shapes"><area shape=rect coords="50,50,100,100" href="/map/rect"></map></p>
+ </div>
+ </body></html>""", status=200, content_type='text/html', charset='utf-8')
+
+ url = URL.build (scheme='http', host='localhost', port=8080)
+
+ app = web.Application ()
+ app.router.add_route ('GET', '/', f)
+ runner = web.AppRunner(app)
+ await runner.setup()
+ site = web.TCPSite(runner, url.host, url.port)
+ await site.start()
+
+ try:
+ handler = ExtractLinksCheck ()
+ logger = Logger ()
+ controller = SinglePageController (url=url, logger=logger,
+ service=Process (), behavior=[ExtractLinks], handler=[handler])
+ await controller.run ()
+
+ assert sorted (handler.links) == sorted ([
+ url.with_path ('/relative'),
+ url.with_fragment ('anchor'),
+ URL ('http://example.com/absolute/'),
+ URL ('https://example.com/absolute/secure'),
+ url.with_path ('/hidden/visibility'), # XXX: shall we ignore these as well?
+ url.with_path ('/map/rect'),
+ ])
+ finally:
+ await runner.cleanup ()
+