diff options
-rw-r--r-- | crocoite/data/extract-links.js | 12 | ||||
-rw-r--r-- | crocoite/test_behavior.py | 71 |
2 files changed, 76 insertions, 7 deletions
diff --git a/crocoite/data/extract-links.js b/crocoite/data/extract-links.js
index 4d1a3d0..483a35f 100644
--- a/crocoite/data/extract-links.js
+++ b/crocoite/data/extract-links.js
@@ -25,12 +25,14 @@ function isClickable (o) {
 }
 /* --- end copy&paste */
 
-let x = document.body.querySelectorAll('a[href]');
 let ret = [];
-for (let i=0; i < x.length; i++) {
-	if (isClickable (x[i])) {
-		ret.push (x[i].href);
+['a[href]', 'area[href]'].forEach (function (s) {
+	let x = document.body.querySelectorAll(s);
+	for (let i=0; i < x.length; i++) {
+		if (isClickable (x[i])) {
+			ret.push (x[i].href);
+		}
 	}
-}
+});
 return ret; /* immediately return results, for use with Runtime.evaluate() */
 })();
diff --git a/crocoite/test_behavior.py b/crocoite/test_behavior.py
index 0433918..a1eef79 100644
--- a/crocoite/test_behavior.py
+++ b/crocoite/test_behavior.py
@@ -20,14 +20,16 @@
 
 import asyncio, os, yaml, re
 from functools import partial
+
 import pytest
 from yarl import URL
+from aiohttp import web
 
 import pkg_resources
 from .logger import Logger
 from .devtools import Process
-from .behavior import Scroll, Behavior
-from .controller import SinglePageController
+from .behavior import Scroll, Behavior, ExtractLinks, ExtractLinksEvent
+from .controller import SinglePageController, EventHandler
 
 with pkg_resources.resource_stream (__name__, os.path.join ('data', 'click.yaml')) as fd:
     sites = list (yaml.load_all (fd))
@@ -96,3 +98,68 @@ async def test_click_match (match, url):
     # keep this aligned with click.js
     assert re.match (match, url.host, re.I)
 
+class ExtractLinksCheck(EventHandler):
+    """ Test adapter that accumulates all incoming links from ExtractLinks """
+    __slots__ = ('links')
+
+    def __init__ (self):
+        super().__init__ ()
+        self.links = []
+
+    def push (self, item):
+        if isinstance (item, ExtractLinksEvent):
+            self.links.extend (item.links)
+
+@pytest.mark.asyncio
+async def test_extract_links ():
+    """
+    Make sure the CSS selector exists on an example url
+    """
+    async def f (req):
+        return web.Response (body="""<html><head></head>
+            <body>
+            <div>
+                <a href="/relative">foo</a>
+                <a href="http://example.com/absolute/">foo</a>
+                <a href="https://example.com/absolute/secure">foo</a>
+                <a href="#anchor">foo</a>
+
+                <a href="/hidden/visibility" style="visibility: hidden">foo</a>
+                <a href="/hidden/display" style="display: none">foo</a>
+                <div style="display: none">
+                <a href="/hidden/display/insidediv">foo</a>
+                </div>
+                <!--<a href="/hidden/comment">foo</a>-->
+
+                <p><img src="shapes.png" usemap="#shapes">
+                <map name="shapes"><area shape=rect coords="50,50,100,100" href="/map/rect"></map></p>
+            </div>
+            </body></html>""", status=200, content_type='text/html', charset='utf-8')
+
+    url = URL.build (scheme='http', host='localhost', port=8080)
+
+    app = web.Application ()
+    app.router.add_route ('GET', '/', f)
+    runner = web.AppRunner(app)
+    await runner.setup()
+    site = web.TCPSite(runner, url.host, url.port)
+    await site.start()
+
+    try:
+        handler = ExtractLinksCheck ()
+        logger = Logger ()
+        controller = SinglePageController (url=url, logger=logger,
+                service=Process (), behavior=[ExtractLinks], handler=[handler])
+        await controller.run ()
+
+        assert sorted (handler.links) == sorted ([
+            url.with_path ('/relative'),
+            url.with_fragment ('anchor'),
+            URL ('http://example.com/absolute/'),
+            URL ('https://example.com/absolute/secure'),
+            url.with_path ('/hidden/visibility'), # XXX: shall we ignore these as well?
+            url.with_path ('/map/rect'),
+            ])
+    finally:
+        await runner.cleanup ()
+