diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2019-05-12 15:37:48 +0300 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2019-05-12 15:37:48 +0300 |
commit | ba5dbfd061d328a2140f0a7541ef0fdb6acf5903 (patch) | |
tree | d092aa0e9b401404b6463db22741511165ca1575 | |
parent | 0299acfb6edf7d54ed112834a2b639567f782ab4 (diff) | |
download | crocoite-ba5dbfd061d328a2140f0a7541ef0fdb6acf5903.tar.gz crocoite-ba5dbfd061d328a2140f0a7541ef0fdb6acf5903.tar.bz2 crocoite-ba5dbfd061d328a2140f0a7541ef0fdb6acf5903.zip |
behavior: Ignore invalid URLs when extracting links
Fixes #18.
-rw-r--r-- | crocoite/behavior.py | 9 | ||||
-rw-r--r-- | crocoite/test_behavior.py | 11 |
2 files changed, 18 insertions, 2 deletions
```diff
diff --git a/crocoite/behavior.py b/crocoite/behavior.py
index dca9ea0..d079603 100644
--- a/crocoite/behavior.py
+++ b/crocoite/behavior.py
@@ -328,6 +328,13 @@ class ExtractLinksEvent:
     def __repr__ (self):
         return f'<ExtractLinksEvent {self.links!r}>'
 
+def mapOrIgnore (f, l):
+    for e in l:
+        try:
+            yield f (e)
+        except:
+            pass
+
 class ExtractLinks (Behavior):
     """
     Extract links from a page using JavaScript
@@ -348,7 +355,7 @@ class ExtractLinks (Behavior):
         tab = self.loader.tab
         yield self.script
         result = await tab.Runtime.evaluate (expression=str (self.script), returnByValue=True)
-        yield ExtractLinksEvent (list (set (map (URL, result['result']['value']))))
+        yield ExtractLinksEvent (list (set (mapOrIgnore (URL, result['result']['value']))))
 
 class Crash (Behavior):
     """ Crash the browser. For testing only. Obviously. """
diff --git a/crocoite/test_behavior.py b/crocoite/test_behavior.py
index 7a723c6..9a13c65 100644
--- a/crocoite/test_behavior.py
+++ b/crocoite/test_behavior.py
@@ -30,7 +30,7 @@ import pkg_resources
 from .logger import Logger
 from .devtools import Process
 from .behavior import Scroll, Behavior, ExtractLinks, ExtractLinksEvent, Crash, \
-        Screenshot, ScreenshotEvent, DomSnapshot, DomSnapshotEvent
+        Screenshot, ScreenshotEvent, DomSnapshot, DomSnapshotEvent, mapOrIgnore
 from .controller import SinglePageController, EventHandler
 from .devtools import Crashed
 
@@ -139,6 +139,7 @@ async def test_extract_links ():
             <a href="http://example.com/absolute/">foo</a>
             <a href="https://example.com/absolute/secure">foo</a>
             <a href="#anchor">foo</a>
+            <a href="http://neue_preise_f%c3%bcr_zahnimplantate_k%c3%b6nnten_sie_%c3%bcberraschen">foo</a>
 
             <a href="/hidden/visibility" style="visibility: hidden">foo</a>
             <a href="/hidden/display" style="display: none">foo</a>
@@ -252,3 +253,11 @@ async def test_dom_snapshot ():
     finally:
         await runner.cleanup ()
 
+def test_mapOrIgnore ():
+    def fail (x):
+        if x < 50:
+            raise Exception ()
+        return x+1
+
+    assert list (mapOrIgnore (fail, range (100))) == list (range (51, 101))
+
```