From ba5dbfd061d328a2140f0a7541ef0fdb6acf5903 Mon Sep 17 00:00:00 2001
From: Lars-Dominik Braun
Date: Sun, 12 May 2019 15:37:48 +0300
Subject: behavior: Ignore invalid URLs when extracting links

Fixes #18.
---
 crocoite/behavior.py      |  9 ++++++++-
 crocoite/test_behavior.py | 11 ++++++++++-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/crocoite/behavior.py b/crocoite/behavior.py
index dca9ea0..d079603 100644
--- a/crocoite/behavior.py
+++ b/crocoite/behavior.py
@@ -328,6 +328,13 @@ class ExtractLinksEvent:
     def __repr__ (self):
         return f''
 
+def mapOrIgnore (f, l):
+    for e in l:
+        try:
+            yield f (e)
+        except:
+            pass
+
 class ExtractLinks (Behavior):
     """
     Extract links from a page using JavaScript
@@ -348,7 +355,7 @@ class ExtractLinks (Behavior):
         tab = self.loader.tab
         yield self.script
         result = await tab.Runtime.evaluate (expression=str (self.script), returnByValue=True)
-        yield ExtractLinksEvent (list (set (map (URL, result['result']['value']))))
+        yield ExtractLinksEvent (list (set (mapOrIgnore (URL, result['result']['value']))))
 
 class Crash (Behavior):
     """ Crash the browser. For testing only. Obviously. """
diff --git a/crocoite/test_behavior.py b/crocoite/test_behavior.py
index 7a723c6..9a13c65 100644
--- a/crocoite/test_behavior.py
+++ b/crocoite/test_behavior.py
@@ -30,7 +30,7 @@ import pkg_resources
 from .logger import Logger
 from .devtools import Process
 from .behavior import Scroll, Behavior, ExtractLinks, ExtractLinksEvent, Crash, \
-        Screenshot, ScreenshotEvent, DomSnapshot, DomSnapshotEvent
+        Screenshot, ScreenshotEvent, DomSnapshot, DomSnapshotEvent, mapOrIgnore
 from .controller import SinglePageController, EventHandler
 from .devtools import Crashed
 
@@ -139,6 +139,7 @@ async def test_extract_links ():
             foo
             foo
             foo
+            foo
             foo
             foo
             foo
@@ -252,3 +253,11 @@ async def test_dom_snapshot ():
     finally:
         await runner.cleanup ()
 
+def test_mapOrIgnore ():
+    def fail (x):
+        if x < 50:
+            raise Exception ()
+        return x+1
+
+    assert list (mapOrIgnore (fail, range (100))) == list (range (51, 101))
+
-- 
cgit v1.2.3