From 7730e0d64ec895091a0dd7eb0e3c6ce2ed02d981 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Wed, 20 Jun 2018 11:13:37 +0200 Subject: Synchronous SiteLoader event handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously a browser crash stalled the entire grab, since events from pychrome were handled asynchronously in a different thread and exceptions were not propagated to the main thread. Now all browser events are stored in a queue and processed by the main thread, allowing us to handle browser crashes gracefully (more or less). This made the following additional changes necessary: - Clear separation between producer (browser) and consumer (WARC, stats, …) - Behavior scripts now yield events as well, instead of accessing the WARC writer - WARC logging was removed (for now), and the WARC writer no longer requires serialization --- contrib/celerycrocoite.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'contrib') diff --git a/contrib/celerycrocoite.py b/contrib/celerycrocoite.py index 26c35ce..d0a02e9 100644 --- a/contrib/celerycrocoite.py +++ b/contrib/celerycrocoite.py @@ -88,10 +88,9 @@ def checkCompletedJobs (bot, jobs): if Identifier (channel) not in bot.channels: continue try: - result = handle.get (timeout=0.1) - stats = result['stats'] - bot.msg (channel, '{}: {} ({}) finished. {} requests, {} failed, {} received.'.format (user, url, - handle.id, stats['requests'], stats['failed'], + stats = handle.get (timeout=0.1) + bot.msg (channel, '{}: {} ({}) finished. 
{} crashed, {} requests, {} failed, {} received.'.format (user, url, + handle.id, stats['crashed'], stats['requests'], stats['failed'], prettyBytes (stats['bytesRcv']))) delete.add (handle.id) except celery.exceptions.TimeoutError: @@ -198,7 +197,7 @@ def archive (bot, trigger): logBuffer=defaultSettings.logBuffer, idleTimeout=args.idleTimeout, timeout=args.timeout) args = dict (url=args.url, - enabledBehaviorNames=list (behavior.availableNames-blacklistedBehavior), + enabledBehaviorNames=list (set (behavior.availableMap.keys())-blacklistedBehavior), settings=settings, recursive=args.recursive, concurrency=args.concurrency) q = bot.memory['crocoite']['q'] -- cgit v1.2.3