From 3deded13df1339ef59a760c188804adffd9ed902 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 28 Jul 2018 20:25:49 +0200 Subject: Reintroduce WARC logging Commit 7730e0d64ec895091a0dd7eb0e3c6ce2ed02d981 removed logging to WARC files. Add it again, but with a different implementation. Credits to structlog for inspiration. --- crocoite/task.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'crocoite/task.py') diff --git a/crocoite/task.py b/crocoite/task.py index 6b3c9d1..06dd022 100644 --- a/crocoite/task.py +++ b/crocoite/task.py @@ -22,7 +22,7 @@ Celery distributed tasks """ -import os, logging +import os from urllib.parse import urlsplit from datetime import datetime @@ -113,8 +113,8 @@ class DistributedRecursiveController (RecursiveController): __slots__ = ('concurrency', 'stats') - def __init__ (self, url, service=ChromeService (), behavior=behavior.available, \ - logger=logging.getLogger(__name__), settings=defaultSettings, + def __init__ (self, url, logger, service=ChromeService (), behavior=behavior.available, \ + settings=defaultSettings, recursionPolicy=DepthLimit (0), concurrency=1): super ().__init__ (url, None, service, behavior, logger, settings, recursionPolicy) self.concurrency = concurrency @@ -134,10 +134,11 @@ class DistributedRecursiveController (RecursiveController): def controller (self, url, settings, enabledBehaviorNames, recursive, concurrency): """ Recursive controller """ + logger = Logger (consumer=[DatetimeConsumer (), JsonPrintConsumer ()]) recursionPolicy = parseRecursive (recursive, url) enabledBehavior = list (filter (lambda x: x.name in enabledBehaviorNames, behavior.available)) settings = ControllerSettings (**settings) - c = DistributedRecursiveController (url, None, logger=logger, behavior=enabledBehavior, + c = DistributedRecursiveController (url, None, logger=logger, behavior=enabledBehavior, settings=settings, recursionPolicy=recursionPolicy, concurrency=concurrency) c.run () return dict (c.stats) -- 
cgit v1.2.3