From 3deded13df1339ef59a760c188804adffd9ed902 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 28 Jul 2018 20:25:49 +0200 Subject: Reintroduce WARC logging Commit 7730e0d64ec895091a0dd7eb0e3c6ce2ed02d981 removed logging to WARC files. Add it again, but with a different implementation. Credits to structlog for inspiration. --- crocoite/cli.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'crocoite/cli.py') diff --git a/crocoite/cli.py b/crocoite/cli.py index d631f10..8e225d9 100644 --- a/crocoite/cli.py +++ b/crocoite/cli.py @@ -22,13 +22,14 @@ Command line interface """ -import logging, argparse, json, sys +import argparse, json, sys from . import behavior from .controller import RecursiveController, defaultSettings, \ ControllerSettings, DepthLimit, PrefixLimit, StatsHandler from .browser import NullService, ChromeService from .warc import WarcHandler +from .logger import Logger, JsonPrintConsumer, DatetimeConsumer, WarcHandlerConsumer def parseRecursive (recursive, url): if recursive is None: @@ -42,7 +43,6 @@ def parseRecursive (recursive, url): def main (): parser = argparse.ArgumentParser(description='Save website to WARC using Google Chrome.') - parser.add_argument('--debug', help='Enable debug messages', action='store_true') parser.add_argument('--browser', help='DevTools URL', metavar='URL') parser.add_argument('--recursive', help='Follow links recursively') parser.add_argument('--concurrency', '-j', type=int, default=1) @@ -73,8 +73,7 @@ def main (): recursive=args.recursive, concurrency=args.concurrency) r = result.get () else: - level = logging.DEBUG if args.debug else logging.INFO - logging.basicConfig (level=level) + logger = Logger (consumer=[DatetimeConsumer (), JsonPrintConsumer ()]) try: recursionPolicy = parseRecursive (args.recursive, args.url) @@ -86,15 +85,16 @@ def main (): settings = ControllerSettings (maxBodySize=args.maxBodySize, logBuffer=args.logBuffer, idleTimeout=args.idleTimeout, 
timeout=args.timeout) - with open (args.output, 'wb') as fd: - handler = [StatsHandler (), WarcHandler (fd)] + with open (args.output, 'wb') as fd, WarcHandler (fd, logger) as warcHandler: + logger.connect (WarcHandlerConsumer (warcHandler)) + handler = [StatsHandler (), warcHandler] b = list (map (lambda x: behavior.availableMap[x], args.enabledBehaviorNames)) controller = RecursiveController (args.url, fd, settings=settings, recursionPolicy=recursionPolicy, service=service, - handler=handler, behavior=b) + handler=handler, behavior=b, logger=logger) controller.run () r = handler[0].stats - json.dump (r, sys.stdout) + logger.info ('stats', context='cli', uuid='24d92d16-770e-4088-b769-4020e127a7ff', **r) return True -- cgit v1.2.3