diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2018-07-28 20:25:49 +0200 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2018-08-04 14:11:31 +0200 |
commit | 3deded13df1339ef59a760c188804adffd9ed902 (patch) | |
tree | 5eaf69ee38389073e7323585c6afdbbf5eeab487 /crocoite/cli.py | |
parent | 33a137f2d7c04468038d689b53a70fb534297f55 (diff) | |
download | crocoite-3deded13df1339ef59a760c188804adffd9ed902.tar.gz crocoite-3deded13df1339ef59a760c188804adffd9ed902.tar.bz2 crocoite-3deded13df1339ef59a760c188804adffd9ed902.zip |
Reintroduce WARC logging
Commit 7730e0d64ec895091a0dd7eb0e3c6ce2ed02d981 removed logging to WARC
files. Add it again, but with a different implementation. Credits to
structlog for inspiration.
Diffstat (limited to 'crocoite/cli.py')
-rw-r--r-- | crocoite/cli.py | 16 |
1 files changed, 8 insertions, 8 deletions
diff --git a/crocoite/cli.py b/crocoite/cli.py index d631f10..8e225d9 100644 --- a/crocoite/cli.py +++ b/crocoite/cli.py @@ -22,13 +22,14 @@ Command line interface """ -import logging, argparse, json, sys +import argparse, json, sys from . import behavior from .controller import RecursiveController, defaultSettings, \ ControllerSettings, DepthLimit, PrefixLimit, StatsHandler from .browser import NullService, ChromeService from .warc import WarcHandler +from .logger import Logger, JsonPrintConsumer, DatetimeConsumer, WarcHandlerConsumer def parseRecursive (recursive, url): if recursive is None: @@ -42,7 +43,6 @@ def parseRecursive (recursive, url): def main (): parser = argparse.ArgumentParser(description='Save website to WARC using Google Chrome.') - parser.add_argument('--debug', help='Enable debug messages', action='store_true') parser.add_argument('--browser', help='DevTools URL', metavar='URL') parser.add_argument('--recursive', help='Follow links recursively') parser.add_argument('--concurrency', '-j', type=int, default=1) @@ -73,8 +73,7 @@ def main (): recursive=args.recursive, concurrency=args.concurrency) r = result.get () else: - level = logging.DEBUG if args.debug else logging.INFO - logging.basicConfig (level=level) + logger = Logger (consumer=[DatetimeConsumer (), JsonPrintConsumer ()]) try: recursionPolicy = parseRecursive (args.recursive, args.url) @@ -86,15 +85,16 @@ def main (): settings = ControllerSettings (maxBodySize=args.maxBodySize, logBuffer=args.logBuffer, idleTimeout=args.idleTimeout, timeout=args.timeout) - with open (args.output, 'wb') as fd: - handler = [StatsHandler (), WarcHandler (fd)] + with open (args.output, 'wb') as fd, WarcHandler (fd, logger) as warcHandler: + logger.connect (WarcHandlerConsumer (warcHandler)) + handler = [StatsHandler (), warcHandler] b = list (map (lambda x: behavior.availableMap[x], args.enabledBehaviorNames)) controller = RecursiveController (args.url, fd, settings=settings, recursionPolicy=recursionPolicy, 
service=service, - handler=handler, behavior=b) + handler=handler, behavior=b, logger=logger) controller.run () r = handler[0].stats - json.dump (r, sys.stdout) + logger.info ('stats', context='cli', uuid='24d92d16-770e-4088-b769-4020e127a7ff', **r) return True |