From 5ad1cc9ef693e4832fc3be7617efccc782a37e3f Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 4 May 2019 19:05:54 +0300 Subject: cli: Allow adding extra data to warcinfo record --- crocoite/cli.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'crocoite/cli.py') diff --git a/crocoite/cli.py b/crocoite/cli.py index d9ebc4d..4e64b97 100644 --- a/crocoite/cli.py +++ b/crocoite/cli.py @@ -22,7 +22,7 @@ Command line interface """ -import argparse, sys, signal, asyncio, os +import argparse, sys, signal, asyncio, os, json from traceback import TracebackException from enum import IntEnum from yarl import URL @@ -72,6 +72,8 @@ def single (): default=list (behavior.availableMap.keys ()), choices=list (behavior.availableMap.keys ()), metavar='NAME', nargs='*') + parser.add_argument('--warcinfo', help='Add extra information to warcinfo record', + metavar='JSON', type=json.loads) parser.add_argument('url', help='Website URL', type=URL, metavar='URL') parser.add_argument('output', help='WARC filename', metavar='FILE') @@ -89,7 +91,8 @@ def single (): handler = [StatsHandler (), LogHandler (logger), warcHandler] b = list (map (lambda x: behavior.availableMap[x], args.enabledBehaviorNames)) controller = SinglePageController (url=args.url, settings=settings, - service=service, handler=handler, behavior=b, logger=logger) + service=service, handler=handler, behavior=b, logger=logger, + warcinfo=args.warcinfo) try: loop = asyncio.get_event_loop() run = asyncio.ensure_future (controller.run ()) -- cgit v1.2.3