diff options
Diffstat (limited to 'crocoite')
| -rw-r--r-- | crocoite/cli.py | 7 | ||||
| -rw-r--r-- | crocoite/controller.py | 9 | 
2 files changed, 12 insertions, 4 deletions
| diff --git a/crocoite/cli.py b/crocoite/cli.py index d9ebc4d..4e64b97 100644 --- a/crocoite/cli.py +++ b/crocoite/cli.py @@ -22,7 +22,7 @@  Command line interface  """ -import argparse, sys, signal, asyncio, os +import argparse, sys, signal, asyncio, os, json  from traceback import TracebackException  from enum import IntEnum  from yarl import URL @@ -72,6 +72,8 @@ def single ():              default=list (behavior.availableMap.keys ()),              choices=list (behavior.availableMap.keys ()),              metavar='NAME', nargs='*') +    parser.add_argument('--warcinfo', help='Add extra information to warcinfo record', +            metavar='JSON', type=json.loads)      parser.add_argument('url', help='Website URL', type=URL, metavar='URL')      parser.add_argument('output', help='WARC filename', metavar='FILE') @@ -89,7 +91,8 @@ def single ():          handler = [StatsHandler (), LogHandler (logger), warcHandler]          b = list (map (lambda x: behavior.availableMap[x], args.enabledBehaviorNames))          controller = SinglePageController (url=args.url, settings=settings, -                service=service, handler=handler, behavior=b, logger=logger) +                service=service, handler=handler, behavior=b, logger=logger, +                warcinfo=args.warcinfo)          try:              loop = asyncio.get_event_loop()              run = asyncio.ensure_future (controller.run ()) diff --git a/crocoite/controller.py b/crocoite/controller.py index 772bf44..9105997 100644 --- a/crocoite/controller.py +++ b/crocoite/controller.py @@ -103,17 +103,20 @@ class SinglePageController:      (stats, warc writer).      """ -    __slots__ = ('url', 'service', 'behavior', 'settings', 'logger', 'handler') +    __slots__ = ('url', 'service', 'behavior', 'settings', 'logger', 'handler', +            'warcinfo')      def __init__ (self, url, logger, \              service, behavior=cbehavior.available, \ -            settings=defaultSettings, handler=None): +            settings=defaultSettings, handler=None, \ +            warcinfo=None):          self.url = url          self.service = service          self.behavior = behavior          self.settings = settings          self.logger = logger.bind (context=type (self).__name__, url=url)          self.handler = handler or [] +        self.warcinfo = warcinfo      def processItem (self, item):          for h in self.handler: @@ -150,6 +153,8 @@ class SinglePageController:                          'behavior': list (map (attrgetter('name'), enabledBehavior)),                          },                      } +            if self.warcinfo: +                payload['extra'] = self.warcinfo              self.processItem (ControllerStart (payload))              await l.navigate (self.url) | 
