From e21e16d753f6a7ba787fdb3a3cd1ef504de7f69a Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 4 May 2019 21:15:20 +0300 Subject: irc: Add job info to warcinfo record --- crocoite/controller.py | 6 +++++- crocoite/irc.py | 22 +++++++++++++++++----- 2 files changed, 22 insertions(+), 6 deletions(-) (limited to 'crocoite') diff --git a/crocoite/controller.py b/crocoite/controller.py index 9105997..432d434 100644 --- a/crocoite/controller.py +++ b/crocoite/controller.py @@ -320,7 +320,11 @@ class RecursiveController: logger = self.logger.bind (url=url) def formatCommand (e): - return e.format (url=url, dest=dest.name) + # provide means to disable variable expansion + if e.startswith ('!'): + return e[1:] + else: + return e.format (url=url, dest=dest.name) def formatPrefix (p): return p.format (host=url.host, date=datetime.utcnow ().isoformat ()) diff --git a/crocoite/irc.py b/crocoite/irc.py index 973d7d1..c9b8cd7 100644 --- a/crocoite/irc.py +++ b/crocoite/irc.py @@ -32,6 +32,8 @@ from functools import wraps import bottom import websockets +from .util import StrJsonEncoder + ### helper functions ### def prettyTimeDelta (seconds): """ @@ -445,15 +447,25 @@ class Chromebot (ArgparseBot): logger = self.logger.bind (job=j.id) - cmdline = ['crocoite-recursive', args.url, '--tempdir', self.tempdir, - '--prefix', j.id + '-{host}-{date}-', '--policy', - args.recursive, '--concurrency', str (args.concurrency), - self.destdir] - showargs = { 'recursive': args.recursive, 'concurrency': args.concurrency, } + warcinfo = {'chromebot': { + 'jobid': j.id, + 'user': user.name, + 'queued': j.started, + 'url': args.url, + }} + warcinfo['chromebot'].update (showargs) + # prefix warcinfo with !, so it won’t get expanded + cmdline = ['crocoite-recursive', args.url, '--tempdir', self.tempdir, + '--prefix', j.id + '-{host}-{date}-', '--policy', + args.recursive, '--concurrency', str (args.concurrency), + self.destdir, '--', 'crocoite-grab', '--warcinfo', + '!' + json.dumps (warcinfo, cls=StrJsonEncoder), '{url}', + '{dest}'] + strargs = ', '.join (map (lambda x: '{}={}'.format (*x), showargs.items ())) reply (f'{args.url} has been queued as {j.id} with {strargs}') logger.info ('queue', user=user.name, url=args.url, cmdline=cmdline, -- cgit v1.2.3