diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2019-05-04 21:15:20 +0300 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2019-05-05 18:55:40 +0200 |
commit | e21e16d753f6a7ba787fdb3a3cd1ef504de7f69a (patch) | |
tree | 45b0c2dc4aea4ff1934f2a9368ff20801575dcb6 | |
parent | 5ad1cc9ef693e4832fc3be7617efccc782a37e3f (diff) | |
download | crocoite-e21e16d753f6a7ba787fdb3a3cd1ef504de7f69a.tar.gz crocoite-e21e16d753f6a7ba787fdb3a3cd1ef504de7f69a.tar.bz2 crocoite-e21e16d753f6a7ba787fdb3a3cd1ef504de7f69a.zip |
irc: Add job info to warcinfo record
-rw-r--r-- | crocoite/controller.py | 6 | ||||
-rw-r--r-- | crocoite/irc.py | 22 |
2 files changed, 22 insertions, 6 deletions
diff --git a/crocoite/controller.py b/crocoite/controller.py
index 9105997..432d434 100644
--- a/crocoite/controller.py
+++ b/crocoite/controller.py
@@ -320,7 +320,11 @@ class RecursiveController:
         logger = self.logger.bind (url=url)
 
         def formatCommand (e):
-            return e.format (url=url, dest=dest.name)
+            # provide means to disable variable expansion
+            if e.startswith ('!'):
+                return e[1:]
+            else:
+                return e.format (url=url, dest=dest.name)
 
         def formatPrefix (p):
             return p.format (host=url.host, date=datetime.utcnow ().isoformat ())
diff --git a/crocoite/irc.py b/crocoite/irc.py
index 973d7d1..c9b8cd7 100644
--- a/crocoite/irc.py
+++ b/crocoite/irc.py
@@ -32,6 +32,8 @@ from functools import wraps
 import bottom
 import websockets
 
+from .util import StrJsonEncoder
+
 ### helper functions ###
 def prettyTimeDelta (seconds):
     """
@@ -445,15 +447,25 @@ class Chromebot (ArgparseBot):
 
         logger = self.logger.bind (job=j.id)
 
-        cmdline = ['crocoite-recursive', args.url, '--tempdir', self.tempdir,
-                '--prefix', j.id + '-{host}-{date}-', '--policy',
-                args.recursive, '--concurrency', str (args.concurrency),
-                self.destdir]
-
         showargs = {
                 'recursive': args.recursive,
                 'concurrency': args.concurrency,
                 }
+        warcinfo = {'chromebot': {
+                'jobid': j.id,
+                'user': user.name,
+                'queued': j.started,
+                'url': args.url,
+                }}
+        warcinfo['chromebot'].update (showargs)
+        # prefix warcinfo with !, so it won’t get expanded
+        cmdline = ['crocoite-recursive', args.url, '--tempdir', self.tempdir,
+                '--prefix', j.id + '-{host}-{date}-', '--policy',
+                args.recursive, '--concurrency', str (args.concurrency),
+                self.destdir, '--', 'crocoite-grab', '--warcinfo',
+                '!' + json.dumps (warcinfo, cls=StrJsonEncoder), '{url}',
+                '{dest}']
+
         strargs = ', '.join (map (lambda x: '{}={}'.format (*x), showargs.items ()))
         reply (f'{args.url} has been queued as {j.id} with {strargs}')
         logger.info ('queue', user=user.name, url=args.url, cmdline=cmdline,