summary refs log tree commit diff
diff options
context:
space:
mode:
author Lars-Dominik Braun <lars@6xq.net> 2019-05-04 21:15:20 +0300
committer Lars-Dominik Braun <lars@6xq.net> 2019-05-05 18:55:40 +0200
commit e21e16d753f6a7ba787fdb3a3cd1ef504de7f69a (patch)
tree 45b0c2dc4aea4ff1934f2a9368ff20801575dcb6
parent 5ad1cc9ef693e4832fc3be7617efccc782a37e3f (diff)
download crocoite-e21e16d753f6a7ba787fdb3a3cd1ef504de7f69a.tar.gz
crocoite-e21e16d753f6a7ba787fdb3a3cd1ef504de7f69a.tar.bz2
crocoite-e21e16d753f6a7ba787fdb3a3cd1ef504de7f69a.zip
irc: Add job info to warcinfo record
-rw-r--r-- crocoite/controller.py 6
-rw-r--r-- crocoite/irc.py 22
2 files changed, 22 insertions, 6 deletions
diff --git a/crocoite/controller.py b/crocoite/controller.py
index 9105997..432d434 100644
--- a/crocoite/controller.py
+++ b/crocoite/controller.py
@@ -320,7 +320,11 @@ class RecursiveController:
logger = self.logger.bind (url=url)
def formatCommand (e):
- return e.format (url=url, dest=dest.name)
+ # provide means to disable variable expansion
+ if e.startswith ('!'):
+ return e[1:]
+ else:
+ return e.format (url=url, dest=dest.name)
def formatPrefix (p):
return p.format (host=url.host, date=datetime.utcnow ().isoformat ())
diff --git a/crocoite/irc.py b/crocoite/irc.py
index 973d7d1..c9b8cd7 100644
--- a/crocoite/irc.py
+++ b/crocoite/irc.py
@@ -32,6 +32,8 @@ from functools import wraps
import bottom
import websockets
+from .util import StrJsonEncoder
+
### helper functions ###
def prettyTimeDelta (seconds):
"""
@@ -445,15 +447,25 @@ class Chromebot (ArgparseBot):
logger = self.logger.bind (job=j.id)
- cmdline = ['crocoite-recursive', args.url, '--tempdir', self.tempdir,
- '--prefix', j.id + '-{host}-{date}-', '--policy',
- args.recursive, '--concurrency', str (args.concurrency),
- self.destdir]
-
showargs = {
'recursive': args.recursive,
'concurrency': args.concurrency,
}
+ warcinfo = {'chromebot': {
+ 'jobid': j.id,
+ 'user': user.name,
+ 'queued': j.started,
+ 'url': args.url,
+ }}
+ warcinfo['chromebot'].update (showargs)
+ # prefix warcinfo with !, so it won’t get expanded
+ cmdline = ['crocoite-recursive', args.url, '--tempdir', self.tempdir,
+ '--prefix', j.id + '-{host}-{date}-', '--policy',
+ args.recursive, '--concurrency', str (args.concurrency),
+ self.destdir, '--', 'crocoite-grab', '--warcinfo',
+ '!' + json.dumps (warcinfo, cls=StrJsonEncoder), '{url}',
+ '{dest}']
+
strargs = ', '.join (map (lambda x: '{}={}'.format (*x), showargs.items ()))
reply (f'{args.url} has been queued as {j.id} with {strargs}')
logger.info ('queue', user=user.name, url=args.url, cmdline=cmdline,