diff options
Diffstat (limited to 'contrib')
-rw-r--r-- | contrib/celerycrocoite.py | 231 | ||||
-rw-r--r-- | contrib/chromebot.json | 16 | ||||
-rw-r--r-- | contrib/dashboard.css | 7 | ||||
-rw-r--r-- | contrib/dashboard.html | 23 | ||||
-rw-r--r-- | contrib/dashboard.js | 129 |
5 files changed, 175 insertions, 231 deletions
diff --git a/contrib/celerycrocoite.py b/contrib/celerycrocoite.py deleted file mode 100644 index 26c35ce..0000000 --- a/contrib/celerycrocoite.py +++ /dev/null @@ -1,231 +0,0 @@ -# Copyright (c) 2017 crocoite contributors -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - -""" -Module for Sopel IRC bot -""" - -import os, logging, argparse -from sopel.module import nickname_commands, require_chanmsg, thread, example, require_privilege, VOICE -from sopel.tools import Identifier, SopelMemory -import celery, celery.exceptions -from celery.result import AsyncResult -from urllib.parse import urlsplit -from threading import Thread -from queue import Queue -import queue - -from crocoite import behavior, task -from crocoite.controller import defaultSettings - -def prettyTimeDelta (seconds): - """ - Pretty-print seconds to human readable string 1d 1h 1m 1s - """ - seconds = int(seconds) - days, seconds = divmod(seconds, 86400) - hours, seconds = divmod(seconds, 3600) - minutes, seconds = divmod(seconds, 60) - s = [(days, 'd'), (hours, 'h'), (minutes, 'm'), (seconds, 's')] - s = filter (lambda x: x[0] != 0, s) - return ' '.join (map (lambda x: '{}{}'.format (*x), s)) - -def prettyBytes (b): - """ - Pretty-print bytes - """ - prefixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB'] - while b >= 1024 and len (prefixes) > 1: - b /= 1024 - prefixes.pop (0) - return '{:.1f} {}'.format (b, prefixes[0]) - -def setup (bot): - m = bot.memory['crocoite'] = {} - q = m['q'] = Queue () - t = m['t'] = Thread (target=celeryWorker, args=(bot, q)) - t.start () - -def shutdown (bot): - m = bot.memory['crocoite'] - q = m['q'] - t = m['t'] - q.put_nowait (None) - t.join () - -def isValidUrl (s): - url = urlsplit (s) - if url.scheme and url.netloc and url.scheme in {'http', 'https'}: - return s - raise TypeError () - -def checkCompletedJobs (bot, jobs): - delete = set () - for i, data in jobs.items (): - handle = data['handle'] - trigger = data['trigger'] - args = data['args'] - url = args['url'] - channel = trigger.sender - user = trigger.nick - if Identifier (channel) not in bot.channels: - continue - try: - result = handle.get (timeout=0.1) - stats = result['stats'] - bot.msg (channel, '{}: {} ({}) finished. {} requests, {} failed, {} received.'.format (user, url, - handle.id, stats['requests'], stats['failed'], - prettyBytes (stats['bytesRcv']))) - delete.add (handle.id) - except celery.exceptions.TimeoutError: - pass - except Exception as e: - # json serialization does not work well with exceptions. If their class - # names are unique we can still distinguish them. - ename = type (e).__name__ - if ename == 'TaskRevokedError': - bot.msg (channel, '{}: {} ({}) was revoked'.format (user, url, handle.id)) - else: - bot.msg (channel, '{} ({}) failed'.format (user, url, handle.id)) - logging.exception ('{} ({}) failed'.format (url, handle.id)) - delete.add (handle.id) - for d in delete: - del jobs[d] - -def celeryWorker (bot, q): - """ - Serialize celery operations in a single thread. This is a workaround for - https://github.com/celery/celery/issues/4480 - """ - - jobs = {} - - while True: - try: - item = q.get (timeout=1) - except queue.Empty: - checkCompletedJobs (bot, jobs) - continue - - if item is None: - break - action, trigger, args = item - if action == 'a': - handle = task.controller.delay (**args) - j = jobs[handle.id] = {'handle': handle, 'trigger': trigger, 'args': args} - - # pretty-print a few selected args - showargs = { - 'idleTimeout': prettyTimeDelta (args['settings']['idleTimeout']), - 'timeout': prettyTimeDelta (args['settings']['timeout']), - 'maxBodySize': prettyBytes (args['settings']['maxBodySize']), - 'recursive': args['recursive'], - 'concurrency': args['concurrency'], - } - strargs = ', '.join (map (lambda x: '{}={}'.format (*x), showargs.items ())) - bot.msg (trigger.sender, '{}: {} has been queued as {} with {}'.format (trigger.nick, args['url'], handle.id, strargs)) - elif action == 'status': - if args and args in jobs: - j = jobs[args] - jtrigger = j['trigger'] - handle = j['handle'] - bot.msg (trigger.sender, '{}: {}, queued {}, by {}'.format (handle.id, - handle.status, jtrigger.time, jtrigger.nick)) - else: - bot.msg (trigger.sender, "Job not found.") - elif action == 'revoke': - if args and args in jobs: - j = jobs[args] - handle = j['handle'] - handle.revoke (terminate=True) - # response is handled above - else: - bot.msg (trigger.sender, "Job not found.") - q.task_done () - -class NonExitingArgumentParser (argparse.ArgumentParser): - def exit (self, status=0, message=None): - # should never be called - pass - - def error (self, message): - raise Exception (message) - -archiveparser = NonExitingArgumentParser (prog='a', add_help=False) -archiveparser.add_argument('--timeout', default=1*60*60, type=int, help='Maximum time for archival', metavar='SEC', choices=[60, 1*60*60, 2*60*60]) -archiveparser.add_argument('--idle-timeout', default=10, type=int, help='Maximum idle seconds (i.e. no requests)', dest='idleTimeout', metavar='SEC', choices=[1, 10, 20, 30, 60]) -archiveparser.add_argument('--max-body-size', default=defaultSettings.maxBodySize, type=int, dest='maxBodySize', help='Max body size', metavar='BYTES', choices=[1*1024*1024, 10*1024*1024, defaultSettings.maxBodySize, 100*1024*1024]) -archiveparser.add_argument('--concurrency', default=1, type=int, help='Parallel workers for this job', choices=range (9)) -archiveparser.add_argument('--recursive', help='Enable recursion', choices=['0', '1', '2', '3', 'prefix']) -archiveparser.add_argument('url', help='Website URL', type=isValidUrl) - -@nickname_commands ('a', 'archive') -@require_chanmsg () -@require_privilege (VOICE) -@example ('a http://example.com') -def archive (bot, trigger): - """ - Archive a URL to WARC - """ - - try: - args = archiveparser.parse_args (trigger.group (2).split ()) - except Exception as e: - bot.reply ('{} -- {}'.format (e.args[0], archiveparser.format_usage ())) - return - if not args: - bot.reply ('Sorry, I don’t understand {}'.format (trigger.group (2))) - return - blacklistedBehavior = {'domSnapshot', 'screenshot'} - settings = dict (maxBodySize=args.maxBodySize, - logBuffer=defaultSettings.logBuffer, idleTimeout=args.idleTimeout, - timeout=args.timeout) - args = dict (url=args.url, - enabledBehaviorNames=list (behavior.availableNames-blacklistedBehavior), - settings=settings, recursive=args.recursive, - concurrency=args.concurrency) - q = bot.memory['crocoite']['q'] - q.put_nowait (('a', trigger, args)) - -@nickname_commands ('s', 'status') -@example ('s c251f09e-3c26-481f-96e0-4b5f58bd1170') -@require_chanmsg () -def status (bot, trigger): - """ - Retrieve status for a job - """ - - i = trigger.group(2) - q = bot.memory['crocoite']['q'] - q.put_nowait (('status', trigger, i)) - -@nickname_commands ('r', 'revoke') -@example ('r c251f09e-3c26-481f-96e0-4b5f58bd1170') -@require_privilege (VOICE) -@require_chanmsg () -def revoke (bot, trigger): - """ - Cancel (revoke) a job - """ - - i = trigger.group(2) - q = bot.memory['crocoite']['q'] - q.put_nowait (('revoke', trigger, i)) - diff --git a/contrib/chromebot.json b/contrib/chromebot.json new file mode 100644 index 0000000..214b770 --- /dev/null +++ b/contrib/chromebot.json @@ -0,0 +1,16 @@ +{ + "irc": { + "host": "irc.example.com", + "port": 6667, + "ssl": false, + "nick": "chromebot", + "channels": ["#testchannel"] + }, + "tempdir": "/path/to/tmp", + "destdir": "/path/to/warc", + "process_limit": 1 + "blacklist": { + "^https?://(.+\\.)?local(host)?/": "Not acceptable" + }, + "need_voice": false +} diff --git a/contrib/dashboard.css b/contrib/dashboard.css new file mode 100644 index 0000000..469db78 --- /dev/null +++ b/contrib/dashboard.css @@ -0,0 +1,7 @@ +.jid { + font-family: Consolas, mono; +} +.job .urls { + max-height: 10em; + overflow-y: scroll; +} diff --git a/contrib/dashboard.html b/contrib/dashboard.html new file mode 100644 index 0000000..49a15bc --- /dev/null +++ b/contrib/dashboard.html @@ -0,0 +1,23 @@ +<!DOCTYPE html> +<html lang="en"> + <head> + <meta charset="utf-8"> + <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"> + <title>chromebot dashboard</title> + <!--<script src="https://cdn.jsdelivr.net/npm/vue@2/dist/vue.js"></script>--> + <script src="https://cdn.jsdelivr.net/npm/vue@2/dist/vue.min.js"></script> + <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.7/css/bulma.min.css"> + <link rel="stylesheet" href="dashboard.css"> + </head> + <body> + <noscript>Please enable JavaScript.</noscript> + <section id="app" class="section"> + <h1 class="title">chromebot dashboard</h1> + <bot-status v-bind:jobs="jobs"></bot-status> + <div class="jobs"> + <job-item v-for="j in jobs" v-bind:job="j" v-bind:jobs="jobs" v-bind:key="j.id"></job-item> + </div> + </section> + <script src="dashboard.js"></script> + </body> +</html> diff --git a/contrib/dashboard.js b/contrib/dashboard.js new file mode 100644 index 0000000..b5520dc --- /dev/null +++ b/contrib/dashboard.js @@ -0,0 +1,129 @@ +/* configuration */ +let socket = "wss://localhost:6789/", + urllogMax = 100; + +function formatSize (bytes) { + let prefixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB']; + while (bytes >= 1024 && prefixes.length > 1) { + bytes /= 1024; + prefixes.shift (); + } + return bytes.toFixed (1) + ' ' + prefixes[0]; +} + +class Job { + constructor (id, url, user, queued) { + this.id = id; + this.url = url; + this.user = user; + this.status = undefined; + this.stats = {'pending': 0, 'have': 0, 'running': 0, + 'requests': 0, 'finished': 0, 'failed': 0, + 'bytesRcv': 0, 'crashed': 0, 'ignored': 0}; + this.urllog = []; + this.queued = queued; + this.started = undefined; + this.finished = undefined; + this.aborted = undefined; + } + + addUrl (url) { + if (this.urllog.push (url) > urllogMax) { + this.urllog.shift (); + } + } +} + +let jobs = {}; +let ws = new WebSocket(socket); +ws.onmessage = function (event) { + var msg = JSON.parse (event.data); + let msgdate = new Date (Date.parse (msg.date)); + var j = undefined; + if (msg.job) { + j = jobs[msg.job]; + if (j === undefined) { + j = new Job (msg.job, 'unknown', '<unknown>', new Date ()); + Vue.set (jobs, msg.job, j); + } + } + if (msg.uuid == '36cc34a6-061b-4cc5-84a9-4ab6552c8d75') { + j = new Job (msg.job, msg.url, msg.user, msgdate); + /* jobs[msg.job] = j does not work with vue, see + https://vuejs.org/v2/guide/list.html#Object-Change-Detection-Caveats + */ + Vue.set (jobs, msg.job, j); + j.status = 'pending'; + } else if (msg.uuid == '46e62d60-f498-4ab0-90e1-d08a073b10fb') { + j.status = 'running'; + j.started = msgdate; + } else if (msg.uuid == '7b40ffbb-faab-4224-90ed-cd4febd8f7ec') { + j.status = 'finished'; + j.finished = msgdate; + } else if (msg.uuid == '865b3b3e-a54a-4a56-a545-f38a37bac295') { + j.status = 'aborted'; + j.aborted = msgdate; + } else if (msg.uuid == '5c0f9a11-dcd8-4182-a60f-54f4d3ab3687') { + /* forwarded job message */ + let rmsg = msg.data; + if (rmsg.uuid == '24d92d16-770e-4088-b769-4020e127a7ff') { + /* job status */ + Object.assign (j.stats, rmsg); + } else if (rmsg.uuid == '5b8498e4-868d-413c-a67e-004516b8452c') { + /* recursion status */ + Object.assign (j.stats, rmsg); + } else if (rmsg.uuid == 'd1288fbe-8bae-42c8-af8c-f2fa8b41794f') { + /* fetch */ + j.addUrl (rmsg.url); + } + } +}; +ws.onopen = function (event) { +}; +ws.onerror = function (event) { +}; + +Vue.component('job-item', { + props: ['job', 'jobs'], + template: '<div class="job box" :id="job.id"><ul class="columns"><li class="jid column is-narrow"><a :href="\'#\' + job.id">{{ job.id }}</a></li><li class="url column"><a :href="job.url">{{ job.url }}</a></li><li class="status column is-narrow"><job-status v-bind:job="job"></job-status></li></ul><job-stats v-bind:job="job"></job-stats></div>', +}); +Vue.component('job-status', { + props: ['job'], + template: '<span v-if="job.status == \'pending\'">queued on {{ job.queued.toLocaleString() }}</span><span v-else-if="job.status == \'aborted\'">aborted on {{ job.aborted.toLocaleString() }}</span><span v-else-if="job.status == \'running\'">running since {{ job.started.toLocaleString() }}</span><span v-else-if="job.status == \'finished\'">finished since {{ job.finished.toLocaleString() }}</span>' +}); +Vue.component('job-stats', { + props: ['job'], + template: '<div><progress class="progress is-info" :value="job.stats.have" :max="job.stats.have+job.stats.pending+job.stats.running"></progress><ul class="stats columns"><li class="column">{{ job.stats.have }} <small>have</small></li><li class="column">{{ job.stats.running }} <small>running</small></li><li class="column">{{ job.stats.pending }} <small>pending</small></li><li class="column">{{ job.stats.requests }} <small>requests</small><li class="column"><filesize v-bind:value="job.stats.bytesRcv"></filesize></li></ul><job-urls v-bind:job="job"></job-urls></div>' +}); +Vue.component('job-urls', { + props: ['job'], + template: '<ul class="urls"><li v-for="u in job.urllog">{{ u }}</li></ul>' +}); +Vue.component('filesize', { + props: ['value'], + template: '<span class="filesize">{{ fvalue }}</span>', + computed: { fvalue: function () { return formatSize (this.value); } } +}); +Vue.component('bot-status', { + props: ['jobs'], + template: '<nav class="level"><div class="level-item has-text-centered"><div><p class="heading">Pending</p><p class="title">{{ stats.pending }}</p></div></div><div class="level-item has-text-centered"><div><p class="heading">Running</p><p class="title">{{ stats.running }}</p></div></div><div class="level-item has-text-centered"><div><p class="heading">Finished</p><p class="title">{{ stats.finished+stats.aborted }}</p></div></div><div class="level-item has-text-centered"><div><p class="heading">Transferred</p><p class="title"><filesize v-bind:value="stats.totalBytes"></filesize></p></div></div></nav>', + computed: { + stats: function () { + let s = {pending: 0, running: 0, finished: 0, aborted: 0, totalBytes: 0}; + for (let k in this.jobs) { + let j = this.jobs[k]; + s[j.status]++; + s.totalBytes += j.stats.bytesRcv; + } + return s; + } + } +}); + +let app = new Vue({ + el: '#app', + data: { + jobs: jobs, + } +}); + |