summaryrefslogtreecommitdiff
path: root/contrib
diff options
context:
space:
mode:
Diffstat (limited to 'contrib')
-rw-r--r--contrib/celerycrocoite.py231
-rw-r--r--contrib/chromebot.json16
-rw-r--r--contrib/dashboard.css7
-rw-r--r--contrib/dashboard.html23
-rw-r--r--contrib/dashboard.js129
5 files changed, 175 insertions, 231 deletions
diff --git a/contrib/celerycrocoite.py b/contrib/celerycrocoite.py
deleted file mode 100644
index 26c35ce..0000000
--- a/contrib/celerycrocoite.py
+++ /dev/null
@@ -1,231 +0,0 @@
-# Copyright (c) 2017 crocoite contributors
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-"""
-Module for Sopel IRC bot
-"""
-
-import os, logging, argparse
-from sopel.module import nickname_commands, require_chanmsg, thread, example, require_privilege, VOICE
-from sopel.tools import Identifier, SopelMemory
-import celery, celery.exceptions
-from celery.result import AsyncResult
-from urllib.parse import urlsplit
-from threading import Thread
-from queue import Queue
-import queue
-
-from crocoite import behavior, task
-from crocoite.controller import defaultSettings
-
-def prettyTimeDelta (seconds):
- """
- Pretty-print seconds to human readable string 1d 1h 1m 1s
- """
- seconds = int(seconds)
- days, seconds = divmod(seconds, 86400)
- hours, seconds = divmod(seconds, 3600)
- minutes, seconds = divmod(seconds, 60)
- s = [(days, 'd'), (hours, 'h'), (minutes, 'm'), (seconds, 's')]
- s = filter (lambda x: x[0] != 0, s)
- return ' '.join (map (lambda x: '{}{}'.format (*x), s))
-
-def prettyBytes (b):
- """
- Pretty-print bytes
- """
- prefixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB']
- while b >= 1024 and len (prefixes) > 1:
- b /= 1024
- prefixes.pop (0)
- return '{:.1f} {}'.format (b, prefixes[0])
-
-def setup (bot):
- m = bot.memory['crocoite'] = {}
- q = m['q'] = Queue ()
- t = m['t'] = Thread (target=celeryWorker, args=(bot, q))
- t.start ()
-
-def shutdown (bot):
- m = bot.memory['crocoite']
- q = m['q']
- t = m['t']
- q.put_nowait (None)
- t.join ()
-
-def isValidUrl (s):
- url = urlsplit (s)
- if url.scheme and url.netloc and url.scheme in {'http', 'https'}:
- return s
- raise TypeError ()
-
-def checkCompletedJobs (bot, jobs):
- delete = set ()
- for i, data in jobs.items ():
- handle = data['handle']
- trigger = data['trigger']
- args = data['args']
- url = args['url']
- channel = trigger.sender
- user = trigger.nick
- if Identifier (channel) not in bot.channels:
- continue
- try:
- result = handle.get (timeout=0.1)
- stats = result['stats']
- bot.msg (channel, '{}: {} ({}) finished. {} requests, {} failed, {} received.'.format (user, url,
- handle.id, stats['requests'], stats['failed'],
- prettyBytes (stats['bytesRcv'])))
- delete.add (handle.id)
- except celery.exceptions.TimeoutError:
- pass
- except Exception as e:
- # json serialization does not work well with exceptions. If their class
- # names are unique we can still distinguish them.
- ename = type (e).__name__
- if ename == 'TaskRevokedError':
- bot.msg (channel, '{}: {} ({}) was revoked'.format (user, url, handle.id))
- else:
- bot.msg (channel, '{} ({}) failed'.format (user, url, handle.id))
- logging.exception ('{} ({}) failed'.format (url, handle.id))
- delete.add (handle.id)
- for d in delete:
- del jobs[d]
-
-def celeryWorker (bot, q):
- """
- Serialize celery operations in a single thread. This is a workaround for
- https://github.com/celery/celery/issues/4480
- """
-
- jobs = {}
-
- while True:
- try:
- item = q.get (timeout=1)
- except queue.Empty:
- checkCompletedJobs (bot, jobs)
- continue
-
- if item is None:
- break
- action, trigger, args = item
- if action == 'a':
- handle = task.controller.delay (**args)
- j = jobs[handle.id] = {'handle': handle, 'trigger': trigger, 'args': args}
-
- # pretty-print a few selected args
- showargs = {
- 'idleTimeout': prettyTimeDelta (args['settings']['idleTimeout']),
- 'timeout': prettyTimeDelta (args['settings']['timeout']),
- 'maxBodySize': prettyBytes (args['settings']['maxBodySize']),
- 'recursive': args['recursive'],
- 'concurrency': args['concurrency'],
- }
- strargs = ', '.join (map (lambda x: '{}={}'.format (*x), showargs.items ()))
- bot.msg (trigger.sender, '{}: {} has been queued as {} with {}'.format (trigger.nick, args['url'], handle.id, strargs))
- elif action == 'status':
- if args and args in jobs:
- j = jobs[args]
- jtrigger = j['trigger']
- handle = j['handle']
- bot.msg (trigger.sender, '{}: {}, queued {}, by {}'.format (handle.id,
- handle.status, jtrigger.time, jtrigger.nick))
- else:
- bot.msg (trigger.sender, "Job not found.")
- elif action == 'revoke':
- if args and args in jobs:
- j = jobs[args]
- handle = j['handle']
- handle.revoke (terminate=True)
- # response is handled above
- else:
- bot.msg (trigger.sender, "Job not found.")
- q.task_done ()
-
-class NonExitingArgumentParser (argparse.ArgumentParser):
- def exit (self, status=0, message=None):
- # should never be called
- pass
-
- def error (self, message):
- raise Exception (message)
-
-archiveparser = NonExitingArgumentParser (prog='a', add_help=False)
-archiveparser.add_argument('--timeout', default=1*60*60, type=int, help='Maximum time for archival', metavar='SEC', choices=[60, 1*60*60, 2*60*60])
-archiveparser.add_argument('--idle-timeout', default=10, type=int, help='Maximum idle seconds (i.e. no requests)', dest='idleTimeout', metavar='SEC', choices=[1, 10, 20, 30, 60])
-archiveparser.add_argument('--max-body-size', default=defaultSettings.maxBodySize, type=int, dest='maxBodySize', help='Max body size', metavar='BYTES', choices=[1*1024*1024, 10*1024*1024, defaultSettings.maxBodySize, 100*1024*1024])
-archiveparser.add_argument('--concurrency', default=1, type=int, help='Parallel workers for this job', choices=range (9))
-archiveparser.add_argument('--recursive', help='Enable recursion', choices=['0', '1', '2', '3', 'prefix'])
-archiveparser.add_argument('url', help='Website URL', type=isValidUrl)
-
-@nickname_commands ('a', 'archive')
-@require_chanmsg ()
-@require_privilege (VOICE)
-@example ('a http://example.com')
-def archive (bot, trigger):
- """
- Archive a URL to WARC
- """
-
- try:
- args = archiveparser.parse_args (trigger.group (2).split ())
- except Exception as e:
- bot.reply ('{} -- {}'.format (e.args[0], archiveparser.format_usage ()))
- return
- if not args:
- bot.reply ('Sorry, I don’t understand {}'.format (trigger.group (2)))
- return
- blacklistedBehavior = {'domSnapshot', 'screenshot'}
- settings = dict (maxBodySize=args.maxBodySize,
- logBuffer=defaultSettings.logBuffer, idleTimeout=args.idleTimeout,
- timeout=args.timeout)
- args = dict (url=args.url,
- enabledBehaviorNames=list (behavior.availableNames-blacklistedBehavior),
- settings=settings, recursive=args.recursive,
- concurrency=args.concurrency)
- q = bot.memory['crocoite']['q']
- q.put_nowait (('a', trigger, args))
-
-@nickname_commands ('s', 'status')
-@example ('s c251f09e-3c26-481f-96e0-4b5f58bd1170')
-@require_chanmsg ()
-def status (bot, trigger):
- """
- Retrieve status for a job
- """
-
- i = trigger.group(2)
- q = bot.memory['crocoite']['q']
- q.put_nowait (('status', trigger, i))
-
-@nickname_commands ('r', 'revoke')
-@example ('r c251f09e-3c26-481f-96e0-4b5f58bd1170')
-@require_privilege (VOICE)
-@require_chanmsg ()
-def revoke (bot, trigger):
- """
- Cancel (revoke) a job
- """
-
- i = trigger.group(2)
- q = bot.memory['crocoite']['q']
- q.put_nowait (('revoke', trigger, i))
-
diff --git a/contrib/chromebot.json b/contrib/chromebot.json
new file mode 100644
index 0000000..214b770
--- /dev/null
+++ b/contrib/chromebot.json
@@ -0,0 +1,16 @@
+{
+ "irc": {
+ "host": "irc.example.com",
+ "port": 6667,
+ "ssl": false,
+ "nick": "chromebot",
+ "channels": ["#testchannel"]
+ },
+ "tempdir": "/path/to/tmp",
+ "destdir": "/path/to/warc",
+ "process_limit": 1
+ "blacklist": {
+ "^https?://(.+\\.)?local(host)?/": "Not acceptable"
+ },
+ "need_voice": false
+}
diff --git a/contrib/dashboard.css b/contrib/dashboard.css
new file mode 100644
index 0000000..469db78
--- /dev/null
+++ b/contrib/dashboard.css
@@ -0,0 +1,7 @@
+.jid {
+ font-family: Consolas, mono;
+}
+.job .urls {
+ max-height: 10em;
+ overflow-y: scroll;
+}
diff --git a/contrib/dashboard.html b/contrib/dashboard.html
new file mode 100644
index 0000000..49a15bc
--- /dev/null
+++ b/contrib/dashboard.html
@@ -0,0 +1,23 @@
+<!DOCTYPE html>
+<html lang="en">
+ <head>
+ <meta charset="utf-8">
+ <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
+ <title>chromebot dashboard</title>
+ <!--<script src="https://cdn.jsdelivr.net/npm/vue@2/dist/vue.js"></script>-->
+ <script src="https://cdn.jsdelivr.net/npm/vue@2/dist/vue.min.js"></script>
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bulma@0.7/css/bulma.min.css">
+ <link rel="stylesheet" href="dashboard.css">
+ </head>
+ <body>
+ <noscript>Please enable JavaScript.</noscript>
+ <section id="app" class="section">
+ <h1 class="title">chromebot dashboard</h1>
+ <bot-status v-bind:jobs="jobs"></bot-status>
+ <div class="jobs">
+ <job-item v-for="j in jobs" v-bind:job="j" v-bind:jobs="jobs" v-bind:key="j.id"></job-item>
+ </div>
+ </section>
+ <script src="dashboard.js"></script>
+ </body>
+</html>
diff --git a/contrib/dashboard.js b/contrib/dashboard.js
new file mode 100644
index 0000000..b5520dc
--- /dev/null
+++ b/contrib/dashboard.js
@@ -0,0 +1,129 @@
+/* configuration */
+let socket = "wss://localhost:6789/",
+ urllogMax = 100;
+
+function formatSize (bytes) {
+ let prefixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB'];
+ while (bytes >= 1024 && prefixes.length > 1) {
+ bytes /= 1024;
+ prefixes.shift ();
+ }
+ return bytes.toFixed (1) + ' ' + prefixes[0];
+}
+
+class Job {
+ constructor (id, url, user, queued) {
+ this.id = id;
+ this.url = url;
+ this.user = user;
+ this.status = undefined;
+ this.stats = {'pending': 0, 'have': 0, 'running': 0,
+ 'requests': 0, 'finished': 0, 'failed': 0,
+ 'bytesRcv': 0, 'crashed': 0, 'ignored': 0};
+ this.urllog = [];
+ this.queued = queued;
+ this.started = undefined;
+ this.finished = undefined;
+ this.aborted = undefined;
+ }
+
+ addUrl (url) {
+ if (this.urllog.push (url) > urllogMax) {
+ this.urllog.shift ();
+ }
+ }
+}
+
+let jobs = {};
+let ws = new WebSocket(socket);
+ws.onmessage = function (event) {
+ var msg = JSON.parse (event.data);
+ let msgdate = new Date (Date.parse (msg.date));
+ var j = undefined;
+ if (msg.job) {
+ j = jobs[msg.job];
+ if (j === undefined) {
+ j = new Job (msg.job, 'unknown', '<unknown>', new Date ());
+ Vue.set (jobs, msg.job, j);
+ }
+ }
+ if (msg.uuid == '36cc34a6-061b-4cc5-84a9-4ab6552c8d75') {
+ j = new Job (msg.job, msg.url, msg.user, msgdate);
+ /* jobs[msg.job] = j does not work with vue, see
+ https://vuejs.org/v2/guide/list.html#Object-Change-Detection-Caveats
+ */
+ Vue.set (jobs, msg.job, j);
+ j.status = 'pending';
+ } else if (msg.uuid == '46e62d60-f498-4ab0-90e1-d08a073b10fb') {
+ j.status = 'running';
+ j.started = msgdate;
+ } else if (msg.uuid == '7b40ffbb-faab-4224-90ed-cd4febd8f7ec') {
+ j.status = 'finished';
+ j.finished = msgdate;
+ } else if (msg.uuid == '865b3b3e-a54a-4a56-a545-f38a37bac295') {
+ j.status = 'aborted';
+ j.aborted = msgdate;
+ } else if (msg.uuid == '5c0f9a11-dcd8-4182-a60f-54f4d3ab3687') {
+ /* forwarded job message */
+ let rmsg = msg.data;
+ if (rmsg.uuid == '24d92d16-770e-4088-b769-4020e127a7ff') {
+ /* job status */
+ Object.assign (j.stats, rmsg);
+ } else if (rmsg.uuid == '5b8498e4-868d-413c-a67e-004516b8452c') {
+ /* recursion status */
+ Object.assign (j.stats, rmsg);
+ } else if (rmsg.uuid == 'd1288fbe-8bae-42c8-af8c-f2fa8b41794f') {
+ /* fetch */
+ j.addUrl (rmsg.url);
+ }
+ }
+};
+ws.onopen = function (event) {
+};
+ws.onerror = function (event) {
+};
+
+Vue.component('job-item', {
+ props: ['job', 'jobs'],
+ template: '<div class="job box" :id="job.id"><ul class="columns"><li class="jid column is-narrow"><a :href="\'#\' + job.id">{{ job.id }}</a></li><li class="url column"><a :href="job.url">{{ job.url }}</a></li><li class="status column is-narrow"><job-status v-bind:job="job"></job-status></li></ul><job-stats v-bind:job="job"></job-stats></div>',
+});
+Vue.component('job-status', {
+ props: ['job'],
+ template: '<span v-if="job.status == \'pending\'">queued on {{ job.queued.toLocaleString() }}</span><span v-else-if="job.status == \'aborted\'">aborted on {{ job.aborted.toLocaleString() }}</span><span v-else-if="job.status == \'running\'">running since {{ job.started.toLocaleString() }}</span><span v-else-if="job.status == \'finished\'">finished since {{ job.finished.toLocaleString() }}</span>'
+});
+Vue.component('job-stats', {
+ props: ['job'],
+ template: '<div><progress class="progress is-info" :value="job.stats.have" :max="job.stats.have+job.stats.pending+job.stats.running"></progress><ul class="stats columns"><li class="column">{{ job.stats.have }} <small>have</small></li><li class="column">{{ job.stats.running }} <small>running</small></li><li class="column">{{ job.stats.pending }} <small>pending</small></li><li class="column">{{ job.stats.requests }} <small>requests</small><li class="column"><filesize v-bind:value="job.stats.bytesRcv"></filesize></li></ul><job-urls v-bind:job="job"></job-urls></div>'
+});
+Vue.component('job-urls', {
+ props: ['job'],
+ template: '<ul class="urls"><li v-for="u in job.urllog">{{ u }}</li></ul>'
+});
+Vue.component('filesize', {
+ props: ['value'],
+ template: '<span class="filesize">{{ fvalue }}</span>',
+ computed: { fvalue: function () { return formatSize (this.value); } }
+});
+Vue.component('bot-status', {
+ props: ['jobs'],
+ template: '<nav class="level"><div class="level-item has-text-centered"><div><p class="heading">Pending</p><p class="title">{{ stats.pending }}</p></div></div><div class="level-item has-text-centered"><div><p class="heading">Running</p><p class="title">{{ stats.running }}</p></div></div><div class="level-item has-text-centered"><div><p class="heading">Finished</p><p class="title">{{ stats.finished+stats.aborted }}</p></div></div><div class="level-item has-text-centered"><div><p class="heading">Transferred</p><p class="title"><filesize v-bind:value="stats.totalBytes"></filesize></p></div></div></nav>',
+ computed: {
+ stats: function () {
+ let s = {pending: 0, running: 0, finished: 0, aborted: 0, totalBytes: 0};
+ for (let k in this.jobs) {
+ let j = this.jobs[k];
+ s[j.status]++;
+ s.totalBytes += j.stats.bytesRcv;
+ }
+ return s;
+ }
+ }
+});
+
+let app = new Vue({
+ el: '#app',
+ data: {
+ jobs: jobs,
+ }
+});
+