summary | refs | log | tree | commit | diff
path: root/contrib/celerycrocoite.py
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/celerycrocoite.py')
-rw-r--r-- contrib/celerycrocoite.py 176
1 files changed, 0 insertions, 176 deletions
diff --git a/contrib/celerycrocoite.py b/contrib/celerycrocoite.py
deleted file mode 100644
index 6a6ac1c..0000000
--- a/contrib/celerycrocoite.py
+++ /dev/null
@@ -1,176 +0,0 @@
-# Copyright (c) 2017 crocoite contributors
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-"""
-Module for Sopel IRC bot
-"""
-
-import os, logging
-from sopel.module import nickname_commands, require_chanmsg, thread, example, require_privilege, VOICE
-from sopel.tools import Identifier, SopelMemory
-import celery
-from urllib.parse import urlsplit
-
-from crocoite import behavior, cli, defaults
-
def prettyTimeDelta (seconds):
    """
    Pretty-print seconds to human readable string 1d 1h 1m 1s

    The value is truncated to whole seconds first. Units whose count is zero
    are left out, except that a duration of zero is rendered as '0s' instead
    of an empty string. A negative duration is rendered as its magnitude with
    a leading minus sign.

    :param seconds: duration in seconds (int or float)
    :return: space-separated string such as '1d 1h 1m 1s'
    """
    seconds = int (seconds)
    # divmod() floors, so feeding it a negative number would produce output
    # like "-1d 23h 59m 55s"; format the magnitude and restore the sign.
    sign = '-' if seconds < 0 else ''
    seconds = abs (seconds)
    days, seconds = divmod (seconds, 86400)
    hours, seconds = divmod (seconds, 3600)
    minutes, seconds = divmod (seconds, 60)
    units = ((days, 'd'), (hours, 'h'), (minutes, 'm'), (seconds, 's'))
    parts = ['{}{}'.format (count, suffix) for count, suffix in units if count != 0]
    if not parts:
        # every unit was zero; never hand an empty string back to the caller
        return '0s'
    return sign + ' '.join (parts)
-
def prettyBytes (b):
    """
    Pretty-print bytes

    Repeatedly divides by 1024 until the magnitude drops below 1024 or the
    largest known unit (TiB) is reached, then formats the value with one
    decimal place. Negative values (e.g. size differences) are scaled by
    their magnitude and keep their sign.

    :param b: number of bytes (int or float)
    :return: string such as '1.5 KiB'
    """
    units = ('B', 'KiB', 'MiB', 'GiB', 'TiB')
    unitIndex = 0
    # compare the magnitude so that negative values are scaled as well
    while abs (b) >= 1024 and unitIndex < len (units) - 1:
        b /= 1024
        unitIndex += 1
    return '{:.1f} {}'.format (b, units[unitIndex])
-
def setup (bot):
    """
    Create this module's storage in the bot's memory

    Stores a fresh SopelMemory under bot.memory['crocoite'] whose 'jobs'
    entry is an empty dict.
    """
    memory = SopelMemory ()
    memory['jobs'] = {}
    bot.memory['crocoite'] = memory
-
def isValidUrl (s):
    """
    Check whether s is an absolute http or https URL

    :param s: string to check
    :return: True if s has an http/https scheme and a non-empty network
        location, False otherwise. The result is always a real bool, never
        one of the (possibly empty) URL components.
    """
    try:
        url = urlsplit (s)
    except ValueError:
        # urlsplit raises on malformed input such as unbalanced IPv6
        # brackets; that is simply "not a valid URL" for our purposes
        return False
    return url.scheme in {'http', 'https'} and bool (url.netloc)
-
@nickname_commands ('ao', 'archiveonly')
@require_chanmsg ()
#@require_privilege (VOICE)
@thread (True)
@example ('ao http://example.com')
def archive (bot, trigger):
    """
    Archive a single page (no recursion) to WARC

    Validates the URL given with the command, queues a cli.archive task for
    it and announces the task id. The function then waits for the task's
    result and replies with either its statistics or a failure/revocation
    notice. While waiting, the job is listed in
    bot.memory['crocoite']['jobs'] under its task id; the entry is removed
    again when the wait ends.
    """
    # NOTE(review): the nesting below was reconstructed from a view of the
    # file in which indentation was lost. In particular logging.exception is
    # assumed to belong to the non-revoked failure branch; confirm.

    url = trigger.group (2)
    if not url:
        bot.reply ('Need a URL')
        return
    if not isValidUrl (url):
        bot.reply ('{} is not a valid URL'.format (url))
        return

    # behaviors that are never enabled for jobs started from IRC
    skippedBehavior = {'domSnapshot', 'screenshot'}
    taskArgs = {
        'url': url,
        'output': None,
        'enabledBehaviorNames': list (behavior.availableNames-skippedBehavior),
        'browser': None,
        'logBuffer': defaults.logBuffer,
        'maxBodySize': defaults.maxBodySize,
        'idleTimeout': 10,
        'timeout': 1*60*60, # 1 hour
        }

    handle = cli.archive.delay (**taskArgs)
    jobs = bot.memory['crocoite']['jobs']
    # XXX: for some reason we cannot access the job’s state through handle,
    # instead use a callback quirk
    job = {'handle': handle, 'trigger': trigger, 'state': {}}
    jobs[handle.id] = job

    def recordState (data):
        # keep the most recent message around so status() can report it
        job['state'] = data

    # pretty-print a few selected args
    shown = (
        ('behavior', ','.join (taskArgs['enabledBehaviorNames'])),
        ('idleTimeout', prettyTimeDelta (taskArgs['idleTimeout'])),
        ('timeout', prettyTimeDelta (taskArgs['timeout'])),
        ('maxBodySize', prettyBytes (taskArgs['maxBodySize'])),
        )
    summary = ', '.join ('{}={}'.format (key, value) for key, value in shown)
    bot.reply ('{} has been queued as {} with {}'.format (url, handle.id, summary))

    try:
        result = handle.get (on_message=recordState)
        stats = result['stats']
        bot.reply ('{} ({}) finished. {} requests, {} failed, {} received.'.format (
                url, handle.id, stats['requests'], stats['failed'],
                prettyBytes (stats['bytesRcv'])))
    except Exception as e:
        # json serialization does not work well with exceptions. If their class
        # names are unique we can still distinguish them.
        if type (e).__name__ == 'TaskRevokedError':
            bot.reply ('{} ({}) was revoked'.format (url, handle.id))
        else:
            bot.reply ('{} ({}) failed'.format (url, handle.id))
            logging.exception ('{} ({}) failed'.format (url, handle.id))
    finally:
        # the job is over either way; stop listing it
        del jobs[handle.id]
-
@nickname_commands ('s', 'status')
@example ('s c251f09e-3c26-481f-96e0-4b5f58bd1170')
@require_chanmsg ()
def status (bot, trigger):
    """
    Retrieve status for a job

    Looks up the job id given with the command in
    bot.memory['crocoite']['jobs'] and replies with the job's id, the
    'status' field of its last recorded state ('UNKNOWN' if there is none),
    the time it was queued and the nick that queued it. Replies
    "Job not found." if no id was given or the id is not known.
    """

    jobs = bot.memory['crocoite']['jobs']

    jobId = trigger.group (2)
    # One lookup instead of "in" followed by indexing: archive() deletes
    # its entry in a finally clause and is decorated with @thread (True), so
    # the entry may disappear between a membership test and the access.
    job = jobs.get (jobId) if jobId else None
    if job is None:
        bot.reply ("Job not found.")
        return

    jtrigger = job['trigger']
    jstate = job['state']
    bot.reply ('{}: {}, queued {}, by {}'.format (job['handle'].id,
            jstate.get ('status', 'UNKNOWN'), jtrigger.time, jtrigger.nick))
-
@nickname_commands ('r', 'revoke')
@example ('r c251f09e-3c26-481f-96e0-4b5f58bd1170')
@require_privilege (VOICE)
@require_chanmsg ()
def revoke (bot, trigger):
    """
    Cancel (revoke) a job

    Looks up the job id given with the command in
    bot.memory['crocoite']['jobs'] and revokes its task handle with
    terminate=True. Replies "Job not found." if no id was given or the id is
    not known. No reply is sent on success.
    """

    jobs = bot.memory['crocoite']['jobs']

    jobId = trigger.group (2)
    # One lookup instead of "in" followed by indexing: archive() removes its
    # entry when the job ends, so it may vanish between the two steps.
    job = jobs.get (jobId) if jobId else None
    if job is None:
        bot.reply ("Job not found.")
        return

    job['handle'].revoke (terminate=True)
    # response is handled by long-running initiation thread
- # response is handled by long-running initiation thread
-