diff options
Diffstat (limited to 'contrib')
| -rw-r--r-- | contrib/celerycrocoite.py | 50 | 
1 files changed, 33 insertions, 17 deletions
| diff --git a/contrib/celerycrocoite.py b/contrib/celerycrocoite.py index 3d8c786..b92e5c8 100644 --- a/contrib/celerycrocoite.py +++ b/contrib/celerycrocoite.py @@ -22,7 +22,7 @@  Module for Sopel IRC bot  """ -import os, logging +import os, logging, argparse  from sopel.module import nickname_commands, require_chanmsg, thread, example, require_privilege, VOICE  from sopel.tools import Identifier, SopelMemory  import celery, celery.exceptions @@ -72,7 +72,9 @@ def shutdown (bot):  def isValidUrl (s):      url = urlsplit (s) -    return url.scheme and url.netloc and url.scheme in {'http', 'https'} +    if url.scheme and url.netloc and url.scheme in {'http', 'https'}: +        return s +    raise TypeError ()  def checkCompletedJobs (bot, jobs):      delete = set () @@ -125,7 +127,7 @@ def celeryWorker (bot, q):          if item is None:              break          action, trigger, args = item -        if action == 'ao': +        if action == 'a':              handle = task.archive.delay (**args)              j = jobs[handle.id] = {'handle': handle, 'trigger': trigger, 'args': args} @@ -156,32 +158,46 @@ def celeryWorker (bot, q):                  bot.msg (trigger.sender, "Job not found.")          q.task_done () -@nickname_commands ('ao', 'archiveonly') +class NonExitingArgumentParser (argparse.ArgumentParser): +    def exit (self, status=0, message=None): +        # should never be called +        pass + +    def error (self, message): +        raise Exception (message) + +archiveparser = NonExitingArgumentParser (prog='a', add_help=False) +archiveparser.add_argument('--timeout', default=1*60*60, type=int, help='Maximum time for archival', metavar='SEC', choices=[60, 1*60*60, 2*60*60]) +archiveparser.add_argument('--idle-timeout', default=10, type=int, help='Maximum idle seconds (i.e. no requests)', dest='idleTimeout', metavar='SEC', choices=[1, 10, 20, 30, 60]) +archiveparser.add_argument('--max-body-size', default=defaultSettings.maxBodySize, type=int, dest='maxBodySize', help='Max body size', metavar='BYTES', choices=[1*1024*1024, 10*1024*1024, defaultSettings.maxBodySize, 100*1024*1024]) +archiveparser.add_argument('url', help='Website URL', type=isValidUrl) + +@nickname_commands ('a', 'archive')  @require_chanmsg ()  @require_privilege (VOICE) -@example ('ao http://example.com') +@example ('a http://example.com')  def archive (bot, trigger):      """ -    Archive a single page (no recursion) to WARC +    Archive a URL to WARC      """ -    url = trigger.group(2) -    if not url: -        bot.reply ('Need a URL') +    try: +        args = archiveparser.parse_args (trigger.group (2).split ()) +    except Exception as e: +        bot.reply ('{} -- {}'.format (e.args[0], archiveparser.format_usage ()))          return -    if not isValidUrl (url): -        bot.reply ('{} is not a valid URL'.format (url)) +    if not args: +        bot.reply ('Sorry, I don’t understand {}'.format (trigger.group (2)))          return -      blacklistedBehavior = {'domSnapshot', 'screenshot'} -    settings = dict (maxBodySize=defaultSettings.maxBodySize, -            logBuffer=defaultSettings.logBuffer, idleTimeout=10, -            timeout=1*60*60) -    args = dict (url=url, +    settings = dict (maxBodySize=args.maxBodySize, +            logBuffer=defaultSettings.logBuffer, idleTimeout=args.idleTimeout, +            timeout=args.timeout) +    args = dict (url=args.url,              enabledBehaviorNames=list (behavior.availableNames-blacklistedBehavior),              settings=settings)      q = bot.memory['crocoite']['q'] -    q.put_nowait (('ao', trigger, args)) +    q.put_nowait (('a', trigger, args))  @nickname_commands ('s', 'status')  @example ('s c251f09e-3c26-481f-96e0-4b5f58bd1170') | 
