diff options
Diffstat (limited to 'crocoite/cli.py')
| -rw-r--r-- | crocoite/cli.py | 33 | 
1 files changed, 33 insertions, 0 deletions
| diff --git a/crocoite/cli.py b/crocoite/cli.py index 93b742b..53a0b32 100644 --- a/crocoite/cli.py +++ b/crocoite/cli.py @@ -26,6 +26,8 @@ import argparse, sys, signal, asyncio, os, json  from traceback import TracebackException  from enum import IntEnum  from yarl import URL +from http.cookies import SimpleCookie +import pkg_resources  try:      import manhole      manhole.install (patch_fork=False, oneshot_on='USR1') @@ -49,6 +51,29 @@ def absurl (s):          return u      raise argparse.ArgumentTypeError ('Must be absolute') +def cookie (s): +    """ argparse: Cookie """ +    c = SimpleCookie (s) +    # for some reason the constructor does not raise an exception if the cookie +    # supplied is invalid. It’ll simply be empty. +    if len (c) != 1: +        raise argparse.ArgumentTypeError ('Invalid cookie') +    # we want a single Morsel +    return next (iter (c.values ())) + +def cookiejar (f): +    """ argparse: Cookies from file """ +    cookies = [] +    try: +        with open (f, 'r') as fd: +            for l in fd: +                l = l.lstrip () +                if l and not l.startswith ('#'): +                    cookies.append (cookie (l)) +    except FileNotFoundError: +        raise argparse.ArgumentTypeError (f'Cookie jar "{f}" does not exist') +    return cookies +  class SingleExitStatus(IntEnum):      """ Exit status for single-shot command line """      Ok = 0 @@ -68,9 +93,16 @@ def single ():              metavar='NAME', nargs='*')      parser.add_argument('--warcinfo', help='Add extra information to warcinfo record',              metavar='JSON', type=json.loads) +    # re-using curl’s short/long switch names whenever possible      parser.add_argument('-k', '--insecure',              action='store_true',              help='Disable certificate validation') +    parser.add_argument ('-b', '--cookie', type=cookie, metavar='SET-COOKIE', +            action='append', default=[], help='Cookies in Set-Cookie format.') +    parser.add_argument ('-c', '--cookie-jar', dest='cookieJar', +            type=cookiejar, metavar='FILE', +            default=pkg_resources.resource_filename (__name__, 'data/cookies.txt'), +            help='Cookie jar file, read-only.')      parser.add_argument('url', help='Website URL', type=absurl, metavar='URL')      parser.add_argument('output', help='WARC filename', metavar='FILE') @@ -86,6 +118,7 @@ def single ():              idleTimeout=args.idleTimeout,              timeout=args.timeout,              insecure=args.insecure, +            cookies=args.cookieJar + args.cookie,              )      with open (args.output, 'wb') as fd, WarcHandler (fd, logger) as warcHandler:          logger.connect (WarcHandlerConsumer (warcHandler)) | 
