# Patch provenance (recovered from the collapsed cgit patch view):
#   From 2ef2ed8202bd5249cda78f135d64f5add9a461ea Mon Sep 17 00:00:00 2001
#   From: Lars-Dominik Braun
#   Date: Tue, 25 Sep 2018 16:17:03 +0200
#   Subject: Add recursive controller
#
#   Simple and sequential.
#
#   crocoite/cli.py | 40 ++++++++++++++++++++++++++++++++++++++++
#   1 file changed, 40 insertions(+)
#   diff --git a/crocoite/cli.py b/crocoite/cli.py
#   index 73ddca1..6167249 100644
#   @@ -65,3 +65,43 @@ def single ():
#   -- cgit v1.2.3
#
# The code below is the content added by that hunk, appended after the end of
# single () in crocoite/cli.py.

import asyncio
import os
from .controller import RecursiveController, DepthLimit, PrefixLimit

def parsePolicy (recursive, url):
    """
    Translate the --policy command-line value into a recursion policy object.

    :param recursive: Raw policy string, or None if the option was not given
    :param url: Seed URL, handed to PrefixLimit for the 'prefix' policy
    :return: DepthLimit (0) when recursive is None, DepthLimit (n) for a
        string of digits, PrefixLimit (url) for the literal 'prefix'
    :raises ValueError: For any other value
    """
    if recursive is None:
        # No policy requested.
        # NOTE(review): presumably a depth of 0 means "seed URL only" --
        # confirm against DepthLimit in the controller module.
        return DepthLimit (0)
    elif recursive.isdigit ():
        # str.isdigit also accepts characters int() cannot convert (for
        # example superscript digits); int() then raises ValueError, which is
        # the same exception type used for unsupported policies below.
        return DepthLimit (int (recursive))
    elif recursive == 'prefix':
        return PrefixLimit (url)
    else:
        raise ValueError ('Unsupported')

def recursive ():
    """
    Command-line entry point: recursively run a fetch command from a seed URL.

    Parses the command line, builds the recursion policy, creates the output
    directory if needed and runs a RecursiveController to completion on the
    asyncio event loop. An invalid --policy value is reported through
    parser.error.
    """
    logger = Logger (consumer=[DatetimeConsumer (), JsonPrintConsumer ()])

    parser = argparse.ArgumentParser(description='Recursively run crocoite-grab.')
    parser.add_argument('--policy', help='Recursion policy', metavar='POLICY')
    parser.add_argument('--tempdir', help='Directory for temporary files', metavar='DIR')
    parser.add_argument('--prefix', help='Output filename prefix, supports templates {host} and {date}', metavar='FILENAME', default='{host}-{date}-')
    parser.add_argument('url', help='Seed URL', metavar='URL')
    parser.add_argument('output', help='Output directory', metavar='DIR')
    parser.add_argument('command', help='Fetch command, supports templates {url} and {dest}', metavar='CMD', nargs='*', default=['crocoite-grab', '{url}', '{dest}'])

    args = parser.parse_args ()
    try:
        policy = parsePolicy (args.policy, args.url)
    except ValueError:
        parser.error ('Invalid argument for --policy')

    os.makedirs (args.output, exist_ok=True)

    controller = RecursiveController (url=args.url, output=args.output,
            command=args.command, logger=logger, policy=policy,
            tempdir=args.tempdir, prefix=args.prefix)

    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(controller.run ())
    finally:
        # Close the loop even if the controller raises or the run is
        # interrupted, so the loop is not leaked.
        loop.close()