summaryrefslogtreecommitdiff
path: root/crocoite/cli.py
diff options
context:
space:
mode:
authorLars-Dominik Braun <lars@6xq.net>2018-09-25 16:17:03 +0200
committerLars-Dominik Braun <lars@6xq.net>2018-09-25 17:00:21 +0200
commit2ef2ed8202bd5249cda78f135d64f5add9a461ea (patch)
tree0661b9a2a70e9e7feeda51d4fa9422a38784516a /crocoite/cli.py
parent329de53c7c8cea725249089df87d6cd9823f6972 (diff)
downloadcrocoite-2ef2ed8202bd5249cda78f135d64f5add9a461ea.tar.gz
crocoite-2ef2ed8202bd5249cda78f135d64f5add9a461ea.tar.bz2
crocoite-2ef2ed8202bd5249cda78f135d64f5add9a461ea.zip
Add recursive controller
Simple and sequential.
Diffstat (limited to 'crocoite/cli.py')
-rw-r--r--crocoite/cli.py40
1 files changed, 40 insertions, 0 deletions
diff --git a/crocoite/cli.py b/crocoite/cli.py
index 73ddca1..6167249 100644
--- a/crocoite/cli.py
+++ b/crocoite/cli.py
@@ -65,3 +65,43 @@ def single ():
return True
+import asyncio, os
+from .controller import RecursiveController, DepthLimit, PrefixLimit
+
def parsePolicy (recursive, url):
    """
    Turn the value of the --policy option into a recursion policy object.

    recursive -- policy string from the command line, None if not given
    url -- seed URL, only passed on for the 'prefix' policy

    Returns DepthLimit (0) if no policy was given, DepthLimit (n) if the
    value is a string of digits n and PrefixLimit (url) if the value is the
    literal 'prefix'. Every other value raises ValueError.
    """
    # Option was not supplied at all
    if recursive is None:
        return DepthLimit (0)

    # Purely numeric value: pass it on as an integer
    if recursive.isdigit ():
        depth = int (recursive)
        return DepthLimit (depth)

    if recursive == 'prefix':
        return PrefixLimit (url)

    # Nothing matched; the caller is expected to report the error
    raise ValueError ('Unsupported')
+
def recursive ():
    """
    Command line entry point: run the fetch command recursively.

    Parses the command line, builds the recursion policy from --policy,
    creates the output directory if it does not exist yet and then drives
    RecursiveController.run () on the asyncio event loop until it finishes.

    An invalid --policy value is reported through argparse, which prints a
    usage message and terminates the process.
    """
    logger = Logger (consumer=[DatetimeConsumer (), JsonPrintConsumer ()])

    parser = argparse.ArgumentParser(description='Recursively run crocoite-grab.')
    parser.add_argument('--policy', help='Recursion policy', metavar='POLICY')
    parser.add_argument('--tempdir', help='Directory for temporary files',
            metavar='DIR')
    parser.add_argument('--prefix',
            help='Output filename prefix, supports templates {host} and {date}',
            metavar='FILENAME', default='{host}-{date}-')
    parser.add_argument('url', help='Seed URL', metavar='URL')
    parser.add_argument('output', help='Output directory', metavar='DIR')
    parser.add_argument('command',
            help='Fetch command, supports templates {url} and {dest}',
            metavar='CMD', nargs='*',
            default=['crocoite-grab', '{url}', '{dest}'])

    args = parser.parse_args ()
    try:
        policy = parsePolicy (args.policy, args.url)
    except ValueError:
        # parser.error () does not return, so policy is always bound below
        parser.error ('Invalid argument for --policy')

    os.makedirs (args.output, exist_ok=True)

    controller = RecursiveController (url=args.url, output=args.output,
            command=args.command, logger=logger, policy=policy,
            tempdir=args.tempdir, prefix=args.prefix)

    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(controller.run ())
    finally:
        # Close the loop even if the controller raised or was interrupted
        loop.close()
+