summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Lars-Dominik Braun <lars@6xq.net> 2018-05-04 15:35:44 +0200
committer Lars-Dominik Braun <lars@6xq.net> 2018-05-04 16:00:05 +0200
commit b6d198aed3fa17ac28aa62c9679ec2c33dc0a3da (patch)
tree f8c5a2cf8293dea0f4314b7929e8b277b795947e
parent 2f5cc22dc3277de7a22b9839ff3c42ed40ff50b0 (diff)
download crocoite-b6d198aed3fa17ac28aa62c9679ec2c33dc0a3da.tar.gz
crocoite-b6d198aed3fa17ac28aa62c9679ec2c33dc0a3da.tar.bz2
crocoite-b6d198aed3fa17ac28aa62c9679ec2c33dc0a3da.zip
Share recursive argument parser
-rw-r--r-- crocoite/cli.py 20
-rw-r--r-- crocoite/task.py 9
2 files changed, 15 insertions, 14 deletions
diff --git a/crocoite/cli.py b/crocoite/cli.py
index efd30ad..f6454da 100644
--- a/crocoite/cli.py
+++ b/crocoite/cli.py
@@ -29,6 +29,16 @@ from .controller import RecursiveController, defaultSettings, \
ControllerSettings, DepthLimit, PrefixLimit
from .browser import NullService, ChromeService
+def parseRecursive (recursive, url):
+ if recursive is None:
+ return DepthLimit (0)
+ elif recursive.isdigit ():
+ return DepthLimit (int (recursive))
+ elif recursive == 'prefix':
+ return PrefixLimit (url)
+ else:
+ raise ValueError ('Unsupported')
+
def main ():
parser = argparse.ArgumentParser(description='Save website to WARC using Google Chrome.')
parser.add_argument('--browser', help='DevTools URL', metavar='URL')
@@ -63,13 +73,9 @@ def main ():
else:
logging.basicConfig (level=logging.INFO)
- if args.recursive is None:
- recursionPolicy = DepthLimit (0)
- elif args.recursive.isdigit ():
- recursionPolicy = DepthLimit (int (args.recursive))
- elif args.recursive == 'prefix':
- recursionPolicy = PrefixLimit (args.url)
- else:
+ try:
+ recursionPolicy = parseRecursive (args.recursive, args.url)
+ except ValueError:
parser.error ('Invalid argument for --recursive')
service = ChromeService ()
if args.browser:
diff --git a/crocoite/task.py b/crocoite/task.py
index 52d3b26..e93cfde 100644
--- a/crocoite/task.py
+++ b/crocoite/task.py
@@ -41,6 +41,7 @@ from celery.utils.log import get_task_logger
from .browser import ChromeService
from .controller import SinglePageController, ControllerSettings, RecursiveController, defaultSettings, DepthLimit, PrefixLimit
from . import behavior
+from .cli import parseRecursive
app = Celery ('crocoite.distributed')
app.config_from_object('celeryconfig')
@@ -109,13 +110,7 @@ class DistributedRecursiveController (RecursiveController):
def controller (self, url, settings, enabledBehaviorNames, recursive, concurrency):
""" Recursive controller """
- if recursive is None:
- recursionPolicy = DepthLimit (0)
- elif recursive.isdigit ():
- recursionPolicy = DepthLimit (int (recursive))
- elif recursive == 'prefix':
- recursionPolicy = PrefixLimit (url)
-
+ recursionPolicy = parseRecursive (recursive, url)
enabledBehavior = list (filter (lambda x: x.name in enabledBehaviorNames, behavior.available))
settings = ControllerSettings (**settings)
controller = DistributedRecursiveController (url, None, behavior=enabledBehavior,