From 5e444dd6511d97308a84ae9c86ebf14547d01f01 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Tue, 18 Dec 2018 12:34:25 +0100 Subject: Parse URLs by default Use library yarl (already pulled in by aiohttp). No URL processed should be a string. --- crocoite/cli.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'crocoite/cli.py') diff --git a/crocoite/cli.py b/crocoite/cli.py index c3c41a4..b0ad53a 100644 --- a/crocoite/cli.py +++ b/crocoite/cli.py @@ -24,6 +24,7 @@ Command line interface import argparse, sys, signal, asyncio, os from enum import IntEnum +from yarl import URL from . import behavior from .controller import SinglePageController, \ @@ -50,7 +51,7 @@ def single (): default=list (behavior.availableMap.keys ()), choices=list (behavior.availableMap.keys ()), metavar='NAME', nargs='*') - parser.add_argument('url', help='Website URL', metavar='URL') + parser.add_argument('url', help='Website URL', type=URL, metavar='URL') parser.add_argument('output', help='WARC filename', metavar='FILE') args = parser.parse_args () @@ -102,7 +103,7 @@ def recursive (): parser.add_argument('--tempdir', help='Directory for temporary files', metavar='DIR') parser.add_argument('--prefix', help='Output filename prefix, supports templates {host} and {date}', metavar='FILENAME', default='{host}-{date}-') parser.add_argument('--concurrency', '-j', help='Run at most N jobs', metavar='N', default=1, type=int) - parser.add_argument('url', help='Seed URL', metavar='URL') + parser.add_argument('url', help='Seed URL', type=URL, metavar='URL') parser.add_argument('output', help='Output directory', metavar='DIR') parser.add_argument('command', help='Fetch command, supports templates {url} and {dest}', metavar='CMD', nargs='*', default=['crocoite-grab', '{url}', '{dest}']) -- cgit v1.2.3