From 0af80da0b506a06593c81d3686e91b8b82a4f3ba Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sun, 27 Jan 2019 11:36:29 +0100 Subject: irc: Add URL blacklist --- contrib/chromebot.json | 3 +++ crocoite/cli.py | 4 +++- crocoite/irc.py | 16 ++++++++++++++-- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/contrib/chromebot.json b/contrib/chromebot.json index 98a48f9..9ebd099 100644 --- a/contrib/chromebot.json +++ b/contrib/chromebot.json @@ -9,4 +9,7 @@ "tempdir": "/path/to/tmp", "destdir": "/path/to/warc", "process_limit": 1 + "blacklist": { + "^https?://(.+\\.)?local(host)?/": "Not acceptable" + } } diff --git a/crocoite/cli.py b/crocoite/cli.py index b73051b..be3538a 100644 --- a/crocoite/cli.py +++ b/crocoite/cli.py @@ -132,7 +132,7 @@ def recursive (): return 0 def irc (): - import json + import json, re from .irc import Chromebot logger = Logger (consumer=[DatetimeConsumer (), JsonPrintConsumer ()]) @@ -145,6 +145,7 @@ def irc (): with open (args.config) as fd: config = json.load (fd) s = config['irc'] + blacklist = dict (map (lambda x: (re.compile (x[0], re.I), x[1]), config['blacklist'].items ())) loop = asyncio.get_event_loop() bot = Chromebot ( @@ -157,6 +158,7 @@ def irc (): destdir=config['destdir'], processLimit=config['process_limit'], logger=logger, + blacklist=blacklist, loop=loop) stop = lambda signum: bot.cancel () loop.add_signal_handler (signal.SIGINT, stop, signal.SIGINT) diff --git a/crocoite/irc.py b/crocoite/irc.py index 1b0fa1b..5351a85 100644 --- a/crocoite/irc.py +++ b/crocoite/irc.py @@ -368,11 +368,11 @@ def jobExists (func): return inner class Chromebot (ArgparseBot): - __slots__ = ('jobs', 'tempdir', 'destdir', 'processLimit') + __slots__ = ('jobs', 'tempdir', 'destdir', 'processLimit', 'blacklist') def __init__ (self, host, port, ssl, nick, logger, channels=[], tempdir=tempfile.gettempdir(), destdir='.', processLimit=1, - loop=None): + blacklist={}, loop=None): super().__init__ (host=host, port=port, ssl=ssl, nick=nick, logger=logger, channels=channels, loop=loop) @@ -380,6 +380,7 @@ class Chromebot (ArgparseBot): self.tempdir = tempdir self.destdir = destdir self.processLimit = asyncio.Semaphore (processLimit) + self.blacklist = blacklist def getParser (self): parser = NonExitingArgumentParser (prog=self.nick + ': ', add_help=False) @@ -404,10 +405,21 @@ class Chromebot (ArgparseBot): return parser + def isBlacklisted (self, url): + for k, v in self.blacklist.items(): + if k.match (url): + return v + return False + @voice async def handleArchive (self, user, args, reply): """ Handle the archive command """ + msg = self.isBlacklisted (args.url) + if msg: + reply (f'{args.url} cannot be queued: {msg}') + return + j = Job (args.url, user.name) assert j.id not in self.jobs, 'duplicate job id' self.jobs[j.id] = j -- cgit v1.2.3