From 0af80da0b506a06593c81d3686e91b8b82a4f3ba Mon Sep 17 00:00:00 2001
From: Lars-Dominik Braun <lars@6xq.net>
Date: Sun, 27 Jan 2019 11:36:29 +0100
Subject: irc: Add URL blacklist

---
 contrib/chromebot.json |  3 +++
 crocoite/cli.py        |  4 +++-
 crocoite/irc.py        | 16 ++++++++++++++--
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/contrib/chromebot.json b/contrib/chromebot.json
index 98a48f9..9ebd099 100644
--- a/contrib/chromebot.json
+++ b/contrib/chromebot.json
@@ -9,4 +9,7 @@
   "tempdir": "/path/to/tmp",
   "destdir": "/path/to/warc",
-  "process_limit": 1
+  "process_limit": 1,
+  "blacklist": {
+    "^https?://(.+\\.)?local(host)?/": "Not acceptable"
+  }
 }
diff --git a/crocoite/cli.py b/crocoite/cli.py
index b73051b..be3538a 100644
--- a/crocoite/cli.py
+++ b/crocoite/cli.py
@@ -132,7 +132,7 @@ def recursive ():
     return 0
 
 def irc ():
-    import json
+    import json, re
     from .irc import Chromebot
 
     logger = Logger (consumer=[DatetimeConsumer (), JsonPrintConsumer ()])
@@ -145,6 +145,7 @@ def irc ():
     with open (args.config) as fd:
         config = json.load (fd)
     s = config['irc']
+    blacklist = {re.compile (pattern, re.I): reason for pattern, reason in config.get ('blacklist', {}).items ()}  # 'blacklist' key is optional
 
     loop = asyncio.get_event_loop()
     bot = Chromebot (
@@ -157,6 +158,7 @@ def irc ():
             destdir=config['destdir'],
             processLimit=config['process_limit'],
             logger=logger,
+            blacklist=blacklist,
             loop=loop)
     stop = lambda signum: bot.cancel ()
     loop.add_signal_handler (signal.SIGINT, stop, signal.SIGINT)
diff --git a/crocoite/irc.py b/crocoite/irc.py
index 1b0fa1b..5351a85 100644
--- a/crocoite/irc.py
+++ b/crocoite/irc.py
@@ -368,11 +368,11 @@ def jobExists (func):
     return inner
 
 class Chromebot (ArgparseBot):
-    __slots__ = ('jobs', 'tempdir', 'destdir', 'processLimit')
+    __slots__ = ('jobs', 'tempdir', 'destdir', 'processLimit', 'blacklist')
 
     def __init__ (self, host, port, ssl, nick, logger, channels=[],
             tempdir=tempfile.gettempdir(), destdir='.', processLimit=1,
-            loop=None):
+            blacklist={}, loop=None):
         super().__init__ (host=host, port=port, ssl=ssl, nick=nick,
                 logger=logger, channels=channels, loop=loop)
 
@@ -380,6 +380,7 @@ class Chromebot (ArgparseBot):
         self.tempdir = tempdir
         self.destdir = destdir
         self.processLimit = asyncio.Semaphore (processLimit)
+        self.blacklist = blacklist
 
     def getParser (self):
         parser = NonExitingArgumentParser (prog=self.nick + ': ', add_help=False)
@@ -404,10 +405,21 @@ class Chromebot (ArgparseBot):
 
         return parser
 
+    def isBlacklisted (self, url):
+        """ Return the reason of the first blacklist pattern matching url, else False """
+        # Patterns are tried in the dict’s iteration order; .match anchors at the start of url
+        reasons = (why for rx, why in self.blacklist.items () if rx.match (url))
+        return next (reasons, False)
+
     @voice
     async def handleArchive (self, user, args, reply):
         """ Handle the archive command """
 
+        msg = self.isBlacklisted (args.url)
+        if msg:
+            reply (f'{args.url} cannot be queued: {msg}')
+            return
+
         j = Job (args.url, user.name)
         assert j.id not in self.jobs, 'duplicate job id'
         self.jobs[j.id] = j
-- 
cgit v1.2.3