From 958563a3602780b48599c27acf212139c2e6904d Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sun, 14 Oct 2018 12:41:37 +0200 Subject: irc: Add PoC dashboard Using websockets, vue and bulma. --- crocoite/controller.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'crocoite/controller.py') diff --git a/crocoite/controller.py b/crocoite/controller.py index 8916726..45d9442 100644 --- a/crocoite/controller.py +++ b/crocoite/controller.py @@ -337,9 +337,9 @@ class RecursiveController: prefix=formatPrefix (self.prefix), suffix='.warc.gz', delete=False) destpath = os.path.join (self.output, os.path.basename (dest.name)) - logger = self.logger.bind (url=url, destfile=destpath) + logger = self.logger.bind (url=url) command = list (map (formatCommand, self.command)) - logger.info ('fetch', uuid='1680f384-744c-4b8a-815b-7346e632e8db', command=command) + logger.info ('fetch', uuid='1680f384-744c-4b8a-815b-7346e632e8db', command=command, destfile=destpath) process = await asyncio.create_subprocess_exec (*command, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.DEVNULL, stdin=asyncio.subprocess.DEVNULL, start_new_session=True) @@ -370,22 +370,28 @@ class RecursiveController: self._quit = True async def run (self): - self.have = set () - self.pending = set ([self.url]) - - while self.pending and not self._quit: + def log (): self.logger.info ('recursing', uuid='5b8498e4-868d-413c-a67e-004516b8452c', pending=len (self.pending), have=len (self.have), running=len (self.running)) + self.have = set () + self.pending = set ([self.url]) + + while self.pending: # since pending is a set this picks a random item, which is fine u = self.pending.pop () self.have.add (u) t = asyncio.ensure_future (self.fetch (u)) self.running.add (t) + + log () + if len (self.running) >= self.concurrency or not self.pending: done, pending = await asyncio.wait (self.running, return_when=asyncio.FIRST_COMPLETED) self.running.difference_update (done) + log () + -- cgit v1.2.3