diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2018-08-21 13:46:08 +0200 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2018-09-25 16:15:13 +0200 |
commit | fd383fd5f5bac0a4cebbacf7e1ffccfd0be04e50 (patch) | |
tree | eb3823a81ca6fbca3f133ed24732e94504d1c0bf /crocoite/controller.py | |
parent | 53e4df3fe732417988532e5b3d8b4dc7e781a3df (diff) | |
download | crocoite-fd383fd5f5bac0a4cebbacf7e1ffccfd0be04e50.tar.gz crocoite-fd383fd5f5bac0a4cebbacf7e1ffccfd0be04e50.tar.bz2 crocoite-fd383fd5f5bac0a4cebbacf7e1ffccfd0be04e50.zip |
Log extracted links
Diffstat (limited to 'crocoite/controller.py')
-rw-r--r-- | crocoite/controller.py | 23 |
1 files changed, 23 insertions, 0 deletions
```diff
diff --git a/crocoite/controller.py b/crocoite/controller.py
index 9dae96f..01edc44 100644
--- a/crocoite/controller.py
+++ b/crocoite/controller.py
@@ -69,6 +69,29 @@ class StatsHandler (EventHandler):
         elif isinstance (item, BrowserCrashed):
             self.stats['crashed'] += 1
 
+from .behavior import ExtractLinksEvent
+from itertools import islice
+
+class LogHandler (EventHandler):
+    """ Handle items by logging information about them """
+
+    __slots__ = ('logger')
+
+    def __init__ (self, logger):
+        self.logger = logger.bind (context=type (self).__name__)
+
+    def push (self, item):
+        if isinstance (item, ExtractLinksEvent):
+            # limit number of links per message, so json blob won’t get too big
+            it = iter (item.links)
+            limit = 100
+            while True:
+                limitlinks = list (islice (it, 0, limit))
+                if not limitlinks:
+                    break
+                self.logger.info ('extracted links', context=type (item).__name__,
+                        uuid='8ee5e9c9-1130-4c5c-88ff-718508546e0c', links=limitlinks)
+
 import time, platform
 
 from . import behavior as cbehavior
```