diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2017-12-22 17:43:52 +0100 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2017-12-22 17:43:52 +0100 |
commit | fd279ff3168c91be2ed8a012af6395034475ccf5 (patch) | |
tree | f4a8b55db5f158a4be4cf8c48aa82d944c206595 /crocoite/warc.py | |
parent | bcfbdd9b45b7e872ee77e1366197443d855d8c7c (diff) | |
download | crocoite-fd279ff3168c91be2ed8a012af6395034475ccf5.tar.gz crocoite-fd279ff3168c91be2ed8a012af6395034475ccf5.tar.bz2 crocoite-fd279ff3168c91be2ed8a012af6395034475ccf5.zip |
Add simple stats-keeping SiteLoader
Diffstat (limited to 'crocoite/warc.py')
-rw-r--r-- | crocoite/warc.py | 10 |
1 file changed, 6 insertions, 4 deletions
diff --git a/crocoite/warc.py b/crocoite/warc.py
index 1c844bc..d9afab2 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -24,7 +24,7 @@ Classes writing data to WARC files
 
 import logging
 import json
-from .browser import SiteLoader
+from .browser import AccountingSiteLoader
 from . import packageUrl
 from http.server import BaseHTTPRequestHandler
 from base64 import b64decode
@@ -100,11 +100,11 @@ class WARCLogHandler (BufferingHandler):
         finally:
             self.release ()
 
-class WarcLoader (SiteLoader):
+class WarcLoader (AccountingSiteLoader):
     def __init__ (self, browser, url, writer,
             logger=logging.getLogger(__name__),
             logBuffer=1000, maxBodySize=10*1024*1024):
-        SiteLoader.__init__ (self, browser, url, logger)
+        super ().__init__ (browser, url, logger)
         self.writer = writer
         self.maxBodySize = maxBodySize
         self.warcLogger = WARCLogHandler (logBuffer, writer)
@@ -113,7 +113,7 @@ class WarcLoader (SiteLoader):
     def __exit__ (self, exc_type, exc_value, traceback):
         self.logger.removeHandler (self.warcLogger)
         self.warcLogger.flush ()
-        return SiteLoader.__exit__ (self, exc_type, exc_value, traceback)
+        return super ().__exit__ (exc_type, exc_value, traceback)
 
     @staticmethod
     def getStatusText (response):
@@ -244,6 +244,8 @@ class WarcLoader (SiteLoader):
         writer.write_record(record)
 
     def loadingFinished (self, item, redirect=False):
+        super ().loadingFinished (item, redirect)
+
         writer = self.writer
         req = item.request
 