diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2018-03-25 13:58:47 +0200 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2018-03-25 14:43:25 +0200 |
commit | a21d7332e33a3e47a363004196451721d449e70b (patch) | |
tree | b6f4af4ea0838b49575a7b3ea7ab55f25fd9470d | |
parent | beabea4623484e7d6a9d18219efd9bf321ecc7fe (diff) | |
download | crocoite-a21d7332e33a3e47a363004196451721d449e70b.tar.gz crocoite-a21d7332e33a3e47a363004196451721d449e70b.tar.bz2 crocoite-a21d7332e33a3e47a363004196451721d449e70b.zip |
Move getResponseBody call to Item wrapper
-rw-r--r-- | crocoite/browser.py | 21 | ||||
-rw-r--r-- | crocoite/warc.py | 13 |
2 files changed, 21 insertions, 13 deletions
```diff
diff --git a/crocoite/browser.py b/crocoite/browser.py
index baa0d83..b4ade56 100644
--- a/crocoite/browser.py
+++ b/crocoite/browser.py
@@ -24,13 +24,15 @@ Chrome browser interactions.
 
 import logging
 from urllib.parse import urlsplit
+from base64 import b64decode
 
 class Item:
     """
     Simple wrapper containing Chrome request and response
     """
 
-    def __init__ (self):
+    def __init__ (self, tab):
+        self.tab = tab
         self.chromeRequest = None
         self.chromeResponse = None
         self.chromeFinished = None
@@ -58,6 +60,21 @@ class Item:
     def encodedDataLength (self):
         return self.chromeFinished['encodedDataLength']
 
+    @property
+    def body (self):
+        """ Return response body or None """
+        try:
+            body = self.tab.Network.getResponseBody (requestId=self.id)
+            rawBody = body['body']
+            base64Encoded = body['base64Encoded']
+            if base64Encoded:
+                rawBody = b64decode (rawBody)
+            else:
+                rawBody = rawBody.encode ('utf8')
+            return rawBody
+        except pychrome.exceptions.CallMethodException:
+            return None
+
     def setRequest (self, req):
         self.chromeRequest = req
 
@@ -185,7 +202,7 @@ class SiteLoader:
             else:
                 self.logger.warn ('request {} already exists, overwriting.'.format (reqId))
 
-        item = Item ()
+        item = Item (self.tab)
         item.setRequest (kwargs)
         self.requests[reqId] = item
 
diff --git a/crocoite/warc.py b/crocoite/warc.py
index e04bee4..b56d315 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -25,12 +25,10 @@ Classes writing data to WARC files
 import logging
 import json
 from http.server import BaseHTTPRequestHandler
-from base64 import b64decode
 from io import BytesIO
 from warcio.statusandheaders import StatusAndHeaders
 from urllib.parse import urlsplit
 from logging.handlers import BufferingHandler
-import pychrome
 from datetime import datetime
 from threading import Thread
 from queue import Queue
@@ -188,15 +186,8 @@ class WarcLoader (AccountingSiteLoader):
             raise ValueError ('body for {} too large {} vs {}'.format (reqId,
                     item.encodedDataLength, self.maxBodySize))
         else:
-            try:
-                body = self.tab.Network.getResponseBody (requestId=reqId)
-                rawBody = body['body']
-                base64Encoded = body['base64Encoded']
-                if base64Encoded:
-                    rawBody = b64decode (rawBody)
-                else:
-                    rawBody = rawBody.encode ('utf8')
-            except pychrome.exceptions.CallMethodException:
+            rawBody = item.body
+            if rawBody is None:
                 raise ValueError ('no data for {} {} {}'.format (resp['url'], resp['status'], reqId))
         return rawBody, base64Encoded
 
```