summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- crocoite/browser.py 21
-rw-r--r-- crocoite/warc.py 13
2 files changed, 21 insertions, 13 deletions
diff --git a/crocoite/browser.py b/crocoite/browser.py
index baa0d83..b4ade56 100644
--- a/crocoite/browser.py
+++ b/crocoite/browser.py
@@ -24,13 +24,15 @@ Chrome browser interactions.
import logging
from urllib.parse import urlsplit
+from base64 import b64decode
class Item:
"""
Simple wrapper containing Chrome request and response
"""
- def __init__ (self):
+ def __init__ (self, tab):
+ self.tab = tab
self.chromeRequest = None
self.chromeResponse = None
self.chromeFinished = None
@@ -58,6 +60,21 @@ class Item:
def encodedDataLength (self):
return self.chromeFinished['encodedDataLength']
+ @property
+ def body (self):
+ """ Return response body or None """
+ try:
+ body = self.tab.Network.getResponseBody (requestId=self.id)
+ rawBody = body['body']
+ base64Encoded = body['base64Encoded']
+ if base64Encoded:
+ rawBody = b64decode (rawBody)
+ else:
+ rawBody = rawBody.encode ('utf8')
+ return rawBody
+ except pychrome.exceptions.CallMethodException:
+ return None
+
def setRequest (self, req):
self.chromeRequest = req
@@ -185,7 +202,7 @@ class SiteLoader:
else:
self.logger.warn ('request {} already exists, overwriting.'.format (reqId))
- item = Item ()
+ item = Item (self.tab)
item.setRequest (kwargs)
self.requests[reqId] = item
diff --git a/crocoite/warc.py b/crocoite/warc.py
index e04bee4..b56d315 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -25,12 +25,10 @@ Classes writing data to WARC files
import logging
import json
from http.server import BaseHTTPRequestHandler
-from base64 import b64decode
from io import BytesIO
from warcio.statusandheaders import StatusAndHeaders
from urllib.parse import urlsplit
from logging.handlers import BufferingHandler
-import pychrome
from datetime import datetime
from threading import Thread
from queue import Queue
@@ -188,15 +186,8 @@ class WarcLoader (AccountingSiteLoader):
raise ValueError ('body for {} too large {} vs {}'.format (reqId,
item.encodedDataLength, self.maxBodySize))
else:
- try:
- body = self.tab.Network.getResponseBody (requestId=reqId)
- rawBody = body['body']
- base64Encoded = body['base64Encoded']
- if base64Encoded:
- rawBody = b64decode (rawBody)
- else:
- rawBody = rawBody.encode ('utf8')
- except pychrome.exceptions.CallMethodException:
+ rawBody = item.body
+ if rawBody is None:
raise ValueError ('no data for {} {} {}'.format (resp['url'],
resp['status'], reqId))
return rawBody, base64Encoded