summary refs log tree commit diff
diff options
context:
space:
mode:
author Lars-Dominik Braun <lars@6xq.net> 2018-04-28 15:43:01 +0200
committer Lars-Dominik Braun <lars@6xq.net> 2018-05-04 15:59:50 +0200
commit ce888f5b5eb96abd5d575f272f11087bef4cd068 (patch)
tree 3651386b413baa523f4b5b9446ffbf8b1b1fda4e
parent cd51a009add3f2f5f8bba89ae4dc663e829a575c (diff)
download crocoite-ce888f5b5eb96abd5d575f272f11087bef4cd068.tar.gz
crocoite-ce888f5b5eb96abd5d575f272f11087bef4cd068.tar.bz2
crocoite-ce888f5b5eb96abd5d575f272f11087bef4cd068.zip
Fetch request POST body
If there is any and it was not included in the response already.
-rw-r--r-- crocoite/browser.py 16
-rw-r--r-- crocoite/warc.py 12
2 files changed, 20 insertions, 8 deletions
diff --git a/crocoite/browser.py b/crocoite/browser.py
index 0840374..efe739a 100644
--- a/crocoite/browser.py
+++ b/crocoite/browser.py
@@ -82,7 +82,21 @@ class Item:
rawBody = rawBody.encode ('utf8')
return rawBody, base64Encoded
except (pychrome.exceptions.CallMethodException, pychrome.exceptions.TimeoutException):
- return None, False
+ raise ValueError ('Cannot fetch response body')
+
+ @property
+ def requestBody (self):
+ """ Get request/POST body """
+ req = self.request
+ postData = req.get ('postData')
+ if postData:
+ return postData.encode ('utf8'), False
+ elif req.get ('hasPostData', False):
+ try:
+ return b64decode (self.tab.Network.getRequestPostData (requestId=self.id, _timeout=60)['postData']), True
+ except (pychrome.exceptions.CallMethodException, pychrome.exceptions.TimeoutException):
+ raise ValueError ('Cannot fetch request body')
+ return None, False
def setRequest (self, req):
self.chromeRequest = req
diff --git a/crocoite/warc.py b/crocoite/warc.py
index 8664e5a..9e7ba04 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -150,9 +150,6 @@ class WarcLoader (AccountingSiteLoader):
if newReqHeaders:
req['headers'] = newReqHeaders
- postData = req.get ('postData')
- if postData:
- postData = BytesIO (postData.encode ('utf8'))
path = url.path
if url.query:
path += '?' + url.query
@@ -163,8 +160,12 @@ class WarcLoader (AccountingSiteLoader):
'X-Chrome-Initiator': json.dumps (initiator),
'WARC-Date': datetime_to_iso_date (datetime.utcfromtimestamp (item.chromeRequest['wallTime'])),
}
+ payload, payloadBase64Encoded = item.requestBody
+ if payload:
+ payload = BytesIO (payload)
+ warcHeaders['X-Chrome-Base64Body'] = str (payloadBase64Encoded)
record = writer.create_warc_record(req['url'], 'request',
- payload=postData, http_headers=httpHeaders,
+ payload=payload, http_headers=httpHeaders,
warc_headers_dict=warcHeaders)
writer.write_record(record)
@@ -187,9 +188,6 @@ class WarcLoader (AccountingSiteLoader):
item.encodedDataLength, self.maxBodySize))
else:
rawBody, base64Encoded = item.body
- if rawBody is None:
- raise ValueError ('no data for {} {} {}'.format (resp['url'],
- resp['status'], reqId))
return rawBody, base64Encoded
def _writeResponse (self, item, redirect, concurrentTo, rawBody, base64Encoded):