diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2018-04-28 15:43:01 +0200 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2018-05-04 15:59:50 +0200 |
commit | ce888f5b5eb96abd5d575f272f11087bef4cd068 (patch) | |
tree | 3651386b413baa523f4b5b9446ffbf8b1b1fda4e | |
parent | cd51a009add3f2f5f8bba89ae4dc663e829a575c (diff) | |
download | crocoite-ce888f5b5eb96abd5d575f272f11087bef4cd068.tar.gz crocoite-ce888f5b5eb96abd5d575f272f11087bef4cd068.tar.bz2 crocoite-ce888f5b5eb96abd5d575f272f11087bef4cd068.zip |
Fetch request POST body
If there is any and it was not included in the response already.
-rw-r--r-- | crocoite/browser.py | 16 | ||||
-rw-r--r-- | crocoite/warc.py | 12 |
2 files changed, 20 insertions, 8 deletions
diff --git a/crocoite/browser.py b/crocoite/browser.py
index 0840374..efe739a 100644
--- a/crocoite/browser.py
+++ b/crocoite/browser.py
@@ -82,7 +82,21 @@ class Item:
                 rawBody = rawBody.encode ('utf8')
             return rawBody, base64Encoded
         except (pychrome.exceptions.CallMethodException, pychrome.exceptions.TimeoutException):
-            return None, False
+            raise ValueError ('Cannot fetch response body')
+
+    @property
+    def requestBody (self):
+        """ Get request/POST body """
+        req = self.request
+        postData = req.get ('postData')
+        if postData:
+            return postData.encode ('utf8'), False
+        elif req.get ('hasPostData', False):
+            try:
+                return b64decode (self.tab.Network.getRequestPostData (requestId=self.id, _timeout=60)['postData']), True
+            except (pychrome.exceptions.CallMethodException, pychrome.exceptions.TimeoutException):
+                raise ValueError ('Cannot fetch request body')
+        return None, False
 
     def setRequest (self, req):
         self.chromeRequest = req
diff --git a/crocoite/warc.py b/crocoite/warc.py
index 8664e5a..9e7ba04 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -150,9 +150,6 @@ class WarcLoader (AccountingSiteLoader):
         if newReqHeaders:
             req['headers'] = newReqHeaders
 
-        postData = req.get ('postData')
-        if postData:
-            postData = BytesIO (postData.encode ('utf8'))
         path = url.path
         if url.query:
             path += '?' + url.query
@@ -163,8 +160,12 @@ class WarcLoader (AccountingSiteLoader):
                 'X-Chrome-Initiator': json.dumps (initiator),
                 'WARC-Date': datetime_to_iso_date (datetime.utcfromtimestamp (item.chromeRequest['wallTime'])),
                 }
+        payload, payloadBase64Encoded = item.requestBody
+        if payload:
+            payload = BytesIO (payload)
+            warcHeaders['X-Chrome-Base64Body'] = str (payloadBase64Encoded)
         record = writer.create_warc_record(req['url'], 'request',
-                payload=postData, http_headers=httpHeaders,
+                payload=payload, http_headers=httpHeaders,
                 warc_headers_dict=warcHeaders)
         writer.write_record(record)
 
@@ -187,9 +188,6 @@ class WarcLoader (AccountingSiteLoader):
                     item.encodedDataLength, self.maxBodySize))
         else:
             rawBody, base64Encoded = item.body
-            if rawBody is None:
-                raise ValueError ('no data for {} {} {}'.format (resp['url'],
-                        resp['status'], reqId))
         return rawBody, base64Encoded
 
     def _writeResponse (self, item, redirect, concurrentTo, rawBody, base64Encoded):