From ce888f5b5eb96abd5d575f272f11087bef4cd068 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 28 Apr 2018 15:43:01 +0200 Subject: Fetch request POST body If there is any and it was not included in the response already. --- crocoite/browser.py | 16 +++++++++++++++- crocoite/warc.py | 12 +++++------- 2 files changed, 20 insertions(+), 8 deletions(-) (limited to 'crocoite') diff --git a/crocoite/browser.py b/crocoite/browser.py index 0840374..efe739a 100644 --- a/crocoite/browser.py +++ b/crocoite/browser.py @@ -82,7 +82,21 @@ class Item: rawBody = rawBody.encode ('utf8') return rawBody, base64Encoded except (pychrome.exceptions.CallMethodException, pychrome.exceptions.TimeoutException): - return None, False + raise ValueError ('Cannot fetch response body') + + @property + def requestBody (self): + """ Get request/POST body """ + req = self.request + postData = req.get ('postData') + if postData: + return postData.encode ('utf8'), False + elif req.get ('hasPostData', False): + try: + return b64decode (self.tab.Network.getRequestPostData (requestId=self.id, _timeout=60)['postData']), True + except (pychrome.exceptions.CallMethodException, pychrome.exceptions.TimeoutException): + raise ValueError ('Cannot fetch request body') + return None, False def setRequest (self, req): self.chromeRequest = req diff --git a/crocoite/warc.py b/crocoite/warc.py index 8664e5a..9e7ba04 100644 --- a/crocoite/warc.py +++ b/crocoite/warc.py @@ -150,9 +150,6 @@ class WarcLoader (AccountingSiteLoader): if newReqHeaders: req['headers'] = newReqHeaders - postData = req.get ('postData') - if postData: - postData = BytesIO (postData.encode ('utf8')) path = url.path if url.query: path += '?' + url.query @@ -163,8 +160,12 @@ class WarcLoader (AccountingSiteLoader): 'X-Chrome-Initiator': json.dumps (initiator), 'WARC-Date': datetime_to_iso_date (datetime.utcfromtimestamp (item.chromeRequest['wallTime'])), } + payload, payloadBase64Encoded = item.requestBody + if payload: + payload = BytesIO (payload) + warcHeaders['X-Chrome-Base64Body'] = str (payloadBase64Encoded) record = writer.create_warc_record(req['url'], 'request', - payload=postData, http_headers=httpHeaders, + payload=payload, http_headers=httpHeaders, warc_headers_dict=warcHeaders) writer.write_record(record) @@ -187,9 +188,6 @@ class WarcLoader (AccountingSiteLoader): item.encodedDataLength, self.maxBodySize)) else: rawBody, base64Encoded = item.body - if rawBody is None: - raise ValueError ('no data for {} {} {}'.format (resp['url'], - resp['status'], reqId)) return rawBody, base64Encoded def _writeResponse (self, item, redirect, concurrentTo, rawBody, base64Encoded): -- cgit v1.2.3