diff options
| author | Lars-Dominik Braun <lars@6xq.net> | 2018-04-28 15:43:01 +0200 | 
|---|---|---|
| committer | Lars-Dominik Braun <lars@6xq.net> | 2018-05-04 15:59:50 +0200 | 
| commit | ce888f5b5eb96abd5d575f272f11087bef4cd068 (patch) | |
| tree | 3651386b413baa523f4b5b9446ffbf8b1b1fda4e | |
| parent | cd51a009add3f2f5f8bba89ae4dc663e829a575c (diff) | |
| download | crocoite-ce888f5b5eb96abd5d575f272f11087bef4cd068.tar.gz crocoite-ce888f5b5eb96abd5d575f272f11087bef4cd068.tar.bz2 crocoite-ce888f5b5eb96abd5d575f272f11087bef4cd068.zip | |
Fetch request POST body
The body is fetched separately only if the request has one and it was not already included inline in the request data.
| -rw-r--r-- | crocoite/browser.py | 16 | ||||
| -rw-r--r-- | crocoite/warc.py | 12 | 
2 files changed, 20 insertions, 8 deletions
| diff --git a/crocoite/browser.py b/crocoite/browser.py index 0840374..efe739a 100644 --- a/crocoite/browser.py +++ b/crocoite/browser.py @@ -82,7 +82,21 @@ class Item:                  rawBody = rawBody.encode ('utf8')              return rawBody, base64Encoded          except (pychrome.exceptions.CallMethodException, pychrome.exceptions.TimeoutException): -            return None, False +            raise ValueError ('Cannot fetch response body') + +    @property +    def requestBody (self): +        """ Get request/POST body """ +        req = self.request +        postData = req.get ('postData') +        if postData: +            return postData.encode ('utf8'), False +        elif req.get ('hasPostData', False): +            try: +                return b64decode (self.tab.Network.getRequestPostData (requestId=self.id, _timeout=60)['postData']), True +            except (pychrome.exceptions.CallMethodException, pychrome.exceptions.TimeoutException): +                raise ValueError ('Cannot fetch request body') +        return None, False      def setRequest (self, req):          self.chromeRequest = req diff --git a/crocoite/warc.py b/crocoite/warc.py index 8664e5a..9e7ba04 100644 --- a/crocoite/warc.py +++ b/crocoite/warc.py @@ -150,9 +150,6 @@ class WarcLoader (AccountingSiteLoader):          if newReqHeaders:              req['headers'] = newReqHeaders -        postData = req.get ('postData') -        if postData: -            postData = BytesIO (postData.encode ('utf8'))          path = url.path          if url.query:              path += '?' 
+ url.query @@ -163,8 +160,12 @@ class WarcLoader (AccountingSiteLoader):                  'X-Chrome-Initiator': json.dumps (initiator),                  'WARC-Date': datetime_to_iso_date (datetime.utcfromtimestamp (item.chromeRequest['wallTime'])),                  } +        payload, payloadBase64Encoded = item.requestBody +        if payload: +            payload = BytesIO (payload) +            warcHeaders['X-Chrome-Base64Body'] = str (payloadBase64Encoded)          record = writer.create_warc_record(req['url'], 'request', -                payload=postData, http_headers=httpHeaders, +                payload=payload, http_headers=httpHeaders,                  warc_headers_dict=warcHeaders)          writer.write_record(record) @@ -187,9 +188,6 @@ class WarcLoader (AccountingSiteLoader):                      item.encodedDataLength, self.maxBodySize))          else:              rawBody, base64Encoded = item.body -            if rawBody is None: -                raise ValueError ('no data for {} {} {}'.format (resp['url'], -                    resp['status'], reqId))          return rawBody, base64Encoded      def _writeResponse (self, item, redirect, concurrentTo, rawBody, base64Encoded): | 
