diff options
Diffstat (limited to 'crocoite')
| -rw-r--r-- | crocoite/browser.py | 16 | ||||
| -rw-r--r-- | crocoite/warc.py | 12 | 
2 files changed, 20 insertions, 8 deletions
| diff --git a/crocoite/browser.py b/crocoite/browser.py index 0840374..efe739a 100644 --- a/crocoite/browser.py +++ b/crocoite/browser.py @@ -82,7 +82,21 @@ class Item:                  rawBody = rawBody.encode ('utf8')              return rawBody, base64Encoded          except (pychrome.exceptions.CallMethodException, pychrome.exceptions.TimeoutException): -            return None, False +            raise ValueError ('Cannot fetch response body') + +    @property +    def requestBody (self): +        """ Get request/POST body """ +        req = self.request +        postData = req.get ('postData') +        if postData: +            return postData.encode ('utf8'), False +        elif req.get ('hasPostData', False): +            try: +                return b64decode (self.tab.Network.getRequestPostData (requestId=self.id, _timeout=60)['postData']), True +            except (pychrome.exceptions.CallMethodException, pychrome.exceptions.TimeoutException): +                raise ValueError ('Cannot fetch request body') +        return None, False      def setRequest (self, req):          self.chromeRequest = req diff --git a/crocoite/warc.py b/crocoite/warc.py index 8664e5a..9e7ba04 100644 --- a/crocoite/warc.py +++ b/crocoite/warc.py @@ -150,9 +150,6 @@ class WarcLoader (AccountingSiteLoader):          if newReqHeaders:              req['headers'] = newReqHeaders -        postData = req.get ('postData') -        if postData: -            postData = BytesIO (postData.encode ('utf8'))          path = url.path          if url.query:              path += '?' + url.query @@ -163,8 +160,12 @@ class WarcLoader (AccountingSiteLoader):                  'X-Chrome-Initiator': json.dumps (initiator),                  'WARC-Date': datetime_to_iso_date (datetime.utcfromtimestamp (item.chromeRequest['wallTime'])),                  } +        payload, payloadBase64Encoded = item.requestBody +        if payload: +            payload = BytesIO (payload) +            warcHeaders['X-Chrome-Base64Body'] = str (payloadBase64Encoded)          record = writer.create_warc_record(req['url'], 'request', -                payload=postData, http_headers=httpHeaders, +                payload=payload, http_headers=httpHeaders,                  warc_headers_dict=warcHeaders)          writer.write_record(record) @@ -187,9 +188,6 @@ class WarcLoader (AccountingSiteLoader):                      item.encodedDataLength, self.maxBodySize))          else:              rawBody, base64Encoded = item.body -            if rawBody is None: -                raise ValueError ('no data for {} {} {}'.format (resp['url'], -                    resp['status'], reqId))          return rawBody, base64Encoded      def _writeResponse (self, item, redirect, concurrentTo, rawBody, base64Encoded): | 
