summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.rst1
-rw-r--r--crocoite/warc.py20
2 files changed, 13 insertions, 8 deletions
diff --git a/README.rst b/README.rst
index 3d5af5f..145477f 100644
--- a/README.rst
+++ b/README.rst
@@ -60,6 +60,7 @@ Caveats
won’t work. Example: weather.com.
- Range requests (Range: bytes=1-100) are captured as-is, making playback
difficult
+- Content body of HTTP redirects cannot be retrieved due to a race condition
Most of these issues can be worked around by using the DOM snapshot, which is
also saved. This causes its own set of issues though:
diff --git a/crocoite/warc.py b/crocoite/warc.py
index 252e8cb..8e443fd 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -136,9 +136,16 @@ class WarcLoader (SiteLoader):
rawBody = b''
base64Encoded = False
- try:
+ if redirect:
+ # redirects reuse the same request, thus we cannot safely retrieve
+ # the body (i.e. getResponseBody may return the new location’s body)
+ pass
+ elif item.encodedDataLength > self.maxBodySize:
# check body size first, since we’re loading everything into memory
- if item.encodedDataLength < self.maxBodySize:
+ self.logger.error ('body for {} too large {} vs {}'.format (reqId,
+ item.encodedDataLength, self.maxBodySize))
+ else:
+ try:
body = self.tab.Network.getResponseBody (requestId=reqId)
rawBody = body['body']
base64Encoded = body['base64Encoded']
@@ -147,12 +154,9 @@ class WarcLoader (SiteLoader):
warcHeaders['X-Chrome-Base64Body'] = str (True)
else:
rawBody = rawBody.encode ('utf8')
- else:
- self.logger.error ('body for {} too large {} vs {}'.format (reqId,
- item.encodedDataLength, self.maxBodySize))
- except pychrome.exceptions.CallMethodException:
- self.logger.error ('no data for {} {} {}'.format (resp['url'],
- resp['status'], reqId))
+ except pychrome.exceptions.CallMethodException:
+ self.logger.error ('no data for {} {} {}'.format (resp['url'],
+ resp['status'], reqId))
httpHeaders = StatusAndHeaders('{} {}'.format (resp['status'],
self.getStatusText (resp)), resp['headers'], protocol='HTTP/1.1')