summary | refs | log | tree | commit | diff
path: root/crocoite/warc.py
diff options
context:
space:
mode:
author: Lars-Dominik Braun <lars@6xq.net> 2018-08-04 15:31:12 +0200
committer: Lars-Dominik Braun <lars@6xq.net> 2018-08-04 15:31:12 +0200
commit: fabd84cb10beab2b2e5aed7489fc04df9fda7e83 (patch)
tree: 6866d445b3efc2c5e98d1eec2c554696f71daa44 /crocoite/warc.py
parent: 6a6a7e80dc94b306cda8e5c93a2173b834ff5e3c (diff)
download: crocoite-fabd84cb10beab2b2e5aed7489fc04df9fda7e83.tar.gz
crocoite-fabd84cb10beab2b2e5aed7489fc04df9fda7e83.tar.bz2
crocoite-fabd84cb10beab2b2e5aed7489fc04df9fda7e83.zip
Properly handle failure to retrieve request body
Just truncate the WARC record like we do with responses. Also add a few tests, but they’re not covering the call to getRequestPostData. Not sure what we have to do here.
Diffstat (limited to 'crocoite/warc.py')
-rw-r--r-- crocoite/warc.py 16
1 files changed, 15 insertions, 1 deletions
diff --git a/crocoite/warc.py b/crocoite/warc.py
index 32fe5d6..9b97e75 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -84,6 +84,7 @@ class WarcHandler (EventHandler):
return record
def _writeRequest (self, item):
+ logger = self.logger.bind (reqId=item.id)
req = item.request
resp = item.response
@@ -97,9 +98,21 @@ class WarcHandler (EventHandler):
initiator = item.initiator
warcHeaders = {
'X-Chrome-Initiator': json.dumps (initiator),
+ 'X-Chrome-Request-ID': item.id,
'WARC-Date': datetime_to_iso_date (datetime.utcfromtimestamp (item.chromeRequest['wallTime'])),
}
- payload, payloadBase64Encoded = item.requestBody
+ try:
+ bodyTruncated = None
+ payload, payloadBase64Encoded = item.requestBody
+ except ValueError:
+ # oops, don’t know what went wrong here
+ bodyTruncated = 'unspecified'
+ logger.error ('requestBody missing', uuid='ee9adc58-e723-4595-9feb-312a67ead6a0')
+
+ if bodyTruncated:
+ warcHeaders['WARC-Truncated'] = bodyTruncated
+ payload = None
+
if payload:
payload = BytesIO (payload)
warcHeaders['X-Chrome-Base64Body'] = str (payloadBase64Encoded)
@@ -139,6 +152,7 @@ class WarcHandler (EventHandler):
'X-Chrome-Protocol': resp.get ('protocol', ''),
'X-Chrome-FromDiskCache': str (resp.get ('fromDiskCache')),
'X-Chrome-ConnectionReused': str (resp.get ('connectionReused')),
+ 'X-Chrome-Request-ID': item.id,
'WARC-Date': datetime_to_iso_date (datetime.utcfromtimestamp (
item.chromeRequest['wallTime']+
(item.chromeResponse['timestamp']-item.chromeRequest['timestamp']))),