From fabd84cb10beab2b2e5aed7489fc04df9fda7e83 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 4 Aug 2018 15:31:12 +0200 Subject: Properly handle failure to retrieve request body MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Just truncate the WARC record like we do with responses. Also add a few tests, but they’re not covering the call to getRequestPostData. Not sure what we have to do here. --- crocoite/warc.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'crocoite/warc.py') diff --git a/crocoite/warc.py b/crocoite/warc.py index 32fe5d6..9b97e75 100644 --- a/crocoite/warc.py +++ b/crocoite/warc.py @@ -84,6 +84,7 @@ class WarcHandler (EventHandler): return record def _writeRequest (self, item): + logger = self.logger.bind (reqId=item.id) req = item.request resp = item.response @@ -97,9 +98,21 @@ class WarcHandler (EventHandler): initiator = item.initiator warcHeaders = { 'X-Chrome-Initiator': json.dumps (initiator), + 'X-Chrome-Request-ID': item.id, 'WARC-Date': datetime_to_iso_date (datetime.utcfromtimestamp (item.chromeRequest['wallTime'])), } - payload, payloadBase64Encoded = item.requestBody + try: + bodyTruncated = None + payload, payloadBase64Encoded = item.requestBody + except ValueError: + # oops, don’t know what went wrong here + bodyTruncated = 'unspecified' + logger.error ('requestBody missing', uuid='ee9adc58-e723-4595-9feb-312a67ead6a0') + + if bodyTruncated: + warcHeaders['WARC-Truncated'] = bodyTruncated + payload = None + if payload: payload = BytesIO (payload) warcHeaders['X-Chrome-Base64Body'] = str (payloadBase64Encoded) @@ -139,6 +152,7 @@ class WarcHandler (EventHandler): 'X-Chrome-Protocol': resp.get ('protocol', ''), 'X-Chrome-FromDiskCache': str (resp.get ('fromDiskCache')), 'X-Chrome-ConnectionReused': str (resp.get ('connectionReused')), + 'X-Chrome-Request-ID': item.id, 'WARC-Date': datetime_to_iso_date (datetime.utcfromtimestamp ( item.chromeRequest['wallTime']+ (item.chromeResponse['timestamp']-item.chromeRequest['timestamp']))), -- cgit v1.2.3