diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2018-08-04 15:31:12 +0200 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2018-08-04 15:31:12 +0200 |
commit | fabd84cb10beab2b2e5aed7489fc04df9fda7e83 (patch) | |
tree | 6866d445b3efc2c5e98d1eec2c554696f71daa44 /crocoite/warc.py | |
parent | 6a6a7e80dc94b306cda8e5c93a2173b834ff5e3c (diff) | |
download | crocoite-fabd84cb10beab2b2e5aed7489fc04df9fda7e83.tar.gz crocoite-fabd84cb10beab2b2e5aed7489fc04df9fda7e83.tar.bz2 crocoite-fabd84cb10beab2b2e5aed7489fc04df9fda7e83.zip |
Properly handle failure to retrieve request body
Just truncate the WARC record like we do with responses. Also add a few
tests, but they’re not covering the call to getRequestPostData. Not sure
what we have to do here.
Diffstat (limited to 'crocoite/warc.py')
-rw-r--r-- | crocoite/warc.py | 16 |
1 file changed, 15 insertions, 1 deletion
```diff
diff --git a/crocoite/warc.py b/crocoite/warc.py
index 32fe5d6..9b97e75 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -84,6 +84,7 @@ class WarcHandler (EventHandler):
         return record
 
     def _writeRequest (self, item):
+        logger = self.logger.bind (reqId=item.id)
         req = item.request
         resp = item.response
 
@@ -97,9 +98,21 @@ class WarcHandler (EventHandler):
         initiator = item.initiator
         warcHeaders = {
                 'X-Chrome-Initiator': json.dumps (initiator),
+                'X-Chrome-Request-ID': item.id,
                 'WARC-Date': datetime_to_iso_date (datetime.utcfromtimestamp (item.chromeRequest['wallTime'])),
                 }
-        payload, payloadBase64Encoded = item.requestBody
+        try:
+            bodyTruncated = None
+            payload, payloadBase64Encoded = item.requestBody
+        except ValueError:
+            # oops, don’t know what went wrong here
+            bodyTruncated = 'unspecified'
+            logger.error ('requestBody missing', uuid='ee9adc58-e723-4595-9feb-312a67ead6a0')
+
+        if bodyTruncated:
+            warcHeaders['WARC-Truncated'] = bodyTruncated
+            payload = None
+
         if payload:
             payload = BytesIO (payload)
             warcHeaders['X-Chrome-Base64Body'] = str (payloadBase64Encoded)
@@ -139,6 +152,7 @@ class WarcHandler (EventHandler):
                 'X-Chrome-Protocol': resp.get ('protocol', ''),
                 'X-Chrome-FromDiskCache': str (resp.get ('fromDiskCache')),
                 'X-Chrome-ConnectionReused': str (resp.get ('connectionReused')),
+                'X-Chrome-Request-ID': item.id,
                 'WARC-Date': datetime_to_iso_date (datetime.utcfromtimestamp (
                         item.chromeRequest['wallTime']+
                         (item.chromeResponse['timestamp']-item.chromeRequest['timestamp']))),
```