diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2017-12-20 11:29:34 +0100 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2017-12-20 11:29:34 +0100 |
commit | 563a016fd0ca705f61abd7ea67a6da411b9115f2 (patch) | |
tree | a6c05a2d8959152ac254b5be5fc0f698b7631a8d | |
parent | d20528cf1f5ee85162c449d9c74c2c862ab3d4ff (diff) | |
download | crocoite-563a016fd0ca705f61abd7ea67a6da411b9115f2.tar.gz crocoite-563a016fd0ca705f61abd7ea67a6da411b9115f2.tar.bz2 crocoite-563a016fd0ca705f61abd7ea67a6da411b9115f2.zip |
Fix HTTP headers using the same key more than once
This is an undocumented DevTools feature.
-rw-r--r-- | crocoite/warc.py | 17 |
1 file changed, 15 insertions, 2 deletions
```diff
diff --git a/crocoite/warc.py b/crocoite/warc.py
index 92ae601..1e3ea06 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -125,6 +125,18 @@ class WarcLoader (SiteLoader):
             return text[0]
         return 'No status text available'
 
+    @staticmethod
+    def _unfoldHeaders (headers):
+        """
+        A host may send multiple headers using the same key, which Chrome folds
+        into the same item. Separate those.
+        """
+        items = []
+        for k in headers.keys ():
+            for v in headers[k].split ('\n'):
+                items.append ((k, v))
+        return items
+
     def loadingFinished (self, item, redirect=False):
         writer = self.writer
 
@@ -145,7 +157,7 @@ class WarcLoader (SiteLoader):
         if url.query:
             path += '?' + url.query
         httpHeaders = StatusAndHeaders('{} {} HTTP/1.1'.format (req['method'], path),
-                req['headers'], protocol='HTTP/1.1', is_http_request=True)
+                self._unfoldHeaders (req['headers']), protocol='HTTP/1.1', is_http_request=True)
         initiator = item.initiator
         warcHeaders = {
                 'X-Chrome-Initiator': json.dumps (initiator),
@@ -194,7 +206,8 @@ class WarcLoader (SiteLoader):
                 resp['status'], reqId))
 
         httpHeaders = StatusAndHeaders('{} {}'.format (resp['status'],
-                self.getStatusText (resp)), resp['headers'], protocol='HTTP/1.1')
+                self.getStatusText (resp)), self._unfoldHeaders (resp['headers']),
+                protocol='HTTP/1.1')
 
         # Content is saved decompressed and decoded, remove these headers
         blacklistedHeaders = {'transfer-encoding', 'content-encoding'}
```