summary refs log tree commit diff
diff options
context:
space:
mode:
author Lars-Dominik Braun <lars@6xq.net> 2017-12-20 11:29:34 +0100
committer Lars-Dominik Braun <lars@6xq.net> 2017-12-20 11:29:34 +0100
commit 563a016fd0ca705f61abd7ea67a6da411b9115f2 (patch)
tree a6c05a2d8959152ac254b5be5fc0f698b7631a8d
parent d20528cf1f5ee85162c449d9c74c2c862ab3d4ff (diff)
download crocoite-563a016fd0ca705f61abd7ea67a6da411b9115f2.zip
crocoite-563a016fd0ca705f61abd7ea67a6da411b9115f2.tar.gz
crocoite-563a016fd0ca705f61abd7ea67a6da411b9115f2.tar.bz2
Fix HTTP headers using the same key more than once
Chrome DevTools folds headers that share a key into a single newline-separated value; this is an undocumented DevTools feature.
-rw-r--r-- crocoite/warc.py 17
1 files changed, 15 insertions, 2 deletions
diff --git a/crocoite/warc.py b/crocoite/warc.py
index 92ae601..1e3ea06 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -125,6 +125,18 @@ class WarcLoader (SiteLoader):
return text[0]
return 'No status text available'
+ @staticmethod
+ def _unfoldHeaders (headers):
+ """
+ A host may send multiple headers using the same key, which Chrome folds
+ into the same item. Separate those.
+ """
+ items = []
+ for k in headers.keys ():
+ for v in headers[k].split ('\n'):
+ items.append ((k, v))
+ return items
+
def loadingFinished (self, item, redirect=False):
writer = self.writer
@@ -145,7 +157,7 @@ class WarcLoader (SiteLoader):
if url.query:
path += '?' + url.query
httpHeaders = StatusAndHeaders('{} {} HTTP/1.1'.format (req['method'], path),
- req['headers'], protocol='HTTP/1.1', is_http_request=True)
+ self._unfoldHeaders (req['headers']), protocol='HTTP/1.1', is_http_request=True)
initiator = item.initiator
warcHeaders = {
'X-Chrome-Initiator': json.dumps (initiator),
@@ -194,7 +206,8 @@ class WarcLoader (SiteLoader):
resp['status'], reqId))
httpHeaders = StatusAndHeaders('{} {}'.format (resp['status'],
- self.getStatusText (resp)), resp['headers'], protocol='HTTP/1.1')
+ self.getStatusText (resp)), self._unfoldHeaders (resp['headers']),
+ protocol='HTTP/1.1')
# Content is saved decompressed and decoded, remove these headers
blacklistedHeaders = {'transfer-encoding', 'content-encoding'}