From d15b498505dc0362fbd7e92bf7ba2945cad5a118 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 28 Apr 2018 15:52:01 +0200 Subject: Move header unfolding into Item --- crocoite/warc.py | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) (limited to 'crocoite/warc.py') diff --git a/crocoite/warc.py b/crocoite/warc.py index 9e7ba04..9c96900 100644 --- a/crocoite/warc.py +++ b/crocoite/warc.py @@ -126,18 +126,6 @@ class WarcLoader (AccountingSiteLoader): return text[0] return 'No status text available' - @staticmethod - def _unfoldHeaders (headers): - """ - A host may send multiple headers using the same key, which Chrome folds - into the same item. Separate those. - """ - items = [] - for k in headers.keys (): - for v in headers[k].split ('\n'): - items.append ((k, v)) - return items - def _writeRequest (self, item): writer = self.writer @@ -145,16 +133,11 @@ class WarcLoader (AccountingSiteLoader): resp = item.response url = urlsplit (resp['url']) - # overwrite request headers with those actually sent - newReqHeaders = resp.get ('requestHeaders') - if newReqHeaders: - req['headers'] = newReqHeaders - path = url.path if url.query: path += '?' + url.query httpHeaders = StatusAndHeaders('{} {} HTTP/1.1'.format (req['method'], path), - self._unfoldHeaders (req['headers']), protocol='HTTP/1.1', is_http_request=True) + item.requestHeaders, protocol='HTTP/1.1', is_http_request=True) initiator = item.initiator warcHeaders = { 'X-Chrome-Initiator': json.dumps (initiator), @@ -208,10 +191,8 @@ class WarcLoader (AccountingSiteLoader): (item.chromeResponse['timestamp']-item.chromeRequest['timestamp']))), } - - httpHeaders = StatusAndHeaders('{} {}'.format (resp['status'], - self.getStatusText (resp)), self._unfoldHeaders (resp['headers']), + self.getStatusText (resp)), item.responseHeaders, protocol='HTTP/1.1') # Content is saved decompressed and decoded, remove these headers -- cgit v1.2.3