From d15b498505dc0362fbd7e92bf7ba2945cad5a118 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sat, 28 Apr 2018 15:52:01 +0200 Subject: Move header unfolding into Item --- crocoite/browser.py | 22 ++++++++++++++++++++++ crocoite/warc.py | 23 ++--------------------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/crocoite/browser.py b/crocoite/browser.py index efe739a..7250b11 100644 --- a/crocoite/browser.py +++ b/crocoite/browser.py @@ -98,6 +98,28 @@ class Item: raise ValueError ('Cannot fetch request body') return None, False + @property + def requestHeaders (self): + # the response object may contain refined headers, which were + # *actually* sent over the wire + return self._unfoldHeaders (self.response.get ('requestHeaders', self.request['headers'])) + + @property + def responseHeaders (self): + return self._unfoldHeaders (self.response['headers']) + + @staticmethod + def _unfoldHeaders (headers): + """ + A host may send multiple headers using the same key, which Chrome folds + into the same item. Separate those. + """ + items = [] + for k in headers.keys (): + for v in headers[k].split ('\n'): + items.append ((k, v)) + return items + def setRequest (self, req): self.chromeRequest = req diff --git a/crocoite/warc.py b/crocoite/warc.py index 9e7ba04..9c96900 100644 --- a/crocoite/warc.py +++ b/crocoite/warc.py @@ -126,18 +126,6 @@ class WarcLoader (AccountingSiteLoader): return text[0] return 'No status text available' - @staticmethod - def _unfoldHeaders (headers): - """ - A host may send multiple headers using the same key, which Chrome folds - into the same item. Separate those. - """ - items = [] - for k in headers.keys (): - for v in headers[k].split ('\n'): - items.append ((k, v)) - return items - def _writeRequest (self, item): writer = self.writer @@ -145,16 +133,11 @@ class WarcLoader (AccountingSiteLoader): resp = item.response url = urlsplit (resp['url']) - # overwrite request headers with those actually sent - newReqHeaders = resp.get ('requestHeaders') - if newReqHeaders: - req['headers'] = newReqHeaders - path = url.path if url.query: path += '?' + url.query httpHeaders = StatusAndHeaders('{} {} HTTP/1.1'.format (req['method'], path), - self._unfoldHeaders (req['headers']), protocol='HTTP/1.1', is_http_request=True) + item.requestHeaders, protocol='HTTP/1.1', is_http_request=True) initiator = item.initiator warcHeaders = { 'X-Chrome-Initiator': json.dumps (initiator), @@ -208,10 +191,8 @@ class WarcLoader (AccountingSiteLoader): (item.chromeResponse['timestamp']-item.chromeRequest['timestamp']))), } - - httpHeaders = StatusAndHeaders('{} {}'.format (resp['status'], - self.getStatusText (resp)), self._unfoldHeaders (resp['headers']), + self.getStatusText (resp)), item.responseHeaders, protocol='HTTP/1.1') # Content is saved decompressed and decoded, remove these headers -- cgit v1.2.3