diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2018-04-28 15:52:01 +0200 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2018-05-04 15:59:50 +0200 |
commit | d15b498505dc0362fbd7e92bf7ba2945cad5a118 (patch) | |
tree | c2f7c046220ab5a6d07d98c75d83e6e3a2d7ffcd | |
parent | ce888f5b5eb96abd5d575f272f11087bef4cd068 (diff) | |
download | crocoite-d15b498505dc0362fbd7e92bf7ba2945cad5a118.tar.gz crocoite-d15b498505dc0362fbd7e92bf7ba2945cad5a118.tar.bz2 crocoite-d15b498505dc0362fbd7e92bf7ba2945cad5a118.zip |
Move header unfolding into Item
-rw-r--r-- | crocoite/browser.py | 22 | ||||
-rw-r--r-- | crocoite/warc.py | 23 |
2 files changed, 24 insertions, 21 deletions
```diff
diff --git a/crocoite/browser.py b/crocoite/browser.py
index efe739a..7250b11 100644
--- a/crocoite/browser.py
+++ b/crocoite/browser.py
@@ -98,6 +98,28 @@ class Item:
             raise ValueError ('Cannot fetch request body')
         return None, False
 
+    @property
+    def requestHeaders (self):
+        # the response object may contain refined headers, which were
+        # *actually* sent over the wire
+        return self._unfoldHeaders (self.response.get ('requestHeaders', self.request['headers']))
+
+    @property
+    def responseHeaders (self):
+        return self._unfoldHeaders (self.response['headers'])
+
+    @staticmethod
+    def _unfoldHeaders (headers):
+        """
+        A host may send multiple headers using the same key, which Chrome folds
+        into the same item. Separate those.
+        """
+        items = []
+        for k in headers.keys ():
+            for v in headers[k].split ('\n'):
+                items.append ((k, v))
+        return items
+
     def setRequest (self, req):
         self.chromeRequest = req
 
diff --git a/crocoite/warc.py b/crocoite/warc.py
index 9e7ba04..9c96900 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -126,18 +126,6 @@ class WarcLoader (AccountingSiteLoader):
                 return text[0]
         return 'No status text available'
 
-    @staticmethod
-    def _unfoldHeaders (headers):
-        """
-        A host may send multiple headers using the same key, which Chrome folds
-        into the same item. Separate those.
-        """
-        items = []
-        for k in headers.keys ():
-            for v in headers[k].split ('\n'):
-                items.append ((k, v))
-        return items
-
     def _writeRequest (self, item):
         writer = self.writer
 
@@ -145,16 +133,11 @@ class WarcLoader (AccountingSiteLoader):
         resp = item.response
         url = urlsplit (resp['url'])
 
-        # overwrite request headers with those actually sent
-        newReqHeaders = resp.get ('requestHeaders')
-        if newReqHeaders:
-            req['headers'] = newReqHeaders
-
         path = url.path
         if url.query:
             path += '?' + url.query
         httpHeaders = StatusAndHeaders('{} {} HTTP/1.1'.format (req['method'], path),
-                self._unfoldHeaders (req['headers']), protocol='HTTP/1.1', is_http_request=True)
+                item.requestHeaders, protocol='HTTP/1.1', is_http_request=True)
         initiator = item.initiator
         warcHeaders = {
                 'X-Chrome-Initiator': json.dumps (initiator),
@@ -208,10 +191,8 @@ class WarcLoader (AccountingSiteLoader):
                         (item.chromeResponse['timestamp']-item.chromeRequest['timestamp']))),
                 }
 
-
-
         httpHeaders = StatusAndHeaders('{} {}'.format (resp['status'],
-                self.getStatusText (resp)), self._unfoldHeaders (resp['headers']),
+                self.getStatusText (resp)), item.responseHeaders,
                 protocol='HTTP/1.1')
 
         # Content is saved decompressed and decoded, remove these headers
```