summary | refs | log | tree | commit | diff
path: root/crocoite
diff options
context:
space:
mode:
author: Lars-Dominik Braun <lars@6xq.net>, 2018-04-28 15:52:01 +0200
committer: Lars-Dominik Braun <lars@6xq.net>, 2018-05-04 15:59:50 +0200
commit: d15b498505dc0362fbd7e92bf7ba2945cad5a118 (patch)
tree: c2f7c046220ab5a6d07d98c75d83e6e3a2d7ffcd /crocoite
parent: ce888f5b5eb96abd5d575f272f11087bef4cd068 (diff)
download: crocoite-d15b498505dc0362fbd7e92bf7ba2945cad5a118.tar.gz
crocoite-d15b498505dc0362fbd7e92bf7ba2945cad5a118.tar.bz2
crocoite-d15b498505dc0362fbd7e92bf7ba2945cad5a118.zip
Move header unfolding into Item
Diffstat (limited to 'crocoite')
-rw-r--r-- crocoite/browser.py 22
-rw-r--r-- crocoite/warc.py 23
2 files changed, 24 insertions, 21 deletions
diff --git a/crocoite/browser.py b/crocoite/browser.py
index efe739a..7250b11 100644
--- a/crocoite/browser.py
+++ b/crocoite/browser.py
@@ -98,6 +98,28 @@ class Item:
raise ValueError ('Cannot fetch request body')
return None, False
+ @property
+ def requestHeaders (self):
+ # the response object may contain refined headers, which were
+ # *actually* sent over the wire
+ return self._unfoldHeaders (self.response.get ('requestHeaders', self.request['headers']))
+
+ @property
+ def responseHeaders (self):
+ return self._unfoldHeaders (self.response['headers'])
+
+ @staticmethod
+ def _unfoldHeaders (headers):
+ """
+ A host may send multiple headers using the same key, which Chrome folds
+ into the same item. Separate those.
+ """
+ items = []
+ for k in headers.keys ():
+ for v in headers[k].split ('\n'):
+ items.append ((k, v))
+ return items
+
def setRequest (self, req):
self.chromeRequest = req
diff --git a/crocoite/warc.py b/crocoite/warc.py
index 9e7ba04..9c96900 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -126,18 +126,6 @@ class WarcLoader (AccountingSiteLoader):
return text[0]
return 'No status text available'
- @staticmethod
- def _unfoldHeaders (headers):
- """
- A host may send multiple headers using the same key, which Chrome folds
- into the same item. Separate those.
- """
- items = []
- for k in headers.keys ():
- for v in headers[k].split ('\n'):
- items.append ((k, v))
- return items
-
def _writeRequest (self, item):
writer = self.writer
@@ -145,16 +133,11 @@ class WarcLoader (AccountingSiteLoader):
resp = item.response
url = urlsplit (resp['url'])
- # overwrite request headers with those actually sent
- newReqHeaders = resp.get ('requestHeaders')
- if newReqHeaders:
- req['headers'] = newReqHeaders
-
path = url.path
if url.query:
path += '?' + url.query
httpHeaders = StatusAndHeaders('{} {} HTTP/1.1'.format (req['method'], path),
- self._unfoldHeaders (req['headers']), protocol='HTTP/1.1', is_http_request=True)
+ item.requestHeaders, protocol='HTTP/1.1', is_http_request=True)
initiator = item.initiator
warcHeaders = {
'X-Chrome-Initiator': json.dumps (initiator),
@@ -208,10 +191,8 @@ class WarcLoader (AccountingSiteLoader):
(item.chromeResponse['timestamp']-item.chromeRequest['timestamp']))),
}
-
-
httpHeaders = StatusAndHeaders('{} {}'.format (resp['status'],
- self.getStatusText (resp)), self._unfoldHeaders (resp['headers']),
+ self.getStatusText (resp)), item.responseHeaders,
protocol='HTTP/1.1')
# Content is saved decompressed and decoded, remove these headers