diff options
| field | value | date |
|---|---|---|
| author | Lars-Dominik Braun <lars@6xq.net> | 2018-04-14 14:25:22 +0200 |
| committer | Lars-Dominik Braun <lars@6xq.net> | 2018-04-14 14:26:00 +0200 |
| commit | 4a6494b19f287848588641aa1330807e69031e8b (patch) | |
| tree | 62fbb88145eaed4e43dd56ab15aeb3ca50d235fe | |
| parent | 048085595975b02389e8d58ed74e5a18573ee569 (diff) | |
| download | crocoite-4a6494b19f287848588641aa1330807e69031e8b.tar.gz crocoite-4a6494b19f287848588641aa1330807e69031e8b.tar.bz2 crocoite-4a6494b19f287848588641aa1330807e69031e8b.zip | |
Fix base64 body detection
Broken by commit a21d7332e33a3e47a363004196451721d449e70b
| -rw-r--r-- | crocoite/browser.py | 18 |
| -rw-r--r-- | crocoite/warc.py | 2 |
2 files changed, 10 insertions, 10 deletions
```diff
diff --git a/crocoite/browser.py b/crocoite/browser.py
index 744d064..693d796 100644
--- a/crocoite/browser.py
+++ b/crocoite/browser.py
@@ -76,9 +76,9 @@ class Item:
                 rawBody = b64decode (rawBody)
             else:
                 rawBody = rawBody.encode ('utf8')
-            return rawBody
+            return rawBody, base64Encoded
         except (pychrome.exceptions.CallMethodException, pychrome.exceptions.TimeoutException):
-            return None
+            return None, False
 
     def setRequest (self, req):
         self.chromeRequest = req
@@ -484,7 +484,7 @@ class TestSiteLoader (unittest.TestCase):
             for item in l.finished:
                 if item.url.endswith ('/empty'):
                     self.assertEqual (item.response['status'], 200)
-                    self.assertEqual (item.body, b'')
+                    self.assertEqual (item.body[0], b'')
                 elif item.url.endswith ('/redirect/301'):
                     self.assertEqual (item.response['status'], 301)
                 else:
@@ -499,7 +499,7 @@ class TestSiteLoader (unittest.TestCase):
                 l.waitIdle ()
                 self.assertEqual (len (l.finished), 1)
                 self.assertUrls (l, ['/encoding/{}'.format (encoding)])
-                self.assertEqual (l.finished[0].body, expected.encode ('utf8'))
+                self.assertEqual (l.finished[0].body[0], expected.encode ('utf8'))
 
     def test_binary (self):
         """ Browser should ignore content it cannot display (i.e. octet-stream) """
@@ -515,7 +515,7 @@ class TestSiteLoader (unittest.TestCase):
             l.waitIdle ()
             self.assertEqual (len (l.finished), 1)
             self.assertUrls (l, ['/image'])
-            self.assertEqual (l.finished[0].body, TestHTTPRequestHandler.imageTestData)
+            self.assertEqual (l.finished[0].body[0], TestHTTPRequestHandler.imageTestData)
 
     def test_attachment (self):
         """ And downloads won’t work in headless mode """
@@ -533,10 +533,10 @@ class TestSiteLoader (unittest.TestCase):
             for item in l.finished:
                 if item.url.endswith ('/html'):
                     self.assertEqual (item.response['status'], 200)
-                    self.assertEqual (item.body, TestHTTPRequestHandler.htmlTestData.encode ('utf-8'))
+                    self.assertEqual (item.body[0], TestHTTPRequestHandler.htmlTestData.encode ('utf-8'))
                 elif item.url.endswith ('/image'):
                     self.assertEqual (item.response['status'], 200)
-                    self.assertEqual (item.body, TestHTTPRequestHandler.imageTestData)
+                    self.assertEqual (item.body[0], TestHTTPRequestHandler.imageTestData)
                 elif item.url.endswith ('/nonexistent'):
                     self.assertEqual (item.response['status'], 404)
                 else:
@@ -550,10 +550,10 @@ class TestSiteLoader (unittest.TestCase):
             for item in l.finished:
                 if item.url.endswith ('/alert'):
                     self.assertEqual (item.response['status'], 200)
-                    self.assertEqual (item.body, TestHTTPRequestHandler.alertData.encode ('utf-8'))
+                    self.assertEqual (item.body[0], TestHTTPRequestHandler.alertData.encode ('utf-8'))
                 elif item.url.endswith ('/image'):
                     self.assertEqual (item.response['status'], 200)
-                    self.assertEqual (item.body, TestHTTPRequestHandler.imageTestData)
+                    self.assertEqual (item.body[0], TestHTTPRequestHandler.imageTestData)
                 else:
                     self.fail ('unknown url')
 
diff --git a/crocoite/warc.py b/crocoite/warc.py
index b56d315..8664e5a 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -186,7 +186,7 @@ class WarcLoader (AccountingSiteLoader):
                 raise ValueError ('body for {} too large {} vs {}'.format (reqId,
                         item.encodedDataLength, self.maxBodySize))
             else:
-                rawBody = item.body
+                rawBody, base64Encoded = item.body
                 if rawBody is None:
                     raise ValueError ('no data for {} {} {}'.format (resp['url'],
                             resp['status'], reqId))
```