summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Lars-Dominik Braun <lars@6xq.net> 2018-04-14 14:25:22 +0200
committer Lars-Dominik Braun <lars@6xq.net> 2018-04-14 14:26:00 +0200
commit 4a6494b19f287848588641aa1330807e69031e8b (patch)
tree 62fbb88145eaed4e43dd56ab15aeb3ca50d235fe
parent 048085595975b02389e8d58ed74e5a18573ee569 (diff)
download crocoite-4a6494b19f287848588641aa1330807e69031e8b.tar.gz
crocoite-4a6494b19f287848588641aa1330807e69031e8b.tar.bz2
crocoite-4a6494b19f287848588641aa1330807e69031e8b.zip
Fix base64 body detection
Broken by commit a21d7332e33a3e47a363004196451721d449e70b
-rw-r--r-- crocoite/browser.py 18
-rw-r--r-- crocoite/warc.py 2
2 files changed, 10 insertions, 10 deletions
diff --git a/crocoite/browser.py b/crocoite/browser.py
index 744d064..693d796 100644
--- a/crocoite/browser.py
+++ b/crocoite/browser.py
@@ -76,9 +76,9 @@ class Item:
rawBody = b64decode (rawBody)
else:
rawBody = rawBody.encode ('utf8')
- return rawBody
+ return rawBody, base64Encoded
except (pychrome.exceptions.CallMethodException, pychrome.exceptions.TimeoutException):
- return None
+ return None, False
def setRequest (self, req):
self.chromeRequest = req
@@ -484,7 +484,7 @@ class TestSiteLoader (unittest.TestCase):
for item in l.finished:
if item.url.endswith ('/empty'):
self.assertEqual (item.response['status'], 200)
- self.assertEqual (item.body, b'')
+ self.assertEqual (item.body[0], b'')
elif item.url.endswith ('/redirect/301'):
self.assertEqual (item.response['status'], 301)
else:
@@ -499,7 +499,7 @@ class TestSiteLoader (unittest.TestCase):
l.waitIdle ()
self.assertEqual (len (l.finished), 1)
self.assertUrls (l, ['/encoding/{}'.format (encoding)])
- self.assertEqual (l.finished[0].body, expected.encode ('utf8'))
+ self.assertEqual (l.finished[0].body[0], expected.encode ('utf8'))
def test_binary (self):
""" Browser should ignore content it cannot display (i.e. octet-stream) """
@@ -515,7 +515,7 @@ class TestSiteLoader (unittest.TestCase):
l.waitIdle ()
self.assertEqual (len (l.finished), 1)
self.assertUrls (l, ['/image'])
- self.assertEqual (l.finished[0].body, TestHTTPRequestHandler.imageTestData)
+ self.assertEqual (l.finished[0].body[0], TestHTTPRequestHandler.imageTestData)
def test_attachment (self):
""" And downloads won’t work in headless mode """
@@ -533,10 +533,10 @@ class TestSiteLoader (unittest.TestCase):
for item in l.finished:
if item.url.endswith ('/html'):
self.assertEqual (item.response['status'], 200)
- self.assertEqual (item.body, TestHTTPRequestHandler.htmlTestData.encode ('utf-8'))
+ self.assertEqual (item.body[0], TestHTTPRequestHandler.htmlTestData.encode ('utf-8'))
elif item.url.endswith ('/image'):
self.assertEqual (item.response['status'], 200)
- self.assertEqual (item.body, TestHTTPRequestHandler.imageTestData)
+ self.assertEqual (item.body[0], TestHTTPRequestHandler.imageTestData)
elif item.url.endswith ('/nonexistent'):
self.assertEqual (item.response['status'], 404)
else:
@@ -550,10 +550,10 @@ class TestSiteLoader (unittest.TestCase):
for item in l.finished:
if item.url.endswith ('/alert'):
self.assertEqual (item.response['status'], 200)
- self.assertEqual (item.body, TestHTTPRequestHandler.alertData.encode ('utf-8'))
+ self.assertEqual (item.body[0], TestHTTPRequestHandler.alertData.encode ('utf-8'))
elif item.url.endswith ('/image'):
self.assertEqual (item.response['status'], 200)
- self.assertEqual (item.body, TestHTTPRequestHandler.imageTestData)
+ self.assertEqual (item.body[0], TestHTTPRequestHandler.imageTestData)
else:
self.fail ('unknown url')
diff --git a/crocoite/warc.py b/crocoite/warc.py
index b56d315..8664e5a 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -186,7 +186,7 @@ class WarcLoader (AccountingSiteLoader):
raise ValueError ('body for {} too large {} vs {}'.format (reqId,
item.encodedDataLength, self.maxBodySize))
else:
- rawBody = item.body
+ rawBody, base64Encoded = item.body
if rawBody is None:
raise ValueError ('no data for {} {} {}'.format (resp['url'],
resp['status'], reqId))