summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Lars-Dominik Braun <lars@6xq.net>, 2018-08-04 15:31:12 +0200
committer: Lars-Dominik Braun <lars@6xq.net>, 2018-08-04 15:31:12 +0200
commit: fabd84cb10beab2b2e5aed7489fc04df9fda7e83 (patch)
tree: 6866d445b3efc2c5e98d1eec2c554696f71daa44
parent: 6a6a7e80dc94b306cda8e5c93a2173b834ff5e3c (diff)
download: crocoite-fabd84cb10beab2b2e5aed7489fc04df9fda7e83.tar.gz
crocoite-fabd84cb10beab2b2e5aed7489fc04df9fda7e83.tar.bz2
crocoite-fabd84cb10beab2b2e5aed7489fc04df9fda7e83.zip
Properly handle failure to retrieve request body
Just truncate the WARC record like we do with responses. Also add a few tests, but they’re not covering the call to getRequestPostData. Not sure what we have to do here.
-rw-r--r--  crocoite/browser.py  4
-rw-r--r--  crocoite/test_browser.py  35
-rw-r--r--  crocoite/warc.py  16
3 files changed, 50 insertions, 5 deletions
diff --git a/crocoite/browser.py b/crocoite/browser.py
index fbd12fd..c3ef5ce 100644
--- a/crocoite/browser.py
+++ b/crocoite/browser.py
@@ -103,7 +103,8 @@ class Item:
return postData.encode ('utf8'), False
elif req.get ('hasPostData', False):
try:
- return b64decode (self.tab.Network.getRequestPostData (requestId=self.id, _timeout=10)['postData']), True
+ postData = self.tab.Network.getRequestPostData (requestId=self.id, _timeout=10)['postData']
+ return b64decode (postData), True
except (pychrome.exceptions.CallMethodException, pychrome.exceptions.TimeoutException):
raise ValueError ('Cannot fetch request body')
return None, False
@@ -315,6 +316,7 @@ class SiteLoader:
level = {'verbose': Level.DEBUG, 'info': Level.INFO,
'warning': Level.WARNING,
'error': Level.ERROR}.get (entry.pop ('level'), Level.INFO)
+ entry['uuid'] = 'e62ffb5a-0521-459c-a3d9-1124551934d2'
self.logger (level, 'console', **entry)
def _javascriptDialogOpening (self, **kwargs):
diff --git a/crocoite/test_browser.py b/crocoite/test_browser.py
index dfcd71c..483a298 100644
--- a/crocoite/test_browser.py
+++ b/crocoite/test_browser.py
@@ -29,19 +29,24 @@ from .logger import Logger, Consumer
class TItem (Item):
""" This should be as close to Item as possible """
- __slots__ = ('bodySend', '_body')
+ __slots__ = ('bodySend', '_body', '_requestBody')
base = 'http://localhost:8000/'
- def __init__ (self, path, status, headers, bodyReceive, bodySend=None):
+ def __init__ (self, path, status, headers, bodyReceive, bodySend=None, requestBody=None):
super ().__init__ (tab=None)
self.chromeResponse = {'response': {'headers': headers, 'status': status, 'url': self.base + path}}
self._body = bodyReceive, False
self.bodySend = bodyReceive if not bodySend else bodySend
+ self._requestBody = requestBody, False
@property
def body (self):
return self._body
+ @property
+ def requestBody (self):
+ return self._requestBody
+
testItems = [
TItem ('binary', 200, {'Content-Type': 'application/octet-stream'}, b'\x00\x01\x02'),
TItem ('attachment', 200,
@@ -68,6 +73,18 @@ testItems = [
'<html><body><img src="/image"><img src="/nonexistent"></body></html>'.encode ('utf8')),
TItem ('html/alert', 200, {'Content-Type': 'html'},
'<html><body><script>window.addEventListener("beforeunload", function (e) { e.returnValue = "bye?"; return e.returnValue; }); alert("stopping here"); if (confirm("are you sure?") || prompt ("42?")) { window.location = "/nonexistent"; }</script><img src="/image"></body></html>'.encode ('utf8')),
+ TItem ('html/fetchPost', 200, {'Content-Type': 'html'},
+ r"""<html><body><script>
+ let a = fetch("/html/fetchPost/binary", {"method": "POST", "body": "\x00"});
+ let b = fetch("/html/fetchPost/form", {"method": "POST", "body": new URLSearchParams({"data": "!"})});
+ let c = fetch("/html/fetchPost/binary/large", {"method": "POST", "body": "\x00".repeat(100*1024)});
+ let d = fetch("/html/fetchPost/form/large", {"method": "POST", "body": new URLSearchParams({"data": "!".repeat(100*1024)})});
+ </script></body></html>""".encode ('utf8')),
+ TItem ('html/fetchPost/binary', 200, {'Content-Type': 'application/octet-stream'}, b'\x00', requestBody=b'\x00'),
+ TItem ('html/fetchPost/form', 200, {'Content-Type': 'application/octet-stream'}, b'\x00', requestBody=b'data=%21'),
+ # XXX: these should trigger the need for getRequestPostData, but they don’t. oh well.
+ TItem ('html/fetchPost/binary/large', 200, {'Content-Type': 'application/octet-stream'}, b'\x00', requestBody=(100*1024)*b'\x00'),
+ TItem ('html/fetchPost/form/large', 200, {'Content-Type': 'application/octet-stream'}, b'\x00', requestBody=b'data=' + (100*1024)*b'%21'),
]
testItemMap = dict ([(item.parsedUrl.path, item) for item in testItems])
@@ -83,7 +100,9 @@ class RequestHandler (BaseHTTPRequestHandler):
self.end_headers()
self.wfile.write (body)
return
-
+
+ do_POST = do_GET
+
def log_message (self, format, *args):
pass
@@ -134,11 +153,13 @@ def itemsLoaded (l, items):
item = l.queue.popleft ()
if isinstance (item, Exception):
raise item
+ assert not item.failed
assert item.chromeResponse is not None
golden = items.pop (item.parsedUrl.path)
if not golden:
assert False, 'url {} not supposed to be fetched'.format (item.url)
assert item.body[0] == golden.body[0]
+ assert item.requestBody[0] == golden.requestBody[0]
assert item.response['status'] == golden.response['status']
assert item.statusText == BaseHTTPRequestHandler.responses.get (item.response['status'])[0]
for k, v in golden.responseHeaders:
@@ -189,6 +210,14 @@ def test_html (loader):
# make sure alerts are dismissed correctly (image won’t load otherwise)
literalItem (loader, testItemMap['/html/alert'], [testItemMap['/image']])
+def test_post (loader):
+ """ XHR POST request with binary data"""
+ literalItem (loader, testItemMap['/html/fetchPost'],
+ [testItemMap['/html/fetchPost/binary'],
+ testItemMap['/html/fetchPost/binary/large'],
+ testItemMap['/html/fetchPost/form'],
+ testItemMap['/html/fetchPost/form/large']])
+
def test_crash (loader):
with loader ('/html') as l:
l.start ()
diff --git a/crocoite/warc.py b/crocoite/warc.py
index 32fe5d6..9b97e75 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -84,6 +84,7 @@ class WarcHandler (EventHandler):
return record
def _writeRequest (self, item):
+ logger = self.logger.bind (reqId=item.id)
req = item.request
resp = item.response
@@ -97,9 +98,21 @@ class WarcHandler (EventHandler):
initiator = item.initiator
warcHeaders = {
'X-Chrome-Initiator': json.dumps (initiator),
+ 'X-Chrome-Request-ID': item.id,
'WARC-Date': datetime_to_iso_date (datetime.utcfromtimestamp (item.chromeRequest['wallTime'])),
}
- payload, payloadBase64Encoded = item.requestBody
+ try:
+ bodyTruncated = None
+ payload, payloadBase64Encoded = item.requestBody
+ except ValueError:
+ # oops, don’t know what went wrong here
+ bodyTruncated = 'unspecified'
+ logger.error ('requestBody missing', uuid='ee9adc58-e723-4595-9feb-312a67ead6a0')
+
+ if bodyTruncated:
+ warcHeaders['WARC-Truncated'] = bodyTruncated
+ payload = None
+
if payload:
payload = BytesIO (payload)
warcHeaders['X-Chrome-Base64Body'] = str (payloadBase64Encoded)
@@ -139,6 +152,7 @@ class WarcHandler (EventHandler):
'X-Chrome-Protocol': resp.get ('protocol', ''),
'X-Chrome-FromDiskCache': str (resp.get ('fromDiskCache')),
'X-Chrome-ConnectionReused': str (resp.get ('connectionReused')),
+ 'X-Chrome-Request-ID': item.id,
'WARC-Date': datetime_to_iso_date (datetime.utcfromtimestamp (
item.chromeRequest['wallTime']+
(item.chromeResponse['timestamp']-item.chromeRequest['timestamp']))),