summaryrefslogtreecommitdiff
path: root/crocoite
diff options
context:
space:
mode:
Diffstat (limited to 'crocoite')
-rw-r--r--crocoite/behavior.py9
-rw-r--r--crocoite/browser.py30
-rw-r--r--crocoite/cli.py3
-rw-r--r--crocoite/logger.py4
-rw-r--r--crocoite/warc.py5
5 files changed, 32 insertions, 19 deletions
diff --git a/crocoite/behavior.py b/crocoite/behavior.py
index 2c94bb8..3ecc92f 100644
--- a/crocoite/behavior.py
+++ b/crocoite/behavior.py
@@ -131,6 +131,8 @@ class JsOnload (Behavior):
# parameter.
# XXX: is there a better way to do this?
result = await tab.Runtime.evaluate (expression=str (self.script))
+ self.logger.debug ('behavior onload inject',
+ uuid='a2da9b78-5648-44c5-bfa8-5c7573e13ad3', result=result)
exception = result.get ('exceptionDetails', None)
result = result['result']
assert result['type'] == 'function', result
@@ -143,6 +145,8 @@ class JsOnload (Behavior):
functionDeclaration='function(options){return new this(options);}',
objectId=constructor,
arguments=[{'value': self.options}])
+ self.logger.debug ('behavior onload start',
+ uuid='6c0605ae-93b3-46b3-b575-ba45790909a7', result=result)
result = result['result']
assert result['type'] == 'object', result
assert result.get ('subtype') != 'error', result
@@ -160,8 +164,7 @@ class JsOnload (Behavior):
self.logger.error ('jsonload onstop failed',
uuid='1786726f-c8ec-4f79-8769-30954d4e32f5',
exception=e.args,
- objectId=self.context,
- context=self.__class__.__name__)
+ objectId=self.context)
return
yield # pragma: no cover
@@ -238,6 +241,8 @@ class DomSnapshot (Behavior):
viewport = await getFormattedViewportMetrics (tab)
dom = await tab.DOM.getDocument (depth=-1, pierce=True)
+ self.logger.debug ('dom snapshot document',
+ uuid='0c720784-8bd1-4fdc-a811-84394d753539', dom=dom)
haveUrls = set ()
for doc in ChromeTreeWalker (dom['root']).split ():
url = URL (doc['documentURL'])
diff --git a/crocoite/browser.py b/crocoite/browser.py
index 902a068..6ad305c 100644
--- a/crocoite/browser.py
+++ b/crocoite/browser.py
@@ -361,10 +361,15 @@ class SiteLoader:
self._iterRunning = running
async def navigate (self, url):
- await self.tab.Page.navigate(url=url)
+ ret = await self.tab.Page.navigate(url=url)
+ self.logger.debug ('navigate',
+ uuid='9d47ded2-951f-4e09-86ee-fd4151e20666', result=ret)
# internal chrome callbacks
async def _requestWillBeSent (self, **kwargs):
+ self.logger.debug ('requestWillBeSent',
+ uuid='b828d75a-650d-42d2-8c66-14f4547512da', args=kwargs)
+
reqId = kwargs['requestId']
req = kwargs['request']
url = URL (req['url'])
@@ -398,11 +403,13 @@ class SiteLoader:
item = RequestResponsePair ()
item.fromRequestWillBeSent (kwargs)
self.requests[reqId] = item
- logger.debug ('request', uuid='55c17564-1bd0-4499-8724-fa7aad65478f')
return ret
async def _responseReceived (self, **kwargs):
+ self.logger.debug ('responseReceived',
+ uuid='ecd67e69-401a-41cb-b4ec-eeb1f1ec6abb', args=kwargs)
+
reqId = kwargs['requestId']
item = self.requests.get (reqId)
if item is None:
@@ -414,7 +421,6 @@ class SiteLoader:
if item.url != url:
logger.error ('url mismatch', uuid='7385f45f-0b06-4cbc-81f9-67bcd72ee7d0', respUrl=url)
if url.scheme in self.allowedSchemes:
- logger.debug ('response', uuid='84461c4e-e8ef-4cbd-8e8e-e10a901c8bd0')
item.fromResponseReceived (kwargs)
else:
logger.warning ('scheme forbidden', uuid='2ea6e5d7-dd3b-4881-b9de-156c1751c666')
@@ -424,27 +430,27 @@ class SiteLoader:
Item was fully loaded. For some items the request body is not available
when responseReceived is fired, thus move everything here.
"""
+ self.logger.debug ('loadingFinished',
+ uuid='35479405-a5b5-4395-8c33-d3601d1796b9', args=kwargs)
+
reqId = kwargs['requestId']
item = self.requests.pop (reqId, None)
if item is None:
# we never recorded this request (blacklisted scheme, for example)
return
req = item.request
- logger = self.logger.bind (reqId=reqId, reqUrl=item.url)
if item.url.scheme in self.allowedSchemes:
- logger.info ('finished', uuid='5a8b4bad-f86a-4fe6-a53e-8da4130d6a02')
item.fromLoadingFinished (kwargs)
# XXX queue both
await asyncio.gather (item.prefetchRequestBody (self.tab), item.prefetchResponseBody (self.tab))
return item
async def _loadingFailed (self, **kwargs):
+ self.logger.info ('loadingFailed',
+ uuid='35479405-a5b5-4395-8c33-d3601d1796b9', args=kwargs)
+
reqId = kwargs['requestId']
logger = self.logger.bind (reqId=reqId)
- logger.warning ('loading failed',
- uuid='68410f13-6eea-453e-924e-c1af4601748b',
- errorText=kwargs['errorText'],
- blockedReason=kwargs.get ('blockedReason'))
item = self.requests.pop (reqId, None)
if item is not None:
item.fromLoadingFailed (kwargs)
@@ -477,10 +483,16 @@ class SiteLoader:
uuid='3ef7292e-8595-4e89-b834-0cc6bc40ee38', **kwargs)
async def _frameStartedLoading (self, **kwargs):
+ self.logger.debug ('frameStartedLoading',
+ uuid='bbeb39c0-3304-4221-918e-f26bd443c566', args=kwargs)
+
self._framesLoading.add (kwargs['frameId'])
self.idle.set (False)
async def _frameStoppedLoading (self, **kwargs):
+ self.logger.debug ('frameStoppedLoading',
+ uuid='fcbe8110-511c-4cbb-ac2b-f61a5782c5a0', args=kwargs)
+
self._framesLoading.remove (kwargs['frameId'])
if not self._framesLoading:
self.idle.set (True)
diff --git a/crocoite/cli.py b/crocoite/cli.py
index f9ef52c..a8de73b 100644
--- a/crocoite/cli.py
+++ b/crocoite/cli.py
@@ -32,7 +32,8 @@ from .controller import SinglePageController, \
RecursiveController, DepthLimit, PrefixLimit
from .devtools import Passthrough, Process
from .warc import WarcHandler
-from .logger import Logger, JsonPrintConsumer, DatetimeConsumer, WarcHandlerConsumer
+from .logger import Logger, JsonPrintConsumer, DatetimeConsumer, \
+ WarcHandlerConsumer, Level
from .devtools import Crashed
class SingleExitStatus(IntEnum):
diff --git a/crocoite/logger.py b/crocoite/logger.py
index d882eaf..ac389ca 100644
--- a/crocoite/logger.py
+++ b/crocoite/logger.py
@@ -105,7 +105,7 @@ class PrintConsumer (Consumer):
return kwargs
class JsonPrintConsumer (Consumer):
- def __init__ (self, minLevel=Level.INFO):
+ def __init__ (self, minLevel=Level.DEBUG):
self.minLevel = minLevel
def __call__ (self, **kwargs):
@@ -121,7 +121,7 @@ class DatetimeConsumer (Consumer):
return kwargs
class WarcHandlerConsumer (Consumer):
- def __init__ (self, warc, minLevel=Level.INFO):
+ def __init__ (self, warc, minLevel=Level.DEBUG):
self.warc = warc
self.minLevel = minLevel
diff --git a/crocoite/warc.py b/crocoite/warc.py
index cb1f2f7..4106995 100644
--- a/crocoite/warc.py
+++ b/crocoite/warc.py
@@ -242,13 +242,8 @@ class WarcHandler (EventHandler):
}
def push (self, item):
- processed = False
for k, v in self.route.items ():
if isinstance (item, k):
v (self, item)
- processed = True
break
- if not processed:
- self.logger.debug (f'unknown event {item!r}')
-