diff options
Diffstat (limited to 'crocoite')
| -rw-r--r-- | crocoite/behavior.py | 9 | ||||
| -rw-r--r-- | crocoite/browser.py | 30 | ||||
| -rw-r--r-- | crocoite/cli.py | 3 | ||||
| -rw-r--r-- | crocoite/logger.py | 4 | ||||
| -rw-r--r-- | crocoite/warc.py | 5 | 
5 files changed, 32 insertions, 19 deletions
| diff --git a/crocoite/behavior.py b/crocoite/behavior.py index 2c94bb8..3ecc92f 100644 --- a/crocoite/behavior.py +++ b/crocoite/behavior.py @@ -131,6 +131,8 @@ class JsOnload (Behavior):          # parameter.          # XXX: is there a better way to do this?          result = await tab.Runtime.evaluate (expression=str (self.script)) +        self.logger.debug ('behavior onload inject', +                uuid='a2da9b78-5648-44c5-bfa8-5c7573e13ad3', result=result)          exception = result.get ('exceptionDetails', None)          result = result['result']          assert result['type'] == 'function', result @@ -143,6 +145,8 @@ class JsOnload (Behavior):                  functionDeclaration='function(options){return new this(options);}',                  objectId=constructor,                  arguments=[{'value': self.options}]) +        self.logger.debug ('behavior onload start', +                uuid='6c0605ae-93b3-46b3-b575-ba45790909a7', result=result)          result = result['result']          assert result['type'] == 'object', result          assert result.get ('subtype') != 'error', result @@ -160,8 +164,7 @@ class JsOnload (Behavior):              self.logger.error ('jsonload onstop failed',                      uuid='1786726f-c8ec-4f79-8769-30954d4e32f5',                      exception=e.args, -                    objectId=self.context, -                    context=self.__class__.__name__) +                    objectId=self.context)          return          yield # pragma: no cover @@ -238,6 +241,8 @@ class DomSnapshot (Behavior):          viewport = await getFormattedViewportMetrics (tab)          dom = await tab.DOM.getDocument (depth=-1, pierce=True) +        self.logger.debug ('dom snapshot document', +                uuid='0c720784-8bd1-4fdc-a811-84394d753539', dom=dom)          haveUrls = set ()          for doc in ChromeTreeWalker (dom['root']).split ():              url = URL (doc['documentURL']) diff --git a/crocoite/browser.py b/crocoite/browser.py index 902a068..6ad305c 100644 --- a/crocoite/browser.py +++ b/crocoite/browser.py @@ -361,10 +361,15 @@ class SiteLoader:              self._iterRunning = running      async def navigate (self, url): -        await self.tab.Page.navigate(url=url) +        ret = await self.tab.Page.navigate(url=url) +        self.logger.debug ('navigate', +                uuid='9d47ded2-951f-4e09-86ee-fd4151e20666', result=ret)      # internal chrome callbacks      async def _requestWillBeSent (self, **kwargs): +        self.logger.debug ('requestWillBeSent', +                uuid='b828d75a-650d-42d2-8c66-14f4547512da', args=kwargs) +          reqId = kwargs['requestId']          req = kwargs['request']          url = URL (req['url']) @@ -398,11 +403,13 @@ class SiteLoader:          item = RequestResponsePair ()          item.fromRequestWillBeSent (kwargs)          self.requests[reqId] = item -        logger.debug ('request', uuid='55c17564-1bd0-4499-8724-fa7aad65478f')          return ret      async def _responseReceived (self, **kwargs): +        self.logger.debug ('responseReceived', +                uuid='ecd67e69-401a-41cb-b4ec-eeb1f1ec6abb', args=kwargs) +          reqId = kwargs['requestId']          item = self.requests.get (reqId)          if item is None: @@ -414,7 +421,6 @@ class SiteLoader:          if item.url != url:              logger.error ('url mismatch', uuid='7385f45f-0b06-4cbc-81f9-67bcd72ee7d0', respUrl=url)          if url.scheme in self.allowedSchemes: -            logger.debug ('response', uuid='84461c4e-e8ef-4cbd-8e8e-e10a901c8bd0')              item.fromResponseReceived (kwargs)          else:              logger.warning ('scheme forbidden', uuid='2ea6e5d7-dd3b-4881-b9de-156c1751c666') @@ -424,27 +430,27 @@ class SiteLoader:          Item was fully loaded. For some items the request body is not available          when responseReceived is fired, thus move everything here.          """ +        self.logger.debug ('loadingFinished', +                uuid='35479405-a5b5-4395-8c33-d3601d1796b9', args=kwargs) +          reqId = kwargs['requestId']          item = self.requests.pop (reqId, None)          if item is None:              # we never recorded this request (blacklisted scheme, for example)              return          req = item.request -        logger = self.logger.bind (reqId=reqId, reqUrl=item.url)          if item.url.scheme in self.allowedSchemes: -            logger.info ('finished', uuid='5a8b4bad-f86a-4fe6-a53e-8da4130d6a02')              item.fromLoadingFinished (kwargs)              # XXX queue both              await asyncio.gather (item.prefetchRequestBody (self.tab), item.prefetchResponseBody (self.tab))              return item      async def _loadingFailed (self, **kwargs): +        self.logger.info ('loadingFailed', +                uuid='35479405-a5b5-4395-8c33-d3601d1796b9', args=kwargs) +          reqId = kwargs['requestId']          logger = self.logger.bind (reqId=reqId) -        logger.warning ('loading failed', -                uuid='68410f13-6eea-453e-924e-c1af4601748b', -                errorText=kwargs['errorText'], -                blockedReason=kwargs.get ('blockedReason'))          item = self.requests.pop (reqId, None)          if item is not None:              item.fromLoadingFailed (kwargs) @@ -477,10 +483,16 @@ class SiteLoader:                      uuid='3ef7292e-8595-4e89-b834-0cc6bc40ee38', **kwargs)      async def _frameStartedLoading (self, **kwargs): +        self.logger.debug ('frameStartedLoading', +                uuid='bbeb39c0-3304-4221-918e-f26bd443c566', args=kwargs) +          self._framesLoading.add (kwargs['frameId'])          self.idle.set (False)      async def _frameStoppedLoading (self, **kwargs): +        self.logger.debug ('frameStoppedLoading', +                uuid='fcbe8110-511c-4cbb-ac2b-f61a5782c5a0', args=kwargs) +          self._framesLoading.remove (kwargs['frameId'])          if not self._framesLoading:              self.idle.set (True) diff --git a/crocoite/cli.py b/crocoite/cli.py index f9ef52c..a8de73b 100644 --- a/crocoite/cli.py +++ b/crocoite/cli.py @@ -32,7 +32,8 @@ from .controller import SinglePageController, \          RecursiveController, DepthLimit, PrefixLimit  from .devtools import Passthrough, Process  from .warc import WarcHandler -from .logger import Logger, JsonPrintConsumer, DatetimeConsumer, WarcHandlerConsumer +from .logger import Logger, JsonPrintConsumer, DatetimeConsumer, \ +        WarcHandlerConsumer, Level  from .devtools import Crashed  class SingleExitStatus(IntEnum): diff --git a/crocoite/logger.py b/crocoite/logger.py index d882eaf..ac389ca 100644 --- a/crocoite/logger.py +++ b/crocoite/logger.py @@ -105,7 +105,7 @@ class PrintConsumer (Consumer):          return kwargs  class JsonPrintConsumer (Consumer): -    def __init__ (self, minLevel=Level.INFO): +    def __init__ (self, minLevel=Level.DEBUG):          self.minLevel = minLevel      def __call__ (self, **kwargs): @@ -121,7 +121,7 @@ class DatetimeConsumer (Consumer):          return kwargs  class WarcHandlerConsumer (Consumer): -    def __init__ (self, warc, minLevel=Level.INFO): +    def __init__ (self, warc, minLevel=Level.DEBUG):          self.warc = warc          self.minLevel = minLevel diff --git a/crocoite/warc.py b/crocoite/warc.py index cb1f2f7..4106995 100644 --- a/crocoite/warc.py +++ b/crocoite/warc.py @@ -242,13 +242,8 @@ class WarcHandler (EventHandler):              }      def push (self, item): -        processed = False          for k, v in self.route.items ():              if isinstance (item, k):                  v (self, item) -                processed = True                  break -        if not processed: -            self.logger.debug (f'unknown event {item!r}') - | 
