diff options
Diffstat (limited to 'crocoite')
-rw-r--r-- | crocoite/behavior.py | 17 | ||||
-rw-r--r-- | crocoite/test_behavior.py | 60 |
2 files changed, 61 insertions, 16 deletions
diff --git a/crocoite/behavior.py b/crocoite/behavior.py index 02fe2ea..dca9ea0 100644 --- a/crocoite/behavior.py +++ b/crocoite/behavior.py @@ -281,6 +281,10 @@ class Screenshot (Behavior): name = 'screenshot' + # see https://github.com/GoogleChrome/puppeteer/blob/230be28b067b521f0577206899db01f0ca7fc0d2/examples/screenshots-longpage.js + # Hardcoded max texture size of 16,384 (crbug.com/770769) + maxDim = 16*1024 + async def onfinish (self): tab = self.loader.tab @@ -291,16 +295,14 @@ class Screenshot (Behavior): self.logger.error ('frame without url', tree=tree) url = None - # see https://github.com/GoogleChrome/puppeteer/blob/230be28b067b521f0577206899db01f0ca7fc0d2/examples/screenshots-longpage.js - # Hardcoded max texture size of 16,384 (crbug.com/770769) - maxDim = 16*1024 + metrics = await tab.Page.getLayoutMetrics () contentSize = metrics['contentSize'] - width = min (contentSize['width'], maxDim) + width = min (contentSize['width'], self.maxDim) # we’re ignoring horizontal scroll intentionally. Most horizontal # layouts use JavaScript scrolling and don’t extend the viewport. - for yoff in range (0, contentSize['height'], maxDim): - height = min (contentSize['height'] - yoff, maxDim) + for yoff in range (0, contentSize['height'], self.maxDim): + height = min (contentSize['height'] - yoff, self.maxDim) clip = {'x': 0, 'y': yoff, 'width': width, 'height': height, 'scale': 1} ret = await tab.Page.captureScreenshot (format='png', clip=clip) data = b64decode (ret['data']) @@ -323,6 +325,9 @@ class ExtractLinksEvent: def __init__ (self, links): self.links = links + def __repr__ (self): + return f'<ExtractLinksEvent {self.links!r}>' + class ExtractLinks (Behavior): """ Extract links from a page using JavaScript diff --git a/crocoite/test_behavior.py b/crocoite/test_behavior.py index bc2dab6..bbbd8ba 100644 --- a/crocoite/test_behavior.py +++ b/crocoite/test_behavior.py @@ -18,8 +18,9 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. -import asyncio, os, yaml, re +import asyncio, os, yaml, re, math, struct from functools import partial +from operator import attrgetter import pytest from yarl import URL @@ -28,7 +29,8 @@ from aiohttp import web import pkg_resources from .logger import Logger from .devtools import Process -from .behavior import Scroll, Behavior, ExtractLinks, ExtractLinksEvent, Crash +from .behavior import Scroll, Behavior, ExtractLinks, ExtractLinksEvent, Crash, \ + Screenshot, ScreenshotEvent from .controller import SinglePageController, EventHandler from .devtools import Crashed @@ -99,17 +101,17 @@ async def test_click_match (match, url): # keep this aligned with click.js assert re.match (match, url.host, re.I) -class ExtractLinksCheck(EventHandler): - """ Test adapter that accumulates all incoming links from ExtractLinks """ - __slots__ = ('links') + +class AccumHandler (EventHandler): + """ Test adapter that accumulates all incoming items """ + __slots__ = ('data') def __init__ (self): super().__init__ () - self.links = [] + self.data = [] def push (self, item): - if isinstance (item, ExtractLinksEvent): - self.links.extend (item.links) + self.data.append (item) async def simpleServer (url, response): async def f (req): @@ -151,13 +153,17 @@ async def test_extract_links (): </body></html>""") try: - handler = ExtractLinksCheck () + handler = AccumHandler () logger = Logger () controller = SinglePageController (url=url, logger=logger, service=Process (), behavior=[ExtractLinks], handler=[handler]) await controller.run () - assert sorted (handler.links) == sorted ([ + links = [] + for d in handler.data: + if isinstance (d, ExtractLinksEvent): + links.extend (d.links) + assert sorted (links) == sorted ([ url.with_path ('/relative'), url.with_fragment ('anchor'), URL ('http://example.com/absolute/'), @@ -186,3 +192,37 @@ async def test_crash (): finally: await runner.cleanup () +@pytest.mark.asyncio +async def test_screenshot (): + """ + Make sure screenshots are taken and have the correct dimensions. We can’t + and don’t want to check their content. + """ + # ceil(0) == 0, so starting with 1 + for expectHeight in (1, Screenshot.maxDim, Screenshot.maxDim+1, Screenshot.maxDim*2+Screenshot.maxDim//2): + url = URL.build (scheme='http', host='localhost', port=8080) + runner = await simpleServer (url, f'<html><body style="margin: 0; padding: 0;"><div style="height: {expectHeight}"></div></body></html>') + + try: + handler = AccumHandler () + logger = Logger () + controller = SinglePageController (url=url, logger=logger, + service=Process (), behavior=[Screenshot], handler=[handler]) + await controller.run () + + screenshots = list (filter (lambda x: isinstance (x, ScreenshotEvent), handler.data)) + assert len (screenshots) == math.ceil (expectHeight/Screenshot.maxDim) + totalHeight = 0 + for s in screenshots: + assert s.url == url + # PNG ident is fixed, IHDR is always the first chunk + assert s.data.startswith (b'\x89PNG\r\n\x1a\n\x00\x00\x00\x0dIHDR') + width, height = struct.unpack ('>II', s.data[16:24]) + assert height <= Screenshot.maxDim + totalHeight += height + # screenshot height is at least canvas height (XXX: get hardcoded + # value from devtools.Process) + assert totalHeight == max (expectHeight, 1080) + finally: + await runner.cleanup () + |