summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- crocoite/behavior.py 17
-rw-r--r-- crocoite/test_behavior.py 60
2 files changed, 61 insertions, 16 deletions
diff --git a/crocoite/behavior.py b/crocoite/behavior.py
index 02fe2ea..dca9ea0 100644
--- a/crocoite/behavior.py
+++ b/crocoite/behavior.py
@@ -281,6 +281,10 @@ class Screenshot (Behavior):
name = 'screenshot'
+ # see https://github.com/GoogleChrome/puppeteer/blob/230be28b067b521f0577206899db01f0ca7fc0d2/examples/screenshots-longpage.js
+ # Hardcoded max texture size of 16,384 (crbug.com/770769)
+ maxDim = 16*1024
+
async def onfinish (self):
tab = self.loader.tab
@@ -291,16 +295,14 @@ class Screenshot (Behavior):
self.logger.error ('frame without url', tree=tree)
url = None
- # see https://github.com/GoogleChrome/puppeteer/blob/230be28b067b521f0577206899db01f0ca7fc0d2/examples/screenshots-longpage.js
- # Hardcoded max texture size of 16,384 (crbug.com/770769)
- maxDim = 16*1024
+
metrics = await tab.Page.getLayoutMetrics ()
contentSize = metrics['contentSize']
- width = min (contentSize['width'], maxDim)
+ width = min (contentSize['width'], self.maxDim)
# we’re ignoring horizontal scroll intentionally. Most horizontal
# layouts use JavaScript scrolling and don’t extend the viewport.
- for yoff in range (0, contentSize['height'], maxDim):
- height = min (contentSize['height'] - yoff, maxDim)
+ for yoff in range (0, contentSize['height'], self.maxDim):
+ height = min (contentSize['height'] - yoff, self.maxDim)
clip = {'x': 0, 'y': yoff, 'width': width, 'height': height, 'scale': 1}
ret = await tab.Page.captureScreenshot (format='png', clip=clip)
data = b64decode (ret['data'])
@@ -323,6 +325,9 @@ class ExtractLinksEvent:
def __init__ (self, links):
self.links = links
+ def __repr__ (self):
+ return f'<ExtractLinksEvent {self.links!r}>'
+
class ExtractLinks (Behavior):
"""
Extract links from a page using JavaScript
diff --git a/crocoite/test_behavior.py b/crocoite/test_behavior.py
index bc2dab6..bbbd8ba 100644
--- a/crocoite/test_behavior.py
+++ b/crocoite/test_behavior.py
@@ -18,8 +18,9 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
-import asyncio, os, yaml, re
+import asyncio, os, yaml, re, math, struct
from functools import partial
+from operator import attrgetter
import pytest
from yarl import URL
@@ -28,7 +29,8 @@ from aiohttp import web
import pkg_resources
from .logger import Logger
from .devtools import Process
-from .behavior import Scroll, Behavior, ExtractLinks, ExtractLinksEvent, Crash
+from .behavior import Scroll, Behavior, ExtractLinks, ExtractLinksEvent, Crash, \
+ Screenshot, ScreenshotEvent
from .controller import SinglePageController, EventHandler
from .devtools import Crashed
@@ -99,17 +101,17 @@ async def test_click_match (match, url):
# keep this aligned with click.js
assert re.match (match, url.host, re.I)
-class ExtractLinksCheck(EventHandler):
- """ Test adapter that accumulates all incoming links from ExtractLinks """
- __slots__ = ('links')
+
+class AccumHandler (EventHandler):
+ """ Test adapter that accumulates all incoming items """
+ __slots__ = ('data')
def __init__ (self):
super().__init__ ()
- self.links = []
+ self.data = []
def push (self, item):
- if isinstance (item, ExtractLinksEvent):
- self.links.extend (item.links)
+ self.data.append (item)
async def simpleServer (url, response):
async def f (req):
@@ -151,13 +153,17 @@ async def test_extract_links ():
</body></html>""")
try:
- handler = ExtractLinksCheck ()
+ handler = AccumHandler ()
logger = Logger ()
controller = SinglePageController (url=url, logger=logger,
service=Process (), behavior=[ExtractLinks], handler=[handler])
await controller.run ()
- assert sorted (handler.links) == sorted ([
+ links = []
+ for d in handler.data:
+ if isinstance (d, ExtractLinksEvent):
+ links.extend (d.links)
+ assert sorted (links) == sorted ([
url.with_path ('/relative'),
url.with_fragment ('anchor'),
URL ('http://example.com/absolute/'),
@@ -186,3 +192,37 @@ async def test_crash ():
finally:
await runner.cleanup ()
+@pytest.mark.asyncio
+async def test_screenshot ():
+ """
+ Make sure screenshots are taken and have the correct dimensions. We can’t
+ and don’t want to check their content.
+ """
+ # ceil(0) == 0, so starting with 1
+ for expectHeight in (1, Screenshot.maxDim, Screenshot.maxDim+1, Screenshot.maxDim*2+Screenshot.maxDim//2):
+ url = URL.build (scheme='http', host='localhost', port=8080)
+ runner = await simpleServer (url, f'<html><body style="margin: 0; padding: 0;"><div style="height: {expectHeight}"></div></body></html>')
+
+ try:
+ handler = AccumHandler ()
+ logger = Logger ()
+ controller = SinglePageController (url=url, logger=logger,
+ service=Process (), behavior=[Screenshot], handler=[handler])
+ await controller.run ()
+
+ screenshots = list (filter (lambda x: isinstance (x, ScreenshotEvent), handler.data))
+ assert len (screenshots) == math.ceil (expectHeight/Screenshot.maxDim)
+ totalHeight = 0
+ for s in screenshots:
+ assert s.url == url
+ # PNG ident is fixed, IHDR is always the first chunk
+ assert s.data.startswith (b'\x89PNG\r\n\x1a\n\x00\x00\x00\x0dIHDR')
+ width, height = struct.unpack ('>II', s.data[16:24])
+ assert height <= Screenshot.maxDim
+ totalHeight += height
+ # screenshot height is at least canvas height (XXX: get hardcoded
+ # value from devtools.Process)
+ assert totalHeight == max (expectHeight, 1080)
+ finally:
+ await runner.cleanup ()
+