Save screenshot of entire page

…and not just the current viewport. Chrome has a hardcoded maximum texture size of 16,384 pixels (crbug.com/770769), so a page taller than that is captured as several images, one per 16,384-pixel slice, which may have to be stitched together manually.
author: Lars-Dominik Braun <lars@6xq.net> 2018-04-20 13:29:15 +0200
committer: Lars-Dominik Braun <lars@6xq.net> 2018-04-20 13:29:15 +0200
commit: f38288483fdaa756007266f4e15d40920d8b760f (patch)
tree: 237ed0863443a5e8041e9caa4b72014ae67f5b25 /crocoite/behavior.py
parent: 4a6494b19f287848588641aa1330807e69031e8b (diff)
download: crocoite-f38288483fdaa756007266f4e15d40920d8b760f.tar.gz
crocoite-f38288483fdaa756007266f4e15d40920d8b760f.tar.bz2
crocoite-f38288483fdaa756007266f4e15d40920d8b760f.zip
1 file changed, 16 insertions, 6 deletions
diff --git a/crocoite/behavior.py b/crocoite/behavior.py
index 26841aa..f6dfd3f 100644
--- a/crocoite/behavior.py
+++ b/crocoite/behavior.py
@@ -215,12 +215,22 @@ class Screenshot (Behavior):
         tab = self.loader.tab
         writer = self.loader.writer
 
-        viewport = getFormattedViewportMetrics (tab)
-        data = b64decode (tab.Page.captureScreenshot (format='png')['data'])
-        record = writer.create_warc_record (packageUrl ('screenshot.png'), 'resource',
-                payload=BytesIO (data), warc_headers_dict={'Content-Type': 'image/png',
-                'X-Chrome-Viewport': viewport})
-        writer.write_record (record)
+        # see https://github.com/GoogleChrome/puppeteer/blob/230be28b067b521f0577206899db01f0ca7fc0d2/examples/screenshots-longpage.js
+        # Hardcoded max texture size of 16,384 (crbug.com/770769)
+        maxDim = 16*1024
+        metrics = tab.Page.getLayoutMetrics ()
+        contentSize = metrics['contentSize']
+        width = min (contentSize['width'], maxDim)
+        # we’re ignoring horizontal scroll intentionally. Most horizontal
+        # layouts use JavaScript scrolling and don’t extend the viewport.
+        for yoff in range (0, contentSize['height'], maxDim):
+            height = min (contentSize['height'] - yoff, maxDim)
+            clip = {'x': 0, 'y': yoff, 'width': width, 'height': height, 'scale': 1}
+            data = b64decode (tab.Page.captureScreenshot (format='png', clip=clip)['data'])
+            url = packageUrl ('screenshot-{}-{}.png'.format (0, yoff))
+            record = writer.create_warc_record (url, 'resource',
+                    payload=BytesIO (data), warc_headers_dict={'Content-Type': 'image/png'})
+            writer.write_record (record)
 
 class Click (JsOnload):
     """ Generic link clicking """
author	Lars-Dominik Braun <lars@6xq.net>	2018-04-20 13:29:15 +0200
committer	Lars-Dominik Braun <lars@6xq.net>	2018-04-20 13:29:15 +0200
commit	f38288483fdaa756007266f4e15d40920d8b760f (patch)
tree	237ed0863443a5e8041e9caa4b72014ae67f5b25 /crocoite/behavior.py
parent	4a6494b19f287848588641aa1330807e69031e8b (diff)
download	crocoite-f38288483fdaa756007266f4e15d40920d8b760f.tar.gz crocoite-f38288483fdaa756007266f4e15d40920d8b760f.tar.bz2 crocoite-f38288483fdaa756007266f4e15d40920d8b760f.zip