summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Lars-Dominik Braun <lars@6xq.net> 2018-04-20 13:29:15 +0200
committer Lars-Dominik Braun <lars@6xq.net> 2018-04-20 13:29:15 +0200
commit f38288483fdaa756007266f4e15d40920d8b760f (patch)
tree 237ed0863443a5e8041e9caa4b72014ae67f5b25
parent 4a6494b19f287848588641aa1330807e69031e8b (diff)
download crocoite-f38288483fdaa756007266f4e15d40920d8b760f.tar.gz
crocoite-f38288483fdaa756007266f4e15d40920d8b760f.tar.bz2
crocoite-f38288483fdaa756007266f4e15d40920d8b760f.zip
Save screenshot of entire page
…and not just the current viewport. Due to limitations within Chrome it may be necessary to manually stitch multiple images if the page height exceeds 16k pixels.
-rw-r--r-- crocoite/behavior.py 22
1 file changed, 16 insertions, 6 deletions
diff --git a/crocoite/behavior.py b/crocoite/behavior.py
index 26841aa..f6dfd3f 100644
--- a/crocoite/behavior.py
+++ b/crocoite/behavior.py
@@ -215,12 +215,22 @@ class Screenshot (Behavior):
tab = self.loader.tab
writer = self.loader.writer
- viewport = getFormattedViewportMetrics (tab)
- data = b64decode (tab.Page.captureScreenshot (format='png')['data'])
- record = writer.create_warc_record (packageUrl ('screenshot.png'), 'resource',
- payload=BytesIO (data), warc_headers_dict={'Content-Type': 'image/png',
- 'X-Chrome-Viewport': viewport})
- writer.write_record (record)
+ # see https://github.com/GoogleChrome/puppeteer/blob/230be28b067b521f0577206899db01f0ca7fc0d2/examples/screenshots-longpage.js
+ # Hardcoded max texture size of 16,384 (crbug.com/770769)
+ maxDim = 16*1024
+ metrics = tab.Page.getLayoutMetrics ()
+ contentSize = metrics['contentSize']
+ width = min (contentSize['width'], maxDim)
+ # we’re ignoring horizontal scroll intentionally. Most horizontal
+ # layouts use JavaScript scrolling and don’t extend the viewport.
+ for yoff in range (0, contentSize['height'], maxDim):
+ height = min (contentSize['height'] - yoff, maxDim)
+ clip = {'x': 0, 'y': yoff, 'width': width, 'height': height, 'scale': 1}
+ data = b64decode (tab.Page.captureScreenshot (format='png', clip=clip)['data'])
+ url = packageUrl ('screenshot-{}-{}.png'.format (0, yoff))
+ record = writer.create_warc_record (url, 'resource',
+ payload=BytesIO (data), warc_headers_dict={'Content-Type': 'image/png'})
+ writer.write_record (record)
class Click (JsOnload):
""" Generic link clicking """