diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2018-04-20 13:29:15 +0200 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2018-04-20 13:29:15 +0200 |
commit | f38288483fdaa756007266f4e15d40920d8b760f (patch) | |
tree | 237ed0863443a5e8041e9caa4b72014ae67f5b25 | |
parent | 4a6494b19f287848588641aa1330807e69031e8b (diff) | |
download | crocoite-f38288483fdaa756007266f4e15d40920d8b760f.tar.gz crocoite-f38288483fdaa756007266f4e15d40920d8b760f.tar.bz2 crocoite-f38288483fdaa756007266f4e15d40920d8b760f.zip |
Save screenshot of entire page
Capture the whole page instead of only the current viewport. Chrome has a
hardcoded maximum texture size of 16,384 pixels (crbug.com/770769), so a page
taller than that is saved as several images, which must be stitched together manually.
-rw-r--r-- | crocoite/behavior.py | 22 |
1 file changed, 16 insertions, 6 deletions
diff --git a/crocoite/behavior.py b/crocoite/behavior.py index 26841aa..f6dfd3f 100644 --- a/crocoite/behavior.py +++ b/crocoite/behavior.py @@ -215,12 +215,22 @@ class Screenshot (Behavior): tab = self.loader.tab writer = self.loader.writer - viewport = getFormattedViewportMetrics (tab) - data = b64decode (tab.Page.captureScreenshot (format='png')['data']) - record = writer.create_warc_record (packageUrl ('screenshot.png'), 'resource', - payload=BytesIO (data), warc_headers_dict={'Content-Type': 'image/png', - 'X-Chrome-Viewport': viewport}) - writer.write_record (record) + # see https://github.com/GoogleChrome/puppeteer/blob/230be28b067b521f0577206899db01f0ca7fc0d2/examples/screenshots-longpage.js + # Hardcoded max texture size of 16,384 (crbug.com/770769) + maxDim = 16*1024 + metrics = tab.Page.getLayoutMetrics () + contentSize = metrics['contentSize'] + width = min (contentSize['width'], maxDim) + # we’re ignoring horizontal scroll intentionally. Most horizontal + # layouts use JavaScript scrolling and don’t extend the viewport. + for yoff in range (0, contentSize['height'], maxDim): + height = min (contentSize['height'] - yoff, maxDim) + clip = {'x': 0, 'y': yoff, 'width': width, 'height': height, 'scale': 1} + data = b64decode (tab.Page.captureScreenshot (format='png', clip=clip)['data']) + url = packageUrl ('screenshot-{}-{}.png'.format (0, yoff)) + record = writer.create_warc_record (url, 'resource', + payload=BytesIO (data), warc_headers_dict={'Content-Type': 'image/png'}) + writer.write_record (record) class Click (JsOnload): """ Generic link clicking """ |