From a1aa7b1fe501476b78f4413ff813ad2f40546b7c Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sun, 3 Dec 2017 10:17:51 +0100 Subject: Add page screenshot to WARC --- crocoite/cli.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'crocoite') diff --git a/crocoite/cli.py b/crocoite/cli.py index 1502884..e7ed3db 100644 --- a/crocoite/cli.py +++ b/crocoite/cli.py @@ -21,6 +21,7 @@ def main (): import os, random, logging, argparse from io import BytesIO + from base64 import b64decode import pychrome from urllib.parse import urlsplit from warcio.warcwriter import WARCWriter @@ -122,6 +123,17 @@ def main (): warc_headers_dict={'Content-Type': 'application/javascript; charset=utf-8'}) writer.write_record (record) + def writeScreenshot (tab, writer): + """ + Capture a PNG screenshot of tab and write it to WARC as a resource record + """ + viewport = getFormattedViewportMetrics (tab) + data = b64decode (tab.Page.captureScreenshot (format='png')['data']) + record = writer.create_warc_record (packageUrl ('screenshot.png'), 'resource', + payload=BytesIO (data), warc_headers_dict={'Content-Type': 'image/png', + 'X-Chrome-Viewport': viewport}) + writer.write_record (record) + logger = logging.getLogger(__name__) logging.basicConfig (level=logging.DEBUG) @@ -184,5 +196,7 @@ def main (): l.tab.Runtime.evaluate (expression=script, returnByValue=True) writeDOMSnapshot (l.tab, writer) + writeScreenshot (l.tab, writer) + return True -- cgit v1.2.3