From 62bc23f7dd7be0f62dfb4d4bbccdb0586ff9ae8c Mon Sep 17 00:00:00 2001
From: Lars-Dominik Braun
Date: Fri, 24 Nov 2017 16:16:16 +0100
Subject: Save onsnapshot script to WARC

---
 crocoite/cli.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/crocoite/cli.py b/crocoite/cli.py
index 4f36583..2a2c174 100644
--- a/crocoite/cli.py
+++ b/crocoite/cli.py
@@ -353,6 +353,12 @@ def main ():
                 scripts.append (fd.read ())
         return '\n'.join (scripts)
 
+    def writeScript (path, source, writer):
+        record = writer.create_warc_record (packageUrl (path), 'metadata',
+                payload=BytesIO (source.encode ('utf8')),
+                warc_headers_dict={'Content-Type': 'application/javascript; charset=utf-8'})
+        writer.write_record (record)
+
     logging.basicConfig (level=logging.DEBUG)
 
     parser = argparse.ArgumentParser(description='Save website to WARC using Google Chrome.')
@@ -409,10 +415,7 @@ def main ():
     logger.addHandler (warcLogger)
 
     # save onload script
-    record = writer.create_warc_record (packageUrl ('onload'), 'metadata',
-            payload=BytesIO (onload.encode ('utf8')),
-            warc_headers_dict={'Content-Type': 'application/javascript; charset=utf-8'})
-    writer.write_record (record)
+    writeScript ('onload', onload, writer)
 
     # enable events
     tab.Network.enable()
@@ -453,6 +456,7 @@ def main ():
     tab.Page.loadEventFired = None
 
     script = loadScripts (args.runBeforeSnapshot)
+    writeScript ('onsnapshot', script, writer)
     tab.Runtime.evaluate (expression=script, returnByValue=True)
 
     writeDOMSnapshot (tab, writer)
-- 
cgit v1.2.3