diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2017-11-24 16:16:16 +0100 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2017-11-24 16:16:16 +0100 |
commit | 62bc23f7dd7be0f62dfb4d4bbccdb0586ff9ae8c (patch) | |
tree | c57f4a336cf8d6e9d29fdab6af1860f16fd2f965 | |
parent | 5f6343a37b36a0008b2c2a2a7805fcdb907aa9f5 (diff) | |
download | crocoite-62bc23f7dd7be0f62dfb4d4bbccdb0586ff9ae8c.tar.gz crocoite-62bc23f7dd7be0f62dfb4d4bbccdb0586ff9ae8c.tar.bz2 crocoite-62bc23f7dd7be0f62dfb4d4bbccdb0586ff9ae8c.zip |
Save onsnapshot script to WARC
-rw-r--r-- | crocoite/cli.py | 12 |
1 files changed, 8 insertions, 4 deletions
```diff
diff --git a/crocoite/cli.py b/crocoite/cli.py
index 4f36583..2a2c174 100644
--- a/crocoite/cli.py
+++ b/crocoite/cli.py
@@ -353,6 +353,12 @@ def main ():
                 scripts.append (fd.read ())
         return '\n'.join (scripts)
 
+    def writeScript (path, source, writer):
+        record = writer.create_warc_record (packageUrl (path), 'metadata',
+                payload=BytesIO (source.encode ('utf8')),
+                warc_headers_dict={'Content-Type': 'application/javascript; charset=utf-8'})
+        writer.write_record (record)
+
     logging.basicConfig (level=logging.DEBUG)
 
     parser = argparse.ArgumentParser(description='Save website to WARC using Google Chrome.')
@@ -409,10 +415,7 @@ def main ():
         logger.addHandler (warcLogger)
 
         # save onload script
-        record = writer.create_warc_record (packageUrl ('onload'), 'metadata',
-                payload=BytesIO (onload.encode ('utf8')),
-                warc_headers_dict={'Content-Type': 'application/javascript; charset=utf-8'})
-        writer.write_record (record)
+        writeScript ('onload', onload, writer)
 
         # enable events
         tab.Network.enable()
@@ -453,6 +456,7 @@ def main ():
         tab.Page.loadEventFired = None
 
         script = loadScripts (args.runBeforeSnapshot)
+        writeScript ('onsnapshot', script, writer)
         tab.Runtime.evaluate (expression=script, returnByValue=True)
 
         writeDOMSnapshot (tab, writer)
```