diff options
author | Lars-Dominik Braun <lars@6xq.net> | 2017-11-22 11:30:51 +0100 |
---|---|---|
committer | Lars-Dominik Braun <lars@6xq.net> | 2017-11-22 11:30:51 +0100 |
commit | 5f6343a37b36a0008b2c2a2a7805fcdb907aa9f5 (patch) | |
tree | cbfffad34a69dbd2146abb6693c3d03bcd0b4ed8 | |
parent | 9cff3074e52b91c49298f80fd3e73d77f1f1c7dd (diff) | |
download | crocoite-5f6343a37b36a0008b2c2a2a7805fcdb907aa9f5.tar.gz crocoite-5f6343a37b36a0008b2c2a2a7805fcdb907aa9f5.tar.bz2 crocoite-5f6343a37b36a0008b2c2a2a7805fcdb907aa9f5.zip |
Make <canvas> static before DOM snapshot
Use --run-before-snapshot=canvas-snapshot.js to enable this. The script
replaces each <canvas> element with an image snapshot of its current
contents. We could use .captureStream() as well.
-rw-r--r-- | README.rst | 1 | ||||
-rw-r--r-- | crocoite/cli.py | 21 | ||||
-rw-r--r-- | crocoite/data/canvas-snapshot.js | 18 |
3 files changed, 31 insertions, 9 deletions
@@ -47,5 +47,4 @@ Most of these issues can be worked around by using the DOM snapshot, which is also saved. This causes its own set of issues though: - JavaScript-based navigation does not work. -- Canvas contents are probably not preserved. diff --git a/crocoite/cli.py b/crocoite/cli.py index f9e0fd2..4f36583 100644 --- a/crocoite/cli.py +++ b/crocoite/cli.py @@ -344,6 +344,15 @@ def main (): while len (requests) != 0: tab.wait (1) + def loadScripts (paths, scripts=[]): + for p in paths: + if not os.path.exists (p): + # search for defaults scripts in package data directory + p = packageData (p) + with open (p, 'r') as fd: + scripts.append (fd.read ()) + return '\n'.join (scripts) + logging.basicConfig (level=logging.DEBUG) parser = argparse.ArgumentParser(description='Save website to WARC using Google Chrome.') @@ -353,6 +362,7 @@ def main (): parser.add_argument('--onload', action='append', help='') parser.add_argument('--log-buffer', default=1000, type=int, dest='logBuffer') parser.add_argument('--keep-tab', action='store_true', default=False, dest='keepTab', help='Keep tab open') + parser.add_argument('--run-before-snapshot', default=[], action='append', dest='runBeforeSnapshot', help='Run JavaScript files before creating DOM snapshot') parser.add_argument('url', help='Website URL') parser.add_argument('output', help='WARC filename') @@ -361,14 +371,7 @@ def main (): stopVarname = '__' + __package__ + '_stop__' # avoid sites messing with our scripts by using a random stop variable name newStopVarname = randomString () - onload = ['var {} = false;\n'.format (newStopVarname)] - for path in args.onload: - if not os.path.exists (path): - # search for defaults scripts in package data directory - path = packageData (path) - with open (path, 'r') as fd: - onload.append (fd.read ().replace (stopVarname, newStopVarname)) - onload = '\n'.join (onload) + onload = loadScripts (args.onload, ['var {} = false;\n'.format (stopVarname)]).replace (stopVarname, newStopVarname) 
stopVarname = newStopVarname # temporary store for requests @@ -449,6 +452,8 @@ def main (): tab.Network.loadingFailed = None tab.Page.loadEventFired = None + script = loadScripts (args.runBeforeSnapshot) + tab.Runtime.evaluate (expression=script, returnByValue=True) writeDOMSnapshot (tab, writer) tab.stop() diff --git a/crocoite/data/canvas-snapshot.js b/crocoite/data/canvas-snapshot.js new file mode 100644 index 0000000..2395411 --- /dev/null +++ b/crocoite/data/canvas-snapshot.js @@ -0,0 +1,18 @@ +/* Replace canvas with image snapshot + */ +(function(){ + var canvas = document.querySelectorAll ("canvas"); + for (var i = 0; i < canvas.length; i++) { + var c = canvas[i]; + var data = c.toDataURL (); + var parent = c.parentNode; + var img = document.createElement ('img'); + /* copy all attributes */ + for (var i = 0; i < c.attributes.length; i++) { + var attr = c.attributes.item(i); + img.setAttribute (attr.nodeName, attr.nodeValue); + } + img.src = data; + parent.replaceChild (img, c); + } +}()); |