summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLars-Dominik Braun <lars@6xq.net>2017-11-22 11:30:51 +0100
committerLars-Dominik Braun <lars@6xq.net>2017-11-22 11:30:51 +0100
commit5f6343a37b36a0008b2c2a2a7805fcdb907aa9f5 (patch)
treecbfffad34a69dbd2146abb6693c3d03bcd0b4ed8
parent9cff3074e52b91c49298f80fd3e73d77f1f1c7dd (diff)
downloadcrocoite-5f6343a37b36a0008b2c2a2a7805fcdb907aa9f5.tar.gz
crocoite-5f6343a37b36a0008b2c2a2a7805fcdb907aa9f5.tar.bz2
crocoite-5f6343a37b36a0008b2c2a2a7805fcdb907aa9f5.zip
Make <canvas> static before DOM snapshot
Use --run-before-snapshot=canvas-snapshot.js. Replaces <canvas> with image snapshot. We could use .captureStream() as well.
-rw-r--r--README.rst1
-rw-r--r--crocoite/cli.py21
-rw-r--r--crocoite/data/canvas-snapshot.js18
3 files changed, 31 insertions, 9 deletions
diff --git a/README.rst b/README.rst
index 2b3642f..760742b 100644
--- a/README.rst
+++ b/README.rst
@@ -47,5 +47,4 @@ Most of these issues can be worked around by using the DOM snapshot, which is
also saved. This causes its own set of issues though:
- JavaScript-based navigation does not work.
-- Canvas contents are probably not preserved.
diff --git a/crocoite/cli.py b/crocoite/cli.py
index f9e0fd2..4f36583 100644
--- a/crocoite/cli.py
+++ b/crocoite/cli.py
@@ -344,6 +344,15 @@ def main ():
while len (requests) != 0:
tab.wait (1)
+    def loadScripts (paths, scripts=()): # returns scripts, then each file's text, joined by '\n'
+        for p in paths:
+            if not os.path.exists (p):
+                # search for default scripts in package data directory
+                p = packageData (p)
+            with open (p, 'r') as fd:
+                scripts = [*scripts, fd.read ()] # new list: never mutate the default or the caller's list
+        return '\n'.join (scripts)
+
logging.basicConfig (level=logging.DEBUG)
parser = argparse.ArgumentParser(description='Save website to WARC using Google Chrome.')
@@ -353,6 +362,7 @@ def main ():
parser.add_argument('--onload', action='append', help='')
parser.add_argument('--log-buffer', default=1000, type=int, dest='logBuffer')
parser.add_argument('--keep-tab', action='store_true', default=False, dest='keepTab', help='Keep tab open')
+ parser.add_argument('--run-before-snapshot', default=[], action='append', dest='runBeforeSnapshot', help='Run JavaScript files before creating DOM snapshot')
parser.add_argument('url', help='Website URL')
parser.add_argument('output', help='WARC filename')
@@ -361,14 +371,7 @@ def main ():
stopVarname = '__' + __package__ + '_stop__'
# avoid sites messing with our scripts by using a random stop variable name
newStopVarname = randomString ()
- onload = ['var {} = false;\n'.format (newStopVarname)]
- for path in args.onload:
- if not os.path.exists (path):
- # search for defaults scripts in package data directory
- path = packageData (path)
- with open (path, 'r') as fd:
- onload.append (fd.read ().replace (stopVarname, newStopVarname))
- onload = '\n'.join (onload)
+ onload = loadScripts (args.onload, ['var {} = false;\n'.format (stopVarname)]).replace (stopVarname, newStopVarname)
stopVarname = newStopVarname
# temporary store for requests
@@ -449,6 +452,8 @@ def main ():
tab.Network.loadingFailed = None
tab.Page.loadEventFired = None
+ script = loadScripts (args.runBeforeSnapshot)
+ tab.Runtime.evaluate (expression=script, returnByValue=True)
writeDOMSnapshot (tab, writer)
tab.stop()
diff --git a/crocoite/data/canvas-snapshot.js b/crocoite/data/canvas-snapshot.js
new file mode 100644
index 0000000..2395411
--- /dev/null
+++ b/crocoite/data/canvas-snapshot.js
@@ -0,0 +1,18 @@
+/* Replace every <canvas> with an <img> showing its current content, so the
+ * DOM snapshot keeps the drawing. All canvas attributes are carried over. */
+(function(){
+    var canvas = document.querySelectorAll ("canvas");
+    for (var i = 0; i < canvas.length; i++) {
+        var c = canvas[i];
+        var data = c.toDataURL ();
+        var parent = c.parentNode;
+        var img = document.createElement ('img');
+        /* copy all attributes (own index j: a second `var i` would clobber the canvas index) */
+        for (var j = 0; j < c.attributes.length; j++) {
+            var attr = c.attributes.item(j);
+            img.setAttribute (attr.nodeName, attr.nodeValue);
+        }
+        img.src = data;
+        parent.replaceChild (img, c);
+    }
+}());