summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- swayback/__init__.py 42
-rw-r--r-- swayback/static/sw.js 34
-rw-r--r-- swayback/templates/sw.html 13
3 files changed, 66 insertions, 23 deletions
diff --git a/swayback/__init__.py b/swayback/__init__.py
index 80600f3..f609736 100644
--- a/swayback/__init__.py
+++ b/swayback/__init__.py
@@ -1,11 +1,11 @@
import os
+from io import BytesIO
+from urllib.parse import urlparse, urlunparse
from flask import Flask, render_template, send_file, request, make_response
from warcio.archiveiterator import ArchiveIterator
from warcio.recordloader import ArcWarcRecordLoader
from warcio.bufferedreaders import DecompressingBufferedReader
-from io import BytesIO
-from urllib.parse import urlparse, urlunparse
app = Flask(__name__)
app.url_map.host_matching = True
@@ -33,24 +33,40 @@ def index ():
""" A simple index of all HTML pages inside the WARCs """
return render_template ('index.html', index=htmlindex)
-@app.route('/raw', host='swayback.localhost:5000', methods=['POST'])
-def raw ():
- """ Retrieve the original response for a given request """
- print (request.form)
- url = urlparse (request.form['url'])
+@app.route('/raw', host='swayback.localhost:5000', methods=['OPTIONS'])
+def rawPreflight ():
+ """ CORS preflight request, allow user-defined fetch() headers """
+ resp = make_response ('', 200)
+ resp.headers.add ('Access-Control-Allow-Origin', '*')
+ resp.headers.add ('Access-Control-Allow-Headers', 'Content-Type')
+ resp.headers.add ('Access-Control-Allow-Methods', 'POST')
+ return resp
+
+def lookupRecord (url):
+ """ Look up URL in database. """
try:
filename, offset, length = urlmap[url]
with open(filename, 'rb') as stream:
stream.seek (offset, 0)
buf = BytesIO (stream.read (length))
loader = ArcWarcRecordLoader ()
- record = loader.parse_record_stream (DecompressingBufferedReader (buf))
- statuscode = record.http_headers.get_statuscode ()
- record.http_headers.remove_header ('Content-Security-Policy')
- record.http_headers.replace_header ('Access-Control-Allow-Origin', '*')
- headers = record.http_headers.headers
- return record.content_stream().read(), statuscode, headers
+ return loader.parse_record_stream (DecompressingBufferedReader (buf))
except KeyError:
+ return None
+
+@app.route('/raw', host='swayback.localhost:5000', methods=['POST'])
+def raw ():
+ """ Retrieve the original response for a given request """
+ data = request.get_json ()
+ url = urlparse (data['url'])
+ record = lookupRecord (url)
+ if record:
+ statuscode = record.http_headers.get_statuscode ()
+ record.http_headers.remove_header ('Content-Security-Policy')
+ record.http_headers.replace_header ('Access-Control-Allow-Origin', '*')
+ headers = record.http_headers.headers
+ return record.content_stream().read(), statuscode, headers
+ else:
resp = make_response ('', 404)
resp.headers.add ('Access-Control-Allow-Origin', '*')
return resp
diff --git a/swayback/static/sw.js b/swayback/static/sw.js
index f3a63db..56889c0 100644
--- a/swayback/static/sw.js
+++ b/swayback/static/sw.js
@@ -3,13 +3,18 @@ self.addEventListener('install', function(event) {
self.skipWaiting();
});
/* load stuff through service worker immediately? XXX: only debugging? */
-self.addEventListener('activate', event => {
- event.waitUntil(clients.claim());
+self.addEventListener('activate', async function() {
+ if (self.registration.navigationPreload) {
+ // Enable navigation preloads!
+ await self.registration.navigationPreload.enable();
+ } /*event => {
+ event.waitUntil(clients.claim());*/
});
self.addEventListener('fetch', function(event) {
- console.log ('fetch event', event.request.url, event);
- let url = new URL (event.request.url);
+ let origreq = event.request;
+ console.log ('fetch event', origreq.url, event);
+ let url = new URL (origreq.url);
url.protocol = 'https:';
url.port = 443;
url.hash = '';
@@ -17,10 +22,23 @@ self.addEventListener('fetch', function(event) {
url.hostname = url.hostname.slice (0, url.hostname.length-'.swayback.localhost'.length);
}
console.log ('orig url', url);
- let body = new FormData ();
- body.append ('url', url);
- body.append ('method', event.request.method);
- let req = new Request ('http://swayback.localhost:5000/raw', {method: 'POST', body: body});
+ /* should contain everything we cannot use in the actual request (i.e. url and method) */
+ let body = {
+ 'url': url.href,
+ 'method': origreq.method,
+ };
+ let headers = {
+ 'Content-Type': 'application/json',
+ };
+ /* add a few well-known request headers */
+ let origheaders = origreq.headers;
+ if (origheaders.has ('accept')) {
+ headers['Accept'] = origreq.headers.get ('accept');
+ }
+ console.log ('sending', body, headers);
+ let req = new Request ('http://swayback.localhost:5000/raw',
+ {method: 'POST', body: JSON.stringify (body), headers: headers,
+ mode: 'cors'});
event.respondWith (
fetch(req)
diff --git a/swayback/templates/sw.html b/swayback/templates/sw.html
index 2abfac2..0735ae9 100644
--- a/swayback/templates/sw.html
+++ b/swayback/templates/sw.html
@@ -24,8 +24,17 @@
/* service workers must be hosted in the same origin (i.e. subdomain) */
navigator.serviceWorker.register('/static/sw.js', {scope: '/'})
.then(function(reg) {
- /* load actual content using the service worker */
- window.location.reload ();
+ /* load actual content using the service worker when done installing */
+ if (reg.installing) {
+ reg.installing.addEventListener ('statechange', function (e) {
+ if (e.target.state !== 'installing') {
+ console.log ('reloading');
+ window.location.reload ();
+ return false;
+ }
+ return true;
+ });
+ }
}).catch(function(error) {
console.log ('sw error', error);
document.getElementById ('unsupported').classList.remove ('hidden');