From d187373fd3b8f3aa16af8998f4e6481bb4505e60 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Fri, 16 Mar 2018 14:47:34 +0100 Subject: Use JSON-encoded POST body for /raw --- swayback/__init__.py | 42 +++++++++++++++++++++++++++++------------- swayback/static/sw.js | 34 ++++++++++++++++++++++++++-------- swayback/templates/sw.html | 13 +++++++++++-- 3 files changed, 66 insertions(+), 23 deletions(-) diff --git a/swayback/__init__.py b/swayback/__init__.py index 80600f3..f609736 100644 --- a/swayback/__init__.py +++ b/swayback/__init__.py @@ -1,11 +1,11 @@ import os +from io import BytesIO +from urllib.parse import urlparse, urlunparse from flask import Flask, render_template, send_file, request, make_response from warcio.archiveiterator import ArchiveIterator from warcio.recordloader import ArcWarcRecordLoader from warcio.bufferedreaders import DecompressingBufferedReader -from io import BytesIO -from urllib.parse import urlparse, urlunparse app = Flask(__name__) app.url_map.host_matching = True @@ -33,24 +33,40 @@ def index (): """ A simple index of all HTML pages inside the WARCs """ return render_template ('index.html', index=htmlindex) -@app.route('/raw', host='swayback.localhost:5000', methods=['POST']) -def raw (): - """ Retrieve the original response for a given request """ - print (request.form) - url = urlparse (request.form['url']) +@app.route('/raw', host='swayback.localhost:5000', methods=['OPTIONS']) +def rawPreflight (): + """ CORS preflight request, allow user-defined fetch() headers """ + resp = make_response ('', 200) + resp.headers.add ('Access-Control-Allow-Origin', '*') + resp.headers.add ('Access-Control-Allow-Headers', 'Content-Type') + resp.headers.add ('Access-Control-Allow-Methods', 'POST') + return resp + +def lookupRecord (url): + """ Look up URL in database. """ try: filename, offset, length = urlmap[url] with open(filename, 'rb') as stream: stream.seek (offset, 0) buf = BytesIO (stream.read (length)) loader = ArcWarcRecordLoader () - record = loader.parse_record_stream (DecompressingBufferedReader (buf)) - statuscode = record.http_headers.get_statuscode () - record.http_headers.remove_header ('Content-Security-Policy') - record.http_headers.replace_header ('Access-Control-Allow-Origin', '*') - headers = record.http_headers.headers - return record.content_stream().read(), statuscode, headers + return loader.parse_record_stream (DecompressingBufferedReader (buf)) except KeyError: + return None + +@app.route('/raw', host='swayback.localhost:5000', methods=['POST']) +def raw (): + """ Retrieve the original response for a given request """ + data = request.get_json () + url = urlparse (data['url']) + record = lookupRecord (url) + if record: + statuscode = record.http_headers.get_statuscode () + record.http_headers.remove_header ('Content-Security-Policy') + record.http_headers.replace_header ('Access-Control-Allow-Origin', '*') + headers = record.http_headers.headers + return record.content_stream().read(), statuscode, headers + else: resp = make_response ('', 404) resp.headers.add ('Access-Control-Allow-Origin', '*') return resp diff --git a/swayback/static/sw.js b/swayback/static/sw.js index f3a63db..56889c0 100644 --- a/swayback/static/sw.js +++ b/swayback/static/sw.js @@ -3,13 +3,18 @@ self.addEventListener('install', function(event) { self.skipWaiting(); }); /* load stuff through service worker immediately? XXX: only debugging? */ -self.addEventListener('activate', event => { - event.waitUntil(clients.claim()); +self.addEventListener('activate', async function() { + if (self.registration.navigationPreload) { + // Enable navigation preloads! + await self.registration.navigationPreload.enable(); + } /*event => { + event.waitUntil(clients.claim());*/ }); self.addEventListener('fetch', function(event) { - console.log ('fetch event', event.request.url, event); - let url = new URL (event.request.url); + let origreq = event.request; + console.log ('fetch event', origreq.url, event); + let url = new URL (origreq.url); url.protocol = 'https:'; url.port = 443; url.hash = ''; @@ -17,10 +22,23 @@ self.addEventListener('fetch', function(event) { url.hostname = url.hostname.slice (0, url.hostname.length-'.swayback.localhost'.length); } console.log ('orig url', url); - let body = new FormData (); - body.append ('url', url); - body.append ('method', event.request.method); - let req = new Request ('http://swayback.localhost:5000/raw', {method: 'POST', body: body}); + /* should contain everything we cannot use in the actual request (i.e. url and method) */ + let body = { + 'url': url.href, + 'method': origreq.method, + }; + let headers = { + 'Content-Type': 'application/json', + }; + /* add a few well-known request headers */ + let origheaders = origreq.headers; + if (origheaders.has ('accept')) { + headers['Accept'] = origreq.headers.get ('accept'); + } + console.log ('sending', body, headers); + let req = new Request ('http://swayback.localhost:5000/raw', + {method: 'POST', body: JSON.stringify (body), headers: headers, + mode: 'cors'}); event.respondWith ( fetch(req) diff --git a/swayback/templates/sw.html b/swayback/templates/sw.html index 2abfac2..0735ae9 100644 --- a/swayback/templates/sw.html +++ b/swayback/templates/sw.html @@ -24,8 +24,17 @@ /* service workers must be hosted in the same origin (i.e. subdomain) */ navigator.serviceWorker.register('/static/sw.js', {scope: '/'}) .then(function(reg) { - /* load actual content using the service worker */ - window.location.reload (); + /* load actual content using the service worker when done installing */ + if (reg.installing) { + reg.installing.addEventListener ('statechange', function (e) { + if (e.target.state !== 'installing') { + console.log ('reloading'); + window.location.reload (); + return false; + } + return true; + }); + } }).catch(function(error) { console.log ('sw error', error); document.getElementById ('unsupported').classList.remove ('hidden'); -- cgit v1.2.3