From d187373fd3b8f3aa16af8998f4e6481bb4505e60 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Fri, 16 Mar 2018 14:47:34 +0100 Subject: Use JSON-encoded POST body for /raw --- swayback/__init__.py | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) (limited to 'swayback/__init__.py') diff --git a/swayback/__init__.py b/swayback/__init__.py index 80600f3..f609736 100644 --- a/swayback/__init__.py +++ b/swayback/__init__.py @@ -1,11 +1,11 @@ import os +from io import BytesIO +from urllib.parse import urlparse, urlunparse from flask import Flask, render_template, send_file, request, make_response from warcio.archiveiterator import ArchiveIterator from warcio.recordloader import ArcWarcRecordLoader from warcio.bufferedreaders import DecompressingBufferedReader -from io import BytesIO -from urllib.parse import urlparse, urlunparse app = Flask(__name__) app.url_map.host_matching = True @@ -33,24 +33,40 @@ def index (): """ A simple index of all HTML pages inside the WARCs """ return render_template ('index.html', index=htmlindex) -@app.route('/raw', host='swayback.localhost:5000', methods=['POST']) -def raw (): - """ Retrieve the original response for a given request """ - print (request.form) - url = urlparse (request.form['url']) +@app.route('/raw', host='swayback.localhost:5000', methods=['OPTIONS']) +def rawPreflight (): + """ CORS preflight request, allow user-defined fetch() headers """ + resp = make_response ('', 200) + resp.headers.add ('Access-Control-Allow-Origin', '*') + resp.headers.add ('Access-Control-Allow-Headers', 'Content-Type') + resp.headers.add ('Access-Control-Allow-Methods', 'POST') + return resp + +def lookupRecord (url): + """ Look up URL in database. """ try: filename, offset, length = urlmap[url] with open(filename, 'rb') as stream: stream.seek (offset, 0) buf = BytesIO (stream.read (length)) loader = ArcWarcRecordLoader () - record = loader.parse_record_stream (DecompressingBufferedReader (buf)) - statuscode = record.http_headers.get_statuscode () - record.http_headers.remove_header ('Content-Security-Policy') - record.http_headers.replace_header ('Access-Control-Allow-Origin', '*') - headers = record.http_headers.headers - return record.content_stream().read(), statuscode, headers + return loader.parse_record_stream (DecompressingBufferedReader (buf)) except KeyError: + return None + +@app.route('/raw', host='swayback.localhost:5000', methods=['POST']) +def raw (): + """ Retrieve the original response for a given request """ + data = request.get_json () + url = urlparse (data['url']) + record = lookupRecord (url) + if record: + statuscode = record.http_headers.get_statuscode () + record.http_headers.remove_header ('Content-Security-Policy') + record.http_headers.replace_header ('Access-Control-Allow-Origin', '*') + headers = record.http_headers.headers + return record.content_stream().read(), statuscode, headers + else: resp = make_response ('', 404) resp.headers.add ('Access-Control-Allow-Origin', '*') return resp -- cgit v1.2.3