From a21d7332e33a3e47a363004196451721d449e70b Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sun, 25 Mar 2018 13:58:47 +0200 Subject: Move getResponseBody call to Item wrapper --- crocoite/warc.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'crocoite/warc.py') diff --git a/crocoite/warc.py b/crocoite/warc.py index e04bee4..b56d315 100644 --- a/crocoite/warc.py +++ b/crocoite/warc.py @@ -25,12 +25,10 @@ Classes writing data to WARC files import logging import json from http.server import BaseHTTPRequestHandler -from base64 import b64decode from io import BytesIO from warcio.statusandheaders import StatusAndHeaders from urllib.parse import urlsplit from logging.handlers import BufferingHandler -import pychrome from datetime import datetime from threading import Thread from queue import Queue @@ -188,15 +186,8 @@ class WarcLoader (AccountingSiteLoader): raise ValueError ('body for {} too large {} vs {}'.format (reqId, item.encodedDataLength, self.maxBodySize)) else: - try: - body = self.tab.Network.getResponseBody (requestId=reqId) - rawBody = body['body'] - base64Encoded = body['base64Encoded'] - if base64Encoded: - rawBody = b64decode (rawBody) - else: - rawBody = rawBody.encode ('utf8') - except pychrome.exceptions.CallMethodException: + rawBody = item.body + if rawBody is None: raise ValueError ('no data for {} {} {}'.format (resp['url'], resp['status'], reqId)) return rawBody, base64Encoded -- cgit v1.2.3