From b25c4cccafbd9572fe3e3c9c83c48c19b714a6c3 Mon Sep 17 00:00:00 2001 From: Lars-Dominik Braun Date: Sun, 29 Jul 2018 09:19:06 +0200 Subject: Add package information to warcinfo Change warcinfo record format to JSON (this is permitted by the specs) and add Python version, dependencies and their versions as well as file hashes. This should give us enough information to figure out the exact environment used to create the WARC. --- crocoite/controller.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'crocoite/controller.py') diff --git a/crocoite/controller.py b/crocoite/controller.py index cbf0037..178d11c 100644 --- a/crocoite/controller.py +++ b/crocoite/controller.py @@ -70,11 +70,11 @@ class StatsHandler (EventHandler): elif isinstance (item, BrowserCrashed): self.stats['crashed'] += 1 -import time +import time, platform from . import behavior as cbehavior from .browser import ChromeService, SiteLoader, Item -from .util import getFormattedViewportMetrics, removeFragment +from .util import getFormattedViewportMetrics, removeFragment, getRequirements class ControllerStart: __slots__ = ('payload') @@ -163,10 +163,20 @@ class SinglePageController: version = l.tab.Browser.getVersion () payload = { - 'software': __package__, - 'browser': version['product'], - 'useragent': version['userAgent'], - 'viewport': getFormattedViewportMetrics (l.tab), + 'software': { + 'platform': platform.platform (), + 'python': { + 'implementation': platform.python_implementation(), + 'version': platform.python_version (), + 'build': platform.python_build () + }, + 'self': getRequirements (__package__) + }, + 'browser': { + 'product': version['product'], + 'useragent': version['userAgent'], + 'viewport': getFormattedViewportMetrics (l.tab), + }, } self.processItem (ControllerStart (payload)) -- cgit v1.2.3