diff options
Diffstat (limited to 'crocoite/util.py')
-rw-r--r-- | crocoite/util.py | 88 |
1 files changed, 74 insertions, 14 deletions
diff --git a/crocoite/util.py b/crocoite/util.py index ec257f1..da377a3 100644 --- a/crocoite/util.py +++ b/crocoite/util.py @@ -22,22 +22,82 @@ Random utility functions """ -import random +import random, sys, platform, os, json, urllib +from datetime import datetime +import hashlib, pkg_resources -def randomString (length=None, chars='abcdefghijklmnopqrstuvwxyz'): - if length is None: - length = random.randint (16, 32) - return ''.join (map (lambda x: random.choice (chars), range (length))) +from yarl import URL -def packageUrl (path): - """ - Create URL for package data stored into WARC - """ - return 'urn:' + __package__ + ':' + path +class StrJsonEncoder (json.JSONEncoder): + """ JSON encoder that turns unknown classes into a string and thus never + fails """ + def default (self, obj): + if isinstance (obj, datetime): + return obj.isoformat () + + # make sure serialization always succeeds + try: + return json.JSONEncoder.default(self, obj) + except TypeError: + return str (obj) -def getFormattedViewportMetrics (tab): - layoutMetrics = tab.Page.getLayoutMetrics () +async def getFormattedViewportMetrics (tab): + layoutMetrics = await tab.Page.getLayoutMetrics () # XXX: I’m not entirely sure which one we should use here - return '{}x{}'.format (layoutMetrics['layoutViewport']['clientWidth'], - layoutMetrics['layoutViewport']['clientHeight']) + viewport = layoutMetrics['layoutViewport'] + return f"{viewport['clientWidth']}x{viewport['clientHeight']}" + +def getSoftwareInfo (): + """ Get software info for inclusion into warcinfo """ + return { + 'platform': platform.platform (), + 'python': { + 'implementation': platform.python_implementation(), + 'version': platform.python_version (), + 'build': platform.python_build () + }, + 'self': getRequirements (__package__) + } + +def getRequirements (dist): + """ Get dependencies of a package. 
+ + Figure out packages’ dependencies based on setup/distutils, then look at + modules loaded and compute hashes of each loaded dependency. + + This does not and cannot protect against malicious people. Its only + purpose is to recreate this exact environment. + """ + + pending = {dist} + have = set () + packages = [] + while pending: + d = pkg_resources.get_distribution (pending.pop ()) + + modules = list (filter (lambda x: x, d.get_metadata ('top_level.txt').split ('\n'))) + modhashes = {} + # hash loaded modules + for m in sys.modules.values (): + f = getattr (m, '__file__', None) + pkg = getattr (m, '__package__', None) + # is loaded? + if pkg in modules: + if f and os.path.isfile (f): + with open (f, 'rb') as fd: + contents = fd.read () + h = hashlib.new ('sha512') + h.update (contents) + modhashes[m.__name__] = {'sha512': h.hexdigest (), 'len': len (contents)} + else: + modhashes[m.__name__] = {} + + # only if one of the package’s modules is actually loaded + if modhashes: + packages.append ({'projectName': d.project_name, 'modules': modhashes, 'version': d.version}) + + have.add (dist) + pending.update (d.requires ()) + pending.difference_update (have) + return packages |