#!/usr/bin/env python3
# formatRefs.py (file mode 100755)
# Provenance: commit 73ee7a566f0d902a4c833e96e6c7fbb38a3bbec3,
# "Initial import", Lars-Dominik Braun, Wed, 31 Aug 2016 20:04:52 +0200.
"""Print the citations of a Turtle document as reStructuredText-style lines.

The script loads an RDF graph (``index.ttl`` unless a path is given as the
first command-line argument), looks up every object of a schema.org
``citation`` statement and prints one line per cited work in the form::

    .. [label] Authors: *Title*. Parent, volume V, issue I. Date. `host <url>`__.
"""

from rdflib import URIRef, BNode, Literal, Graph, Namespace
from rdflib.namespace import RDF, NamespaceManager
from urllib.parse import urlparse
import sys


def first(it):
    """Return the first item produced by *it*, or None if it is empty."""
    return next(iter(it), None)


def humanList(l):
    """Join a list of strings into an English enumeration.

    ``[]`` gives ``''``, ``['a']`` gives ``'a'`` and ``['a', 'b', 'c']``
    gives ``'a, b and c'``.
    """
    if not l:
        return ''
    if len(l) == 1:
        return l[0]
    return ', '.join(l[:-1]) + ' and ' + l[-1]


def formatDomain(url):
    """Return the host name of *url* without a leading ``www.``.

    Returns an empty string when *url* has no host part (urlparse reports
    ``hostname`` as None for e.g. relative or ``urn:`` references), so the
    caller can pick a fallback label instead of crashing.
    """
    host = urlparse(url).hostname
    if not host:
        return ''
    if host.startswith('www.'):
        host = host[4:]
    return host


def formatPerson(s, g, n):
    """Return the display name of person node *n*.

    s -- namespace object providing ``givenName`` and ``familyName``
    g -- graph that is queried via ``g.objects(subject, predicate)``
    n -- node describing the person

    The result is "given family", or whichever of the two parts exists.
    None is returned when neither part is present.
    """
    given = first(g.objects(n, s.givenName))
    family = first(g.objects(n, s.familyName))
    # Only join the parts that exist; formatting a missing part would put
    # the literal text "None" into the name.
    parts = [str(part) for part in (given, family) if part]
    return ' '.join(parts) if parts else None


def formatParent(s, g, n):
    """Describe the publication that work *n* is part of.

    s -- namespace object providing ``isPartOf``, ``name``,
         ``volumeNumber`` and ``issueNumber``
    g -- graph that is queried via ``g.objects(subject, predicate)``
    n -- node describing the cited work

    Returns e.g. ``'Journal, volume 3, issue 2'``; parts that are missing
    are left out and an empty string is returned if nothing is known.
    The issue number is read from the parent node and is only reported
    together with a volume number (read from *n* itself).
    """
    parent = first(g.objects(n, s.isPartOf))
    parts = []
    if parent is not None:
        parentname = first(g.objects(parent, s.name))
        if parentname:
            parts.append('{}'.format(parentname))
    volume = first(g.objects(n, s.volumeNumber))
    if volume:
        parts.append('volume {}'.format(volume))
        # Never query with parent=None: rdflib treats None as a wildcard and
        # would return the issue number of some unrelated node.
        issue = None
        if parent is not None:
            issue = first(g.objects(parent, s.issueNumber))
        if issue:
            parts.append('issue {}'.format(issue))
    return ', '.join(parts)


def _formatReference(s, g, ref):
    """Build the output line for the cited work *ref*.

    Raises ValueError unless *ref* has exactly one ``rdf:type``.
    """
    types = list(g.objects(ref, RDF.type))
    # Explicit check instead of assert, which is stripped under ``python -O``.
    if len(types) != 1:
        raise ValueError('{} must have exactly one rdf:type, found {}'.format(
            ref, len(types)))

    name = first(g.objects(ref, s.name))
    # Skip authors for which no name could be determined.
    people = (formatPerson(s, g, author) for author in g.objects(ref, s.author))
    authors = humanList([person for person in people if person])
    published = first(g.objects(ref, s.datePublished))

    # The label is the URI fragment, falling back to schema:alternateName.
    refname = urlparse(ref).fragment
    if not refname:
        refname = first(g.objects(ref, s.alternateName))

    ret = '.. [{}]'.format(refname)
    if authors:
        ret += ' {}:'.format(authors)
    if name:
        ret += ' *{}*.'.format(name)
    parent = formatParent(s, g, ref)
    if parent:
        ret += ' {}.'.format(parent)
    if published:
        ret += ' {}.'.format(published)
    # Use the URL itself as link text when it has no host name.
    urls = ['`{} <{}>`__'.format(formatDomain(url) or url, url)
            for url in g.objects(ref, s.url)]
    if urls:
        ret += ' {}.'.format(', '.join(urls))
    return ret


def main(path='index.ttl'):
    """Parse the Turtle file *path* and print one line per cited work."""
    g = Graph()
    g.parse(path, format='turtle')
    s = Namespace("https://schema.org/")
    for ref in g.objects(predicate=s.citation):
        print(_formatReference(s, g, ref))


if __name__ == '__main__':
    # Optional first argument: path of the Turtle file (default index.ttl).
    main(*sys.argv[1:2])