#!/usr/bin/env python3
"""Format the citations of a page as reStructuredText citation targets.

The script loads an RDF graph in Turtle syntax (``index.ttl`` unless another
file is given), looks up every ``schema:citation`` of the root URI passed on
the command line and prints one ``.. [name] ...`` line per citation.

Usage: formatRefs.py ROOT_URI [GRAPH_FILE]
"""
# Provenance: this file is tools/formatRefs.py, moved there unchanged from
# formatRefs.py in the repository root by commit fef880f4 ("Move formatRefs
# to tools", Lars-Dominik Braun, 2016-09-17).

import sys
from itertools import chain  # kept for the editor/translator TODO below
from urllib.parse import urlparse


def first(it):
    """Return the first item of iterable *it*, or None if it is empty."""
    # iter() lets callers pass lists as well as generators.
    return next(iter(it), None)


def humanList(l):
    """Join the strings in list *l* as 'a, b and c'; '' for an empty list."""
    if not l:
        return ''
    if len(l) == 1:
        return l[0]
    return ', '.join(l[:-1]) + ' and ' + l[-1]


def formatDomain(url):
    """Return the host name of *url* without a leading 'www.'.

    URLs that carry no host name (e.g. relative references) are returned
    unchanged so the caller still gets usable link text.
    """
    d = urlparse(url).hostname
    if d is None:
        return str(url)
    if d.startswith('www.'):
        d = d[4:]
    return d


def formatPerson(s, g, n):
    """Return 'given family' for person node *n* in graph *g*.

    *s* is the schema namespace. Missing name parts are left out; a person
    without any name yields ''.
    """
    firstname = first(g.objects(n, s.givenName))
    familyname = first(g.objects(n, s.familyName))
    return ' '.join(str(part) for part in (firstname, familyname) if part)


def _formatPages(start, end):
    """Return 'pp. S–E' when *end* lies after *start*, otherwise 'p. S'."""
    if end:
        try:
            isRange = int(end) - int(start) >= 1
        except ValueError:
            # Page labels such as roman numerals cannot be subtracted;
            # treat two different labels as a range.
            isRange = str(end) != str(start)
        if isRange:
            return 'pp. {}–{}'.format(start, end)
    return 'p. {}'.format(start)


def formatParent(s, g, n, useName=True):
    """Describe where node *n* is located: name, volume, issue and pages.

    The name is only included when *useName* is true. Returns the available
    parts joined by ', ' ('' if there are none).
    """
    ret = []

    if useName:
        parentname = first(g.objects(n, s.name))
        if parentname:
            ret.append(parentname)

    volume = first(g.objects(n, s.volumeNumber))
    if volume:
        ret.append('volume {}'.format(volume))

    issue = first(g.objects(n, s.issueNumber))
    if issue:
        ret.append('issue {}'.format(issue))

    # pages; the end page is optional
    start = first(g.objects(n, s.pageStart))
    end = first(g.objects(n, s.pageEnd))
    if start:
        ret.append(_formatPages(start, end))

    return ', '.join(ret)


def relUri(base, u):
    """Return *u* relative to *base* if it starts with *base*, else *u*."""
    if u.startswith(base):
        return u[len(base):]
    return u


def hideLocalUri(base, l):
    """
    Show local uris only iff no other sources are available
    """
    l = list(l)
    notLocal = [u for u in l if relUri(base, u) == u]
    return notLocal or l


def getRecursive(s, g, n, predicate):
    """
    Look for predicate in n and all Things it is a part of until it is found
    """
    # g.objects() may hand back a generator, which is always truthy, so the
    # results have to be materialised before testing whether any exist.
    res = list(g.objects(n, predicate))
    if res:
        yield from res
        return
    for p in g.objects(n, s.isPartOf):
        yield from getRecursive(s, g, p, predicate)


def getRecursiveAll(s, g, n, predicate):
    """Yield all objects reachable from *n* by following *predicate*.

    Direct objects come first, followed by the objects reachable from each
    of them in turn.
    """
    parents = list(g.objects(n, predicate))
    yield from parents
    for p in parents:
        yield from getRecursiveAll(s, g, p, predicate)


def formatCitation(s, g, ref, rootUri):
    """Return the reStructuredText citation line for node *ref*.

    *s* is the schema namespace, *g* the graph and *rootUri* the URI of the
    citing page; URLs below *rootUri* are written relative to it.
    """
    # object _must_ have a name
    what = first(g.objects(ref, s.name))

    # look for people who wrote it
    # TODO: also list editors (suffix ' (ed.)', s.editor) and translators
    # (suffix ' (trans.)', s.translator) by chaining them onto the authors.
    names = [formatPerson(s, g, a)
             for a in getRecursive(s, g, ref, s.author)]
    who = humanList([name for name in names if name])

    # when was it published?
    when = first(getRecursive(s, g, ref, s.datePublished))

    # where can we find it? (print)
    # print from root to ref (i.e. magazine, volume, issue)
    parents = reversed(list(getRecursiveAll(s, g, ref, s.isPartOf)))
    where = [formatParent(s, g, p) for p in parents]
    where.append(formatParent(s, g, ref, False))
    # drop levels that have nothing to show instead of emitting ', ,'
    where = [w for w in where if w]

    # where can we find it? (online)
    urls = hideLocalUri(rootUri, g.objects(ref, s.url))
    urls = ['`{} <{}>`__'.format(formatDomain(url), relUri(rootUri, url))
            for url in urls]

    refname = urlparse(ref).fragment
    if not refname:
        refname = first(g.objects(ref, s.alternateName))

    parts = ['.. [{}] \\ '.format(refname)]
    if who:
        parts.append(' {}:'.format(who))
    parts.append(' *{}*.'.format(what))
    if where:
        parts.append(' {}.'.format(', '.join(where)))
    if when:
        parts.append(' {}.'.format(when))
    if urls:
        parts.append(' {}.'.format(', '.join(urls)))
    return ''.join(parts)


def main(argv=None):
    """Print one citation line per schema:citation of the root URI.

    *argv* defaults to sys.argv: argv[1] is the root URI, the optional
    argv[2] is the Turtle file to read (default 'index.ttl'). Returns the
    process exit status.

    Raises ValueError if a cited node does not have exactly one rdf:type.
    """
    # rdflib is imported here rather than at module level so the formatting
    # helpers above stay importable without the third-party package.
    from rdflib import Graph, Namespace, URIRef
    from rdflib.namespace import RDF

    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else 'formatRefs.py'
        print('usage: {} ROOT_URI [GRAPH_FILE]'.format(prog), file=sys.stderr)
        return 2
    rootUri = argv[1]
    graphFile = argv[2] if len(argv) > 2 else 'index.ttl'

    g = Graph()
    g.parse(graphFile, format='turtle')
    rootNode = URIRef(rootUri)
    s = Namespace("https://schema.org/")
    for ref in g.objects(rootNode, s.citation):
        # Explicit check instead of assert, which is stripped under -O.
        types = list(g.objects(ref, RDF.type))
        if len(types) != 1:
            raise ValueError('{} must have exactly one rdf:type, found {}'
                             .format(ref, len(types)))
        print(formatCitation(s, g, ref, rootUri))
    return 0


if __name__ == '__main__':
    sys.exit(main())