summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorLars-Dominik Braun <lars@6xq.net>2016-09-17 11:19:41 +0200
committerLars-Dominik Braun <lars@6xq.net>2016-09-17 11:19:41 +0200
commitfef880f42c74cc40c3faa34bd439a0f4e9f0ebd5 (patch)
tree2cea0f524d6052afc92c657593eccefb064d53e1 /tools
parent903cb02e7235bbdcc5e77373836bc8eb36e5ec65 (diff)
downloadeumel-fef880f42c74cc40c3faa34bd439a0f4e9f0ebd5.tar.gz
eumel-fef880f42c74cc40c3faa34bd439a0f4e9f0ebd5.tar.bz2
eumel-fef880f42c74cc40c3faa34bd439a0f4e9f0ebd5.zip
Move formatRefs to tools
Diffstat (limited to 'tools')
-rwxr-xr-xtools/formatRefs.py150
1 files changed, 150 insertions, 0 deletions
diff --git a/tools/formatRefs.py b/tools/formatRefs.py
new file mode 100755
index 0000000..8189f2d
--- /dev/null
+++ b/tools/formatRefs.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+
+from rdflib import URIRef, BNode, Literal, Graph, Namespace
+from rdflib.namespace import RDF, NamespaceManager
+from urllib.parse import urlparse
+import sys
+from itertools import chain
+
+def first (it):
+ try:
+ return next (it)
+ except StopIteration:
+ return None
+
+def humanList (l):
+ if len (l) == 0:
+ return ''
+ elif len (l) == 1:
+ return l[0]
+ else:
+ return ', '.join (l[:-1]) + ' and ' + l[-1]
+
+def formatDomain (url):
+ d = urlparse (url).hostname
+ if d.startswith ('www.'):
+ d = d[4:]
+ return d
+
+def formatPerson (s, g, n):
+ firstname = first (g.objects (n, s.givenName))
+ familyname = first (g.objects (n, s.familyName))
+ if firstname:
+ return '{} {}'.format (firstname, familyname)
+ else:
+ return familyname
+
+def formatParent (s, g, n, useName = True):
+ ret = []
+
+ if useName:
+ parentname = first (g.objects (n, s.name))
+ if parentname:
+ ret.append (parentname)
+
+ volume = first (g.objects (n, s.volumeNumber))
+ if volume:
+ ret.append ('volume {}'.format (volume))
+
+ issue = first (g.objects (n, s.issueNumber))
+ if issue:
+ ret.append ('issue {}'.format (issue))
+
+ # pages
+ start = first (g.objects (n, s.pageStart))
+ end = first (g.objects (n, s.pageEnd))
+ if start:
+ num = int (end)-int (start)
+ if end and num >= 1:
+ ret.append ('pp. {}–{}'.format (start, end))
+ else:
+ ret.append ('p. {}'.format (start))
+
+ return ', '.join (ret)
+
+def relUri (base, u):
+ if u.startswith (base):
+ return u[len (base):]
+ else:
+ return u
+
+def hideLocalUri (base, l):
+ """
+ Show local uris only iff no other sources are available
+ """
+ l = list (l)
+ notLocal = list (filter (lambda u: relUri (base, u) == u, l))
+ return notLocal or l
+
+def getRecursive (s, g, n, predicate):
+ """
+ Look for predicate in n and all Things it is a part of until it is found
+ """
+ res = g.objects (n, predicate)
+ if res:
+ yield from res
+ parents = g.objects (n, s.isPartOf)
+ for p in parents:
+ yield from getRecursive (s, g, p, predicate)
+
+def getRecursiveAll (s, g, n, predicate):
+ parents = list (g.objects (n, predicate))
+ yield from parents
+ for p in parents:
+ yield from getRecursiveAll (s, g, p, predicate)
+
+if __name__ == '__main__':
+ g = Graph()
+ result = g.parse ("index.ttl", format='turtle')
+ rootUri = sys.argv[1]
+ rootNode = URIRef (rootUri)
+ s = Namespace("https://schema.org/")
+ for ref in result.objects (rootNode, s.citation):
+ t = list (g.objects (ref, RDF.type))
+ assert len (t) == 1
+ t = t[0]
+
+ # object _must_ have a name
+ what = first (g.objects (ref, s.name))
+
+ # look for people who wrote/translated/edited it
+ who = map (lambda a: formatPerson (s, g, a), getRecursive (s, g, ref, s.author))
+ #who = chain (who, map (lambda a: formatPerson (s, g, a) + ' (ed.)', getRecursive (s, g, ref, s.editor)))
+ #who = chain (who, map (lambda a: formatPerson (s, g, a) + ' (trans.)', getRecursive (s, g, ref, s.translator)))
+ who = humanList (list (who))
+
+ # when was it published?
+ when = first (getRecursive (s, g, ref, s.datePublished))
+
+ # where can we find it? (print)
+ # print from root to ref (i.e. magazine, volume, issue)
+ parents = reversed (list (getRecursiveAll (s, g, ref, s.isPartOf)))
+ where = [formatParent (s, g, p) for p in parents]
+ thiswhere = formatParent (s, g, ref, False)
+ if thiswhere:
+ where.append (thiswhere)
+
+ # where can we find it? (online)
+ urls = hideLocalUri (rootUri, g.objects (ref, s.url))
+ urls = ['`{} <{}>`__'.format (formatDomain (url), relUri (rootUri, url)) for url in urls]
+
+ refname = urlparse (ref).fragment
+ if not refname:
+ refname = first (g.objects (ref, s.alternateName))
+ ret = '.. [{}] \\ '.format (refname)
+
+ if who:
+ ret += ' {}:'.format (who)
+
+ ret += ' *{}*.'.format (what)
+
+ if where:
+ ret += ' {}.'.format (', '.join (where))
+
+ if when:
+ ret += ' {}.'.format (when)
+
+ if urls:
+ ret += ' {}.'.format (', '.join (urls))
+ print (ret)
+