summaryrefslogtreecommitdiff
path: root/formatRefs.py
blob: b6e0be3bd7c8e5464d711fa6ee9a2ba7188b0ba1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#!/usr/bin/env python3

from rdflib import URIRef, BNode, Literal, Graph, Namespace
from rdflib.namespace import RDF, NamespaceManager
from urllib.parse import urlparse
import sys

def first (it):
    """Return the next item of the iterator ``it``, or None if it is exhausted.

    Advances ``it`` by at most one element.
    """
    return next (it, None)

def humanList (l):
    """Join the strings in ``l`` into an English enumeration.

    Returns '' for an empty sequence, the sole element for a sequence of
    one, and otherwise the elements separated by ', ' with ' and ' before
    the last one (no comma before 'and').
    """
    count = len (l)
    if count > 1:
        leading = ', '.join (l[:-1])
        return leading + ' and ' + l[-1]
    if count == 1:
        return l[0]
    return ''

def formatDomain (url):
    """Return a short display name for ``url``: its host without 'www.'.

    The host is taken from ``urlparse (url).hostname``. URLs without a
    network location (e.g. ``urn:`` or ``mailto:`` URIs, relative paths,
    the empty string) have no hostname; for those the URL text itself is
    returned instead of failing with AttributeError on None.
    """
    d = urlparse (url).hostname
    if not d:
        # No host component to shorten to, so show the full URL.
        return str (url)
    if d.startswith ('www.'):
        d = d[4:]
    return d

def formatPerson (s, g, n):
    """Return a display name for the person node ``n``.

    ``s`` supplies the predicates ``givenName`` and ``familyName`` as
    attributes; ``g`` is queried through ``g.objects (subject, predicate)``
    and the first value of each predicate is used.

    Returns 'given family' when both names are present, the one available
    name when only one is, and None when neither is present.
    """
    firstname = first (g.objects (n, s.givenName))
    familyname = first (g.objects (n, s.familyName))
    if firstname and familyname:
        return '{} {}'.format (firstname, familyname)
    # Only one part (or none) is known: return it as-is so a missing family
    # name is never rendered as the literal text 'None'.
    return firstname or familyname

def formatParent (s, g, n):
    """Describe the publication that node ``n`` is part of.

    ``s`` supplies the predicates ``isPartOf``, ``name``, ``volumeNumber``
    and ``issueNumber`` as attributes; ``g`` is queried through
    ``g.objects (subject, predicate)``.

    The result is the name of the first ``isPartOf`` parent, followed by
    ', volume V' when ``n`` has a volume number and ', issue I' when the
    parent has an issue number. Returns '' when there is no parent or the
    parent has no name.
    """
    container = first (g.objects (n, s.isPartOf))
    title = None
    if container:
        title = first (g.objects (container, s.name))
    # The volume is read from n itself, the issue from the parent node.
    vol = first (g.objects (n, s.volumeNumber))
    if not title:
        return ''
    pieces = ['{}'.format (title)]
    if vol:
        pieces.append ('volume {}'.format (vol))
    number = first (g.objects (container, s.issueNumber))
    if number:
        pieces.append ('issue {}'.format (number))
    return ', '.join (pieces)

if __name__ == '__main__':
    # Print one reference line per object of a schema.org ``citation``
    # statement found in index.ttl, using the ``.. [label] ...`` form with
    # `text <url>`__ style links.
    g = Graph()
    # NOTE(review): the loop iterates the value returned by parse() but looks
    # details up in g; presumably both are the same graph -- confirm against
    # the rdflib version in use.
    result = g.parse ("index.ttl", format='turtle')
    s = Namespace("https://schema.org/")
    for ref in result.objects (predicate=s.citation):
        # Each reference must carry exactly one rdf:type. This is an explicit
        # check rather than an assert so it still runs under ``python -O``.
        types = list (g.objects (ref, RDF.type))
        if len (types) != 1:
            raise ValueError ('{} has {} rdf:type values, expected exactly 1'.format (ref, len (types)))

        name = first (g.objects (ref, s.name))
        authors = humanList ([formatPerson (s, g, author) for author in g.objects (ref, s.author)])
        published = first (g.objects (ref, s.datePublished ))
        # The label is the URI fragment of the reference; when there is none,
        # its alternateName is used instead.
        refname = urlparse (ref).fragment
        if not refname:
            refname = first (g.objects (ref, s.alternateName))
        ret = '.. [{}]'.format (refname)
        if authors:
            ret += ' {}:'.format (authors)
        ret += ' *{}*.'.format (name)
        parent = formatParent (s, g, ref)
        if parent:
            ret += ' {}.'.format (parent)
        if published:
            ret += ' {}.'.format (published)
        # Every url of the reference becomes a link labelled with its domain.
        urls = ['`{} <{}>`__'.format (formatDomain (url), url) for url in g.objects (ref, s.url)]
        if urls:
            ret += ' {}.'.format (', '.join (urls))
        print (ret)