elan.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135

"""
Pygments lexer for the Elementary Language (ELAN).

- Rainer Hahn, Peter Stock: ELAN Handbuch. 1979.
- Rainer Hahn, Dietmar Heinrichs, Peter Heyderhoff: EUMEL Benutzerhandbuch Version 1.7. 1984.
"""

from pygments.lexer import RegexLexer, bygroups, include, words
from pygments.token import *

__all__ = ['ElanLexer']

def uppercaseWords (l):
    """
    Build a matcher for the words in l that only accepts them as complete
    uppercase words: a hit may be neither preceded nor followed by another
    uppercase letter. This keeps, for instance, FOR from matching inside
    FORMAT.
    """
    # Guards placed on either side of the generated alternation.
    notAfterUppercase = r'(?<![A-Z])'
    notBeforeUppercase = r'(?![A-Z])'
    return words (l, prefix=notAfterUppercase, suffix=notBeforeUppercase)

class ElanLexer(RegexLexer):
    """
    Regex-based lexer for ELAN source files (``*.elan``).

    States:

    - ``root``: comments, strings, numbers, uppercase keywords, operators,
      PACKET/PROC/OP headers and the start of refinements. Any other
      character falls through as plain Text.
    - ``comment`` / ``comment-inside1..3``: the three comment bracket styles
      ``(* *)``, ``{ }`` and ``#( #)``. Each inside-state includes
      ``comment`` again, so comments nest (also across styles).
    - ``string``: body of a double-quoted string including ``"..."`` escapes.
    - ``refinement``: entered after a ``.`` that is not followed by a
      lowercase letter; picks up an optional ``name:`` label.
    """
    name = 'ELAN'
    aliases = ['elan']
    filenames = ['*.elan']

    tokens = {
        'root': [
            include('comment'),
            # strings
            (r'"', String.Double, 'string'),
            # numbers. The float rule has to come before the integer rule:
            # rules are tried in order, and the integer rule would otherwise
            # consume the digits in front of the decimal point, splitting
            # "3.14" into two integers.
            (r'[-+]?\d+\.\d+(E[+-]?\d+)?', Number.Float),
            # lookbehind, because identifiers may contain numbers too
            (r'([-+]|(?<![a-z]))\d+', Number.Integer),
            # keywords
            (uppercaseWords ((
                # not sure
                'CONCR',
                # if-then-else
                'IF', 'THEN', 'ELSE', 'ELIF', 'ENDIF', 'END IF',
                # found in the wild:
                'FI',
                # select statement
                'SELECT', 'OF', 'CASE', 'OTHERWISE', 'ENDSELECT', 'END SELECT',
                # loops
                'FOR', 'FROM', 'DOWNTO', 'UPTO', 'WHILE', 'REPEAT', 'UNTIL',
                'ENDREPEAT', 'END REPEAT',
                # found in the wild:
                'REP', 'PER', 'END REP',
                # return statements
                'LEAVE', 'WITH',
                )), Keyword.Reserved),
            (uppercaseWords ((
                # type declaration
                'TYPE',
                # shorthand declaration
                'LET',
                )), Keyword.Declaration),
            (uppercaseWords ((
                # proper packet
                'DEFINES',
                )), Keyword.Namespace),
            (uppercaseWords (('VAR', 'CONST', 'BOUND')), Name.Attribute),
            (uppercaseWords (('BOOL', 'INT', 'REAL', 'TEXT', 'STRUCT', 'ROW',
            'DATASPACE')), Keyword.Type),
            # truth values
            (uppercaseWords (('TRUE', 'FALSE')), Name.Builtin),
            # semi-builtin functions/operators, see Benutzerhandbuch pp. 329
            # "Standardpakete"
            (uppercaseWords ((
                # boolean
                'NOT', 'AND', 'OR', 'XOR',
                # text
                'CAT', 'LENGTH', 'TIMESOUT',
                # math
                'DECR', 'DIV', 'INCR', 'MOD', 'SUB',
            )), Operator),
            # and the same with symbols. The lookarounds make sure an
            # operator is only matched as a whole, i.e. not as a part of a
            # longer run of operator characters. The hyphen is escaped so it
            # is a literal "-" and not a "+" to "/" range, which would also
            # cover "," and "." and reject operators next to them.
            (words ((
                # assignments
                ':=', '::',
                # comparison
                '=', '<>', '<=', '>=', '<', '>',
                # math
                '**', '*','+', '-', '/',
                ), prefix=r'(?<![:=<>*+\-/])', suffix=r'(?![:=<>*+\-/])'),
                Operator),
            # packets, function and operators
            # no space required between keyword and identifier
            # XXX comments may be allowed between keyword and name
            (r'((?:END\s*)?PACKET)([^A-Za-z]*)([a-z][a-z0-9 ]+)',
                    bygroups (Keyword.Declaration, Text, Name.Namespace)),
            (r'((?:END\s*)?PROC)([^A-Za-z]*)([a-z][a-z0-9 ]+)',
                    bygroups (Keyword.Declaration, Text, Name.Function)),
            # operator names are anything but lowercase identifiers
            (r'((?:END\s*)?OP)([^A-Za-z]*)([^a-z0-9 (;]+)',
                    bygroups (Keyword.Declaration, Text, Name.Function)),
            # Refinements: a period that is not directly followed by a
            # lowercase letter may be followed by a refinement label
            (r'\.(?![a-z])', Text, 'refinement'),
            # everything else is passed through one character at a time
            (r'.', Text),
        ],
        # comment openers; each one pushes the state that knows the matching
        # closing bracket
        'comment': [
            (r'\(\*', Comment, 'comment-inside1'),
            (r'\{', Comment, 'comment-inside2'),
            (r'#\(', Comment, 'comment-inside3'),
        ],
        'comment-inside1': [
            # comment can be nested
            include('comment'),
            (r'\*\)', Comment, '#pop'),
            (r'(.|\n)', Comment),
        ],
        'comment-inside2': [
            # comment can be nested
            include('comment'),
            (r'\}', Comment, '#pop'),
            (r'(.|\n)', Comment),
        ],
        'comment-inside3': [
            # comment can be nested
            include('comment'),
            (r'#\)', Comment, '#pop'),
            (r'(.|\n)', Comment),
        ],
        'string': [
            # "" equals '\"', "12" is '\12'
            # must be tried before the closing quote below
            (r'"[0-9]*"', String.Escape),
            (r'"', String.Double, '#pop'),
            (r'.', String.Double),
        ],
        'refinement': [
            include('comment'),
            (r'\s+', Text),
            # label found: lowercase name, colon, whitespace
            (r'([a-z][a-z0-9 ]*)(:\s+)', bygroups(Name.Label, Text), '#pop'),
            # no label here: leave the state without consuming anything
            (r'', Text, '#pop'),
        ]
    }