1from pygments.lexer import RegexLexer, bygroups, include
2from pygments.token import Comment, Generic, Keyword, Name, Operator, Punctuation, Text
3
4from sphinx.highlighting import lexers
5
6
class PEGLexer(RegexLexer):
    """Pygments Lexer for PEG grammar (.gram) files

    This lexer strips the following elements from the grammar:

        - Meta-tags
        - Variable assignments
        - Actions
        - Lookaheads
        - Rule types
        - Rule options
        - Rules named `invalid_*` or `incorrect_*`

    Stripping is implemented with ``bygroups(None, ...)`` callbacks: the
    matched text is consumed, but no token is emitted for a ``None`` group.
    """

    name = "PEG"
    aliases = ["peg"]
    filenames = ["*.gram"]

    # An identifier: one non-digit word character followed by word characters.
    _name = r"([^\W\d]\w*)"
    # An optional run of whitespace, captured as its own group.
    _text_ws = r"(\s*)"

    tokens = {
        "ws": [
            (r"\n", Text),
            (r"\s+", Text),
            # Comment.Single is the standard Pygments token for line comments.
            (r"#.*$", Comment.Single),
        ],
        "lookaheads": [
            # Forced tokens: only the `&&` marker is consumed; the operand is
            # checked with a lookahead and left for the following rules.
            (r"(&&)(?=\w+\s?)", bygroups(None)),
            (r"(&&)(?='.+'\s?)", bygroups(None)),
            (r'(&&)(?=".+"\s?)', bygroups(None)),
            (r"(&&)(?=\(.+\)\s?)", bygroups(None)),
            # Positive lookaheads directly after "| ": the `&` marker and its
            # operand are both dropped.
            (r"(?<=\|\s)(&\w+\s?)", bygroups(None)),
            # Lazy quantifier: stop at the first closing quote so that later
            # quoted strings on the same line are not swallowed as well.
            (r"(?<=\|\s)(&'.+?'\s?)", bygroups(None)),
            (r'(?<=\|\s)(&".+?"\s?)', bygroups(None)),
            # NOTE: `.+` is greedy here, so this consumes everything through
            # the last `)` on the line.
            (r"(?<=\|\s)(&\(.+\)\s?)", bygroups(None)),
        ],
        "metas": [
            # Meta-tag whose value is a (possibly multi-line) '''...''' string.
            (r"(@\w+ '''(.|\n)+?''')", bygroups(None)),
            # Any other line that starts with `@`.
            (r"^(@.*)$", bygroups(None)),
        ],
        "actions": [
            # `{ ... }` action, possibly spanning lines, up to the first `}`.
            (r"{(.|\n)+?}", bygroups(None)),
        ],
        "strings": [
            # Quoted words are highlighted as keywords ...
            (r"'\w+?'", Keyword),
            (r'"\w+?"', Keyword),
            # ... quoted runs of non-word characters are plain text.
            (r"'\W+?'", Text),
            (r'"\W+?"', Text),
        ],
        "variables": [
            # `name =`
            (_name + _text_ws + "(=)", bygroups(None, None, None)),
            # `name[type] =`
            (
                _name + _text_ws + r"(\[[\w\d_\*]+?\])" + _text_ws + "(=)",
                bygroups(None, None, None, None, None),
            ),
        ],
        "invalids": [
            # Alternatives that mention an invalid_*/incorrect_* rule are
            # dropped together with their newline.
            (r"^(\s+\|\s+.*invalid_\w+.*\n)", bygroups(None)),
            (r"^(\s+\|\s+.*incorrect_\w+.*\n)", bygroups(None)),
            # From a comment line containing "invalid syntax", everything up
            # to the end of the input is dropped.
            (r"^(#.*invalid syntax.*(?:.|\n)*)", bygroups(None)),
        ],
        "root": [
            include("invalids"),
            include("ws"),
            include("lookaheads"),
            include("metas"),
            include("actions"),
            include("strings"),
            include("variables"),
            # Upper-case names (presumably token names) are emitted as plain
            # text unless they start with NULL/EXTRA or are immediately
            # followed by `(`.
            (r"\b(?!(NULL|EXTRA))([A-Z_]+)\b\s*(?!\()", Text),
            # Rule header `name[type] (options):` -- the name and the colon
            # are emitted; the optional type and options groups are dropped.
            (
                r"^\s*" + _name + r"\s*" + r"(\[.*\])?" + r"\s*" + r"(\(.+\))?" + r"\s*(:)",
                bygroups(Name.Function, None, None, Punctuation),
            ),
            (_name, Name.Function),
            # Single-character operators: | . + * ?
            (r"[|.+*?]", Operator),
            (r"{|}|\(|\)|\[|\]", Punctuation),
            # Fallback so that any remaining character is still consumed.
            (r".", Text),
        ],
    }
82
83
def setup(app):
    """Register the PEG lexer with Sphinx and return extension metadata.

    A ``PEGLexer`` instance is stored in ``sphinx.highlighting.lexers`` under
    the ``"peg"`` key.  The *app* argument is accepted but not used.
    """
    peg_lexer = PEGLexer()
    lexers["peg"] = peg_lexer

    metadata = {"version": "1.0", "parallel_read_safe": True}
    return metadata
87