1// pest. The Elegant Parser
2// Copyright (c) 2018 Dragoș Tiselice
3//
4// Licensed under the Apache License, Version 2.0
5// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
6// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. All files in the project carrying such notice may not be copied,
8// modified, or distributed except according to those terms.
9//! Pest meta-grammar
10//!
11//! # Warning: Semantic Versioning
//! There may be non-breaking changes to the meta-grammar
//! between minor versions. Those changes may nevertheless
//! translate into semver-breaking changes for downstream code, because
//! they can add variants to the `Rule` enum. This is a known issue and will
//! be fixed in the future (e.g. by raising the MSRV and using `#[non_exhaustive]`).
17
/// Entry point for a whole grammar file, anchored at start and end of input.
/// Any `//!` grammar docs come first, followed by zero or more grammar rules.
grammar_rules = _{
    SOI ~ grammar_doc* ~ grammar_rule* ~ EOI
}
20
/// One top-level item of a grammar: either a full rule definition
/// (`name = modifier? { expression }`) or a single `///` doc-comment line.
grammar_rule = {
    (identifier ~ assignment_operator ~ modifier? ~ opening_brace ~ expression ~ closing_brace)
  | line_doc
}
26
/// The `=` sign that follows a rule name in a definition or a tag in a node tag.
assignment_operator = { "=" }
29
/// `{` - opens a rule body, a repetition count, or a `\u{...}` escape.
opening_brace = { "{" }

/// `}` - closes a rule body, a repetition count, or a `\u{...}` escape.
closing_brace = { "}" }

/// `(` - opens a parenthesised sub-expression or the argument of `PUSH`.
opening_paren = { "(" }

/// `)` - closes a parenthesised sub-expression or the argument of `PUSH`.
closing_paren = { ")" }

/// `[` - opens the slice bounds of a `PEEK` expression.
opening_brack = { "[" }

/// `]` - closes the slice bounds of a `PEEK` expression.
closing_brack = { "]" }
47
/// Any one of the four prefixes that may appear between `=` and the opening
/// brace of a rule definition.
modifier = _{ silent_modifier | atomic_modifier | compound_atomic_modifier | non_atomic_modifier }
55
/// `_` - the prefix that marks a rule as silent.
silent_modifier = { "_" }

/// `@` - the prefix that marks a rule as atomic.
atomic_modifier = { "@" }

/// `$` - the prefix that marks a rule as compound atomic.
compound_atomic_modifier = { "$" }

/// `!` - the prefix that marks a rule as non-atomic.
non_atomic_modifier = { "!" }
67
/// A tag label: `#` immediately followed by an identifier-shaped name
/// (a letter or `_`, then any number of letters, digits or `_`).
tag_id = @{ "#" ~ (alpha | "_") ~ (alpha_num | "_")* }
70
/// The `#tag =` prefix that labels the node of a term.
node_tag = _{ tag_id ~ assignment_operator }
73
/// The body of a rule: one or more terms joined by `~` or `|`.
/// A single leading `|` is allowed before the first term.
expression = {
    choice_operator?
  ~ term
  ~ (infix_operator ~ term)*
}
76
/// One operand of an expression: an optional `#tag =`, any number of
/// `&` / `!` predicates, the node itself, then any number of postfix operators.
term = {
    node_tag?
  ~ prefix_operator*
  ~ node
  ~ postfix_operator*
}
79
/// The core of a term: a parenthesised sub-expression or a terminal.
node = _{
    (opening_paren ~ expression ~ closing_paren)
  | terminal
}
82
/// A leaf of an expression. The alternatives are tried in the order listed.
terminal = _{
    _push
  | peek_slice
  | identifier
  | string
  | insensitive_string
  | range
}
85
/// The predicate operators that may precede a node: `&` or `!`.
prefix_operator = _{
    positive_predicate_operator
  | negative_predicate_operator
}
88
/// The binary operators that join terms: `~` (sequence) or `|` (choice).
infix_operator = _{
    sequence_operator
  | choice_operator
}
91
/// The operators that may follow a node: `?`, `*`, `+`, or a brace-delimited
/// repetition count. The alternatives are tried in the order listed.
postfix_operator = _{
    optional_operator | repeat_operator | repeat_once_operator
  | repeat_exact | repeat_min | repeat_max | repeat_min_max
}
102
/// `&` - positive predicate prefix.
positive_predicate_operator = { "&" }

/// `!` - negative predicate prefix.
negative_predicate_operator = { "!" }

/// `~` - joins two terms into a sequence.
sequence_operator = { "~" }

/// `|` - joins two terms into an ordered choice.
choice_operator = { "|" }

/// `?` - makes the preceding node optional.
optional_operator = { "?" }

/// `*` - repeats the preceding node zero or more times.
repeat_operator = { "*" }

/// `+` - repeats the preceding node one or more times.
repeat_once_operator = { "+" }
123
/// Exact repetition count: `{n}`.
repeat_exact = { opening_brace ~ number ~ closing_brace }

/// Minimum repetition count: `{n,}`.
repeat_min = { opening_brace ~ number ~ comma ~ closing_brace }

/// Maximum repetition count: `{,n}`.
repeat_max = { opening_brace ~ comma ~ number ~ closing_brace }

/// Bounded repetition count: `{n,m}`.
repeat_min_max = { opening_brace ~ number ~ comma ~ number ~ closing_brace }
135
/// An unsigned decimal number: one or more ASCII digits.
number = @{ ASCII_DIGIT+ }
138
/// A signed integer: either an unsigned `number`, or `-` followed by optional
/// leading zeros, a non-zero digit, and optionally more digits.
/// A negative zero such as `-0` is therefore not accepted.
integer = @{
    number
  | ("-" ~ "0"* ~ ASCII_NONZERO_DIGIT ~ number?)
}
141
/// `,` - separates the bounds inside a repetition count.
comma = { "," }
144
/// A `PUSH(expression)` call.
_push = {
    "PUSH" ~ opening_paren ~ expression ~ closing_paren
}
147
/// A `PEEK[start..end]` slice. Either bound may be omitted, and each bound
/// is a possibly negative `integer`.
peek_slice = {
    "PEEK" ~ opening_brack
  ~ integer? ~ range_operator ~ integer?
  ~ closing_brack
}
150
/// A rule name: a letter or `_`, followed by any number of letters, digits or `_`.
/// The leading negative lookahead rejects every name that starts with `PUSH`.
identifier = @{ !"PUSH" ~ (alpha | "_") ~ (alpha_num | "_")* }
153
/// A single ASCII letter, lower or upper case.
alpha = _{ ASCII_ALPHA }
156
/// A single ASCII letter or ASCII digit.
alpha_num = _{ alpha | ASCII_DIGIT }
159
/// A double-quoted string literal: opening quote, content, closing quote.
string = ${
    quote ~ inner_str ~ quote
}
162
/// A case-insensitive string literal: `^` followed by a string.
insensitive_string = { "^" ~ string }
165
/// A character range such as `'a'..'z'`: two quoted characters joined by `..`.
range = { character ~ range_operator ~ character }
168
/// A single-quoted character literal: opening quote, one character or
/// escape sequence, closing quote.
character = ${
    single_quote ~ inner_chr ~ single_quote
}
171
/// The content of a string literal, without the surrounding quotes: runs of
/// characters other than `"` and `\`, interleaved with escape sequences.
inner_str = @{
    (!("\"" | "\\") ~ ANY)*
  ~ (escape ~ (!("\"" | "\\") ~ ANY)*)*
}
174
/// The content of a character literal: an escape sequence if one matches,
/// otherwise any single character.
inner_chr = @{ escape | ANY }
177
/// A backslash escape: `\"`, `\\`, `\r`, `\n`, `\t`, `\0`, `\'`,
/// a `\x` hexadecimal code, or a `\u{...}` unicode code.
escape = @{
    "\\" ~ (
        "\"" | "\\" | "r" | "n" | "t" | "0" | "'"
      | code
      | unicode
    )
}
180
/// The body of a `\x` escape: `x` followed by exactly two hexadecimal digits.
code = @{ "x" ~ hex_digit ~ hex_digit }
183
/// The body of a `\u` escape: `u`, then between two and six hexadecimal
/// digits enclosed in braces.
unicode = @{
    "u" ~ opening_brace ~ hex_digit{2,6} ~ closing_brace
}
186
/// A single hexadecimal digit: `0`-`9`, `a`-`f` or `A`-`F`.
hex_digit = @{ ASCII_HEX_DIGIT }
189
/// The double quote that delimits a string literal.
quote = { "\"" }

/// The single quote that delimits a character literal.
single_quote = { "'" }

/// `..` - separates the two ends of a character range or of a `PEEK` slice.
range_operator = { ".." }
198
/// A line ending: LF or CRLF. A lone carriage return does not match.
newline = _{ "\n" | "\r\n" }
201
/// One unit of whitespace: a space, a tab, or a line ending.
WHITESPACE = _{ space | newline }
204
/// An ordinary `//` comment running to the end of the line. The lookahead
/// excludes `///` and `//!`, which `line_doc` and `grammar_doc` match instead.
line_comment = _{ "//" ~ !("/" | "!") ~ (!newline ~ ANY)* }
207
/// A `/* ... */` comment. Block comments may be nested inside one another.
block_comment = _{
    "/*"
  ~ (block_comment | (!"*/" ~ ANY))*
  ~ "*/"
}
210
/// A non-doc comment in a grammar: a block comment or an ordinary line comment.
COMMENT = _{ block_comment | line_comment }
213
214// ref: https://doc.rust-lang.org/reference/comments.html
/// A single horizontal whitespace character: a space or a tab.
space = _{ " " | "\t" }
217
/// A grammar-level doc comment: `//!`, at most one space or tab, then the
/// text up to the end of the line.
grammar_doc = ${ "//!" ~ space? ~ inner_doc }
220
/// A rule-level doc comment: `///`, at most one space or tab, then the
/// text up to the end of the line.
line_doc = ${ "///" ~ space? ~ inner_doc }
223
/// The text of a doc comment: everything up to, but not including, the next
/// line ending. May be empty.
inner_doc = @{ (!newline ~ ANY)* }
226