// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
//! Pest meta-grammar
//!
//! # Warning: Semantic Versioning
//! The meta-grammar may receive changes between minor versions that are
//! non-breaking for the grammar itself. Such changes can nevertheless turn
//! into semver-breaking changes, because they add variants to the `Rule`
//! enum. This is a known issue and will be fixed in the future
//! (e.g. by increasing MSRV and using non_exhaustive annotations).

/// Entry point: a whole grammar file, i.e. leading `//!` docs followed by rules.
grammar_rules = _{
    SOI
    ~ grammar_doc*
    ~ grammar_rule*
    ~ EOI
}

/// One top-level item: either a complete rule definition or a `///` doc line.
grammar_rule = {
    (identifier ~ assignment_operator ~ modifier? ~ opening_brace ~ expression ~ closing_brace)
  | line_doc
}

/// The `=` sign placed after a rule name or a node tag.
assignment_operator = { "=" }

/// The `{` that opens a rule body, a repetition count or a `\u{...}` escape.
opening_brace = { "{" }

/// The `}` that closes a rule body, a repetition count or a `\u{...}` escape.
closing_brace = { "}" }

/// The `(` that opens a parenthesized expression or the argument of `PUSH`.
opening_paren = { "(" }

/// The `)` that closes a parenthesized expression or the argument of `PUSH`.
closing_paren = { ")" }

/// The `[` that opens the slice of a `PEEK` expression.
opening_brack = { "[" }

/// The `]` that closes the slice of a `PEEK` expression.
closing_brack = { "]" }

/// Any one of the modifiers accepted between `=` and the opening brace of a rule.
modifier = _{
    silent_modifier | atomic_modifier | compound_atomic_modifier | non_atomic_modifier
}

/// The `_` modifier (silent rule).
silent_modifier = { "_" }

/// The `@` modifier (atomic rule).
atomic_modifier = { "@" }

/// The `$` modifier (compound atomic rule).
compound_atomic_modifier = { "$" }

/// The `!` modifier (non-atomic rule).
non_atomic_modifier = { "!" }

/// A tag name: `#` followed by a letter or underscore, then letters, digits or underscores.
tag_id = @{ "#" ~ ("_" | alpha) ~ ("_" | alpha_num)* }

/// A tag name followed by `=`, used to label the node that comes after it.
node_tag = _{ tag_id ~ assignment_operator }

/// A rule body: terms joined by infix operators, with an optional leading `|`.
expression = { choice_operator? ~ term ~ (infix_operator ~ term)* }

/// A single operand: optional tag, prefix operators, the node itself, postfix operators.
term = { node_tag? ~ prefix_operator* ~ node ~ postfix_operator* }

/// The core of a term: a parenthesized expression or a terminal.
node = _{
    (opening_paren ~ expression ~ closing_paren)
  | terminal
}

/// A leaf of an expression. The alternatives are tried in the order listed.
terminal = _{
    _push
  | peek_slice
  | identifier
  | string
  | insensitive_string
  | range
}

/// A predicate operator written in front of a node.
prefix_operator = _{ positive_predicate_operator | negative_predicate_operator }

/// An operator written between two terms: sequence or choice.
infix_operator = _{ sequence_operator | choice_operator }

/// An operator written after a node: optional, repetition or a counted repetition.
postfix_operator = _{
    optional_operator
  | repeat_operator
  | repeat_once_operator
  | repeat_exact
  | repeat_min
  | repeat_max
  | repeat_min_max
}

/// The `&` prefix (positive predicate).
positive_predicate_operator = { "&" }

/// The `!` prefix (negative predicate).
negative_predicate_operator = { "!" }

/// The `~` infix (sequence).
sequence_operator = { "~" }

/// The `|` infix (choice).
choice_operator = { "|" }

/// The `?` postfix (optional).
optional_operator = { "?" }

/// The `*` postfix (repeat).
repeat_operator = { "*" }

/// The `+` postfix (repeat at least once).
repeat_once_operator = { "+" }

/// A counted repetition with an exact count, written `{n}`.
repeat_exact = { opening_brace ~ number ~ closing_brace }

/// A counted repetition with only a lower bound, written `{n,}`.
repeat_min = { opening_brace ~ number ~ comma ~ closing_brace }

/// A counted repetition with only an upper bound, written `{,n}`.
repeat_max = { opening_brace ~ comma ~ number ~ closing_brace }

/// A counted repetition with both bounds, written `{m,n}`.
repeat_min_max = { opening_brace ~ number ~ comma ~ number ~ closing_brace }

/// One or more decimal digits.
number = @{ ('0'..'9')+ }

/// An integer: a plain number, or `-` followed by a non-zero value (leading zeros allowed).
integer = @{
    number
  | ("-" ~ "0"* ~ '1'..'9' ~ number?)
}

/// The `,` separator used inside counted repetitions.
comma = { "," }

/// A `PUSH(...)` expression wrapping an inner expression.
_push = { "PUSH" ~ opening_paren ~ expression ~ closing_paren }

/// A `PEEK[a..b]` expression; both slice bounds are optional integers.
peek_slice = { "PEEK" ~ opening_brack ~ integer? ~ range_operator ~ integer? ~ closing_brack }

/// A name: a letter or underscore, then letters, digits or underscores; must not begin with `PUSH`.
identifier = @{ !"PUSH" ~ ("_" | alpha) ~ ("_" | alpha_num)* }

/// An ASCII letter, lower or upper case.
alpha = _{ 'a'..'z' | 'A'..'Z' }

/// An ASCII letter or decimal digit.
alpha_num = _{ alpha | '0'..'9' }

/// A double-quoted string literal.
string = ${ quote ~ inner_str ~ quote }

/// A string literal prefixed with `^` (case-insensitive match).
insensitive_string = { "^" ~ string }

/// A character range written as two character literals joined by `..`.
range = { character ~ range_operator ~ character }

/// A single-quoted character literal.
character = ${ single_quote ~ inner_chr ~ single_quote }

/// The contents of a string literal: plain characters interleaved with escape sequences.
inner_str = @{ (!("\"" | "\\") ~ ANY)* ~ (escape ~ inner_str)? }

/// The contents of a character literal: one escape sequence or any single character.
inner_chr = @{ escape | ANY }

/// A backslash escape: a quote, a backslash, `r`, `n`, `t`, `0`, a hex code or a unicode code.
escape = @{ "\\" ~ ("\"" | "\\" | "r" | "n" | "t" | "0" | "'" | code | unicode) }

/// The tail of a `\xNN` escape: `x` and exactly two hexadecimal digits.
code = @{ "x" ~ hex_digit{2} }

/// The tail of a `\u{...}` escape: `u` and two to six hexadecimal digits in braces.
unicode = @{ "u" ~ opening_brace ~ hex_digit{2, 6} ~ closing_brace }

/// A hexadecimal digit, in either case.
hex_digit = @{ '0'..'9' | 'a'..'f' | 'A'..'F' }

/// The `"` delimiter of a string literal.
quote = { "\"" }

/// The `'` delimiter of a character literal.
single_quote = { "'" }

/// The `..` operator used in character ranges and `PEEK` slices.
range_operator = { ".." }

/// A line ending: LF or CRLF.
newline = _{ "\n" | "\r\n" }

/// A whitespace character: space, tab or a line ending.
WHITESPACE = _{ " " | "\t" | newline }

/// A `//` comment running to the end of the line; `///` and `//!` are excluded.
line_comment = _{ "//" ~ !("/" | "!") ~ (!newline ~ ANY)* }

/// A `/* ... */` comment; block comments may be nested.
block_comment = _{ "/*" ~ (block_comment | (!"*/" ~ ANY))* ~ "*/" }

/// A comment of either kind: block or line.
COMMENT = _{ block_comment | line_comment }

// ref: https://doc.rust-lang.org/reference/comments.html
/// A horizontal whitespace character: space or tab.
space = _{ " " | "\t" }

/// A grammar-level documentation line introduced by `//!`.
grammar_doc = ${ "//!" ~ space? ~ inner_doc }

/// A rule-level documentation line introduced by `///`.
line_doc = ${ "///" ~ space? ~ inner_doc }

/// The text of a documentation line: everything up to the end of the line.
inner_doc = @{ (!newline ~ ANY)* }