1 //! A nom-based protobuf file parser
2 //!
3 //! This crate can be seen as a rust transcription of the
4 //! [descriptor.proto](https://github.com/google/protobuf/blob/master/src/google/protobuf/descriptor.proto) file
5 
6 use std::fmt;
7 use std::fmt::Write;
8 use std::ops::Deref;
9 
10 use indexmap::IndexMap;
11 use protobuf::reflect::ReflectValueBox;
12 use protobuf::reflect::RuntimeType;
13 use protobuf_support::lexer::float::format_protobuf_float;
14 use protobuf_support::lexer::loc::Loc;
15 use protobuf_support::lexer::str_lit::StrLit;
16 
17 use crate::model;
18 use crate::proto_path::ProtoPathBuf;
19 use crate::protobuf_abs_path::ProtobufAbsPath;
20 use crate::protobuf_ident::ProtobufIdent;
21 use crate::protobuf_path::ProtobufPath;
22 use crate::pure::parser::Parser;
23 pub use crate::pure::parser::ParserErrorWithLocation;
24 
/// Errors raised while interpreting values of the parsed model.
#[derive(thiserror::Error, Debug)]
enum ModelError {
    /// A constant could not be converted to the requested runtime type.
    ///
    /// The fields are `(target type, offending constant)`; note that the message
    /// prints the constant (`{1}`) before the type (`{0}`).
    #[error("cannot convert value `{1}` to type `{0}`")]
    InconvertibleValue(RuntimeType, model::ProtobufConstant),
}
30 
/// A value of type `T` paired with a `Loc`.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct WithLoc<T> {
    /// Location attached to the value.
    pub loc: Loc,
    /// The wrapped value.
    pub t: T,
}
36 
37 impl<T> Deref for WithLoc<T> {
38     type Target = T;
39 
deref(&self) -> &Self::Target40     fn deref(&self) -> &Self::Target {
41         &self.t
42     }
43 }
44 
45 impl<T> WithLoc<T> {
with_loc(loc: Loc) -> impl FnOnce(T) -> WithLoc<T>46     pub fn with_loc(loc: Loc) -> impl FnOnce(T) -> WithLoc<T> {
47         move |t| WithLoc {
48             t,
49             loc: loc.clone(),
50         }
51     }
52 }
53 
/// Protobuf syntax version of a `.proto` file.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub(crate) enum Syntax {
    /// Protobuf syntax [2](https://developers.google.com/protocol-buffers/docs/proto) (default)
    Proto2,
    /// Protobuf syntax [3](https://developers.google.com/protocol-buffers/docs/proto3)
    Proto3,
}

impl Default for Syntax {
    /// The default syntax is `Proto2`.
    fn default() -> Self {
        Self::Proto2
    }
}
68 
/// A field rule
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)]
pub(crate) enum Rule {
    /// A well-formed message can have zero or one of this field (but not more than one).
    Optional,
    /// This field can be repeated any number of times (including zero) in a well-formed message.
    /// The order of the repeated values will be preserved.
    Repeated,
    /// A well-formed message must have exactly one of this field.
    Required,
}

impl Rule {
    /// Every rule, in the order `Optional`, `Repeated`, `Required`.
    pub(crate) const ALL: [Rule; 3] = [Self::Optional, Self::Repeated, Self::Required];

    /// Lowercase keyword for this rule: `"optional"`, `"repeated"` or `"required"`.
    pub(crate) const fn as_str(&self) -> &'static str {
        match *self {
            Self::Required => "required",
            Self::Repeated => "repeated",
            Self::Optional => "optional",
        }
    }
}
92 
/// Protobuf group
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct Group {
    /// Group name
    pub name: String,
    /// Fields of the group, each paired with its location
    pub fields: Vec<WithLoc<Field>>,
}
100 
/// Protobuf supported field types
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum FieldType {
    /// Protobuf int32
    ///
    /// # Remarks
    ///
    /// Uses variable-length encoding. Inefficient for encoding negative numbers – if
    /// your field is likely to have negative values, use sint32 instead.
    Int32,
    /// Protobuf int64
    ///
    /// # Remarks
    ///
    /// Uses variable-length encoding. Inefficient for encoding negative numbers – if
    /// your field is likely to have negative values, use sint64 instead.
    Int64,
    /// Protobuf uint32
    ///
    /// # Remarks
    ///
    /// Uses variable-length encoding.
    Uint32,
    /// Protobuf uint64
    ///
    /// # Remarks
    ///
    /// Uses variable-length encoding.
    Uint64,
    /// Protobuf sint32
    ///
    /// # Remarks
    ///
    /// Uses ZigZag variable-length encoding. Signed int value. These more efficiently
    /// encode negative numbers than regular int32s.
    Sint32,
    /// Protobuf sint64
    ///
    /// # Remarks
    ///
    /// Uses ZigZag variable-length encoding. Signed int value. These more efficiently
    /// encode negative numbers than regular int64s.
    Sint64,
    /// Protobuf bool
    Bool,
    /// Protobuf fixed64
    ///
    /// # Remarks
    ///
    /// Always eight bytes. More efficient than uint64 if values are often greater than 2^56.
    Fixed64,
    /// Protobuf sfixed64
    ///
    /// # Remarks
    ///
    /// Always eight bytes.
    Sfixed64,
    /// Protobuf double
    Double,
    /// Protobuf string
    ///
    /// # Remarks
    ///
    /// A string must always contain UTF-8 encoded or 7-bit ASCII text.
    String,
    /// Protobuf bytes
    ///
    /// # Remarks
    ///
    /// May contain any arbitrary sequence of bytes.
    Bytes,
    /// Protobuf fixed32
    ///
    /// # Remarks
    ///
    /// Always four bytes. More efficient than uint32 if values are often greater than 2^28.
    Fixed32,
    /// Protobuf sfixed32
    ///
    /// # Remarks
    ///
    /// Always four bytes.
    Sfixed32,
    /// Protobuf float
    Float,
    /// Protobuf message or enum (holds the name)
    MessageOrEnum(ProtobufPath),
    /// Protobuf map (holds a pair of field types; presumably `(key, value)` — confirm against the parser)
    Map(Box<(FieldType, FieldType)>),
    /// Protobuf group (deprecated)
    Group(Group),
}
193 
/// A Protobuf Field
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct Field {
    /// Field name
    pub name: String,
    /// Field `Rule`; `None` when no rule is recorded for the field
    pub rule: Option<Rule>,
    /// Field type
    pub typ: FieldType,
    /// Tag number
    pub number: i32,
    /// Non-builtin options
    pub options: Vec<ProtobufOption>,
}
208 
/// A Protobuf field or oneof group
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum FieldOrOneOf {
    /// A regular field, with its location
    Field(WithLoc<Field>),
    /// A oneof group
    OneOf(OneOf),
}
215 
/// A range of field numbers, inclusive on both ends.
///
/// Used by `Message` for both reserved numbers and extension ranges.
#[derive(Default, Debug, Eq, PartialEq, Copy, Clone)]
pub(crate) struct FieldNumberRange {
    /// First number
    pub from: i32,
    /// Last number (inclusive)
    pub to: i32,
}
224 
/// A protobuf message
#[derive(Debug, Clone, Default)]
pub(crate) struct Message {
    /// Message name
    pub name: String,
    /// Message fields and oneofs
    pub fields: Vec<WithLoc<FieldOrOneOf>>,
    /// Message reserved numbers
    ///
    /// TODO: consider `std::ops::RangeInclusive<i32>` (stable since Rust 1.26)
    pub reserved_nums: Vec<FieldNumberRange>,
    /// Message reserved names
    pub reserved_names: Vec<String>,
    /// Nested messages
    pub messages: Vec<WithLoc<Message>>,
    /// Nested enums
    pub enums: Vec<WithLoc<Enumeration>>,
    /// Non-builtin options
    pub options: Vec<ProtobufOption>,
    /// Extension field numbers
    pub extension_ranges: Vec<FieldNumberRange>,
    /// Extensions
    pub extensions: Vec<WithLoc<Extension>>,
}
249 
250 impl Message {
regular_fields_including_in_oneofs(&self) -> Vec<&WithLoc<Field>>251     pub fn regular_fields_including_in_oneofs(&self) -> Vec<&WithLoc<Field>> {
252         self.fields
253             .iter()
254             .flat_map(|fo| match &fo.t {
255                 FieldOrOneOf::Field(f) => vec![f],
256                 FieldOrOneOf::OneOf(o) => o.fields.iter().collect(),
257             })
258             .collect()
259     }
260 
261     /** Find a field by name. */
field_by_name(&self, name: &str) -> Option<&Field>262     pub fn field_by_name(&self, name: &str) -> Option<&Field> {
263         self.regular_fields_including_in_oneofs()
264             .iter()
265             .find(|f| f.t.name == name)
266             .map(|f| &f.t)
267     }
268 
_nested_extensions(&self) -> Vec<&Group>269     pub fn _nested_extensions(&self) -> Vec<&Group> {
270         self.regular_fields_including_in_oneofs()
271             .into_iter()
272             .flat_map(|f| match &f.t.typ {
273                 FieldType::Group(g) => Some(g),
274                 _ => None,
275             })
276             .collect()
277     }
278 
279     #[cfg(test)]
regular_fields_for_test(&self) -> Vec<&Field>280     pub fn regular_fields_for_test(&self) -> Vec<&Field> {
281         self.fields
282             .iter()
283             .flat_map(|fo| match &fo.t {
284                 FieldOrOneOf::Field(f) => Some(&f.t),
285                 FieldOrOneOf::OneOf(_) => None,
286             })
287             .collect()
288     }
289 
oneofs(&self) -> Vec<&OneOf>290     pub(crate) fn oneofs(&self) -> Vec<&OneOf> {
291         self.fields
292             .iter()
293             .flat_map(|fo| match &fo.t {
294                 FieldOrOneOf::Field(_) => None,
295                 FieldOrOneOf::OneOf(o) => Some(o),
296             })
297             .collect()
298     }
299 }
300 
/// A single value (name/number pair) of a protobuf enumeration
#[derive(Debug, Clone)]
pub(crate) struct EnumValue {
    /// enum value name
    pub name: String,
    /// enum value number
    pub number: i32,
    /// enum value options
    pub options: Vec<ProtobufOption>,
}
311 
/// A protobuf enumeration: a named list of `EnumValue`s
#[derive(Debug, Clone)]
pub(crate) struct Enumeration {
    /// enum name
    pub name: String,
    /// enum values
    pub values: Vec<EnumValue>,
    /// enum options
    pub options: Vec<ProtobufOption>,
}
322 
/// A oneof group: a named set of fields
#[derive(Debug, Clone, Default, PartialEq)]
pub(crate) struct OneOf {
    /// OneOf name
    pub name: String,
    /// OneOf fields, each paired with its location
    pub fields: Vec<WithLoc<Field>>,
    /// oneof options
    pub options: Vec<ProtobufOption>,
}
333 
/// An extension: a field that extends another type (the `extendee`)
#[derive(Debug, Clone)]
pub(crate) struct Extension {
    /// Extend this type with field
    pub extendee: ProtobufPath,
    /// Extension field
    pub field: WithLoc<Field>,
}
341 
/// Service method
#[derive(Debug, Clone)]
pub(crate) struct Method {
    /// Method name
    pub name: String,
    /// Input type
    pub input_type: ProtobufPath,
    /// Output type
    pub output_type: ProtobufPath,
    /// If this method is client streaming
    // Not read anywhere yet, hence the lint suppression.
    #[allow(dead_code)] // TODO
    pub client_streaming: bool,
    /// If this method is server streaming
    // Not read anywhere yet, hence the lint suppression.
    #[allow(dead_code)] // TODO
    pub server_streaming: bool,
    /// Method options
    pub options: Vec<ProtobufOption>,
}
360 
/// Service definition
#[derive(Debug, Clone)]
pub(crate) struct Service {
    /// Service name
    pub name: String,
    /// Service methods
    pub methods: Vec<Method>,
    /// Service options
    pub options: Vec<ProtobufOption>,
}
369 
/// A type URL made of a prefix and a full type name.
///
/// Displayed as `prefix/full_type_name`. Presumably names a type packed in
/// `google.protobuf.Any` inside a message constant — confirm against the parser.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct AnyTypeUrl {
    /// Part of the URL before the `/`
    pub(crate) prefix: String,
    /// Part of the URL after the `/`
    pub(crate) full_type_name: ProtobufPath,
}
375 
376 impl fmt::Display for AnyTypeUrl {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result377     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
378         write!(f, "{}/{}", self.prefix, self.full_type_name)
379     }
380 }
381 
/// Name of a field inside a message constant (`ProtobufConstantMessage`).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) enum ProtobufConstantMessageFieldName {
    /// Plain field name; displayed as is
    Regular(String),
    /// Extension path; displayed in square brackets
    Extension(ProtobufPath),
    /// Type URL; displayed in square brackets
    AnyTypeUrl(AnyTypeUrl),
}
388 
389 impl fmt::Display for ProtobufConstantMessageFieldName {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result390     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
391         match self {
392             ProtobufConstantMessageFieldName::Regular(s) => write!(f, "{}", s),
393             ProtobufConstantMessageFieldName::Extension(p) => write!(f, "[{}]", p),
394             ProtobufConstantMessageFieldName::AnyTypeUrl(a) => write!(f, "[{}]", a),
395         }
396     }
397 }
398 
/// A message-valued constant: field names mapped to constant values.
#[derive(Debug, Clone, PartialEq, Default)]
pub(crate) struct ProtobufConstantMessage {
    /// Fields of the message, kept in an `IndexMap`
    pub(crate) fields: IndexMap<ProtobufConstantMessageFieldName, ProtobufConstant>,
}
403 
/// A constant value as written in a `.proto` file.
///
/// ```text
/// constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) |
///                 strLit | boolLit
/// ```
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum ProtobufConstant {
    /// Unsigned integer
    U64(u64),
    /// Signed integer
    I64(i64),
    /// Floating point number
    F64(f64), // TODO: eq
    /// Boolean
    Bool(bool),
    /// Identifier (path)
    Ident(ProtobufPath),
    /// String literal
    String(StrLit),
    /// Message constant
    Message(ProtobufConstantMessage),
}
416 
417 impl fmt::Display for ProtobufConstant {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result418     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
419         match self {
420             ProtobufConstant::U64(v) => write!(f, "{}", v),
421             ProtobufConstant::I64(v) => write!(f, "{}", v),
422             ProtobufConstant::F64(v) => write!(f, "{}", format_protobuf_float(*v)),
423             ProtobufConstant::Bool(v) => write!(f, "{}", v),
424             ProtobufConstant::Ident(v) => write!(f, "{}", v),
425             ProtobufConstant::String(v) => write!(f, "{}", v),
426             // TODO: text format explicitly
427             ProtobufConstant::Message(v) => write!(f, "{:?}", v),
428         }
429     }
430 }
431 
432 impl ProtobufConstantMessage {
format(&self) -> String433     pub fn format(&self) -> String {
434         let mut s = String::new();
435         write!(s, "{{").unwrap();
436         for (n, v) in &self.fields {
437             match v {
438                 ProtobufConstant::Message(m) => write!(s, "{} {}", n, m.format()).unwrap(),
439                 v => write!(s, "{}: {}", n, v.format()).unwrap(),
440             }
441         }
442         write!(s, "}}").unwrap();
443         s
444     }
445 }
446 
447 impl ProtobufConstant {
format(&self) -> String448     pub fn format(&self) -> String {
449         match *self {
450             ProtobufConstant::U64(u) => u.to_string(),
451             ProtobufConstant::I64(i) => i.to_string(),
452             ProtobufConstant::F64(f) => format_protobuf_float(f),
453             ProtobufConstant::Bool(b) => b.to_string(),
454             ProtobufConstant::Ident(ref i) => format!("{}", i),
455             ProtobufConstant::String(ref s) => s.quoted(),
456             ProtobufConstant::Message(ref s) => s.format(),
457         }
458     }
459 
460     /** Interpret .proto constant as an reflection value. */
as_type(&self, ty: RuntimeType) -> anyhow::Result<ReflectValueBox>461     pub fn as_type(&self, ty: RuntimeType) -> anyhow::Result<ReflectValueBox> {
462         match (self, &ty) {
463             (ProtobufConstant::Ident(ident), RuntimeType::Enum(e)) => {
464                 if let Some(v) = e.value_by_name(&ident.to_string()) {
465                     return Ok(ReflectValueBox::Enum(e.clone(), v.value()));
466                 }
467             }
468             (ProtobufConstant::Bool(b), RuntimeType::Bool) => return Ok(ReflectValueBox::Bool(*b)),
469             (ProtobufConstant::String(lit), RuntimeType::String) => {
470                 return Ok(ReflectValueBox::String(lit.decode_utf8()?))
471             }
472             _ => {}
473         }
474         Err(ModelError::InconvertibleValue(ty.clone(), self.clone()).into())
475     }
476 }
477 
/// Equivalent of `UninterpretedOption.NamePart`.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum ProtobufOptionNamePart {
    /// Plain identifier; displayed as is
    Direct(ProtobufIdent),
    /// Extension path; displayed in parentheses
    Ext(ProtobufPath),
}
484 
485 impl fmt::Display for ProtobufOptionNamePart {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result486     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
487         match self {
488             ProtobufOptionNamePart::Direct(n) => write!(f, "{}", n),
489             ProtobufOptionNamePart::Ext(n) => write!(f, "({})", n),
490         }
491     }
492 }
493 
/// Option name made of a sequence of parts; displayed with the parts joined by `.`.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct ProtobufOptionNameExt(pub Vec<ProtobufOptionNamePart>);
496 
/// Name of an option.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum ProtobufOptionName {
    /// Name consisting of a single identifier
    Builtin(ProtobufIdent),
    /// Name consisting of a sequence of parts
    Ext(ProtobufOptionNameExt),
}
502 
503 impl ProtobufOptionName {
simple(name: &str) -> ProtobufOptionName504     pub fn simple(name: &str) -> ProtobufOptionName {
505         ProtobufOptionName::Builtin(ProtobufIdent::new(name))
506     }
507 }
508 
509 impl fmt::Display for ProtobufOptionNameExt {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result510     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
511         for (index, comp) in self.0.iter().enumerate() {
512             if index != 0 {
513                 write!(f, ".")?;
514             }
515             write!(f, "{}", comp)?;
516         }
517         Ok(())
518     }
519 }
520 
521 impl fmt::Display for ProtobufOptionName {
fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result522     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
523         match self {
524             ProtobufOptionName::Builtin(n) => write!(f, "{}", n),
525             ProtobufOptionName::Ext(n) => write!(f, "{}", n),
526         }
527     }
528 }
529 
/// An option: a name paired with a constant value.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct ProtobufOption {
    /// Option name
    pub name: ProtobufOptionName,
    /// Option value
    pub value: ProtobufConstant,
}
535 
/// Visibility of import statement
#[derive(Debug, Clone, Eq, PartialEq)]
pub(crate) enum ImportVis {
    /// No visibility modifier
    Default,
    /// `public` import
    Public,
    /// `weak` import
    Weak,
}

impl Default for ImportVis {
    /// The default visibility is `ImportVis::Default`.
    fn default() -> ImportVis {
        Self::Default
    }
}
549 
/// Import statement
#[derive(Debug, Default, Clone)]
pub(crate) struct Import {
    /// Path of the imported file
    pub path: ProtoPathBuf,
    /// Visibility of the import
    pub vis: ImportVis,
}
556 
/// A File descriptor representing a whole .proto file
///
/// Built from source text by `FileDescriptor::parse`.
#[derive(Debug, Default, Clone)]
pub(crate) struct FileDescriptor {
    /// Imports
    pub imports: Vec<Import>,
    /// Package
    pub package: ProtobufAbsPath,
    /// Protobuf Syntax
    pub syntax: Syntax,
    /// Top level messages
    pub messages: Vec<WithLoc<Message>>,
    /// Enums
    pub enums: Vec<WithLoc<Enumeration>>,
    /// Extensions
    pub extensions: Vec<WithLoc<Extension>>,
    /// Services
    pub services: Vec<WithLoc<Service>>,
    /// Non-builtin options
    pub options: Vec<ProtobufOption>,
}
577 
578 impl FileDescriptor {
579     /// Parses a .proto file content into a `FileDescriptor`
parse<S: AsRef<str>>(file: S) -> Result<Self, ParserErrorWithLocation>580     pub fn parse<S: AsRef<str>>(file: S) -> Result<Self, ParserErrorWithLocation> {
581         let mut parser = Parser::new(file.as_ref());
582         match parser.next_proto() {
583             Ok(r) => Ok(r),
584             Err(error) => {
585                 let Loc { line, col } = parser.tokenizer.loc();
586                 Err(ParserErrorWithLocation { error, line, col })
587             }
588         }
589     }
590 }
591