1 //! A nom-based protobuf file parser 2 //! 3 //! This crate can be seen as a rust transcription of the 4 //! [descriptor.proto](https://github.com/google/protobuf/blob/master/src/google/protobuf/descriptor.proto) file 5 6 use std::fmt; 7 use std::fmt::Write; 8 use std::ops::Deref; 9 10 use indexmap::IndexMap; 11 use protobuf::reflect::ReflectValueBox; 12 use protobuf::reflect::RuntimeType; 13 use protobuf_support::lexer::float::format_protobuf_float; 14 use protobuf_support::lexer::loc::Loc; 15 use protobuf_support::lexer::str_lit::StrLit; 16 17 use crate::model; 18 use crate::proto_path::ProtoPathBuf; 19 use crate::protobuf_abs_path::ProtobufAbsPath; 20 use crate::protobuf_ident::ProtobufIdent; 21 use crate::protobuf_path::ProtobufPath; 22 use crate::pure::parser::Parser; 23 pub use crate::pure::parser::ParserErrorWithLocation; 24 25 #[derive(thiserror::Error, Debug)] 26 enum ModelError { 27 #[error("cannot convert value `{1}` to type `{0}`")] 28 InconvertibleValue(RuntimeType, model::ProtobufConstant), 29 } 30 31 #[derive(Debug, Clone, PartialEq)] 32 pub(crate) struct WithLoc<T> { 33 pub loc: Loc, 34 pub t: T, 35 } 36 37 impl<T> Deref for WithLoc<T> { 38 type Target = T; 39 deref(&self) -> &Self::Target40 fn deref(&self) -> &Self::Target { 41 &self.t 42 } 43 } 44 45 impl<T> WithLoc<T> { with_loc(loc: Loc) -> impl FnOnce(T) -> WithLoc<T>46 pub fn with_loc(loc: Loc) -> impl FnOnce(T) -> WithLoc<T> { 47 move |t| WithLoc { 48 t, 49 loc: loc.clone(), 50 } 51 } 52 } 53 54 /// Protobuf syntax. 55 #[derive(Debug, Clone, Copy, Eq, PartialEq)] 56 pub(crate) enum Syntax { 57 /// Protobuf syntax [2](https://developers.google.com/protocol-buffers/docs/proto) (default) 58 Proto2, 59 /// Protobuf syntax [3](https://developers.google.com/protocol-buffers/docs/proto3) 60 Proto3, 61 } 62 63 impl Default for Syntax { default() -> Syntax64 fn default() -> Syntax { 65 Syntax::Proto2 66 } 67 } 68 69 /// A field rule 70 #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] 71 pub(crate) enum Rule { 72 /// A well-formed message can have zero or one of this field (but not more than one). 73 Optional, 74 /// This field can be repeated any number of times (including zero) in a well-formed message. 75 /// The order of the repeated values will be preserved. 76 Repeated, 77 /// A well-formed message must have exactly one of this field. 78 Required, 79 } 80 81 impl Rule { 82 pub(crate) const ALL: [Rule; 3] = [Rule::Optional, Rule::Repeated, Rule::Required]; 83 as_str(&self) -> &'static str84 pub(crate) const fn as_str(&self) -> &'static str { 85 match self { 86 Rule::Optional => "optional", 87 Rule::Repeated => "repeated", 88 Rule::Required => "required", 89 } 90 } 91 } 92 93 /// Protobuf group 94 #[derive(Debug, Clone, PartialEq)] 95 pub(crate) struct Group { 96 /// Group name 97 pub name: String, 98 pub fields: Vec<WithLoc<Field>>, 99 } 100 101 /// Protobuf supported field types 102 #[derive(Debug, Clone, PartialEq)] 103 pub(crate) enum FieldType { 104 /// Protobuf int32 105 /// 106 /// # Remarks 107 /// 108 /// Uses variable-length encoding. Inefficient for encoding negative numbers – if 109 /// your field is likely to have negative values, use sint32 instead. 110 Int32, 111 /// Protobuf int64 112 /// 113 /// # Remarks 114 /// 115 /// Uses variable-length encoding. Inefficient for encoding negative numbers – if 116 /// your field is likely to have negative values, use sint64 instead. 117 Int64, 118 /// Protobuf uint32 119 /// 120 /// # Remarks 121 /// 122 /// Uses variable-length encoding. 123 Uint32, 124 /// Protobuf uint64 125 /// 126 /// # Remarks 127 /// 128 /// Uses variable-length encoding. 129 Uint64, 130 /// Protobuf sint32 131 /// 132 /// # Remarks 133 /// 134 /// Uses ZigZag variable-length encoding. Signed int value. These more efficiently 135 /// encode negative numbers than regular int32s. 136 Sint32, 137 /// Protobuf sint64 138 /// 139 /// # Remarks 140 /// 141 /// Uses ZigZag variable-length encoding. Signed int value. These more efficiently 142 /// encode negative numbers than regular int32s. 143 Sint64, 144 /// Protobuf bool 145 Bool, 146 /// Protobuf fixed64 147 /// 148 /// # Remarks 149 /// 150 /// Always eight bytes. More efficient than uint64 if values are often greater than 2^56. 151 Fixed64, 152 /// Protobuf sfixed64 153 /// 154 /// # Remarks 155 /// 156 /// Always eight bytes. 157 Sfixed64, 158 /// Protobuf double 159 Double, 160 /// Protobuf string 161 /// 162 /// # Remarks 163 /// 164 /// A string must always contain UTF-8 encoded or 7-bit ASCII text. 165 String, 166 /// Protobuf bytes 167 /// 168 /// # Remarks 169 /// 170 /// May contain any arbitrary sequence of bytes. 171 Bytes, 172 /// Protobut fixed32 173 /// 174 /// # Remarks 175 /// 176 /// Always four bytes. More efficient than uint32 if values are often greater than 2^28. 177 Fixed32, 178 /// Protobut sfixed32 179 /// 180 /// # Remarks 181 /// 182 /// Always four bytes. 183 Sfixed32, 184 /// Protobut float 185 Float, 186 /// Protobuf message or enum (holds the name) 187 MessageOrEnum(ProtobufPath), 188 /// Protobut map 189 Map(Box<(FieldType, FieldType)>), 190 /// Protobuf group (deprecated) 191 Group(Group), 192 } 193 194 /// A Protobuf Field 195 #[derive(Debug, Clone, PartialEq)] 196 pub(crate) struct Field { 197 /// Field name 198 pub name: String, 199 /// Field `Rule` 200 pub rule: Option<Rule>, 201 /// Field type 202 pub typ: FieldType, 203 /// Tag number 204 pub number: i32, 205 /// Non-builtin options 206 pub options: Vec<ProtobufOption>, 207 } 208 209 /// A Protobuf field of oneof group 210 #[derive(Debug, Clone, PartialEq)] 211 pub(crate) enum FieldOrOneOf { 212 Field(WithLoc<Field>), 213 OneOf(OneOf), 214 } 215 216 /// Extension range 217 #[derive(Default, Debug, Eq, PartialEq, Copy, Clone)] 218 pub(crate) struct FieldNumberRange { 219 /// First number 220 pub from: i32, 221 /// Inclusive 222 pub to: i32, 223 } 224 225 /// A protobuf message 226 #[derive(Debug, Clone, Default)] 227 pub(crate) struct Message { 228 /// Message name 229 pub name: String, 230 /// Message fields and oneofs 231 pub fields: Vec<WithLoc<FieldOrOneOf>>, 232 /// Message reserved numbers 233 /// 234 /// TODO: use RangeInclusive once stable 235 pub reserved_nums: Vec<FieldNumberRange>, 236 /// Message reserved names 237 pub reserved_names: Vec<String>, 238 /// Nested messages 239 pub messages: Vec<WithLoc<Message>>, 240 /// Nested enums 241 pub enums: Vec<WithLoc<Enumeration>>, 242 /// Non-builtin options 243 pub options: Vec<ProtobufOption>, 244 /// Extension field numbers 245 pub extension_ranges: Vec<FieldNumberRange>, 246 /// Extensions 247 pub extensions: Vec<WithLoc<Extension>>, 248 } 249 250 impl Message { regular_fields_including_in_oneofs(&self) -> Vec<&WithLoc<Field>>251 pub fn regular_fields_including_in_oneofs(&self) -> Vec<&WithLoc<Field>> { 252 self.fields 253 .iter() 254 .flat_map(|fo| match &fo.t { 255 FieldOrOneOf::Field(f) => vec![f], 256 FieldOrOneOf::OneOf(o) => o.fields.iter().collect(), 257 }) 258 .collect() 259 } 260 261 /** Find a field by name. */ field_by_name(&self, name: &str) -> Option<&Field>262 pub fn field_by_name(&self, name: &str) -> Option<&Field> { 263 self.regular_fields_including_in_oneofs() 264 .iter() 265 .find(|f| f.t.name == name) 266 .map(|f| &f.t) 267 } 268 _nested_extensions(&self) -> Vec<&Group>269 pub fn _nested_extensions(&self) -> Vec<&Group> { 270 self.regular_fields_including_in_oneofs() 271 .into_iter() 272 .flat_map(|f| match &f.t.typ { 273 FieldType::Group(g) => Some(g), 274 _ => None, 275 }) 276 .collect() 277 } 278 279 #[cfg(test)] regular_fields_for_test(&self) -> Vec<&Field>280 pub fn regular_fields_for_test(&self) -> Vec<&Field> { 281 self.fields 282 .iter() 283 .flat_map(|fo| match &fo.t { 284 FieldOrOneOf::Field(f) => Some(&f.t), 285 FieldOrOneOf::OneOf(_) => None, 286 }) 287 .collect() 288 } 289 oneofs(&self) -> Vec<&OneOf>290 pub(crate) fn oneofs(&self) -> Vec<&OneOf> { 291 self.fields 292 .iter() 293 .flat_map(|fo| match &fo.t { 294 FieldOrOneOf::Field(_) => None, 295 FieldOrOneOf::OneOf(o) => Some(o), 296 }) 297 .collect() 298 } 299 } 300 301 /// A protobuf enumeration field 302 #[derive(Debug, Clone)] 303 pub(crate) struct EnumValue { 304 /// enum value name 305 pub name: String, 306 /// enum value number 307 pub number: i32, 308 /// enum value options 309 pub options: Vec<ProtobufOption>, 310 } 311 312 /// A protobuf enumerator 313 #[derive(Debug, Clone)] 314 pub(crate) struct Enumeration { 315 /// enum name 316 pub name: String, 317 /// enum values 318 pub values: Vec<EnumValue>, 319 /// enum options 320 pub options: Vec<ProtobufOption>, 321 } 322 323 /// A OneOf 324 #[derive(Debug, Clone, Default, PartialEq)] 325 pub(crate) struct OneOf { 326 /// OneOf name 327 pub name: String, 328 /// OneOf fields 329 pub fields: Vec<WithLoc<Field>>, 330 /// oneof options 331 pub options: Vec<ProtobufOption>, 332 } 333 334 #[derive(Debug, Clone)] 335 pub(crate) struct Extension { 336 /// Extend this type with field 337 pub extendee: ProtobufPath, 338 /// Extension field 339 pub field: WithLoc<Field>, 340 } 341 342 /// Service method 343 #[derive(Debug, Clone)] 344 pub(crate) struct Method { 345 /// Method name 346 pub name: String, 347 /// Input type 348 pub input_type: ProtobufPath, 349 /// Output type 350 pub output_type: ProtobufPath, 351 /// If this method is client streaming 352 #[allow(dead_code)] // TODO 353 pub client_streaming: bool, 354 /// If this method is server streaming 355 #[allow(dead_code)] // TODO 356 pub server_streaming: bool, 357 /// Method options 358 pub options: Vec<ProtobufOption>, 359 } 360 361 /// Service definition 362 #[derive(Debug, Clone)] 363 pub(crate) struct Service { 364 /// Service name 365 pub name: String, 366 pub methods: Vec<Method>, 367 pub options: Vec<ProtobufOption>, 368 } 369 370 #[derive(Debug, Clone, PartialEq, Eq, Hash)] 371 pub(crate) struct AnyTypeUrl { 372 pub(crate) prefix: String, 373 pub(crate) full_type_name: ProtobufPath, 374 } 375 376 impl fmt::Display for AnyTypeUrl { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result377 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 378 write!(f, "{}/{}", self.prefix, self.full_type_name) 379 } 380 } 381 382 #[derive(Debug, Clone, PartialEq, Eq, Hash)] 383 pub(crate) enum ProtobufConstantMessageFieldName { 384 Regular(String), 385 Extension(ProtobufPath), 386 AnyTypeUrl(AnyTypeUrl), 387 } 388 389 impl fmt::Display for ProtobufConstantMessageFieldName { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result390 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 391 match self { 392 ProtobufConstantMessageFieldName::Regular(s) => write!(f, "{}", s), 393 ProtobufConstantMessageFieldName::Extension(p) => write!(f, "[{}]", p), 394 ProtobufConstantMessageFieldName::AnyTypeUrl(a) => write!(f, "[{}]", a), 395 } 396 } 397 } 398 399 #[derive(Debug, Clone, PartialEq, Default)] 400 pub(crate) struct ProtobufConstantMessage { 401 pub(crate) fields: IndexMap<ProtobufConstantMessageFieldName, ProtobufConstant>, 402 } 403 404 /// constant = fullIdent | ( [ "-" | "+" ] intLit ) | ( [ "-" | "+" ] floatLit ) | 405 // strLit | boolLit 406 #[derive(Debug, Clone, PartialEq)] 407 pub(crate) enum ProtobufConstant { 408 U64(u64), 409 I64(i64), 410 F64(f64), // TODO: eq 411 Bool(bool), 412 Ident(ProtobufPath), 413 String(StrLit), 414 Message(ProtobufConstantMessage), 415 } 416 417 impl fmt::Display for ProtobufConstant { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result418 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 419 match self { 420 ProtobufConstant::U64(v) => write!(f, "{}", v), 421 ProtobufConstant::I64(v) => write!(f, "{}", v), 422 ProtobufConstant::F64(v) => write!(f, "{}", format_protobuf_float(*v)), 423 ProtobufConstant::Bool(v) => write!(f, "{}", v), 424 ProtobufConstant::Ident(v) => write!(f, "{}", v), 425 ProtobufConstant::String(v) => write!(f, "{}", v), 426 // TODO: text format explicitly 427 ProtobufConstant::Message(v) => write!(f, "{:?}", v), 428 } 429 } 430 } 431 432 impl ProtobufConstantMessage { format(&self) -> String433 pub fn format(&self) -> String { 434 let mut s = String::new(); 435 write!(s, "{{").unwrap(); 436 for (n, v) in &self.fields { 437 match v { 438 ProtobufConstant::Message(m) => write!(s, "{} {}", n, m.format()).unwrap(), 439 v => write!(s, "{}: {}", n, v.format()).unwrap(), 440 } 441 } 442 write!(s, "}}").unwrap(); 443 s 444 } 445 } 446 447 impl ProtobufConstant { format(&self) -> String448 pub fn format(&self) -> String { 449 match *self { 450 ProtobufConstant::U64(u) => u.to_string(), 451 ProtobufConstant::I64(i) => i.to_string(), 452 ProtobufConstant::F64(f) => format_protobuf_float(f), 453 ProtobufConstant::Bool(b) => b.to_string(), 454 ProtobufConstant::Ident(ref i) => format!("{}", i), 455 ProtobufConstant::String(ref s) => s.quoted(), 456 ProtobufConstant::Message(ref s) => s.format(), 457 } 458 } 459 460 /** Interpret .proto constant as an reflection value. */ as_type(&self, ty: RuntimeType) -> anyhow::Result<ReflectValueBox>461 pub fn as_type(&self, ty: RuntimeType) -> anyhow::Result<ReflectValueBox> { 462 match (self, &ty) { 463 (ProtobufConstant::Ident(ident), RuntimeType::Enum(e)) => { 464 if let Some(v) = e.value_by_name(&ident.to_string()) { 465 return Ok(ReflectValueBox::Enum(e.clone(), v.value())); 466 } 467 } 468 (ProtobufConstant::Bool(b), RuntimeType::Bool) => return Ok(ReflectValueBox::Bool(*b)), 469 (ProtobufConstant::String(lit), RuntimeType::String) => { 470 return Ok(ReflectValueBox::String(lit.decode_utf8()?)) 471 } 472 _ => {} 473 } 474 Err(ModelError::InconvertibleValue(ty.clone(), self.clone()).into()) 475 } 476 } 477 478 /// Equivalent of `UninterpretedOption.NamePart`. 479 #[derive(Debug, Clone, PartialEq)] 480 pub(crate) enum ProtobufOptionNamePart { 481 Direct(ProtobufIdent), 482 Ext(ProtobufPath), 483 } 484 485 impl fmt::Display for ProtobufOptionNamePart { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result486 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 487 match self { 488 ProtobufOptionNamePart::Direct(n) => write!(f, "{}", n), 489 ProtobufOptionNamePart::Ext(n) => write!(f, "({})", n), 490 } 491 } 492 } 493 494 #[derive(Debug, Clone, PartialEq)] 495 pub(crate) struct ProtobufOptionNameExt(pub Vec<ProtobufOptionNamePart>); 496 497 #[derive(Debug, Clone, PartialEq)] 498 pub(crate) enum ProtobufOptionName { 499 Builtin(ProtobufIdent), 500 Ext(ProtobufOptionNameExt), 501 } 502 503 impl ProtobufOptionName { simple(name: &str) -> ProtobufOptionName504 pub fn simple(name: &str) -> ProtobufOptionName { 505 ProtobufOptionName::Builtin(ProtobufIdent::new(name)) 506 } 507 } 508 509 impl fmt::Display for ProtobufOptionNameExt { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result510 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 511 for (index, comp) in self.0.iter().enumerate() { 512 if index != 0 { 513 write!(f, ".")?; 514 } 515 write!(f, "{}", comp)?; 516 } 517 Ok(()) 518 } 519 } 520 521 impl fmt::Display for ProtobufOptionName { fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result522 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 523 match self { 524 ProtobufOptionName::Builtin(n) => write!(f, "{}", n), 525 ProtobufOptionName::Ext(n) => write!(f, "{}", n), 526 } 527 } 528 } 529 530 #[derive(Debug, Clone, PartialEq)] 531 pub(crate) struct ProtobufOption { 532 pub name: ProtobufOptionName, 533 pub value: ProtobufConstant, 534 } 535 536 /// Visibility of import statement 537 #[derive(Debug, Clone, Eq, PartialEq)] 538 pub(crate) enum ImportVis { 539 Default, 540 Public, 541 Weak, 542 } 543 544 impl Default for ImportVis { default() -> Self545 fn default() -> Self { 546 ImportVis::Default 547 } 548 } 549 550 /// Import statement 551 #[derive(Debug, Default, Clone)] 552 pub(crate) struct Import { 553 pub path: ProtoPathBuf, 554 pub vis: ImportVis, 555 } 556 557 /// A File descriptor representing a whole .proto file 558 #[derive(Debug, Default, Clone)] 559 pub(crate) struct FileDescriptor { 560 /// Imports 561 pub imports: Vec<Import>, 562 /// Package 563 pub package: ProtobufAbsPath, 564 /// Protobuf Syntax 565 pub syntax: Syntax, 566 /// Top level messages 567 pub messages: Vec<WithLoc<Message>>, 568 /// Enums 569 pub enums: Vec<WithLoc<Enumeration>>, 570 /// Extensions 571 pub extensions: Vec<WithLoc<Extension>>, 572 /// Services 573 pub services: Vec<WithLoc<Service>>, 574 /// Non-builtin options 575 pub options: Vec<ProtobufOption>, 576 } 577 578 impl FileDescriptor { 579 /// Parses a .proto file content into a `FileDescriptor` parse<S: AsRef<str>>(file: S) -> Result<Self, ParserErrorWithLocation>580 pub fn parse<S: AsRef<str>>(file: S) -> Result<Self, ParserErrorWithLocation> { 581 let mut parser = Parser::new(file.as_ref()); 582 match parser.next_proto() { 583 Ok(r) => Ok(r), 584 Err(error) => { 585 let Loc { line, col } = parser.tokenizer.loc(); 586 Err(ParserErrorWithLocation { error, line, col }) 587 } 588 } 589 } 590 } 591