xref: /aosp_15_r20/external/openscreen/tools/cddl/parse.cc (revision 3f982cf4871df8771c9d4abe6e9a6f8d829b2736)
1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "tools/cddl/parse.h"
6 
7 #include <unistd.h>
8 
9 #include <algorithm>
10 #include <iostream>
11 #include <memory>
12 #include <sstream>
13 #include <utility>
14 #include <vector>
15 
16 #include "absl/strings/ascii.h"
17 #include "absl/strings/match.h"
18 #include "absl/types/optional.h"
19 #include "tools/cddl/logging.h"
20 
21 static_assert(sizeof(absl::string_view::size_type) == sizeof(size_t),
22               "We assume string_view's size_type is the same as size_t. If "
23               "not, the following file needs to be refactored");
24 
25 // All of the parsing methods in this file that operate on Parser are named
26 // either Parse* or Skip* and are named according to the CDDL grammar in
27 // grammar.abnf.  Similarly, methods like ParseMemberKey1 attempt to parse the
28 // first choice in the memberkey rule.
29 struct Parser {
30   Parser(const Parser&) = delete;
31   Parser& operator=(const Parser&) = delete;
32 
33   const char* data;
34   std::vector<std::unique_ptr<AstNode>> nodes;
35 };
36 
AddNode(Parser * p,AstNode::Type type,absl::string_view text,AstNode * children=nullptr)37 AstNode* AddNode(Parser* p,
38                  AstNode::Type type,
39                  absl::string_view text,
40                  AstNode* children = nullptr) {
41   p->nodes.emplace_back(new AstNode);
42   AstNode* node = p->nodes.back().get();
43   node->children = children;
44   node->sibling = nullptr;
45   node->type = type;
46   node->text = std::string(text);
47   return node;
48 }
49 
IsBinaryDigit(char x)50 bool IsBinaryDigit(char x) {
51   return '0' == x || x == '1';
52 }
53 
54 // Determines if the given character matches regex '[a-zA-Z@_$]'.
IsExtendedAlpha(char x)55 bool IsExtendedAlpha(char x) {
56   return absl::ascii_isalpha(x) || x == '@' || x == '_' || x == '$';
57 }
58 
IsNewline(char x)59 bool IsNewline(char x) {
60   return x == '\r' || x == '\n';
61 }
62 
IsWhitespaceOrSemicolon(char c)63 bool IsWhitespaceOrSemicolon(char c) {
64   return c == ' ' || c == ';' || c == '\r' || c == '\n';
65 }
66 
SkipNewline(absl::string_view view)67 absl::string_view SkipNewline(absl::string_view view) {
68   size_t index = 0;
69   while (IsNewline(view[index])) {
70     ++index;
71   }
72 
73   return view.substr(index);
74 }
75 
76 // Skips over a comment that makes up the remainder of the current line.
SkipComment(absl::string_view view)77 absl::string_view SkipComment(absl::string_view view) {
78   size_t index = 0;
79   if (view[index] == ';') {
80     ++index;
81     while (!IsNewline(view[index]) && index < view.length()) {
82       CHECK(absl::ascii_isprint(view[index]));
83       ++index;
84     }
85 
86     while (IsNewline(view[index])) {
87       ++index;
88     }
89   }
90 
91   return view.substr(index);
92 }
93 
SkipWhitespace(Parser * p,bool skip_comments=true)94 void SkipWhitespace(Parser* p, bool skip_comments = true) {
95   if (!skip_comments) {
96     p->data = absl::StripLeadingAsciiWhitespace(p->data).data();
97     return;
98   }
99 
100   absl::string_view view = p->data;
101   absl::string_view new_view;
102 
103   while (true) {
104     new_view = SkipComment(view);
105     if (new_view.data() == view.data()) {
106       new_view = absl::StripLeadingAsciiWhitespace(view);
107     }
108 
109     if (new_view == view) {
110       break;
111     }
112 
113     view = new_view;
114   }
115 
116   p->data = new_view.data();
117 }
118 
TrySkipNewline(Parser * p)119 bool TrySkipNewline(Parser* p) {
120   auto* new_view = SkipNewline(p->data).data();
121   bool is_changed = p->data == new_view;
122   p->data = new_view;
123   return is_changed;
124 }
125 
TrySkipCharacter(Parser * p,char c)126 bool TrySkipCharacter(Parser* p, char c) {
127   if (p->data[0] == c) {
128     p->data++;
129     return true;
130   }
131 
132   return false;
133 }
134 
135 enum class AssignType {
136   kInvalid = -1,
137   kAssign,
138   kAssignT,
139   kAssignG,
140 };
141 
ParseAssignmentType(Parser * p)142 AssignType ParseAssignmentType(Parser* p) {
143   const char* it = p->data;
144   if (it[0] == '=') {
145     p->data = it + 1;
146     return AssignType::kAssign;
147   } else if (it[0] == '/') {
148     ++it;
149     if (it[0] == '/' && it[1] == '=') {
150       p->data = it + 2;
151       return AssignType::kAssignG;
152     } else if (it[0] == '=') {
153       p->data = it + 1;
154       return AssignType::kAssignT;
155     }
156   }
157   return AssignType::kInvalid;
158 }
159 
160 AstNode* ParseType1(Parser* p);
161 AstNode* ParseType(Parser* p, bool skip_comments = true);
162 AstNode* ParseId(Parser* p);
163 
SkipUint(Parser * p)164 void SkipUint(Parser* p) {
165   absl::string_view view = p->data;
166 
167   bool is_binary = false;
168   size_t index = 0;
169   if (absl::StartsWith(view, "0b")) {
170     is_binary = true;
171     index = 2;
172   } else if (absl::StartsWith(view, "0x")) {
173     index = 2;
174   }
175 
176   while (index < view.length() && absl::ascii_isdigit(view[index])) {
177     if (is_binary) {
178       CHECK(IsBinaryDigit(view[index]));
179     }
180 
181     ++index;
182   }
183 
184   p->data = view.substr(index).data();
185 }
186 
ParseNumber(Parser * p)187 AstNode* ParseNumber(Parser* p) {
188   Parser p_speculative{p->data};
189   if (!absl::ascii_isdigit(p_speculative.data[0]) &&
190       p_speculative.data[0] != '-') {
191     // TODO(btolsch): Add support for hexfloat, fraction, exponent.
192     return nullptr;
193   }
194   if (p_speculative.data[0] == '-') {
195     ++p_speculative.data;
196   }
197 
198   SkipUint(&p_speculative);
199 
200   AstNode* node =
201       AddNode(p, AstNode::Type::kNumber,
202               absl::string_view(p->data, p_speculative.data - p->data));
203   p->data = p_speculative.data;
204   std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
205             std::back_inserter(p->nodes));
206   return node;
207 }
208 
ParseText(Parser * p)209 AstNode* ParseText(Parser* p) {
210   return nullptr;
211 }
212 
ParseBytes(Parser * p)213 AstNode* ParseBytes(Parser* p) {
214   return nullptr;
215 }
216 
217 // Returns whether |c| could be the first character in a valid "value" string.
218 // This is not a guarantee however, since 'h' and 'b' could also indicate the
219 // start of an ID, but value needs to be tried first.
IsValue(char c)220 bool IsValue(char c) {
221   return (c == '-' || absl::ascii_isdigit(c) ||  // FIRST(number)
222           c == '"' ||                            // FIRST(text)
223           c == '\'' || c == 'h' || c == 'b');    // FIRST(bytes)
224 }
225 
ParseValue(Parser * p)226 AstNode* ParseValue(Parser* p) {
227   AstNode* node = ParseNumber(p);
228   if (!node) {
229     node = ParseText(p);
230   }
231   if (!node) {
232     ParseBytes(p);
233   }
234   return node;
235 }
236 
237 // Determines whether an occurrence operator (such as '*' or '?') prefacing
238 // the group definition occurs before the next whitespace character, and
239 // creates a new Occurrence node if so.
ParseOccur(Parser * p)240 AstNode* ParseOccur(Parser* p) {
241   Parser p_speculative{p->data};
242 
243   if (*p_speculative.data == '?' || *p_speculative.data == '+') {
244     p_speculative.data++;
245   } else {
246     SkipUint(&p_speculative);
247     if (*p_speculative.data++ != '*') {
248       return nullptr;
249     }
250     SkipUint(&p_speculative);
251   }
252 
253   AstNode* node =
254       AddNode(p, AstNode::Type::kOccur,
255               absl::string_view(p->data, p_speculative.data - p->data));
256   p->data = p_speculative.data;
257   return node;
258 }
259 
ParseTypeKeyFromComment(Parser * p)260 absl::optional<std::string> ParseTypeKeyFromComment(Parser* p) {
261   Parser p_speculative{p->data};
262   if (!TrySkipCharacter(&p_speculative, ';')) {
263     return absl::nullopt;
264   }
265 
266   SkipWhitespace(&p_speculative, false);
267   const char kTypeKeyPrefix[] = "type key";
268   if (!absl::StartsWith(p_speculative.data, kTypeKeyPrefix)) {
269     return absl::nullopt;
270   }
271   p_speculative.data += strlen(kTypeKeyPrefix);
272 
273   SkipWhitespace(&p_speculative, false);
274   Parser p_speculative2{p_speculative.data};
275   for (; absl::ascii_isdigit(p_speculative2.data[0]); p_speculative2.data++) {
276   }
277   auto result = absl::string_view(p_speculative.data,
278                                   p_speculative2.data - p_speculative.data);
279   p->data = p_speculative2.data;
280   return std::string(result.data()).substr(0, result.length());
281 }
282 
ParseMemberKeyFromComment(Parser * p)283 AstNode* ParseMemberKeyFromComment(Parser* p) {
284   Parser p_speculative{p->data};
285   if (!TrySkipCharacter(&p_speculative, ';')) {
286     return nullptr;
287   }
288 
289   SkipWhitespace(&p_speculative, false);
290 
291   AstNode* value = ParseId(&p_speculative);
292   if (!value) {
293     return nullptr;
294   }
295 
296   SkipWhitespace(&p_speculative, false);
297   if (!TrySkipNewline(&p_speculative)) {
298     return nullptr;
299   }
300 
301   AstNode* node = AddNode(p, AstNode::Type::kMemberKey, value->text, value);
302   p->data = p_speculative.data;
303   std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
304             std::back_inserter(p->nodes));
305 
306   return node;
307 }
308 
ParseMemberKey1(Parser * p)309 AstNode* ParseMemberKey1(Parser* p) {
310   Parser p_speculative{p->data};
311   if (!ParseType1(&p_speculative)) {
312     return nullptr;
313   }
314 
315   SkipWhitespace(&p_speculative);
316 
317   if (*p_speculative.data++ != '=' || *p_speculative.data++ != '>') {
318     return nullptr;
319   }
320   AstNode* node =
321       AddNode(p, AstNode::Type::kMemberKey,
322               absl::string_view(p->data, p_speculative.data - p->data));
323   p->data = p_speculative.data;
324   std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
325             std::back_inserter(p->nodes));
326   return node;
327 }
328 
ParseMemberKey2(Parser * p)329 AstNode* ParseMemberKey2(Parser* p) {
330   Parser p_speculative{p->data};
331   AstNode* id = ParseId(&p_speculative);
332   if (!id) {
333     return nullptr;
334   }
335 
336   SkipWhitespace(&p_speculative);
337 
338   if (*p_speculative.data++ != ':') {
339     return nullptr;
340   }
341 
342   AstNode* node =
343       AddNode(p, AstNode::Type::kMemberKey,
344               absl::string_view(p->data, p_speculative.data - p->data), id);
345   p->data = p_speculative.data;
346   std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
347             std::back_inserter(p->nodes));
348   return node;
349 }
350 
ParseMemberKey3(Parser * p)351 AstNode* ParseMemberKey3(Parser* p) {
352   Parser p_speculative{p->data};
353   AstNode* value = ParseValue(&p_speculative);
354   if (!value) {
355     return nullptr;
356   }
357 
358   SkipWhitespace(&p_speculative);
359 
360   if (*p_speculative.data++ != ':') {
361     return nullptr;
362   }
363   AstNode* node =
364       AddNode(p, AstNode::Type::kMemberKey,
365               absl::string_view(p->data, p_speculative.data - p->data), value);
366   p->data = p_speculative.data;
367   std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
368             std::back_inserter(p->nodes));
369   return node;
370 }
371 
372 // Iteratively tries all valid member key formats, retuning a Node representing
373 // the member key if found or nullptr if not.
ParseMemberKey(Parser * p)374 AstNode* ParseMemberKey(Parser* p) {
375   AstNode* node = ParseMemberKey1(p);
376   if (!node) {
377     node = ParseMemberKey2(p);
378   }
379   if (!node) {
380     node = ParseMemberKey3(p);
381   }
382   return node;
383 }
384 
385 AstNode* ParseGroupEntry(Parser* p);
386 
SkipOptionalComma(Parser * p)387 bool SkipOptionalComma(Parser* p) {
388   SkipWhitespace(p);
389   if (p->data[0] == ',') {
390     ++p->data;
391     SkipWhitespace(p);
392   }
393   return true;
394 }
395 
396 // Parse the group contained inside of other brackets. Since the brackets around
397 // the group are optional inside of other brackets, we can't directly call
398 // ParseGroupEntry(...) and instead need this wrapper around it.
ParseGroupChoice(Parser * p)399 AstNode* ParseGroupChoice(Parser* p) {
400   Parser p_speculative{p->data};
401   AstNode* tail = nullptr;
402   AstNode* group_node =
403       AddNode(&p_speculative, AstNode::Type::kGrpchoice, absl::string_view());
404   const char* group_node_text = p_speculative.data;
405   while (true) {
406     const char* orig = p_speculative.data;
407     AstNode* group_entry = ParseGroupEntry(&p_speculative);
408     if (!group_entry) {
409       p_speculative.data = orig;
410       if (!group_node->children) {
411         return nullptr;
412       }
413       group_node->text =
414           std::string(group_node_text, p_speculative.data - group_node_text);
415       p->data = p_speculative.data;
416       std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
417                 std::back_inserter(p->nodes));
418       return group_node;
419     }
420     if (!group_node->children) {
421       group_node->children = group_entry;
422     }
423     if (tail) {
424       tail->sibling = group_entry;
425     }
426     tail = group_entry;
427     if (!SkipOptionalComma(&p_speculative)) {
428       return nullptr;
429     }
430   }
431 }
432 
ParseGroup(Parser * p)433 AstNode* ParseGroup(Parser* p) {
434   const char* orig = p->data;
435   AstNode* group_choice = ParseGroupChoice(p);
436   if (!group_choice) {
437     return nullptr;
438   }
439   return AddNode(p, AstNode::Type::kGroup,
440                  absl::string_view(orig, p->data - orig), group_choice);
441 }
442 
443 // Parse optional range operator .. (inlcusive) or ... (exclusive)
444 // ABNF rule: rangeop = "..." / ".."
ParseRangeop(Parser * p)445 AstNode* ParseRangeop(Parser* p) {
446   absl::string_view view(p->data);
447   if (absl::StartsWith(view, "...")) {
448     // rangeop ...
449     p->data += 3;
450     return AddNode(p, AstNode::Type::kRangeop, view.substr(0, 3));
451   } else if (absl::StartsWith(view, "..")) {
452     // rangeop ..
453     p->data += 2;
454     return AddNode(p, AstNode::Type::kRangeop, view.substr(0, 2));
455   }
456   return nullptr;
457 }
458 
459 // Parse optional control operator .id
460 // ABNF rule: ctlop = "." id
ParseCtlop(Parser * p)461 AstNode* ParseCtlop(Parser* p) {
462   absl::string_view view(p->data);
463   if (!absl::StartsWith(view, ".")) {
464     return nullptr;
465   }
466   ++p->data;
467   AstNode* id = ParseId(p);
468   if (!id) {
469     return nullptr;
470   }
471   return AddNode(p, AstNode::Type::kCtlop,
472                  view.substr(0, p->data - view.begin()), id);
473 }
474 
ParseType2(Parser * p)475 AstNode* ParseType2(Parser* p) {
476   const char* orig = p->data;
477   const char* it = p->data;
478   AstNode* node = AddNode(p, AstNode::Type::kType2, absl::string_view());
479   if (IsValue(it[0])) {
480     AstNode* value = ParseValue(p);
481     if (!value) {
482       if (it[0] == 'h' || it[0] == 'b') {
483         AstNode* id = ParseId(p);
484         if (!id) {
485           return nullptr;
486         }
487         id->type = AstNode::Type::kTypename;
488         node->children = id;
489       } else {
490         return nullptr;
491       }
492     } else {
493       node->children = value;
494     }
495   } else if (IsExtendedAlpha(it[0])) {
496     AstNode* id = ParseId(p);
497     if (!id) {
498       return nullptr;
499     }
500     if (p->data[0] == '<') {
501       std::cerr << "It looks like you're trying to use a generic argument, "
502                    "which we don't support"
503                 << std::endl;
504       return nullptr;
505     }
506     id->type = AstNode::Type::kTypename;
507     node->children = id;
508   } else if (it[0] == '(') {
509     p->data = it + 1;
510     SkipWhitespace(p);
511     if (p->data[0] == ')') {
512       std::cerr << "It looks like you're trying to provide an empty Type (), "
513                    "which we don't support"
514                 << std::endl;
515       return nullptr;
516     }
517     AstNode* type = ParseType(p);
518     if (!type) {
519       return nullptr;
520     }
521     SkipWhitespace(p);
522     if (p->data[0] != ')') {
523       return nullptr;
524     }
525     ++p->data;
526     node->children = type;
527   } else if (it[0] == '{') {
528     p->data = it + 1;
529     SkipWhitespace(p);
530     if (p->data[0] == '}') {
531       std::cerr << "It looks like you're trying to provide an empty Group {}, "
532                    "which we don't support"
533                 << std::endl;
534       return nullptr;
535     }
536     AstNode* group = ParseGroup(p);
537     if (!group) {
538       return nullptr;
539     }
540     SkipWhitespace(p);
541     if (p->data[0] != '}') {
542       return nullptr;
543     }
544     ++p->data;
545     node->children = group;
546   } else if (it[0] == '[') {
547     p->data = it + 1;
548     SkipWhitespace(p);
549     AstNode* group = ParseGroup(p);
550     if (!group) {
551       return nullptr;
552     }
553     SkipWhitespace(p);
554     if (p->data[0] != ']') {
555       return nullptr;
556     }
557     ++p->data;
558     node->children = group;
559   } else if (it[0] == '~') {
560     p->data = it + 1;
561     SkipWhitespace(p);
562     if (!ParseId(p)) {
563       return nullptr;
564     }
565     if (p->data[0] == '<') {
566       std::cerr << "It looks like you're trying to use a generic argument, "
567                    "which we don't support"
568                 << std::endl;
569       return nullptr;
570     }
571   } else if (it[0] == '&') {
572     p->data = it + 1;
573     SkipWhitespace(p);
574     if (p->data[0] == '(') {
575       ++p->data;
576       SkipWhitespace(p);
577       if (p->data[0] == ')') {
578         std::cerr << "It looks like you're trying to provide an empty Type &(),"
579                      " which we don't support"
580                   << std::endl;
581         return nullptr;
582       }
583       AstNode* group = ParseGroup(p);
584       if (!group) {
585         return nullptr;
586       }
587       SkipWhitespace(p);
588       if (p->data[0] != ')') {
589         return nullptr;
590       }
591       ++p->data;
592       node->children = group;
593     } else {
594       AstNode* id = ParseId(p);
595       if (id) {
596         if (p->data[0] == '<') {
597           std::cerr << "It looks like you're trying to use a generic argument, "
598                        "which we don't support"
599                     << std::endl;
600           return nullptr;
601         }
602         id->type = AstNode::Type::kGroupname;
603         node->children = id;
604       } else {
605         return nullptr;
606       }
607     }
608   } else if (it[0] == '#') {
609     ++it;
610     if (it[0] == '6') {
611       ++it;
612       if (it[0] == '.') {
613         p->data = it + 1;
614         SkipUint(p);
615         it = p->data;
616       }
617       if (it[0] != '(') {
618         return nullptr;
619       }
620       p->data = ++it;
621       SkipWhitespace(p);
622       AstNode* type = ParseType(p);
623       if (!type) {
624         return nullptr;
625       }
626       SkipWhitespace(p);
627       if (p->data[0] != ')') {
628         return nullptr;
629       }
630       ++p->data;
631       node->children = type;
632     } else if (absl::ascii_isdigit(it[0])) {
633       std::cerr << "# MAJOR unimplemented" << std::endl;
634       return nullptr;
635     } else {
636       p->data = it;
637     }
638   } else {
639     return nullptr;
640   }
641   node->text = std::string(orig, p->data - orig);
642   return node;
643 }
644 
ParseType1(Parser * p)645 AstNode* ParseType1(Parser* p) {
646   const char* orig = p->data;
647   AstNode* type2 = ParseType2(p);
648   if (!type2) {
649     return nullptr;
650   }
651   SkipWhitespace(p, false);
652   AstNode* rangeop_or_ctlop = ParseRangeop(p);
653   if (!rangeop_or_ctlop) {
654     rangeop_or_ctlop = ParseCtlop(p);
655   }
656   if (rangeop_or_ctlop) {
657     SkipWhitespace(p, false);
658     AstNode* param = ParseType2(p);
659     if (!param) {
660       return nullptr;
661     }
662     type2->sibling = rangeop_or_ctlop;
663     rangeop_or_ctlop->sibling = param;
664   }
665   return AddNode(p, AstNode::Type::kType1,
666                  absl::string_view(orig, p->data - orig), type2);
667 }
668 
669 // Different valid types for a call are specified as type1 / type2, so we split
670 // at the '/' character and process each allowed type separately.
ParseType(Parser * p,bool skip_comments)671 AstNode* ParseType(Parser* p, bool skip_comments) {
672   Parser p_speculative{p->data};
673 
674   // Parse all allowed types into a linked list starting in type1's sibling ptr.
675   AstNode* type1 = ParseType1(&p_speculative);
676   if (!type1) {
677     return nullptr;
678   }
679   SkipWhitespace(&p_speculative, skip_comments);
680 
681   AstNode* tail = type1;
682   while (*p_speculative.data == '/') {
683     ++p_speculative.data;
684     SkipWhitespace(&p_speculative, skip_comments);
685 
686     AstNode* next_type1 = ParseType1(&p_speculative);
687     if (!next_type1) {
688       return nullptr;
689     }
690     tail->sibling = next_type1;
691     tail = next_type1;
692     SkipWhitespace(&p_speculative, skip_comments);
693   }
694 
695   // Create a new AstNode with all parsed types.
696   AstNode* node =
697       AddNode(p, AstNode::Type::kType,
698               absl::string_view(p->data, p_speculative.data - p->data), type1);
699   p->data = p_speculative.data;
700   std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
701             std::back_inserter(p->nodes));
702   return node;
703 }
704 
ParseId(Parser * p)705 AstNode* ParseId(Parser* p) {
706   const char* id = p->data;
707 
708   // If the id doesnt start with a valid name character, return null.
709   if (!IsExtendedAlpha(id[0])) {
710     return nullptr;
711   }
712 
713   // Read through the name character by character and make sure it's valid.
714   const char* it = id + 1;
715   while (true) {
716     if (it[0] == '-' || it[0] == '.') {
717       ++it;
718       if (!IsExtendedAlpha(it[0]) && !absl::ascii_isdigit(it[0])) {
719         return nullptr;
720       }
721       ++it;
722     } else if (IsExtendedAlpha(it[0]) || absl::ascii_isdigit(it[0])) {
723       ++it;
724     } else {
725       break;
726     }
727   }
728 
729   // Create and return a new node with the parsed data.
730   AstNode* node =
731       AddNode(p, AstNode::Type::kId, absl::string_view(p->data, it - p->data));
732   p->data = it;
733   return node;
734 }
735 
UpdateNodesForGroupEntry(Parser * p,Parser * p_speculative,AstNode * occur,AstNode * member_key,AstNode * type)736 AstNode* UpdateNodesForGroupEntry(Parser* p,
737                                   Parser* p_speculative,
738                                   AstNode* occur,
739                                   AstNode* member_key,
740                                   AstNode* type) {
741   AstNode* node = AddNode(p, AstNode::Type::kGrpent, absl::string_view());
742   if (occur) {
743     node->children = occur;
744     if (member_key) {
745       occur->sibling = member_key;
746       member_key->sibling = type;
747     } else {
748       occur->sibling = type;
749     }
750   } else if (member_key) {
751     node->children = member_key;
752     member_key->sibling = type;
753   } else {
754     node->children = type;
755   }
756   node->text = std::string(p->data, p_speculative->data - p->data);
757   p->data = p_speculative->data;
758   std::move(p_speculative->nodes.begin(), p_speculative->nodes.end(),
759             std::back_inserter(p->nodes));
760   return node;
761 }
762 
763 // Parse a group entry of form <id_num>: <type> ; <name>
ParseGroupEntryWithNameInComment(Parser * p)764 AstNode* ParseGroupEntryWithNameInComment(Parser* p) {
765   Parser p_speculative{p->data};
766   AstNode* occur = ParseOccur(&p_speculative);
767   if (occur) {
768     SkipWhitespace(&p_speculative, false);
769   }
770   AstNode* member_key_num = ParseValue(&p_speculative);
771   if (!member_key_num) {
772     return nullptr;
773   }
774   SkipWhitespace(&p_speculative, false);
775   if (*p_speculative.data++ != ':') {
776     return nullptr;
777   }
778   SkipWhitespace(&p_speculative, false);
779   AstNode* type = ParseType(&p_speculative, false);
780   if (!type) {
781     return nullptr;
782   }
783   SkipWhitespace(&p_speculative, false);
784   AstNode* member_key = ParseMemberKeyFromComment(&p_speculative);
785   if (!member_key) {
786     return nullptr;
787   }
788 
789   member_key->integer_member_key_text = member_key_num->text;
790 
791   return UpdateNodesForGroupEntry(p, &p_speculative, occur, member_key, type);
792 }
793 
ParseGroupEntryWithNameAsId(Parser * p)794 AstNode* ParseGroupEntryWithNameAsId(Parser* p) {
795   Parser p_speculative{p->data};
796   AstNode* occur = ParseOccur(&p_speculative);
797   if (occur) {
798     SkipWhitespace(&p_speculative);
799   }
800   AstNode* member_key = ParseMemberKey(&p_speculative);
801   if (member_key) {
802     SkipWhitespace(&p_speculative);
803   }
804   AstNode* type = ParseType(&p_speculative);
805   if (!type) {
806     return nullptr;
807   }
808   return UpdateNodesForGroupEntry(p, &p_speculative, occur, member_key, type);
809 }
810 
811 // NOTE: This should probably never be hit, why is it in the grammar?
ParseGroupEntryWithGroupReference(Parser * p)812 AstNode* ParseGroupEntryWithGroupReference(Parser* p) {
813   Parser p_speculative{p->data};
814 
815   // Check for an occurance indicator ('?', '*', "+") before the sub-group
816   // definition.
817   AstNode* occur = ParseOccur(&p_speculative);
818   if (occur) {
819     SkipWhitespace(&p_speculative);
820   }
821 
822   // Parse the ID of the sub-group.
823   AstNode* id = ParseId(&p_speculative);
824   if (!id) {
825     return nullptr;
826   }
827   id->type = AstNode::Type::kGroupname;
828   if (*p_speculative.data == '<') {
829     std::cerr << "It looks like you're trying to use a generic argument, "
830                  "which we don't support"
831               << std::endl;
832     return nullptr;
833   }
834 
835   // Create a new node containing this sub-group reference.
836   AstNode* node = AddNode(p, AstNode::Type::kGrpent, absl::string_view());
837   if (occur) {
838     occur->sibling = id;
839     node->children = occur;
840   } else {
841     node->children = id;
842   }
843   node->text = std::string(p->data, p_speculative.data - p->data);
844   p->data = p_speculative.data;
845   std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
846             std::back_inserter(p->nodes));
847   return node;
848 }
849 
850 // Recursively parse a group entry that's an inline-defined group of the form
851 // '(...<some contents>...)'.
ParseGroupEntryWithInlineGroupDefinition(Parser * p)852 AstNode* ParseGroupEntryWithInlineGroupDefinition(Parser* p) {
853   Parser p_speculative{p->data};
854   AstNode* occur = ParseOccur(&p_speculative);
855   if (occur) {
856     SkipWhitespace(&p_speculative);
857   }
858   if (*p_speculative.data != '(') {
859     return nullptr;
860   }
861   ++p_speculative.data;
862   SkipWhitespace(&p_speculative);
863   AstNode* group = ParseGroup(&p_speculative);  // Recursive call here.
864   if (!group) {
865     return nullptr;
866   }
867 
868   SkipWhitespace(&p_speculative);
869   if (*p_speculative.data != ')') {
870     return nullptr;
871   }
872   ++p_speculative.data;
873   AstNode* node = AddNode(p, AstNode::Type::kGrpent, absl::string_view());
874   if (occur) {
875     node->children = occur;
876     occur->sibling = group;
877   } else {
878     node->children = group;
879   }
880   node->text = std::string(p->data, p_speculative.data - p->data);
881   p->data = p_speculative.data;
882   std::move(p_speculative.nodes.begin(), p_speculative.nodes.end(),
883             std::back_inserter(p->nodes));
884   return node;
885 }
886 
887 // Recursively parse the group assignemnt.
ParseGroupEntry(Parser * p)888 AstNode* ParseGroupEntry(Parser* p) {
889   // Parse a group entry of form '#: type ; name'
890   AstNode* node = ParseGroupEntryWithNameInComment(p);
891 
892   // Parse a group entry of form 'id: type'.
893   if (!node) {
894     node = ParseGroupEntryWithNameAsId(p);
895   }
896 
897   // Parse a group entry of form 'subgroupName'.
898   if (!node) {
899     node = ParseGroupEntryWithGroupReference(p);
900   }
901 
902   // Parse a group entry of the form: '(' <some contents> ')'.
903   // NOTE: This is the method hit during the top-level group parsing, and the
904   // recursive call occurs inside this method.
905   if (!node) {
906     node = ParseGroupEntryWithInlineGroupDefinition(p);
907   }
908 
909   // Return the results of the recursive call.
910   return node;
911 }
912 
ParseRule(Parser * p)913 AstNode* ParseRule(Parser* p) {
914   const char* start = p->data;
915 
916   // Parse the type key, if it's present
917   absl::optional<std::string> type_key = ParseTypeKeyFromComment(p);
918   SkipWhitespace(p);
919 
920   // Use the parser to extract the id and data.
921   AstNode* id = ParseId(p);
922   if (!id) {
923     Logger::Error("No id found!");
924     return nullptr;
925   }
926   if (p->data[0] == '<') {
927     std::cerr << "It looks like you're trying to use a generic parameter, "
928                  "which we don't support"
929               << std::endl;
930     return nullptr;
931   }
932 
933   // Determine the type of assignment being done to this variable name (ie '=').
934   SkipWhitespace(p);
935   const char* assign_start = p->data;
936   AssignType assign_type = ParseAssignmentType(p);
937   if (assign_type != AssignType::kAssign) {
938     Logger::Error("No assignment operator found! assign_type: %d", assign_type);
939     return nullptr;
940   }
941   AstNode* assign_node = AddNode(
942       p,
943       (assign_type == AssignType::kAssign)
944           ? AstNode::Type::kAssign
945           : (assign_type == AssignType::kAssignT) ? AstNode::Type::kAssignT
946                                                   : AstNode::Type::kAssignG,
947       absl::string_view(assign_start, p->data - assign_start));
948   id->sibling = assign_node;
949 
950   // Parse the object type being assigned.
951   SkipWhitespace(p);
952   AstNode* type = ParseType(p, false);  // Try to parse it as a type.
953   id->type = AstNode::Type::kTypename;
954   if (!type) {  // If it's not a type, try and parse it as a group.
955     type = ParseGroupEntry(p);
956     id->type = AstNode::Type::kGroupname;
957   }
958   if (!type) {  // if it's not a type or a group, exit as failure.
959     Logger::Error("No node type found!");
960     return nullptr;
961   }
962   assign_node->sibling = type;
963   SkipWhitespace(p, false);
964 
965   // Return the results.
966   auto rule_node = AddNode(p, AstNode::Type::kRule,
967                            absl::string_view(start, p->data - start), id);
968   rule_node->type_key = type_key;
969   return rule_node;
970 }
971 
972 // Iteratively parse the CDDL spec into a tree structure.
ParseCddl(absl::string_view data)973 ParseResult ParseCddl(absl::string_view data) {
974   if (data[0] == 0) {
975     return {nullptr, {}};
976   }
977   Parser p{data.data()};
978 
979   SkipWhitespace(&p);
980   AstNode* root = nullptr;
981   AstNode* tail = nullptr;
982   do {
983     AstNode* next = ParseRule(&p);
984     if (!next) {
985       Logger::Error("Failed to parse next node. Failed starting at: '%s'",
986                     p.data);
987       return {nullptr, {}};
988     } else {
989       Logger::Log("Processed text \"%s\" into node: ", next->text);
990       DumpAst(next);
991     }
992 
993     if (!root) {
994       root = next;
995     }
996     if (tail) {
997       tail->sibling = next;
998     }
999     tail = next;
1000   } while (p.data[0]);
1001   return {root, std::move(p.nodes)};
1002 }
1003 
1004 // Recursively print out the AstNode graph.
DumpAst(AstNode * node,int indent_level)1005 void DumpAst(AstNode* node, int indent_level) {
1006   while (node) {
1007     // Prefix with '-'s so the levels of the graph are clear.
1008     std::string node_text = "";
1009     for (int i = 0; i <= indent_level; ++i) {
1010       node_text += "--";
1011     }
1012 
1013     // Print the type.
1014     switch (node->type) {
1015       case AstNode::Type::kRule:
1016         node_text += "kRule";
1017         break;
1018       case AstNode::Type::kTypename:
1019         node_text += "kTypename";
1020         break;
1021       case AstNode::Type::kGroupname:
1022         node_text += "kGroupname";
1023         break;
1024       case AstNode::Type::kAssign:
1025         node_text += "kAssign";
1026         break;
1027       case AstNode::Type::kAssignT:
1028         node_text += "kAssignT";
1029         break;
1030       case AstNode::Type::kAssignG:
1031         node_text += "kAssignG";
1032         break;
1033       case AstNode::Type::kType:
1034         node_text += "kType";
1035         break;
1036       case AstNode::Type::kGrpent:
1037         node_text += "kGrpent";
1038         break;
1039       case AstNode::Type::kType1:
1040         node_text += "kType1";
1041         break;
1042       case AstNode::Type::kType2:
1043         node_text += "kType2";
1044         break;
1045       case AstNode::Type::kValue:
1046         node_text += "kValue";
1047         break;
1048       case AstNode::Type::kGroup:
1049         node_text += "kGroup";
1050         break;
1051       case AstNode::Type::kUint:
1052         node_text += "kUint";
1053         break;
1054       case AstNode::Type::kDigit:
1055         node_text += "kDigit";
1056         break;
1057       case AstNode::Type::kRangeop:
1058         node_text += "kRangeop";
1059         break;
1060       case AstNode::Type::kCtlop:
1061         node_text += "kCtlop";
1062         break;
1063       case AstNode::Type::kGrpchoice:
1064         node_text += "kGrpchoice";
1065         break;
1066       case AstNode::Type::kOccur:
1067         node_text += "kOccur";
1068         break;
1069       case AstNode::Type::kMemberKey:
1070         node_text += "kMemberKey";
1071         break;
1072       case AstNode::Type::kId:
1073         node_text += "kId";
1074         break;
1075       case AstNode::Type::kNumber:
1076         node_text += "kNumber";
1077         break;
1078       case AstNode::Type::kText:
1079         node_text += "kText";
1080         break;
1081       case AstNode::Type::kBytes:
1082         node_text += "kBytes";
1083         break;
1084       case AstNode::Type::kOther:
1085         node_text += "kOther";
1086         break;
1087     }
1088     if (node->type_key != absl::nullopt) {
1089       node_text += " (type key=\"" + node->type_key.value() + "\")";
1090     }
1091     node_text += ": ";
1092 
1093     // Print the contents.
1094     int size = static_cast<int>(node->text.size());
1095     absl::string_view text = node->text.data();
1096     for (int i = 0; i < size; ++i) {
1097       if (text[i] == ' ' || text[i] == '\n') {
1098         node_text += " ";
1099         while (i < size - 1 && (text[i + 1] == ' ' || text[i + 1] == '\n')) {
1100           ++i;
1101         }
1102         continue;
1103       } else {
1104         node_text += text[i];
1105       }
1106     }
1107     Logger::Log(node_text);
1108 
1109     // Recursively print children then iteratively print siblings.
1110     DumpAst(node->children, indent_level + 1);
1111     node = node->sibling;
1112   }
1113 }
1114