xref: /aosp_15_r20/external/image_io/includes/image_io/xml/xml_rule.h (revision ca0779eb572efbbfda2e47f806647c3c7eeea8c3)
1 #ifndef IMAGE_IO_XML_XML_RULE_H_  // NOLINT
2 #define IMAGE_IO_XML_XML_RULE_H_  // NOLINT
3 
4 #include <memory>
5 #include <string>
6 #include <vector>
7 
8 #include "image_io/base/data_match_result.h"
9 #include "image_io/xml/xml_handler_context.h"
10 #include "image_io/xml/xml_terminal.h"
11 
12 namespace photos_editing_formats {
13 namespace image_io {
14 
15 /// A rule represents a sequence of terminals to match text from a DataSource,
16 /// and the state needed to keep track the parsing operation in case the text
17 /// is split across multiple DataSegments. XmlRules collaborate with an instance
18 /// of XmlHandler to process the token values the terminals produce.
19 ///
20 /// Terminals are added in the constructors of the rule subclasses, and are
21 /// not typically accessed directly from the clients of an XmlRule. Instead,
22 /// XmlRule clients normally just call the rule's Parse function and take action
23 /// based on the DataMatchResult value that is returned. The functions of the
24 /// XmlHandler are called internally by the rule's terminals as they parse the
25 /// text in the data segment.
26 ///
27 /// Normally, the terminals are parsed by the Parse() function in a sequential
28 /// manner until they are exhausted. At which time the Parse function returns
29 /// with a DataMatchResult that has a type equal to kFull. If the DataSegment
30 /// runs out of data before the end of the final terminal, the result type will
31 /// be kPartialOutOfData. Of course if any of the terminals' scanners detect an
32 /// error the result type will be kError.
33 ///
34 /// Rules may decide to delegate the parsing process to another rule. There are
35 /// two types of delegation:
36 /// 1. Rule chaining - in this case a rule decides that another rule should
37 ///    be used instead to continue the parsing process. This situation is
38 ///    indicated when the result type is kFull and the rule's HasNextRule()
39 ///    function returns true. The chained-to rule is obtained by calling the
40 ///    rule's GetNextRule() function. The current rule can be discarded.
41 /// 2. Child rules - in this case a "parent" rule decides that the next set of
42 ///    syntax should be parsed by another "child" rule, and after that rule
43 ///    completes, the parsing task should be returned to the parent rule. This
44 ///    situaltion is indicated when the result type is kPartial and the rule's
45 ///    HasNextRule() returns true. The child rule is obtained by calling the
46 ///    rule's GetNextRule() function. The current parent rule should be placed
47 ///    on a stack until the child rule is done, and then the child discarded and
48 ///    the parent rule used for the next Parse operation.
49 /// The action functions associated with a terminal are typically used to create
50 /// the next rule and set the result type and thus initiate the delegation
51 /// process. When the XmlRule::Parse function detects a delegation has been
52 /// requested, it returns to its caller so that the caller can handle the
53 /// delegation in the appropriate fashion. For an example, see the XmlReader's
54 /// Parse() function.
55 ///
56 /// In addition to delegation the action functions associated with a terminal
57 /// can change the order of the terminals processed from a strictly sequential
58 /// order to whatever the rule so desires. This is done by calling the rule's
59 /// SetTerminalIndex() function. Terminals can be identified by name using the
60 /// GetTerminalIndexFromName() function if the rule's terminals were
61 /// constructed with names.  If the terminal index of a rule is set to a
62 /// terminal that has already been used, the terminal's scanners state must be
63 /// reset in order for it to parse successfully again.  Sometimes the entire
64 /// rule is "restarted" in which case the ResetTerminalScanners() function can
65 /// be called to reset the scanners of all the rules terminals.
66 ///
67 /// Finally, because of the look-ahead needs of the XML grammar, some rules
68 /// support alternate "starting points", allowing them to skip some set of
69 /// initial terminals when the rule's Parse() function is called. Rules that
70 /// support this feature will have a constructor with an StartPoint parameter.
71 class XmlRule {
72  public:
73   /// For rules that support alternate starting points, this enum provides the
74   /// values at which a rule's Parse() function can begin.
75   enum StartPoint {
76     /// Start parsing at the first terminal position.
77     kFirstStartPoint,
78 
79     /// STart parsing at a second (alternative) position.
80     kSecondStartPoint,
81   };
82 
83   virtual ~XmlRule() = default;
84   explicit XmlRule(const std::string& name);
85 
86   /// @return The name of the rule.
GetName()87   const std::string& GetName() const { return name_; }
88 
89   /// Parse the text indicated in the context's data segment and range and call
90   /// the context's XmlHandler functions as needed. The implementation of this
91   /// function makes use of the terminals contained by the rule, but it is
92   /// declared virtual so that subclasses can customize as needed.
93   /// @param context The context describing the text to parse and the handler
94   /// to call.
95   /// @param A result that indicates the type of match that occurred, the number
96   /// of bytes consumed and an error message if needed.
97   virtual DataMatchResult Parse(XmlHandlerContext context);
98 
99   /// Some rules are written such that there are optional tokens at the end,
100   /// and thus may be active on the XmlReader's rule stack when the end of the
101   /// text reached. This function determines whether it is permissible to finish
102   /// the parsing process even though this rule is active. Unless overridden,
103   /// this function returns false.
104   /// @param error_text A string pointer that will be used in the error message
105   /// that the caller produces if this function returns false. If left unset,
106   /// and the function returns false the caller is expected to use its own text.
107   /// @return Whether its ok for this rule to be active at the end of parsing.
108   virtual bool IsPermissibleToFinish(std::string* error_text) const;
109 
110   /// Adds a literal terminal to the rule.
111   /// @param literal The literal value to scan for.
112   /// @return The terminal, enabling direct calls to WithName()/WithAction().
113   XmlTerminal& AddLiteralTerminal(const std::string& literal);
114 
115   /// Adds a name terminal to the rule.
116   /// @return The terminal, enabling direct calls to WithName()/WithAction().
117   XmlTerminal& AddNameTerminal();
118 
119   /// Adds a quoted string terminal to the rule.
120   /// @return The terminal, enabling direct calls to WithName()/WithAction().
121   XmlTerminal& AddQuotedStringTerminal();
122 
123   /// Adds a sentinel terminal to the rule.
124   /// @param sentinels The sentinel values to scan for.
125   /// @return The terminal, enabling direct calls to WithName()/WithAction().
126   XmlTerminal& AddSentinelTerminal(const std::string& sentinels);
127 
128   /// Adds a scan through literal terminal to the rule.
129   /// @param literal The literal value to scan through.
130   /// @return The terminal, enabling direct calls to WithName()/WithAction().
131   XmlTerminal& AddThroughLiteralTerminal(const std::string& literal);
132 
133   /// Adds a whitespace terminal to the rule.
134   /// @return The terminal, enabling direct calls to WithName()/WithAction().
135   XmlTerminal& AddWhitespaceTerminal();
136 
137   /// Adds an optional whitespace terminal to the rule.
138   /// @return The terminal, enabling direct calls to WithName()/WithAction().
139   XmlTerminal& AddOptionalWhitespaceTerminal();
140 
141   /// @return The number of terminals in the rule.
GetTerminalCount()142   size_t GetTerminalCount() const { return terminals_.size(); }
143 
144   /// @return The index of the terminal currently parsing text.
GetTerminalIndex()145   size_t GetTerminalIndex() const { return terminal_index_; }
146 
147   /// @param name The name of the terminal to look for.
148   /// @return The index of the terminal with the given name, or the value
149   /// returned by the rule's GetTerminalCount() if not found.
150   size_t GetTerminalIndexFromName(const std::string name) const;
151 
152   /// @param terminal_index The index of the terminal that should next be used
153   /// for parsing the input text.
154   void SetTerminalIndex(size_t terminal_index);
155 
156   /// @return The terminal currently parsing text, or nullptr if there is none.
157   XmlTerminal* GetCurrentTerminal();
158 
159   /// @param index The index of the terminal to get.
160   /// @return The terminal at the given index, or nullptr if index is invalid.
161   XmlTerminal* GetTerminal(size_t index);
162 
163   /// Resets the scanner's state of all the terminals in the rule.
164   void ResetTerminalScanners();
165 
166   /// @return Whether the rule has a next rule for delegation.
167   bool HasNextRule() const;
168 
169   /// @return Returns the next rule to the caller. If there is no next rule,
170   /// the get function of the returned unique_ptr will return nullptr.
171   std::unique_ptr<XmlRule> ReleaseNextRule();
172 
173   /// @param next_rule The new rule to use for delegation purposes.
174   void SetNextRule(std::unique_ptr<XmlRule> next_rule);
175 
176  private:
177   std::string name_;
178   std::vector<XmlTerminal> terminals_;
179   std::unique_ptr<XmlRule> next_rule_;
180   size_t terminal_index_;
181 };
182 
183 }  // namespace image_io
184 }  // namespace photos_editing_formats
185 
186 #endif // IMAGE_IO_XML_XML_RULE_H_  // NOLINT
187