1 #ifndef IMAGE_IO_XML_XML_RULE_H_ // NOLINT 2 #define IMAGE_IO_XML_XML_RULE_H_ // NOLINT 3 4 #include <memory> 5 #include <string> 6 #include <vector> 7 8 #include "image_io/base/data_match_result.h" 9 #include "image_io/xml/xml_handler_context.h" 10 #include "image_io/xml/xml_terminal.h" 11 12 namespace photos_editing_formats { 13 namespace image_io { 14 15 /// A rule represents a sequence of terminals to match text from a DataSource, 16 /// and the state needed to keep track the parsing operation in case the text 17 /// is split across multiple DataSegments. XmlRules collaborate with an instance 18 /// of XmlHandler to process the token values the terminals produce. 19 /// 20 /// Terminals are added in the constructors of the rule subclasses, and are 21 /// not typically accessed directly from the clients of an XmlRule. Instead, 22 /// XmlRule clients normally just call the rule's Parse function and take action 23 /// based on the DataMatchResult value that is returned. The functions of the 24 /// XmlHandler are called internally by the rule's terminals as they parse the 25 /// text in the data segment. 26 /// 27 /// Normally, the terminals are parsed by the Parse() function in a sequential 28 /// manner until they are exhausted. At which time the Parse function returns 29 /// with a DataMatchResult that has a type equal to kFull. If the DataSegment 30 /// runs out of data before the end of the final terminal, the result type will 31 /// be kPartialOutOfData. Of course if any of the terminals' scanners detect an 32 /// error the result type will be kError. 33 /// 34 /// Rules may decide to delegate the parsing process to another rule. There are 35 /// two types of delegation: 36 /// 1. Rule chaining - in this case a rule decides that another rule should 37 /// be used instead to continue the parsing process. This situation is 38 /// indicated when the result type is kFull and the rule's HasNextRule() 39 /// function returns true. The chained-to rule is obtained by calling the 40 /// rule's GetNextRule() function. The current rule can be discarded. 41 /// 2. Child rules - in this case a "parent" rule decides that the next set of 42 /// syntax should be parsed by another "child" rule, and after that rule 43 /// completes, the parsing task should be returned to the parent rule. This 44 /// situaltion is indicated when the result type is kPartial and the rule's 45 /// HasNextRule() returns true. The child rule is obtained by calling the 46 /// rule's GetNextRule() function. The current parent rule should be placed 47 /// on a stack until the child rule is done, and then the child discarded and 48 /// the parent rule used for the next Parse operation. 49 /// The action functions associated with a terminal are typically used to create 50 /// the next rule and set the result type and thus initiate the delegation 51 /// process. When the XmlRule::Parse function detects a delegation has been 52 /// requested, it returns to its caller so that the caller can handle the 53 /// delegation in the appropriate fashion. For an example, see the XmlReader's 54 /// Parse() function. 55 /// 56 /// In addition to delegation the action functions associated with a terminal 57 /// can change the order of the terminals processed from a strictly sequential 58 /// order to whatever the rule so desires. This is done by calling the rule's 59 /// SetTerminalIndex() function. Terminals can be identified by name using the 60 /// GetTerminalIndexFromName() function if the rule's terminals were 61 /// constructed with names. If the terminal index of a rule is set to a 62 /// terminal that has already been used, the terminal's scanners state must be 63 /// reset in order for it to parse successfully again. Sometimes the entire 64 /// rule is "restarted" in which case the ResetTerminalScanners() function can 65 /// be called to reset the scanners of all the rules terminals. 66 /// 67 /// Finally, because of the look-ahead needs of the XML grammar, some rules 68 /// support alternate "starting points", allowing them to skip some set of 69 /// initial terminals when the rule's Parse() function is called. Rules that 70 /// support this feature will have a constructor with an StartPoint parameter. 71 class XmlRule { 72 public: 73 /// For rules that support alternate starting points, this enum provides the 74 /// values at which a rule's Parse() function can begin. 75 enum StartPoint { 76 /// Start parsing at the first terminal position. 77 kFirstStartPoint, 78 79 /// STart parsing at a second (alternative) position. 80 kSecondStartPoint, 81 }; 82 83 virtual ~XmlRule() = default; 84 explicit XmlRule(const std::string& name); 85 86 /// @return The name of the rule. GetName()87 const std::string& GetName() const { return name_; } 88 89 /// Parse the text indicated in the context's data segment and range and call 90 /// the context's XmlHandler functions as needed. The implementation of this 91 /// function makes use of the terminals contained by the rule, but it is 92 /// declared virtual so that subclasses can customize as needed. 93 /// @param context The context describing the text to parse and the handler 94 /// to call. 95 /// @param A result that indicates the type of match that occurred, the number 96 /// of bytes consumed and an error message if needed. 97 virtual DataMatchResult Parse(XmlHandlerContext context); 98 99 /// Some rules are written such that there are optional tokens at the end, 100 /// and thus may be active on the XmlReader's rule stack when the end of the 101 /// text reached. This function determines whether it is permissible to finish 102 /// the parsing process even though this rule is active. Unless overridden, 103 /// this function returns false. 104 /// @param error_text A string pointer that will be used in the error message 105 /// that the caller produces if this function returns false. If left unset, 106 /// and the function returns false the caller is expected to use its own text. 107 /// @return Whether its ok for this rule to be active at the end of parsing. 108 virtual bool IsPermissibleToFinish(std::string* error_text) const; 109 110 /// Adds a literal terminal to the rule. 111 /// @param literal The literal value to scan for. 112 /// @return The terminal, enabling direct calls to WithName()/WithAction(). 113 XmlTerminal& AddLiteralTerminal(const std::string& literal); 114 115 /// Adds a name terminal to the rule. 116 /// @return The terminal, enabling direct calls to WithName()/WithAction(). 117 XmlTerminal& AddNameTerminal(); 118 119 /// Adds a quoted string terminal to the rule. 120 /// @return The terminal, enabling direct calls to WithName()/WithAction(). 121 XmlTerminal& AddQuotedStringTerminal(); 122 123 /// Adds a sentinel terminal to the rule. 124 /// @param sentinels The sentinel values to scan for. 125 /// @return The terminal, enabling direct calls to WithName()/WithAction(). 126 XmlTerminal& AddSentinelTerminal(const std::string& sentinels); 127 128 /// Adds a scan through literal terminal to the rule. 129 /// @param literal The literal value to scan through. 130 /// @return The terminal, enabling direct calls to WithName()/WithAction(). 131 XmlTerminal& AddThroughLiteralTerminal(const std::string& literal); 132 133 /// Adds a whitespace terminal to the rule. 134 /// @return The terminal, enabling direct calls to WithName()/WithAction(). 135 XmlTerminal& AddWhitespaceTerminal(); 136 137 /// Adds an optional whitespace terminal to the rule. 138 /// @return The terminal, enabling direct calls to WithName()/WithAction(). 139 XmlTerminal& AddOptionalWhitespaceTerminal(); 140 141 /// @return The number of terminals in the rule. GetTerminalCount()142 size_t GetTerminalCount() const { return terminals_.size(); } 143 144 /// @return The index of the terminal currently parsing text. GetTerminalIndex()145 size_t GetTerminalIndex() const { return terminal_index_; } 146 147 /// @param name The name of the terminal to look for. 148 /// @return The index of the terminal with the given name, or the value 149 /// returned by the rule's GetTerminalCount() if not found. 150 size_t GetTerminalIndexFromName(const std::string name) const; 151 152 /// @param terminal_index The index of the terminal that should next be used 153 /// for parsing the input text. 154 void SetTerminalIndex(size_t terminal_index); 155 156 /// @return The terminal currently parsing text, or nullptr if there is none. 157 XmlTerminal* GetCurrentTerminal(); 158 159 /// @param index The index of the terminal to get. 160 /// @return The terminal at the given index, or nullptr if index is invalid. 161 XmlTerminal* GetTerminal(size_t index); 162 163 /// Resets the scanner's state of all the terminals in the rule. 164 void ResetTerminalScanners(); 165 166 /// @return Whether the rule has a next rule for delegation. 167 bool HasNextRule() const; 168 169 /// @return Returns the next rule to the caller. If there is no next rule, 170 /// the get function of the returned unique_ptr will return nullptr. 171 std::unique_ptr<XmlRule> ReleaseNextRule(); 172 173 /// @param next_rule The new rule to use for delegation purposes. 174 void SetNextRule(std::unique_ptr<XmlRule> next_rule); 175 176 private: 177 std::string name_; 178 std::vector<XmlTerminal> terminals_; 179 std::unique_ptr<XmlRule> next_rule_; 180 size_t terminal_index_; 181 }; 182 183 } // namespace image_io 184 } // namespace photos_editing_formats 185 186 #endif // IMAGE_IO_XML_XML_RULE_H_ // NOLINT 187