1 // © 2024 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 4 #include "unicode/utypes.h" 5 6 #ifndef U_HIDE_DEPRECATED_API 7 8 #ifndef MESSAGEFORMAT_PARSER_H 9 #define MESSAGEFORMAT_PARSER_H 10 11 #include "unicode/messageformat2_data_model.h" 12 #include "unicode/parseerr.h" 13 14 #include "messageformat2_allocation.h" 15 #include "messageformat2_errors.h" 16 17 #if U_SHOW_CPLUSPLUS_API 18 19 #if !UCONFIG_NO_FORMATTING 20 21 #if !UCONFIG_NO_MF2 22 23 U_NAMESPACE_BEGIN 24 25 namespace message2 { 26 27 using namespace data_model; 28 29 // Used for parameterizing options parsing code 30 // over the two builders that use it (Operator and Markup) 31 template <class T> 32 class OptionAdder { 33 private: 34 T& builder; 35 public: OptionAdder(T & b)36 OptionAdder(T& b) : builder(b) {} addOption(const UnicodeString & k,Operand && r,UErrorCode & s)37 void addOption(const UnicodeString& k, Operand&& r, UErrorCode& s) { 38 builder.addOption(k, std::move(r), s); 39 } 40 }; 41 42 // Used for parameterizing attributes parsing code 43 // over the two builders that use it (Expression and Markup) 44 // Unfortunately the same OptionAdder class can't just be reused, 45 // becaues duplicate options are forbidden while duplicate attributes are not 46 template <class T> 47 class AttributeAdder { 48 private: 49 T& builder; 50 public: AttributeAdder(T & b)51 AttributeAdder(T& b) : builder(b) {} addAttribute(const UnicodeString & k,Operand && r,UErrorCode & s)52 void addAttribute(const UnicodeString& k, Operand&& r, UErrorCode& s) { 53 builder.addAttribute(k, std::move(r), s); 54 } 55 }; 56 57 // Parser class (private) 58 class Parser : public UMemory { 59 public: 60 virtual ~Parser(); 61 private: 62 friend class MessageFormatter; 63 64 void parse(UParseError&, UErrorCode&); 65 66 /* 67 Use an internal "parse error" structure to make it easier to translate 68 absolute offsets to line offsets. 69 This is translated back to a `UParseError` at the end of parsing. 70 */ 71 typedef struct MessageParseError { 72 // The line on which the error occurred 73 uint32_t line; 74 // The offset, relative to the erroneous line, on which the error occurred 75 uint32_t offset; 76 // The total number of characters seen before advancing to the current line. It has a value of 0 if line == 0. 77 // It includes newline characters, because the index does too. 78 uint32_t lengthBeforeCurrentLine; 79 80 // This parser doesn't yet use the last two fields. 81 UChar preContext[U_PARSE_CONTEXT_LEN]; 82 UChar postContext[U_PARSE_CONTEXT_LEN]; 83 } MessageParseError; 84 Parser(const UnicodeString & input,MFDataModel::Builder & dataModelBuilder,StaticErrors & e,UnicodeString & normalizedInputRef)85 Parser(const UnicodeString &input, MFDataModel::Builder& dataModelBuilder, StaticErrors& e, UnicodeString& normalizedInputRef) 86 : source(input), index(0), errors(e), normalizedInput(normalizedInputRef), dataModel(dataModelBuilder) { 87 parseError.line = 0; 88 parseError.offset = 0; 89 parseError.lengthBeforeCurrentLine = 0; 90 parseError.preContext[0] = '\0'; 91 parseError.postContext[0] = '\0'; 92 } 93 94 // Used so `parseEscapeSequence()` can handle all types of escape sequences 95 // (literal, text, and reserved) 96 typedef enum { LITERAL, TEXT, RESERVED } EscapeKind; 97 98 static void translateParseError(const MessageParseError&, UParseError&); 99 static void setParseError(MessageParseError&, uint32_t); 100 void maybeAdvanceLine(); 101 Pattern parseSimpleMessage(UErrorCode&); 102 void parseBody(UErrorCode&); 103 void parseDeclarations(UErrorCode&); 104 void parseUnsupportedStatement(UErrorCode&); 105 void parseLocalDeclaration(UErrorCode&); 106 void parseInputDeclaration(UErrorCode&); 107 void parseSelectors(UErrorCode&); 108 109 void parseWhitespaceMaybeRequired(bool, UErrorCode&); 110 void parseRequiredWhitespace(UErrorCode&); 111 void parseOptionalWhitespace(UErrorCode&); 112 void parseToken(UChar32, UErrorCode&); 113 void parseTokenWithWhitespace(UChar32, UErrorCode&); 114 template <int32_t N> 115 void parseToken(const UChar32 (&)[N], UErrorCode&); 116 template <int32_t N> 117 void parseTokenWithWhitespace(const UChar32 (&)[N], UErrorCode&); 118 bool nextIsMatch() const; 119 UnicodeString parseName(UErrorCode&); 120 UnicodeString parseIdentifier(UErrorCode&); 121 UnicodeString parseDigits(UErrorCode&); 122 VariableName parseVariableName(UErrorCode&); 123 FunctionName parseFunction(UErrorCode&); 124 void parseEscapeSequence(EscapeKind, UnicodeString&, UErrorCode&); 125 void parseLiteralEscape(UnicodeString&, UErrorCode&); 126 Literal parseUnquotedLiteral(UErrorCode&); 127 Literal parseQuotedLiteral(UErrorCode&); 128 Literal parseLiteral(UErrorCode&); 129 template<class T> 130 void parseAttribute(AttributeAdder<T>&, UErrorCode&); 131 template<class T> 132 void parseAttributes(AttributeAdder<T>&, UErrorCode&); 133 template<class T> 134 void parseOption(OptionAdder<T>&, UErrorCode&); 135 template<class T> 136 void parseOptions(OptionAdder<T>&, UErrorCode&); 137 void parseReservedEscape(UnicodeString&, UErrorCode&); 138 void parseReservedChunk(Reserved::Builder&, UErrorCode&); 139 Reserved parseReserved(UErrorCode&); 140 Reserved parseReservedBody(Reserved::Builder&, UErrorCode&); 141 Operator parseAnnotation(UErrorCode&); 142 void parseLiteralOrVariableWithAnnotation(bool, Expression::Builder&, UErrorCode&); 143 Markup parseMarkup(UErrorCode&); 144 Expression parseExpression(UErrorCode&); 145 std::variant<Expression, Markup> parsePlaceholder(UErrorCode&); 146 void parseTextEscape(UnicodeString&, UErrorCode&); 147 UnicodeString parseText(UErrorCode&); 148 Key parseKey(UErrorCode&); 149 SelectorKeys parseNonEmptyKeys(UErrorCode&); 150 void errorPattern(UErrorCode& status); 151 Pattern parseQuotedPattern(UErrorCode&); 152 153 // The input string 154 const UnicodeString &source; 155 // The current position within the input string 156 uint32_t index; 157 // Represents the current line (and when an error is indicated), 158 // character offset within the line of the parse error 159 MessageParseError parseError; 160 161 // The structure to use for recording errors 162 StaticErrors& errors; 163 164 // Normalized version of the input string (optional whitespace removed) 165 UnicodeString& normalizedInput; 166 167 // The parent builder 168 MFDataModel::Builder &dataModel; 169 }; // class Parser 170 171 } // namespace message2 172 173 U_NAMESPACE_END 174 175 #endif /* #if !UCONFIG_NO_MF2 */ 176 177 #endif /* #if !UCONFIG_NO_FORMATTING */ 178 179 #endif /* U_SHOW_CPLUSPLUS_API */ 180 181 #endif // MESSAGEFORMAT_PARSER_H 182 183 #endif // U_HIDE_DEPRECATED_API 184 // eof 185