xref: /aosp_15_r20/external/icu/libicu/cts_headers/messageformat2_parser.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2024 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #ifndef U_HIDE_DEPRECATED_API
7 
8 #ifndef MESSAGEFORMAT_PARSER_H
9 #define MESSAGEFORMAT_PARSER_H
10 
11 #include "unicode/messageformat2_data_model.h"
12 #include "unicode/parseerr.h"
13 
14 #include "messageformat2_allocation.h"
15 #include "messageformat2_errors.h"
16 
17 #if U_SHOW_CPLUSPLUS_API
18 
19 #if !UCONFIG_NO_FORMATTING
20 
21 #if !UCONFIG_NO_MF2
22 
23 U_NAMESPACE_BEGIN
24 
25 namespace message2 {
26 
27     using namespace data_model;
28 
29     // Used for parameterizing options parsing code
30     // over the two builders that use it (Operator and Markup)
31     template <class T>
32     class OptionAdder {
33         private:
34             T& builder;
35         public:
OptionAdder(T & b)36             OptionAdder(T& b) : builder(b) {}
addOption(const UnicodeString & k,Operand && r,UErrorCode & s)37             void addOption(const UnicodeString& k, Operand&& r, UErrorCode& s) {
38                 builder.addOption(k, std::move(r), s);
39             }
40     };
41 
42     // Used for parameterizing attributes parsing code
43     // over the two builders that use it (Expression and Markup)
44     // Unfortunately the same OptionAdder class can't just be reused,
45     // becaues duplicate options are forbidden while duplicate attributes are not
46     template <class T>
47     class AttributeAdder {
48         private:
49             T& builder;
50         public:
AttributeAdder(T & b)51             AttributeAdder(T& b) : builder(b) {}
addAttribute(const UnicodeString & k,Operand && r,UErrorCode & s)52             void addAttribute(const UnicodeString& k, Operand&& r, UErrorCode& s) {
53                 builder.addAttribute(k, std::move(r), s);
54             }
55     };
56 
57     // Parser class (private)
58     class Parser : public UMemory {
59     public:
60 	virtual ~Parser();
61     private:
62         friend class MessageFormatter;
63 
64         void parse(UParseError&, UErrorCode&);
65 
66 	/*
67 	  Use an internal "parse error" structure to make it easier to translate
68 	  absolute offsets to line offsets.
69 	  This is translated back to a `UParseError` at the end of parsing.
70 	*/
71 	typedef struct MessageParseError {
72 	    // The line on which the error occurred
73 	    uint32_t line;
74 	    // The offset, relative to the erroneous line, on which the error occurred
75 	    uint32_t offset;
76 	    // The total number of characters seen before advancing to the current line. It has a value of 0 if line == 0.
77 	    // It includes newline characters, because the index does too.
78 	    uint32_t lengthBeforeCurrentLine;
79 
80 	    // This parser doesn't yet use the last two fields.
81 	    UChar   preContext[U_PARSE_CONTEXT_LEN];
82 	    UChar   postContext[U_PARSE_CONTEXT_LEN];
83 	} MessageParseError;
84 
Parser(const UnicodeString & input,MFDataModel::Builder & dataModelBuilder,StaticErrors & e,UnicodeString & normalizedInputRef)85 	Parser(const UnicodeString &input, MFDataModel::Builder& dataModelBuilder, StaticErrors& e, UnicodeString& normalizedInputRef)
86 	  : source(input), index(0), errors(e), normalizedInput(normalizedInputRef), dataModel(dataModelBuilder) {
87 	  parseError.line = 0;
88 	  parseError.offset = 0;
89 	  parseError.lengthBeforeCurrentLine = 0;
90 	  parseError.preContext[0] = '\0';
91 	  parseError.postContext[0] = '\0';
92 	}
93 
94 	// Used so `parseEscapeSequence()` can handle all types of escape sequences
95 	// (literal, text, and reserved)
96 	typedef enum { LITERAL, TEXT, RESERVED } EscapeKind;
97 
98 	static void translateParseError(const MessageParseError&, UParseError&);
99 	static void setParseError(MessageParseError&, uint32_t);
100 	void maybeAdvanceLine();
101         Pattern parseSimpleMessage(UErrorCode&);
102         void parseBody(UErrorCode&);
103 	void parseDeclarations(UErrorCode&);
104         void parseUnsupportedStatement(UErrorCode&);
105         void parseLocalDeclaration(UErrorCode&);
106         void parseInputDeclaration(UErrorCode&);
107 	void parseSelectors(UErrorCode&);
108 
109 	void parseWhitespaceMaybeRequired(bool, UErrorCode&);
110 	void parseRequiredWhitespace(UErrorCode&);
111 	void parseOptionalWhitespace(UErrorCode&);
112 	void parseToken(UChar32, UErrorCode&);
113 	void parseTokenWithWhitespace(UChar32, UErrorCode&);
114 	template <int32_t N>
115 	void parseToken(const UChar32 (&)[N], UErrorCode&);
116 	template <int32_t N>
117 	void parseTokenWithWhitespace(const UChar32 (&)[N], UErrorCode&);
118         bool nextIsMatch() const;
119 	UnicodeString parseName(UErrorCode&);
120         UnicodeString parseIdentifier(UErrorCode&);
121         UnicodeString parseDigits(UErrorCode&);
122 	VariableName parseVariableName(UErrorCode&);
123 	FunctionName parseFunction(UErrorCode&);
124 	void parseEscapeSequence(EscapeKind, UnicodeString&, UErrorCode&);
125 	void parseLiteralEscape(UnicodeString&, UErrorCode&);
126         Literal parseUnquotedLiteral(UErrorCode&);
127         Literal parseQuotedLiteral(UErrorCode&);
128 	Literal parseLiteral(UErrorCode&);
129         template<class T>
130         void parseAttribute(AttributeAdder<T>&, UErrorCode&);
131         template<class T>
132         void parseAttributes(AttributeAdder<T>&, UErrorCode&);
133         template<class T>
134         void parseOption(OptionAdder<T>&, UErrorCode&);
135         template<class T>
136         void parseOptions(OptionAdder<T>&, UErrorCode&);
137 	void parseReservedEscape(UnicodeString&, UErrorCode&);
138 	void parseReservedChunk(Reserved::Builder&, UErrorCode&);
139 	Reserved parseReserved(UErrorCode&);
140         Reserved parseReservedBody(Reserved::Builder&, UErrorCode&);
141 	Operator parseAnnotation(UErrorCode&);
142 	void parseLiteralOrVariableWithAnnotation(bool, Expression::Builder&, UErrorCode&);
143         Markup parseMarkup(UErrorCode&);
144 	Expression parseExpression(UErrorCode&);
145         std::variant<Expression, Markup> parsePlaceholder(UErrorCode&);
146 	void parseTextEscape(UnicodeString&, UErrorCode&);
147 	UnicodeString parseText(UErrorCode&);
148 	Key parseKey(UErrorCode&);
149 	SelectorKeys parseNonEmptyKeys(UErrorCode&);
150 	void errorPattern(UErrorCode& status);
151 	Pattern parseQuotedPattern(UErrorCode&);
152 
153 	// The input string
154 	const UnicodeString &source;
155 	// The current position within the input string
156 	uint32_t index;
157 	// Represents the current line (and when an error is indicated),
158 	// character offset within the line of the parse error
159 	MessageParseError parseError;
160 
161 	// The structure to use for recording errors
162 	StaticErrors& errors;
163 
164 	// Normalized version of the input string (optional whitespace removed)
165 	UnicodeString& normalizedInput;
166 
167 	// The parent builder
168 	MFDataModel::Builder &dataModel;
169     }; // class Parser
170 
171 } // namespace message2
172 
173 U_NAMESPACE_END
174 
175 #endif /* #if !UCONFIG_NO_MF2 */
176 
177 #endif /* #if !UCONFIG_NO_FORMATTING */
178 
179 #endif /* U_SHOW_CPLUSPLUS_API */
180 
181 #endif // MESSAGEFORMAT_PARSER_H
182 
183 #endif // U_HIDE_DEPRECATED_API
184 // eof
185