xref: /aosp_15_r20/external/icu/icu4c/source/i18n/messageformat2_serializer.cpp (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2024 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #if !UCONFIG_NO_MF2
9 
10 #include "unicode/messageformat2_data_model.h"
11 #include "messageformat2_macros.h"
12 #include "messageformat2_serializer.h"
13 #include "uvector.h" // U_ASSERT
14 
15 U_NAMESPACE_BEGIN
16 
17 namespace message2 {
18 
19 // Generates a string representation of a data model
20 // ------------------------------------------------
21 
22 using namespace data_model;
23 
24 // Private helper methods
25 
whitespace()26 void Serializer::whitespace() {
27     result += SPACE;
28 }
29 
emit(UChar32 c)30 void Serializer::emit(UChar32 c) {
31     result += c;
32 }
33 
emit(const UnicodeString & s)34 void Serializer::emit(const UnicodeString& s) {
35     result += s;
36 }
37 
38 template <int32_t N>
emit(const UChar32 (& token)[N])39 void Serializer::emit(const UChar32 (&token)[N]) {
40     // Don't emit the terminator
41     for (int32_t i = 0; i < N - 1; i++) {
42         emit(token[i]);
43     }
44 }
45 
emit(const Literal & l)46 void Serializer::emit(const Literal& l) {
47     if (l.isQuoted()) {
48       emit(PIPE);
49       const UnicodeString& contents = l.unquoted();
50       for (int32_t i = 0; ((int32_t) i) < contents.length(); i++) {
51         // Re-escape any PIPE or BACKSLASH characters
52         switch(contents[i]) {
53         case BACKSLASH:
54         case PIPE: {
55           emit(BACKSLASH);
56           break;
57         }
58         default: {
59           break;
60         }
61         }
62         emit(contents[i]);
63       }
64       emit(PIPE);
65     } else {
66       emit(l.unquoted());
67     }
68 }
69 
emit(const Key & k)70 void Serializer::emit(const Key& k) {
71     if (k.isWildcard()) {
72         emit(ASTERISK);
73         return;
74     }
75     emit(k.asLiteral());
76 }
77 
emit(const SelectorKeys & k)78 void Serializer::emit(const SelectorKeys& k) {
79   const Key* ks = k.getKeysInternal();
80   int32_t len = k.len;
81   // It would be an error for `keys` to be empty;
82   // that would mean this is the single `pattern`
83   // variant, and in that case, this method shouldn't be called
84   U_ASSERT(len > 0);
85   for (int32_t i = 0; i < len; i++) {
86     if (i != 0) {
87       whitespace();
88     }
89     emit(ks[i]);
90   }
91 }
92 
emit(const Operand & rand)93 void Serializer::emit(const Operand& rand) {
94     U_ASSERT(!rand.isNull());
95 
96     if (rand.isVariable()) {
97         emit(DOLLAR);
98         emit(rand.asVariable());
99     } else {
100         // Literal: quoted or unquoted
101         emit(rand.asLiteral());
102     }
103 }
104 
emit(const OptionMap & options)105 void Serializer::emit(const OptionMap& options) {
106     // Errors should have been checked before this point
107     UErrorCode localStatus = U_ZERO_ERROR;
108     U_ASSERT(!options.bogus);
109     for (int32_t i = 0; i < options.size(); i++) {
110         const Option& opt = options.getOption(i, localStatus);
111         // No need to check error code, since we already checked
112         // that !bogus
113         whitespace();
114         emit(opt.getName());
115         emit(EQUALS);
116         emit(opt.getValue());
117     }
118 }
119 
emitAttributes(const OptionMap & attributes)120 void Serializer::emitAttributes(const OptionMap& attributes) {
121     // Errors should have been checked before this point
122     UErrorCode localStatus = U_ZERO_ERROR;
123     U_ASSERT(!attributes.bogus);
124     for (int32_t i = 0; i < attributes.size(); i++) {
125         const Option& attr = attributes.getOption(i, localStatus);
126         // No need to check error code, since we already checked
127         // that !bogus
128         whitespace();
129         emit(AT);
130         emit(attr.getName());
131         const Operand& v = attr.getValue();
132         if (!v.isNull()) {
133             emit(EQUALS);
134             emit(v);
135         }
136     }
137 }
138 
emit(const Reserved & reserved)139 void Serializer::emit(const Reserved& reserved) {
140     // Re-escape '\' / '{' / '|' / '}'
141     for (int32_t i = 0; i < reserved.numParts(); i++) {
142         const Literal& l = reserved.getPart(i);
143         if (l.isQuoted()) {
144             emit(l);
145         } else {
146             const UnicodeString& s = l.unquoted();
147             for (int32_t j = 0; ((int32_t) j) < s.length(); j++) {
148                 switch(s[j]) {
149                 case LEFT_CURLY_BRACE:
150                 case PIPE:
151                 case RIGHT_CURLY_BRACE:
152                 case BACKSLASH: {
153                     emit(BACKSLASH);
154                     break;
155                 }
156                 default:
157                     break;
158                 }
159                 emit(s[j]);
160             }
161         }
162     }
163 }
164 
emit(const Expression & expr)165  void Serializer::emit(const Expression& expr) {
166     emit(LEFT_CURLY_BRACE);
167 
168     if (!expr.isReserved() && !expr.isFunctionCall()) {
169         // Literal or variable, no annotation
170         emit(expr.getOperand());
171     } else {
172         // Function call or reserved
173         if (!expr.isStandaloneAnnotation()) {
174           // Must be a function call that has an operand
175           emit(expr.getOperand());
176           whitespace();
177         }
178         UErrorCode localStatus = U_ZERO_ERROR;
179         const Operator* rator = expr.getOperator(localStatus);
180         U_ASSERT(U_SUCCESS(localStatus));
181         if (rator->isReserved()) {
182           const Reserved& reserved = rator->asReserved();
183           emit(reserved);
184         } else {
185             emit(COLON);
186             emit(rator->getFunctionName());
187             // No whitespace after function name, in case it has
188             // no options. (when there are options, emit(OptionMap) will
189             // emit the leading whitespace)
190             emit(rator->getOptionsInternal());
191         }
192     }
193     emitAttributes(expr.getAttributesInternal());
194     emit(RIGHT_CURLY_BRACE);
195 }
196 
emit(const PatternPart & part)197 void Serializer::emit(const PatternPart& part) {
198     if (part.isText()) {
199         // Raw text
200         const UnicodeString& text = part.asText();
201         // Re-escape '{'/'}'/'\'
202         for (int32_t i = 0; ((int32_t) i) < text.length(); i++) {
203           switch(text[i]) {
204           case BACKSLASH:
205           case LEFT_CURLY_BRACE:
206           case RIGHT_CURLY_BRACE: {
207             emit(BACKSLASH);
208             break;
209           }
210           default:
211             break;
212           }
213           emit(text[i]);
214         }
215         return;
216     }
217     // Markup
218     if (part.isMarkup()) {
219         const Markup& markup = part.asMarkup();
220         emit(LEFT_CURLY_BRACE);
221         if (markup.isClose()) {
222             emit(SLASH);
223             } else {
224             emit(NUMBER_SIGN);
225         }
226         emit(markup.getName());
227         emit(markup.getOptionsInternal());
228         emitAttributes(markup.getAttributesInternal());
229         if (markup.isStandalone()) {
230             emit(SLASH);
231         }
232         emit(RIGHT_CURLY_BRACE);
233         return;
234     }
235     // Expression
236     emit(part.contents());
237 }
238 
emit(const Pattern & pat)239 void Serializer::emit(const Pattern& pat) {
240     int32_t len = pat.numParts();
241     // Always quote pattern, which should match the normalized input
242     // if the parser is constructing it correctly
243     emit(LEFT_CURLY_BRACE);
244     emit(LEFT_CURLY_BRACE);
245     for (int32_t i = 0; i < len; i++) {
246         // No whitespace is needed here -- see the `pattern` nonterminal in the grammar
247         emit(pat.getPart(i));
248     }
249     emit(RIGHT_CURLY_BRACE);
250     emit(RIGHT_CURLY_BRACE);
251 }
252 
serializeDeclarations()253 void Serializer::serializeDeclarations() {
254     const Binding* bindings = dataModel.getLocalVariablesInternal();
255     U_ASSERT(bindings != nullptr);
256 
257     for (int32_t i = 0; i < dataModel.bindingsLen; i++) {
258         const Binding& b = bindings[i];
259         if (b.isLocal()) {
260             // No whitespace needed here -- see `message` in the grammar
261             emit(ID_LOCAL);
262             whitespace();
263             emit(DOLLAR);
264             emit(b.getVariable());
265             // No whitespace needed here -- see `local-declaration` in the grammar
266             emit(EQUALS);
267             // No whitespace needed here -- see `local-declaration` in the grammar
268         } else {
269             // Input declaration
270             emit(ID_INPUT);
271             // No whitespace needed here -- see `input-declaration` in the grammar
272         }
273         emit(b.getValue());
274     }
275 }
276 
serializeUnsupported()277 void Serializer::serializeUnsupported() {
278     const UnsupportedStatement* statements = dataModel.getUnsupportedStatementsInternal();
279     U_ASSERT(statements != nullptr);
280 
281     for (int32_t i = 0; i < dataModel.unsupportedStatementsLen; i++) {
282         const UnsupportedStatement& s = statements[i];
283         emit(s.getKeyword());
284         UErrorCode localErrorCode = U_ZERO_ERROR;
285         const Reserved* r = s.getBody(localErrorCode);
286         if (U_SUCCESS(localErrorCode)) {
287             whitespace();
288             emit(*r);
289         }
290         const Expression* e = s.getExpressionsInternal();
291         for (int32_t j = 0; j < s.expressionsLen; j++) {
292             emit(e[j]);
293         }
294     }
295 }
296 
serializeSelectors()297 void Serializer::serializeSelectors() {
298     U_ASSERT(!dataModel.hasPattern());
299     const Expression* selectors = dataModel.getSelectorsInternal();
300 
301     emit(ID_MATCH);
302     for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
303         // No whitespace needed here -- see `selectors` in the grammar
304         emit(selectors[i]);
305     }
306 }
307 
serializeVariants()308 void Serializer::serializeVariants() {
309     U_ASSERT(!dataModel.hasPattern());
310     const Variant* variants = dataModel.getVariantsInternal();
311     for (int32_t i = 0; i < dataModel.numVariants(); i++) {
312         const Variant& v = variants[i];
313         emit(v.getKeys());
314         // No whitespace needed here -- see `variant` in the grammar
315         emit(v.getPattern());
316     }
317 }
318 
319 
320 // Main (public) serializer method
serialize()321 void Serializer::serialize() {
322     serializeDeclarations();
323     serializeUnsupported();
324     // Pattern message
325     if (dataModel.hasPattern()) {
326       emit(dataModel.getPattern());
327     } else {
328       // Selectors message
329       serializeSelectors();
330       serializeVariants();
331     }
332 }
333 
334 } // namespace message2
335 U_NAMESPACE_END
336 
337 #endif /* #if !UCONFIG_NO_MF2 */
338 
339 #endif /* #if !UCONFIG_NO_FORMATTING */
340 
341