1 // © 2024 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #if !UCONFIG_NO_MF2
9
10 #include "unicode/messageformat2_data_model.h"
11 #include "messageformat2_macros.h"
12 #include "messageformat2_serializer.h"
13 #include "uvector.h" // U_ASSERT
14
15 U_NAMESPACE_BEGIN
16
17 namespace message2 {
18
19 // Generates a string representation of a data model
20 // ------------------------------------------------
21
22 using namespace data_model;
23
24 // Private helper methods
25
whitespace()26 void Serializer::whitespace() {
27 result += SPACE;
28 }
29
emit(UChar32 c)30 void Serializer::emit(UChar32 c) {
31 result += c;
32 }
33
emit(const UnicodeString & s)34 void Serializer::emit(const UnicodeString& s) {
35 result += s;
36 }
37
38 template <int32_t N>
emit(const UChar32 (& token)[N])39 void Serializer::emit(const UChar32 (&token)[N]) {
40 // Don't emit the terminator
41 for (int32_t i = 0; i < N - 1; i++) {
42 emit(token[i]);
43 }
44 }
45
emit(const Literal & l)46 void Serializer::emit(const Literal& l) {
47 if (l.isQuoted()) {
48 emit(PIPE);
49 const UnicodeString& contents = l.unquoted();
50 for (int32_t i = 0; ((int32_t) i) < contents.length(); i++) {
51 // Re-escape any PIPE or BACKSLASH characters
52 switch(contents[i]) {
53 case BACKSLASH:
54 case PIPE: {
55 emit(BACKSLASH);
56 break;
57 }
58 default: {
59 break;
60 }
61 }
62 emit(contents[i]);
63 }
64 emit(PIPE);
65 } else {
66 emit(l.unquoted());
67 }
68 }
69
emit(const Key & k)70 void Serializer::emit(const Key& k) {
71 if (k.isWildcard()) {
72 emit(ASTERISK);
73 return;
74 }
75 emit(k.asLiteral());
76 }
77
emit(const SelectorKeys & k)78 void Serializer::emit(const SelectorKeys& k) {
79 const Key* ks = k.getKeysInternal();
80 int32_t len = k.len;
81 // It would be an error for `keys` to be empty;
82 // that would mean this is the single `pattern`
83 // variant, and in that case, this method shouldn't be called
84 U_ASSERT(len > 0);
85 for (int32_t i = 0; i < len; i++) {
86 if (i != 0) {
87 whitespace();
88 }
89 emit(ks[i]);
90 }
91 }
92
emit(const Operand & rand)93 void Serializer::emit(const Operand& rand) {
94 U_ASSERT(!rand.isNull());
95
96 if (rand.isVariable()) {
97 emit(DOLLAR);
98 emit(rand.asVariable());
99 } else {
100 // Literal: quoted or unquoted
101 emit(rand.asLiteral());
102 }
103 }
104
emit(const OptionMap & options)105 void Serializer::emit(const OptionMap& options) {
106 // Errors should have been checked before this point
107 UErrorCode localStatus = U_ZERO_ERROR;
108 U_ASSERT(!options.bogus);
109 for (int32_t i = 0; i < options.size(); i++) {
110 const Option& opt = options.getOption(i, localStatus);
111 // No need to check error code, since we already checked
112 // that !bogus
113 whitespace();
114 emit(opt.getName());
115 emit(EQUALS);
116 emit(opt.getValue());
117 }
118 }
119
emitAttributes(const OptionMap & attributes)120 void Serializer::emitAttributes(const OptionMap& attributes) {
121 // Errors should have been checked before this point
122 UErrorCode localStatus = U_ZERO_ERROR;
123 U_ASSERT(!attributes.bogus);
124 for (int32_t i = 0; i < attributes.size(); i++) {
125 const Option& attr = attributes.getOption(i, localStatus);
126 // No need to check error code, since we already checked
127 // that !bogus
128 whitespace();
129 emit(AT);
130 emit(attr.getName());
131 const Operand& v = attr.getValue();
132 if (!v.isNull()) {
133 emit(EQUALS);
134 emit(v);
135 }
136 }
137 }
138
emit(const Reserved & reserved)139 void Serializer::emit(const Reserved& reserved) {
140 // Re-escape '\' / '{' / '|' / '}'
141 for (int32_t i = 0; i < reserved.numParts(); i++) {
142 const Literal& l = reserved.getPart(i);
143 if (l.isQuoted()) {
144 emit(l);
145 } else {
146 const UnicodeString& s = l.unquoted();
147 for (int32_t j = 0; ((int32_t) j) < s.length(); j++) {
148 switch(s[j]) {
149 case LEFT_CURLY_BRACE:
150 case PIPE:
151 case RIGHT_CURLY_BRACE:
152 case BACKSLASH: {
153 emit(BACKSLASH);
154 break;
155 }
156 default:
157 break;
158 }
159 emit(s[j]);
160 }
161 }
162 }
163 }
164
emit(const Expression & expr)165 void Serializer::emit(const Expression& expr) {
166 emit(LEFT_CURLY_BRACE);
167
168 if (!expr.isReserved() && !expr.isFunctionCall()) {
169 // Literal or variable, no annotation
170 emit(expr.getOperand());
171 } else {
172 // Function call or reserved
173 if (!expr.isStandaloneAnnotation()) {
174 // Must be a function call that has an operand
175 emit(expr.getOperand());
176 whitespace();
177 }
178 UErrorCode localStatus = U_ZERO_ERROR;
179 const Operator* rator = expr.getOperator(localStatus);
180 U_ASSERT(U_SUCCESS(localStatus));
181 if (rator->isReserved()) {
182 const Reserved& reserved = rator->asReserved();
183 emit(reserved);
184 } else {
185 emit(COLON);
186 emit(rator->getFunctionName());
187 // No whitespace after function name, in case it has
188 // no options. (when there are options, emit(OptionMap) will
189 // emit the leading whitespace)
190 emit(rator->getOptionsInternal());
191 }
192 }
193 emitAttributes(expr.getAttributesInternal());
194 emit(RIGHT_CURLY_BRACE);
195 }
196
emit(const PatternPart & part)197 void Serializer::emit(const PatternPart& part) {
198 if (part.isText()) {
199 // Raw text
200 const UnicodeString& text = part.asText();
201 // Re-escape '{'/'}'/'\'
202 for (int32_t i = 0; ((int32_t) i) < text.length(); i++) {
203 switch(text[i]) {
204 case BACKSLASH:
205 case LEFT_CURLY_BRACE:
206 case RIGHT_CURLY_BRACE: {
207 emit(BACKSLASH);
208 break;
209 }
210 default:
211 break;
212 }
213 emit(text[i]);
214 }
215 return;
216 }
217 // Markup
218 if (part.isMarkup()) {
219 const Markup& markup = part.asMarkup();
220 emit(LEFT_CURLY_BRACE);
221 if (markup.isClose()) {
222 emit(SLASH);
223 } else {
224 emit(NUMBER_SIGN);
225 }
226 emit(markup.getName());
227 emit(markup.getOptionsInternal());
228 emitAttributes(markup.getAttributesInternal());
229 if (markup.isStandalone()) {
230 emit(SLASH);
231 }
232 emit(RIGHT_CURLY_BRACE);
233 return;
234 }
235 // Expression
236 emit(part.contents());
237 }
238
emit(const Pattern & pat)239 void Serializer::emit(const Pattern& pat) {
240 int32_t len = pat.numParts();
241 // Always quote pattern, which should match the normalized input
242 // if the parser is constructing it correctly
243 emit(LEFT_CURLY_BRACE);
244 emit(LEFT_CURLY_BRACE);
245 for (int32_t i = 0; i < len; i++) {
246 // No whitespace is needed here -- see the `pattern` nonterminal in the grammar
247 emit(pat.getPart(i));
248 }
249 emit(RIGHT_CURLY_BRACE);
250 emit(RIGHT_CURLY_BRACE);
251 }
252
serializeDeclarations()253 void Serializer::serializeDeclarations() {
254 const Binding* bindings = dataModel.getLocalVariablesInternal();
255 U_ASSERT(bindings != nullptr);
256
257 for (int32_t i = 0; i < dataModel.bindingsLen; i++) {
258 const Binding& b = bindings[i];
259 if (b.isLocal()) {
260 // No whitespace needed here -- see `message` in the grammar
261 emit(ID_LOCAL);
262 whitespace();
263 emit(DOLLAR);
264 emit(b.getVariable());
265 // No whitespace needed here -- see `local-declaration` in the grammar
266 emit(EQUALS);
267 // No whitespace needed here -- see `local-declaration` in the grammar
268 } else {
269 // Input declaration
270 emit(ID_INPUT);
271 // No whitespace needed here -- see `input-declaration` in the grammar
272 }
273 emit(b.getValue());
274 }
275 }
276
serializeUnsupported()277 void Serializer::serializeUnsupported() {
278 const UnsupportedStatement* statements = dataModel.getUnsupportedStatementsInternal();
279 U_ASSERT(statements != nullptr);
280
281 for (int32_t i = 0; i < dataModel.unsupportedStatementsLen; i++) {
282 const UnsupportedStatement& s = statements[i];
283 emit(s.getKeyword());
284 UErrorCode localErrorCode = U_ZERO_ERROR;
285 const Reserved* r = s.getBody(localErrorCode);
286 if (U_SUCCESS(localErrorCode)) {
287 whitespace();
288 emit(*r);
289 }
290 const Expression* e = s.getExpressionsInternal();
291 for (int32_t j = 0; j < s.expressionsLen; j++) {
292 emit(e[j]);
293 }
294 }
295 }
296
serializeSelectors()297 void Serializer::serializeSelectors() {
298 U_ASSERT(!dataModel.hasPattern());
299 const Expression* selectors = dataModel.getSelectorsInternal();
300
301 emit(ID_MATCH);
302 for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
303 // No whitespace needed here -- see `selectors` in the grammar
304 emit(selectors[i]);
305 }
306 }
307
serializeVariants()308 void Serializer::serializeVariants() {
309 U_ASSERT(!dataModel.hasPattern());
310 const Variant* variants = dataModel.getVariantsInternal();
311 for (int32_t i = 0; i < dataModel.numVariants(); i++) {
312 const Variant& v = variants[i];
313 emit(v.getKeys());
314 // No whitespace needed here -- see `variant` in the grammar
315 emit(v.getPattern());
316 }
317 }
318
319
320 // Main (public) serializer method
serialize()321 void Serializer::serialize() {
322 serializeDeclarations();
323 serializeUnsupported();
324 // Pattern message
325 if (dataModel.hasPattern()) {
326 emit(dataModel.getPattern());
327 } else {
328 // Selectors message
329 serializeSelectors();
330 serializeVariants();
331 }
332 }
333
334 } // namespace message2
335 U_NAMESPACE_END
336
337 #endif /* #if !UCONFIG_NO_MF2 */
338
339 #endif /* #if !UCONFIG_NO_FORMATTING */
340
341