xref: /aosp_15_r20/external/icu/icu4c/source/i18n/messageformat2_checker.cpp (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2024 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #if !UCONFIG_NO_MF2
9 
10 #include "messageformat2_allocation.h"
11 #include "messageformat2_checker.h"
12 #include "messageformat2_macros.h"
13 #include "uvector.h" // U_ASSERT
14 
15 U_NAMESPACE_BEGIN
16 
17 namespace message2 {
18 
19 /*
20 Checks data model errors
21 (see https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#error-handling )
22 
23 The following are checked here:
24 Variant Key Mismatch
25 Missing Fallback Variant (called NonexhaustivePattern here)
26 Missing Selector Annotation
27 Duplicate Declaration
28   - Most duplicate declaration errors are checked by the parser,
29     but the checker checks for declarations of input variables
30     that were previously implicitly declared
31 (Duplicate option names and all other duplicate declarations are checked by the parser)
32 */
33 
34 // Type environments
35 // -----------------
36 
TypeEnvironment(UErrorCode & status)37 TypeEnvironment::TypeEnvironment(UErrorCode& status) {
38     CHECK_ERROR(status);
39 
40     UVector* temp;
41     temp = createStringVectorNoAdopt(status);
42     CHECK_ERROR(status);
43     annotated.adoptInstead(temp);
44     temp = createStringVectorNoAdopt(status);
45     CHECK_ERROR(status);
46     unannotated.adoptInstead(temp);
47     temp = createStringVectorNoAdopt(status);
48     CHECK_ERROR(status);
49     freeVars.adoptInstead(temp);
50 }
51 
has(const UVector & v,const VariableName & var)52  static bool has(const UVector& v, const VariableName& var) {
53      return v.contains(const_cast<void*>(static_cast<const void*>(&var)));
54  }
55 
56 // Returns true if `var` was either previously used (implicit declaration),
57 // or is in scope by an explicit declaration
known(const VariableName & var) const58 bool TypeEnvironment::known(const VariableName& var) const {
59     return has(*annotated, var) || has(*unannotated, var) || has(*freeVars, var);
60 }
61 
get(const VariableName & var) const62 TypeEnvironment::Type TypeEnvironment::get(const VariableName& var) const {
63     U_ASSERT(annotated.isValid());
64     if (has(*annotated, var)) {
65         return Annotated;
66     }
67     U_ASSERT(unannotated.isValid());
68     if (has(*unannotated, var)) {
69         return Unannotated;
70     }
71     U_ASSERT(freeVars.isValid());
72     if (has(*freeVars, var)) {
73         return FreeVariable;
74     }
75     // This case is a "free variable without an implicit declaration",
76     // i.e. one used only in a selector expression and not in a declaration RHS
77     return Unannotated;
78 }
79 
extend(const VariableName & var,TypeEnvironment::Type t,UErrorCode & status)80 void TypeEnvironment::extend(const VariableName& var, TypeEnvironment::Type t, UErrorCode& status) {
81     if (t == Unannotated) {
82         U_ASSERT(unannotated.isValid());
83         // See comment below
84         unannotated->addElement(const_cast<void*>(static_cast<const void*>(&var)), status);
85         return;
86     }
87 
88     if (t == FreeVariable) {
89         U_ASSERT(freeVars.isValid());
90         // See comment below
91         freeVars->addElement(const_cast<void*>(static_cast<const void*>(&var)), status);
92         return;
93     }
94 
95     U_ASSERT(annotated.isValid());
96     // This is safe because elements of `annotated` are never written
97     // and the lifetime of `var` is guaranteed to include the lifetime of
98     // `annotated`
99     annotated->addElement(const_cast<void*>(static_cast<const void*>(&var)), status);
100 }
101 
~TypeEnvironment()102 TypeEnvironment::~TypeEnvironment() {}
103 
104 // ---------------------
105 
areDefaultKeys(const Key * keys,int32_t len)106 static bool areDefaultKeys(const Key* keys, int32_t len) {
107     U_ASSERT(len > 0);
108     for (int32_t i = 0; i < len; i++) {
109         if (!keys[i].isWildcard()) {
110             return false;
111         }
112     }
113     return true;
114 }
115 
addFreeVars(TypeEnvironment & t,const Operand & rand,UErrorCode & status)116 void Checker::addFreeVars(TypeEnvironment& t, const Operand& rand, UErrorCode& status) {
117     CHECK_ERROR(status);
118 
119     if (rand.isVariable()) {
120         const VariableName& v = rand.asVariable();
121         if (!t.known(v)) {
122             t.extend(v, TypeEnvironment::Type::FreeVariable, status);
123         }
124     }
125 }
126 
addFreeVars(TypeEnvironment & t,const OptionMap & opts,UErrorCode & status)127 void Checker::addFreeVars(TypeEnvironment& t, const OptionMap& opts, UErrorCode& status) {
128     for (int32_t i = 0; i < opts.size(); i++) {
129         const Option& o = opts.getOption(i, status);
130         CHECK_ERROR(status);
131         addFreeVars(t, o.getValue(), status);
132     }
133 }
134 
addFreeVars(TypeEnvironment & t,const Operator & rator,UErrorCode & status)135 void Checker::addFreeVars(TypeEnvironment& t, const Operator& rator, UErrorCode& status) {
136     CHECK_ERROR(status);
137 
138     if (!rator.isReserved()) {
139         addFreeVars(t, rator.getOptionsInternal(), status);
140     }
141 }
142 
addFreeVars(TypeEnvironment & t,const Expression & rhs,UErrorCode & status)143 void Checker::addFreeVars(TypeEnvironment& t, const Expression& rhs, UErrorCode& status) {
144     CHECK_ERROR(status);
145 
146     if (rhs.isFunctionCall()) {
147         const Operator* rator = rhs.getOperator(status);
148         U_ASSERT(U_SUCCESS(status));
149         addFreeVars(t, *rator, status);
150     }
151     addFreeVars(t, rhs.getOperand(), status);
152 }
153 
checkVariants(UErrorCode & status)154 void Checker::checkVariants(UErrorCode& status) {
155     CHECK_ERROR(status);
156 
157     U_ASSERT(!dataModel.hasPattern());
158 
159     // Check that each variant has a key list with size
160     // equal to the number of selectors
161     const Variant* variants = dataModel.getVariantsInternal();
162 
163     // Check that one variant includes only wildcards
164     bool defaultExists = false;
165 
166     for (int32_t i = 0; i < dataModel.numVariants(); i++) {
167         const SelectorKeys& k = variants[i].getKeys();
168         const Key* keys = k.getKeysInternal();
169         int32_t len = k.len;
170         if (len != dataModel.numSelectors()) {
171             // Variant key mismatch
172             errors.addError(StaticErrorType::VariantKeyMismatchError, status);
173             return;
174         }
175         defaultExists |= areDefaultKeys(keys, len);
176     }
177     if (!defaultExists) {
178         errors.addError(StaticErrorType::NonexhaustivePattern, status);
179         return;
180     }
181 }
182 
requireAnnotated(const TypeEnvironment & t,const Expression & selectorExpr,UErrorCode & status)183 void Checker::requireAnnotated(const TypeEnvironment& t, const Expression& selectorExpr, UErrorCode& status) {
184     CHECK_ERROR(status);
185 
186     if (selectorExpr.isFunctionCall()) {
187         return; // No error
188     }
189     if (!selectorExpr.isReserved()) {
190         const Operand& rand = selectorExpr.getOperand();
191         if (rand.isVariable()) {
192             if (t.get(rand.asVariable()) == TypeEnvironment::Type::Annotated) {
193                 return; // No error
194             }
195         }
196     }
197     // If this code is reached, an error was detected
198     errors.addError(StaticErrorType::MissingSelectorAnnotation, status);
199 }
200 
checkSelectors(const TypeEnvironment & t,UErrorCode & status)201 void Checker::checkSelectors(const TypeEnvironment& t, UErrorCode& status) {
202     U_ASSERT(!dataModel.hasPattern());
203 
204     // Check each selector; if it's not annotated, emit a
205     // "missing selector annotation" error
206     const Expression* selectors = dataModel.getSelectorsInternal();
207     for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
208         requireAnnotated(t, selectors[i], status);
209     }
210 }
211 
typeOf(TypeEnvironment & t,const Expression & expr)212 TypeEnvironment::Type typeOf(TypeEnvironment& t, const Expression& expr) {
213     if (expr.isFunctionCall()) {
214         return TypeEnvironment::Type::Annotated;
215     }
216     if (expr.isReserved()) {
217         return TypeEnvironment::Type::Unannotated;
218     }
219     const Operand& rand = expr.getOperand();
220     U_ASSERT(!rand.isNull());
221     if (rand.isLiteral()) {
222         return TypeEnvironment::Type::Unannotated;
223     }
224     U_ASSERT(rand.isVariable());
225     return t.get(rand.asVariable());
226 }
227 
checkDeclarations(TypeEnvironment & t,UErrorCode & status)228 void Checker::checkDeclarations(TypeEnvironment& t, UErrorCode& status) {
229     CHECK_ERROR(status);
230 
231     // For each declaration, extend the type environment with its type
232     // Only a very simple type system is necessary: variables
233     // have the type "annotated", "unannotated", or "free".
234     // For "missing selector annotation" checking, free variables
235     // (message arguments) are treated as unannotated.
236     // Free variables are also used for checking duplicate declarations.
237     const Binding* env = dataModel.getLocalVariablesInternal();
238     for (int32_t i = 0; i < dataModel.bindingsLen; i++) {
239         const Binding& b = env[i];
240         const VariableName& lhs = b.getVariable();
241         const Expression& rhs = b.getValue();
242 
243         // First, add free variables from the RHS of b
244         // This must be done first so we can catch:
245         // .local $foo = {$foo}
246         // (where the RHS is the first use of $foo)
247         if (b.isLocal()) {
248             addFreeVars(t, rhs, status);
249 
250             // Next, check if the LHS equals any free variables
251             // whose implicit declarations are in scope
252             if (t.known(lhs) && t.get(lhs) == TypeEnvironment::Type::FreeVariable) {
253                 errors.addError(StaticErrorType::DuplicateDeclarationError, status);
254             }
255         } else {
256             // Input declaration; if b has no annotation, there's nothing to check
257             if (!b.isLocal() && b.hasAnnotation()) {
258                 const OptionMap& opts = b.getOptionsInternal();
259                 // For .input declarations, we just need to add any variables
260                 // referenced in the options
261                 addFreeVars(t, opts, status);
262              }
263             // Next, check if the LHS equals any free variables
264             // whose implicit declarations are in scope
265             if (t.known(lhs) && t.get(lhs) == TypeEnvironment::Type::FreeVariable) {
266                 errors.addError(StaticErrorType::DuplicateDeclarationError, status);
267             }
268         }
269         // Next, extend the type environment with a binding from lhs to its type
270         t.extend(lhs, typeOf(t, rhs), status);
271     }
272 
273     // Check for unsupported statements
274     if (dataModel.unsupportedStatementsLen > 0) {
275         errors.addError(StaticErrorType::UnsupportedStatementError, status);
276     }
277 }
278 
check(UErrorCode & status)279 void Checker::check(UErrorCode& status) {
280     CHECK_ERROR(status);
281 
282     TypeEnvironment typeEnv(status);
283     checkDeclarations(typeEnv, status);
284     // Pattern message
285     if (dataModel.hasPattern()) {
286         return;
287     } else {
288       // Selectors message
289       checkSelectors(typeEnv, status);
290       checkVariants(status);
291     }
292 }
293 
294 } // namespace message2
295 U_NAMESPACE_END
296 
297 #endif /* #if !UCONFIG_NO_MF2 */
298 
299 #endif /* #if !UCONFIG_NO_FORMATTING */
300