xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/LocaleValidator.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.util;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.collect.HashMultimap;
5 import com.google.common.collect.ImmutableMap;
6 import com.google.common.collect.ImmutableMultimap;
7 import com.google.common.collect.Multimap;
8 import com.ibm.icu.impl.Row;
9 import com.ibm.icu.impl.Row.R2;
10 import com.ibm.icu.lang.UCharacter;
11 import java.util.Collection;
12 import java.util.List;
13 import java.util.Map;
14 import java.util.Map.Entry;
15 import java.util.Set;
16 import java.util.function.Function;
17 import java.util.regex.Pattern;
18 import java.util.stream.Collectors;
19 import org.unicode.cldr.util.StandardCodes.LstrField;
20 import org.unicode.cldr.util.StandardCodes.LstrType;
21 
22 public class LocaleValidator {
23     static final SupplementalDataInfo SDI = SupplementalDataInfo.getInstance();
24 
25     static final Validity VALIDITY = Validity.getInstance();
26     static final Set<LstrType> FIELD_ALLOWS_EMPTY = Set.of(LstrType.script, LstrType.region);
27     // Map<LstrType, Map<String, Map<LstrField, String>>>
28     static final Map<String, Validity.Status> VALID_VARIANTS =
29             ImmutableMap.copyOf(
30                     StandardCodes.getEnumLstreg().get(LstrType.variant).entrySet().stream()
31                             .collect(
32                                     Collectors.toMap(
33                                             x -> x.getKey(),
34                                             y ->
35                                                     y.getValue().get(LstrField.Deprecated) == null
36                                                             ? Validity.Status.regular
37                                                             : Validity.Status.deprecated)));
38 
39     private static final Map<String, Validity.Status> KR_REORDER =
40             SupplementalDataInfo.getInstance().getBcp47Keys().get("kr").stream()
41                     .filter(x -> !x.equals("REORDER_CODE"))
42                     .collect(
43                             Collectors.toMap(
44                                     Function.identity(),
45                                     y -> {
46                                         String temp =
47                                                 SupplementalDataInfo.getInstance()
48                                                         .getBcp47Deprecated()
49                                                         .get(Row.of("kr", y));
50                                         return "false".equals(temp)
51                                                 ? Validity.Status.regular
52                                                 : Validity.Status.deprecated;
53                                     }));
54     private static final Map<String, Validity.Status> LOWERCASE_SCRIPT =
55             VALIDITY.getCodeToStatus(LstrType.script).entrySet().stream()
56                     .collect(
57                             Collectors.toMap(
58                                     x -> UCharacter.toLowerCase(x.getKey()), x -> x.getValue()));
59 
60     private static final Map<String, Validity.Status> LOWERCASE_REGION =
61             VALIDITY.getCodeToStatus(LstrType.script).entrySet().stream()
62                     .collect(
63                             Collectors.toMap(
64                                     x -> UCharacter.toLowerCase(x.getKey()), x -> x.getValue()));
65 
66     public static class AllowedMatch {
67         final Pattern key;
68         final Pattern value;
69         final Validity.Status status;
70 
AllowedMatch(String code)71         public AllowedMatch(String code) {
72             this(code, null, null);
73         }
74 
AllowedMatch(String code, String value)75         public AllowedMatch(String code, String value) {
76             this(code, value, null);
77         }
78 
AllowedMatch(String code, String value, Validity.Status status)79         public AllowedMatch(String code, String value, Validity.Status status) {
80             this.key = code == null ? null : Pattern.compile(code);
81             this.value = value == null ? null : Pattern.compile(value);
82             this.status = status;
83         }
84 
matches(String key0, String value0, Validity.Status status)85         public boolean matches(String key0, String value0, Validity.Status status) {
86             return (key == null || key.matcher(key0).matches())
87                     && (value == null
88                             || value.matcher(value0).matches()
89                                     && (status == null || status == status));
90         }
91 
92         @Override
toString()93         public String toString() {
94             return key + "→" + value;
95         }
96     }
97 
98     public static class AllowedValid {
99 
100         private final Set<Validity.Status> allowedStatus; // allowed without exception
101         private final Multimap<LstrType, AllowedMatch> allowedExceptions;
102 
isAllowed(Validity.Status status)103         boolean isAllowed(Validity.Status status) {
104             return allowedStatus.contains(status);
105         }
106 
107         /** Only called if isAllowed is not true */
isAllowed(LstrType lstrType, String key, String value, Validity.Status status)108         boolean isAllowed(LstrType lstrType, String key, String value, Validity.Status status) {
109             Collection<AllowedMatch> allowedMatches = allowedExceptions.get(lstrType);
110             if (allowedMatches == null) {
111                 return false;
112             }
113             for (AllowedMatch allowedMatch : allowedMatches) {
114                 if (allowedMatch.matches(key, value, status)) return true;
115             }
116             return false;
117         }
118 
AllowedValid(Set<Validity.Status> allowedStatus, Object... allowedExceptions)119         public AllowedValid(Set<Validity.Status> allowedStatus, Object... allowedExceptions) {
120             this.allowedStatus =
121                     allowedStatus == null
122                             ? Set.of(Validity.Status.regular)
123                             : Set.copyOf(allowedStatus);
124             Multimap<LstrType, AllowedMatch> allowed = HashMultimap.create();
125             if (allowedExceptions != null) {
126                 for (int i = 0; i < allowedExceptions.length; i += 2) {
127                     allowed.put(
128                             (LstrType) allowedExceptions[i],
129                             (AllowedMatch) allowedExceptions[i + 1]);
130                 }
131             }
132             this.allowedExceptions = ImmutableMultimap.copyOf(allowed);
133         }
134 
135         @Override
toString()136         public String toString() {
137             return allowedStatus + " " + allowedExceptions;
138         }
139     }
140 
141     /**
142      * @return true iff the component validates
143      */
isValid( LanguageTagParser ltp, LocaleValidator.AllowedValid allowed, Set<String> errors)144     public static boolean isValid(
145             LanguageTagParser ltp, LocaleValidator.AllowedValid allowed, Set<String> errors) {
146         if (errors != null) {
147             errors.clear();
148         }
149         if (allowed == null) {
150             allowed = new AllowedValid(null, null);
151         }
152         if (ltp.isLegacy() && allowed.isAllowed(Validity.Status.deprecated)) {
153             return true; // don't need further checking, since we already did so when parsing
154         }
155         if (Validation.abort
156                 == validates(LstrType.language, ltp.getLanguage(), null, allowed, errors)) {
157             return false;
158         }
159         if (Validation.abort
160                 == validates(LstrType.script, ltp.getScript(), null, allowed, errors)) {
161             return false;
162         }
163         if (Validation.abort
164                 == validates(LstrType.region, ltp.getRegion(), null, allowed, errors)) {
165             return false;
166         }
167         for (String variant : ltp.getVariants()) {
168             if (Validation.abort == validates(LstrType.variant, variant, null, allowed, errors)) {
169                 return false;
170             }
171         }
172         for (Entry<String, List<String>> entry : ltp.getLocaleExtensionsDetailed().entrySet()) {
173             if (Validation.abort
174                     == validates(
175                             LstrType.extension,
176                             entry.getKey(),
177                             entry.getValue(),
178                             allowed,
179                             errors)) {
180                 return false;
181             }
182         }
183         for (Entry<String, List<String>> entry : ltp.getExtensionsDetailed().entrySet()) {
184             if (Validation.abort
185                     == validates(
186                             LstrType.extension,
187                             entry.getKey(),
188                             entry.getValue(),
189                             allowed,
190                             errors)) {
191                 return false;
192             }
193         }
194         return errors.isEmpty(); // if we didn't abort, then we recorded errors in the set
195     }
196 
    /** Result of validating one subtag or extension. */
    private enum Validation {
        /** Validation failed and the caller should stop and report the locale as invalid. */
        abort,
        /** The caller should continue with the remaining components. */
        keepOn
    }
201     /**
202      * Returns true if it doesn't validate and errors == null (allows for fast rejection)
203      *
204      * @param type
205      * @param values TODO
206      * @param subtag
207      * @return true if the subtag is empty, or it is an allowed status
208      */
validates( LstrType type, String field, List<String> values, LocaleValidator.AllowedValid allowed, Set<String> errors)209     private static LocaleValidator.Validation validates(
210             LstrType type,
211             String field,
212             List<String> values,
213             LocaleValidator.AllowedValid allowed,
214             Set<String> errors) {
215         Validity.Status status;
216         switch (type) {
217             case language:
218             case script:
219             case region:
220                 status = VALIDITY.getCodeToStatus(type).get(field);
221                 if (status == null) {
222                     status = Validity.Status.invalid;
223                 }
224                 if (allowed.isAllowed(status)
225                         || allowed.isAllowed(type, field, null, null)
226                         || field.length() == 0) {
227                     return Validation.keepOn;
228                 }
229                 break;
230             case variant:
231                 status = VALID_VARIANTS.get(field);
232                 if (status == null) {
233                     status = Validity.Status.invalid;
234                 }
235                 if (allowed.isAllowed(status)) {
236                     return Validation.keepOn;
237                 }
238                 break;
239             case extension:
240                 switch (field.length()) {
241                     case 1:
242                         switch (field) {
243                             case "t": // value is an LSRV
244                                 String lsvr = Joiner.on("-").join(values);
245                                 status = Validity.Status.invalid;
246                                 try {
247                                     LanguageTagParser ltp2 = new LanguageTagParser().set(lsvr);
248                                     if (isValid(ltp2, allowed, errors)) {
249                                         return Validation.keepOn;
250                                     }
251                                 } catch (Exception e) {
252                                     if (errors != null) {
253                                         errors.add(
254                                                 String.format(
255                                                         "Disallowed %s=%s, tlang=%s, status=%s",
256                                                         type, lsvr, field, status));
257                                         return Validation.keepOn;
258                                     }
259                                 }
260                                 return Validation.abort;
261                             case "x": // private use, everything is valid
262                                 status = Validity.Status.private_use;
263                                 break;
264                             case "u": // value is an attribute, none currently valid
265                                 status = Validity.Status.invalid;
266                                 break;
267                             default:
268                                 status = Validity.Status.invalid;
269                                 break;
270                         }
271                         break;
272                     case 2:
273                         // field is a tkey or a ukey, based on last char
274                         String tOrU = field.charAt(1) < 'A' ? "t" : "u";
275                         Set<String> subtypes = SDI.getBcp47Keys().get(field);
276                         if (subtypes == null) {
277                             status = Validity.Status.invalid;
278                         } else {
279                             String subtype = Joiner.on("-").join(values);
280                             final Map<R2<String, String>, String> bcp47Deprecated =
281                                     SDI.getBcp47Deprecated();
282                             if ("true".equals(bcp47Deprecated.get(Row.of(field, subtype)))) {
283                                 status = Validity.Status.deprecated;
284                             } else {
285                                 if (subtypes.contains(subtype)) {
286                                     status = Validity.Status.regular;
287                                 } else {
288                                     boolean mapUnknownToRegular = false;
289                                     fieldSwitch:
290                                     switch (field) {
291                                         case "x0":
292                                             status = Validity.Status.deprecated;
293                                             break;
294                                         case "dx":
295                                             status =
296                                                     checkSpecials(
297                                                             type,
298                                                             field,
299                                                             values,
300                                                             allowed,
301                                                             LOWERCASE_SCRIPT);
302                                             break;
303                                         case "kr":
304                                             status =
305                                                     checkSpecials(
306                                                             type,
307                                                             field,
308                                                             values,
309                                                             allowed,
310                                                             LOWERCASE_SCRIPT,
311                                                             KR_REORDER);
312                                             break;
313                                         case "rg":
314                                             mapUnknownToRegular = true;
315                                         case "sd":
316                                             status =
317                                                     checkSpecials(
318                                                             type,
319                                                             field,
320                                                             values,
321                                                             allowed,
322                                                             VALIDITY.getCodeToStatus(
323                                                                     LstrType.subdivision));
324                                             break;
325                                         case "vt":
326                                             status = Validity.Status.invalid;
327                                             if (values.isEmpty()) {
328                                                 break fieldSwitch;
329                                             }
330                                             for (String value : values) {
331                                                 try {
332                                                     int intValue = Integer.parseInt(value, 16);
333                                                     if (intValue < 0
334                                                             || intValue > 0x10FFFF
335                                                             || (Character.MIN_SURROGATE <= intValue
336                                                                     && intValue
337                                                                             <= Character
338                                                                                     .MAX_SURROGATE)) {
339                                                         break fieldSwitch;
340                                                     }
341                                                 } catch (NumberFormatException e) {
342                                                     break fieldSwitch;
343                                                 }
344                                             }
345                                             status = Validity.Status.regular;
346                                             break;
347                                         default:
348                                             status = Validity.Status.invalid;
349                                             break;
350                                     }
351                                     if (mapUnknownToRegular == true
352                                             && status == Validity.Status.unknown) {
353                                         status = Validity.Status.regular;
354                                     }
355                                 }
356                             }
357                             if (allowed.isAllowed(status)
358                                     || allowed.isAllowed(
359                                             LstrType.extension, field, subtype, status)) {
360                                 return Validation.keepOn;
361                             } else if (errors == null) {
362                                 return Validation.abort;
363                             }
364                             errors.add(
365                                     String.format(
366                                             "Disallowed %s=%s=%s, status=%s",
367                                             type, field, subtype, status));
368                             return Validation.keepOn;
369                         }
370                         break;
371                     default:
372                         status = Validity.Status.invalid;
373                         break;
374                 }
375                 break;
376             default:
377                 status = null;
378                 break;
379         }
380         if (errors == null) {
381             return Validation.abort;
382         }
383         errors.add(String.format("Disallowed %s=%s, status=%s", type, field, status));
384         return Validation.keepOn;
385     }
386 
387     public static Validity.Status checkSpecials(
388             LstrType type,
389             String field,
390             List<String> values,
391             LocaleValidator.AllowedValid allowed,
392             Map<String, Validity.Status>... validityMaps) {
393         if (values.size() > 1
394                 && (field.equals("sd") || field.equals("rg"))) { // TODO generalize this
395             return Validity.Status.invalid;
396         }
397         Validity.Status best = null;
398         for (String value : values) {
399             Validity.Status status = null;
400             for (Map<String, Validity.Status> validityMap : validityMaps) {
401                 status = validityMap.get(value);
402                 if (status != null) {
403                     break;
404                 }
405             }
406             if (status == null) {
407                 return Validity.Status.invalid;
408             }
409             if (allowed.isAllowed(status) || allowed.isAllowed(type, field, value, status)) {
410                 if (best == null) {
411                     best = status;
412                 }
413             } else {
414                 return status;
415             }
416         }
417         return best == null ? Validity.Status.invalid : best;
418     }
419 
420     public Validity.Status checkRegion(
421             LstrType type,
422             String field,
423             List<String> values,
424             LocaleValidator.AllowedValid allowed) {
425         Validity.Status best = null;
426         for (String value : values) {
427             String value2 = UCharacter.toTitleCase(value, null);
428             Validity.Status status = VALIDITY.getCodeToStatus(LstrType.script).get(value2);
429             if (status == null) {
430                 return Validity.Status.invalid;
431             }
432             if (allowed.isAllowed(status) || allowed.isAllowed(type, field, value, null)) {
433                 if (best == null) {
434                     best = status;
435                 }
436             } else {
437                 return status;
438             }
439         }
440         return best == null ? Validity.Status.invalid : best;
441     }
442 }
443