xref: /aosp_15_r20/external/icu/icu4c/source/i18n/units_data.cpp (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include "bytesinkutil.h"
9 #include "charstr.h"
10 #include "cstring.h"
11 #include "measunit_impl.h"
12 #include "number_decimalquantity.h"
13 #include "resource.h"
14 #include "uassert.h"
15 #include "ulocimp.h"
16 #include "unicode/locid.h"
17 #include "unicode/unistr.h"
18 #include "unicode/ures.h"
19 #include "units_data.h"
20 #include "uresimp.h"
21 #include "util.h"
22 #include <utility>
23 
24 U_NAMESPACE_BEGIN
25 namespace units {
26 
27 namespace {
28 
29 using icu::number::impl::DecimalQuantity;
30 
trimSpaces(CharString & factor,UErrorCode & status)31 void trimSpaces(CharString& factor, UErrorCode& status){
32    CharString trimmed;
33    for (int i = 0 ; i < factor.length(); i++) {
34        if (factor[i] == ' ') continue;
35 
36        trimmed.append(factor[i], status);
37    }
38 
39    factor = std::move(trimmed);
40 }
41 
42 /**
43  * A ResourceSink that collects conversion rate information.
44  *
45  * This class is for use by ures_getAllItemsWithFallback.
46  */
47 class ConversionRateDataSink : public ResourceSink {
48   public:
49     /**
50      * Constructor.
51      * @param out The vector to which ConversionRateInfo instances are to be
52      * added. This vector must outlive the use of the ResourceSink.
53      */
ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> * out)54     explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {}
55 
56     /**
57      * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
58      * conversion rates that are found in `value` to the output vector.
59      *
60      * @param source This string must be "convertUnits": the resource that this
61      * class supports reading.
62      * @param value The "convertUnits" resource, containing unit conversion rate
63      * information.
64      * @param noFallback Ignored.
65      * @param status The standard ICU error code output parameter.
66      */
put(const char * source,ResourceValue & value,UBool,UErrorCode & status)67     void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
68         if (U_FAILURE(status)) { return; }
69         if (uprv_strcmp(source, "convertUnits") != 0) {
70             // This is very strict, however it is the cheapest way to be sure
71             // that with `value`, we're looking at the convertUnits table.
72             status = U_ILLEGAL_ARGUMENT_ERROR;
73             return;
74         }
75         ResourceTable conversionRateTable = value.getTable(status);
76         const char *srcUnit;
77         // We're reusing `value`, which seems to be a common pattern:
78         for (int32_t unit = 0; conversionRateTable.getKeyAndValue(unit, srcUnit, value); unit++) {
79             ResourceTable unitTable = value.getTable(status);
80             const char *key;
81             UnicodeString baseUnit = ICU_Utility::makeBogusString();
82             UnicodeString factor = ICU_Utility::makeBogusString();
83             UnicodeString offset = ICU_Utility::makeBogusString();
84             UnicodeString special = ICU_Utility::makeBogusString();
85             UnicodeString systems = ICU_Utility::makeBogusString();
86             for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) {
87                 if (uprv_strcmp(key, "target") == 0) {
88                     baseUnit = value.getUnicodeString(status);
89                 } else if (uprv_strcmp(key, "factor") == 0) {
90                     factor = value.getUnicodeString(status);
91                 } else if (uprv_strcmp(key, "offset") == 0) {
92                     offset = value.getUnicodeString(status);
93                 } else if (uprv_strcmp(key, "special") == 0) {
94                     special = value.getUnicodeString(status); // the name of a special mapping used instead of factor + optional offset.
95                 } else if (uprv_strcmp(key, "systems") == 0) {
96                     systems = value.getUnicodeString(status);
97                 }
98             }
99             if (U_FAILURE(status)) { return; }
100             if (baseUnit.isBogus() || (factor.isBogus() && special.isBogus())) {
101                 // We could not find a usable conversion rate: bad resource.
102                 status = U_MISSING_RESOURCE_ERROR;
103                 return;
104             }
105 
106             // We don't have this ConversionRateInfo yet: add it.
107             ConversionRateInfo *cr = outVector->emplaceBack();
108             if (!cr) {
109                 status = U_MEMORY_ALLOCATION_ERROR;
110                 return;
111             } else {
112                 cr->sourceUnit.append(srcUnit, status);
113                 cr->baseUnit.appendInvariantChars(baseUnit, status);
114                 if (!factor.isBogus()) {
115                     cr->factor.appendInvariantChars(factor, status);
116                     trimSpaces(cr->factor, status);
117                 }
118                 if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status);
119                 if (!special.isBogus()) cr->specialMappingName.appendInvariantChars(special, status);
120                 cr->systems.appendInvariantChars(systems, status);
121             }
122         }
123     }
124 
125   private:
126     MaybeStackVector<ConversionRateInfo> *outVector;
127 };
128 
operator <(const UnitPreferenceMetadata & a,const UnitPreferenceMetadata & b)129 bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) {
130     return a.compareTo(b) < 0;
131 }
132 
133 /**
134  * A ResourceSink that collects unit preferences information.
135  *
136  * This class is for use by ures_getAllItemsWithFallback.
137  */
138 class UnitPreferencesSink : public ResourceSink {
139   public:
140     /**
141      * Constructor.
142      * @param outPrefs The vector to which UnitPreference instances are to be
143      * added. This vector must outlive the use of the ResourceSink.
144      * @param outMetadata  The vector to which UnitPreferenceMetadata instances
145      * are to be added. This vector must outlive the use of the ResourceSink.
146      */
UnitPreferencesSink(MaybeStackVector<UnitPreference> * outPrefs,MaybeStackVector<UnitPreferenceMetadata> * outMetadata)147     explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs,
148                                  MaybeStackVector<UnitPreferenceMetadata> *outMetadata)
149         : preferences(outPrefs), metadata(outMetadata) {}
150 
151     /**
152      * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
153      * preferences info that are found in `value` to the output vector.
154      *
155      * @param source This string must be "unitPreferenceData": the resource that
156      * this class supports reading.
157      * @param value The "unitPreferenceData" resource, containing unit
158      * preferences data.
159      * @param noFallback Ignored.
160      * @param status The standard ICU error code output parameter. Note: if an
161      * error is returned, outPrefs and outMetadata may be inconsistent.
162      */
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)163     void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
164         if (U_FAILURE(status)) { return; }
165         if (uprv_strcmp(key, "unitPreferenceData") != 0) {
166             // This is very strict, however it is the cheapest way to be sure
167             // that with `value`, we're looking at the convertUnits table.
168             status = U_ILLEGAL_ARGUMENT_ERROR;
169             return;
170         }
171         // The unitPreferenceData structure (see data/misc/units.txt) contains a
172         // hierarchy of category/usage/region, within which are a set of
173         // preferences. Hence three for-loops and another loop for the
174         // preferences themselves:
175         ResourceTable unitPreferenceDataTable = value.getTable(status);
176         const char *category;
177         for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) {
178             ResourceTable categoryTable = value.getTable(status);
179             const char *usage;
180             for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) {
181                 ResourceTable regionTable = value.getTable(status);
182                 const char *region;
183                 for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) {
184                     // `value` now contains the set of preferences for
185                     // category/usage/region.
186                     ResourceArray unitPrefs = value.getArray(status);
187                     if (U_FAILURE(status)) { return; }
188                     int32_t prefLen = unitPrefs.getSize();
189 
190                     // Update metadata for this set of preferences.
191                     UnitPreferenceMetadata *meta = metadata->emplaceBack(
192                         category, usage, region, preferences->length(), prefLen, status);
193                     if (!meta) {
194                         status = U_MEMORY_ALLOCATION_ERROR;
195                         return;
196                     }
197                     if (U_FAILURE(status)) { return; }
198                     if (metadata->length() > 1) {
199                         // Verify that unit preferences are sorted and
200                         // without duplicates.
201                         if (!(*(*metadata)[metadata->length() - 2] <
202                               *(*metadata)[metadata->length() - 1])) {
203                             status = U_INVALID_FORMAT_ERROR;
204                             return;
205                         }
206                     }
207 
208                     // Collect the individual preferences.
209                     for (int32_t i = 0; unitPrefs.getValue(i, value); i++) {
210                         UnitPreference *up = preferences->emplaceBack();
211                         if (!up) {
212                             status = U_MEMORY_ALLOCATION_ERROR;
213                             return;
214                         }
215                         ResourceTable unitPref = value.getTable(status);
216                         if (U_FAILURE(status)) { return; }
217                         for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) {
218                             if (uprv_strcmp(key, "unit") == 0) {
219                                 int32_t length;
220                                 const char16_t *u = value.getString(length, status);
221                                 up->unit.appendInvariantChars(u, length, status);
222                             } else if (uprv_strcmp(key, "geq") == 0) {
223                                 int32_t length;
224                                 const char16_t *g = value.getString(length, status);
225                                 CharString geq;
226                                 geq.appendInvariantChars(g, length, status);
227                                 DecimalQuantity dq;
228                                 dq.setToDecNumber(geq.data(), status);
229                                 up->geq = dq.toDouble();
230                             } else if (uprv_strcmp(key, "skeleton") == 0) {
231                                 up->skeleton = value.getUnicodeString(status);
232                             }
233                         }
234                     }
235                 }
236             }
237         }
238     }
239 
240   private:
241     MaybeStackVector<UnitPreference> *preferences;
242     MaybeStackVector<UnitPreferenceMetadata> *metadata;
243 };
244 
binarySearch(const MaybeStackVector<UnitPreferenceMetadata> * metadata,const UnitPreferenceMetadata & desired,bool * foundCategory,bool * foundUsage,bool * foundRegion,UErrorCode & status)245 int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
246                      const UnitPreferenceMetadata &desired, bool *foundCategory, bool *foundUsage,
247                      bool *foundRegion, UErrorCode &status) {
248     if (U_FAILURE(status)) { return -1; }
249     int32_t start = 0;
250     int32_t end = metadata->length();
251     *foundCategory = false;
252     *foundUsage = false;
253     *foundRegion = false;
254     while (start < end) {
255         int32_t mid = (start + end) / 2;
256         int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion);
257         if (cmp < 0) {
258             start = mid + 1;
259         } else if (cmp > 0) {
260             end = mid;
261         } else {
262             return mid;
263         }
264     }
265     return -1;
266 }
267 
268 /**
269  * Finds the UnitPreferenceMetadata instance that matches the given category,
270  * usage and region: if missing, region falls back to "001", and usage
271  * repeatedly drops tailing components, eventually trying "default"
272  * ("land-agriculture-grain" -> "land-agriculture" -> "land" -> "default").
273  *
274  * @param metadata The full list of UnitPreferenceMetadata instances.
275  * @param category The category to search for. See getUnitCategory().
276  * @param usage The usage for which formatting preferences is needed. If the
277  * given usage is not known, automatic fallback occurs, see function description
278  * above.
279  * @param region The region for which preferences are needed. If there are no
280  * region-specific preferences, this function automatically falls back to the
281  * "001" region (global).
282  * @param status The standard ICU error code output parameter.
283  *   * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR.
284  *   * If fallback to "default" or "001" didn't resolve, status will be
285  *     U_MISSING_RESOURCE.
286  * @return The index into the metadata vector which represents the appropriate
287  * preferences. If appropriate preferences are not found, -1 is returned.
288  */
getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> * metadata,StringPiece category,StringPiece usage,StringPiece region,UErrorCode & status)289 int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
290                                    StringPiece category, StringPiece usage, StringPiece region,
291                                    UErrorCode &status) {
292     if (U_FAILURE(status)) { return -1; }
293     bool foundCategory, foundUsage, foundRegion;
294     UnitPreferenceMetadata desired(category, usage, region, -1, -1, status);
295     int32_t idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
296     if (U_FAILURE(status)) { return -1; }
297     if (idx >= 0) { return idx; }
298     if (!foundCategory) {
299         // TODO: failures can happen if units::getUnitCategory returns a category
300         // that does not appear in unitPreferenceData. Do we want a unit test that
301         // checks unitPreferenceData has full coverage of categories? Or just trust
302         // CLDR?
303         status = U_ILLEGAL_ARGUMENT_ERROR;
304         return -1;
305     }
306     U_ASSERT(foundCategory);
307     while (!foundUsage) {
308         int32_t lastDashIdx = desired.usage.lastIndexOf('-');
309         if (lastDashIdx > 0) {
310             desired.usage.truncate(lastDashIdx);
311         } else if (uprv_strcmp(desired.usage.data(), "default") != 0) {
312             desired.usage.truncate(0).append("default", status);
313         } else {
314             // "default" is not supposed to be missing for any valid category.
315             status = U_MISSING_RESOURCE_ERROR;
316             return -1;
317         }
318         idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
319         if (U_FAILURE(status)) { return -1; }
320     }
321     U_ASSERT(foundCategory);
322     U_ASSERT(foundUsage);
323     if (!foundRegion) {
324         if (uprv_strcmp(desired.region.data(), "001") != 0) {
325             desired.region.truncate(0).append("001", status);
326             idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
327         }
328         if (!foundRegion) {
329             // "001" is not supposed to be missing for any valid usage.
330             status = U_MISSING_RESOURCE_ERROR;
331             return -1;
332         }
333     }
334     U_ASSERT(foundCategory);
335     U_ASSERT(foundUsage);
336     U_ASSERT(foundRegion);
337     U_ASSERT(idx >= 0);
338     return idx;
339 }
340 
341 } // namespace
342 
UnitPreferenceMetadata(StringPiece category,StringPiece usage,StringPiece region,int32_t prefsOffset,int32_t prefsCount,UErrorCode & status)343 UnitPreferenceMetadata::UnitPreferenceMetadata(StringPiece category, StringPiece usage,
344                                                StringPiece region, int32_t prefsOffset,
345                                                int32_t prefsCount, UErrorCode &status) {
346     this->category.append(category, status);
347     this->usage.append(usage, status);
348     this->region.append(region, status);
349     this->prefsOffset = prefsOffset;
350     this->prefsCount = prefsCount;
351 }
352 
compareTo(const UnitPreferenceMetadata & other) const353 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const {
354     int32_t cmp = uprv_strcmp(category.data(), other.category.data());
355     if (cmp == 0) {
356         cmp = uprv_strcmp(usage.data(), other.usage.data());
357     }
358     if (cmp == 0) {
359         cmp = uprv_strcmp(region.data(), other.region.data());
360     }
361     return cmp;
362 }
363 
compareTo(const UnitPreferenceMetadata & other,bool * foundCategory,bool * foundUsage,bool * foundRegion) const364 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory,
365                                           bool *foundUsage, bool *foundRegion) const {
366     int32_t cmp = uprv_strcmp(category.data(), other.category.data());
367     if (cmp == 0) {
368         *foundCategory = true;
369         cmp = uprv_strcmp(usage.data(), other.usage.data());
370     }
371     if (cmp == 0) {
372         *foundUsage = true;
373         cmp = uprv_strcmp(region.data(), other.region.data());
374     }
375     if (cmp == 0) {
376         *foundRegion = true;
377     }
378     return cmp;
379 }
380 
381 // TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace?
getAllConversionRates(MaybeStackVector<ConversionRateInfo> & result,UErrorCode & status)382 void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) {
383     LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
384     ConversionRateDataSink sink(&result);
385     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", sink, status);
386 }
387 
extractConversionInfo(StringPiece source,UErrorCode & status) const388 const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece source,
389                                                                  UErrorCode &status) const {
390     for (size_t i = 0, n = conversionInfo_.length(); i < n; ++i) {
391         if (conversionInfo_[i]->sourceUnit == source) return conversionInfo_[i];
392     }
393 
394     status = U_INTERNAL_PROGRAM_ERROR;
395     return nullptr;
396 }
397 
UnitPreferences(UErrorCode & status)398 U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) {
399     LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
400     UnitPreferencesSink sink(&unitPrefs_, &metadata_);
401     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status);
402 }
403 
getKeyWordValue(const Locale & locale,StringPiece kw,UErrorCode & status)404 CharString getKeyWordValue(const Locale &locale, StringPiece kw, UErrorCode &status) {
405     if (U_FAILURE(status)) { return {}; }
406     auto result = locale.getKeywordValue<CharString>(kw, status);
407     if (U_SUCCESS(status) && result.isEmpty()) {
408         status = U_MISSING_RESOURCE_ERROR;
409     }
410     return result;
411 }
412 
413 MaybeStackVector<UnitPreference>
getPreferencesFor(StringPiece category,StringPiece usage,const Locale & locale,UErrorCode & status) const414     U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringPiece usage,
415                                                   const Locale &locale, UErrorCode &status) const {
416 
417     MaybeStackVector<UnitPreference> result;
418 
419     // TODO: remove this once all the categories are allowed.
420     // WARNING: when this is removed please make sure to keep the "fahrenhe" => "fahrenheit" mapping
421     UErrorCode internalMuStatus = U_ZERO_ERROR;
422     if (category.compare("temperature") == 0) {
423         CharString localeUnitCharString = getKeyWordValue(locale, "mu", internalMuStatus);
424         if (U_SUCCESS(internalMuStatus)) {
425             // The value for -u-mu- is `fahrenhe`, but CLDR and everything else uses `fahrenheit`
426             if (localeUnitCharString == "fahrenhe") {
427                 localeUnitCharString = CharString("fahrenheit", status);
428             }
429             // TODO: use the unit category as Java especially when all the categories are allowed..
430             if (localeUnitCharString == "celsius"
431                 || localeUnitCharString == "fahrenheit"
432                 || localeUnitCharString == "kelvin"
433             ) {
434                 UnitPreference unitPref;
435                 unitPref.unit.append(localeUnitCharString, status);
436                 result.emplaceBackAndCheckErrorCode(status, unitPref);
437                 return result;
438             }
439         }
440     }
441 
442     CharString region = ulocimp_getRegionForSupplementalData(locale.getName(), true, status);
443 
444     // Check the locale system tag, e.g `ms=metric`.
445     UErrorCode internalMeasureTagStatus = U_ZERO_ERROR;
446     CharString localeSystem = getKeyWordValue(locale, "measure", internalMeasureTagStatus);
447     bool isLocaleSystem = false;
448     if (U_SUCCESS(internalMeasureTagStatus) && (localeSystem == "metric" || localeSystem == "ussystem" || localeSystem == "uksystem")) {
449         isLocaleSystem = true;
450     }
451 
452     int32_t idx =
453         getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status);
454     if (U_FAILURE(status)) {
455         return result;
456     }
457 
458     U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`.
459     const UnitPreferenceMetadata *m = metadata_[idx];
460 
461     if (isLocaleSystem) {
462         // if the locale ID specifies a measurment system, check if ALL of the units we got back
463         // are members of that system (or are "metric_adjacent", which we consider to match all
464         // the systems)
465         bool unitsMatchSystem = true;
466         ConversionRates rates(status);
467         for (int32_t i = 0; unitsMatchSystem && i < m->prefsCount; i++) {
468             const UnitPreference& unitPref = *(unitPrefs_[i + m->prefsOffset]);
469             MeasureUnitImpl measureUnit = MeasureUnitImpl::forIdentifier(unitPref.unit.data(), status);
470             for (int32_t j = 0; unitsMatchSystem && j < measureUnit.singleUnits.length(); j++) {
471                 const SingleUnitImpl* singleUnit = measureUnit.singleUnits[j];
472                 const ConversionRateInfo* rateInfo = rates.extractConversionInfo(singleUnit->getSimpleUnitID(), status);
473                 CharString systems(rateInfo->systems, status);
474                 if (!systems.contains("metric_adjacent")) { // "metric-adjacent" is considered to match all the locale systems
475                     if (!systems.contains(localeSystem.data())) {
476                         unitsMatchSystem = false;
477                     }
478                 }
479             }
480         }
481 
482         // if any of the units we got back above don't match the mearurement system the locale ID asked for,
483         // throw out the region and just load the units for the base region for the requested measurement system
484         if (!unitsMatchSystem) {
485             region.clear();
486             if (localeSystem == "ussystem") {
487                 region.append("US", status);
488             } else if (localeSystem == "uksystem") {
489                 region.append("GB", status);
490             } else {
491                 region.append("001", status);
492             }
493             idx = getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status);
494             if (U_FAILURE(status)) {
495                 return result;
496             }
497 
498             m = metadata_[idx];
499         }
500     }
501 
502     for (int32_t i = 0; i < m->prefsCount; i++) {
503         result.emplaceBackAndCheckErrorCode(status, *(unitPrefs_[i + m->prefsOffset]));
504     }
505     return result;
506 }
507 
508 } // namespace units
509 U_NAMESPACE_END
510 
511 #endif /* #if !UCONFIG_NO_FORMATTING */
512