xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/NistUnits.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.unittest;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.base.Splitter;
5 import com.google.common.collect.ImmutableMap;
6 import com.google.common.collect.ImmutableMultimap;
7 import com.google.common.collect.ImmutableSet;
8 import com.google.common.collect.ImmutableSortedSet;
9 import com.google.common.collect.LinkedHashMultimap;
10 import com.google.common.collect.Multimap;
11 import com.google.common.collect.TreeMultimap;
12 import com.ibm.icu.util.ICUUncheckedIOException;
13 import com.ibm.icu.util.Output;
14 import java.io.BufferedReader;
15 import java.io.IOException;
16 import java.util.ArrayList;
17 import java.util.Arrays;
18 import java.util.Collections;
19 import java.util.LinkedHashSet;
20 import java.util.List;
21 import java.util.Locale;
22 import java.util.Map;
23 import java.util.Set;
24 import java.util.TreeMap;
25 import java.util.TreeSet;
26 import java.util.regex.Matcher;
27 import java.util.regex.Pattern;
28 import java.util.stream.Collectors;
29 import java.util.stream.Stream;
30 import org.unicode.cldr.util.CldrUtility;
31 import org.unicode.cldr.util.Rational;
32 import org.unicode.cldr.util.UnitConverter.ConversionInfo;
33 import org.unicode.cldr.util.UnitConverter.TargetInfo;
34 import org.unicode.cldr.util.UnitConverter.UnitSystem;
35 
36 final class NistUnits {
37     private static final boolean DEBUG = false;
38 
39     public static final String NIST_CONVERSIONS = "nistConversions";
40     public static final String NIST_DERIVED_UNITS = "nistDerivedUnits";
41     public static final String NIST_BASE_UNITS = "nistBaseUnits";
42     public static final String NIST_ACCEPTED_UNITS = "nistAcceptedUnits";
43 
44     static final Splitter SPLIT_MIDDOT = Splitter.on('·').trimResults();
45     static final Splitter SPLIT_TABS = Splitter.on('\t').trimResults();
46     static final Splitter SPLIT_COMMAS = Splitter.on(',').trimResults();
47     static final Splitter SPLIT_PARENS = Splitter.on('(').trimResults();
48 
49     static final Pattern flatExponent = Pattern.compile("([a-zA-Z]+)(-?[0-9]+)?");
50     static final Pattern footnotes = Pattern.compile(" \\d+$");
51     static final Pattern firstPart = Pattern.compile("([^\\[(,]*)(.*)");
52     static final Pattern temperature = Pattern.compile("\\((\\d+)(?:\\.(\\d+))? °([CF])\\)");
53     static final Pattern addHyphens = Pattern.compile("[- ]+");
54     static final Pattern finalParens = Pattern.compile("\\(([^()]+)\\)$");
55 
56     static final Set<UnitSystem> SI_METRIC = Set.of(UnitSystem.si, UnitSystem.metric);
57 
58     static final Multimap<String, String> unitToQuantity;
59     static final Map<String, TargetInfo> derivedUnitToConversion;
60     static final Set<ExternalUnitConversionData> externalConversionData;
61     static final Multimap<String, String> idChanges;
62     static final Set<String> skipping;
63     static final Multimap<String, String> unitToSystems = null;
64     static final Map<String, ExternalUnitConversionData> unitToData;
65     static final Set<String> SiAcceptable;
66 
67     // HACK for temperature
68     /**
69      * degree Celsius (°C) kelvin (K) T/K = t/°C + 273.15 degree centigrade 15 degree Celsius (°C)
70      * t/°C ≈ t/deg. cent. degree Fahrenheit (°F) degree Celsius (°C) t/°C = (t/°F - 32)/1.8 degree
71      * Fahrenheit (°F) kelvin (K) T/K = (t/°F + 459.67)/1.8 degree Rankine (°R) kelvin (K) T/K =
72      * (T/°R)/1.8 kelvin (K) degree Celsius (°C) t/°C = T/K - 273.15
73      */
74     static final Map<String, Rational> temperatureHack =
75             ImmutableMap.of(
76                     "fahrenheit|celsius", Rational.of("-32/1.8"),
77                     "fahrenheit|kelvin", Rational.of("459.67/1.8"),
78                     "celsius|kelvin", Rational.of("273.15"));
79 
80     static {
81         Multimap<String, String> _idChanges = LinkedHashMultimap.create();
82         Set<String> _skipping = new LinkedHashSet<>();
83         List<ExternalUnitConversionData> _externalConversionData = new ArrayList<>();
84         Multimap<String, String> _unitToQuantity = TreeMultimap.create();
85         Map<String, TargetInfo> unitToTargetInfo = new TreeMap<>();
86         Map<String, ExternalUnitConversionData> _unitToData = new TreeMap<>();
87         Set<String> _siAcceptable = new TreeSet<>();
88 
load( _externalConversionData, _unitToQuantity, unitToTargetInfo, _idChanges, _skipping, _unitToData, _siAcceptable)89         load(
90                 _externalConversionData,
91                 _unitToQuantity,
92                 unitToTargetInfo,
93                 _idChanges,
94                 _skipping,
95                 _unitToData,
96                 _siAcceptable);
97 
98         skipping = ImmutableSet.copyOf(_skipping);
99         idChanges = ImmutableMultimap.copyOf(_idChanges);
100         externalConversionData = ImmutableSortedSet.copyOf(_externalConversionData);
101         unitToData = ImmutableMap.copyOf(_unitToData);
102         unitToQuantity = ImmutableMultimap.copyOf(_unitToQuantity);
103         derivedUnitToConversion = ImmutableMap.copyOf(unitToTargetInfo);
104         SiAcceptable = ImmutableSet.copyOf(_siAcceptable);
105         if (DEBUG) {
106             for (ExternalUnitConversionData item : externalConversionData) {
107                 System.out.println(item);
108             }
109         }
110     }
111 
load( List<ExternalUnitConversionData> _externalConversionData, Multimap<String, String> _unitToQuantity, Map<String, TargetInfo> unitToTargetInfo, Multimap<String, String> _idChanges, Set<String> _skipping, Map<String, ExternalUnitConversionData> _unitToData, Set<String> _siAcceptable)112     static void load(
113             List<ExternalUnitConversionData> _externalConversionData,
114             Multimap<String, String> _unitToQuantity,
115             Map<String, TargetInfo> unitToTargetInfo,
116             Multimap<String, String> _idChanges,
117             Set<String> _skipping,
118             Map<String, ExternalUnitConversionData> _unitToData,
119             Set<String> _siAcceptable) {
120         try {
121             // Get the SI acceptable units
122             // Unfortunately, this page has inconsistent formats, so we just mine it for
123             // the systems
124             try (BufferedReader in = CldrUtility.getUTF8Data("external/nistAcceptedUnits.txt")) {
125                 try (Stream<String> s = in.lines()) {
126                     for (String line : (Iterable<String>) s::iterator) {
127                         if (line.startsWith("#") || line.isBlank()) {
128                             continue;
129                         }
130                         List<String> parts = SPLIT_TABS.splitToList(line);
131                         _siAcceptable.add(parts.get(0).toLowerCase(Locale.ROOT).replace(' ', '-'));
132                     }
133                 }
134             }
135             // There is also no conversion data for the following.
136             // The conversion value for daltons is given in a footnote
137             // The only reason we need 'gram' is to get the systems
138             _externalConversionData.add(
139                     new ExternalUnitConversionData(
140                             "mass",
141                             "dalton",
142                             "Da",
143                             "kilogram",
144                             Rational.of("1.660538782E-27"),
145                             null,
146                             Set.of(UnitSystem.si_acceptable),
147                             "HACK",
148                             "hack"));
149             _externalConversionData.add(
150                     new ExternalUnitConversionData(
151                             "mass",
152                             "gram",
153                             "g",
154                             "kilogram",
155                             Rational.of("1E-3"),
156                             null,
157                             Set.of(UnitSystem.si),
158                             "HACK",
159                             "hack"));
160 
161             try (BufferedReader in = CldrUtility.getUTF8Data("external/nistConversions.txt")) {
162                 String quantity = null;
163                 try (Stream<String> s = in.lines()) {
164                     for (String line : (Iterable<String>) s::iterator) {
165                         if (line.startsWith("#")
166                                 || line.isBlank()
167                                 || line.equals("To convert from\tto\tMultiply by")
168                                 || line.startsWith(
169                                         "degree Fahrenheit hour square foot per British thermal unitth inch") // bad NIST data
170                         ) {
171                             continue;
172                         }
173                         List<String> parts = SPLIT_TABS.splitToList(line);
174                         switch (parts.size()) {
175                             case 1:
176                                 quantity = parts.get(0);
177                                 break;
178                             case 4:
179                                 Rational factor =
180                                         Rational.of((parts.get(2) + parts.get(3)).replace(" ", ""));
181                                 ExternalUnitConversionData data =
182                                         getExternalUnitConversionData(
183                                                 quantity,
184                                                 parts.get(0),
185                                                 null,
186                                                 parts.get(1),
187                                                 factor,
188                                                 null,
189                                                 _siAcceptable,
190                                                 NIST_CONVERSIONS,
191                                                 line,
192                                                 _idChanges);
193                                 _externalConversionData.add(data);
194                                 break;
195                             default:
196                                 _skipping.add(line);
197                         }
198                     }
199                 }
200             }
201 
202             Map<String, String> _symbolToUnit = new TreeMap<>();
203             try (BufferedReader in = CldrUtility.getUTF8Data("external/nistBaseUnits.txt")) {
204                 try (Stream<String> s = in.lines()) {
205                     for (String line : (Iterable<String>) s::iterator) {
206                         if (line.startsWith("#") || line.isBlank()) {
207                             continue;
208                         }
209                         List<String> parts = SPLIT_TABS.splitToList(line);
210                         // #Base quantity  Name    Symbol
211                         String quantity2 = parts.get(0);
212                         String name = parts.get(1);
213                         String symbol = parts.get(2);
214                         switch (parts.size()) {
215                             case 3:
216                                 _symbolToUnit.put(symbol, name);
217                                 _unitToQuantity.put(name, quantity2);
218                                 ExternalUnitConversionData data =
219                                         getExternalUnitConversionData(
220                                                 quantity2, //
221                                                 name, //
222                                                 symbol,
223                                                 name, //
224                                                 Rational.ONE, //
225                                                 null, //
226                                                 _siAcceptable, //
227                                                 NIST_BASE_UNITS,
228                                                 line,
229                                                 _idChanges);
230                                 _externalConversionData.add(data);
231                                 break;
232                         }
233                     }
234                 }
235             }
236 
237             try (BufferedReader in = CldrUtility.getUTF8Data("external/nistDerivedUnits.txt")) {
238                 try (Stream<String> s = in.lines()) {
239                     for (String line : (Iterable<String>) s::iterator) {
240                         if (line.startsWith("#") || line.isBlank()) {
241                             continue;
242                         }
243                         List<String> parts = SPLIT_TABS.splitToList(line);
244                         // #Quantity   Special Name    Special symbol  Expression in terms of other
245                         // SI units   Expression in terms of SI base units
246 
247                         String quantity = parts.get(0);
248                         List<String> quantities =
249                                 SPLIT_COMMAS.splitToList(quantity).stream()
250                                         .map(
251                                                 x ->
252                                                         SPLIT_PARENS
253                                                                 .split(parts.get(0))
254                                                                 .iterator()
255                                                                 .next())
256                                         .collect(Collectors.toList());
257                         quantity = Joiner.on(", ").join(quantities);
258 
259                         String name = SPLIT_PARENS.split(parts.get(1)).iterator().next();
260                         if (name.equals("degree Celsius")) {
261                             name = "celsius";
262                         }
263 
264                         String symbol = parts.get(2);
265                         String expressionInOtherSymbols = parts.get(4);
266                         String expressionInBaseSymbols = parts.get(4);
267                         _symbolToUnit.put(symbol, name);
268                         _unitToQuantity.putAll(name, quantities);
269 
270                         final String targetUnit =
271                                 getUnitFromSymbols(expressionInBaseSymbols, _symbolToUnit);
272                         unitToTargetInfo.put(
273                                 name,
274                                 new TargetInfo(
275                                         targetUnit,
276                                         new ConversionInfo(Rational.ONE, Rational.ZERO),
277                                         Collections.emptyMap()));
278 
279                         ExternalUnitConversionData data =
280                                 getExternalUnitConversionData(
281                                         quantity, //
282                                         name, //
283                                         symbol,
284                                         targetUnit, //
285                                         Rational.ONE, //
286                                         null, //
287                                         _siAcceptable, //
288                                         NIST_DERIVED_UNITS,
289                                         line,
290                                         _idChanges);
291                         _externalConversionData.add(data);
292                     }
293                 }
294             }
295             for (ExternalUnitConversionData data : _externalConversionData) {
296                 _unitToData.put(data.source, data);
297             }
298         } catch (IOException e) {
299             throw new ICUUncheckedIOException(e);
300         }
301     }
302 
systems( String unit, Set<String> _siAcceptable, UnitSystem... system)303     private static Set<UnitSystem> systems(
304             String unit, Set<String> _siAcceptable, UnitSystem... system) {
305         TreeSet<UnitSystem> result = new TreeSet<>(Arrays.asList(system));
306         if (_siAcceptable.contains(unit)) {
307             result.add(UnitSystem.si_acceptable);
308         }
309         return ImmutableSet.copyOf(result);
310     }
311 
getUnitFromSymbols( String expressionInBaseSymbols, Map<String, String> symbolToUnit)312     public static String getUnitFromSymbols(
313             String expressionInBaseSymbols, Map<String, String> symbolToUnit) {
314         String result;
315         // handle the irregular formats
316         if (expressionInBaseSymbols.equals("m/m")) {
317             result = "meter-per-meter";
318         } else if (expressionInBaseSymbols.equals("m2/m2")) {
319             result = "square-meter-per-square-meter";
320         } else {
321             // m2 · kg · s-3 · A-1
322             StringBuilder numerator = new StringBuilder();
323             StringBuilder denominator = new StringBuilder();
324             for (String part : SPLIT_MIDDOT.split(expressionInBaseSymbols)) {
325                 final Matcher parts = flatExponent.matcher(part);
326                 if (!parts.matches()) {
327                     throw new IllegalArgumentException("bad symbol: " + part);
328                 }
329                 String unit = symbolToUnit.get(parts.group(1));
330                 String pow = null;
331                 int power = 0;
332                 final String exponent = parts.group(2);
333                 if (exponent != null) {
334                     power = Integer.parseInt(exponent);
335                     switch (Math.abs(power)) {
336                         case 0:
337                         case 1:
338                             break; // skip
339                         case 2:
340                             pow = "square-";
341                             break;
342                         case 3:
343                             pow = "cubic-";
344                             break;
345                         default:
346                             pow = "pow" + Math.abs(power) + "-";
347                             break;
348                     }
349                 }
350                 StringBuilder target = power >= 0 ? numerator : denominator;
351                 if (target.length() != 0) {
352                     target.append('-');
353                 }
354                 if (pow != null) {
355                     target.append(pow);
356                 }
357                 target.append(unit);
358             }
359             result =
360                     (numerator.length() == 0 ? "" : numerator)
361                             + (denominator.length() == 0
362                                     ? ""
363                                     : (numerator.length() == 0 ? "per-" : "-per-") + denominator);
364         }
365         if (DEBUG) System.out.println(expressionInBaseSymbols + " => " + result);
366         return result;
367     }
368 
369     // https://www.nist.gov/pml/special-publication-811/nist-guide-si-appendix-b-conversion-factors/nist-guide-si-appendix-b9
370 
getExternalUnitConversionData( String quantity, String sourceRaw, String symbolRaw, String targetRaw, Rational factor, Rational offset, Set<String> acceptable, String from, String line, Multimap<String, String> changes)371     public static ExternalUnitConversionData getExternalUnitConversionData(
372             String quantity,
373             String sourceRaw,
374             String symbolRaw,
375             String targetRaw,
376             Rational factor,
377             Rational offset,
378             Set<String> acceptable,
379             String from,
380             String line,
381             Multimap<String, String> changes) {
382         LinkedHashSet<String> sourceChanges = new LinkedHashSet<>();
383         Output<String> symbolOut = new Output<>();
384         symbolOut.value = symbolRaw;
385         String source = extractUnit(quantity, sourceRaw, sourceChanges, symbolOut);
386         String symbol = symbolOut.value;
387         changes.putAll(source, sourceChanges);
388 
389         LinkedHashSet<String> targetChanges = new LinkedHashSet<>();
390         String target = extractUnit(quantity, targetRaw, targetChanges, symbolOut);
391         changes.putAll(target, targetChanges);
392 
393         offset = temperatureHack.get(source + "|" + target);
394         TreeSet<UnitSystem> systems = new TreeSet<>();
395         if (acceptable.contains(source)) {
396             systems.add(UnitSystem.si_acceptable);
397         }
398         switch (from) {
399             case NIST_BASE_UNITS:
400                 systems.addAll(SI_METRIC);
401                 break;
402             case NIST_DERIVED_UNITS:
403                 systems.addAll(SI_METRIC);
404                 break;
405         }
406         return new ExternalUnitConversionData(
407                 quantity, source, symbol, target, factor, offset, systems, from, line);
408     }
409 
extractUnit( String quantity, String source, Set<String> changes, Output<String> symbolOut)410     private static String extractUnit(
411             String quantity, String source, Set<String> changes, Output<String> symbolOut) {
412         // drop footnotes
413         source = replace(footnotes, source, "", changes);
414 
415         if (source.contains("(15 °C)")) {
416             int debug = 0;
417         }
418         source = replace(temperature, source, " $1$2$3", changes);
419 
420         String oldSource = source;
421         source = source.replace("(sidereal)", "sidereal");
422         source = source.replace("(mean)", "mean");
423         source = source.replace("(printer's)", "printer");
424 
425         source = source.replace("therm (U.S.)", "therm-us");
426         source = source.replace("(U.S.)", "");
427 
428         source = source.replace("(long, 112 lb)", " long");
429         source = source.replace("(troy or apothecary)", "troy");
430         source = source.replace("(U.S. survey)", "survey");
431         source = source.replace("(tropical)", "tropical");
432         source = source.replace("(based on U.S. survey foot)", "survey");
433         source = source.replace("(avoirdupois)", "");
434         source = source.replace("(metric)", "metric");
435         source = source.replace("(electric)", "electric");
436         source = source.replace("(water)", "water");
437         source = source.replace("(boiler)", "boiler");
438         source = source.replace("(0.001 in)", "inch");
439         source = source.replace("(365 days)", "365");
440 
441         source = source.replace("(U.K.)", "imperial");
442         source = source.replace("[Canadian and U.K. (Imperial)]", "imperial");
443         source = source.replace("[Canadian and U.K. fluid (Imperial)]", "fluid imperial");
444 
445         source = source.replace("second squared", "square second");
446         source = source.replace("foot squared", "square foot");
447         source = source.replace("meter squared", "square meter");
448         source = source.replace("inch squared", "square inch");
449 
450         source = source.replace("mile, nautical", "nautical-mile");
451         source = source.replace(", technical", " technical");
452         source = source.replace(", kilogram (nutrition)", " nutrition");
453         source = source.replace("(nutrition)", "nutrition");
454         source = source.replace(", metric", " metric");
455         source = source.replace(", assay", " assay");
456         source = source.replace(", long", " long");
457         source = source.replace(", register", " register");
458 
459         source = source.replace("foot to the fourth power", "pow4-foot");
460         source = source.replace("inch to the fourth power", "pow4-inch");
461         source = source.replace("meter to the fourth power", "pow4-meter");
462 
463         source = source.replace("Britsh", "british");
464         source = source.replace("reciprocal", "per");
465 
466         source = replaceWhole(oldSource, source, changes);
467 
468         final Matcher match = firstPart.matcher(source);
469         match.matches();
470         String newSource = match.group(1).trim();
471         String remainder = match.group(2);
472         if (symbolOut.value == null) {
473             Matcher endParens = finalParens.matcher(remainder);
474             if (endParens.find()) {
475                 symbolOut.value = endParens.group(1).trim();
476             }
477         }
478         source = replaceWhole(source, newSource, changes);
479 
480         if (remainder.contains("dry")) {
481             source = replaceWhole(source, source + " dry", changes);
482         } else if (remainder.contains("fluid")) {
483             source = replaceWhole(source, source + " fluid", changes);
484         }
485 
486         if (source.contains("squared")) {
487             System.out.println("*FIX squared: " + source);
488         }
489 
490         oldSource = source;
491 
492         source = source.replace("degree ", "");
493         source = source.replace("metric-ton", "tonne");
494         source = source.replace("ton-metric", "tonne");
495         source = source.replace("psi", "pound-force-per-square-inch");
496         source = source.replace("ounce fluid", "fluid-ounce");
497         source = source.replace("unitthi", "unit");
498         source = source.replace("calorieth", "calorie");
499         source = source.replace("acceleration of free fall", "g-force");
500         source = source.replace("of mercury", "ofhg");
501         if (quantity.equals("ANGLE")) {
502             source = source.replace("minute", "arc-minute");
503             source = source.replace("second", "arc-second");
504         }
505         source = source.replace("British thermal unitth", "british thermal unit");
506         source = source.replace("British thermal unitIT", "british thermal unit it");
507         source = source.replace("calorieIT", "calorie it");
508 
509         source = replaceWhole(oldSource, source, changes);
510 
511         // don't record these
512         source = source.toLowerCase(Locale.ROOT);
513         source = addHyphens.matcher(source.trim()).replaceAll("-");
514 
515         return source;
516     }
517 
replaceWhole(String source, String newSource, Set<String> changes)518     private static String replaceWhole(String source, String newSource, Set<String> changes) {
519         if (!newSource.equals(source)) {
520             changes.add(" ⟹ " + newSource);
521         }
522         return newSource;
523     }
524 
replace( Pattern pattern, String source, String replacement, Set<String> changes)525     private static String replace(
526             Pattern pattern, String source, String replacement, Set<String> changes) {
527         String newSource = pattern.matcher(source).replaceAll(replacement);
528         if (!newSource.equals(source)) {
529             changes.add(" ⟹ " + newSource);
530         }
531         return newSource;
532     }
533 }
534