1 package org.unicode.cldr.unittest; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.base.Splitter; 5 import com.google.common.collect.ImmutableMap; 6 import com.google.common.collect.ImmutableMultimap; 7 import com.google.common.collect.ImmutableSet; 8 import com.google.common.collect.ImmutableSortedSet; 9 import com.google.common.collect.LinkedHashMultimap; 10 import com.google.common.collect.Multimap; 11 import com.google.common.collect.TreeMultimap; 12 import com.ibm.icu.util.ICUUncheckedIOException; 13 import com.ibm.icu.util.Output; 14 import java.io.BufferedReader; 15 import java.io.IOException; 16 import java.util.ArrayList; 17 import java.util.Arrays; 18 import java.util.Collections; 19 import java.util.LinkedHashSet; 20 import java.util.List; 21 import java.util.Locale; 22 import java.util.Map; 23 import java.util.Set; 24 import java.util.TreeMap; 25 import java.util.TreeSet; 26 import java.util.regex.Matcher; 27 import java.util.regex.Pattern; 28 import java.util.stream.Collectors; 29 import java.util.stream.Stream; 30 import org.unicode.cldr.util.CldrUtility; 31 import org.unicode.cldr.util.Rational; 32 import org.unicode.cldr.util.UnitConverter.ConversionInfo; 33 import org.unicode.cldr.util.UnitConverter.TargetInfo; 34 import org.unicode.cldr.util.UnitConverter.UnitSystem; 35 36 final class NistUnits { 37 private static final boolean DEBUG = false; 38 39 public static final String NIST_CONVERSIONS = "nistConversions"; 40 public static final String NIST_DERIVED_UNITS = "nistDerivedUnits"; 41 public static final String NIST_BASE_UNITS = "nistBaseUnits"; 42 public static final String NIST_ACCEPTED_UNITS = "nistAcceptedUnits"; 43 44 static final Splitter SPLIT_MIDDOT = Splitter.on('·').trimResults(); 45 static final Splitter SPLIT_TABS = Splitter.on('\t').trimResults(); 46 static final Splitter SPLIT_COMMAS = Splitter.on(',').trimResults(); 47 static final Splitter SPLIT_PARENS = Splitter.on('(').trimResults(); 48 49 static final Pattern flatExponent = Pattern.compile("([a-zA-Z]+)(-?[0-9]+)?"); 50 static final Pattern footnotes = Pattern.compile(" \\d+$"); 51 static final Pattern firstPart = Pattern.compile("([^\\[(,]*)(.*)"); 52 static final Pattern temperature = Pattern.compile("\\((\\d+)(?:\\.(\\d+))? °([CF])\\)"); 53 static final Pattern addHyphens = Pattern.compile("[- ]+"); 54 static final Pattern finalParens = Pattern.compile("\\(([^()]+)\\)$"); 55 56 static final Set<UnitSystem> SI_METRIC = Set.of(UnitSystem.si, UnitSystem.metric); 57 58 static final Multimap<String, String> unitToQuantity; 59 static final Map<String, TargetInfo> derivedUnitToConversion; 60 static final Set<ExternalUnitConversionData> externalConversionData; 61 static final Multimap<String, String> idChanges; 62 static final Set<String> skipping; 63 static final Multimap<String, String> unitToSystems = null; 64 static final Map<String, ExternalUnitConversionData> unitToData; 65 static final Set<String> SiAcceptable; 66 67 // HACK for temperature 68 /** 69 * degree Celsius (°C) kelvin (K) T/K = t/°C + 273.15 degree centigrade 15 degree Celsius (°C) 70 * t/°C ≈ t/deg. cent. degree Fahrenheit (°F) degree Celsius (°C) t/°C = (t/°F - 32)/1.8 degree 71 * Fahrenheit (°F) kelvin (K) T/K = (t/°F + 459.67)/1.8 degree Rankine (°R) kelvin (K) T/K = 72 * (T/°R)/1.8 kelvin (K) degree Celsius (°C) t/°C = T/K - 273.15 73 */ 74 static final Map<String, Rational> temperatureHack = 75 ImmutableMap.of( 76 "fahrenheit|celsius", Rational.of("-32/1.8"), 77 "fahrenheit|kelvin", Rational.of("459.67/1.8"), 78 "celsius|kelvin", Rational.of("273.15")); 79 80 static { 81 Multimap<String, String> _idChanges = LinkedHashMultimap.create(); 82 Set<String> _skipping = new LinkedHashSet<>(); 83 List<ExternalUnitConversionData> _externalConversionData = new ArrayList<>(); 84 Multimap<String, String> _unitToQuantity = TreeMultimap.create(); 85 Map<String, TargetInfo> unitToTargetInfo = new TreeMap<>(); 86 Map<String, ExternalUnitConversionData> _unitToData = new TreeMap<>(); 87 Set<String> _siAcceptable = new TreeSet<>(); 88 load( _externalConversionData, _unitToQuantity, unitToTargetInfo, _idChanges, _skipping, _unitToData, _siAcceptable)89 load( 90 _externalConversionData, 91 _unitToQuantity, 92 unitToTargetInfo, 93 _idChanges, 94 _skipping, 95 _unitToData, 96 _siAcceptable); 97 98 skipping = ImmutableSet.copyOf(_skipping); 99 idChanges = ImmutableMultimap.copyOf(_idChanges); 100 externalConversionData = ImmutableSortedSet.copyOf(_externalConversionData); 101 unitToData = ImmutableMap.copyOf(_unitToData); 102 unitToQuantity = ImmutableMultimap.copyOf(_unitToQuantity); 103 derivedUnitToConversion = ImmutableMap.copyOf(unitToTargetInfo); 104 SiAcceptable = ImmutableSet.copyOf(_siAcceptable); 105 if (DEBUG) { 106 for (ExternalUnitConversionData item : externalConversionData) { 107 System.out.println(item); 108 } 109 } 110 } 111 load( List<ExternalUnitConversionData> _externalConversionData, Multimap<String, String> _unitToQuantity, Map<String, TargetInfo> unitToTargetInfo, Multimap<String, String> _idChanges, Set<String> _skipping, Map<String, ExternalUnitConversionData> _unitToData, Set<String> _siAcceptable)112 static void load( 113 List<ExternalUnitConversionData> _externalConversionData, 114 Multimap<String, String> _unitToQuantity, 115 Map<String, TargetInfo> unitToTargetInfo, 116 Multimap<String, String> _idChanges, 117 Set<String> _skipping, 118 Map<String, ExternalUnitConversionData> _unitToData, 119 Set<String> _siAcceptable) { 120 try { 121 // Get the SI acceptable units 122 // Unfortunately, this page has inconsistent formats, so we just mine it for 123 // the systems 124 try (BufferedReader in = CldrUtility.getUTF8Data("external/nistAcceptedUnits.txt")) { 125 try (Stream<String> s = in.lines()) { 126 for (String line : (Iterable<String>) s::iterator) { 127 if (line.startsWith("#") || line.isBlank()) { 128 continue; 129 } 130 List<String> parts = SPLIT_TABS.splitToList(line); 131 _siAcceptable.add(parts.get(0).toLowerCase(Locale.ROOT).replace(' ', '-')); 132 } 133 } 134 } 135 // There is also no conversion data for the following. 136 // The conversion value for daltons is given in a footnote 137 // The only reason we need 'gram' is to get the systems 138 _externalConversionData.add( 139 new ExternalUnitConversionData( 140 "mass", 141 "dalton", 142 "Da", 143 "kilogram", 144 Rational.of("1.660538782E-27"), 145 null, 146 Set.of(UnitSystem.si_acceptable), 147 "HACK", 148 "hack")); 149 _externalConversionData.add( 150 new ExternalUnitConversionData( 151 "mass", 152 "gram", 153 "g", 154 "kilogram", 155 Rational.of("1E-3"), 156 null, 157 Set.of(UnitSystem.si), 158 "HACK", 159 "hack")); 160 161 try (BufferedReader in = CldrUtility.getUTF8Data("external/nistConversions.txt")) { 162 String quantity = null; 163 try (Stream<String> s = in.lines()) { 164 for (String line : (Iterable<String>) s::iterator) { 165 if (line.startsWith("#") 166 || line.isBlank() 167 || line.equals("To convert from\tto\tMultiply by") 168 || line.startsWith( 169 "degree Fahrenheit hour square foot per British thermal unitth inch") // bad NIST data 170 ) { 171 continue; 172 } 173 List<String> parts = SPLIT_TABS.splitToList(line); 174 switch (parts.size()) { 175 case 1: 176 quantity = parts.get(0); 177 break; 178 case 4: 179 Rational factor = 180 Rational.of((parts.get(2) + parts.get(3)).replace(" ", "")); 181 ExternalUnitConversionData data = 182 getExternalUnitConversionData( 183 quantity, 184 parts.get(0), 185 null, 186 parts.get(1), 187 factor, 188 null, 189 _siAcceptable, 190 NIST_CONVERSIONS, 191 line, 192 _idChanges); 193 _externalConversionData.add(data); 194 break; 195 default: 196 _skipping.add(line); 197 } 198 } 199 } 200 } 201 202 Map<String, String> _symbolToUnit = new TreeMap<>(); 203 try (BufferedReader in = CldrUtility.getUTF8Data("external/nistBaseUnits.txt")) { 204 try (Stream<String> s = in.lines()) { 205 for (String line : (Iterable<String>) s::iterator) { 206 if (line.startsWith("#") || line.isBlank()) { 207 continue; 208 } 209 List<String> parts = SPLIT_TABS.splitToList(line); 210 // #Base quantity Name Symbol 211 String quantity2 = parts.get(0); 212 String name = parts.get(1); 213 String symbol = parts.get(2); 214 switch (parts.size()) { 215 case 3: 216 _symbolToUnit.put(symbol, name); 217 _unitToQuantity.put(name, quantity2); 218 ExternalUnitConversionData data = 219 getExternalUnitConversionData( 220 quantity2, // 221 name, // 222 symbol, 223 name, // 224 Rational.ONE, // 225 null, // 226 _siAcceptable, // 227 NIST_BASE_UNITS, 228 line, 229 _idChanges); 230 _externalConversionData.add(data); 231 break; 232 } 233 } 234 } 235 } 236 237 try (BufferedReader in = CldrUtility.getUTF8Data("external/nistDerivedUnits.txt")) { 238 try (Stream<String> s = in.lines()) { 239 for (String line : (Iterable<String>) s::iterator) { 240 if (line.startsWith("#") || line.isBlank()) { 241 continue; 242 } 243 List<String> parts = SPLIT_TABS.splitToList(line); 244 // #Quantity Special Name Special symbol Expression in terms of other 245 // SI units Expression in terms of SI base units 246 247 String quantity = parts.get(0); 248 List<String> quantities = 249 SPLIT_COMMAS.splitToList(quantity).stream() 250 .map( 251 x -> 252 SPLIT_PARENS 253 .split(parts.get(0)) 254 .iterator() 255 .next()) 256 .collect(Collectors.toList()); 257 quantity = Joiner.on(", ").join(quantities); 258 259 String name = SPLIT_PARENS.split(parts.get(1)).iterator().next(); 260 if (name.equals("degree Celsius")) { 261 name = "celsius"; 262 } 263 264 String symbol = parts.get(2); 265 String expressionInOtherSymbols = parts.get(4); 266 String expressionInBaseSymbols = parts.get(4); 267 _symbolToUnit.put(symbol, name); 268 _unitToQuantity.putAll(name, quantities); 269 270 final String targetUnit = 271 getUnitFromSymbols(expressionInBaseSymbols, _symbolToUnit); 272 unitToTargetInfo.put( 273 name, 274 new TargetInfo( 275 targetUnit, 276 new ConversionInfo(Rational.ONE, Rational.ZERO), 277 Collections.emptyMap())); 278 279 ExternalUnitConversionData data = 280 getExternalUnitConversionData( 281 quantity, // 282 name, // 283 symbol, 284 targetUnit, // 285 Rational.ONE, // 286 null, // 287 _siAcceptable, // 288 NIST_DERIVED_UNITS, 289 line, 290 _idChanges); 291 _externalConversionData.add(data); 292 } 293 } 294 } 295 for (ExternalUnitConversionData data : _externalConversionData) { 296 _unitToData.put(data.source, data); 297 } 298 } catch (IOException e) { 299 throw new ICUUncheckedIOException(e); 300 } 301 } 302 systems( String unit, Set<String> _siAcceptable, UnitSystem... system)303 private static Set<UnitSystem> systems( 304 String unit, Set<String> _siAcceptable, UnitSystem... system) { 305 TreeSet<UnitSystem> result = new TreeSet<>(Arrays.asList(system)); 306 if (_siAcceptable.contains(unit)) { 307 result.add(UnitSystem.si_acceptable); 308 } 309 return ImmutableSet.copyOf(result); 310 } 311 getUnitFromSymbols( String expressionInBaseSymbols, Map<String, String> symbolToUnit)312 public static String getUnitFromSymbols( 313 String expressionInBaseSymbols, Map<String, String> symbolToUnit) { 314 String result; 315 // handle the irregular formats 316 if (expressionInBaseSymbols.equals("m/m")) { 317 result = "meter-per-meter"; 318 } else if (expressionInBaseSymbols.equals("m2/m2")) { 319 result = "square-meter-per-square-meter"; 320 } else { 321 // m2 · kg · s-3 · A-1 322 StringBuilder numerator = new StringBuilder(); 323 StringBuilder denominator = new StringBuilder(); 324 for (String part : SPLIT_MIDDOT.split(expressionInBaseSymbols)) { 325 final Matcher parts = flatExponent.matcher(part); 326 if (!parts.matches()) { 327 throw new IllegalArgumentException("bad symbol: " + part); 328 } 329 String unit = symbolToUnit.get(parts.group(1)); 330 String pow = null; 331 int power = 0; 332 final String exponent = parts.group(2); 333 if (exponent != null) { 334 power = Integer.parseInt(exponent); 335 switch (Math.abs(power)) { 336 case 0: 337 case 1: 338 break; // skip 339 case 2: 340 pow = "square-"; 341 break; 342 case 3: 343 pow = "cubic-"; 344 break; 345 default: 346 pow = "pow" + Math.abs(power) + "-"; 347 break; 348 } 349 } 350 StringBuilder target = power >= 0 ? numerator : denominator; 351 if (target.length() != 0) { 352 target.append('-'); 353 } 354 if (pow != null) { 355 target.append(pow); 356 } 357 target.append(unit); 358 } 359 result = 360 (numerator.length() == 0 ? "" : numerator) 361 + (denominator.length() == 0 362 ? "" 363 : (numerator.length() == 0 ? "per-" : "-per-") + denominator); 364 } 365 if (DEBUG) System.out.println(expressionInBaseSymbols + " => " + result); 366 return result; 367 } 368 369 // https://www.nist.gov/pml/special-publication-811/nist-guide-si-appendix-b-conversion-factors/nist-guide-si-appendix-b9 370 getExternalUnitConversionData( String quantity, String sourceRaw, String symbolRaw, String targetRaw, Rational factor, Rational offset, Set<String> acceptable, String from, String line, Multimap<String, String> changes)371 public static ExternalUnitConversionData getExternalUnitConversionData( 372 String quantity, 373 String sourceRaw, 374 String symbolRaw, 375 String targetRaw, 376 Rational factor, 377 Rational offset, 378 Set<String> acceptable, 379 String from, 380 String line, 381 Multimap<String, String> changes) { 382 LinkedHashSet<String> sourceChanges = new LinkedHashSet<>(); 383 Output<String> symbolOut = new Output<>(); 384 symbolOut.value = symbolRaw; 385 String source = extractUnit(quantity, sourceRaw, sourceChanges, symbolOut); 386 String symbol = symbolOut.value; 387 changes.putAll(source, sourceChanges); 388 389 LinkedHashSet<String> targetChanges = new LinkedHashSet<>(); 390 String target = extractUnit(quantity, targetRaw, targetChanges, symbolOut); 391 changes.putAll(target, targetChanges); 392 393 offset = temperatureHack.get(source + "|" + target); 394 TreeSet<UnitSystem> systems = new TreeSet<>(); 395 if (acceptable.contains(source)) { 396 systems.add(UnitSystem.si_acceptable); 397 } 398 switch (from) { 399 case NIST_BASE_UNITS: 400 systems.addAll(SI_METRIC); 401 break; 402 case NIST_DERIVED_UNITS: 403 systems.addAll(SI_METRIC); 404 break; 405 } 406 return new ExternalUnitConversionData( 407 quantity, source, symbol, target, factor, offset, systems, from, line); 408 } 409 extractUnit( String quantity, String source, Set<String> changes, Output<String> symbolOut)410 private static String extractUnit( 411 String quantity, String source, Set<String> changes, Output<String> symbolOut) { 412 // drop footnotes 413 source = replace(footnotes, source, "", changes); 414 415 if (source.contains("(15 °C)")) { 416 int debug = 0; 417 } 418 source = replace(temperature, source, " $1$2$3", changes); 419 420 String oldSource = source; 421 source = source.replace("(sidereal)", "sidereal"); 422 source = source.replace("(mean)", "mean"); 423 source = source.replace("(printer's)", "printer"); 424 425 source = source.replace("therm (U.S.)", "therm-us"); 426 source = source.replace("(U.S.)", ""); 427 428 source = source.replace("(long, 112 lb)", " long"); 429 source = source.replace("(troy or apothecary)", "troy"); 430 source = source.replace("(U.S. survey)", "survey"); 431 source = source.replace("(tropical)", "tropical"); 432 source = source.replace("(based on U.S. survey foot)", "survey"); 433 source = source.replace("(avoirdupois)", ""); 434 source = source.replace("(metric)", "metric"); 435 source = source.replace("(electric)", "electric"); 436 source = source.replace("(water)", "water"); 437 source = source.replace("(boiler)", "boiler"); 438 source = source.replace("(0.001 in)", "inch"); 439 source = source.replace("(365 days)", "365"); 440 441 source = source.replace("(U.K.)", "imperial"); 442 source = source.replace("[Canadian and U.K. (Imperial)]", "imperial"); 443 source = source.replace("[Canadian and U.K. fluid (Imperial)]", "fluid imperial"); 444 445 source = source.replace("second squared", "square second"); 446 source = source.replace("foot squared", "square foot"); 447 source = source.replace("meter squared", "square meter"); 448 source = source.replace("inch squared", "square inch"); 449 450 source = source.replace("mile, nautical", "nautical-mile"); 451 source = source.replace(", technical", " technical"); 452 source = source.replace(", kilogram (nutrition)", " nutrition"); 453 source = source.replace("(nutrition)", "nutrition"); 454 source = source.replace(", metric", " metric"); 455 source = source.replace(", assay", " assay"); 456 source = source.replace(", long", " long"); 457 source = source.replace(", register", " register"); 458 459 source = source.replace("foot to the fourth power", "pow4-foot"); 460 source = source.replace("inch to the fourth power", "pow4-inch"); 461 source = source.replace("meter to the fourth power", "pow4-meter"); 462 463 source = source.replace("Britsh", "british"); 464 source = source.replace("reciprocal", "per"); 465 466 source = replaceWhole(oldSource, source, changes); 467 468 final Matcher match = firstPart.matcher(source); 469 match.matches(); 470 String newSource = match.group(1).trim(); 471 String remainder = match.group(2); 472 if (symbolOut.value == null) { 473 Matcher endParens = finalParens.matcher(remainder); 474 if (endParens.find()) { 475 symbolOut.value = endParens.group(1).trim(); 476 } 477 } 478 source = replaceWhole(source, newSource, changes); 479 480 if (remainder.contains("dry")) { 481 source = replaceWhole(source, source + " dry", changes); 482 } else if (remainder.contains("fluid")) { 483 source = replaceWhole(source, source + " fluid", changes); 484 } 485 486 if (source.contains("squared")) { 487 System.out.println("*FIX squared: " + source); 488 } 489 490 oldSource = source; 491 492 source = source.replace("degree ", ""); 493 source = source.replace("metric-ton", "tonne"); 494 source = source.replace("ton-metric", "tonne"); 495 source = source.replace("psi", "pound-force-per-square-inch"); 496 source = source.replace("ounce fluid", "fluid-ounce"); 497 source = source.replace("unitthi", "unit"); 498 source = source.replace("calorieth", "calorie"); 499 source = source.replace("acceleration of free fall", "g-force"); 500 source = source.replace("of mercury", "ofhg"); 501 if (quantity.equals("ANGLE")) { 502 source = source.replace("minute", "arc-minute"); 503 source = source.replace("second", "arc-second"); 504 } 505 source = source.replace("British thermal unitth", "british thermal unit"); 506 source = source.replace("British thermal unitIT", "british thermal unit it"); 507 source = source.replace("calorieIT", "calorie it"); 508 509 source = replaceWhole(oldSource, source, changes); 510 511 // don't record these 512 source = source.toLowerCase(Locale.ROOT); 513 source = addHyphens.matcher(source.trim()).replaceAll("-"); 514 515 return source; 516 } 517 replaceWhole(String source, String newSource, Set<String> changes)518 private static String replaceWhole(String source, String newSource, Set<String> changes) { 519 if (!newSource.equals(source)) { 520 changes.add(" ⟹ " + newSource); 521 } 522 return newSource; 523 } 524 replace( Pattern pattern, String source, String replacement, Set<String> changes)525 private static String replace( 526 Pattern pattern, String source, String replacement, Set<String> changes) { 527 String newSource = pattern.matcher(source).replaceAll(replacement); 528 if (!newSource.equals(source)) { 529 changes.add(" ⟹ " + newSource); 530 } 531 return newSource; 532 } 533 } 534