package org.unicode.cldr.test;

import com.ibm.icu.text.SimpleFormatter;
import com.ibm.icu.text.UnicodeSet;
import java.util.Collection;
import java.util.List;
import java.util.regex.Pattern;
import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
import org.unicode.cldr.util.*;
import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
import org.unicode.cldr.util.GrammarInfo.GrammaticalScope;
import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget;
import org.unicode.cldr.util.UnitConverter.UnitId;

/**
 * Checks values whose path starts with {@code //ldml/units}.
 *
 * <p>The individual checks are:
 *
 * <ul>
 *   <li>a {@code gender} element must carry one of the unit genders listed for the locale;
 *   <li>unit patterns must contain the number of placeholders required by their path type;
 *   <li>a {@code long} power pattern must not contain the superscripts ² or ³;
 *   <li>power/prefix patterns are compared against explicit translations of sample composed
 *       units (reported as a warning on mismatch);
 *   <li>duration patterns must contain the h/m/ss symbols named by the {@code durationUnit} type.
 * </ul>
 */
public class CheckUnits extends CheckCLDR {
    private static final Pattern HOUR_SYMBOL = PatternCache.get("h{1,2}");
    private static final Pattern MINUTE_SYMBOL = PatternCache.get("m{1,2}");
    private static final Pattern SECONDS_SYMBOL = PatternCache.get("ss");
    private static final UnicodeSet DISALLOW_LONG_POWER = new UnicodeSet("[²³]").freeze();

    static final UnitConverter unitConverter =
            CLDRConfig.getInstance().getSupplementalDataInfo().getUnitConverter();

    /**
     * Grammatical genders for units in the locale being checked; {@code null} when no grammar
     * info was found for the locale.
     */
    private Collection<String> genders = null;

    /**
     * Records the file to check and looks up the unit genders for its locale.
     *
     * @param cldrFileToCheck the file whose locale determines the gender list
     * @param options passed through to the superclass
     * @param possibleErrors passed through to the superclass
     * @return this checker
     */
    @Override
    public CheckCLDR handleSetCldrFileToCheck(
            CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors) {
        super.handleSetCldrFileToCheck(cldrFileToCheck, options, possibleErrors);

        GrammarInfo grammarInfo =
                CLDRConfig.getInstance()
                        .getSupplementalDataInfo()
                        .getGrammarInfo(cldrFileToCheck.getLocaleID());
        genders =
                grammarInfo == null
                        ? null
                        : grammarInfo.get(
                                GrammaticalTarget.nominal,
                                GrammaticalFeature.grammaticalGender,
                                GrammaticalScope.units);

        return this;
    }

    /**
     * Runs all unit checks for one path/value pair, appending any problems to {@code result}.
     *
     * <p>Does nothing when {@code value} is null, when {@code path} is not under {@code
     * //ldml/units}, or when {@code accept(result)} returns false.
     *
     * @param path the distinguishing path of the item
     * @param fullPath the full path of the item (not used by this check)
     * @param value the value to check; may be null
     * @param options check options (not used by this check)
     * @param result list that receives any errors or warnings
     * @return this checker
     */
    @Override
    public CheckCLDR handleCheck(
            String path, String fullPath, String value, Options options, List<CheckStatus> result) {

        if (value == null || !path.startsWith("//ldml/units")) {
            return this;
        }
        if (!accept(result)) {
            return this;
        }
        final XPathParts parts = XPathParts.getFrozenInstance(path);

        checkGenderValue(parts, value, result);

        final UnitPathType pathType = UnitPathType.getPathType(parts);
        if (pathType == null) {
            return this;
        }

        // Note, the following test has some overlaps with the checkAndReplacePlaceholders
        // test in CheckForExemplars (why there?). That is probably OK, they check in
        // different ways, but some errors will produce two somewhat different error messages.
        checkPlaceholderCount(pathType, value, result);

        switch (pathType) {
            case power:
                checkLongPowerValue(parts, path, value, result);
                // fall through: power patterns also get the composed-unit comparison
            case prefix:
                checkComposedUnits(pathType, parts, value, result);
                break;
            case duration:
                checkDurationPattern(parts, value, result);
                break;
            default:
                break;
        }
        return this;
    }

    /** Reports an error when a {@code gender} element's value is not in the locale's gender list. */
    private void checkGenderValue(XPathParts parts, String value, List<CheckStatus> result) {
        if (genders == null || genders.isEmpty()) {
            return;
        }
        if (!"gender".equals(parts.getElement(-1)) || genders.contains(value)) {
            return;
        }
        result.add(
                new CheckStatus()
                        .setCause(this)
                        .setMainType(CheckStatus.errorType)
                        .setSubtype(Subtype.invalidGenderCode)
                        .setMessage(
                                "The gender value for this locale must be one of: {0}", genders));
    }

    /** Reports an error when the pattern's placeholder count is outside the range for its type. */
    private void checkPlaceholderCount(
            UnitPathType pathType, String value, List<CheckStatus> result) {
        final int min;
        final int max;
        switch (pathType) {
            case power:
            case prefix:
            case perUnit:
            case coordinate: // coordinateUnitPattern
                min = 1;
                max = 1;
                break;
            case times:
            case per:
                min = 2;
                max = 2;
                break;
            case unit:
                min = 0;
                max = 1;
                break;
            default:
                return; // other path types have no placeholder constraint
        }
        try {
            // Only the validation matters here; the compiled formatter itself is not needed.
            SimpleFormatter.compileMinMaxArguments(value, min, max);
        } catch (Exception e) {
            result.add(
                    new CheckStatus()
                            .setCause(this)
                            .setMainType(CheckStatus.errorType)
                            .setSubtype(Subtype.invalidPlaceHolder)
                            .setMessage(
                                    "Invalid unit pattern, must have min "
                                            + min
                                            + " and max "
                                            + max
                                            + " distinct placeholders of the form {n}"));
        }
    }

    /** Reports an error when a {@code long}-width power pattern contains ² or ³. */
    private void checkLongPowerValue(
            XPathParts parts, String path, String value, List<CheckStatus> result) {
        final String width = parts.getAttributeValue(-3, "type");
        // equals() rather than contentEquals(): the latter throws if the attribute is missing.
        if (!"long".equals(width)) {
            return;
        }
        if (!DISALLOW_LONG_POWER.containsSome(fixedValueIfInherited(value, path))) {
            return;
        }
        // Only report when the file itself supplies a value for this path.
        final String unresolvedValue = getCldrFileToCheck().getUnresolved().getStringValue(path);
        if (unresolvedValue == null) {
            return;
        }
        final String message =
                genders == null
                        ? "Long value for power can’t use superscripts; it must be spelled out."
                        : "Long value for power can’t use superscripts; it must be spelled out. [NOTE: values can vary by gender.]";
        result.add(
                new CheckStatus()
                        .setCause(this)
                        .setMainType(CheckStatus.errorType)
                        .setSubtype(Subtype.longPowerWithSubscripts)
                        .setMessage(message));
    }

    /**
     * Warns when composing a sample unit from its components yields a different pattern than the
     * explicit translation of that same unit.
     */
    private void checkComposedUnits(
            UnitPathType pathType, XPathParts parts, String value, List<CheckStatus> result) {
        // These lookups do not depend on the sample unit, so do them once up front.
        final String idType = parts.getAttributeValue(-2, "type");
        final String width = parts.getAttributeValue(-3, "type");
        final String count = parts.getAttributeValue(-1, "count");
        final String caseVariant = parts.getAttributeValue(-1, "case");
        final CLDRFile cldrFile = getCldrFileToCheck();
        final String message =
                "Mismatched component: «{0}» produces «{1}», but the explicit translation is «{2}»."
                        + " See "
                        + CLDRURLS.COMPOUND_UNITS_HELP;

        for (String shortUnitId : pathType.sampleComposedShortUnitIds.get(idType)) {
            final String explicitPattern =
                    UnitPathType.unit.getTrans(
                            cldrFile, width, shortUnitId, count, caseVariant, null, null);
            if (explicitPattern == null) {
                continue;
            }
            final UnitId unitId = unitConverter.createUnitId(shortUnitId);
            final String composedPattern =
                    unitId.toString(cldrFile, width, count, caseVariant, null, false);
            if (composedPattern == null || explicitPattern.equals(composedPattern)) {
                continue;
            }
            result.add(
                    new CheckStatus()
                            .setCause(this)
                            .setMainType(CheckStatus.warningType)
                            .setSubtype(Subtype.mismatchedUnitComponent)
                            .setMessage(message, value, composedPattern, explicitPattern));
        }
    }

    /**
     * Reports the first missing time symbol (hour, then minute, then seconds) that the {@code
     * durationUnit} type calls for but the pattern lacks. At most one error is added.
     */
    private void checkDurationPattern(XPathParts parts, String value, List<CheckStatus> result) {
        final String durationUnitType = parts.findAttributeValue("durationUnit", "type");
        if (durationUnitType == null) {
            return; // no type attribute, so there is nothing to validate against
        }

        final String message;
        if (durationUnitType.contains("h") && !HOUR_SYMBOL.matcher(value).find()) {
            /* Changed message from "The hour symbol (h or hh) is missing"
             * to "The hour indicator should be either h or hh for duration"
             * per http://unicode.org/cldr/trac/ticket/10999
             */
            message = "The hour indicator should be either h or hh for duration.";
        } else if (durationUnitType.contains("m") && !MINUTE_SYMBOL.matcher(value).find()) {
            message = "The minutes symbol (m or mm) is missing.";
        } else if (durationUnitType.contains("s") && !SECONDS_SYMBOL.matcher(value).find()) {
            message = "The seconds symbol (ss) is missing.";
        } else {
            return;
        }
        result.add(
                new CheckStatus()
                        .setCause(this)
                        .setMainType(CheckStatus.errorType)
                        .setSubtype(Subtype.invalidDurationUnitPattern)
                        .setMessage(message));
    }
}