package org.unicode.cldr.test;

import com.ibm.icu.impl.Relation;
import com.ibm.icu.impl.Row;
import com.ibm.icu.impl.Row.R2;
import com.ibm.icu.text.UnicodeSet;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.function.Predicate;
import java.util.regex.Pattern;
import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.DtdData;
import org.unicode.cldr.util.DtdData.Attribute;
import org.unicode.cldr.util.DtdData.Element;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.Factory;
import org.unicode.cldr.util.LocaleIDParser;
import org.unicode.cldr.util.PatternCache;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.SupplementalDataInfo.AttributeValidityInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
import org.unicode.cldr.util.XPathParts;

/**
 * Checks the attribute values that appear in the paths of a locale file.
 *
 * <p>Values are validated against three sources: the attribute validity data and validity
 * variables supplied by {@link SupplementalDataInfo}, the BCP 47 key/type data (for {@code <type
 * key="..." type="...">}), and the plural categories of the locale being checked (for {@code
 * count} attributes). Attributes whose legal values are enumerated in the DTD are skipped, since
 * the DTD already enforces them.
 */
public class CheckAttributeValues extends FactoryCheckCLDR {

    /** Marker matcher for validity entries that have no real test yet; compared by identity. */
    private static final Predicate<String> NOT_DONE_YET =
            new RegexMatcher().set(".*", Pattern.COMMENTS);

    private static final boolean FIND_MISSING =
            CldrUtility.getProperty(
                    "FIND_MISSING_ATTRIBUTE_TESTS",
                    false); // turn on to show <attributeValues> that are missing.
    private static final boolean SHOW_UNNECESSARY =
            false; // turn on to show <attributeValues> we should delete.

    // elementOrder doubles as the lock guarding the one-time metadata initialization.
    static LinkedHashSet<String> elementOrder = new LinkedHashSet<>();
    static LinkedHashSet<String> attributeOrder = new LinkedHashSet<>();
    static LinkedHashSet<String> serialElements = new LinkedHashSet<>();
    /** element name -> (attribute name -> validity test) */
    static Map<String, Map<String, MatcherPattern>> element_attribute_validity = new HashMap<>();
    /** attribute name -> validity test, for entries that list no elements */
    static Map<String, MatcherPattern> common_attribute_validity = new HashMap<>();
    /** validity variable id (e.g. "$language") -> validity test */
    static Map<String, MatcherPattern> variables = new HashMap<>();
    static boolean initialized = false;
    static LocaleMatcher localeMatcher;
    static Map<String, Map<String, String>> code_type_replacement = new TreeMap<>();
    static final SupplementalDataInfo supplementalData =
            CLDRConfig.getInstance().getSupplementalDataInfo();
    static DtdData ldmlDtdData = DtdData.getInstance(DtdType.ldml);

    boolean isEnglish;
    PluralInfo pluralInfo;
    /** element -> attributes for which no validity test was found (only filled if FIND_MISSING). */
    Relation<String, String> missingTests =
            Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);

    static final UnicodeSet DIGITS = new UnicodeSet("[0-9]").freeze();

    public CheckAttributeValues(Factory factory) {
        super(factory);
    }

    /** Prints every element/attribute combination recorded in {@link #missingTests}. */
    @Override
    public void handleFinish() {
        for (Entry<String, Set<String>> entry : missingTests.keyValuesSet()) {
            System.out.println(
                    "Missing element: " + entry.getKey() + ", attributes: " + entry.getValue());
        }
    }

    /**
     * Validates every attribute value in {@code fullPath} and appends any problems to {@code
     * result}.
     *
     * <p>Paths without attributes, paths whose source locale differs from the locale of the file
     * being checked, and paths rejected by {@code accept(result)} are skipped.
     *
     * @param path the distinguishing path
     * @param fullPath the full path including all attributes; may be null
     * @param value the value at the path (not inspected here)
     * @param options check options (not inspected here)
     * @param result list that receives the resulting {@link CheckStatus} entries
     * @return this
     */
    @Override
    public CheckCLDR handleCheck(
            String path, String fullPath, String value, Options options, List<CheckStatus> result) {
        if (fullPath == null) {
            return this; // skip paths that we don't have
        }
        if (fullPath.indexOf('[') < 0) {
            return this; // skip paths with no attributes
        }
        String locale = getCldrFileToCheck().getSourceLocaleID(path, null);

        // skip paths that are not in the immediate locale
        if (!getCldrFileToCheck().getLocaleID().equals(locale)) {
            return this;
        }
        if (!accept(result)) {
            return this;
        }
        XPathParts parts = XPathParts.getFrozenInstance(fullPath);
        for (int i = 0; i < parts.size(); ++i) {
            if (parts.getAttributeCount(i) == 0) {
                continue;
            }
            Map<String, String> attributes = parts.getAttributes(i);
            String element = parts.getElement(i);
            Element elementInfo = ldmlDtdData.getElementFromName().get(element);
            Map<String, MatcherPattern> attribute_validity =
                    element_attribute_validity.get(element);

            for (Entry<String, String> attributeEntry : attributes.entrySet()) {
                String attribute = attributeEntry.getKey();
                String attributeValue = attributeEntry.getValue();

                // getMetadata() already treats both lookups as possibly null, so guard here too
                // instead of failing with a NullPointerException.
                Attribute attributeInfo =
                        elementInfo == null ? null : elementInfo.getAttributeNamed(attribute);
                if (attributeInfo != null && !attributeInfo.values.isEmpty()) {
                    // we don't need to check, since the DTD will enforce values
                    continue;
                }

                // special hack for <type key="calendar" type="chinese">Chinese Calendar</type>
                if (element.equals("type") && attribute.equals("type")) {
                    checkBcp47Type(attributes.get("key"), attribute, attributeValue, result);
                    continue;
                }

                // check the common attributes first
                boolean haveTest =
                        check(common_attribute_validity, attribute, attributeValue, result);
                // then for the specific element
                // NOTE(review): because of the short-circuit, the element-specific test only
                // runs when no common test exists for this attribute -- confirm this is intended.
                haveTest = haveTest || check(attribute_validity, attribute, attributeValue, result);
                if (!haveTest && FIND_MISSING) {
                    missingTests.put(element, attribute);
                }

                // now for plurals
                if (attribute.equals("count")) {
                    checkPluralCount(attributeValue, locale, result);
                }

                // TODO check other variable elements, like dayPeriods
            }
        }
        return this;
    }

    /**
     * Checks the {@code type} attribute of a {@code <type>} element against the BCP 47 values
     * known for its {@code key}. An unknown or missing key has no legal values, so any value is
     * reported as unexpected.
     */
    private void checkBcp47Type(
            String key, String attribute, String attributeValue, List<CheckStatus> result) {
        Set<String> typeValues = BCP47_KEY_VALUES.get(key);
        if (typeValues != null && typeValues.contains(attributeValue)) {
            return;
        }
        result.add(
                new CheckStatus()
                        .setCause(this)
                        .setMainType(CheckStatus.errorType)
                        .setSubtype(Subtype.unexpectedAttributeValue)
                        .setMessage(
                                "Unexpected Attribute Value {0}={1}: expected: {2}",
                                new Object[] {attribute, attributeValue, typeValues}));
    }

    /**
     * Checks a {@code count} attribute value. Values consisting only of ASCII digits are accepted;
     * anything else must be a plural category used by the locale (or listed in {@link
     * #PLURAL_EXCEPTIONS}). A value that is not a plural category name at all is reported as an
     * illegal plural as well.
     */
    private void checkPluralCount(
            String attributeValue, String locale, List<CheckStatus> result) {
        if (DIGITS.containsAll(attributeValue)) {
            return; // explicit numeric count, ok
        }
        Count countValue;
        try {
            countValue = PluralInfo.Count.valueOf(attributeValue);
        } catch (IllegalArgumentException notAPluralCategory) {
            countValue = null; // reported below
        }
        boolean legal =
                countValue != null
                        && (pluralInfo.getCounts().contains(countValue)
                                || isPluralException(countValue, locale));
        if (!legal) {
            Object shown = countValue != null ? countValue : attributeValue;
            result.add(
                    new CheckStatus()
                            .setCause(this)
                            .setMainType(CheckStatus.errorType)
                            .setSubtype(Subtype.illegalPlural)
                            .setMessage(
                                    "Illegal plural value {0}; must be one of: {1}",
                                    new Object[] {shown, pluralInfo.getCounts()}));
        }
    }

    /** Plural categories that are tolerated for specific locales although not in their rules. */
    static final Relation<PluralInfo.Count, String> PLURAL_EXCEPTIONS =
            Relation.of(
                    new EnumMap<PluralInfo.Count, Set<String>>(PluralInfo.Count.class),
                    HashSet.class);

    static {
        PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "hr");
        PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sr");
        PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "sh");
        PLURAL_EXCEPTIONS.put(PluralInfo.Count.many, "bs");
        PLURAL_EXCEPTIONS.put(PluralInfo.Count.few, "ru");
    }

    /**
     * Returns true if {@code countValue} is listed in {@link #PLURAL_EXCEPTIONS} for {@code
     * locale} or for the part of {@code locale} before its first underscore.
     */
    static boolean isPluralException(Count countValue, String locale) {
        Set<String> exceptions = PLURAL_EXCEPTIONS.get(countValue);
        if (exceptions == null) {
            return false;
        }
        if (exceptions.contains(locale)) {
            return true;
        }
        int bar = locale.indexOf('_'); // catch bs_Cyrl, etc.
        return bar > 0 && exceptions.contains(locale.substring(0, bar));
    }

    /**
     * Tests {@code attributeValue} against the validity test registered for {@code attribute}.
     *
     * <p>A value that fails the test but has an entry in the deprecated-code replacements is
     * reported as a warning (and not at all for English); any other failure is reported as an
     * error.
     *
     * @param attribute_validity attribute name -> test; may be null
     * @param attribute the attribute name
     * @param attributeValue the value to test
     * @param result list that receives the resulting {@link CheckStatus} entries
     * @return true if a test existed for the attribute (whether or not it passed), false if
     *     there was no test
     */
    private boolean check(
            Map<String, MatcherPattern> attribute_validity,
            String attribute,
            String attributeValue,
            List<CheckStatus> result) {
        if (attribute_validity == null) {
            return false; // no test
        }
        MatcherPattern matcherPattern = attribute_validity.get(attribute);
        if (matcherPattern == null) {
            return false; // no test
        }
        if (matcherPattern.matcher.test(attributeValue)) {
            return true;
        }
        // special check for deprecated codes
        String replacement = getReplacement(matcherPattern.value, attributeValue);
        if (replacement == null) {
            result.add(
                    new CheckStatus()
                            .setCause(this)
                            .setMainType(CheckStatus.errorType)
                            .setSubtype(Subtype.unexpectedAttributeValue)
                            .setMessage(
                                    "Unexpected Attribute Value {0}={1}: expected: {2}",
                                    new Object[] {
                                        attribute, attributeValue, matcherPattern.pattern
                                    }));
            return true;
        }
        if (isEnglish) {
            return true; // don't flag English
        }
        if (replacement.length() == 0) {
            result.add(
                    new CheckStatus()
                            .setCause(this)
                            .setMainType(CheckStatus.warningType)
                            .setSubtype(Subtype.deprecatedAttribute)
                            .setMessage(
                                    "Deprecated Attribute Value {0}={1}. Consider removing.",
                                    new Object[] {attribute, attributeValue}));
        } else {
            result.add(
                    new CheckStatus()
                            .setCause(this)
                            .setMainType(CheckStatus.warningType)
                            .setSubtype(Subtype.deprecatedAttributeWithReplacement)
                            .setMessage(
                                    "Deprecated Attribute Value {0}={1}. Consider removing, and possibly modifying the related value for {2}.",
                                    new Object[] {attribute, attributeValue, replacement}));
        }
        return true;
    }

    /**
     * Returns replacement, or null if there is none. "" if the code is deprecated, but without a
     * replacement.
     *
     * @param value the validity variable name, of the form $language
     * @param attributeValue the code to look up
     * @return the replacement, "" or null as described above
     */
    String getReplacement(String value, String attributeValue) {
        Map<String, String> type_replacement = code_type_replacement.get(value);
        if (type_replacement == null) {
            return null;
        }
        return type_replacement.get(attributeValue);
    }

    LocaleIDParser localeIDParser = new LocaleIDParser();

    /**
     * Prepares this check for {@code cldrFileToCheck}.
     *
     * <p>The test only runs in the FINAL_TESTING and BUILD phases; in any other phase it is
     * marked as skipped. On first use the shared validity metadata is loaded under a lock. If the
     * file's locale ID does not pass the locale matcher, an error is added to {@code
     * possibleErrors}.
     */
    @Override
    public CheckCLDR handleSetCldrFileToCheck(
            CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors) {
        if (cldrFileToCheck == null) {
            return this;
        }
        if (Phase.FINAL_TESTING == getPhase() || Phase.BUILD == getPhase()) {
            setSkipTest(false); // ok
        } else {
            setSkipTest(true);
            return this;
        }

        final String localeId = cldrFileToCheck.getLocaleID();
        pluralInfo = supplementalData.getPlurals(PluralType.cardinal, localeId);
        super.handleSetCldrFileToCheck(cldrFileToCheck, options, possibleErrors);
        isEnglish = "en".equals(localeIDParser.set(cldrFileToCheck.getLocaleID()).getLanguage());
        synchronized (elementOrder) {
            if (!initialized) {
                getMetadata();
                initialized = true;
                localeMatcher = LocaleMatcher.make();
            }
        }
        if (!localeMatcher.test(cldrFileToCheck.getLocaleID())) {
            possibleErrors.add(
                    new CheckStatus()
                            .setCause(this)
                            .setMainType(CheckStatus.errorType)
                            .setSubtype(Subtype.invalidLocale)
                            .setMessage(
                                    "Invalid Locale {0}",
                                    new Object[] {cldrFileToCheck.getLocaleID()}));
        }
        return this;
    }

    /**
     * Loads the validity variables and the attribute validity entries from the supplemental data
     * into {@link #variables}, {@link #common_attribute_validity} and {@link
     * #element_attribute_validity}. Only entries that apply to the ldml DTD are registered.
     */
    private void getMetadata() {
        Map<String, R2<String, String>> rawVariables = supplementalData.getValidityInfo();
        for (Entry<String, R2<String, String>> item : rawVariables.entrySet()) {
            String id = item.getKey();
            String type = item.getValue().get0();
            String value = item.getValue().get1();
            MatcherPattern mp = getMatcherPattern2(type, value);
            if (mp != null) {
                variables.put(id, mp);
            }
        }

        Map<AttributeValidityInfo, String> rawAttributeValueInfo =
                supplementalData.getAttributeValidity();

        for (Entry<AttributeValidityInfo, String> entry : rawAttributeValueInfo.entrySet()) {
            AttributeValidityInfo item = entry.getKey();
            String value = entry.getValue();
            MatcherPattern mp = getMatcherPattern2(item.getType(), value);
            if (mp == null) {
                System.out.println("Failed to make matcher for: " + item);
                continue;
            }
            if (FIND_MISSING && mp.matcher == NOT_DONE_YET) {
                missingTests.put(item.getElements().toString(), item.getAttributes().toString());
            }

            Set<DtdType> dtds = item.getDtds();
            // TODO handle other DTDs
            if (!dtds.contains(DtdType.ldml)) {
                continue;
            }
            Set<String> attributeList = item.getAttributes();
            Set<String> elementList = item.getElements();
            if (elementList.size() == 0) {
                addAttributes(attributeList, common_attribute_validity, mp);
                continue;
            }
            for (String element : elementList) {
                reportDtdMismatches(element, attributeList);
                // The entry is registered even if the DTD lookup above complained.
                Map<String, MatcherPattern> attribute_validity =
                        element_attribute_validity.computeIfAbsent(
                                element, unused -> new TreeMap<>());
                addAttributes(attributeList, attribute_validity, mp);
            }
        }
    }

    /**
     * Prints a diagnostic for each element/attribute of a validity entry that the ldml DTD does
     * not know, and (if SHOW_UNNECESSARY) for each attribute whose values the DTD already lists.
     */
    private void reportDtdMismatches(String element, Set<String> attributeList) {
        Element elementInfo = ldmlDtdData.getElementFromName().get(element);
        if (elementInfo == null) {
            System.out.println(
                    "Illegal <attributeValues>, element not valid: element: " + element);
            return;
        }
        for (String attribute : attributeList) {
            Attribute attributeInfo = elementInfo.getAttributeNamed(attribute);
            if (attributeInfo == null) {
                System.out.println(
                        "Illegal <attributeValues>, attribute not valid: element: "
                                + element
                                + ", attribute: "
                                + attribute);
            } else if (!attributeInfo.values.isEmpty() && SHOW_UNNECESSARY) {
                System.out.println(
                        "Unnecessary <attributeValues …>, the DTD has specific list: element: "
                                + element
                                + ", attribute: "
                                + attribute
                                + ", "
                                + attributeInfo.values);
            }
        }
    }

    /** BCP 47 key (or key alias) -> unmodifiable set of legal type values, including aliases. */
    static final Map<String, Set<String>> BCP47_KEY_VALUES = buildBcp47KeyValues();

    /** Builds {@link #BCP47_KEY_VALUES} from the supplemental BCP 47 keys and aliases. */
    private static Map<String, Set<String>> buildBcp47KeyValues() {
        Map<String, Set<String>> temp = new HashMap<>();
        Relation<R2<String, String>, String> bcp47Aliases = supplementalData.getBcp47Aliases();
        for (Entry<String, Set<String>> keyValues :
                supplementalData.getBcp47Keys().keyValuesSet()) {
            Set<String> fullValues = new TreeSet<>();
            String key = keyValues.getKey();
            for (String value : keyValues.getValue()) {
                if (key.equals("cu")) { // Currency codes are in upper case.
                    fullValues.add(value.toUpperCase());
                } else {
                    fullValues.add(value);
                }
                R2<String, String> keyValue = R2.of(key, value);
                Set<String> valueAliases = bcp47Aliases.getAll(keyValue);
                if (valueAliases != null) {
                    fullValues.addAll(valueAliases);
                }
            }
            // Special case exception for generic calendar, since we don't want to expose it in
            // bcp47
            if (key.equals("ca")) {
                fullValues.add("generic");
            }
            fullValues = Collections.unmodifiableSet(fullValues);
            temp.put(key, fullValues);
            // add aliased keys
            Set<String> keyAliases = bcp47Aliases.getAll(Row.of(key, ""));
            if (keyAliases != null) {
                for (String aliasKey : keyAliases) {
                    temp.put(aliasKey, fullValues);
                }
            }
        }
        // Hack for 'x', private use. Registered once, after all real keys, so it always wins.
        temp.put("x", Collections.<String>emptySet());
        return Collections.unmodifiableMap(temp);
    }

    /**
     * Builds the validity test for a {@code bcp47} entry.
     *
     * @param key "key" to accept any known BCP 47 key, otherwise the BCP 47 key whose type values
     *     are to be accepted
     * @return the test, or null if {@code key} is not a known BCP 47 key
     */
    private MatcherPattern getBcp47MatcherPattern(String key) {
        // <key type="calendar">Calendar</key>
        // <type key="calendar" type="chinese">Chinese Calendar</type>

        // <attributeValues elements="key" attributes="type" type="bcp47">key</attributeValues>
        // <attributeValues elements="type" attributes="key" type="bcp47">key</attributeValues>
        // <attributeValues elements="type" attributes="type" type="bcp47">use-key</attributeValues>

        Set<String> values =
                key.equals("key") ? BCP47_KEY_VALUES.keySet() : BCP47_KEY_VALUES.get(key);
        if (values == null) {
            return null; // unknown key: the caller reports the failure
        }
        MatcherPattern m = new MatcherPattern();
        m.value = key;
        m.pattern = values.toString();
        m.matcher = new CollectionMatcher().set(values);
        return m;
    }

    /**
     * Builds the validity test for one validity entry.
     *
     * <p>If {@code value} names an already loaded variable, a copy of that variable's test is
     * returned (wrapped in a {@link ListMatcher} when {@code type} is "list"). Otherwise the test
     * is built according to {@code type}: "choice", "bcp47", "regex", "locale" or "notDoneYet".
     *
     * @return the test, or null if the type is unknown or no test could be built
     */
    private MatcherPattern getMatcherPattern2(String type, String value) {
        MatcherPattern variable = variables.get(value);
        if (variable != null) {
            // Copy rather than share, so that later changes do not affect the variable itself.
            MatcherPattern copy = new MatcherPattern();
            copy.pattern = variable.pattern;
            copy.value = value;
            copy.matcher =
                    "list".equals(type)
                            ? new ListMatcher().set(variable.matcher)
                            : variable.matcher;
            return copy;
        }

        MatcherPattern result = new MatcherPattern();
        result.pattern = value;
        result.value = value;
        if ("choice".equals(type)) {
            result.matcher =
                    new CollectionMatcher()
                            .set(new HashSet<>(Arrays.asList(value.trim().split("\\s+"))));
        } else if ("bcp47".equals(type)) {
            result = getBcp47MatcherPattern(value);
        } else if ("regex".equals(type)) {
            result.matcher =
                    new RegexMatcher()
                            .set(value, Pattern.COMMENTS); // Pattern.COMMENTS to get whitespace
        } else if ("locale".equals(type)) {
            result.matcher = LocaleMatcher.make();
        } else if ("notDoneYet".equals(type) || "notDoneYet".equals(value)) {
            result.matcher = NOT_DONE_YET;
        } else {
            System.out.println("unknown type; value: <" + value + ">,\t" + type);
            return null;
        }
        return result;
    }

    /**
     * Registers {@code mp} as the test for each of {@code attributes}. If an attribute already has
     * a test, the stored test becomes "old OR new".
     *
     * <p>The merged test is a new object: {@code mp} itself is never modified, because the same
     * instance is registered for several attributes and elements and a change to it would leak
     * into those other registrations.
     */
    private void addAttributes(
            Set<String> attributes,
            Map<String, MatcherPattern> attribute_validity,
            MatcherPattern mp) {
        for (String attribute : attributes) {
            MatcherPattern old = attribute_validity.get(attribute);
            MatcherPattern toStore = mp;
            if (old != null) {
                toStore = new MatcherPattern();
                toStore.value = mp.value;
                toStore.matcher = new OrMatcher().set(old.matcher, mp.matcher);
                toStore.pattern = old.pattern + " OR " + mp.pattern;
            }
            attribute_validity.put(attribute, toStore);
        }
    }

    /** A validity test together with its source value and a printable description. */
    private static class MatcherPattern {
        public String value;
        Predicate<String> matcher;
        String pattern;

        @Override
        public String toString() {
            return matcher.getClass().getName() + "\t" + pattern;
        }
    }

    /** Accepts values that match a regular expression in full. */
    public static class RegexMatcher implements Predicate<String> {
        // The compiled Pattern is immutable; a Matcher is created per call rather than shared.
        private Pattern pattern;

        public Predicate<String> set(String pattern) {
            this.pattern = PatternCache.get(pattern);
            return this;
        }

        public Predicate<String> set(String pattern, int flags) {
            this.pattern = Pattern.compile(pattern, flags);
            return this;
        }

        @Override
        public boolean test(String value) {
            return pattern.matcher(value).matches();
        }
    }

    /** Accepts values contained in a collection. */
    public static class CollectionMatcher implements Predicate<String> {
        private Collection<String> collection;

        public Predicate<String> set(Collection<String> collection) {
            this.collection = collection;
            return this;
        }

        @Override
        public boolean test(String value) {
            return collection.contains(value);
        }
    }

    /** Accepts values accepted by either of two matchers. */
    public static class OrMatcher implements Predicate<String> {
        private Predicate<String> a;
        private Predicate<String> b;

        public Predicate<String> set(Predicate<String> a, Predicate<String> b) {
            this.a = a;
            this.b = b;
            return this;
        }

        @Override
        public boolean test(String value) {
            return a.test(value) || b.test(value);
        }
    }

    /**
     * Accepts whitespace-separated lists in which every item is accepted by another matcher. An
     * empty (or all-whitespace) list is accepted.
     */
    public static class ListMatcher implements Predicate<String> {
        private Predicate<String> other;

        public Predicate<String> set(Predicate<String> other) {
            this.other = other;
            return this;
        }

        @Override
        public boolean test(String value) {
            String[] values = value.trim().split("\\s+");
            if (values.length == 1 && values[0].length() == 0) {
                return true;
            }
            for (String item : values) {
                if (!other.test(item)) {
                    return false;
                }
            }
            return true;
        }
    }

    /**
     * Accepts locale IDs whose language, script, region and variants each pass the corresponding
     * validity variable, as well as IDs accepted by the "$grandfathered" variable.
     *
     * <p>The matchers are read from {@link #variables} when the singleton is first created.
     * NOTE(review): the singleton reuses one {@link LocaleIDParser} across calls; confirm that
     * callers never invoke {@link #test(String)} concurrently.
     */
    public static class LocaleMatcher implements Predicate<String> {
        Predicate<String> legacy = variables.get("$grandfathered").matcher;
        Predicate<String> language = variables.get("$language").matcher;
        Predicate<String> script = variables.get("$script").matcher;
        Predicate<String> territory = variables.get("$territory").matcher;
        Predicate<String> variant = variables.get("$variant").matcher;
        LocaleIDParser lip = new LocaleIDParser();

        private LocaleMatcher() {}

        /** Holder class: the singleton is created on the first call to {@link #make()}. */
        private static final class LocaleMatcherHelper {
            static final LocaleMatcher SINGLETON = new LocaleMatcher();
        }

        public static LocaleMatcher make() {
            return LocaleMatcherHelper.SINGLETON;
        }

        @Override
        public boolean test(String value) {
            if (legacy.test(value)) {
                return true;
            }
            lip.set(value);
            if (!language.test(lip.getLanguage())) {
                return false;
            }
            String field = lip.getScript();
            if (field.length() != 0 && !script.test(field)) {
                return false;
            }
            field = lip.getRegion();
            if (field.length() != 0 && !territory.test(field)) {
                return false;
            }
            for (String variantField : lip.getVariants()) {
                if (!variant.test(variantField)) {
                    return false;
                }
            }
            return true;
        }
    }
}