1 package org.unicode.cldr.unittest; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.base.Objects; 5 import com.google.common.collect.ImmutableMultimap; 6 import com.google.common.collect.ImmutableSet; 7 import com.google.common.collect.Multimap; 8 import com.google.common.collect.TreeMultimap; 9 import com.ibm.icu.impl.Relation; 10 import com.ibm.icu.impl.Row; 11 import com.ibm.icu.impl.Row.R2; 12 import com.ibm.icu.impl.Row.R3; 13 import com.ibm.icu.impl.Utility; 14 import com.ibm.icu.lang.UCharacter; 15 import com.ibm.icu.text.Collator; 16 import com.ibm.icu.text.DecimalFormat; 17 import com.ibm.icu.text.Normalizer; 18 import com.ibm.icu.text.NumberFormat; 19 import com.ibm.icu.text.UTF16; 20 import com.ibm.icu.text.UnicodeSet; 21 import com.ibm.icu.text.UnicodeSetIterator; 22 import com.ibm.icu.util.Currency; 23 import com.ibm.icu.util.ULocale; 24 import java.io.File; 25 import java.io.IOException; 26 import java.io.InputStream; 27 import java.io.PrintWriter; 28 import java.io.StringWriter; 29 import java.util.ArrayList; 30 import java.util.Arrays; 31 import java.util.Collection; 32 import java.util.Collections; 33 import java.util.Comparator; 34 import java.util.EnumSet; 35 import java.util.HashSet; 36 import java.util.Iterator; 37 import java.util.LinkedHashSet; 38 import java.util.List; 39 import java.util.Map; 40 import java.util.Map.Entry; 41 import java.util.Set; 42 import java.util.TreeMap; 43 import java.util.TreeSet; 44 import org.unicode.cldr.test.DisplayAndInputProcessor; 45 import org.unicode.cldr.tool.CldrVersion; 46 import org.unicode.cldr.tool.LikelySubtags; 47 import org.unicode.cldr.util.Builder; 48 import org.unicode.cldr.util.CLDRConfig; 49 import org.unicode.cldr.util.CLDRFile; 50 import org.unicode.cldr.util.CLDRFile.DraftStatus; 51 import org.unicode.cldr.util.CLDRFile.Status; 52 import org.unicode.cldr.util.CLDRFile.WinningChoice; 53 import org.unicode.cldr.util.CLDRPaths; 54 import org.unicode.cldr.util.ChainedMap; 55 import org.unicode.cldr.util.ChainedMap.M4; 56 import org.unicode.cldr.util.CharacterFallbacks; 57 import org.unicode.cldr.util.CldrUtility; 58 import org.unicode.cldr.util.Counter; 59 import org.unicode.cldr.util.DiscreteComparator; 60 import org.unicode.cldr.util.DiscreteComparator.Ordering; 61 import org.unicode.cldr.util.DoctypeXmlStreamWrapper; 62 import org.unicode.cldr.util.DtdData; 63 import org.unicode.cldr.util.DtdData.Attribute; 64 import org.unicode.cldr.util.DtdData.Element; 65 import org.unicode.cldr.util.DtdData.ElementType; 66 import org.unicode.cldr.util.DtdType; 67 import org.unicode.cldr.util.DtdType.DtdStatus; 68 import org.unicode.cldr.util.ElementAttributeInfo; 69 import org.unicode.cldr.util.Factory; 70 import org.unicode.cldr.util.InputStreamFactory; 71 import org.unicode.cldr.util.LanguageTagParser; 72 import org.unicode.cldr.util.Level; 73 import org.unicode.cldr.util.LocaleIDParser; 74 import org.unicode.cldr.util.Pair; 75 import org.unicode.cldr.util.PathHeader; 76 import org.unicode.cldr.util.PathUtilities; 77 import org.unicode.cldr.util.StandardCodes; 78 import org.unicode.cldr.util.SupplementalDataInfo; 79 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 80 import org.unicode.cldr.util.SupplementalDataInfo.PluralType; 81 import org.unicode.cldr.util.TestCLDRPaths; 82 import org.unicode.cldr.util.XMLFileReader; 83 import org.unicode.cldr.util.XPathParts; 84 import org.xml.sax.ErrorHandler; 85 import org.xml.sax.InputSource; 86 import org.xml.sax.SAXException; 87 import org.xml.sax.SAXParseException; 88 import org.xml.sax.XMLReader; 89 90 public class TestBasic extends TestFmwkPlus { 91 92 private static final boolean DEBUG = false; 93 94 static CLDRConfig testInfo = CLDRConfig.getInstance(); 95 96 private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = 97 testInfo.getSupplementalDataInfo(); 98 99 private static final ImmutableSet<Pair<String, String>> knownElementExceptions = 100 ImmutableSet.of(Pair.of("ldml", "usesMetazone"), Pair.of("ldmlICU", "usesMetazone")); 101 102 private static final ImmutableSet<Pair<String, String>> knownAttributeExceptions = 103 ImmutableSet.of( 104 Pair.of("ldml", "version"), 105 Pair.of("supplementalData", "version"), 106 Pair.of("ldmlICU", "version"), 107 Pair.of("layout", "standard"), 108 Pair.of("currency", "id"), // for v1.1.1 109 Pair.of("monthNames", "type"), // for v1.1.1 110 Pair.of("alias", "type") // for v1.1.1 111 ); 112 113 private static final ImmutableSet<Pair<String, String>> knownChildExceptions = 114 ImmutableSet.of( 115 Pair.of("abbreviationFallback", "special"), 116 Pair.of("inList", "special"), 117 Pair.of("preferenceOrdering", "special")); 118 119 /** 120 * Simple test that loads each file in the cldr directory, thus verifying that the DTD works, 121 * and also checks that the PrettyPaths work. 122 * 123 * @author markdavis 124 */ main(String[] args)125 public static void main(String[] args) { 126 new TestBasic().run(args); 127 } 128 129 private static final ImmutableSet<String> skipAttributes = 130 ImmutableSet.of("alt", "draft", "references"); 131 132 private final ImmutableSet<String> eightPointLocales = 133 ImmutableSet.of( 134 "ar", "ca", "cs", "da", "de", "el", "es", "fi", "fr", "he", "hi", "hr", "hu", 135 "id", "it", "ja", "ko", "lt", "lv", "nl", "no", "pl", "pt", "pt_PT", "ro", "ru", 136 "sk", "sl", "sr", "sv", "th", "tr", "uk", "vi", "zh", "zh_Hant"); 137 138 // private final boolean showForceZoom = Utility.getProperty("forcezoom", 139 // false); 140 141 private final boolean resolved = CldrUtility.getProperty("resolved", false); 142 143 private final Exception[] internalException = new Exception[1]; 144 TestDtds()145 public void TestDtds() throws IOException { 146 Relation<Row.R2<DtdType, String>, String> foundAttributes = 147 Relation.of(new TreeMap<Row.R2<DtdType, String>, Set<String>>(), TreeSet.class); 148 final CLDRConfig config = CLDRConfig.getInstance(); 149 final File basedir = config.getCldrBaseDirectory(); 150 List<TimingInfo> data = new ArrayList<>(); 151 152 for (String subdir : CLDRConfig.getCLDRDataDirectories()) { 153 checkDtds(new File(basedir, subdir), 0, foundAttributes, data); 154 } 155 if (foundAttributes.size() > 0) { 156 showFoundElements(foundAttributes); 157 } 158 if (isVerbose()) { 159 long totalBytes = 0; 160 long totalNanos = 0; 161 for (TimingInfo i : data) { 162 long length = i.file.length(); 163 totalBytes += length; 164 totalNanos += i.nanos; 165 logln(i.nanos + "\t" + length + "\t" + i.file); 166 } 167 logln(totalNanos + "\t" + totalBytes); 168 } 169 } 170 checkDtds( File directoryFile, int level, Relation<R2<DtdType, String>, String> foundAttributes, List<TimingInfo> data)171 private void checkDtds( 172 File directoryFile, 173 int level, 174 Relation<R2<DtdType, String>, String> foundAttributes, 175 List<TimingInfo> data) 176 throws IOException { 177 boolean deepCheck = getInclusion() >= 10; 178 if (directoryFile.getName().equals("import") 179 && directoryFile.getParentFile().getName().equals("keyboards")) { 180 return; // skip imports 181 } 182 File[] listFiles = directoryFile.listFiles(); 183 String normalizedPath = PathUtilities.getNormalizedPathString(directoryFile); 184 String indent = Utility.repeat("\t", level); 185 if (listFiles == null) { 186 throw new IllegalArgumentException(indent + "Empty directory: " + normalizedPath); 187 } 188 logln("Checking files for DTD errors in: " + indent + normalizedPath); 189 for (File fileName : listFiles) { 190 String name = fileName.getName(); 191 if (CLDRConfig.isJunkFile(name)) { 192 continue; 193 } else if (fileName.isDirectory()) { 194 checkDtds(fileName, level + 1, foundAttributes, data); 195 } else if (name.endsWith(".xml")) { 196 data.add(check(fileName)); 197 if (deepCheck // takes too long to do all the time 198 ) { 199 CLDRFile cldrfile = 200 CLDRFile.loadFromFile(fileName, "temp", DraftStatus.unconfirmed); 201 for (String xpath : cldrfile) { 202 String fullPath = cldrfile.getFullXPath(xpath); 203 if (fullPath == null) { 204 fullPath = cldrfile.getFullXPath(xpath); 205 assertNotNull("", fullPath); 206 continue; 207 } 208 XPathParts parts = XPathParts.getFrozenInstance(fullPath); 209 DtdType type = parts.getDtdData().dtdType; 210 for (int i = 0; i < parts.size(); ++i) { 211 String element = parts.getElement(i); 212 R2<DtdType, String> typeElement = Row.of(type, element); 213 if (parts.getAttributeCount(i) == 0) { 214 foundAttributes.put(typeElement, "NONE"); 215 } else { 216 for (String attribute : parts.getAttributeKeys(i)) { 217 foundAttributes.put(typeElement, attribute); 218 } 219 } 220 } 221 } 222 } 223 } 224 } 225 } 226 showFoundElements(Relation<Row.R2<DtdType, String>, String> foundAttributes)227 public void showFoundElements(Relation<Row.R2<DtdType, String>, String> foundAttributes) { 228 Relation<Row.R2<DtdType, String>, String> theoryAttributes = 229 Relation.of(new TreeMap<Row.R2<DtdType, String>, Set<String>>(), TreeSet.class); 230 for (DtdType type : DtdType.values()) { 231 if (type.getStatus() != DtdType.DtdStatus.active) { 232 continue; 233 } 234 DtdData dtdData = DtdData.getInstance(type); 235 for (Element element : dtdData.getElementFromName().values()) { 236 String name = element.getName(); 237 Set<Attribute> attributes = element.getAttributes().keySet(); 238 R2<DtdType, String> typeElement = Row.of(type, name); 239 if (attributes.isEmpty()) { 240 theoryAttributes.put(typeElement, "NONE"); 241 } else { 242 for (Attribute attribute : attributes) { 243 theoryAttributes.put(typeElement, attribute.name); 244 } 245 } 246 } 247 } 248 Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed = 249 Relation.of( 250 new TreeMap<String, Set<R3<Boolean, DtdType, String>>>(), 251 LinkedHashSet.class); 252 253 for (Entry<R2<DtdType, String>, Set<String>> s : theoryAttributes.keyValuesSet()) { 254 R2<DtdType, String> typeElement = s.getKey(); 255 Set<String> theoryAttributeSet = s.getValue(); 256 DtdType type = typeElement.get0(); 257 String element = typeElement.get1(); 258 if (element.equals("ANY") || element.equals("#PCDATA")) { 259 continue; 260 } 261 boolean deprecatedElement = 262 SUPPLEMENTAL_DATA_INFO.isDeprecated(type, element, "*", "*"); 263 String header = type + "\t" + element + "\t" + (deprecatedElement ? "X" : "") + "\t"; 264 Set<String> usedAttributes = foundAttributes.get(typeElement); 265 Set<String> unusedAttributes = new LinkedHashSet<>(theoryAttributeSet); 266 if (usedAttributes == null) { 267 logln( 268 header 269 + "<NOT-FOUND>\t\t" 270 + siftDeprecated( 271 type, 272 element, 273 unusedAttributes, 274 attributesToTypeElementUsed, 275 false)); 276 continue; 277 } 278 unusedAttributes.removeAll(usedAttributes); 279 logln( 280 header 281 + siftDeprecated( 282 type, 283 element, 284 usedAttributes, 285 attributesToTypeElementUsed, 286 true) 287 + "\t" 288 + siftDeprecated( 289 type, 290 element, 291 unusedAttributes, 292 attributesToTypeElementUsed, 293 false)); 294 } 295 296 logln("Undeprecated Attributes\t"); 297 for (Entry<String, R3<Boolean, DtdType, String>> s : 298 attributesToTypeElementUsed.keyValueSet()) { 299 R3<Boolean, DtdType, String> typeElementUsed = s.getValue(); 300 logln( 301 s.getKey() 302 + "\t" 303 + typeElementUsed.get0() 304 + "\t" 305 + typeElementUsed.get1() 306 + "\t" 307 + typeElementUsed.get2()); 308 } 309 } 310 siftDeprecated( DtdType type, String element, Set<String> attributeSet, Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed, boolean used)311 private String siftDeprecated( 312 DtdType type, 313 String element, 314 Set<String> attributeSet, 315 Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed, 316 boolean used) { 317 StringBuilder b = new StringBuilder(); 318 StringBuilder bdep = new StringBuilder(); 319 for (String attribute : attributeSet) { 320 String attributeName = 321 "«" 322 + attribute 323 + (!"NONE".equals(attribute) 324 && CLDRFile.isDistinguishing(type, element, attribute) 325 ? "*" 326 : "") 327 + "»"; 328 if (!"NONE".equals(attribute) 329 && SUPPLEMENTAL_DATA_INFO.isDeprecated(type, element, attribute, "*")) { 330 if (bdep.length() != 0) { 331 bdep.append(" "); 332 } 333 bdep.append(attributeName); 334 } else { 335 if (b.length() != 0) { 336 b.append(" "); 337 } 338 b.append(attributeName); 339 if (!"NONE".equals(attribute)) { 340 attributesToTypeElementUsed.put(attribute, Row.of(used, type, element)); 341 } 342 } 343 } 344 return b.toString() + "\t" + bdep.toString(); 345 } 346 347 class MyErrorHandler implements ErrorHandler { 348 @Override error(SAXParseException exception)349 public void error(SAXParseException exception) throws SAXException { 350 errln("error: " + XMLFileReader.showSAX(exception)); 351 throw exception; 352 } 353 354 @Override fatalError(SAXParseException exception)355 public void fatalError(SAXParseException exception) throws SAXException { 356 errln("fatalError: " + XMLFileReader.showSAX(exception)); 357 throw exception; 358 } 359 360 @Override warning(SAXParseException exception)361 public void warning(SAXParseException exception) throws SAXException { 362 errln("warning: " + XMLFileReader.showSAX(exception)); 363 throw exception; 364 } 365 } 366 367 private class TimingInfo { 368 File file; 369 long nanos; 370 } 371 check(File systemID)372 public TimingInfo check(File systemID) { 373 long start = System.nanoTime(); 374 try (InputStream fis = InputStreamFactory.createInputStream(systemID)) { 375 // FileInputStream fis = new FileInputStream(systemID); 376 XMLReader xmlReader = XMLFileReader.createXMLReader(true); 377 xmlReader.setErrorHandler(new MyErrorHandler()); 378 InputSource is = new InputSource(fis); 379 is.setSystemId(systemID.toString()); 380 DoctypeXmlStreamWrapper.wrap(is); 381 xmlReader.parse(is); 382 // fis.close(); 383 } catch (SAXException | IOException e) { 384 errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" + e.getMessage()); 385 } 386 // catch (SAXParseException e) { 387 // errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" + 388 // e.getMessage()); 389 // } catch (IOException e) { 390 // errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" + 391 // e.getMessage()); 392 // } 393 TimingInfo timingInfo = new TimingInfo(); 394 timingInfo.nanos = System.nanoTime() - start; 395 timingInfo.file = systemID; 396 return timingInfo; 397 } 398 TestCurrencyFallback()399 public void TestCurrencyFallback() { 400 Factory cldrFactory = testInfo.getCldrFactory(); 401 Set<String> currencies = StandardCodes.make().getAvailableCodes("currency"); 402 403 final UnicodeSet CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS = 404 new UnicodeSet("[[:sc:]-[\\u0000-\\u00FF]]").freeze(); 405 406 CharacterFallbacks fallbacks = CharacterFallbacks.make(); 407 408 for (String locale : cldrFactory.getAvailable()) { 409 if (!StandardCodes.isLocaleAtLeastBasic(locale)) { 410 continue; 411 } 412 CLDRFile file = testInfo.getCLDRFile(locale, false); 413 if (file.isNonInheriting()) continue; 414 415 final UnicodeSet OK_CURRENCY_FALLBACK = 416 new UnicodeSet("[\\u0000-\\u00FF]") 417 .addAll(safeExemplars(file, "")) 418 .addAll(safeExemplars(file, "auxiliary")) 419 .freeze(); 420 UnicodeSet badSoFar = new UnicodeSet(); 421 422 for (Iterator<String> it = file.iterator(); it.hasNext(); ) { 423 String path = it.next(); 424 if (path.endsWith("/alias")) { 425 continue; 426 } 427 String value = file.getStringValue(path); 428 429 // check for special characters 430 if (CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS.containsSome(value)) { 431 XPathParts parts = XPathParts.getFrozenInstance(path); 432 if (!parts.getElement(-1).equals("symbol")) { 433 continue; 434 } 435 // We don't care about fallbacks for narrow currency symbols 436 if ("narrow".equals(parts.getAttributeValue(-1, "alt"))) { 437 continue; 438 } 439 String currencyType = parts.getAttributeValue(-2, "type"); 440 441 UnicodeSet fishy = 442 new UnicodeSet() 443 .addAll(value) 444 .retainAll(CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS) 445 .removeAll(badSoFar); 446 for (UnicodeSetIterator it2 = new UnicodeSetIterator(fishy); it2.next(); ) { 447 final int fishyCodepoint = it2.codepoint; 448 List<String> fallbackList = fallbacks.getSubstitutes(fishyCodepoint); 449 450 String nfkc = Normalizer.normalize(fishyCodepoint, Normalizer.NFKC); 451 if (!nfkc.equals(UTF16.valueOf(fishyCodepoint))) { 452 if (fallbackList == null) { 453 fallbackList = new ArrayList<>(); 454 } else { 455 fallbackList = new ArrayList<>(fallbackList); // writable 456 } 457 fallbackList.add(nfkc); 458 } 459 // later test for all Latin-1 460 if (fallbackList == null) { 461 if (locale.equals("nqo") 462 && logKnownIssue("CLDR-16987", "fishy fallback test")) { 463 continue; 464 } 465 errln( 466 "Locale:\t" 467 + locale 468 + ";\tCharacter with no fallback:\t" 469 + it2.getString() 470 + "\t" 471 + UCharacter.getName(fishyCodepoint)); 472 badSoFar.add(fishyCodepoint); 473 } else { 474 String fallback = null; 475 for (String fb : fallbackList) { 476 if (OK_CURRENCY_FALLBACK.containsAll(fb)) { 477 if (!fb.equals(currencyType) && currencies.contains(fb)) { 478 errln( 479 "Locale:\t" 480 + locale 481 + ";\tCurrency:\t" 482 + currencyType 483 + ";\tFallback converts to different code!:\t" 484 + fb 485 + "\t" 486 + it2.getString() 487 + "\t" 488 + UCharacter.getName(fishyCodepoint)); 489 } 490 if (fallback == null) { 491 fallback = fb; 492 } 493 } 494 } 495 if (fallback == null) { 496 errln( 497 "Locale:\t" 498 + locale 499 + ";\tCharacter with no good fallback (exemplars+Latin1):\t" 500 + it2.getString() 501 + "\t" 502 + UCharacter.getName(fishyCodepoint)); 503 badSoFar.add(fishyCodepoint); 504 } else { 505 logln( 506 "Locale:\t" 507 + locale 508 + ";\tCharacter with good fallback:\t" 509 + it2.getString() 510 + " " 511 + UCharacter.getName(fishyCodepoint) 512 + " => " 513 + fallback); 514 // badSoFar.add(fishyCodepoint); 515 } 516 } 517 } 518 } 519 } 520 } 521 } 522 TestAbstractPaths()523 public void TestAbstractPaths() { 524 Factory cldrFactory = testInfo.getCldrFactory(); 525 CLDRFile english = testInfo.getEnglish(); 526 Map<String, Counter<Level>> abstactPaths = new TreeMap<>(); 527 RegexTransform abstractPathTransform = 528 new RegexTransform(RegexTransform.Processing.ONE_PASS) 529 .add("//ldml/", "") 530 .add("\\[@alt=\"[^\"]*\"\\]", "") 531 .add("=\"[^\"]*\"", "=\"*\"") 532 .add("([^]])\\[", "$1\t[") 533 .add("([^]])/", "$1\t/") 534 .add("/", "\t"); 535 536 for (String locale : getInclusion() <= 5 ? eightPointLocales : cldrFactory.getAvailable()) { 537 CLDRFile file = testInfo.getCLDRFile(locale, resolved); 538 if (file.isNonInheriting()) continue; 539 logln(locale + "\t-\t" + english.getName(locale)); 540 541 for (Iterator<String> it = file.iterator(); it.hasNext(); ) { 542 String path = it.next(); 543 if (path.endsWith("/alias")) { 544 continue; 545 } 546 // collect abstracted paths 547 String abstractPath = abstractPathTransform.transform(path); 548 Level level = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(path, locale); 549 if (level == Level.OPTIONAL) { 550 level = Level.COMPREHENSIVE; 551 } 552 Counter<Level> row = abstactPaths.get(abstractPath); 553 if (row == null) { 554 abstactPaths.put(abstractPath, row = new Counter<>()); 555 } 556 row.add(level, 1); 557 } 558 } 559 logln(CldrUtility.LINE_SEPARATOR + "Abstract Paths"); 560 for (Entry<String, Counter<Level>> pathInfo : abstactPaths.entrySet()) { 561 String path = pathInfo.getKey(); 562 Counter<Level> counter = pathInfo.getValue(); 563 logln(counter.getTotal() + "\t" + getCoverage(counter) + "\t" + path); 564 } 565 } 566 getCoverage(Counter<Level> counter)567 private CharSequence getCoverage(Counter<Level> counter) { 568 StringBuilder result = new StringBuilder(); 569 boolean first = true; 570 for (Level level : counter.getKeysetSortedByKey()) { 571 if (first) { 572 first = false; 573 } else { 574 result.append(' '); 575 } 576 result.append("L").append(level.ordinal()).append("=").append(counter.get(level)); 577 } 578 return result; 579 } 580 581 // public void TestCLDRFileCache() { 582 // long start = System.nanoTime(); 583 // Factory cldrFactory = testInfo.getCldrFactory(); 584 // String unusualLocale = "hi"; 585 // CLDRFile file = cldrFactory.make(unusualLocale, true); 586 // long afterOne = System.nanoTime(); 587 // logln("First: " + (afterOne-start)); 588 // CLDRFile file2 = cldrFactory.make(unusualLocale, true); 589 // long afterTwo = System.nanoTime(); 590 // logln("Second: " + (afterTwo-afterOne)); 591 // } 592 // TestPaths()593 public void TestPaths() { 594 Relation<String, String> distinguishing = 595 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 596 Relation<String, String> nonDistinguishing = 597 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class); 598 Factory cldrFactory = testInfo.getCldrFactory(); 599 CLDRFile english = testInfo.getEnglish(); 600 601 Relation<String, String> pathToLocale = 602 Relation.of( 603 new TreeMap<String, Set<String>>(CLDRFile.getComparator(DtdType.ldml)), 604 TreeSet.class, 605 null); 606 Set<String> localesToTest = 607 getInclusion() <= 5 ? eightPointLocales : cldrFactory.getAvailable(); 608 for (String locale : localesToTest) { 609 CLDRFile file = testInfo.getCLDRFile(locale, resolved); 610 DtdType dtdType = null; 611 if (file.isNonInheriting()) continue; 612 DisplayAndInputProcessor displayAndInputProcessor = 613 new DisplayAndInputProcessor(file, false); 614 615 logln(locale + "\t-\t" + english.getName(locale)); 616 617 for (Iterator<String> it = file.iterator(); it.hasNext(); ) { 618 String path = it.next(); 619 if (dtdType == null) { 620 dtdType = DtdType.fromPath(path); 621 } 622 623 if (path.endsWith("/alias")) { 624 continue; 625 } 626 String value = file.getStringValue(path); 627 if (value == null) { 628 throw new IllegalArgumentException( 629 locale + "\tError: in null value at " + path); 630 } 631 632 String displayValue = displayAndInputProcessor.processForDisplay(path, value); 633 if (!displayValue.equals(value)) { 634 logln( 635 "\t" 636 + locale 637 + "\tdisplayAndInputProcessor changes display value <" 638 + value 639 + ">\t=>\t<" 640 + displayValue 641 + ">\t\t" 642 + path); 643 } 644 String inputValue = 645 displayAndInputProcessor.processInput( 646 path, displayValue, internalException); 647 if (internalException[0] != null) { 648 errln( 649 "\t" 650 + locale 651 + "\tdisplayAndInputProcessor internal error <" 652 + value 653 + ">\t=>\t<" 654 + inputValue 655 + ">\t\t" 656 + path); 657 internalException[0].printStackTrace(System.out); 658 } 659 if (isVerbose() && !inputValue.equals(value)) { 660 displayAndInputProcessor.processInput(path, value, internalException); // for 661 // debugging 662 logln( 663 "\t" 664 + locale 665 + "\tdisplayAndInputProcessor changes input value <" 666 + value 667 + ">\t=>\t<" 668 + inputValue 669 + ">\t\t" 670 + path); 671 } 672 673 pathToLocale.put(path, locale); 674 675 // also check for non-distinguishing attributes 676 if (path.contains("/identity")) continue; 677 678 String fullPath = file.getFullXPath(path); 679 XPathParts parts = XPathParts.getFrozenInstance(fullPath); 680 for (int i = 0; i < parts.size(); ++i) { 681 if (parts.getAttributeCount(i) == 0) { 682 continue; 683 } 684 String element = parts.getElement(i); 685 for (String attribute : parts.getAttributeKeys(i)) { 686 if (skipAttributes.contains(attribute)) continue; 687 if (CLDRFile.isDistinguishing(dtdType, element, attribute)) { 688 distinguishing.put(element, attribute); 689 } else { 690 nonDistinguishing.put(element, attribute); 691 } 692 } 693 } 694 } 695 } 696 697 if (isVerbose()) { 698 System.out.format( 699 "Distinguishing Elements: %s" + CldrUtility.LINE_SEPARATOR, distinguishing); 700 System.out.format( 701 "Nondistinguishing Elements: %s" + CldrUtility.LINE_SEPARATOR, 702 nonDistinguishing); 703 System.out.format("Skipped %s" + CldrUtility.LINE_SEPARATOR, skipAttributes); 704 } 705 } 706 707 /** The verbose output shows the results of 1..3 \u00a4 signs. */ checkCurrency()708 public void checkCurrency() { 709 Map<String, Set<R2<String, Integer>>> results = 710 new TreeMap<>(Collator.getInstance(ULocale.ENGLISH)); 711 for (ULocale locale : ULocale.getAvailableLocales()) { 712 if (locale.getCountry().length() != 0) { 713 continue; 714 } 715 for (int i = 1; i < 4; ++i) { 716 NumberFormat format = getCurrencyInstance(locale, i); 717 for (Currency c : 718 new Currency[] { 719 Currency.getInstance("USD"), 720 Currency.getInstance("EUR"), 721 Currency.getInstance("INR") 722 }) { 723 format.setCurrency(c); 724 final String formatted = format.format(12345.67); 725 Set<R2<String, Integer>> set = results.get(formatted); 726 if (set == null) { 727 results.put(formatted, set = new TreeSet<>()); 728 } 729 set.add(Row.of(locale.toString(), i)); 730 } 731 } 732 } 733 for (String formatted : results.keySet()) { 734 logln(formatted + "\t" + results.get(formatted)); 735 } 736 } 737 getCurrencyInstance(ULocale locale, int type)738 private static NumberFormat getCurrencyInstance(ULocale locale, int type) { 739 NumberFormat format = NumberFormat.getCurrencyInstance(locale); 740 if (type > 1) { 741 DecimalFormat format2 = (DecimalFormat) format; 742 String pattern = format2.toPattern(); 743 String replacement = "\u00a4\u00a4"; 744 for (int i = 2; i < type; ++i) { 745 replacement += "\u00a4"; 746 } 747 pattern = pattern.replace("\u00a4", replacement); 748 format2.applyPattern(pattern); 749 } 750 return format; 751 } 752 safeExemplars(CLDRFile file, String string)753 private UnicodeSet safeExemplars(CLDRFile file, String string) { 754 final UnicodeSet result = file.getExemplarSet(string, WinningChoice.NORMAL); 755 return result != null ? result : new UnicodeSet(); 756 } 757 TestAPath()758 public void TestAPath() { 759 // <month type="1">1</month> 760 String path = 761 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/month[@type=\"1\"]"; 762 CLDRFile root = testInfo.getRoot(); 763 logln("path: " + path); 764 String fullpath = root.getFullXPath(path); 765 logln("fullpath: " + fullpath); 766 String value = root.getStringValue(path); 767 logln("value: " + value); 768 Status status = new Status(); 769 String source = root.getSourceLocaleID(path, status); 770 logln("locale: " + source); 771 logln("status: " + status); 772 } 773 TestDefaultContents()774 public void TestDefaultContents() { 775 Set<String> defaultContents = Inheritance.defaultContents; 776 Multimap<String, String> parentToChildren = Inheritance.parentToChildren; 777 778 // Put a list of locales that should be default content here. 779 final String expectDC[] = { 780 "os_GE" // see CLDR-14118 781 }; 782 for (final String locale : expectDC) { 783 assertTrue( 784 "expect " + locale + " to be a default content locale", 785 defaultContents.contains(locale)); 786 } 787 788 if (DEBUG) { 789 Inheritance.showChain("", "", "root"); 790 } 791 792 for (String locale : defaultContents) { 793 CLDRFile cldrFile; 794 try { 795 cldrFile = testInfo.getCLDRFile(locale, false); 796 } catch (RuntimeException e) { 797 logln("Can't open default content file:\t" + locale); 798 continue; 799 } 800 // we check that the default content locale is always empty 801 for (Iterator<String> it = cldrFile.iterator(); it.hasNext(); ) { 802 String path = it.next(); 803 if (path.contains("/identity")) { 804 continue; 805 } 806 errln("Default content file not empty:\t" + locale); 807 showDifferences(locale); 808 break; 809 } 810 } 811 812 // check that if a locale has any children, that exactly one of them is 813 // the default content. Ignore locales with variants 814 815 for (Entry<String, Collection<String>> localeAndKids : 816 parentToChildren.asMap().entrySet()) { 817 String locale = localeAndKids.getKey(); 818 if (locale.equals("root")) { 819 continue; 820 } 821 822 Collection<String> rawChildren = localeAndKids.getValue(); 823 824 // remove variant children 825 Set<String> children = new LinkedHashSet<>(); 826 for (String child : rawChildren) { 827 if (new LocaleIDParser().set(child).getVariants().length == 0) { 828 children.add(child); 829 } 830 } 831 if (children.isEmpty()) { 832 continue; 833 } 834 835 Set<String> defaultContentChildren = new LinkedHashSet<>(children); 836 defaultContentChildren.retainAll(defaultContents); 837 if (defaultContentChildren.size() == 1) { 838 continue; 839 // If we're already down to the region level then it's OK not to have 840 // default contents. 841 } else if (!new LocaleIDParser().set(locale).getRegion().isEmpty()) { 842 continue; 843 } else if (defaultContentChildren.isEmpty()) { 844 Object possible = highestShared(locale, children); 845 errln( 846 "Locale has children but is missing default contents locale: " 847 + locale 848 + ", children: " 849 + children 850 + "; possible fixes for children:\n" 851 + possible); 852 } else { 853 errln( 854 "Locale has too many defaultContent locales!!: " 855 + locale 856 + ", defaultContents: " 857 + defaultContentChildren); 858 } 859 } 860 861 // check that each default content locale is likely-subtag equivalent to 862 // its parent. 863 864 for (String locale : defaultContents) { 865 String maxLocale = LikelySubtags.maximize(locale, likelyData); 866 String localeParent = LocaleIDParser.getParent(locale); 867 String maxLocaleParent = LikelySubtags.maximize(localeParent, likelyData); 868 if (locale.equals("ar_001") || locale.equals("nb")) { 869 logln( 870 "Known exception to likelyMax(locale=" 871 + locale 872 + ")" 873 + " == " 874 + "likelyMax(defaultContent=" 875 + localeParent 876 + ")"); 877 continue; 878 } 879 assertEquals( 880 "likelyMax(locale=" 881 + locale 882 + ")" 883 + " == " 884 + "likelyMax(defaultContent=" 885 + localeParent 886 + ")", 887 maxLocaleParent, 888 maxLocale); 889 } 890 } 891 highestShared(String parent, Set<String> children)892 private String highestShared(String parent, Set<String> children) { 893 M4<PathHeader, String, String, Boolean> data = 894 ChainedMap.of( 895 new TreeMap<PathHeader, Object>(), 896 new TreeMap<String, Object>(), 897 new TreeMap<String, Object>(), 898 Boolean.class); 899 CLDRFile parentFile = testInfo.getCLDRFile(parent, true); 900 PathHeader.Factory phf = PathHeader.getFactory(testInfo.getEnglish()); 901 for (String child : children) { 902 CLDRFile cldrFile = testInfo.getCLDRFile(child, false); 903 for (String path : cldrFile) { 904 if (path.contains("/identity")) { 905 continue; 906 } 907 if (path.contains("provisional") || path.contains("unconfirmed")) { 908 continue; 909 } 910 String value = cldrFile.getStringValue(path); 911 // double-check 912 String parentValue = parentFile.getStringValue(path); 913 if (value.equals(parentValue)) { 914 continue; 915 } 916 PathHeader ph = phf.fromPath(path); 917 data.put(ph, value, child, Boolean.TRUE); 918 data.put(ph, parentValue == null ? "∅∅∅" : parentValue, child, Boolean.TRUE); 919 } 920 } 921 StringBuilder result = new StringBuilder(); 922 for (Entry<PathHeader, Map<String, Map<String, Boolean>>> entry : data) { 923 for (Entry<String, Map<String, Boolean>> item : entry.getValue().entrySet()) { 924 result.append("\n") 925 .append(entry.getKey()) 926 .append("\t") 927 .append(item.getKey() + "\t" + item.getValue().keySet()); 928 } 929 } 930 return result.toString(); 931 } 932 933 public static class Inheritance { 934 public static final Set<String> defaultContents = 935 SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales(); 936 public static final Multimap<String, String> parentToChildren; 937 938 static { 939 Multimap<String, String> _parentToChildren = TreeMultimap.create(); 940 for (String child : testInfo.getCldrFactory().getAvailable()) { 941 if (child.equals("root")) { 942 continue; 943 } 944 String localeParent = LocaleIDParser.getParent(child); _parentToChildren.put(localeParent, child)945 _parentToChildren.put(localeParent, child); 946 } 947 parentToChildren = ImmutableMultimap.copyOf(_parentToChildren); 948 } 949 showChain(String prefix, String gparent, String current)950 public static void showChain(String prefix, String gparent, String current) { 951 Collection<String> children = parentToChildren.get(current); 952 if (children == null) { 953 throw new IllegalArgumentException(); 954 } 955 prefix += 956 current 957 + (defaultContents.contains(current) ? "*" : "") 958 + (isLikelyEquivalent(gparent, current) ? "~" : "") 959 + "\t"; 960 961 // find leaves 962 Set<String> parents = new LinkedHashSet<>(children); 963 parents.retainAll(parentToChildren.keySet()); 964 Set<String> leaves = new LinkedHashSet<>(children); 965 leaves.removeAll(parentToChildren.keySet()); 966 if (!leaves.isEmpty()) { 967 List<String> presentation = new ArrayList<>(); 968 boolean gotDc = false; 969 for (String s : leaves) { 970 String shown = s; 971 if (isLikelyEquivalent(current, s)) { 972 shown += "~"; 973 } 974 if (defaultContents.contains(s)) { 975 gotDc = true; 976 shown += "*"; 977 } 978 if (!shown.equals(s)) { 979 presentation.add(0, shown); 980 } else { 981 presentation.add(shown); 982 } 983 } 984 if (!gotDc) { 985 int debug = 0; 986 } 987 if (leaves.size() == 1) { 988 System.out.println(prefix + Joiner.on(" ").join(presentation)); 989 } else { 990 System.out.println(prefix + "{" + Joiner.on(" ").join(presentation) + "}"); 991 } 992 } 993 for (String parent : parents) { 994 showChain(prefix, current, parent); 995 } 996 } 997 isLikelyEquivalent(String locale1, String locale2)998 static boolean isLikelyEquivalent(String locale1, String locale2) { 999 if (locale1.equals(locale2)) { 1000 return true; 1001 } 1002 try { 1003 String maxLocale1 = LikelySubtags.maximize(locale1, likelyData); 1004 String maxLocale2 = LikelySubtags.maximize(locale2, likelyData); 1005 return maxLocale1 != null && Objects.equal(maxLocale1, maxLocale2); 1006 } catch (Exception e) { 1007 return false; 1008 } 1009 } 1010 } 1011 1012 static final Map<String, String> likelyData = SUPPLEMENTAL_DATA_INFO.getLikelySubtags(); 1013 1014 private static final EnumSet<CldrVersion> badLdmlICUVersions = 1015 EnumSet.of( 1016 CldrVersion.v1_1_1, CldrVersion.v1_2, CldrVersion.v1_4_1, CldrVersion.v1_5_1); 1017 TestLikelySubtagsComplete()1018 public void TestLikelySubtagsComplete() { 1019 LanguageTagParser ltp = new LanguageTagParser(); 1020 for (String locale : testInfo.getCldrFactory().getAvailable()) { 1021 if (locale.equals("root")) { 1022 continue; 1023 } 1024 String maxLocale = LikelySubtags.maximize(locale, likelyData); 1025 if (maxLocale == null) { 1026 errln("Locale missing likely subtag: " + locale); 1027 continue; 1028 } 1029 ltp.set(maxLocale); 1030 if (ltp.getLanguage().isEmpty() 1031 || ltp.getScript().isEmpty() 1032 || ltp.getRegion().isEmpty()) { 1033 errln("Locale has defective likely subtag: " + locale + " => " + maxLocale); 1034 } 1035 } 1036 } 1037 showDifferences(String locale)1038 private void showDifferences(String locale) { 1039 CLDRFile cldrFile = testInfo.getCLDRFile(locale, false); 1040 final String localeParent = LocaleIDParser.getParent(locale); 1041 CLDRFile parentFile = testInfo.getCLDRFile(localeParent, true); 1042 int funnyCount = 0; 1043 for (Iterator<String> it = cldrFile.iterator("", cldrFile.getComparator()); 1044 it.hasNext(); ) { 1045 String path = it.next(); 1046 if (path.contains("/identity")) { 1047 continue; 1048 } 1049 final String fullXPath = cldrFile.getFullXPath(path); 1050 if (fullXPath.contains("[@draft=\"unconfirmed\"]") 1051 || fullXPath.contains("[@draft=\"provisional\"]")) { 1052 funnyCount++; 1053 continue; 1054 } 1055 logln("\tpath:\t" + path); 1056 logln("\t\t" + locale + " value:\t<" + cldrFile.getStringValue(path) + ">"); 1057 final String parentFullPath = parentFile.getFullXPath(path); 1058 logln("\t\t" + localeParent + " value:\t<" + parentFile.getStringValue(path) + ">"); 1059 logln("\t\t" + locale + " fullpath:\t" + fullXPath); 1060 logln("\t\t" + localeParent + " fullpath:\t" + parentFullPath); 1061 } 1062 logln("\tCount of non-approved:\t" + funnyCount); 1063 } 1064 1065 enum MissingType { 1066 plurals, 1067 main_exemplars, 1068 no_main, 1069 collation, 1070 index_exemplars, 1071 punct_exemplars 1072 } 1073 TestCoreData()1074 public void TestCoreData() { 1075 Set<String> availableLanguages = testInfo.getCldrFactory().getAvailableLanguages(); 1076 PluralInfo rootRules = SUPPLEMENTAL_DATA_INFO.getPlurals(PluralType.cardinal, "root"); 1077 Multimap<MissingType, Comparable> errors = TreeMultimap.create(); 1078 errors.put(MissingType.collation, "?"); 1079 1080 Multimap<MissingType, Comparable> warnings = TreeMultimap.create(); 1081 warnings.put(MissingType.collation, "?"); 1082 warnings.put(MissingType.index_exemplars, "?"); 1083 warnings.put(MissingType.punct_exemplars, "?"); 1084 1085 Set<String> collations = new HashSet<>(); 1086 1087 // collect collation info 1088 Factory collationFactory = 1089 Factory.make(CLDRPaths.COLLATION_DIRECTORY, ".*", DraftStatus.contributed); 1090 for (String localeID : collationFactory.getAvailable()) { 1091 if (isTopLevel(localeID)) { 1092 collations.add(localeID); 1093 } 1094 } 1095 logln(collations.toString()); 1096 1097 Set<String> allLanguages = 1098 Builder.with(new TreeSet<String>()) 1099 .addAll(collations) 1100 .addAll(availableLanguages) 1101 .freeze(); 1102 1103 for (String localeID : allLanguages) { 1104 if (localeID.equals("root")) { 1105 continue; // skip script locales 1106 } 1107 if (!isTopLevel(localeID)) { 1108 continue; 1109 } 1110 if (!StandardCodes.isLocaleAtLeastBasic(localeID)) { 1111 continue; 1112 } 1113 errors.clear(); 1114 warnings.clear(); 1115 1116 String name = 1117 "Locale:" + localeID + " (" + testInfo.getEnglish().getName(localeID) + ")"; 1118 1119 if (!collations.contains(localeID)) { 1120 warnings.put(MissingType.collation, "missing"); 1121 logln(name + " is missing " + MissingType.collation.toString()); 1122 } 1123 1124 try { 1125 CLDRFile cldrFile = 1126 testInfo.getCldrFactory().make(localeID, false, DraftStatus.contributed); 1127 1128 String wholeFileAlias = cldrFile.getStringValue("//ldml/alias"); 1129 if (wholeFileAlias != null) { 1130 logln("Whole-file alias:" + name); 1131 continue; 1132 } 1133 1134 PluralInfo pluralInfo = 1135 SUPPLEMENTAL_DATA_INFO.getPlurals(PluralType.cardinal, localeID); 1136 if (pluralInfo == rootRules) { 1137 logln(name + " is missing " + MissingType.plurals.toString()); 1138 warnings.put(MissingType.plurals, "missing"); 1139 } 1140 UnicodeSet main = cldrFile.getExemplarSet("", WinningChoice.WINNING); 1141 if (main == null || main.isEmpty()) { 1142 errln(" " + name + " is missing " + MissingType.main_exemplars.toString()); 1143 errors.put(MissingType.main_exemplars, "missing"); 1144 } 1145 UnicodeSet index = cldrFile.getExemplarSet("index", WinningChoice.WINNING); 1146 if (index == null || index.isEmpty()) { 1147 logln(name + " is missing " + MissingType.index_exemplars.toString()); 1148 warnings.put(MissingType.index_exemplars, "missing"); 1149 } 1150 UnicodeSet punctuation = 1151 cldrFile.getExemplarSet("punctuation", WinningChoice.WINNING); 1152 if (punctuation == null || punctuation.isEmpty()) { 1153 logln(name + " is missing " + MissingType.punct_exemplars.toString()); 1154 warnings.put(MissingType.punct_exemplars, "missing"); 1155 } 1156 } catch (Exception e) { 1157 StringWriter x = new StringWriter(); 1158 PrintWriter pw = new PrintWriter(x); 1159 e.printStackTrace(pw); 1160 pw.flush(); 1161 errln(" " + name + " is missing main locale data." + x); 1162 errors.put(MissingType.no_main, x.toString()); 1163 } 1164 1165 // report errors 1166 1167 if (errors.isEmpty() && warnings.isEmpty()) { 1168 logln(name + ": No problems..."); 1169 } 1170 } 1171 } 1172 isTopLevel(String localeID)1173 private boolean isTopLevel(String localeID) { 1174 return "root".equals(LocaleIDParser.getParent(localeID)); 1175 } 1176 1177 /** Tests that every dtd item is connected from root */ TestDtdCompleteness()1178 public void TestDtdCompleteness() { 1179 for (DtdType type : DtdType.values()) { 1180 if (type.getStatus() != DtdType.DtdStatus.active) { 1181 continue; 1182 } 1183 DtdData dtdData = DtdData.getInstance(type); 1184 Set<Element> descendents = new LinkedHashSet<>(); 1185 dtdData.getDescendents(dtdData.ROOT, descendents); 1186 Set<Element> elements = dtdData.getElements(); 1187 if (!elements.equals(descendents)) { 1188 for (Element e : elements) { 1189 if (!descendents.contains(e) 1190 && !e.equals(dtdData.PCDATA) 1191 && !e.equals(dtdData.ANY)) { 1192 errln(type + ": Element " + e + " not contained in descendents of ROOT."); 1193 } 1194 } 1195 for (Element e : descendents) { 1196 if (!elements.contains(e)) { 1197 errln(type + ": Element " + e + ", descendent of ROOT, not in elements."); 1198 } 1199 } 1200 } 1201 LinkedHashSet<Element> all = new LinkedHashSet<>(descendents); 1202 all.addAll(elements); 1203 Set<Attribute> attributes = dtdData.getAttributes(); 1204 for (Attribute a : attributes) { 1205 if (!elements.contains(a.element)) { 1206 errln(type + ": Attribute " + a + " isn't for any element."); 1207 } 1208 } 1209 } 1210 } 1211 TestBasicDTDCompatibility()1212 public void TestBasicDTDCompatibility() { 1213 1214 if (!TestCLDRPaths.canUseArchiveDirectory()) { 1215 return; 1216 } 1217 1218 final String oldCommon = CldrVersion.LAST_RELEASE_VERSION.getBaseDirectory() + "/common"; 1219 1220 // set up exceptions 1221 Set<String> changedToEmpty = 1222 new HashSet<>( 1223 Arrays.asList( 1224 new String[] { 1225 "version", 1226 "languageCoverage", 1227 "scriptCoverage", 1228 "territoryCoverage", 1229 "currencyCoverage", 1230 "timezoneCoverage", 1231 "skipDefaultLocale" 1232 })); 1233 Set<String> PCDATA = new HashSet<>(); 1234 PCDATA.add("PCDATA"); 1235 Set<String> EMPTY = new HashSet<>(); 1236 EMPTY.add("EMPTY"); 1237 Set<String> VERSION = new HashSet<>(); 1238 VERSION.add("version"); 1239 1240 // test all DTDs 1241 for (DtdType dtd : DtdType.values()) { 1242 if (dtd.getStatus() != DtdType.DtdStatus.active) { 1243 continue; 1244 } 1245 if (dtd.firstVersion != null 1246 && CldrVersion.LAST_RELEASE_VERSION.isOlderThan( 1247 CldrVersion.from(dtd.firstVersion))) { 1248 continue; // DTD didn't exist in last release 1249 } 1250 if (dtd == DtdType.ldmlICU) continue; 1251 try { 1252 ElementAttributeInfo oldDtd = ElementAttributeInfo.getInstance(oldCommon, dtd); 1253 ElementAttributeInfo newDtd = ElementAttributeInfo.getInstance(dtd); 1254 1255 if (oldDtd == newDtd) { 1256 continue; 1257 } 1258 Relation<String, String> oldElement2Children = oldDtd.getElement2Children(); 1259 Relation<String, String> newElement2Children = newDtd.getElement2Children(); 1260 1261 Relation<String, String> oldElement2Attributes = oldDtd.getElement2Attributes(); 1262 Relation<String, String> newElement2Attributes = newDtd.getElement2Attributes(); 1263 1264 for (String element : oldElement2Children.keySet()) { 1265 Set<String> oldChildren = oldElement2Children.getAll(element); 1266 Set<String> newChildren = newElement2Children.getAll(element); 1267 if (newChildren == null) { 1268 if (!knownElementExceptions.contains(Pair.of(dtd.toString(), element))) { 1269 errln("Old " + dtd + " contains element not in new: <" + element + ">"); 1270 } 1271 continue; 1272 } 1273 Set<String> funny = containsInOrder(newChildren, oldChildren); 1274 if (funny != null) { 1275 if (changedToEmpty.contains(element) 1276 && oldChildren.equals(PCDATA) 1277 && newChildren.equals(EMPTY)) { 1278 // ok, skip 1279 } else { 1280 errln( 1281 "Old " 1282 + dtd 1283 + " element <" 1284 + element 1285 + "> has children Missing/Misordered:\t" 1286 + funny 1287 + "\n\t\tOld:\t" 1288 + oldChildren 1289 + "\n\t\tNew:\t" 1290 + newChildren); 1291 } 1292 } 1293 1294 Set<String> oldAttributes = oldElement2Attributes.getAll(element); 1295 if (oldAttributes == null) { 1296 oldAttributes = Collections.emptySet(); 1297 } 1298 Set<String> newAttributes = newElement2Attributes.getAll(element); 1299 if (newAttributes == null) { 1300 newAttributes = Collections.emptySet(); 1301 } 1302 if (!newAttributes.containsAll(oldAttributes)) { 1303 LinkedHashSet<String> missing = new LinkedHashSet<>(oldAttributes); 1304 missing.removeAll(newAttributes); 1305 if (element.equals(dtd.toString()) && missing.equals(VERSION)) { 1306 // ok, skip 1307 } else { 1308 errln( 1309 "Old " 1310 + dtd 1311 + " element <" 1312 + element 1313 + "> has attributes Missing:\t" 1314 + missing 1315 + "\n\t\tOld:\t" 1316 + oldAttributes 1317 + "\n\t\tNew:\t" 1318 + newAttributes); 1319 } 1320 } 1321 } 1322 } catch (Exception e) { 1323 e.printStackTrace(); 1324 errln("Failure with " + dtd); 1325 } 1326 } 1327 } 1328 containsInOrder(Set<T> superset, Set<T> subset)1329 private <T> Set<T> containsInOrder(Set<T> superset, Set<T> subset) { 1330 if (!superset.containsAll(subset)) { 1331 LinkedHashSet<T> missing = new LinkedHashSet<>(subset); 1332 missing.removeAll(superset); 1333 return missing; 1334 } 1335 // ok, we know that they are subsets, try order 1336 Set<T> result = null; 1337 DiscreteComparator<T> comp = 1338 new DiscreteComparator.Builder<T>(Ordering.ARBITRARY).add(superset).get(); 1339 T last = null; 1340 for (T item : subset) { 1341 if (last != null) { 1342 int order = comp.compare(last, item); 1343 if (order != -1) { 1344 if (result == null) { 1345 result = new HashSet<>(); 1346 result.add(last); 1347 result.add(item); 1348 } 1349 } 1350 } 1351 last = item; 1352 } 1353 return result; 1354 } 1355 TestDtdCompatibility()1356 public void TestDtdCompatibility() { 1357 1358 for (DtdType type : DtdType.values()) { 1359 if (type.getStatus() != DtdType.DtdStatus.active) { 1360 continue; 1361 } 1362 DtdData dtdData = DtdData.getInstance(type); 1363 Map<String, Element> currentElementFromName = dtdData.getElementFromName(); 1364 1365 // current has no orphan 1366 Set<Element> orphans = new LinkedHashSet<>(dtdData.getElementFromName().values()); 1367 orphans.remove(dtdData.ROOT); 1368 orphans.remove(dtdData.PCDATA); 1369 orphans.remove(dtdData.ANY); 1370 Set<String> elementsWithoutAlt = new TreeSet<>(); 1371 Set<String> elementsWithoutDraft = new TreeSet<>(); 1372 Set<String> elementsWithoutAlias = new TreeSet<>(); 1373 Set<String> elementsWithoutSpecial = new TreeSet<>(); 1374 1375 for (Element element : dtdData.getElementFromName().values()) { 1376 Set<Element> children = element.getChildren().keySet(); 1377 orphans.removeAll(children); 1378 if (type == DtdType.ldml 1379 && !SUPPLEMENTAL_DATA_INFO.isDeprecated(type, element.name, "*", "*")) { 1380 if (element.getType() == ElementType.PCDATA) { 1381 if (element.getAttributeNamed("alt") == null) { 1382 elementsWithoutAlt.add(element.name); 1383 } 1384 if (element.getAttributeNamed("draft") == null) { 1385 elementsWithoutDraft.add(element.name); 1386 } 1387 } else { 1388 if (children.size() != 0 && !"alias".equals(element.name)) { 1389 if (element.getChildNamed("alias") == null) { 1390 elementsWithoutAlias.add(element.name); 1391 } 1392 if (element.getChildNamed("special") == null) { 1393 elementsWithoutSpecial.add(element.name); 1394 } 1395 } 1396 } 1397 } 1398 } 1399 assertEquals( 1400 type + " DTD Must not have orphan elements", Collections.EMPTY_SET, orphans); 1401 assertEquals( 1402 type + " DTD elements with PCDATA must have 'alt' attributes", 1403 Collections.EMPTY_SET, 1404 elementsWithoutAlt); 1405 assertEquals( 1406 type + " DTD elements with PCDATA must have 'draft' attributes", 1407 Collections.EMPTY_SET, 1408 elementsWithoutDraft); 1409 assertEquals( 1410 type + " DTD elements with children must have 'alias' elements", 1411 Collections.EMPTY_SET, 1412 elementsWithoutAlias); 1413 assertEquals( 1414 type + " DTD elements with children must have 'special' elements", 1415 Collections.EMPTY_SET, 1416 elementsWithoutSpecial); 1417 1418 if (!TestCLDRPaths.canUseArchiveDirectory()) { 1419 return; 1420 } 1421 1422 for (CldrVersion version : CldrVersion.CLDR_VERSIONS_DESCENDING) { 1423 if (version == CldrVersion.unknown || version == CldrVersion.baseline) { 1424 continue; 1425 } 1426 if (type.getStatus() != DtdStatus.active) { 1427 continue; // not active 1428 } 1429 if (type.firstVersion != null 1430 && version.isOlderThan(CldrVersion.from(type.firstVersion))) { 1431 continue; // didn't exist at that point 1432 } 1433 DtdData dtdDataOld; 1434 try { 1435 dtdDataOld = DtdData.getInstance(type, version.toString()); 1436 } catch (IllegalArgumentException e) { 1437 boolean tooOld = false; 1438 switch (type) { 1439 case ldmlICU: 1440 tooOld = badLdmlICUVersions.contains(version); 1441 break; 1442 case ldmlBCP47: 1443 case keyboard3: 1444 if (type.firstVersion != null) { 1445 tooOld = version.isOlderThan(CldrVersion.from(type.firstVersion)); 1446 } 1447 break; 1448 default: 1449 break; 1450 } 1451 if (tooOld) { 1452 continue; 1453 } else { 1454 errln( 1455 "v" 1456 + version 1457 + ": " 1458 + e.getClass().getSimpleName() 1459 + ", " 1460 + e.getMessage()); 1461 continue; 1462 } 1463 } 1464 // verify that if E is in dtdDataOld, then it is in dtdData, and 1465 // has at least the same children and attributes 1466 for (Entry<String, Element> entry : dtdDataOld.getElementFromName().entrySet()) { 1467 Element oldElement = entry.getValue(); 1468 Element newElement = currentElementFromName.get(entry.getKey()); 1469 if (knownElementExceptions.contains( 1470 Pair.of(type.toString(), oldElement.getName()))) { 1471 continue; 1472 } 1473 if (assertNotNull( 1474 type 1475 + " DTD for trunk must be superset of v" 1476 + version 1477 + ", and must contain «" 1478 + oldElement.getName() 1479 + "»", 1480 newElement)) { 1481 // TODO Check order also 1482 for (Element oldChild : oldElement.getChildren().keySet()) { 1483 if (oldChild == null) { 1484 continue; 1485 } 1486 Element newChild = newElement.getChildNamed(oldChild.getName()); 1487 // skip certain items 1488 if (version.isOlderThan(CldrVersion.v1_6_1) 1489 && newElement.getName().equals("zone") 1490 && oldChild.getName().equals("usesMetazone")) { 1491 if (logKnownIssue( 1492 "CLDR-17054", 1493 "Breakage with items older than 1.6.1: " 1494 + newElement.getName() 1495 + " / " 1496 + oldChild.getName())) { 1497 continue; 1498 } 1499 } 1500 1501 if (knownChildExceptions.contains( 1502 Pair.of(newElement.getName(), oldChild.getName()))) { 1503 continue; 1504 } 1505 assertNotNull( 1506 type 1507 + " DTD - Trunk children of «" 1508 + newElement.getName() 1509 + "» must be superset of v" 1510 + version 1511 + ", and must contain «" 1512 + oldChild.getName() 1513 + "»", 1514 newChild); 1515 } 1516 for (Attribute oldAttribute : oldElement.getAttributes().keySet()) { 1517 Attribute newAttribute = 1518 newElement.getAttributeNamed(oldAttribute.getName()); 1519 1520 if (knownAttributeExceptions.contains( 1521 Pair.of(newElement.getName(), oldAttribute.getName()))) { 1522 continue; 1523 } 1524 assertNotNull( 1525 type 1526 + " DTD - Trunk attributes of «" 1527 + newElement.getName() 1528 + "» must be superset of v" 1529 + version 1530 + ", and must contain «" 1531 + oldAttribute.getName() 1532 + "»", 1533 newAttribute); 1534 } 1535 } 1536 } 1537 } 1538 } 1539 } 1540 1541 /** Compare each path to each other path for every single file in CLDR */ TestDtdComparison()1542 public void TestDtdComparison() { 1543 // try some simple paths for regression 1544 1545 sortPaths( 1546 DtdData.getInstance(DtdType.ldml).getDtdComparator(null), 1547 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/dateTimeFormatLength[@type=\"full\"]/dateTimeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]", 1548 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats"); 1549 1550 sortPaths( 1551 DtdData.getInstance(DtdType.supplementalData).getDtdComparator(null), 1552 "//supplementalData/territoryContainment/group[@type=\"419\"][@contains=\"013 029 005\"][@grouping=\"true\"]", 1553 "//supplementalData/territoryContainment/group[@type=\"003\"][@contains=\"021 013 029\"][@grouping=\"true\"]"); 1554 } 1555 TestDtdComparisonsAll()1556 public void TestDtdComparisonsAll() { 1557 if (getInclusion() <= 5) { // Only run this test in exhaustive mode. 1558 return; 1559 } 1560 for (File file : CLDRConfig.getInstance().getAllCLDRFilesEndingWith(".xml")) { 1561 if (file.getParentFile().getName().equals("import") 1562 && file.getParentFile().getParentFile().getName().equals("keyboards")) { 1563 return; // skip imports 1564 } 1565 checkDtdComparatorFor(file, null); 1566 } 1567 } 1568 checkDtdComparatorForResource(String fileToRead, DtdType overrideDtdType)1569 public void checkDtdComparatorForResource(String fileToRead, DtdType overrideDtdType) { 1570 MyHandler myHandler = new MyHandler(overrideDtdType); 1571 XMLFileReader xfr = new XMLFileReader().setHandler(myHandler); 1572 try { 1573 myHandler.fileName = fileToRead; 1574 xfr.read(myHandler.fileName, TestBasic.class, -1, true); 1575 logln(myHandler.fileName); 1576 } catch (Exception e) { 1577 Throwable t = e; 1578 StringBuilder b = new StringBuilder(); 1579 String indent = ""; 1580 while (t != null) { 1581 b.append(indent).append(t.getMessage()); 1582 indent = indent.isEmpty() ? "\n\t\t" : indent + "\t"; 1583 t = t.getCause(); 1584 } 1585 errln(b.toString()); 1586 return; 1587 } 1588 DtdData dtdData = DtdData.getInstance(myHandler.dtdType); 1589 sortPaths(dtdData.getDtdComparator(null), myHandler.data); 1590 } 1591 checkDtdComparatorFor(File fileToRead, DtdType overrideDtdType)1592 public void checkDtdComparatorFor(File fileToRead, DtdType overrideDtdType) { 1593 MyHandler myHandler = new MyHandler(overrideDtdType); 1594 XMLFileReader xfr = new XMLFileReader().setHandler(myHandler); 1595 try { 1596 myHandler.fileName = PathUtilities.getNormalizedPathString(fileToRead); 1597 xfr.read(myHandler.fileName, -1, true); 1598 logln(myHandler.fileName); 1599 } catch (Exception e) { 1600 e.printStackTrace(); 1601 Throwable t = e; 1602 StringBuilder b = new StringBuilder(); 1603 String indent = ""; 1604 while (t != null) { 1605 b.append(indent).append(t.getMessage()); 1606 indent = indent.isEmpty() ? "\n\t\t" : indent + "\t"; 1607 t = t.getCause(); 1608 } 1609 errln(b.toString()); 1610 return; 1611 } 1612 DtdData dtdData = DtdData.getInstance(myHandler.dtdType); 1613 sortPaths(dtdData.getDtdComparator(null), myHandler.data); 1614 } 1615 1616 static class MyHandler extends XMLFileReader.SimpleHandler { 1617 private String fileName; 1618 private DtdType dtdType; 1619 private final Set<String> data = new LinkedHashSet<>(); 1620 MyHandler(DtdType overrideDtdType)1621 public MyHandler(DtdType overrideDtdType) { 1622 dtdType = overrideDtdType; 1623 } 1624 1625 @Override handlePathValue(String path, @SuppressWarnings("unused") String value)1626 public void handlePathValue(String path, @SuppressWarnings("unused") String value) { 1627 if (dtdType == null) { 1628 try { 1629 dtdType = DtdType.fromPath(path); 1630 } catch (Exception e) { 1631 throw new IllegalArgumentException("Can't read " + fileName, e); 1632 } 1633 } 1634 data.add(path); 1635 } 1636 } 1637 sortPaths(Comparator<String> dc, Collection<String> paths)1638 public void sortPaths(Comparator<String> dc, Collection<String> paths) { 1639 String[] array = paths.toArray(new String[paths.size()]); 1640 sortPaths(dc, array); 1641 } 1642 sortPaths(Comparator<String> dc, String... array)1643 public void sortPaths(Comparator<String> dc, String... array) { 1644 Arrays.sort(array, 0, array.length, dc); 1645 } 1646 // public void TestNewDtdData() moved to TestDtdData 1647 } 1648