xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestBasic.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.unittest;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.base.Objects;
5 import com.google.common.collect.ImmutableMultimap;
6 import com.google.common.collect.ImmutableSet;
7 import com.google.common.collect.Multimap;
8 import com.google.common.collect.TreeMultimap;
9 import com.ibm.icu.impl.Relation;
10 import com.ibm.icu.impl.Row;
11 import com.ibm.icu.impl.Row.R2;
12 import com.ibm.icu.impl.Row.R3;
13 import com.ibm.icu.impl.Utility;
14 import com.ibm.icu.lang.UCharacter;
15 import com.ibm.icu.text.Collator;
16 import com.ibm.icu.text.DecimalFormat;
17 import com.ibm.icu.text.Normalizer;
18 import com.ibm.icu.text.NumberFormat;
19 import com.ibm.icu.text.UTF16;
20 import com.ibm.icu.text.UnicodeSet;
21 import com.ibm.icu.text.UnicodeSetIterator;
22 import com.ibm.icu.util.Currency;
23 import com.ibm.icu.util.ULocale;
24 import java.io.File;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.io.PrintWriter;
28 import java.io.StringWriter;
29 import java.util.ArrayList;
30 import java.util.Arrays;
31 import java.util.Collection;
32 import java.util.Collections;
33 import java.util.Comparator;
34 import java.util.EnumSet;
35 import java.util.HashSet;
36 import java.util.Iterator;
37 import java.util.LinkedHashSet;
38 import java.util.List;
39 import java.util.Map;
40 import java.util.Map.Entry;
41 import java.util.Set;
42 import java.util.TreeMap;
43 import java.util.TreeSet;
44 import org.unicode.cldr.test.DisplayAndInputProcessor;
45 import org.unicode.cldr.tool.CldrVersion;
46 import org.unicode.cldr.tool.LikelySubtags;
47 import org.unicode.cldr.util.Builder;
48 import org.unicode.cldr.util.CLDRConfig;
49 import org.unicode.cldr.util.CLDRFile;
50 import org.unicode.cldr.util.CLDRFile.DraftStatus;
51 import org.unicode.cldr.util.CLDRFile.Status;
52 import org.unicode.cldr.util.CLDRFile.WinningChoice;
53 import org.unicode.cldr.util.CLDRPaths;
54 import org.unicode.cldr.util.ChainedMap;
55 import org.unicode.cldr.util.ChainedMap.M4;
56 import org.unicode.cldr.util.CharacterFallbacks;
57 import org.unicode.cldr.util.CldrUtility;
58 import org.unicode.cldr.util.Counter;
59 import org.unicode.cldr.util.DiscreteComparator;
60 import org.unicode.cldr.util.DiscreteComparator.Ordering;
61 import org.unicode.cldr.util.DoctypeXmlStreamWrapper;
62 import org.unicode.cldr.util.DtdData;
63 import org.unicode.cldr.util.DtdData.Attribute;
64 import org.unicode.cldr.util.DtdData.Element;
65 import org.unicode.cldr.util.DtdData.ElementType;
66 import org.unicode.cldr.util.DtdType;
67 import org.unicode.cldr.util.DtdType.DtdStatus;
68 import org.unicode.cldr.util.ElementAttributeInfo;
69 import org.unicode.cldr.util.Factory;
70 import org.unicode.cldr.util.InputStreamFactory;
71 import org.unicode.cldr.util.LanguageTagParser;
72 import org.unicode.cldr.util.Level;
73 import org.unicode.cldr.util.LocaleIDParser;
74 import org.unicode.cldr.util.Pair;
75 import org.unicode.cldr.util.PathHeader;
76 import org.unicode.cldr.util.PathUtilities;
77 import org.unicode.cldr.util.StandardCodes;
78 import org.unicode.cldr.util.SupplementalDataInfo;
79 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
80 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
81 import org.unicode.cldr.util.TestCLDRPaths;
82 import org.unicode.cldr.util.XMLFileReader;
83 import org.unicode.cldr.util.XPathParts;
84 import org.xml.sax.ErrorHandler;
85 import org.xml.sax.InputSource;
86 import org.xml.sax.SAXException;
87 import org.xml.sax.SAXParseException;
88 import org.xml.sax.XMLReader;
89 
90 public class TestBasic extends TestFmwkPlus {
91 
92     private static final boolean DEBUG = false;
93 
94     static CLDRConfig testInfo = CLDRConfig.getInstance();
95 
96     private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO =
97             testInfo.getSupplementalDataInfo();
98 
99     private static final ImmutableSet<Pair<String, String>> knownElementExceptions =
100             ImmutableSet.of(Pair.of("ldml", "usesMetazone"), Pair.of("ldmlICU", "usesMetazone"));
101 
102     private static final ImmutableSet<Pair<String, String>> knownAttributeExceptions =
103             ImmutableSet.of(
104                     Pair.of("ldml", "version"),
105                     Pair.of("supplementalData", "version"),
106                     Pair.of("ldmlICU", "version"),
107                     Pair.of("layout", "standard"),
108                     Pair.of("currency", "id"), // for v1.1.1
109                     Pair.of("monthNames", "type"), // for v1.1.1
110                     Pair.of("alias", "type") // for v1.1.1
111                     );
112 
113     private static final ImmutableSet<Pair<String, String>> knownChildExceptions =
114             ImmutableSet.of(
115                     Pair.of("abbreviationFallback", "special"),
116                     Pair.of("inList", "special"),
117                     Pair.of("preferenceOrdering", "special"));
118 
119     /**
120      * Simple test that loads each file in the cldr directory, thus verifying that the DTD works,
121      * and also checks that the PrettyPaths work.
122      *
123      * @author markdavis
124      */
main(String[] args)125     public static void main(String[] args) {
126         new TestBasic().run(args);
127     }
128 
129     private static final ImmutableSet<String> skipAttributes =
130             ImmutableSet.of("alt", "draft", "references");
131 
132     private final ImmutableSet<String> eightPointLocales =
133             ImmutableSet.of(
134                     "ar", "ca", "cs", "da", "de", "el", "es", "fi", "fr", "he", "hi", "hr", "hu",
135                     "id", "it", "ja", "ko", "lt", "lv", "nl", "no", "pl", "pt", "pt_PT", "ro", "ru",
136                     "sk", "sl", "sr", "sv", "th", "tr", "uk", "vi", "zh", "zh_Hant");
137 
138     // private final boolean showForceZoom = Utility.getProperty("forcezoom",
139     // false);
140 
141     private final boolean resolved = CldrUtility.getProperty("resolved", false);
142 
143     private final Exception[] internalException = new Exception[1];
144 
TestDtds()145     public void TestDtds() throws IOException {
146         Relation<Row.R2<DtdType, String>, String> foundAttributes =
147                 Relation.of(new TreeMap<Row.R2<DtdType, String>, Set<String>>(), TreeSet.class);
148         final CLDRConfig config = CLDRConfig.getInstance();
149         final File basedir = config.getCldrBaseDirectory();
150         List<TimingInfo> data = new ArrayList<>();
151 
152         for (String subdir : CLDRConfig.getCLDRDataDirectories()) {
153             checkDtds(new File(basedir, subdir), 0, foundAttributes, data);
154         }
155         if (foundAttributes.size() > 0) {
156             showFoundElements(foundAttributes);
157         }
158         if (isVerbose()) {
159             long totalBytes = 0;
160             long totalNanos = 0;
161             for (TimingInfo i : data) {
162                 long length = i.file.length();
163                 totalBytes += length;
164                 totalNanos += i.nanos;
165                 logln(i.nanos + "\t" + length + "\t" + i.file);
166             }
167             logln(totalNanos + "\t" + totalBytes);
168         }
169     }
170 
checkDtds( File directoryFile, int level, Relation<R2<DtdType, String>, String> foundAttributes, List<TimingInfo> data)171     private void checkDtds(
172             File directoryFile,
173             int level,
174             Relation<R2<DtdType, String>, String> foundAttributes,
175             List<TimingInfo> data)
176             throws IOException {
177         boolean deepCheck = getInclusion() >= 10;
178         if (directoryFile.getName().equals("import")
179                 && directoryFile.getParentFile().getName().equals("keyboards")) {
180             return; // skip imports
181         }
182         File[] listFiles = directoryFile.listFiles();
183         String normalizedPath = PathUtilities.getNormalizedPathString(directoryFile);
184         String indent = Utility.repeat("\t", level);
185         if (listFiles == null) {
186             throw new IllegalArgumentException(indent + "Empty directory: " + normalizedPath);
187         }
188         logln("Checking files for DTD errors in: " + indent + normalizedPath);
189         for (File fileName : listFiles) {
190             String name = fileName.getName();
191             if (CLDRConfig.isJunkFile(name)) {
192                 continue;
193             } else if (fileName.isDirectory()) {
194                 checkDtds(fileName, level + 1, foundAttributes, data);
195             } else if (name.endsWith(".xml")) {
196                 data.add(check(fileName));
197                 if (deepCheck // takes too long to do all the time
198                 ) {
199                     CLDRFile cldrfile =
200                             CLDRFile.loadFromFile(fileName, "temp", DraftStatus.unconfirmed);
201                     for (String xpath : cldrfile) {
202                         String fullPath = cldrfile.getFullXPath(xpath);
203                         if (fullPath == null) {
204                             fullPath = cldrfile.getFullXPath(xpath);
205                             assertNotNull("", fullPath);
206                             continue;
207                         }
208                         XPathParts parts = XPathParts.getFrozenInstance(fullPath);
209                         DtdType type = parts.getDtdData().dtdType;
210                         for (int i = 0; i < parts.size(); ++i) {
211                             String element = parts.getElement(i);
212                             R2<DtdType, String> typeElement = Row.of(type, element);
213                             if (parts.getAttributeCount(i) == 0) {
214                                 foundAttributes.put(typeElement, "NONE");
215                             } else {
216                                 for (String attribute : parts.getAttributeKeys(i)) {
217                                     foundAttributes.put(typeElement, attribute);
218                                 }
219                             }
220                         }
221                     }
222                 }
223             }
224         }
225     }
226 
showFoundElements(Relation<Row.R2<DtdType, String>, String> foundAttributes)227     public void showFoundElements(Relation<Row.R2<DtdType, String>, String> foundAttributes) {
228         Relation<Row.R2<DtdType, String>, String> theoryAttributes =
229                 Relation.of(new TreeMap<Row.R2<DtdType, String>, Set<String>>(), TreeSet.class);
230         for (DtdType type : DtdType.values()) {
231             if (type.getStatus() != DtdType.DtdStatus.active) {
232                 continue;
233             }
234             DtdData dtdData = DtdData.getInstance(type);
235             for (Element element : dtdData.getElementFromName().values()) {
236                 String name = element.getName();
237                 Set<Attribute> attributes = element.getAttributes().keySet();
238                 R2<DtdType, String> typeElement = Row.of(type, name);
239                 if (attributes.isEmpty()) {
240                     theoryAttributes.put(typeElement, "NONE");
241                 } else {
242                     for (Attribute attribute : attributes) {
243                         theoryAttributes.put(typeElement, attribute.name);
244                     }
245                 }
246             }
247         }
248         Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed =
249                 Relation.of(
250                         new TreeMap<String, Set<R3<Boolean, DtdType, String>>>(),
251                         LinkedHashSet.class);
252 
253         for (Entry<R2<DtdType, String>, Set<String>> s : theoryAttributes.keyValuesSet()) {
254             R2<DtdType, String> typeElement = s.getKey();
255             Set<String> theoryAttributeSet = s.getValue();
256             DtdType type = typeElement.get0();
257             String element = typeElement.get1();
258             if (element.equals("ANY") || element.equals("#PCDATA")) {
259                 continue;
260             }
261             boolean deprecatedElement =
262                     SUPPLEMENTAL_DATA_INFO.isDeprecated(type, element, "*", "*");
263             String header = type + "\t" + element + "\t" + (deprecatedElement ? "X" : "") + "\t";
264             Set<String> usedAttributes = foundAttributes.get(typeElement);
265             Set<String> unusedAttributes = new LinkedHashSet<>(theoryAttributeSet);
266             if (usedAttributes == null) {
267                 logln(
268                         header
269                                 + "<NOT-FOUND>\t\t"
270                                 + siftDeprecated(
271                                         type,
272                                         element,
273                                         unusedAttributes,
274                                         attributesToTypeElementUsed,
275                                         false));
276                 continue;
277             }
278             unusedAttributes.removeAll(usedAttributes);
279             logln(
280                     header
281                             + siftDeprecated(
282                                     type,
283                                     element,
284                                     usedAttributes,
285                                     attributesToTypeElementUsed,
286                                     true)
287                             + "\t"
288                             + siftDeprecated(
289                                     type,
290                                     element,
291                                     unusedAttributes,
292                                     attributesToTypeElementUsed,
293                                     false));
294         }
295 
296         logln("Undeprecated Attributes\t");
297         for (Entry<String, R3<Boolean, DtdType, String>> s :
298                 attributesToTypeElementUsed.keyValueSet()) {
299             R3<Boolean, DtdType, String> typeElementUsed = s.getValue();
300             logln(
301                     s.getKey()
302                             + "\t"
303                             + typeElementUsed.get0()
304                             + "\t"
305                             + typeElementUsed.get1()
306                             + "\t"
307                             + typeElementUsed.get2());
308         }
309     }
310 
siftDeprecated( DtdType type, String element, Set<String> attributeSet, Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed, boolean used)311     private String siftDeprecated(
312             DtdType type,
313             String element,
314             Set<String> attributeSet,
315             Relation<String, R3<Boolean, DtdType, String>> attributesToTypeElementUsed,
316             boolean used) {
317         StringBuilder b = new StringBuilder();
318         StringBuilder bdep = new StringBuilder();
319         for (String attribute : attributeSet) {
320             String attributeName =
321                     "«"
322                             + attribute
323                             + (!"NONE".equals(attribute)
324                                             && CLDRFile.isDistinguishing(type, element, attribute)
325                                     ? "*"
326                                     : "")
327                             + "»";
328             if (!"NONE".equals(attribute)
329                     && SUPPLEMENTAL_DATA_INFO.isDeprecated(type, element, attribute, "*")) {
330                 if (bdep.length() != 0) {
331                     bdep.append(" ");
332                 }
333                 bdep.append(attributeName);
334             } else {
335                 if (b.length() != 0) {
336                     b.append(" ");
337                 }
338                 b.append(attributeName);
339                 if (!"NONE".equals(attribute)) {
340                     attributesToTypeElementUsed.put(attribute, Row.of(used, type, element));
341                 }
342             }
343         }
344         return b.toString() + "\t" + bdep.toString();
345     }
346 
347     class MyErrorHandler implements ErrorHandler {
348         @Override
error(SAXParseException exception)349         public void error(SAXParseException exception) throws SAXException {
350             errln("error: " + XMLFileReader.showSAX(exception));
351             throw exception;
352         }
353 
354         @Override
fatalError(SAXParseException exception)355         public void fatalError(SAXParseException exception) throws SAXException {
356             errln("fatalError: " + XMLFileReader.showSAX(exception));
357             throw exception;
358         }
359 
360         @Override
warning(SAXParseException exception)361         public void warning(SAXParseException exception) throws SAXException {
362             errln("warning: " + XMLFileReader.showSAX(exception));
363             throw exception;
364         }
365     }
366 
367     private class TimingInfo {
368         File file;
369         long nanos;
370     }
371 
check(File systemID)372     public TimingInfo check(File systemID) {
373         long start = System.nanoTime();
374         try (InputStream fis = InputStreamFactory.createInputStream(systemID)) {
375             // FileInputStream fis = new FileInputStream(systemID);
376             XMLReader xmlReader = XMLFileReader.createXMLReader(true);
377             xmlReader.setErrorHandler(new MyErrorHandler());
378             InputSource is = new InputSource(fis);
379             is.setSystemId(systemID.toString());
380             DoctypeXmlStreamWrapper.wrap(is);
381             xmlReader.parse(is);
382             // fis.close();
383         } catch (SAXException | IOException e) {
384             errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" + e.getMessage());
385         }
386         // catch (SAXParseException e) {
387         // errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" +
388         // e.getMessage());
389         // } catch (IOException e) {
390         // errln("\t" + "Can't read " + systemID + "\t" + e.getClass() + "\t" +
391         // e.getMessage());
392         // }
393         TimingInfo timingInfo = new TimingInfo();
394         timingInfo.nanos = System.nanoTime() - start;
395         timingInfo.file = systemID;
396         return timingInfo;
397     }
398 
TestCurrencyFallback()399     public void TestCurrencyFallback() {
400         Factory cldrFactory = testInfo.getCldrFactory();
401         Set<String> currencies = StandardCodes.make().getAvailableCodes("currency");
402 
403         final UnicodeSet CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS =
404                 new UnicodeSet("[[:sc:]-[\\u0000-\\u00FF]]").freeze();
405 
406         CharacterFallbacks fallbacks = CharacterFallbacks.make();
407 
408         for (String locale : cldrFactory.getAvailable()) {
409             if (!StandardCodes.isLocaleAtLeastBasic(locale)) {
410                 continue;
411             }
412             CLDRFile file = testInfo.getCLDRFile(locale, false);
413             if (file.isNonInheriting()) continue;
414 
415             final UnicodeSet OK_CURRENCY_FALLBACK =
416                     new UnicodeSet("[\\u0000-\\u00FF]")
417                             .addAll(safeExemplars(file, ""))
418                             .addAll(safeExemplars(file, "auxiliary"))
419                             .freeze();
420             UnicodeSet badSoFar = new UnicodeSet();
421 
422             for (Iterator<String> it = file.iterator(); it.hasNext(); ) {
423                 String path = it.next();
424                 if (path.endsWith("/alias")) {
425                     continue;
426                 }
427                 String value = file.getStringValue(path);
428 
429                 // check for special characters
430                 if (CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS.containsSome(value)) {
431                     XPathParts parts = XPathParts.getFrozenInstance(path);
432                     if (!parts.getElement(-1).equals("symbol")) {
433                         continue;
434                     }
435                     // We don't care about fallbacks for narrow currency symbols
436                     if ("narrow".equals(parts.getAttributeValue(-1, "alt"))) {
437                         continue;
438                     }
439                     String currencyType = parts.getAttributeValue(-2, "type");
440 
441                     UnicodeSet fishy =
442                             new UnicodeSet()
443                                     .addAll(value)
444                                     .retainAll(CHARACTERS_THAT_SHOULD_HAVE_FALLBACKS)
445                                     .removeAll(badSoFar);
446                     for (UnicodeSetIterator it2 = new UnicodeSetIterator(fishy); it2.next(); ) {
447                         final int fishyCodepoint = it2.codepoint;
448                         List<String> fallbackList = fallbacks.getSubstitutes(fishyCodepoint);
449 
450                         String nfkc = Normalizer.normalize(fishyCodepoint, Normalizer.NFKC);
451                         if (!nfkc.equals(UTF16.valueOf(fishyCodepoint))) {
452                             if (fallbackList == null) {
453                                 fallbackList = new ArrayList<>();
454                             } else {
455                                 fallbackList = new ArrayList<>(fallbackList); // writable
456                             }
457                             fallbackList.add(nfkc);
458                         }
459                         // later test for all Latin-1
460                         if (fallbackList == null) {
461                             if (locale.equals("nqo")
462                                     && logKnownIssue("CLDR-16987", "fishy fallback test")) {
463                                 continue;
464                             }
465                             errln(
466                                     "Locale:\t"
467                                             + locale
468                                             + ";\tCharacter with no fallback:\t"
469                                             + it2.getString()
470                                             + "\t"
471                                             + UCharacter.getName(fishyCodepoint));
472                             badSoFar.add(fishyCodepoint);
473                         } else {
474                             String fallback = null;
475                             for (String fb : fallbackList) {
476                                 if (OK_CURRENCY_FALLBACK.containsAll(fb)) {
477                                     if (!fb.equals(currencyType) && currencies.contains(fb)) {
478                                         errln(
479                                                 "Locale:\t"
480                                                         + locale
481                                                         + ";\tCurrency:\t"
482                                                         + currencyType
483                                                         + ";\tFallback converts to different code!:\t"
484                                                         + fb
485                                                         + "\t"
486                                                         + it2.getString()
487                                                         + "\t"
488                                                         + UCharacter.getName(fishyCodepoint));
489                                     }
490                                     if (fallback == null) {
491                                         fallback = fb;
492                                     }
493                                 }
494                             }
495                             if (fallback == null) {
496                                 errln(
497                                         "Locale:\t"
498                                                 + locale
499                                                 + ";\tCharacter with no good fallback (exemplars+Latin1):\t"
500                                                 + it2.getString()
501                                                 + "\t"
502                                                 + UCharacter.getName(fishyCodepoint));
503                                 badSoFar.add(fishyCodepoint);
504                             } else {
505                                 logln(
506                                         "Locale:\t"
507                                                 + locale
508                                                 + ";\tCharacter with good fallback:\t"
509                                                 + it2.getString()
510                                                 + " "
511                                                 + UCharacter.getName(fishyCodepoint)
512                                                 + " => "
513                                                 + fallback);
514                                 // badSoFar.add(fishyCodepoint);
515                             }
516                         }
517                     }
518                 }
519             }
520         }
521     }
522 
TestAbstractPaths()523     public void TestAbstractPaths() {
524         Factory cldrFactory = testInfo.getCldrFactory();
525         CLDRFile english = testInfo.getEnglish();
526         Map<String, Counter<Level>> abstactPaths = new TreeMap<>();
527         RegexTransform abstractPathTransform =
528                 new RegexTransform(RegexTransform.Processing.ONE_PASS)
529                         .add("//ldml/", "")
530                         .add("\\[@alt=\"[^\"]*\"\\]", "")
531                         .add("=\"[^\"]*\"", "=\"*\"")
532                         .add("([^]])\\[", "$1\t[")
533                         .add("([^]])/", "$1\t/")
534                         .add("/", "\t");
535 
536         for (String locale : getInclusion() <= 5 ? eightPointLocales : cldrFactory.getAvailable()) {
537             CLDRFile file = testInfo.getCLDRFile(locale, resolved);
538             if (file.isNonInheriting()) continue;
539             logln(locale + "\t-\t" + english.getName(locale));
540 
541             for (Iterator<String> it = file.iterator(); it.hasNext(); ) {
542                 String path = it.next();
543                 if (path.endsWith("/alias")) {
544                     continue;
545                 }
546                 // collect abstracted paths
547                 String abstractPath = abstractPathTransform.transform(path);
548                 Level level = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(path, locale);
549                 if (level == Level.OPTIONAL) {
550                     level = Level.COMPREHENSIVE;
551                 }
552                 Counter<Level> row = abstactPaths.get(abstractPath);
553                 if (row == null) {
554                     abstactPaths.put(abstractPath, row = new Counter<>());
555                 }
556                 row.add(level, 1);
557             }
558         }
559         logln(CldrUtility.LINE_SEPARATOR + "Abstract Paths");
560         for (Entry<String, Counter<Level>> pathInfo : abstactPaths.entrySet()) {
561             String path = pathInfo.getKey();
562             Counter<Level> counter = pathInfo.getValue();
563             logln(counter.getTotal() + "\t" + getCoverage(counter) + "\t" + path);
564         }
565     }
566 
getCoverage(Counter<Level> counter)567     private CharSequence getCoverage(Counter<Level> counter) {
568         StringBuilder result = new StringBuilder();
569         boolean first = true;
570         for (Level level : counter.getKeysetSortedByKey()) {
571             if (first) {
572                 first = false;
573             } else {
574                 result.append(' ');
575             }
576             result.append("L").append(level.ordinal()).append("=").append(counter.get(level));
577         }
578         return result;
579     }
580 
581     // public void TestCLDRFileCache() {
582     // long start = System.nanoTime();
583     // Factory cldrFactory = testInfo.getCldrFactory();
584     // String unusualLocale = "hi";
585     // CLDRFile file = cldrFactory.make(unusualLocale, true);
586     // long afterOne = System.nanoTime();
587     // logln("First: " + (afterOne-start));
588     // CLDRFile file2 = cldrFactory.make(unusualLocale, true);
589     // long afterTwo = System.nanoTime();
590     // logln("Second: " + (afterTwo-afterOne));
591     // }
592     //
TestPaths()593     public void TestPaths() {
594         Relation<String, String> distinguishing =
595                 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
596         Relation<String, String> nonDistinguishing =
597                 Relation.of(new TreeMap<String, Set<String>>(), TreeSet.class);
598         Factory cldrFactory = testInfo.getCldrFactory();
599         CLDRFile english = testInfo.getEnglish();
600 
601         Relation<String, String> pathToLocale =
602                 Relation.of(
603                         new TreeMap<String, Set<String>>(CLDRFile.getComparator(DtdType.ldml)),
604                         TreeSet.class,
605                         null);
606         Set<String> localesToTest =
607                 getInclusion() <= 5 ? eightPointLocales : cldrFactory.getAvailable();
608         for (String locale : localesToTest) {
609             CLDRFile file = testInfo.getCLDRFile(locale, resolved);
610             DtdType dtdType = null;
611             if (file.isNonInheriting()) continue;
612             DisplayAndInputProcessor displayAndInputProcessor =
613                     new DisplayAndInputProcessor(file, false);
614 
615             logln(locale + "\t-\t" + english.getName(locale));
616 
617             for (Iterator<String> it = file.iterator(); it.hasNext(); ) {
618                 String path = it.next();
619                 if (dtdType == null) {
620                     dtdType = DtdType.fromPath(path);
621                 }
622 
623                 if (path.endsWith("/alias")) {
624                     continue;
625                 }
626                 String value = file.getStringValue(path);
627                 if (value == null) {
628                     throw new IllegalArgumentException(
629                             locale + "\tError: in null value at " + path);
630                 }
631 
632                 String displayValue = displayAndInputProcessor.processForDisplay(path, value);
633                 if (!displayValue.equals(value)) {
634                     logln(
635                             "\t"
636                                     + locale
637                                     + "\tdisplayAndInputProcessor changes display value <"
638                                     + value
639                                     + ">\t=>\t<"
640                                     + displayValue
641                                     + ">\t\t"
642                                     + path);
643                 }
644                 String inputValue =
645                         displayAndInputProcessor.processInput(
646                                 path, displayValue, internalException);
647                 if (internalException[0] != null) {
648                     errln(
649                             "\t"
650                                     + locale
651                                     + "\tdisplayAndInputProcessor internal error <"
652                                     + value
653                                     + ">\t=>\t<"
654                                     + inputValue
655                                     + ">\t\t"
656                                     + path);
657                     internalException[0].printStackTrace(System.out);
658                 }
659                 if (isVerbose() && !inputValue.equals(value)) {
660                     displayAndInputProcessor.processInput(path, value, internalException); // for
661                     // debugging
662                     logln(
663                             "\t"
664                                     + locale
665                                     + "\tdisplayAndInputProcessor changes input value <"
666                                     + value
667                                     + ">\t=>\t<"
668                                     + inputValue
669                                     + ">\t\t"
670                                     + path);
671                 }
672 
673                 pathToLocale.put(path, locale);
674 
675                 // also check for non-distinguishing attributes
676                 if (path.contains("/identity")) continue;
677 
678                 String fullPath = file.getFullXPath(path);
679                 XPathParts parts = XPathParts.getFrozenInstance(fullPath);
680                 for (int i = 0; i < parts.size(); ++i) {
681                     if (parts.getAttributeCount(i) == 0) {
682                         continue;
683                     }
684                     String element = parts.getElement(i);
685                     for (String attribute : parts.getAttributeKeys(i)) {
686                         if (skipAttributes.contains(attribute)) continue;
687                         if (CLDRFile.isDistinguishing(dtdType, element, attribute)) {
688                             distinguishing.put(element, attribute);
689                         } else {
690                             nonDistinguishing.put(element, attribute);
691                         }
692                     }
693                 }
694             }
695         }
696 
697         if (isVerbose()) {
698             System.out.format(
699                     "Distinguishing Elements: %s" + CldrUtility.LINE_SEPARATOR, distinguishing);
700             System.out.format(
701                     "Nondistinguishing Elements: %s" + CldrUtility.LINE_SEPARATOR,
702                     nonDistinguishing);
703             System.out.format("Skipped %s" + CldrUtility.LINE_SEPARATOR, skipAttributes);
704         }
705     }
706 
707     /** The verbose output shows the results of 1..3 \u00a4 signs. */
checkCurrency()708     public void checkCurrency() {
709         Map<String, Set<R2<String, Integer>>> results =
710                 new TreeMap<>(Collator.getInstance(ULocale.ENGLISH));
711         for (ULocale locale : ULocale.getAvailableLocales()) {
712             if (locale.getCountry().length() != 0) {
713                 continue;
714             }
715             for (int i = 1; i < 4; ++i) {
716                 NumberFormat format = getCurrencyInstance(locale, i);
717                 for (Currency c :
718                         new Currency[] {
719                             Currency.getInstance("USD"),
720                             Currency.getInstance("EUR"),
721                             Currency.getInstance("INR")
722                         }) {
723                     format.setCurrency(c);
724                     final String formatted = format.format(12345.67);
725                     Set<R2<String, Integer>> set = results.get(formatted);
726                     if (set == null) {
727                         results.put(formatted, set = new TreeSet<>());
728                     }
729                     set.add(Row.of(locale.toString(), i));
730                 }
731             }
732         }
733         for (String formatted : results.keySet()) {
734             logln(formatted + "\t" + results.get(formatted));
735         }
736     }
737 
getCurrencyInstance(ULocale locale, int type)738     private static NumberFormat getCurrencyInstance(ULocale locale, int type) {
739         NumberFormat format = NumberFormat.getCurrencyInstance(locale);
740         if (type > 1) {
741             DecimalFormat format2 = (DecimalFormat) format;
742             String pattern = format2.toPattern();
743             String replacement = "\u00a4\u00a4";
744             for (int i = 2; i < type; ++i) {
745                 replacement += "\u00a4";
746             }
747             pattern = pattern.replace("\u00a4", replacement);
748             format2.applyPattern(pattern);
749         }
750         return format;
751     }
752 
safeExemplars(CLDRFile file, String string)753     private UnicodeSet safeExemplars(CLDRFile file, String string) {
754         final UnicodeSet result = file.getExemplarSet(string, WinningChoice.NORMAL);
755         return result != null ? result : new UnicodeSet();
756     }
757 
TestAPath()758     public void TestAPath() {
759         // <month type="1">1</month>
760         String path =
761                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"abbreviated\"]/month[@type=\"1\"]";
762         CLDRFile root = testInfo.getRoot();
763         logln("path: " + path);
764         String fullpath = root.getFullXPath(path);
765         logln("fullpath: " + fullpath);
766         String value = root.getStringValue(path);
767         logln("value: " + value);
768         Status status = new Status();
769         String source = root.getSourceLocaleID(path, status);
770         logln("locale: " + source);
771         logln("status: " + status);
772     }
773 
TestDefaultContents()774     public void TestDefaultContents() {
775         Set<String> defaultContents = Inheritance.defaultContents;
776         Multimap<String, String> parentToChildren = Inheritance.parentToChildren;
777 
778         // Put a list of locales that should be default content here.
779         final String expectDC[] = {
780             "os_GE" // see CLDR-14118
781         };
782         for (final String locale : expectDC) {
783             assertTrue(
784                     "expect " + locale + " to be a default content locale",
785                     defaultContents.contains(locale));
786         }
787 
788         if (DEBUG) {
789             Inheritance.showChain("", "", "root");
790         }
791 
792         for (String locale : defaultContents) {
793             CLDRFile cldrFile;
794             try {
795                 cldrFile = testInfo.getCLDRFile(locale, false);
796             } catch (RuntimeException e) {
797                 logln("Can't open default content file:\t" + locale);
798                 continue;
799             }
800             // we check that the default content locale is always empty
801             for (Iterator<String> it = cldrFile.iterator(); it.hasNext(); ) {
802                 String path = it.next();
803                 if (path.contains("/identity")) {
804                     continue;
805                 }
806                 errln("Default content file not empty:\t" + locale);
807                 showDifferences(locale);
808                 break;
809             }
810         }
811 
812         // check that if a locale has any children, that exactly one of them is
813         // the default content. Ignore locales with variants
814 
815         for (Entry<String, Collection<String>> localeAndKids :
816                 parentToChildren.asMap().entrySet()) {
817             String locale = localeAndKids.getKey();
818             if (locale.equals("root")) {
819                 continue;
820             }
821 
822             Collection<String> rawChildren = localeAndKids.getValue();
823 
824             // remove variant children
825             Set<String> children = new LinkedHashSet<>();
826             for (String child : rawChildren) {
827                 if (new LocaleIDParser().set(child).getVariants().length == 0) {
828                     children.add(child);
829                 }
830             }
831             if (children.isEmpty()) {
832                 continue;
833             }
834 
835             Set<String> defaultContentChildren = new LinkedHashSet<>(children);
836             defaultContentChildren.retainAll(defaultContents);
837             if (defaultContentChildren.size() == 1) {
838                 continue;
839                 // If we're already down to the region level then it's OK not to have
840                 // default contents.
841             } else if (!new LocaleIDParser().set(locale).getRegion().isEmpty()) {
842                 continue;
843             } else if (defaultContentChildren.isEmpty()) {
844                 Object possible = highestShared(locale, children);
845                 errln(
846                         "Locale has children but is missing default contents locale: "
847                                 + locale
848                                 + ", children: "
849                                 + children
850                                 + "; possible fixes for children:\n"
851                                 + possible);
852             } else {
853                 errln(
854                         "Locale has too many defaultContent locales!!: "
855                                 + locale
856                                 + ", defaultContents: "
857                                 + defaultContentChildren);
858             }
859         }
860 
861         // check that each default content locale is likely-subtag equivalent to
862         // its parent.
863 
864         for (String locale : defaultContents) {
865             String maxLocale = LikelySubtags.maximize(locale, likelyData);
866             String localeParent = LocaleIDParser.getParent(locale);
867             String maxLocaleParent = LikelySubtags.maximize(localeParent, likelyData);
868             if (locale.equals("ar_001") || locale.equals("nb")) {
869                 logln(
870                         "Known exception to likelyMax(locale="
871                                 + locale
872                                 + ")"
873                                 + " == "
874                                 + "likelyMax(defaultContent="
875                                 + localeParent
876                                 + ")");
877                 continue;
878             }
879             assertEquals(
880                     "likelyMax(locale="
881                             + locale
882                             + ")"
883                             + " == "
884                             + "likelyMax(defaultContent="
885                             + localeParent
886                             + ")",
887                     maxLocaleParent,
888                     maxLocale);
889         }
890     }
891 
highestShared(String parent, Set<String> children)892     private String highestShared(String parent, Set<String> children) {
893         M4<PathHeader, String, String, Boolean> data =
894                 ChainedMap.of(
895                         new TreeMap<PathHeader, Object>(),
896                         new TreeMap<String, Object>(),
897                         new TreeMap<String, Object>(),
898                         Boolean.class);
899         CLDRFile parentFile = testInfo.getCLDRFile(parent, true);
900         PathHeader.Factory phf = PathHeader.getFactory(testInfo.getEnglish());
901         for (String child : children) {
902             CLDRFile cldrFile = testInfo.getCLDRFile(child, false);
903             for (String path : cldrFile) {
904                 if (path.contains("/identity")) {
905                     continue;
906                 }
907                 if (path.contains("provisional") || path.contains("unconfirmed")) {
908                     continue;
909                 }
910                 String value = cldrFile.getStringValue(path);
911                 // double-check
912                 String parentValue = parentFile.getStringValue(path);
913                 if (value.equals(parentValue)) {
914                     continue;
915                 }
916                 PathHeader ph = phf.fromPath(path);
917                 data.put(ph, value, child, Boolean.TRUE);
918                 data.put(ph, parentValue == null ? "∅∅∅" : parentValue, child, Boolean.TRUE);
919             }
920         }
921         StringBuilder result = new StringBuilder();
922         for (Entry<PathHeader, Map<String, Map<String, Boolean>>> entry : data) {
923             for (Entry<String, Map<String, Boolean>> item : entry.getValue().entrySet()) {
924                 result.append("\n")
925                         .append(entry.getKey())
926                         .append("\t")
927                         .append(item.getKey() + "\t" + item.getValue().keySet());
928             }
929         }
930         return result.toString();
931     }
932 
933     public static class Inheritance {
934         public static final Set<String> defaultContents =
935                 SUPPLEMENTAL_DATA_INFO.getDefaultContentLocales();
936         public static final Multimap<String, String> parentToChildren;
937 
938         static {
939             Multimap<String, String> _parentToChildren = TreeMultimap.create();
940             for (String child : testInfo.getCldrFactory().getAvailable()) {
941                 if (child.equals("root")) {
942                     continue;
943                 }
944                 String localeParent = LocaleIDParser.getParent(child);
_parentToChildren.put(localeParent, child)945                 _parentToChildren.put(localeParent, child);
946             }
947             parentToChildren = ImmutableMultimap.copyOf(_parentToChildren);
948         }
949 
showChain(String prefix, String gparent, String current)950         public static void showChain(String prefix, String gparent, String current) {
951             Collection<String> children = parentToChildren.get(current);
952             if (children == null) {
953                 throw new IllegalArgumentException();
954             }
955             prefix +=
956                     current
957                             + (defaultContents.contains(current) ? "*" : "")
958                             + (isLikelyEquivalent(gparent, current) ? "~" : "")
959                             + "\t";
960 
961             // find leaves
962             Set<String> parents = new LinkedHashSet<>(children);
963             parents.retainAll(parentToChildren.keySet());
964             Set<String> leaves = new LinkedHashSet<>(children);
965             leaves.removeAll(parentToChildren.keySet());
966             if (!leaves.isEmpty()) {
967                 List<String> presentation = new ArrayList<>();
968                 boolean gotDc = false;
969                 for (String s : leaves) {
970                     String shown = s;
971                     if (isLikelyEquivalent(current, s)) {
972                         shown += "~";
973                     }
974                     if (defaultContents.contains(s)) {
975                         gotDc = true;
976                         shown += "*";
977                     }
978                     if (!shown.equals(s)) {
979                         presentation.add(0, shown);
980                     } else {
981                         presentation.add(shown);
982                     }
983                 }
984                 if (!gotDc) {
985                     int debug = 0;
986                 }
987                 if (leaves.size() == 1) {
988                     System.out.println(prefix + Joiner.on(" ").join(presentation));
989                 } else {
990                     System.out.println(prefix + "{" + Joiner.on(" ").join(presentation) + "}");
991                 }
992             }
993             for (String parent : parents) {
994                 showChain(prefix, current, parent);
995             }
996         }
997 
isLikelyEquivalent(String locale1, String locale2)998         static boolean isLikelyEquivalent(String locale1, String locale2) {
999             if (locale1.equals(locale2)) {
1000                 return true;
1001             }
1002             try {
1003                 String maxLocale1 = LikelySubtags.maximize(locale1, likelyData);
1004                 String maxLocale2 = LikelySubtags.maximize(locale2, likelyData);
1005                 return maxLocale1 != null && Objects.equal(maxLocale1, maxLocale2);
1006             } catch (Exception e) {
1007                 return false;
1008             }
1009         }
1010     }
1011 
1012     static final Map<String, String> likelyData = SUPPLEMENTAL_DATA_INFO.getLikelySubtags();
1013 
1014     private static final EnumSet<CldrVersion> badLdmlICUVersions =
1015             EnumSet.of(
1016                     CldrVersion.v1_1_1, CldrVersion.v1_2, CldrVersion.v1_4_1, CldrVersion.v1_5_1);
1017 
TestLikelySubtagsComplete()1018     public void TestLikelySubtagsComplete() {
1019         LanguageTagParser ltp = new LanguageTagParser();
1020         for (String locale : testInfo.getCldrFactory().getAvailable()) {
1021             if (locale.equals("root")) {
1022                 continue;
1023             }
1024             String maxLocale = LikelySubtags.maximize(locale, likelyData);
1025             if (maxLocale == null) {
1026                 errln("Locale missing likely subtag: " + locale);
1027                 continue;
1028             }
1029             ltp.set(maxLocale);
1030             if (ltp.getLanguage().isEmpty()
1031                     || ltp.getScript().isEmpty()
1032                     || ltp.getRegion().isEmpty()) {
1033                 errln("Locale has defective likely subtag: " + locale + " => " + maxLocale);
1034             }
1035         }
1036     }
1037 
showDifferences(String locale)1038     private void showDifferences(String locale) {
1039         CLDRFile cldrFile = testInfo.getCLDRFile(locale, false);
1040         final String localeParent = LocaleIDParser.getParent(locale);
1041         CLDRFile parentFile = testInfo.getCLDRFile(localeParent, true);
1042         int funnyCount = 0;
1043         for (Iterator<String> it = cldrFile.iterator("", cldrFile.getComparator());
1044                 it.hasNext(); ) {
1045             String path = it.next();
1046             if (path.contains("/identity")) {
1047                 continue;
1048             }
1049             final String fullXPath = cldrFile.getFullXPath(path);
1050             if (fullXPath.contains("[@draft=\"unconfirmed\"]")
1051                     || fullXPath.contains("[@draft=\"provisional\"]")) {
1052                 funnyCount++;
1053                 continue;
1054             }
1055             logln("\tpath:\t" + path);
1056             logln("\t\t" + locale + " value:\t<" + cldrFile.getStringValue(path) + ">");
1057             final String parentFullPath = parentFile.getFullXPath(path);
1058             logln("\t\t" + localeParent + " value:\t<" + parentFile.getStringValue(path) + ">");
1059             logln("\t\t" + locale + " fullpath:\t" + fullXPath);
1060             logln("\t\t" + localeParent + " fullpath:\t" + parentFullPath);
1061         }
1062         logln("\tCount of non-approved:\t" + funnyCount);
1063     }
1064 
/** Kinds of core data a locale can be reported as lacking; declaration order is significant for sorting. */
enum MissingType {
    /** Plural rules. */
    plurals,
    /** Main exemplar set. */
    main_exemplars,
    /** Main locale data as a whole. */
    no_main,
    /** Collation data. */
    collation,
    /** Index exemplar set. */
    index_exemplars,
    /** Punctuation exemplar set. */
    punct_exemplars
}
1073 
TestCoreData()1074     public void TestCoreData() {
1075         Set<String> availableLanguages = testInfo.getCldrFactory().getAvailableLanguages();
1076         PluralInfo rootRules = SUPPLEMENTAL_DATA_INFO.getPlurals(PluralType.cardinal, "root");
1077         Multimap<MissingType, Comparable> errors = TreeMultimap.create();
1078         errors.put(MissingType.collation, "?");
1079 
1080         Multimap<MissingType, Comparable> warnings = TreeMultimap.create();
1081         warnings.put(MissingType.collation, "?");
1082         warnings.put(MissingType.index_exemplars, "?");
1083         warnings.put(MissingType.punct_exemplars, "?");
1084 
1085         Set<String> collations = new HashSet<>();
1086 
1087         // collect collation info
1088         Factory collationFactory =
1089                 Factory.make(CLDRPaths.COLLATION_DIRECTORY, ".*", DraftStatus.contributed);
1090         for (String localeID : collationFactory.getAvailable()) {
1091             if (isTopLevel(localeID)) {
1092                 collations.add(localeID);
1093             }
1094         }
1095         logln(collations.toString());
1096 
1097         Set<String> allLanguages =
1098                 Builder.with(new TreeSet<String>())
1099                         .addAll(collations)
1100                         .addAll(availableLanguages)
1101                         .freeze();
1102 
1103         for (String localeID : allLanguages) {
1104             if (localeID.equals("root")) {
1105                 continue; // skip script locales
1106             }
1107             if (!isTopLevel(localeID)) {
1108                 continue;
1109             }
1110             if (!StandardCodes.isLocaleAtLeastBasic(localeID)) {
1111                 continue;
1112             }
1113             errors.clear();
1114             warnings.clear();
1115 
1116             String name =
1117                     "Locale:" + localeID + " (" + testInfo.getEnglish().getName(localeID) + ")";
1118 
1119             if (!collations.contains(localeID)) {
1120                 warnings.put(MissingType.collation, "missing");
1121                 logln(name + " is missing " + MissingType.collation.toString());
1122             }
1123 
1124             try {
1125                 CLDRFile cldrFile =
1126                         testInfo.getCldrFactory().make(localeID, false, DraftStatus.contributed);
1127 
1128                 String wholeFileAlias = cldrFile.getStringValue("//ldml/alias");
1129                 if (wholeFileAlias != null) {
1130                     logln("Whole-file alias:" + name);
1131                     continue;
1132                 }
1133 
1134                 PluralInfo pluralInfo =
1135                         SUPPLEMENTAL_DATA_INFO.getPlurals(PluralType.cardinal, localeID);
1136                 if (pluralInfo == rootRules) {
1137                     logln(name + " is missing " + MissingType.plurals.toString());
1138                     warnings.put(MissingType.plurals, "missing");
1139                 }
1140                 UnicodeSet main = cldrFile.getExemplarSet("", WinningChoice.WINNING);
1141                 if (main == null || main.isEmpty()) {
1142                     errln("  " + name + " is missing " + MissingType.main_exemplars.toString());
1143                     errors.put(MissingType.main_exemplars, "missing");
1144                 }
1145                 UnicodeSet index = cldrFile.getExemplarSet("index", WinningChoice.WINNING);
1146                 if (index == null || index.isEmpty()) {
1147                     logln(name + " is missing " + MissingType.index_exemplars.toString());
1148                     warnings.put(MissingType.index_exemplars, "missing");
1149                 }
1150                 UnicodeSet punctuation =
1151                         cldrFile.getExemplarSet("punctuation", WinningChoice.WINNING);
1152                 if (punctuation == null || punctuation.isEmpty()) {
1153                     logln(name + " is missing " + MissingType.punct_exemplars.toString());
1154                     warnings.put(MissingType.punct_exemplars, "missing");
1155                 }
1156             } catch (Exception e) {
1157                 StringWriter x = new StringWriter();
1158                 PrintWriter pw = new PrintWriter(x);
1159                 e.printStackTrace(pw);
1160                 pw.flush();
1161                 errln("  " + name + " is missing main locale data." + x);
1162                 errors.put(MissingType.no_main, x.toString());
1163             }
1164 
1165             // report errors
1166 
1167             if (errors.isEmpty() && warnings.isEmpty()) {
1168                 logln(name + ": No problems...");
1169             }
1170         }
1171     }
1172 
isTopLevel(String localeID)1173     private boolean isTopLevel(String localeID) {
1174         return "root".equals(LocaleIDParser.getParent(localeID));
1175     }
1176 
1177     /** Tests that every dtd item is connected from root */
TestDtdCompleteness()1178     public void TestDtdCompleteness() {
1179         for (DtdType type : DtdType.values()) {
1180             if (type.getStatus() != DtdType.DtdStatus.active) {
1181                 continue;
1182             }
1183             DtdData dtdData = DtdData.getInstance(type);
1184             Set<Element> descendents = new LinkedHashSet<>();
1185             dtdData.getDescendents(dtdData.ROOT, descendents);
1186             Set<Element> elements = dtdData.getElements();
1187             if (!elements.equals(descendents)) {
1188                 for (Element e : elements) {
1189                     if (!descendents.contains(e)
1190                             && !e.equals(dtdData.PCDATA)
1191                             && !e.equals(dtdData.ANY)) {
1192                         errln(type + ": Element " + e + " not contained in descendents of ROOT.");
1193                     }
1194                 }
1195                 for (Element e : descendents) {
1196                     if (!elements.contains(e)) {
1197                         errln(type + ": Element " + e + ", descendent of ROOT, not in elements.");
1198                     }
1199                 }
1200             }
1201             LinkedHashSet<Element> all = new LinkedHashSet<>(descendents);
1202             all.addAll(elements);
1203             Set<Attribute> attributes = dtdData.getAttributes();
1204             for (Attribute a : attributes) {
1205                 if (!elements.contains(a.element)) {
1206                     errln(type + ": Attribute " + a + " isn't for any element.");
1207                 }
1208             }
1209         }
1210     }
1211 
TestBasicDTDCompatibility()1212     public void TestBasicDTDCompatibility() {
1213 
1214         if (!TestCLDRPaths.canUseArchiveDirectory()) {
1215             return;
1216         }
1217 
1218         final String oldCommon = CldrVersion.LAST_RELEASE_VERSION.getBaseDirectory() + "/common";
1219 
1220         // set up exceptions
1221         Set<String> changedToEmpty =
1222                 new HashSet<>(
1223                         Arrays.asList(
1224                                 new String[] {
1225                                     "version",
1226                                     "languageCoverage",
1227                                     "scriptCoverage",
1228                                     "territoryCoverage",
1229                                     "currencyCoverage",
1230                                     "timezoneCoverage",
1231                                     "skipDefaultLocale"
1232                                 }));
1233         Set<String> PCDATA = new HashSet<>();
1234         PCDATA.add("PCDATA");
1235         Set<String> EMPTY = new HashSet<>();
1236         EMPTY.add("EMPTY");
1237         Set<String> VERSION = new HashSet<>();
1238         VERSION.add("version");
1239 
1240         // test all DTDs
1241         for (DtdType dtd : DtdType.values()) {
1242             if (dtd.getStatus() != DtdType.DtdStatus.active) {
1243                 continue;
1244             }
1245             if (dtd.firstVersion != null
1246                     && CldrVersion.LAST_RELEASE_VERSION.isOlderThan(
1247                             CldrVersion.from(dtd.firstVersion))) {
1248                 continue; // DTD didn't exist in last release
1249             }
1250             if (dtd == DtdType.ldmlICU) continue;
1251             try {
1252                 ElementAttributeInfo oldDtd = ElementAttributeInfo.getInstance(oldCommon, dtd);
1253                 ElementAttributeInfo newDtd = ElementAttributeInfo.getInstance(dtd);
1254 
1255                 if (oldDtd == newDtd) {
1256                     continue;
1257                 }
1258                 Relation<String, String> oldElement2Children = oldDtd.getElement2Children();
1259                 Relation<String, String> newElement2Children = newDtd.getElement2Children();
1260 
1261                 Relation<String, String> oldElement2Attributes = oldDtd.getElement2Attributes();
1262                 Relation<String, String> newElement2Attributes = newDtd.getElement2Attributes();
1263 
1264                 for (String element : oldElement2Children.keySet()) {
1265                     Set<String> oldChildren = oldElement2Children.getAll(element);
1266                     Set<String> newChildren = newElement2Children.getAll(element);
1267                     if (newChildren == null) {
1268                         if (!knownElementExceptions.contains(Pair.of(dtd.toString(), element))) {
1269                             errln("Old " + dtd + " contains element not in new: <" + element + ">");
1270                         }
1271                         continue;
1272                     }
1273                     Set<String> funny = containsInOrder(newChildren, oldChildren);
1274                     if (funny != null) {
1275                         if (changedToEmpty.contains(element)
1276                                 && oldChildren.equals(PCDATA)
1277                                 && newChildren.equals(EMPTY)) {
1278                             // ok, skip
1279                         } else {
1280                             errln(
1281                                     "Old "
1282                                             + dtd
1283                                             + " element <"
1284                                             + element
1285                                             + "> has children Missing/Misordered:\t"
1286                                             + funny
1287                                             + "\n\t\tOld:\t"
1288                                             + oldChildren
1289                                             + "\n\t\tNew:\t"
1290                                             + newChildren);
1291                         }
1292                     }
1293 
1294                     Set<String> oldAttributes = oldElement2Attributes.getAll(element);
1295                     if (oldAttributes == null) {
1296                         oldAttributes = Collections.emptySet();
1297                     }
1298                     Set<String> newAttributes = newElement2Attributes.getAll(element);
1299                     if (newAttributes == null) {
1300                         newAttributes = Collections.emptySet();
1301                     }
1302                     if (!newAttributes.containsAll(oldAttributes)) {
1303                         LinkedHashSet<String> missing = new LinkedHashSet<>(oldAttributes);
1304                         missing.removeAll(newAttributes);
1305                         if (element.equals(dtd.toString()) && missing.equals(VERSION)) {
1306                             // ok, skip
1307                         } else {
1308                             errln(
1309                                     "Old "
1310                                             + dtd
1311                                             + " element <"
1312                                             + element
1313                                             + "> has attributes Missing:\t"
1314                                             + missing
1315                                             + "\n\t\tOld:\t"
1316                                             + oldAttributes
1317                                             + "\n\t\tNew:\t"
1318                                             + newAttributes);
1319                         }
1320                     }
1321                 }
1322             } catch (Exception e) {
1323                 e.printStackTrace();
1324                 errln("Failure with " + dtd);
1325             }
1326         }
1327     }
1328 
containsInOrder(Set<T> superset, Set<T> subset)1329     private <T> Set<T> containsInOrder(Set<T> superset, Set<T> subset) {
1330         if (!superset.containsAll(subset)) {
1331             LinkedHashSet<T> missing = new LinkedHashSet<>(subset);
1332             missing.removeAll(superset);
1333             return missing;
1334         }
1335         // ok, we know that they are subsets, try order
1336         Set<T> result = null;
1337         DiscreteComparator<T> comp =
1338                 new DiscreteComparator.Builder<T>(Ordering.ARBITRARY).add(superset).get();
1339         T last = null;
1340         for (T item : subset) {
1341             if (last != null) {
1342                 int order = comp.compare(last, item);
1343                 if (order != -1) {
1344                     if (result == null) {
1345                         result = new HashSet<>();
1346                         result.add(last);
1347                         result.add(item);
1348                     }
1349                 }
1350             }
1351             last = item;
1352         }
1353         return result;
1354     }
1355 
TestDtdCompatibility()1356     public void TestDtdCompatibility() {
1357 
1358         for (DtdType type : DtdType.values()) {
1359             if (type.getStatus() != DtdType.DtdStatus.active) {
1360                 continue;
1361             }
1362             DtdData dtdData = DtdData.getInstance(type);
1363             Map<String, Element> currentElementFromName = dtdData.getElementFromName();
1364 
1365             // current has no orphan
1366             Set<Element> orphans = new LinkedHashSet<>(dtdData.getElementFromName().values());
1367             orphans.remove(dtdData.ROOT);
1368             orphans.remove(dtdData.PCDATA);
1369             orphans.remove(dtdData.ANY);
1370             Set<String> elementsWithoutAlt = new TreeSet<>();
1371             Set<String> elementsWithoutDraft = new TreeSet<>();
1372             Set<String> elementsWithoutAlias = new TreeSet<>();
1373             Set<String> elementsWithoutSpecial = new TreeSet<>();
1374 
1375             for (Element element : dtdData.getElementFromName().values()) {
1376                 Set<Element> children = element.getChildren().keySet();
1377                 orphans.removeAll(children);
1378                 if (type == DtdType.ldml
1379                         && !SUPPLEMENTAL_DATA_INFO.isDeprecated(type, element.name, "*", "*")) {
1380                     if (element.getType() == ElementType.PCDATA) {
1381                         if (element.getAttributeNamed("alt") == null) {
1382                             elementsWithoutAlt.add(element.name);
1383                         }
1384                         if (element.getAttributeNamed("draft") == null) {
1385                             elementsWithoutDraft.add(element.name);
1386                         }
1387                     } else {
1388                         if (children.size() != 0 && !"alias".equals(element.name)) {
1389                             if (element.getChildNamed("alias") == null) {
1390                                 elementsWithoutAlias.add(element.name);
1391                             }
1392                             if (element.getChildNamed("special") == null) {
1393                                 elementsWithoutSpecial.add(element.name);
1394                             }
1395                         }
1396                     }
1397                 }
1398             }
1399             assertEquals(
1400                     type + " DTD Must not have orphan elements", Collections.EMPTY_SET, orphans);
1401             assertEquals(
1402                     type + " DTD elements with PCDATA must have 'alt' attributes",
1403                     Collections.EMPTY_SET,
1404                     elementsWithoutAlt);
1405             assertEquals(
1406                     type + " DTD elements with PCDATA must have 'draft' attributes",
1407                     Collections.EMPTY_SET,
1408                     elementsWithoutDraft);
1409             assertEquals(
1410                     type + " DTD elements with children must have 'alias' elements",
1411                     Collections.EMPTY_SET,
1412                     elementsWithoutAlias);
1413             assertEquals(
1414                     type + " DTD elements with children must have 'special' elements",
1415                     Collections.EMPTY_SET,
1416                     elementsWithoutSpecial);
1417 
1418             if (!TestCLDRPaths.canUseArchiveDirectory()) {
1419                 return;
1420             }
1421 
1422             for (CldrVersion version : CldrVersion.CLDR_VERSIONS_DESCENDING) {
1423                 if (version == CldrVersion.unknown || version == CldrVersion.baseline) {
1424                     continue;
1425                 }
1426                 if (type.getStatus() != DtdStatus.active) {
1427                     continue; // not active
1428                 }
1429                 if (type.firstVersion != null
1430                         && version.isOlderThan(CldrVersion.from(type.firstVersion))) {
1431                     continue; // didn't exist at that point
1432                 }
1433                 DtdData dtdDataOld;
1434                 try {
1435                     dtdDataOld = DtdData.getInstance(type, version.toString());
1436                 } catch (IllegalArgumentException e) {
1437                     boolean tooOld = false;
1438                     switch (type) {
1439                         case ldmlICU:
1440                             tooOld = badLdmlICUVersions.contains(version);
1441                             break;
1442                         case ldmlBCP47:
1443                         case keyboard3:
1444                             if (type.firstVersion != null) {
1445                                 tooOld = version.isOlderThan(CldrVersion.from(type.firstVersion));
1446                             }
1447                             break;
1448                         default:
1449                             break;
1450                     }
1451                     if (tooOld) {
1452                         continue;
1453                     } else {
1454                         errln(
1455                                 "v"
1456                                         + version
1457                                         + ": "
1458                                         + e.getClass().getSimpleName()
1459                                         + ", "
1460                                         + e.getMessage());
1461                         continue;
1462                     }
1463                 }
1464                 // verify that if E is in dtdDataOld, then it is in dtdData, and
1465                 // has at least the same children and attributes
1466                 for (Entry<String, Element> entry : dtdDataOld.getElementFromName().entrySet()) {
1467                     Element oldElement = entry.getValue();
1468                     Element newElement = currentElementFromName.get(entry.getKey());
1469                     if (knownElementExceptions.contains(
1470                             Pair.of(type.toString(), oldElement.getName()))) {
1471                         continue;
1472                     }
1473                     if (assertNotNull(
1474                             type
1475                                     + " DTD for trunk must be superset of v"
1476                                     + version
1477                                     + ", and must contain «"
1478                                     + oldElement.getName()
1479                                     + "»",
1480                             newElement)) {
1481                         // TODO Check order also
1482                         for (Element oldChild : oldElement.getChildren().keySet()) {
1483                             if (oldChild == null) {
1484                                 continue;
1485                             }
1486                             Element newChild = newElement.getChildNamed(oldChild.getName());
1487                             // skip certain items
1488                             if (version.isOlderThan(CldrVersion.v1_6_1)
1489                                     && newElement.getName().equals("zone")
1490                                     && oldChild.getName().equals("usesMetazone")) {
1491                                 if (logKnownIssue(
1492                                         "CLDR-17054",
1493                                         "Breakage with items older than 1.6.1: "
1494                                                 + newElement.getName()
1495                                                 + " / "
1496                                                 + oldChild.getName())) {
1497                                     continue;
1498                                 }
1499                             }
1500 
1501                             if (knownChildExceptions.contains(
1502                                     Pair.of(newElement.getName(), oldChild.getName()))) {
1503                                 continue;
1504                             }
1505                             assertNotNull(
1506                                     type
1507                                             + " DTD - Trunk children of «"
1508                                             + newElement.getName()
1509                                             + "» must be superset of v"
1510                                             + version
1511                                             + ", and must contain «"
1512                                             + oldChild.getName()
1513                                             + "»",
1514                                     newChild);
1515                         }
1516                         for (Attribute oldAttribute : oldElement.getAttributes().keySet()) {
1517                             Attribute newAttribute =
1518                                     newElement.getAttributeNamed(oldAttribute.getName());
1519 
1520                             if (knownAttributeExceptions.contains(
1521                                     Pair.of(newElement.getName(), oldAttribute.getName()))) {
1522                                 continue;
1523                             }
1524                             assertNotNull(
1525                                     type
1526                                             + " DTD - Trunk attributes of «"
1527                                             + newElement.getName()
1528                                             + "» must be superset of v"
1529                                             + version
1530                                             + ", and must contain «"
1531                                             + oldAttribute.getName()
1532                                             + "»",
1533                                     newAttribute);
1534                         }
1535                     }
1536                 }
1537             }
1538         }
1539     }
1540 
1541     /** Compare each path to each other path for every single file in CLDR */
TestDtdComparison()1542     public void TestDtdComparison() {
1543         // try some simple paths for regression
1544 
1545         sortPaths(
1546                 DtdData.getInstance(DtdType.ldml).getDtdComparator(null),
1547                 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/dateTimeFormatLength[@type=\"full\"]/dateTimeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1548                 "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats");
1549 
1550         sortPaths(
1551                 DtdData.getInstance(DtdType.supplementalData).getDtdComparator(null),
1552                 "//supplementalData/territoryContainment/group[@type=\"419\"][@contains=\"013 029 005\"][@grouping=\"true\"]",
1553                 "//supplementalData/territoryContainment/group[@type=\"003\"][@contains=\"021 013 029\"][@grouping=\"true\"]");
1554     }
1555 
TestDtdComparisonsAll()1556     public void TestDtdComparisonsAll() {
1557         if (getInclusion() <= 5) { // Only run this test in exhaustive mode.
1558             return;
1559         }
1560         for (File file : CLDRConfig.getInstance().getAllCLDRFilesEndingWith(".xml")) {
1561             if (file.getParentFile().getName().equals("import")
1562                     && file.getParentFile().getParentFile().getName().equals("keyboards")) {
1563                 return; // skip imports
1564             }
1565             checkDtdComparatorFor(file, null);
1566         }
1567     }
1568 
checkDtdComparatorForResource(String fileToRead, DtdType overrideDtdType)1569     public void checkDtdComparatorForResource(String fileToRead, DtdType overrideDtdType) {
1570         MyHandler myHandler = new MyHandler(overrideDtdType);
1571         XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
1572         try {
1573             myHandler.fileName = fileToRead;
1574             xfr.read(myHandler.fileName, TestBasic.class, -1, true);
1575             logln(myHandler.fileName);
1576         } catch (Exception e) {
1577             Throwable t = e;
1578             StringBuilder b = new StringBuilder();
1579             String indent = "";
1580             while (t != null) {
1581                 b.append(indent).append(t.getMessage());
1582                 indent = indent.isEmpty() ? "\n\t\t" : indent + "\t";
1583                 t = t.getCause();
1584             }
1585             errln(b.toString());
1586             return;
1587         }
1588         DtdData dtdData = DtdData.getInstance(myHandler.dtdType);
1589         sortPaths(dtdData.getDtdComparator(null), myHandler.data);
1590     }
1591 
checkDtdComparatorFor(File fileToRead, DtdType overrideDtdType)1592     public void checkDtdComparatorFor(File fileToRead, DtdType overrideDtdType) {
1593         MyHandler myHandler = new MyHandler(overrideDtdType);
1594         XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
1595         try {
1596             myHandler.fileName = PathUtilities.getNormalizedPathString(fileToRead);
1597             xfr.read(myHandler.fileName, -1, true);
1598             logln(myHandler.fileName);
1599         } catch (Exception e) {
1600             e.printStackTrace();
1601             Throwable t = e;
1602             StringBuilder b = new StringBuilder();
1603             String indent = "";
1604             while (t != null) {
1605                 b.append(indent).append(t.getMessage());
1606                 indent = indent.isEmpty() ? "\n\t\t" : indent + "\t";
1607                 t = t.getCause();
1608             }
1609             errln(b.toString());
1610             return;
1611         }
1612         DtdData dtdData = DtdData.getInstance(myHandler.dtdType);
1613         sortPaths(dtdData.getDtdComparator(null), myHandler.data);
1614     }
1615 
1616     static class MyHandler extends XMLFileReader.SimpleHandler {
1617         private String fileName;
1618         private DtdType dtdType;
1619         private final Set<String> data = new LinkedHashSet<>();
1620 
MyHandler(DtdType overrideDtdType)1621         public MyHandler(DtdType overrideDtdType) {
1622             dtdType = overrideDtdType;
1623         }
1624 
1625         @Override
handlePathValue(String path, @SuppressWarnings("unused") String value)1626         public void handlePathValue(String path, @SuppressWarnings("unused") String value) {
1627             if (dtdType == null) {
1628                 try {
1629                     dtdType = DtdType.fromPath(path);
1630                 } catch (Exception e) {
1631                     throw new IllegalArgumentException("Can't read " + fileName, e);
1632                 }
1633             }
1634             data.add(path);
1635         }
1636     }
1637 
sortPaths(Comparator<String> dc, Collection<String> paths)1638     public void sortPaths(Comparator<String> dc, Collection<String> paths) {
1639         String[] array = paths.toArray(new String[paths.size()]);
1640         sortPaths(dc, array);
1641     }
1642 
sortPaths(Comparator<String> dc, String... array)1643     public void sortPaths(Comparator<String> dc, String... array) {
1644         Arrays.sort(array, 0, array.length, dc);
1645     }
1646     // public void TestNewDtdData() moved to TestDtdData
1647 }
1648