xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartDelta.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.base.Splitter;
5 import com.google.common.collect.Multimap;
6 import com.google.common.collect.TreeMultimap;
7 import com.ibm.icu.impl.Relation;
8 import com.ibm.icu.impl.Row.R2;
9 import com.ibm.icu.impl.Row.R3;
10 import com.ibm.icu.impl.Row.R4;
11 import com.ibm.icu.text.NumberFormat;
12 import com.ibm.icu.text.UnicodeSet;
13 import com.ibm.icu.util.ICUUncheckedIOException;
14 import com.ibm.icu.util.Output;
15 import java.io.File;
16 import java.io.IOException;
17 import java.io.PrintWriter;
18 import java.util.ArrayList;
19 import java.util.Arrays;
20 import java.util.Collection;
21 import java.util.Collections;
22 import java.util.HashSet;
23 import java.util.List;
24 import java.util.Map;
25 import java.util.Map.Entry;
26 import java.util.Objects;
27 import java.util.Set;
28 import java.util.TreeMap;
29 import java.util.TreeSet;
30 import java.util.regex.Matcher;
31 import java.util.regex.Pattern;
32 import org.unicode.cldr.draft.FileUtilities;
33 import org.unicode.cldr.test.DisplayAndInputProcessor;
34 import org.unicode.cldr.test.SubmissionLocales;
35 import org.unicode.cldr.tool.FormattedFileWriter.Anchors;
36 import org.unicode.cldr.tool.Option.Options;
37 import org.unicode.cldr.tool.Option.Params;
38 import org.unicode.cldr.util.CLDRConfig;
39 import org.unicode.cldr.util.CLDRFile;
40 import org.unicode.cldr.util.CLDRFile.Status;
41 import org.unicode.cldr.util.CLDRPaths;
42 import org.unicode.cldr.util.CldrUtility;
43 import org.unicode.cldr.util.Counter;
44 import org.unicode.cldr.util.DtdData;
45 import org.unicode.cldr.util.DtdType;
46 import org.unicode.cldr.util.Factory;
47 import org.unicode.cldr.util.LanguageTagParser;
48 import org.unicode.cldr.util.Level;
49 import org.unicode.cldr.util.LocaleIDParser;
50 import org.unicode.cldr.util.Organization;
51 import org.unicode.cldr.util.Pair;
52 import org.unicode.cldr.util.PathHeader;
53 import org.unicode.cldr.util.PathHeader.PageId;
54 import org.unicode.cldr.util.PathStarrer;
55 import org.unicode.cldr.util.PatternCache;
56 import org.unicode.cldr.util.SimpleXMLSource;
57 import org.unicode.cldr.util.StandardCodes;
58 import org.unicode.cldr.util.SupplementalDataInfo;
59 import org.unicode.cldr.util.SupplementalDataInfo.CoverageVariableInfo;
60 import org.unicode.cldr.util.TransliteratorUtilities;
61 import org.unicode.cldr.util.XMLFileReader;
62 import org.unicode.cldr.util.XPathParts;
63 
64 public class ChartDelta extends Chart {
65     private static final boolean verbose_skipping = false;
66 
67     private static final String DEFAULT_DELTA_DIR_NAME = "delta";
68     private static final String DEFAULT_CHURN_DIR_NAME = "churn";
69 
70     private static final boolean SKIP_REFORMAT_ANNOTATIONS =
71             ToolConstants.PREV_CHART_VERSION.compareTo("30") >= 0;
72 
73     private static final PageId DEBUG_PAGE_ID = PageId.DayPeriod;
74 
75     private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO =
76             CLDRConfig.getInstance().getSupplementalDataInfo();
77 
78     private enum MyOptions {
79         fileFilter(
80                 new Params()
81                         .setHelp("filter files by dir/locale, eg: ^main/en$ or .*/en")
82                         .setMatch(".*")),
83         orgFilter(new Params().setHelp("filter files by organization").setMatch(".*")),
84         Vxml(new Params().setHelp("use cldr-aux for the base directory")),
85         coverageFilter(new Params().setHelp("filter files by coverage").setMatch(".*")),
86         directory(
87                 new Params()
88                         .setHelp("Set the output directory name")
89                         .setDefault(DEFAULT_DELTA_DIR_NAME)
90                         .setMatch(".*")),
91         verbose(new Params().setHelp("verbose debugging messages")),
92         highLevelOnly(new Params().setHelp("check high-level paths (churn) only").setFlag('H')),
93         ;
94 
95         // BOILERPLATE TO COPY
96         final Option option;
97 
MyOptions(Params params)98         private MyOptions(Params params) {
99             option = new Option(this, params);
100         }
101 
102         private static Options myOptions = new Options();
103 
104         static {
105             for (MyOptions option : MyOptions.values()) {
myOptions.add(option, option.option)106                 myOptions.add(option, option.option);
107             }
108         }
109 
parse(String[] args)110         private static Set<String> parse(String[] args) {
111             return myOptions.parse(MyOptions.values()[0], args, true);
112         }
113     }
114 
115     private final Matcher fileFilter;
116     private final String dirName; // "delta" or "churn" or set as option
117     private final String chartNameCap; // capitalized, e.g., "Delta" or "Churn"
118     private final String DIR; // full path of output folder
119     private final Level minimumPathCoverage;
120     private final boolean verbose;
121 
122     /**
123      * If true, check only high-level paths, i.e., paths for which any changes have high potential
124      * to cause disruptive "churn"
125      */
126     private final boolean highLevelOnly;
127 
main(String[] args)128     public static void main(String[] args) {
129         main(args, false);
130     }
131 
main(String[] args, boolean highLevelOnly)132     public static void main(String[] args, boolean highLevelOnly) {
133         System.out.println(
134                 "use -DCHART_VERSION=36.0 -DPREV_CHART_VERSION=34.0 to generate the differences between v36 and v34.");
135         MyOptions.parse(args);
136         Matcher fileFilter =
137                 !MyOptions.fileFilter.option.doesOccur()
138                         ? null
139                         : PatternCache.get(MyOptions.fileFilter.option.getValue()).matcher("");
140         if (MyOptions.orgFilter.option.doesOccur()) {
141             if (MyOptions.fileFilter.option.doesOccur()) {
142                 throw new IllegalArgumentException("Can't have both fileFilter and orgFilter");
143             }
144             String rawOrg = MyOptions.orgFilter.option.getValue();
145             Organization org = Organization.fromString(rawOrg);
146             Set<String> locales = StandardCodes.make().getLocaleCoverageLocales(org);
147             fileFilter =
148                     PatternCache.get("^(main|annotations)/(" + Joiner.on("|").join(locales) + ")$")
149                             .matcher("");
150         }
151         Level coverage =
152                 !MyOptions.coverageFilter.option.doesOccur()
153                         ? null
154                         : Level.fromString(MyOptions.coverageFilter.option.getValue());
155         boolean verbose = MyOptions.verbose.option.doesOccur();
156         if (MyOptions.highLevelOnly.option.doesOccur()) {
157             highLevelOnly = true;
158         }
159         String dirName = MyOptions.directory.option.getValue();
160         if (highLevelOnly && DEFAULT_DELTA_DIR_NAME.equals(dirName)) {
161             System.out.println(
162                     "For highLevelOnly, changing directory from "
163                             + DEFAULT_DELTA_DIR_NAME
164                             + " to "
165                             + DEFAULT_CHURN_DIR_NAME);
166             dirName = DEFAULT_CHURN_DIR_NAME;
167         }
168         ChartDelta temp = new ChartDelta(fileFilter, coverage, dirName, verbose, highLevelOnly);
169         temp.writeChart(null);
170         temp.showTotals();
171         if (highLevelOnly) {
172             HighLevelPaths.reportHighLevelPathUsage();
173         }
174         System.out.println("Finished. Files may have been created in these directories:");
175         System.out.println(temp.DIR);
176         System.out.println(getTsvDir(temp.DIR, temp.dirName));
177     }
178 
ChartDelta( Matcher fileFilter, Level coverage, String dirName, boolean verbose, boolean highLevelOnly)179     private ChartDelta(
180             Matcher fileFilter,
181             Level coverage,
182             String dirName,
183             boolean verbose,
184             boolean highLevelOnly) {
185         this.fileFilter = fileFilter;
186         this.verbose = verbose;
187         this.highLevelOnly = highLevelOnly;
188         this.dirName = dirName;
189         this.chartNameCap = dirName.substring(0, 1).toUpperCase() + dirName.substring(1);
190         this.DIR = CLDRPaths.CHART_DIRECTORY + dirName;
191         this.minimumPathCoverage = coverage;
192     }
193 
194     private static final String SEP = "\u0001";
195     private static final boolean DEBUG = false;
196     private static final String DEBUG_FILE = null; // "windowsZones.xml";
197     static Pattern fileMatcher = PatternCache.get(".*");
198 
199     static PathHeader.Factory phf = PathHeader.getFactory(ENGLISH);
200     static final Set<String> DONT_CARE =
201             new HashSet<>(Arrays.asList("draft", "standard", "reference"));
202 
203     @Override
getDirectory()204     public String getDirectory() {
205         return DIR;
206     }
207 
208     @Override
getTitle()209     public String getTitle() {
210         return chartNameCap + " Charts";
211     }
212 
213     @Override
getFileName()214     public String getFileName() {
215         return "index";
216     }
217 
218     @Override
getExplanation()219     public String getExplanation() {
220         return "<p>Charts showing the differences from the last version. "
221                 + "Titles prefixed by ¤ are special: either the locale data summary or supplemental data. "
222                 + "Not all changed data is charted yet. For details see each chart.</p>";
223     }
224 
225     @Override
writeContents(FormattedFileWriter pw)226     public void writeContents(FormattedFileWriter pw) throws IOException {
227         FormattedFileWriter.Anchors anchors = new FormattedFileWriter.Anchors();
228         FileUtilities.copyFile(ChartDelta.class, "index.css", getDirectory());
229         FormattedFileWriter.copyIncludeHtmls(getDirectory(), true);
230         counter.clear();
231         fileCounters.clear();
232         writeNonLdmlPlain(anchors);
233         writeLdml(anchors);
234         pw.setIndex("Main Chart Index", "../index.html");
235         pw.write(anchors.toString());
236     }
237 
238     private static class PathHeaderSegment extends R3<PathHeader, Integer, String> {
PathHeaderSegment(PathHeader b, int elementIndex, String attribute)239         public PathHeaderSegment(PathHeader b, int elementIndex, String attribute) {
240             super(b, elementIndex, attribute);
241         }
242     }
243 
244     private static class PathDiff extends R4<PathHeaderSegment, String, String, String> {
PathDiff( String locale, PathHeaderSegment pathHeaderSegment, String oldValue, String newValue)245         public PathDiff(
246                 String locale,
247                 PathHeaderSegment pathHeaderSegment,
248                 String oldValue,
249                 String newValue) {
250             super(pathHeaderSegment, locale, oldValue, newValue);
251         }
252     }
253 
254     private static final CLDRFile EMPTY_CLDR = new CLDRFile(new SimpleXMLSource("und").freeze());
255 
256     private static final File CLDR_BASE_DIR = CLDRConfig.getInstance().getCldrBaseDirectory();
257 
258     private enum ChangeType {
259         added,
260         deleted,
261         changed,
262         same;
263 
get(String oldValue, String currentValue)264         public static ChangeType get(String oldValue, String currentValue) {
265             return oldValue == null
266                     ? added
267                     : currentValue == null
268                             ? deleted
269                             : oldValue.equals(currentValue) ? same : changed;
270         }
271     }
272 
273     private Counter<ChangeType> counter = new Counter<>();
274     private Map<String, Counter<ChangeType>> fileCounters = new TreeMap<>();
275     private Set<String> badHeaders = new TreeSet<>();
276 
277     /** Add the count of changed items */
addChange(String file, ChangeType changeType, int count)278     private void addChange(String file, ChangeType changeType, int count) {
279         counter.add(changeType, count); // unified add
280         Counter<ChangeType> fileCounter = fileCounters.get(file);
281         if (fileCounter == null) {
282             fileCounters.put(file, fileCounter = new Counter<>());
283         }
284         fileCounter.add(changeType, count);
285     }
286 
showTotals()287     private void showTotals() {
288         try (PrintWriter pw =
289                 FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_summary.tsv")) {
290             // pw.println("# percentages are of *new* total");
291             pw.print("# dir\tfile");
292             for (ChangeType item : ChangeType.values()) {
293                 pw.print("\t" + (item == ChangeType.same ? "total" : item.toString()));
294             }
295             pw.println();
296             showTotal(pw, "TOTAL/", counter);
297 
298             for (Entry<String, Counter<ChangeType>> entry : fileCounters.entrySet()) {
299                 showTotal(pw, entry.getKey(), entry.getValue());
300             }
301             for (String s : badHeaders) {
302                 pw.println(s);
303             }
304             // pw.println("# EOF");
305         } catch (IOException e) {
306             throw new ICUUncheckedIOException(e);
307         }
308     }
309 
showTotal(PrintWriter pw, String title2, Counter<ChangeType> counter2)310     private void showTotal(PrintWriter pw, String title2, Counter<ChangeType> counter2) {
311         long total = counter2.getTotal();
312         NumberFormat pf = NumberFormat.getPercentInstance();
313         pf.setMinimumFractionDigits(2);
314         NumberFormat nf = NumberFormat.getIntegerInstance();
315         pw.print(title2.replace("/", "\t"));
316         for (ChangeType item : ChangeType.values()) {
317             if (item == ChangeType.same) {
318                 pw.print("\t" + nf.format(total));
319             } else {
320                 final long current = counter2.getCount(item);
321                 pw.print("\t" + nf.format(current));
322             }
323         }
324         pw.println();
325     }
326 
327     /**
328      * @param anchors
329      * @throws IOException
330      *     <p>TODO: shorten the function using subroutines
331      */
writeLdml(Anchors anchors)332     private void writeLdml(Anchors anchors) throws IOException {
333         try (PrintWriter tsvFile =
334                         FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + ".tsv");
335                 PrintWriter tsvCountFile =
336                         FileUtilities.openUTF8Writer(
337                                 getTsvDir(DIR, dirName), dirName + "_count.tsv"); ) {
338             tsvFile.println("# Section\tPage\tHeader\tCode\tLocale\tOld\tNew\tLevel");
339 
340             // set up factories
341             List<Factory> factories = new ArrayList<>();
342             List<Factory> oldFactories = new ArrayList<>();
343 
344             Counter<PathHeader> counts = new Counter<>();
345 
346             String dirBase = ToolConstants.getBaseDirectory(ToolConstants.CHART_VERSION);
347             String prevDirBase = ToolConstants.getBaseDirectory(ToolConstants.PREV_CHART_VERSION);
348 
349             for (String dir : DtdType.ldml.directories) {
350                 if (dir.equals("annotationsDerived") || dir.equals("casing")) {
351                     continue;
352                 }
353                 String current = dirBase + "common/" + dir;
354                 String past = prevDirBase + "common/" + dir;
355                 try {
356                     factories.add(Factory.make(current, ".*"));
357                 } catch (Exception e1) {
358                     System.out.println("Skipping: " + dir + "\t" + e1.getMessage());
359                     continue; // skip where the directories don't exist in old versions
360                 }
361                 try {
362                     oldFactories.add(Factory.make(past, ".*"));
363                 } catch (Exception e) {
364                     System.out.println("Couldn't open factory: " + past);
365                     past = null;
366                     oldFactories.add(null);
367                 }
368                 System.out.println("Will compare: " + dir + "\t\t" + current + "\t\t" + past);
369             }
370             if (factories.isEmpty()) {
371                 throw new IllegalArgumentException(
372                         "No factories found for " + dirBase + ": " + DtdType.ldml.directories);
373             }
374             // get a list of all the locales to cycle over
375 
376             Relation<String, String> baseToLocales =
377                     Relation.of(new TreeMap<String, Set<String>>(), HashSet.class);
378             Matcher m = fileMatcher.matcher("");
379             Set<String> defaultContents = SDI.getDefaultContentLocales();
380             LanguageTagParser ltp = new LanguageTagParser();
381             LikelySubtags ls = new LikelySubtags();
382             for (String file : factories.get(0).getAvailable()) {
383                 if (defaultContents.contains(file)) {
384                     continue;
385                 }
386                 if (!m.reset(file).matches()) {
387                     continue;
388                 }
389                 String base =
390                         file.equals("root")
391                                 ? "root"
392                                 : ltp.set(ls.minimize(file)).getLanguageScript();
393                 baseToLocales.put(base, file);
394             }
395 
396             // do keyboards later
397 
398             Status currentStatus = new Status();
399             Status oldStatus = new Status();
400             Set<PathDiff> diff = new TreeSet<>();
401             Set<String> paths = new HashSet<>();
402 
403             Relation<PathHeader, String> diffAll =
404                     Relation.of(new TreeMap<PathHeader, Set<String>>(), TreeSet.class);
405             for (Entry<String, Set<String>> baseNLocale : baseToLocales.keyValuesSet()) {
406                 String base = baseNLocale.getKey();
407                 for (int i = 0; i < factories.size(); ++i) {
408                     Factory factory = factories.get(i);
409                     Factory oldFactory = oldFactories.get(i);
410                     List<File> sourceDirs = Arrays.asList(factory.getSourceDirectories());
411                     if (sourceDirs.size() != 1) {
412                         throw new IllegalArgumentException(
413                                 "Internal error: expect single source dir");
414                     }
415                     File sourceDir = sourceDirs.get(0);
416                     String sourceDirLeaf = sourceDir.getName();
417                     boolean resolving =
418                             !sourceDirLeaf.contains("subdivisions")
419                                     && !sourceDirLeaf.contains("transforms");
420 
421                     for (String locale : baseNLocale.getValue()) {
422                         String nameAndLocale = sourceDirLeaf + "/" + locale;
423                         if (fileFilter != null && !fileFilter.reset(nameAndLocale).find()) {
424                             if (verbose && verbose_skipping) {
425                                 System.out.println("SKIPPING: " + nameAndLocale);
426                             }
427                             continue;
428                         }
429                         if (verbose) {
430                             System.out.println(nameAndLocale);
431                         }
432                         CLDRFile current = makeWithFallback(factory, locale, resolving);
433                         CLDRFile old = makeWithFallback(oldFactory, locale, resolving);
434                         DisplayAndInputProcessor daip = new DisplayAndInputProcessor(old);
435 
436                         if (!locale.equals("root")
437                                 && current.getLocaleID().equals("root")
438                                 && old.getLocaleID().equals("root")) {
439                             continue;
440                         }
441                         if (old == EMPTY_CLDR && current == EMPTY_CLDR) {
442                             continue;
443                         }
444                         if (highLevelOnly && !HighLevelPaths.localeIsHighLevel(locale)) {
445                             continue;
446                         }
447                         paths.clear();
448                         for (String path : current.fullIterable()) {
449                             if (allowPath(locale, path)) {
450                                 paths.add(path);
451                             }
452                         }
453                         for (String path : old.fullIterable()) {
454                             if (!paths.contains(path) && allowPath(locale, path)) {
455                                 paths.add(path);
456                             }
457                         }
458 
459                         Output<String> reformattedValue = new Output<>();
460                         Output<Boolean> hasReformattedValue = new Output<>();
461 
462                         for (String path : paths) {
463                             if (path.startsWith("//ldml/identity")
464                                     || path.endsWith("/alias")
465                                     || path.startsWith("//ldml/segmentations") // do later
466                                     || path.startsWith("//ldml/rbnf") // do later
467                             ) {
468                                 continue;
469                             }
470                             PathHeader ph = getPathHeader(path);
471                             if (ph == null) {
472                                 continue;
473                             }
474 
475                             String oldValue;
476                             String currentValue;
477 
478                             {
479                                 String sourceLocaleCurrent =
480                                         current.getSourceLocaleID(path, currentStatus);
481                                 String sourceLocaleOld =
482                                         getReformattedPath(
483                                                 oldStatus,
484                                                 old,
485                                                 path,
486                                                 reformattedValue,
487                                                 hasReformattedValue);
488 
489                                 // filter out stuff that differs at a higher level
490                                 if (!sourceLocaleCurrent.equals(locale)
491                                         && !sourceLocaleOld.equals(locale)) {
492                                     continue;
493                                 }
494                                 if (!path.equals(currentStatus.pathWhereFound)
495                                         && !path.equals(oldStatus.pathWhereFound)) {
496                                     continue;
497                                 }
498                                 // fix some incorrect cases?
499 
500                                 currentValue = current.getStringValue(path);
501                                 if (CldrUtility.INHERITANCE_MARKER.equals(currentValue)) {
502                                     currentValue = current.getBaileyValue(path, null, null);
503                                 }
504 
505                                 String oldRawValue =
506                                         hasReformattedValue.value
507                                                 ? reformattedValue.value
508                                                 : old.getStringValue(path);
509                                 if (CldrUtility.INHERITANCE_MARKER.equals(oldRawValue)) {
510                                     oldRawValue = old.getBaileyValue(path, null, null);
511                                 }
512                                 // ignore differences due to old DAIP
513                                 oldValue =
514                                         dontDaipValue(oldRawValue, path)
515                                                 ? oldRawValue
516                                                 : daip.processInput(path, oldRawValue, null);
517                             }
518                             if (highLevelOnly
519                                     && new SuspiciousChange(oldValue, currentValue, path, locale)
520                                                     .isDisruptive()
521                                             == false) {
522                                 continue;
523                             }
524                             // handle non-distinguishing attributes
525                             addPathDiff(sourceDir, old, current, locale, ph, diff);
526 
527                             addValueDiff(
528                                     sourceDir, oldValue, currentValue, locale, ph, diff, diffAll);
529                         }
530                     }
531                 }
532                 writeDiffs(anchors, base, diff, tsvFile, counts);
533                 diff.clear();
534             }
535             writeDiffs(diffAll);
536 
537             writeCounter(tsvCountFile, "Count", counts);
538         }
539     }
540 
dontDaipValue(String oldRawValue, String path)541     public boolean dontDaipValue(String oldRawValue, String path) {
542         return oldRawValue == null || path.startsWith("//ldml/collations");
543     }
544 
allowPath(String locale, String path)545     private boolean allowPath(String locale, String path) {
546         if (minimumPathCoverage != null) {
547             Level pathLevel = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(path, locale);
548             if (minimumPathCoverage.compareTo(pathLevel) < 0) {
549                 return false;
550             }
551         }
552         return true;
553     }
554 
getReformattedPath( Status oldStatus, CLDRFile old, String path, Output<String> value, Output<Boolean> hasReformattedValue)555     private String getReformattedPath(
556             Status oldStatus,
557             CLDRFile old,
558             String path,
559             Output<String> value,
560             Output<Boolean> hasReformattedValue) {
561         if (SKIP_REFORMAT_ANNOTATIONS || !path.startsWith("//ldml/annotations/")) {
562             hasReformattedValue.value = Boolean.FALSE;
563             return old.getSourceLocaleID(path, oldStatus);
564         }
565         // OLD:     <annotation cp='[��]' tts='grinning face'>face; grin</annotation>
566         // NEW:     <annotation cp="��">face | grin</annotation>
567         //          <annotation cp="��" type="tts">grinning face</annotation>
568         // from the NEW paths, get the OLD values
569         XPathParts parts =
570                 XPathParts.getFrozenInstance(path)
571                         .cloneAsThawed(); // not frozen, for removeAttribute
572         boolean isTts = parts.getAttributeValue(-1, "type") != null;
573         if (isTts) {
574             parts.removeAttribute(-1, "type");
575         }
576         String cp = parts.getAttributeValue(-1, "cp");
577         parts.setAttribute(-1, "cp", "[" + cp + "]");
578 
579         String oldStylePath = parts.toString();
580         String temp = old.getStringValue(oldStylePath);
581         if (temp == null) {
582             hasReformattedValue.value = Boolean.FALSE;
583         } else if (isTts) {
584             String temp2 = old.getFullXPath(oldStylePath);
585             value.value = XPathParts.getFrozenInstance(temp2).getAttributeValue(-1, "tts");
586             hasReformattedValue.value = Boolean.TRUE;
587         } else {
588             value.value = temp.replaceAll("\\s*;\\s*", " | ");
589             hasReformattedValue.value = Boolean.TRUE;
590         }
591         return old.getSourceLocaleID(oldStylePath, oldStatus);
592     }
593 
594     PathStarrer starrer = new PathStarrer().setSubstitutionPattern("%A");
595 
getPathHeader(String path)596     private PathHeader getPathHeader(String path) {
597         try {
598             PathHeader ph = phf.fromPath(path);
599             if (ph.getPageId() == PageId.Unknown) {
600                 String star = starrer.set(path);
601                 badHeaders.add(star);
602                 return null;
603             }
604             return ph;
605         } catch (Exception e) {
606             String star = starrer.set(path);
607             badHeaders.add(star);
608             // System.err.println("Skipping path with bad PathHeader: " + path);
609             return null;
610         }
611     }
612 
makeWithFallback(Factory oldFactory, String locale, boolean resolving)613     private CLDRFile makeWithFallback(Factory oldFactory, String locale, boolean resolving) {
614         if (oldFactory == null) {
615             return EMPTY_CLDR;
616         }
617         CLDRFile old;
618         String oldLocale = locale;
619         while (true) { // fall back for old, maybe to root
620             try {
621                 old = oldFactory.make(oldLocale, resolving);
622                 break;
623             } catch (Exception e) {
624                 oldLocale = LocaleIDParser.getParent(oldLocale);
625                 if (oldLocale == null) {
626                     return EMPTY_CLDR;
627                 }
628             }
629         }
630         return old;
631     }
632 
addPathDiff( File sourceDir, CLDRFile old, CLDRFile current, String locale, PathHeader ph, Set<PathDiff> diff2)633     private void addPathDiff(
634             File sourceDir,
635             CLDRFile old,
636             CLDRFile current,
637             String locale,
638             PathHeader ph,
639             Set<PathDiff> diff2) {
640         String path = ph.getOriginalPath();
641         String fullPathCurrent = current.getFullXPath(path);
642         String fullPathOld = old.getFullXPath(path);
643         if (Objects.equals(fullPathCurrent, fullPathOld)) {
644             return;
645         }
646         XPathParts pathPlain = XPathParts.getFrozenInstance(path);
647         XPathParts pathCurrent =
648                 fullPathCurrent == null ? pathPlain : XPathParts.getFrozenInstance(fullPathCurrent);
649         XPathParts pathOld =
650                 fullPathOld == null ? pathPlain : XPathParts.getFrozenInstance(fullPathOld);
651         TreeSet<String> fullAttributes = null;
652         int size = pathCurrent.size();
653         String parentAndName = parentAndName(sourceDir, locale);
654         for (int elementIndex = 0; elementIndex < size; ++elementIndex) { // will have same size
655             Collection<String> distinguishing = pathPlain.getAttributeKeys(elementIndex);
656             Collection<String> attributesCurrent = pathCurrent.getAttributeKeys(elementIndex);
657             Collection<String> attributesOld = pathCurrent.getAttributeKeys(elementIndex);
658             if (attributesCurrent.isEmpty() && attributesOld.isEmpty()) {
659                 continue;
660             }
661             if (fullAttributes == null) {
662                 fullAttributes = new TreeSet<>();
663             } else {
664                 fullAttributes.clear();
665             }
666             fullAttributes.addAll(attributesCurrent);
667             fullAttributes.addAll(attributesOld);
668             fullAttributes.removeAll(distinguishing);
669             fullAttributes.removeAll(DONT_CARE);
670 
671             // at this point we only have non-distinguishing
672             for (String attribute : fullAttributes) {
673                 String attributeValueOld = pathOld.getAttributeValue(elementIndex, attribute);
674                 String attributeValueCurrent =
675                         pathCurrent.getAttributeValue(elementIndex, attribute);
676                 if (Objects.equals(attributeValueOld, attributeValueCurrent)) {
677                     addChange(parentAndName, ChangeType.same, 1);
678                     continue;
679                 }
680                 addChange(
681                         parentAndName, ChangeType.get(attributeValueOld, attributeValueCurrent), 1);
682 
683                 PathDiff row =
684                         new PathDiff(
685                                 locale,
686                                 new PathHeaderSegment(ph, size - elementIndex - 1, attribute),
687                                 attributeValueOld,
688                                 attributeValueCurrent);
689                 if (DEBUG) {
690                     System.out.println(row);
691                 }
692                 diff2.add(row);
693             }
694         }
695     }
696 
parentAndName(File sourceDir, String locale)697     private String parentAndName(File sourceDir, String locale) {
698         return sourceDir.getName() + "/" + locale + ".xml";
699     }
700 
addValueDiff( File sourceDir, String valueOld, String valueCurrent, String locale, PathHeader ph, Set<PathDiff> diff, Relation<PathHeader, String> diffAll)701     private void addValueDiff(
702             File sourceDir,
703             String valueOld,
704             String valueCurrent,
705             String locale,
706             PathHeader ph,
707             Set<PathDiff> diff,
708             Relation<PathHeader, String> diffAll) {
709         // handle stuff that can be split specially
710         Splitter splitter = getSplitter(ph.getOriginalPath(), valueOld, valueCurrent);
711         int count = 1;
712         String parentAndName = parentAndName(sourceDir, locale);
713         if (Objects.equals(valueCurrent, valueOld)) {
714             if (splitter != null && valueCurrent != null) {
715                 count = splitHandlingNull(splitter, valueCurrent).size();
716             }
717             addChange(parentAndName, ChangeType.same, count);
718         } else {
719             if (splitter != null) {
720                 List<String> setOld = splitHandlingNull(splitter, valueOld);
721                 List<String> setNew = splitHandlingNull(splitter, valueCurrent);
722                 int[] sameAndNotInSecond = new int[2];
723                 valueOld = getFilteredValue(setOld, setNew, sameAndNotInSecond);
724                 addChange(parentAndName, ChangeType.same, sameAndNotInSecond[0]);
725                 addChange(parentAndName, ChangeType.deleted, sameAndNotInSecond[1]);
726                 sameAndNotInSecond[0] = sameAndNotInSecond[1] = 0;
727                 valueCurrent = getFilteredValue(setNew, setOld, sameAndNotInSecond);
728                 addChange(parentAndName, ChangeType.added, sameAndNotInSecond[1]);
729             } else if (hasUnicodeSetValue(ph.getOriginalPath())) {
730                 UnicodeSet usOld = valueOld == null ? UnicodeSet.EMPTY : new UnicodeSet(valueOld);
731                 UnicodeSet usCurrent =
732                         valueCurrent == null ? UnicodeSet.EMPTY : new UnicodeSet(valueCurrent);
733                 UnicodeSet oldOnly = new UnicodeSet(usOld).removeAll(usCurrent);
734                 UnicodeSet currentOnly = new UnicodeSet(usCurrent).removeAll(usOld);
735                 addChange(parentAndName, ChangeType.same, usOld.size() - oldOnly.size());
736                 addChange(parentAndName, ChangeType.deleted, oldOnly.size());
737                 addChange(parentAndName, ChangeType.added, currentOnly.size());
738                 valueOld =
739                         usOld.size() == oldOnly.size()
740                                 ? oldOnly.toPattern(false)
741                                 : "…" + oldOnly + "…";
742                 valueCurrent =
743                         usCurrent.size() == currentOnly.size()
744                                 ? currentOnly.toPattern(false)
745                                 : "…" + currentOnly + "…";
746             } else {
747                 addChange(parentAndName, ChangeType.get(valueOld, valueCurrent), count);
748             }
749             PathDiff row =
750                     new PathDiff(locale, new PathHeaderSegment(ph, -1, ""), valueOld, valueCurrent);
751             diff.add(row);
752             diffAll.put(ph, locale);
753         }
754     }
755 
hasUnicodeSetValue(String xpath)756     private boolean hasUnicodeSetValue(String xpath) {
757         return xpath.startsWith("//ldml/characters/exemplar");
758     }
759 
splitHandlingNull(Splitter splitter, String value)760     private List<String> splitHandlingNull(Splitter splitter, String value) {
761         return value == null ? null : splitter.splitToList(value);
762     }
763 
getSplitter(String path, String valueOld, String valueCurrent)764     private Splitter getSplitter(String path, String valueOld, String valueCurrent) {
765         if (path.contains("/annotation") && !path.contains("tts")) {
766             return DtdData.BAR_SPLITTER;
767         } else if (valueOld != null && valueOld.contains("\n")
768                 || valueCurrent != null && valueCurrent.contains("\n")) {
769             return DtdData.CR_SPLITTER;
770         } else {
771             return null;
772         }
773     }
774 
775     /**
776      * Return string with all lines from linesToRemove removed
777      *
778      * @param toGetStringFor
779      * @param linesToRemove
780      * @return
781      */
getFilteredValue( Collection<String> toGetStringFor, Collection<String> linesToRemove, int[] sameAndDiff)782     private String getFilteredValue(
783             Collection<String> toGetStringFor,
784             Collection<String> linesToRemove,
785             int[] sameAndDiff) {
786         if (toGetStringFor == null) {
787             return null;
788         }
789         StringBuilder buf = new StringBuilder();
790         Set<String> toRemove =
791                 linesToRemove == null ? Collections.emptySet() : new HashSet<>(linesToRemove);
792         boolean removed = false;
793         for (String old : toGetStringFor) {
794             if (toRemove.contains(old)) {
795                 removed = true;
796                 sameAndDiff[0]++;
797             } else {
798                 sameAndDiff[1]++;
799                 if (removed) {
800                     buf.append("…\n");
801                     removed = false;
802                 }
803                 buf.append(old).append('\n');
804             }
805         }
806         if (removed) {
807             buf.append("…");
808         } else if (buf.length() > 0) {
809             buf.setLength(buf.length() - 1); // remove final \n
810         }
811         return buf.toString();
812     }
813 
writeDiffs( Anchors anchors, String file, String title, Multimap<PathHeader, String> bcp, PrintWriter tsvFile)814     private void writeDiffs(
815             Anchors anchors,
816             String file,
817             String title,
818             Multimap<PathHeader, String> bcp,
819             PrintWriter tsvFile) {
820         if (bcp.isEmpty()) {
821             System.out.println("\tDeleting: " + DIR + "/" + file);
822             new File(DIR + file).delete();
823             return;
824         }
825         TablePrinter tablePrinter =
826                 new TablePrinter()
827                         .addColumn(
828                                 "Section",
829                                 "class='source'",
830                                 CldrUtility.getDoubleLinkMsg(),
831                                 "class='source'",
832                                 true)
833                         .addColumn(
834                                 "Page",
835                                 "class='source'",
836                                 CldrUtility.getDoubleLinkMsg(),
837                                 "class='source'",
838                                 true) // .setRepeatDivider(true)
839                         .addColumn(
840                                 "Header",
841                                 "class='source'",
842                                 CldrUtility.getDoubleLinkMsg(),
843                                 "class='source'",
844                                 true)
845                         .addColumn("Code", "class='source'", null, "class='source'", false)
846                         .addColumn(
847                                 "Old",
848                                 "class='target'",
849                                 null,
850                                 "class='target'",
851                                 false) //  width='20%'
852                         .addColumn(
853                                 "New",
854                                 "class='target'",
855                                 null,
856                                 "class='target'",
857                                 false); //  width='20%'
858         PathHeader ph1 =
859                 phf.fromPath(
860                         "//supplementalData/metadata/alias/subdivisionAlias[@type=\"TW-TXQ\"]/_reason");
861         PathHeader ph2 =
862                 phf.fromPath(
863                         "//supplementalData/metadata/alias/subdivisionAlias[@type=\"LA-XN\"]/_replacement");
864         ph1.compareTo(ph2);
865         for (Entry<PathHeader, Collection<String>> entry : bcp.asMap().entrySet()) {
866             PathHeader ph = entry.getKey();
867             if (ph.getPageId() == DEBUG_PAGE_ID) {
868                 System.out.println(ph + "\t" + ph.getOriginalPath());
869             }
870             for (String value : entry.getValue()) {
871                 String[] oldNew = value.split(SEP);
872                 tablePrinter
873                         .addRow()
874                         .addCell(ph.getSectionId())
875                         .addCell(ph.getPageId())
876                         .addCell(ph.getHeader())
877                         .addCell(ph.getCode())
878                         .addCell(oldNew[0])
879                         .addCell(oldNew[1])
880                         .finishRow();
881             }
882         }
883         writeTable(anchors, file, tablePrinter, title, tsvFile);
884     }
885 
writeDiffs(Relation<PathHeader, String> diffAll)886     private void writeDiffs(Relation<PathHeader, String> diffAll) {
887         TablePrinter tablePrinter =
888                 new TablePrinter()
889                         .addColumn(
890                                 "Section",
891                                 "class='source'",
892                                 CldrUtility.getDoubleLinkMsg(),
893                                 "class='source'",
894                                 true)
895                         .addColumn(
896                                 "Page",
897                                 "class='source'",
898                                 CldrUtility.getDoubleLinkMsg(),
899                                 "class='source'",
900                                 true)
901                         .addColumn(
902                                 "Header",
903                                 "class='source'",
904                                 CldrUtility.getDoubleLinkMsg(),
905                                 "class='source'",
906                                 true)
907                         .addColumn("Code", "class='source'", null, "class='source'", true)
908                         .addColumn(
909                                 "Locales where different",
910                                 "class='target'",
911                                 null,
912                                 "class='target'",
913                                 true);
914         for (Entry<PathHeader, Set<String>> row : diffAll.keyValuesSet()) {
915             PathHeader ph = row.getKey();
916             Set<String> locales = row.getValue();
917             tablePrinter
918                     .addRow()
919                     .addCell(ph.getSectionId())
920                     .addCell(ph.getPageId())
921                     .addCell(ph.getHeader())
922                     .addCell(ph.getCode())
923                     .addCell(Joiner.on(" ").join(locales))
924                     .finishRow();
925         }
926     }
927 
writeDiffs( Anchors anchors, String file, Set<PathDiff> diff, PrintWriter tsvFile, Counter<PathHeader> counts)928     private void writeDiffs(
929             Anchors anchors,
930             String file,
931             Set<PathDiff> diff,
932             PrintWriter tsvFile,
933             Counter<PathHeader> counts) {
934         if (diff.isEmpty()) {
935             return;
936         }
937         TablePrinter tablePrinter =
938                 new TablePrinter()
939                         .addColumn(
940                                 "Section",
941                                 "class='source'",
942                                 CldrUtility.getDoubleLinkMsg(),
943                                 "class='source'",
944                                 true)
945                         .addColumn(
946                                 "Page",
947                                 "class='source'",
948                                 CldrUtility.getDoubleLinkMsg(),
949                                 "class='source'",
950                                 true)
951                         .addColumn(
952                                 "Header",
953                                 "class='source'",
954                                 CldrUtility.getDoubleLinkMsg(),
955                                 "class='source'",
956                                 true)
957                         .addColumn("Code", "class='source'", null, "class='source'", true)
958                         .addColumn("Locale", "class='source'", null, "class='source'", true)
959                         .addColumn(
960                                 "Old",
961                                 "class='target'",
962                                 null,
963                                 "class='target'",
964                                 true) //  width='20%'
965                         .addColumn(
966                                 "New",
967                                 "class='target'",
968                                 null,
969                                 "class='target'",
970                                 true) //  width='20%'
971                         .addColumn("Level", "class='target'", null, "class='target'", true);
972 
973         for (PathDiff row : diff) {
974             PathHeaderSegment phs = row.get0();
975             counts.add(phs.get0(), 1);
976             String locale = row.get1();
977             String oldValue = row.get2();
978             String currentValue = row.get3();
979 
980             PathHeader ph = phs.get0();
981             Integer pathIndex = phs.get1();
982             String attribute = phs.get2();
983             String specialCode = ph.getCode();
984 
985             if (!attribute.isEmpty()) {
986                 specialCode += "_" + attribute;
987                 if (pathIndex != 0) {
988                     specialCode += "|" + pathIndex;
989                 }
990             }
991             Level coverageLevel =
992                     SUPPLEMENTAL_DATA_INFO.getCoverageLevel(ph.getOriginalPath(), locale);
993             String fixedOldValue =
994                     oldValue == null
995                             ? "▷missing◁"
996                             : TransliteratorUtilities.toHTML.transform(oldValue);
997             String fixedNewValue =
998                     currentValue == null
999                             ? "▷removed◁"
1000                             : TransliteratorUtilities.toHTML.transform(currentValue);
1001 
1002             tablePrinter
1003                     .addRow()
1004                     .addCell(ph.getSectionId())
1005                     .addCell(ph.getPageId())
1006                     .addCell(ph.getHeader())
1007                     .addCell(specialCode)
1008                     .addCell(locale)
1009                     .addCell(fixedOldValue)
1010                     .addCell(fixedNewValue)
1011                     .addCell(coverageLevel)
1012                     .finishRow();
1013         }
1014         String title = ENGLISH.getName(file) + " " + chartNameCap;
1015         writeTable(anchors, file, tablePrinter, title, tsvFile);
1016 
1017         diff.clear();
1018     }
1019 
1020     private class ChartDeltaSub extends Chart {
1021         private String title;
1022         private String file;
1023         private TablePrinter tablePrinter;
1024         private PrintWriter tsvFile;
1025 
ChartDeltaSub( String title, String file, TablePrinter tablePrinter, PrintWriter tsvFile)1026         private ChartDeltaSub(
1027                 String title, String file, TablePrinter tablePrinter, PrintWriter tsvFile) {
1028             super();
1029             this.title = title;
1030             this.file = file;
1031             this.tablePrinter = tablePrinter;
1032             this.tsvFile = tsvFile;
1033         }
1034 
1035         @Override
getDirectory()1036         public String getDirectory() {
1037             return DIR;
1038         }
1039 
1040         @Override
getShowDate()1041         public boolean getShowDate() {
1042             return false;
1043         }
1044 
1045         @Override
getTitle()1046         public String getTitle() {
1047             return title;
1048         }
1049 
1050         @Override
getFileName()1051         public String getFileName() {
1052             return file;
1053         }
1054 
1055         @Override
getExplanation()1056         public String getExplanation() {
1057             return "<p>Lists data fields that differ from the last major version (see versions above)."
1058                     + " Inherited differences in locales are suppressed, except where the source locales are different. "
1059                     + "<p>";
1060         }
1061 
1062         @Override
writeContents(FormattedFileWriter pw)1063         public void writeContents(FormattedFileWriter pw) throws IOException {
1064             pw.write(tablePrinter.toTable());
1065             tablePrinter.toTsv(tsvFile);
1066         }
1067     }
1068 
writeTable( Anchors anchors, String file, TablePrinter tablePrinter, String title, PrintWriter tsvFile)1069     private void writeTable(
1070             Anchors anchors,
1071             String file,
1072             TablePrinter tablePrinter,
1073             String title,
1074             PrintWriter tsvFile) {
1075         ChartDeltaSub chartDeltaSub = new ChartDeltaSub(title, file, tablePrinter, tsvFile);
1076         chartDeltaSub.writeChart(anchors);
1077     }
1078 
writeNonLdmlPlain(Anchors anchors)1079     private void writeNonLdmlPlain(Anchors anchors) throws IOException {
1080         try (PrintWriter tsvFile =
1081                         FileUtilities.openUTF8Writer(
1082                                 getTsvDir(DIR, dirName), dirName + "_supp.tsv");
1083                 PrintWriter tsvCountFile =
1084                         FileUtilities.openUTF8Writer(
1085                                 getTsvDir(DIR, dirName), dirName + "_supp_count.tsv"); ) {
1086             tsvFile.println("# Section\tPage\tHeader\tCode\tOld\tNew");
1087 
1088             Multimap<PathHeader, String> bcp = TreeMultimap.create();
1089             Multimap<PathHeader, String> supplemental = TreeMultimap.create();
1090             Multimap<PathHeader, String> transforms = TreeMultimap.create();
1091 
1092             Counter<PathHeader> countSame = new Counter<>();
1093             Counter<PathHeader> countAdded = new Counter<>();
1094             Counter<PathHeader> countDeleted = new Counter<>();
1095 
1096             for (String dir : new File(CLDRPaths.BASE_DIRECTORY + "common/").list()) {
1097                 if (DtdType.ldml.directories.contains(dir)
1098                         || dir.equals(".DS_Store")
1099                         || dir.equals("dtd") // TODO as flat files
1100                         || dir.equals("properties") // TODO as flat files
1101                         || dir.equals("uca") // TODO as flat files
1102                 ) {
1103                     continue;
1104                 }
1105                 File dirOld = new File(PREV_CHART_VERSION_DIRECTORY + "common/" + dir);
1106                 System.out.println("\tLast dir: " + dirOld);
1107                 File dir2 = new File(CHART_VERSION_DIRECTORY + "common/" + dir);
1108                 System.out.println("\tCurr dir: " + dir2);
1109 
1110                 for (String file : dir2.list()) {
1111                     if (!file.endsWith(".xml")) {
1112                         continue;
1113                     }
1114                     String parentAndFile = dir + "/" + file;
1115                     String base = file.substring(0, file.length() - 4);
1116                     if (fileFilter != null && !fileFilter.reset(dir + "/" + base).find()) {
1117                         if (verbose) { //  && verbose_skipping
1118                             System.out.println("SKIPPING: " + dir + "/" + base);
1119                         }
1120                         continue;
1121                     }
1122                     if (highLevelOnly && !HighLevelPaths.localeIsHighLevel(base)) {
1123                         continue;
1124                     }
1125                     if (verbose) {
1126                         System.out.println(file);
1127                     }
1128                     Relation<PathHeader, String> contentsOld =
1129                             fillData(dirOld.toString() + "/", file, base);
1130                     Relation<PathHeader, String> contents2 =
1131                             fillData(dir2.toString() + "/", file, base);
1132 
1133                     Set<PathHeader> keys =
1134                             new TreeSet<>(
1135                                     CldrUtility.ifNull(
1136                                             contentsOld.keySet(),
1137                                             Collections.<PathHeader>emptySet()));
1138                     keys.addAll(
1139                             CldrUtility.ifNull(
1140                                     contents2.keySet(), Collections.<PathHeader>emptySet()));
1141                     DtdType dtdType = null;
1142                     for (PathHeader key : keys) {
1143                         String originalPath = key.getOriginalPath();
1144                         if (highLevelOnly && !HighLevelPaths.pathIsHighLevel(originalPath, base)) {
1145                             continue;
1146                         }
1147                         boolean isTransform = originalPath.contains("/tRule");
1148                         if (dtdType == null) {
1149                             dtdType = DtdType.fromPath(originalPath);
1150                         }
1151                         Multimap<PathHeader, String> target =
1152                                 dtdType == DtdType.ldmlBCP47
1153                                         ? bcp
1154                                         : isTransform ? transforms : supplemental;
1155                         Set<String> setOld = contentsOld.get(key);
1156                         Set<String> set2 = contents2.get(key);
1157 
1158                         if (Objects.equals(setOld, set2)) {
1159                             if (file.equals(DEBUG_FILE)) { // for debugging
1160                                 System.out.println("**Same: " + key + "\t" + setOld);
1161                             }
1162                             addChange(parentAndFile, ChangeType.same, setOld.size());
1163                             countSame.add(key, 1);
1164                             continue;
1165                         }
1166                         if (setOld == null) {
1167                             addChange(parentAndFile, ChangeType.added, set2.size());
1168                             for (String s : set2) {
1169                                 addRow(target, key, "▷missing◁", s);
1170                                 countAdded.add(key, 1);
1171                             }
1172                         } else if (set2 == null) {
1173                             addChange(parentAndFile, ChangeType.deleted, setOld.size());
1174                             for (String s : setOld) {
1175                                 addRow(target, key, s, "▷removed◁");
1176                                 countDeleted.add(key, 1);
1177                             }
1178                         } else {
1179                             Set<String> s1MOld = setOld;
1180                             Set<String> s2M1 = set2;
1181                             if (s1MOld.isEmpty()) {
1182                                 addRow(target, key, "▷missing◁", Joiner.on(", ").join(s2M1));
1183                                 addChange(parentAndFile, ChangeType.added, s2M1.size());
1184                                 countAdded.add(key, 1);
1185                             } else if (s2M1.isEmpty()) {
1186                                 addRow(target, key, Joiner.on(", ").join(s1MOld), "▷removed◁");
1187                                 addChange(parentAndFile, ChangeType.deleted, s1MOld.size());
1188                                 countDeleted.add(key, 1);
1189                             } else {
1190                                 String valueOld;
1191                                 String valueCurrent;
1192 
1193                                 int[] sameAndNotInSecond = new int[2];
1194                                 valueOld = getFilteredValue(s1MOld, s1MOld, sameAndNotInSecond);
1195                                 addChange(parentAndFile, ChangeType.same, sameAndNotInSecond[0]);
1196                                 countSame.add(key, 1);
1197                                 addChange(parentAndFile, ChangeType.deleted, sameAndNotInSecond[1]);
1198                                 sameAndNotInSecond[1] = 0;
1199                                 countDeleted.add(key, 1);
1200                                 valueCurrent = getFilteredValue(s2M1, s1MOld, sameAndNotInSecond);
1201                                 addChange(parentAndFile, ChangeType.added, sameAndNotInSecond[1]);
1202                                 addRow(target, key, valueOld, valueCurrent);
1203                                 countAdded.add(key, 1);
1204                             }
1205                         }
1206                     }
1207                 }
1208             }
1209             writeDiffs(anchors, "bcp47", "¤¤BCP47 " + chartNameCap, bcp, tsvFile);
1210             writeDiffs(
1211                     anchors,
1212                     "supplemental-data",
1213                     "¤¤Supplemental " + chartNameCap,
1214                     supplemental,
1215                     tsvFile);
1216             writeDiffs(anchors, "transforms", "¤¤Transforms " + chartNameCap, transforms, tsvFile);
1217 
1218             writeCounter(tsvCountFile, "CountSame", countSame);
1219             tsvCountFile.println();
1220             writeCounter(tsvCountFile, "CountAdded", countAdded);
1221             tsvCountFile.println();
1222             writeCounter(tsvCountFile, "CountDeleted", countDeleted);
1223 
1224             // tsvFile.println("# EOF");
1225             // tsvCountFile.println("# EOF");
1226         }
1227     }
1228 
writeCounter(PrintWriter tsvFile, String title, Counter<PathHeader> countDeleted)1229     private void writeCounter(PrintWriter tsvFile, String title, Counter<PathHeader> countDeleted) {
1230         tsvFile.append("# " + title + "\tSection\tPage\tSubhead\tCode\n\n");
1231         for (R2<Long, PathHeader> entry : countDeleted.getEntrySetSortedByCount(false, null)) {
1232             tsvFile.println(entry.get0() + "\t" + entry.get1());
1233         }
1234     }
1235 
addRow( Multimap<PathHeader, String> target, PathHeader key, String oldItem, String newItem)1236     private void addRow(
1237             Multimap<PathHeader, String> target, PathHeader key, String oldItem, String newItem) {
1238         if (oldItem.isEmpty() || newItem.isEmpty()) {
1239             throw new IllegalArgumentException();
1240         }
1241         target.put(key, oldItem + SEP + newItem);
1242     }
1243 
1244     /**
1245      * Fill in the chart data for the specified file
1246      *
1247      * @param directory
1248      * @param file like "xx.xml" where "xx" may be a locale name
1249      * @param fileBase like "xx", same as file without ".xml"
1250      * @return the Relation
1251      */
fillData(String directory, String file, String fileBase)1252     private Relation<PathHeader, String> fillData(String directory, String file, String fileBase) {
1253         Relation<PathHeader, String> results =
1254                 Relation.of(new TreeMap<PathHeader, Set<String>>(), TreeSet.class);
1255 
1256         List<Pair<String, String>> contents1;
1257         try {
1258             contents1 =
1259                     XMLFileReader.loadPathValues(
1260                             directory + file, new ArrayList<Pair<String, String>>(), true);
1261         } catch (Exception e) {
1262             /*
1263              * This happens with e = ICUException, file = grammaticalFeatures.xml in cldr-36.0
1264              */
1265             return results;
1266         }
1267         DtdType dtdType = null;
1268         DtdData dtdData = null;
1269         Multimap<String, String> extras = TreeMultimap.create();
1270 
1271         for (Pair<String, String> s : contents1) {
1272             String path = s.getFirst();
1273             if (highLevelOnly
1274                     && !HighLevelPaths.pathIsHighLevel(path, fileBase /* locale, or not */)) {
1275                 continue;
1276             }
1277             String value = s.getSecond();
1278             if (dtdType == null) {
1279                 /*
1280                  * Note: although dtdType and dtdData depend on path, they are the same for all paths
1281                  * in the same file, so they only need to be set the first time through this loop.
1282                  *
1283                  * Note: the current DTD in CLDR_BASE_DIR is supposed to be backward-compatible, that is, to support
1284                  * paths from all archived versions. Any exception to that rule (e.g., for "grammaticalState") is a bug.
1285                  */
1286                 dtdType = DtdType.fromPath(path);
1287                 dtdData = DtdData.getInstance(dtdType, CLDR_BASE_DIR);
1288             }
1289             XPathParts pathPlain = XPathParts.getFrozenInstance(path);
1290             try {
1291                 if (dtdData.isMetadata(pathPlain)) {
1292                     continue;
1293                 }
1294             } catch (NullPointerException e) {
1295                 /*
1296                  * TODO: this happens for "grammaticalState" in this path from version 37:
1297                  * //supplementalData/grammaticalData/grammaticalFeatures[@targets="nominal"][@locales="he"]/grammaticalState[@values="definite indefinite construct"]
1298                  * Reference: https://unicode-org.atlassian.net/browse/CLDR-13306
1299                  */
1300                 System.out.println(
1301                         "Caught NullPointerException in fillData calling isMetadata, path = "
1302                                 + path);
1303                 continue;
1304             }
1305             Set<String> pathForValues = dtdData.getRegularizedPaths(pathPlain, extras);
1306             if (pathForValues != null) {
1307                 for (String pathForValue : pathForValues) {
1308                     PathHeader pathHeader = phf.fromPath(pathForValue);
1309                     if (pathHeader.getPageId() == PageId.Suppress) {
1310                         continue;
1311                     }
1312                     Splitter splitter = DtdData.getValueSplitter(pathPlain);
1313                     for (String line : splitter.split(value)) {
1314                         // special case # in transforms
1315                         if (isComment(pathPlain, line)) {
1316                             continue;
1317                         }
1318                         results.put(pathHeader, line);
1319                     }
1320                 }
1321             }
1322             for (Entry<String, Collection<String>> entry : extras.asMap().entrySet()) {
1323                 final String extraPath = entry.getKey();
1324                 final PathHeader pathHeaderExtra = phf.fromPath(extraPath);
1325                 if (pathHeaderExtra.getPageId() == PageId.Suppress) {
1326                     continue;
1327                 }
1328                 final Collection<String> extraValue = entry.getValue();
1329                 if (isExtraSplit(extraPath)) {
1330                     for (String items : extraValue) {
1331                         results.putAll(pathHeaderExtra, DtdData.SPACE_SPLITTER.splitToList(items));
1332                     }
1333                 } else {
1334                     results.putAll(pathHeaderExtra, extraValue);
1335                 }
1336             }
1337             if (pathForValues == null && !value.isEmpty()) {
1338                 System.err.println("Shouldn't happen");
1339             }
1340         }
1341         return results;
1342     }
1343 
isExtraSplit(String extraPath)1344     private boolean isExtraSplit(String extraPath) {
1345         if (extraPath.endsWith("/_type")
1346                 && extraPath.startsWith("//supplementalData/metaZones/mapTimezones")) {
1347             return true;
1348         }
1349         return false;
1350     }
1351 
isComment(XPathParts pathPlain, String line)1352     private static boolean isComment(XPathParts pathPlain, String line) {
1353         if (pathPlain.contains("transform")) {
1354             if (line.startsWith("#")) {
1355                 return true;
1356             }
1357         }
1358         return false;
1359     }
1360 
1361     /**
1362      * Determine when changes to the values for paths should be treated as potentially "disruptive"
1363      * for the purpose of "churn" reporting
1364      */
1365     private class SuspiciousChange {
1366         /** the old and new values, such as "HH:mm–HH:mm v" and "HH:mm – HH:mm v" */
1367         private String oldValue, newValue;
1368 
1369         /**
1370          * the path, such as
1371          * //ldml/dates/calendars/calendar[@type="gregorian"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id="Hmv"]/greatestDifference[@id="H"]
1372          */
1373         private String path;
1374 
1375         /**
1376          * the locale (such as "doi") in which the path was found, or null, or possibly the base
1377          * file name without extension, like "xx" if the file name is "xx.xml", where "xx" may or
1378          * may not be a locale; e.g., "supplementalData"
1379          */
1380         private String locale;
1381 
SuspiciousChange(String oldValue, String newValue, String path, String locale)1382         SuspiciousChange(String oldValue, String newValue, String path, String locale) {
1383             this.oldValue = oldValue;
1384             this.newValue = newValue;
1385             this.path = path;
1386             this.locale = locale;
1387         }
1388 
1389         /**
1390          * Is the change from the old value to the new value, for this path and locale, potentially
1391          * disruptive?
1392          *
1393          * @return true or false
1394          */
isDisruptive()1395         public boolean isDisruptive() {
1396             /*
1397              * OR, not AND: certain changes in value are disruptive even for paths not
1398              * otherwise treated as high-level, and changes for high-level paths are
1399              * disruptive even if the changes in values themselves are not identified
1400              * as disruptive.
1401              */
1402             return valueChangeIsDisruptive() || HighLevelPaths.pathIsHighLevel(path, locale);
1403         }
1404 
1405         /**
1406          * Is the change from the old value to the current value potentially disruptive, based
1407          * (primarily) on the values themselves?
1408          *
1409          * @return true or false
1410          */
valueChangeIsDisruptive()1411         private boolean valueChangeIsDisruptive() {
1412             if (oldValue == null || newValue == null || oldValue.equals(newValue)) {
1413                 return false;
1414             }
1415             if (valueChangeIsDisruptiveWhitespaceOnly()) {
1416                 return true;
1417             }
1418             return false;
1419         }
1420 
1421         /**
1422          * Is the change disruptive whitespace only? Per design doc, "Format changes: second to none
1423          * on the disruptiveness scale are changes involving spaces such as SPACE -> NBSP or NBSP ->
1424          * Narrow NBSP. Or adding a space somewhere in the format where previously there was none."
1425          *
1426          * @return true or false
1427          */
valueChangeIsDisruptiveWhitespaceOnly()1428         private boolean valueChangeIsDisruptiveWhitespaceOnly() {
1429             /*
1430              * annotations often have changes like "pop gorn", "popgorn", not treated as disruptive
1431              */
1432             if (path.startsWith("//ldml/annotations")) {
1433                 return false;
1434             }
1435             if (removeWhitespace(oldValue).equals(removeWhitespace(newValue))) {
1436                 return true;
1437             }
1438             return false;
1439         }
1440 
1441         /**
1442          * Remove whitespace from the given string
1443          *
1444          * <p>Remove whitespace as defined by regex \s, and also U+00A0 NO-BREAK SPACE U+2007 FIGURE
1445          * SPACE U+202F NARROW NO-BREAK SPACE
1446          *
1447          * @param s the string
1448          * @return the modified string
1449          */
removeWhitespace(String s)1450         private String removeWhitespace(String s) {
1451             return s.replaceAll("[\\s\\u00A0\\u2007\\u202F]", "");
1452         }
1453     }
1454 
1455     /**
1456      * Determine which paths are considered "high-level" paths, i.e., paths for which any changes
1457      * have high potential to cause disruptive "churn". Whether a path is high-level sometimes
1458      * depends on the locale or xml file in which it occurs. Some paths are high-level regardless of
1459      * the locale in which they are located. Other paths are high-level for some locales but not
1460      * others. For example, //ldml/localeDisplayNames/languages/language[@type="xx"] is high level
1461      * in locale "xx", and maybe "en", but not for all locales.
1462      */
1463     private static class HighLevelPaths {
1464         /**
1465          * A set of paths to be treated as "high-level". These are complete paths to be matched
1466          * exactly. Other paths are recognized by special functions like isHighLevelTerritoryName.
1467          *
1468          * <p>The ordering and comments are based on the design spec.
1469          */
1470         private static final Set<String> highLevelPaths =
1471                 new HashSet<>(
1472                         Arrays.asList(
1473                                 /*
1474                                  * Core data
1475                                  */
1476                                 "//ldml/characters/exemplarCharacters",
1477                                 "//ldml/numbers/defaultNumberingSystem",
1478                                 "//ldml/numbers/otherNumberingSystems/native",
1479                                 /*
1480                                  * Territory and Language names
1481                                  *  Country/Region names (English and Native names) -- see isHighLevelTerritoryName
1482                                  *   //ldml/localeDisplayName/territories/territory/...
1483                                  *  Language names (English and Native) -- see isHighLevelLangName
1484                                  *   //ldml/localeDisplayNames/languages/language/...
1485                                  */
1486                                 /*
1487                                  * Date
1488                                  * Note: "year", "month", etc., below, form a subset (eight) of all possible values for type,
1489                                  * excluding, for example, "fri" and "zone". If we use starred paths, we would need further complication
1490                                  * to filter out "fri", "zone", etc.
1491                                  */
1492                                 "//ldml/dates/fields/field[@type=\"year\"]/displayName",
1493                                 "//ldml/dates/fields/field[@type=\"month\"]/displayName",
1494                                 "//ldml/dates/fields/field[@type=\"week\"]/displayName",
1495                                 "//ldml/dates/fields/field[@type=\"day\"]/displayName",
1496                                 "//ldml/dates/fields/field[@type=\"hour\"]/displayName",
1497                                 "//ldml/dates/fields/field[@type=\"era\"]/displayName",
1498                                 "//ldml/dates/fields/field[@type=\"minute\"]/displayName",
1499                                 "//ldml/dates/fields/field[@type=\"second\"]/displayName",
1500                                 /*
1501                                  * First day of week: firstDay in supplementalData.xml; see isHighLevelFirstDay
1502                                  * First week of year: see isHighLevelWeekOfPreference
1503                                  */
1504                                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"full\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1505                                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"long\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1506                                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"medium\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1507                                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"short\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1508                                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"MMMEd\"]",
1509                                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"MEd\"]",
1510                                 /*
1511                                  * Time
1512                                  */
1513                                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"full\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1514                                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"long\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1515                                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"medium\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1516                                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"short\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1517                                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"am\"]",
1518                                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=\"am\"]",
1519                                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"pm\"]",
1520                                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=\"pm\"]",
1521                                 /*
1522                                  * Currency (English and Native) -- see isHighLevelCurrencyName
1523                                  * E.g., //ldml/numbers/currencies/currency[@type=\"KRW\"]/displayName"
1524                                  *
1525                                  * ISO Currency Code: SupplementalData.xml match <region iso3166> -- see isHighLevelCurrencyCode
1526                                  */
1527                                 /*
1528                                  * Currency Formats
1529                                  *  a. Currency thousand separator
1530                                  *  b. Currency decimal separator
1531                                  *  c. Currency Symbol //ldml/numbers/currencies/currency[@type="CNY"]/symbol
1532                                  *  d. Currency Symbol Narrow //ldml/numbers/currencies/currency[@type=\"CNY\"]/symbol[@alt=\"narrow\"]"
1533                                  *
1534                                  * See isHighLevelCurrencySeparatorOrSymbol
1535                                  */
1536                                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1537                                 "//ldml/numbers/currencyFormats[@numberSystem=\"arab\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1538                                 /*
1539                                  * Number Symbols
1540                                  */
1541                                 "//ldml/numbers/minimumGroupingDigits",
1542                                 "//ldml/numbers/symbols[@numberSystem=\"latn\"]/decimal",
1543                                 "//ldml/numbers/symbols[@numberSystem=\"latn\"]/group",
1544                                 "//ldml/numbers/symbols[@numberSystem=\"arab\"]/decimal",
1545                                 "//ldml/numbers/symbols[@numberSystem=\"arab\"]/group",
1546                                 /*
1547                                  * Number formats
1548                                  */
1549                                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1550                                 "//ldml/numbers/percentFormats[@numberSystem=\"latn\"]/percentFormatLength/percentFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1551                                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"accounting\"]/pattern[@type=\"standard\"]",
1552                                 "//ldml/numbers/decimalFormats[@numberSystem=\"arab\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
1553                                 "//ldml/numbers/percentFormats[@numberSystem=\"arab\"]/percentFormatLength/percentFormat[@type=\"standard\"]/pattern[@type=\"standard\"]"
1554                                 /*
1555                                  * "Complementary Observations"
1556                                  */
1557                                 /*
1558                                  * Changes to language aliases (supplementalMetaData) -- see isHighLevelLangAlias
1559                                  * E.g., //supplementalData/metadata/alias/languageAlias[@type="aar"]
1560                                  */
1561                                 /*
1562                                  * Changes in the containment graph -- see isHighLevelTerritoryContainment
1563                                  * Data mostly (or entirely?) from M49 standard, thus CLDR has limited control.
1564                                  * Users use the containment graph in a variety of ways.
1565                                  * E.g., //supplementalData/territoryContainment/group[@type="003"][@contains="013 021 029"]
1566                                  */
1567                                 /*
1568                                  * Format changes: second to none on the disruptiveness scale are changes involving spaces such as SPACE -> NBSP
1569                                  *  or NBSP -> Narrow NBSP. Or adding a space somewhere in the format where previously there was none.
1570                                  *  -- see SuspiciousChange.valueChangeIsDisruptiveWhitespaceOnly
1571                                  */
1572                                 /*
1573                                  * TODO: per design doc, "Adding a timezone"
1574                                  * TODO: per design doc, "Changes of symbols or codes that are cross-locale in some way such as the unknown
1575                                  *  currency symbol change '???' -> '¤'."
1576                                  * TODO: per design doc, "Change in character properties (not a CLDR but a Unicode change), and here especially
1577                                  *  newly adding or removing punctuation. Frequently irritates parsers."
1578                                  */
1579                                 ));
1580 
1581         static Pattern currencyPattern =
1582                 Pattern.compile("^//ldml/numbers/currencies/currency.*/displayName.*");
1583 
1584         /**
1585          * Should the given path in the given locale be taken into account for generating "churn"
1586          * reports?
1587          *
1588          * @param path the path of interest
1589          * @param locale the locale in which the path was found, or null, or possibly the base file
1590          *     name without extension, like "xx" if the file name is "xx.xml", where "xx" may or may
1591          *     not be a locale; e.g., "supplementalData"
1592          * @return true if it counts, else false to ignore
1593          */
pathIsHighLevel(String path, String locale)1594         private static boolean pathIsHighLevel(String path, String locale) {
1595             if (path == null || locale == null) {
1596                 return false;
1597             }
1598             if (!localeIsHighLevel(
1599                     locale)) { // for efficiency, this should be caught at a higher level
1600                 System.out.println(
1601                         "locale ["
1602                                 + locale
1603                                 + "] failed localeIsHighLevel in pathIsHighLevel; path = "
1604                                 + path);
1605                 return false;
1606             }
1607             if (pathIsReallyHighLevel(path, locale)) {
1608                 if (verboseHighLevelReporting) {
1609                     recordHighLevelMatch(path);
1610                 }
1611                 return true;
1612             }
1613             return false;
1614         }
1615 
pathIsReallyHighLevel(String path, String locale)1616         private static boolean pathIsReallyHighLevel(String path, String locale) {
1617             if (highLevelPaths.contains(path)) {
1618                 return true;
1619             } else if (isHighLevelTerritoryName(path, locale)) {
1620                 return true;
1621             } else if (isHighLevelLangName(path, locale)) {
1622                 return true;
1623             } else if (isHighLevelCurrencyName(path, locale)) {
1624                 return true;
1625             } else if (isHighLevelCurrencyCode(path, locale)) {
1626                 return true;
1627             } else if (isHighLevelCurrencySeparatorOrSymbol(path, locale)) {
1628                 return true;
1629             } else if (isHighLevelLangAlias(path, locale)) {
1630                 return true;
1631             } else if (isHighLevelTerritoryContainment(path, locale)) {
1632                 return true;
1633             } else if (isHighLevelFirstDay(path, locale)) {
1634                 return true;
1635             } else if (isHighLevelWeekOfPreference(path, locale)) {
1636                 return true;
1637             }
1638             return false;
1639         }
1640 
1641         /**
1642          * Is the given locale, or base name, to be considered for "high level" churn report?
1643          *
1644          * @param locale the locale string, or base name like "supplementalData" as in
1645          *     "supplementalData.xml"
1646          * @return true or false
1647          */
localeIsHighLevel(String locale)1648         private static boolean localeIsHighLevel(String locale) {
1649             return SubmissionLocales.CLDR_OR_HIGH_LEVEL_LOCALES.contains(locale)
1650                     || "supplementalData".equals(locale);
1651         }
1652 
1653         /**
1654          * Changes to language aliases (supplemental metadata) E.g.,
1655          * //supplementalData/metadata/alias/languageAlias[@type="aar"]
1656          *
1657          * @param path
1658          * @param locale must be "supplementalData" to match
1659          * @return true or false
1660          */
isHighLevelLangAlias(String path, String locale)1661         private static boolean isHighLevelLangAlias(String path, String locale) {
1662             if ("supplementalData".equals(locale)) {
1663                 if (path.startsWith("//supplementalData/metadata/alias/languageAlias")) {
1664                     return true;
1665                 }
1666             }
1667             return false;
1668         }
1669 
1670         /**
1671          * Changes in the containment graph Data mostly (or entirely?) from M49 standard, thus CLDR
1672          * has limited control. Users use the containment graph in a variety of ways. E.g.,
1673          * //supplementalData/territoryContainment/group[@type="003"][@contains="013 021 029"]
1674          *
1675          * @param path
1676          * @param locale must be "supplementalData" to match
1677          * @return true or false
1678          */
isHighLevelTerritoryContainment(String path, String locale)1679         private static boolean isHighLevelTerritoryContainment(String path, String locale) {
1680             if ("supplementalData".equals(locale)) {
1681                 if (path.startsWith("//supplementalData/territoryContainment")) {
1682                     return true;
1683                 }
1684             }
1685             return false;
1686         }
1687 
1688         /**
1689          * Is the given path a high-level territory name path in the given locale?
1690          *
1691          * <p>E.g., //ldml/localeDisplayNames/territories/territory[@type="NNN"] if type "NNN"
1692          * CORRESPONDS TO the locale or the locale is "en"
1693          *
1694          * <p>English names (en.xml): match all types Native: check each territory type NNN
1695          * corresponding to the given locale
1696          *
1697          * <p>Exclude "alt"
1698          *
1699          * @param path
1700          * @param locale
1701          * @return true or false
1702          */
isHighLevelTerritoryName(String path, String locale)1703         private static boolean isHighLevelTerritoryName(String path, String locale) {
1704             if (path.startsWith("//ldml/localeDisplayNames/territories/territory")
1705                     && !path.contains("[@alt=")) {
1706                 if ("en".equals(locale)) {
1707                     return true;
1708                 }
1709                 CoverageVariableInfo cvi = SUPPLEMENTAL_DATA_INFO.getCoverageVariableInfo(locale);
1710                 if (cvi != null) {
1711                     for (String type : cvi.targetTerritories) {
1712                         if (path.contains("[@type=\"" + type + "\"]")) {
1713                             return true;
1714                         }
1715                     }
1716                 }
1717             }
1718             return false;
1719         }
1720 
1721         /**
1722          * Is the given path a high-level language name path in the given locale?
1723          *
1724          * <p>E.g., //ldml/localeDisplayNames/languages/language[@type="xx"] if type "xx" matches
1725          * the locale or the locale is "en"
1726          *
1727          * <p>Exclude "alt"
1728          *
1729          * @param path
1730          * @param locale
1731          * @return true or false
1732          */
isHighLevelLangName(String path, String locale)1733         private static boolean isHighLevelLangName(String path, String locale) {
1734             if (path.startsWith("//ldml/localeDisplayNames/languages/language")
1735                     && !path.contains("[@alt=")) {
1736                 if ("en".equals(locale)) {
1737                     /*
1738                      * English names (en.xml): match all types
1739                      */
1740                     return true;
1741                 } else if (path.contains("[@type=\"" + locale + "\"]")) {
1742                     /*
1743                      * Native names: match the type=”xx” of each xml file to identify the Native. E.g., type=ko if ko.xml
1744                      */
1745                     return true;
1746                 }
1747             }
1748             return false;
1749         }
1750 
1751         /**
1752          * Is the given path a high-level currency name path in the given locale?
1753          *
1754          * <p>E.g., //ldml/numbers/currencies/currency[@type=\"AAA\"]/displayName if type "AAA"
1755          * CORRESPONDS TO the locale or the locale is "en"
1756          *
1757          * <p>English names (en.xml): match all types Native: check each currency type AAA
1758          * corresponding to the given locale
1759          *
1760          * <p>Do NOT exclude "alt"; e.g.,
1761          * //ldml/numbers/currencies/currency[@type="ADP"]/displayName[@alt="proposed-u167-1"]
1762          *
1763          * @param path
1764          * @param locale
1765          * @return true or false
1766          */
isHighLevelCurrencyName(String path, String locale)1767         private static boolean isHighLevelCurrencyName(String path, String locale) {
1768             if (currencyPattern.matcher(path).matches()) {
1769                 if ("en".equals(locale)) {
1770                     return true;
1771                 }
1772                 CoverageVariableInfo cvi = SUPPLEMENTAL_DATA_INFO.getCoverageVariableInfo(locale);
1773                 if (cvi != null) {
1774                     for (String type : cvi.targetCurrencies) {
1775                         if (path.contains("[@type=\"" + type + "\"]")) {
1776                             return true;
1777                         }
1778                     }
1779                 }
1780             }
1781             return false;
1782         }
1783 
1784         /**
1785          * Is the given path a high-level currency code path in the given locale?
1786          *
1787          * <p>E.g.,
1788          * //supplementalData/currencyData/region[@iso3166="AC"]/currency[@iso4217="SHP"][@from="1976-01-01"]
1789          *
1790          * @param path
1791          * @param locale must be "supplementalData" to match
1792          * @return true or false
1793          */
isHighLevelCurrencyCode(String path, String locale)1794         private static boolean isHighLevelCurrencyCode(String path, String locale) {
1795             if ("supplementalData".equals(locale)) {
1796                 if (path.contains("iso3166")) {
1797                     return true;
1798                 }
1799             }
1800             return false;
1801         }
1802 
1803         /**
1804          * Is the given path a high-level currency thousands-separator or decimal-separator path in
1805          * the given locale?
1806          *
1807          * <p>E.g., //ldml/numbers/currencies/currency[@type="ESP"]/group
1808          * //ldml/numbers/currencies/currency[@type="ESP"]/decimal
1809          * //ldml/numbers/currencies/currency[@type="CNY"]/symbol
1810          * //ldml/numbers/currencies/currency[@type="CNY"]/symbol[@alt="narrow"]"
1811          *
1812          * @param path
1813          * @param locale
1814          * @return true or false
1815          */
isHighLevelCurrencySeparatorOrSymbol(String path, String locale)1816         private static boolean isHighLevelCurrencySeparatorOrSymbol(String path, String locale) {
1817             if (path.startsWith("//ldml/numbers/currencies/currency")
1818                     && (path.contains("group")
1819                             || path.contains("decimal")
1820                             || path.contains("symbol"))) {
1821                 return true;
1822             }
1823             return false;
1824         }
1825 
1826         /**
1827          * Is the given path a high-level weekData/firstDay in the given locale?
1828          *
1829          * <p>E.g.,//supplementalData/weekData/firstDay[@day="fri"][@territories="MV"]
1830          *
1831          * @param path
1832          * @param locale must be "supplementalData" to match
1833          * @return true or false
1834          */
isHighLevelFirstDay(String path, String locale)1835         private static boolean isHighLevelFirstDay(String path, String locale) {
1836             if ("supplementalData".equals(locale)) {
1837                 if (path.startsWith("//supplementalData/weekData/firstDay")) {
1838                     return true;
1839                 }
1840             }
1841             return false;
1842         }
1843 
1844         /**
1845          * Is the given path a high-level weekOfPreference in the given locale?
1846          *
1847          * <p>E.g.,
1848          * //supplementalData/weekData/weekOfPreference[@ordering="weekOfYear"][@locales="und"]
1849          *
1850          * @param path
1851          * @param locale must be "supplementalData" to match
1852          * @return true or false
1853          */
isHighLevelWeekOfPreference(String path, String locale)1854         private static boolean isHighLevelWeekOfPreference(String path, String locale) {
1855             if ("supplementalData".equals(locale)) {
1856                 if (path.startsWith("//supplementalData/weekData/weekOfPreference")) {
1857                     return true;
1858                 }
1859             }
1860             return false;
1861         }
1862 
1863         /** For debugging, testing */
1864         private static Set<String> highLevelPathMatched = null;
1865 
1866         private static boolean verboseHighLevelReporting = false;
1867 
recordHighLevelMatch(String path)1868         private static void recordHighLevelMatch(String path) {
1869             if (highLevelPathMatched == null) {
1870                 highLevelPathMatched = new HashSet<>();
1871             }
1872             highLevelPathMatched.add(path);
1873         }
1874 
1875         /** For debugging, report on any paths in highLevelPaths that never matched */
reportHighLevelPathUsage()1876         private static void reportHighLevelPathUsage() {
1877             if (!verboseHighLevelReporting) {
1878                 return;
1879             }
1880             if (highLevelPathMatched == null) {
1881                 System.out.println("Zero high-level paths were matched!");
1882                 return;
1883             }
1884             for (String path : highLevelPaths) {
1885                 if (!highLevelPathMatched.contains(path)) {
1886                     System.out.println("Unmatched high-level path: " + path);
1887                 }
1888             }
1889             for (String path : highLevelPathMatched) {
1890                 if (!highLevelPaths.contains(path)) {
1891                     System.out.println("Special matched high-level path: " + path);
1892                 }
1893             }
1894         }
1895     }
1896 }
1897