1 package org.unicode.cldr.tool; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.base.Splitter; 5 import com.google.common.collect.Multimap; 6 import com.google.common.collect.TreeMultimap; 7 import com.ibm.icu.impl.Relation; 8 import com.ibm.icu.impl.Row.R2; 9 import com.ibm.icu.impl.Row.R3; 10 import com.ibm.icu.impl.Row.R4; 11 import com.ibm.icu.text.NumberFormat; 12 import com.ibm.icu.text.UnicodeSet; 13 import com.ibm.icu.util.ICUUncheckedIOException; 14 import com.ibm.icu.util.Output; 15 import java.io.File; 16 import java.io.IOException; 17 import java.io.PrintWriter; 18 import java.util.ArrayList; 19 import java.util.Arrays; 20 import java.util.Collection; 21 import java.util.Collections; 22 import java.util.HashSet; 23 import java.util.List; 24 import java.util.Map; 25 import java.util.Map.Entry; 26 import java.util.Objects; 27 import java.util.Set; 28 import java.util.TreeMap; 29 import java.util.TreeSet; 30 import java.util.regex.Matcher; 31 import java.util.regex.Pattern; 32 import org.unicode.cldr.draft.FileUtilities; 33 import org.unicode.cldr.test.DisplayAndInputProcessor; 34 import org.unicode.cldr.test.SubmissionLocales; 35 import org.unicode.cldr.tool.FormattedFileWriter.Anchors; 36 import org.unicode.cldr.tool.Option.Options; 37 import org.unicode.cldr.tool.Option.Params; 38 import org.unicode.cldr.util.CLDRConfig; 39 import org.unicode.cldr.util.CLDRFile; 40 import org.unicode.cldr.util.CLDRFile.Status; 41 import org.unicode.cldr.util.CLDRPaths; 42 import org.unicode.cldr.util.CldrUtility; 43 import org.unicode.cldr.util.Counter; 44 import org.unicode.cldr.util.DtdData; 45 import org.unicode.cldr.util.DtdType; 46 import org.unicode.cldr.util.Factory; 47 import org.unicode.cldr.util.LanguageTagParser; 48 import org.unicode.cldr.util.Level; 49 import org.unicode.cldr.util.LocaleIDParser; 50 import org.unicode.cldr.util.Organization; 51 import org.unicode.cldr.util.Pair; 52 import org.unicode.cldr.util.PathHeader; 53 import 
org.unicode.cldr.util.PathHeader.PageId; 54 import org.unicode.cldr.util.PathStarrer; 55 import org.unicode.cldr.util.PatternCache; 56 import org.unicode.cldr.util.SimpleXMLSource; 57 import org.unicode.cldr.util.StandardCodes; 58 import org.unicode.cldr.util.SupplementalDataInfo; 59 import org.unicode.cldr.util.SupplementalDataInfo.CoverageVariableInfo; 60 import org.unicode.cldr.util.TransliteratorUtilities; 61 import org.unicode.cldr.util.XMLFileReader; 62 import org.unicode.cldr.util.XPathParts; 63 64 public class ChartDelta extends Chart { 65 private static final boolean verbose_skipping = false; 66 67 private static final String DEFAULT_DELTA_DIR_NAME = "delta"; 68 private static final String DEFAULT_CHURN_DIR_NAME = "churn"; 69 70 private static final boolean SKIP_REFORMAT_ANNOTATIONS = 71 ToolConstants.PREV_CHART_VERSION.compareTo("30") >= 0; 72 73 private static final PageId DEBUG_PAGE_ID = PageId.DayPeriod; 74 75 private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO = 76 CLDRConfig.getInstance().getSupplementalDataInfo(); 77 78 private enum MyOptions { 79 fileFilter( 80 new Params() 81 .setHelp("filter files by dir/locale, eg: ^main/en$ or .*/en") 82 .setMatch(".*")), 83 orgFilter(new Params().setHelp("filter files by organization").setMatch(".*")), 84 Vxml(new Params().setHelp("use cldr-aux for the base directory")), 85 coverageFilter(new Params().setHelp("filter files by coverage").setMatch(".*")), 86 directory( 87 new Params() 88 .setHelp("Set the output directory name") 89 .setDefault(DEFAULT_DELTA_DIR_NAME) 90 .setMatch(".*")), 91 verbose(new Params().setHelp("verbose debugging messages")), 92 highLevelOnly(new Params().setHelp("check high-level paths (churn) only").setFlag('H')), 93 ; 94 95 // BOILERPLATE TO COPY 96 final Option option; 97 MyOptions(Params params)98 private MyOptions(Params params) { 99 option = new Option(this, params); 100 } 101 102 private static Options myOptions = new Options(); 103 104 static { 105 for (MyOptions 
option : MyOptions.values()) { myOptions.add(option, option.option)106 myOptions.add(option, option.option); 107 } 108 } 109 parse(String[] args)110 private static Set<String> parse(String[] args) { 111 return myOptions.parse(MyOptions.values()[0], args, true); 112 } 113 } 114 115 private final Matcher fileFilter; 116 private final String dirName; // "delta" or "churn" or set as option 117 private final String chartNameCap; // capitalized, e.g., "Delta" or "Churn" 118 private final String DIR; // full path of output folder 119 private final Level minimumPathCoverage; 120 private final boolean verbose; 121 122 /** 123 * If true, check only high-level paths, i.e., paths for which any changes have high potential 124 * to cause disruptive "churn" 125 */ 126 private final boolean highLevelOnly; 127 main(String[] args)128 public static void main(String[] args) { 129 main(args, false); 130 } 131 main(String[] args, boolean highLevelOnly)132 public static void main(String[] args, boolean highLevelOnly) { 133 System.out.println( 134 "use -DCHART_VERSION=36.0 -DPREV_CHART_VERSION=34.0 to generate the differences between v36 and v34."); 135 MyOptions.parse(args); 136 Matcher fileFilter = 137 !MyOptions.fileFilter.option.doesOccur() 138 ? null 139 : PatternCache.get(MyOptions.fileFilter.option.getValue()).matcher(""); 140 if (MyOptions.orgFilter.option.doesOccur()) { 141 if (MyOptions.fileFilter.option.doesOccur()) { 142 throw new IllegalArgumentException("Can't have both fileFilter and orgFilter"); 143 } 144 String rawOrg = MyOptions.orgFilter.option.getValue(); 145 Organization org = Organization.fromString(rawOrg); 146 Set<String> locales = StandardCodes.make().getLocaleCoverageLocales(org); 147 fileFilter = 148 PatternCache.get("^(main|annotations)/(" + Joiner.on("|").join(locales) + ")$") 149 .matcher(""); 150 } 151 Level coverage = 152 !MyOptions.coverageFilter.option.doesOccur() 153 ? 
null 154 : Level.fromString(MyOptions.coverageFilter.option.getValue()); 155 boolean verbose = MyOptions.verbose.option.doesOccur(); 156 if (MyOptions.highLevelOnly.option.doesOccur()) { 157 highLevelOnly = true; 158 } 159 String dirName = MyOptions.directory.option.getValue(); 160 if (highLevelOnly && DEFAULT_DELTA_DIR_NAME.equals(dirName)) { 161 System.out.println( 162 "For highLevelOnly, changing directory from " 163 + DEFAULT_DELTA_DIR_NAME 164 + " to " 165 + DEFAULT_CHURN_DIR_NAME); 166 dirName = DEFAULT_CHURN_DIR_NAME; 167 } 168 ChartDelta temp = new ChartDelta(fileFilter, coverage, dirName, verbose, highLevelOnly); 169 temp.writeChart(null); 170 temp.showTotals(); 171 if (highLevelOnly) { 172 HighLevelPaths.reportHighLevelPathUsage(); 173 } 174 System.out.println("Finished. Files may have been created in these directories:"); 175 System.out.println(temp.DIR); 176 System.out.println(getTsvDir(temp.DIR, temp.dirName)); 177 } 178 ChartDelta( Matcher fileFilter, Level coverage, String dirName, boolean verbose, boolean highLevelOnly)179 private ChartDelta( 180 Matcher fileFilter, 181 Level coverage, 182 String dirName, 183 boolean verbose, 184 boolean highLevelOnly) { 185 this.fileFilter = fileFilter; 186 this.verbose = verbose; 187 this.highLevelOnly = highLevelOnly; 188 this.dirName = dirName; 189 this.chartNameCap = dirName.substring(0, 1).toUpperCase() + dirName.substring(1); 190 this.DIR = CLDRPaths.CHART_DIRECTORY + dirName; 191 this.minimumPathCoverage = coverage; 192 } 193 194 private static final String SEP = "\u0001"; 195 private static final boolean DEBUG = false; 196 private static final String DEBUG_FILE = null; // "windowsZones.xml"; 197 static Pattern fileMatcher = PatternCache.get(".*"); 198 199 static PathHeader.Factory phf = PathHeader.getFactory(ENGLISH); 200 static final Set<String> DONT_CARE = 201 new HashSet<>(Arrays.asList("draft", "standard", "reference")); 202 203 @Override getDirectory()204 public String getDirectory() { 205 return DIR; 
206 } 207 208 @Override getTitle()209 public String getTitle() { 210 return chartNameCap + " Charts"; 211 } 212 213 @Override getFileName()214 public String getFileName() { 215 return "index"; 216 } 217 218 @Override getExplanation()219 public String getExplanation() { 220 return "<p>Charts showing the differences from the last version. " 221 + "Titles prefixed by ¤ are special: either the locale data summary or supplemental data. " 222 + "Not all changed data is charted yet. For details see each chart.</p>"; 223 } 224 225 @Override writeContents(FormattedFileWriter pw)226 public void writeContents(FormattedFileWriter pw) throws IOException { 227 FormattedFileWriter.Anchors anchors = new FormattedFileWriter.Anchors(); 228 FileUtilities.copyFile(ChartDelta.class, "index.css", getDirectory()); 229 FormattedFileWriter.copyIncludeHtmls(getDirectory(), true); 230 counter.clear(); 231 fileCounters.clear(); 232 writeNonLdmlPlain(anchors); 233 writeLdml(anchors); 234 pw.setIndex("Main Chart Index", "../index.html"); 235 pw.write(anchors.toString()); 236 } 237 238 private static class PathHeaderSegment extends R3<PathHeader, Integer, String> { PathHeaderSegment(PathHeader b, int elementIndex, String attribute)239 public PathHeaderSegment(PathHeader b, int elementIndex, String attribute) { 240 super(b, elementIndex, attribute); 241 } 242 } 243 244 private static class PathDiff extends R4<PathHeaderSegment, String, String, String> { PathDiff( String locale, PathHeaderSegment pathHeaderSegment, String oldValue, String newValue)245 public PathDiff( 246 String locale, 247 PathHeaderSegment pathHeaderSegment, 248 String oldValue, 249 String newValue) { 250 super(pathHeaderSegment, locale, oldValue, newValue); 251 } 252 } 253 254 private static final CLDRFile EMPTY_CLDR = new CLDRFile(new SimpleXMLSource("und").freeze()); 255 256 private static final File CLDR_BASE_DIR = CLDRConfig.getInstance().getCldrBaseDirectory(); 257 258 private enum ChangeType { 259 added, 260 deleted, 261 
changed, 262 same; 263 get(String oldValue, String currentValue)264 public static ChangeType get(String oldValue, String currentValue) { 265 return oldValue == null 266 ? added 267 : currentValue == null 268 ? deleted 269 : oldValue.equals(currentValue) ? same : changed; 270 } 271 } 272 273 private Counter<ChangeType> counter = new Counter<>(); 274 private Map<String, Counter<ChangeType>> fileCounters = new TreeMap<>(); 275 private Set<String> badHeaders = new TreeSet<>(); 276 277 /** Add the count of changed items */ addChange(String file, ChangeType changeType, int count)278 private void addChange(String file, ChangeType changeType, int count) { 279 counter.add(changeType, count); // unified add 280 Counter<ChangeType> fileCounter = fileCounters.get(file); 281 if (fileCounter == null) { 282 fileCounters.put(file, fileCounter = new Counter<>()); 283 } 284 fileCounter.add(changeType, count); 285 } 286 showTotals()287 private void showTotals() { 288 try (PrintWriter pw = 289 FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + "_summary.tsv")) { 290 // pw.println("# percentages are of *new* total"); 291 pw.print("# dir\tfile"); 292 for (ChangeType item : ChangeType.values()) { 293 pw.print("\t" + (item == ChangeType.same ? 
"total" : item.toString())); 294 } 295 pw.println(); 296 showTotal(pw, "TOTAL/", counter); 297 298 for (Entry<String, Counter<ChangeType>> entry : fileCounters.entrySet()) { 299 showTotal(pw, entry.getKey(), entry.getValue()); 300 } 301 for (String s : badHeaders) { 302 pw.println(s); 303 } 304 // pw.println("# EOF"); 305 } catch (IOException e) { 306 throw new ICUUncheckedIOException(e); 307 } 308 } 309 showTotal(PrintWriter pw, String title2, Counter<ChangeType> counter2)310 private void showTotal(PrintWriter pw, String title2, Counter<ChangeType> counter2) { 311 long total = counter2.getTotal(); 312 NumberFormat pf = NumberFormat.getPercentInstance(); 313 pf.setMinimumFractionDigits(2); 314 NumberFormat nf = NumberFormat.getIntegerInstance(); 315 pw.print(title2.replace("/", "\t")); 316 for (ChangeType item : ChangeType.values()) { 317 if (item == ChangeType.same) { 318 pw.print("\t" + nf.format(total)); 319 } else { 320 final long current = counter2.getCount(item); 321 pw.print("\t" + nf.format(current)); 322 } 323 } 324 pw.println(); 325 } 326 327 /** 328 * @param anchors 329 * @throws IOException 330 * <p>TODO: shorten the function using subroutines 331 */ writeLdml(Anchors anchors)332 private void writeLdml(Anchors anchors) throws IOException { 333 try (PrintWriter tsvFile = 334 FileUtilities.openUTF8Writer(getTsvDir(DIR, dirName), dirName + ".tsv"); 335 PrintWriter tsvCountFile = 336 FileUtilities.openUTF8Writer( 337 getTsvDir(DIR, dirName), dirName + "_count.tsv"); ) { 338 tsvFile.println("# Section\tPage\tHeader\tCode\tLocale\tOld\tNew\tLevel"); 339 340 // set up factories 341 List<Factory> factories = new ArrayList<>(); 342 List<Factory> oldFactories = new ArrayList<>(); 343 344 Counter<PathHeader> counts = new Counter<>(); 345 346 String dirBase = ToolConstants.getBaseDirectory(ToolConstants.CHART_VERSION); 347 String prevDirBase = ToolConstants.getBaseDirectory(ToolConstants.PREV_CHART_VERSION); 348 349 for (String dir : DtdType.ldml.directories) { 350 
if (dir.equals("annotationsDerived") || dir.equals("casing")) { 351 continue; 352 } 353 String current = dirBase + "common/" + dir; 354 String past = prevDirBase + "common/" + dir; 355 try { 356 factories.add(Factory.make(current, ".*")); 357 } catch (Exception e1) { 358 System.out.println("Skipping: " + dir + "\t" + e1.getMessage()); 359 continue; // skip where the directories don't exist in old versions 360 } 361 try { 362 oldFactories.add(Factory.make(past, ".*")); 363 } catch (Exception e) { 364 System.out.println("Couldn't open factory: " + past); 365 past = null; 366 oldFactories.add(null); 367 } 368 System.out.println("Will compare: " + dir + "\t\t" + current + "\t\t" + past); 369 } 370 if (factories.isEmpty()) { 371 throw new IllegalArgumentException( 372 "No factories found for " + dirBase + ": " + DtdType.ldml.directories); 373 } 374 // get a list of all the locales to cycle over 375 376 Relation<String, String> baseToLocales = 377 Relation.of(new TreeMap<String, Set<String>>(), HashSet.class); 378 Matcher m = fileMatcher.matcher(""); 379 Set<String> defaultContents = SDI.getDefaultContentLocales(); 380 LanguageTagParser ltp = new LanguageTagParser(); 381 LikelySubtags ls = new LikelySubtags(); 382 for (String file : factories.get(0).getAvailable()) { 383 if (defaultContents.contains(file)) { 384 continue; 385 } 386 if (!m.reset(file).matches()) { 387 continue; 388 } 389 String base = 390 file.equals("root") 391 ? 
"root" 392 : ltp.set(ls.minimize(file)).getLanguageScript(); 393 baseToLocales.put(base, file); 394 } 395 396 // do keyboards later 397 398 Status currentStatus = new Status(); 399 Status oldStatus = new Status(); 400 Set<PathDiff> diff = new TreeSet<>(); 401 Set<String> paths = new HashSet<>(); 402 403 Relation<PathHeader, String> diffAll = 404 Relation.of(new TreeMap<PathHeader, Set<String>>(), TreeSet.class); 405 for (Entry<String, Set<String>> baseNLocale : baseToLocales.keyValuesSet()) { 406 String base = baseNLocale.getKey(); 407 for (int i = 0; i < factories.size(); ++i) { 408 Factory factory = factories.get(i); 409 Factory oldFactory = oldFactories.get(i); 410 List<File> sourceDirs = Arrays.asList(factory.getSourceDirectories()); 411 if (sourceDirs.size() != 1) { 412 throw new IllegalArgumentException( 413 "Internal error: expect single source dir"); 414 } 415 File sourceDir = sourceDirs.get(0); 416 String sourceDirLeaf = sourceDir.getName(); 417 boolean resolving = 418 !sourceDirLeaf.contains("subdivisions") 419 && !sourceDirLeaf.contains("transforms"); 420 421 for (String locale : baseNLocale.getValue()) { 422 String nameAndLocale = sourceDirLeaf + "/" + locale; 423 if (fileFilter != null && !fileFilter.reset(nameAndLocale).find()) { 424 if (verbose && verbose_skipping) { 425 System.out.println("SKIPPING: " + nameAndLocale); 426 } 427 continue; 428 } 429 if (verbose) { 430 System.out.println(nameAndLocale); 431 } 432 CLDRFile current = makeWithFallback(factory, locale, resolving); 433 CLDRFile old = makeWithFallback(oldFactory, locale, resolving); 434 DisplayAndInputProcessor daip = new DisplayAndInputProcessor(old); 435 436 if (!locale.equals("root") 437 && current.getLocaleID().equals("root") 438 && old.getLocaleID().equals("root")) { 439 continue; 440 } 441 if (old == EMPTY_CLDR && current == EMPTY_CLDR) { 442 continue; 443 } 444 if (highLevelOnly && !HighLevelPaths.localeIsHighLevel(locale)) { 445 continue; 446 } 447 paths.clear(); 448 for (String 
path : current.fullIterable()) { 449 if (allowPath(locale, path)) { 450 paths.add(path); 451 } 452 } 453 for (String path : old.fullIterable()) { 454 if (!paths.contains(path) && allowPath(locale, path)) { 455 paths.add(path); 456 } 457 } 458 459 Output<String> reformattedValue = new Output<>(); 460 Output<Boolean> hasReformattedValue = new Output<>(); 461 462 for (String path : paths) { 463 if (path.startsWith("//ldml/identity") 464 || path.endsWith("/alias") 465 || path.startsWith("//ldml/segmentations") // do later 466 || path.startsWith("//ldml/rbnf") // do later 467 ) { 468 continue; 469 } 470 PathHeader ph = getPathHeader(path); 471 if (ph == null) { 472 continue; 473 } 474 475 String oldValue; 476 String currentValue; 477 478 { 479 String sourceLocaleCurrent = 480 current.getSourceLocaleID(path, currentStatus); 481 String sourceLocaleOld = 482 getReformattedPath( 483 oldStatus, 484 old, 485 path, 486 reformattedValue, 487 hasReformattedValue); 488 489 // filter out stuff that differs at a higher level 490 if (!sourceLocaleCurrent.equals(locale) 491 && !sourceLocaleOld.equals(locale)) { 492 continue; 493 } 494 if (!path.equals(currentStatus.pathWhereFound) 495 && !path.equals(oldStatus.pathWhereFound)) { 496 continue; 497 } 498 // fix some incorrect cases? 499 500 currentValue = current.getStringValue(path); 501 if (CldrUtility.INHERITANCE_MARKER.equals(currentValue)) { 502 currentValue = current.getBaileyValue(path, null, null); 503 } 504 505 String oldRawValue = 506 hasReformattedValue.value 507 ? reformattedValue.value 508 : old.getStringValue(path); 509 if (CldrUtility.INHERITANCE_MARKER.equals(oldRawValue)) { 510 oldRawValue = old.getBaileyValue(path, null, null); 511 } 512 // ignore differences due to old DAIP 513 oldValue = 514 dontDaipValue(oldRawValue, path) 515 ? 
oldRawValue 516 : daip.processInput(path, oldRawValue, null); 517 } 518 if (highLevelOnly 519 && new SuspiciousChange(oldValue, currentValue, path, locale) 520 .isDisruptive() 521 == false) { 522 continue; 523 } 524 // handle non-distinguishing attributes 525 addPathDiff(sourceDir, old, current, locale, ph, diff); 526 527 addValueDiff( 528 sourceDir, oldValue, currentValue, locale, ph, diff, diffAll); 529 } 530 } 531 } 532 writeDiffs(anchors, base, diff, tsvFile, counts); 533 diff.clear(); 534 } 535 writeDiffs(diffAll); 536 537 writeCounter(tsvCountFile, "Count", counts); 538 } 539 } 540 dontDaipValue(String oldRawValue, String path)541 public boolean dontDaipValue(String oldRawValue, String path) { 542 return oldRawValue == null || path.startsWith("//ldml/collations"); 543 } 544 allowPath(String locale, String path)545 private boolean allowPath(String locale, String path) { 546 if (minimumPathCoverage != null) { 547 Level pathLevel = SUPPLEMENTAL_DATA_INFO.getCoverageLevel(path, locale); 548 if (minimumPathCoverage.compareTo(pathLevel) < 0) { 549 return false; 550 } 551 } 552 return true; 553 } 554 getReformattedPath( Status oldStatus, CLDRFile old, String path, Output<String> value, Output<Boolean> hasReformattedValue)555 private String getReformattedPath( 556 Status oldStatus, 557 CLDRFile old, 558 String path, 559 Output<String> value, 560 Output<Boolean> hasReformattedValue) { 561 if (SKIP_REFORMAT_ANNOTATIONS || !path.startsWith("//ldml/annotations/")) { 562 hasReformattedValue.value = Boolean.FALSE; 563 return old.getSourceLocaleID(path, oldStatus); 564 } 565 // OLD: <annotation cp='[]' tts='grinning face'>face; grin</annotation> 566 // NEW: <annotation cp="">face | grin</annotation> 567 // <annotation cp="" type="tts">grinning face</annotation> 568 // from the NEW paths, get the OLD values 569 XPathParts parts = 570 XPathParts.getFrozenInstance(path) 571 .cloneAsThawed(); // not frozen, for removeAttribute 572 boolean isTts = parts.getAttributeValue(-1, 
"type") != null; 573 if (isTts) { 574 parts.removeAttribute(-1, "type"); 575 } 576 String cp = parts.getAttributeValue(-1, "cp"); 577 parts.setAttribute(-1, "cp", "[" + cp + "]"); 578 579 String oldStylePath = parts.toString(); 580 String temp = old.getStringValue(oldStylePath); 581 if (temp == null) { 582 hasReformattedValue.value = Boolean.FALSE; 583 } else if (isTts) { 584 String temp2 = old.getFullXPath(oldStylePath); 585 value.value = XPathParts.getFrozenInstance(temp2).getAttributeValue(-1, "tts"); 586 hasReformattedValue.value = Boolean.TRUE; 587 } else { 588 value.value = temp.replaceAll("\\s*;\\s*", " | "); 589 hasReformattedValue.value = Boolean.TRUE; 590 } 591 return old.getSourceLocaleID(oldStylePath, oldStatus); 592 } 593 594 PathStarrer starrer = new PathStarrer().setSubstitutionPattern("%A"); 595 getPathHeader(String path)596 private PathHeader getPathHeader(String path) { 597 try { 598 PathHeader ph = phf.fromPath(path); 599 if (ph.getPageId() == PageId.Unknown) { 600 String star = starrer.set(path); 601 badHeaders.add(star); 602 return null; 603 } 604 return ph; 605 } catch (Exception e) { 606 String star = starrer.set(path); 607 badHeaders.add(star); 608 // System.err.println("Skipping path with bad PathHeader: " + path); 609 return null; 610 } 611 } 612 makeWithFallback(Factory oldFactory, String locale, boolean resolving)613 private CLDRFile makeWithFallback(Factory oldFactory, String locale, boolean resolving) { 614 if (oldFactory == null) { 615 return EMPTY_CLDR; 616 } 617 CLDRFile old; 618 String oldLocale = locale; 619 while (true) { // fall back for old, maybe to root 620 try { 621 old = oldFactory.make(oldLocale, resolving); 622 break; 623 } catch (Exception e) { 624 oldLocale = LocaleIDParser.getParent(oldLocale); 625 if (oldLocale == null) { 626 return EMPTY_CLDR; 627 } 628 } 629 } 630 return old; 631 } 632 addPathDiff( File sourceDir, CLDRFile old, CLDRFile current, String locale, PathHeader ph, Set<PathDiff> diff2)633 private void 
addPathDiff( 634 File sourceDir, 635 CLDRFile old, 636 CLDRFile current, 637 String locale, 638 PathHeader ph, 639 Set<PathDiff> diff2) { 640 String path = ph.getOriginalPath(); 641 String fullPathCurrent = current.getFullXPath(path); 642 String fullPathOld = old.getFullXPath(path); 643 if (Objects.equals(fullPathCurrent, fullPathOld)) { 644 return; 645 } 646 XPathParts pathPlain = XPathParts.getFrozenInstance(path); 647 XPathParts pathCurrent = 648 fullPathCurrent == null ? pathPlain : XPathParts.getFrozenInstance(fullPathCurrent); 649 XPathParts pathOld = 650 fullPathOld == null ? pathPlain : XPathParts.getFrozenInstance(fullPathOld); 651 TreeSet<String> fullAttributes = null; 652 int size = pathCurrent.size(); 653 String parentAndName = parentAndName(sourceDir, locale); 654 for (int elementIndex = 0; elementIndex < size; ++elementIndex) { // will have same size 655 Collection<String> distinguishing = pathPlain.getAttributeKeys(elementIndex); 656 Collection<String> attributesCurrent = pathCurrent.getAttributeKeys(elementIndex); 657 Collection<String> attributesOld = pathCurrent.getAttributeKeys(elementIndex); 658 if (attributesCurrent.isEmpty() && attributesOld.isEmpty()) { 659 continue; 660 } 661 if (fullAttributes == null) { 662 fullAttributes = new TreeSet<>(); 663 } else { 664 fullAttributes.clear(); 665 } 666 fullAttributes.addAll(attributesCurrent); 667 fullAttributes.addAll(attributesOld); 668 fullAttributes.removeAll(distinguishing); 669 fullAttributes.removeAll(DONT_CARE); 670 671 // at this point we only have non-distinguishing 672 for (String attribute : fullAttributes) { 673 String attributeValueOld = pathOld.getAttributeValue(elementIndex, attribute); 674 String attributeValueCurrent = 675 pathCurrent.getAttributeValue(elementIndex, attribute); 676 if (Objects.equals(attributeValueOld, attributeValueCurrent)) { 677 addChange(parentAndName, ChangeType.same, 1); 678 continue; 679 } 680 addChange( 681 parentAndName, ChangeType.get(attributeValueOld, 
attributeValueCurrent), 1); 682 683 PathDiff row = 684 new PathDiff( 685 locale, 686 new PathHeaderSegment(ph, size - elementIndex - 1, attribute), 687 attributeValueOld, 688 attributeValueCurrent); 689 if (DEBUG) { 690 System.out.println(row); 691 } 692 diff2.add(row); 693 } 694 } 695 } 696 parentAndName(File sourceDir, String locale)697 private String parentAndName(File sourceDir, String locale) { 698 return sourceDir.getName() + "/" + locale + ".xml"; 699 } 700 addValueDiff( File sourceDir, String valueOld, String valueCurrent, String locale, PathHeader ph, Set<PathDiff> diff, Relation<PathHeader, String> diffAll)701 private void addValueDiff( 702 File sourceDir, 703 String valueOld, 704 String valueCurrent, 705 String locale, 706 PathHeader ph, 707 Set<PathDiff> diff, 708 Relation<PathHeader, String> diffAll) { 709 // handle stuff that can be split specially 710 Splitter splitter = getSplitter(ph.getOriginalPath(), valueOld, valueCurrent); 711 int count = 1; 712 String parentAndName = parentAndName(sourceDir, locale); 713 if (Objects.equals(valueCurrent, valueOld)) { 714 if (splitter != null && valueCurrent != null) { 715 count = splitHandlingNull(splitter, valueCurrent).size(); 716 } 717 addChange(parentAndName, ChangeType.same, count); 718 } else { 719 if (splitter != null) { 720 List<String> setOld = splitHandlingNull(splitter, valueOld); 721 List<String> setNew = splitHandlingNull(splitter, valueCurrent); 722 int[] sameAndNotInSecond = new int[2]; 723 valueOld = getFilteredValue(setOld, setNew, sameAndNotInSecond); 724 addChange(parentAndName, ChangeType.same, sameAndNotInSecond[0]); 725 addChange(parentAndName, ChangeType.deleted, sameAndNotInSecond[1]); 726 sameAndNotInSecond[0] = sameAndNotInSecond[1] = 0; 727 valueCurrent = getFilteredValue(setNew, setOld, sameAndNotInSecond); 728 addChange(parentAndName, ChangeType.added, sameAndNotInSecond[1]); 729 } else if (hasUnicodeSetValue(ph.getOriginalPath())) { 730 UnicodeSet usOld = valueOld == null ? 
UnicodeSet.EMPTY : new UnicodeSet(valueOld); 731 UnicodeSet usCurrent = 732 valueCurrent == null ? UnicodeSet.EMPTY : new UnicodeSet(valueCurrent); 733 UnicodeSet oldOnly = new UnicodeSet(usOld).removeAll(usCurrent); 734 UnicodeSet currentOnly = new UnicodeSet(usCurrent).removeAll(usOld); 735 addChange(parentAndName, ChangeType.same, usOld.size() - oldOnly.size()); 736 addChange(parentAndName, ChangeType.deleted, oldOnly.size()); 737 addChange(parentAndName, ChangeType.added, currentOnly.size()); 738 valueOld = 739 usOld.size() == oldOnly.size() 740 ? oldOnly.toPattern(false) 741 : "…" + oldOnly + "…"; 742 valueCurrent = 743 usCurrent.size() == currentOnly.size() 744 ? currentOnly.toPattern(false) 745 : "…" + currentOnly + "…"; 746 } else { 747 addChange(parentAndName, ChangeType.get(valueOld, valueCurrent), count); 748 } 749 PathDiff row = 750 new PathDiff(locale, new PathHeaderSegment(ph, -1, ""), valueOld, valueCurrent); 751 diff.add(row); 752 diffAll.put(ph, locale); 753 } 754 } 755 hasUnicodeSetValue(String xpath)756 private boolean hasUnicodeSetValue(String xpath) { 757 return xpath.startsWith("//ldml/characters/exemplar"); 758 } 759 splitHandlingNull(Splitter splitter, String value)760 private List<String> splitHandlingNull(Splitter splitter, String value) { 761 return value == null ? 
null : splitter.splitToList(value); 762 } 763 getSplitter(String path, String valueOld, String valueCurrent)764 private Splitter getSplitter(String path, String valueOld, String valueCurrent) { 765 if (path.contains("/annotation") && !path.contains("tts")) { 766 return DtdData.BAR_SPLITTER; 767 } else if (valueOld != null && valueOld.contains("\n") 768 || valueCurrent != null && valueCurrent.contains("\n")) { 769 return DtdData.CR_SPLITTER; 770 } else { 771 return null; 772 } 773 } 774 775 /** 776 * Return string with all lines from linesToRemove removed 777 * 778 * @param toGetStringFor 779 * @param linesToRemove 780 * @return 781 */ getFilteredValue( Collection<String> toGetStringFor, Collection<String> linesToRemove, int[] sameAndDiff)782 private String getFilteredValue( 783 Collection<String> toGetStringFor, 784 Collection<String> linesToRemove, 785 int[] sameAndDiff) { 786 if (toGetStringFor == null) { 787 return null; 788 } 789 StringBuilder buf = new StringBuilder(); 790 Set<String> toRemove = 791 linesToRemove == null ? 
Collections.emptySet() : new HashSet<>(linesToRemove); 792 boolean removed = false; 793 for (String old : toGetStringFor) { 794 if (toRemove.contains(old)) { 795 removed = true; 796 sameAndDiff[0]++; 797 } else { 798 sameAndDiff[1]++; 799 if (removed) { 800 buf.append("…\n"); 801 removed = false; 802 } 803 buf.append(old).append('\n'); 804 } 805 } 806 if (removed) { 807 buf.append("…"); 808 } else if (buf.length() > 0) { 809 buf.setLength(buf.length() - 1); // remove final \n 810 } 811 return buf.toString(); 812 } 813 writeDiffs( Anchors anchors, String file, String title, Multimap<PathHeader, String> bcp, PrintWriter tsvFile)814 private void writeDiffs( 815 Anchors anchors, 816 String file, 817 String title, 818 Multimap<PathHeader, String> bcp, 819 PrintWriter tsvFile) { 820 if (bcp.isEmpty()) { 821 System.out.println("\tDeleting: " + DIR + "/" + file); 822 new File(DIR + file).delete(); 823 return; 824 } 825 TablePrinter tablePrinter = 826 new TablePrinter() 827 .addColumn( 828 "Section", 829 "class='source'", 830 CldrUtility.getDoubleLinkMsg(), 831 "class='source'", 832 true) 833 .addColumn( 834 "Page", 835 "class='source'", 836 CldrUtility.getDoubleLinkMsg(), 837 "class='source'", 838 true) // .setRepeatDivider(true) 839 .addColumn( 840 "Header", 841 "class='source'", 842 CldrUtility.getDoubleLinkMsg(), 843 "class='source'", 844 true) 845 .addColumn("Code", "class='source'", null, "class='source'", false) 846 .addColumn( 847 "Old", 848 "class='target'", 849 null, 850 "class='target'", 851 false) // width='20%' 852 .addColumn( 853 "New", 854 "class='target'", 855 null, 856 "class='target'", 857 false); // width='20%' 858 PathHeader ph1 = 859 phf.fromPath( 860 "//supplementalData/metadata/alias/subdivisionAlias[@type=\"TW-TXQ\"]/_reason"); 861 PathHeader ph2 = 862 phf.fromPath( 863 "//supplementalData/metadata/alias/subdivisionAlias[@type=\"LA-XN\"]/_replacement"); 864 ph1.compareTo(ph2); 865 for (Entry<PathHeader, Collection<String>> entry : 
bcp.asMap().entrySet()) { 866 PathHeader ph = entry.getKey(); 867 if (ph.getPageId() == DEBUG_PAGE_ID) { 868 System.out.println(ph + "\t" + ph.getOriginalPath()); 869 } 870 for (String value : entry.getValue()) { 871 String[] oldNew = value.split(SEP); 872 tablePrinter 873 .addRow() 874 .addCell(ph.getSectionId()) 875 .addCell(ph.getPageId()) 876 .addCell(ph.getHeader()) 877 .addCell(ph.getCode()) 878 .addCell(oldNew[0]) 879 .addCell(oldNew[1]) 880 .finishRow(); 881 } 882 } 883 writeTable(anchors, file, tablePrinter, title, tsvFile); 884 } 885 writeDiffs(Relation<PathHeader, String> diffAll)886 private void writeDiffs(Relation<PathHeader, String> diffAll) { 887 TablePrinter tablePrinter = 888 new TablePrinter() 889 .addColumn( 890 "Section", 891 "class='source'", 892 CldrUtility.getDoubleLinkMsg(), 893 "class='source'", 894 true) 895 .addColumn( 896 "Page", 897 "class='source'", 898 CldrUtility.getDoubleLinkMsg(), 899 "class='source'", 900 true) 901 .addColumn( 902 "Header", 903 "class='source'", 904 CldrUtility.getDoubleLinkMsg(), 905 "class='source'", 906 true) 907 .addColumn("Code", "class='source'", null, "class='source'", true) 908 .addColumn( 909 "Locales where different", 910 "class='target'", 911 null, 912 "class='target'", 913 true); 914 for (Entry<PathHeader, Set<String>> row : diffAll.keyValuesSet()) { 915 PathHeader ph = row.getKey(); 916 Set<String> locales = row.getValue(); 917 tablePrinter 918 .addRow() 919 .addCell(ph.getSectionId()) 920 .addCell(ph.getPageId()) 921 .addCell(ph.getHeader()) 922 .addCell(ph.getCode()) 923 .addCell(Joiner.on(" ").join(locales)) 924 .finishRow(); 925 } 926 } 927 writeDiffs( Anchors anchors, String file, Set<PathDiff> diff, PrintWriter tsvFile, Counter<PathHeader> counts)928 private void writeDiffs( 929 Anchors anchors, 930 String file, 931 Set<PathDiff> diff, 932 PrintWriter tsvFile, 933 Counter<PathHeader> counts) { 934 if (diff.isEmpty()) { 935 return; 936 } 937 TablePrinter tablePrinter = 938 new TablePrinter() 939 
.addColumn( 940 "Section", 941 "class='source'", 942 CldrUtility.getDoubleLinkMsg(), 943 "class='source'", 944 true) 945 .addColumn( 946 "Page", 947 "class='source'", 948 CldrUtility.getDoubleLinkMsg(), 949 "class='source'", 950 true) 951 .addColumn( 952 "Header", 953 "class='source'", 954 CldrUtility.getDoubleLinkMsg(), 955 "class='source'", 956 true) 957 .addColumn("Code", "class='source'", null, "class='source'", true) 958 .addColumn("Locale", "class='source'", null, "class='source'", true) 959 .addColumn( 960 "Old", 961 "class='target'", 962 null, 963 "class='target'", 964 true) // width='20%' 965 .addColumn( 966 "New", 967 "class='target'", 968 null, 969 "class='target'", 970 true) // width='20%' 971 .addColumn("Level", "class='target'", null, "class='target'", true); 972 973 for (PathDiff row : diff) { 974 PathHeaderSegment phs = row.get0(); 975 counts.add(phs.get0(), 1); 976 String locale = row.get1(); 977 String oldValue = row.get2(); 978 String currentValue = row.get3(); 979 980 PathHeader ph = phs.get0(); 981 Integer pathIndex = phs.get1(); 982 String attribute = phs.get2(); 983 String specialCode = ph.getCode(); 984 985 if (!attribute.isEmpty()) { 986 specialCode += "_" + attribute; 987 if (pathIndex != 0) { 988 specialCode += "|" + pathIndex; 989 } 990 } 991 Level coverageLevel = 992 SUPPLEMENTAL_DATA_INFO.getCoverageLevel(ph.getOriginalPath(), locale); 993 String fixedOldValue = 994 oldValue == null 995 ? "▷missing◁" 996 : TransliteratorUtilities.toHTML.transform(oldValue); 997 String fixedNewValue = 998 currentValue == null 999 ? 
"▷removed◁" 1000 : TransliteratorUtilities.toHTML.transform(currentValue); 1001 1002 tablePrinter 1003 .addRow() 1004 .addCell(ph.getSectionId()) 1005 .addCell(ph.getPageId()) 1006 .addCell(ph.getHeader()) 1007 .addCell(specialCode) 1008 .addCell(locale) 1009 .addCell(fixedOldValue) 1010 .addCell(fixedNewValue) 1011 .addCell(coverageLevel) 1012 .finishRow(); 1013 } 1014 String title = ENGLISH.getName(file) + " " + chartNameCap; 1015 writeTable(anchors, file, tablePrinter, title, tsvFile); 1016 1017 diff.clear(); 1018 } 1019 1020 private class ChartDeltaSub extends Chart { 1021 private String title; 1022 private String file; 1023 private TablePrinter tablePrinter; 1024 private PrintWriter tsvFile; 1025 ChartDeltaSub( String title, String file, TablePrinter tablePrinter, PrintWriter tsvFile)1026 private ChartDeltaSub( 1027 String title, String file, TablePrinter tablePrinter, PrintWriter tsvFile) { 1028 super(); 1029 this.title = title; 1030 this.file = file; 1031 this.tablePrinter = tablePrinter; 1032 this.tsvFile = tsvFile; 1033 } 1034 1035 @Override getDirectory()1036 public String getDirectory() { 1037 return DIR; 1038 } 1039 1040 @Override getShowDate()1041 public boolean getShowDate() { 1042 return false; 1043 } 1044 1045 @Override getTitle()1046 public String getTitle() { 1047 return title; 1048 } 1049 1050 @Override getFileName()1051 public String getFileName() { 1052 return file; 1053 } 1054 1055 @Override getExplanation()1056 public String getExplanation() { 1057 return "<p>Lists data fields that differ from the last major version (see versions above)." 1058 + " Inherited differences in locales are suppressed, except where the source locales are different. 
" 1059 + "<p>"; 1060 } 1061 1062 @Override writeContents(FormattedFileWriter pw)1063 public void writeContents(FormattedFileWriter pw) throws IOException { 1064 pw.write(tablePrinter.toTable()); 1065 tablePrinter.toTsv(tsvFile); 1066 } 1067 } 1068 writeTable( Anchors anchors, String file, TablePrinter tablePrinter, String title, PrintWriter tsvFile)1069 private void writeTable( 1070 Anchors anchors, 1071 String file, 1072 TablePrinter tablePrinter, 1073 String title, 1074 PrintWriter tsvFile) { 1075 ChartDeltaSub chartDeltaSub = new ChartDeltaSub(title, file, tablePrinter, tsvFile); 1076 chartDeltaSub.writeChart(anchors); 1077 } 1078 writeNonLdmlPlain(Anchors anchors)1079 private void writeNonLdmlPlain(Anchors anchors) throws IOException { 1080 try (PrintWriter tsvFile = 1081 FileUtilities.openUTF8Writer( 1082 getTsvDir(DIR, dirName), dirName + "_supp.tsv"); 1083 PrintWriter tsvCountFile = 1084 FileUtilities.openUTF8Writer( 1085 getTsvDir(DIR, dirName), dirName + "_supp_count.tsv"); ) { 1086 tsvFile.println("# Section\tPage\tHeader\tCode\tOld\tNew"); 1087 1088 Multimap<PathHeader, String> bcp = TreeMultimap.create(); 1089 Multimap<PathHeader, String> supplemental = TreeMultimap.create(); 1090 Multimap<PathHeader, String> transforms = TreeMultimap.create(); 1091 1092 Counter<PathHeader> countSame = new Counter<>(); 1093 Counter<PathHeader> countAdded = new Counter<>(); 1094 Counter<PathHeader> countDeleted = new Counter<>(); 1095 1096 for (String dir : new File(CLDRPaths.BASE_DIRECTORY + "common/").list()) { 1097 if (DtdType.ldml.directories.contains(dir) 1098 || dir.equals(".DS_Store") 1099 || dir.equals("dtd") // TODO as flat files 1100 || dir.equals("properties") // TODO as flat files 1101 || dir.equals("uca") // TODO as flat files 1102 ) { 1103 continue; 1104 } 1105 File dirOld = new File(PREV_CHART_VERSION_DIRECTORY + "common/" + dir); 1106 System.out.println("\tLast dir: " + dirOld); 1107 File dir2 = new File(CHART_VERSION_DIRECTORY + "common/" + dir); 1108 
System.out.println("\tCurr dir: " + dir2); 1109 1110 for (String file : dir2.list()) { 1111 if (!file.endsWith(".xml")) { 1112 continue; 1113 } 1114 String parentAndFile = dir + "/" + file; 1115 String base = file.substring(0, file.length() - 4); 1116 if (fileFilter != null && !fileFilter.reset(dir + "/" + base).find()) { 1117 if (verbose) { // && verbose_skipping 1118 System.out.println("SKIPPING: " + dir + "/" + base); 1119 } 1120 continue; 1121 } 1122 if (highLevelOnly && !HighLevelPaths.localeIsHighLevel(base)) { 1123 continue; 1124 } 1125 if (verbose) { 1126 System.out.println(file); 1127 } 1128 Relation<PathHeader, String> contentsOld = 1129 fillData(dirOld.toString() + "/", file, base); 1130 Relation<PathHeader, String> contents2 = 1131 fillData(dir2.toString() + "/", file, base); 1132 1133 Set<PathHeader> keys = 1134 new TreeSet<>( 1135 CldrUtility.ifNull( 1136 contentsOld.keySet(), 1137 Collections.<PathHeader>emptySet())); 1138 keys.addAll( 1139 CldrUtility.ifNull( 1140 contents2.keySet(), Collections.<PathHeader>emptySet())); 1141 DtdType dtdType = null; 1142 for (PathHeader key : keys) { 1143 String originalPath = key.getOriginalPath(); 1144 if (highLevelOnly && !HighLevelPaths.pathIsHighLevel(originalPath, base)) { 1145 continue; 1146 } 1147 boolean isTransform = originalPath.contains("/tRule"); 1148 if (dtdType == null) { 1149 dtdType = DtdType.fromPath(originalPath); 1150 } 1151 Multimap<PathHeader, String> target = 1152 dtdType == DtdType.ldmlBCP47 1153 ? bcp 1154 : isTransform ? 
transforms : supplemental; 1155 Set<String> setOld = contentsOld.get(key); 1156 Set<String> set2 = contents2.get(key); 1157 1158 if (Objects.equals(setOld, set2)) { 1159 if (file.equals(DEBUG_FILE)) { // for debugging 1160 System.out.println("**Same: " + key + "\t" + setOld); 1161 } 1162 addChange(parentAndFile, ChangeType.same, setOld.size()); 1163 countSame.add(key, 1); 1164 continue; 1165 } 1166 if (setOld == null) { 1167 addChange(parentAndFile, ChangeType.added, set2.size()); 1168 for (String s : set2) { 1169 addRow(target, key, "▷missing◁", s); 1170 countAdded.add(key, 1); 1171 } 1172 } else if (set2 == null) { 1173 addChange(parentAndFile, ChangeType.deleted, setOld.size()); 1174 for (String s : setOld) { 1175 addRow(target, key, s, "▷removed◁"); 1176 countDeleted.add(key, 1); 1177 } 1178 } else { 1179 Set<String> s1MOld = setOld; 1180 Set<String> s2M1 = set2; 1181 if (s1MOld.isEmpty()) { 1182 addRow(target, key, "▷missing◁", Joiner.on(", ").join(s2M1)); 1183 addChange(parentAndFile, ChangeType.added, s2M1.size()); 1184 countAdded.add(key, 1); 1185 } else if (s2M1.isEmpty()) { 1186 addRow(target, key, Joiner.on(", ").join(s1MOld), "▷removed◁"); 1187 addChange(parentAndFile, ChangeType.deleted, s1MOld.size()); 1188 countDeleted.add(key, 1); 1189 } else { 1190 String valueOld; 1191 String valueCurrent; 1192 1193 int[] sameAndNotInSecond = new int[2]; 1194 valueOld = getFilteredValue(s1MOld, s1MOld, sameAndNotInSecond); 1195 addChange(parentAndFile, ChangeType.same, sameAndNotInSecond[0]); 1196 countSame.add(key, 1); 1197 addChange(parentAndFile, ChangeType.deleted, sameAndNotInSecond[1]); 1198 sameAndNotInSecond[1] = 0; 1199 countDeleted.add(key, 1); 1200 valueCurrent = getFilteredValue(s2M1, s1MOld, sameAndNotInSecond); 1201 addChange(parentAndFile, ChangeType.added, sameAndNotInSecond[1]); 1202 addRow(target, key, valueOld, valueCurrent); 1203 countAdded.add(key, 1); 1204 } 1205 } 1206 } 1207 } 1208 } 1209 writeDiffs(anchors, "bcp47", "¤¤BCP47 " + 
chartNameCap, bcp, tsvFile); 1210 writeDiffs( 1211 anchors, 1212 "supplemental-data", 1213 "¤¤Supplemental " + chartNameCap, 1214 supplemental, 1215 tsvFile); 1216 writeDiffs(anchors, "transforms", "¤¤Transforms " + chartNameCap, transforms, tsvFile); 1217 1218 writeCounter(tsvCountFile, "CountSame", countSame); 1219 tsvCountFile.println(); 1220 writeCounter(tsvCountFile, "CountAdded", countAdded); 1221 tsvCountFile.println(); 1222 writeCounter(tsvCountFile, "CountDeleted", countDeleted); 1223 1224 // tsvFile.println("# EOF"); 1225 // tsvCountFile.println("# EOF"); 1226 } 1227 } 1228 writeCounter(PrintWriter tsvFile, String title, Counter<PathHeader> countDeleted)1229 private void writeCounter(PrintWriter tsvFile, String title, Counter<PathHeader> countDeleted) { 1230 tsvFile.append("# " + title + "\tSection\tPage\tSubhead\tCode\n\n"); 1231 for (R2<Long, PathHeader> entry : countDeleted.getEntrySetSortedByCount(false, null)) { 1232 tsvFile.println(entry.get0() + "\t" + entry.get1()); 1233 } 1234 } 1235 addRow( Multimap<PathHeader, String> target, PathHeader key, String oldItem, String newItem)1236 private void addRow( 1237 Multimap<PathHeader, String> target, PathHeader key, String oldItem, String newItem) { 1238 if (oldItem.isEmpty() || newItem.isEmpty()) { 1239 throw new IllegalArgumentException(); 1240 } 1241 target.put(key, oldItem + SEP + newItem); 1242 } 1243 1244 /** 1245 * Fill in the chart data for the specified file 1246 * 1247 * @param directory 1248 * @param file like "xx.xml" where "xx" may be a locale name 1249 * @param fileBase like "xx", same as file without ".xml" 1250 * @return the Relation 1251 */ fillData(String directory, String file, String fileBase)1252 private Relation<PathHeader, String> fillData(String directory, String file, String fileBase) { 1253 Relation<PathHeader, String> results = 1254 Relation.of(new TreeMap<PathHeader, Set<String>>(), TreeSet.class); 1255 1256 List<Pair<String, String>> contents1; 1257 try { 1258 contents1 = 1259 
XMLFileReader.loadPathValues( 1260 directory + file, new ArrayList<Pair<String, String>>(), true); 1261 } catch (Exception e) { 1262 /* 1263 * This happens with e = ICUException, file = grammaticalFeatures.xml in cldr-36.0 1264 */ 1265 return results; 1266 } 1267 DtdType dtdType = null; 1268 DtdData dtdData = null; 1269 Multimap<String, String> extras = TreeMultimap.create(); 1270 1271 for (Pair<String, String> s : contents1) { 1272 String path = s.getFirst(); 1273 if (highLevelOnly 1274 && !HighLevelPaths.pathIsHighLevel(path, fileBase /* locale, or not */)) { 1275 continue; 1276 } 1277 String value = s.getSecond(); 1278 if (dtdType == null) { 1279 /* 1280 * Note: although dtdType and dtdData depend on path, they are the same for all paths 1281 * in the same file, so they only need to be set the first time through this loop. 1282 * 1283 * Note: the current DTD in CLDR_BASE_DIR is supposed to be backward-compatible, that is, to support 1284 * paths from all archived versions. Any exception to that rule (e.g., for "grammaticalState") is a bug. 
1285 */ 1286 dtdType = DtdType.fromPath(path); 1287 dtdData = DtdData.getInstance(dtdType, CLDR_BASE_DIR); 1288 } 1289 XPathParts pathPlain = XPathParts.getFrozenInstance(path); 1290 try { 1291 if (dtdData.isMetadata(pathPlain)) { 1292 continue; 1293 } 1294 } catch (NullPointerException e) { 1295 /* 1296 * TODO: this happens for "grammaticalState" in this path from version 37: 1297 * //supplementalData/grammaticalData/grammaticalFeatures[@targets="nominal"][@locales="he"]/grammaticalState[@values="definite indefinite construct"] 1298 * Reference: https://unicode-org.atlassian.net/browse/CLDR-13306 1299 */ 1300 System.out.println( 1301 "Caught NullPointerException in fillData calling isMetadata, path = " 1302 + path); 1303 continue; 1304 } 1305 Set<String> pathForValues = dtdData.getRegularizedPaths(pathPlain, extras); 1306 if (pathForValues != null) { 1307 for (String pathForValue : pathForValues) { 1308 PathHeader pathHeader = phf.fromPath(pathForValue); 1309 if (pathHeader.getPageId() == PageId.Suppress) { 1310 continue; 1311 } 1312 Splitter splitter = DtdData.getValueSplitter(pathPlain); 1313 for (String line : splitter.split(value)) { 1314 // special case # in transforms 1315 if (isComment(pathPlain, line)) { 1316 continue; 1317 } 1318 results.put(pathHeader, line); 1319 } 1320 } 1321 } 1322 for (Entry<String, Collection<String>> entry : extras.asMap().entrySet()) { 1323 final String extraPath = entry.getKey(); 1324 final PathHeader pathHeaderExtra = phf.fromPath(extraPath); 1325 if (pathHeaderExtra.getPageId() == PageId.Suppress) { 1326 continue; 1327 } 1328 final Collection<String> extraValue = entry.getValue(); 1329 if (isExtraSplit(extraPath)) { 1330 for (String items : extraValue) { 1331 results.putAll(pathHeaderExtra, DtdData.SPACE_SPLITTER.splitToList(items)); 1332 } 1333 } else { 1334 results.putAll(pathHeaderExtra, extraValue); 1335 } 1336 } 1337 if (pathForValues == null && !value.isEmpty()) { 1338 System.err.println("Shouldn't happen"); 1339 } 1340 } 
return results;
    }

    /**
     * Should the values of this "extra" (attribute) path be split on spaces into separate items?
     *
     * @param extraPath the path of the extra value
     * @return true only for "/_type" paths under //supplementalData/metaZones/mapTimezones
     */
    private boolean isExtraSplit(String extraPath) {
        return extraPath.endsWith("/_type")
                && extraPath.startsWith("//supplementalData/metaZones/mapTimezones");
    }

    /**
     * Is the given value line a comment line in a transform?
     *
     * @param pathPlain the parsed path the line belongs to
     * @param line one line of the value
     * @return true if the path contains "transform" and the line starts with "#"
     */
    private static boolean isComment(XPathParts pathPlain, String line) {
        return pathPlain.contains("transform") && line.startsWith("#");
    }

    /**
     * Determine when changes to the values for paths should be treated as potentially "disruptive"
     * for the purpose of "churn" reporting
     *
     * <p>Static nested class: it uses no state of the enclosing ChartDelta instance.
     */
    private static class SuspiciousChange {
        /**
         * Characters removed by {@link #removeWhitespace}: whitespace as defined by regex \s,
         * plus U+00A0 NO-BREAK SPACE, U+2007 FIGURE SPACE and U+202F NARROW NO-BREAK SPACE.
         * Compiled once rather than on every call.
         */
        private static final Pattern WHITESPACE_TO_REMOVE =
                Pattern.compile("[\\s\\u00A0\\u2007\\u202F]");

        /** the old and new values, such as "HH:mm–HH:mm v" and "HH:mm – HH:mm v" */
        private final String oldValue, newValue;

        /**
         * the path, such as
         * //ldml/dates/calendars/calendar[@type="gregorian"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id="Hmv"]/greatestDifference[@id="H"]
         */
        private final String path;

        /**
         * the locale (such as "doi") in which the path was found, or null, or possibly the base
         * file name without extension, like "xx" if the file name is "xx.xml", where "xx" may or
         * may not be a locale; e.g., "supplementalData"
         */
        private final String locale;

        SuspiciousChange(String oldValue, String newValue, String path, String locale) {
            this.oldValue = oldValue;
            this.newValue = newValue;
            this.path = path;
            this.locale = locale;
        }

        /**
         * Is the change from the old value to the new value, for this path and locale, potentially
         * disruptive?
         *
         * @return true or false
         */
        public boolean isDisruptive() {
            /*
             * OR, not AND: certain changes in value are disruptive even for paths not
             * otherwise treated as high-level, and changes for high-level paths are
             * disruptive even if the changes in values themselves are not identified
             * as disruptive.
             */
            return valueChangeIsDisruptive() || HighLevelPaths.pathIsHighLevel(path, locale);
        }

        /**
         * Is the change from the old value to the current value potentially disruptive, based
         * (primarily) on the values themselves?
         *
         * @return true or false; always false if either value is null or the values are equal
         */
        private boolean valueChangeIsDisruptive() {
            if (oldValue == null || newValue == null || oldValue.equals(newValue)) {
                return false;
            }
            return valueChangeIsDisruptiveWhitespaceOnly();
        }

        /**
         * Is the change disruptive whitespace only? Per design doc, "Format changes: second to none
         * on the disruptiveness scale are changes involving spaces such as SPACE -> NBSP or NBSP ->
         * Narrow NBSP. Or adding a space somewhere in the format where previously there was none."
         *
         * @return true or false
         */
        private boolean valueChangeIsDisruptiveWhitespaceOnly() {
            /*
             * annotations often have changes like "pop gorn", "popgorn", not treated as disruptive
             */
            if (path.startsWith("//ldml/annotations")) {
                return false;
            }
            return removeWhitespace(oldValue).equals(removeWhitespace(newValue));
        }

        /**
         * Remove whitespace from the given string
         *
         * <p>Remove whitespace as defined by regex \s, and also U+00A0 NO-BREAK SPACE U+2007 FIGURE
         * SPACE U+202F NARROW NO-BREAK SPACE
         *
         * @param s the string
         * @return the modified string
         */
        private String removeWhitespace(String s) {
            return WHITESPACE_TO_REMOVE.matcher(s).replaceAll("");
        }
    }

    /**
     * Determine which paths are considered "high-level" paths, i.e., paths for which any changes
     * have high potential to cause disruptive "churn". Whether a path is high-level sometimes
     * depends on the locale or xml file in which it occurs. Some paths are high-level regardless of
     * the locale in which they are located. Other paths are high-level for some locales but not
     * others. For example, //ldml/localeDisplayNames/languages/language[@type="xx"] is high level
     * in locale "xx", and maybe "en", but not for all locales.
     */
    private static class HighLevelPaths {
        /**
         * A set of paths to be treated as "high-level". These are complete paths to be matched
         * exactly. Other paths are recognized by special functions like isHighLevelTerritoryName.
         *
         * <p>The ordering and comments are based on the design spec.
         */
        private static final Set<String> highLevelPaths =
                new HashSet<>(
                        Arrays.asList(
                                /*
                                 * Core data
                                 */
                                "//ldml/characters/exemplarCharacters",
                                "//ldml/numbers/defaultNumberingSystem",
                                "//ldml/numbers/otherNumberingSystems/native",
                                /*
                                 * Territory and Language names
                                 *  Country/Region names (English and Native names) -- see isHighLevelTerritoryName
                                 *   //ldml/localeDisplayNames/territories/territory/...
                                 *  Language names (English and Native) -- see isHighLevelLangName
                                 *   //ldml/localeDisplayNames/languages/language/...
                                 */
                                /*
                                 * Date
                                 * Note: "year", "month", etc., below, form a subset (eight) of all possible values for type,
                                 * excluding, for example, "fri" and "zone". If we use starred paths, we would need further complication
                                 * to filter out "fri", "zone", etc.
                                 */
                                "//ldml/dates/fields/field[@type=\"year\"]/displayName",
                                "//ldml/dates/fields/field[@type=\"month\"]/displayName",
                                "//ldml/dates/fields/field[@type=\"week\"]/displayName",
                                "//ldml/dates/fields/field[@type=\"day\"]/displayName",
                                "//ldml/dates/fields/field[@type=\"hour\"]/displayName",
                                "//ldml/dates/fields/field[@type=\"era\"]/displayName",
                                "//ldml/dates/fields/field[@type=\"minute\"]/displayName",
                                "//ldml/dates/fields/field[@type=\"second\"]/displayName",
                                /*
                                 * First day of week: firstDay in supplementalData.xml; see isHighLevelFirstDay
                                 * First week of year: see isHighLevelWeekOfPreference
                                 */
                                "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"full\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
                                "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"long\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
                                "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"medium\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
                                "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength[@type=\"short\"]/dateFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
                                "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"MMMEd\"]",
                                "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem[@id=\"MEd\"]",
                                /*
                                 * Time
                                 */
                                "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"full\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
                                "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"long\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
                                "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"medium\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
                                "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength[@type=\"short\"]/timeFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
                                "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"am\"]",
                                "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=\"am\"]",
                                "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"pm\"]",
                                "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=\"pm\"]",
                                /*
                                 * Currency (English and Native) -- see isHighLevelCurrencyName
                                 * E.g., //ldml/numbers/currencies/currency[@type=\"KRW\"]/displayName"
                                 *
                                 * ISO Currency Code: SupplementalData.xml match <region iso3166> -- see isHighLevelCurrencyCode
                                 */
                                /*
                                 * Currency Formats
                                 *  a. Currency thousand separator
                                 *  b. Currency decimal separator
                                 *  c. Currency Symbol //ldml/numbers/currencies/currency[@type="CNY"]/symbol
                                 *  d. Currency Symbol Narrow //ldml/numbers/currencies/currency[@type=\"CNY\"]/symbol[@alt=\"narrow\"]"
                                 *
                                 * See isHighLevelCurrencySeparatorOrSymbol
                                 */
                                "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
                                "//ldml/numbers/currencyFormats[@numberSystem=\"arab\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
                                /*
                                 * Number Symbols
                                 */
                                "//ldml/numbers/minimumGroupingDigits",
                                "//ldml/numbers/symbols[@numberSystem=\"latn\"]/decimal",
                                "//ldml/numbers/symbols[@numberSystem=\"latn\"]/group",
                                "//ldml/numbers/symbols[@numberSystem=\"arab\"]/decimal",
                                "//ldml/numbers/symbols[@numberSystem=\"arab\"]/group",
                                /*
                                 * Number formats
                                 */
                                "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
                                "//ldml/numbers/percentFormats[@numberSystem=\"latn\"]/percentFormatLength/percentFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
                                "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"accounting\"]/pattern[@type=\"standard\"]",
                                "//ldml/numbers/decimalFormats[@numberSystem=\"arab\"]/decimalFormatLength/decimalFormat[@type=\"standard\"]/pattern[@type=\"standard\"]",
                                "//ldml/numbers/percentFormats[@numberSystem=\"arab\"]/percentFormatLength/percentFormat[@type=\"standard\"]/pattern[@type=\"standard\"]"
                                /*
                                 * "Complementary Observations"
                                 */
                                /*
                                 * Changes to language aliases (supplementalMetaData) -- see isHighLevelLangAlias
                                 * E.g., //supplementalData/metadata/alias/languageAlias[@type="aar"]
                                 */
                                /*
                                 * Changes in the containment graph -- see isHighLevelTerritoryContainment
                                 * Data mostly (or entirely?) from M49 standard, thus CLDR has limited control.
                                 * Users use the containment graph in a variety of ways.
                                 * E.g., //supplementalData/territoryContainment/group[@type="003"][@contains="013 021 029"]
                                 */
                                /*
                                 * Format changes: second to none on the disruptiveness scale are changes involving spaces such as SPACE -> NBSP
                                 * or NBSP -> Narrow NBSP. Or adding a space somewhere in the format where previously there was none.
                                 * -- see SuspiciousChange.valueChangeIsDisruptiveWhitespaceOnly
                                 */
                                /*
                                 * TODO: per design doc, "Adding a timezone"
                                 * TODO: per design doc, "Changes of symbols or codes that are cross-locale in some way such as the unknown
                                 *  currency symbol change '???' -> '¤'."
                                 * TODO: per design doc, "Change in character properties (not a CLDR but a Unicode change), and here especially
                                 *  newly adding or removing punctuation. Frequently irritates parsers."
                                 */
                                ));

        /** Matches currency display-name paths, with or without trailing attributes. */
        private static final Pattern currencyPattern =
                Pattern.compile("^//ldml/numbers/currencies/currency.*/displayName.*");

        /**
         * Should the given path in the given locale be taken into account for generating "churn"
         * reports?
         *
         * @param path the path of interest
         * @param locale the locale in which the path was found, or null, or possibly the base file
         *     name without extension, like "xx" if the file name is "xx.xml", where "xx" may or may
         *     not be a locale; e.g., "supplementalData"
         * @return true if it counts, else false to ignore
         */
        private static boolean pathIsHighLevel(String path, String locale) {
            if (path == null || locale == null) {
                return false;
            }
            if (!localeIsHighLevel(locale)) {
                // for efficiency, this should be caught at a higher level
                System.out.println(
                        "locale ["
                                + locale
                                + "] failed localeIsHighLevel in pathIsHighLevel; path = "
                                + path);
                return false;
            }
            if (!pathIsReallyHighLevel(path, locale)) {
                return false;
            }
            if (verboseHighLevelReporting) {
                recordHighLevelMatch(path);
            }
            return true;
        }

        /**
         * Does the path match the exact-path set or any of the special-case matchers? The checks
         * run in the order listed and stop at the first match.
         *
         * @param path the path of interest; not null
         * @param locale the locale or base file name; not null
         * @return true if any check matches
         */
        private static boolean pathIsReallyHighLevel(String path, String locale) {
            return highLevelPaths.contains(path)
                    || isHighLevelTerritoryName(path, locale)
                    || isHighLevelLangName(path, locale)
                    || isHighLevelCurrencyName(path, locale)
                    || isHighLevelCurrencyCode(path, locale)
                    || isHighLevelCurrencySeparatorOrSymbol(path, locale)
                    || isHighLevelLangAlias(path, locale)
                    || isHighLevelTerritoryContainment(path, locale)
                    || isHighLevelFirstDay(path, locale)
                    || isHighLevelWeekOfPreference(path, locale);
        }

        /**
         * Is the given locale, or base name, to be considered for "high level" churn report?
         *
         * @param locale the locale string, or base name like "supplementalData" as in
         *     "supplementalData.xml"
         * @return true or false
         */
        private static boolean localeIsHighLevel(String locale) {
            return SubmissionLocales.CLDR_OR_HIGH_LEVEL_LOCALES.contains(locale)
                    || "supplementalData".equals(locale);
        }

        /**
         * Changes to language aliases (supplemental metadata) E.g.,
         * //supplementalData/metadata/alias/languageAlias[@type="aar"]
         *
         * @param path
         * @param locale must be "supplementalData" to match
         * @return true or false
         */
        private static boolean isHighLevelLangAlias(String path, String locale) {
            return "supplementalData".equals(locale)
                    && path.startsWith("//supplementalData/metadata/alias/languageAlias");
        }

        /**
         * Changes in the containment graph Data mostly (or entirely?) from M49 standard, thus CLDR
         * has limited control. Users use the containment graph in a variety of ways. E.g.,
         * //supplementalData/territoryContainment/group[@type="003"][@contains="013 021 029"]
         *
         * @param path
         * @param locale must be "supplementalData" to match
         * @return true or false
         */
        private static boolean isHighLevelTerritoryContainment(String path, String locale) {
            return "supplementalData".equals(locale)
                    && path.startsWith("//supplementalData/territoryContainment");
        }

        /**
         * Is the given path a high-level territory name path in the given locale?
         *
         * <p>E.g., //ldml/localeDisplayNames/territories/territory[@type="NNN"] if type "NNN"
         * CORRESPONDS TO the locale or the locale is "en"
         *
         * <p>English names (en.xml): match all types Native: check each territory type NNN
         * corresponding to the given locale
         *
         * <p>Exclude "alt"
         *
         * @param path
         * @param locale
         * @return true or false
         */
        private static boolean isHighLevelTerritoryName(String path, String locale) {
            if (!path.startsWith("//ldml/localeDisplayNames/territories/territory")
                    || path.contains("[@alt=")) {
                return false;
            }
            if ("en".equals(locale)) {
                return true;
            }
            CoverageVariableInfo cvi = SUPPLEMENTAL_DATA_INFO.getCoverageVariableInfo(locale);
            if (cvi != null) {
                for (String type : cvi.targetTerritories) {
                    if (path.contains("[@type=\"" + type + "\"]")) {
                        return true;
                    }
                }
            }
            return false;
        }

        /**
         * Is the given path a high-level language name path in the given locale?
         *
         * <p>E.g., //ldml/localeDisplayNames/languages/language[@type="xx"] if type "xx" matches
         * the locale or the locale is "en"
         *
         * <p>Exclude "alt"
         *
         * @param path
         * @param locale
         * @return true or false
         */
        private static boolean isHighLevelLangName(String path, String locale) {
            if (!path.startsWith("//ldml/localeDisplayNames/languages/language")
                    || path.contains("[@alt=")) {
                return false;
            }
            /*
             * English names (en.xml): match all types
             * Native names: match the type="xx" of each xml file to identify the Native. E.g., type=ko if ko.xml
             */
            return "en".equals(locale) || path.contains("[@type=\"" + locale + "\"]");
        }

        /**
         * Is the given path a high-level currency name path in the given locale?
         *
         * <p>E.g., //ldml/numbers/currencies/currency[@type=\"AAA\"]/displayName if type "AAA"
         * CORRESPONDS TO the locale or the locale is "en"
         *
         * <p>English names (en.xml): match all types Native: check each currency type AAA
         * corresponding to the given locale
         *
         * <p>Do NOT exclude "alt"; e.g.,
         * //ldml/numbers/currencies/currency[@type="ADP"]/displayName[@alt="proposed-u167-1"]
         *
         * @param path
         * @param locale
         * @return true or false
         */
        private static boolean isHighLevelCurrencyName(String path, String locale) {
            if (!currencyPattern.matcher(path).matches()) {
                return false;
            }
            if ("en".equals(locale)) {
                return true;
            }
            CoverageVariableInfo cvi = SUPPLEMENTAL_DATA_INFO.getCoverageVariableInfo(locale);
            if (cvi != null) {
                for (String type : cvi.targetCurrencies) {
                    if (path.contains("[@type=\"" + type + "\"]")) {
                        return true;
                    }
                }
            }
            return false;
        }

        /**
         * Is the given path a high-level currency code path in the given locale?
         *
         * <p>E.g.,
         * //supplementalData/currencyData/region[@iso3166="AC"]/currency[@iso4217="SHP"][@from="1976-01-01"]
         *
         * @param path
         * @param locale must be "supplementalData" to match
         * @return true or false
         */
        private static boolean isHighLevelCurrencyCode(String path, String locale) {
            return "supplementalData".equals(locale) && path.contains("iso3166");
        }

        /**
         * Is the given path a high-level currency thousands-separator or decimal-separator path in
         * the given locale?
         *
         * <p>E.g., //ldml/numbers/currencies/currency[@type="ESP"]/group
         * //ldml/numbers/currencies/currency[@type="ESP"]/decimal
         * //ldml/numbers/currencies/currency[@type="CNY"]/symbol
         * //ldml/numbers/currencies/currency[@type="CNY"]/symbol[@alt="narrow"]"
         *
         * @param path
         * @param locale not used by this check
         * @return true or false
         */
        private static boolean isHighLevelCurrencySeparatorOrSymbol(String path, String locale) {
            return path.startsWith("//ldml/numbers/currencies/currency")
                    && (path.contains("group")
                            || path.contains("decimal")
                            || path.contains("symbol"));
        }

        /**
         * Is the given path a high-level weekData/firstDay in the given locale?
         *
         * <p>E.g.,//supplementalData/weekData/firstDay[@day="fri"][@territories="MV"]
         *
         * @param path
         * @param locale must be "supplementalData" to match
         * @return true or false
         */
        private static boolean isHighLevelFirstDay(String path, String locale) {
            return "supplementalData".equals(locale)
                    && path.startsWith("//supplementalData/weekData/firstDay");
        }

        /**
         * Is the given path a high-level weekOfPreference in the given locale?
         *
         * <p>E.g.,
         * //supplementalData/weekData/weekOfPreference[@ordering="weekOfYear"][@locales="und"]
         *
         * @param path
         * @param locale must be "supplementalData" to match
         * @return true or false
         */
        private static boolean isHighLevelWeekOfPreference(String path, String locale) {
            return "supplementalData".equals(locale)
                    && path.startsWith("//supplementalData/weekData/weekOfPreference");
        }

        /** For debugging, testing */
        private static Set<String> highLevelPathMatched = null;

        private static boolean verboseHighLevelReporting = false;

        /** For debugging: remember that the given path matched, creating the set on first use. */
        private static void recordHighLevelMatch(String path) {
            if (highLevelPathMatched == null) {
                highLevelPathMatched = new HashSet<>();
            }
            highLevelPathMatched.add(path);
        }

        /** For debugging, report on any paths in highLevelPaths that never matched */
        private static void reportHighLevelPathUsage() {
            if (!verboseHighLevelReporting) {
                return;
            }
            if (highLevelPathMatched == null) {
                System.out.println("Zero high-level paths were matched!");
                return;
            }
            for (String path : highLevelPaths) {
                if (!highLevelPathMatched.contains(path)) {
                    System.out.println("Unmatched high-level path: " + path);
                }
            }
            for (String path : highLevelPathMatched) {
                if (!highLevelPaths.contains(path)) {
                    System.out.println("Special matched high-level path: " + path);
                }
            }
        }
    }
}