1 package org.unicode.cldr.json; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.collect.Lists; 5 import com.google.gson.Gson; 6 import com.google.gson.GsonBuilder; 7 import com.google.gson.JsonArray; 8 import com.google.gson.JsonElement; 9 import com.google.gson.JsonObject; 10 import com.google.gson.JsonPrimitive; 11 import com.ibm.icu.number.IntegerWidth; 12 import com.ibm.icu.number.LocalizedNumberFormatter; 13 import com.ibm.icu.number.NumberFormatter; 14 import com.ibm.icu.number.Precision; 15 import com.ibm.icu.text.MessageFormat; 16 import com.ibm.icu.util.NoUnit; 17 import com.ibm.icu.util.ULocale; 18 import java.io.BufferedReader; 19 import java.io.File; 20 import java.io.IOException; 21 import java.io.PrintWriter; 22 import java.text.ParseException; 23 import java.util.ArrayList; 24 import java.util.Arrays; 25 import java.util.Collections; 26 import java.util.Iterator; 27 import java.util.LinkedList; 28 import java.util.List; 29 import java.util.Locale; 30 import java.util.Map; 31 import java.util.Map.Entry; 32 import java.util.Optional; 33 import java.util.Set; 34 import java.util.TreeMap; 35 import java.util.TreeSet; 36 import java.util.concurrent.atomic.AtomicInteger; 37 import java.util.logging.Logger; 38 import java.util.regex.Matcher; 39 import java.util.regex.Pattern; 40 import java.util.stream.Collectors; 41 import org.unicode.cldr.draft.FileUtilities; 42 import org.unicode.cldr.draft.ScriptMetadata; 43 import org.unicode.cldr.draft.ScriptMetadata.Info; 44 import org.unicode.cldr.tool.Option.Options; 45 import org.unicode.cldr.util.Annotations; 46 import org.unicode.cldr.util.CLDRConfig; 47 import org.unicode.cldr.util.CLDRFile; 48 import org.unicode.cldr.util.CLDRFile.DraftStatus; 49 import org.unicode.cldr.util.CLDRLocale; 50 import org.unicode.cldr.util.CLDRPaths; 51 import org.unicode.cldr.util.CLDRTool; 52 import org.unicode.cldr.util.CLDRURLS; 53 import org.unicode.cldr.util.CalculatedCoverageLevels; 54 import org.unicode.cldr.util.CldrUtility; 55 import org.unicode.cldr.util.CoverageInfo; 56 import org.unicode.cldr.util.DtdData; 57 import org.unicode.cldr.util.DtdType; 58 import org.unicode.cldr.util.Factory; 59 import org.unicode.cldr.util.FileCopier; 60 import org.unicode.cldr.util.GlossonymConstructor; 61 import org.unicode.cldr.util.Level; 62 import org.unicode.cldr.util.LocaleIDParser; 63 import org.unicode.cldr.util.Pair; 64 import org.unicode.cldr.util.PatternCache; 65 import org.unicode.cldr.util.StandardCodes; 66 import org.unicode.cldr.util.SupplementalDataInfo; 67 import org.unicode.cldr.util.Timer; 68 import org.unicode.cldr.util.XMLSource; 69 import org.unicode.cldr.util.XPathParts; 70 71 /** 72 * Utility methods to extract data from CLDR repository and export it in JSON format. 73 * 74 * @author shanjian / emmons 75 */ 76 @CLDRTool(alias = "ldml2json", description = "Convert CLDR data to JSON") 77 public class Ldml2JsonConverter { 78 // Icons 79 private static final String DONE_ICON = "✅"; 80 private static final String GEAR_ICON = "⚙️"; 81 private static final String NONE_ICON = "∅"; 82 private static final String PACKAGE_ICON = ""; 83 private static final String SECTION_ICON = ""; 84 private static final String TYPE_ICON = ""; 85 private static final String WARN_ICON = "⚠️"; 86 87 // File prefix 88 private static final String CLDR_PKG_PREFIX = "cldr-"; 89 private static final String FULL_TIER_SUFFIX = "-full"; 90 private static final String MODERN_TIER_SUFFIX = "-modern"; 91 private static Logger logger = Logger.getLogger(Ldml2JsonConverter.class.getName()); 92 93 enum RunType { 94 all, // number zero 95 main, 96 supplemental(false, false), // aka 'cldr-core' 97 segments, 98 rbnf(false, true), 99 annotations, 100 annotationsDerived, 101 bcp47(false, false); 102 103 private final boolean isTiered; 104 private final boolean hasLocales; 105 RunType()106 RunType() { 107 this.isTiered = true; 108 this.hasLocales = true; 109 } 110 RunType(boolean isTiered, boolean hasLocales)111 RunType(boolean isTiered, boolean hasLocales) { 112 this.isTiered = isTiered; 113 this.hasLocales = hasLocales; 114 } 115 /** 116 * Is it split into modern/full? 117 * 118 * @return 119 */ tiered()120 public boolean tiered() { 121 return isTiered; 122 } 123 /** 124 * Does it have locale IDs? 125 * 126 * @return 127 */ locales()128 public boolean locales() { 129 return hasLocales; 130 } 131 /** 132 * return the options as a pipe-delimited list 133 * 134 * @return 135 */ valueList()136 public static String valueList() { 137 return String.join( 138 "|", 139 Lists.newArrayList(RunType.values()).stream() 140 .map(t -> t.name()) 141 .toArray(String[]::new)); 142 } 143 } 144 145 private static final StandardCodes sc = StandardCodes.make(); 146 private Set<String> defaultContentLocales = 147 SupplementalDataInfo.getInstance().getDefaultContentLocales(); 148 private Set<String> skippedDefaultContentLocales = new TreeSet<>(); 149 150 private class AvailableLocales { 151 Set<String> modern = new TreeSet<>(); 152 Set<String> full = new TreeSet<>(); 153 } 154 155 private AvailableLocales avl = new AvailableLocales(); 156 private Gson gson = new GsonBuilder().setPrettyPrinting().disableHtmlEscaping().create(); 157 private static final Options options = 158 new Options( 159 "Usage: LDML2JsonConverter [OPTIONS] [FILES]\n" 160 + "This program converts CLDR data to the JSON format.\n" 161 + "Please refer to the following options. \n" 162 + "\texample: org.unicode.cldr.json.Ldml2JsonConverter -c xxx -d yyy") 163 .add( 164 "bcp47", 165 'B', 166 "(true|false)", 167 "true", 168 "Whether to strictly use BCP47 tags in filenames and data. Defaults to true.") 169 .add( 170 "bcp47-no-subtags", 171 'T', 172 "(true|false)", 173 "true", 174 "In BCP47 mode, ignore locales with subtags such as en-US-u-va-posix. Defaults to true.") 175 .add( 176 "commondir", 177 'c', 178 ".*", 179 CLDRPaths.COMMON_DIRECTORY, 180 "Common directory for CLDR files, defaults to CldrUtility.COMMON_DIRECTORY") 181 .add( 182 "destdir", 183 'd', 184 ".*", 185 CLDRPaths.GEN_DIRECTORY, 186 "Destination directory for output files, defaults to CldrUtility.GEN_DIRECTORY") 187 .add( 188 "match", 189 'm', 190 ".*", 191 ".*", 192 "Regular expression to define only specific locales or files to be generated") 193 .add( 194 "type", 195 't', 196 "(" + RunType.valueList() + ")", 197 "all", 198 "Type of CLDR data being generated, such as main, supplemental, or segments. All gets all.") 199 .add( 200 "resolved", 201 'r', 202 "(true|false)", 203 "false", 204 "Whether the output JSON for the main directory should be based on resolved or unresolved data") 205 .add( 206 "Redundant", 207 'R', 208 "(true|false)", 209 "false", 210 "Include redundant data from code-fallback and constructed") 211 .add( 212 "draftstatus", 213 's', 214 "(approved|contributed|provisional|unconfirmed)", 215 "unconfirmed", 216 "The minimum draft status of the output data") 217 .add( 218 "coverage", 219 'l', 220 "(minimal|basic|moderate|modern|comprehensive|optional)", 221 "optional", 222 "The maximum coverage level of the output data") 223 .add( 224 "packagelist", 225 'P', 226 "(true|false)", 227 "true", 228 "Whether to output PACKAGES.md and cldr-core/cldr-packages.json (during supplemental/cldr-core)") 229 .add( 230 "fullnumbers", 231 'n', 232 "(true|false)", 233 "false", 234 "Whether the output JSON should output data for all numbering systems, even those not used in the locale") 235 .add( 236 "other", 237 'o', 238 "(true|false)", 239 "false", 240 "Whether to write out the 'other' section, which contains any unmatched paths") 241 .add( 242 "packages", 243 'p', 244 "(true|false)", 245 "false", 246 "Whether to group data files into installable packages") 247 .add( 248 "identity", 249 'i', 250 "(true|false)", 251 "true", 252 "Whether to copy the identity info into all sections containing data") 253 .add("konfig", 'k', ".*", null, "LDML to JSON configuration file") 254 .add( 255 "pkgversion", 256 'V', 257 ".*", 258 getDefaultVersion(), 259 "Version to be used in writing package files") 260 .add( 261 "Modern", 262 'M', 263 "(true|false)", 264 "true", 265 "Whether to include the -modern tier") 266 // Primarily useful for non-Maven build systems where CldrUtility.LICENSE may 267 // not be available as it is put in place by pom.xml 268 .add( 269 "license-file", 270 'L', 271 ".*", 272 "", 273 "Override the license file included in the bundle"); 274 main(String[] args)275 public static void main(String[] args) throws Exception { 276 System.out.println(GEAR_ICON + " " + Ldml2JsonConverter.class.getName() + " options:"); 277 options.parse(args, true); 278 279 Timer overallTimer = new Timer(); 280 overallTimer.start(); 281 final String rawType = options.get("type").getValue(); 282 283 if (RunType.all.name().equals(rawType)) { 284 // Running all types 285 for (final RunType t : RunType.values()) { 286 if (t == RunType.all) continue; 287 System.out.println(); 288 System.out.println( 289 TYPE_ICON + "####################### " + t + " #######################"); 290 Timer subTimer = new Timer(); 291 subTimer.start(); 292 processType(t.name()); 293 System.out.println( 294 TYPE_ICON + " " + t + "\tFinished in " + subTimer.toMeasureString()); 295 System.out.println(); 296 } 297 } else { 298 processType(rawType); 299 } 300 301 System.out.println( 302 "\n\n###\n\n" 303 + DONE_ICON 304 + " Finished everything in " 305 + overallTimer.toMeasureString()); 306 } 307 processType(final String runType)308 static void processType(final String runType) throws Exception { 309 Ldml2JsonConverter l2jc = 310 new Ldml2JsonConverter( 311 options.get("commondir").getValue(), 312 options.get("destdir").getValue(), 313 runType, 314 Boolean.parseBoolean(options.get("fullnumbers").getValue()), 315 Boolean.parseBoolean(options.get("resolved").getValue()), 316 options.get("coverage").getValue(), 317 options.get("match").getValue(), 318 Boolean.parseBoolean(options.get("packages").getValue()), 319 options.get("konfig").getValue(), 320 options.get("pkgversion").getValue(), 321 Boolean.parseBoolean(options.get("bcp47").getValue()), 322 Boolean.parseBoolean(options.get("bcp47-no-subtags").getValue()), 323 Boolean.parseBoolean(options.get("Modern").getValue()), 324 Boolean.parseBoolean(options.get("Redundant").getValue()), 325 Optional.ofNullable(options.get("license-file").getValue()) 326 .filter(s -> !s.isEmpty())); 327 328 DraftStatus status = DraftStatus.valueOf(options.get("draftstatus").getValue()); 329 l2jc.processDirectory(runType, status); 330 } 331 332 // The CLDR file directory where those official XML files will be found. 333 private String cldrCommonDir; 334 // Where the generated JSON files will be stored. 335 private String outputDir; 336 // Whether data in main should output all numbering systems, even those not in use in the 337 // locale. 338 private boolean fullNumbers; 339 // Whether data in main should be resolved for output. 340 private boolean resolve; 341 // Used to match specific locales for output 342 private String match; 343 // Used to filter based on coverage 344 private int coverageValue; 345 // Whether we should write output files into installable packages 346 private boolean writePackages; 347 // Type of run for this converter: main, supplemental, or segments 348 private final RunType type; 349 // include Redundant data such as apc="apc", en_US="en (US)" 350 private boolean includeRedundant; 351 352 static class JSONSection implements Comparable<JSONSection> { 353 public String section; 354 public Pattern pattern; 355 public String packageName; 356 357 @Override compareTo(JSONSection other)358 public int compareTo(JSONSection other) { 359 return section.compareTo(other.section); 360 } 361 } 362 363 private Map<String, String> dependencies; 364 private List<JSONSection> sections; 365 private Set<String> packages; 366 private final String pkgVersion; 367 private final boolean strictBcp47; 368 private final boolean writeModernPackage; 369 private final Optional<String> licenseFile; 370 private final boolean skipBcp47LocalesWithSubtags; 371 private LdmlConfigFileReader configFileReader; 372 Ldml2JsonConverter( String cldrDir, String outputDir, String runType, boolean fullNumbers, boolean resolve, String coverage, String match, boolean writePackages, String configFile, String pkgVersion, boolean strictBcp47, boolean skipBcp47LocalesWithSubtags, boolean writeModernPackage, boolean includeRedundant, Optional<String> licenseFile)373 public Ldml2JsonConverter( 374 String cldrDir, 375 String outputDir, 376 String runType, 377 boolean fullNumbers, 378 boolean resolve, 379 String coverage, 380 String match, 381 boolean writePackages, 382 String configFile, 383 String pkgVersion, 384 boolean strictBcp47, 385 boolean skipBcp47LocalesWithSubtags, 386 boolean writeModernPackage, 387 boolean includeRedundant, 388 Optional<String> licenseFile) { 389 this.writeModernPackage = writeModernPackage; 390 this.strictBcp47 = strictBcp47; 391 this.skipBcp47LocalesWithSubtags = strictBcp47 && skipBcp47LocalesWithSubtags; 392 this.cldrCommonDir = cldrDir; 393 this.outputDir = outputDir; 394 try { 395 this.type = RunType.valueOf(runType); 396 } catch (IllegalArgumentException | NullPointerException e) { 397 throw new RuntimeException( 398 "runType (-t) invalid: " + runType + " must be one of " + RunType.valueList(), 399 e); 400 } 401 this.fullNumbers = fullNumbers; 402 this.resolve = resolve; 403 this.match = match; 404 this.writePackages = writePackages; 405 this.coverageValue = Level.get(coverage).getLevel(); 406 this.pkgVersion = pkgVersion; 407 408 LdmlConvertRules.addVersionHandler(pkgVersion.split("\\.")[0]); 409 410 configFileReader = new LdmlConfigFileReader(); 411 configFileReader.read(configFile, type); 412 this.dependencies = configFileReader.getDependencies(); 413 this.sections = configFileReader.getSections(); 414 this.packages = new TreeSet<>(); 415 this.includeRedundant = includeRedundant; 416 this.licenseFile = licenseFile; 417 } 418 419 /** 420 * @see XPathParts#addInternal 421 */ 422 static final Pattern ANNOTATION_CP_REMAP = 423 PatternCache.get("^(.*)\\[@cp=\"(\\[|\\]|'|\"|@|/|=)\"\\](.*)$"); 424 425 /** 426 * Transform the path by applying PATH_TRANSFORMATIONS rules. 427 * 428 * @param pathStr The path string being transformed. 429 * @return The transformed path. 430 */ transformPath(final String pathStr, final String pathPrefix)431 private String transformPath(final String pathStr, final String pathPrefix) { 432 String result = pathStr; 433 434 // handle annotation cp value 435 Matcher cpm = ANNOTATION_CP_REMAP.matcher(result); 436 if (cpm.matches()) { 437 // We need to avoid breaking the syntax not just of JSON, but of XPATH. 438 final String badCodepointRange = cpm.group(2); 439 StringBuilder sb = new StringBuilder(cpm.group(1)).append("[@cp=\""); 440 // JSON would handle a wide range of things if escaped, but XPATH will not. 441 if (badCodepointRange.codePointCount(0, badCodepointRange.length()) != 1) { 442 // forbid more than one U+ (because we will have to unescape it.) 443 throw new IllegalArgumentException( 444 "Need exactly one codepoint in the @cp string, but got " 445 + badCodepointRange 446 + " in xpath " 447 + pathStr); 448 } 449 badCodepointRange 450 .codePoints() 451 .forEach(cp -> sb.append("U+").append(Integer.toHexString(cp).toUpperCase())); 452 sb.append("\"]").append(cpm.group(3)); 453 result = sb.toString(); 454 } 455 456 logger.finest(" IN pathStr : " + result); 457 result = LdmlConvertRules.PathTransformSpec.applyAll(result); 458 result = result.replaceFirst("/ldml/", pathPrefix); 459 result = result.replaceFirst("/supplementalData/", pathPrefix); 460 461 if (result.startsWith("//cldr/supplemental/references/reference")) { 462 // no change 463 } else if (strictBcp47) { 464 // Look for something like <!--@MATCH:set/validity/locale--> in DTD 465 if (result.contains("localeDisplayNames/languages/language")) { 466 if (result.contains("type=\"root\"")) { 467 // This is strictBcp47 468 // Drop translation for 'root' as it conflicts with 'und' 469 return ""; // 'drop this path' 470 } 471 result = fixXpathBcp47(result, "language", "type"); 472 } else if (result.contains("likelySubtags/likelySubtag")) { 473 if (!result.contains("\"iw\"") 474 && !result.contains("\"in\"") 475 && !result.contains("\"ji\"")) { 476 // Special case: preserve 'iw' and 'in' likely subtags 477 result = fixXpathBcp47(result, "likelySubtag", "from", "to"); 478 } else { 479 result = underscoreToHypen(result); 480 logger.warning("Including aliased likelySubtags: " + result); 481 } 482 } else if (result.startsWith("//cldr/supplemental/weekData/weekOfPreference")) { 483 result = fixXpathBcp47(result, "weekOfPreference", "locales"); 484 } else if (result.startsWith("//cldr/supplemental/metadata/defaultContent")) { 485 result = fixXpathBcp47(result, "defaultContent", "locales"); 486 } else if (result.startsWith("//cldr/supplemental/grammatical") 487 && result.contains("Data/grammaticalFeatures")) { 488 result = fixXpathBcp47(result, "grammaticalFeatures", "locales"); 489 } else if (result.startsWith("//cldr/supplemental/grammatical") 490 && result.contains("Data/grammaticalDerivations")) { 491 result = fixXpathBcp47(result, "grammaticalDerivations", "locales"); 492 } else if (result.startsWith("//cldr/supplemental/dayPeriodRuleSet")) { 493 result = fixXpathBcp47(result, "dayPeriodRules", "locales"); 494 } else if (result.startsWith("//cldr/supplemental/plurals")) { 495 result = fixXpathBcp47(result, "pluralRules", "locales"); 496 } else if (result.startsWith("//cldr/supplemental/timeData/hours")) { 497 result = fixXpathBcp47MishMash(result, "hours", "regions"); 498 } else if (result.startsWith("//cldr/supplemental/parentLocales/parentLocale")) { 499 result = fixXpathBcp47(result, "parentLocale", "parent", "locales"); 500 } else if (result.startsWith( 501 "//cldr/supplemental/territoryInfo/territory/languagePopulation")) { 502 result = fixXpathBcp47(result, "languagePopulation", "type"); 503 } else if (result.contains("languages") 504 || result.contains("languageAlias") 505 || result.contains("languageMatches") 506 || result.contains("likelySubtags") 507 || result.contains("parentLocale") 508 || result.contains("locales=")) { 509 final String oldResult = result; 510 result = underscoreToHypen(result); 511 if (!oldResult.equals(result)) { 512 logger.fine(oldResult + " => " + result); 513 } 514 } 515 } else if (result.contains("languages") 516 || result.contains("languageAlias") 517 || result.contains("languageMatches") 518 || result.contains("likelySubtags") 519 || result.contains("parentLocale") 520 || result.contains("locales=")) { 521 // old behavior: just munge paths.. 522 result = underscoreToHypen(result); 523 } 524 logger.finest("OUT pathStr : " + result); 525 logger.finest("result: " + result); 526 return result; 527 } 528 529 /** Read all paths in the file, and assign each to a JSONSection. Return the map. */ mapPathsToSections( AtomicInteger readCount, int totalCount, CLDRFile file, String pathPrefix, SupplementalDataInfo sdi)530 private Map<JSONSection, List<CldrItem>> mapPathsToSections( 531 AtomicInteger readCount, 532 int totalCount, 533 CLDRFile file, 534 String pathPrefix, 535 SupplementalDataInfo sdi) 536 throws IOException, ParseException { 537 final Map<JSONSection, List<CldrItem>> sectionItems = new TreeMap<>(); 538 539 String locID = file.getLocaleID(); 540 Matcher noNumberingSystemMatcher = LdmlConvertRules.NO_NUMBERING_SYSTEM_PATTERN.matcher(""); 541 Matcher numberingSystemMatcher = LdmlConvertRules.NUMBERING_SYSTEM_PATTERN.matcher(""); 542 Matcher rootIdentityMatcher = LdmlConvertRules.ROOT_IDENTITY_PATTERN.matcher(""); 543 Set<String> activeNumberingSystems = new TreeSet<>(); 544 activeNumberingSystems.add("latn"); // Always include latin script numbers 545 for (String np : LdmlConvertRules.ACTIVE_NUMBERING_SYSTEM_XPATHS) { 546 String ns = file.getWinningValue(np); 547 if (ns != null && ns.length() > 0) { 548 activeNumberingSystems.add(ns); 549 } 550 } 551 final DtdType fileDtdType = file.getDtdType(); 552 CoverageInfo covInfo = CLDRConfig.getInstance().getCoverageInfo(); 553 // read paths in DTD order. The order is critical for JSON processing. 554 final CLDRFile.Status status = new CLDRFile.Status(); 555 for (Iterator<String> it = 556 file.iterator("", DtdData.getInstance(fileDtdType).getDtdComparator(null)); 557 it.hasNext(); ) { 558 int cv = Level.UNDETERMINED.getLevel(); 559 final String path = it.next(); 560 561 // Check for code-fallback and constructed first, even before fullpath and value 562 final String localeWhereFound = file.getSourceLocaleID(path, status); 563 if (!includeRedundant 564 && (localeWhereFound.equals(XMLSource.CODE_FALLBACK_ID) 565 || // language[@type="apc"] = apc : missing 566 status.pathWhereFound.equals( 567 GlossonymConstructor 568 .PSEUDO_PATH))) { // language[@type="fa_AF"] = fa (AF) 569 // or Farsi (Afghanistan) : missing 570 // Don't include these paths. 571 continue; 572 } 573 574 // now get the fullpath and value 575 String fullPath = file.getFullXPath(path); 576 String value = file.getWinningValue(path); 577 578 if (fullPath == null) { 579 fullPath = path; 580 } 581 582 if (!CLDRFile.isSupplementalName(locID) 583 && path.startsWith("//ldml/") 584 && !path.contains("/identity")) { 585 cv = covInfo.getCoverageValue(path, locID); 586 } 587 if (cv > coverageValue) { 588 continue; 589 } 590 // Discard root identity element unless the locale is root 591 rootIdentityMatcher.reset(fullPath); 592 if (rootIdentityMatcher.matches() && !"root".equals(locID)) { 593 continue; 594 } 595 596 // automatically filter out number symbols and formats without a numbering system 597 noNumberingSystemMatcher.reset(fullPath); 598 if (noNumberingSystemMatcher.matches()) { 599 continue; 600 } 601 602 // Filter out non-active numbering systems data unless fullNumbers is specified. 603 numberingSystemMatcher.reset(fullPath); 604 if (numberingSystemMatcher.matches() && !fullNumbers) { 605 XPathParts xpp = XPathParts.getFrozenInstance(fullPath); 606 String currentNS = xpp.getAttributeValue(2, "numberSystem"); 607 if (currentNS != null && !activeNumberingSystems.contains(currentNS)) { 608 continue; 609 } 610 } 611 612 // Handle the no inheritance marker. 613 if (resolve && CldrUtility.NO_INHERITANCE_MARKER.equals(value)) { 614 continue; 615 } 616 617 // discard draft before transforming 618 final String pathNoDraft = CLDRFile.DRAFT_PATTERN.matcher(path).replaceAll(""); 619 final String fullPathNoDraft = CLDRFile.DRAFT_PATTERN.matcher(fullPath).replaceAll(""); 620 621 final String pathNoXmlSpace = 622 CLDRFile.XML_SPACE_PATTERN.matcher(pathNoDraft).replaceAll(""); 623 final String fullPathNoXmlSpace = 624 CLDRFile.XML_SPACE_PATTERN.matcher(fullPathNoDraft).replaceAll(""); 625 626 final String transformedPath = transformPath(pathNoXmlSpace, pathPrefix); 627 final String transformedFullPath = transformPath(fullPathNoXmlSpace, pathPrefix); 628 629 if (transformedPath.isEmpty()) { 630 continue; // skip this path 631 } 632 633 for (JSONSection js : 634 sections) { // TODO: move to subfunction, error if >1 section matches 635 if (js.pattern.matcher(transformedPath).matches()) { 636 CldrItem item = 637 new CldrItem( 638 transformedPath, transformedFullPath, path, fullPath, value); 639 640 List<CldrItem> cldrItems = sectionItems.get(js); 641 if (cldrItems == null) { 642 cldrItems = new ArrayList<>(); 643 } 644 cldrItems.add(item); 645 sectionItems.put(js, cldrItems); 646 break; 647 } 648 } 649 } 650 651 // TODO: move matcher out of inner loop 652 final Matcher versionInfoMatcher = VERSION_INFO_PATTERN.matcher(""); 653 // Automatically copy the version info to any sections that had real data in them. 654 JSONSection otherSection = sections.get(sections.size() - 1); 655 List<CldrItem> others = sectionItems.get(otherSection); 656 if (others == null) { 657 return sectionItems; 658 } 659 List<CldrItem> otherSectionItems = new ArrayList<>(others); 660 int addedItemCount = 0; 661 boolean copyIdentityInfo = Boolean.parseBoolean(options.get("identity").getValue()); 662 663 for (CldrItem item : otherSectionItems) { 664 String thisPath = item.getPath(); 665 versionInfoMatcher.reset(thisPath); 666 if (versionInfoMatcher.matches()) { 667 for (JSONSection js : sections) { 668 if (sectionItems.get(js) != null 669 && !js.section.equals("other") 670 && copyIdentityInfo) { 671 List<CldrItem> hit = sectionItems.get(js); 672 hit.add(addedItemCount, item); 673 sectionItems.put(js, hit); 674 } 675 if (js.section.equals("other")) { // did not match one of the regular sections 676 List<CldrItem> hit = sectionItems.get(js); 677 hit.remove(item); 678 sectionItems.put(js, hit); 679 } 680 } 681 addedItemCount++; 682 } 683 } 684 return sectionItems; 685 } 686 687 static final Pattern VERSION_INFO_PATTERN = PatternCache.get(".*/(identity|version).*"); 688 static final Pattern HAS_SUBTAG = PatternCache.get(".*-[a-z]-.*"); 689 690 /** 691 * Convert CLDR's XML data to JSON format. 692 * 693 * @param file CLDRFile object. 694 * @param outFilename The file name used to save JSON data. 695 * @throws IOException 696 * @throws ParseException 697 * @return total items written in all files. (if 0, file had no effect) 698 */ convertCldrItems( AtomicInteger readCount, int totalCount, String dirName, String filename, String pathPrefix, final Map<JSONSection, List<CldrItem>> sectionItems)699 private int convertCldrItems( 700 AtomicInteger readCount, 701 int totalCount, 702 String dirName, 703 String filename, 704 String pathPrefix, 705 final Map<JSONSection, List<CldrItem>> sectionItems) 706 throws IOException, ParseException { 707 // zone and timezone items are queued for sorting first before they are 708 // processed. 709 710 final String filenameAsLangTag = unicodeLocaleToString(filename); 711 712 if (skipBcp47LocalesWithSubtags 713 && type.locales() 714 && HAS_SUBTAG.matcher(filenameAsLangTag).matches()) { 715 // Has a subtag, so skip it. 716 // It will show up in the "no output" list. 717 return 0; 718 } 719 720 int totalItemsInFile = 0; 721 722 List<Pair<String, Integer>> outputProgress = new LinkedList<>(); 723 724 for (JSONSection js : sections) { 725 if (js.section.equals("IGNORE")) { 726 continue; 727 } 728 String outFilename; 729 if (type == RunType.rbnf) { 730 outFilename = filenameAsLangTag + ".json"; 731 } else if (type == RunType.bcp47) { 732 outFilename = filename + ".json"; 733 } else if (js.section.equals("other")) { 734 // If you see other-___.json, it means items that were missing from 735 // JSON_config_*.txt 736 outFilename = js.section + "-" + filename + ".json"; // Use original filename 737 } else { 738 outFilename = js.section + ".json"; 739 } 740 String tier = ""; 741 boolean writeOther = Boolean.parseBoolean(options.get("other").getValue()); 742 if (js.section.equals("other") && !writeOther) { 743 continue; 744 } else { 745 StringBuilder outputDirname = new StringBuilder(outputDir); 746 if (writePackages) { 747 if (type.tiered()) { 748 LocaleIDParser lp = new LocaleIDParser(); 749 lp.set(filename); 750 if (defaultContentLocales.contains(filename) 751 && lp.getRegion().length() > 0) { 752 if (type == RunType.main) { 753 skippedDefaultContentLocales.add(filenameAsLangTag); 754 } 755 continue; 756 } 757 final boolean isModernTier = localeIsModernTier(filename); 758 if (isModernTier && writeModernPackage) { 759 tier = MODERN_TIER_SUFFIX; 760 if (type == RunType.main) { 761 avl.modern.add(filenameAsLangTag); 762 } 763 } else { 764 tier = FULL_TIER_SUFFIX; 765 } 766 if (type == RunType.main) { 767 avl.full.add(filenameAsLangTag); 768 } 769 } else if (type == RunType.rbnf) { 770 js.packageName = "rbnf"; 771 tier = ""; 772 } else if (type == RunType.bcp47) { 773 js.packageName = "bcp47"; 774 tier = ""; 775 } 776 if (js.packageName != null) { 777 String packageName = CLDR_PKG_PREFIX + js.packageName + tier; 778 outputDirname.append("/" + packageName); 779 packages.add(packageName); 780 } 781 outputDirname.append("/" + dirName + "/"); 782 if (type.tiered()) { 783 outputDirname.append(filenameAsLangTag); 784 } 785 logger.fine("outDir: " + outputDirname); 786 logger.fine("pack: " + js.packageName); 787 logger.fine("dir: " + dirName); 788 } else { 789 outputDirname.append("/" + filename); 790 } 791 792 assert (tier.isEmpty() == !type.tiered()); 793 794 List<String> outputDirs = new ArrayList<>(); 795 outputDirs.add(outputDirname.toString()); 796 if (writePackages && tier.equals(MODERN_TIER_SUFFIX) && js.packageName != null) { 797 // if it is in 'modern', add it to 'full' and core also. 798 outputDirs.add( 799 outputDirname 800 .toString() 801 .replaceFirst(MODERN_TIER_SUFFIX, FULL_TIER_SUFFIX)); 802 // Also need to make sure that the full and core package is added 803 packages.add(CLDR_PKG_PREFIX + js.packageName + FULL_TIER_SUFFIX); 804 } 805 806 for (String outputDir : outputDirs) { 807 List<CldrItem> theItems = sectionItems.get(js); 808 if (theItems == null || theItems.size() == 0) { 809 logger.fine( 810 () -> 811 ">" 812 + progressPrefix(readCount, totalCount) 813 + outputDir 814 + " - no items to write in " 815 + js.section); // mostly noise 816 continue; 817 } 818 logger.fine( 819 () -> 820 ("?" 821 + progressPrefix( 822 readCount, totalCount, filename, js.section) 823 + " - " 824 + theItems.size() 825 + " item(s)" 826 + "\r")); 827 // Create the output dir if it doesn't exist 828 File dir = new File(outputDir.toString()); 829 if (!dir.exists()) { 830 dir.mkdirs(); 831 } 832 JsonObject out = new JsonObject(); // root object for writing 833 834 ArrayList<CldrItem> sortingItems = new ArrayList<>(); 835 ArrayList<CldrItem> arrayItems = new ArrayList<>(); 836 837 ArrayList<CldrNode> nodesForLastItem = new ArrayList<>(); 838 String lastLeadingArrayItemPath = null; 839 String leadingArrayItemPath = ""; 840 int valueCount = 0; 841 String previousIdentityPath = null; 842 for (CldrItem item : theItems) { 843 if (item.getPath().isEmpty()) { 844 throw new IllegalArgumentException( 845 "empty xpath in " 846 + filename 847 + " section " 848 + js.packageName 849 + "/" 850 + js.section); 851 } 852 if (type == RunType.rbnf) { 853 item.adjustRbnfPath(); 854 } 855 856 // items in the identity section of a file should only ever contain the 857 // lowest level, even if using 858 // resolving source, so if we have duplicates ( caused by attributes used as 859 // a value ) then suppress 860 // them here. 861 if (item.getPath().contains("/identity/")) { 862 String[] parts = item.getPath().split("\\["); 863 if (parts[0].equals(previousIdentityPath)) { 864 continue; 865 } else { 866 XPathParts xpp = XPathParts.getFrozenInstance(item.getPath()); 867 String territory = xpp.findAttributeValue("territory", "type"); 868 LocaleIDParser lp = new LocaleIDParser().set(filename); 869 if (territory != null 870 && territory.length() > 0 871 && !territory.equals(lp.getRegion())) { 872 continue; 873 } 874 previousIdentityPath = parts[0]; 875 } 876 } 877 878 // some items need to be split to multiple item before processing. None 879 // of those items need to be sorted. 880 // Applies to SPLITTABLE_ATTRS attributes. 881 CldrItem[] items = item.split(); 882 if (items == null) { 883 // Nothing to split. Make it a 1-element array. 884 items = new CldrItem[1]; 885 items[0] = item; 886 } 887 valueCount += items.length; 888 889 // Hard code this part. 890 if (item.getUntransformedPath().contains("unitPreference")) { 891 // Need to do more transforms on this one, so just output version/etc 892 // here. 893 continue; 894 } 895 896 for (CldrItem newItem : items) { 897 // alias will be dropped in conversion, don't count it. 898 if (newItem.isAliasItem()) { 899 valueCount--; 900 } 901 902 // Items like zone items need to be sorted first before write them out. 903 if (newItem.needsSort()) { 904 resolveArrayItems(out, nodesForLastItem, arrayItems); 905 sortingItems.add(newItem); 906 } else { 907 Matcher matcher = 908 LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher( 909 newItem.getPath()); 910 if (matcher.matches()) { 911 resolveSortingItems(out, nodesForLastItem, sortingItems); 912 leadingArrayItemPath = matcher.group(1); 913 if (lastLeadingArrayItemPath != null 914 && !lastLeadingArrayItemPath.equals( 915 leadingArrayItemPath)) { 916 resolveArrayItems(out, nodesForLastItem, arrayItems); 917 } 918 lastLeadingArrayItemPath = leadingArrayItemPath; 919 arrayItems.add(newItem); 920 } else { 921 // output a single item 922 resolveSortingItems(out, nodesForLastItem, sortingItems); 923 resolveArrayItems(out, nodesForLastItem, arrayItems); 924 outputCldrItem(out, nodesForLastItem, newItem); 925 lastLeadingArrayItemPath = ""; 926 } 927 } 928 } 929 } 930 931 resolveSortingItems(out, nodesForLastItem, sortingItems); 932 resolveArrayItems(out, nodesForLastItem, arrayItems); 933 if (js.section.contains("unitPreferenceData")) { 934 outputUnitPreferenceData(js, theItems, out, nodesForLastItem); 935 } 936 937 // closeNodes(out, nodesForLastItem.size() - 2, 0); 938 939 // write JSON 940 try (PrintWriter outf = FileUtilities.openUTF8Writer(outputDir, outFilename)) { 941 outf.println(gson.toJson(out)); 942 } 943 944 String outPath = 945 new File(outputDir.substring(this.outputDir.length()), outFilename) 946 .getPath(); 947 outputProgress.add( 948 Pair.of(String.format("%20s %s", js.section, outPath), valueCount)); 949 logger.fine( 950 ">" 951 + progressPrefix(readCount, totalCount, filename, js.section) 952 + String.format("…%s (%d values)", outPath, valueCount)); 953 954 totalItemsInFile += valueCount; 955 } 956 } 957 } // this is the only normal output with debug off 958 StringBuilder outStr = new StringBuilder(); 959 if (!outputProgress.isEmpty()) { 960 // Put these first, so the percent is at the end. 961 for (final Pair<String, Integer> outputItem : outputProgress) { 962 outStr.append( 963 String.format("\t%6d %s\n", outputItem.getSecond(), outputItem.getFirst())); 964 } 965 outStr.append( 966 String.format( 967 "%s%-12s\t %s\n", 968 progressPrefix(readCount, totalCount), 969 filename, 970 valueSectionsFormat(totalItemsInFile, outputProgress.size()))); 971 } else { 972 outStr.append( 973 String.format( 974 "%s%-12s\t" + NONE_ICON + " (no output)\n", 975 progressPrefix(readCount, totalCount), 976 filename)); 977 } 978 synchronized (readCount) { // to prevent interleaved output 979 System.out.print(outStr); 980 } 981 return totalItemsInFile; 982 } 983 valueSectionsFormat(int values, int sections)984 private static String valueSectionsFormat(int values, int sections) { 985 return MessageFormat.format( 986 "({0, plural, one {# value} other {# values}} in {1, plural, one {# section} other {# sections}})", 987 values, 988 sections); 989 } 990 localeIsModernTier(String filename)991 private boolean localeIsModernTier(String filename) { 992 Level lev = CalculatedCoverageLevels.getInstance().getEffectiveCoverageLevel(filename); 993 if (lev == null) return false; 994 return lev.isAtLeast(Level.MODERN); 995 } 996 localeIsBasicTier(String filename)997 private boolean localeIsBasicTier(String filename) { 998 Level lev = CalculatedCoverageLevels.getInstance().getEffectiveCoverageLevel(filename); 999 if (lev == null) return false; 1000 return lev.isAtLeast(Level.BASIC); 1001 } 1002 1003 /** 1004 * Entire xpaths and random short strings are passed through this function. Not really Locale ID 1005 * to Language Tag. 1006 * 1007 * @param filename 1008 * @return 1009 */ underscoreToHypen(String filename)1010 private String underscoreToHypen(String filename) { 1011 return filename.replaceAll("_", "-"); 1012 } 1013 1014 /** 1015 * Bottleneck for converting Unicode Locale ID (root, ca_ES_VALENCIA) to String for filename or 1016 * data item. If strictBcp47 is true (default) then it will convert to (und, ca-ES-valencia) 1017 * 1018 * @param locale 1019 * @return 1020 */ unicodeLocaleToString(String locale)1021 private final String unicodeLocaleToString(String locale) { 1022 if (strictBcp47) { 1023 return CLDRLocale.toLanguageTag(locale); 1024 } else { 1025 return underscoreToHypen(locale); 1026 } 1027 } 1028 1029 Pattern IS_REGION_CODE = PatternCache.get("([A-Z][A-Z])|([0-9][0-9][0-9])"); 1030 /** 1031 * Bottleneck for converting Unicode Locale ID (root, ca_ES_VALENCIA) to String for filename or 1032 * data item. If strictBcp47 is true (default) then it will convert to (und, ca-ES-valencia) 1033 * Differs from unicodeLocaleToString in that it will preserve all uppercase region ids 1034 * 1035 * @param locale 1036 * @return 1037 */ unicodeLocaleMishMashToString(String locale)1038 private final String unicodeLocaleMishMashToString(String locale) { 1039 if (strictBcp47) { 1040 if (IS_REGION_CODE.matcher(locale).matches()) { 1041 return locale; 1042 } else { 1043 return CLDRLocale.toLanguageTag(locale); 1044 } 1045 } else { 1046 return underscoreToHypen(locale); 1047 } 1048 } 1049 1050 /** 1051 * Fixup a path to be BCP47 compliant 1052 * 1053 * @param path XPath (usually ends in elementName, but not necessarily) 1054 * @param elementName element to fixup 1055 * @param attributeNames list of attributes to fix 1056 * @return new path 1057 */ fixXpathBcp47(final String path, String elementName, String... attributeNames)1058 final String fixXpathBcp47(final String path, String elementName, String... attributeNames) { 1059 final XPathParts xpp = XPathParts.getFrozenInstance(path).cloneAsThawed(); 1060 for (final String attributeName : attributeNames) { 1061 final String oldValue = xpp.findAttributeValue(elementName, attributeName); 1062 if (oldValue == null) continue; 1063 final String oldValues[] = oldValue.split(" "); 1064 String newValue = 1065 Arrays.stream(oldValues) 1066 .map((String s) -> unicodeLocaleToString(s)) 1067 .collect(Collectors.joining(" ")); 1068 if (!oldValue.equals(newValue)) { 1069 xpp.setAttribute(elementName, attributeName, newValue); 1070 logger.finest(attributeName + " = " + oldValue + " -> " + newValue); 1071 } 1072 } 1073 return xpp.toString(); 1074 } 1075 1076 /** 1077 * Fixup a path to be BCP47 compliant …but support a mishmash of regions and locale ids 1078 * CLDR-15069 1079 * 1080 * @param path XPath (usually ends in elementName, but not necessarily) 1081 * @param elementName element to fixup 1082 * @param attributeNames list of attributes to fix 1083 * @return new path 1084 */ fixXpathBcp47MishMash( final String path, String elementName, String... attributeNames)1085 final String fixXpathBcp47MishMash( 1086 final String path, String elementName, String... attributeNames) { 1087 final XPathParts xpp = XPathParts.getFrozenInstance(path).cloneAsThawed(); 1088 for (final String attributeName : attributeNames) { 1089 final String oldValue = xpp.findAttributeValue(elementName, attributeName); 1090 if (oldValue == null) continue; 1091 final String oldValues[] = oldValue.split(" "); 1092 String newValue = 1093 Arrays.stream(oldValues) 1094 .map((String s) -> unicodeLocaleMishMashToString(s)) 1095 .collect(Collectors.joining(" ")); 1096 if (!oldValue.equals(newValue)) { 1097 xpp.setAttribute(elementName, attributeName, newValue); 1098 logger.finest(attributeName + " = " + oldValue + " -> " + newValue); 1099 } 1100 } 1101 return xpp.toString(); 1102 } 1103 outputUnitPreferenceData( JSONSection js, List<CldrItem> theItems, JsonObject out, ArrayList<CldrNode> nodesForLastItem)1104 private void outputUnitPreferenceData( 1105 JSONSection js, 1106 List<CldrItem> theItems, 1107 JsonObject out, 1108 ArrayList<CldrNode> nodesForLastItem) 1109 throws ParseException, IOException { 1110 // handle these specially. 1111 // redo earlier loop somewhat. 1112 CldrNode supplementalNode = CldrNode.createNode("cldr", "supplemental", "supplemental"); 1113 JsonElement supplementalObject = startNonleafNode(out, supplementalNode); 1114 CldrNode unitPrefNode = CldrNode.createNode("supplemental", js.section, js.section); 1115 final JsonElement o = startNonleafNode(supplementalObject, unitPrefNode); 1116 1117 // We'll directly write to 'out' 1118 1119 // Unit preference sorting is a bit more complicated, so we're going to use the CldrItems, 1120 // but collect the results more directly. 1121 1122 Map<Pair<String, String>, Map<String, List<CldrItem>>> catUsagetoRegionItems = 1123 new TreeMap<>(); 1124 1125 for (CldrItem item : theItems) { 1126 if (!item.getUntransformedPath().contains("unitPref")) { 1127 continue; 1128 } 1129 CldrItem[] items = item.split(); 1130 if (items == null) { 1131 throw new IllegalArgumentException("expected unit pref to split: " + item); 1132 } 1133 for (final CldrItem subItem : items) { 1134 // step 1: make sure the category/usage is there 1135 final XPathParts xpp = XPathParts.getFrozenInstance(subItem.getPath()); 1136 final String category = xpp.findFirstAttributeValue("category"); 1137 final String usage = xpp.findFirstAttributeValue("usage"); 1138 final String region = 1139 xpp.findFirstAttributeValue("regions"); // actually one region (split) 1140 Pair<String, String> key = Pair.of(category, usage); 1141 Map<String, List<CldrItem>> regionMap = 1142 catUsagetoRegionItems.computeIfAbsent(key, ignored -> new TreeMap<>()); 1143 List<CldrItem> perRegion = 1144 regionMap.computeIfAbsent(region, ignored -> new ArrayList<>()); 1145 perRegion.add(subItem); 1146 } 1147 } 1148 1149 // OK, now start outputting 1150 // Traverse categories/usage/regions 1151 // unitPreferenceData is already open { 1152 catUsagetoRegionItems.keySet().stream() 1153 .map(p -> p.getFirst()) 1154 .distinct() // for each category 1155 .forEach( 1156 category -> { 1157 JsonObject oo = new JsonObject(); 1158 o.getAsJsonObject().add(category, oo); 1159 1160 catUsagetoRegionItems.entrySet().stream() 1161 .filter(p -> p.getKey().getFirst().equals(category)) 1162 .forEach( 1163 ent -> { 1164 final String usage = ent.getKey().getSecond(); 1165 JsonObject ooo = new JsonObject(); 1166 oo.getAsJsonObject().add(usage, ooo); 1167 1168 ent.getValue() 1169 .forEach( 1170 (region, list) -> { 1171 JsonArray array = 1172 new JsonArray(); 1173 ooo.getAsJsonObject() 1174 .add(region, array); 1175 list.forEach( 1176 item -> { 1177 final XPathParts 1178 xpp = 1179 XPathParts 1180 .getFrozenInstance( 1181 item 1182 .getPath()); 1183 JsonObject u = 1184 new JsonObject(); 1185 array.add(u); 1186 u.addProperty( 1187 "unit", 1188 item 1189 .getValue()); 1190 if (xpp 1191 .containsAttribute( 1192 "geq")) { 1193 u.addProperty( 1194 "geq", 1195 Double 1196 .parseDouble( 1197 xpp 1198 .findFirstAttributeValue( 1199 "geq"))); 1200 } 1201 }); 1202 }); 1203 }); 1204 }); 1205 1206 // Computer, switch to 'automatic' navigation 1207 // We'll let closeNodes take over. 1208 nodesForLastItem.add(unitPrefNode); // unitPreferenceData } 1209 } 1210 1211 /** 1212 * Creates the packaging files ( i.e. package.json ) for a particular package 1213 * 1214 * @param packageName The name of the installable package 1215 */ writePackagingFiles(String outputDir, String packageName)1216 public void writePackagingFiles(String outputDir, String packageName) throws IOException { 1217 File dir = new File(outputDir.toString()); 1218 if (!dir.exists()) { 1219 dir.mkdirs(); 1220 } 1221 writePackageJson(outputDir, packageName); 1222 writeBowerJson(outputDir, packageName); 1223 writeReadme(outputDir, packageName); 1224 } 1225 1226 /** Write the ## License section */ writeCopyrightSection(PrintWriter out)1227 public void writeCopyrightSection(PrintWriter out) { 1228 out.println( 1229 CldrUtility.getCopyrightMarkdown() 1230 + "\n" 1231 + "A copy of the license is included as [LICENSE](./LICENSE)."); 1232 } 1233 1234 /** 1235 * Write the readme fragment from cldr-json-readme.md plus the copyright 1236 * 1237 * @param outf 1238 * @throws IOException 1239 */ writeReadmeSection(PrintWriter outf)1240 private void writeReadmeSection(PrintWriter outf) throws IOException { 1241 FileCopier.copy(CldrUtility.getUTF8Data("cldr-json-readme.md"), outf); 1242 outf.println(); 1243 writeCopyrightSection(outf); 1244 } 1245 writeReadme(String outputDir, String packageName)1246 public void writeReadme(String outputDir, String packageName) throws IOException { 1247 final String basePackageName = getBasePackageName(packageName); 1248 try (PrintWriter outf = 1249 FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "README.md"); ) { 1250 outf.println("# " + packageName); 1251 outf.println(); 1252 outf.println(configFileReader.getPackageDescriptions().get(basePackageName)); 1253 outf.println(); 1254 if (packageName.endsWith(FULL_TIER_SUFFIX)) { 1255 outf.println("This package contains all locales."); 1256 outf.println(); 1257 } else if (packageName.endsWith(MODERN_TIER_SUFFIX)) { 1258 outf.println( 1259 "**Deprecated** This package contains only the set of locales listed as modern coverage. Use `" 1260 + CLDR_PKG_PREFIX 1261 + basePackageName 1262 + FULL_TIER_SUFFIX 1263 + "` and locale coverage data instead. The -modern packages are scheduled to be removed in v46, see [CLDR-16465](https://unicode-org.atlassian.net/browse/CLDR-16465)."); 1264 outf.println(); 1265 } 1266 outf.println(); 1267 outf.println(getNpmBadge(packageName)); 1268 outf.println(); 1269 writeReadmeSection(outf); 1270 } 1271 try (PrintWriter outf = 1272 FileUtilities.openUTF8Writer( 1273 outputDir + "/" + packageName, CldrUtility.LICENSE); ) { 1274 if (licenseFile.isPresent()) { 1275 try (BufferedReader br = FileUtilities.openUTF8Reader("", licenseFile.get()); ) { 1276 FileCopier.copy(br, outf); 1277 } 1278 } else { 1279 FileCopier.copy(CldrUtility.getUTF8Data(CldrUtility.LICENSE), outf); 1280 } 1281 } 1282 } 1283 getBasePackageName(final String packageName)1284 String getBasePackageName(final String packageName) { 1285 String basePackageName = packageName; 1286 if (basePackageName.startsWith(CLDR_PKG_PREFIX)) { 1287 basePackageName = basePackageName.substring(CLDR_PKG_PREFIX.length()); 1288 } 1289 if (basePackageName.endsWith(FULL_TIER_SUFFIX)) { 1290 basePackageName = 1291 basePackageName.substring( 1292 0, basePackageName.length() - FULL_TIER_SUFFIX.length()); 1293 } else if (basePackageName.endsWith(MODERN_TIER_SUFFIX)) { 1294 basePackageName = 1295 basePackageName.substring( 1296 0, basePackageName.length() - MODERN_TIER_SUFFIX.length()); 1297 } 1298 return basePackageName; 1299 } 1300 writeBasicInfo(JsonObject obj, String packageName, boolean isNPM)1301 public void writeBasicInfo(JsonObject obj, String packageName, boolean isNPM) { 1302 obj.addProperty("name", packageName); 1303 obj.addProperty("version", pkgVersion); 1304 1305 String[] packageNameParts = packageName.split("-"); 1306 String dependency = dependencies.get(packageNameParts[1]); 1307 if (dependency != null) { 1308 String[] dependentPackageNames = new String[1]; 1309 String tier = packageNameParts[packageNameParts.length - 1]; 1310 if (dependency.equals("core") || dependency.equals("bcp47")) { 1311 dependentPackageNames[0] = CLDR_PKG_PREFIX + dependency; 1312 } else { 1313 dependentPackageNames[0] = CLDR_PKG_PREFIX + dependency + "-" + tier; 1314 } 1315 1316 JsonObject dependencies = new JsonObject(); 1317 for (String dependentPackageName : dependentPackageNames) { 1318 if (dependentPackageName != null) { 1319 dependencies.addProperty(dependentPackageName, pkgVersion); 1320 } 1321 } 1322 obj.add(isNPM ? "peerDependencies" : "dependencies", dependencies); 1323 } 1324 } 1325 1326 /** 1327 * Default for version string 1328 * 1329 * @return 1330 */ getDefaultVersion()1331 private static String getDefaultVersion() { 1332 String versionString = CLDRFile.GEN_VERSION; 1333 while (versionString.split("\\.").length < 3) { 1334 versionString = versionString + ".0"; 1335 } 1336 return versionString; 1337 } 1338 writePackageJson(String outputDir, String packageName)1339 public void writePackageJson(String outputDir, String packageName) throws IOException { 1340 PrintWriter outf = 1341 FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "package.json"); 1342 logger.fine( 1343 PACKAGE_ICON 1344 + " Creating packaging file => " 1345 + outputDir 1346 + File.separator 1347 + packageName 1348 + File.separator 1349 + "package.json"); 1350 JsonObject obj = new JsonObject(); 1351 writeBasicInfo(obj, packageName, true); 1352 1353 JsonArray maintainers = new JsonArray(); 1354 JsonObject primaryMaintainer = new JsonObject(); 1355 JsonObject secondaryMaintainer = new JsonObject(); 1356 1357 final String basePackageName = getBasePackageName(packageName); 1358 String description = configFileReader.getPackageDescriptions().get(basePackageName); 1359 if (packageName.endsWith(MODERN_TIER_SUFFIX)) { 1360 description = description + " (modern only: deprecated)"; 1361 } 1362 obj.addProperty("description", description); 1363 1364 obj.addProperty("homepage", CLDRURLS.CLDR_HOMEPAGE); 1365 obj.addProperty("author", CLDRURLS.UNICODE_CONSORTIUM); 1366 1367 primaryMaintainer.addProperty("name", "Steven R. Loomis"); 1368 primaryMaintainer.addProperty("email", "[email protected]"); 1369 1370 maintainers.add(primaryMaintainer); 1371 1372 secondaryMaintainer.addProperty("name", "John Emmons"); 1373 secondaryMaintainer.addProperty("email", "[email protected]"); 1374 secondaryMaintainer.addProperty("url", "https://github.com/JCEmmons"); 1375 1376 maintainers.add(secondaryMaintainer); 1377 obj.add("maintainers", maintainers); 1378 1379 JsonObject repository = new JsonObject(); 1380 repository.addProperty("type", "git"); 1381 repository.addProperty("url", "git://github.com/unicode-cldr/cldr-json.git"); 1382 obj.add("repository", repository); 1383 1384 obj.addProperty("license", CLDRURLS.UNICODE_SPDX); 1385 obj.addProperty("bugs", CLDRURLS.CLDR_NEWTICKET_URL); 1386 1387 final SupplementalDataInfo sdi = CLDRConfig.getInstance().getSupplementalDataInfo(); 1388 obj.addProperty("cldrVersion", sdi.getCldrVersionString()); 1389 obj.addProperty("unicodeVersion", sdi.getUnicodeVersionString()); 1390 1391 outf.println(gson.toJson(obj)); 1392 outf.close(); 1393 } 1394 writeBowerJson(String outputDir, String packageName)1395 public void writeBowerJson(String outputDir, String packageName) throws IOException { 1396 PrintWriter outf = 1397 FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "bower.json"); 1398 logger.fine( 1399 PACKAGE_ICON 1400 + " Creating packaging file => " 1401 + outputDir 1402 + File.separator 1403 + packageName 1404 + File.separator 1405 + "bower.json"); 1406 JsonObject obj = new JsonObject(); 1407 writeBasicInfo(obj, packageName, false); 1408 if (type == RunType.supplemental) { 1409 JsonArray mainPaths = new JsonArray(); 1410 mainPaths.add(new JsonPrimitive("availableLocales.json")); 1411 mainPaths.add(new JsonPrimitive("defaultContent.json")); // Handled specially 1412 mainPaths.add(new JsonPrimitive("scriptMetadata.json")); 1413 mainPaths.add(new JsonPrimitive(type.toString() + "/*.json")); 1414 obj.add("main", mainPaths); 1415 } else if (type == RunType.rbnf) { 1416 obj.addProperty("main", type.toString() + "/*.json"); 1417 } else { 1418 obj.addProperty("main", type.toString() + "/**/*.json"); 1419 } 1420 1421 JsonArray ignorePaths = new JsonArray(); 1422 ignorePaths.add(new JsonPrimitive(".gitattributes")); 1423 ignorePaths.add(new JsonPrimitive("README.md")); 1424 obj.add("ignore", ignorePaths); 1425 obj.addProperty("license", CLDRURLS.UNICODE_SPDX); 1426 1427 outf.println(gson.toJson(obj)); 1428 outf.close(); 1429 } 1430 writeDefaultContent(String outputDir)1431 public void writeDefaultContent(String outputDir) throws IOException { 1432 PrintWriter outf = 1433 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "defaultContent.json"); 1434 System.out.println( 1435 PACKAGE_ICON 1436 + " Creating packaging file => " 1437 + outputDir 1438 + "/cldr-core" 1439 + File.separator 1440 + "defaultContent.json"); 1441 JsonObject obj = new JsonObject(); 1442 obj.add("defaultContent", gson.toJsonTree(skippedDefaultContentLocales)); 1443 outf.println(gson.toJson(obj)); 1444 outf.close(); 1445 } 1446 writeCoverageLevels(String outputDir)1447 public void writeCoverageLevels(String outputDir) throws IOException { 1448 try (PrintWriter outf = 1449 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "coverageLevels.json"); ) { 1450 final Map<String, String> covlocs = new TreeMap<>(); 1451 System.out.println( 1452 PACKAGE_ICON 1453 + " Creating packaging file => " 1454 + outputDir 1455 + "/cldr-core" 1456 + File.separator 1457 + "coverageLevels.json from coverageLevels.txt"); 1458 CalculatedCoverageLevels ccl = CalculatedCoverageLevels.getInstance(); 1459 for (final Map.Entry<String, org.unicode.cldr.util.Level> e : 1460 ccl.getLevels().entrySet()) { 1461 final String uloc = e.getKey(); 1462 final String level = e.getValue().name().toLowerCase(); 1463 final String bcp47loc = unicodeLocaleToString(uloc); 1464 if (covlocs.put(bcp47loc, level) != null) { 1465 throw new IllegalArgumentException( 1466 "coverageLevels.txt: duplicate locale " + bcp47loc); 1467 } 1468 } 1469 final Map<String, String> effectiveCovlocs = new TreeMap<>(); 1470 avl.full.forEach( 1471 loc -> { 1472 final String uloc = ULocale.forLanguageTag(loc).toString(); 1473 final Level lev = ccl.getEffectiveCoverageLevel(uloc); 1474 if (lev != null) { 1475 effectiveCovlocs.put(loc, lev.name().toLowerCase()); 1476 } 1477 }); 1478 JsonObject obj = new JsonObject(); 1479 // exactly what is in CLDR .txt file 1480 obj.add("coverageLevels", gson.toJsonTree(covlocs)); 1481 1482 // resolved, including all available locales 1483 obj.add("effectiveCoverageLevels", gson.toJsonTree(effectiveCovlocs)); 1484 outf.println(gson.toJson(obj)); 1485 } 1486 } 1487 writeAvailableLocales(String outputDir)1488 public void writeAvailableLocales(String outputDir) throws IOException { 1489 PrintWriter outf = 1490 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "availableLocales.json"); 1491 System.out.println( 1492 PACKAGE_ICON 1493 + " Creating packaging file => " 1494 + outputDir 1495 + "/cldr-core" 1496 + File.separator 1497 + "availableLocales.json"); 1498 JsonObject obj = new JsonObject(); 1499 obj.add("availableLocales", gson.toJsonTree(avl)); 1500 outf.println(gson.toJson(obj)); 1501 outf.close(); 1502 } 1503 writeScriptMetadata(String outputDir)1504 public void writeScriptMetadata(String outputDir) throws IOException { 1505 PrintWriter outf = 1506 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "scriptMetadata.json"); 1507 System.out.println( 1508 "Creating script metadata file => " 1509 + outputDir 1510 + File.separator 1511 + "cldr-core" 1512 + File.separator 1513 + "scriptMetadata.json"); 1514 Map<String, Info> scriptInfo = new TreeMap<>(); 1515 for (String script : ScriptMetadata.getScripts()) { 1516 Info i = ScriptMetadata.getInfo(script); 1517 scriptInfo.put(script, i); 1518 } 1519 if (ScriptMetadata.errors.size() > 0) { 1520 System.err.println(Joiner.on("\n\t").join(ScriptMetadata.errors)); 1521 // throw new IllegalArgumentException(); 1522 } 1523 1524 JsonObject obj = new JsonObject(); 1525 obj.add("scriptMetadata", gson.toJsonTree(scriptInfo)); 1526 outf.println(gson.toJson(obj)); 1527 outf.close(); 1528 } 1529 writePackageList(String outputDir)1530 public void writePackageList(String outputDir) throws IOException { 1531 PrintWriter outf = 1532 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "cldr-packages.json"); 1533 System.out.println( 1534 PACKAGE_ICON 1535 + " Creating packaging metadata file => " 1536 + outputDir 1537 + File.separator 1538 + "cldr-core" 1539 + File.separator 1540 + "cldr-packages.json and PACKAGES.md"); 1541 PrintWriter pkgs = FileUtilities.openUTF8Writer(outputDir + "/..", "PACKAGES.md"); 1542 1543 pkgs.println("# CLDR JSON Packages"); 1544 pkgs.println(); 1545 1546 LdmlConfigFileReader uberReader = new LdmlConfigFileReader(); 1547 1548 for (RunType r : RunType.values()) { 1549 if (r == RunType.all) continue; 1550 uberReader.read(null, r); 1551 } 1552 1553 TreeMap<String, String> pkgsToDesc = new TreeMap<>(); 1554 1555 JsonObject obj = new JsonObject(); 1556 obj.addProperty("license", CLDRURLS.UNICODE_SPDX); 1557 obj.addProperty("bugs", CLDRURLS.CLDR_NEWTICKET_URL); 1558 obj.addProperty("homepage", CLDRURLS.CLDR_HOMEPAGE); 1559 obj.addProperty("version", pkgVersion); 1560 1561 JsonArray packages = new JsonArray(); 1562 for (Map.Entry<String, String> e : uberReader.getPackageDescriptions().entrySet()) { 1563 final String baseName = e.getKey(); 1564 1565 if (baseName.equals("IGNORE") || baseName.equals("cal")) continue; 1566 if (baseName.equals("core") || baseName.equals("rbnf") || baseName.equals("bcp47")) { 1567 JsonObject packageEntry = new JsonObject(); 1568 packageEntry.addProperty("description", e.getValue()); 1569 packageEntry.addProperty("name", CLDR_PKG_PREFIX + baseName); 1570 packages.add(packageEntry); 1571 pkgsToDesc.put( 1572 packageEntry.get("name").getAsString(), 1573 packageEntry.get("description").getAsString()); 1574 } else { 1575 { 1576 JsonObject packageEntry = new JsonObject(); 1577 packageEntry.addProperty("description", e.getValue()); 1578 packageEntry.addProperty("tier", "full"); 1579 packageEntry.addProperty("name", CLDR_PKG_PREFIX + baseName + FULL_TIER_SUFFIX); 1580 packages.add(packageEntry); 1581 pkgsToDesc.put( 1582 packageEntry.get("name").getAsString(), 1583 packageEntry.get("description").getAsString()); 1584 } 1585 { 1586 JsonObject packageEntry = new JsonObject(); 1587 packageEntry.addProperty("description", e.getValue() + " modern (deprecated)"); 1588 packageEntry.addProperty("tier", "modern"); 1589 packageEntry.addProperty( 1590 "name", CLDR_PKG_PREFIX + baseName + MODERN_TIER_SUFFIX); 1591 packages.add(packageEntry); 1592 pkgsToDesc.put( 1593 packageEntry.get("name").getAsString(), 1594 packageEntry.get("description").getAsString()); 1595 } 1596 } 1597 } 1598 pkgs.println(); 1599 for (Map.Entry<String, String> e : pkgsToDesc.entrySet()) { 1600 pkgs.println("### [" + e.getKey() + "](./cldr-json/" + e.getKey() + "/)"); 1601 pkgs.println(); 1602 if (e.getKey().contains("-modern")) { 1603 pkgs.println( 1604 " - **Note: Deprecated** see [CLDR-16465](https://unicode-org.atlassian.net/browse/CLDR-16465)."); 1605 } 1606 pkgs.println(" - " + e.getValue()); 1607 pkgs.println(" - " + getNpmBadge(e.getKey())); 1608 pkgs.println(); 1609 } 1610 obj.add("packages", packages); 1611 outf.println(gson.toJson(obj)); 1612 outf.close(); 1613 pkgs.println("## JSON Metadata"); 1614 pkgs.println(); 1615 pkgs.println( 1616 "Package metadata is available at [`cldr-core`/cldr-packages.json](./cldr-json/cldr-core/cldr-packages.json)"); 1617 pkgs.println(); 1618 1619 writeReadmeSection(pkgs); 1620 pkgs.close(); 1621 } 1622 getNpmBadge(final String packageName)1623 private String getNpmBadge(final String packageName) { 1624 return String.format( 1625 "[](https://www.npmjs.org/package/%s)", 1626 packageName, packageName); 1627 } 1628 1629 /** 1630 * Process the pending sorting items. 1631 * 1632 * @param out The ArrayList to hold all output lines. 1633 * @param nodesForLastItem All the nodes from last item. 1634 * @param sortingItems The item list that should be sorted before output. 1635 * @throws IOException 1636 * @throws ParseException 1637 */ resolveSortingItems( JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> sortingItems)1638 private void resolveSortingItems( 1639 JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> sortingItems) 1640 throws IOException, ParseException { 1641 ArrayList<CldrItem> arrayItems = new ArrayList<>(); 1642 String lastLeadingArrayItemPath = null; 1643 1644 if (!sortingItems.isEmpty()) { 1645 Collections.sort(sortingItems); 1646 for (CldrItem item : sortingItems) { 1647 Matcher matcher = LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher(item.getPath()); 1648 if (matcher.matches()) { 1649 String leadingArrayItemPath = matcher.group(1); 1650 if (lastLeadingArrayItemPath != null 1651 && !lastLeadingArrayItemPath.equals(leadingArrayItemPath)) { 1652 resolveArrayItems(out, nodesForLastItem, arrayItems); 1653 } 1654 lastLeadingArrayItemPath = leadingArrayItemPath; 1655 arrayItems.add(item); 1656 } else { 1657 outputCldrItem(out, nodesForLastItem, item); 1658 } 1659 } 1660 sortingItems.clear(); 1661 resolveArrayItems(out, nodesForLastItem, arrayItems); 1662 } 1663 } 1664 1665 /** 1666 * Process the pending array items. 1667 * 1668 * @param out The ArrayList to hold all output lines. 1669 * @param nodesForLastItem All the nodes from last item. 1670 * @param arrayItems The item list that should be output as array. 1671 * @throws IOException 1672 * @throws ParseException 1673 */ resolveArrayItems( JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> arrayItems)1674 private void resolveArrayItems( 1675 JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> arrayItems) 1676 throws IOException, ParseException { 1677 if (!arrayItems.isEmpty()) { 1678 CldrItem firstItem = arrayItems.get(0); 1679 if (firstItem.needsSort()) { 1680 Collections.sort(arrayItems); 1681 firstItem = arrayItems.get(0); 1682 } 1683 1684 int arrayLevel = getArrayIndentLevel(firstItem); // only used for trim 1685 1686 JsonArray array = outputStartArray(out, nodesForLastItem, firstItem, arrayLevel); 1687 1688 // Previous statement closed for first element, trim nodesForLastItem 1689 // so that it will not happen again inside. 1690 int len = nodesForLastItem.size(); 1691 while (len > arrayLevel) { 1692 nodesForLastItem.remove(len - 1); 1693 len--; 1694 } 1695 for (CldrItem insideItem : arrayItems) { 1696 outputArrayItem(array, insideItem, nodesForLastItem, arrayLevel); 1697 } 1698 arrayItems.clear(); 1699 1700 int lastLevel = nodesForLastItem.size() - 1; 1701 // closeNodes(out, lastLevel, arrayLevel); 1702 // out.endArray(); 1703 for (int i = arrayLevel - 1; i < lastLevel; i++) { 1704 nodesForLastItem.remove(i); 1705 } 1706 } 1707 } 1708 1709 /** 1710 * Find the indent level on which array should be inserted. 1711 * 1712 * @param item The CldrItem being examined. 1713 * @return The array indent level. 1714 * @throws ParseException 1715 */ getArrayIndentLevel(CldrItem item)1716 private int getArrayIndentLevel(CldrItem item) throws ParseException { 1717 Matcher matcher = LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher(item.getPath()); 1718 if (!matcher.matches()) { 1719 System.out.println("No match found for " + item.getPath() + ", this shouldn't happen."); 1720 return 0; 1721 } 1722 1723 String leadingPath = matcher.group(1); 1724 CldrItem fakeItem = new CldrItem(leadingPath, leadingPath, leadingPath, leadingPath, ""); 1725 return fakeItem.getNodesInPath().size() - 1; 1726 } 1727 1728 /** 1729 * Write the start of an array. 1730 * 1731 * @param out The root object 1732 * @param nodesForLastItem Nodes in path for last CldrItem. 1733 * @param item The CldrItem to be processed. 1734 * @param arrayLevel The level on which array is laid out. 1735 * @throws IOException 1736 * @throws ParseException 1737 */ outputStartArray( JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item, int arrayLevel)1738 private JsonArray outputStartArray( 1739 JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item, int arrayLevel) 1740 throws IOException, ParseException { 1741 1742 ArrayList<CldrNode> nodesInPath = item.getNodesInPath(); 1743 1744 JsonElement o = out; 1745 1746 // final CldrNode last = nodesInPath.get(nodesInPath.size()-1); 1747 1748 // Output nodes up to parent of 'arrayLevel' 1749 for (int i = 1; i < arrayLevel - 1; i++) { 1750 final CldrNode node = nodesInPath.get(i); 1751 o = startNonleafNode(o, node); 1752 } 1753 1754 // at arrayLevel, we have a named Array. 1755 // Get the name of the parent of the array 1756 String objName = nodesInPath.get(arrayLevel - 1).getNodeKeyName(); 1757 JsonArray array = new JsonArray(); 1758 o.getAsJsonObject().add(objName, array); 1759 1760 return array; 1761 } 1762 1763 /** 1764 * Write a CLDR item to file. 1765 * 1766 * <p>"usesMetazone" will be checked to see if it is current. Those non-current item will be 1767 * dropped. 1768 * 1769 * @param out The ArrayList to hold all output lines. 1770 * @param nodesForLastItem 1771 * @param item The CldrItem to be processed. 1772 * @throws IOException 1773 * @throws ParseException 1774 */ outputCldrItem(JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item)1775 private void outputCldrItem(JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item) 1776 throws IOException, ParseException { 1777 // alias has been resolved, no need to keep it. 1778 if (item.isAliasItem()) { 1779 return; 1780 } 1781 1782 ArrayList<CldrNode> nodesInPath = item.getNodesInPath(); 1783 int arraySize = nodesInPath.size(); 1784 1785 int i = 0; 1786 if (i == nodesInPath.size() && type != RunType.rbnf) { 1787 System.err.println( 1788 "This nodes and last nodes has identical path. (" 1789 + item.getPath() 1790 + ") Some distinguishing attributes wrongly removed?"); 1791 return; 1792 } 1793 1794 // close previous nodes 1795 // closeNodes(out, nodesForLastItem.size() - 2, i); 1796 JsonElement o = out; 1797 for (; i < nodesInPath.size() - 1; ++i) { 1798 o = startNonleafNode(o, nodesInPath.get(i)); 1799 } 1800 1801 writeLeafNode(o, nodesInPath.get(i), item.getValue()); 1802 nodesForLastItem.clear(); 1803 nodesForLastItem.addAll(nodesInPath); 1804 } 1805 1806 /** 1807 * Start a non-leaf node, adding it if not there. 1808 * 1809 * @param out The input JsonObject 1810 * @param node The node being written. 1811 * @throws IOException 1812 */ startNonleafNode(JsonElement out, final CldrNode node)1813 private JsonElement startNonleafNode(JsonElement out, final CldrNode node) throws IOException { 1814 String objName = node.getNodeKeyName(); 1815 // Some node should be skipped as indicated by objName being null. 1816 logger.finest(() -> "objName= " + objName + " for path " + node.getUntransformedPath()); 1817 if (objName == null 1818 || objName.equals("cldr") 1819 || objName.equals("ldmlBCP47")) { // Skip root 'cldr' node 1820 return out; 1821 } 1822 1823 Map<String, String> attrAsValueMap = node.getAttrAsValueMap(); 1824 1825 String name; 1826 1827 if (type == RunType.annotations || type == RunType.annotationsDerived) { 1828 if (objName.startsWith("U+")) { 1829 // parse U+22 -> " etc 1830 name = (com.ibm.icu.text.UTF16.valueOf(Integer.parseInt(objName.substring(2), 16))); 1831 } else { 1832 name = (objName); 1833 } 1834 } else { 1835 name = (objName); 1836 } 1837 1838 JsonElement o = out.getAsJsonObject().get(name); 1839 1840 if (o == null) { 1841 o = new JsonObject(); 1842 out.getAsJsonObject().add(name, o); 1843 } 1844 1845 for (final String key : attrAsValueMap.keySet()) { 1846 logger.finest(() -> "Non-Leaf Node: " + node.getUntransformedPath() + " ." + key); 1847 String rawAttrValue = attrAsValueMap.get(key); 1848 String value = escapeValue(rawAttrValue); 1849 // attribute is prefixed with "_" when being used as key. 1850 String attrAsKey = "_" + key; 1851 if (LdmlConvertRules.attrIsBooleanOmitFalse( 1852 node.getUntransformedPath(), node.getName(), node.getParent(), key)) { 1853 final Boolean v = Boolean.parseBoolean(rawAttrValue); 1854 if (v) { 1855 o.getAsJsonObject().addProperty(attrAsKey, v); 1856 } // else, omit 1857 } else { 1858 // hack for localeRules 1859 if (attrAsKey.equals("_localeRules")) { 1860 // find the _localeRules object, add if it didn't exist 1861 JsonElement localeRules = out.getAsJsonObject().get(attrAsKey); 1862 if (localeRules == null) { 1863 localeRules = new JsonObject(); 1864 out.getAsJsonObject().add(attrAsKey, localeRules); 1865 } 1866 // find the sibling object, add if it did't exist ( this will be parentLocale or 1867 // collations etc.) 1868 JsonElement sibling = localeRules.getAsJsonObject().get(name); 1869 if (sibling == null) { 1870 sibling = new JsonObject(); 1871 localeRules.getAsJsonObject().add(name, sibling); 1872 } 1873 // get the 'parent' attribute, which wil be the value 1874 final String parent = 1875 XPathParts.getFrozenInstance(node.getUntransformedPath()) 1876 .getAttributeValue(-1, "parent"); 1877 // finally, we add something like "nonLikelyScript: und" 1878 sibling.getAsJsonObject().addProperty(value, parent); 1879 } else { 1880 o.getAsJsonObject().addProperty(attrAsKey, value); 1881 } 1882 } 1883 } 1884 return o; 1885 } 1886 1887 /** 1888 * Write a CLDR item to file. 1889 * 1890 * <p>"usesMetazone" will be checked to see if it is current. Those non-current item will be 1891 * dropped. 1892 * 1893 * @param out The ArrayList to hold all output lines. 1894 * @param item The CldrItem to be processed. 1895 * @param nodesForLastItem Nodes in path for last item. 1896 * @param arrayLevel The indentation level in which array exists. 1897 * @throws IOException 1898 * @throws ParseException 1899 */ outputArrayItem( JsonArray out, CldrItem item, ArrayList<CldrNode> nodesForLastItem, int arrayLevel)1900 private void outputArrayItem( 1901 JsonArray out, CldrItem item, ArrayList<CldrNode> nodesForLastItem, int arrayLevel) 1902 throws IOException, ParseException { 1903 1904 // This method is more complicated that outputCldrItem because it needs to 1905 // handle 3 different cases. 1906 // 1. When difference is found below array item, this item will be of the 1907 // same array item. Inside the array item, it is about the same as 1908 // outputCldrItem, just with one more level of indentation because of 1909 // the array. 1910 // 2. The array item is the leaf item with no attribute, simplify it as 1911 // an object with one name/value pair. 1912 // 3. The array item is the leaf item with attribute, an embedded object 1913 // will be created inside the array item object. 1914 1915 ArrayList<CldrNode> nodesInPath = item.getNodesInPath(); 1916 String value = escapeValue(item.getValue()); 1917 int nodesNum = nodesInPath.size(); 1918 1919 // case 1 1920 // int diff = findFirstDiffNodeIndex(nodesForLastItem, nodesInPath); 1921 CldrNode cldrNode = nodesInPath.get(nodesNum - 1); 1922 1923 // if (diff > arrayLevel) { 1924 // // close previous nodes 1925 // closeNodes(out, nodesForLastItem.size() - 1, diff + 1); 1926 1927 // for (int i = diff; i < nodesNum - 1; i++) { 1928 // startNonleafNode(out, nodesInPath.get(i), i + 1); 1929 // } 1930 // writeLeafNode(out, cldrNode, value, nodesNum); 1931 // return; 1932 // } 1933 1934 if (arrayLevel == nodesNum - 1) { 1935 // case 2 1936 // close previous nodes 1937 // if (nodesForLastItem.size() - 1 - arrayLevel > 0) { 1938 // closeNodes(out, nodesForLastItem.size() - 1, arrayLevel); 1939 // } 1940 1941 String objName = cldrNode.getNodeKeyName(); 1942 int pos = objName.indexOf('-'); 1943 if (pos > 0) { 1944 objName = objName.substring(0, pos); 1945 } 1946 1947 Map<String, String> attrAsValueMap = cldrNode.getAttrAsValueMap(); 1948 1949 if (attrAsValueMap.isEmpty()) { 1950 JsonObject o = new JsonObject(); 1951 out.add(o); 1952 o.addProperty(objName, value); 1953 } else if (objName.equals("rbnfrule")) { 1954 writeRbnfLeafNode(out, item, attrAsValueMap); 1955 } else { 1956 JsonObject o = new JsonObject(); 1957 writeLeafNode( 1958 o, 1959 objName, 1960 attrAsValueMap, 1961 value, 1962 cldrNode.getName(), 1963 cldrNode.getParent(), 1964 cldrNode); 1965 out.add(o); 1966 } 1967 // the last node is closed, remove it. 1968 nodesInPath.remove(nodesNum - 1); 1969 } else { 1970 // case 3 1971 // close previous nodes 1972 // if (nodesForLastItem.size() - 1 - (arrayLevel) > 0) { 1973 // closeNodes(out, nodesForLastItem.size() - 1, arrayLevel); 1974 // } 1975 1976 JsonObject o = new JsonObject(); 1977 out.add(o); 1978 1979 CldrNode node = nodesInPath.get(arrayLevel); 1980 String objName = node.getNodeKeyName(); 1981 int pos = objName.indexOf('-'); 1982 if (pos > 0) { 1983 objName = objName.substring(0, pos); 1984 } 1985 Map<String, String> attrAsValueMap = node.getAttrAsValueMap(); 1986 JsonObject oo = new JsonObject(); 1987 o.add(objName, oo); 1988 for (String key : attrAsValueMap.keySet()) { 1989 // attribute is prefixed with "_" when being used as key. 1990 oo.addProperty("_" + key, escapeValue(attrAsValueMap.get(key))); 1991 } 1992 1993 JsonElement o2 = out; 1994 System.err.println("PROBLEM at " + cldrNode.getUntransformedPath()); 1995 // TODO ?!! 1996 for (int i = arrayLevel + 1; i < nodesInPath.size() - 1; i++) { 1997 o2 = startNonleafNode(o2, nodesInPath.get(i)); 1998 } 1999 writeLeafNode(o2, cldrNode, value); 2000 } 2001 2002 nodesForLastItem.clear(); 2003 nodesForLastItem.addAll(nodesInPath); 2004 } 2005 writeRbnfLeafNode( JsonElement out, CldrItem item, Map<String, String> attrAsValueMap)2006 private void writeRbnfLeafNode( 2007 JsonElement out, CldrItem item, Map<String, String> attrAsValueMap) throws IOException { 2008 if (attrAsValueMap.size() != 1) { 2009 throw new IllegalArgumentException( 2010 "Error, attributes seem wrong for RBNF " + item.getUntransformedPath()); 2011 } 2012 Entry<String, String> entry = attrAsValueMap.entrySet().iterator().next(); 2013 JsonArray arr = new JsonArray(); 2014 arr.add(entry.getKey()); 2015 arr.add(entry.getValue()); 2016 out.getAsJsonArray().add(arr); 2017 } 2018 progressPrefix( AtomicInteger readCount, int totalCount, String filename, String section)2019 private String progressPrefix( 2020 AtomicInteger readCount, int totalCount, String filename, String section) { 2021 return progressPrefix(readCount.get(), totalCount, filename, section); 2022 } 2023 progressPrefix(int readCount, int totalCount, String filename, String section)2024 private String progressPrefix(int readCount, int totalCount, String filename, String section) { 2025 return progressPrefix(readCount, totalCount) + filename + "\t" + section + "\t"; 2026 } 2027 progressPrefix(AtomicInteger readCount, int totalCount)2028 private final String progressPrefix(AtomicInteger readCount, int totalCount) { 2029 return progressPrefix(readCount.get(), totalCount); 2030 } 2031 2032 final LocalizedNumberFormatter percentFormatter = 2033 NumberFormatter.withLocale(Locale.ENGLISH) 2034 .unit(NoUnit.PERCENT) 2035 .integerWidth(IntegerWidth.zeroFillTo(3)) 2036 .precision(Precision.integer()); 2037 progressPrefix(int readCount, int totalCount)2038 private final String progressPrefix(int readCount, int totalCount) { 2039 double asPercent = ((double) readCount / (double) totalCount) * 100.0; 2040 return String.format( 2041 SECTION_ICON + " %s (step %d/%d)\t[%s]:\t", 2042 type, 2043 type.ordinal(), 2044 RunType.values().length 2045 - 1, // which 'type' are we on? (all=0, minus one to get the count right) 2046 percentFormatter.format(asPercent)); 2047 } 2048 2049 /** 2050 * Process files in a directory of CLDR file tree. 2051 * 2052 * @param dirName The directory in which xml file will be transformed. 2053 * @param minimalDraftStatus The minimumDraftStatus that will be accepted. 2054 * @throws IOException 2055 * @throws ParseException 2056 */ processDirectory(String dirName, DraftStatus minimalDraftStatus)2057 public void processDirectory(String dirName, DraftStatus minimalDraftStatus) 2058 throws IOException, ParseException { 2059 SupplementalDataInfo sdi = SupplementalDataInfo.getInstance(cldrCommonDir + "supplemental"); 2060 Factory cldrFactory = Factory.make(cldrCommonDir + dirName + "/", ".*"); 2061 Set<String> files = 2062 cldrFactory 2063 .getAvailable() 2064 // filter these out early so our work count is correct 2065 .stream() 2066 .filter( 2067 filename -> 2068 filename.matches(match) 2069 && !LdmlConvertRules.IGNORE_FILE_SET.contains( 2070 filename)) 2071 .collect(Collectors.toSet()); 2072 final int total = files.size(); 2073 AtomicInteger readCount = new AtomicInteger(0); 2074 Map<String, Throwable> errs = new TreeMap<>(); 2075 2076 // This takes a long time (minutes, in 2020), so run it in parallel forkJoinPool threads. 2077 // The result of this pipeline is an array of toString()-able filenames of XML files which 2078 // produced no JSON output, just as a warning. 2079 System.out.println( 2080 progressPrefix(0, total) 2081 + " " 2082 + MessageFormat.format( 2083 GEAR_ICON 2084 + " Beginning parallel process of {0, plural, one {# file} other {# files}}", 2085 total)); 2086 Object noOutputFiles[] = 2087 files.parallelStream() 2088 .unordered() 2089 .map( 2090 filename -> { 2091 String pathPrefix; 2092 CLDRFile file = 2093 cldrFactory.make( 2094 filename, 2095 resolve && type == RunType.main, 2096 minimalDraftStatus); 2097 // Print 'reading' after the make, to stagger the output a 2098 // little bit. 2099 // Otherwise, the printout happens before any work happens, and 2100 // is easily out of order. 2101 readCount.incrementAndGet(); 2102 logger.fine( 2103 () -> 2104 "<" 2105 + progressPrefix( 2106 readCount, total, dirName, 2107 filename) 2108 + "\r"); 2109 2110 if (type == RunType.main) { 2111 pathPrefix = 2112 "/cldr/" 2113 + dirName 2114 + "/" 2115 + unicodeLocaleToString(filename) 2116 + "/"; 2117 } else { 2118 pathPrefix = "/cldr/" + dirName + "/"; 2119 } 2120 int totalForThisFile = 0; 2121 try { 2122 totalForThisFile = 2123 convertCldrItems( 2124 readCount, 2125 total, 2126 dirName, 2127 filename, 2128 pathPrefix, 2129 mapPathsToSections( 2130 readCount, 2131 total, 2132 file, 2133 pathPrefix, 2134 sdi)); 2135 } catch (IOException | ParseException t) { 2136 t.printStackTrace(); 2137 System.err.println( 2138 "!" 2139 + progressPrefix(readCount, total) 2140 + filename 2141 + " - err - " 2142 + t); 2143 errs.put(filename, t); 2144 } finally { 2145 logger.fine( 2146 () -> 2147 "." 2148 + progressPrefix(readCount, total) 2149 + "Completing " 2150 + dirName 2151 + "/" 2152 + filename); 2153 } 2154 return new Pair<>(dirName + "/" + filename, totalForThisFile); 2155 }) 2156 .filter(p -> p.getSecond() == 0) // filter out only files which produced no 2157 // output 2158 .map(p -> p.getFirst()) 2159 .toArray(); 2160 System.out.println( 2161 progressPrefix(total, total) 2162 + " " 2163 + DONE_ICON 2164 + MessageFormat.format( 2165 "Completed parallel process of {0, plural, one {# file} other {# files}}", 2166 total)); 2167 if (noOutputFiles.length > 0) { 2168 System.err.println( 2169 WARN_ICON 2170 + MessageFormat.format( 2171 " Warning: {0, plural, one {# file} other {# files}} did not produce any output (check JSON config):", 2172 noOutputFiles.length)); 2173 for (final Object f : noOutputFiles) { 2174 final String loc = f.toString(); 2175 final String uloc = unicodeLocaleToString(f.toString()); 2176 if (skipBcp47LocalesWithSubtags 2177 && type.locales() 2178 && HAS_SUBTAG.matcher(uloc).matches()) { 2179 System.err.println( 2180 "\t- " + loc + " ❎ (Skipped due to '-T true': " + uloc + ")"); 2181 } else { 2182 System.err.println("\t- " + loc); 2183 } 2184 } 2185 } 2186 2187 if (!errs.isEmpty()) { 2188 System.err.println("Errors in these files:"); 2189 for (Map.Entry<String, Throwable> e : errs.entrySet()) { 2190 System.err.println(e.getKey() + " - " + e.getValue()); 2191 } 2192 // rethrow 2193 for (Map.Entry<String, Throwable> e : errs.entrySet()) { 2194 if (e.getValue() instanceof IOException) { 2195 throw (IOException) e.getValue(); // throw the first one 2196 } else if (e.getValue() instanceof ParseException) { 2197 throw (ParseException) e.getValue(); // throw the first one 2198 } else { 2199 throw new RuntimeException("Other exception thrown: " + e.getValue()); 2200 } 2201 /* NOTREACHED */ 2202 } 2203 } 2204 2205 if (writePackages) { 2206 for (String currentPackage : packages) { 2207 writePackagingFiles(outputDir, currentPackage); 2208 } 2209 if (type == RunType.main) { 2210 writeDefaultContent(outputDir); 2211 writeAvailableLocales(outputDir); 2212 writeCoverageLevels(outputDir); 2213 } else if (type == RunType.supplemental) { 2214 writeScriptMetadata(outputDir); 2215 if (Boolean.parseBoolean(options.get("packagelist").getValue())) { 2216 writePackageList(outputDir); 2217 } 2218 } 2219 } 2220 } 2221 2222 /** Replacement pattern for escaping. */ 2223 private static final Pattern escapePattern = PatternCache.get("\\\\(?!u)"); 2224 2225 /** 2226 * Escape \ in value string. \ should be replaced by \\, except in case of \u1234 In following 2227 * code, \\\\ represent one \, because java compiler and regular expression compiler each do one 2228 * round of escape. 2229 * 2230 * @param value Input string. 2231 * @return escaped string. 2232 */ escapeValue(String value)2233 private String escapeValue(String value) { 2234 Matcher match = escapePattern.matcher(value); 2235 String ret = match.replaceAll("\\\\"); 2236 return ret.replace("\n", " ").replace("\t", " "); 2237 } 2238 2239 /** 2240 * Write the value to output. 2241 * 2242 * @param out The ArrayList to hold all output lines. 2243 * @param node The CldrNode being written. 2244 * @param value The value part for this element. 2245 * @param level Indent level. 2246 * @throws IOException 2247 */ writeLeafNode(JsonElement out, CldrNode node, String value)2248 private void writeLeafNode(JsonElement out, CldrNode node, String value) throws IOException { 2249 2250 String objName = node.getNodeKeyName(); 2251 Map<String, String> attrAsValueMaps = node.getAttrAsValueMap(); 2252 writeLeafNode(out, objName, attrAsValueMaps, value, node.getName(), node.getParent(), node); 2253 } 2254 2255 /** 2256 * Write the value to output. 2257 * 2258 * @param out The ArrayList to hold all output lines. 2259 * @param objName The node's node. 2260 * @param attrAsValueMap Those attributes that will be treated as values. 2261 * @param value The value part for this element. 2262 * @param level Indent level. 2263 * @param nodeName the original nodeName (not distinguished) 2264 * @throws IOException 2265 */ writeLeafNode( JsonElement out, String objName, Map<String, String> attrAsValueMap, String value, final String nodeName, String parent, CldrNode node)2266 private void writeLeafNode( 2267 JsonElement out, 2268 String objName, 2269 Map<String, String> attrAsValueMap, 2270 String value, 2271 final String nodeName, 2272 String parent, 2273 CldrNode node) 2274 throws IOException { 2275 if (objName == null) { 2276 return; 2277 } 2278 value = escapeValue(value); 2279 2280 final boolean valueIsSpacesepArray = 2281 LdmlConvertRules.valueIsSpacesepArray(nodeName, parent); 2282 if (attrAsValueMap.isEmpty()) { 2283 // out.name(objName); 2284 if (value.isEmpty()) { 2285 if (valueIsSpacesepArray) { 2286 // empty value, output as empty space-sep array: [] 2287 out.getAsJsonObject().add(objName, new JsonArray()); 2288 } else { 2289 // empty value. 2290 if (objName.endsWith("SpaceReplacement")) { // foreignSpaceReplacement or 2291 // nativeSpaceReplacement 2292 out.getAsJsonObject().addProperty(objName, ""); 2293 } else { 2294 out.getAsJsonObject().add(objName, new JsonObject()); 2295 } 2296 } 2297 } else if (type == RunType.annotations || type == RunType.annotationsDerived) { 2298 JsonArray a = new JsonArray(); 2299 // split this, so "a | b | c" becomes ["a","b","c"] 2300 for (final String s : Annotations.splitter.split(value.trim())) { 2301 a.add(s); 2302 } 2303 out.getAsJsonObject().add(objName, a); 2304 } else if (valueIsSpacesepArray) { 2305 outputSpaceSepArray(out, objName, value); 2306 } else { 2307 // normal value 2308 out.getAsJsonObject().addProperty(objName, value); 2309 } 2310 return; 2311 } 2312 2313 // If there is no value, but a attribute being treated as value, 2314 // simplify the output. 2315 if (value.isEmpty() && attrAsValueMap.containsKey(LdmlConvertRules.ANONYMOUS_KEY)) { 2316 String v = attrAsValueMap.get(LdmlConvertRules.ANONYMOUS_KEY); 2317 // out.name(objName); 2318 if (valueIsSpacesepArray) { 2319 outputSpaceSepArray(out, objName, v); 2320 } else { 2321 out.getAsJsonObject().addProperty(objName, v); 2322 } 2323 return; 2324 } 2325 2326 JsonObject o = new JsonObject(); 2327 out.getAsJsonObject().add(objName, o); 2328 2329 if (!value.isEmpty()) { 2330 o.addProperty("_value", value); 2331 } 2332 2333 for (final String key : attrAsValueMap.keySet()) { 2334 String rawAttrValue = attrAsValueMap.get(key); 2335 String attrValue = escapeValue(rawAttrValue); 2336 // attribute is prefixed with "_" when being used as key. 2337 String attrAsKey = "_" + key; 2338 if (node != null) { 2339 logger.finest(() -> "Leaf Node: " + node.getUntransformedPath() + " ." + key); 2340 } 2341 if (LdmlConvertRules.ATTRVALUE_AS_ARRAY_SET.contains(key)) { 2342 String[] strings = attrValue.trim().split("\\s+"); 2343 JsonArray a = new JsonArray(); 2344 o.add(attrAsKey, a); 2345 for (String s : strings) { 2346 a.add(s); 2347 } 2348 } else if (node != null 2349 && LdmlConvertRules.attrIsBooleanOmitFalse( 2350 node.getUntransformedPath(), nodeName, parent, key)) { 2351 final Boolean v = Boolean.parseBoolean(rawAttrValue); 2352 if (v) { 2353 o.addProperty(attrAsKey, v); 2354 } // else: omit falsy value 2355 } else { 2356 o.addProperty(attrAsKey, attrValue); 2357 } 2358 } 2359 } 2360 outputSpaceSepArray(JsonElement out, String objName, String v)2361 private void outputSpaceSepArray(JsonElement out, String objName, String v) throws IOException { 2362 JsonArray a = new JsonArray(); 2363 out.getAsJsonObject().add(objName, a); 2364 // split this, so "a b c" becomes ["a","b","c"] 2365 for (final String s : v.trim().split(" ")) { 2366 if (!s.isEmpty()) { 2367 a.add(s); 2368 } 2369 } 2370 } 2371 } 2372