xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/json/Ldml2JsonConverter.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.json;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.collect.Lists;
5 import com.google.gson.Gson;
6 import com.google.gson.GsonBuilder;
7 import com.google.gson.JsonArray;
8 import com.google.gson.JsonElement;
9 import com.google.gson.JsonObject;
10 import com.google.gson.JsonPrimitive;
11 import com.ibm.icu.number.IntegerWidth;
12 import com.ibm.icu.number.LocalizedNumberFormatter;
13 import com.ibm.icu.number.NumberFormatter;
14 import com.ibm.icu.number.Precision;
15 import com.ibm.icu.text.MessageFormat;
16 import com.ibm.icu.util.NoUnit;
17 import com.ibm.icu.util.ULocale;
18 import java.io.BufferedReader;
19 import java.io.File;
20 import java.io.IOException;
21 import java.io.PrintWriter;
22 import java.text.ParseException;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.Collections;
26 import java.util.Iterator;
27 import java.util.LinkedList;
28 import java.util.List;
29 import java.util.Locale;
30 import java.util.Map;
31 import java.util.Map.Entry;
32 import java.util.Optional;
33 import java.util.Set;
34 import java.util.TreeMap;
35 import java.util.TreeSet;
36 import java.util.concurrent.atomic.AtomicInteger;
37 import java.util.logging.Logger;
38 import java.util.regex.Matcher;
39 import java.util.regex.Pattern;
40 import java.util.stream.Collectors;
41 import org.unicode.cldr.draft.FileUtilities;
42 import org.unicode.cldr.draft.ScriptMetadata;
43 import org.unicode.cldr.draft.ScriptMetadata.Info;
44 import org.unicode.cldr.tool.Option.Options;
45 import org.unicode.cldr.util.Annotations;
46 import org.unicode.cldr.util.CLDRConfig;
47 import org.unicode.cldr.util.CLDRFile;
48 import org.unicode.cldr.util.CLDRFile.DraftStatus;
49 import org.unicode.cldr.util.CLDRLocale;
50 import org.unicode.cldr.util.CLDRPaths;
51 import org.unicode.cldr.util.CLDRTool;
52 import org.unicode.cldr.util.CLDRURLS;
53 import org.unicode.cldr.util.CalculatedCoverageLevels;
54 import org.unicode.cldr.util.CldrUtility;
55 import org.unicode.cldr.util.CoverageInfo;
56 import org.unicode.cldr.util.DtdData;
57 import org.unicode.cldr.util.DtdType;
58 import org.unicode.cldr.util.Factory;
59 import org.unicode.cldr.util.FileCopier;
60 import org.unicode.cldr.util.GlossonymConstructor;
61 import org.unicode.cldr.util.Level;
62 import org.unicode.cldr.util.LocaleIDParser;
63 import org.unicode.cldr.util.Pair;
64 import org.unicode.cldr.util.PatternCache;
65 import org.unicode.cldr.util.StandardCodes;
66 import org.unicode.cldr.util.SupplementalDataInfo;
67 import org.unicode.cldr.util.Timer;
68 import org.unicode.cldr.util.XMLSource;
69 import org.unicode.cldr.util.XPathParts;
70 
71 /**
72  * Utility methods to extract data from CLDR repository and export it in JSON format.
73  *
74  * @author shanjian / emmons
75  */
76 @CLDRTool(alias = "ldml2json", description = "Convert CLDR data to JSON")
77 public class Ldml2JsonConverter {
78     // Icons
79     private static final String DONE_ICON = "✅";
80     private static final String GEAR_ICON = "⚙️";
81     private static final String NONE_ICON = "∅";
82     private static final String PACKAGE_ICON = "��";
83     private static final String SECTION_ICON = "��";
84     private static final String TYPE_ICON = "��";
85     private static final String WARN_ICON = "⚠️";
86 
87     // File prefix
88     private static final String CLDR_PKG_PREFIX = "cldr-";
89     private static final String FULL_TIER_SUFFIX = "-full";
90     private static final String MODERN_TIER_SUFFIX = "-modern";
91     private static Logger logger = Logger.getLogger(Ldml2JsonConverter.class.getName());
92 
93     enum RunType {
94         all, // number zero
95         main,
96         supplemental(false, false), // aka 'cldr-core'
97         segments,
98         rbnf(false, true),
99         annotations,
100         annotationsDerived,
101         bcp47(false, false);
102 
103         private final boolean isTiered;
104         private final boolean hasLocales;
105 
RunType()106         RunType() {
107             this.isTiered = true;
108             this.hasLocales = true;
109         }
110 
RunType(boolean isTiered, boolean hasLocales)111         RunType(boolean isTiered, boolean hasLocales) {
112             this.isTiered = isTiered;
113             this.hasLocales = hasLocales;
114         }
115         /**
116          * Is it split into modern/full?
117          *
118          * @return
119          */
tiered()120         public boolean tiered() {
121             return isTiered;
122         }
123         /**
124          * Does it have locale IDs?
125          *
126          * @return
127          */
locales()128         public boolean locales() {
129             return hasLocales;
130         }
131         /**
132          * return the options as a pipe-delimited list
133          *
134          * @return
135          */
valueList()136         public static String valueList() {
137             return String.join(
138                     "|",
139                     Lists.newArrayList(RunType.values()).stream()
140                             .map(t -> t.name())
141                             .toArray(String[]::new));
142         }
143     }
144 
145     private static final StandardCodes sc = StandardCodes.make();
146     private Set<String> defaultContentLocales =
147             SupplementalDataInfo.getInstance().getDefaultContentLocales();
148     private Set<String> skippedDefaultContentLocales = new TreeSet<>();
149 
150     private class AvailableLocales {
151         Set<String> modern = new TreeSet<>();
152         Set<String> full = new TreeSet<>();
153     }
154 
155     private AvailableLocales avl = new AvailableLocales();
156     private Gson gson = new GsonBuilder().setPrettyPrinting().disableHtmlEscaping().create();
157     private static final Options options =
158             new Options(
159                             "Usage: LDML2JsonConverter [OPTIONS] [FILES]\n"
160                                     + "This program converts CLDR data to the JSON format.\n"
161                                     + "Please refer to the following options. \n"
162                                     + "\texample: org.unicode.cldr.json.Ldml2JsonConverter -c xxx -d yyy")
163                     .add(
164                             "bcp47",
165                             'B',
166                             "(true|false)",
167                             "true",
168                             "Whether to strictly use BCP47 tags in filenames and data. Defaults to true.")
169                     .add(
170                             "bcp47-no-subtags",
171                             'T',
172                             "(true|false)",
173                             "true",
174                             "In BCP47 mode, ignore locales with subtags such as en-US-u-va-posix. Defaults to true.")
175                     .add(
176                             "commondir",
177                             'c',
178                             ".*",
179                             CLDRPaths.COMMON_DIRECTORY,
180                             "Common directory for CLDR files, defaults to CldrUtility.COMMON_DIRECTORY")
181                     .add(
182                             "destdir",
183                             'd',
184                             ".*",
185                             CLDRPaths.GEN_DIRECTORY,
186                             "Destination directory for output files, defaults to CldrUtility.GEN_DIRECTORY")
187                     .add(
188                             "match",
189                             'm',
190                             ".*",
191                             ".*",
192                             "Regular expression to define only specific locales or files to be generated")
193                     .add(
194                             "type",
195                             't',
196                             "(" + RunType.valueList() + ")",
197                             "all",
198                             "Type of CLDR data being generated, such as main, supplemental, or segments. All gets all.")
199                     .add(
200                             "resolved",
201                             'r',
202                             "(true|false)",
203                             "false",
204                             "Whether the output JSON for the main directory should be based on resolved or unresolved data")
205                     .add(
206                             "Redundant",
207                             'R',
208                             "(true|false)",
209                             "false",
210                             "Include redundant data from code-fallback and constructed")
211                     .add(
212                             "draftstatus",
213                             's',
214                             "(approved|contributed|provisional|unconfirmed)",
215                             "unconfirmed",
216                             "The minimum draft status of the output data")
217                     .add(
218                             "coverage",
219                             'l',
220                             "(minimal|basic|moderate|modern|comprehensive|optional)",
221                             "optional",
222                             "The maximum coverage level of the output data")
223                     .add(
224                             "packagelist",
225                             'P',
226                             "(true|false)",
227                             "true",
228                             "Whether to output PACKAGES.md and cldr-core/cldr-packages.json (during supplemental/cldr-core)")
229                     .add(
230                             "fullnumbers",
231                             'n',
232                             "(true|false)",
233                             "false",
234                             "Whether the output JSON should output data for all numbering systems, even those not used in the locale")
235                     .add(
236                             "other",
237                             'o',
238                             "(true|false)",
239                             "false",
240                             "Whether to write out the 'other' section, which contains any unmatched paths")
241                     .add(
242                             "packages",
243                             'p',
244                             "(true|false)",
245                             "false",
246                             "Whether to group data files into installable packages")
247                     .add(
248                             "identity",
249                             'i',
250                             "(true|false)",
251                             "true",
252                             "Whether to copy the identity info into all sections containing data")
253                     .add("konfig", 'k', ".*", null, "LDML to JSON configuration file")
254                     .add(
255                             "pkgversion",
256                             'V',
257                             ".*",
258                             getDefaultVersion(),
259                             "Version to be used in writing package files")
260                     .add(
261                             "Modern",
262                             'M',
263                             "(true|false)",
264                             "true",
265                             "Whether to include the -modern tier")
266                     // Primarily useful for non-Maven build systems where CldrUtility.LICENSE may
267                     // not be available as it is put in place by pom.xml
268                     .add(
269                             "license-file",
270                             'L',
271                             ".*",
272                             "",
273                             "Override the license file included in the bundle");
274 
main(String[] args)275     public static void main(String[] args) throws Exception {
276         System.out.println(GEAR_ICON + " " + Ldml2JsonConverter.class.getName() + " options:");
277         options.parse(args, true);
278 
279         Timer overallTimer = new Timer();
280         overallTimer.start();
281         final String rawType = options.get("type").getValue();
282 
283         if (RunType.all.name().equals(rawType)) {
284             // Running all types
285             for (final RunType t : RunType.values()) {
286                 if (t == RunType.all) continue;
287                 System.out.println();
288                 System.out.println(
289                         TYPE_ICON + "#######################  " + t + " #######################");
290                 Timer subTimer = new Timer();
291                 subTimer.start();
292                 processType(t.name());
293                 System.out.println(
294                         TYPE_ICON + " " + t + "\tFinished in " + subTimer.toMeasureString());
295                 System.out.println();
296             }
297         } else {
298             processType(rawType);
299         }
300 
301         System.out.println(
302                 "\n\n###\n\n"
303                         + DONE_ICON
304                         + " Finished everything in "
305                         + overallTimer.toMeasureString());
306     }
307 
processType(final String runType)308     static void processType(final String runType) throws Exception {
309         Ldml2JsonConverter l2jc =
310                 new Ldml2JsonConverter(
311                         options.get("commondir").getValue(),
312                         options.get("destdir").getValue(),
313                         runType,
314                         Boolean.parseBoolean(options.get("fullnumbers").getValue()),
315                         Boolean.parseBoolean(options.get("resolved").getValue()),
316                         options.get("coverage").getValue(),
317                         options.get("match").getValue(),
318                         Boolean.parseBoolean(options.get("packages").getValue()),
319                         options.get("konfig").getValue(),
320                         options.get("pkgversion").getValue(),
321                         Boolean.parseBoolean(options.get("bcp47").getValue()),
322                         Boolean.parseBoolean(options.get("bcp47-no-subtags").getValue()),
323                         Boolean.parseBoolean(options.get("Modern").getValue()),
324                         Boolean.parseBoolean(options.get("Redundant").getValue()),
325                         Optional.ofNullable(options.get("license-file").getValue())
326                                 .filter(s -> !s.isEmpty()));
327 
328         DraftStatus status = DraftStatus.valueOf(options.get("draftstatus").getValue());
329         l2jc.processDirectory(runType, status);
330     }
331 
332     // The CLDR file directory where those official XML files will be found.
333     private String cldrCommonDir;
334     // Where the generated JSON files will be stored.
335     private String outputDir;
336     // Whether data in main should output all numbering systems, even those not in use in the
337     // locale.
338     private boolean fullNumbers;
339     // Whether data in main should be resolved for output.
340     private boolean resolve;
341     // Used to match specific locales for output
342     private String match;
343     // Used to filter based on coverage
344     private int coverageValue;
345     // Whether we should write output files into installable packages
346     private boolean writePackages;
347     // Type of run for this converter: main, supplemental, or segments
348     private final RunType type;
349     // include Redundant data such as apc="apc", en_US="en (US)"
350     private boolean includeRedundant;
351 
352     static class JSONSection implements Comparable<JSONSection> {
353         public String section;
354         public Pattern pattern;
355         public String packageName;
356 
357         @Override
compareTo(JSONSection other)358         public int compareTo(JSONSection other) {
359             return section.compareTo(other.section);
360         }
361     }
362 
363     private Map<String, String> dependencies;
364     private List<JSONSection> sections;
365     private Set<String> packages;
366     private final String pkgVersion;
367     private final boolean strictBcp47;
368     private final boolean writeModernPackage;
369     private final Optional<String> licenseFile;
370     private final boolean skipBcp47LocalesWithSubtags;
371     private LdmlConfigFileReader configFileReader;
372 
Ldml2JsonConverter( String cldrDir, String outputDir, String runType, boolean fullNumbers, boolean resolve, String coverage, String match, boolean writePackages, String configFile, String pkgVersion, boolean strictBcp47, boolean skipBcp47LocalesWithSubtags, boolean writeModernPackage, boolean includeRedundant, Optional<String> licenseFile)373     public Ldml2JsonConverter(
374             String cldrDir,
375             String outputDir,
376             String runType,
377             boolean fullNumbers,
378             boolean resolve,
379             String coverage,
380             String match,
381             boolean writePackages,
382             String configFile,
383             String pkgVersion,
384             boolean strictBcp47,
385             boolean skipBcp47LocalesWithSubtags,
386             boolean writeModernPackage,
387             boolean includeRedundant,
388             Optional<String> licenseFile) {
389         this.writeModernPackage = writeModernPackage;
390         this.strictBcp47 = strictBcp47;
391         this.skipBcp47LocalesWithSubtags = strictBcp47 && skipBcp47LocalesWithSubtags;
392         this.cldrCommonDir = cldrDir;
393         this.outputDir = outputDir;
394         try {
395             this.type = RunType.valueOf(runType);
396         } catch (IllegalArgumentException | NullPointerException e) {
397             throw new RuntimeException(
398                     "runType (-t) invalid: " + runType + " must be one of " + RunType.valueList(),
399                     e);
400         }
401         this.fullNumbers = fullNumbers;
402         this.resolve = resolve;
403         this.match = match;
404         this.writePackages = writePackages;
405         this.coverageValue = Level.get(coverage).getLevel();
406         this.pkgVersion = pkgVersion;
407 
408         LdmlConvertRules.addVersionHandler(pkgVersion.split("\\.")[0]);
409 
410         configFileReader = new LdmlConfigFileReader();
411         configFileReader.read(configFile, type);
412         this.dependencies = configFileReader.getDependencies();
413         this.sections = configFileReader.getSections();
414         this.packages = new TreeSet<>();
415         this.includeRedundant = includeRedundant;
416         this.licenseFile = licenseFile;
417     }
418 
419     /**
420      * @see XPathParts#addInternal
421      */
422     static final Pattern ANNOTATION_CP_REMAP =
423             PatternCache.get("^(.*)\\[@cp=\"(\\[|\\]|'|\"|@|/|=)\"\\](.*)$");
424 
425     /**
426      * Transform the path by applying PATH_TRANSFORMATIONS rules.
427      *
428      * @param pathStr The path string being transformed.
429      * @return The transformed path.
430      */
transformPath(final String pathStr, final String pathPrefix)431     private String transformPath(final String pathStr, final String pathPrefix) {
432         String result = pathStr;
433 
434         // handle annotation cp value
435         Matcher cpm = ANNOTATION_CP_REMAP.matcher(result);
436         if (cpm.matches()) {
437             // We need to avoid breaking the syntax not just of JSON, but of XPATH.
438             final String badCodepointRange = cpm.group(2);
439             StringBuilder sb = new StringBuilder(cpm.group(1)).append("[@cp=\"");
440             // JSON would handle a wide range of things if escaped, but XPATH will not.
441             if (badCodepointRange.codePointCount(0, badCodepointRange.length()) != 1) {
442                 // forbid more than one U+ (because we will have to unescape it.)
443                 throw new IllegalArgumentException(
444                         "Need exactly one codepoint in the @cp string, but got "
445                                 + badCodepointRange
446                                 + " in xpath "
447                                 + pathStr);
448             }
449             badCodepointRange
450                     .codePoints()
451                     .forEach(cp -> sb.append("U+").append(Integer.toHexString(cp).toUpperCase()));
452             sb.append("\"]").append(cpm.group(3));
453             result = sb.toString();
454         }
455 
456         logger.finest(" IN pathStr : " + result);
457         result = LdmlConvertRules.PathTransformSpec.applyAll(result);
458         result = result.replaceFirst("/ldml/", pathPrefix);
459         result = result.replaceFirst("/supplementalData/", pathPrefix);
460 
461         if (result.startsWith("//cldr/supplemental/references/reference")) {
462             // no change
463         } else if (strictBcp47) {
464             // Look for something like <!--@MATCH:set/validity/locale--> in DTD
465             if (result.contains("localeDisplayNames/languages/language")) {
466                 if (result.contains("type=\"root\"")) {
467                     // This is strictBcp47
468                     // Drop translation for 'root' as it conflicts with 'und'
469                     return ""; // 'drop this path'
470                 }
471                 result = fixXpathBcp47(result, "language", "type");
472             } else if (result.contains("likelySubtags/likelySubtag")) {
473                 if (!result.contains("\"iw\"")
474                         && !result.contains("\"in\"")
475                         && !result.contains("\"ji\"")) {
476                     // Special case: preserve 'iw' and 'in' likely subtags
477                     result = fixXpathBcp47(result, "likelySubtag", "from", "to");
478                 } else {
479                     result = underscoreToHypen(result);
480                     logger.warning("Including aliased likelySubtags: " + result);
481                 }
482             } else if (result.startsWith("//cldr/supplemental/weekData/weekOfPreference")) {
483                 result = fixXpathBcp47(result, "weekOfPreference", "locales");
484             } else if (result.startsWith("//cldr/supplemental/metadata/defaultContent")) {
485                 result = fixXpathBcp47(result, "defaultContent", "locales");
486             } else if (result.startsWith("//cldr/supplemental/grammatical")
487                     && result.contains("Data/grammaticalFeatures")) {
488                 result = fixXpathBcp47(result, "grammaticalFeatures", "locales");
489             } else if (result.startsWith("//cldr/supplemental/grammatical")
490                     && result.contains("Data/grammaticalDerivations")) {
491                 result = fixXpathBcp47(result, "grammaticalDerivations", "locales");
492             } else if (result.startsWith("//cldr/supplemental/dayPeriodRuleSet")) {
493                 result = fixXpathBcp47(result, "dayPeriodRules", "locales");
494             } else if (result.startsWith("//cldr/supplemental/plurals")) {
495                 result = fixXpathBcp47(result, "pluralRules", "locales");
496             } else if (result.startsWith("//cldr/supplemental/timeData/hours")) {
497                 result = fixXpathBcp47MishMash(result, "hours", "regions");
498             } else if (result.startsWith("//cldr/supplemental/parentLocales/parentLocale")) {
499                 result = fixXpathBcp47(result, "parentLocale", "parent", "locales");
500             } else if (result.startsWith(
501                     "//cldr/supplemental/territoryInfo/territory/languagePopulation")) {
502                 result = fixXpathBcp47(result, "languagePopulation", "type");
503             } else if (result.contains("languages")
504                     || result.contains("languageAlias")
505                     || result.contains("languageMatches")
506                     || result.contains("likelySubtags")
507                     || result.contains("parentLocale")
508                     || result.contains("locales=")) {
509                 final String oldResult = result;
510                 result = underscoreToHypen(result);
511                 if (!oldResult.equals(result)) {
512                     logger.fine(oldResult + " => " + result);
513                 }
514             }
515         } else if (result.contains("languages")
516                 || result.contains("languageAlias")
517                 || result.contains("languageMatches")
518                 || result.contains("likelySubtags")
519                 || result.contains("parentLocale")
520                 || result.contains("locales=")) {
521             // old behavior: just munge paths..
522             result = underscoreToHypen(result);
523         }
524         logger.finest("OUT pathStr : " + result);
525         logger.finest("result: " + result);
526         return result;
527     }
528 
529     /** Read all paths in the file, and assign each to a JSONSection. Return the map. */
mapPathsToSections( AtomicInteger readCount, int totalCount, CLDRFile file, String pathPrefix, SupplementalDataInfo sdi)530     private Map<JSONSection, List<CldrItem>> mapPathsToSections(
531             AtomicInteger readCount,
532             int totalCount,
533             CLDRFile file,
534             String pathPrefix,
535             SupplementalDataInfo sdi)
536             throws IOException, ParseException {
537         final Map<JSONSection, List<CldrItem>> sectionItems = new TreeMap<>();
538 
539         String locID = file.getLocaleID();
540         Matcher noNumberingSystemMatcher = LdmlConvertRules.NO_NUMBERING_SYSTEM_PATTERN.matcher("");
541         Matcher numberingSystemMatcher = LdmlConvertRules.NUMBERING_SYSTEM_PATTERN.matcher("");
542         Matcher rootIdentityMatcher = LdmlConvertRules.ROOT_IDENTITY_PATTERN.matcher("");
543         Set<String> activeNumberingSystems = new TreeSet<>();
544         activeNumberingSystems.add("latn"); // Always include latin script numbers
545         for (String np : LdmlConvertRules.ACTIVE_NUMBERING_SYSTEM_XPATHS) {
546             String ns = file.getWinningValue(np);
547             if (ns != null && ns.length() > 0) {
548                 activeNumberingSystems.add(ns);
549             }
550         }
551         final DtdType fileDtdType = file.getDtdType();
552         CoverageInfo covInfo = CLDRConfig.getInstance().getCoverageInfo();
553         // read paths in DTD order. The order is critical for JSON processing.
554         final CLDRFile.Status status = new CLDRFile.Status();
555         for (Iterator<String> it =
556                         file.iterator("", DtdData.getInstance(fileDtdType).getDtdComparator(null));
557                 it.hasNext(); ) {
558             int cv = Level.UNDETERMINED.getLevel();
559             final String path = it.next();
560 
561             // Check for code-fallback and constructed first, even before fullpath and value
562             final String localeWhereFound = file.getSourceLocaleID(path, status);
563             if (!includeRedundant
564                     && (localeWhereFound.equals(XMLSource.CODE_FALLBACK_ID)
565                             || // language[@type="apc"] = apc : missing
566                             status.pathWhereFound.equals(
567                                     GlossonymConstructor
568                                             .PSEUDO_PATH))) { // language[@type="fa_AF"] = fa (AF)
569                 // or Farsi (Afghanistan) : missing
570                 // Don't include these paths.
571                 continue;
572             }
573 
574             // now get the fullpath and value
575             String fullPath = file.getFullXPath(path);
576             String value = file.getWinningValue(path);
577 
578             if (fullPath == null) {
579                 fullPath = path;
580             }
581 
582             if (!CLDRFile.isSupplementalName(locID)
583                     && path.startsWith("//ldml/")
584                     && !path.contains("/identity")) {
585                 cv = covInfo.getCoverageValue(path, locID);
586             }
587             if (cv > coverageValue) {
588                 continue;
589             }
590             // Discard root identity element unless the locale is root
591             rootIdentityMatcher.reset(fullPath);
592             if (rootIdentityMatcher.matches() && !"root".equals(locID)) {
593                 continue;
594             }
595 
596             // automatically filter out number symbols and formats without a numbering system
597             noNumberingSystemMatcher.reset(fullPath);
598             if (noNumberingSystemMatcher.matches()) {
599                 continue;
600             }
601 
602             // Filter out non-active numbering systems data unless fullNumbers is specified.
603             numberingSystemMatcher.reset(fullPath);
604             if (numberingSystemMatcher.matches() && !fullNumbers) {
605                 XPathParts xpp = XPathParts.getFrozenInstance(fullPath);
606                 String currentNS = xpp.getAttributeValue(2, "numberSystem");
607                 if (currentNS != null && !activeNumberingSystems.contains(currentNS)) {
608                     continue;
609                 }
610             }
611 
612             // Handle the no inheritance marker.
613             if (resolve && CldrUtility.NO_INHERITANCE_MARKER.equals(value)) {
614                 continue;
615             }
616 
617             // discard draft before transforming
618             final String pathNoDraft = CLDRFile.DRAFT_PATTERN.matcher(path).replaceAll("");
619             final String fullPathNoDraft = CLDRFile.DRAFT_PATTERN.matcher(fullPath).replaceAll("");
620 
621             final String pathNoXmlSpace =
622                     CLDRFile.XML_SPACE_PATTERN.matcher(pathNoDraft).replaceAll("");
623             final String fullPathNoXmlSpace =
624                     CLDRFile.XML_SPACE_PATTERN.matcher(fullPathNoDraft).replaceAll("");
625 
626             final String transformedPath = transformPath(pathNoXmlSpace, pathPrefix);
627             final String transformedFullPath = transformPath(fullPathNoXmlSpace, pathPrefix);
628 
629             if (transformedPath.isEmpty()) {
630                 continue; // skip this path
631             }
632 
633             for (JSONSection js :
634                     sections) { // TODO: move to subfunction, error if >1 section matches
635                 if (js.pattern.matcher(transformedPath).matches()) {
636                     CldrItem item =
637                             new CldrItem(
638                                     transformedPath, transformedFullPath, path, fullPath, value);
639 
640                     List<CldrItem> cldrItems = sectionItems.get(js);
641                     if (cldrItems == null) {
642                         cldrItems = new ArrayList<>();
643                     }
644                     cldrItems.add(item);
645                     sectionItems.put(js, cldrItems);
646                     break;
647                 }
648             }
649         }
650 
651         // TODO: move matcher out of inner loop
652         final Matcher versionInfoMatcher = VERSION_INFO_PATTERN.matcher("");
653         // Automatically copy the version info to any sections that had real data in them.
654         JSONSection otherSection = sections.get(sections.size() - 1);
655         List<CldrItem> others = sectionItems.get(otherSection);
656         if (others == null) {
657             return sectionItems;
658         }
659         List<CldrItem> otherSectionItems = new ArrayList<>(others);
660         int addedItemCount = 0;
661         boolean copyIdentityInfo = Boolean.parseBoolean(options.get("identity").getValue());
662 
663         for (CldrItem item : otherSectionItems) {
664             String thisPath = item.getPath();
665             versionInfoMatcher.reset(thisPath);
666             if (versionInfoMatcher.matches()) {
667                 for (JSONSection js : sections) {
668                     if (sectionItems.get(js) != null
669                             && !js.section.equals("other")
670                             && copyIdentityInfo) {
671                         List<CldrItem> hit = sectionItems.get(js);
672                         hit.add(addedItemCount, item);
673                         sectionItems.put(js, hit);
674                     }
675                     if (js.section.equals("other")) { // did not match one of the regular sections
676                         List<CldrItem> hit = sectionItems.get(js);
677                         hit.remove(item);
678                         sectionItems.put(js, hit);
679                     }
680                 }
681                 addedItemCount++;
682             }
683         }
684         return sectionItems;
685     }
686 
    /**
     * Matches any path that contains a {@code /identity} or {@code /version} step. Used to find
     * the version/identity items that are copied out of the "other" section.
     */
    static final Pattern VERSION_INFO_PATTERN = PatternCache.get(".*/(identity|version).*");

    /**
     * Matches a string that contains a single lowercase ASCII letter between two hyphens (for
     * example {@code -u-} or {@code -t-}). Used on the language-tag form of a file name to detect
     * locales that carry a subtag.
     */
    static final Pattern HAS_SUBTAG = PatternCache.get(".*-[a-z]-.*");
689 
690     /**
691      * Convert CLDR's XML data to JSON format.
692      *
693      * @param file CLDRFile object.
694      * @param outFilename The file name used to save JSON data.
695      * @throws IOException
696      * @throws ParseException
697      * @return total items written in all files. (if 0, file had no effect)
698      */
convertCldrItems( AtomicInteger readCount, int totalCount, String dirName, String filename, String pathPrefix, final Map<JSONSection, List<CldrItem>> sectionItems)699     private int convertCldrItems(
700             AtomicInteger readCount,
701             int totalCount,
702             String dirName,
703             String filename,
704             String pathPrefix,
705             final Map<JSONSection, List<CldrItem>> sectionItems)
706             throws IOException, ParseException {
707         // zone and timezone items are queued for sorting first before they are
708         // processed.
709 
710         final String filenameAsLangTag = unicodeLocaleToString(filename);
711 
712         if (skipBcp47LocalesWithSubtags
713                 && type.locales()
714                 && HAS_SUBTAG.matcher(filenameAsLangTag).matches()) {
715             // Has a subtag, so skip it.
716             // It will show up in the "no output" list.
717             return 0;
718         }
719 
720         int totalItemsInFile = 0;
721 
722         List<Pair<String, Integer>> outputProgress = new LinkedList<>();
723 
724         for (JSONSection js : sections) {
725             if (js.section.equals("IGNORE")) {
726                 continue;
727             }
728             String outFilename;
729             if (type == RunType.rbnf) {
730                 outFilename = filenameAsLangTag + ".json";
731             } else if (type == RunType.bcp47) {
732                 outFilename = filename + ".json";
733             } else if (js.section.equals("other")) {
734                 // If you see other-___.json, it means items that were missing from
735                 // JSON_config_*.txt
736                 outFilename = js.section + "-" + filename + ".json"; // Use original filename
737             } else {
738                 outFilename = js.section + ".json";
739             }
740             String tier = "";
741             boolean writeOther = Boolean.parseBoolean(options.get("other").getValue());
742             if (js.section.equals("other") && !writeOther) {
743                 continue;
744             } else {
745                 StringBuilder outputDirname = new StringBuilder(outputDir);
746                 if (writePackages) {
747                     if (type.tiered()) {
748                         LocaleIDParser lp = new LocaleIDParser();
749                         lp.set(filename);
750                         if (defaultContentLocales.contains(filename)
751                                 && lp.getRegion().length() > 0) {
752                             if (type == RunType.main) {
753                                 skippedDefaultContentLocales.add(filenameAsLangTag);
754                             }
755                             continue;
756                         }
757                         final boolean isModernTier = localeIsModernTier(filename);
758                         if (isModernTier && writeModernPackage) {
759                             tier = MODERN_TIER_SUFFIX;
760                             if (type == RunType.main) {
761                                 avl.modern.add(filenameAsLangTag);
762                             }
763                         } else {
764                             tier = FULL_TIER_SUFFIX;
765                         }
766                         if (type == RunType.main) {
767                             avl.full.add(filenameAsLangTag);
768                         }
769                     } else if (type == RunType.rbnf) {
770                         js.packageName = "rbnf";
771                         tier = "";
772                     } else if (type == RunType.bcp47) {
773                         js.packageName = "bcp47";
774                         tier = "";
775                     }
776                     if (js.packageName != null) {
777                         String packageName = CLDR_PKG_PREFIX + js.packageName + tier;
778                         outputDirname.append("/" + packageName);
779                         packages.add(packageName);
780                     }
781                     outputDirname.append("/" + dirName + "/");
782                     if (type.tiered()) {
783                         outputDirname.append(filenameAsLangTag);
784                     }
785                     logger.fine("outDir: " + outputDirname);
786                     logger.fine("pack: " + js.packageName);
787                     logger.fine("dir: " + dirName);
788                 } else {
789                     outputDirname.append("/" + filename);
790                 }
791 
792                 assert (tier.isEmpty() == !type.tiered());
793 
794                 List<String> outputDirs = new ArrayList<>();
795                 outputDirs.add(outputDirname.toString());
796                 if (writePackages && tier.equals(MODERN_TIER_SUFFIX) && js.packageName != null) {
797                     // if it is in 'modern', add it to 'full' and core also.
798                     outputDirs.add(
799                             outputDirname
800                                     .toString()
801                                     .replaceFirst(MODERN_TIER_SUFFIX, FULL_TIER_SUFFIX));
802                     // Also need to make sure that the full and core package is added
803                     packages.add(CLDR_PKG_PREFIX + js.packageName + FULL_TIER_SUFFIX);
804                 }
805 
806                 for (String outputDir : outputDirs) {
807                     List<CldrItem> theItems = sectionItems.get(js);
808                     if (theItems == null || theItems.size() == 0) {
809                         logger.fine(
810                                 () ->
811                                         ">"
812                                                 + progressPrefix(readCount, totalCount)
813                                                 + outputDir
814                                                 + " - no items to write in "
815                                                 + js.section); // mostly noise
816                         continue;
817                     }
818                     logger.fine(
819                             () ->
820                                     ("?"
821                                             + progressPrefix(
822                                                     readCount, totalCount, filename, js.section)
823                                             + " - "
824                                             + theItems.size()
825                                             + " item(s)"
826                                             + "\r"));
827                     // Create the output dir if it doesn't exist
828                     File dir = new File(outputDir.toString());
829                     if (!dir.exists()) {
830                         dir.mkdirs();
831                     }
832                     JsonObject out = new JsonObject(); // root object for writing
833 
834                     ArrayList<CldrItem> sortingItems = new ArrayList<>();
835                     ArrayList<CldrItem> arrayItems = new ArrayList<>();
836 
837                     ArrayList<CldrNode> nodesForLastItem = new ArrayList<>();
838                     String lastLeadingArrayItemPath = null;
839                     String leadingArrayItemPath = "";
840                     int valueCount = 0;
841                     String previousIdentityPath = null;
842                     for (CldrItem item : theItems) {
843                         if (item.getPath().isEmpty()) {
844                             throw new IllegalArgumentException(
845                                     "empty xpath in "
846                                             + filename
847                                             + " section "
848                                             + js.packageName
849                                             + "/"
850                                             + js.section);
851                         }
852                         if (type == RunType.rbnf) {
853                             item.adjustRbnfPath();
854                         }
855 
856                         // items in the identity section of a file should only ever contain the
857                         // lowest level, even if using
858                         // resolving source, so if we have duplicates ( caused by attributes used as
859                         // a value ) then suppress
860                         // them here.
861                         if (item.getPath().contains("/identity/")) {
862                             String[] parts = item.getPath().split("\\[");
863                             if (parts[0].equals(previousIdentityPath)) {
864                                 continue;
865                             } else {
866                                 XPathParts xpp = XPathParts.getFrozenInstance(item.getPath());
867                                 String territory = xpp.findAttributeValue("territory", "type");
868                                 LocaleIDParser lp = new LocaleIDParser().set(filename);
869                                 if (territory != null
870                                         && territory.length() > 0
871                                         && !territory.equals(lp.getRegion())) {
872                                     continue;
873                                 }
874                                 previousIdentityPath = parts[0];
875                             }
876                         }
877 
878                         // some items need to be split to multiple item before processing. None
879                         // of those items need to be sorted.
880                         // Applies to SPLITTABLE_ATTRS attributes.
881                         CldrItem[] items = item.split();
882                         if (items == null) {
883                             // Nothing to split. Make it a 1-element array.
884                             items = new CldrItem[1];
885                             items[0] = item;
886                         }
887                         valueCount += items.length;
888 
889                         // Hard code this part.
890                         if (item.getUntransformedPath().contains("unitPreference")) {
891                             // Need to do more transforms on this one, so just output version/etc
892                             // here.
893                             continue;
894                         }
895 
896                         for (CldrItem newItem : items) {
897                             // alias will be dropped in conversion, don't count it.
898                             if (newItem.isAliasItem()) {
899                                 valueCount--;
900                             }
901 
902                             // Items like zone items need to be sorted first before write them out.
903                             if (newItem.needsSort()) {
904                                 resolveArrayItems(out, nodesForLastItem, arrayItems);
905                                 sortingItems.add(newItem);
906                             } else {
907                                 Matcher matcher =
908                                         LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher(
909                                                 newItem.getPath());
910                                 if (matcher.matches()) {
911                                     resolveSortingItems(out, nodesForLastItem, sortingItems);
912                                     leadingArrayItemPath = matcher.group(1);
913                                     if (lastLeadingArrayItemPath != null
914                                             && !lastLeadingArrayItemPath.equals(
915                                                     leadingArrayItemPath)) {
916                                         resolveArrayItems(out, nodesForLastItem, arrayItems);
917                                     }
918                                     lastLeadingArrayItemPath = leadingArrayItemPath;
919                                     arrayItems.add(newItem);
920                                 } else {
921                                     // output a single item
922                                     resolveSortingItems(out, nodesForLastItem, sortingItems);
923                                     resolveArrayItems(out, nodesForLastItem, arrayItems);
924                                     outputCldrItem(out, nodesForLastItem, newItem);
925                                     lastLeadingArrayItemPath = "";
926                                 }
927                             }
928                         }
929                     }
930 
931                     resolveSortingItems(out, nodesForLastItem, sortingItems);
932                     resolveArrayItems(out, nodesForLastItem, arrayItems);
933                     if (js.section.contains("unitPreferenceData")) {
934                         outputUnitPreferenceData(js, theItems, out, nodesForLastItem);
935                     }
936 
937                     // closeNodes(out, nodesForLastItem.size() - 2, 0);
938 
939                     // write JSON
940                     try (PrintWriter outf = FileUtilities.openUTF8Writer(outputDir, outFilename)) {
941                         outf.println(gson.toJson(out));
942                     }
943 
944                     String outPath =
945                             new File(outputDir.substring(this.outputDir.length()), outFilename)
946                                     .getPath();
947                     outputProgress.add(
948                             Pair.of(String.format("%20s %s", js.section, outPath), valueCount));
949                     logger.fine(
950                             ">"
951                                     + progressPrefix(readCount, totalCount, filename, js.section)
952                                     + String.format("…%s (%d values)", outPath, valueCount));
953 
954                     totalItemsInFile += valueCount;
955                 }
956             }
957         } // this is the only normal output with debug off
958         StringBuilder outStr = new StringBuilder();
959         if (!outputProgress.isEmpty()) {
960             // Put these first, so the percent is at the end.
961             for (final Pair<String, Integer> outputItem : outputProgress) {
962                 outStr.append(
963                         String.format("\t%6d %s\n", outputItem.getSecond(), outputItem.getFirst()));
964             }
965             outStr.append(
966                     String.format(
967                             "%s%-12s\t  %s\n",
968                             progressPrefix(readCount, totalCount),
969                             filename,
970                             valueSectionsFormat(totalItemsInFile, outputProgress.size())));
971         } else {
972             outStr.append(
973                     String.format(
974                             "%s%-12s\t" + NONE_ICON + " (no output)\n",
975                             progressPrefix(readCount, totalCount),
976                             filename));
977         }
978         synchronized (readCount) { // to prevent interleaved output
979             System.out.print(outStr);
980         }
981         return totalItemsInFile;
982     }
983 
valueSectionsFormat(int values, int sections)984     private static String valueSectionsFormat(int values, int sections) {
985         return MessageFormat.format(
986                 "({0, plural,  one {# value} other {# values}} in {1, plural, one {# section} other {# sections}})",
987                 values,
988                 sections);
989     }
990 
localeIsModernTier(String filename)991     private boolean localeIsModernTier(String filename) {
992         Level lev = CalculatedCoverageLevels.getInstance().getEffectiveCoverageLevel(filename);
993         if (lev == null) return false;
994         return lev.isAtLeast(Level.MODERN);
995     }
996 
localeIsBasicTier(String filename)997     private boolean localeIsBasicTier(String filename) {
998         Level lev = CalculatedCoverageLevels.getInstance().getEffectiveCoverageLevel(filename);
999         if (lev == null) return false;
1000         return lev.isAtLeast(Level.BASIC);
1001     }
1002 
1003     /**
1004      * Entire xpaths and random short strings are passed through this function. Not really Locale ID
1005      * to Language Tag.
1006      *
1007      * @param filename
1008      * @return
1009      */
underscoreToHypen(String filename)1010     private String underscoreToHypen(String filename) {
1011         return filename.replaceAll("_", "-");
1012     }
1013 
1014     /**
1015      * Bottleneck for converting Unicode Locale ID (root, ca_ES_VALENCIA) to String for filename or
1016      * data item. If strictBcp47 is true (default) then it will convert to (und, ca-ES-valencia)
1017      *
1018      * @param locale
1019      * @return
1020      */
unicodeLocaleToString(String locale)1021     private final String unicodeLocaleToString(String locale) {
1022         if (strictBcp47) {
1023             return CLDRLocale.toLanguageTag(locale);
1024         } else {
1025             return underscoreToHypen(locale);
1026         }
1027     }
1028 
1029     Pattern IS_REGION_CODE = PatternCache.get("([A-Z][A-Z])|([0-9][0-9][0-9])");
1030     /**
1031      * Bottleneck for converting Unicode Locale ID (root, ca_ES_VALENCIA) to String for filename or
1032      * data item. If strictBcp47 is true (default) then it will convert to (und, ca-ES-valencia)
1033      * Differs from unicodeLocaleToString in that it will preserve all uppercase region ids
1034      *
1035      * @param locale
1036      * @return
1037      */
unicodeLocaleMishMashToString(String locale)1038     private final String unicodeLocaleMishMashToString(String locale) {
1039         if (strictBcp47) {
1040             if (IS_REGION_CODE.matcher(locale).matches()) {
1041                 return locale;
1042             } else {
1043                 return CLDRLocale.toLanguageTag(locale);
1044             }
1045         } else {
1046             return underscoreToHypen(locale);
1047         }
1048     }
1049 
1050     /**
1051      * Fixup a path to be BCP47 compliant
1052      *
1053      * @param path XPath (usually ends in elementName, but not necessarily)
1054      * @param elementName element to fixup
1055      * @param attributeNames list of attributes to fix
1056      * @return new path
1057      */
fixXpathBcp47(final String path, String elementName, String... attributeNames)1058     final String fixXpathBcp47(final String path, String elementName, String... attributeNames) {
1059         final XPathParts xpp = XPathParts.getFrozenInstance(path).cloneAsThawed();
1060         for (final String attributeName : attributeNames) {
1061             final String oldValue = xpp.findAttributeValue(elementName, attributeName);
1062             if (oldValue == null) continue;
1063             final String oldValues[] = oldValue.split(" ");
1064             String newValue =
1065                     Arrays.stream(oldValues)
1066                             .map((String s) -> unicodeLocaleToString(s))
1067                             .collect(Collectors.joining(" "));
1068             if (!oldValue.equals(newValue)) {
1069                 xpp.setAttribute(elementName, attributeName, newValue);
1070                 logger.finest(attributeName + " = " + oldValue + " -> " + newValue);
1071             }
1072         }
1073         return xpp.toString();
1074     }
1075 
1076     /**
1077      * Fixup a path to be BCP47 compliant …but support a mishmash of regions and locale ids
1078      * CLDR-15069
1079      *
1080      * @param path XPath (usually ends in elementName, but not necessarily)
1081      * @param elementName element to fixup
1082      * @param attributeNames list of attributes to fix
1083      * @return new path
1084      */
fixXpathBcp47MishMash( final String path, String elementName, String... attributeNames)1085     final String fixXpathBcp47MishMash(
1086             final String path, String elementName, String... attributeNames) {
1087         final XPathParts xpp = XPathParts.getFrozenInstance(path).cloneAsThawed();
1088         for (final String attributeName : attributeNames) {
1089             final String oldValue = xpp.findAttributeValue(elementName, attributeName);
1090             if (oldValue == null) continue;
1091             final String oldValues[] = oldValue.split(" ");
1092             String newValue =
1093                     Arrays.stream(oldValues)
1094                             .map((String s) -> unicodeLocaleMishMashToString(s))
1095                             .collect(Collectors.joining(" "));
1096             if (!oldValue.equals(newValue)) {
1097                 xpp.setAttribute(elementName, attributeName, newValue);
1098                 logger.finest(attributeName + " = " + oldValue + " -> " + newValue);
1099             }
1100         }
1101         return xpp.toString();
1102     }
1103 
outputUnitPreferenceData( JSONSection js, List<CldrItem> theItems, JsonObject out, ArrayList<CldrNode> nodesForLastItem)1104     private void outputUnitPreferenceData(
1105             JSONSection js,
1106             List<CldrItem> theItems,
1107             JsonObject out,
1108             ArrayList<CldrNode> nodesForLastItem)
1109             throws ParseException, IOException {
1110         // handle these specially.
1111         // redo earlier loop somewhat.
1112         CldrNode supplementalNode = CldrNode.createNode("cldr", "supplemental", "supplemental");
1113         JsonElement supplementalObject = startNonleafNode(out, supplementalNode);
1114         CldrNode unitPrefNode = CldrNode.createNode("supplemental", js.section, js.section);
1115         final JsonElement o = startNonleafNode(supplementalObject, unitPrefNode);
1116 
1117         // We'll directly write to 'out'
1118 
1119         // Unit preference sorting is a bit more complicated, so we're going to use the CldrItems,
1120         // but collect the results more directly.
1121 
1122         Map<Pair<String, String>, Map<String, List<CldrItem>>> catUsagetoRegionItems =
1123                 new TreeMap<>();
1124 
1125         for (CldrItem item : theItems) {
1126             if (!item.getUntransformedPath().contains("unitPref")) {
1127                 continue;
1128             }
1129             CldrItem[] items = item.split();
1130             if (items == null) {
1131                 throw new IllegalArgumentException("expected unit pref to split: " + item);
1132             }
1133             for (final CldrItem subItem : items) {
1134                 // step 1: make sure the category/usage is there
1135                 final XPathParts xpp = XPathParts.getFrozenInstance(subItem.getPath());
1136                 final String category = xpp.findFirstAttributeValue("category");
1137                 final String usage = xpp.findFirstAttributeValue("usage");
1138                 final String region =
1139                         xpp.findFirstAttributeValue("regions"); // actually one region (split)
1140                 Pair<String, String> key = Pair.of(category, usage);
1141                 Map<String, List<CldrItem>> regionMap =
1142                         catUsagetoRegionItems.computeIfAbsent(key, ignored -> new TreeMap<>());
1143                 List<CldrItem> perRegion =
1144                         regionMap.computeIfAbsent(region, ignored -> new ArrayList<>());
1145                 perRegion.add(subItem);
1146             }
1147         }
1148 
1149         // OK, now start outputting
1150         // Traverse categories/usage/regions
1151         // unitPreferenceData is already open {
1152         catUsagetoRegionItems.keySet().stream()
1153                 .map(p -> p.getFirst())
1154                 .distinct() // for each category
1155                 .forEach(
1156                         category -> {
1157                             JsonObject oo = new JsonObject();
1158                             o.getAsJsonObject().add(category, oo);
1159 
1160                             catUsagetoRegionItems.entrySet().stream()
1161                                     .filter(p -> p.getKey().getFirst().equals(category))
1162                                     .forEach(
1163                                             ent -> {
1164                                                 final String usage = ent.getKey().getSecond();
1165                                                 JsonObject ooo = new JsonObject();
1166                                                 oo.getAsJsonObject().add(usage, ooo);
1167 
1168                                                 ent.getValue()
1169                                                         .forEach(
1170                                                                 (region, list) -> {
1171                                                                     JsonArray array =
1172                                                                             new JsonArray();
1173                                                                     ooo.getAsJsonObject()
1174                                                                             .add(region, array);
1175                                                                     list.forEach(
1176                                                                             item -> {
1177                                                                                 final XPathParts
1178                                                                                         xpp =
1179                                                                                                 XPathParts
1180                                                                                                         .getFrozenInstance(
1181                                                                                                                 item
1182                                                                                                                         .getPath());
1183                                                                                 JsonObject u =
1184                                                                                         new JsonObject();
1185                                                                                 array.add(u);
1186                                                                                 u.addProperty(
1187                                                                                         "unit",
1188                                                                                         item
1189                                                                                                 .getValue());
1190                                                                                 if (xpp
1191                                                                                         .containsAttribute(
1192                                                                                                 "geq")) {
1193                                                                                     u.addProperty(
1194                                                                                             "geq",
1195                                                                                             Double
1196                                                                                                     .parseDouble(
1197                                                                                                             xpp
1198                                                                                                                     .findFirstAttributeValue(
1199                                                                                                                             "geq")));
1200                                                                                 }
1201                                                                             });
1202                                                                 });
1203                                             });
1204                         });
1205 
1206         // Computer, switch to 'automatic' navigation
1207         // We'll let closeNodes take over.
1208         nodesForLastItem.add(unitPrefNode); // unitPreferenceData }
1209     }
1210 
1211     /**
1212      * Creates the packaging files ( i.e. package.json ) for a particular package
1213      *
1214      * @param packageName The name of the installable package
1215      */
writePackagingFiles(String outputDir, String packageName)1216     public void writePackagingFiles(String outputDir, String packageName) throws IOException {
1217         File dir = new File(outputDir.toString());
1218         if (!dir.exists()) {
1219             dir.mkdirs();
1220         }
1221         writePackageJson(outputDir, packageName);
1222         writeBowerJson(outputDir, packageName);
1223         writeReadme(outputDir, packageName);
1224     }
1225 
1226     /** Write the ## License section */
writeCopyrightSection(PrintWriter out)1227     public void writeCopyrightSection(PrintWriter out) {
1228         out.println(
1229                 CldrUtility.getCopyrightMarkdown()
1230                         + "\n"
1231                         + "A copy of the license is included as [LICENSE](./LICENSE).");
1232     }
1233 
    /**
     * Write the readme fragment from cldr-json-readme.md, then an empty line, then the copyright
     * section.
     *
     * @param outf writer that receives the fragment and the copyright section
     * @throws IOException
     */
    private void writeReadmeSection(PrintWriter outf) throws IOException {
        // copy the shared readme fragment into the output
        FileCopier.copy(CldrUtility.getUTF8Data("cldr-json-readme.md"), outf);
        // separate the fragment from the license text that follows
        outf.println();
        writeCopyrightSection(outf);
    }
1245 
writeReadme(String outputDir, String packageName)1246     public void writeReadme(String outputDir, String packageName) throws IOException {
1247         final String basePackageName = getBasePackageName(packageName);
1248         try (PrintWriter outf =
1249                 FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "README.md"); ) {
1250             outf.println("# " + packageName);
1251             outf.println();
1252             outf.println(configFileReader.getPackageDescriptions().get(basePackageName));
1253             outf.println();
1254             if (packageName.endsWith(FULL_TIER_SUFFIX)) {
1255                 outf.println("This package contains all locales.");
1256                 outf.println();
1257             } else if (packageName.endsWith(MODERN_TIER_SUFFIX)) {
1258                 outf.println(
1259                         "**Deprecated** This package contains only the set of locales listed as modern coverage. Use `"
1260                                 + CLDR_PKG_PREFIX
1261                                 + basePackageName
1262                                 + FULL_TIER_SUFFIX
1263                                 + "` and locale coverage data instead. The -modern packages are scheduled to be removed in v46, see [CLDR-16465](https://unicode-org.atlassian.net/browse/CLDR-16465).");
1264                 outf.println();
1265             }
1266             outf.println();
1267             outf.println(getNpmBadge(packageName));
1268             outf.println();
1269             writeReadmeSection(outf);
1270         }
1271         try (PrintWriter outf =
1272                 FileUtilities.openUTF8Writer(
1273                         outputDir + "/" + packageName, CldrUtility.LICENSE); ) {
1274             if (licenseFile.isPresent()) {
1275                 try (BufferedReader br = FileUtilities.openUTF8Reader("", licenseFile.get()); ) {
1276                     FileCopier.copy(br, outf);
1277                 }
1278             } else {
1279                 FileCopier.copy(CldrUtility.getUTF8Data(CldrUtility.LICENSE), outf);
1280             }
1281         }
1282     }
1283 
getBasePackageName(final String packageName)1284     String getBasePackageName(final String packageName) {
1285         String basePackageName = packageName;
1286         if (basePackageName.startsWith(CLDR_PKG_PREFIX)) {
1287             basePackageName = basePackageName.substring(CLDR_PKG_PREFIX.length());
1288         }
1289         if (basePackageName.endsWith(FULL_TIER_SUFFIX)) {
1290             basePackageName =
1291                     basePackageName.substring(
1292                             0, basePackageName.length() - FULL_TIER_SUFFIX.length());
1293         } else if (basePackageName.endsWith(MODERN_TIER_SUFFIX)) {
1294             basePackageName =
1295                     basePackageName.substring(
1296                             0, basePackageName.length() - MODERN_TIER_SUFFIX.length());
1297         }
1298         return basePackageName;
1299     }
1300 
writeBasicInfo(JsonObject obj, String packageName, boolean isNPM)1301     public void writeBasicInfo(JsonObject obj, String packageName, boolean isNPM) {
1302         obj.addProperty("name", packageName);
1303         obj.addProperty("version", pkgVersion);
1304 
1305         String[] packageNameParts = packageName.split("-");
1306         String dependency = dependencies.get(packageNameParts[1]);
1307         if (dependency != null) {
1308             String[] dependentPackageNames = new String[1];
1309             String tier = packageNameParts[packageNameParts.length - 1];
1310             if (dependency.equals("core") || dependency.equals("bcp47")) {
1311                 dependentPackageNames[0] = CLDR_PKG_PREFIX + dependency;
1312             } else {
1313                 dependentPackageNames[0] = CLDR_PKG_PREFIX + dependency + "-" + tier;
1314             }
1315 
1316             JsonObject dependencies = new JsonObject();
1317             for (String dependentPackageName : dependentPackageNames) {
1318                 if (dependentPackageName != null) {
1319                     dependencies.addProperty(dependentPackageName, pkgVersion);
1320                 }
1321             }
1322             obj.add(isNPM ? "peerDependencies" : "dependencies", dependencies);
1323         }
1324     }
1325 
1326     /**
1327      * Default for version string
1328      *
1329      * @return
1330      */
getDefaultVersion()1331     private static String getDefaultVersion() {
1332         String versionString = CLDRFile.GEN_VERSION;
1333         while (versionString.split("\\.").length < 3) {
1334             versionString = versionString + ".0";
1335         }
1336         return versionString;
1337     }
1338 
writePackageJson(String outputDir, String packageName)1339     public void writePackageJson(String outputDir, String packageName) throws IOException {
1340         PrintWriter outf =
1341                 FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "package.json");
1342         logger.fine(
1343                 PACKAGE_ICON
1344                         + " Creating packaging file => "
1345                         + outputDir
1346                         + File.separator
1347                         + packageName
1348                         + File.separator
1349                         + "package.json");
1350         JsonObject obj = new JsonObject();
1351         writeBasicInfo(obj, packageName, true);
1352 
1353         JsonArray maintainers = new JsonArray();
1354         JsonObject primaryMaintainer = new JsonObject();
1355         JsonObject secondaryMaintainer = new JsonObject();
1356 
1357         final String basePackageName = getBasePackageName(packageName);
1358         String description = configFileReader.getPackageDescriptions().get(basePackageName);
1359         if (packageName.endsWith(MODERN_TIER_SUFFIX)) {
1360             description = description + " (modern only: deprecated)";
1361         }
1362         obj.addProperty("description", description);
1363 
1364         obj.addProperty("homepage", CLDRURLS.CLDR_HOMEPAGE);
1365         obj.addProperty("author", CLDRURLS.UNICODE_CONSORTIUM);
1366 
1367         primaryMaintainer.addProperty("name", "Steven R. Loomis");
1368         primaryMaintainer.addProperty("email", "[email protected]");
1369 
1370         maintainers.add(primaryMaintainer);
1371 
1372         secondaryMaintainer.addProperty("name", "John Emmons");
1373         secondaryMaintainer.addProperty("email", "[email protected]");
1374         secondaryMaintainer.addProperty("url", "https://github.com/JCEmmons");
1375 
1376         maintainers.add(secondaryMaintainer);
1377         obj.add("maintainers", maintainers);
1378 
1379         JsonObject repository = new JsonObject();
1380         repository.addProperty("type", "git");
1381         repository.addProperty("url", "git://github.com/unicode-cldr/cldr-json.git");
1382         obj.add("repository", repository);
1383 
1384         obj.addProperty("license", CLDRURLS.UNICODE_SPDX);
1385         obj.addProperty("bugs", CLDRURLS.CLDR_NEWTICKET_URL);
1386 
1387         final SupplementalDataInfo sdi = CLDRConfig.getInstance().getSupplementalDataInfo();
1388         obj.addProperty("cldrVersion", sdi.getCldrVersionString());
1389         obj.addProperty("unicodeVersion", sdi.getUnicodeVersionString());
1390 
1391         outf.println(gson.toJson(obj));
1392         outf.close();
1393     }
1394 
writeBowerJson(String outputDir, String packageName)1395     public void writeBowerJson(String outputDir, String packageName) throws IOException {
1396         PrintWriter outf =
1397                 FileUtilities.openUTF8Writer(outputDir + "/" + packageName, "bower.json");
1398         logger.fine(
1399                 PACKAGE_ICON
1400                         + " Creating packaging file => "
1401                         + outputDir
1402                         + File.separator
1403                         + packageName
1404                         + File.separator
1405                         + "bower.json");
1406         JsonObject obj = new JsonObject();
1407         writeBasicInfo(obj, packageName, false);
1408         if (type == RunType.supplemental) {
1409             JsonArray mainPaths = new JsonArray();
1410             mainPaths.add(new JsonPrimitive("availableLocales.json"));
1411             mainPaths.add(new JsonPrimitive("defaultContent.json")); // Handled specially
1412             mainPaths.add(new JsonPrimitive("scriptMetadata.json"));
1413             mainPaths.add(new JsonPrimitive(type.toString() + "/*.json"));
1414             obj.add("main", mainPaths);
1415         } else if (type == RunType.rbnf) {
1416             obj.addProperty("main", type.toString() + "/*.json");
1417         } else {
1418             obj.addProperty("main", type.toString() + "/**/*.json");
1419         }
1420 
1421         JsonArray ignorePaths = new JsonArray();
1422         ignorePaths.add(new JsonPrimitive(".gitattributes"));
1423         ignorePaths.add(new JsonPrimitive("README.md"));
1424         obj.add("ignore", ignorePaths);
1425         obj.addProperty("license", CLDRURLS.UNICODE_SPDX);
1426 
1427         outf.println(gson.toJson(obj));
1428         outf.close();
1429     }
1430 
writeDefaultContent(String outputDir)1431     public void writeDefaultContent(String outputDir) throws IOException {
1432         PrintWriter outf =
1433                 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "defaultContent.json");
1434         System.out.println(
1435                 PACKAGE_ICON
1436                         + " Creating packaging file => "
1437                         + outputDir
1438                         + "/cldr-core"
1439                         + File.separator
1440                         + "defaultContent.json");
1441         JsonObject obj = new JsonObject();
1442         obj.add("defaultContent", gson.toJsonTree(skippedDefaultContentLocales));
1443         outf.println(gson.toJson(obj));
1444         outf.close();
1445     }
1446 
writeCoverageLevels(String outputDir)1447     public void writeCoverageLevels(String outputDir) throws IOException {
1448         try (PrintWriter outf =
1449                 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "coverageLevels.json"); ) {
1450             final Map<String, String> covlocs = new TreeMap<>();
1451             System.out.println(
1452                     PACKAGE_ICON
1453                             + " Creating packaging file => "
1454                             + outputDir
1455                             + "/cldr-core"
1456                             + File.separator
1457                             + "coverageLevels.json from coverageLevels.txt");
1458             CalculatedCoverageLevels ccl = CalculatedCoverageLevels.getInstance();
1459             for (final Map.Entry<String, org.unicode.cldr.util.Level> e :
1460                     ccl.getLevels().entrySet()) {
1461                 final String uloc = e.getKey();
1462                 final String level = e.getValue().name().toLowerCase();
1463                 final String bcp47loc = unicodeLocaleToString(uloc);
1464                 if (covlocs.put(bcp47loc, level) != null) {
1465                     throw new IllegalArgumentException(
1466                             "coverageLevels.txt: duplicate locale " + bcp47loc);
1467                 }
1468             }
1469             final Map<String, String> effectiveCovlocs = new TreeMap<>();
1470             avl.full.forEach(
1471                     loc -> {
1472                         final String uloc = ULocale.forLanguageTag(loc).toString();
1473                         final Level lev = ccl.getEffectiveCoverageLevel(uloc);
1474                         if (lev != null) {
1475                             effectiveCovlocs.put(loc, lev.name().toLowerCase());
1476                         }
1477                     });
1478             JsonObject obj = new JsonObject();
1479             // exactly what is in CLDR .txt file
1480             obj.add("coverageLevels", gson.toJsonTree(covlocs));
1481 
1482             // resolved, including all available locales
1483             obj.add("effectiveCoverageLevels", gson.toJsonTree(effectiveCovlocs));
1484             outf.println(gson.toJson(obj));
1485         }
1486     }
1487 
writeAvailableLocales(String outputDir)1488     public void writeAvailableLocales(String outputDir) throws IOException {
1489         PrintWriter outf =
1490                 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "availableLocales.json");
1491         System.out.println(
1492                 PACKAGE_ICON
1493                         + " Creating packaging file => "
1494                         + outputDir
1495                         + "/cldr-core"
1496                         + File.separator
1497                         + "availableLocales.json");
1498         JsonObject obj = new JsonObject();
1499         obj.add("availableLocales", gson.toJsonTree(avl));
1500         outf.println(gson.toJson(obj));
1501         outf.close();
1502     }
1503 
writeScriptMetadata(String outputDir)1504     public void writeScriptMetadata(String outputDir) throws IOException {
1505         PrintWriter outf =
1506                 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "scriptMetadata.json");
1507         System.out.println(
1508                 "Creating script metadata file => "
1509                         + outputDir
1510                         + File.separator
1511                         + "cldr-core"
1512                         + File.separator
1513                         + "scriptMetadata.json");
1514         Map<String, Info> scriptInfo = new TreeMap<>();
1515         for (String script : ScriptMetadata.getScripts()) {
1516             Info i = ScriptMetadata.getInfo(script);
1517             scriptInfo.put(script, i);
1518         }
1519         if (ScriptMetadata.errors.size() > 0) {
1520             System.err.println(Joiner.on("\n\t").join(ScriptMetadata.errors));
1521             // throw new IllegalArgumentException();
1522         }
1523 
1524         JsonObject obj = new JsonObject();
1525         obj.add("scriptMetadata", gson.toJsonTree(scriptInfo));
1526         outf.println(gson.toJson(obj));
1527         outf.close();
1528     }
1529 
writePackageList(String outputDir)1530     public void writePackageList(String outputDir) throws IOException {
1531         PrintWriter outf =
1532                 FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "cldr-packages.json");
1533         System.out.println(
1534                 PACKAGE_ICON
1535                         + " Creating packaging metadata file => "
1536                         + outputDir
1537                         + File.separator
1538                         + "cldr-core"
1539                         + File.separator
1540                         + "cldr-packages.json and PACKAGES.md");
1541         PrintWriter pkgs = FileUtilities.openUTF8Writer(outputDir + "/..", "PACKAGES.md");
1542 
1543         pkgs.println("# CLDR JSON Packages");
1544         pkgs.println();
1545 
1546         LdmlConfigFileReader uberReader = new LdmlConfigFileReader();
1547 
1548         for (RunType r : RunType.values()) {
1549             if (r == RunType.all) continue;
1550             uberReader.read(null, r);
1551         }
1552 
1553         TreeMap<String, String> pkgsToDesc = new TreeMap<>();
1554 
1555         JsonObject obj = new JsonObject();
1556         obj.addProperty("license", CLDRURLS.UNICODE_SPDX);
1557         obj.addProperty("bugs", CLDRURLS.CLDR_NEWTICKET_URL);
1558         obj.addProperty("homepage", CLDRURLS.CLDR_HOMEPAGE);
1559         obj.addProperty("version", pkgVersion);
1560 
1561         JsonArray packages = new JsonArray();
1562         for (Map.Entry<String, String> e : uberReader.getPackageDescriptions().entrySet()) {
1563             final String baseName = e.getKey();
1564 
1565             if (baseName.equals("IGNORE") || baseName.equals("cal")) continue;
1566             if (baseName.equals("core") || baseName.equals("rbnf") || baseName.equals("bcp47")) {
1567                 JsonObject packageEntry = new JsonObject();
1568                 packageEntry.addProperty("description", e.getValue());
1569                 packageEntry.addProperty("name", CLDR_PKG_PREFIX + baseName);
1570                 packages.add(packageEntry);
1571                 pkgsToDesc.put(
1572                         packageEntry.get("name").getAsString(),
1573                         packageEntry.get("description").getAsString());
1574             } else {
1575                 {
1576                     JsonObject packageEntry = new JsonObject();
1577                     packageEntry.addProperty("description", e.getValue());
1578                     packageEntry.addProperty("tier", "full");
1579                     packageEntry.addProperty("name", CLDR_PKG_PREFIX + baseName + FULL_TIER_SUFFIX);
1580                     packages.add(packageEntry);
1581                     pkgsToDesc.put(
1582                             packageEntry.get("name").getAsString(),
1583                             packageEntry.get("description").getAsString());
1584                 }
1585                 {
1586                     JsonObject packageEntry = new JsonObject();
1587                     packageEntry.addProperty("description", e.getValue() + " modern (deprecated)");
1588                     packageEntry.addProperty("tier", "modern");
1589                     packageEntry.addProperty(
1590                             "name", CLDR_PKG_PREFIX + baseName + MODERN_TIER_SUFFIX);
1591                     packages.add(packageEntry);
1592                     pkgsToDesc.put(
1593                             packageEntry.get("name").getAsString(),
1594                             packageEntry.get("description").getAsString());
1595                 }
1596             }
1597         }
1598         pkgs.println();
1599         for (Map.Entry<String, String> e : pkgsToDesc.entrySet()) {
1600             pkgs.println("### [" + e.getKey() + "](./cldr-json/" + e.getKey() + "/)");
1601             pkgs.println();
1602             if (e.getKey().contains("-modern")) {
1603                 pkgs.println(
1604                         " - **Note: Deprecated** see [CLDR-16465](https://unicode-org.atlassian.net/browse/CLDR-16465).");
1605             }
1606             pkgs.println(" - " + e.getValue());
1607             pkgs.println(" - " + getNpmBadge(e.getKey()));
1608             pkgs.println();
1609         }
1610         obj.add("packages", packages);
1611         outf.println(gson.toJson(obj));
1612         outf.close();
1613         pkgs.println("## JSON Metadata");
1614         pkgs.println();
1615         pkgs.println(
1616                 "Package metadata is available at [`cldr-core`/cldr-packages.json](./cldr-json/cldr-core/cldr-packages.json)");
1617         pkgs.println();
1618 
1619         writeReadmeSection(pkgs);
1620         pkgs.close();
1621     }
1622 
getNpmBadge(final String packageName)1623     private String getNpmBadge(final String packageName) {
1624         return String.format(
1625                 "[![NPM version](https://img.shields.io/npm/v/%s.svg?style=flat)](https://www.npmjs.org/package/%s)",
1626                 packageName, packageName);
1627     }
1628 
1629     /**
1630      * Process the pending sorting items.
1631      *
1632      * @param out The ArrayList to hold all output lines.
1633      * @param nodesForLastItem All the nodes from last item.
1634      * @param sortingItems The item list that should be sorted before output.
1635      * @throws IOException
1636      * @throws ParseException
1637      */
resolveSortingItems( JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> sortingItems)1638     private void resolveSortingItems(
1639             JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> sortingItems)
1640             throws IOException, ParseException {
1641         ArrayList<CldrItem> arrayItems = new ArrayList<>();
1642         String lastLeadingArrayItemPath = null;
1643 
1644         if (!sortingItems.isEmpty()) {
1645             Collections.sort(sortingItems);
1646             for (CldrItem item : sortingItems) {
1647                 Matcher matcher = LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher(item.getPath());
1648                 if (matcher.matches()) {
1649                     String leadingArrayItemPath = matcher.group(1);
1650                     if (lastLeadingArrayItemPath != null
1651                             && !lastLeadingArrayItemPath.equals(leadingArrayItemPath)) {
1652                         resolveArrayItems(out, nodesForLastItem, arrayItems);
1653                     }
1654                     lastLeadingArrayItemPath = leadingArrayItemPath;
1655                     arrayItems.add(item);
1656                 } else {
1657                     outputCldrItem(out, nodesForLastItem, item);
1658                 }
1659             }
1660             sortingItems.clear();
1661             resolveArrayItems(out, nodesForLastItem, arrayItems);
1662         }
1663     }
1664 
1665     /**
1666      * Process the pending array items.
1667      *
1668      * @param out The ArrayList to hold all output lines.
1669      * @param nodesForLastItem All the nodes from last item.
1670      * @param arrayItems The item list that should be output as array.
1671      * @throws IOException
1672      * @throws ParseException
1673      */
resolveArrayItems( JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> arrayItems)1674     private void resolveArrayItems(
1675             JsonObject out, ArrayList<CldrNode> nodesForLastItem, ArrayList<CldrItem> arrayItems)
1676             throws IOException, ParseException {
1677         if (!arrayItems.isEmpty()) {
1678             CldrItem firstItem = arrayItems.get(0);
1679             if (firstItem.needsSort()) {
1680                 Collections.sort(arrayItems);
1681                 firstItem = arrayItems.get(0);
1682             }
1683 
1684             int arrayLevel = getArrayIndentLevel(firstItem); // only used for trim
1685 
1686             JsonArray array = outputStartArray(out, nodesForLastItem, firstItem, arrayLevel);
1687 
1688             // Previous statement closed for first element, trim nodesForLastItem
1689             // so that it will not happen again inside.
1690             int len = nodesForLastItem.size();
1691             while (len > arrayLevel) {
1692                 nodesForLastItem.remove(len - 1);
1693                 len--;
1694             }
1695             for (CldrItem insideItem : arrayItems) {
1696                 outputArrayItem(array, insideItem, nodesForLastItem, arrayLevel);
1697             }
1698             arrayItems.clear();
1699 
1700             int lastLevel = nodesForLastItem.size() - 1;
1701             // closeNodes(out, lastLevel, arrayLevel);
1702             // out.endArray();
1703             for (int i = arrayLevel - 1; i < lastLevel; i++) {
1704                 nodesForLastItem.remove(i);
1705             }
1706         }
1707     }
1708 
1709     /**
1710      * Find the indent level on which array should be inserted.
1711      *
1712      * @param item The CldrItem being examined.
1713      * @return The array indent level.
1714      * @throws ParseException
1715      */
getArrayIndentLevel(CldrItem item)1716     private int getArrayIndentLevel(CldrItem item) throws ParseException {
1717         Matcher matcher = LdmlConvertRules.ARRAY_ITEM_PATTERN.matcher(item.getPath());
1718         if (!matcher.matches()) {
1719             System.out.println("No match found for " + item.getPath() + ", this shouldn't happen.");
1720             return 0;
1721         }
1722 
1723         String leadingPath = matcher.group(1);
1724         CldrItem fakeItem = new CldrItem(leadingPath, leadingPath, leadingPath, leadingPath, "");
1725         return fakeItem.getNodesInPath().size() - 1;
1726     }
1727 
1728     /**
1729      * Write the start of an array.
1730      *
1731      * @param out The root object
1732      * @param nodesForLastItem Nodes in path for last CldrItem.
1733      * @param item The CldrItem to be processed.
1734      * @param arrayLevel The level on which array is laid out.
1735      * @throws IOException
1736      * @throws ParseException
1737      */
outputStartArray( JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item, int arrayLevel)1738     private JsonArray outputStartArray(
1739             JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item, int arrayLevel)
1740             throws IOException, ParseException {
1741 
1742         ArrayList<CldrNode> nodesInPath = item.getNodesInPath();
1743 
1744         JsonElement o = out;
1745 
1746         // final CldrNode last = nodesInPath.get(nodesInPath.size()-1);
1747 
1748         // Output nodes up to parent of 'arrayLevel'
1749         for (int i = 1; i < arrayLevel - 1; i++) {
1750             final CldrNode node = nodesInPath.get(i);
1751             o = startNonleafNode(o, node);
1752         }
1753 
1754         // at arrayLevel, we have a named Array.
1755         // Get the name of the parent of the array
1756         String objName = nodesInPath.get(arrayLevel - 1).getNodeKeyName();
1757         JsonArray array = new JsonArray();
1758         o.getAsJsonObject().add(objName, array);
1759 
1760         return array;
1761     }
1762 
1763     /**
1764      * Write a CLDR item to file.
1765      *
1766      * <p>"usesMetazone" will be checked to see if it is current. Those non-current item will be
1767      * dropped.
1768      *
1769      * @param out The ArrayList to hold all output lines.
1770      * @param nodesForLastItem
1771      * @param item The CldrItem to be processed.
1772      * @throws IOException
1773      * @throws ParseException
1774      */
outputCldrItem(JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item)1775     private void outputCldrItem(JsonObject out, ArrayList<CldrNode> nodesForLastItem, CldrItem item)
1776             throws IOException, ParseException {
1777         // alias has been resolved, no need to keep it.
1778         if (item.isAliasItem()) {
1779             return;
1780         }
1781 
1782         ArrayList<CldrNode> nodesInPath = item.getNodesInPath();
1783         int arraySize = nodesInPath.size();
1784 
1785         int i = 0;
1786         if (i == nodesInPath.size() && type != RunType.rbnf) {
1787             System.err.println(
1788                     "This nodes and last nodes has identical path. ("
1789                             + item.getPath()
1790                             + ") Some distinguishing attributes wrongly removed?");
1791             return;
1792         }
1793 
1794         // close previous nodes
1795         // closeNodes(out, nodesForLastItem.size() - 2, i);
1796         JsonElement o = out;
1797         for (; i < nodesInPath.size() - 1; ++i) {
1798             o = startNonleafNode(o, nodesInPath.get(i));
1799         }
1800 
1801         writeLeafNode(o, nodesInPath.get(i), item.getValue());
1802         nodesForLastItem.clear();
1803         nodesForLastItem.addAll(nodesInPath);
1804     }
1805 
1806     /**
1807      * Start a non-leaf node, adding it if not there.
1808      *
1809      * @param out The input JsonObject
1810      * @param node The node being written.
1811      * @throws IOException
1812      */
startNonleafNode(JsonElement out, final CldrNode node)1813     private JsonElement startNonleafNode(JsonElement out, final CldrNode node) throws IOException {
1814         String objName = node.getNodeKeyName();
1815         // Some node should be skipped as indicated by objName being null.
1816         logger.finest(() -> "objName= " + objName + " for path " + node.getUntransformedPath());
1817         if (objName == null
1818                 || objName.equals("cldr")
1819                 || objName.equals("ldmlBCP47")) { // Skip root 'cldr' node
1820             return out;
1821         }
1822 
1823         Map<String, String> attrAsValueMap = node.getAttrAsValueMap();
1824 
1825         String name;
1826 
1827         if (type == RunType.annotations || type == RunType.annotationsDerived) {
1828             if (objName.startsWith("U+")) {
1829                 // parse U+22 -> "   etc
1830                 name = (com.ibm.icu.text.UTF16.valueOf(Integer.parseInt(objName.substring(2), 16)));
1831             } else {
1832                 name = (objName);
1833             }
1834         } else {
1835             name = (objName);
1836         }
1837 
1838         JsonElement o = out.getAsJsonObject().get(name);
1839 
1840         if (o == null) {
1841             o = new JsonObject();
1842             out.getAsJsonObject().add(name, o);
1843         }
1844 
1845         for (final String key : attrAsValueMap.keySet()) {
1846             logger.finest(() -> "Non-Leaf Node: " + node.getUntransformedPath() + " ." + key);
1847             String rawAttrValue = attrAsValueMap.get(key);
1848             String value = escapeValue(rawAttrValue);
1849             // attribute is prefixed with "_" when being used as key.
1850             String attrAsKey = "_" + key;
1851             if (LdmlConvertRules.attrIsBooleanOmitFalse(
1852                     node.getUntransformedPath(), node.getName(), node.getParent(), key)) {
1853                 final Boolean v = Boolean.parseBoolean(rawAttrValue);
1854                 if (v) {
1855                     o.getAsJsonObject().addProperty(attrAsKey, v);
1856                 } // else, omit
1857             } else {
1858                 // hack for localeRules
1859                 if (attrAsKey.equals("_localeRules")) {
1860                     // find the _localeRules object, add if it didn't exist
1861                     JsonElement localeRules = out.getAsJsonObject().get(attrAsKey);
1862                     if (localeRules == null) {
1863                         localeRules = new JsonObject();
1864                         out.getAsJsonObject().add(attrAsKey, localeRules);
1865                     }
1866                     // find the sibling object, add if it did't exist ( this will be parentLocale or
1867                     // collations etc.)
1868                     JsonElement sibling = localeRules.getAsJsonObject().get(name);
1869                     if (sibling == null) {
1870                         sibling = new JsonObject();
1871                         localeRules.getAsJsonObject().add(name, sibling);
1872                     }
1873                     // get the 'parent' attribute, which wil be the value
1874                     final String parent =
1875                             XPathParts.getFrozenInstance(node.getUntransformedPath())
1876                                     .getAttributeValue(-1, "parent");
1877                     // finally, we add something like "nonLikelyScript: und"
1878                     sibling.getAsJsonObject().addProperty(value, parent);
1879                 } else {
1880                     o.getAsJsonObject().addProperty(attrAsKey, value);
1881                 }
1882             }
1883         }
1884         return o;
1885     }
1886 
1887     /**
1888      * Write a CLDR item to file.
1889      *
1890      * <p>"usesMetazone" will be checked to see if it is current. Those non-current item will be
1891      * dropped.
1892      *
1893      * @param out The ArrayList to hold all output lines.
1894      * @param item The CldrItem to be processed.
1895      * @param nodesForLastItem Nodes in path for last item.
1896      * @param arrayLevel The indentation level in which array exists.
1897      * @throws IOException
1898      * @throws ParseException
1899      */
outputArrayItem( JsonArray out, CldrItem item, ArrayList<CldrNode> nodesForLastItem, int arrayLevel)1900     private void outputArrayItem(
1901             JsonArray out, CldrItem item, ArrayList<CldrNode> nodesForLastItem, int arrayLevel)
1902             throws IOException, ParseException {
1903 
1904         // This method is more complicated that outputCldrItem because it needs to
1905         // handle 3 different cases.
1906         // 1. When difference is found below array item, this item will be of the
1907         // same array item. Inside the array item, it is about the same as
1908         // outputCldrItem, just with one more level of indentation because of
1909         // the array.
1910         // 2. The array item is the leaf item with no attribute, simplify it as
1911         // an object with one name/value pair.
1912         // 3. The array item is the leaf item with attribute, an embedded object
1913         // will be created inside the array item object.
1914 
1915         ArrayList<CldrNode> nodesInPath = item.getNodesInPath();
1916         String value = escapeValue(item.getValue());
1917         int nodesNum = nodesInPath.size();
1918 
1919         // case 1
1920         // int diff = findFirstDiffNodeIndex(nodesForLastItem, nodesInPath);
1921         CldrNode cldrNode = nodesInPath.get(nodesNum - 1);
1922 
1923         // if (diff > arrayLevel) {
1924         //     // close previous nodes
1925         //     closeNodes(out, nodesForLastItem.size() - 1, diff + 1);
1926 
1927         //     for (int i = diff; i < nodesNum - 1; i++) {
1928         //         startNonleafNode(out, nodesInPath.get(i), i + 1);
1929         //     }
1930         //     writeLeafNode(out, cldrNode, value, nodesNum);
1931         //     return;
1932         // }
1933 
1934         if (arrayLevel == nodesNum - 1) {
1935             // case 2
1936             // close previous nodes
1937             // if (nodesForLastItem.size() - 1 - arrayLevel > 0) {
1938             //     closeNodes(out, nodesForLastItem.size() - 1, arrayLevel);
1939             // }
1940 
1941             String objName = cldrNode.getNodeKeyName();
1942             int pos = objName.indexOf('-');
1943             if (pos > 0) {
1944                 objName = objName.substring(0, pos);
1945             }
1946 
1947             Map<String, String> attrAsValueMap = cldrNode.getAttrAsValueMap();
1948 
1949             if (attrAsValueMap.isEmpty()) {
1950                 JsonObject o = new JsonObject();
1951                 out.add(o);
1952                 o.addProperty(objName, value);
1953             } else if (objName.equals("rbnfrule")) {
1954                 writeRbnfLeafNode(out, item, attrAsValueMap);
1955             } else {
1956                 JsonObject o = new JsonObject();
1957                 writeLeafNode(
1958                         o,
1959                         objName,
1960                         attrAsValueMap,
1961                         value,
1962                         cldrNode.getName(),
1963                         cldrNode.getParent(),
1964                         cldrNode);
1965                 out.add(o);
1966             }
1967             // the last node is closed, remove it.
1968             nodesInPath.remove(nodesNum - 1);
1969         } else {
1970             // case 3
1971             // close previous nodes
1972             // if (nodesForLastItem.size() - 1 - (arrayLevel) > 0) {
1973             //     closeNodes(out, nodesForLastItem.size() - 1, arrayLevel);
1974             // }
1975 
1976             JsonObject o = new JsonObject();
1977             out.add(o);
1978 
1979             CldrNode node = nodesInPath.get(arrayLevel);
1980             String objName = node.getNodeKeyName();
1981             int pos = objName.indexOf('-');
1982             if (pos > 0) {
1983                 objName = objName.substring(0, pos);
1984             }
1985             Map<String, String> attrAsValueMap = node.getAttrAsValueMap();
1986             JsonObject oo = new JsonObject();
1987             o.add(objName, oo);
1988             for (String key : attrAsValueMap.keySet()) {
1989                 // attribute is prefixed with "_" when being used as key.
1990                 oo.addProperty("_" + key, escapeValue(attrAsValueMap.get(key)));
1991             }
1992 
1993             JsonElement o2 = out;
1994             System.err.println("PROBLEM at " + cldrNode.getUntransformedPath());
1995             // TODO ?!!
1996             for (int i = arrayLevel + 1; i < nodesInPath.size() - 1; i++) {
1997                 o2 = startNonleafNode(o2, nodesInPath.get(i));
1998             }
1999             writeLeafNode(o2, cldrNode, value);
2000         }
2001 
2002         nodesForLastItem.clear();
2003         nodesForLastItem.addAll(nodesInPath);
2004     }
2005 
writeRbnfLeafNode( JsonElement out, CldrItem item, Map<String, String> attrAsValueMap)2006     private void writeRbnfLeafNode(
2007             JsonElement out, CldrItem item, Map<String, String> attrAsValueMap) throws IOException {
2008         if (attrAsValueMap.size() != 1) {
2009             throw new IllegalArgumentException(
2010                     "Error, attributes seem wrong for RBNF " + item.getUntransformedPath());
2011         }
2012         Entry<String, String> entry = attrAsValueMap.entrySet().iterator().next();
2013         JsonArray arr = new JsonArray();
2014         arr.add(entry.getKey());
2015         arr.add(entry.getValue());
2016         out.getAsJsonArray().add(arr);
2017     }
2018 
progressPrefix( AtomicInteger readCount, int totalCount, String filename, String section)2019     private String progressPrefix(
2020             AtomicInteger readCount, int totalCount, String filename, String section) {
2021         return progressPrefix(readCount.get(), totalCount, filename, section);
2022     }
2023 
progressPrefix(int readCount, int totalCount, String filename, String section)2024     private String progressPrefix(int readCount, int totalCount, String filename, String section) {
2025         return progressPrefix(readCount, totalCount) + filename + "\t" + section + "\t";
2026     }
2027 
progressPrefix(AtomicInteger readCount, int totalCount)2028     private final String progressPrefix(AtomicInteger readCount, int totalCount) {
2029         return progressPrefix(readCount.get(), totalCount);
2030     }
2031 
2032     final LocalizedNumberFormatter percentFormatter =
2033             NumberFormatter.withLocale(Locale.ENGLISH)
2034                     .unit(NoUnit.PERCENT)
2035                     .integerWidth(IntegerWidth.zeroFillTo(3))
2036                     .precision(Precision.integer());
2037 
progressPrefix(int readCount, int totalCount)2038     private final String progressPrefix(int readCount, int totalCount) {
2039         double asPercent = ((double) readCount / (double) totalCount) * 100.0;
2040         return String.format(
2041                 SECTION_ICON + " %s (step %d/%d)\t[%s]:\t",
2042                 type,
2043                 type.ordinal(),
2044                 RunType.values().length
2045                         - 1, // which 'type' are we on? (all=0, minus one to get the count right)
2046                 percentFormatter.format(asPercent));
2047     }
2048 
2049     /**
2050      * Process files in a directory of CLDR file tree.
2051      *
2052      * @param dirName The directory in which xml file will be transformed.
2053      * @param minimalDraftStatus The minimumDraftStatus that will be accepted.
2054      * @throws IOException
2055      * @throws ParseException
2056      */
processDirectory(String dirName, DraftStatus minimalDraftStatus)2057     public void processDirectory(String dirName, DraftStatus minimalDraftStatus)
2058             throws IOException, ParseException {
2059         SupplementalDataInfo sdi = SupplementalDataInfo.getInstance(cldrCommonDir + "supplemental");
2060         Factory cldrFactory = Factory.make(cldrCommonDir + dirName + "/", ".*");
2061         Set<String> files =
2062                 cldrFactory
2063                         .getAvailable()
2064                         // filter these out early so our work count is correct
2065                         .stream()
2066                         .filter(
2067                                 filename ->
2068                                         filename.matches(match)
2069                                                 && !LdmlConvertRules.IGNORE_FILE_SET.contains(
2070                                                         filename))
2071                         .collect(Collectors.toSet());
2072         final int total = files.size();
2073         AtomicInteger readCount = new AtomicInteger(0);
2074         Map<String, Throwable> errs = new TreeMap<>();
2075 
2076         // This takes a long time (minutes, in 2020), so run it in parallel forkJoinPool threads.
2077         // The result of this pipeline is an array of toString()-able filenames of XML files which
2078         // produced no JSON output, just as a warning.
2079         System.out.println(
2080                 progressPrefix(0, total)
2081                         + " "
2082                         + MessageFormat.format(
2083                                 GEAR_ICON
2084                                         + " Beginning parallel process of {0, plural, one {# file} other {# files}}",
2085                                 total));
2086         Object noOutputFiles[] =
2087                 files.parallelStream()
2088                         .unordered()
2089                         .map(
2090                                 filename -> {
2091                                     String pathPrefix;
2092                                     CLDRFile file =
2093                                             cldrFactory.make(
2094                                                     filename,
2095                                                     resolve && type == RunType.main,
2096                                                     minimalDraftStatus);
2097                                     // Print 'reading' after the make, to stagger the output a
2098                                     // little bit.
2099                                     // Otherwise, the printout happens before any work happens, and
2100                                     // is easily out of order.
2101                                     readCount.incrementAndGet();
2102                                     logger.fine(
2103                                             () ->
2104                                                     "<"
2105                                                             + progressPrefix(
2106                                                                     readCount, total, dirName,
2107                                                                     filename)
2108                                                             + "\r");
2109 
2110                                     if (type == RunType.main) {
2111                                         pathPrefix =
2112                                                 "/cldr/"
2113                                                         + dirName
2114                                                         + "/"
2115                                                         + unicodeLocaleToString(filename)
2116                                                         + "/";
2117                                     } else {
2118                                         pathPrefix = "/cldr/" + dirName + "/";
2119                                     }
2120                                     int totalForThisFile = 0;
2121                                     try {
2122                                         totalForThisFile =
2123                                                 convertCldrItems(
2124                                                         readCount,
2125                                                         total,
2126                                                         dirName,
2127                                                         filename,
2128                                                         pathPrefix,
2129                                                         mapPathsToSections(
2130                                                                 readCount,
2131                                                                 total,
2132                                                                 file,
2133                                                                 pathPrefix,
2134                                                                 sdi));
2135                                     } catch (IOException | ParseException t) {
2136                                         t.printStackTrace();
2137                                         System.err.println(
2138                                                 "!"
2139                                                         + progressPrefix(readCount, total)
2140                                                         + filename
2141                                                         + " - err - "
2142                                                         + t);
2143                                         errs.put(filename, t);
2144                                     } finally {
2145                                         logger.fine(
2146                                                 () ->
2147                                                         "."
2148                                                                 + progressPrefix(readCount, total)
2149                                                                 + "Completing "
2150                                                                 + dirName
2151                                                                 + "/"
2152                                                                 + filename);
2153                                     }
2154                                     return new Pair<>(dirName + "/" + filename, totalForThisFile);
2155                                 })
2156                         .filter(p -> p.getSecond() == 0) // filter out only files which produced no
2157                         // output
2158                         .map(p -> p.getFirst())
2159                         .toArray();
2160         System.out.println(
2161                 progressPrefix(total, total)
2162                         + " "
2163                         + DONE_ICON
2164                         + MessageFormat.format(
2165                                 "Completed parallel process of {0, plural, one {# file} other {# files}}",
2166                                 total));
2167         if (noOutputFiles.length > 0) {
2168             System.err.println(
2169                     WARN_ICON
2170                             + MessageFormat.format(
2171                                     " Warning: {0, plural, one {# file} other {# files}} did not produce any output (check JSON config):",
2172                                     noOutputFiles.length));
2173             for (final Object f : noOutputFiles) {
2174                 final String loc = f.toString();
2175                 final String uloc = unicodeLocaleToString(f.toString());
2176                 if (skipBcp47LocalesWithSubtags
2177                         && type.locales()
2178                         && HAS_SUBTAG.matcher(uloc).matches()) {
2179                     System.err.println(
2180                             "\t- " + loc + " ❎ (Skipped due to '-T true': " + uloc + ")");
2181                 } else {
2182                     System.err.println("\t- " + loc);
2183                 }
2184             }
2185         }
2186 
2187         if (!errs.isEmpty()) {
2188             System.err.println("Errors in these files:");
2189             for (Map.Entry<String, Throwable> e : errs.entrySet()) {
2190                 System.err.println(e.getKey() + " - " + e.getValue());
2191             }
2192             // rethrow
2193             for (Map.Entry<String, Throwable> e : errs.entrySet()) {
2194                 if (e.getValue() instanceof IOException) {
2195                     throw (IOException) e.getValue(); // throw the first one
2196                 } else if (e.getValue() instanceof ParseException) {
2197                     throw (ParseException) e.getValue(); // throw the first one
2198                 } else {
2199                     throw new RuntimeException("Other exception thrown: " + e.getValue());
2200                 }
2201                 /* NOTREACHED */
2202             }
2203         }
2204 
2205         if (writePackages) {
2206             for (String currentPackage : packages) {
2207                 writePackagingFiles(outputDir, currentPackage);
2208             }
2209             if (type == RunType.main) {
2210                 writeDefaultContent(outputDir);
2211                 writeAvailableLocales(outputDir);
2212                 writeCoverageLevels(outputDir);
2213             } else if (type == RunType.supplemental) {
2214                 writeScriptMetadata(outputDir);
2215                 if (Boolean.parseBoolean(options.get("packagelist").getValue())) {
2216                     writePackageList(outputDir);
2217                 }
2218             }
2219         }
2220     }
2221 
2222     /** Replacement pattern for escaping. */
2223     private static final Pattern escapePattern = PatternCache.get("\\\\(?!u)");
2224 
2225     /**
2226      * Escape \ in value string. \ should be replaced by \\, except in case of \u1234 In following
2227      * code, \\\\ represent one \, because java compiler and regular expression compiler each do one
2228      * round of escape.
2229      *
2230      * @param value Input string.
2231      * @return escaped string.
2232      */
escapeValue(String value)2233     private String escapeValue(String value) {
2234         Matcher match = escapePattern.matcher(value);
2235         String ret = match.replaceAll("\\\\");
2236         return ret.replace("\n", " ").replace("\t", " ");
2237     }
2238 
2239     /**
2240      * Write the value to output.
2241      *
2242      * @param out The ArrayList to hold all output lines.
2243      * @param node The CldrNode being written.
2244      * @param value The value part for this element.
2245      * @param level Indent level.
2246      * @throws IOException
2247      */
writeLeafNode(JsonElement out, CldrNode node, String value)2248     private void writeLeafNode(JsonElement out, CldrNode node, String value) throws IOException {
2249 
2250         String objName = node.getNodeKeyName();
2251         Map<String, String> attrAsValueMaps = node.getAttrAsValueMap();
2252         writeLeafNode(out, objName, attrAsValueMaps, value, node.getName(), node.getParent(), node);
2253     }
2254 
2255     /**
2256      * Write the value to output.
2257      *
2258      * @param out The ArrayList to hold all output lines.
2259      * @param objName The node's node.
2260      * @param attrAsValueMap Those attributes that will be treated as values.
2261      * @param value The value part for this element.
2262      * @param level Indent level.
2263      * @param nodeName the original nodeName (not distinguished)
2264      * @throws IOException
2265      */
writeLeafNode( JsonElement out, String objName, Map<String, String> attrAsValueMap, String value, final String nodeName, String parent, CldrNode node)2266     private void writeLeafNode(
2267             JsonElement out,
2268             String objName,
2269             Map<String, String> attrAsValueMap,
2270             String value,
2271             final String nodeName,
2272             String parent,
2273             CldrNode node)
2274             throws IOException {
2275         if (objName == null) {
2276             return;
2277         }
2278         value = escapeValue(value);
2279 
2280         final boolean valueIsSpacesepArray =
2281                 LdmlConvertRules.valueIsSpacesepArray(nodeName, parent);
2282         if (attrAsValueMap.isEmpty()) {
2283             // out.name(objName);
2284             if (value.isEmpty()) {
2285                 if (valueIsSpacesepArray) {
2286                     // empty value, output as empty space-sep array: []
2287                     out.getAsJsonObject().add(objName, new JsonArray());
2288                 } else {
2289                     // empty value.
2290                     if (objName.endsWith("SpaceReplacement")) { // foreignSpaceReplacement or
2291                         // nativeSpaceReplacement
2292                         out.getAsJsonObject().addProperty(objName, "");
2293                     } else {
2294                         out.getAsJsonObject().add(objName, new JsonObject());
2295                     }
2296                 }
2297             } else if (type == RunType.annotations || type == RunType.annotationsDerived) {
2298                 JsonArray a = new JsonArray();
2299                 // split this, so "a | b | c" becomes ["a","b","c"]
2300                 for (final String s : Annotations.splitter.split(value.trim())) {
2301                     a.add(s);
2302                 }
2303                 out.getAsJsonObject().add(objName, a);
2304             } else if (valueIsSpacesepArray) {
2305                 outputSpaceSepArray(out, objName, value);
2306             } else {
2307                 // normal value
2308                 out.getAsJsonObject().addProperty(objName, value);
2309             }
2310             return;
2311         }
2312 
2313         // If there is no value, but a attribute being treated as value,
2314         // simplify the output.
2315         if (value.isEmpty() && attrAsValueMap.containsKey(LdmlConvertRules.ANONYMOUS_KEY)) {
2316             String v = attrAsValueMap.get(LdmlConvertRules.ANONYMOUS_KEY);
2317             // out.name(objName);
2318             if (valueIsSpacesepArray) {
2319                 outputSpaceSepArray(out, objName, v);
2320             } else {
2321                 out.getAsJsonObject().addProperty(objName, v);
2322             }
2323             return;
2324         }
2325 
2326         JsonObject o = new JsonObject();
2327         out.getAsJsonObject().add(objName, o);
2328 
2329         if (!value.isEmpty()) {
2330             o.addProperty("_value", value);
2331         }
2332 
2333         for (final String key : attrAsValueMap.keySet()) {
2334             String rawAttrValue = attrAsValueMap.get(key);
2335             String attrValue = escapeValue(rawAttrValue);
2336             // attribute is prefixed with "_" when being used as key.
2337             String attrAsKey = "_" + key;
2338             if (node != null) {
2339                 logger.finest(() -> "Leaf Node: " + node.getUntransformedPath() + " ." + key);
2340             }
2341             if (LdmlConvertRules.ATTRVALUE_AS_ARRAY_SET.contains(key)) {
2342                 String[] strings = attrValue.trim().split("\\s+");
2343                 JsonArray a = new JsonArray();
2344                 o.add(attrAsKey, a);
2345                 for (String s : strings) {
2346                     a.add(s);
2347                 }
2348             } else if (node != null
2349                     && LdmlConvertRules.attrIsBooleanOmitFalse(
2350                             node.getUntransformedPath(), nodeName, parent, key)) {
2351                 final Boolean v = Boolean.parseBoolean(rawAttrValue);
2352                 if (v) {
2353                     o.addProperty(attrAsKey, v);
2354                 } // else: omit falsy value
2355             } else {
2356                 o.addProperty(attrAsKey, attrValue);
2357             }
2358         }
2359     }
2360 
outputSpaceSepArray(JsonElement out, String objName, String v)2361     private void outputSpaceSepArray(JsonElement out, String objName, String v) throws IOException {
2362         JsonArray a = new JsonArray();
2363         out.getAsJsonObject().add(objName, a);
2364         // split this, so "a b c" becomes ["a","b","c"]
2365         for (final String s : v.trim().split(" ")) {
2366             if (!s.isEmpty()) {
2367                 a.add(s);
2368             }
2369         }
2370     }
2371 }
2372