xref: /aosp_15_r20/external/icu/tools/cldr/cldr-to-icu/src/main/java/org/unicode/icu/tool/cldrtoicu/LdmlConverter.java (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2019 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 package org.unicode.icu.tool.cldrtoicu;
4 
5 import static com.google.common.base.Preconditions.checkArgument;
6 import static com.google.common.base.Preconditions.checkNotNull;
7 import static com.google.common.collect.ImmutableList.toImmutableList;
8 import static java.nio.charset.StandardCharsets.UTF_8;
9 import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED;
10 import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED;
11 import static org.unicode.cldr.api.CldrDataType.BCP47;
12 import static org.unicode.cldr.api.CldrDataType.LDML;
13 import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL;
14 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.BRKITR;
15 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.COLL;
16 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.CURR;
17 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.LANG;
18 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.LOCALES;
19 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.RBNF;
20 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.REGION;
21 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.UNIT;
22 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.ZONE;
23 
24 import java.io.BufferedWriter;
25 import java.io.IOException;
26 import java.io.InputStream;
27 import java.io.InputStreamReader;
28 import java.io.PrintWriter;
29 import java.nio.file.Files;
30 import java.nio.file.Path;
31 import java.util.*;
32 import java.util.function.Predicate;
33 import java.util.stream.Collectors;
34 import java.util.stream.Stream;
35 
36 import org.unicode.cldr.api.CldrData;
37 import org.unicode.cldr.api.CldrDataSupplier;
38 import org.unicode.cldr.api.CldrDataType;
39 import org.unicode.cldr.api.CldrPath;
40 import org.unicode.cldr.api.PathMatcher;
41 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir;
42 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuVersionInfo;
43 import org.unicode.icu.tool.cldrtoicu.localedistance.LocaleDistanceMapper;
44 import org.unicode.icu.tool.cldrtoicu.mapper.Bcp47Mapper;
45 import org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapper;
46 import org.unicode.icu.tool.cldrtoicu.mapper.CollationMapper;
47 import org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapper;
48 import org.unicode.icu.tool.cldrtoicu.mapper.LocaleMapper;
49 import org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapper;
50 import org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapper;
51 import org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapper;
52 import org.unicode.icu.tool.cldrtoicu.mapper.SupplementalMapper;
53 import org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapper;
54 import org.unicode.icu.tool.cldrtoicu.regex.RegexTransformer;
55 
56 import com.google.common.base.CharMatcher;
57 import com.google.common.collect.HashMultimap;
58 import com.google.common.collect.ImmutableList;
59 import com.google.common.collect.ImmutableListMultimap;
60 import com.google.common.collect.ImmutableMap;
61 import com.google.common.collect.ImmutableSet;
62 import com.google.common.collect.LinkedListMultimap;
63 import com.google.common.collect.ListMultimap;
64 import com.google.common.collect.Maps;
65 import com.google.common.collect.SetMultimap;
66 import com.google.common.collect.Sets;
67 import com.google.common.io.CharStreams;
68 
69 /**
70  * The main converter tool for CLDR to ICU data. To run this tool, you need to supply a suitable
71  * {@link LdmlConverterConfig} instance. There is a simple {@code main()} method available in this
72  * class which can be invoked passing just the desired output directory and which relies on the
73  * presence of several system properties for the remainder of its parameters:
74  * <ul>
75  *     <li>CLDR_DIR: The root of the CLDR release from which CLDR data is read.
76  *     <li>ICU_DIR: The root of the ICU release from which additional "specials" XML data is read.
77  *     <li>CLDR_DTD_CACHE: A temporary directory with the various DTDs cached (this is a legacy
78  *         requirement from the underlying CLDR libraries and might go away one day).
79  * </ul>
80  */
81 public final class LdmlConverter {
    // TODO: Do all supplemental data in one go and split similarly to locale data (using RbPath).
    //
    // Each predicate below is built by supplementalMatcher() from one or more element names
    // under "//supplementalData/". They are handed to processSupplemental() to select the CLDR
    // paths converted for the corresponding supplemental output type.

    // Paths for OutputType.GENDER_LIST.
    private static final Predicate<CldrPath> GENDER_LIST_PATHS =
        supplementalMatcher("gender");
    // Paths for OutputType.META_ZONES.
    private static final Predicate<CldrPath> METAZONE_PATHS =
        supplementalMatcher("metaZones", "primaryZones");
    // Paths for OutputType.METADATA.
    private static final Predicate<CldrPath> METADATA_PATHS =
        supplementalMatcher("metadata");
    // Paths for OutputType.SUPPLEMENTAL_DATA.
    private static final Predicate<CldrPath> SUPPLEMENTAL_DATA_PATHS =
        supplementalMatcher(
            "calendarData",
            "calendarPreferenceData",
            "codeMappings",
            "codeMappingsCurrency",
            "idValidity",
            "languageData",
            "languageMatching",
            "measurementData",
            "parentLocales",
            "personNamesDefaults",
            "subdivisionContainment",
            "territoryContainment",
            "territoryInfo",
            "timeData",
            "weekData",
            "weekOfPreference");
    // Paths for OutputType.CURRENCY_DATA.
    private static final Predicate<CldrPath> CURRENCY_DATA_PATHS =
        supplementalMatcher("currencyData");
    // Paths for OutputType.UNITS.
    private static final Predicate<CldrPath> UNITS_DATA_PATHS =
        supplementalMatcher(
            "convertUnits",
            "unitConstants",
            "unitQuantities",
            "unitPreferenceData",
            "unitPrefixes");
    // Paths for OutputType.GRAMMATICAL_FEATURES.
    private static final Predicate<CldrPath> GRAMMATICAL_FEATURES_PATHS =
        supplementalMatcher("grammaticalData");
    // Paths for OutputType.NUMBERING_SYSTEMS.
    private static final Predicate<CldrPath> NUMBERING_SYSTEMS_PATHS =
        supplementalMatcher("numberingSystems");
    // Paths for OutputType.WINDOWS_ZONES.
    private static final Predicate<CldrPath> WINDOWS_ZONES_PATHS =
        supplementalMatcher("windowsZones");
122 
supplementalMatcher(String... spec)123     private static Predicate<CldrPath> supplementalMatcher(String... spec) {
124         checkArgument(spec.length > 0, "must supply at least one matcher spec");
125         if (spec.length == 1) {
126             return PathMatcher.of("//supplementalData/" + spec[0])::matchesPrefixOf;
127         }
128         return
129             Arrays.stream(spec)
130                 .map(s -> PathMatcher.of("//supplementalData/" + s))
131                 .map(m -> ((Predicate<CldrPath>) m::matchesPrefixOf))
132                 .reduce(p -> false, Predicate::or);
133     }
134 
135     private static RbPath RB_PARENT = RbPath.of("%%Parent");
136     // The quotes below are only so we achieve parity with the manually written alias files.
137     // TODO: Remove unnecessary quotes once the migration to this code is complete.
138     private static RbPath RB_ALIAS = RbPath.of("\"%%ALIAS\"");
139     // Special path for adding to empty files which only exist to complete the parent chain.
140     // TODO: Confirm that this has no meaningful effect and unify "empty" file contents.
141     private static RbPath RB_EMPTY_ALIAS = RbPath.of("___");
142 
143     /**
144      * Output types defining specific subsets of the ICU data which can be converted separately.
145      * This closely mimics the original "NewLdml2IcuConverter" behaviour but could be simplified to
146      * hide what are essentially implementation specific data splits.
147      */
148     public enum OutputType {
149         LOCALES(LDML),
150         BRKITR(LDML),
151         COLL(LDML),
152         RBNF(LDML),
153         DAY_PERIODS(SUPPLEMENTAL),
154         GENDER_LIST(SUPPLEMENTAL),
155         SUPPLEMENTAL_DATA(SUPPLEMENTAL),
156         UNITS(SUPPLEMENTAL),
157         CURRENCY_DATA(SUPPLEMENTAL),
158         GRAMMATICAL_FEATURES(SUPPLEMENTAL),
159         METADATA(SUPPLEMENTAL),
160         META_ZONES(SUPPLEMENTAL),
161         NUMBERING_SYSTEMS(SUPPLEMENTAL),
162         PLURALS(SUPPLEMENTAL),
163         PLURAL_RANGES(SUPPLEMENTAL),
164         WINDOWS_ZONES(SUPPLEMENTAL),
165         TRANSFORMS(SUPPLEMENTAL),
166         LOCALE_DISTANCE(SUPPLEMENTAL),
167         VERSION(SUPPLEMENTAL),
168         KEY_TYPE_DATA(BCP47);
169 
170         public static final ImmutableSet<OutputType> ALL = ImmutableSet.copyOf(OutputType.values());
171 
172         private final CldrDataType type;
173 
OutputType(CldrDataType type)174         OutputType(CldrDataType type) {
175             this.type = checkNotNull(type);
176         }
177 
getCldrType()178         CldrDataType getCldrType() {
179             return type;
180         }
181     }
182 
183     // Map to convert the rather arbitrarily defined "output types" to the directories into which
184     // the data is written. This is only for "LDML" types since other mappers don't need to split
185     // data into multiple directories.
186     private static final ImmutableListMultimap<OutputType, IcuLocaleDir> TYPE_TO_DIR =
187         ImmutableListMultimap.<OutputType, IcuLocaleDir>builder()
188             .putAll(OutputType.LOCALES, CURR, LANG, LOCALES, REGION, UNIT, ZONE)
189             .putAll(OutputType.BRKITR, BRKITR)
190             .putAll(OutputType.COLL, COLL)
191             .putAll(OutputType.RBNF, RBNF)
192             .build();
193 
194     /** Converts CLDR data according to the given configuration. */
convert( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config)195     public static void convert(
196         CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) {
197         new LdmlConverter(src, supplementalData, config).convertAll();
198     }
199 
    // The supplier for all data to be converted.
    private final CldrDataSupplier src;
    // Supplemental data available to mappers if needed.
    private final SupplementalData supplementalData;
    // The configuration controlling conversion behaviour.
    private final LdmlConverterConfig config;
    // The set of expanded target locale IDs; computed in the constructor as the intersection of
    // the IDs known to the supplemental data and all locale IDs named by the configuration.
    // TODO: Make available IDs include specials files (or fail if specials are not available).
    private final ImmutableSet<String> availableIds;
    // Transformer for locale data (rules loaded from the "/ldml2icu_locale.txt" resource).
    private final PathValueTransformer localeTransformer;
    // Transformer for supplemental data (rules loaded from "/ldml2icu_supplemental.txt").
    private final PathValueTransformer supplementalTransformer;
    // Header string to go into every ICU data and transliteration rule file (comment prefixes
    // are not present and must be added by the code writing the file). Loaded from the
    // "/ldml2icu_header.txt" resource.
    private final ImmutableList<String> fileHeader;
216 
LdmlConverter( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config)217     private LdmlConverter(
218         CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) {
219         this.src = checkNotNull(src);
220         this.supplementalData = checkNotNull(supplementalData);
221         this.config = checkNotNull(config);
222         this.availableIds = ImmutableSet.copyOf(
223             Sets.intersection(supplementalData.getAvailableLocaleIds(), config.getAllLocaleIds()));
224         // Load the remaining path value transformers.
225         this.supplementalTransformer =
226             RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_supplemental.txt"),
227                 IcuFunctions.ALGORITHM_FN,
228                 IcuFunctions.DATE_FN,
229                 IcuFunctions.DAY_NUMBER_FN,
230                 IcuFunctions.EXP_FN,
231                 IcuFunctions.YMD_FN);
232         this.localeTransformer =
233             RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_locale.txt"),
234                 IcuFunctions.CONTEXT_TRANSFORM_INDEX_FN);
235         this.fileHeader = readLinesFromResource("/ldml2icu_header.txt");
236     }
237 
convertAll()238     private void convertAll() {
239         processLdml();
240         processSupplemental();
241         if (config.emitReport()) {
242             System.out.println("Supplemental Data Transformer=" + supplementalTransformer);
243             System.out.println("Locale Data Transformer=" + localeTransformer);
244         }
245     }
246 
readLinesFromResource(String name)247     private static ImmutableList<String> readLinesFromResource(String name) {
248         try (InputStream in = LdmlConverter.class.getResourceAsStream(name)) {
249             return ImmutableList.copyOf(CharStreams.readLines(new InputStreamReader(in, UTF_8)));
250         } catch (IOException e) {
251             throw new RuntimeException("cannot read resource: " + name, e);
252         }
253     }
254 
loadSpecialsData(String localeId)255     private Optional<CldrData> loadSpecialsData(String localeId) {
256         String expected = localeId + ".xml";
257         try (Stream<Path> files = Files.walk(config.getSpecialsDir())) {
258             Set<Path> xmlFiles = files
259                 .filter(Files::isRegularFile)
260                 .filter(f -> f.getFileName().toString().equals(expected))
261                 .collect(Collectors.toSet());
262             return !xmlFiles.isEmpty()
263                 ? Optional.of(
264                 CldrDataSupplier.forCldrFiles(LDML, config.getMinimumDraftStatus(), xmlFiles))
265                 : Optional.empty();
266         } catch (IOException e) {
267             throw new RuntimeException(
268                 "error processing specials directory: " + config.getSpecialsDir(), e);
269         }
270     }
271 
processLdml()272     private void processLdml() {
273         ImmutableList<IcuLocaleDir> splitDirs =
274             config.getOutputTypes().stream()
275                 .filter(t -> t.getCldrType() == LDML)
276                 .flatMap(t -> TYPE_TO_DIR.get(t).stream())
277                 .collect(toImmutableList());
278         if (splitDirs.isEmpty()) {
279             return;
280         }
281 
282         String cldrVersion = config.getVersionInfo().getCldrVersion();
283 
284         Map<IcuLocaleDir, DependencyGraph> graphMetadata = new HashMap<>();
285         splitDirs.forEach(d -> graphMetadata.put(d, new DependencyGraph(cldrVersion)));
286 
287         SetMultimap<IcuLocaleDir, String> writtenLocaleIds = HashMultimap.create();
288         Path baseDir = config.getOutputDir();
289 
290         System.out.println("processing standard ldml files");
291         for (String id : config.getAllLocaleIds()) {
292             // Skip "target" IDs that are aliases (they are handled later).
293             if (!availableIds.contains(id)) {
294                 continue;
295             }
296             // TODO: Remove the following skip when ICU-20997 is fixed
297             if (id.contains("VALENCIA") || id.contains("TARASK")) {
298                 System.out.println("(skipping " + id + " until ICU-20997 is fixed)");
299                 continue;
300             }
301             // Now that former CLDR see locales are in common, there are some language
302             // variants that are not at a high enough coverage level to pick up.
303             // TODO need a better way of handling this.
304              if (id.contains("POLYTON")) {
305                 System.out.println("(skipping " + id + ", insufficient coverage level)");
306                 continue;
307             }
308 
309             IcuData icuData = new IcuData(id, true);
310 
311             Optional<CldrData> specials = loadSpecialsData(id);
312             CldrData unresolved = src.getDataForLocale(id, UNRESOLVED);
313 
314             BreakIteratorMapper.process(icuData, unresolved, specials);
315             CollationMapper.process(icuData, unresolved, specials, cldrVersion);
316             RbnfMapper.process(icuData, unresolved, specials);
317 
318             CldrData resolved = src.getDataForLocale(id, RESOLVED);
319             Optional<String> defaultCalendar = supplementalData.getDefaultCalendar(id);
320             LocaleMapper.process(
321                 icuData, unresolved, resolved, specials, localeTransformer, defaultCalendar);
322 
323             ListMultimap<IcuLocaleDir, RbPath> splitPaths = LinkedListMultimap.create();
324             for (RbPath p : icuData.getPaths()) {
325                 String rootName = getBaseSegmentName(p.getSegment(0));
326                 splitPaths.put(LOCALE_SPLIT_INFO.getOrDefault(rootName, LOCALES), p);
327             }
328 
329             Optional<String> parent = supplementalData.getExplicitParentLocaleOf(id);
330             // We always write base languages (even if empty).
331             boolean isBaseLanguage = !id.contains("_");
332             // Run through all directories (not just the keySet() of the split path map) since we
333             // sometimes write empty files.
334             for (IcuLocaleDir dir : splitDirs) {
335                 Set<String> targetIds = config.getTargetLocaleIds(dir);
336                 if (!targetIds.contains(id)) {
337                     if (!splitPaths.get(dir).isEmpty()) {
338                         System.out.format(
339                             "target IDs for %s does not contain %s, but it has data: %s\n",
340                             dir, id, splitPaths.get(dir));
341                     }
342                     continue;
343                 }
344 
345                 Path outDir = baseDir.resolve(dir.getOutputDir());
346                 IcuData splitData = new IcuData(icuData.getName(), icuData.hasFallback());
347 
348                 // The split data can still be empty for this directory, but that's expected (it
349                 // might only be written because it has an explicit parent added below).
350                 splitPaths.get(dir).forEach(p -> splitData.add(p, icuData.get(p)));
351 
352                 // If we add an explicit parent locale, it forces the data to be written. This is
353                 // where we check for forced overrides of the parent relationship (which is a per
354                 // directory thing).
355                 getIcuParent(id, parent, dir).ifPresent(p -> {
356                     splitData.add(RB_PARENT, p);
357                     graphMetadata.get(dir).addParent(id, p);
358                 });
359 
360                 if (!splitData.getPaths().isEmpty() || isBaseLanguage || dir.includeEmpty()) {
361                     if (id.equals("root")) {
362                         splitData.setVersion(cldrVersion);
363                     }
364                     write(splitData, outDir, false);
365                     writtenLocaleIds.put(dir, id);
366                 }
367             }
368         }
369 
370         System.out.println("processing alias ldml files");
371         for (IcuLocaleDir dir : splitDirs) {
372             Path outDir = baseDir.resolve(dir.getOutputDir());
373             Set<String> targetIds = config.getTargetLocaleIds(dir);
374             DependencyGraph depGraph = graphMetadata.get(dir);
375 
376             // TODO: Maybe calculate alias map directly into the dependency graph?
377             Map<String, String> aliasMap = getAliasMap(targetIds, dir);
378             aliasMap.forEach((s, t) -> {
379                 depGraph.addAlias(s, t);
380                 writeAliasFile(s, t, outDir);
381                 // It's only important to record which alias files are written because of forced
382                 // aliases, but since it's harmless otherwise, we just do it unconditionally.
383                 // Normal alias files don't affect the empty file calculation, but forced ones can.
384                 writtenLocaleIds.put(dir, s);
385             });
386 
387             calculateEmptyFiles(writtenLocaleIds.get(dir), aliasMap.values())
388                 .forEach(id -> writeEmptyFile(id, outDir, aliasMap.values()));
389 
390             writeDependencyGraph(outDir, depGraph);
391         }
392     }
393 
394 
395     private static final CharMatcher PATH_MODIFIER = CharMatcher.anyOf(":%");
396 
397     // Resource bundle paths elements can have variants (e.g. "Currencies%narrow) or type
398     // annotations (e.g. "languages:intvector"). We strip these when considering the element name.
getBaseSegmentName(String segment)399     private static String getBaseSegmentName(String segment) {
400         int idx = PATH_MODIFIER.indexIn(segment);
401         return idx == -1 ? segment : segment.substring(0, idx);
402     }
403 
404     /*
405      * There are four reasons for treating a locale ID as an alias.
406      * 1: It contains deprecated subtags (e.g. "sr_YU", which should be "sr_Cyrl_RS").
407      * 2: It has no CLDR data but is missing a script subtag.
408      * 3: It is one of the special "phantom" alias which cannot be represented normally
409      *    and must be manually mapped (e.g. legacy locale IDs which don't even parse).
410      * 4: It is a "super special" forced alias, which might replace existing aliases in
411      *    some output directories.
412      */
getAliasMap(Set<String> localeIds, IcuLocaleDir dir)413     private Map<String, String> getAliasMap(Set<String> localeIds, IcuLocaleDir dir) {
414         // Even forced aliases only apply if they are in the set of locale IDs for the directory.
415         Map<String, String> forcedAliases =
416             Maps.filterKeys(config.getForcedAliases(dir), localeIds::contains);
417 
418         Map<String, String> aliasMap = new LinkedHashMap<>();
419         for (String id : localeIds) {
420             if (forcedAliases.containsKey(id)) {
421                 // Forced aliases will be added later and don't need to be processed here. This
422                 // is especially necessary if the ID is not structurally valid (e.g. "no_NO_NY")
423                 // since that cannot be processed by the code below.
424                 continue;
425             }
426             String canonicalId = supplementalData.replaceDeprecatedTags(id);
427             if (!canonicalId.equals(id)) {
428                 // If the canonical form of an ID differs from the requested ID, the this is an
429                 // alias, and just needs to point to the canonical ID.
430                 aliasMap.put(id, canonicalId);
431                 continue;
432             }
433             if (availableIds.contains(id)) {
434                 // If it's canonical and supported, it's not an alias.
435                 continue;
436             }
437             // If the requested locale is not supported, maximize it and alias to that.
438             String maximizedId = supplementalData.maximize(id)
439                 .orElseThrow(() -> new IllegalArgumentException("unsupported locale ID: " + id));
440             // We can't alias to ourselves and we shouldn't be here is the ID was already maximal.
441             checkArgument(!maximizedId.equals(id), "unsupported maximized locale ID: %s", id);
442             aliasMap.put(id, maximizedId);
443         }
444         // Important that we overwrite entries which might already exist here, since we might have
445         // already calculated a "natural" alias for something that we want to force (and we should
446         // replace the existing target, since that affects how we determine empty files later).
447         aliasMap.putAll(forcedAliases);
448         return aliasMap;
449     }
450 
451     /*
452      * Helper to determine the correct parent ID to be written into the ICU data file. The rules
453      * are:
454      * 1: If no forced parent exists (common) write the explicit parent (if that exists)
455      * 2: If a forced parent exists, but the forced value is what you would get by just truncating
456      *    the current locale ID, write nothing (ICU libraries truncate when no parent is set).
457      * 3: Write the forced parent (this is an exceptional case, and may not even occur in data).
458      */
getIcuParent(String id, Optional<String> parent, IcuLocaleDir dir)459     private Optional<String> getIcuParent(String id, Optional<String> parent, IcuLocaleDir dir) {
460         String forcedParentId = config.getForcedParents(dir).get(id);
461         if (forcedParentId == null) {
462             return parent;
463         }
464         return id.contains("_") && forcedParentId.regionMatches(0, id, 0, id.lastIndexOf('_'))
465             ? Optional.empty() : Optional.of(forcedParentId);
466     }
467 
processSupplemental()468     private void processSupplemental() {
469         for (OutputType type : config.getOutputTypes()) {
470             if (type.getCldrType() == LDML) {
471                 continue;
472             }
473             System.out.println("processing supplemental type " + type);
474             switch (type) {
475             case DAY_PERIODS:
476                 write(DayPeriodsMapper.process(src), "misc");
477                 break;
478 
479             case GENDER_LIST:
480                 processSupplemental("genderList", GENDER_LIST_PATHS, "misc", false);
481                 break;
482 
483             case SUPPLEMENTAL_DATA:
484                 processSupplemental("supplementalData", SUPPLEMENTAL_DATA_PATHS, "misc", true);
485                 break;
486 
487             case UNITS:
488                 processSupplemental("units", UNITS_DATA_PATHS, "misc", true);
489                 break;
490 
491             case CURRENCY_DATA:
492                 processSupplemental("supplementalData", CURRENCY_DATA_PATHS, "curr", false);
493                 break;
494 
495             case GRAMMATICAL_FEATURES:
496                 processSupplemental("grammaticalFeatures", GRAMMATICAL_FEATURES_PATHS, "misc", false);
497                 break;
498 
499             case METADATA:
500                 processSupplemental("metadata", METADATA_PATHS, "misc", false);
501                 break;
502 
503             case META_ZONES:
504                 processSupplemental("metaZones", METAZONE_PATHS, "misc", false);
505                 break;
506 
507             case NUMBERING_SYSTEMS:
508                 processSupplemental("numberingSystems", NUMBERING_SYSTEMS_PATHS, "misc", false);
509                 break;
510 
511             case PLURALS:
512                 write(PluralsMapper.process(src), "misc");
513                 break;
514 
515             case PLURAL_RANGES:
516                 write(PluralRangesMapper.process(src), "misc");
517                 break;
518 
519             case LOCALE_DISTANCE:
520                 write(LocaleDistanceMapper.process(src), "misc");
521                 break;
522 
523             case WINDOWS_ZONES:
524                 processSupplemental("windowsZones", WINDOWS_ZONES_PATHS, "misc", false);
525                 break;
526 
527             case TRANSFORMS:
528                 Path transformDir = createDirectory(config.getOutputDir().resolve("translit"));
529                 write(TransformsMapper.process(src, transformDir, fileHeader), transformDir, false);
530                 break;
531 
532             case VERSION:
533                 writeIcuVersionInfo();
534                 break;
535 
536             case KEY_TYPE_DATA:
537                 Bcp47Mapper.process(src).forEach(d -> write(d, "misc"));
538                 break;
539 
540             default:
541                 throw new AssertionError("Unsupported supplemental type: " + type);
542             }
543         }
544     }
545 
546     private static final RbPath RB_CLDR_VERSION = RbPath.of("cldrVersion");
547 
processSupplemental( String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion)548     private void processSupplemental(
549         String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion) {
550         IcuData icuData =
551             SupplementalMapper.process(src, supplementalTransformer, label, paths);
552         // A hack for "supplementalData.txt" since the "cldrVersion" value doesn't come from the
553         // supplemental data XML files.
554         if (addCldrVersion) {
555             // Not the same path as used by "setVersion()"
556             icuData.add(RB_CLDR_VERSION, config.getVersionInfo().getCldrVersion());
557         }
558         write(icuData, dir);
559     }
560 
writeAliasFile(String srcId, String destId, Path dir)561     private void writeAliasFile(String srcId, String destId, Path dir) {
562         IcuData icuData = new IcuData(srcId, true);
563         icuData.add(RB_ALIAS, destId);
564         // Allow overwrite for aliases since some are "forced" and overwrite existing targets.
565         // TODO: Maybe tighten this up so only forced aliases for existing targets are overwritten.
566         write(icuData, dir, true);
567     }
568 
writeEmptyFile(String id, Path dir, Collection<String> aliasTargets)569     private void writeEmptyFile(String id, Path dir, Collection<String> aliasTargets) {
570         IcuData icuData = new IcuData(id, true);
571         // TODO: Document the reason for this (i.e. why does it matter what goes into empty files?)
572         if (aliasTargets.contains(id)) {
573             icuData.setFileComment("generated alias target");
574             icuData.add(RB_EMPTY_ALIAS, "");
575         } else {
576             // These empty files only exist because the target of an alias has a parent locale
577             // which is itself not in the set of written ICU files. An "indirect alias target".
578             // No need to add data: Just write a resource bundle with an empty top-level table.
579         }
580         write(icuData, dir, false);
581     }
582 
writeIcuVersionInfo()583     private void writeIcuVersionInfo() {
584         IcuVersionInfo versionInfo = config.getVersionInfo();
585         IcuData versionData = new IcuData("icuver", false);
586         versionData.add(RbPath.of("ICUVersion"), versionInfo.getIcuVersion());
587         versionData.add(RbPath.of("DataVersion"), versionInfo.getIcuDataVersion());
588         versionData.add(RbPath.of("CLDRVersion"), versionInfo.getCldrVersion());
589         // Write file via non-helper methods since we need to include a legacy copyright.
590         Path miscDir = config.getOutputDir().resolve("misc");
591         createDirectory(miscDir);
592         ImmutableList<String> versionHeader = ImmutableList.<String>builder()
593             .addAll(fileHeader)
594             .add(
595                 "***************************************************************************",
596                 "*",
597                 "* Copyright (C) 2010-2016 International Business Machines",
598                 "* Corporation and others.  All Rights Reserved.",
599                 "*",
600                 "***************************************************************************")
601             .build();
602         IcuTextWriter.writeToFile(versionData, miscDir, versionHeader, false);
603     }
604 
605     // Commonest case for writing data files in "normal" directories.
write(IcuData icuData, String dir)606     private void write(IcuData icuData, String dir) {
607         write(icuData, config.getOutputDir().resolve(dir), false);
608     }
609 
write(IcuData icuData, Path dir, boolean allowOverwrite)610     private void write(IcuData icuData, Path dir, boolean allowOverwrite) {
611         createDirectory(dir);
612         IcuTextWriter.writeToFile(icuData, dir, fileHeader, allowOverwrite);
613     }
614 
createDirectory(Path dir)615     private Path createDirectory(Path dir) {
616         try {
617             Files.createDirectories(dir);
618         } catch (IOException e) {
619             throw new RuntimeException("cannot create directory: " + dir, e);
620         }
621         return dir;
622     }
623 
writeDependencyGraph(Path dir, DependencyGraph depGraph)624     private void writeDependencyGraph(Path dir, DependencyGraph depGraph) {
625         createDirectory(dir);
626         try (BufferedWriter w = Files.newBufferedWriter(dir.resolve("LOCALE_DEPS.json"), UTF_8);
627             PrintWriter out = new PrintWriter(w)) {
628             depGraph.writeJsonTo(out, fileHeader);
629             out.flush();
630         } catch (IOException e) {
631             throw new RuntimeException("cannot write dependency graph file: " + dir, e);
632         }
633     }
634 
635     // The set of IDs to process is:
636     // * any file that was written
637     // * any alias target (not written)
638     //
639     // From which we generate the complete "closure" under the "getParent()" function. This set
640     // contains all file (written or not) which need to exist to complete the locale hierarchy.
641     //
642     // Then we remove all the written files to just leave the ones that need to be generated.
643     // This is a simple and robust approach that handles things like "gaps" in non-aliased
644     // locale IDs, where an intermediate parent is not present.
calculateEmptyFiles( Set<String> writtenIds, Collection<String> aliasTargetIds)645     private ImmutableSet<String> calculateEmptyFiles(
646         Set<String> writtenIds, Collection<String> aliasTargetIds) {
647 
648         Set<String> seedIds = new HashSet<>(writtenIds);
649         seedIds.addAll(aliasTargetIds);
650         // Be nice and sort the output (makes easier debugging).
651         Set<String> allIds = new TreeSet<>();
652         for (String id : seedIds) {
653             while (!id.equals("root") && !allIds.contains(id)) {
654                 allIds.add(id);
655                 id = supplementalData.getParent(id);
656             }
657         }
658         return ImmutableSet.copyOf(Sets.difference(allIds, writtenIds));
659     }
660 
    // Maps the base name of the first segment of a resource bundle path (see
    // getBaseSegmentName()) to the ICU locale directory which the path's data is written to.
    // Names which do not appear here are assigned to LOCALES by processLdml().
    private static final ImmutableMap<String, IcuLocaleDir> LOCALE_SPLIT_INFO =
        ImmutableMap.<String, IcuLocaleDir>builder()
            // BRKITR
            .put("boundaries", BRKITR)
            .put("dictionaries", BRKITR)
            .put("exceptions", BRKITR)
            .put("extensions", BRKITR)
            .put("lstm", BRKITR)
            // COLL
            .put("collations", COLL)
            .put("depends", COLL)
            .put("UCARules", COLL)
            // CURR
            .put("Currencies", CURR)
            .put("CurrencyPlurals", CURR)
            .put("CurrencyUnitPatterns", CURR)
            .put("currencySpacing", CURR)
            // LANG
            .put("Keys", LANG)
            .put("Languages", LANG)
            .put("Scripts", LANG)
            .put("Types", LANG)
            .put("Variants", LANG)
            .put("characterLabelPattern", LANG)
            .put("codePatterns", LANG)
            .put("localeDisplayPattern", LANG)
            // RBNF
            .put("RBNFRules", RBNF)
            // REGION
            .put("Countries", REGION)
            // UNIT
            .put("durationUnits", UNIT)
            .put("units", UNIT)
            .put("unitsShort", UNIT)
            .put("unitsNarrow", UNIT)
            // ZONE
            .put("zoneStrings", ZONE)
            .build();
699 }
700