1 // © 2019 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 package org.unicode.icu.tool.cldrtoicu; 4 5 import static com.google.common.base.Preconditions.checkArgument; 6 import static com.google.common.base.Preconditions.checkNotNull; 7 import static com.google.common.collect.ImmutableList.toImmutableList; 8 import static java.nio.charset.StandardCharsets.UTF_8; 9 import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.RESOLVED; 10 import static org.unicode.cldr.api.CldrDataSupplier.CldrResolution.UNRESOLVED; 11 import static org.unicode.cldr.api.CldrDataType.BCP47; 12 import static org.unicode.cldr.api.CldrDataType.LDML; 13 import static org.unicode.cldr.api.CldrDataType.SUPPLEMENTAL; 14 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.BRKITR; 15 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.COLL; 16 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.CURR; 17 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.LANG; 18 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.LOCALES; 19 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.RBNF; 20 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.REGION; 21 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.UNIT; 22 import static org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir.ZONE; 23 24 import java.io.BufferedWriter; 25 import java.io.IOException; 26 import java.io.InputStream; 27 import java.io.InputStreamReader; 28 import java.io.PrintWriter; 29 import java.nio.file.Files; 30 import java.nio.file.Path; 31 import java.util.*; 32 import java.util.function.Predicate; 33 import java.util.stream.Collectors; 34 import java.util.stream.Stream; 35 36 import org.unicode.cldr.api.CldrData; 37 import org.unicode.cldr.api.CldrDataSupplier; 38 import org.unicode.cldr.api.CldrDataType; 39 import org.unicode.cldr.api.CldrPath; 40 import org.unicode.cldr.api.PathMatcher; 41 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuLocaleDir; 42 import org.unicode.icu.tool.cldrtoicu.LdmlConverterConfig.IcuVersionInfo; 43 import org.unicode.icu.tool.cldrtoicu.localedistance.LocaleDistanceMapper; 44 import org.unicode.icu.tool.cldrtoicu.mapper.Bcp47Mapper; 45 import org.unicode.icu.tool.cldrtoicu.mapper.BreakIteratorMapper; 46 import org.unicode.icu.tool.cldrtoicu.mapper.CollationMapper; 47 import org.unicode.icu.tool.cldrtoicu.mapper.DayPeriodsMapper; 48 import org.unicode.icu.tool.cldrtoicu.mapper.LocaleMapper; 49 import org.unicode.icu.tool.cldrtoicu.mapper.PluralRangesMapper; 50 import org.unicode.icu.tool.cldrtoicu.mapper.PluralsMapper; 51 import org.unicode.icu.tool.cldrtoicu.mapper.RbnfMapper; 52 import org.unicode.icu.tool.cldrtoicu.mapper.SupplementalMapper; 53 import org.unicode.icu.tool.cldrtoicu.mapper.TransformsMapper; 54 import org.unicode.icu.tool.cldrtoicu.regex.RegexTransformer; 55 56 import com.google.common.base.CharMatcher; 57 import com.google.common.collect.HashMultimap; 58 import com.google.common.collect.ImmutableList; 59 import com.google.common.collect.ImmutableListMultimap; 60 import com.google.common.collect.ImmutableMap; 61 import com.google.common.collect.ImmutableSet; 62 import com.google.common.collect.LinkedListMultimap; 63 import com.google.common.collect.ListMultimap; 64 import com.google.common.collect.Maps; 65 import com.google.common.collect.SetMultimap; 66 import com.google.common.collect.Sets; 67 import com.google.common.io.CharStreams; 68 69 /** 70 * The main converter tool for CLDR to ICU data. To run this tool, you need to supply a suitable 71 * {@link LdmlConverterConfig} instance. There is a simple {@code main()} method available in this 72 * class which can be invoked passing just the desired output directory and which relies on the 73 * presence of several system properties for the remainder of its parameters: 74 * <ul> 75 * <li>CLDR_DIR: The root of the CLDR release from which CLDR data is read. 76 * <li>ICU_DIR: The root of the ICU release from which additional "specials" XML data is read. 77 * <li>CLDR_DTD_CACHE: A temporary directory with the various DTDs cached (this is a legacy 78 * requirement from the underlying CLDR libraries and might go away one day). 79 * </ul> 80 */ 81 public final class LdmlConverter { 82 // TODO: Do all supplemental data in one go and split similarly to locale data (using RbPath). 83 private static final Predicate<CldrPath> GENDER_LIST_PATHS = 84 supplementalMatcher("gender"); 85 private static final Predicate<CldrPath> METAZONE_PATHS = 86 supplementalMatcher("metaZones", "primaryZones"); 87 private static final Predicate<CldrPath> METADATA_PATHS = 88 supplementalMatcher("metadata"); 89 private static final Predicate<CldrPath> SUPPLEMENTAL_DATA_PATHS = 90 supplementalMatcher( 91 "calendarData", 92 "calendarPreferenceData", 93 "codeMappings", 94 "codeMappingsCurrency", 95 "idValidity", 96 "languageData", 97 "languageMatching", 98 "measurementData", 99 "parentLocales", 100 "personNamesDefaults", 101 "subdivisionContainment", 102 "territoryContainment", 103 "territoryInfo", 104 "timeData", 105 "weekData", 106 "weekOfPreference"); 107 private static final Predicate<CldrPath> CURRENCY_DATA_PATHS = 108 supplementalMatcher("currencyData"); 109 private static final Predicate<CldrPath> UNITS_DATA_PATHS = 110 supplementalMatcher( 111 "convertUnits", 112 "unitConstants", 113 "unitQuantities", 114 "unitPreferenceData", 115 "unitPrefixes"); 116 private static final Predicate<CldrPath> GRAMMATICAL_FEATURES_PATHS = 117 supplementalMatcher("grammaticalData"); 118 private static final Predicate<CldrPath> NUMBERING_SYSTEMS_PATHS = 119 supplementalMatcher("numberingSystems"); 120 private static final Predicate<CldrPath> WINDOWS_ZONES_PATHS = 121 supplementalMatcher("windowsZones"); 122 supplementalMatcher(String... spec)123 private static Predicate<CldrPath> supplementalMatcher(String... spec) { 124 checkArgument(spec.length > 0, "must supply at least one matcher spec"); 125 if (spec.length == 1) { 126 return PathMatcher.of("//supplementalData/" + spec[0])::matchesPrefixOf; 127 } 128 return 129 Arrays.stream(spec) 130 .map(s -> PathMatcher.of("//supplementalData/" + s)) 131 .map(m -> ((Predicate<CldrPath>) m::matchesPrefixOf)) 132 .reduce(p -> false, Predicate::or); 133 } 134 135 private static RbPath RB_PARENT = RbPath.of("%%Parent"); 136 // The quotes below are only so we achieve parity with the manually written alias files. 137 // TODO: Remove unnecessary quotes once the migration to this code is complete. 138 private static RbPath RB_ALIAS = RbPath.of("\"%%ALIAS\""); 139 // Special path for adding to empty files which only exist to complete the parent chain. 140 // TODO: Confirm that this has no meaningful effect and unify "empty" file contents. 141 private static RbPath RB_EMPTY_ALIAS = RbPath.of("___"); 142 143 /** 144 * Output types defining specific subsets of the ICU data which can be converted separately. 145 * This closely mimics the original "NewLdml2IcuConverter" behaviour but could be simplified to 146 * hide what are essentially implementation specific data splits. 147 */ 148 public enum OutputType { 149 LOCALES(LDML), 150 BRKITR(LDML), 151 COLL(LDML), 152 RBNF(LDML), 153 DAY_PERIODS(SUPPLEMENTAL), 154 GENDER_LIST(SUPPLEMENTAL), 155 SUPPLEMENTAL_DATA(SUPPLEMENTAL), 156 UNITS(SUPPLEMENTAL), 157 CURRENCY_DATA(SUPPLEMENTAL), 158 GRAMMATICAL_FEATURES(SUPPLEMENTAL), 159 METADATA(SUPPLEMENTAL), 160 META_ZONES(SUPPLEMENTAL), 161 NUMBERING_SYSTEMS(SUPPLEMENTAL), 162 PLURALS(SUPPLEMENTAL), 163 PLURAL_RANGES(SUPPLEMENTAL), 164 WINDOWS_ZONES(SUPPLEMENTAL), 165 TRANSFORMS(SUPPLEMENTAL), 166 LOCALE_DISTANCE(SUPPLEMENTAL), 167 VERSION(SUPPLEMENTAL), 168 KEY_TYPE_DATA(BCP47); 169 170 public static final ImmutableSet<OutputType> ALL = ImmutableSet.copyOf(OutputType.values()); 171 172 private final CldrDataType type; 173 OutputType(CldrDataType type)174 OutputType(CldrDataType type) { 175 this.type = checkNotNull(type); 176 } 177 getCldrType()178 CldrDataType getCldrType() { 179 return type; 180 } 181 } 182 183 // Map to convert the rather arbitrarily defined "output types" to the directories into which 184 // the data is written. This is only for "LDML" types since other mappers don't need to split 185 // data into multiple directories. 186 private static final ImmutableListMultimap<OutputType, IcuLocaleDir> TYPE_TO_DIR = 187 ImmutableListMultimap.<OutputType, IcuLocaleDir>builder() 188 .putAll(OutputType.LOCALES, CURR, LANG, LOCALES, REGION, UNIT, ZONE) 189 .putAll(OutputType.BRKITR, BRKITR) 190 .putAll(OutputType.COLL, COLL) 191 .putAll(OutputType.RBNF, RBNF) 192 .build(); 193 194 /** Converts CLDR data according to the given configuration. */ convert( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config)195 public static void convert( 196 CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) { 197 new LdmlConverter(src, supplementalData, config).convertAll(); 198 } 199 200 // The supplier for all data to be converted. 201 private final CldrDataSupplier src; 202 // Supplemental data available to mappers if needed. 203 private final SupplementalData supplementalData; 204 // The configuration controlling conversion behaviour. 205 private final LdmlConverterConfig config; 206 // The set of expanded target locale IDs. 207 // TODO: Make available IDs include specials files (or fail if specials are not available). 208 private final ImmutableSet<String> availableIds; 209 // Transformer for locale data. 210 private final PathValueTransformer localeTransformer; 211 // Transformer for supplemental data. 212 private final PathValueTransformer supplementalTransformer; 213 // Header string to go into every ICU data and transliteration rule file (comment prefixes 214 // are not present and must be added by the code writing the file). 215 private final ImmutableList<String> fileHeader; 216 LdmlConverter( CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config)217 private LdmlConverter( 218 CldrDataSupplier src, SupplementalData supplementalData, LdmlConverterConfig config) { 219 this.src = checkNotNull(src); 220 this.supplementalData = checkNotNull(supplementalData); 221 this.config = checkNotNull(config); 222 this.availableIds = ImmutableSet.copyOf( 223 Sets.intersection(supplementalData.getAvailableLocaleIds(), config.getAllLocaleIds())); 224 // Load the remaining path value transformers. 225 this.supplementalTransformer = 226 RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_supplemental.txt"), 227 IcuFunctions.ALGORITHM_FN, 228 IcuFunctions.DATE_FN, 229 IcuFunctions.DAY_NUMBER_FN, 230 IcuFunctions.EXP_FN, 231 IcuFunctions.YMD_FN); 232 this.localeTransformer = 233 RegexTransformer.fromConfigLines(readLinesFromResource("/ldml2icu_locale.txt"), 234 IcuFunctions.CONTEXT_TRANSFORM_INDEX_FN); 235 this.fileHeader = readLinesFromResource("/ldml2icu_header.txt"); 236 } 237 convertAll()238 private void convertAll() { 239 processLdml(); 240 processSupplemental(); 241 if (config.emitReport()) { 242 System.out.println("Supplemental Data Transformer=" + supplementalTransformer); 243 System.out.println("Locale Data Transformer=" + localeTransformer); 244 } 245 } 246 readLinesFromResource(String name)247 private static ImmutableList<String> readLinesFromResource(String name) { 248 try (InputStream in = LdmlConverter.class.getResourceAsStream(name)) { 249 return ImmutableList.copyOf(CharStreams.readLines(new InputStreamReader(in, UTF_8))); 250 } catch (IOException e) { 251 throw new RuntimeException("cannot read resource: " + name, e); 252 } 253 } 254 loadSpecialsData(String localeId)255 private Optional<CldrData> loadSpecialsData(String localeId) { 256 String expected = localeId + ".xml"; 257 try (Stream<Path> files = Files.walk(config.getSpecialsDir())) { 258 Set<Path> xmlFiles = files 259 .filter(Files::isRegularFile) 260 .filter(f -> f.getFileName().toString().equals(expected)) 261 .collect(Collectors.toSet()); 262 return !xmlFiles.isEmpty() 263 ? Optional.of( 264 CldrDataSupplier.forCldrFiles(LDML, config.getMinimumDraftStatus(), xmlFiles)) 265 : Optional.empty(); 266 } catch (IOException e) { 267 throw new RuntimeException( 268 "error processing specials directory: " + config.getSpecialsDir(), e); 269 } 270 } 271 processLdml()272 private void processLdml() { 273 ImmutableList<IcuLocaleDir> splitDirs = 274 config.getOutputTypes().stream() 275 .filter(t -> t.getCldrType() == LDML) 276 .flatMap(t -> TYPE_TO_DIR.get(t).stream()) 277 .collect(toImmutableList()); 278 if (splitDirs.isEmpty()) { 279 return; 280 } 281 282 String cldrVersion = config.getVersionInfo().getCldrVersion(); 283 284 Map<IcuLocaleDir, DependencyGraph> graphMetadata = new HashMap<>(); 285 splitDirs.forEach(d -> graphMetadata.put(d, new DependencyGraph(cldrVersion))); 286 287 SetMultimap<IcuLocaleDir, String> writtenLocaleIds = HashMultimap.create(); 288 Path baseDir = config.getOutputDir(); 289 290 System.out.println("processing standard ldml files"); 291 for (String id : config.getAllLocaleIds()) { 292 // Skip "target" IDs that are aliases (they are handled later). 293 if (!availableIds.contains(id)) { 294 continue; 295 } 296 // TODO: Remove the following skip when ICU-20997 is fixed 297 if (id.contains("VALENCIA") || id.contains("TARASK")) { 298 System.out.println("(skipping " + id + " until ICU-20997 is fixed)"); 299 continue; 300 } 301 // Now that former CLDR see locales are in common, there are some language 302 // variants that are not at a high enough coverage level to pick up. 303 // TODO need a better way of handling this. 304 if (id.contains("POLYTON")) { 305 System.out.println("(skipping " + id + ", insufficient coverage level)"); 306 continue; 307 } 308 309 IcuData icuData = new IcuData(id, true); 310 311 Optional<CldrData> specials = loadSpecialsData(id); 312 CldrData unresolved = src.getDataForLocale(id, UNRESOLVED); 313 314 BreakIteratorMapper.process(icuData, unresolved, specials); 315 CollationMapper.process(icuData, unresolved, specials, cldrVersion); 316 RbnfMapper.process(icuData, unresolved, specials); 317 318 CldrData resolved = src.getDataForLocale(id, RESOLVED); 319 Optional<String> defaultCalendar = supplementalData.getDefaultCalendar(id); 320 LocaleMapper.process( 321 icuData, unresolved, resolved, specials, localeTransformer, defaultCalendar); 322 323 ListMultimap<IcuLocaleDir, RbPath> splitPaths = LinkedListMultimap.create(); 324 for (RbPath p : icuData.getPaths()) { 325 String rootName = getBaseSegmentName(p.getSegment(0)); 326 splitPaths.put(LOCALE_SPLIT_INFO.getOrDefault(rootName, LOCALES), p); 327 } 328 329 Optional<String> parent = supplementalData.getExplicitParentLocaleOf(id); 330 // We always write base languages (even if empty). 331 boolean isBaseLanguage = !id.contains("_"); 332 // Run through all directories (not just the keySet() of the split path map) since we 333 // sometimes write empty files. 334 for (IcuLocaleDir dir : splitDirs) { 335 Set<String> targetIds = config.getTargetLocaleIds(dir); 336 if (!targetIds.contains(id)) { 337 if (!splitPaths.get(dir).isEmpty()) { 338 System.out.format( 339 "target IDs for %s does not contain %s, but it has data: %s\n", 340 dir, id, splitPaths.get(dir)); 341 } 342 continue; 343 } 344 345 Path outDir = baseDir.resolve(dir.getOutputDir()); 346 IcuData splitData = new IcuData(icuData.getName(), icuData.hasFallback()); 347 348 // The split data can still be empty for this directory, but that's expected (it 349 // might only be written because it has an explicit parent added below). 350 splitPaths.get(dir).forEach(p -> splitData.add(p, icuData.get(p))); 351 352 // If we add an explicit parent locale, it forces the data to be written. This is 353 // where we check for forced overrides of the parent relationship (which is a per 354 // directory thing). 355 getIcuParent(id, parent, dir).ifPresent(p -> { 356 splitData.add(RB_PARENT, p); 357 graphMetadata.get(dir).addParent(id, p); 358 }); 359 360 if (!splitData.getPaths().isEmpty() || isBaseLanguage || dir.includeEmpty()) { 361 if (id.equals("root")) { 362 splitData.setVersion(cldrVersion); 363 } 364 write(splitData, outDir, false); 365 writtenLocaleIds.put(dir, id); 366 } 367 } 368 } 369 370 System.out.println("processing alias ldml files"); 371 for (IcuLocaleDir dir : splitDirs) { 372 Path outDir = baseDir.resolve(dir.getOutputDir()); 373 Set<String> targetIds = config.getTargetLocaleIds(dir); 374 DependencyGraph depGraph = graphMetadata.get(dir); 375 376 // TODO: Maybe calculate alias map directly into the dependency graph? 377 Map<String, String> aliasMap = getAliasMap(targetIds, dir); 378 aliasMap.forEach((s, t) -> { 379 depGraph.addAlias(s, t); 380 writeAliasFile(s, t, outDir); 381 // It's only important to record which alias files are written because of forced 382 // aliases, but since it's harmless otherwise, we just do it unconditionally. 383 // Normal alias files don't affect the empty file calculation, but forced ones can. 384 writtenLocaleIds.put(dir, s); 385 }); 386 387 calculateEmptyFiles(writtenLocaleIds.get(dir), aliasMap.values()) 388 .forEach(id -> writeEmptyFile(id, outDir, aliasMap.values())); 389 390 writeDependencyGraph(outDir, depGraph); 391 } 392 } 393 394 395 private static final CharMatcher PATH_MODIFIER = CharMatcher.anyOf(":%"); 396 397 // Resource bundle paths elements can have variants (e.g. "Currencies%narrow) or type 398 // annotations (e.g. "languages:intvector"). We strip these when considering the element name. getBaseSegmentName(String segment)399 private static String getBaseSegmentName(String segment) { 400 int idx = PATH_MODIFIER.indexIn(segment); 401 return idx == -1 ? segment : segment.substring(0, idx); 402 } 403 404 /* 405 * There are four reasons for treating a locale ID as an alias. 406 * 1: It contains deprecated subtags (e.g. "sr_YU", which should be "sr_Cyrl_RS"). 407 * 2: It has no CLDR data but is missing a script subtag. 408 * 3: It is one of the special "phantom" alias which cannot be represented normally 409 * and must be manually mapped (e.g. legacy locale IDs which don't even parse). 410 * 4: It is a "super special" forced alias, which might replace existing aliases in 411 * some output directories. 412 */ getAliasMap(Set<String> localeIds, IcuLocaleDir dir)413 private Map<String, String> getAliasMap(Set<String> localeIds, IcuLocaleDir dir) { 414 // Even forced aliases only apply if they are in the set of locale IDs for the directory. 415 Map<String, String> forcedAliases = 416 Maps.filterKeys(config.getForcedAliases(dir), localeIds::contains); 417 418 Map<String, String> aliasMap = new LinkedHashMap<>(); 419 for (String id : localeIds) { 420 if (forcedAliases.containsKey(id)) { 421 // Forced aliases will be added later and don't need to be processed here. This 422 // is especially necessary if the ID is not structurally valid (e.g. "no_NO_NY") 423 // since that cannot be processed by the code below. 424 continue; 425 } 426 String canonicalId = supplementalData.replaceDeprecatedTags(id); 427 if (!canonicalId.equals(id)) { 428 // If the canonical form of an ID differs from the requested ID, the this is an 429 // alias, and just needs to point to the canonical ID. 430 aliasMap.put(id, canonicalId); 431 continue; 432 } 433 if (availableIds.contains(id)) { 434 // If it's canonical and supported, it's not an alias. 435 continue; 436 } 437 // If the requested locale is not supported, maximize it and alias to that. 438 String maximizedId = supplementalData.maximize(id) 439 .orElseThrow(() -> new IllegalArgumentException("unsupported locale ID: " + id)); 440 // We can't alias to ourselves and we shouldn't be here is the ID was already maximal. 441 checkArgument(!maximizedId.equals(id), "unsupported maximized locale ID: %s", id); 442 aliasMap.put(id, maximizedId); 443 } 444 // Important that we overwrite entries which might already exist here, since we might have 445 // already calculated a "natural" alias for something that we want to force (and we should 446 // replace the existing target, since that affects how we determine empty files later). 447 aliasMap.putAll(forcedAliases); 448 return aliasMap; 449 } 450 451 /* 452 * Helper to determine the correct parent ID to be written into the ICU data file. The rules 453 * are: 454 * 1: If no forced parent exists (common) write the explicit parent (if that exists) 455 * 2: If a forced parent exists, but the forced value is what you would get by just truncating 456 * the current locale ID, write nothing (ICU libraries truncate when no parent is set). 457 * 3: Write the forced parent (this is an exceptional case, and may not even occur in data). 458 */ getIcuParent(String id, Optional<String> parent, IcuLocaleDir dir)459 private Optional<String> getIcuParent(String id, Optional<String> parent, IcuLocaleDir dir) { 460 String forcedParentId = config.getForcedParents(dir).get(id); 461 if (forcedParentId == null) { 462 return parent; 463 } 464 return id.contains("_") && forcedParentId.regionMatches(0, id, 0, id.lastIndexOf('_')) 465 ? Optional.empty() : Optional.of(forcedParentId); 466 } 467 processSupplemental()468 private void processSupplemental() { 469 for (OutputType type : config.getOutputTypes()) { 470 if (type.getCldrType() == LDML) { 471 continue; 472 } 473 System.out.println("processing supplemental type " + type); 474 switch (type) { 475 case DAY_PERIODS: 476 write(DayPeriodsMapper.process(src), "misc"); 477 break; 478 479 case GENDER_LIST: 480 processSupplemental("genderList", GENDER_LIST_PATHS, "misc", false); 481 break; 482 483 case SUPPLEMENTAL_DATA: 484 processSupplemental("supplementalData", SUPPLEMENTAL_DATA_PATHS, "misc", true); 485 break; 486 487 case UNITS: 488 processSupplemental("units", UNITS_DATA_PATHS, "misc", true); 489 break; 490 491 case CURRENCY_DATA: 492 processSupplemental("supplementalData", CURRENCY_DATA_PATHS, "curr", false); 493 break; 494 495 case GRAMMATICAL_FEATURES: 496 processSupplemental("grammaticalFeatures", GRAMMATICAL_FEATURES_PATHS, "misc", false); 497 break; 498 499 case METADATA: 500 processSupplemental("metadata", METADATA_PATHS, "misc", false); 501 break; 502 503 case META_ZONES: 504 processSupplemental("metaZones", METAZONE_PATHS, "misc", false); 505 break; 506 507 case NUMBERING_SYSTEMS: 508 processSupplemental("numberingSystems", NUMBERING_SYSTEMS_PATHS, "misc", false); 509 break; 510 511 case PLURALS: 512 write(PluralsMapper.process(src), "misc"); 513 break; 514 515 case PLURAL_RANGES: 516 write(PluralRangesMapper.process(src), "misc"); 517 break; 518 519 case LOCALE_DISTANCE: 520 write(LocaleDistanceMapper.process(src), "misc"); 521 break; 522 523 case WINDOWS_ZONES: 524 processSupplemental("windowsZones", WINDOWS_ZONES_PATHS, "misc", false); 525 break; 526 527 case TRANSFORMS: 528 Path transformDir = createDirectory(config.getOutputDir().resolve("translit")); 529 write(TransformsMapper.process(src, transformDir, fileHeader), transformDir, false); 530 break; 531 532 case VERSION: 533 writeIcuVersionInfo(); 534 break; 535 536 case KEY_TYPE_DATA: 537 Bcp47Mapper.process(src).forEach(d -> write(d, "misc")); 538 break; 539 540 default: 541 throw new AssertionError("Unsupported supplemental type: " + type); 542 } 543 } 544 } 545 546 private static final RbPath RB_CLDR_VERSION = RbPath.of("cldrVersion"); 547 processSupplemental( String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion)548 private void processSupplemental( 549 String label, Predicate<CldrPath> paths, String dir, boolean addCldrVersion) { 550 IcuData icuData = 551 SupplementalMapper.process(src, supplementalTransformer, label, paths); 552 // A hack for "supplementalData.txt" since the "cldrVersion" value doesn't come from the 553 // supplemental data XML files. 554 if (addCldrVersion) { 555 // Not the same path as used by "setVersion()" 556 icuData.add(RB_CLDR_VERSION, config.getVersionInfo().getCldrVersion()); 557 } 558 write(icuData, dir); 559 } 560 writeAliasFile(String srcId, String destId, Path dir)561 private void writeAliasFile(String srcId, String destId, Path dir) { 562 IcuData icuData = new IcuData(srcId, true); 563 icuData.add(RB_ALIAS, destId); 564 // Allow overwrite for aliases since some are "forced" and overwrite existing targets. 565 // TODO: Maybe tighten this up so only forced aliases for existing targets are overwritten. 566 write(icuData, dir, true); 567 } 568 writeEmptyFile(String id, Path dir, Collection<String> aliasTargets)569 private void writeEmptyFile(String id, Path dir, Collection<String> aliasTargets) { 570 IcuData icuData = new IcuData(id, true); 571 // TODO: Document the reason for this (i.e. why does it matter what goes into empty files?) 572 if (aliasTargets.contains(id)) { 573 icuData.setFileComment("generated alias target"); 574 icuData.add(RB_EMPTY_ALIAS, ""); 575 } else { 576 // These empty files only exist because the target of an alias has a parent locale 577 // which is itself not in the set of written ICU files. An "indirect alias target". 578 // No need to add data: Just write a resource bundle with an empty top-level table. 579 } 580 write(icuData, dir, false); 581 } 582 writeIcuVersionInfo()583 private void writeIcuVersionInfo() { 584 IcuVersionInfo versionInfo = config.getVersionInfo(); 585 IcuData versionData = new IcuData("icuver", false); 586 versionData.add(RbPath.of("ICUVersion"), versionInfo.getIcuVersion()); 587 versionData.add(RbPath.of("DataVersion"), versionInfo.getIcuDataVersion()); 588 versionData.add(RbPath.of("CLDRVersion"), versionInfo.getCldrVersion()); 589 // Write file via non-helper methods since we need to include a legacy copyright. 590 Path miscDir = config.getOutputDir().resolve("misc"); 591 createDirectory(miscDir); 592 ImmutableList<String> versionHeader = ImmutableList.<String>builder() 593 .addAll(fileHeader) 594 .add( 595 "***************************************************************************", 596 "*", 597 "* Copyright (C) 2010-2016 International Business Machines", 598 "* Corporation and others. All Rights Reserved.", 599 "*", 600 "***************************************************************************") 601 .build(); 602 IcuTextWriter.writeToFile(versionData, miscDir, versionHeader, false); 603 } 604 605 // Commonest case for writing data files in "normal" directories. write(IcuData icuData, String dir)606 private void write(IcuData icuData, String dir) { 607 write(icuData, config.getOutputDir().resolve(dir), false); 608 } 609 write(IcuData icuData, Path dir, boolean allowOverwrite)610 private void write(IcuData icuData, Path dir, boolean allowOverwrite) { 611 createDirectory(dir); 612 IcuTextWriter.writeToFile(icuData, dir, fileHeader, allowOverwrite); 613 } 614 createDirectory(Path dir)615 private Path createDirectory(Path dir) { 616 try { 617 Files.createDirectories(dir); 618 } catch (IOException e) { 619 throw new RuntimeException("cannot create directory: " + dir, e); 620 } 621 return dir; 622 } 623 writeDependencyGraph(Path dir, DependencyGraph depGraph)624 private void writeDependencyGraph(Path dir, DependencyGraph depGraph) { 625 createDirectory(dir); 626 try (BufferedWriter w = Files.newBufferedWriter(dir.resolve("LOCALE_DEPS.json"), UTF_8); 627 PrintWriter out = new PrintWriter(w)) { 628 depGraph.writeJsonTo(out, fileHeader); 629 out.flush(); 630 } catch (IOException e) { 631 throw new RuntimeException("cannot write dependency graph file: " + dir, e); 632 } 633 } 634 635 // The set of IDs to process is: 636 // * any file that was written 637 // * any alias target (not written) 638 // 639 // From which we generate the complete "closure" under the "getParent()" function. This set 640 // contains all file (written or not) which need to exist to complete the locale hierarchy. 641 // 642 // Then we remove all the written files to just leave the ones that need to be generated. 643 // This is a simple and robust approach that handles things like "gaps" in non-aliased 644 // locale IDs, where an intermediate parent is not present. calculateEmptyFiles( Set<String> writtenIds, Collection<String> aliasTargetIds)645 private ImmutableSet<String> calculateEmptyFiles( 646 Set<String> writtenIds, Collection<String> aliasTargetIds) { 647 648 Set<String> seedIds = new HashSet<>(writtenIds); 649 seedIds.addAll(aliasTargetIds); 650 // Be nice and sort the output (makes easier debugging). 651 Set<String> allIds = new TreeSet<>(); 652 for (String id : seedIds) { 653 while (!id.equals("root") && !allIds.contains(id)) { 654 allIds.add(id); 655 id = supplementalData.getParent(id); 656 } 657 } 658 return ImmutableSet.copyOf(Sets.difference(allIds, writtenIds)); 659 } 660 661 private static final ImmutableMap<String, IcuLocaleDir> LOCALE_SPLIT_INFO = 662 ImmutableMap.<String, IcuLocaleDir>builder() 663 // BRKITR 664 .put("boundaries", BRKITR) 665 .put("dictionaries", BRKITR) 666 .put("exceptions", BRKITR) 667 .put("extensions", BRKITR) 668 .put("lstm", BRKITR) 669 // COLL 670 .put("collations", COLL) 671 .put("depends", COLL) 672 .put("UCARules", COLL) 673 // CURR 674 .put("Currencies", CURR) 675 .put("CurrencyPlurals", CURR) 676 .put("CurrencyUnitPatterns", CURR) 677 .put("currencySpacing", CURR) 678 // LANG 679 .put("Keys", LANG) 680 .put("Languages", LANG) 681 .put("Scripts", LANG) 682 .put("Types", LANG) 683 .put("Variants", LANG) 684 .put("characterLabelPattern", LANG) 685 .put("codePatterns", LANG) 686 .put("localeDisplayPattern", LANG) 687 // RBNF 688 .put("RBNFRules", RBNF) 689 // REGION 690 .put("Countries", REGION) 691 // UNIT 692 .put("durationUnits", UNIT) 693 .put("units", UNIT) 694 .put("unitsShort", UNIT) 695 .put("unitsNarrow", UNIT) 696 // ZONE 697 .put("zoneStrings", ZONE) 698 .build(); 699 } 700