1 package org.unicode.cldr.api; 2 3 import static com.google.common.base.Preconditions.checkArgument; 4 import static com.google.common.base.Preconditions.checkNotNull; 5 import static com.google.common.collect.ImmutableSet.toImmutableSet; 6 import static org.unicode.cldr.api.CldrDataType.LDML; 7 8 import com.google.common.collect.ImmutableSet; 9 import com.google.common.collect.ImmutableSetMultimap; 10 import com.google.common.collect.LinkedHashMultimap; 11 import com.google.common.collect.Multimap; 12 import java.io.File; 13 import java.io.IOException; 14 import java.io.UncheckedIOException; 15 import java.nio.file.Files; 16 import java.nio.file.Path; 17 import java.util.Set; 18 import java.util.function.Predicate; 19 import java.util.stream.Stream; 20 import org.unicode.cldr.api.CldrData.PrefixVisitor; 21 import org.unicode.cldr.api.CldrData.ValueVisitor; 22 import org.unicode.cldr.util.CLDRFile; 23 import org.unicode.cldr.util.Factory; 24 import org.unicode.cldr.util.SimpleFactory; 25 26 /** 27 * The main API for accessing {@link CldrPath} and {@link CldrValue} instances for CLDR data. This 28 * API abstracts the data sources, file names and other implementation details of CLDR to provide a 29 * clean way to access CLDR data. 30 * 31 * <p>{@code CldrData} instances are obtained from an appropriate {@code CldrDataSupplier}, and 32 * accept a {@link ValueVisitor} or {@link PrefixVisitor} to iterate over the data. 33 * 34 * <p>For example the following code prints every value (including its associated distinguishing 35 * path) in the BCP-47 data in DTD order: 36 * 37 * <pre>{@code 38 * CldrDataSupplier supplier = CldrDataSupplier.forFilesIn(rootDir); 39 * CldrData bcp47Data = supplier.getDataForType(CldrDataType.BCP47); 40 * bcp47Data.accept(PathOrder.DTD, System.out::println); 41 * }</pre> 42 * 43 * <p>Note that while the paths of values visited in a single {@link CldrData} instance are unique, 44 * there is nothing to prevent duplication between multiple data sources. This is particularly 45 * important when considering "ordered" elements with a sort index, since it represents "encounter 46 * order" and so any merging of values would have to track and rewrite sort indices carefully. It is 47 * recommended that if multiple {@code CldrData} instances are to be processed, users ensure that no 48 * path prefixes be shared between them. See also {@link CldrPath#getSortIndex()}. 49 * 50 * <p>Note that because the distinguishing paths associated with a {@link CldrValue} are unique per 51 * visitation, the special "version" path/value must be omitted (e.g. "//ldml/version") since it 52 * would otherwise appear multiple times. It's also possible that the data being processed by this 53 * library has a different version to the library itself. 54 */ 55 public abstract class CldrDataSupplier { 56 /** 57 * Returns the default CLDR version string (e.g. {@code "36"}) that is compiled into the CLDR 58 * API library. Note that users of this API may need to specify a different version string when 59 * processing data that is out-of-sync with the current library version. 60 */ getCldrVersionString()61 public static String getCldrVersionString() { 62 return CLDRFile.GEN_VERSION; 63 } 64 65 /** Options for controlling how locale-based LDML data is processed. */ 66 public enum CldrResolution { 67 /** 68 * Locale-based CLDR data should include resolved values from other "parent" locales 69 * according to the CLDR specification. 70 */ 71 RESOLVED, 72 73 /** 74 * Locale-based CLDR data should only include values specified directly in the specified 75 * locale. 76 */ 77 UNRESOLVED 78 } 79 80 /** 81 * Returns a supplier for CLDR data in the specified CLDR project root directory. This must be a 82 * directory which contains the standard CLDR {@code "common"} directory file hierarchy. 83 * 84 * @param cldrRootDir the root directory of a CLDR project containing the data to be read. 85 * @return a supplier for CLDR data in the given path. 86 */ forCldrFilesIn(Path cldrRootDir)87 public static CldrDataSupplier forCldrFilesIn(Path cldrRootDir) { 88 // Note that, unlike "withDraftStatusAtLeast()", adding a new fluent method to support 89 // additional root directories is problematic, since: 90 // 1) directories are conceptually only important for FileBasedDataSupplier (so a new 91 // fluent method in the supplier API makes no sense for other implementations). 92 // 2) creating the directory map must happen before the supplier is returned (rather than 93 // just before it supplies any data) because of the getAvailableLocaleIds() method. 94 // 95 // Thus it seems better to just add an extra parameter to this method when/if needed. 96 // TODO: Extend the API to allow source roots to be specified (but not via directory name). 97 Set<String> rootDirs = ImmutableSet.of("common"); 98 return new FileBasedDataSupplier( 99 createCldrDirectoryMap(cldrRootDir, rootDirs), CldrDraftStatus.UNCONFIRMED); 100 } 101 102 /** 103 * Returns an unresolved CLDR data instance of a set of XML file. This is typically only used 104 * for accessing additional CLDR data outside the CLDR project directories. The data in the 105 * specified files is merged, and it is a error if the same path appears multiple times (i.e. 106 * this input file must be "disjoint" in terms of the CLDR paths they specify). 107 * 108 * @param type the expected CLDR type of the data in the XML file. 109 * @param draftStatus the desired status for filtering paths/values. 110 * @param xmlFiles the CLDR XML files. 111 * @return a data instance for the paths/values in the specified XML file. 112 */ forCldrFiles( CldrDataType type, CldrDraftStatus draftStatus, Set<Path> xmlFiles)113 public static CldrData forCldrFiles( 114 CldrDataType type, CldrDraftStatus draftStatus, Set<Path> xmlFiles) { 115 return new XmlDataSource(type, ImmutableSet.copyOf(xmlFiles), draftStatus); 116 } 117 createCldrDirectoryMap( Path cldrRootDir, Set<String> rootDirs)118 private static Multimap<CldrDataType, Path> createCldrDirectoryMap( 119 Path cldrRootDir, Set<String> rootDirs) { 120 121 LinkedHashMultimap<CldrDataType, Path> multimap = LinkedHashMultimap.create(); 122 for (CldrDataType type : CldrDataType.values()) { 123 type.getSourceDirectories() 124 .flatMap(d -> rootDirs.stream().map(r -> cldrRootDir.resolve(r).resolve(d))) 125 .filter(Files::isDirectory) 126 .forEach(p -> multimap.put(type, p)); 127 } 128 return multimap; 129 } 130 131 /** 132 * Returns an in-memory supplier for the specified {@link CldrValue}s. This is useful for 133 * testing or handling special case data. The default (arbitrary) path order is determined by 134 * the order of values passed to this method. 135 * 136 * @param values the values (and associated paths) to include in the returned data. 137 */ forValues(Iterable<CldrValue> values)138 public static CldrData forValues(Iterable<CldrValue> values) { 139 return new InMemoryData(values); 140 } 141 142 /** 143 * Returns a modified data supplier which only provides paths/values with a draft status at or 144 * above the specified value. To create a supplier that will process all CLDR paths/values, use 145 * {@link CldrDraftStatus#UNCONFIRMED UNCONFIRMED}. 146 * 147 * @param draftStatus the desired status for filtering paths/values. 148 * @return a modified supplier which filters by the specified status. 149 */ withDraftStatusAtLeast(CldrDraftStatus draftStatus)150 public abstract CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus); 151 152 /** 153 * Returns an LDML data instance for the specified locale ID. 154 * 155 * <p>If {@code resolution} is set to {@link CldrResolution#RESOLVED RESOLVED} then values 156 * inferred from parent locales and aliases will be produced by the supplier. Note that if an 157 * unsupported locale ID is given (i.e. one not in the set returned by {@link 158 * #getAvailableLocaleIds()}), then an empty data instance is returned. 159 * 160 * @param localeId the locale ID (e.g. "en_GB" or "root") for the returned data. 161 * @param resolution whether to resolve CLDR values for the given locale ID according to the 162 * CLDR specification. 163 * @return the specified locale based CLDR data (possibly empty). 164 * @throws IllegalArgumentException if the locale ID is not structurally valid. 165 */ getDataForLocale(String localeId, CldrResolution resolution)166 public abstract CldrData getDataForLocale(String localeId, CldrResolution resolution); 167 168 /** 169 * Returns an unmodifiable set of available locale IDs that this supplier can provide. This need 170 * not be ordered. 171 * 172 * @return the set of available locale IDs. 173 */ getAvailableLocaleIds()174 public abstract Set<String> getAvailableLocaleIds(); 175 176 /** 177 * Returns a data supplier for non-locale specific CLDR data of the given type. 178 * 179 * @param type the required non-{@link CldrDataType#LDML LDML} data type. 180 * @return the specified non-locale based CLDR data. 181 * @throws IllegalArgumentException if {@link CldrDataType#LDML} is given. 182 */ getDataForType(CldrDataType type)183 public abstract CldrData getDataForType(CldrDataType type); 184 185 private static final class FileBasedDataSupplier extends CldrDataSupplier { 186 private final ImmutableSetMultimap<CldrDataType, Path> directoryMap; 187 private final CldrDraftStatus draftStatus; 188 189 // Created on-demand to keep constructor simple (in a fluent API you might create several 190 // variants of a supplier but only get data from one, or only use non-LDML XML data). 191 private Factory factory = null; 192 FileBasedDataSupplier( Multimap<CldrDataType, Path> directoryMap, CldrDraftStatus draftStatus)193 private FileBasedDataSupplier( 194 Multimap<CldrDataType, Path> directoryMap, CldrDraftStatus draftStatus) { 195 this.directoryMap = ImmutableSetMultimap.copyOf(directoryMap); 196 this.draftStatus = checkNotNull(draftStatus); 197 } 198 199 // Locking should be no issue, since contention on these supplier instance is expected to 200 // be minimal. getFactory()201 private synchronized Factory getFactory() { 202 if (factory == null) { 203 File[] dirArray = 204 getDirectoriesForType(LDML).map(Path::toFile).toArray(File[]::new); 205 checkArgument( 206 dirArray.length > 0, 207 "no LDML directories exist: %s", 208 directoryMap.get(LDML)); 209 factory = SimpleFactory.make(dirArray, ".*", draftStatus.getRawStatus()); 210 } 211 return factory; 212 } 213 214 @Override withDraftStatusAtLeast(CldrDraftStatus draftStatus)215 public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus) { 216 return new FileBasedDataSupplier(directoryMap, draftStatus); 217 } 218 219 @Override getDataForLocale(String localeId, CldrResolution resolution)220 public CldrData getDataForLocale(String localeId, CldrResolution resolution) { 221 LocaleIds.checkCldrLocaleId(localeId); 222 Factory factory = getFactory(); 223 if (factory.getAvailable().contains(localeId)) { 224 return new CldrFileDataSource( 225 factory.make(localeId, resolution == CldrResolution.RESOLVED)); 226 } 227 return NO_DATA; 228 } 229 230 @Override getAvailableLocaleIds()231 public Set<String> getAvailableLocaleIds() { 232 return getFactory().getAvailable(); 233 } 234 235 @Override getDataForType(CldrDataType type)236 public CldrData getDataForType(CldrDataType type) { 237 ImmutableSet<Path> xmlFiles = listXmlFilesForType(type); 238 if (!xmlFiles.isEmpty()) { 239 return new XmlDataSource(type, xmlFiles, draftStatus); 240 } 241 return NO_DATA; 242 } 243 getDirectoriesForType(CldrDataType type)244 private Stream<Path> getDirectoriesForType(CldrDataType type) { 245 return directoryMap.get(type).stream().filter(Files::exists); 246 } 247 listXmlFilesForType(CldrDataType type)248 private ImmutableSet<Path> listXmlFilesForType(CldrDataType type) { 249 ImmutableSet<Path> xmlFiles = 250 getDirectoriesForType(type) 251 .flatMap(FileBasedDataSupplier::listXmlFiles) 252 .collect(toImmutableSet()); 253 checkArgument( 254 !xmlFiles.isEmpty(), 255 "no XML files exist within directories: %s", 256 directoryMap.get(type)); 257 return xmlFiles; 258 } 259 260 // This is a separate function because stream functions cannot throw checked exceptions. 261 // 262 // Note: "Files.walk()" warns about closing resources and suggests "try-with-resources" to 263 // ensure closure, "flatMap()" (which is what calls this method) is defined to call close() 264 // on each stream as it's added into the result, so in normal use this should all be fine. 265 // 266 // https://docs.oracle.com/javase/8/docs/api/java/util/stream/Stream.html#flatMap-java.util.function.Function- listXmlFiles(Path dir)267 private static Stream<Path> listXmlFiles(Path dir) { 268 try { 269 return Files.walk(dir).filter(IS_XML_FILE); 270 } catch (IOException e) { 271 throw new UncheckedIOException(e); 272 } 273 } 274 275 private static final Predicate<Path> IS_XML_FILE = 276 p -> Files.isRegularFile(p) && p.getFileName().toString().endsWith(".xml"); 277 } 278 279 private static final CldrData NO_DATA = 280 new CldrData() { 281 @Override 282 public void accept(PathOrder order, ValueVisitor visitor) {} 283 284 @Override 285 public void accept(PathOrder order, PrefixVisitor visitor) {} 286 287 @Override 288 public CldrValue get(CldrPath path) { 289 return null; 290 } 291 }; 292 } 293