xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/api/CldrDataSupplier.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.api;
2 
3 import static com.google.common.base.Preconditions.checkArgument;
4 import static com.google.common.base.Preconditions.checkNotNull;
5 import static com.google.common.collect.ImmutableSet.toImmutableSet;
6 import static org.unicode.cldr.api.CldrDataType.LDML;
7 
8 import com.google.common.collect.ImmutableSet;
9 import com.google.common.collect.ImmutableSetMultimap;
10 import com.google.common.collect.LinkedHashMultimap;
11 import com.google.common.collect.Multimap;
12 import java.io.File;
13 import java.io.IOException;
14 import java.io.UncheckedIOException;
15 import java.nio.file.Files;
16 import java.nio.file.Path;
17 import java.util.Set;
18 import java.util.function.Predicate;
19 import java.util.stream.Stream;
20 import org.unicode.cldr.api.CldrData.PrefixVisitor;
21 import org.unicode.cldr.api.CldrData.ValueVisitor;
22 import org.unicode.cldr.util.CLDRFile;
23 import org.unicode.cldr.util.Factory;
24 import org.unicode.cldr.util.SimpleFactory;
25 
26 /**
27  * The main API for accessing {@link CldrPath} and {@link CldrValue} instances for CLDR data. This
28  * API abstracts the data sources, file names and other implementation details of CLDR to provide a
29  * clean way to access CLDR data.
30  *
31  * <p>{@code CldrData} instances are obtained from an appropriate {@code CldrDataSupplier}, and
32  * accept a {@link ValueVisitor} or {@link PrefixVisitor} to iterate over the data.
33  *
34  * <p>For example the following code prints every value (including its associated distinguishing
35  * path) in the BCP-47 data in DTD order:
36  *
37  * <pre>{@code
38  * CldrDataSupplier supplier = CldrDataSupplier.forCldrFilesIn(rootDir);
39  * CldrData bcp47Data = supplier.getDataForType(CldrDataType.BCP47);
40  * bcp47Data.accept(PathOrder.DTD, System.out::println);
41  * }</pre>
42  *
43  * <p>Note that while the paths of values visited in a single {@link CldrData} instance are unique,
44  * there is nothing to prevent duplication between multiple data sources. This is particularly
45  * important when considering "ordered" elements with a sort index, since it represents "encounter
46  * order" and so any merging of values would have to track and rewrite sort indices carefully. It is
47  * recommended that if multiple {@code CldrData} instances are to be processed, users ensure that no
48  * path prefixes be shared between them. See also {@link CldrPath#getSortIndex()}.
49  *
50  * <p>Note that because the distinguishing paths associated with a {@link CldrValue} are unique per
51  * visitation, the special "version" path/value must be omitted (e.g. "//ldml/version") since it
52  * would otherwise appear multiple times. It's also possible that the data being processed by this
53  * library has a different version to the library itself.
54  */
55 public abstract class CldrDataSupplier {
56     /**
57      * Returns the default CLDR version string (e.g. {@code "36"}) that is compiled into the CLDR
58      * API library. Note that users of this API may need to specify a different version string when
59      * processing data that is out-of-sync with the current library version.
60      */
getCldrVersionString()61     public static String getCldrVersionString() {
62         return CLDRFile.GEN_VERSION;
63     }
64 
65     /** Options for controlling how locale-based LDML data is processed. */
66     public enum CldrResolution {
67         /**
68          * Locale-based CLDR data should include resolved values from other "parent" locales
69          * according to the CLDR specification.
70          */
71         RESOLVED,
72 
73         /**
74          * Locale-based CLDR data should only include values specified directly in the specified
75          * locale.
76          */
77         UNRESOLVED
78     }
79 
80     /**
81      * Returns a supplier for CLDR data in the specified CLDR project root directory. This must be a
82      * directory which contains the standard CLDR {@code "common"} directory file hierarchy.
83      *
84      * @param cldrRootDir the root directory of a CLDR project containing the data to be read.
85      * @return a supplier for CLDR data in the given path.
86      */
forCldrFilesIn(Path cldrRootDir)87     public static CldrDataSupplier forCldrFilesIn(Path cldrRootDir) {
88         // Note that, unlike "withDraftStatusAtLeast()", adding a new fluent method to support
89         // additional root directories is problematic, since:
90         // 1) directories are conceptually only important for FileBasedDataSupplier (so a new
91         //    fluent method in the supplier API makes no sense for other implementations).
92         // 2) creating the directory map must happen before the supplier is returned (rather than
93         //    just before it supplies any data) because of the getAvailableLocaleIds() method.
94         //
95         // Thus it seems better to just add an extra parameter to this method when/if needed.
96         // TODO: Extend the API to allow source roots to be specified (but not via directory name).
97         Set<String> rootDirs = ImmutableSet.of("common");
98         return new FileBasedDataSupplier(
99                 createCldrDirectoryMap(cldrRootDir, rootDirs), CldrDraftStatus.UNCONFIRMED);
100     }
101 
102     /**
103      * Returns an unresolved CLDR data instance of a set of XML file. This is typically only used
104      * for accessing additional CLDR data outside the CLDR project directories. The data in the
105      * specified files is merged, and it is a error if the same path appears multiple times (i.e.
106      * this input file must be "disjoint" in terms of the CLDR paths they specify).
107      *
108      * @param type the expected CLDR type of the data in the XML file.
109      * @param draftStatus the desired status for filtering paths/values.
110      * @param xmlFiles the CLDR XML files.
111      * @return a data instance for the paths/values in the specified XML file.
112      */
forCldrFiles( CldrDataType type, CldrDraftStatus draftStatus, Set<Path> xmlFiles)113     public static CldrData forCldrFiles(
114             CldrDataType type, CldrDraftStatus draftStatus, Set<Path> xmlFiles) {
115         return new XmlDataSource(type, ImmutableSet.copyOf(xmlFiles), draftStatus);
116     }
117 
createCldrDirectoryMap( Path cldrRootDir, Set<String> rootDirs)118     private static Multimap<CldrDataType, Path> createCldrDirectoryMap(
119             Path cldrRootDir, Set<String> rootDirs) {
120 
121         LinkedHashMultimap<CldrDataType, Path> multimap = LinkedHashMultimap.create();
122         for (CldrDataType type : CldrDataType.values()) {
123             type.getSourceDirectories()
124                     .flatMap(d -> rootDirs.stream().map(r -> cldrRootDir.resolve(r).resolve(d)))
125                     .filter(Files::isDirectory)
126                     .forEach(p -> multimap.put(type, p));
127         }
128         return multimap;
129     }
130 
131     /**
132      * Returns an in-memory supplier for the specified {@link CldrValue}s. This is useful for
133      * testing or handling special case data. The default (arbitrary) path order is determined by
134      * the order of values passed to this method.
135      *
136      * @param values the values (and associated paths) to include in the returned data.
137      */
forValues(Iterable<CldrValue> values)138     public static CldrData forValues(Iterable<CldrValue> values) {
139         return new InMemoryData(values);
140     }
141 
142     /**
143      * Returns a modified data supplier which only provides paths/values with a draft status at or
144      * above the specified value. To create a supplier that will process all CLDR paths/values, use
145      * {@link CldrDraftStatus#UNCONFIRMED UNCONFIRMED}.
146      *
147      * @param draftStatus the desired status for filtering paths/values.
148      * @return a modified supplier which filters by the specified status.
149      */
withDraftStatusAtLeast(CldrDraftStatus draftStatus)150     public abstract CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus);
151 
152     /**
153      * Returns an LDML data instance for the specified locale ID.
154      *
155      * <p>If {@code resolution} is set to {@link CldrResolution#RESOLVED RESOLVED} then values
156      * inferred from parent locales and aliases will be produced by the supplier. Note that if an
157      * unsupported locale ID is given (i.e. one not in the set returned by {@link
158      * #getAvailableLocaleIds()}), then an empty data instance is returned.
159      *
160      * @param localeId the locale ID (e.g. "en_GB" or "root") for the returned data.
161      * @param resolution whether to resolve CLDR values for the given locale ID according to the
162      *     CLDR specification.
163      * @return the specified locale based CLDR data (possibly empty).
164      * @throws IllegalArgumentException if the locale ID is not structurally valid.
165      */
getDataForLocale(String localeId, CldrResolution resolution)166     public abstract CldrData getDataForLocale(String localeId, CldrResolution resolution);
167 
168     /**
169      * Returns an unmodifiable set of available locale IDs that this supplier can provide. This need
170      * not be ordered.
171      *
172      * @return the set of available locale IDs.
173      */
getAvailableLocaleIds()174     public abstract Set<String> getAvailableLocaleIds();
175 
176     /**
177      * Returns a data supplier for non-locale specific CLDR data of the given type.
178      *
179      * @param type the required non-{@link CldrDataType#LDML LDML} data type.
180      * @return the specified non-locale based CLDR data.
181      * @throws IllegalArgumentException if {@link CldrDataType#LDML} is given.
182      */
getDataForType(CldrDataType type)183     public abstract CldrData getDataForType(CldrDataType type);
184 
185     private static final class FileBasedDataSupplier extends CldrDataSupplier {
186         private final ImmutableSetMultimap<CldrDataType, Path> directoryMap;
187         private final CldrDraftStatus draftStatus;
188 
189         // Created on-demand to keep constructor simple (in a fluent API you might create several
190         // variants of a supplier but only get data from one, or only use non-LDML XML data).
191         private Factory factory = null;
192 
FileBasedDataSupplier( Multimap<CldrDataType, Path> directoryMap, CldrDraftStatus draftStatus)193         private FileBasedDataSupplier(
194                 Multimap<CldrDataType, Path> directoryMap, CldrDraftStatus draftStatus) {
195             this.directoryMap = ImmutableSetMultimap.copyOf(directoryMap);
196             this.draftStatus = checkNotNull(draftStatus);
197         }
198 
199         // Locking should be no issue, since contention on these supplier instance is expected to
200         // be minimal.
getFactory()201         private synchronized Factory getFactory() {
202             if (factory == null) {
203                 File[] dirArray =
204                         getDirectoriesForType(LDML).map(Path::toFile).toArray(File[]::new);
205                 checkArgument(
206                         dirArray.length > 0,
207                         "no LDML directories exist: %s",
208                         directoryMap.get(LDML));
209                 factory = SimpleFactory.make(dirArray, ".*", draftStatus.getRawStatus());
210             }
211             return factory;
212         }
213 
214         @Override
withDraftStatusAtLeast(CldrDraftStatus draftStatus)215         public CldrDataSupplier withDraftStatusAtLeast(CldrDraftStatus draftStatus) {
216             return new FileBasedDataSupplier(directoryMap, draftStatus);
217         }
218 
219         @Override
getDataForLocale(String localeId, CldrResolution resolution)220         public CldrData getDataForLocale(String localeId, CldrResolution resolution) {
221             LocaleIds.checkCldrLocaleId(localeId);
222             Factory factory = getFactory();
223             if (factory.getAvailable().contains(localeId)) {
224                 return new CldrFileDataSource(
225                         factory.make(localeId, resolution == CldrResolution.RESOLVED));
226             }
227             return NO_DATA;
228         }
229 
230         @Override
getAvailableLocaleIds()231         public Set<String> getAvailableLocaleIds() {
232             return getFactory().getAvailable();
233         }
234 
235         @Override
getDataForType(CldrDataType type)236         public CldrData getDataForType(CldrDataType type) {
237             ImmutableSet<Path> xmlFiles = listXmlFilesForType(type);
238             if (!xmlFiles.isEmpty()) {
239                 return new XmlDataSource(type, xmlFiles, draftStatus);
240             }
241             return NO_DATA;
242         }
243 
getDirectoriesForType(CldrDataType type)244         private Stream<Path> getDirectoriesForType(CldrDataType type) {
245             return directoryMap.get(type).stream().filter(Files::exists);
246         }
247 
listXmlFilesForType(CldrDataType type)248         private ImmutableSet<Path> listXmlFilesForType(CldrDataType type) {
249             ImmutableSet<Path> xmlFiles =
250                     getDirectoriesForType(type)
251                             .flatMap(FileBasedDataSupplier::listXmlFiles)
252                             .collect(toImmutableSet());
253             checkArgument(
254                     !xmlFiles.isEmpty(),
255                     "no XML files exist within directories: %s",
256                     directoryMap.get(type));
257             return xmlFiles;
258         }
259 
260         // This is a separate function because stream functions cannot throw checked exceptions.
261         //
262         // Note: "Files.walk()" warns about closing resources and suggests "try-with-resources" to
263         // ensure closure, "flatMap()" (which is what calls this method) is defined to call close()
264         // on each stream as it's added into the result, so in normal use this should all be fine.
265         //
266         // https://docs.oracle.com/javase/8/docs/api/java/util/stream/Stream.html#flatMap-java.util.function.Function-
listXmlFiles(Path dir)267         private static Stream<Path> listXmlFiles(Path dir) {
268             try {
269                 return Files.walk(dir).filter(IS_XML_FILE);
270             } catch (IOException e) {
271                 throw new UncheckedIOException(e);
272             }
273         }
274 
275         private static final Predicate<Path> IS_XML_FILE =
276                 p -> Files.isRegularFile(p) && p.getFileName().toString().endsWith(".xml");
277     }
278 
279     private static final CldrData NO_DATA =
280             new CldrData() {
281                 @Override
282                 public void accept(PathOrder order, ValueVisitor visitor) {}
283 
284                 @Override
285                 public void accept(PathOrder order, PrefixVisitor visitor) {}
286 
287                 @Override
288                 public CldrValue get(CldrPath path) {
289                     return null;
290                 }
291             };
292 }
293