xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/api/CldrDataType.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.api;
2 
3 import static com.google.common.base.Preconditions.checkArgument;
4 import static com.google.common.collect.ImmutableMap.toImmutableMap;
5 import static java.util.function.Function.identity;
6 import static org.unicode.cldr.util.DtdData.AttributeStatus.distinguished;
7 import static org.unicode.cldr.util.DtdData.AttributeStatus.value;
8 import static org.unicode.cldr.util.DtdData.Mode.OPTIONAL;
9 
10 import com.google.common.collect.ImmutableList;
11 import com.google.common.collect.ImmutableMap;
12 import java.nio.file.Path;
13 import java.nio.file.Paths;
14 import java.util.Arrays;
15 import java.util.Comparator;
16 import java.util.function.Predicate;
17 import java.util.stream.Stream;
18 import org.unicode.cldr.util.DtdData;
19 import org.unicode.cldr.util.DtdData.Attribute;
20 import org.unicode.cldr.util.DtdData.Element;
21 import org.unicode.cldr.util.DtdType;
22 
23 /**
24  * Data types for non-locale based CLDR data. For the canonical specification for LDML data can be
25  * found at <a href="https://unicode.org/reports/tr35">Unicode Locale Data Markup Language<\a>.
26  *
27  * <p>This enum is largely a wrapper for functionality found in the underlying CLDR classes, but
28  * repackaged for convenience and to minimize surface area (and to avoid anyone needing to import
29  * classes from outside the "api" package).
30  */
31 public enum CldrDataType {
32     /**
33      * Non-locale based BCP47 data, typically associated with international identifiers such as
34      * currency symbols, timezone identifiers etc.
35      */
36     BCP47(DtdType.ldmlBCP47),
37     /**
38      * Non-locale based supplemental data, typically associated with character tables (e.g. for
39      * break iterator).
40      */
41     SUPPLEMENTAL(DtdType.supplementalData),
42     /**
43      * Locale based LDML data consisting of internationalization information and translations on a
44      * per locale basis. LDML data for one locale may be inherited from other locales.
45      */
46     LDML(DtdType.ldml, DtdType.ldmlICU);
47 
48     private static final ImmutableMap<String, CldrDataType> NAME_MAP =
49             Arrays.stream(values()).collect(toImmutableMap(t -> t.mainType.name(), identity()));
50 
51     /**
52      * Returns a CLDR data type given its XML name (the root element name in a CLDR path).
53      *
54      * @param name the XML path root (e.g. "ldml" or "supplementalData").
55      * @return the associated data type instance.
56      */
forXmlName(String name)57     public static CldrDataType forXmlName(String name) {
58         CldrDataType type = NAME_MAP.get(name);
59         checkArgument(type != null, "unsupported DTD type: %s", name);
60         return type;
61     }
62 
forRawType(DtdType rawType)63     static CldrDataType forRawType(DtdType rawType) {
64         return forXmlName(rawType.name());
65     }
66 
67     private final DtdType mainType;
68     private final ImmutableList<DtdType> extraTypes;
69     private final Comparator<String> elementComparator;
70     private final Comparator<String> attributeComparator;
71 
CldrDataType(DtdType mainType, DtdType... extraTypes)72     CldrDataType(DtdType mainType, DtdType... extraTypes) {
73         this.mainType = mainType;
74         this.extraTypes = ImmutableList.copyOf(extraTypes);
75         // There's no need to cache the DtdData instance since getInstance() already does that.
76         DtdData dtd = DtdData.getInstance(mainType);
77         // Note that the function passed in to the wrapped comparators needs to be fast, since it's
78         // called for each comparison. We assume getElementFromName() and getAttributesFromName()
79         // are efficient, and if not we'll need to cache.
80         this.elementComparator =
81                 wrapToHandleUnknownNames(
82                         dtd.getElementComparator(), dtd.getElementFromName()::containsKey);
83         this.attributeComparator =
84                 wrapToHandleUnknownNames(
85                         dtd.getAttributeComparator(), dtd.getAttributesFromName()::containsKey);
86     }
87 
getLdmlName()88     String getLdmlName() {
89         return mainType.name();
90     }
91 
getSourceDirectories()92     Stream<Path> getSourceDirectories() {
93         return mainType.directories.stream().map(Paths::get);
94     }
95 
96     /**
97      * Returns all elements known for this DTD type in undefined order. This can include elements in
98      * external namespaces (e.g. "icu:xxx").
99      */
getElements()100     Stream<Element> getElements() {
101         Stream<Element> elements = elementsFrom(mainType);
102         if (!extraTypes.isEmpty()) {
103             elements =
104                     Stream.concat(
105                             elements, extraTypes.stream().flatMap(CldrDataType::elementsFrom));
106         }
107         return elements;
108     }
109 
elementsFrom(DtdType dataType)110     private static Stream<Element> elementsFrom(DtdType dataType) {
111         // NOTE: DO NOT call getElements() here because it makes a new set every time!!
112         return DtdData.getInstance(dataType).getElementFromName().values().stream();
113     }
114 
getAttribute(String elementName, String attributeName)115     Attribute getAttribute(String elementName, String attributeName) {
116         Attribute attr = DtdData.getInstance(mainType).getAttribute(elementName, attributeName);
117         if (attr == null) {
118             for (DtdType t : extraTypes) {
119                 attr = DtdData.getInstance(t).getAttribute(elementName, attributeName);
120                 if (attr != null) {
121                     break;
122                 }
123             }
124         }
125         return attr;
126     }
127 
getElementComparator()128     Comparator<String> getElementComparator() {
129         return elementComparator;
130     }
131 
getAttributeComparator()132     Comparator<String> getAttributeComparator() {
133         return attributeComparator;
134     }
135 
136     // Unknown elements outside the DTD (such as "//ldml/special" icu:xxx elements) are not
137     // handled properly by the underlying element/attribute name comparators (they throw an
138     // exception) so we have to detect these cases first and handle them manually (even though
139     // they are very rare). Assume that:
140     // * known DTD elements come before any unknown ones, and
141     // * unknown element names can be sorted lexicographically using their qualified name.
wrapToHandleUnknownNames( Comparator<String> compare, Predicate<String> isKnown)142     private static Comparator<String> wrapToHandleUnknownNames(
143             Comparator<String> compare, Predicate<String> isKnown) {
144         // This code should only return "signum" values for ordering (i.e. {-1, 0, 1}).
145         return (lname, rname) -> {
146             if (isKnown.test(lname)) {
147                 return isKnown.test(rname) ? compare.compare(lname, rname) : -1;
148             } else {
149                 return isKnown.test(rname) ? 1 : lname.compareTo(rname);
150             }
151         };
152     }
153 
154     // We shouldn't need to check special cases (e.g. "_q") here because this should only be being
155     // called _after_ those have been filtered out.
156     // The only time that both these methods return false should be for known attributes that are
157     // either marked as deprecated or as metatadata attributes.
158     boolean isDistinguishingAttribute(String elementName, String attributeName) {
159         Attribute attribute = getAttribute(elementName, attributeName);
160         if (attribute != null) {
161             return attribute.attributeStatus == distinguished && !attribute.isDeprecated();
162         }
163         // This can happen if attribute keys are speculatively generated, which sometimes happens
164         // in transformation logic. Ideally this would end up being an error.
165         return false;
166     }
167 
168     /** Returns whether the specified attribute is a "value" attribute. */
169     boolean isValueAttribute(String elementName, String attributeName) {
170         Attribute attribute = getAttribute(elementName, attributeName);
171         if (attribute != null) {
172             return attribute.attributeStatus == value && !attribute.isDeprecated();
173         }
174         return true;
175     }
176 
177     /** Returns whether the specified attribute is a "value" attribute. */
178     boolean isValueAttribute(AttributeKey key) {
179         return isValueAttribute(key.getElementName(), key.getAttributeName());
180     }
181 
182     /**
183      * Returns whether the specified attribute is optional. Attributes unknown to the DTD are also
184      * considered optional, which can happen if attribute keys are speculatively generated, which
185      * sometimes happens in transformation logic.
186      */
187     boolean isOptionalAttribute(AttributeKey key) {
188         Attribute attribute = getAttribute(key.getElementName(), key.getAttributeName());
189         return attribute == null || attribute.mode == OPTIONAL;
190     }
191 }
192