package org.unicode.cldr.api;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.collect.ImmutableMap.toImmutableMap;
import static java.util.function.Function.identity;
import static org.unicode.cldr.util.DtdData.AttributeStatus.distinguished;
import static org.unicode.cldr.util.DtdData.AttributeStatus.value;
import static org.unicode.cldr.util.DtdData.Mode.OPTIONAL;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Comparator;
import java.util.function.Predicate;
import java.util.stream.Stream;
import org.unicode.cldr.util.DtdData;
import org.unicode.cldr.util.DtdData.Attribute;
import org.unicode.cldr.util.DtdData.Element;
import org.unicode.cldr.util.DtdType;

/**
 * Data types for CLDR data, each wrapping one main DTD type and zero or more extra DTD types. The
 * canonical specification for LDML data can be found at <a
 * href="https://unicode.org/reports/tr35">Unicode Locale Data Markup Language</a>.
 *
 * <p>This enum is largely a wrapper for functionality found in the underlying CLDR classes, but
 * repackaged for convenience and to minimize surface area (and to avoid anyone needing to import
 * classes from outside the "api" package).
 */
public enum CldrDataType {
    /**
     * Non-locale based BCP47 data, typically associated with international identifiers such as
     * currency symbols, timezone identifiers etc.
     */
    BCP47(DtdType.ldmlBCP47),
    /**
     * Non-locale based supplemental data, typically associated with character tables (e.g. for
     * break iterator).
     */
    SUPPLEMENTAL(DtdType.supplementalData),
    /**
     * Locale based LDML data consisting of internationalization information and translations on a
     * per locale basis. LDML data for one locale may be inherited from other locales.
     */
    LDML(DtdType.ldml, DtdType.ldmlICU);

    // Keyed by the name of each constant's main DTD type only; extra types are not registered.
    private static final ImmutableMap<String, CldrDataType> NAME_MAP =
            Arrays.stream(values()).collect(toImmutableMap(t -> t.mainType.name(), identity()));

    /**
     * Returns a CLDR data type given its XML name (the root element name in a CLDR path).
     *
     * @param name the XML path root (e.g. "ldml" or "supplementalData").
     * @return the associated data type instance.
     * @throws IllegalArgumentException if no data type has a main DTD type with the given name.
     */
    public static CldrDataType forXmlName(String name) {
        CldrDataType type = NAME_MAP.get(name);
        checkArgument(type != null, "unsupported DTD type: %s", name);
        return type;
    }

    /**
     * Returns the CLDR data type whose main DTD type has the same name as the given raw type.
     *
     * @param rawType the underlying DTD type.
     * @return the associated data type instance.
     * @throws IllegalArgumentException if the raw type is not the main type of any data type.
     */
    static CldrDataType forRawType(DtdType rawType) {
        return forXmlName(rawType.name());
    }

    private final DtdType mainType;
    private final ImmutableList<DtdType> extraTypes;
    private final Comparator<String> elementComparator;
    private final Comparator<String> attributeComparator;

    CldrDataType(DtdType mainType, DtdType... extraTypes) {
        this.mainType = mainType;
        this.extraTypes = ImmutableList.copyOf(extraTypes);
        // There's no need to cache the DtdData instance since getInstance() already does that.
        DtdData dtd = DtdData.getInstance(mainType);
        // Note that the function passed in to the wrapped comparators needs to be fast, since it's
        // called for each comparison. We assume getElementFromName() and getAttributesFromName()
        // are efficient, and if not we'll need to cache.
        this.elementComparator =
                wrapToHandleUnknownNames(
                        dtd.getElementComparator(), dtd.getElementFromName()::containsKey);
        this.attributeComparator =
                wrapToHandleUnknownNames(
                        dtd.getAttributeComparator(), dtd.getAttributesFromName()::containsKey);
    }

    /** Returns the name of the main DTD type of this data type. */
    String getLdmlName() {
        return mainType.name();
    }

    /** Returns the directories listed by the main DTD type, converted to paths. */
    Stream<Path> getSourceDirectories() {
        return mainType.directories.stream().map(Paths::get);
    }

    /**
     * Returns all elements known for this DTD type in undefined order. This can include elements in
     * external namespaces (e.g. "icu:xxx").
     */
    Stream<Element> getElements() {
        Stream<Element> elements = elementsFrom(mainType);
        if (!extraTypes.isEmpty()) {
            elements =
                    Stream.concat(
                            elements, extraTypes.stream().flatMap(CldrDataType::elementsFrom));
        }
        return elements;
    }

    private static Stream<Element> elementsFrom(DtdType dataType) {
        // NOTE: DO NOT call getElements() here because it makes a new set every time!!
        return DtdData.getInstance(dataType).getElementFromName().values().stream();
    }

    /**
     * Looks up an attribute, first in the main DTD type and then in each extra DTD type in turn.
     *
     * @param elementName the name of the element carrying the attribute.
     * @param attributeName the name of the attribute.
     * @return the first attribute found, or {@code null} if none of the DTD types has it.
     */
    Attribute getAttribute(String elementName, String attributeName) {
        Attribute attr = DtdData.getInstance(mainType).getAttribute(elementName, attributeName);
        if (attr == null) {
            for (DtdType t : extraTypes) {
                attr = DtdData.getInstance(t).getAttribute(elementName, attributeName);
                if (attr != null) {
                    break;
                }
            }
        }
        return attr;
    }

    /** Returns the element name comparator, which also accepts names unknown to the DTD. */
    Comparator<String> getElementComparator() {
        return elementComparator;
    }

    /** Returns the attribute name comparator, which also accepts names unknown to the DTD. */
    Comparator<String> getAttributeComparator() {
        return attributeComparator;
    }

    // Unknown elements outside the DTD (such as "//ldml/special" icu:xxx elements) are not
    // handled properly by the underlying element/attribute name comparators (they throw an
    // exception) so we have to detect these cases first and handle them manually (even though
    // they are very rare). Assume that:
    // * known DTD elements come before any unknown ones, and
    // * unknown element names can be sorted lexicographically using their qualified name.
    private static Comparator<String> wrapToHandleUnknownNames(
            Comparator<String> compare, Predicate<String> isKnown) {
        // This code only returns "signum" values for ordering (i.e. {-1, 0, 1}). Results from the
        // wrapped comparator and from String.compareTo() can be arbitrary integers, so they are
        // normalized with Integer.signum() to honor that contract.
        return (lname, rname) -> {
            if (isKnown.test(lname)) {
                return isKnown.test(rname) ? Integer.signum(compare.compare(lname, rname)) : -1;
            } else {
                return isKnown.test(rname) ? 1 : Integer.signum(lname.compareTo(rname));
            }
        };
    }

    // We shouldn't need to check special cases (e.g. "_q") here because this should only be being
    // called _after_ those have been filtered out.
    // The only time that both these methods return false should be for known attributes that are
    // either marked as deprecated or as metadata attributes.
    /**
     * Returns whether the specified attribute is a non-deprecated "distinguishing" attribute.
     * Attributes unknown to the DTD are never considered distinguishing.
     */
    boolean isDistinguishingAttribute(String elementName, String attributeName) {
        Attribute attribute = getAttribute(elementName, attributeName);
        if (attribute != null) {
            return attribute.attributeStatus == distinguished && !attribute.isDeprecated();
        }
        // This can happen if attribute keys are speculatively generated, which sometimes happens
        // in transformation logic. Ideally this would end up being an error.
        return false;
    }

    /**
     * Returns whether the specified attribute is a non-deprecated "value" attribute. Attributes
     * unknown to the DTD are treated as value attributes.
     */
    boolean isValueAttribute(String elementName, String attributeName) {
        Attribute attribute = getAttribute(elementName, attributeName);
        if (attribute != null) {
            return attribute.attributeStatus == value && !attribute.isDeprecated();
        }
        // Unknown attributes fall through to here and are reported as value attributes.
        return true;
    }

    /**
     * Returns whether the specified attribute is a "value" attribute, using the element and
     * attribute names of the given key.
     */
    boolean isValueAttribute(AttributeKey key) {
        return isValueAttribute(key.getElementName(), key.getAttributeName());
    }

    /**
     * Returns whether the specified attribute is optional. Attributes unknown to the DTD are also
     * considered optional, which can happen if attribute keys are speculatively generated, which
     * sometimes happens in transformation logic.
     */
    boolean isOptionalAttribute(AttributeKey key) {
        Attribute attribute = getAttribute(key.getElementName(), key.getAttributeName());
        return attribute == null || attribute.mode == OPTIONAL;
    }
}