xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/api/CldrPath.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.api;
2 
3 import static com.google.common.base.CharMatcher.anyOf;
4 import static com.google.common.base.CharMatcher.inRange;
5 import static com.google.common.base.Preconditions.checkArgument;
6 import static com.google.common.base.Preconditions.checkElementIndex;
7 import static com.google.common.base.Preconditions.checkNotNull;
8 import static com.google.common.base.Preconditions.checkState;
9 import static java.util.stream.Collectors.toCollection;
10 
11 import com.google.common.base.CharMatcher;
12 import com.google.common.collect.ImmutableList;
13 import com.google.common.collect.ImmutableMap;
14 import java.util.ArrayList;
15 import java.util.Comparator;
16 import java.util.List;
17 import java.util.Objects;
18 import java.util.Optional;
19 import java.util.stream.Collectors;
20 import java.util.stream.IntStream;
21 import org.unicode.cldr.api.AttributeKey.AttributeSupplier;
22 
23 /**
24  * A sequence of CLDR path elements and "distinguishing" attributes.
25  *
26  * <p>In CLDR, a path is composed of a series of elements and associated attributes, where the
27  * attributes can be one of three types; "distinguishing", "value" and "metadata".
28  *
29  * <p>CldrPath instances hold only "distinguishing" attributes, with "value" attributes being held
30  * by the associated {@link CldrValue} instance, and "metadata" attributes being ignored completely
31  * since they are internal to the core CLDR classes. This approach ensures that {@code CldrPath}
32  * instances uniquely identify values and can be used as keys in maps.
33  *
34  * <p>When viewing {@code CldrPath} as strings, it is sometimes necessary to introduce an suffix to
35  * the path element name to indicate the "sort order". This is necessary to represent "ordered"
36  * elements which can appear in the LDML data (though these are rare). This suffix takes the form of
37  * {@code "{element-name}#{sort-index}"} where {@code {sort-index}} is a non-negative index which
38  * corresponds to the value returned from {@link #getSortIndex()} for that path element. The natural
39  * ordering of {@code CldrPath} handles the correctly, but you cannot sort paths properly by just
40  * comparing their string representation.
41  *
42  * <p>CldrPath is an immutable value type with efficient equality semantics.
43  *
44  * <p>See <a href="https://www.unicode.org/reports/tr35/#Definitions">the LDML specification</a> for
45  * more details.
46  */
47 public final class CldrPath implements AttributeSupplier, Comparable<CldrPath> {
48     // This is approximate and DOES NOT promise correctness, it's mainly there to catch unexpected
49     // changes in the data and it can be updated or removed if needed. At the very least element
50     // names must not contain '/', '[', ']', '@', '=', '#' or '"' to permit re-parsing of paths.
51     private static final CharMatcher NAME_CHARACTERS =
52             inRange('a', 'z').or(inRange('A', 'Z')).or(inRange('0', '9')).or(anyOf(":-_"));
53 
54     /**
55      * Parses a distinguishing CLDR path string, including "distinguishing" and private "metadata"
56      * attributes into a normalized {@link CldrPath} instance. Attributes will be parsed and handled
57      * according to their type:
58      *
59      * <ul>
60      *   <li>Distinguishing attributes will be added to the returned CldrPath instance.
61      *   <li>Non-public metadata attributes will be ignored.
62      *   <li>Value attributes are not permitted in distinguishing paths and will cause an error.
63      * </ul>
64      *
65      * <p>The path string must be structured correctly (e.g. "//ldml/foo[@bar="baz]") and must
66      * represent a known DTD type, based on the first path element (e.g. "ldml" or
67      * "supplementalData").
68      *
69      * @param path the distinguishing path string, containing only distinguishing attributes.
70      * @return the parsed distinguishing path instance.
71      * @throws IllegalArgumentException if the path is not well formed (e.g. contains unexpected
72      *     value or metadata attributes).
73      */
parseDistinguishingPath(String path)74     public static CldrPath parseDistinguishingPath(String path) {
75         return CldrPaths.processXPath(
76                 path,
77                 ImmutableList.of(),
78                 (k, v) -> {
79                     throw new IllegalArgumentException(
80                             String.format(
81                                     "unexpected value attribute '%s' in distinguishing path: %s",
82                                     k, path));
83                 });
84     }
85 
86     /**
87      * Returns the number of common path elements from the root. This is useful when determining the
88      * last common ancestor during visitation. A {@code null} path is treated as having zero length
89      * (and will always result in zero being returned).
90      *
91      * <p>Note: This is only currently use by PrefixVisitorHost, but could be made public if needed.
92      * It's only here (rather than in PrefixVisitorHost) because it depends on private methods of
93      * this class.
94      */
getCommonPrefixLength( CldrPath a, CldrPath b)95     static int getCommonPrefixLength(/* @Nullable */ CldrPath a, /* @Nullable */ CldrPath b) {
96         // Null paths are sentinels with zero length.
97         if (a == null || b == null) {
98             return 0;
99         }
100 
101         // Trim whichever path is longer until both are same length.
102         int minLength = Math.min(a.getLength(), b.getLength());
103         while (a.getLength() > minLength) a = a.getParent();
104         while (b.getLength() > minLength) b = b.getParent();
105 
106         // Work up the paths, resetting the common length every time the elements differ.
107         int commonLength = minLength;
108         for (int length = minLength; length > 0; length--) {
109             if (!a.localEquals(b)) {
110                 // Elements differ, so shortest possible common length is that of our parent path.
111                 commonLength = length - 1;
112             }
113             // Parents will both be null on the last iteration, but never used.
114             a = a.getParent();
115             b = b.getParent();
116         }
117         return commonLength;
118     }
119 
120     // If the parent is null then this path element is the top level LDML descriptor.
121     /* @Nullable */ private final CldrPath parent;
122     // The number of elements (including this one) in this path.
123     private final int length;
124     private final String elementName;
125     // The attribute keys and values in an alternating list.
126     private final ImmutableList<String> attributeKeyValuePairs;
127     // Inherits the top-most draft status in a path (which matches what CLDRFile appears to do.
128     private final Optional<CldrDraftStatus> draftStatus;
129     // The sort index for "ORDERED" path elements or (more commonly) -1 for non-ORDERED elements.
130     private final int sortIndex;
131     // The DTD type of the path (which is the same for all path elements).
132     private final CldrDataType dtdType;
133     // The proper DTD ordering for this path (based on the DTD type).
134     private final Comparator<CldrPath> ordering;
135     // Cached since ImmutableList recalculates its hash codes every time.
136     private final int hashCode;
137     // Cached on demand (it's useful during equality checking as well as toString() itself).
138     // However for elements with attributes, it's at least as large as the name and all attribute
139     // keys/values combined, so will typically double the in-memory size of the instance. We don't
140     // care about making assignment atomic however, since all values would be equal anyway.
141     private String localToString = null;
142 
CldrPath( CldrPath parent, String name, List<String> attributeKeyValuePairs, CldrDataType dtdType, CldrDraftStatus localDraftStatus, int sortIndex)143     CldrPath(
144             CldrPath parent,
145             String name,
146             List<String> attributeKeyValuePairs,
147             CldrDataType dtdType,
148             /* @Nullable */ CldrDraftStatus localDraftStatus,
149             int sortIndex) {
150         checkState(
151                 parent != null || dtdType.getLdmlName().equals(name),
152                 "unexpected root element: expected %s, but got %s",
153                 dtdType.getLdmlName(),
154                 name);
155         this.parent = parent;
156         this.length = (parent != null ? parent.getLength() : 0) + 1;
157         this.elementName = checkValidName(name, "element");
158         this.attributeKeyValuePairs = checkKeyValuePairs(attributeKeyValuePairs);
159         this.draftStatus = resolveDraftStatus(parent, localDraftStatus);
160         // Ordered elements have a sort index of 0 or more, and un-ordered have an index of -1.
161         if (CldrPaths.isOrdered(dtdType, elementName)) {
162             checkArgument(
163                     sortIndex >= 0,
164                     "missing or invalid sort index '%s' for element: %s",
165                     sortIndex,
166                     elementName);
167         } else {
168             checkArgument(
169                     sortIndex == -1,
170                     "unexpected sort index '%s' for element: %s",
171                     sortIndex,
172                     elementName);
173         }
174         this.sortIndex = sortIndex;
175         this.dtdType = checkNotNull(dtdType);
176         this.ordering = CldrPaths.getPathComparator(dtdType);
177         this.hashCode = Objects.hash(length, name, this.attributeKeyValuePairs, parent);
178     }
179 
checkValidName(String value, String description)180     private static String checkValidName(String value, String description) {
181         checkArgument(!value.isEmpty(), "%s name cannot be empty", description);
182         checkArgument(
183                 NAME_CHARACTERS.matchesAllOf(value),
184                 "invalid character in %s name: %s",
185                 description,
186                 value);
187         return value;
188     }
189 
checkKeyValuePairs(List<String> keyValuePairs)190     private static ImmutableList<String> checkKeyValuePairs(List<String> keyValuePairs) {
191         // Ensure attribute values never have double-quote in them (since current we don't escape
192         // value when putting into toString().
193         checkArgument(
194                 (keyValuePairs.size() & 1) == 0,
195                 "key/value pairs must have an even number of elements: %s",
196                 keyValuePairs);
197         for (int n = 0; n < keyValuePairs.size(); n += 2) {
198             checkValidName(keyValuePairs.get(n), "attribute");
199             String v = keyValuePairs.get(n + 1);
200             checkArgument(!v.contains("\""), "unsupported '\"' in attribute value: %s", v);
201         }
202         return ImmutableList.copyOf(keyValuePairs);
203     }
204 
205     /**
206      * Helper method used to test for equivalent path element content. Used to help avoid
207      * unnecessary allocations during processing (see {@link CldrFileDataSource}).
208      */
matchesContent( String name, int sortIndex, List<String> keyValuePairs, CldrDraftStatus draftStatus)209     boolean matchesContent(
210             String name,
211             int sortIndex,
212             List<String> keyValuePairs, /* Nullable */
213             CldrDraftStatus draftStatus) {
214         return this.elementName.equals(name)
215                 && this.sortIndex == sortIndex
216                 && this.attributeKeyValuePairs.equals(keyValuePairs)
217                 && this.draftStatus.equals(resolveDraftStatus(this.parent, draftStatus));
218     }
219 
220     // Helper to resolve the current draft status of a path based on any local draft status
221     // attributes and any existing parent status.
222     //
223     // Note that this behaviour currently matches CLDRFile behaviour as of May 2019. CLDRFile
224     // does a regex match over the full path and finds the first draft status attribute that's
225     // present, ignoring any later declarations (even if they are more restrictive). It seems
226     // likely that the implicit expectation in CLDRFile is that there's only ever one draft
227     // status attributes in any given path (see the pop() method in MyDeclHandler).
resolveDraftStatus( CldrPath parent, CldrDraftStatus localStatus)228     private static Optional<CldrDraftStatus> resolveDraftStatus(
229             /* @Nullable */ CldrPath parent, /* @Nullable */ CldrDraftStatus localStatus) {
230         if (parent != null && parent.draftStatus.isPresent()) {
231             return parent.draftStatus;
232         }
233         return localStatus != null ? localStatus.asOptional() : Optional.empty();
234     }
235 
236     /**
237      * Returns the parent path element.
238      *
239      * @return the parent path element (or {@code null} if this was the root element).
240      */
241     /* @Nullable */
getParent()242     public CldrPath getParent() {
243         return parent;
244     }
245 
246     /**
247      * Returns the non-zero length of this path (i.e. the number of elements it is made up of).
248      *
249      * @return the number of elements in this path.
250      */
getLength()251     public int getLength() {
252         return length;
253     }
254 
255     /**
256      * Returns the name of this path element. This is the qualified XML name as it appears in the
257      * CLDR XML files, including namespace (e.g. "icu:transforms") though most attributes are not
258      * part of an explicit namespace.
259      *
260      * @return the ASCII-only name of the "lowest" element in this path.
261      */
262     // TODO: This method name is weak - perhaps getElementName() ?
getName()263     public String getName() {
264         return elementName;
265     }
266 
267     /**
268      * Returns the sort index of an "ordered" path element, or {@code -1} for non-ordered elements.
269      *
270      * <p>The sort index is used to disambiguate and sort otherwise identical distinguishing paths.
271      * The feature allows a series of values to be processed reliably in an ordered sequence. It is
272      * recommended that if your data includes "ordered" elements, they are always processed in
273      * {@link org.unicode.cldr.api.CldrData.PathOrder#DTD DTD} order.
274      *
275      * @return the element's sort index (which takes priority for element ordering over any
276      *     attribute values).
277      */
getSortIndex()278     public int getSortIndex() {
279         return sortIndex;
280     }
281 
282     /**
283      * Returns the raw value of an attribute associated with this CLDR path, or null if not present.
284      * For almost all use cases it is preferable to use the accessor methods on the {@link
285      * AttributeKey} class, which provide additional useful semantic checking and common type
286      * conversion. You should only use this method directly if there's a strong performance
287      * requirement.
288      *
289      * @param key the key identifying an attribute.
290      * @return the attribute value or {@code null} if not present.
291      * @see AttributeKey
292      */
293     @Override
get(AttributeKey key)294     /* @Nullable */ public String get(AttributeKey key) {
295         checkArgument(
296                 !getDataType().isValueAttribute(key),
297                 "cannot get 'value attribute' values from a distinguishing path: %s",
298                 key);
299         String v = null;
300         for (CldrPath p = this; v == null && p != null; p = p.getParent()) {
301             if (p.getName().equals(key.getElementName())) {
302                 v = p.getLocalAttributeValue(key.getAttributeName());
303             }
304         }
305         return v;
306     }
307 
308     /**
309      * Returns the data type for this path, as defined by the top most element.
310      *
311      * @return the path's data type.
312      */
313     @Override
getDataType()314     public CldrDataType getDataType() {
315         return dtdType;
316     }
317 
318     /**
319      * Returns whether the given element name is in this path.
320      *
321      * @param elementName the element name to check.
322      * @return true if the name of this path element or any of its parents is equal to the given
323      *     element name.
324      */
containsElement(String elementName)325     boolean containsElement(String elementName) {
326         return getName().equals(elementName)
327                 || (parent != null && parent.containsElement(elementName));
328     }
329 
330     /**
331      * Returns the number of distinguishing attributes for this path element.
332      *
333      * @return the number of distinguishing attributes for the "lowest" element in this path.
334      */
getAttributeCount()335     int getAttributeCount() {
336         return attributeKeyValuePairs.size() / 2;
337     }
338 
339     /**
340      * Returns the (non empty) name of the Nth distinguishing attribute in the leaf path element.
341      *
342      * @param n the index of a distinguishing attribute for the "lowest" element in this path.
343      * @return the name of the Nth attribute on this path element.
344      */
getLocalAttributeName(int n)345     String getLocalAttributeName(int n) {
346         checkElementIndex(n, getAttributeCount());
347         return attributeKeyValuePairs.get(2 * n);
348     }
349 
350     /**
351      * Returns the value of the Nth distinguishing attribute in the leaf path element.
352      *
353      * @param n the index of a distinguishing attribute for the "lowest" element in this path.
354      * @return the value of the Nth attribute on this path element.
355      */
getLocalAttributeValue(int n)356     String getLocalAttributeValue(int n) {
357         checkElementIndex(n, getAttributeCount());
358         return attributeKeyValuePairs.get((2 * n) + 1);
359     }
360 
361     // Helper to get an attribute by name from this path element.
getLocalAttributeValue(String attributeName)362     private String getLocalAttributeValue(String attributeName) {
363         checkNotNull(attributeName);
364         for (int i = 0; i < attributeKeyValuePairs.size(); i += 2) {
365             if (attributeName.equals(attributeKeyValuePairs.get(i))) {
366                 return attributeKeyValuePairs.get(i + 1);
367             }
368         }
369         return null;
370     }
371 
372     /**
373      * Returns the draft status for this path, which is inherited from parent path elements. Note
374      * that where multiple (possibly conflicting) draft statuses are defined in a path, the "top
375      * most" (i.e. closest to the root element) value is used.
376      *
377      * @return the potentially inherited draft status.
378      */
getDraftStatus()379     Optional<CldrDraftStatus> getDraftStatus() {
380         return draftStatus;
381     }
382 
383     /**
384      * Returns a combined full path string in the XPath style {@code //foo/bar[@x="y"]/baz}, with
385      * value attributes inserted in correct DTD order for each path element.
386      *
387      * <p>Note that while in most cases the values attributes simply follow the path attributes on
388      * each element, this is not necessarily always true, and DTD ordering can place value
389      * attributes before path attributes in an element.
390      *
391      * @param value a value to be associated with this path (from which value attributes will be
392      *     obtained).
393      * @return the full XPath representation containing both distinguishing and value attributes.
394      */
getFullPath(CldrValue value)395     String getFullPath(CldrValue value) {
396         checkNotNull(value);
397         return appendToString(new StringBuilder(), value.getValueAttributes()).toString();
398     }
399 
400     /** Compares two paths in DTD order. */
401     @Override
compareTo(CldrPath other)402     public int compareTo(CldrPath other) {
403         if (dtdType == other.dtdType) {
404             return ordering.compare(this, other);
405         }
406         return dtdType.compareTo(other.dtdType);
407     }
408 
409     /** {@inheritDoc} */
410     @Override
equals(Object obj)411     public boolean equals(Object obj) {
412         if (obj == this) {
413             return true;
414         }
415         if (!(obj instanceof CldrPath)) {
416             return false;
417         }
418         CldrPath other = (CldrPath) obj;
419         // Check type and length first (checking length catches most non-equal paths).
420         if (!getDataType().equals(other.getDataType()) || length != other.length) {
421             return false;
422         }
423         // Do (n - 1) comparisons since we already know the root element is the same (the root
424         // element never has value attributes on it and is uniquely defined by the DTD type).
425         // Working up from the "leaf" of the path works well because different paths will almost
426         // always be different in the leaf element (i.e. we will fail almost immediately).
427         CldrPath pthis = this;
428         for (int n = length - 1; n > 0; n--) {
429             if (!pthis.localEquals(other)) {
430                 return false;
431             }
432             pthis = pthis.getParent();
433             other = other.getParent();
434         }
435         return true;
436     }
437 
438     /** {@inheritDoc} */
439     @Override
hashCode()440     public int hashCode() {
441         return hashCode;
442     }
443 
444     /**
445      * @return the distinguishing path string in the XPath style {@code //foo/bar[@x="y"]/baz}.
446      */
447     @Override
toString()448     public String toString() {
449         return appendToString(new StringBuilder(), ImmutableMap.of()).toString();
450     }
451 
localEquals(CldrPath other)452     private boolean localEquals(CldrPath other) {
453         // In _theory_ only length and localToString need to be checked (since the localToString is
454         // an unambiguous representation of the data, but it seems a bit hacky to rely on that.
455         return this.elementName.equals(other.elementName)
456                 && this.sortIndex == other.sortIndex
457                 && this.attributeKeyValuePairs.equals(other.attributeKeyValuePairs);
458     }
459 
460     // XPath like toString() representation of a path element (e.g. foo[@bar="x"][@baz="y"]).
461     // When a sort index is present, it's appended to the element name (e.g. "foo#42[@bar="x"]").
getLocalToString()462     private String getLocalToString() {
463         if (localToString == null) {
464             String str = getName();
465             if (sortIndex != -1) {
466                 // This is very rare so it's almost certainly better to not make a StringBuilder
467                 // above just for this possibility.
468                 str += "#" + sortIndex;
469             }
470             if (getAttributeCount() > 0) {
471                 str =
472                         IntStream.range(0, getAttributeCount())
473                                 .mapToObj(
474                                         n ->
475                                                 String.format(
476                                                         "[@%s=\"%s\"]",
477                                                         getLocalAttributeName(n),
478                                                         getLocalAttributeValue(n)))
479                                 .collect(Collectors.joining("", str, ""));
480             }
481             // Overwrite only once the local string is completed (this is idempotent so we don't
482             // have to care about locking etc.).
483             localToString = str;
484         }
485         return localToString;
486     }
487 
488     // Recursive helper for toString().
appendToString( StringBuilder out, ImmutableMap<AttributeKey, String> valueAttributes)489     private StringBuilder appendToString(
490             StringBuilder out, ImmutableMap<AttributeKey, String> valueAttributes) {
491         CldrPath parent = getParent();
492         if (parent != null) {
493             parent.appendToString(out, valueAttributes).append('/');
494         } else {
495             out.append("//");
496         }
497         if (valueAttributes.isEmpty()) {
498             return out.append(getLocalToString());
499         }
500         List<String> attributeNames =
501                 valueAttributes.keySet().stream()
502                         .filter(k -> k.getElementName().equals(getName()))
503                         .map(AttributeKey::getAttributeName)
504                         .collect(toCollection(ArrayList::new));
505         if (attributeNames.isEmpty()) {
506             // No value attributes for _this_ element so can use just the local toString().
507             return out.append(getLocalToString());
508         }
509         if (getAttributeCount() > 0) {
510             String lastPathAttributeName = getLocalAttributeName(getAttributeCount() - 1);
511             if (dtdType.getAttributeComparator()
512                             .compare(lastPathAttributeName, attributeNames.get(0))
513                     > 0) {
514                 // Oops, order is not as expected, so must reorder all attributes.
515                 appendResortedValueAttributesTo(out, attributeNames, valueAttributes);
516                 return out;
517             }
518         }
519         // Value attributes all come after path attributes.
520         return appendValueAttributesTo(
521                 out.append(getLocalToString()), attributeNames, valueAttributes);
522     }
523 
appendResortedValueAttributesTo( StringBuilder out, List<String> attributeNames, ImmutableMap<AttributeKey, String> valueAttributes)524     private void appendResortedValueAttributesTo(
525             StringBuilder out,
526             List<String> attributeNames,
527             ImmutableMap<AttributeKey, String> valueAttributes) {
528         out.append(elementName);
529         for (int n = 0; n < attributeKeyValuePairs.size(); n += 2) {
530             attributeNames.add(attributeKeyValuePairs.get(n));
531         }
532         attributeNames.sort(dtdType.getAttributeComparator());
533         for (String attrName : attributeNames) {
534             String value = getLocalAttributeValue(attrName);
535             if (value == null) {
536                 value = valueAttributes.get(AttributeKey.keyOf(elementName, attrName));
537                 checkState(value != null, "missing value %s:%s", elementName, attrName);
538             }
539             out.append(String.format("[@%s=\"%s\"]", attrName, value));
540         }
541     }
542 
appendValueAttributesTo( StringBuilder out, List<String> attributeNames, ImmutableMap<AttributeKey, String> valueAttributes)543     private StringBuilder appendValueAttributesTo(
544             StringBuilder out,
545             List<String> attributeNames,
546             ImmutableMap<AttributeKey, String> valueAttributes) {
547         for (String attrName : attributeNames) {
548             String value = valueAttributes.get(AttributeKey.keyOf(elementName, attrName));
549             checkState(value != null, "missing value %s:%s", elementName, attrName);
550             out.append(String.format("[@%s=\"%s\"]", attrName, value));
551         }
552         return out;
553     }
554 }
555