1 package org.unicode.cldr.api; 2 3 import static com.google.common.base.CharMatcher.anyOf; 4 import static com.google.common.base.CharMatcher.inRange; 5 import static com.google.common.base.Preconditions.checkArgument; 6 import static com.google.common.base.Preconditions.checkElementIndex; 7 import static com.google.common.base.Preconditions.checkNotNull; 8 import static com.google.common.base.Preconditions.checkState; 9 import static java.util.stream.Collectors.toCollection; 10 11 import com.google.common.base.CharMatcher; 12 import com.google.common.collect.ImmutableList; 13 import com.google.common.collect.ImmutableMap; 14 import java.util.ArrayList; 15 import java.util.Comparator; 16 import java.util.List; 17 import java.util.Objects; 18 import java.util.Optional; 19 import java.util.stream.Collectors; 20 import java.util.stream.IntStream; 21 import org.unicode.cldr.api.AttributeKey.AttributeSupplier; 22 23 /** 24 * A sequence of CLDR path elements and "distinguishing" attributes. 25 * 26 * <p>In CLDR, a path is composed of a series of elements and associated attributes, where the 27 * attributes can be one of three types; "distinguishing", "value" and "metadata". 28 * 29 * <p>CldrPath instances hold only "distinguishing" attributes, with "value" attributes being held 30 * by the associated {@link CldrValue} instance, and "metadata" attributes being ignored completely 31 * since they are internal to the core CLDR classes. This approach ensures that {@code CldrPath} 32 * instances uniquely identify values and can be used as keys in maps. 33 * 34 * <p>When viewing {@code CldrPath} as strings, it is sometimes necessary to introduce an suffix to 35 * the path element name to indicate the "sort order". This is necessary to represent "ordered" 36 * elements which can appear in the LDML data (though these are rare). This suffix takes the form of 37 * {@code "{element-name}#{sort-index}"} where {@code {sort-index}} is a non-negative index which 38 * corresponds to the value returned from {@link #getSortIndex()} for that path element. The natural 39 * ordering of {@code CldrPath} handles the correctly, but you cannot sort paths properly by just 40 * comparing their string representation. 41 * 42 * <p>CldrPath is an immutable value type with efficient equality semantics. 43 * 44 * <p>See <a href="https://www.unicode.org/reports/tr35/#Definitions">the LDML specification</a> for 45 * more details. 46 */ 47 public final class CldrPath implements AttributeSupplier, Comparable<CldrPath> { 48 // This is approximate and DOES NOT promise correctness, it's mainly there to catch unexpected 49 // changes in the data and it can be updated or removed if needed. At the very least element 50 // names must not contain '/', '[', ']', '@', '=', '#' or '"' to permit re-parsing of paths. 51 private static final CharMatcher NAME_CHARACTERS = 52 inRange('a', 'z').or(inRange('A', 'Z')).or(inRange('0', '9')).or(anyOf(":-_")); 53 54 /** 55 * Parses a distinguishing CLDR path string, including "distinguishing" and private "metadata" 56 * attributes into a normalized {@link CldrPath} instance. Attributes will be parsed and handled 57 * according to their type: 58 * 59 * <ul> 60 * <li>Distinguishing attributes will be added to the returned CldrPath instance. 61 * <li>Non-public metadata attributes will be ignored. 62 * <li>Value attributes are not permitted in distinguishing paths and will cause an error. 63 * </ul> 64 * 65 * <p>The path string must be structured correctly (e.g. "//ldml/foo[@bar="baz]") and must 66 * represent a known DTD type, based on the first path element (e.g. "ldml" or 67 * "supplementalData"). 68 * 69 * @param path the distinguishing path string, containing only distinguishing attributes. 70 * @return the parsed distinguishing path instance. 71 * @throws IllegalArgumentException if the path is not well formed (e.g. contains unexpected 72 * value or metadata attributes). 73 */ parseDistinguishingPath(String path)74 public static CldrPath parseDistinguishingPath(String path) { 75 return CldrPaths.processXPath( 76 path, 77 ImmutableList.of(), 78 (k, v) -> { 79 throw new IllegalArgumentException( 80 String.format( 81 "unexpected value attribute '%s' in distinguishing path: %s", 82 k, path)); 83 }); 84 } 85 86 /** 87 * Returns the number of common path elements from the root. This is useful when determining the 88 * last common ancestor during visitation. A {@code null} path is treated as having zero length 89 * (and will always result in zero being returned). 90 * 91 * <p>Note: This is only currently use by PrefixVisitorHost, but could be made public if needed. 92 * It's only here (rather than in PrefixVisitorHost) because it depends on private methods of 93 * this class. 94 */ getCommonPrefixLength( CldrPath a, CldrPath b)95 static int getCommonPrefixLength(/* @Nullable */ CldrPath a, /* @Nullable */ CldrPath b) { 96 // Null paths are sentinels with zero length. 97 if (a == null || b == null) { 98 return 0; 99 } 100 101 // Trim whichever path is longer until both are same length. 102 int minLength = Math.min(a.getLength(), b.getLength()); 103 while (a.getLength() > minLength) a = a.getParent(); 104 while (b.getLength() > minLength) b = b.getParent(); 105 106 // Work up the paths, resetting the common length every time the elements differ. 107 int commonLength = minLength; 108 for (int length = minLength; length > 0; length--) { 109 if (!a.localEquals(b)) { 110 // Elements differ, so shortest possible common length is that of our parent path. 111 commonLength = length - 1; 112 } 113 // Parents will both be null on the last iteration, but never used. 114 a = a.getParent(); 115 b = b.getParent(); 116 } 117 return commonLength; 118 } 119 120 // If the parent is null then this path element is the top level LDML descriptor. 121 /* @Nullable */ private final CldrPath parent; 122 // The number of elements (including this one) in this path. 123 private final int length; 124 private final String elementName; 125 // The attribute keys and values in an alternating list. 126 private final ImmutableList<String> attributeKeyValuePairs; 127 // Inherits the top-most draft status in a path (which matches what CLDRFile appears to do. 128 private final Optional<CldrDraftStatus> draftStatus; 129 // The sort index for "ORDERED" path elements or (more commonly) -1 for non-ORDERED elements. 130 private final int sortIndex; 131 // The DTD type of the path (which is the same for all path elements). 132 private final CldrDataType dtdType; 133 // The proper DTD ordering for this path (based on the DTD type). 134 private final Comparator<CldrPath> ordering; 135 // Cached since ImmutableList recalculates its hash codes every time. 136 private final int hashCode; 137 // Cached on demand (it's useful during equality checking as well as toString() itself). 138 // However for elements with attributes, it's at least as large as the name and all attribute 139 // keys/values combined, so will typically double the in-memory size of the instance. We don't 140 // care about making assignment atomic however, since all values would be equal anyway. 141 private String localToString = null; 142 CldrPath( CldrPath parent, String name, List<String> attributeKeyValuePairs, CldrDataType dtdType, CldrDraftStatus localDraftStatus, int sortIndex)143 CldrPath( 144 CldrPath parent, 145 String name, 146 List<String> attributeKeyValuePairs, 147 CldrDataType dtdType, 148 /* @Nullable */ CldrDraftStatus localDraftStatus, 149 int sortIndex) { 150 checkState( 151 parent != null || dtdType.getLdmlName().equals(name), 152 "unexpected root element: expected %s, but got %s", 153 dtdType.getLdmlName(), 154 name); 155 this.parent = parent; 156 this.length = (parent != null ? parent.getLength() : 0) + 1; 157 this.elementName = checkValidName(name, "element"); 158 this.attributeKeyValuePairs = checkKeyValuePairs(attributeKeyValuePairs); 159 this.draftStatus = resolveDraftStatus(parent, localDraftStatus); 160 // Ordered elements have a sort index of 0 or more, and un-ordered have an index of -1. 161 if (CldrPaths.isOrdered(dtdType, elementName)) { 162 checkArgument( 163 sortIndex >= 0, 164 "missing or invalid sort index '%s' for element: %s", 165 sortIndex, 166 elementName); 167 } else { 168 checkArgument( 169 sortIndex == -1, 170 "unexpected sort index '%s' for element: %s", 171 sortIndex, 172 elementName); 173 } 174 this.sortIndex = sortIndex; 175 this.dtdType = checkNotNull(dtdType); 176 this.ordering = CldrPaths.getPathComparator(dtdType); 177 this.hashCode = Objects.hash(length, name, this.attributeKeyValuePairs, parent); 178 } 179 checkValidName(String value, String description)180 private static String checkValidName(String value, String description) { 181 checkArgument(!value.isEmpty(), "%s name cannot be empty", description); 182 checkArgument( 183 NAME_CHARACTERS.matchesAllOf(value), 184 "invalid character in %s name: %s", 185 description, 186 value); 187 return value; 188 } 189 checkKeyValuePairs(List<String> keyValuePairs)190 private static ImmutableList<String> checkKeyValuePairs(List<String> keyValuePairs) { 191 // Ensure attribute values never have double-quote in them (since current we don't escape 192 // value when putting into toString(). 193 checkArgument( 194 (keyValuePairs.size() & 1) == 0, 195 "key/value pairs must have an even number of elements: %s", 196 keyValuePairs); 197 for (int n = 0; n < keyValuePairs.size(); n += 2) { 198 checkValidName(keyValuePairs.get(n), "attribute"); 199 String v = keyValuePairs.get(n + 1); 200 checkArgument(!v.contains("\""), "unsupported '\"' in attribute value: %s", v); 201 } 202 return ImmutableList.copyOf(keyValuePairs); 203 } 204 205 /** 206 * Helper method used to test for equivalent path element content. Used to help avoid 207 * unnecessary allocations during processing (see {@link CldrFileDataSource}). 208 */ matchesContent( String name, int sortIndex, List<String> keyValuePairs, CldrDraftStatus draftStatus)209 boolean matchesContent( 210 String name, 211 int sortIndex, 212 List<String> keyValuePairs, /* Nullable */ 213 CldrDraftStatus draftStatus) { 214 return this.elementName.equals(name) 215 && this.sortIndex == sortIndex 216 && this.attributeKeyValuePairs.equals(keyValuePairs) 217 && this.draftStatus.equals(resolveDraftStatus(this.parent, draftStatus)); 218 } 219 220 // Helper to resolve the current draft status of a path based on any local draft status 221 // attributes and any existing parent status. 222 // 223 // Note that this behaviour currently matches CLDRFile behaviour as of May 2019. CLDRFile 224 // does a regex match over the full path and finds the first draft status attribute that's 225 // present, ignoring any later declarations (even if they are more restrictive). It seems 226 // likely that the implicit expectation in CLDRFile is that there's only ever one draft 227 // status attributes in any given path (see the pop() method in MyDeclHandler). resolveDraftStatus( CldrPath parent, CldrDraftStatus localStatus)228 private static Optional<CldrDraftStatus> resolveDraftStatus( 229 /* @Nullable */ CldrPath parent, /* @Nullable */ CldrDraftStatus localStatus) { 230 if (parent != null && parent.draftStatus.isPresent()) { 231 return parent.draftStatus; 232 } 233 return localStatus != null ? localStatus.asOptional() : Optional.empty(); 234 } 235 236 /** 237 * Returns the parent path element. 238 * 239 * @return the parent path element (or {@code null} if this was the root element). 240 */ 241 /* @Nullable */ getParent()242 public CldrPath getParent() { 243 return parent; 244 } 245 246 /** 247 * Returns the non-zero length of this path (i.e. the number of elements it is made up of). 248 * 249 * @return the number of elements in this path. 250 */ getLength()251 public int getLength() { 252 return length; 253 } 254 255 /** 256 * Returns the name of this path element. This is the qualified XML name as it appears in the 257 * CLDR XML files, including namespace (e.g. "icu:transforms") though most attributes are not 258 * part of an explicit namespace. 259 * 260 * @return the ASCII-only name of the "lowest" element in this path. 261 */ 262 // TODO: This method name is weak - perhaps getElementName() ? getName()263 public String getName() { 264 return elementName; 265 } 266 267 /** 268 * Returns the sort index of an "ordered" path element, or {@code -1} for non-ordered elements. 269 * 270 * <p>The sort index is used to disambiguate and sort otherwise identical distinguishing paths. 271 * The feature allows a series of values to be processed reliably in an ordered sequence. It is 272 * recommended that if your data includes "ordered" elements, they are always processed in 273 * {@link org.unicode.cldr.api.CldrData.PathOrder#DTD DTD} order. 274 * 275 * @return the element's sort index (which takes priority for element ordering over any 276 * attribute values). 277 */ getSortIndex()278 public int getSortIndex() { 279 return sortIndex; 280 } 281 282 /** 283 * Returns the raw value of an attribute associated with this CLDR path, or null if not present. 284 * For almost all use cases it is preferable to use the accessor methods on the {@link 285 * AttributeKey} class, which provide additional useful semantic checking and common type 286 * conversion. You should only use this method directly if there's a strong performance 287 * requirement. 288 * 289 * @param key the key identifying an attribute. 290 * @return the attribute value or {@code null} if not present. 291 * @see AttributeKey 292 */ 293 @Override get(AttributeKey key)294 /* @Nullable */ public String get(AttributeKey key) { 295 checkArgument( 296 !getDataType().isValueAttribute(key), 297 "cannot get 'value attribute' values from a distinguishing path: %s", 298 key); 299 String v = null; 300 for (CldrPath p = this; v == null && p != null; p = p.getParent()) { 301 if (p.getName().equals(key.getElementName())) { 302 v = p.getLocalAttributeValue(key.getAttributeName()); 303 } 304 } 305 return v; 306 } 307 308 /** 309 * Returns the data type for this path, as defined by the top most element. 310 * 311 * @return the path's data type. 312 */ 313 @Override getDataType()314 public CldrDataType getDataType() { 315 return dtdType; 316 } 317 318 /** 319 * Returns whether the given element name is in this path. 320 * 321 * @param elementName the element name to check. 322 * @return true if the name of this path element or any of its parents is equal to the given 323 * element name. 324 */ containsElement(String elementName)325 boolean containsElement(String elementName) { 326 return getName().equals(elementName) 327 || (parent != null && parent.containsElement(elementName)); 328 } 329 330 /** 331 * Returns the number of distinguishing attributes for this path element. 332 * 333 * @return the number of distinguishing attributes for the "lowest" element in this path. 334 */ getAttributeCount()335 int getAttributeCount() { 336 return attributeKeyValuePairs.size() / 2; 337 } 338 339 /** 340 * Returns the (non empty) name of the Nth distinguishing attribute in the leaf path element. 341 * 342 * @param n the index of a distinguishing attribute for the "lowest" element in this path. 343 * @return the name of the Nth attribute on this path element. 344 */ getLocalAttributeName(int n)345 String getLocalAttributeName(int n) { 346 checkElementIndex(n, getAttributeCount()); 347 return attributeKeyValuePairs.get(2 * n); 348 } 349 350 /** 351 * Returns the value of the Nth distinguishing attribute in the leaf path element. 352 * 353 * @param n the index of a distinguishing attribute for the "lowest" element in this path. 354 * @return the value of the Nth attribute on this path element. 355 */ getLocalAttributeValue(int n)356 String getLocalAttributeValue(int n) { 357 checkElementIndex(n, getAttributeCount()); 358 return attributeKeyValuePairs.get((2 * n) + 1); 359 } 360 361 // Helper to get an attribute by name from this path element. getLocalAttributeValue(String attributeName)362 private String getLocalAttributeValue(String attributeName) { 363 checkNotNull(attributeName); 364 for (int i = 0; i < attributeKeyValuePairs.size(); i += 2) { 365 if (attributeName.equals(attributeKeyValuePairs.get(i))) { 366 return attributeKeyValuePairs.get(i + 1); 367 } 368 } 369 return null; 370 } 371 372 /** 373 * Returns the draft status for this path, which is inherited from parent path elements. Note 374 * that where multiple (possibly conflicting) draft statuses are defined in a path, the "top 375 * most" (i.e. closest to the root element) value is used. 376 * 377 * @return the potentially inherited draft status. 378 */ getDraftStatus()379 Optional<CldrDraftStatus> getDraftStatus() { 380 return draftStatus; 381 } 382 383 /** 384 * Returns a combined full path string in the XPath style {@code //foo/bar[@x="y"]/baz}, with 385 * value attributes inserted in correct DTD order for each path element. 386 * 387 * <p>Note that while in most cases the values attributes simply follow the path attributes on 388 * each element, this is not necessarily always true, and DTD ordering can place value 389 * attributes before path attributes in an element. 390 * 391 * @param value a value to be associated with this path (from which value attributes will be 392 * obtained). 393 * @return the full XPath representation containing both distinguishing and value attributes. 394 */ getFullPath(CldrValue value)395 String getFullPath(CldrValue value) { 396 checkNotNull(value); 397 return appendToString(new StringBuilder(), value.getValueAttributes()).toString(); 398 } 399 400 /** Compares two paths in DTD order. */ 401 @Override compareTo(CldrPath other)402 public int compareTo(CldrPath other) { 403 if (dtdType == other.dtdType) { 404 return ordering.compare(this, other); 405 } 406 return dtdType.compareTo(other.dtdType); 407 } 408 409 /** {@inheritDoc} */ 410 @Override equals(Object obj)411 public boolean equals(Object obj) { 412 if (obj == this) { 413 return true; 414 } 415 if (!(obj instanceof CldrPath)) { 416 return false; 417 } 418 CldrPath other = (CldrPath) obj; 419 // Check type and length first (checking length catches most non-equal paths). 420 if (!getDataType().equals(other.getDataType()) || length != other.length) { 421 return false; 422 } 423 // Do (n - 1) comparisons since we already know the root element is the same (the root 424 // element never has value attributes on it and is uniquely defined by the DTD type). 425 // Working up from the "leaf" of the path works well because different paths will almost 426 // always be different in the leaf element (i.e. we will fail almost immediately). 427 CldrPath pthis = this; 428 for (int n = length - 1; n > 0; n--) { 429 if (!pthis.localEquals(other)) { 430 return false; 431 } 432 pthis = pthis.getParent(); 433 other = other.getParent(); 434 } 435 return true; 436 } 437 438 /** {@inheritDoc} */ 439 @Override hashCode()440 public int hashCode() { 441 return hashCode; 442 } 443 444 /** 445 * @return the distinguishing path string in the XPath style {@code //foo/bar[@x="y"]/baz}. 446 */ 447 @Override toString()448 public String toString() { 449 return appendToString(new StringBuilder(), ImmutableMap.of()).toString(); 450 } 451 localEquals(CldrPath other)452 private boolean localEquals(CldrPath other) { 453 // In _theory_ only length and localToString need to be checked (since the localToString is 454 // an unambiguous representation of the data, but it seems a bit hacky to rely on that. 455 return this.elementName.equals(other.elementName) 456 && this.sortIndex == other.sortIndex 457 && this.attributeKeyValuePairs.equals(other.attributeKeyValuePairs); 458 } 459 460 // XPath like toString() representation of a path element (e.g. foo[@bar="x"][@baz="y"]). 461 // When a sort index is present, it's appended to the element name (e.g. "foo#42[@bar="x"]"). getLocalToString()462 private String getLocalToString() { 463 if (localToString == null) { 464 String str = getName(); 465 if (sortIndex != -1) { 466 // This is very rare so it's almost certainly better to not make a StringBuilder 467 // above just for this possibility. 468 str += "#" + sortIndex; 469 } 470 if (getAttributeCount() > 0) { 471 str = 472 IntStream.range(0, getAttributeCount()) 473 .mapToObj( 474 n -> 475 String.format( 476 "[@%s=\"%s\"]", 477 getLocalAttributeName(n), 478 getLocalAttributeValue(n))) 479 .collect(Collectors.joining("", str, "")); 480 } 481 // Overwrite only once the local string is completed (this is idempotent so we don't 482 // have to care about locking etc.). 483 localToString = str; 484 } 485 return localToString; 486 } 487 488 // Recursive helper for toString(). appendToString( StringBuilder out, ImmutableMap<AttributeKey, String> valueAttributes)489 private StringBuilder appendToString( 490 StringBuilder out, ImmutableMap<AttributeKey, String> valueAttributes) { 491 CldrPath parent = getParent(); 492 if (parent != null) { 493 parent.appendToString(out, valueAttributes).append('/'); 494 } else { 495 out.append("//"); 496 } 497 if (valueAttributes.isEmpty()) { 498 return out.append(getLocalToString()); 499 } 500 List<String> attributeNames = 501 valueAttributes.keySet().stream() 502 .filter(k -> k.getElementName().equals(getName())) 503 .map(AttributeKey::getAttributeName) 504 .collect(toCollection(ArrayList::new)); 505 if (attributeNames.isEmpty()) { 506 // No value attributes for _this_ element so can use just the local toString(). 507 return out.append(getLocalToString()); 508 } 509 if (getAttributeCount() > 0) { 510 String lastPathAttributeName = getLocalAttributeName(getAttributeCount() - 1); 511 if (dtdType.getAttributeComparator() 512 .compare(lastPathAttributeName, attributeNames.get(0)) 513 > 0) { 514 // Oops, order is not as expected, so must reorder all attributes. 515 appendResortedValueAttributesTo(out, attributeNames, valueAttributes); 516 return out; 517 } 518 } 519 // Value attributes all come after path attributes. 520 return appendValueAttributesTo( 521 out.append(getLocalToString()), attributeNames, valueAttributes); 522 } 523 appendResortedValueAttributesTo( StringBuilder out, List<String> attributeNames, ImmutableMap<AttributeKey, String> valueAttributes)524 private void appendResortedValueAttributesTo( 525 StringBuilder out, 526 List<String> attributeNames, 527 ImmutableMap<AttributeKey, String> valueAttributes) { 528 out.append(elementName); 529 for (int n = 0; n < attributeKeyValuePairs.size(); n += 2) { 530 attributeNames.add(attributeKeyValuePairs.get(n)); 531 } 532 attributeNames.sort(dtdType.getAttributeComparator()); 533 for (String attrName : attributeNames) { 534 String value = getLocalAttributeValue(attrName); 535 if (value == null) { 536 value = valueAttributes.get(AttributeKey.keyOf(elementName, attrName)); 537 checkState(value != null, "missing value %s:%s", elementName, attrName); 538 } 539 out.append(String.format("[@%s=\"%s\"]", attrName, value)); 540 } 541 } 542 appendValueAttributesTo( StringBuilder out, List<String> attributeNames, ImmutableMap<AttributeKey, String> valueAttributes)543 private StringBuilder appendValueAttributesTo( 544 StringBuilder out, 545 List<String> attributeNames, 546 ImmutableMap<AttributeKey, String> valueAttributes) { 547 for (String attrName : attributeNames) { 548 String value = valueAttributes.get(AttributeKey.keyOf(elementName, attrName)); 549 checkState(value != null, "missing value %s:%s", elementName, attrName); 550 out.append(String.format("[@%s=\"%s\"]", attrName, value)); 551 } 552 return out; 553 } 554 } 555