1 package org.unicode.cldr.api; 2 3 import static com.google.common.base.Preconditions.checkNotNull; 4 5 import com.google.common.collect.Lists; 6 import java.util.ArrayList; 7 import java.util.Comparator; 8 import java.util.Iterator; 9 import java.util.LinkedHashMap; 10 import java.util.List; 11 import java.util.Map; 12 import java.util.regex.Pattern; 13 import org.unicode.cldr.util.CLDRFile; 14 import org.unicode.cldr.util.CldrUtility; 15 import org.unicode.cldr.util.XPathParts; 16 17 /** Serializes a CLDRFile as a sequence of {@link CldrValue CldrValues}. */ 18 final class CldrFileDataSource implements CldrData { 19 private static final Pattern CAPTURE_SORT_INDEX = Pattern.compile("#([0-9]+)"); 20 21 private final CLDRFile source; 22 CldrFileDataSource(CLDRFile source)23 CldrFileDataSource(CLDRFile source) { 24 this.source = checkNotNull(source); 25 } 26 27 @Override accept(PathOrder order, ValueVisitor visitor)28 public void accept(PathOrder order, ValueVisitor visitor) { 29 Iterator<String> paths; 30 switch (order) { 31 case ARBITRARY: 32 paths = source.iterator(); 33 break; 34 35 case NESTED_GROUPING: 36 // Distinguishing paths when sorted by string order should yield "nested grouping". 37 // This is because lexicographical order is determined by the earliest character 38 // difference, which either occurs in the element name or the attribute declaration. 39 // Either way, the string before the first difference will agree on zero or more 40 // complete path elements and order is always decided by a change to the lowest path 41 // element. This should therefore result in common parent prefixes always being 42 // visited 43 // consecutively. It also (like DTD ordering) greatly improves the performance when 44 // parsing paths because consecutive paths share common parent elements. 45 paths = source.iterator(null, Comparator.naturalOrder()); 46 break; 47 48 case DTD: 49 paths = source.iterator(null, source.getComparator()); 50 break; 51 52 default: 53 throw new AssertionError("Unknown path ordering: " + order); 54 } 55 read(paths, source, visitor); 56 } 57 58 @Override 59 /* @Nullable */ get(CldrPath cldrPath)60 public CldrValue get(CldrPath cldrPath) { 61 String dPath = getInternalPathString(cldrPath); 62 String fullXPath = source.getFullXPath(dPath); 63 if (fullXPath == null) { 64 return null; 65 } 66 XPathParts pathPaths = XPathParts.getFrozenInstance(fullXPath); 67 int length = pathPaths.size(); 68 Map<AttributeKey, String> attributes = new LinkedHashMap<>(); 69 for (int n = 0; n < length; n++) { 70 CldrPaths.processPathAttributes( 71 pathPaths.getElement(n), 72 pathPaths.getAttributes(n), 73 cldrPath.getDataType(), 74 e -> {}, 75 attributes::put); 76 } 77 // This is MUCH faster if you pass the distinguishing path in. If the CLDRFile is 78 // "unresolved" then we can get the special "inheritance marker" returned, which 79 // should just be treated as if there was no value present. 80 String value = source.getStringValue(dPath); 81 if (value == null || value.equals(CldrUtility.INHERITANCE_MARKER)) { 82 return null; 83 } 84 return CldrValue.create(value, attributes, cldrPath); 85 } 86 getInternalPathString(CldrPath p)87 private static String getInternalPathString(CldrPath p) { 88 // This is the distinguishing xpath, but possibly with a sort index present (e.g. 89 // foo#42[@bar="x"]). So to get the internal path as used by CLDRFile, we must convert '#N' 90 // into '[@_q="N"]' 91 String dpath = p.toString(); 92 if (dpath.indexOf('#') != -1) { 93 dpath = CAPTURE_SORT_INDEX.matcher(dpath).replaceAll("[@_q=\"$1\"]"); 94 } 95 return dpath; 96 } 97 read(Iterator<String> paths, CLDRFile src, ValueVisitor visitor)98 private void read(Iterator<String> paths, CLDRFile src, ValueVisitor visitor) { 99 Map<AttributeKey, String> valueAttributes = new LinkedHashMap<>(); 100 101 // This is a bit fiddly since we add path elements in reverse order to the 'stack' but want 102 // to access them using the path element index. E.g. if we add the path a->b->c->d to the 103 // stack we get "(d,c,b,a)" in the array, but really want "(a,b,c,d)" to avoid having to 104 // use recursion or other tricks to reverse the order of addition, we can just create a 105 // reversed _view_ onto the list and pass that around. We could just insert the elements at 106 // the front of the array (rather than adding them at the end) but that means repeated 107 // copying of existing elements to make room, so it's slower. 108 // 109 // This has the path elements pushed into it in reverse order. 110 List<CldrPath> previousElementStack = new ArrayList<>(); 111 // This views the path elements in forward order. 112 List<CldrPath> previousElements = Lists.reverse(previousElementStack); 113 114 while (paths.hasNext()) { 115 String dPath = paths.next(); 116 // This is MUCH faster if you pass the distinguishing path in. If the CLDRFile is 117 // "unresolved" then we can get the special "inheritance marker" returned, which 118 // should just be treated as if there was no value present. 119 String value = src.getStringValue(dPath); 120 if (value == null || value.equals(CldrUtility.INHERITANCE_MARKER)) { 121 continue; 122 } 123 // There's a cache behind XPathParts which probably makes it faster to lookup these 124 // instances rather than parse them each time (it all depends on whether this is the 125 // first time the full paths are used). 126 CldrPath cldrPath = 127 CldrPaths.processXPath( 128 src.getFullXPath(dPath), previousElements, valueAttributes::put); 129 130 if (CldrPaths.isLeafPath(cldrPath) && CldrPaths.shouldEmit(cldrPath)) { 131 visitor.visit(CldrValue.create(value, valueAttributes, cldrPath)); 132 } 133 134 // Prepare the element stack for next time by pushing the current path onto it. 135 pushPathElements(cldrPath, previousElementStack); 136 valueAttributes.clear(); 137 } 138 } 139 140 /** 141 * Pushes the elements of the given path into the list. This is efficient but results in the 142 * list order being reversed (e.g. path "a->b->c->d" results in "(d,c,b,a)". A reversed view of 143 * this stack is used to present the path elements in "forward order". 144 */ pushPathElements(CldrPath cldrPath, List<CldrPath> stack)145 private static void pushPathElements(CldrPath cldrPath, List<CldrPath> stack) { 146 stack.clear(); 147 for (CldrPath p = cldrPath; p != null; p = p.getParent()) { 148 stack.add(p); 149 } 150 } 151 } 152