xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/api/CldrFileDataSource.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.api;
2 
3 import static com.google.common.base.Preconditions.checkNotNull;
4 
5 import com.google.common.collect.Lists;
6 import java.util.ArrayList;
7 import java.util.Comparator;
8 import java.util.Iterator;
9 import java.util.LinkedHashMap;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.regex.Pattern;
13 import org.unicode.cldr.util.CLDRFile;
14 import org.unicode.cldr.util.CldrUtility;
15 import org.unicode.cldr.util.XPathParts;
16 
17 /** Serializes a CLDRFile as a sequence of {@link CldrValue CldrValues}. */
18 final class CldrFileDataSource implements CldrData {
19     private static final Pattern CAPTURE_SORT_INDEX = Pattern.compile("#([0-9]+)");
20 
21     private final CLDRFile source;
22 
CldrFileDataSource(CLDRFile source)23     CldrFileDataSource(CLDRFile source) {
24         this.source = checkNotNull(source);
25     }
26 
27     @Override
accept(PathOrder order, ValueVisitor visitor)28     public void accept(PathOrder order, ValueVisitor visitor) {
29         Iterator<String> paths;
30         switch (order) {
31             case ARBITRARY:
32                 paths = source.iterator();
33                 break;
34 
35             case NESTED_GROUPING:
36                 // Distinguishing paths when sorted by string order should yield "nested grouping".
37                 // This is because lexicographical order is determined by the earliest character
38                 // difference, which either occurs in the element name or the attribute declaration.
39                 // Either way, the string before the first difference will agree on zero or more
40                 // complete path elements and order is always decided by a change to the lowest path
41                 // element. This should therefore result in common parent prefixes always being
42                 // visited
43                 // consecutively. It also (like DTD ordering) greatly improves the performance when
44                 // parsing paths because consecutive paths share common parent elements.
45                 paths = source.iterator(null, Comparator.naturalOrder());
46                 break;
47 
48             case DTD:
49                 paths = source.iterator(null, source.getComparator());
50                 break;
51 
52             default:
53                 throw new AssertionError("Unknown path ordering: " + order);
54         }
55         read(paths, source, visitor);
56     }
57 
58     @Override
59     /* @Nullable */
get(CldrPath cldrPath)60     public CldrValue get(CldrPath cldrPath) {
61         String dPath = getInternalPathString(cldrPath);
62         String fullXPath = source.getFullXPath(dPath);
63         if (fullXPath == null) {
64             return null;
65         }
66         XPathParts pathPaths = XPathParts.getFrozenInstance(fullXPath);
67         int length = pathPaths.size();
68         Map<AttributeKey, String> attributes = new LinkedHashMap<>();
69         for (int n = 0; n < length; n++) {
70             CldrPaths.processPathAttributes(
71                     pathPaths.getElement(n),
72                     pathPaths.getAttributes(n),
73                     cldrPath.getDataType(),
74                     e -> {},
75                     attributes::put);
76         }
77         // This is MUCH faster if you pass the distinguishing path in. If the CLDRFile is
78         // "unresolved" then we can get the special "inheritance marker" returned, which
79         // should just be treated as if there was no value present.
80         String value = source.getStringValue(dPath);
81         if (value == null || value.equals(CldrUtility.INHERITANCE_MARKER)) {
82             return null;
83         }
84         return CldrValue.create(value, attributes, cldrPath);
85     }
86 
getInternalPathString(CldrPath p)87     private static String getInternalPathString(CldrPath p) {
88         // This is the distinguishing xpath, but possibly with a sort index present (e.g.
89         // foo#42[@bar="x"]). So to get the internal path as used by CLDRFile, we must convert '#N'
90         // into '[@_q="N"]'
91         String dpath = p.toString();
92         if (dpath.indexOf('#') != -1) {
93             dpath = CAPTURE_SORT_INDEX.matcher(dpath).replaceAll("[@_q=\"$1\"]");
94         }
95         return dpath;
96     }
97 
read(Iterator<String> paths, CLDRFile src, ValueVisitor visitor)98     private void read(Iterator<String> paths, CLDRFile src, ValueVisitor visitor) {
99         Map<AttributeKey, String> valueAttributes = new LinkedHashMap<>();
100 
101         // This is a bit fiddly since we add path elements in reverse order to the 'stack' but want
102         // to access them using the path element index. E.g. if we add the path a->b->c->d to the
103         // stack we get "(d,c,b,a)" in the array, but really want "(a,b,c,d)" to avoid having to
104         // use recursion or other tricks to reverse the order of addition, we can just create a
105         // reversed _view_ onto the list and pass that around. We could just insert the elements at
106         // the front of the array (rather than adding them at the end) but that means repeated
107         // copying of existing elements to make room, so it's slower.
108         //
109         // This has the path elements pushed into it in reverse order.
110         List<CldrPath> previousElementStack = new ArrayList<>();
111         // This views the path elements in forward order.
112         List<CldrPath> previousElements = Lists.reverse(previousElementStack);
113 
114         while (paths.hasNext()) {
115             String dPath = paths.next();
116             // This is MUCH faster if you pass the distinguishing path in. If the CLDRFile is
117             // "unresolved" then we can get the special "inheritance marker" returned, which
118             // should just be treated as if there was no value present.
119             String value = src.getStringValue(dPath);
120             if (value == null || value.equals(CldrUtility.INHERITANCE_MARKER)) {
121                 continue;
122             }
123             // There's a cache behind XPathParts which probably makes it faster to lookup these
124             // instances rather than parse them each time (it all depends on whether this is the
125             // first time the full paths are used).
126             CldrPath cldrPath =
127                     CldrPaths.processXPath(
128                             src.getFullXPath(dPath), previousElements, valueAttributes::put);
129 
130             if (CldrPaths.isLeafPath(cldrPath) && CldrPaths.shouldEmit(cldrPath)) {
131                 visitor.visit(CldrValue.create(value, valueAttributes, cldrPath));
132             }
133 
134             // Prepare the element stack for next time by pushing the current path onto it.
135             pushPathElements(cldrPath, previousElementStack);
136             valueAttributes.clear();
137         }
138     }
139 
140     /**
141      * Pushes the elements of the given path into the list. This is efficient but results in the
142      * list order being reversed (e.g. path "a->b->c->d" results in "(d,c,b,a)". A reversed view of
143      * this stack is used to present the path elements in "forward order".
144      */
pushPathElements(CldrPath cldrPath, List<CldrPath> stack)145     private static void pushPathElements(CldrPath cldrPath, List<CldrPath> stack) {
146         stack.clear();
147         for (CldrPath p = cldrPath; p != null; p = p.getParent()) {
148             stack.add(p);
149         }
150     }
151 }
152