xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleXMLSource.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.util;
2 
3 import com.ibm.icu.impl.Relation;
4 import com.ibm.icu.text.Normalizer2;
5 import com.ibm.icu.text.UnicodeSet;
6 import com.ibm.icu.util.VersionInfo;
7 import java.util.Collections;
8 import java.util.HashMap;
9 import java.util.HashSet;
10 import java.util.Iterator;
11 import java.util.Map;
12 import java.util.Set;
13 import java.util.regex.Pattern;
14 import org.unicode.cldr.util.XPathParts.Comments;
15 
16 public class SimpleXMLSource extends XMLSource {
17     private Map<String, String> xpath_value = CldrUtility.newConcurrentHashMap();
18     private Map<String, String> xpath_fullXPath = CldrUtility.newConcurrentHashMap();
19     private Comments xpath_comments = new Comments(); // map from paths to comments.
20     private Relation<String, String> VALUE_TO_PATH = null;
21     private Object VALUE_TO_PATH_MUTEX = new Object();
22     private VersionInfo dtdVersionInfo;
23 
SimpleXMLSource(String localeID)24     public SimpleXMLSource(String localeID) {
25         this.setLocaleID(localeID);
26     }
27 
28     /**
29      * Create a shallow, locked copy of another XMLSource.
30      *
31      * @param copyAsLockedFrom
32      */
SimpleXMLSource(SimpleXMLSource copyAsLockedFrom)33     protected SimpleXMLSource(SimpleXMLSource copyAsLockedFrom) {
34         this.xpath_value = copyAsLockedFrom.xpath_value;
35         this.xpath_fullXPath = copyAsLockedFrom.xpath_fullXPath;
36         this.xpath_comments = copyAsLockedFrom.xpath_comments;
37         this.setLocaleID(copyAsLockedFrom.getLocaleID());
38         this.locationHash = Collections.unmodifiableMap(copyAsLockedFrom.locationHash);
39         locked = true;
40     }
41 
42     @Override
getValueAtDPath(String xpath)43     public String getValueAtDPath(String xpath) {
44         return xpath_value.get(xpath);
45     }
46 
getValueAtDPathSkippingInheritanceMarker(String xpath)47     public String getValueAtDPathSkippingInheritanceMarker(String xpath) {
48         String result = xpath_value.get(xpath);
49         return CldrUtility.INHERITANCE_MARKER.equals(result) ? null : result;
50     }
51 
52     @Override
getFullPathAtDPath(String xpath)53     public String getFullPathAtDPath(String xpath) {
54         String result = xpath_fullXPath.get(xpath);
55         if (result != null) return result;
56         if (xpath_value.get(xpath) != null) return xpath; // we don't store duplicates
57         // System.err.println("WARNING: "+getLocaleID()+": path not present in data: " + xpath);
58         // return xpath;
59         return null; // throw new IllegalArgumentException("Path not present in data: " + xpath);
60     }
61 
62     @Override
getXpathComments()63     public Comments getXpathComments() {
64         return xpath_comments;
65     }
66 
67     @Override
setXpathComments(Comments xpath_comments)68     public void setXpathComments(Comments xpath_comments) {
69         this.xpath_comments = xpath_comments;
70     }
71 
72     // public void putPathValue(String xpath, String value) {
73     // if (locked) throw new UnsupportedOperationException("Attempt to modify locked object");
74     // String distinguishingXPath = CLDRFile.getDistinguishingXPath(xpath, fixedPath);
75     // xpath_value.put(distinguishingXPath, value);
76     // if (!fixedPath[0].equals(distinguishingXPath)) {
77     // xpath_fullXPath.put(distinguishingXPath, fixedPath[0]);
78     // }
79     // }
80     @Override
removeValueAtDPath(String distinguishingXPath)81     public void removeValueAtDPath(String distinguishingXPath) {
82         String oldValue = xpath_value.get(distinguishingXPath);
83         xpath_value.remove(distinguishingXPath);
84         xpath_fullXPath.remove(distinguishingXPath);
85         updateValuePathMapping(distinguishingXPath, oldValue, null);
86     }
87 
88     @Override
iterator()89     public Iterator<String> iterator() { // must be unmodifiable or locked
90         return Collections.unmodifiableSet(xpath_value.keySet()).iterator();
91     }
92 
93     @Override
freeze()94     public XMLSource freeze() {
95         locked = true;
96         return this;
97     }
98 
99     @Override
cloneAsThawed()100     public XMLSource cloneAsThawed() {
101         SimpleXMLSource result = (SimpleXMLSource) super.cloneAsThawed();
102         result.xpath_comments = (Comments) result.xpath_comments.clone();
103         result.xpath_fullXPath = CldrUtility.newConcurrentHashMap(result.xpath_fullXPath);
104         result.xpath_value = CldrUtility.newConcurrentHashMap(result.xpath_value);
105         result.locationHash.putAll(result.locationHash);
106         return result;
107     }
108 
109     @Override
putFullPathAtDPath(String distinguishingXPath, String fullxpath)110     public void putFullPathAtDPath(String distinguishingXPath, String fullxpath) {
111         xpath_fullXPath.put(distinguishingXPath.intern(), fullxpath.intern());
112     }
113 
114     @Override
putValueAtDPath(String distinguishingXPath, String value)115     public void putValueAtDPath(String distinguishingXPath, String value) {
116         distinguishingXPath = distinguishingXPath.intern();
117         String oldValue = xpath_value.get(distinguishingXPath);
118         xpath_value.put(distinguishingXPath, value);
119         updateValuePathMapping(distinguishingXPath, oldValue, value);
120     }
121 
updateValuePathMapping( String distinguishingXPath, String oldValue, String newValue)122     private void updateValuePathMapping(
123             String distinguishingXPath, String oldValue, String newValue) {
124         synchronized (VALUE_TO_PATH_MUTEX) {
125             if (VALUE_TO_PATH != null) {
126                 if (oldValue != null) {
127                     VALUE_TO_PATH.remove(normalize(oldValue), distinguishingXPath);
128                 }
129                 if (newValue != null) {
130                     VALUE_TO_PATH.put(normalize(newValue), distinguishingXPath);
131                 }
132             }
133         }
134     }
135 
136     @Override
getPathsWithValue(String valueToMatch, String pathPrefix, Set<String> result)137     public void getPathsWithValue(String valueToMatch, String pathPrefix, Set<String> result) {
138         // build a Relation mapping value to paths, if needed
139         synchronized (VALUE_TO_PATH_MUTEX) {
140             if (VALUE_TO_PATH == null) {
141                 VALUE_TO_PATH = Relation.of(new HashMap<String, Set<String>>(), HashSet.class);
142                 for (Iterator<String> it = iterator(); it.hasNext(); ) {
143                     String path = it.next();
144                     String value1 = getValueAtDPathSkippingInheritanceMarker(path);
145                     if (value1 == null) {
146                         continue;
147                     }
148                     String value = normalize(value1);
149                     VALUE_TO_PATH.put(value, path);
150                 }
151             }
152             Set<String> paths = VALUE_TO_PATH.getAll(normalize(valueToMatch));
153             if (paths == null) {
154                 return;
155             }
156             if (pathPrefix == null || pathPrefix.length() == 0) {
157                 result.addAll(paths);
158                 return;
159             }
160             for (String path : paths) {
161                 if (path.startsWith(pathPrefix)) {
162                     // if (altPath.originalPath.startsWith(altPrefix.originalPath)) {
163                     result.add(path);
164                 }
165             }
166         }
167     }
168 
169     static final Normalizer2 NFKCCF = Normalizer2.getNFKCCasefoldInstance();
170     static final Normalizer2 NFKC = Normalizer2.getNFKCInstance();
171 
172     // The following includes letters, marks, numbers, currencies, and *selected*
173     // symbols/punctuation
174     static final UnicodeSet NON_ALPHANUM =
175             new UnicodeSet(
176                             "[^[:L:][:M:][:N:][:Sc:][\\u202F\uFFFF _ ¡ « ( ) \\- \\[ \\] \\{ \\} § / \\\\ % ٪ ‰ ؉ ‱-″ ` \\^ ¯ ¨ ° + ¬ | ¦ ~ − ⊕ ⍰ ☉ © ®]]")
177                     .freeze();
178 
normalize(String valueToMatch)179     public static String normalize(String valueToMatch) {
180         return normalize2(valueToMatch, NFKCCF);
181     }
182 
normalizeCaseSensitive(String valueToMatch)183     public static String normalizeCaseSensitive(String valueToMatch) {
184         return normalize2(valueToMatch, NFKC);
185     }
186 
normalize2(String valueToMatch, Normalizer2 normalizer2)187     public static String normalize2(String valueToMatch, Normalizer2 normalizer2) {
188         if (valueToMatch.indexOf('\u202F')
189                 >= 0) { // special hack to allow \u202f, which is otherwise removed by NFKC
190             String temp = valueToMatch.replace('\u202F', '\uFFFF');
191             String result = replace(NON_ALPHANUM, normalizer2.normalize(temp), "");
192             return result.replace('\uFFFF', '\u202F');
193         }
194         return replace(NON_ALPHANUM, normalizer2.normalize(valueToMatch), "");
195     }
196 
replace(UnicodeSet unicodeSet, String valueToMatch, String substitute)197     public static String replace(UnicodeSet unicodeSet, String valueToMatch, String substitute) {
198         // handle patterns
199         if (valueToMatch.contains("{")) {
200             valueToMatch = PLACEHOLDER.matcher(valueToMatch).replaceAll("⍰").trim();
201         }
202         StringBuilder b = null; // delay creating until needed
203         for (int i = 0; i < valueToMatch.length(); ++i) {
204             int cp = valueToMatch.codePointAt(i);
205             if (unicodeSet.contains(cp)) {
206                 if (b == null) {
207                     b = new StringBuilder();
208                     b.append(valueToMatch.substring(0, i)); // copy the start
209                 }
210                 if (substitute.length() != 0) {
211                     b.append(substitute);
212                 }
213             } else if (b != null) {
214                 b.appendCodePoint(cp);
215             }
216             if (cp > 0xFFFF) { // skip end of supplemental character
217                 ++i;
218             }
219         }
220         if (b != null) {
221             valueToMatch = b.toString();
222         }
223         return valueToMatch;
224     }
225 
226     static final Pattern PLACEHOLDER = PatternCache.get("\\{\\d\\}");
227 
setDtdVersionInfo(VersionInfo dtdVersionInfo)228     public void setDtdVersionInfo(VersionInfo dtdVersionInfo) {
229         this.dtdVersionInfo = dtdVersionInfo;
230     }
231 
232     @Override
getDtdVersionInfo()233     public VersionInfo getDtdVersionInfo() {
234         return dtdVersionInfo;
235     }
236 
237     private Map<String, SourceLocation> locationHash = new HashMap<>();
238 
239     @Override
addSourceLocation(String currentFullXPath, SourceLocation location)240     public XMLSource addSourceLocation(String currentFullXPath, SourceLocation location) {
241         if (!isFrozen()) {
242             locationHash.put(currentFullXPath.intern(), location);
243         } else {
244             System.err.println(
245                     "SimpleXMLSource::addSourceLocationAttempt to modify frozen source location");
246         }
247         return this;
248     }
249 
250     @Override
getSourceLocation(String fullXPath)251     public SourceLocation getSourceLocation(String fullXPath) {
252         return locationHash.get(fullXPath);
253     }
254 }
255