xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/test/CoverageLevel2.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.test;
2 
3 import static java.util.Collections.disjoint;
4 
5 import com.ibm.icu.util.Output;
6 import com.ibm.icu.util.VersionInfo;
7 import java.util.ArrayList;
8 import java.util.Collections;
9 import java.util.LinkedList;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Map.Entry;
13 import java.util.Set;
14 import java.util.SortedSet;
15 import java.util.TreeMap;
16 import java.util.TreeSet;
17 import java.util.regex.Matcher;
18 import org.unicode.cldr.tool.ToolConfig;
19 import org.unicode.cldr.util.Builder;
20 import org.unicode.cldr.util.CLDRConfig;
21 import org.unicode.cldr.util.CLDRFile;
22 import org.unicode.cldr.util.CLDRLocale;
23 import org.unicode.cldr.util.CLDRPaths;
24 import org.unicode.cldr.util.CldrUtility.VariableReplacer;
25 import org.unicode.cldr.util.Level;
26 import org.unicode.cldr.util.PathHeader;
27 import org.unicode.cldr.util.PatternCache;
28 import org.unicode.cldr.util.RegexLookup;
29 import org.unicode.cldr.util.RegexLookup.Finder;
30 import org.unicode.cldr.util.RegexLookup.RegexFinder;
31 import org.unicode.cldr.util.SupplementalDataInfo;
32 import org.unicode.cldr.util.SupplementalDataInfo.ApprovalRequirementMatcher;
33 import org.unicode.cldr.util.SupplementalDataInfo.CoverageLevelInfo;
34 import org.unicode.cldr.util.SupplementalDataInfo.CoverageVariableInfo;
35 import org.unicode.cldr.util.XMLFileReader;
36 import org.unicode.cldr.util.XPathParts;
37 
38 public class CoverageLevel2 {
39 
40     // To modify the results, see /cldr/common/supplemental/coverageLevels.xml
41 
42     /** Enable to get more verbose output when debugging */
43     private static final boolean DEBUG_LOOKUP = false;
44 
45     private RegexLookup<Level> lookup = null;
46 
    /**
     * The locale-specific sets that a coverage pattern variable can refer to.
     *
     * <p>Each constant is the name of a variable reference such as {@code ${Target-Plurals}} with
     * the surrounding {@code ${} and {@code }} removed and each hyphen changed to an underscore;
     * the MyRegexFinder constructor performs that conversion via {@code valueOf}.
     */
    enum SetMatchType {
        Target_Language,
        Target_Scripts,
        Target_Territories,
        Target_TimeZones,
        Target_Currencies,
        Target_Plurals,
        Calendar_List
    }
56 
    /**
     * Per-locale data that is passed to the lookup as the match context and read back by
     * MyRegexFinder.find.
     */
    private static class LocaleSpecificInfo {
        /** Coverage variable info fetched from the SupplementalDataInfo for the target language. */
        CoverageVariableInfo cvi;

        /** Language of the locale, as returned by CLDRLocale.getLanguage(). */
        String targetLanguage;
    }
61 
62     final LocaleSpecificInfo myInfo = new LocaleSpecificInfo();
63 
64     /**
65      * We define a regex finder for use in the lookup. It has extra tests based on the ci value and
66      * the cvi value, duplicating what was in SupplementalDataInfo. It uses the sets instead of
67      * converting to regex strings.
68      *
69      * @author markdavis
70      */
71     public static class MyRegexFinder extends RegexFinder {
72         private final SetMatchType additionalMatch;
73         private final CoverageLevelInfo ci;
74 
MyRegexFinder(String pattern, String additionalMatch, CoverageLevelInfo ci)75         public MyRegexFinder(String pattern, String additionalMatch, CoverageLevelInfo ci) {
76             super(pattern);
77             // remove the ${ and the }, and change - to _.
78             this.additionalMatch =
79                     additionalMatch == null
80                             ? null
81                             : SetMatchType.valueOf(
82                                     additionalMatch
83                                             .substring(2, additionalMatch.length() - 1)
84                                             .replace('-', '_'));
85             this.ci = ci;
86         }
87 
88         @Override
find(String item, Object context, Info info)89         public boolean find(String item, Object context, Info info) {
90             LocaleSpecificInfo localeSpecificInfo = (LocaleSpecificInfo) context;
91             // Modified the logic to handle the case where we want specific languages and specific
92             // territories.
93             // Any match in language script or territory will succeed when multiple items are
94             // present.
95             boolean lstOK = false;
96             if (ci.inLanguage == null && ci.inScriptSet == null && ci.inTerritorySet == null) {
97                 lstOK = true;
98             } else if (ci.inLanguage != null
99                     && ci.inLanguage.matcher(localeSpecificInfo.targetLanguage).matches()) {
100                 lstOK = true;
101             } else if (ci.inScriptSet != null
102                     && !disjoint(ci.inScriptSet, localeSpecificInfo.cvi.targetScripts)) {
103                 lstOK = true;
104             } else if (ci.inTerritorySet != null
105                     && !disjoint(ci.inTerritorySet, localeSpecificInfo.cvi.targetTerritories)) {
106                 lstOK = true;
107             }
108 
109             if (!lstOK) {
110                 return false;
111             }
112             boolean result = super.find(item, context, info); // also sets matcher in RegexFinder
113             if (!result) {
114                 return false;
115             }
116             if (additionalMatch != null) {
117                 String groupMatch = info.value[1];
118                 //                    String groupMatch = matcher.group(1);
119                 // we match on a group, so get the right one
120                 switch (additionalMatch) {
121                     case Target_Language:
122                         return localeSpecificInfo.targetLanguage.equals(groupMatch);
123                     case Target_Scripts:
124                         return localeSpecificInfo.cvi.targetScripts.contains(groupMatch);
125                     case Target_Territories:
126                         return localeSpecificInfo.cvi.targetTerritories.contains(groupMatch);
127                     case Target_TimeZones:
128                         return localeSpecificInfo.cvi.targetTimeZones.contains(groupMatch);
129                     case Target_Currencies:
130                         return localeSpecificInfo.cvi.targetCurrencies.contains(groupMatch);
131                         // For Target_Plurals, we have to account for the fact that the @count= part
132                         // might not be in the
133                         // xpath, so we shouldn't reject the match because of that. ( i.e. The regex
134                         // is usually
135                         // ([@count='${Target-Plurals}'])?
136                     case Target_Plurals:
137                         return (groupMatch == null
138                                 || groupMatch.length() == 0
139                                 || localeSpecificInfo.cvi.targetPlurals.contains(groupMatch));
140                     case Calendar_List:
141                         return localeSpecificInfo.cvi.calendars.contains(groupMatch);
142                 }
143             }
144 
145             return true;
146         }
147 
148         @Override
equals(Object obj)149         public boolean equals(Object obj) {
150             return false;
151         }
152     }
153 
CoverageLevel2(SupplementalDataInfo sdi, String locale)154     private CoverageLevel2(SupplementalDataInfo sdi, String locale) {
155         myInfo.targetLanguage = CLDRLocale.getInstance(locale).getLanguage();
156         myInfo.cvi = sdi.getCoverageVariableInfo(myInfo.targetLanguage);
157         lookup = sdi.getCoverageLookup();
158     }
159 
CoverageLevel2(SupplementalDataInfo sdi, String locale, String ruleFile)160     private CoverageLevel2(SupplementalDataInfo sdi, String locale, String ruleFile) {
161         myInfo.targetLanguage = CLDRLocale.getInstance(locale).getLanguage();
162         myInfo.cvi = sdi.getCoverageVariableInfo(myInfo.targetLanguage);
163         RawCoverageFile rcf = new RawCoverageFile();
164         lookup = rcf.load(ruleFile);
165     }
166 
167     /**
168      * get an instance, using CldrUtility.SUPPLEMENTAL_DIRECTORY
169      *
170      * @param locale
171      * @return
172      * @deprecated Don't use this. call the version which takes a SupplementalDataInfo as an
173      *     argument.
174      * @see #getInstance(SupplementalDataInfo, String)
175      * @see CLDRPaths#SUPPLEMENTAL_DIRECTORY
176      */
177     @Deprecated
getInstance(String locale)178     public static CoverageLevel2 getInstance(String locale) {
179         return new CoverageLevel2(SupplementalDataInfo.getInstance(), locale);
180     }
181 
getInstance(SupplementalDataInfo sdi, String locale)182     public static CoverageLevel2 getInstance(SupplementalDataInfo sdi, String locale) {
183         return new CoverageLevel2(sdi, locale);
184     }
185 
getInstance( SupplementalDataInfo sdi, String locale, String ruleFile)186     public static CoverageLevel2 getInstance(
187             SupplementalDataInfo sdi, String locale, String ruleFile) {
188         return new CoverageLevel2(sdi, locale, ruleFile);
189     }
190 
getLevel(String path)191     public Level getLevel(String path) {
192         if (path == null) {
193             return Level.UNDETERMINED;
194         }
195         synchronized (
196                 lookup) { // synchronize on the class, since the Matchers are changed during the
197             // matching process
198             Level result;
199             if (DEBUG_LOOKUP) { // for testing
200                 Output<String[]> checkItems = new Output<>();
201                 Output<Finder> matcherFound = new Output<>();
202                 List<String> failures = new ArrayList<>();
203                 result = lookup.get(path, myInfo, checkItems, matcherFound, failures);
204                 for (String s : failures) {
205                     System.out.println(s);
206                 }
207             } else {
208                 result = lookup.get(path, myInfo, null);
209             }
210             return result == null ? Level.COMPREHENSIVE : result;
211         }
212     }
213 
getIntLevel(String path)214     public int getIntLevel(String path) {
215         return getLevel(path).getLevel();
216     }
217 
218     // Moved code in from SupplementalInfo
219     //
220     // TODO:
221     // 1. drop the corresponding code in SupplementalInfo.
222     // 2. change SupplementalInfo to skip reading coverageLevels.xml
223     // 3. change the default creation of CoverageLevels2 to instead use this code with that file.
224     // Later
225     // 4. Generalize the RawCoverageFile code, and use with other supplemental files.
226     //    That way supplemental files can be read as needed instead of all at once.
227 
228     private final List<String> approvalRequirements = new LinkedList<>(); // xpath array
229     private VariableReplacer coverageVariables = new VariableReplacer();
230     private SortedSet<CoverageLevelInfo> coverageLevels = new TreeSet<>();
231 
232     public class RawCoverageFile {
233 
234         private VersionInfo cldrVersion;
235 
236         class MyHandler extends XMLFileReader.SimpleHandler {
237             @Override
handlePathValue(String path, String pathValue)238             public void handlePathValue(String path, String pathValue) {
239                 XPathParts parts = XPathParts.getFrozenInstance(path);
240                 String level1 = parts.size() < 2 ? null : parts.getElement(1);
241                 if (level1.equals("version")) {
242                     if (cldrVersion == null) {
243                         String version = parts.getAttributeValue(1, "cldrVersion");
244                         if (version == null) {
245                             version = parts.getAttributeValue(0, "version");
246                         }
247                         cldrVersion = VersionInfo.getInstance(version);
248                     }
249                 } else if (parts.containsElement("approvalRequirement")) {
250                     approvalRequirements.add(parts.toString());
251                 } else if (parts.containsElement("coverageLevel")) {
252                     String match =
253                             parts.containsAttribute("match")
254                                     ? coverageVariables.replace(
255                                             parts.getAttributeValue(-1, "match"))
256                                     : null;
257                     String valueStr = parts.getAttributeValue(-1, "value");
258                     // Ticket 7125: map the number to English. So switch from English to number for
259                     // construction
260                     valueStr = Integer.toString(Level.get(valueStr).getLevel());
261 
262                     String inLanguage =
263                             parts.containsAttribute("inLanguage")
264                                     ? coverageVariables.replace(
265                                             parts.getAttributeValue(-1, "inLanguage"))
266                                     : null;
267                     String inScript =
268                             parts.containsAttribute("inScript")
269                                     ? coverageVariables.replace(
270                                             parts.getAttributeValue(-1, "inScript"))
271                                     : null;
272                     String inTerritory =
273                             parts.containsAttribute("inTerritory")
274                                     ? coverageVariables.replace(
275                                             parts.getAttributeValue(-1, "inTerritory"))
276                                     : null;
277                     Integer value =
278                             (valueStr != null) ? Integer.valueOf(valueStr) : Integer.valueOf("101");
279                     if (cldrVersion.getMajor() < 2) {
280                         value = 40;
281                     }
282                     CoverageLevelInfo ci =
283                             new CoverageLevelInfo(match, value, inLanguage, inScript, inTerritory);
284                     coverageLevels.add(ci);
285                 } else if (parts.containsElement("coverageVariable")) {
286                     String key = parts.getAttributeValue(-1, "key");
287                     String value = parts.getAttributeValue(-1, "value");
288                     coverageVariables.add(key, value);
289                 }
290             }
291 
292             public void cleanup() {
293                 CLDRConfig testInfo = ToolConfig.getToolInstance();
294                 SupplementalDataInfo supplementalDataInfo2 = testInfo.getSupplementalDataInfo();
295                 CoverageLevelInfo.fixEU(coverageLevels, supplementalDataInfo2);
296                 coverageLevels = Collections.unmodifiableSortedSet(coverageLevels);
297             }
298         }
299 
300         public RegexLookup<Level> makeCoverageLookup() {
301             RegexLookup<Level> lookup =
302                     new RegexLookup<>(RegexLookup.LookupType.STAR_PATTERN_LOOKUP);
303 
304             Matcher variable = PatternCache.get("\\$\\{[A-Za-z][\\-A-Za-z]*\\}").matcher("");
305 
306             for (CoverageLevelInfo ci : coverageLevels) {
307                 String pattern =
308                         ci.match
309                                 .replace('\'', '"')
310                                 .replace("[@", "\\[@") // make sure that attributes are quoted
311                                 .replace("(", "(?:") // make sure that there are no capturing groups
312                                 // (beyond what we generate
313                                 .replace("(?:?!", "(?!"); // Allow negative lookahead
314                 pattern = "^//ldml/" + pattern + "$"; // for now, force a complete match
315                 String variableType = null;
316                 variable.reset(pattern);
317                 if (variable.find()) {
318                     pattern =
319                             pattern.substring(0, variable.start())
320                                     + "([^\"]*)"
321                                     + pattern.substring(variable.end());
322                     variableType = variable.group();
323                     if (variable.find()) {
324                         throw new IllegalArgumentException(
325                                 "We can only handle a single variable on a line");
326                     }
327                 }
328 
329                 // .replaceAll("\\]","\\\\]");
330                 lookup.add(new CoverageLevel2.MyRegexFinder(pattern, variableType, ci), ci.value);
331             }
332             return lookup;
333         }
334 
335         public RegexLookup<Level> load(String file) {
336             MyHandler myHandler = new MyHandler();
337             XMLFileReader xfr = new XMLFileReader().setHandler(myHandler);
338             xfr.read(file, -1, true);
339             myHandler.cleanup();
340             return makeCoverageLookup();
341         }
342     }
343 
344     // run these from first to last to get the approval info.
345     volatile List<ApprovalRequirementMatcher> approvalMatchers = null;
346 
347     /**
348      * Get the preliminary number of required votes based on the given locale and PathHeader
349      *
350      * <p>Important: this number may not agree with VoteResolver.getRequiredVotes since VoteResolver
351      * also takes the baseline status into account.
352      *
353      * <p>Called by VoteResolver, ShowStarredCoverage, TestCoverage, and TestCoverageLevel.
354      *
355      * @param loc the CLDRLocale
356      * @param ph the PathHeader - which path this is applied to, or null if unknown.
357      * @return a number such as 4 or 8
358      */
359     public int getRequiredVotes(CLDRLocale loc, PathHeader ph) {
360         if (approvalMatchers == null) {
361             approvalMatchers = ApprovalRequirementMatcher.buildAll(approvalRequirements);
362         }
363 
364         for (ApprovalRequirementMatcher m : approvalMatchers) {
365             if (m.matches(loc, ph)) {
366                 return m.getRequiredVotes();
367             }
368         }
369         throw new RuntimeException(
370                 "Error: " + loc + " " + ph + " ran off the end of the approvalMatchers.");
371     }
372 
373     // TODO: move to separate tool
374 
375     public static void main(String[] args) {
376         // Quick test during development to compare old to new coverageLevels
377 
378         checkCoverage("root");
379         checkCoverage("de");
380     }
381 
382     private static void checkCoverage(String locale) {
383         final CLDRConfig testInfo = ToolConfig.getToolInstance();
384         final SupplementalDataInfo supplementalDataInfo2 = testInfo.getSupplementalDataInfo();
385 
386         CoverageLevel2 cvOld = CoverageLevel2.getInstance(supplementalDataInfo2, locale);
387 
388         CoverageLevel2 cvNew =
389                 CoverageLevel2.getInstance(
390                         supplementalDataInfo2,
391                         locale,
392                         CLDRPaths.COMMON_DIRECTORY + "supplemental-temp/coverageLevels2.xml");
393 
394         CLDRFile cldrFile = testInfo.getCldrFactory().make(locale, true);
395         Set<String> paths = Builder.with(new TreeSet<String>()).addAll(cldrFile).get();
396         PathHeader.Factory phf = PathHeader.getFactory();
397         Map<PathHeader, String> diff = new TreeMap<>();
398         Map<PathHeader, String> same = new TreeMap<>();
399         for (String path : paths) {
400             Level levelOld = cvOld.getLevel(path);
401             Level levelNew = cvNew.getLevel(path);
402             if (levelOld != levelNew) {
403                 diff.put(
404                         phf.fromPath(path),
405                         locale + "\t" + levelOld + "\t" + levelNew + "\t" + path);
406             } else if (levelOld.compareTo(Level.MODERATE) < 0) {
407                 same.put(phf.fromPath(path), locale + "\t" + path);
408             }
409         }
410         System.out.println("\nLocale\tPath\tPathHeader");
411         for (Entry<PathHeader, String> line : same.entrySet()) {
412             System.out.println(line.getValue() + "\t" + line.getKey());
413         }
414         System.out.println("\nLocale\tOld\tNew\tPath\tPathHeader");
415         for (Entry<PathHeader, String> line : diff.entrySet()) {
416             System.out.println(line.getValue() + "\t" + line.getKey());
417         }
418     }
419 }
420