1 package org.unicode.cldr.test; 2 3 import static java.util.Collections.disjoint; 4 5 import com.ibm.icu.util.Output; 6 import com.ibm.icu.util.VersionInfo; 7 import java.util.ArrayList; 8 import java.util.Collections; 9 import java.util.LinkedList; 10 import java.util.List; 11 import java.util.Map; 12 import java.util.Map.Entry; 13 import java.util.Set; 14 import java.util.SortedSet; 15 import java.util.TreeMap; 16 import java.util.TreeSet; 17 import java.util.regex.Matcher; 18 import org.unicode.cldr.tool.ToolConfig; 19 import org.unicode.cldr.util.Builder; 20 import org.unicode.cldr.util.CLDRConfig; 21 import org.unicode.cldr.util.CLDRFile; 22 import org.unicode.cldr.util.CLDRLocale; 23 import org.unicode.cldr.util.CLDRPaths; 24 import org.unicode.cldr.util.CldrUtility.VariableReplacer; 25 import org.unicode.cldr.util.Level; 26 import org.unicode.cldr.util.PathHeader; 27 import org.unicode.cldr.util.PatternCache; 28 import org.unicode.cldr.util.RegexLookup; 29 import org.unicode.cldr.util.RegexLookup.Finder; 30 import org.unicode.cldr.util.RegexLookup.RegexFinder; 31 import org.unicode.cldr.util.SupplementalDataInfo; 32 import org.unicode.cldr.util.SupplementalDataInfo.ApprovalRequirementMatcher; 33 import org.unicode.cldr.util.SupplementalDataInfo.CoverageLevelInfo; 34 import org.unicode.cldr.util.SupplementalDataInfo.CoverageVariableInfo; 35 import org.unicode.cldr.util.XMLFileReader; 36 import org.unicode.cldr.util.XPathParts; 37 38 public class CoverageLevel2 { 39 40 // To modify the results, see /cldr/common/supplemental/coverageLevels.xml 41 42 /** Enable to get more verbose output when debugging */ 43 private static final boolean DEBUG_LOOKUP = false; 44 45 private RegexLookup<Level> lookup = null; 46 47 enum SetMatchType { 48 Target_Language, 49 Target_Scripts, 50 Target_Territories, 51 Target_TimeZones, 52 Target_Currencies, 53 Target_Plurals, 54 Calendar_List 55 } 56 57 private static class LocaleSpecificInfo { 58 CoverageVariableInfo cvi; 59 String targetLanguage; 60 } 61 62 final LocaleSpecificInfo myInfo = new LocaleSpecificInfo(); 63 64 /** 65 * We define a regex finder for use in the lookup. It has extra tests based on the ci value and 66 * the cvi value, duplicating what was in SupplementalDataInfo. It uses the sets instead of 67 * converting to regex strings. 68 * 69 * @author markdavis 70 */ 71 public static class MyRegexFinder extends RegexFinder { 72 private final SetMatchType additionalMatch; 73 private final CoverageLevelInfo ci; 74 MyRegexFinder(String pattern, String additionalMatch, CoverageLevelInfo ci)75 public MyRegexFinder(String pattern, String additionalMatch, CoverageLevelInfo ci) { 76 super(pattern); 77 // remove the ${ and the }, and change - to _. 78 this.additionalMatch = 79 additionalMatch == null 80 ? null 81 : SetMatchType.valueOf( 82 additionalMatch 83 .substring(2, additionalMatch.length() - 1) 84 .replace('-', '_')); 85 this.ci = ci; 86 } 87 88 @Override find(String item, Object context, Info info)89 public boolean find(String item, Object context, Info info) { 90 LocaleSpecificInfo localeSpecificInfo = (LocaleSpecificInfo) context; 91 // Modified the logic to handle the case where we want specific languages and specific 92 // territories. 93 // Any match in language script or territory will succeed when multiple items are 94 // present. 95 boolean lstOK = false; 96 if (ci.inLanguage == null && ci.inScriptSet == null && ci.inTerritorySet == null) { 97 lstOK = true; 98 } else if (ci.inLanguage != null 99 && ci.inLanguage.matcher(localeSpecificInfo.targetLanguage).matches()) { 100 lstOK = true; 101 } else if (ci.inScriptSet != null 102 && !disjoint(ci.inScriptSet, localeSpecificInfo.cvi.targetScripts)) { 103 lstOK = true; 104 } else if (ci.inTerritorySet != null 105 && !disjoint(ci.inTerritorySet, localeSpecificInfo.cvi.targetTerritories)) { 106 lstOK = true; 107 } 108 109 if (!lstOK) { 110 return false; 111 } 112 boolean result = super.find(item, context, info); // also sets matcher in RegexFinder 113 if (!result) { 114 return false; 115 } 116 if (additionalMatch != null) { 117 String groupMatch = info.value[1]; 118 // String groupMatch = matcher.group(1); 119 // we match on a group, so get the right one 120 switch (additionalMatch) { 121 case Target_Language: 122 return localeSpecificInfo.targetLanguage.equals(groupMatch); 123 case Target_Scripts: 124 return localeSpecificInfo.cvi.targetScripts.contains(groupMatch); 125 case Target_Territories: 126 return localeSpecificInfo.cvi.targetTerritories.contains(groupMatch); 127 case Target_TimeZones: 128 return localeSpecificInfo.cvi.targetTimeZones.contains(groupMatch); 129 case Target_Currencies: 130 return localeSpecificInfo.cvi.targetCurrencies.contains(groupMatch); 131 // For Target_Plurals, we have to account for the fact that the @count= part 132 // might not be in the 133 // xpath, so we shouldn't reject the match because of that. ( i.e. The regex 134 // is usually 135 // ([@count='${Target-Plurals}'])? 136 case Target_Plurals: 137 return (groupMatch == null 138 || groupMatch.length() == 0 139 || localeSpecificInfo.cvi.targetPlurals.contains(groupMatch)); 140 case Calendar_List: 141 return localeSpecificInfo.cvi.calendars.contains(groupMatch); 142 } 143 } 144 145 return true; 146 } 147 148 @Override equals(Object obj)149 public boolean equals(Object obj) { 150 return false; 151 } 152 } 153 CoverageLevel2(SupplementalDataInfo sdi, String locale)154 private CoverageLevel2(SupplementalDataInfo sdi, String locale) { 155 myInfo.targetLanguage = CLDRLocale.getInstance(locale).getLanguage(); 156 myInfo.cvi = sdi.getCoverageVariableInfo(myInfo.targetLanguage); 157 lookup = sdi.getCoverageLookup(); 158 } 159 CoverageLevel2(SupplementalDataInfo sdi, String locale, String ruleFile)160 private CoverageLevel2(SupplementalDataInfo sdi, String locale, String ruleFile) { 161 myInfo.targetLanguage = CLDRLocale.getInstance(locale).getLanguage(); 162 myInfo.cvi = sdi.getCoverageVariableInfo(myInfo.targetLanguage); 163 RawCoverageFile rcf = new RawCoverageFile(); 164 lookup = rcf.load(ruleFile); 165 } 166 167 /** 168 * get an instance, using CldrUtility.SUPPLEMENTAL_DIRECTORY 169 * 170 * @param locale 171 * @return 172 * @deprecated Don't use this. call the version which takes a SupplementalDataInfo as an 173 * argument. 174 * @see #getInstance(SupplementalDataInfo, String) 175 * @see CLDRPaths#SUPPLEMENTAL_DIRECTORY 176 */ 177 @Deprecated getInstance(String locale)178 public static CoverageLevel2 getInstance(String locale) { 179 return new CoverageLevel2(SupplementalDataInfo.getInstance(), locale); 180 } 181 getInstance(SupplementalDataInfo sdi, String locale)182 public static CoverageLevel2 getInstance(SupplementalDataInfo sdi, String locale) { 183 return new CoverageLevel2(sdi, locale); 184 } 185 getInstance( SupplementalDataInfo sdi, String locale, String ruleFile)186 public static CoverageLevel2 getInstance( 187 SupplementalDataInfo sdi, String locale, String ruleFile) { 188 return new CoverageLevel2(sdi, locale, ruleFile); 189 } 190 getLevel(String path)191 public Level getLevel(String path) { 192 if (path == null) { 193 return Level.UNDETERMINED; 194 } 195 synchronized ( 196 lookup) { // synchronize on the class, since the Matchers are changed during the 197 // matching process 198 Level result; 199 if (DEBUG_LOOKUP) { // for testing 200 Output<String[]> checkItems = new Output<>(); 201 Output<Finder> matcherFound = new Output<>(); 202 List<String> failures = new ArrayList<>(); 203 result = lookup.get(path, myInfo, checkItems, matcherFound, failures); 204 for (String s : failures) { 205 System.out.println(s); 206 } 207 } else { 208 result = lookup.get(path, myInfo, null); 209 } 210 return result == null ? Level.COMPREHENSIVE : result; 211 } 212 } 213 getIntLevel(String path)214 public int getIntLevel(String path) { 215 return getLevel(path).getLevel(); 216 } 217 218 // Moved code in from SupplementalInfo 219 // 220 // TODO: 221 // 1. drop the corresponding code in SupplementalInfo. 222 // 2. change SupplementalInfo to skip reading coverageLevels.xml 223 // 3. change the default creation of CoverageLevels2 to instead use this code with that file. 224 // Later 225 // 4. Generalize the RawCoverageFile code, and use with other supplemental files. 226 // That way supplemental files can be read as needed instead of all at once. 227 228 private final List<String> approvalRequirements = new LinkedList<>(); // xpath array 229 private VariableReplacer coverageVariables = new VariableReplacer(); 230 private SortedSet<CoverageLevelInfo> coverageLevels = new TreeSet<>(); 231 232 public class RawCoverageFile { 233 234 private VersionInfo cldrVersion; 235 236 class MyHandler extends XMLFileReader.SimpleHandler { 237 @Override handlePathValue(String path, String pathValue)238 public void handlePathValue(String path, String pathValue) { 239 XPathParts parts = XPathParts.getFrozenInstance(path); 240 String level1 = parts.size() < 2 ? null : parts.getElement(1); 241 if (level1.equals("version")) { 242 if (cldrVersion == null) { 243 String version = parts.getAttributeValue(1, "cldrVersion"); 244 if (version == null) { 245 version = parts.getAttributeValue(0, "version"); 246 } 247 cldrVersion = VersionInfo.getInstance(version); 248 } 249 } else if (parts.containsElement("approvalRequirement")) { 250 approvalRequirements.add(parts.toString()); 251 } else if (parts.containsElement("coverageLevel")) { 252 String match = 253 parts.containsAttribute("match") 254 ? coverageVariables.replace( 255 parts.getAttributeValue(-1, "match")) 256 : null; 257 String valueStr = parts.getAttributeValue(-1, "value"); 258 // Ticket 7125: map the number to English. So switch from English to number for 259 // construction 260 valueStr = Integer.toString(Level.get(valueStr).getLevel()); 261 262 String inLanguage = 263 parts.containsAttribute("inLanguage") 264 ? coverageVariables.replace( 265 parts.getAttributeValue(-1, "inLanguage")) 266 : null; 267 String inScript = 268 parts.containsAttribute("inScript") 269 ? coverageVariables.replace( 270 parts.getAttributeValue(-1, "inScript")) 271 : null; 272 String inTerritory = 273 parts.containsAttribute("inTerritory") 274 ? coverageVariables.replace( 275 parts.getAttributeValue(-1, "inTerritory")) 276 : null; 277 Integer value = 278 (valueStr != null) ? Integer.valueOf(valueStr) : Integer.valueOf("101"); 279 if (cldrVersion.getMajor() < 2) { 280 value = 40; 281 } 282 CoverageLevelInfo ci = 283 new CoverageLevelInfo(match, value, inLanguage, inScript, inTerritory); 284 coverageLevels.add(ci); 285 } else if (parts.containsElement("coverageVariable")) { 286 String key = parts.getAttributeValue(-1, "key"); 287 String value = parts.getAttributeValue(-1, "value"); 288 coverageVariables.add(key, value); 289 } 290 } 291 292 public void cleanup() { 293 CLDRConfig testInfo = ToolConfig.getToolInstance(); 294 SupplementalDataInfo supplementalDataInfo2 = testInfo.getSupplementalDataInfo(); 295 CoverageLevelInfo.fixEU(coverageLevels, supplementalDataInfo2); 296 coverageLevels = Collections.unmodifiableSortedSet(coverageLevels); 297 } 298 } 299 300 public RegexLookup<Level> makeCoverageLookup() { 301 RegexLookup<Level> lookup = 302 new RegexLookup<>(RegexLookup.LookupType.STAR_PATTERN_LOOKUP); 303 304 Matcher variable = PatternCache.get("\\$\\{[A-Za-z][\\-A-Za-z]*\\}").matcher(""); 305 306 for (CoverageLevelInfo ci : coverageLevels) { 307 String pattern = 308 ci.match 309 .replace('\'', '"') 310 .replace("[@", "\\[@") // make sure that attributes are quoted 311 .replace("(", "(?:") // make sure that there are no capturing groups 312 // (beyond what we generate 313 .replace("(?:?!", "(?!"); // Allow negative lookahead 314 pattern = "^//ldml/" + pattern + "$"; // for now, force a complete match 315 String variableType = null; 316 variable.reset(pattern); 317 if (variable.find()) { 318 pattern = 319 pattern.substring(0, variable.start()) 320 + "([^\"]*)" 321 + pattern.substring(variable.end()); 322 variableType = variable.group(); 323 if (variable.find()) { 324 throw new IllegalArgumentException( 325 "We can only handle a single variable on a line"); 326 } 327 } 328 329 // .replaceAll("\\]","\\\\]"); 330 lookup.add(new CoverageLevel2.MyRegexFinder(pattern, variableType, ci), ci.value); 331 } 332 return lookup; 333 } 334 335 public RegexLookup<Level> load(String file) { 336 MyHandler myHandler = new MyHandler(); 337 XMLFileReader xfr = new XMLFileReader().setHandler(myHandler); 338 xfr.read(file, -1, true); 339 myHandler.cleanup(); 340 return makeCoverageLookup(); 341 } 342 } 343 344 // run these from first to last to get the approval info. 345 volatile List<ApprovalRequirementMatcher> approvalMatchers = null; 346 347 /** 348 * Get the preliminary number of required votes based on the given locale and PathHeader 349 * 350 * <p>Important: this number may not agree with VoteResolver.getRequiredVotes since VoteResolver 351 * also takes the baseline status into account. 352 * 353 * <p>Called by VoteResolver, ShowStarredCoverage, TestCoverage, and TestCoverageLevel. 354 * 355 * @param loc the CLDRLocale 356 * @param ph the PathHeader - which path this is applied to, or null if unknown. 357 * @return a number such as 4 or 8 358 */ 359 public int getRequiredVotes(CLDRLocale loc, PathHeader ph) { 360 if (approvalMatchers == null) { 361 approvalMatchers = ApprovalRequirementMatcher.buildAll(approvalRequirements); 362 } 363 364 for (ApprovalRequirementMatcher m : approvalMatchers) { 365 if (m.matches(loc, ph)) { 366 return m.getRequiredVotes(); 367 } 368 } 369 throw new RuntimeException( 370 "Error: " + loc + " " + ph + " ran off the end of the approvalMatchers."); 371 } 372 373 // TODO: move to separate tool 374 375 public static void main(String[] args) { 376 // Quick test during development to compare old to new coverageLevels 377 378 checkCoverage("root"); 379 checkCoverage("de"); 380 } 381 382 private static void checkCoverage(String locale) { 383 final CLDRConfig testInfo = ToolConfig.getToolInstance(); 384 final SupplementalDataInfo supplementalDataInfo2 = testInfo.getSupplementalDataInfo(); 385 386 CoverageLevel2 cvOld = CoverageLevel2.getInstance(supplementalDataInfo2, locale); 387 388 CoverageLevel2 cvNew = 389 CoverageLevel2.getInstance( 390 supplementalDataInfo2, 391 locale, 392 CLDRPaths.COMMON_DIRECTORY + "supplemental-temp/coverageLevels2.xml"); 393 394 CLDRFile cldrFile = testInfo.getCldrFactory().make(locale, true); 395 Set<String> paths = Builder.with(new TreeSet<String>()).addAll(cldrFile).get(); 396 PathHeader.Factory phf = PathHeader.getFactory(); 397 Map<PathHeader, String> diff = new TreeMap<>(); 398 Map<PathHeader, String> same = new TreeMap<>(); 399 for (String path : paths) { 400 Level levelOld = cvOld.getLevel(path); 401 Level levelNew = cvNew.getLevel(path); 402 if (levelOld != levelNew) { 403 diff.put( 404 phf.fromPath(path), 405 locale + "\t" + levelOld + "\t" + levelNew + "\t" + path); 406 } else if (levelOld.compareTo(Level.MODERATE) < 0) { 407 same.put(phf.fromPath(path), locale + "\t" + path); 408 } 409 } 410 System.out.println("\nLocale\tPath\tPathHeader"); 411 for (Entry<PathHeader, String> line : same.entrySet()) { 412 System.out.println(line.getValue() + "\t" + line.getKey()); 413 } 414 System.out.println("\nLocale\tOld\tNew\tPath\tPathHeader"); 415 for (Entry<PathHeader, String> line : diff.entrySet()) { 416 System.out.println(line.getValue() + "\t" + line.getKey()); 417 } 418 } 419 } 420