1 package org.unicode.cldr.unittest; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.collect.ImmutableListMultimap; 5 import com.google.common.collect.ImmutableMap; 6 import com.google.common.collect.ImmutableSet; 7 import com.google.common.collect.LinkedHashMultimap; 8 import com.google.common.collect.Multimap; 9 import com.google.common.collect.Sets; 10 import com.google.common.collect.TreeMultimap; 11 import com.ibm.icu.impl.Relation; 12 import com.ibm.icu.impl.Row; 13 import com.ibm.icu.impl.Row.R2; 14 import com.ibm.icu.impl.Row.R4; 15 import com.ibm.icu.text.CompactDecimalFormat; 16 import com.ibm.icu.text.CompactDecimalFormat.CompactStyle; 17 import com.ibm.icu.text.Transform; 18 import com.ibm.icu.util.Calendar; 19 import com.ibm.icu.util.Output; 20 import com.ibm.icu.util.ULocale; 21 import java.util.Arrays; 22 import java.util.Collection; 23 import java.util.Collections; 24 import java.util.Date; 25 import java.util.EnumSet; 26 import java.util.HashMap; 27 import java.util.HashSet; 28 import java.util.LinkedHashSet; 29 import java.util.List; 30 import java.util.Map; 31 import java.util.Map.Entry; 32 import java.util.Set; 33 import java.util.TreeMap; 34 import java.util.TreeSet; 35 import java.util.regex.Pattern; 36 import org.unicode.cldr.draft.ScriptMetadata; 37 import org.unicode.cldr.test.CoverageLevel2; 38 import org.unicode.cldr.tool.LikelySubtags; 39 import org.unicode.cldr.util.CLDRConfig; 40 import org.unicode.cldr.util.CLDRFile; 41 import org.unicode.cldr.util.CLDRLocale; 42 import org.unicode.cldr.util.CLDRPaths; 43 import org.unicode.cldr.util.ChainedMap; 44 import org.unicode.cldr.util.ChainedMap.M4; 45 import org.unicode.cldr.util.Counter2; 46 import org.unicode.cldr.util.DtdData; 47 import org.unicode.cldr.util.DtdData.Element; 48 import org.unicode.cldr.util.DtdType; 49 import org.unicode.cldr.util.GrammarInfo; 50 import org.unicode.cldr.util.LanguageTagParser; 51 import org.unicode.cldr.util.Level; 52 import org.unicode.cldr.util.LocaleNames; 53 import org.unicode.cldr.util.LogicalGrouping; 54 import org.unicode.cldr.util.LogicalGrouping.PathType; 55 import org.unicode.cldr.util.Organization; 56 import org.unicode.cldr.util.PathHeader; 57 import org.unicode.cldr.util.PathHeader.Factory; 58 import org.unicode.cldr.util.PathStarrer; 59 import org.unicode.cldr.util.PatternCache; 60 import org.unicode.cldr.util.RegexLookup; 61 import org.unicode.cldr.util.RegexLookup.Finder; 62 import org.unicode.cldr.util.StandardCodes; 63 import org.unicode.cldr.util.SupplementalDataInfo; 64 import org.unicode.cldr.util.SupplementalDataInfo.CoverageVariableInfo; 65 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo; 66 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus; 67 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData; 68 import org.unicode.cldr.util.VoteResolver; 69 import org.unicode.cldr.util.XPathParts; 70 71 public class TestCoverageLevel extends TestFmwkPlus { 72 73 private static final boolean SHOW_LSR_DATA = false; 74 75 private static CLDRConfig testInfo = CLDRConfig.getInstance(); 76 private static final StandardCodes STANDARD_CODES = StandardCodes.make(); 77 private static final CLDRFile ENGLISH = testInfo.getEnglish(); 78 private static final SupplementalDataInfo SDI = testInfo.getSupplementalDataInfo(); 79 private static final String TC_VOTES = 80 Integer.toString(VoteResolver.Level.tc.getVotes(Organization.apple)); 81 main(String[] args)82 public static void main(String[] args) { 83 new TestCoverageLevel().run(args); 84 } 85 testSpecificPaths()86 public void testSpecificPaths() { 87 String[][] rows = { 88 { 89 "//ldml/characters/parseLenients[@scope=\"number\"][@level=\"lenient\"]/parseLenient[@sample=\",\"]", 90 "moderate", 91 TC_VOTES 92 } 93 }; 94 doSpecificPathTest("fr", rows); 95 } 96 testSpecificPathsPersCal()97 public void testSpecificPathsPersCal() { 98 String[][] rows = { 99 { 100 "//ldml/dates/calendars/calendar[@type=\"persian\"]/eras/eraAbbr/era[@type=\"0\"]", 101 "moderate", 102 "4" 103 }, 104 { 105 "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"1\"]", 106 "moderate", 107 "4" 108 } 109 }; 110 doSpecificPathTest("ckb_IR", rows); 111 } 112 testSpecificPathsDeFormatLength()113 public void testSpecificPathsDeFormatLength() { 114 String[][] rows = { 115 /* For German (de) these should be high-bar (20) per https://unicode-org.atlassian.net/browse/CLDR-14988 */ 116 { 117 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"one\"]", 118 "modern", 119 TC_VOTES 120 }, 121 { 122 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"other\"]", 123 "modern", 124 TC_VOTES 125 }, 126 { 127 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"one\"]", 128 "modern", 129 TC_VOTES 130 }, 131 { 132 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"other\"]", 133 "modern", 134 TC_VOTES 135 }, 136 { 137 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"one\"]", 138 "modern", 139 TC_VOTES 140 }, 141 { 142 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"other\"]", 143 "modern", 144 TC_VOTES 145 }, 146 { 147 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"one\"]", 148 "modern", 149 TC_VOTES 150 }, 151 { 152 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"other\"]", 153 "modern", 154 TC_VOTES 155 }, 156 { 157 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"one\"]", 158 "modern", 159 TC_VOTES 160 }, 161 { 162 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"other\"]", 163 "modern", 164 TC_VOTES 165 }, 166 { 167 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"one\"]", 168 "modern", 169 TC_VOTES 170 }, 171 { 172 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"other\"]", 173 "modern", 174 TC_VOTES 175 }, 176 /* not high-bar (20): wrong number of zeroes, or count many*/ 177 { 178 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100\"][@count=\"other\"]", 179 "comprehensive", 180 "8" 181 }, 182 { 183 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000000\"][@count=\"other\"]", 184 "modern", 185 "8" 186 }, 187 { 188 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"many\"]", 189 "modern", 190 "8" 191 }, 192 }; 193 doSpecificPathTest("de", rows); 194 } 195 doSpecificPathTest(String localeStr, String[][] rows)196 private void doSpecificPathTest(String localeStr, String[][] rows) { 197 Factory phf = PathHeader.getFactory(ENGLISH); 198 CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(SDI, localeStr); 199 CLDRLocale loc = CLDRLocale.getInstance(localeStr); 200 for (String[] row : rows) { 201 String path = row[0]; 202 Level expectedLevel = Level.fromString(row[1]); 203 Level level = coverageLevel.getLevel(path); 204 assertEquals("Level for " + path, expectedLevel, level); 205 206 int expectedRequiredVotes = Integer.parseInt(row[2]); 207 int votes = SDI.getRequiredVotes(loc, phf.fromPath(path)); 208 assertEquals("Votes for " + path, expectedRequiredVotes, votes); 209 } 210 } 211 oldTestInvariantPaths()212 public void oldTestInvariantPaths() { 213 org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory(); 214 PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*"); 215 SupplementalDataInfo sdi = 216 SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); 217 218 Set<String> allPaths = new HashSet<>(); 219 M4<String, String, Level, Boolean> starredToLocalesToLevels = 220 ChainedMap.of( 221 new TreeMap<String, Object>(), 222 new TreeMap<String, Object>(), 223 new TreeMap<Level, Object>(), 224 Boolean.class); 225 226 for (String locale : factory.getAvailableLanguages()) { 227 logln(locale); 228 CLDRFile cldrFileToCheck = factory.make(locale, true); 229 for (String path : cldrFileToCheck.fullIterable()) { 230 allPaths.add(path); 231 String starred = pathStarrer.set(path); 232 Level level = sdi.getCoverageLevel(path, locale); 233 starredToLocalesToLevels.put(starred, locale, level, true); 234 } 235 } 236 237 Set<Level> levelsFound = EnumSet.noneOf(Level.class); 238 Set<String> localesWithUniqueLevels = new TreeSet<>(); 239 for (Entry<String, Map<String, Map<Level, Boolean>>> entry : starredToLocalesToLevels) { 240 String starred = entry.getKey(); 241 Map<String, Map<Level, Boolean>> localesToLevels = entry.getValue(); 242 int maxLevelCount = 0; 243 double localeCount = 0; 244 levelsFound.clear(); 245 localesWithUniqueLevels.clear(); 246 247 for (Entry<String, Map<Level, Boolean>> entry2 : localesToLevels.entrySet()) { 248 String locale = entry2.getKey(); 249 Map<Level, Boolean> levels = entry2.getValue(); 250 levelsFound.addAll(levels.keySet()); 251 if (levels.size() > maxLevelCount) { 252 maxLevelCount = levels.size(); 253 } 254 if (levels.size() == 1) { 255 localesWithUniqueLevels.add(locale); 256 } 257 localeCount++; 258 } 259 System.out.println( 260 maxLevelCount 261 + "\t" 262 + localesWithUniqueLevels.size() / localeCount 263 + "\t" 264 + starred 265 + "\t" 266 + Joiner.on(", ").join(levelsFound) 267 + "\t" 268 + (maxLevelCount == 1 269 ? "all" 270 : localesWithUniqueLevels.size() == 0 271 ? "none" 272 : Joiner.on(", ").join(localesWithUniqueLevels))); 273 } 274 } 275 276 enum LanguageStatus { 277 Lit100M("P1"), 278 Lit10MandOfficial("P2"), 279 Lit1MandOneThird("P3"); 280 final String name; 281 LanguageStatus(String name)282 LanguageStatus(String name) { 283 this.name = name; 284 } 285 } 286 287 static Relation<String, LanguageStatus> languageStatus = 288 Relation.of(new HashMap<String, Set<LanguageStatus>>(), TreeSet.class); 289 static Counter2<String> languageLiteratePopulation = new Counter2<>(); 290 static Map<String, Date> currencyToLast = new HashMap<>(); 291 static Set<String> officialSomewhere = new HashSet<>(); 292 293 static { 294 Counter2<String> territoryLiteratePopulation = new Counter2<>(); 295 LanguageTagParser parser = new LanguageTagParser(); 296 // cf 297 // http://cldr.unicode.org/development/development-process/design-proposals/languages-to-show-for-translation 298 for (String language : SDI.getLanguagesForTerritoriesPopulationData()) { 299 String base = parser.set(language).getLanguage(); 300 boolean isOfficial = false; 301 double languageLiterate = 0; 302 for (String territory : SDI.getTerritoriesForPopulationData(language)) { 303 PopulationData pop = SDI.getLanguageAndTerritoryPopulationData(language, territory); 304 OfficialStatus officialStatus = pop.getOfficialStatus(); 305 if (officialStatus.compareTo(OfficialStatus.de_facto_official) >= 0) { 306 isOfficial = true; 307 languageStatus.put(base + "_" + territory, LanguageStatus.Lit10MandOfficial); 308 officialSomewhere.add(base); 309 } 310 double litPop = pop.getLiteratePopulation(); 311 languageLiterate += litPop; territoryLiteratePopulation.add(territory, litPop)312 territoryLiteratePopulation.add(territory, litPop); 313 languageLiteratePopulation.add(base + "_" + territory, litPop); 314 } languageLiteratePopulation.add(base, languageLiterate)315 languageLiteratePopulation.add(base, languageLiterate); 316 if (languageLiterate > 100000000) { languageStatus.put(base, LanguageStatus.Lit100M)317 languageStatus.put(base, LanguageStatus.Lit100M); 318 } 319 if (languageLiterate > 10000000 && isOfficial) { languageStatus.put(base, LanguageStatus.Lit10MandOfficial)320 languageStatus.put(base, LanguageStatus.Lit10MandOfficial); 321 } 322 } 323 for (String language : SDI.getLanguagesForTerritoriesPopulationData()) { 324 if (languageLiteratePopulation.getCount(language) < 1000000) { 325 continue; 326 } 327 String base = parser.set(language).getLanguage(); 328 for (String territory : SDI.getTerritoriesForPopulationData(language)) { 329 PopulationData pop = SDI.getLanguageAndTerritoryPopulationData(language, territory); 330 double litPop = pop.getLiteratePopulation(); 331 double total = territoryLiteratePopulation.getCount(territory); 332 if (litPop > total / 3) { languageStatus.put(base, LanguageStatus.Lit1MandOneThird)333 languageStatus.put(base, LanguageStatus.Lit1MandOneThird); 334 } 335 } 336 } 337 for (String territory : STANDARD_CODES.getAvailableCodes("territory")) { 338 Set<CurrencyDateInfo> cdateInfo = SDI.getCurrencyDateInfo(territory); 339 if (cdateInfo == null) { 340 continue; 341 } 342 for (CurrencyDateInfo dateInfo : cdateInfo) { 343 String currency = dateInfo.getCurrency(); 344 Date last = dateInfo.getEnd(); 345 Date old = currencyToLast.get(currency); 346 if (old == null || old.compareTo(last) < 0) { currencyToLast.put(currency, last)347 currencyToLast.put(currency, last); 348 } 349 } 350 } 351 } 352 353 static CompactDecimalFormat cdf = 354 CompactDecimalFormat.getInstance(ULocale.ENGLISH, CompactStyle.SHORT); 355 isBigLanguage(String lang)356 static String isBigLanguage(String lang) { 357 Set<LanguageStatus> status = languageStatus.get(lang); 358 Double size = languageLiteratePopulation.getCount(lang); 359 String sizeString = size == null ? "?" : cdf.format(size); 360 String off = officialSomewhere.contains(lang) ? "o" : ""; 361 if (status == null || status.isEmpty()) { 362 return "P4-" + sizeString + off; 363 } 364 return status.iterator().next().name + "-" + sizeString + off; 365 } 366 367 static final Date NOW = new Date(); 368 369 private static final boolean DEBUG = false; 370 371 static class TypeName implements Transform<String, String> { 372 private final int field; 373 private final Map<String, R2<List<String>, String>> dep; 374 TypeName(int field)375 public TypeName(int field) { 376 this.field = field; 377 switch (field) { 378 case CLDRFile.LANGUAGE_NAME: 379 dep = SDI.getLocaleAliasInfo().get("language"); 380 break; 381 case CLDRFile.TERRITORY_NAME: 382 dep = SDI.getLocaleAliasInfo().get("territory"); 383 break; 384 case CLDRFile.SCRIPT_NAME: 385 dep = SDI.getLocaleAliasInfo().get("script"); 386 break; 387 default: 388 dep = null; 389 break; 390 } 391 } 392 393 @Override transform(String source)394 public String transform(String source) { 395 String result = ENGLISH.getName(field, source); 396 String extra = ""; 397 if (field == CLDRFile.LANGUAGE_NAME) { 398 String lang = isBigLanguage(source); 399 extra = lang == null ? "X" : lang; 400 } else if (field == CLDRFile.CURRENCY_NAME) { 401 Date last = currencyToLast.get(source); 402 extra = last == null ? "?" : last.compareTo(NOW) < 0 ? "old" : ""; 403 } 404 R2<List<String>, String> depValue = dep == null ? null : dep.get(source); 405 if (depValue != null) { 406 extra += extra.isEmpty() ? "" : "-"; 407 extra += depValue.get1(); 408 } 409 return result + (extra.isEmpty() ? "" : "\t" + extra); 410 } 411 } 412 413 RegexLookup<Level> exceptions = 414 RegexLookup.of( 415 null, 416 new Transform<String, Level>() { 417 @Override 418 public Level transform(String source) { 419 return Level.fromLevel(Integer.parseInt(source)); 420 } 421 }, 422 null) 423 .loadFromFile(TestCoverageLevel.class, "TestCoverageLevel.txt"); 424 425 public void TestExceptions() { 426 for (Map.Entry<Finder, Level> x : exceptions) { 427 logln(x.getKey().toString() + " => " + x.getValue()); 428 } 429 } 430 431 public void TestNarrowCurrencies() { 432 String path = "//ldml/numbers/currencies/currency[@type=\"USD\"]/symbol[@alt=\"narrow\"]"; 433 String value = ENGLISH.getStringValue(path); 434 assertEquals("Narrow $", "$", value); 435 SupplementalDataInfo sdi = 436 SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); 437 Level level = sdi.getCoverageLevel(path, "en"); 438 assertEquals("Narrow $", Level.MODERATE, level); 439 } 440 441 public void TestA() { 442 String path = "//ldml/characterLabels/characterLabel[@type=\"other\"]"; 443 SupplementalDataInfo sdi = 444 SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY); 445 Level level = sdi.getCoverageLevel(path, "en"); 446 assertEquals("Quick Check for any attribute", Level.MODERN, level); 447 } 448 449 public void TestCoverageCompleteness() { 450 /** 451 * Check that English paths are, except for known cases, at least modern coverage. We filter 452 * out the things we know about and have determined are OK to be in comprehensive. If we add 453 * a path that doesn't get its coverage set, this test should complain about it. 454 */ 455 final ImmutableSet<String> inactiveMetazones = 456 ImmutableSet.of( 457 "Greenland", // TODO: New metazone added for tz2023d update, 458 // In CLDR 45, we don't want to include this one in modern coverage because 459 // we don't open ST for translating display names for this metazone. 460 // After 45, we will include "Greenland" in modern coverage. 461 "Bering", 462 "Dominican", 463 "Shevchenko", 464 "Alaska_Hawaii", 465 "Yerevan", 466 "Africa_FarWestern", 467 "British", 468 "Sverdlovsk", 469 "Karachi", 470 "Malaya", 471 "Oral", 472 "Frunze", 473 "Dutch_Guiana", 474 "Irish", 475 "Uralsk", 476 "Tashkent", 477 "Kwajalein", 478 "Ashkhabad", 479 "Kizilorda", 480 "Kuybyshev", 481 "Baku", 482 "Dushanbe", 483 "Goose_Bay", 484 "Liberia", 485 "Samarkand", 486 "Tbilisi", 487 "Borneo", 488 "Greenland_Central", 489 "Dacca", 490 "Aktyubinsk", 491 "Turkey", 492 "Urumqi", 493 "Acre", 494 "Almaty", 495 "Anadyr", 496 "Aqtau", 497 "Aqtobe", 498 "Kamchatka", 499 "Macau", 500 "Qyzylorda", 501 "Samara", 502 "Casey", 503 "Guam", 504 "Lanka", 505 "North_Mariana"); 506 507 final Pattern calendar100 = 508 PatternCache.get("(coptic|ethiopic-amete-alem|islamic-(rgsa|tbla|umalqura))"); 509 510 // Warning: shorter strings must come AFTER longer ones. Can process with MinimizeRegex to 511 // reorder 512 final Pattern language100 = 513 PatternCache.get( 514 "(" // start 515 + "nds_NL|fa_AF|ro_MD|sr_ME|sw_CD" 516 // Length 4 517 + "|root" 518 // Length 3 519 + "|ace|ach|ada|ady|aeb|afh|agq|ain|akk|akz|ale|aln|alt|ang|ann|anp|apc|arc|arn|aro|arp|arq|ars|arw|ary|arz|asa|ase|atj|avk|awa" 520 + "|bal|ban|bar|bax|bbc|bbj|bej|bem|bew|bez|bfd|bfq|bgc|bgn|bho|bik|bin|bjn|bkm|bla|blo|blt|bpy|bqi|bra|brh|bss|bua|bug|bum|byn|byv" 521 + "|cad|car|cay|cch|ccp|cgg|chb|chg|chk|chm|chn|cho|chp|chy|cic|ckb|clc|cop|cps|crg|crh|crj|crk|crl|crm|crr|crs|csb|csw|cwd" 522 + "|dak|dar|dav|del|den|dgr|din|dje|doi|dtp|dua|dum|dyo|dyu|dzg" 523 + "|ebu|efi|egl|egy|eka|elx|enm|esu|ext|fan|fat|fit|fon|frc|frm|fro|frp|frr|frs|fur" 524 + "|gaa|gag|gan|gay|gba|gbz|gez|gil|glk|gmh|goh|gom|gon|gor|got|grb|grc|gsw|guc|gur|guz|gwi" 525 + "|hai|hak|haw|hax|hdn|hif|hil|hit|hnj|hsn|hup|hur|iba|ilo|inh|izh|jam|jbo|jgo|jmc|jpr|jrb|jut" 526 + "|kaa|kab|kac|kaj|kam|kaw|kbd|kbl|kcg|kde|ken|kfo|kgp|kha|kho|khq|khw|kiu|kln|kmb|koi|kos|kpe|krc|kri|krj|krl|kru|ksb|ksf|ksh|kum|kut|kwk|kxv" 527 + "|lad|lag|lah|lam|lez|lfn|lij|lil|liv|lkt|lmo|lol|lou|loz|lrc|ltg|lua|lui|lun|luo|lus|luy|lzh|lzz" 528 + "|mad|maf|mag|mai|mak|man|mas|mde|mdf|mdr|men|mer|mfe|mga|mgh|mgo|mic|min|mnc|mni|moe|moh|mos|mrj|mua|mus|mwl|mwr|mwv|mye|myv|mzn" 529 + "|nan|nap|naq|nds|new|nia|niu|njo|nmg|nog|non|nov|nqo|nso|nus|nwc|nym|nyn|nyo|nzi|oka|osa|ota" 530 + "|pag|pal|pam|pap|pau|pcd|pcm|pdc|pdt|peo|pfl|phn|pms|pnt|pon|pqm|prg|pro|quc|qug|raj|rap|rar|rgn|rif|rof|rom|rtm|rue|rug|rup|rwk" 531 + "|sad|sam|saq|sas|sat|saz|sba|sbp|sdc|sdh|see|seh|sei|sel|ses|sga|sgs|shi|shn|shu|sid|skr|slh|sli|sly|sma|smj|smn|sms|snk|sog|srn|srr|stq|str|suk|sus|sux|swb|syc|syr|szl" 532 + "|tce|tcy|tem|teo|ter|tet|tgx|tht|tig|tiv|tkl|tkr|tlh|tli|tly|tmh|tog|tok|tpi|tru|trv|trw|tsd|tsi|ttm|ttt|tum|tvl|tzm" 533 + "|udm|uga|umb|vai|vec|vep|vls|vmf|vmw|vot|vro|vun|wae|wal|war|was|wbp|wuu|xal|xmf|xnr|xog|yao|yap|yrl|zap|zbl|zea|zen|zgh|zun|zza" 534 + "|ike|ojg|ssy|pis|twq" 535 // Length 2 536 + "|aa|ab|ae|ak|an|av|ay|ba|bi|bm|bo|ce|ch|cr|cu|cv|dv|dz|ee|eo|fj|gn|gv|ho|hz|ie|ii|ik|io|iu|kg|ki|kj|kl|kv|kw|lg|li|ln|lu" 537 + "|mg|mh|na|nb|nd|ng|no|nr|nv|oc|oj|om|os|pi|rn|rw|sc|se|sg|sh|sn|ss|tl|tn|ts|tw|ty|ve|vo|wa|yi|za" 538 // end 539 + ")"); 540 541 /** 542 * Recommended scripts that are allowed for comprehensive coverage. Not-recommended scripts 543 * (according to ScriptMetadata) are filtered out automatically. 544 */ 545 final Pattern script100 = PatternCache.get("(Zinh)"); 546 547 final Pattern keys100 = 548 PatternCache.get( 549 "(col(Alternate|Backwards|CaseFirst|CaseLevel|HiraganaQuaternary|" 550 + "Normalization|Numeric|Reorder|Strength)|kv|sd|mu|timezone|va|variableTop|x|d0|h0|i0|k0|m0|s0)"); 551 552 final Pattern numberingSystem100 = 553 PatternCache.get( 554 "(" 555 + "finance|native|traditional|adlm|ahom|bali|bhks|brah|cakm|cham|cyrl|diak|" 556 + "gong|gonm|hanidays|hmng|hmnp|java|jpanyear|kali|kawi|lana(tham)?|lepc|limb|" 557 + "math(bold|dbl|mono|san[bs])|modi|mong|mroo|mtei|mymr(shan|tlng)|" 558 + "nagm|newa|nkoo|olck|osma|rohg|saur|segment|shrd|sin[dh]|sora|sund|" 559 + "takr|talu|tirh|tnsa|vaii|wara|wcho)"); 560 561 final Pattern collation100 = 562 PatternCache.get( 563 "(" 564 + "big5han|compat|dictionary|emoji|eor|gb2312han|phonebook|phonetic|pinyin|searchjl|stroke|traditional|unihan|zhuyin)"); 565 566 SupplementalDataInfo sdi = testInfo.getSupplementalDataInfo(); 567 CLDRFile english = testInfo.getEnglish(); 568 569 // Calculate date of the upcoming CLDR release, minus 5 years (deprecation policy) 570 final int versionNumber = Integer.valueOf((CLDRFile.GEN_VERSION).split("\\.")[0]); 571 Calendar cal = Calendar.getInstance(); 572 cal.set(versionNumber / 2 + versionNumber % 2 + 2001, 8 - (versionNumber % 2) * 6, 15); 573 Date cldrReleaseMinus5Years = cal.getTime(); 574 Set<String> modernCurrencies = 575 SDI.getCurrentCurrencies(SDI.getCurrencyTerritories(), cldrReleaseMinus5Years, NOW); 576 577 Set<String> needsNumberSystem = new HashSet<>(); 578 DtdData dtdData = DtdData.getInstance(DtdType.ldml); 579 Element numbersElement = dtdData.getElementFromName().get("numbers"); 580 for (Element childOfNumbers : numbersElement.getChildren().keySet()) { 581 if (childOfNumbers.containsAttribute("numberSystem")) { 582 needsNumberSystem.add(childOfNumbers.name); 583 } 584 } 585 586 for (String path : english.fullIterable()) { 587 logln("Testing path => " + path); 588 XPathParts xpp = XPathParts.getFrozenInstance(path); 589 if (path.endsWith("/alias") 590 || path.matches( 591 "//ldml/(identity|contextTransforms|layout|localeDisplayNames/transformNames)/.*")) { 592 continue; 593 } 594 if (sdi.isDeprecated(DtdType.ldml, path)) { 595 continue; 596 } 597 Level lvl = sdi.getCoverageLevel(path, "en"); 598 if (lvl == Level.UNDETERMINED) { 599 errln("Undetermined coverage value for path => " + path); 600 continue; 601 } 602 if (lvl.compareTo(Level.MODERN) <= 0) { 603 logln("Level OK [" + lvl.toString() + "] for path => " + path); 604 continue; 605 } 606 607 if (path.startsWith("//ldml/numbers")) { 608 // Paths in numbering systems outside "latn" are specifically excluded. 609 String numberingSystem = xpp.findFirstAttributeValue("numberSystem"); 610 if (numberingSystem != null && !numberingSystem.equals("latn")) { 611 continue; 612 } 613 if (xpp.containsElement("currencySpacing") || xpp.containsElement("list")) { 614 continue; 615 } 616 if (xpp.containsElement("currency")) { 617 String currencyType = xpp.findAttributeValue("currency", "type"); 618 if (!modernCurrencies.contains(currencyType)) { 619 continue; // old currency or not tender, so we don't care 620 } 621 } 622 // Currently not collecting timeSeparator data in SurveyTool 623 if (xpp.containsElement("timeSeparator")) { 624 continue; 625 } 626 // Other paths in numbers without a numbering system are deprecated. 627 // if (numberingSystem == null) { 628 // continue; 629 // } 630 if (needsNumberSystem.contains(xpp.getElement(2))) { 631 continue; 632 } 633 } else if (xpp.containsElement("zone")) { 634 String zoneType = xpp.findAttributeValue("zone", "type"); 635 if ((zoneType.startsWith("Etc/GMT") || zoneType.equals("Etc/UTC")) 636 && path.endsWith("exemplarCity")) { 637 continue; 638 } 639 // We don't survey for short timezone names or at least some alts 640 if (path.contains("/short/") || path.contains("[@alt=\"formal\"]")) { 641 continue; 642 } 643 } else if (xpp.containsElement("metazone")) { 644 // We don't survey for short metazone names 645 if (path.contains("/short/")) { 646 continue; 647 } 648 String mzName = xpp.findAttributeValue("metazone", "type"); 649 // Skip inactive metazones. 650 if (inactiveMetazones.contains(mzName)) { 651 continue; 652 } 653 // Skip paths for daylight or generic mz strings where 654 // the mz doesn't use DST. 655 if ((path.endsWith("daylight") || path.endsWith("generic")) 656 && !LogicalGrouping.metazonesDSTSet.contains(mzName)) { 657 continue; 658 } 659 } else if (path.startsWith("//ldml/dates/fields")) { 660 if ("variant".equals(xpp.findAttributeValue("displayName", "alt"))) { 661 continue; 662 } 663 // relative day/week/month, etc. short or narrow 664 if (xpp.getElement(-1).equals("relative")) { 665 String fieldType = xpp.findAttributeValue("field", "type"); 666 if (fieldType.matches(".*-(short|narrow)|quarter")) { 667 continue; 668 } 669 } 670 } else if (xpp.containsElement("language")) { 671 // Comprehensive coverage is OK for some languages. 672 String languageType = xpp.findAttributeValue("language", "type"); 673 if (language100.matcher(languageType).matches()) { 674 continue; 675 } 676 } else if (xpp.containsElement("script")) { 677 // Skip user defined script codes and alt=short 678 String scriptType = xpp.findAttributeValue("script", "type"); 679 if (scriptType.startsWith("Q") 680 || "short".equals(xpp.findAttributeValue("script", "alt"))) { 681 continue; 682 } 683 ScriptMetadata.Info scriptInfo = ScriptMetadata.getInfo(scriptType); 684 if (scriptInfo == null 685 || scriptInfo.idUsage != ScriptMetadata.IdUsage.RECOMMENDED) { 686 continue; 687 } 688 if (script100.matcher(scriptType).matches()) { 689 continue; 690 } 691 } else if (xpp.containsElement("territory")) { 692 String territoryType = xpp.findAttributeValue("territory", "type"); 693 if (territoryType.equals("CQ")) { // Exceptionally reserved by ISO-3166 694 continue; 695 } 696 } else if (xpp.containsElement("key")) { 697 // Comprehensive coverage is OK for some key/types. 698 String keyType = xpp.findAttributeValue("key", "type"); 699 if (keys100.matcher(keyType).matches()) { 700 continue; 701 } 702 } else if (xpp.containsElement("type")) { 703 if ("short".equals(xpp.findAttributeValue("type", "alt"))) { 704 continue; 705 } 706 // Comprehensive coverage is OK for some key/types. 707 String keyType = xpp.findAttributeValue("type", "key"); 708 if (keys100.matcher(keyType).matches()) { 709 continue; 710 } 711 if (keyType.equals("numbers")) { 712 String ns = xpp.findAttributeValue("type", "type"); 713 if (numberingSystem100.matcher(ns).matches()) { 714 continue; 715 } 716 } 717 if (keyType.equals("collation")) { 718 String ct = xpp.findAttributeValue("type", "type"); 719 if (collation100.matcher(ct).matches()) { 720 continue; 721 } 722 } 723 if (keyType.equals("calendar")) { 724 String ct = xpp.findAttributeValue("type", "type"); 725 if (calendar100.matcher(ct).matches()) { 726 continue; 727 } 728 } 729 } else if (xpp.containsElement("variant")) { 730 // All variant names are comprehensive coverage 731 continue; 732 } else if (path.startsWith("//ldml/dates/calendars")) { 733 String calType = xpp.findAttributeValue("calendar", "type"); 734 if (!calType.matches("(gregorian|generic)")) { 735 continue; 736 } 737 // So far we are generating datetimeSkeleton mechanically, no coverage 738 if (xpp.containsElement("datetimeSkeleton")) { 739 continue; 740 } 741 // The alt="ascii" time patterns are hopefully short-lived. We do not survey 742 // for them, they can be generated mechanically from the non-alt patterns. 743 // CLDR-16606 744 if (path.contains("[@alt=\"ascii\"]")) { 745 continue; 746 } 747 String element = xpp.getElement(-1); 748 // Skip things that shouldn't normally exist in the generic calendar 749 // days, dayPeriods, quarters, and months 750 if (calType.equals("generic")) { 751 if (element.matches("(day(Period)?|month|quarter|era|appendItem)")) { 752 continue; 753 } 754 if (xpp.containsElement("intervalFormatItem")) { 755 String intervalFormatID = 756 xpp.findAttributeValue("intervalFormatItem", "id"); 757 // "Time" related, so shouldn't be in generic calendar. 758 if (intervalFormatID.matches("(h|H).*")) { 759 continue; 760 } 761 } 762 if (xpp.containsElement("dateFormatItem")) { 763 String dateFormatID = xpp.findAttributeValue("dateFormatItem", "id"); 764 // "Time" related, so shouldn't be in generic calendar. 765 if (dateFormatID.matches("E?(h|H|m).*")) { 766 continue; 767 } 768 } 769 if (xpp.containsElement("timeFormat")) { 770 continue; 771 } 772 } else { // Gregorian calendar 773 if (xpp.containsElement("eraNarrow")) { 774 continue; 775 } 776 if (element.equals("appendItem")) { 777 String request = xpp.findAttributeValue("appendItem", "request"); 778 if (!request.equals("Timezone")) { 779 continue; 780 } 781 } else if (element.equals("dayPeriod")) { 782 if ("variant".equals(xpp.findAttributeValue("dayPeriod", "alt"))) { 783 continue; 784 } 785 } else if (element.equals("dateFormatItem")) { 786 // ldml/dates/calendars/calendar[@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[@id='%dateFormatItems'] 787 assertEquals(path, Level.BASIC, lvl); 788 continue; 789 } 790 } 791 } else if (path.startsWith("//ldml/units")) { 792 // Skip paths for narrow unit fields. 793 if ("narrow".equals(xpp.findAttributeValue("unitLength", "type")) 794 || path.endsWith("/compoundUnitPattern1")) { 795 continue; 796 } 797 } else if (xpp.contains("posix")) { 798 continue; 799 } 800 801 errln("Comprehensive & no exception for path =>\t" + path); 802 } 803 } 804 805 public static class TargetsAndSublocales { 806 public final CoverageVariableInfo cvi; 807 public Set<String> scripts; 808 public Set<String> regions; 809 810 public TargetsAndSublocales(String localeLanguage) { 811 cvi = SDI.getCoverageVariableInfo(localeLanguage); 812 scripts = new TreeSet<>(); 813 regions = new TreeSet<>(); 814 } 815 816 public boolean addScript(String localeScript) { 817 return scripts.add(localeScript); 818 } 819 820 public boolean addRegion(String localeRegion) { 821 return regions.add(localeRegion); 822 } 823 } 824 825 public void TestCoverageVariableInfo() { 826 /** 827 * Compare the targetScripts and targetTerritories for a language to what we actually have 828 * in locales 829 */ 830 Map<String, TargetsAndSublocales> langToTargetsAndSublocales = new TreeMap<>(); 831 org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory(); 832 for (CLDRLocale locale : factory.getAvailableCLDRLocales()) { 833 String language = locale.getLanguage(); 834 if (language.length() == 0 || language.equals("root")) { 835 continue; 836 } 837 TargetsAndSublocales targetsAndSublocales = langToTargetsAndSublocales.get(language); 838 if (targetsAndSublocales == null) { 839 targetsAndSublocales = new TargetsAndSublocales(language); 840 langToTargetsAndSublocales.put(language, targetsAndSublocales); 841 } 842 String script = locale.getScript(); 843 if (script.length() > 0) { 844 targetsAndSublocales.addScript(script); 845 } 846 String region = locale.getCountry(); 847 if (region.length() > 0 848 && region.length() < 3) { // do not want numeric codes like 001, 419 849 targetsAndSublocales.addRegion(region); 850 } 851 } 852 853 for (String language : langToTargetsAndSublocales.keySet()) { 854 TargetsAndSublocales targetsAndSublocales = langToTargetsAndSublocales.get(language); 855 if (targetsAndSublocales == null) { 856 continue; 857 } 858 Set<String> targetScripts = new TreeSet<>(targetsAndSublocales.cvi.targetScripts); 859 Set<String> localeScripts = targetsAndSublocales.scripts; 860 localeScripts.removeAll(targetScripts); 861 if (localeScripts.size() > 0) { 862 errln( 863 "Missing scripts for language: " 864 + language 865 + ", target scripts: " 866 + targetScripts 867 + ", but locales also have: " 868 + localeScripts); 869 } 870 Set<String> targetRegions = new TreeSet<>(targetsAndSublocales.cvi.targetTerritories); 871 Set<String> localeRegions = targetsAndSublocales.regions; 872 localeRegions.removeAll(targetRegions); 873 if (localeRegions.size() > 0) { 874 errln( 875 "Missing regions for language: " 876 + language 877 + ", target regions: " 878 + targetRegions 879 + ", but locales also have: " 880 + localeRegions); 881 } 882 } 883 } 884 testBreakingLogicalGrouping()885 public void testBreakingLogicalGrouping() { 886 checkBreakingLogicalGrouping("en"); 887 checkBreakingLogicalGrouping("ar"); 888 checkBreakingLogicalGrouping("de"); 889 checkBreakingLogicalGrouping("pl"); 890 } 891 checkBreakingLogicalGrouping(String localeId)892 private void checkBreakingLogicalGrouping(String localeId) { 893 SupplementalDataInfo sdi = testInfo.getSupplementalDataInfo(); 894 CLDRFile cldrFile = testInfo.getCldrFactory().make(localeId, true); 895 HashSet<String> seen = new HashSet<>(); 896 Multimap<Level, String> levelToPaths = TreeMultimap.create(); 897 int count = 0; 898 for (String path : cldrFile.fullIterable()) { 899 if (seen.contains(path)) { 900 continue; 901 } 902 Set<String> grouping = LogicalGrouping.getPaths(cldrFile, path); 903 seen.add(path); 904 if (grouping == null) { 905 continue; 906 } 907 seen.addAll(grouping); 908 levelToPaths.clear(); 909 for (String groupingPath : grouping) { 910 if (LogicalGrouping.isOptional(cldrFile, groupingPath)) { 911 continue; 912 } 913 Level level = sdi.getCoverageLevel(groupingPath, localeId); 914 levelToPaths.put(level, groupingPath); 915 } 916 if (levelToPaths.keySet().size() <= 1) { 917 continue; 918 } 919 // we have a failure 920 for (Entry<Level, Collection<String>> entry : levelToPaths.asMap().entrySet()) { 921 errln( 922 localeId 923 + " (" 924 + count 925 + ") Broken Logical Grouping: " 926 + entry.getKey() 927 + " => " 928 + entry.getValue()); 929 } 930 ++count; 931 } 932 } 933 testLogicalGroupingSamples()934 public void testLogicalGroupingSamples() { 935 getLogger().fine(GrammarInfo.getGrammarLocales().toString()); 936 String[][] test = { 937 { 938 "de", "SINGLETON", "//ldml/localeDisplayNames/localeDisplayPattern/localePattern", 939 }, 940 { 941 "de", 942 "METAZONE", 943 "//ldml/dates/timeZoneNames/metazone[@type=\"Alaska\"]/long/generic", 944 "//ldml/dates/timeZoneNames/metazone[@type=\"Alaska\"]/long/standard", 945 "//ldml/dates/timeZoneNames/metazone[@type=\"Alaska\"]/long/daylight", 946 }, 947 { 948 "de", 949 "DAYS", 950 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"sun\"]", 951 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"mon\"]", 952 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"tue\"]", 953 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"wed\"]", 954 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"thu\"]", 955 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"fri\"]", 956 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"sat\"]", 957 }, 958 { 959 "nl", 960 "DAY_PERIODS", 961 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"morning1\"]", 962 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"afternoon1\"]", 963 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"evening1\"]", 964 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"night1\"]", 965 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"midnight\"]", 966 }, 967 { 968 "de", 969 "QUARTERS", 970 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"1\"]", 971 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"2\"]", 972 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"3\"]", 973 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"4\"]", 974 }, 975 { 976 "de", 977 "MONTHS", 978 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"1\"]", 979 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"2\"]", 980 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"3\"]", 981 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"4\"]", 982 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"5\"]", 983 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"6\"]", 984 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"7\"]", 985 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"8\"]", 986 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"9\"]", 987 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"10\"]", 988 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"11\"]", 989 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"12\"]", 990 }, 991 { 992 "de", 993 "RELATIVE", 994 "//ldml/dates/fields/field[@type=\"week-short\"]/relative[@type=\"-1\"]", 995 "//ldml/dates/fields/field[@type=\"week-short\"]/relative[@type=\"0\"]", 996 "//ldml/dates/fields/field[@type=\"week-short\"]/relative[@type=\"1\"]", 997 }, 998 { 999 "de", 1000 "DECIMAL_FORMAT_LENGTH", 1001 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"one\"]", 1002 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"other\"]", 1003 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"one\"]", 1004 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"other\"]", 1005 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"one\"]", 1006 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"other\"]", 1007 }, 1008 { 1009 "cs", 1010 "COUNT", 1011 "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"one\"]", 1012 "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"few\"]", 1013 "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"many\"]", 1014 "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"other\"]", 1015 }, 1016 { 1017 "de", 1018 "COUNT", 1019 "//ldml/numbers/minimalPairs/pluralMinimalPairs[@count=\"one\"]", 1020 "//ldml/numbers/minimalPairs/pluralMinimalPairs[@count=\"other\"]", 1021 }, 1022 { 1023 "de", 1024 "COUNT_CASE", 1025 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"][@case=\"accusative\"]", 1026 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"][@case=\"dative\"]", 1027 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"][@case=\"genitive\"]", 1028 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"]", 1029 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"][@case=\"accusative\"]", 1030 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"][@case=\"dative\"]", 1031 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"][@case=\"genitive\"]", 1032 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"]", 1033 }, 1034 { 1035 "hi", 1036 "COUNT_CASE_GENDER", 1037 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]", 1038 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"]", 1039 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]", 1040 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"]", 1041 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"oblique\"]", 1042 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"oblique\"]", 1043 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"oblique\"]", 1044 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"oblique\"]" 1045 } 1046 }; 1047 Set<PathType> seenPt = new TreeSet<>(Arrays.asList(PathType.values())); 1048 for (String[] row : test) { 1049 String locale = row[0]; 1050 PathType expectedPathType = PathType.valueOf(row[1]); 1051 CLDRFile cldrFile = testInfo.getCldrFactory().make(locale, true); 1052 List<String> paths = Arrays.asList(row); 1053 paths = paths.subList(2, paths.size()); 1054 Set<String> expected = new TreeSet<>(paths); 1055 Set<Multimap<String, String>> seen = new LinkedHashSet<>(); 1056 for (String path : expected) { 1057 Set<String> grouping = new TreeSet<>(LogicalGrouping.getPaths(cldrFile, path)); 1058 final Multimap<String, String> deltaValue = delta(expected, grouping); 1059 if (seen.add(deltaValue)) { 1060 assertEquals( 1061 "Logical group for " + locale + ", " + path, 1062 ImmutableListMultimap.of(), 1063 deltaValue); 1064 } 1065 PathType actualPathType = PathType.getPathTypeFromPath(path); 1066 assertEquals("PathType", expectedPathType, actualPathType); 1067 } 1068 seenPt.remove(expectedPathType); 1069 } 1070 assertEquals("PathTypes tested", Collections.emptySet(), seenPt); 1071 } 1072 delta(Set<String> expected, Set<String> grouping)1073 private Multimap<String, String> delta(Set<String> expected, Set<String> grouping) { 1074 if (expected.equals(grouping)) { 1075 return ImmutableListMultimap.of(); 1076 } 1077 Multimap<String, String> result = LinkedHashMultimap.create(); 1078 TreeSet<String> aMinusB = new TreeSet<>(expected); 1079 aMinusB.removeAll(grouping); 1080 result.putAll("expected-actual", aMinusB); 1081 TreeSet<String> bMinusA = new TreeSet<>(grouping); 1082 bMinusA.removeAll(expected); 1083 result.putAll("actual-expected", bMinusA); 1084 return result; 1085 } 1086 1087 static class CoverageStatus { 1088 1089 private Level level; 1090 private boolean inRoot; 1091 private boolean inId; 1092 private Level languageLevel; 1093 private String displayName; 1094 CoverageStatus( Level level, boolean inRoot, boolean inId, Level languageLevel, String displayName)1095 public CoverageStatus( 1096 Level level, 1097 boolean inRoot, 1098 boolean inId, 1099 Level languageLevel, 1100 String displayName) { 1101 this.level = level; 1102 this.inRoot = inRoot; 1103 this.inId = inId; 1104 this.languageLevel = languageLevel == null ? Level.UNDETERMINED : languageLevel; 1105 this.displayName = displayName; 1106 } 1107 1108 @Override toString()1109 public String toString() { 1110 return (inRoot ? "root" : "x") 1111 + "\t" 1112 + (inId ? "ids" : "x") 1113 + "\t" 1114 + stringForm(languageLevel) 1115 + "\t" 1116 + stringForm(level) 1117 + "\t" 1118 + displayName; 1119 } 1120 stringForm(Level level2)1121 private String stringForm(Level level2) { 1122 if (level == null) { 1123 return "υnd"; 1124 } 1125 switch (level2) { 1126 case UNDETERMINED: 1127 return "υnd"; 1128 case COMPREHENSIVE: 1129 return "ϲomp"; 1130 default: 1131 return level2.toString(); 1132 } 1133 } 1134 } 1135 testLSR()1136 public void testLSR() { 1137 SupplementalDataInfo supplementalData = testInfo.getSupplementalDataInfo(); 1138 org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory(); 1139 CLDRFile root = factory.make(LocaleNames.ROOT, true); 1140 CoverageLevel2 coverageLevel = 1141 CoverageLevel2.getInstance(supplementalData, "qtz"); // non-existent locale 1142 1143 Set<String> langsRoot = new TreeSet<>(); 1144 Set<String> scriptsRoot = new TreeSet<>(); 1145 Set<String> regionsRoot = new TreeSet<>(); 1146 1147 // Get root LSR codes 1148 1149 for (String path : root) { 1150 if (!path.startsWith("//ldml/localeDisplayNames/")) { 1151 continue; 1152 } 1153 XPathParts parts = XPathParts.getFrozenInstance(path); 1154 String code = parts.getAttributeValue(3, "type"); 1155 if (code == null || code.contains("_")) { 1156 continue; 1157 } 1158 switch (parts.getElement(3)) { 1159 case "language": 1160 langsRoot.add(code); 1161 break; 1162 case "script": 1163 scriptsRoot.add(code); 1164 break; 1165 case "territory": 1166 regionsRoot.add(code); 1167 break; 1168 } 1169 } 1170 langsRoot = ImmutableSet.copyOf(langsRoot); 1171 scriptsRoot = ImmutableSet.copyOf(scriptsRoot); 1172 regionsRoot = ImmutableSet.copyOf(regionsRoot); 1173 1174 // get CLDR locale IDs' codes 1175 1176 Map<String, Level> langs = new TreeMap<>(); 1177 Map<String, Level> scripts = new TreeMap<>(); 1178 Map<String, Level> regions = new TreeMap<>(); 1179 LikelySubtags likely = new LikelySubtags(); 1180 1181 LanguageTagParser ltp = new LanguageTagParser(); 1182 for (String locale : factory.getAvailable()) { 1183 Level languageLevel = STANDARD_CODES.getLocaleCoverageLevel(Organization.cldr, locale); 1184 if (languageLevel == null || languageLevel == Level.UNDETERMINED) { 1185 languageLevel = Level.CORE; 1186 } 1187 ltp.set(locale); 1188 likely.maximize(ltp); 1189 addBestLevel(langs, ltp.getLanguage(), languageLevel); 1190 addBestLevel(scripts, ltp.getScript(), languageLevel); 1191 addBestLevel(regions, ltp.getRegion(), languageLevel); 1192 } 1193 regions.remove(""); 1194 scripts.remove(""); 1195 1196 // get the data 1197 1198 Map<String, CoverageStatus> data = new TreeMap<>(); 1199 1200 ImmutableMap<Integer, R4<String, Map<String, Level>, Set<String>, Level>> typeToInfo = 1201 ImmutableMap.of( 1202 CLDRFile.LANGUAGE_NAME, 1203 Row.of("language", langs, langsRoot, Level.MODERN), 1204 CLDRFile.SCRIPT_NAME, 1205 Row.of("script", scripts, scriptsRoot, Level.MODERATE), 1206 CLDRFile.TERRITORY_NAME, 1207 Row.of("region", regions, regionsRoot, Level.MODERATE)); 1208 1209 for (Entry<Integer, R4<String, Map<String, Level>, Set<String>, Level>> typeAndInfo : 1210 typeToInfo.entrySet()) { 1211 int type = typeAndInfo.getKey(); 1212 String name = typeAndInfo.getValue().get0(); 1213 Map<String, Level> idPartMap = typeAndInfo.getValue().get1(); 1214 Set<String> setRoot = typeAndInfo.getValue().get2(); 1215 Level targetLevel = typeAndInfo.getValue().get3(); 1216 for (String code : Sets.union(idPartMap.keySet(), setRoot)) { 1217 String displayName = testInfo.getEnglish().getName(type, code); 1218 String path = CLDRFile.getKey(type, code); 1219 Level level = coverageLevel.getLevel(path); 1220 data.put( 1221 name + "\t" + code, 1222 new CoverageStatus( 1223 level, 1224 setRoot.contains(code), 1225 idPartMap.containsKey(code), 1226 idPartMap.get(code), 1227 displayName)); 1228 } 1229 } 1230 if (SHOW_LSR_DATA) { 1231 1232 System.out.println( 1233 "\nType\tCode\tIn Root\tIn CLDR Locales\tCLDR TargeLevel\tRoot Path Level\tCombinations"); 1234 for (Entry<String, CoverageStatus> entry : data.entrySet()) { 1235 System.out.println(entry.getKey() + "\t" + entry.getValue()); 1236 } 1237 System.out.println(); 1238 for (Entry<String, CoverageStatus> entry : data.entrySet()) { 1239 final String key = entry.getKey(); 1240 if (!key.startsWith("language")) { 1241 continue; 1242 } 1243 final CoverageStatus value = entry.getValue(); 1244 if (value.inId) { 1245 continue; 1246 } 1247 String[] parts = key.split("\t"); 1248 PopulationData population = SDI.getBaseLanguagePopulationData(parts[1]); 1249 if (population == null) { 1250 System.out.println(key + "\t" + value.displayName + "\t" + value + "\t-1\t-1"); 1251 } else { 1252 System.out.println( 1253 key 1254 + "\t" 1255 + value.displayName 1256 + "\t" 1257 + value 1258 + "\t" 1259 + population.getPopulation() 1260 + "\t" 1261 + population.getLiteratePopulation()); 1262 } 1263 } 1264 } 1265 1266 Set<String> ids = new TreeSet<>(); 1267 Set<String> missing = new TreeSet<>(); 1268 for (Entry<String, CoverageStatus> entry : data.entrySet()) { 1269 final String key = entry.getKey(); 1270 if (!key.startsWith("language")) { 1271 continue; 1272 } 1273 final CoverageStatus value = entry.getValue(); 1274 if (value.inId) { 1275 String[] parts = key.split("\t"); 1276 ids.add(parts[1]); 1277 if (!value.inRoot) { 1278 missing.add(parts[1]); 1279 } 1280 } 1281 } 1282 if (!assertEquals( 1283 "Language subtags that are in a CLDR locale's ID are in root (" 1284 + missing.size() 1285 + ")", 1286 "", 1287 Joiner.on(' ').join(missing))) { 1288 warnln( 1289 "Full set for resetting $language in attributeValueValidity.xml (" 1290 + ids.size() 1291 + "):" 1292 + breakLines(ids, "\n ")); 1293 } 1294 } 1295 breakLines(Set<String> ids, String indent)1296 private String breakLines(Set<String> ids, String indent) { 1297 StringBuilder result = new StringBuilder(); 1298 int lastFirstChar = 0; 1299 for (String id : ids) { 1300 int firstChar = id.codePointAt(0); 1301 result.append(firstChar == lastFirstChar ? " " : indent); 1302 result.append(id); 1303 lastFirstChar = firstChar; 1304 } 1305 return result.toString(); 1306 } 1307 addBestLevel(Map<String, Level> codeToBestLevel, String code, Level level)1308 private void addBestLevel(Map<String, Level> codeToBestLevel, String code, Level level) { 1309 if (level != Level.UNDETERMINED) { 1310 int debug = 0; 1311 } 1312 Level old = codeToBestLevel.get(code); 1313 if (old == null) { 1314 codeToBestLevel.put(code, level); 1315 } else if (level.compareTo(old) > 0) { 1316 codeToBestLevel.put(code, level); 1317 } else if (level != old) { 1318 int debug = 0; 1319 } 1320 } 1321 TestEnglishCoverage()1322 public void TestEnglishCoverage() { 1323 Output<String> pathWhereFound = new Output<>(); 1324 Output<String> localeWhereFound = new Output<>(); 1325 Set<Row.R5<String, String, Boolean, Boolean, Level>> inherited = new TreeSet<>(); 1326 for (String path : ENGLISH) { 1327 String value = ENGLISH.getStringValueWithBailey(path, pathWhereFound, localeWhereFound); 1328 final boolean samePath = path.equals(pathWhereFound.value); 1329 final boolean sameLocale = "en".equals(localeWhereFound.value); 1330 if (!samePath) { 1331 Level level = SDI.getCoverageLevel(path, "en"); 1332 if (level.compareTo(Level.MODERN) <= 0) { 1333 inherited.add(Row.of(path, value, samePath, sameLocale, level)); 1334 } 1335 } 1336 } 1337 if (!assertEquals("English has sideways inheritance:", 0, inherited.size())) { 1338 System.out.println("Check the following, then use in modify_config.txt\n"); 1339 String pattern = "locale=en ; action=add ; new_path=%s ; new_value=%s"; 1340 for (Row.R5<String, String, Boolean, Boolean, Level> row : inherited) { 1341 System.out.println(String.format(pattern, row.get0(), row.get1())); 1342 if (DEBUG) { 1343 System.out.println( 1344 String.format( 1345 "%s\t%s\t%s\t%s\t%s", 1346 row.get0(), row.get1(), row.get2(), row.get3(), row.get4())); 1347 } 1348 } 1349 } 1350 } 1351 } 1352