xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCoverageLevel.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.unittest;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.collect.ImmutableListMultimap;
5 import com.google.common.collect.ImmutableMap;
6 import com.google.common.collect.ImmutableSet;
7 import com.google.common.collect.LinkedHashMultimap;
8 import com.google.common.collect.Multimap;
9 import com.google.common.collect.Sets;
10 import com.google.common.collect.TreeMultimap;
11 import com.ibm.icu.impl.Relation;
12 import com.ibm.icu.impl.Row;
13 import com.ibm.icu.impl.Row.R2;
14 import com.ibm.icu.impl.Row.R4;
15 import com.ibm.icu.text.CompactDecimalFormat;
16 import com.ibm.icu.text.CompactDecimalFormat.CompactStyle;
17 import com.ibm.icu.text.Transform;
18 import com.ibm.icu.util.Calendar;
19 import com.ibm.icu.util.Output;
20 import com.ibm.icu.util.ULocale;
21 import java.util.Arrays;
22 import java.util.Collection;
23 import java.util.Collections;
24 import java.util.Date;
25 import java.util.EnumSet;
26 import java.util.HashMap;
27 import java.util.HashSet;
28 import java.util.LinkedHashSet;
29 import java.util.List;
30 import java.util.Map;
31 import java.util.Map.Entry;
32 import java.util.Set;
33 import java.util.TreeMap;
34 import java.util.TreeSet;
35 import java.util.regex.Pattern;
36 import org.unicode.cldr.draft.ScriptMetadata;
37 import org.unicode.cldr.test.CoverageLevel2;
38 import org.unicode.cldr.tool.LikelySubtags;
39 import org.unicode.cldr.util.CLDRConfig;
40 import org.unicode.cldr.util.CLDRFile;
41 import org.unicode.cldr.util.CLDRLocale;
42 import org.unicode.cldr.util.CLDRPaths;
43 import org.unicode.cldr.util.ChainedMap;
44 import org.unicode.cldr.util.ChainedMap.M4;
45 import org.unicode.cldr.util.Counter2;
46 import org.unicode.cldr.util.DtdData;
47 import org.unicode.cldr.util.DtdData.Element;
48 import org.unicode.cldr.util.DtdType;
49 import org.unicode.cldr.util.GrammarInfo;
50 import org.unicode.cldr.util.LanguageTagParser;
51 import org.unicode.cldr.util.Level;
52 import org.unicode.cldr.util.LocaleNames;
53 import org.unicode.cldr.util.LogicalGrouping;
54 import org.unicode.cldr.util.LogicalGrouping.PathType;
55 import org.unicode.cldr.util.Organization;
56 import org.unicode.cldr.util.PathHeader;
57 import org.unicode.cldr.util.PathHeader.Factory;
58 import org.unicode.cldr.util.PathStarrer;
59 import org.unicode.cldr.util.PatternCache;
60 import org.unicode.cldr.util.RegexLookup;
61 import org.unicode.cldr.util.RegexLookup.Finder;
62 import org.unicode.cldr.util.StandardCodes;
63 import org.unicode.cldr.util.SupplementalDataInfo;
64 import org.unicode.cldr.util.SupplementalDataInfo.CoverageVariableInfo;
65 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
66 import org.unicode.cldr.util.SupplementalDataInfo.OfficialStatus;
67 import org.unicode.cldr.util.SupplementalDataInfo.PopulationData;
68 import org.unicode.cldr.util.VoteResolver;
69 import org.unicode.cldr.util.XPathParts;
70 
71 public class TestCoverageLevel extends TestFmwkPlus {
72 
    // Debug switch; not referenced in the visible part of this file.
    // NOTE(review): presumably gates extra likely-subtag (LSR) output further down — confirm.
    private static final boolean SHOW_LSR_DATA = false;

    // Shared CLDR test configuration; the English file and supplemental data below come from it.
    private static CLDRConfig testInfo = CLDRConfig.getInstance();
    private static final StandardCodes STANDARD_CODES = StandardCodes.make();
    private static final CLDRFile ENGLISH = testInfo.getEnglish();
    private static final SupplementalDataInfo SDI = testInfo.getSupplementalDataInfo();
    // Vote count of a TC-level Apple voter, kept as a String so it can sit in the String[][]
    // expectation tables used by the testSpecificPaths* methods.
    private static final String TC_VOTES =
            Integer.toString(VoteResolver.Level.tc.getVotes(Organization.apple));
81 
main(String[] args)82     public static void main(String[] args) {
83         new TestCoverageLevel().run(args);
84     }
85 
testSpecificPaths()86     public void testSpecificPaths() {
87         String[][] rows = {
88             {
89                 "//ldml/characters/parseLenients[@scope=\"number\"][@level=\"lenient\"]/parseLenient[@sample=\",\"]",
90                 "moderate",
91                 TC_VOTES
92             }
93         };
94         doSpecificPathTest("fr", rows);
95     }
96 
testSpecificPathsPersCal()97     public void testSpecificPathsPersCal() {
98         String[][] rows = {
99             {
100                 "//ldml/dates/calendars/calendar[@type=\"persian\"]/eras/eraAbbr/era[@type=\"0\"]",
101                 "moderate",
102                 "4"
103             },
104             {
105                 "//ldml/dates/calendars/calendar[@type=\"persian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"1\"]",
106                 "moderate",
107                 "4"
108             }
109         };
110         doSpecificPathTest("ckb_IR", rows);
111     }
112 
testSpecificPathsDeFormatLength()113     public void testSpecificPathsDeFormatLength() {
114         String[][] rows = {
115             /* For German (de) these should be high-bar (20) per https://unicode-org.atlassian.net/browse/CLDR-14988 */
116             {
117                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"one\"]",
118                 "modern",
119                 TC_VOTES
120             },
121             {
122                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"other\"]",
123                 "modern",
124                 TC_VOTES
125             },
126             {
127                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"one\"]",
128                 "modern",
129                 TC_VOTES
130             },
131             {
132                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"other\"]",
133                 "modern",
134                 TC_VOTES
135             },
136             {
137                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"one\"]",
138                 "modern",
139                 TC_VOTES
140             },
141             {
142                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"other\"]",
143                 "modern",
144                 TC_VOTES
145             },
146             {
147                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"one\"]",
148                 "modern",
149                 TC_VOTES
150             },
151             {
152                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"other\"]",
153                 "modern",
154                 TC_VOTES
155             },
156             {
157                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"one\"]",
158                 "modern",
159                 TC_VOTES
160             },
161             {
162                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"other\"]",
163                 "modern",
164                 TC_VOTES
165             },
166             {
167                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"one\"]",
168                 "modern",
169                 TC_VOTES
170             },
171             {
172                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"other\"]",
173                 "modern",
174                 TC_VOTES
175             },
176             /* not high-bar (20): wrong number of zeroes, or count many*/
177             {
178                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100\"][@count=\"other\"]",
179                 "comprehensive",
180                 "8"
181             },
182             {
183                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000000\"][@count=\"other\"]",
184                 "modern",
185                 "8"
186             },
187             {
188                 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength[@type=\"short\"]/currencyFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"many\"]",
189                 "modern",
190                 "8"
191             },
192         };
193         doSpecificPathTest("de", rows);
194     }
195 
doSpecificPathTest(String localeStr, String[][] rows)196     private void doSpecificPathTest(String localeStr, String[][] rows) {
197         Factory phf = PathHeader.getFactory(ENGLISH);
198         CoverageLevel2 coverageLevel = CoverageLevel2.getInstance(SDI, localeStr);
199         CLDRLocale loc = CLDRLocale.getInstance(localeStr);
200         for (String[] row : rows) {
201             String path = row[0];
202             Level expectedLevel = Level.fromString(row[1]);
203             Level level = coverageLevel.getLevel(path);
204             assertEquals("Level for " + path, expectedLevel, level);
205 
206             int expectedRequiredVotes = Integer.parseInt(row[2]);
207             int votes = SDI.getRequiredVotes(loc, phf.fromPath(path));
208             assertEquals("Votes for " + path, expectedRequiredVotes, votes);
209         }
210     }
211 
oldTestInvariantPaths()212     public void oldTestInvariantPaths() {
213         org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory();
214         PathStarrer pathStarrer = new PathStarrer().setSubstitutionPattern("*");
215         SupplementalDataInfo sdi =
216                 SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY);
217 
218         Set<String> allPaths = new HashSet<>();
219         M4<String, String, Level, Boolean> starredToLocalesToLevels =
220                 ChainedMap.of(
221                         new TreeMap<String, Object>(),
222                         new TreeMap<String, Object>(),
223                         new TreeMap<Level, Object>(),
224                         Boolean.class);
225 
226         for (String locale : factory.getAvailableLanguages()) {
227             logln(locale);
228             CLDRFile cldrFileToCheck = factory.make(locale, true);
229             for (String path : cldrFileToCheck.fullIterable()) {
230                 allPaths.add(path);
231                 String starred = pathStarrer.set(path);
232                 Level level = sdi.getCoverageLevel(path, locale);
233                 starredToLocalesToLevels.put(starred, locale, level, true);
234             }
235         }
236 
237         Set<Level> levelsFound = EnumSet.noneOf(Level.class);
238         Set<String> localesWithUniqueLevels = new TreeSet<>();
239         for (Entry<String, Map<String, Map<Level, Boolean>>> entry : starredToLocalesToLevels) {
240             String starred = entry.getKey();
241             Map<String, Map<Level, Boolean>> localesToLevels = entry.getValue();
242             int maxLevelCount = 0;
243             double localeCount = 0;
244             levelsFound.clear();
245             localesWithUniqueLevels.clear();
246 
247             for (Entry<String, Map<Level, Boolean>> entry2 : localesToLevels.entrySet()) {
248                 String locale = entry2.getKey();
249                 Map<Level, Boolean> levels = entry2.getValue();
250                 levelsFound.addAll(levels.keySet());
251                 if (levels.size() > maxLevelCount) {
252                     maxLevelCount = levels.size();
253                 }
254                 if (levels.size() == 1) {
255                     localesWithUniqueLevels.add(locale);
256                 }
257                 localeCount++;
258             }
259             System.out.println(
260                     maxLevelCount
261                             + "\t"
262                             + localesWithUniqueLevels.size() / localeCount
263                             + "\t"
264                             + starred
265                             + "\t"
266                             + Joiner.on(", ").join(levelsFound)
267                             + "\t"
268                             + (maxLevelCount == 1
269                                     ? "all"
270                                     : localesWithUniqueLevels.size() == 0
271                                             ? "none"
272                                             : Joiner.on(", ").join(localesWithUniqueLevels)));
273         }
274     }
275 
276     enum LanguageStatus {
277         Lit100M("P1"),
278         Lit10MandOfficial("P2"),
279         Lit1MandOneThird("P3");
280         final String name;
281 
LanguageStatus(String name)282         LanguageStatus(String name) {
283             this.name = name;
284         }
285     }
286 
    // Tables filled once by the static initializer that follows.

    // Priority tiers per key; keys are a base language code or base + "_" + territory.
    // Values live in a TreeSet, so they iterate in enum declaration order.
    static Relation<String, LanguageStatus> languageStatus =
            Relation.of(new HashMap<String, Set<LanguageStatus>>(), TreeSet.class);
    // Literate population per base language and per base + "_" + territory.
    static Counter2<String> languageLiteratePopulation = new Counter2<>();
    // Latest end date found for each currency code across all territories.
    static Map<String, Date> currencyToLast = new HashMap<>();
    // Base language codes with at least de-facto-official status in some territory.
    static Set<String> officialSomewhere = new HashSet<>();
292 
293     static {
294         Counter2<String> territoryLiteratePopulation = new Counter2<>();
295         LanguageTagParser parser = new LanguageTagParser();
296         // cf
297         // http://cldr.unicode.org/development/development-process/design-proposals/languages-to-show-for-translation
298         for (String language : SDI.getLanguagesForTerritoriesPopulationData()) {
299             String base = parser.set(language).getLanguage();
300             boolean isOfficial = false;
301             double languageLiterate = 0;
302             for (String territory : SDI.getTerritoriesForPopulationData(language)) {
303                 PopulationData pop = SDI.getLanguageAndTerritoryPopulationData(language, territory);
304                 OfficialStatus officialStatus = pop.getOfficialStatus();
305                 if (officialStatus.compareTo(OfficialStatus.de_facto_official) >= 0) {
306                     isOfficial = true;
307                     languageStatus.put(base + "_" + territory, LanguageStatus.Lit10MandOfficial);
308                     officialSomewhere.add(base);
309                 }
310                 double litPop = pop.getLiteratePopulation();
311                 languageLiterate += litPop;
territoryLiteratePopulation.add(territory, litPop)312                 territoryLiteratePopulation.add(territory, litPop);
313                 languageLiteratePopulation.add(base + "_" + territory, litPop);
314             }
languageLiteratePopulation.add(base, languageLiterate)315             languageLiteratePopulation.add(base, languageLiterate);
316             if (languageLiterate > 100000000) {
languageStatus.put(base, LanguageStatus.Lit100M)317                 languageStatus.put(base, LanguageStatus.Lit100M);
318             }
319             if (languageLiterate > 10000000 && isOfficial) {
languageStatus.put(base, LanguageStatus.Lit10MandOfficial)320                 languageStatus.put(base, LanguageStatus.Lit10MandOfficial);
321             }
322         }
323         for (String language : SDI.getLanguagesForTerritoriesPopulationData()) {
324             if (languageLiteratePopulation.getCount(language) < 1000000) {
325                 continue;
326             }
327             String base = parser.set(language).getLanguage();
328             for (String territory : SDI.getTerritoriesForPopulationData(language)) {
329                 PopulationData pop = SDI.getLanguageAndTerritoryPopulationData(language, territory);
330                 double litPop = pop.getLiteratePopulation();
331                 double total = territoryLiteratePopulation.getCount(territory);
332                 if (litPop > total / 3) {
languageStatus.put(base, LanguageStatus.Lit1MandOneThird)333                     languageStatus.put(base, LanguageStatus.Lit1MandOneThird);
334                 }
335             }
336         }
337         for (String territory : STANDARD_CODES.getAvailableCodes("territory")) {
338             Set<CurrencyDateInfo> cdateInfo = SDI.getCurrencyDateInfo(territory);
339             if (cdateInfo == null) {
340                 continue;
341             }
342             for (CurrencyDateInfo dateInfo : cdateInfo) {
343                 String currency = dateInfo.getCurrency();
344                 Date last = dateInfo.getEnd();
345                 Date old = currencyToLast.get(currency);
346                 if (old == null || old.compareTo(last) < 0) {
currencyToLast.put(currency, last)347                     currencyToLast.put(currency, last);
348                 }
349             }
350         }
351     }
352 
    // English short compact-number formatter; isBigLanguage uses it to abbreviate population
    // figures.
    static CompactDecimalFormat cdf =
            CompactDecimalFormat.getInstance(ULocale.ENGLISH, CompactStyle.SHORT);
355 
isBigLanguage(String lang)356     static String isBigLanguage(String lang) {
357         Set<LanguageStatus> status = languageStatus.get(lang);
358         Double size = languageLiteratePopulation.getCount(lang);
359         String sizeString = size == null ? "?" : cdf.format(size);
360         String off = officialSomewhere.contains(lang) ? "o" : "";
361         if (status == null || status.isEmpty()) {
362             return "P4-" + sizeString + off;
363         }
364         return status.iterator().next().name + "-" + sizeString + off;
365     }
366 
    // Timestamp taken once at class load; currency end dates are compared against it.
    static final Date NOW = new Date();

    // Debug switch; not referenced in the visible part of this file.
    private static final boolean DEBUG = false;
370 
371     static class TypeName implements Transform<String, String> {
372         private final int field;
373         private final Map<String, R2<List<String>, String>> dep;
374 
TypeName(int field)375         public TypeName(int field) {
376             this.field = field;
377             switch (field) {
378                 case CLDRFile.LANGUAGE_NAME:
379                     dep = SDI.getLocaleAliasInfo().get("language");
380                     break;
381                 case CLDRFile.TERRITORY_NAME:
382                     dep = SDI.getLocaleAliasInfo().get("territory");
383                     break;
384                 case CLDRFile.SCRIPT_NAME:
385                     dep = SDI.getLocaleAliasInfo().get("script");
386                     break;
387                 default:
388                     dep = null;
389                     break;
390             }
391         }
392 
393         @Override
transform(String source)394         public String transform(String source) {
395             String result = ENGLISH.getName(field, source);
396             String extra = "";
397             if (field == CLDRFile.LANGUAGE_NAME) {
398                 String lang = isBigLanguage(source);
399                 extra = lang == null ? "X" : lang;
400             } else if (field == CLDRFile.CURRENCY_NAME) {
401                 Date last = currencyToLast.get(source);
402                 extra = last == null ? "?" : last.compareTo(NOW) < 0 ? "old" : "";
403             }
404             R2<List<String>, String> depValue = dep == null ? null : dep.get(source);
405             if (depValue != null) {
406                 extra += extra.isEmpty() ? "" : "-";
407                 extra += depValue.get1();
408             }
409             return result + (extra.isEmpty() ? "" : "\t" + extra);
410         }
411     }
412 
    // Regex-to-Level table loaded from the TestCoverageLevel.txt resource next to this class.
    // Each value string in the file is parsed as an integer and converted with Level.fromLevel.
    // The first and third arguments are passed as null.
    // NOTE(review): presumably a pattern transform and a value merger — confirm in RegexLookup.of.
    RegexLookup<Level> exceptions =
            RegexLookup.of(
                            null,
                            new Transform<String, Level>() {
                                @Override
                                public Level transform(String source) {
                                    return Level.fromLevel(Integer.parseInt(source));
                                }
                            },
                            null)
                    .loadFromFile(TestCoverageLevel.class, "TestCoverageLevel.txt");
424 
425     public void TestExceptions() {
426         for (Map.Entry<Finder, Level> x : exceptions) {
427             logln(x.getKey().toString() + " => " + x.getValue());
428         }
429     }
430 
431     public void TestNarrowCurrencies() {
432         String path = "//ldml/numbers/currencies/currency[@type=\"USD\"]/symbol[@alt=\"narrow\"]";
433         String value = ENGLISH.getStringValue(path);
434         assertEquals("Narrow $", "$", value);
435         SupplementalDataInfo sdi =
436                 SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY);
437         Level level = sdi.getCoverageLevel(path, "en");
438         assertEquals("Narrow $", Level.MODERATE, level);
439     }
440 
441     public void TestA() {
442         String path = "//ldml/characterLabels/characterLabel[@type=\"other\"]";
443         SupplementalDataInfo sdi =
444                 SupplementalDataInfo.getInstance(CLDRPaths.DEFAULT_SUPPLEMENTAL_DIRECTORY);
445         Level level = sdi.getCoverageLevel(path, "en");
446         assertEquals("Quick Check for any attribute", Level.MODERN, level);
447     }
448 
449     public void TestCoverageCompleteness() {
450         /**
451          * Check that English paths are, except for known cases, at least modern coverage. We filter
452          * out the things we know about and have determined are OK to be in comprehensive. If we add
453          * a path that doesn't get its coverage set, this test should complain about it.
454          */
455         final ImmutableSet<String> inactiveMetazones =
456                 ImmutableSet.of(
457                         "Greenland", // TODO: New metazone added for tz2023d update,
458                         // In CLDR 45, we don't want to include this one in modern coverage because
459                         // we don't open ST for translating display names for this metazone.
460                         // After 45, we will include "Greenland" in modern coverage.
461                         "Bering",
462                         "Dominican",
463                         "Shevchenko",
464                         "Alaska_Hawaii",
465                         "Yerevan",
466                         "Africa_FarWestern",
467                         "British",
468                         "Sverdlovsk",
469                         "Karachi",
470                         "Malaya",
471                         "Oral",
472                         "Frunze",
473                         "Dutch_Guiana",
474                         "Irish",
475                         "Uralsk",
476                         "Tashkent",
477                         "Kwajalein",
478                         "Ashkhabad",
479                         "Kizilorda",
480                         "Kuybyshev",
481                         "Baku",
482                         "Dushanbe",
483                         "Goose_Bay",
484                         "Liberia",
485                         "Samarkand",
486                         "Tbilisi",
487                         "Borneo",
488                         "Greenland_Central",
489                         "Dacca",
490                         "Aktyubinsk",
491                         "Turkey",
492                         "Urumqi",
493                         "Acre",
494                         "Almaty",
495                         "Anadyr",
496                         "Aqtau",
497                         "Aqtobe",
498                         "Kamchatka",
499                         "Macau",
500                         "Qyzylorda",
501                         "Samara",
502                         "Casey",
503                         "Guam",
504                         "Lanka",
505                         "North_Mariana");
506 
507         final Pattern calendar100 =
508                 PatternCache.get("(coptic|ethiopic-amete-alem|islamic-(rgsa|tbla|umalqura))");
509 
510         // Warning: shorter strings must come AFTER longer ones. Can process with MinimizeRegex to
511         // reorder
512         final Pattern language100 =
513                 PatternCache.get(
514                         "(" // start
515                                 + "nds_NL|fa_AF|ro_MD|sr_ME|sw_CD"
516                                 // Length 4
517                                 + "|root"
518                                 // Length 3
519                                 + "|ace|ach|ada|ady|aeb|afh|agq|ain|akk|akz|ale|aln|alt|ang|ann|anp|apc|arc|arn|aro|arp|arq|ars|arw|ary|arz|asa|ase|atj|avk|awa"
520                                 + "|bal|ban|bar|bax|bbc|bbj|bej|bem|bew|bez|bfd|bfq|bgc|bgn|bho|bik|bin|bjn|bkm|bla|blo|blt|bpy|bqi|bra|brh|bss|bua|bug|bum|byn|byv"
521                                 + "|cad|car|cay|cch|ccp|cgg|chb|chg|chk|chm|chn|cho|chp|chy|cic|ckb|clc|cop|cps|crg|crh|crj|crk|crl|crm|crr|crs|csb|csw|cwd"
522                                 + "|dak|dar|dav|del|den|dgr|din|dje|doi|dtp|dua|dum|dyo|dyu|dzg"
523                                 + "|ebu|efi|egl|egy|eka|elx|enm|esu|ext|fan|fat|fit|fon|frc|frm|fro|frp|frr|frs|fur"
524                                 + "|gaa|gag|gan|gay|gba|gbz|gez|gil|glk|gmh|goh|gom|gon|gor|got|grb|grc|gsw|guc|gur|guz|gwi"
525                                 + "|hai|hak|haw|hax|hdn|hif|hil|hit|hnj|hsn|hup|hur|iba|ilo|inh|izh|jam|jbo|jgo|jmc|jpr|jrb|jut"
526                                 + "|kaa|kab|kac|kaj|kam|kaw|kbd|kbl|kcg|kde|ken|kfo|kgp|kha|kho|khq|khw|kiu|kln|kmb|koi|kos|kpe|krc|kri|krj|krl|kru|ksb|ksf|ksh|kum|kut|kwk|kxv"
527                                 + "|lad|lag|lah|lam|lez|lfn|lij|lil|liv|lkt|lmo|lol|lou|loz|lrc|ltg|lua|lui|lun|luo|lus|luy|lzh|lzz"
528                                 + "|mad|maf|mag|mai|mak|man|mas|mde|mdf|mdr|men|mer|mfe|mga|mgh|mgo|mic|min|mnc|mni|moe|moh|mos|mrj|mua|mus|mwl|mwr|mwv|mye|myv|mzn"
529                                 + "|nan|nap|naq|nds|new|nia|niu|njo|nmg|nog|non|nov|nqo|nso|nus|nwc|nym|nyn|nyo|nzi|oka|osa|ota"
530                                 + "|pag|pal|pam|pap|pau|pcd|pcm|pdc|pdt|peo|pfl|phn|pms|pnt|pon|pqm|prg|pro|quc|qug|raj|rap|rar|rgn|rif|rof|rom|rtm|rue|rug|rup|rwk"
531                                 + "|sad|sam|saq|sas|sat|saz|sba|sbp|sdc|sdh|see|seh|sei|sel|ses|sga|sgs|shi|shn|shu|sid|skr|slh|sli|sly|sma|smj|smn|sms|snk|sog|srn|srr|stq|str|suk|sus|sux|swb|syc|syr|szl"
532                                 + "|tce|tcy|tem|teo|ter|tet|tgx|tht|tig|tiv|tkl|tkr|tlh|tli|tly|tmh|tog|tok|tpi|tru|trv|trw|tsd|tsi|ttm|ttt|tum|tvl|tzm"
533                                 + "|udm|uga|umb|vai|vec|vep|vls|vmf|vmw|vot|vro|vun|wae|wal|war|was|wbp|wuu|xal|xmf|xnr|xog|yao|yap|yrl|zap|zbl|zea|zen|zgh|zun|zza"
534                                 + "|ike|ojg|ssy|pis|twq"
535                                 // Length 2
536                                 + "|aa|ab|ae|ak|an|av|ay|ba|bi|bm|bo|ce|ch|cr|cu|cv|dv|dz|ee|eo|fj|gn|gv|ho|hz|ie|ii|ik|io|iu|kg|ki|kj|kl|kv|kw|lg|li|ln|lu"
537                                 + "|mg|mh|na|nb|nd|ng|no|nr|nv|oc|oj|om|os|pi|rn|rw|sc|se|sg|sh|sn|ss|tl|tn|ts|tw|ty|ve|vo|wa|yi|za"
538                                 // end
539                                 + ")");
540 
541         /**
542          * Recommended scripts that are allowed for comprehensive coverage. Not-recommended scripts
543          * (according to ScriptMetadata) are filtered out automatically.
544          */
545         final Pattern script100 = PatternCache.get("(Zinh)");
546 
547         final Pattern keys100 =
548                 PatternCache.get(
549                         "(col(Alternate|Backwards|CaseFirst|CaseLevel|HiraganaQuaternary|"
550                                 + "Normalization|Numeric|Reorder|Strength)|kv|sd|mu|timezone|va|variableTop|x|d0|h0|i0|k0|m0|s0)");
551 
552         final Pattern numberingSystem100 =
553                 PatternCache.get(
554                         "("
555                                 + "finance|native|traditional|adlm|ahom|bali|bhks|brah|cakm|cham|cyrl|diak|"
556                                 + "gong|gonm|hanidays|hmng|hmnp|java|jpanyear|kali|kawi|lana(tham)?|lepc|limb|"
557                                 + "math(bold|dbl|mono|san[bs])|modi|mong|mroo|mtei|mymr(shan|tlng)|"
558                                 + "nagm|newa|nkoo|olck|osma|rohg|saur|segment|shrd|sin[dh]|sora|sund|"
559                                 + "takr|talu|tirh|tnsa|vaii|wara|wcho)");
560 
561         final Pattern collation100 =
562                 PatternCache.get(
563                         "("
564                                 + "big5han|compat|dictionary|emoji|eor|gb2312han|phonebook|phonetic|pinyin|searchjl|stroke|traditional|unihan|zhuyin)");
565 
566         SupplementalDataInfo sdi = testInfo.getSupplementalDataInfo();
567         CLDRFile english = testInfo.getEnglish();
568 
569         // Calculate date of the upcoming CLDR release, minus 5 years (deprecation policy)
570         final int versionNumber = Integer.valueOf((CLDRFile.GEN_VERSION).split("\\.")[0]);
571         Calendar cal = Calendar.getInstance();
572         cal.set(versionNumber / 2 + versionNumber % 2 + 2001, 8 - (versionNumber % 2) * 6, 15);
573         Date cldrReleaseMinus5Years = cal.getTime();
574         Set<String> modernCurrencies =
575                 SDI.getCurrentCurrencies(SDI.getCurrencyTerritories(), cldrReleaseMinus5Years, NOW);
576 
577         Set<String> needsNumberSystem = new HashSet<>();
578         DtdData dtdData = DtdData.getInstance(DtdType.ldml);
579         Element numbersElement = dtdData.getElementFromName().get("numbers");
580         for (Element childOfNumbers : numbersElement.getChildren().keySet()) {
581             if (childOfNumbers.containsAttribute("numberSystem")) {
582                 needsNumberSystem.add(childOfNumbers.name);
583             }
584         }
585 
586         for (String path : english.fullIterable()) {
587             logln("Testing path => " + path);
588             XPathParts xpp = XPathParts.getFrozenInstance(path);
589             if (path.endsWith("/alias")
590                     || path.matches(
591                             "//ldml/(identity|contextTransforms|layout|localeDisplayNames/transformNames)/.*")) {
592                 continue;
593             }
594             if (sdi.isDeprecated(DtdType.ldml, path)) {
595                 continue;
596             }
597             Level lvl = sdi.getCoverageLevel(path, "en");
598             if (lvl == Level.UNDETERMINED) {
599                 errln("Undetermined coverage value for path => " + path);
600                 continue;
601             }
602             if (lvl.compareTo(Level.MODERN) <= 0) {
603                 logln("Level OK [" + lvl.toString() + "] for path => " + path);
604                 continue;
605             }
606 
607             if (path.startsWith("//ldml/numbers")) {
608                 // Paths in numbering systems outside "latn" are specifically excluded.
609                 String numberingSystem = xpp.findFirstAttributeValue("numberSystem");
610                 if (numberingSystem != null && !numberingSystem.equals("latn")) {
611                     continue;
612                 }
613                 if (xpp.containsElement("currencySpacing") || xpp.containsElement("list")) {
614                     continue;
615                 }
616                 if (xpp.containsElement("currency")) {
617                     String currencyType = xpp.findAttributeValue("currency", "type");
618                     if (!modernCurrencies.contains(currencyType)) {
619                         continue; // old currency or not tender, so we don't care
620                     }
621                 }
622                 // Currently not collecting timeSeparator data in SurveyTool
623                 if (xpp.containsElement("timeSeparator")) {
624                     continue;
625                 }
626                 // Other paths in numbers without a numbering system are deprecated.
627                 //                if (numberingSystem == null) {
628                 //                    continue;
629                 //                }
630                 if (needsNumberSystem.contains(xpp.getElement(2))) {
631                     continue;
632                 }
633             } else if (xpp.containsElement("zone")) {
634                 String zoneType = xpp.findAttributeValue("zone", "type");
635                 if ((zoneType.startsWith("Etc/GMT") || zoneType.equals("Etc/UTC"))
636                         && path.endsWith("exemplarCity")) {
637                     continue;
638                 }
639                 // We don't survey for short timezone names or at least some alts
640                 if (path.contains("/short/") || path.contains("[@alt=\"formal\"]")) {
641                     continue;
642                 }
643             } else if (xpp.containsElement("metazone")) {
644                 // We don't survey for short metazone names
645                 if (path.contains("/short/")) {
646                     continue;
647                 }
648                 String mzName = xpp.findAttributeValue("metazone", "type");
649                 // Skip inactive metazones.
650                 if (inactiveMetazones.contains(mzName)) {
651                     continue;
652                 }
653                 // Skip paths for daylight or generic mz strings where
654                 // the mz doesn't use DST.
655                 if ((path.endsWith("daylight") || path.endsWith("generic"))
656                         && !LogicalGrouping.metazonesDSTSet.contains(mzName)) {
657                     continue;
658                 }
659             } else if (path.startsWith("//ldml/dates/fields")) {
660                 if ("variant".equals(xpp.findAttributeValue("displayName", "alt"))) {
661                     continue;
662                 }
663                 // relative day/week/month, etc. short or narrow
664                 if (xpp.getElement(-1).equals("relative")) {
665                     String fieldType = xpp.findAttributeValue("field", "type");
666                     if (fieldType.matches(".*-(short|narrow)|quarter")) {
667                         continue;
668                     }
669                 }
670             } else if (xpp.containsElement("language")) {
671                 // Comprehensive coverage is OK for some languages.
672                 String languageType = xpp.findAttributeValue("language", "type");
673                 if (language100.matcher(languageType).matches()) {
674                     continue;
675                 }
676             } else if (xpp.containsElement("script")) {
677                 // Skip user defined script codes and alt=short
678                 String scriptType = xpp.findAttributeValue("script", "type");
679                 if (scriptType.startsWith("Q")
680                         || "short".equals(xpp.findAttributeValue("script", "alt"))) {
681                     continue;
682                 }
683                 ScriptMetadata.Info scriptInfo = ScriptMetadata.getInfo(scriptType);
684                 if (scriptInfo == null
685                         || scriptInfo.idUsage != ScriptMetadata.IdUsage.RECOMMENDED) {
686                     continue;
687                 }
688                 if (script100.matcher(scriptType).matches()) {
689                     continue;
690                 }
691             } else if (xpp.containsElement("territory")) {
692                 String territoryType = xpp.findAttributeValue("territory", "type");
693                 if (territoryType.equals("CQ")) { // Exceptionally reserved by ISO-3166
694                     continue;
695                 }
696             } else if (xpp.containsElement("key")) {
697                 // Comprehensive coverage is OK for some key/types.
698                 String keyType = xpp.findAttributeValue("key", "type");
699                 if (keys100.matcher(keyType).matches()) {
700                     continue;
701                 }
702             } else if (xpp.containsElement("type")) {
703                 if ("short".equals(xpp.findAttributeValue("type", "alt"))) {
704                     continue;
705                 }
706                 // Comprehensive coverage is OK for some key/types.
707                 String keyType = xpp.findAttributeValue("type", "key");
708                 if (keys100.matcher(keyType).matches()) {
709                     continue;
710                 }
711                 if (keyType.equals("numbers")) {
712                     String ns = xpp.findAttributeValue("type", "type");
713                     if (numberingSystem100.matcher(ns).matches()) {
714                         continue;
715                     }
716                 }
717                 if (keyType.equals("collation")) {
718                     String ct = xpp.findAttributeValue("type", "type");
719                     if (collation100.matcher(ct).matches()) {
720                         continue;
721                     }
722                 }
723                 if (keyType.equals("calendar")) {
724                     String ct = xpp.findAttributeValue("type", "type");
725                     if (calendar100.matcher(ct).matches()) {
726                         continue;
727                     }
728                 }
729             } else if (xpp.containsElement("variant")) {
730                 // All variant names are comprehensive coverage
731                 continue;
732             } else if (path.startsWith("//ldml/dates/calendars")) {
733                 String calType = xpp.findAttributeValue("calendar", "type");
734                 if (!calType.matches("(gregorian|generic)")) {
735                     continue;
736                 }
737                 // So far we are generating datetimeSkeleton mechanically, no coverage
738                 if (xpp.containsElement("datetimeSkeleton")) {
739                     continue;
740                 }
741                 // The alt="ascii" time patterns are hopefully short-lived. We do not survey
742                 // for them, they can be generated mechanically from the non-alt patterns.
743                 // CLDR-16606
744                 if (path.contains("[@alt=\"ascii\"]")) {
745                     continue;
746                 }
747                 String element = xpp.getElement(-1);
748                 // Skip things that shouldn't normally exist in the generic calendar
749                 // days, dayPeriods, quarters, and months
750                 if (calType.equals("generic")) {
751                     if (element.matches("(day(Period)?|month|quarter|era|appendItem)")) {
752                         continue;
753                     }
754                     if (xpp.containsElement("intervalFormatItem")) {
755                         String intervalFormatID =
756                                 xpp.findAttributeValue("intervalFormatItem", "id");
757                         // "Time" related, so shouldn't be in generic calendar.
758                         if (intervalFormatID.matches("(h|H).*")) {
759                             continue;
760                         }
761                     }
762                     if (xpp.containsElement("dateFormatItem")) {
763                         String dateFormatID = xpp.findAttributeValue("dateFormatItem", "id");
764                         // "Time" related, so shouldn't be in generic calendar.
765                         if (dateFormatID.matches("E?(h|H|m).*")) {
766                             continue;
767                         }
768                     }
769                     if (xpp.containsElement("timeFormat")) {
770                         continue;
771                     }
772                 } else { // Gregorian calendar
773                     if (xpp.containsElement("eraNarrow")) {
774                         continue;
775                     }
776                     if (element.equals("appendItem")) {
777                         String request = xpp.findAttributeValue("appendItem", "request");
778                         if (!request.equals("Timezone")) {
779                             continue;
780                         }
781                     } else if (element.equals("dayPeriod")) {
782                         if ("variant".equals(xpp.findAttributeValue("dayPeriod", "alt"))) {
783                             continue;
784                         }
785                     } else if (element.equals("dateFormatItem")) {
786                         // ldml/dates/calendars/calendar[@type='gregorian']/dateTimeFormats/availableFormats/dateFormatItem[@id='%dateFormatItems']
787                         assertEquals(path, Level.BASIC, lvl);
788                         continue;
789                     }
790                 }
791             } else if (path.startsWith("//ldml/units")) {
792                 // Skip paths for narrow unit fields.
793                 if ("narrow".equals(xpp.findAttributeValue("unitLength", "type"))
794                         || path.endsWith("/compoundUnitPattern1")) {
795                     continue;
796                 }
797             } else if (xpp.contains("posix")) {
798                 continue;
799             }
800 
801             errln("Comprehensive & no exception for path =>\t" + path);
802         }
803     }
804 
805     public static class TargetsAndSublocales {
806         public final CoverageVariableInfo cvi;
807         public Set<String> scripts;
808         public Set<String> regions;
809 
810         public TargetsAndSublocales(String localeLanguage) {
811             cvi = SDI.getCoverageVariableInfo(localeLanguage);
812             scripts = new TreeSet<>();
813             regions = new TreeSet<>();
814         }
815 
816         public boolean addScript(String localeScript) {
817             return scripts.add(localeScript);
818         }
819 
820         public boolean addRegion(String localeRegion) {
821             return regions.add(localeRegion);
822         }
823     }
824 
825     public void TestCoverageVariableInfo() {
826         /**
827          * Compare the targetScripts and targetTerritories for a language to what we actually have
828          * in locales
829          */
830         Map<String, TargetsAndSublocales> langToTargetsAndSublocales = new TreeMap<>();
831         org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory();
832         for (CLDRLocale locale : factory.getAvailableCLDRLocales()) {
833             String language = locale.getLanguage();
834             if (language.length() == 0 || language.equals("root")) {
835                 continue;
836             }
837             TargetsAndSublocales targetsAndSublocales = langToTargetsAndSublocales.get(language);
838             if (targetsAndSublocales == null) {
839                 targetsAndSublocales = new TargetsAndSublocales(language);
840                 langToTargetsAndSublocales.put(language, targetsAndSublocales);
841             }
842             String script = locale.getScript();
843             if (script.length() > 0) {
844                 targetsAndSublocales.addScript(script);
845             }
846             String region = locale.getCountry();
847             if (region.length() > 0
848                     && region.length() < 3) { // do not want numeric codes like 001, 419
849                 targetsAndSublocales.addRegion(region);
850             }
851         }
852 
853         for (String language : langToTargetsAndSublocales.keySet()) {
854             TargetsAndSublocales targetsAndSublocales = langToTargetsAndSublocales.get(language);
855             if (targetsAndSublocales == null) {
856                 continue;
857             }
858             Set<String> targetScripts = new TreeSet<>(targetsAndSublocales.cvi.targetScripts);
859             Set<String> localeScripts = targetsAndSublocales.scripts;
860             localeScripts.removeAll(targetScripts);
861             if (localeScripts.size() > 0) {
862                 errln(
863                         "Missing scripts for language: "
864                                 + language
865                                 + ", target scripts: "
866                                 + targetScripts
867                                 + ", but locales also have: "
868                                 + localeScripts);
869             }
870             Set<String> targetRegions = new TreeSet<>(targetsAndSublocales.cvi.targetTerritories);
871             Set<String> localeRegions = targetsAndSublocales.regions;
872             localeRegions.removeAll(targetRegions);
873             if (localeRegions.size() > 0) {
874                 errln(
875                         "Missing regions for language: "
876                                 + language
877                                 + ", target regions: "
878                                 + targetRegions
879                                 + ", but locales also have: "
880                                 + localeRegions);
881             }
882         }
883     }
884 
testBreakingLogicalGrouping()885     public void testBreakingLogicalGrouping() {
886         checkBreakingLogicalGrouping("en");
887         checkBreakingLogicalGrouping("ar");
888         checkBreakingLogicalGrouping("de");
889         checkBreakingLogicalGrouping("pl");
890     }
891 
checkBreakingLogicalGrouping(String localeId)892     private void checkBreakingLogicalGrouping(String localeId) {
893         SupplementalDataInfo sdi = testInfo.getSupplementalDataInfo();
894         CLDRFile cldrFile = testInfo.getCldrFactory().make(localeId, true);
895         HashSet<String> seen = new HashSet<>();
896         Multimap<Level, String> levelToPaths = TreeMultimap.create();
897         int count = 0;
898         for (String path : cldrFile.fullIterable()) {
899             if (seen.contains(path)) {
900                 continue;
901             }
902             Set<String> grouping = LogicalGrouping.getPaths(cldrFile, path);
903             seen.add(path);
904             if (grouping == null) {
905                 continue;
906             }
907             seen.addAll(grouping);
908             levelToPaths.clear();
909             for (String groupingPath : grouping) {
910                 if (LogicalGrouping.isOptional(cldrFile, groupingPath)) {
911                     continue;
912                 }
913                 Level level = sdi.getCoverageLevel(groupingPath, localeId);
914                 levelToPaths.put(level, groupingPath);
915             }
916             if (levelToPaths.keySet().size() <= 1) {
917                 continue;
918             }
919             // we have a failure
920             for (Entry<Level, Collection<String>> entry : levelToPaths.asMap().entrySet()) {
921                 errln(
922                         localeId
923                                 + " ("
924                                 + count
925                                 + ") Broken Logical Grouping: "
926                                 + entry.getKey()
927                                 + " => "
928                                 + entry.getValue());
929             }
930             ++count;
931         }
932     }
933 
testLogicalGroupingSamples()934     public void testLogicalGroupingSamples() {
935         getLogger().fine(GrammarInfo.getGrammarLocales().toString());
936         String[][] test = {
937             {
938                 "de", "SINGLETON", "//ldml/localeDisplayNames/localeDisplayPattern/localePattern",
939             },
940             {
941                 "de",
942                 "METAZONE",
943                 "//ldml/dates/timeZoneNames/metazone[@type=\"Alaska\"]/long/generic",
944                 "//ldml/dates/timeZoneNames/metazone[@type=\"Alaska\"]/long/standard",
945                 "//ldml/dates/timeZoneNames/metazone[@type=\"Alaska\"]/long/daylight",
946             },
947             {
948                 "de",
949                 "DAYS",
950                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"sun\"]",
951                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"mon\"]",
952                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"tue\"]",
953                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"wed\"]",
954                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"thu\"]",
955                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"fri\"]",
956                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/days/dayContext[@type=\"format\"]/dayWidth[@type=\"wide\"]/day[@type=\"sat\"]",
957             },
958             {
959                 "nl",
960                 "DAY_PERIODS",
961                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"morning1\"]",
962                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"afternoon1\"]",
963                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"evening1\"]",
964                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"night1\"]",
965                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=\"midnight\"]",
966             },
967             {
968                 "de",
969                 "QUARTERS",
970                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"1\"]",
971                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"2\"]",
972                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"3\"]",
973                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/quarters/quarterContext[@type=\"format\"]/quarterWidth[@type=\"wide\"]/quarter[@type=\"4\"]",
974             },
975             {
976                 "de",
977                 "MONTHS",
978                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"1\"]",
979                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"2\"]",
980                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"3\"]",
981                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"4\"]",
982                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"5\"]",
983                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"6\"]",
984                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"7\"]",
985                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"8\"]",
986                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"9\"]",
987                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"10\"]",
988                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"11\"]",
989                 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/months/monthContext[@type=\"format\"]/monthWidth[@type=\"wide\"]/month[@type=\"12\"]",
990             },
991             {
992                 "de",
993                 "RELATIVE",
994                 "//ldml/dates/fields/field[@type=\"week-short\"]/relative[@type=\"-1\"]",
995                 "//ldml/dates/fields/field[@type=\"week-short\"]/relative[@type=\"0\"]",
996                 "//ldml/dates/fields/field[@type=\"week-short\"]/relative[@type=\"1\"]",
997             },
998             {
999                 "de",
1000                 "DECIMAL_FORMAT_LENGTH",
1001                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"one\"]",
1002                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000\"][@count=\"other\"]",
1003                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"one\"]",
1004                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"10000\"][@count=\"other\"]",
1005                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"one\"]",
1006                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"long\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"100000\"][@count=\"other\"]",
1007             },
1008             {
1009                 "cs",
1010                 "COUNT",
1011                 "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"one\"]",
1012                 "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"few\"]",
1013                 "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"many\"]",
1014                 "//ldml/numbers/currencies/currency[@type=\"BMD\"]/displayName[@count=\"other\"]",
1015             },
1016             {
1017                 "de",
1018                 "COUNT",
1019                 "//ldml/numbers/minimalPairs/pluralMinimalPairs[@count=\"one\"]",
1020                 "//ldml/numbers/minimalPairs/pluralMinimalPairs[@count=\"other\"]",
1021             },
1022             {
1023                 "de",
1024                 "COUNT_CASE",
1025                 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"][@case=\"accusative\"]",
1026                 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"][@case=\"dative\"]",
1027                 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"][@case=\"genitive\"]",
1028                 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"one\"]",
1029                 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"][@case=\"accusative\"]",
1030                 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"][@case=\"dative\"]",
1031                 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"][@case=\"genitive\"]",
1032                 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"area-square-kilometer\"]/unitPattern[@count=\"other\"]",
1033             },
1034             {
1035                 "hi",
1036                 "COUNT_CASE_GENDER",
1037                 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"]",
1038                 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"]",
1039                 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"]",
1040                 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"]",
1041                 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@case=\"oblique\"]",
1042                 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"one\"][@gender=\"feminine\"][@case=\"oblique\"]",
1043                 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@case=\"oblique\"]",
1044                 "//ldml/units/unitLength[@type=\"long\"]/compoundUnit[@type=\"power2\"]/compoundUnitPattern1[@count=\"other\"][@gender=\"feminine\"][@case=\"oblique\"]"
1045             }
1046         };
1047         Set<PathType> seenPt = new TreeSet<>(Arrays.asList(PathType.values()));
1048         for (String[] row : test) {
1049             String locale = row[0];
1050             PathType expectedPathType = PathType.valueOf(row[1]);
1051             CLDRFile cldrFile = testInfo.getCldrFactory().make(locale, true);
1052             List<String> paths = Arrays.asList(row);
1053             paths = paths.subList(2, paths.size());
1054             Set<String> expected = new TreeSet<>(paths);
1055             Set<Multimap<String, String>> seen = new LinkedHashSet<>();
1056             for (String path : expected) {
1057                 Set<String> grouping = new TreeSet<>(LogicalGrouping.getPaths(cldrFile, path));
1058                 final Multimap<String, String> deltaValue = delta(expected, grouping);
1059                 if (seen.add(deltaValue)) {
1060                     assertEquals(
1061                             "Logical group for " + locale + ", " + path,
1062                             ImmutableListMultimap.of(),
1063                             deltaValue);
1064                 }
1065                 PathType actualPathType = PathType.getPathTypeFromPath(path);
1066                 assertEquals("PathType", expectedPathType, actualPathType);
1067             }
1068             seenPt.remove(expectedPathType);
1069         }
1070         assertEquals("PathTypes tested", Collections.emptySet(), seenPt);
1071     }
1072 
delta(Set<String> expected, Set<String> grouping)1073     private Multimap<String, String> delta(Set<String> expected, Set<String> grouping) {
1074         if (expected.equals(grouping)) {
1075             return ImmutableListMultimap.of();
1076         }
1077         Multimap<String, String> result = LinkedHashMultimap.create();
1078         TreeSet<String> aMinusB = new TreeSet<>(expected);
1079         aMinusB.removeAll(grouping);
1080         result.putAll("expected-actual", aMinusB);
1081         TreeSet<String> bMinusA = new TreeSet<>(grouping);
1082         bMinusA.removeAll(expected);
1083         result.putAll("actual-expected", bMinusA);
1084         return result;
1085     }
1086 
1087     static class CoverageStatus {
1088 
1089         private Level level;
1090         private boolean inRoot;
1091         private boolean inId;
1092         private Level languageLevel;
1093         private String displayName;
1094 
CoverageStatus( Level level, boolean inRoot, boolean inId, Level languageLevel, String displayName)1095         public CoverageStatus(
1096                 Level level,
1097                 boolean inRoot,
1098                 boolean inId,
1099                 Level languageLevel,
1100                 String displayName) {
1101             this.level = level;
1102             this.inRoot = inRoot;
1103             this.inId = inId;
1104             this.languageLevel = languageLevel == null ? Level.UNDETERMINED : languageLevel;
1105             this.displayName = displayName;
1106         }
1107 
1108         @Override
toString()1109         public String toString() {
1110             return (inRoot ? "root" : "x")
1111                     + "\t"
1112                     + (inId ? "ids" : "x")
1113                     + "\t"
1114                     + stringForm(languageLevel)
1115                     + "\t"
1116                     + stringForm(level)
1117                     + "\t"
1118                     + displayName;
1119         }
1120 
stringForm(Level level2)1121         private String stringForm(Level level2) {
1122             if (level == null) {
1123                 return "υnd";
1124             }
1125             switch (level2) {
1126                 case UNDETERMINED:
1127                     return "υnd";
1128                 case COMPREHENSIVE:
1129                     return "ϲomp";
1130                 default:
1131                     return level2.toString();
1132             }
1133         }
1134     }
1135 
testLSR()1136     public void testLSR() {
1137         SupplementalDataInfo supplementalData = testInfo.getSupplementalDataInfo();
1138         org.unicode.cldr.util.Factory factory = testInfo.getCldrFactory();
1139         CLDRFile root = factory.make(LocaleNames.ROOT, true);
1140         CoverageLevel2 coverageLevel =
1141                 CoverageLevel2.getInstance(supplementalData, "qtz"); // non-existent locale
1142 
1143         Set<String> langsRoot = new TreeSet<>();
1144         Set<String> scriptsRoot = new TreeSet<>();
1145         Set<String> regionsRoot = new TreeSet<>();
1146 
1147         // Get root LSR codes
1148 
1149         for (String path : root) {
1150             if (!path.startsWith("//ldml/localeDisplayNames/")) {
1151                 continue;
1152             }
1153             XPathParts parts = XPathParts.getFrozenInstance(path);
1154             String code = parts.getAttributeValue(3, "type");
1155             if (code == null || code.contains("_")) {
1156                 continue;
1157             }
1158             switch (parts.getElement(3)) {
1159                 case "language":
1160                     langsRoot.add(code);
1161                     break;
1162                 case "script":
1163                     scriptsRoot.add(code);
1164                     break;
1165                 case "territory":
1166                     regionsRoot.add(code);
1167                     break;
1168             }
1169         }
1170         langsRoot = ImmutableSet.copyOf(langsRoot);
1171         scriptsRoot = ImmutableSet.copyOf(scriptsRoot);
1172         regionsRoot = ImmutableSet.copyOf(regionsRoot);
1173 
1174         // get CLDR locale IDs' codes
1175 
1176         Map<String, Level> langs = new TreeMap<>();
1177         Map<String, Level> scripts = new TreeMap<>();
1178         Map<String, Level> regions = new TreeMap<>();
1179         LikelySubtags likely = new LikelySubtags();
1180 
1181         LanguageTagParser ltp = new LanguageTagParser();
1182         for (String locale : factory.getAvailable()) {
1183             Level languageLevel = STANDARD_CODES.getLocaleCoverageLevel(Organization.cldr, locale);
1184             if (languageLevel == null || languageLevel == Level.UNDETERMINED) {
1185                 languageLevel = Level.CORE;
1186             }
1187             ltp.set(locale);
1188             likely.maximize(ltp);
1189             addBestLevel(langs, ltp.getLanguage(), languageLevel);
1190             addBestLevel(scripts, ltp.getScript(), languageLevel);
1191             addBestLevel(regions, ltp.getRegion(), languageLevel);
1192         }
1193         regions.remove("");
1194         scripts.remove("");
1195 
1196         // get the data
1197 
1198         Map<String, CoverageStatus> data = new TreeMap<>();
1199 
1200         ImmutableMap<Integer, R4<String, Map<String, Level>, Set<String>, Level>> typeToInfo =
1201                 ImmutableMap.of(
1202                         CLDRFile.LANGUAGE_NAME,
1203                         Row.of("language", langs, langsRoot, Level.MODERN),
1204                         CLDRFile.SCRIPT_NAME,
1205                         Row.of("script", scripts, scriptsRoot, Level.MODERATE),
1206                         CLDRFile.TERRITORY_NAME,
1207                         Row.of("region", regions, regionsRoot, Level.MODERATE));
1208 
1209         for (Entry<Integer, R4<String, Map<String, Level>, Set<String>, Level>> typeAndInfo :
1210                 typeToInfo.entrySet()) {
1211             int type = typeAndInfo.getKey();
1212             String name = typeAndInfo.getValue().get0();
1213             Map<String, Level> idPartMap = typeAndInfo.getValue().get1();
1214             Set<String> setRoot = typeAndInfo.getValue().get2();
1215             Level targetLevel = typeAndInfo.getValue().get3();
1216             for (String code : Sets.union(idPartMap.keySet(), setRoot)) {
1217                 String displayName = testInfo.getEnglish().getName(type, code);
1218                 String path = CLDRFile.getKey(type, code);
1219                 Level level = coverageLevel.getLevel(path);
1220                 data.put(
1221                         name + "\t" + code,
1222                         new CoverageStatus(
1223                                 level,
1224                                 setRoot.contains(code),
1225                                 idPartMap.containsKey(code),
1226                                 idPartMap.get(code),
1227                                 displayName));
1228             }
1229         }
1230         if (SHOW_LSR_DATA) {
1231 
1232             System.out.println(
1233                     "\nType\tCode\tIn Root\tIn CLDR Locales\tCLDR TargeLevel\tRoot Path Level\tCombinations");
1234             for (Entry<String, CoverageStatus> entry : data.entrySet()) {
1235                 System.out.println(entry.getKey() + "\t" + entry.getValue());
1236             }
1237             System.out.println();
1238             for (Entry<String, CoverageStatus> entry : data.entrySet()) {
1239                 final String key = entry.getKey();
1240                 if (!key.startsWith("language")) {
1241                     continue;
1242                 }
1243                 final CoverageStatus value = entry.getValue();
1244                 if (value.inId) {
1245                     continue;
1246                 }
1247                 String[] parts = key.split("\t");
1248                 PopulationData population = SDI.getBaseLanguagePopulationData(parts[1]);
1249                 if (population == null) {
1250                     System.out.println(key + "\t" + value.displayName + "\t" + value + "\t-1\t-1");
1251                 } else {
1252                     System.out.println(
1253                             key
1254                                     + "\t"
1255                                     + value.displayName
1256                                     + "\t"
1257                                     + value
1258                                     + "\t"
1259                                     + population.getPopulation()
1260                                     + "\t"
1261                                     + population.getLiteratePopulation());
1262                 }
1263             }
1264         }
1265 
1266         Set<String> ids = new TreeSet<>();
1267         Set<String> missing = new TreeSet<>();
1268         for (Entry<String, CoverageStatus> entry : data.entrySet()) {
1269             final String key = entry.getKey();
1270             if (!key.startsWith("language")) {
1271                 continue;
1272             }
1273             final CoverageStatus value = entry.getValue();
1274             if (value.inId) {
1275                 String[] parts = key.split("\t");
1276                 ids.add(parts[1]);
1277                 if (!value.inRoot) {
1278                     missing.add(parts[1]);
1279                 }
1280             }
1281         }
1282         if (!assertEquals(
1283                 "Language subtags that are in a CLDR locale's ID are in root ("
1284                         + missing.size()
1285                         + ")",
1286                 "",
1287                 Joiner.on(' ').join(missing))) {
1288             warnln(
1289                     "Full set for resetting $language in attributeValueValidity.xml ("
1290                             + ids.size()
1291                             + "):"
1292                             + breakLines(ids, "\n                "));
1293         }
1294     }
1295 
breakLines(Set<String> ids, String indent)1296     private String breakLines(Set<String> ids, String indent) {
1297         StringBuilder result = new StringBuilder();
1298         int lastFirstChar = 0;
1299         for (String id : ids) {
1300             int firstChar = id.codePointAt(0);
1301             result.append(firstChar == lastFirstChar ? " " : indent);
1302             result.append(id);
1303             lastFirstChar = firstChar;
1304         }
1305         return result.toString();
1306     }
1307 
addBestLevel(Map<String, Level> codeToBestLevel, String code, Level level)1308     private void addBestLevel(Map<String, Level> codeToBestLevel, String code, Level level) {
1309         if (level != Level.UNDETERMINED) {
1310             int debug = 0;
1311         }
1312         Level old = codeToBestLevel.get(code);
1313         if (old == null) {
1314             codeToBestLevel.put(code, level);
1315         } else if (level.compareTo(old) > 0) {
1316             codeToBestLevel.put(code, level);
1317         } else if (level != old) {
1318             int debug = 0;
1319         }
1320     }
1321 
TestEnglishCoverage()1322     public void TestEnglishCoverage() {
1323         Output<String> pathWhereFound = new Output<>();
1324         Output<String> localeWhereFound = new Output<>();
1325         Set<Row.R5<String, String, Boolean, Boolean, Level>> inherited = new TreeSet<>();
1326         for (String path : ENGLISH) {
1327             String value = ENGLISH.getStringValueWithBailey(path, pathWhereFound, localeWhereFound);
1328             final boolean samePath = path.equals(pathWhereFound.value);
1329             final boolean sameLocale = "en".equals(localeWhereFound.value);
1330             if (!samePath) {
1331                 Level level = SDI.getCoverageLevel(path, "en");
1332                 if (level.compareTo(Level.MODERN) <= 0) {
1333                     inherited.add(Row.of(path, value, samePath, sameLocale, level));
1334                 }
1335             }
1336         }
1337         if (!assertEquals("English has sideways inheritance:", 0, inherited.size())) {
1338             System.out.println("Check the following, then use in modify_config.txt\n");
1339             String pattern = "locale=en ; action=add ; new_path=%s ; new_value=%s";
1340             for (Row.R5<String, String, Boolean, Boolean, Level> row : inherited) {
1341                 System.out.println(String.format(pattern, row.get0(), row.get1()));
1342                 if (DEBUG) {
1343                     System.out.println(
1344                             String.format(
1345                                     "%s\t%s\t%s\t%s\t%s",
1346                                     row.get0(), row.get1(), row.get2(), row.get3(), row.get4()));
1347                 }
1348             }
1349         }
1350     }
1351 }
1352