xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestUnContainment.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.unittest;
2 
3 import com.google.common.base.Splitter;
4 import com.google.common.collect.ImmutableSet;
5 import com.google.common.collect.ImmutableSetMultimap;
6 import com.google.common.collect.Multimap;
7 import com.google.common.collect.TreeMultimap;
8 import com.ibm.icu.impl.Row.R2;
9 import com.ibm.icu.util.ICUUncheckedIOException;
10 import java.io.BufferedReader;
11 import java.io.IOException;
12 import java.util.Collection;
13 import java.util.List;
14 import java.util.Map;
15 import java.util.Map.Entry;
16 import java.util.Set;
17 import org.unicode.cldr.draft.FileUtilities;
18 import org.unicode.cldr.util.CLDRConfig;
19 import org.unicode.cldr.util.CLDRFile;
20 import org.unicode.cldr.util.CldrUtility;
21 import org.unicode.cldr.util.SupplementalDataInfo;
22 
23 public class TestUnContainment extends TestFmwkPlus {
24     static CLDRConfig testInfo = CLDRConfig.getInstance();
25     private static final SupplementalDataInfo SUPPLEMENTAL_DATA_INFO =
26             testInfo.getSupplementalDataInfo();
27     Map<String, R2<List<String>, String>> regionToInfo =
28             SUPPLEMENTAL_DATA_INFO.getLocaleAliasInfo().get("territory");
29 
30     private static final Set<String> NOT_CLDR_TERRITORY_CODES =
31             ImmutableSet.of("830"); // Channel Islands
32     private static final Set<String> KNOWN_CONTAINMENT_EXCEPTIONS =
33             ImmutableSet.of("AQ", "680"); // Antarctica, Sark
34 
35     final Multimap<String, String> UnChildToParent;
36 
37     {
38         Multimap<String, String> _UnChildToParent = TreeMultimap.create();
39         Splitter tab = Splitter.on('\t').trimResults();
try(BufferedReader unCodes = CldrUtility.getUTF8Data("external/UnCodes.tsv"); )40         try (BufferedReader unCodes = CldrUtility.getUTF8Data("external/UnCodes.tsv"); ) {
41             for (String line : FileUtilities.in(unCodes)) {
42                 List<String> items = tab.splitToList(line);
43                 if (line.isEmpty() || line.startsWith("Global Code")) {
44                     continue;
45                 }
46                 String parent = null;
47                 for (int i = 0; i < 10; i += 2) {
48                     String region = items.get(i);
49                     if (!region.isEmpty()) {
50                         region = unToCldrCode(region);
51                         if (parent != null && region != null) {
52                             _UnChildToParent.put(region, parent);
53                         }
54                         if (region != null) {
55                             parent = region;
56                         }
57                     }
58                     if (i == 6) {
59                         ++i; // hack because last two are out of order
60                     }
61                 }
62             }
63             UnChildToParent = ImmutableSetMultimap.copyOf(_UnChildToParent);
64         } catch (IOException e) {
65             throw new ICUUncheckedIOException(e);
66         }
67     }
68 
main(String[] args)69     public static void main(String[] args) {
70         new TestUnContainment().run(args);
71     }
72 
name(Collection<String> codes)73     private String name(Collection<String> codes) {
74         StringBuilder result = new StringBuilder();
75         for (String code : codes) {
76             if (result.length() != 0) {
77                 result.append(", ");
78             }
79             result.append(name(code));
80         }
81         return result.toString();
82     }
83 
name(String code)84     private String name(String code) {
85         String name = testInfo.getEnglish().getName(CLDRFile.TERRITORY_NAME, code);
86         return name + " (" + code + ")";
87     }
88 
unToCldrCode(String code)89     private String unToCldrCode(String code) {
90 
91         if (NOT_CLDR_TERRITORY_CODES.contains(code)) {
92             return null;
93         }
94 
95         R2<List<String>, String> codeInfo = regionToInfo.get(code);
96         if (codeInfo != null) {
97             if (codeInfo.get0() != null && !codeInfo.get0().isEmpty()) {
98                 code = codeInfo.get0().get(0);
99             }
100         }
101         return code;
102     }
103 
TestContainment()104     public void TestContainment() {
105 
106         /*
107         CLDR
108         <group type="001" contains="019 002 150 142 009"/> <!--World -->
109         <group type="001" contains="EU EZ UN" status="grouping"/> <!--European Union, Eurozone, United Nations -->
110         <group type="001" contains="QU" status="deprecated"/> <!--European Union -->
111         <group type="011" contains="BF BJ CI CV GH GM GN GW LR ML MR NE NG SH SL SN TG"/> <!--Western Africa -->
112          */
113         for (Entry<String, Collection<String>> entry : UnChildToParent.asMap().entrySet()) {
114             Collection<String> unParents = entry.getValue();
115             String unChild = entry.getKey();
116             // System.out.println(name(unParents) + "\t" + name(unChild));
117             for (String unParent : unParents) {
118                 Set<String> children = SUPPLEMENTAL_DATA_INFO.getContained(unParent);
119                 if (children != null && children.contains(unChild)) {
120                     continue;
121                 }
122                 // See CLDR ticket 10187 for rationalization on the known containment exceptions.
123                 if (KNOWN_CONTAINMENT_EXCEPTIONS.contains(unChild)) {
124                     continue;
125                 }
126                 msg(
127                         "UN containment doesn't match CLDR for "
128                                 + name(unParent)
129                                 + ": cldr children "
130                                 + children
131                                 + " don't contain UN "
132                                 + name(unChild),
133                         ERR,
134                         true,
135                         true);
136             }
137         }
138     }
139 }
140