xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestScriptMetadata.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.unittest;
2 
3 import com.google.common.base.Joiner;
4 import com.ibm.icu.impl.Relation;
5 import com.ibm.icu.impl.Row;
6 import com.ibm.icu.lang.UCharacter;
7 import com.ibm.icu.lang.UProperty;
8 import com.ibm.icu.lang.UScript;
9 import com.ibm.icu.text.UTF16;
10 import com.ibm.icu.text.UnicodeSet;
11 import com.ibm.icu.util.VersionInfo;
12 import java.util.Arrays;
13 import java.util.BitSet;
14 import java.util.Collections;
15 import java.util.EnumMap;
16 import java.util.HashSet;
17 import java.util.Iterator;
18 import java.util.LinkedHashSet;
19 import java.util.Map.Entry;
20 import java.util.Set;
21 import java.util.TreeSet;
22 import org.unicode.cldr.draft.EnumLookup;
23 import org.unicode.cldr.draft.ScriptMetadata;
24 import org.unicode.cldr.draft.ScriptMetadata.IdUsage;
25 import org.unicode.cldr.draft.ScriptMetadata.Info;
26 import org.unicode.cldr.draft.ScriptMetadata.Shaping;
27 import org.unicode.cldr.draft.ScriptMetadata.Trinary;
28 import org.unicode.cldr.util.CLDRConfig;
29 import org.unicode.cldr.util.CLDRFile;
30 import org.unicode.cldr.util.Containment;
31 import org.unicode.cldr.util.StandardCodes;
32 import org.unicode.cldr.util.With;
33 import org.unicode.cldr.util.XPathParts;
34 
35 public class TestScriptMetadata extends TestFmwkPlus {
36     private static final VersionInfo ICU_UNICODE_VERSION = UCharacter.getUnicodeVersion();
37     static CLDRConfig testInfo = CLDRConfig.getInstance();
38 
main(String[] args)39     public static void main(String[] args) {
40         new TestScriptMetadata().run(args);
41     }
42 
TestLookup()43     public void TestLookup() {
44         EnumLookup<IdUsage> temp = EnumLookup.of(IdUsage.class);
45         assertEquals("", IdUsage.LIMITED_USE, temp.forString("limited Use"));
46     }
47 
TestScriptOfSample()48     public void TestScriptOfSample() {
49         BitSet bitset = new BitSet();
50         for (String script : new TreeSet<>(ScriptMetadata.getScripts())) {
51             Info info0 = ScriptMetadata.getInfo(script);
52             int codePointCount = UTF16.countCodePoint(info0.sampleChar);
53             assertEquals("Sample must be single character", 1, codePointCount);
54             if (ICU_UNICODE_VERSION.compareTo(info0.age) >= 0) {
55                 int scriptCode =
56                         UScript.getScriptExtensions(info0.sampleChar.codePointAt(0), bitset);
57                 assertTrue(
58                         script
59                                 + ": The sample character must have a "
60                                 + "single, valid script, no ScriptExtensions: "
61                                 + scriptCode,
62                         scriptCode >= 0);
63             }
64         }
65     }
66 
TestBasic()67     public void TestBasic() {
68         Info info0 = ScriptMetadata.getInfo(UScript.LATIN);
69         if (ScriptMetadata.errors.size() != 0) {
70             if (ScriptMetadata.errors.size() == 1) {
71                 logln(
72                         "ScriptMetadata initialization errors\t"
73                                 + ScriptMetadata.errors.size()
74                                 + "\t"
75                                 + Joiner.on("\n").join(ScriptMetadata.errors));
76             } else {
77                 errln(
78                         "ScriptMetadata initialization errors\t"
79                                 + ScriptMetadata.errors.size()
80                                 + "\t"
81                                 + Joiner.on("\n").join(ScriptMetadata.errors));
82             }
83         }
84 
85         // Latin Latn 2 L European Recommended no no no no
86         assertEquals("Latin-rank", 2, info0.rank);
87         assertEquals("Latin-country", "IT", info0.originCountry);
88         assertEquals("Latin-sample", "L", info0.sampleChar);
89         assertEquals("Latin-id usage", ScriptMetadata.IdUsage.RECOMMENDED, info0.idUsage);
90         assertEquals("Latin-ime?", Trinary.NO, info0.ime);
91         assertEquals("Latin-lb letters?", Trinary.NO, info0.lbLetters);
92         assertEquals("Latin-rtl?", Trinary.NO, info0.rtl);
93         assertEquals("Latin-shaping", Shaping.MIN, info0.shapingReq);
94         assertEquals("Latin-density", 1, info0.density);
95         assertEquals("Latin-Case", Trinary.YES, info0.hasCase);
96 
97         info0 = ScriptMetadata.getInfo(UScript.HEBREW);
98         assertEquals("Arabic-rtl", Trinary.YES, info0.rtl);
99         assertEquals("Arabic-shaping", Shaping.NO, info0.shapingReq);
100         assertEquals("Arabic-Case", Trinary.NO, info0.hasCase);
101     }
102 
103     @SuppressWarnings("deprecation")
TestScripts()104     public void TestScripts() {
105         UnicodeSet temp = new UnicodeSet();
106         Set<String> missingScripts = new TreeSet<>();
107         Relation<IdUsage, String> map =
108                 Relation.of(new EnumMap<IdUsage, Set<String>>(IdUsage.class), LinkedHashSet.class);
109         for (int i = UScript.COMMON; i < UScript.CODE_LIMIT; ++i) {
110             Info info = ScriptMetadata.getInfo(i);
111             if (info != null) {
112                 map.put(
113                         info.idUsage,
114                         UScript.getName(i) + "\t(" + UScript.getShortName(i) + ")\t" + info);
115             } else {
116                 // There are many script codes that are not "real"; there are no
117                 // Unicode characters for them.
118                 // separate those out.
119                 temp.applyIntPropertyValue(UProperty.SCRIPT, i);
120                 if (temp.size() != 0) { // is real
121                     errln(
122                             "Missing script metadata for "
123                                     + UScript.getName(i)
124                                     + "\t("
125                                     + UScript.getShortName(i));
126                 } else { // is not real
127                     missingScripts.add(UScript.getShortName(i));
128                 }
129             }
130         }
131         for (Entry<IdUsage, String> entry : map.keyValueSet()) {
132             logln("Script metadata found for script:" + entry.getValue());
133         }
134         if (!missingScripts.isEmpty()) {
135             logln(
136                     "No script metadata for the following scripts (no Unicode characters defined): "
137                             + missingScripts.toString());
138         }
139     }
140 
141     // lifted from ShowLanguages
getEnglishTypes( String type, int code, StandardCodes sc, CLDRFile english)142     private static Set<String> getEnglishTypes(
143             String type, int code, StandardCodes sc, CLDRFile english) {
144         Set<String> result = new HashSet<>(sc.getSurveyToolDisplayCodes(type));
145         for (Iterator<String> it = english.getAvailableIterator(code); it.hasNext(); ) {
146             XPathParts parts = XPathParts.getFrozenInstance(it.next());
147             String newType = parts.getAttributeValue(-1, "type");
148             if (!result.contains(newType)) {
149                 result.add(newType);
150             }
151         }
152         return result;
153     }
154 
155     // lifted from ShowLanguages
getScriptsToShow(StandardCodes sc, CLDRFile english)156     private static Set<String> getScriptsToShow(StandardCodes sc, CLDRFile english) {
157         return getEnglishTypes("script", CLDRFile.SCRIPT_NAME, sc, english);
158     }
159 
TestShowLanguages()160     public void TestShowLanguages() {
161         // lifted from ShowLanguages - this is what ShowLanguages tried to do.
162         StandardCodes sc = StandardCodes.make();
163         CLDRFile english = testInfo.getEnglish();
164         Set<String> bads = new TreeSet<>();
165         UnicodeSet temp = new UnicodeSet();
166         for (String s : getScriptsToShow(sc, english)) {
167             if (ScriptMetadata.getInfo(s) == null) {
168                 // There are many script codes that are not "real"; there are no
169                 // Unicode characters for them.
170                 // separate those out.
171                 temp.applyIntPropertyValue(UProperty.SCRIPT, UScript.getCodeFromName(s));
172                 if (temp.size() != 0) { // is real
173                     bads.add(s);
174                 }
175             }
176         }
177         if (!bads.isEmpty()) {
178             errln("No metadata for scripts: " + bads.toString());
179         }
180     }
181 
TestGeographicGrouping()182     public void TestGeographicGrouping() {
183         CLDRFile english = testInfo.getEnglish();
184         Set<Row.R3<IdUsage, String, String>> lines = new TreeSet<>();
185         Set<String> extras = ScriptMetadata.getExtras();
186         for (Entry<String, Info> sc : ScriptMetadata.iterable()) {
187             String scriptCode = sc.getKey();
188             if (extras.contains(scriptCode)) {
189                 continue;
190             }
191             Info info = sc.getValue();
192             String continent = Containment.getContinent(info.originCountry);
193             String container =
194                     !continent.equals("142")
195                             ? continent
196                             : Containment.getSubcontinent(info.originCountry);
197 
198             lines.add(
199                     Row.of(
200                             info.idUsage,
201                             english.getName(CLDRFile.TERRITORY_NAME, continent),
202                             info.idUsage
203                                     + "\t"
204                                     + english.getName(CLDRFile.TERRITORY_NAME, container)
205                                     + "\t"
206                                     + scriptCode
207                                     + "\t"
208                                     + english.getName(CLDRFile.SCRIPT_NAME, scriptCode)));
209         }
210         for (Row.R3<IdUsage, String, String> s : lines) {
211             logln(s.get2());
212         }
213     }
214 
TestScriptCategories()215     public void TestScriptCategories() {
216 
217         // test completeness
218         Set<String> scripts = new TreeSet<>(ScriptMetadata.getScripts());
219         scripts.removeAll(Arrays.asList("Zinh", "Zyyy", "Zzzz"));
220         logln("All: " + scripts);
221         for (ScriptMetadata.Groupings x : ScriptMetadata.Groupings.values()) {
222             logln(x + ": " + x.scripts.toString());
223             scripts.removeAll(x.scripts);
224         }
225         assertEquals("Completeness", Collections.EMPTY_SET, scripts);
226 
227         // test no overlap
228         assertEquals("Overlap", Collections.EMPTY_SET, scripts);
229         for (ScriptMetadata.Groupings x : ScriptMetadata.Groupings.values()) {
230             for (ScriptMetadata.Groupings y : ScriptMetadata.Groupings.values()) {
231                 if (y == x) continue;
232                 assertTrue("overlap", Collections.disjoint(x.scripts, y.scripts));
233             }
234         }
235 
236         // assertEqualsX(Groupings.EUROPEAN, ScriptCategories.OLD_EUROPEAN);
237         // assertEqualsX(Groupings.MIDDLE_EASTERN,
238         // ScriptCategories.OLD_MIDDLE_EASTERN);
239         // assertEqualsX(Groupings.SOUTH_ASIAN,
240         // ScriptCategories.OLD_SOUTH_ASIAN);
241         // assertEqualsX(Groupings.SOUTHEAST_ASIAN,
242         // ScriptCategories.OLD_SOUTHEAST_ASIAN);
243         // assertEqualsX(Groupings.EAST_ASIAN, ScriptCategories.OLD_EAST_ASIAN);
244         // assertEqualsX(Groupings.AFRICAN, ScriptCategories.OLD_AFRICAN);
245         // assertEqualsX(Groupings.AMERICAN, ScriptCategories.OLD_AMERICAN);
246         //
247         // assertEqualsX("Historic: ", ScriptCategories.HISTORIC_SCRIPTS,
248         // ScriptCategories.OLD_HISTORIC_SCRIPTS);
249         //
250     }
251 
252     //    private void assertEqualsX(Groupings aRaw, Set<String> bRaw) {
253     //        assertEqualsX(aRaw.toString(), aRaw.scripts, bRaw);
254     //    }
255 
assertEqualsX(String title, Set<String> a, Set<String> bRaw)256     public void assertEqualsX(String title, Set<String> a, Set<String> bRaw) {
257         TreeSet<String> b =
258                 With.in(bRaw).toCollection(ScriptMetadata.TO_SHORT_SCRIPT, new TreeSet<String>());
259 
260         Set<String> a_b = new TreeSet<>(a);
261         a_b.removeAll(b);
262         Set<String> b_a = new TreeSet<>(b);
263         b_a.removeAll(a);
264         assertEquals(title + " New vs Old, ", a_b.toString(), b_a.toString());
265     }
266 }
267