1 package org.unicode.cldr.unittest; 2 3 import com.google.common.base.Joiner; 4 import com.ibm.icu.impl.Relation; 5 import com.ibm.icu.impl.Row; 6 import com.ibm.icu.lang.UCharacter; 7 import com.ibm.icu.lang.UProperty; 8 import com.ibm.icu.lang.UScript; 9 import com.ibm.icu.text.UTF16; 10 import com.ibm.icu.text.UnicodeSet; 11 import com.ibm.icu.util.VersionInfo; 12 import java.util.Arrays; 13 import java.util.BitSet; 14 import java.util.Collections; 15 import java.util.EnumMap; 16 import java.util.HashSet; 17 import java.util.Iterator; 18 import java.util.LinkedHashSet; 19 import java.util.Map.Entry; 20 import java.util.Set; 21 import java.util.TreeSet; 22 import org.unicode.cldr.draft.EnumLookup; 23 import org.unicode.cldr.draft.ScriptMetadata; 24 import org.unicode.cldr.draft.ScriptMetadata.IdUsage; 25 import org.unicode.cldr.draft.ScriptMetadata.Info; 26 import org.unicode.cldr.draft.ScriptMetadata.Shaping; 27 import org.unicode.cldr.draft.ScriptMetadata.Trinary; 28 import org.unicode.cldr.util.CLDRConfig; 29 import org.unicode.cldr.util.CLDRFile; 30 import org.unicode.cldr.util.Containment; 31 import org.unicode.cldr.util.StandardCodes; 32 import org.unicode.cldr.util.With; 33 import org.unicode.cldr.util.XPathParts; 34 35 public class TestScriptMetadata extends TestFmwkPlus { 36 private static final VersionInfo ICU_UNICODE_VERSION = UCharacter.getUnicodeVersion(); 37 static CLDRConfig testInfo = CLDRConfig.getInstance(); 38 main(String[] args)39 public static void main(String[] args) { 40 new TestScriptMetadata().run(args); 41 } 42 TestLookup()43 public void TestLookup() { 44 EnumLookup<IdUsage> temp = EnumLookup.of(IdUsage.class); 45 assertEquals("", IdUsage.LIMITED_USE, temp.forString("limited Use")); 46 } 47 TestScriptOfSample()48 public void TestScriptOfSample() { 49 BitSet bitset = new BitSet(); 50 for (String script : new TreeSet<>(ScriptMetadata.getScripts())) { 51 Info info0 = ScriptMetadata.getInfo(script); 52 int codePointCount = UTF16.countCodePoint(info0.sampleChar); 53 assertEquals("Sample must be single character", 1, codePointCount); 54 if (ICU_UNICODE_VERSION.compareTo(info0.age) >= 0) { 55 int scriptCode = 56 UScript.getScriptExtensions(info0.sampleChar.codePointAt(0), bitset); 57 assertTrue( 58 script 59 + ": The sample character must have a " 60 + "single, valid script, no ScriptExtensions: " 61 + scriptCode, 62 scriptCode >= 0); 63 } 64 } 65 } 66 TestBasic()67 public void TestBasic() { 68 Info info0 = ScriptMetadata.getInfo(UScript.LATIN); 69 if (ScriptMetadata.errors.size() != 0) { 70 if (ScriptMetadata.errors.size() == 1) { 71 logln( 72 "ScriptMetadata initialization errors\t" 73 + ScriptMetadata.errors.size() 74 + "\t" 75 + Joiner.on("\n").join(ScriptMetadata.errors)); 76 } else { 77 errln( 78 "ScriptMetadata initialization errors\t" 79 + ScriptMetadata.errors.size() 80 + "\t" 81 + Joiner.on("\n").join(ScriptMetadata.errors)); 82 } 83 } 84 85 // Latin Latn 2 L European Recommended no no no no 86 assertEquals("Latin-rank", 2, info0.rank); 87 assertEquals("Latin-country", "IT", info0.originCountry); 88 assertEquals("Latin-sample", "L", info0.sampleChar); 89 assertEquals("Latin-id usage", ScriptMetadata.IdUsage.RECOMMENDED, info0.idUsage); 90 assertEquals("Latin-ime?", Trinary.NO, info0.ime); 91 assertEquals("Latin-lb letters?", Trinary.NO, info0.lbLetters); 92 assertEquals("Latin-rtl?", Trinary.NO, info0.rtl); 93 assertEquals("Latin-shaping", Shaping.MIN, info0.shapingReq); 94 assertEquals("Latin-density", 1, info0.density); 95 assertEquals("Latin-Case", Trinary.YES, info0.hasCase); 96 97 info0 = ScriptMetadata.getInfo(UScript.HEBREW); 98 assertEquals("Arabic-rtl", Trinary.YES, info0.rtl); 99 assertEquals("Arabic-shaping", Shaping.NO, info0.shapingReq); 100 assertEquals("Arabic-Case", Trinary.NO, info0.hasCase); 101 } 102 103 @SuppressWarnings("deprecation") TestScripts()104 public void TestScripts() { 105 UnicodeSet temp = new UnicodeSet(); 106 Set<String> missingScripts = new TreeSet<>(); 107 Relation<IdUsage, String> map = 108 Relation.of(new EnumMap<IdUsage, Set<String>>(IdUsage.class), LinkedHashSet.class); 109 for (int i = UScript.COMMON; i < UScript.CODE_LIMIT; ++i) { 110 Info info = ScriptMetadata.getInfo(i); 111 if (info != null) { 112 map.put( 113 info.idUsage, 114 UScript.getName(i) + "\t(" + UScript.getShortName(i) + ")\t" + info); 115 } else { 116 // There are many script codes that are not "real"; there are no 117 // Unicode characters for them. 118 // separate those out. 119 temp.applyIntPropertyValue(UProperty.SCRIPT, i); 120 if (temp.size() != 0) { // is real 121 errln( 122 "Missing script metadata for " 123 + UScript.getName(i) 124 + "\t(" 125 + UScript.getShortName(i)); 126 } else { // is not real 127 missingScripts.add(UScript.getShortName(i)); 128 } 129 } 130 } 131 for (Entry<IdUsage, String> entry : map.keyValueSet()) { 132 logln("Script metadata found for script:" + entry.getValue()); 133 } 134 if (!missingScripts.isEmpty()) { 135 logln( 136 "No script metadata for the following scripts (no Unicode characters defined): " 137 + missingScripts.toString()); 138 } 139 } 140 141 // lifted from ShowLanguages getEnglishTypes( String type, int code, StandardCodes sc, CLDRFile english)142 private static Set<String> getEnglishTypes( 143 String type, int code, StandardCodes sc, CLDRFile english) { 144 Set<String> result = new HashSet<>(sc.getSurveyToolDisplayCodes(type)); 145 for (Iterator<String> it = english.getAvailableIterator(code); it.hasNext(); ) { 146 XPathParts parts = XPathParts.getFrozenInstance(it.next()); 147 String newType = parts.getAttributeValue(-1, "type"); 148 if (!result.contains(newType)) { 149 result.add(newType); 150 } 151 } 152 return result; 153 } 154 155 // lifted from ShowLanguages getScriptsToShow(StandardCodes sc, CLDRFile english)156 private static Set<String> getScriptsToShow(StandardCodes sc, CLDRFile english) { 157 return getEnglishTypes("script", CLDRFile.SCRIPT_NAME, sc, english); 158 } 159 TestShowLanguages()160 public void TestShowLanguages() { 161 // lifted from ShowLanguages - this is what ShowLanguages tried to do. 162 StandardCodes sc = StandardCodes.make(); 163 CLDRFile english = testInfo.getEnglish(); 164 Set<String> bads = new TreeSet<>(); 165 UnicodeSet temp = new UnicodeSet(); 166 for (String s : getScriptsToShow(sc, english)) { 167 if (ScriptMetadata.getInfo(s) == null) { 168 // There are many script codes that are not "real"; there are no 169 // Unicode characters for them. 170 // separate those out. 171 temp.applyIntPropertyValue(UProperty.SCRIPT, UScript.getCodeFromName(s)); 172 if (temp.size() != 0) { // is real 173 bads.add(s); 174 } 175 } 176 } 177 if (!bads.isEmpty()) { 178 errln("No metadata for scripts: " + bads.toString()); 179 } 180 } 181 TestGeographicGrouping()182 public void TestGeographicGrouping() { 183 CLDRFile english = testInfo.getEnglish(); 184 Set<Row.R3<IdUsage, String, String>> lines = new TreeSet<>(); 185 Set<String> extras = ScriptMetadata.getExtras(); 186 for (Entry<String, Info> sc : ScriptMetadata.iterable()) { 187 String scriptCode = sc.getKey(); 188 if (extras.contains(scriptCode)) { 189 continue; 190 } 191 Info info = sc.getValue(); 192 String continent = Containment.getContinent(info.originCountry); 193 String container = 194 !continent.equals("142") 195 ? continent 196 : Containment.getSubcontinent(info.originCountry); 197 198 lines.add( 199 Row.of( 200 info.idUsage, 201 english.getName(CLDRFile.TERRITORY_NAME, continent), 202 info.idUsage 203 + "\t" 204 + english.getName(CLDRFile.TERRITORY_NAME, container) 205 + "\t" 206 + scriptCode 207 + "\t" 208 + english.getName(CLDRFile.SCRIPT_NAME, scriptCode))); 209 } 210 for (Row.R3<IdUsage, String, String> s : lines) { 211 logln(s.get2()); 212 } 213 } 214 TestScriptCategories()215 public void TestScriptCategories() { 216 217 // test completeness 218 Set<String> scripts = new TreeSet<>(ScriptMetadata.getScripts()); 219 scripts.removeAll(Arrays.asList("Zinh", "Zyyy", "Zzzz")); 220 logln("All: " + scripts); 221 for (ScriptMetadata.Groupings x : ScriptMetadata.Groupings.values()) { 222 logln(x + ": " + x.scripts.toString()); 223 scripts.removeAll(x.scripts); 224 } 225 assertEquals("Completeness", Collections.EMPTY_SET, scripts); 226 227 // test no overlap 228 assertEquals("Overlap", Collections.EMPTY_SET, scripts); 229 for (ScriptMetadata.Groupings x : ScriptMetadata.Groupings.values()) { 230 for (ScriptMetadata.Groupings y : ScriptMetadata.Groupings.values()) { 231 if (y == x) continue; 232 assertTrue("overlap", Collections.disjoint(x.scripts, y.scripts)); 233 } 234 } 235 236 // assertEqualsX(Groupings.EUROPEAN, ScriptCategories.OLD_EUROPEAN); 237 // assertEqualsX(Groupings.MIDDLE_EASTERN, 238 // ScriptCategories.OLD_MIDDLE_EASTERN); 239 // assertEqualsX(Groupings.SOUTH_ASIAN, 240 // ScriptCategories.OLD_SOUTH_ASIAN); 241 // assertEqualsX(Groupings.SOUTHEAST_ASIAN, 242 // ScriptCategories.OLD_SOUTHEAST_ASIAN); 243 // assertEqualsX(Groupings.EAST_ASIAN, ScriptCategories.OLD_EAST_ASIAN); 244 // assertEqualsX(Groupings.AFRICAN, ScriptCategories.OLD_AFRICAN); 245 // assertEqualsX(Groupings.AMERICAN, ScriptCategories.OLD_AMERICAN); 246 // 247 // assertEqualsX("Historic: ", ScriptCategories.HISTORIC_SCRIPTS, 248 // ScriptCategories.OLD_HISTORIC_SCRIPTS); 249 // 250 } 251 252 // private void assertEqualsX(Groupings aRaw, Set<String> bRaw) { 253 // assertEqualsX(aRaw.toString(), aRaw.scripts, bRaw); 254 // } 255 assertEqualsX(String title, Set<String> a, Set<String> bRaw)256 public void assertEqualsX(String title, Set<String> a, Set<String> bRaw) { 257 TreeSet<String> b = 258 With.in(bRaw).toCollection(ScriptMetadata.TO_SHORT_SCRIPT, new TreeSet<String>()); 259 260 Set<String> a_b = new TreeSet<>(a); 261 a_b.removeAll(b); 262 Set<String> b_a = new TreeSet<>(b); 263 b_a.removeAll(a); 264 assertEquals(title + " New vs Old, ", a_b.toString(), b_a.toString()); 265 } 266 } 267