1 package org.unicode.cldr.tool; 2 3 import com.google.common.base.Splitter; 4 import com.ibm.icu.text.UnicodeSet; 5 import java.util.HashMap; 6 import java.util.HashSet; 7 import java.util.Iterator; 8 import java.util.LinkedHashSet; 9 import java.util.Map; 10 import java.util.Map.Entry; 11 import java.util.Set; 12 import java.util.TreeMap; 13 import java.util.TreeSet; 14 import java.util.regex.Matcher; 15 import org.unicode.cldr.util.CLDRConfig; 16 import org.unicode.cldr.util.CLDRFile; 17 import org.unicode.cldr.util.Factory; 18 import org.unicode.cldr.util.Pair; 19 import org.unicode.cldr.util.PatternCache; 20 import org.unicode.cldr.util.SupplementalDataInfo; 21 import org.unicode.cldr.util.Timer; 22 import org.unicode.cldr.util.XPathParts; 23 24 public class ListUnits { 25 private static final UnicodeSet BIDI_CONTROL = new UnicodeSet("[:bidi_control:]").freeze(); 26 private static final CLDRConfig CONFIG = CLDRConfig.getInstance(); 27 private static final SupplementalDataInfo SUPP = CONFIG.getSupplementalDataInfo(); 28 private static final Task TASK = Task.listSimpleUnits; 29 30 private enum Task { 31 listUnits, 32 listSimpleUnits, 33 showDecimals, 34 getDigits, 35 } 36 37 enum Type { 38 root, 39 en, 40 other; 41 fromString(String type)42 static Type fromString(String type) { 43 return type.equals("en") ? en : type.equals("root") ? root : other; 44 } 45 } 46 main(String[] args)47 public static void main(String[] args) { 48 Factory cldrFactory = CONFIG.getCldrFactory(); 49 Set<String> defaultContent = SUPP.getDefaultContentLocales(); 50 Set<String> seen = new HashSet<>(); 51 52 LinkedHashSet<String> items = new LinkedHashSet<>(); 53 items.add("root"); 54 items.add("en"); 55 items.addAll(cldrFactory.getAvailableLanguages()); 56 Map<String, Data> rootMap = new HashMap<>(); 57 Map<String, Data> enMap = new HashMap<>(); 58 59 Timer timer = new Timer(); 60 int count = 0; 61 Splitter SEMI = Splitter.on(";").trimResults(); 62 Matcher currencyMatcher = PatternCache.get("([^0#]*).*[0#]([^0#]*)").matcher(""); 63 64 for (String locale : items) { 65 Type type = Type.fromString(locale); 66 if (type == Type.root || type == Type.en || defaultContent.contains(locale)) { 67 continue; 68 } 69 CLDRFile cldrFile = cldrFactory.make(locale, true); 70 // DecimalFormat format = new DecimalFormat(currencyPattern); 71 // String prefix = format.getPositivePrefix(); 72 // String suffix = format.getPositiveSuffix(); 73 74 // ICUServiceBuilder builder = new ICUServiceBuilder().setCldrFile(cldrFile); 75 // DecimalFormat format = builder.getCurrencyFormat("XXX"); 76 // String prefix = format.getPositivePrefix().replace("XXX", "\u00a4"); 77 // String suffix = format.getPositiveSuffix().replace("XXX", "\u00a4"); 78 switch (TASK) { 79 case showDecimals: 80 { 81 String compactPathPrefix = 82 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]"; 83 String currencyPattern = 84 cldrFile.getStringValue( 85 "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]"); 86 String firstPart = SEMI.split(currencyPattern).iterator().next(); 87 if (!currencyMatcher.reset(firstPart).matches()) { 88 throw new IllegalArgumentException("bad matcher"); 89 } 90 String prefix = currencyMatcher.group(1); 91 String suffix = currencyMatcher.group(2); 92 System.out.println( 93 "\n#" 94 + locale 95 + "\t«" 96 + prefix 97 + "»\t«" 98 + suffix 99 + "»\t«" 100 + currencyPattern 101 + "»"); 102 TreeMap<String, String> data = new TreeMap<>(); 103 for (String path : cldrFile.fullIterable()) { 104 // if (s.contains("decimalFormats")) { 105 // System.out.println(s); 106 // } 107 if (path.startsWith(compactPathPrefix)) { 108 String value = cldrFile.getStringValue(path); 109 String mod = 110 path.replace("decimal", "currency") 111 + "[@draft=\"provisional\"]"; 112 // // locale=en ; action=add ; 113 // new_path=//ldml/localeDisplayNames/territories/territory[@type="PS"][@alt="short"] ; new_value=Palestine 114 data.put( 115 mod, 116 "locale=" 117 + locale 118 + " ; action=add" 119 + " ; new_value=" 120 + prefix 121 + value 122 + suffix 123 + " ; new_path=" 124 + mod); 125 } 126 } 127 for (Entry<String, String> line : data.entrySet()) { 128 System.out.println(line.getValue()); 129 } 130 data.clear(); 131 break; 132 } 133 case listUnits: 134 case listSimpleUnits: 135 { 136 Set<String> units = 137 getUnits( 138 cldrFile, 139 TASK, 140 type == Type.root 141 ? rootMap 142 : type == Type.en ? enMap : null); 143 if (type == Type.en) { 144 TreeSet<String> missing = new TreeSet<>(seen); 145 missing.removeAll(units); 146 for (String unit : missing) { 147 // locale=en ; action=add ; 148 // new_path=//ldml/localeDisplayNames/territories/territory[@type="PS"][@alt="short"] ; new_value=Palestine 149 Data data = rootMap.get(unit); 150 if (data != null) { 151 System.out.println(data); 152 } 153 } 154 } 155 Splitter HYPHEN = Splitter.on('-'); 156 String oldBase = ""; 157 for (String unit : units) { 158 if (!seen.contains(unit)) { 159 switch (TASK) { 160 case listSimpleUnits: 161 String base = HYPHEN.split(unit).iterator().next(); 162 if (!base.equals(oldBase)) { 163 oldBase = base; 164 System.out.println(); 165 } else { 166 System.out.print(' '); 167 } 168 System.out.print(unit); 169 break; 170 case listUnits: 171 System.out.println( 172 "\t" 173 + unit.replace("/", "\t") 174 .replaceFirst("-", "\t") 175 + "\t" 176 + locale); 177 break; 178 } 179 seen.add(unit); 180 } 181 } 182 break; 183 } 184 case getDigits: 185 { 186 getDigits(cldrFile); 187 break; 188 } 189 } 190 } 191 System.out.println(); 192 System.out.println("#Done: " + count + ", " + timer); 193 } 194 getDigits(CLDRFile cldrFile)195 static void getDigits(CLDRFile cldrFile) { 196 System.out.println(cldrFile.getLocaleID()); 197 String numberSystem = cldrFile.getWinningValue("//ldml/numbers/defaultNumberingSystem"); 198 Set<String> seen = new HashSet<>(); 199 seen.add(numberSystem); 200 Pair<UnicodeSet, UnicodeSet> main = getCharacters(cldrFile, numberSystem); 201 System.out.println( 202 "\tdefault: " 203 + numberSystem 204 + ", " 205 + main.getFirst().toPattern(false) 206 + ", " 207 + main.getSecond().toPattern(false)); 208 for (Iterator<String> it = cldrFile.iterator("//ldml/numbers/otherNumberingSystems"); 209 it.hasNext(); ) { 210 String path = it.next(); 211 String otherNumberingSystem = cldrFile.getWinningValue(path); 212 if (seen.contains(otherNumberingSystem)) { 213 continue; 214 } 215 seen.add(otherNumberingSystem); 216 main = getCharacters(cldrFile, otherNumberingSystem); 217 System.out.println( 218 "\tother: " 219 + otherNumberingSystem 220 + ", " 221 + main.getFirst().toPattern(false) 222 + "\t" 223 + main.getSecond().toPattern(false)); 224 } 225 } 226 getCharacters( CLDRFile cldrFileToCheck, String numberSystem)227 private static Pair<UnicodeSet, UnicodeSet> getCharacters( 228 CLDRFile cldrFileToCheck, String numberSystem) { 229 String digitString = SUPP.getDigits(numberSystem); 230 UnicodeSet digits = 231 digitString == null ? UnicodeSet.EMPTY : new UnicodeSet().addAll(digitString); 232 233 UnicodeSet punctuation = new UnicodeSet(); 234 Set<String> errors = new LinkedHashSet<>(); 235 add(cldrFileToCheck, "decimal", numberSystem, punctuation, errors); 236 // add(cldrFileToCheck, "exponential", numberSystem, punctuation, errors); 237 add(cldrFileToCheck, "group", numberSystem, punctuation, errors); 238 // add(cldrFileToCheck, "infinity", numberSystem, punctuation, errors); 239 add(cldrFileToCheck, "minusSign", numberSystem, punctuation, errors); 240 // add(cldrFileToCheck, "nan", numberSystem, punctuation, errors); 241 add(cldrFileToCheck, "list", numberSystem, punctuation, errors); 242 add(cldrFileToCheck, "percentSign", numberSystem, punctuation, errors); 243 add(cldrFileToCheck, "perMille", numberSystem, punctuation, errors); 244 add(cldrFileToCheck, "plusSign", numberSystem, punctuation, errors); 245 // symbols.setZeroDigit(getSymbolString(cldrFileToCheck, "nativeZeroDigit", numberSystem)); 246 if (!errors.isEmpty() && digitString != null) { 247 System.out.println("Missing: " + numberSystem + "\t" + errors); 248 } 249 punctuation.removeAll(BIDI_CONTROL); 250 return Pair.of(digits, punctuation); 251 } 252 add( CLDRFile cldrFileToCheck, String subtype, String numberSystem, UnicodeSet punctuation, Set<String> errors)253 private static void add( 254 CLDRFile cldrFileToCheck, 255 String subtype, 256 String numberSystem, 257 UnicodeSet punctuation, 258 Set<String> errors) { 259 final String result = getSymbolString(cldrFileToCheck, subtype, numberSystem); 260 if (result == null) { 261 errors.add(subtype); 262 } else { 263 punctuation.addAll(result); 264 } 265 } 266 getSymbolString(CLDRFile cldrFile, String key, String numsys)267 private static String getSymbolString(CLDRFile cldrFile, String key, String numsys) { 268 return cldrFile.getWinningValue( 269 "//ldml/numbers/symbols[@numberSystem=\"" + numsys + "\"]/" + key); 270 } 271 272 static final class Data { Data(String path2, String stringValue)273 public Data(String path2, String stringValue) { 274 path = path2; 275 value = stringValue; 276 } 277 278 final String path; 279 final String value; 280 281 @Override toString()282 public String toString() { 283 return "locale=en" + " ; action=add" + " ; new_path=" + path + " ; new_value=" + value; 284 } 285 } 286 getUnits(CLDRFile cldrFile, Task task, Map<String, Data> extra)287 private static Set<String> getUnits(CLDRFile cldrFile, Task task, Map<String, Data> extra) { 288 Set<String> seen = new TreeSet<>(); 289 for (String path : cldrFile) { 290 if (!path.contains("/unit")) { 291 continue; 292 } 293 XPathParts parts = XPathParts.getFrozenInstance(path); 294 String unit = parts.findAttributeValue("unit", "type"); 295 if (unit == null) { 296 continue; 297 } 298 String key = unit; 299 if (task == Task.listUnits) { 300 String length = parts.findAttributeValue("unitLength", "type"); 301 String per = "perUnitPattern".equals(parts.getElement(-1)) ? "per" : ""; 302 key = unit + "/" + length + "/" + per; 303 } 304 seen.add(key); 305 if (extra != null && !path.endsWith("/alias")) { 306 extra.put(key, new Data(path, cldrFile.getStringValue(path))); 307 } 308 } 309 return seen; 310 } 311 } 312