xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ListUnits.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.base.Splitter;
4 import com.ibm.icu.text.UnicodeSet;
5 import java.util.HashMap;
6 import java.util.HashSet;
7 import java.util.Iterator;
8 import java.util.LinkedHashSet;
9 import java.util.Map;
10 import java.util.Map.Entry;
11 import java.util.Set;
12 import java.util.TreeMap;
13 import java.util.TreeSet;
14 import java.util.regex.Matcher;
15 import org.unicode.cldr.util.CLDRConfig;
16 import org.unicode.cldr.util.CLDRFile;
17 import org.unicode.cldr.util.Factory;
18 import org.unicode.cldr.util.Pair;
19 import org.unicode.cldr.util.PatternCache;
20 import org.unicode.cldr.util.SupplementalDataInfo;
21 import org.unicode.cldr.util.Timer;
22 import org.unicode.cldr.util.XPathParts;
23 
24 public class ListUnits {
25     private static final UnicodeSet BIDI_CONTROL = new UnicodeSet("[:bidi_control:]").freeze();
26     private static final CLDRConfig CONFIG = CLDRConfig.getInstance();
27     private static final SupplementalDataInfo SUPP = CONFIG.getSupplementalDataInfo();
28     private static final Task TASK = Task.listSimpleUnits;
29 
30     private enum Task {
31         listUnits,
32         listSimpleUnits,
33         showDecimals,
34         getDigits,
35     }
36 
/** Coarse classification of a locale id: the root locale, English, or anything else. */
enum Type {
    root,
    en,
    other;

    /**
     * Maps a locale id onto a {@link Type}.
     *
     * @param type locale id; must not be null
     * @return {@link #en} for {@code "en"}, {@link #root} for {@code "root"}, otherwise {@link
     *     #other}
     * @throws NullPointerException if {@code type} is null
     */
    static Type fromString(String type) {
        switch (type) {
            case "en":
                return en;
            case "root":
                return root;
            default:
                return other;
        }
    }
}
46 
main(String[] args)47     public static void main(String[] args) {
48         Factory cldrFactory = CONFIG.getCldrFactory();
49         Set<String> defaultContent = SUPP.getDefaultContentLocales();
50         Set<String> seen = new HashSet<>();
51 
52         LinkedHashSet<String> items = new LinkedHashSet<>();
53         items.add("root");
54         items.add("en");
55         items.addAll(cldrFactory.getAvailableLanguages());
56         Map<String, Data> rootMap = new HashMap<>();
57         Map<String, Data> enMap = new HashMap<>();
58 
59         Timer timer = new Timer();
60         int count = 0;
61         Splitter SEMI = Splitter.on(";").trimResults();
62         Matcher currencyMatcher = PatternCache.get("([^0#]*).*[0#]([^0#]*)").matcher("");
63 
64         for (String locale : items) {
65             Type type = Type.fromString(locale);
66             if (type == Type.root || type == Type.en || defaultContent.contains(locale)) {
67                 continue;
68             }
69             CLDRFile cldrFile = cldrFactory.make(locale, true);
70             //            DecimalFormat format = new DecimalFormat(currencyPattern);
71             //            String prefix = format.getPositivePrefix();
72             //            String suffix = format.getPositiveSuffix();
73 
74             //            ICUServiceBuilder builder = new ICUServiceBuilder().setCldrFile(cldrFile);
75             //            DecimalFormat format = builder.getCurrencyFormat("XXX");
76             //            String prefix = format.getPositivePrefix().replace("XXX", "\u00a4");
77             //            String suffix = format.getPositiveSuffix().replace("XXX", "\u00a4");
78             switch (TASK) {
79                 case showDecimals:
80                     {
81                         String compactPathPrefix =
82                                 "//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]";
83                         String currencyPattern =
84                                 cldrFile.getStringValue(
85                                         "//ldml/numbers/currencyFormats[@numberSystem=\"latn\"]/currencyFormatLength/currencyFormat[@type=\"standard\"]/pattern[@type=\"standard\"]");
86                         String firstPart = SEMI.split(currencyPattern).iterator().next();
87                         if (!currencyMatcher.reset(firstPart).matches()) {
88                             throw new IllegalArgumentException("bad matcher");
89                         }
90                         String prefix = currencyMatcher.group(1);
91                         String suffix = currencyMatcher.group(2);
92                         System.out.println(
93                                 "\n#"
94                                         + locale
95                                         + "\t«"
96                                         + prefix
97                                         + "»\t«"
98                                         + suffix
99                                         + "»\t«"
100                                         + currencyPattern
101                                         + "»");
102                         TreeMap<String, String> data = new TreeMap<>();
103                         for (String path : cldrFile.fullIterable()) {
104                             //                    if (s.contains("decimalFormats")) {
105                             //                        System.out.println(s);
106                             //                    }
107                             if (path.startsWith(compactPathPrefix)) {
108                                 String value = cldrFile.getStringValue(path);
109                                 String mod =
110                                         path.replace("decimal", "currency")
111                                                 + "[@draft=\"provisional\"]";
112                                 //                        // locale=en ; action=add ;
113                                 // new_path=//ldml/localeDisplayNames/territories/territory[@type="PS"][@alt="short"] ; new_value=Palestine
114                                 data.put(
115                                         mod,
116                                         "locale="
117                                                 + locale
118                                                 + " ; action=add"
119                                                 + " ; new_value="
120                                                 + prefix
121                                                 + value
122                                                 + suffix
123                                                 + " ; new_path="
124                                                 + mod);
125                             }
126                         }
127                         for (Entry<String, String> line : data.entrySet()) {
128                             System.out.println(line.getValue());
129                         }
130                         data.clear();
131                         break;
132                     }
133                 case listUnits:
134                 case listSimpleUnits:
135                     {
136                         Set<String> units =
137                                 getUnits(
138                                         cldrFile,
139                                         TASK,
140                                         type == Type.root
141                                                 ? rootMap
142                                                 : type == Type.en ? enMap : null);
143                         if (type == Type.en) {
144                             TreeSet<String> missing = new TreeSet<>(seen);
145                             missing.removeAll(units);
146                             for (String unit : missing) {
147                                 // locale=en ; action=add ;
148                                 // new_path=//ldml/localeDisplayNames/territories/territory[@type="PS"][@alt="short"] ; new_value=Palestine
149                                 Data data = rootMap.get(unit);
150                                 if (data != null) {
151                                     System.out.println(data);
152                                 }
153                             }
154                         }
155                         Splitter HYPHEN = Splitter.on('-');
156                         String oldBase = "";
157                         for (String unit : units) {
158                             if (!seen.contains(unit)) {
159                                 switch (TASK) {
160                                     case listSimpleUnits:
161                                         String base = HYPHEN.split(unit).iterator().next();
162                                         if (!base.equals(oldBase)) {
163                                             oldBase = base;
164                                             System.out.println();
165                                         } else {
166                                             System.out.print(' ');
167                                         }
168                                         System.out.print(unit);
169                                         break;
170                                     case listUnits:
171                                         System.out.println(
172                                                 "\t"
173                                                         + unit.replace("/", "\t")
174                                                                 .replaceFirst("-", "\t")
175                                                         + "\t"
176                                                         + locale);
177                                         break;
178                                 }
179                                 seen.add(unit);
180                             }
181                         }
182                         break;
183                     }
184                 case getDigits:
185                     {
186                         getDigits(cldrFile);
187                         break;
188                     }
189             }
190         }
191         System.out.println();
192         System.out.println("#Done: " + count + ", " + timer);
193     }
194 
getDigits(CLDRFile cldrFile)195     static void getDigits(CLDRFile cldrFile) {
196         System.out.println(cldrFile.getLocaleID());
197         String numberSystem = cldrFile.getWinningValue("//ldml/numbers/defaultNumberingSystem");
198         Set<String> seen = new HashSet<>();
199         seen.add(numberSystem);
200         Pair<UnicodeSet, UnicodeSet> main = getCharacters(cldrFile, numberSystem);
201         System.out.println(
202                 "\tdefault: "
203                         + numberSystem
204                         + ", "
205                         + main.getFirst().toPattern(false)
206                         + ", "
207                         + main.getSecond().toPattern(false));
208         for (Iterator<String> it = cldrFile.iterator("//ldml/numbers/otherNumberingSystems");
209                 it.hasNext(); ) {
210             String path = it.next();
211             String otherNumberingSystem = cldrFile.getWinningValue(path);
212             if (seen.contains(otherNumberingSystem)) {
213                 continue;
214             }
215             seen.add(otherNumberingSystem);
216             main = getCharacters(cldrFile, otherNumberingSystem);
217             System.out.println(
218                     "\tother: "
219                             + otherNumberingSystem
220                             + ", "
221                             + main.getFirst().toPattern(false)
222                             + "\t"
223                             + main.getSecond().toPattern(false));
224         }
225     }
226 
getCharacters( CLDRFile cldrFileToCheck, String numberSystem)227     private static Pair<UnicodeSet, UnicodeSet> getCharacters(
228             CLDRFile cldrFileToCheck, String numberSystem) {
229         String digitString = SUPP.getDigits(numberSystem);
230         UnicodeSet digits =
231                 digitString == null ? UnicodeSet.EMPTY : new UnicodeSet().addAll(digitString);
232 
233         UnicodeSet punctuation = new UnicodeSet();
234         Set<String> errors = new LinkedHashSet<>();
235         add(cldrFileToCheck, "decimal", numberSystem, punctuation, errors);
236         // add(cldrFileToCheck, "exponential", numberSystem, punctuation, errors);
237         add(cldrFileToCheck, "group", numberSystem, punctuation, errors);
238         // add(cldrFileToCheck, "infinity", numberSystem, punctuation, errors);
239         add(cldrFileToCheck, "minusSign", numberSystem, punctuation, errors);
240         // add(cldrFileToCheck, "nan", numberSystem, punctuation, errors);
241         add(cldrFileToCheck, "list", numberSystem, punctuation, errors);
242         add(cldrFileToCheck, "percentSign", numberSystem, punctuation, errors);
243         add(cldrFileToCheck, "perMille", numberSystem, punctuation, errors);
244         add(cldrFileToCheck, "plusSign", numberSystem, punctuation, errors);
245         // symbols.setZeroDigit(getSymbolString(cldrFileToCheck, "nativeZeroDigit", numberSystem));
246         if (!errors.isEmpty() && digitString != null) {
247             System.out.println("Missing: " + numberSystem + "\t" + errors);
248         }
249         punctuation.removeAll(BIDI_CONTROL);
250         return Pair.of(digits, punctuation);
251     }
252 
add( CLDRFile cldrFileToCheck, String subtype, String numberSystem, UnicodeSet punctuation, Set<String> errors)253     private static void add(
254             CLDRFile cldrFileToCheck,
255             String subtype,
256             String numberSystem,
257             UnicodeSet punctuation,
258             Set<String> errors) {
259         final String result = getSymbolString(cldrFileToCheck, subtype, numberSystem);
260         if (result == null) {
261             errors.add(subtype);
262         } else {
263             punctuation.addAll(result);
264         }
265     }
266 
getSymbolString(CLDRFile cldrFile, String key, String numsys)267     private static String getSymbolString(CLDRFile cldrFile, String key, String numsys) {
268         return cldrFile.getWinningValue(
269                 "//ldml/numbers/symbols[@numberSystem=\"" + numsys + "\"]/" + key);
270     }
271 
272     static final class Data {
Data(String path2, String stringValue)273         public Data(String path2, String stringValue) {
274             path = path2;
275             value = stringValue;
276         }
277 
278         final String path;
279         final String value;
280 
281         @Override
toString()282         public String toString() {
283             return "locale=en" + " ; action=add" + " ; new_path=" + path + " ; new_value=" + value;
284         }
285     }
286 
getUnits(CLDRFile cldrFile, Task task, Map<String, Data> extra)287     private static Set<String> getUnits(CLDRFile cldrFile, Task task, Map<String, Data> extra) {
288         Set<String> seen = new TreeSet<>();
289         for (String path : cldrFile) {
290             if (!path.contains("/unit")) {
291                 continue;
292             }
293             XPathParts parts = XPathParts.getFrozenInstance(path);
294             String unit = parts.findAttributeValue("unit", "type");
295             if (unit == null) {
296                 continue;
297             }
298             String key = unit;
299             if (task == Task.listUnits) {
300                 String length = parts.findAttributeValue("unitLength", "type");
301                 String per = "perUnitPattern".equals(parts.getElement(-1)) ? "per" : "";
302                 key = unit + "/" + length + "/" + per;
303             }
304             seen.add(key);
305             if (extra != null && !path.endsWith("/alias")) {
306                 extra.put(key, new Data(path, cldrFile.getStringValue(path)));
307             }
308         }
309         return seen;
310     }
311 }
312