xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GetDescriptions.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import java.io.IOException;
4 import java.io.PrintWriter;
5 import java.util.Arrays;
6 import java.util.HashSet;
7 import java.util.Map;
8 import java.util.Set;
9 import java.util.TreeMap;
10 import java.util.TreeSet;
11 import java.util.regex.Matcher;
12 import java.util.regex.Pattern;
13 import org.unicode.cldr.draft.FileUtilities;
14 import org.unicode.cldr.util.PatternCache;
15 import org.unicode.cldr.util.StandardCodes;
16 
17 public class GetDescriptions {
18 
19     static Matcher matcher = PatternCache.get("([^,(]+)(,([^(]+))?(.*)").matcher("");
20 
21     static Map<String, String> items = new TreeMap<>();
22     static int allCount = 1;
23     static int commaCount = 1;
24 
25     private static Map<String, Map<String, Set<String>>> name_type_codes = new TreeMap<>();
26 
27     private static Set<String> preCommas = new TreeSet<>();
28 
29     private static Set<String> postCommas = new TreeSet<>();
30 
31     private static Map<String, String> descriptionWithoutComments = new TreeMap<>();
32 
33     private static Set<String> uninvertedNames = new HashSet<>();
34 
main(String[] args)35     public static void main(String[] args) throws IOException {
36         StandardCodes sc = StandardCodes.make();
37         PrintWriter commas =
38                 FileUtilities.openUTF8Writer("c:\\data\\gen\\ltru\\", "ltru-commas.txt");
39         commas.write('\uFEFF');
40         PrintWriter all = FileUtilities.openUTF8Writer("c:\\data\\gen\\ltru\\", "ltru-all.txt");
41         all.write('\uFEFF');
42 
43         for (String type : sc.getAvailableTypes()) {
44             if (type.equals("tzid")) continue;
45             if (type.equals("currency")) continue;
46             for (String code : sc.getAvailableCodes(type)) {
47                 Map<String, String> x = sc.getLangData(type, code);
48                 if (x == null) {
49                     continue;
50                 }
51                 boolean isDeprecated = x.get("Deprecated") != null;
52 
53                 all.println(allCount++ + "\t" + type + "\t" + code + "\t" + x);
54                 String descriptionField = x.get("Description");
55                 String[] descriptions = descriptionField.split("\u25AA");
56                 items.clear();
57 
58                 for (String description : descriptions) {
59                     if (!matcher.reset(description).matches()) {
60                         commas.println(
61                                 commaCount++
62                                         + "\t"
63                                         + type
64                                         + "\t"
65                                         + code
66                                         + "\t"
67                                         + description
68                                         + "\t@NO_MATCH");
69                         continue;
70                     }
71                     String preComma = matcher.group(1).trim();
72                     String postComma = matcher.group(3);
73                     postComma = postComma == null ? "" : postComma.trim();
74                     String parens = matcher.group(4);
75                     parens = parens == null ? "" : parens.trim();
76 
77                     if (preComma.length() != 0) preCommas.add(preComma);
78                     if (postComma.length() != 0) postCommas.add(postComma);
79 
80                     String newDescription = preComma;
81 
82                     String descriptionWithoutComment = preComma;
83                     String newDescriptionWithoutComment = preComma;
84                     uninvertedNames.add(newDescriptionWithoutComment);
85 
86                     if (postComma.length() != 0) {
87                         descriptionWithoutComment += ", " + postComma;
88                         newDescription = postComma + " " + newDescription;
89                         newDescriptionWithoutComment = newDescription;
90                     }
91                     if (parens.length() != 0) {
92                         newDescription += " " + parens;
93                     }
94 
95                     if (!isDeprecated) {
96                         if (descriptionWithoutComment.length() != 0)
97                             descriptionWithoutComments.put(
98                                     descriptionWithoutComment, newDescriptionWithoutComment);
99                         addTypeNameCode(name_type_codes, type, code, newDescriptionWithoutComment);
100                     }
101 
102                     if (!descriptionField.contains(",") && !descriptionField.contains("(")) {
103                         continue;
104                     }
105 
106                     checkDuplicates(commas, type, code, descriptionWithoutComment, description);
107                     if (!newDescriptionWithoutComment.equals(descriptionWithoutComment)) {
108                         checkDuplicates(
109                                 commas, type, code, newDescriptionWithoutComment, description);
110                     }
111 
112                     if (postComma.contains(",")) {
113                         commas.println(
114                                 commaCount++
115                                         + "\t"
116                                         + type
117                                         + "\t"
118                                         + code
119                                         + "\t"
120                                         + description
121                                         + "\t@DOUBLE_COMMA");
122                         continue;
123                     }
124 
125                     if (postComma.length() == 0) {
126                         commas.println(
127                                 commaCount++ + "\t" + type + "\t" + code + "\t" + description);
128                         continue;
129                     }
130 
131                     commas.println(
132                             commaCount++
133                                     + "\t"
134                                     + type
135                                     + "\t"
136                                     + code
137                                     + "\t"
138                                     + description
139                                     + "\t=>\t"
140                                     + newDescription);
141                 }
142                 checkInversion(commas, type, code, descriptions);
143             }
144         }
145         all.close();
146         commas.close();
147         showReverse();
148         System.out.println("DONE");
149     }
150 
showReverse()151     private static void showReverse() throws IOException {
152         PrintWriter reverse =
153                 FileUtilities.openUTF8Writer("c:\\data\\gen\\ltru\\", "ltru-reverse.txt");
154         reverse.write('\uFEFF');
155         int reverseCount = 1;
156         for (String name : name_type_codes.keySet()) {
157             boolean privateUse = name.equals("PRIVATE USE");
158             Map<String, Set<String>> type_codes = name_type_codes.get(name);
159             Set<String> types = type_codes.keySet();
160             for (String type : type_codes.keySet()) {
161                 String baseCode = null;
162                 for (String code : type_codes.get(type)) {
163                     if (baseCode == null || privateUse) {
164                         baseCode = code;
165                         reverse.println(reverseCount++ + "\t" + name + "\t" + type + "\t" + code);
166                         continue;
167                     }
168                     reverse.println(
169                             reverseCount++
170                                     + "\t"
171                                     + name
172                                     + "\t"
173                                     + type
174                                     + "\t"
175                                     + code
176                                     + "\t@DUPLICATE_IN\t"
177                                     + "\t"
178                                     + baseCode);
179                 }
180             }
181             reverseIfPossible(name, types);
182         }
183         reverse.close();
184         reverseCount = 1;
185         PrintWriter inversions =
186                 FileUtilities.openUTF8Writer("c:\\data\\gen\\ltru\\", "ltru-inversions.txt");
187         for (String invertedName : descriptionWithoutComments.keySet()) {
188             String name = descriptionWithoutComments.get(invertedName);
189             if (name.equals(invertedName)) continue;
190             inversions.println(reverseCount++ + "\t" + invertedName + "\t" + name);
191         }
192         inversions.close();
193     }
194 
reverseIfPossible(String name, Set<String> types)195     static void reverseIfPossible(String name, Set<String> types) {
196         for (String uninvert : uninvertedNames) {
197             if (name.endsWith(uninvert)) {
198                 addEnd(name, uninvert, types);
199             }
200             if (name.startsWith(uninvert)) {
201                 addStart(name, uninvert, types);
202             }
203         }
204         for (String preComma : preCommas) {
205             if (name.endsWith(preComma)) {
206                 addEnd(name, preComma, types);
207             }
208         }
209         for (String postComma : postCommas) {
210             if (name.startsWith(postComma)) {
211                 addStart(name, postComma, types);
212             }
213         }
214     }
215 
addStart(String name, String postComma, Set<String> types)216     private static void addStart(String name, String postComma, Set<String> types) {
217         if (name.equals(postComma)) return;
218         if (!name.startsWith(postComma + " ")) return;
219         String trial = name.substring(postComma.length()).trim() + ", " + postComma;
220         if (descriptionWithoutComments.keySet().contains(trial)) {
221             return;
222         }
223         descriptionWithoutComments.put(trial, name + "\t@MISSING\t" + types);
224     }
225 
addEnd(String name, String preComma, Set<String> types)226     private static void addEnd(String name, String preComma, Set<String> types) {
227         if (name.equals(preComma)) return;
228         if (!name.endsWith(" " + preComma)) return;
229         String trial =
230                 preComma + ", " + name.substring(0, name.length() - preComma.length()).trim();
231         if (descriptionWithoutComments.keySet().contains(trial)) {
232             return;
233         }
234         descriptionWithoutComments.put(trial, name + "\t@MISSING\t" + types);
235     }
236 
addTypeNameCode( Map<String, Map<String, Set<String>>> name_type_codes, String type, String code, String newDescriptionWithoutComment)237     private static void addTypeNameCode(
238             Map<String, Map<String, Set<String>>> name_type_codes,
239             String type,
240             String code,
241             String newDescriptionWithoutComment) {
242         Map<String, Set<String>> type_codes = name_type_codes.get(newDescriptionWithoutComment);
243         if (type_codes == null)
244             name_type_codes.put(newDescriptionWithoutComment, type_codes = new TreeMap<>());
245         Set<String> codes = type_codes.get(type);
246         if (codes == null) type_codes.put(type, codes = new TreeSet<>());
247         codes.add(code);
248     }
249 
250     static Matcher directional =
251             Pattern.compile(
252                             "(West Central|Northern|Southern|Western|Eastern|North|South|East|West|Central|Ancient|Classical|Coastal"
253                                     + "|Highland|Isthmus|Low|Lower|Lowland|Middle|Northeastern|Northwestern|Old|Plains|Southeastern|Southwestern|Straits|Upper|Valley"
254                                     + "|Written)\\s+(.+)")
255                     .matcher("");
256 
checkInversion( PrintWriter commas, String type, String code, String[] parts)257     private static void checkInversion(
258             PrintWriter commas, String type, String code, String[] parts) {
259         Set<String> items = new TreeSet<>(Arrays.asList(parts));
260         for (String item : items) {
261             if (!directional.reset(item).matches()) {
262                 continue;
263             }
264             String trial =
265                     directional.group(2)
266                             + (directional.group(2).contains(",") ? " " : ", ")
267                             + directional.group(1);
268             if (!items.contains(trial)) {
269                 commas.println(
270                         commaCount++ + "\t" + type + "\t" + code + "\t" + "\t@MISSING\t" + trial);
271             }
272         }
273     }
274 
checkDuplicates( PrintWriter commas, String type, String code, String newPartNoComment, String part)275     private static void checkDuplicates(
276             PrintWriter commas, String type, String code, String newPartNoComment, String part) {
277         String old = items.get(newPartNoComment);
278         if (old != null) {
279             commas.println(
280                     commaCount++
281                             + "\t"
282                             + type
283                             + "\t"
284                             + code
285                             + "\t"
286                             + part
287                             + "\t@DUPLICATES\t"
288                             + old);
289         } else {
290             items.put(newPartNoComment, part);
291         }
292     }
293 }
294