xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateValidityXml.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.base.Objects;
5 import com.google.common.collect.ImmutableSet;
6 import com.google.common.collect.ImmutableSetMultimap;
7 import com.google.common.collect.Multimap;
8 import com.google.common.collect.Multimaps;
9 import com.google.common.collect.SetMultimap;
10 import com.google.common.collect.TreeMultimap;
11 import com.ibm.icu.impl.Row.R2;
12 import com.ibm.icu.util.ICUUncheckedIOException;
13 import java.io.IOException;
14 import java.lang.invoke.MethodHandles;
15 import java.util.Collection;
16 import java.util.Date;
17 import java.util.EnumMap;
18 import java.util.LinkedHashMap;
19 import java.util.List;
20 import java.util.Locale;
21 import java.util.Map;
22 import java.util.Map.Entry;
23 import java.util.Set;
24 import java.util.TreeMap;
25 import java.util.TreeSet;
26 import org.unicode.cldr.draft.ScriptMetadata;
27 import org.unicode.cldr.util.CLDRPaths;
28 import org.unicode.cldr.util.CLDRTool;
29 import org.unicode.cldr.util.DtdType;
30 import org.unicode.cldr.util.StandardCodes;
31 import org.unicode.cldr.util.StandardCodes.LstrField;
32 import org.unicode.cldr.util.StandardCodes.LstrType;
33 import org.unicode.cldr.util.StringRange;
34 import org.unicode.cldr.util.StringRange.Adder;
35 import org.unicode.cldr.util.SupplementalDataInfo;
36 import org.unicode.cldr.util.SupplementalDataInfo.CurrencyDateInfo;
37 import org.unicode.cldr.util.TempPrintWriter;
38 import org.unicode.cldr.util.Validity;
39 import org.unicode.cldr.util.Validity.Status;
40 
41 @CLDRTool(
42         alias = "generate-validity-data",
43         url = "http://cldr.unicode.org/development/updating-codes/update-validity-xml")
44 public class GenerateValidityXml {
45 
46     private static final Validity VALIDITY = Validity.getInstance();
47     private static Validity OLD_VALIDITY =
48             Validity.getInstance(CLDRPaths.LAST_RELEASE_DIRECTORY + "common/validity/");
49 
50     private static final Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG =
51             StandardCodes.getEnumLstreg();
52     private static final SupplementalDataInfo SDI = SupplementalDataInfo.getInstance();
53 
54     private static class MyAdder implements Adder {
55         Appendable target;
56         boolean twoCodePoints = false;
57         long lastCodePoint = -1;
58 
59         @Override
add(String start, String end)60         public void add(String start, String end) {
61             try {
62                 long firstCodePoint = start.codePointAt(0);
63                 if (twoCodePoints) {
64                     firstCodePoint <<= 22;
65                     firstCodePoint |= start.codePointAt(1);
66                 }
67                 if (firstCodePoint == lastCodePoint) {
68                     target.append(' ');
69                 } else {
70                     target.append("\n\t\t\t");
71                 }
72                 target.append(start);
73                 if (end != null) {
74                     target.append('~').append(end);
75                 }
76                 lastCodePoint = firstCodePoint;
77             } catch (IOException e) {
78                 throw new ICUUncheckedIOException(e);
79             }
80         }
81 
reset(boolean b)82         public void reset(boolean b) {
83             lastCodePoint = -1;
84             twoCodePoints = b;
85         }
86     }
87 
88     static Set<String> containment = SDI.getContainers();
89     static Map<String, Map<LstrField, String>> codeToData = LSTREG.get(LstrType.region);
90 
91     static class Info {
92         String mainComment;
93         // private Relation<Validity.Status, String> statusMap = Relation.of(new
94         // EnumMap<Validity.Status, Set<String>>(Validity.Status.class), TreeSet.class);
95         Map<String, Validity.Status> codeToStatus = new TreeMap<>();
96         Map<Validity.Status, String> statusComment = new EnumMap<>(Status.class);
97         Set<String> newCodes = new TreeSet<>();
98 
99         static Map<String, Info> types = new LinkedHashMap<>();
100 
getInfo(String myType)101         static Info getInfo(String myType) {
102             Info info = types.get(myType);
103             if (info == null) {
104                 types.put(myType, info = new Info());
105             }
106             return info;
107         }
108 
getStatusMap()109         public SetMultimap<Status, String> getStatusMap() {
110             TreeMultimap<Status, String> result = TreeMultimap.create();
111             Multimaps.invertFrom(Multimaps.forMap(codeToStatus), result);
112             return ImmutableSetMultimap.copyOf(result);
113         }
114 
put(String key, Status value)115         public void put(String key, Status value) {
116             codeToStatus.put(key, value);
117         }
118 
remove(String key, Status value)119         public void remove(String key, Status value) {
120             codeToStatus.remove(key, value);
121         }
122 
clear()123         public void clear() {
124             codeToStatus.clear();
125         }
126 
entrySet()127         public Set<Entry<String, Status>> entrySet() {
128             return codeToStatus.entrySet();
129         }
130 
get(String key)131         public Status get(String key) {
132             return codeToStatus.get(key);
133         }
134 
putBest(String currency, Status newStatus)135         public void putBest(String currency, Status newStatus) {
136             Status oldStatus = get(currency);
137             if (oldStatus == null || newStatus.compareTo(oldStatus) < 0) {
138                 put(currency, newStatus);
139             }
140         }
141     }
142 
143     static final Map<String, Info> types = Info.types;
144 
main(String[] args)145     public static void main(String[] args) throws IOException {
146 
147         doLstr(types);
148         doSubdivisions(types);
149         doCurrency(types);
150         // write file
151         MyAdder adder = new MyAdder();
152         for (Entry<String, Info> entry : types.entrySet()) {
153             String type = entry.getKey();
154             final Info info = entry.getValue();
155             Multimap<Status, String> subtypeMap = info.getStatusMap();
156             try (TempPrintWriter output =
157                     TempPrintWriter.openUTF8Writer(
158                                     CLDRPaths.COMMON_DIRECTORY, "validity/" + type + ".xml")
159                             .skipCopyright(true)) {
160                 adder.target = output;
161                 output.append(
162                         DtdType.supplementalData.header(MethodHandles.lookup().lookupClass())
163                                 + "\t<version number=\"$Revision"
164                                 + "$\"/>\n"
165                                 + "\t<idValidity>\n");
166                 for (Entry<Status, Collection<String>> entry2 : subtypeMap.asMap().entrySet()) {
167                     Validity.Status subtype = entry2.getKey();
168                     Set<String> set = (Set<String>) entry2.getValue();
169                     String comment = info.statusComment.get(entry2.getKey());
170                     if (comment != null) {
171                         output.append("\t\t<!-- " + comment.replace("\n", "\n\t\t\t ") + " -->\n");
172                     }
173                     output.append("\t\t<id type='" + type + "' idStatus='" + subtype + "'>");
174                     final int size = set.size();
175                     output.append(
176                             "\t\t<!-- "
177                                     + size
178                                     + " item"
179                                     + (size > 1 ? "s" : "") // we know it’s English ;-)
180                                     + " -->");
181                     adder.reset(size > 600); //  || type.equals("subdivision")
182                     StringRange.compact(set, adder, true);
183                     output.append("\n\t\t</id>\n");
184                 }
185                 //                if (!info.newCodes.isEmpty()) {
186                 //                    output.append("\t\t<!-- Codes added this release:\n\t\t\t" +
187                 // showCodes(info.newCodes, "\n\t\t\t") + "\n\t\t-->\n");
188                 //                }
189                 output.append("\t</idValidity>\n</supplementalData>\n");
190             }
191         }
192         // System.out.println("TODO: add Unknown subdivisions, add private_use currencies, ...");
193     }
194 
showCodes(Set<String> newCodes, String linePrefix)195     private static String showCodes(Set<String> newCodes, String linePrefix) {
196         StringBuilder result = new StringBuilder();
197         String last = "";
198         for (String s : newCodes) {
199             String newPrefix = s.substring(0, s.indexOf('-'));
200             if (last.equals(newPrefix)) {
201                 result.append(" ");
202             } else {
203                 if (!last.isEmpty()) {
204                     result.append(linePrefix);
205                 }
206                 last = newPrefix;
207             }
208             result.append(s);
209         }
210         return result.toString();
211     }
212 
doCurrency(Map<String, Info> types)213     private static void doCurrency(Map<String, Info> types) {
214         Info info = Info.getInfo("currency");
215         Date now = new Date();
216         Date eoy = new Date(now.getYear() + 1, 0, 1); // Dec
217         for (String region : SDI.getCurrencyTerritories()) {
218             for (CurrencyDateInfo data : SDI.getCurrencyDateInfo(region)) {
219                 String currency = data.getCurrency();
220                 Date end = data.getEnd();
221                 boolean legalTender = data.isLegalTender();
222                 Status newStatus =
223                         end.after(eoy) && legalTender ? Status.regular : Status.deprecated;
224                 info.putBest(currency, newStatus);
225             }
226         }
227         info.put(LstrType.currency.unknown, Status.unknown);
228         // make sure we don't overlap.
229         // we want to keep any code that is valid in any territory, so
230         info.remove("XXX", Status.deprecated);
231         info.remove("XXX", Status.regular);
232 
233         // just to make sure info never disappears
234         Map<String, Status> oldCodes = OLD_VALIDITY.getCodeToStatus(LstrType.currency);
235         for (Entry<String, Status> entry : oldCodes.entrySet()) {
236             String key = entry.getKey();
237             Status oldStatus = entry.getValue();
238             Status newStatus = info.get(key);
239             if (!Objects.equal(oldStatus, newStatus)) {
240                 System.out.println(
241                         "Status changed: " + key + ", " + oldStatus + " => " + newStatus);
242             }
243         }
244 
245         info.statusComment.put(
246                 Status.deprecated,
247                 "Deprecated values are those that are not legal tender in some country after "
248                         + (1900 + now.getYear())
249                         + ".\n"
250                         + "More detailed usage information needed for some implementations is in supplemental data.");
251     }
252 
doSubdivisions(Map<String, Info> types)253     private static void doSubdivisions(Map<String, Info> types) {
254         Info info = Info.getInfo("subdivision");
255         Map<String, R2<List<String>, String>> aliases = SDI.getLocaleAliasInfo().get("subdivision");
256         for (String container : SDI.getContainersForSubdivisions()) {
257             for (String contained : SDI.getContainedSubdivisions(container)) {
258                 Status status =
259                         aliases.containsKey(contained)
260                                 ? Validity.Status.deprecated
261                                 : Validity.Status.regular;
262                 info.put(contained.toLowerCase(Locale.ROOT).replace("-", ""), status);
263             }
264         }
265 
266         // find out which items were valid, but are no longer in the containment map
267         // add them as deprecated
268         Map<Status, Set<String>> oldSubdivisionData =
269                 OLD_VALIDITY.getStatusToCodes(LstrType.subdivision);
270         for (Entry<Status, Set<String>> entry : oldSubdivisionData.entrySet()) {
271             for (String oldSdId : entry.getValue()) {
272                 if (info.get(oldSdId) == null) {
273                     info.put(oldSdId, Status.deprecated);
274                 }
275             }
276         }
277 
278         info.statusComment.put(
279                 Status.deprecated,
280                 "Deprecated values include those that are not formally deprecated in the country in question, but have their own region codes.\n"
281                         + "It also include codes that were previously in CLDR, for compatibility.");
282         info.statusComment.put(
283                 Status.unknown,
284                 "Unknown/Undetermined subdivision codes (ZZZZ) are defined for all regular region codes.");
285     }
286 
287     static final Set<String> VARIANTS =
288             Set.of( // variants
289                     "Aran",
290                     "Cyrs",
291                     "Hans",
292                     "Hant",
293                     "Latf",
294                     "Latg",
295                     "Syre",
296                     "Syrj",
297                     "Syrn",
298                     // composites
299                     "Hanb",
300                     "Jpan",
301                     "Hrkt",
302                     "Kore",
303                     // subsets
304                     "Jamo");
305 
doLstr(Map<String, Info> types)306     private static void doLstr(Map<String, Info> types) throws IOException {
307         Set<String> skippedScripts = new TreeSet<>();
308         for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry : LSTREG.entrySet()) {
309             LstrType type = entry.getKey();
310             if (!type.isLstr || !type.isUnicode) {
311                 continue;
312             }
313             Info info = Info.getInfo(type.toString());
314             Map<String, R2<List<String>, String>> aliases =
315                     SDI.getLocaleAliasInfo()
316                             .get(type == LstrType.region ? "territory" : type.toString());
317             if (aliases == null) {
318                 System.out.println("No aliases for: " + type);
319             }
320             // gather data
321             info.clear();
322             for (Entry<String, Map<LstrField, String>> entry2 : entry.getValue().entrySet()) {
323                 String code = entry2.getKey();
324                 if (type == LstrType.language && code.equals("aam")
325                         || type == LstrType.variant && code.equals("arevela")
326                         || type == LstrType.extlang && code.equals("lsg")) {
327                     int debug = 0;
328                 }
329                 Map<LstrField, String> data = entry2.getValue();
330                 Validity.Status subtype = Validity.Status.regular;
331                 if (code.equals(type.unknown)) {
332                     subtype = Validity.Status.unknown;
333                 } else if (type.specials.contains(code)) {
334                     subtype = Validity.Status.special;
335                 } else if (aliases != null && aliases.containsKey(code)
336                         || data.containsKey(LstrField.Deprecated)) {
337                     subtype = Validity.Status.deprecated;
338                 } else if (data.get(LstrField.Description).startsWith("Private use")) {
339                     subtype = Validity.Status.private_use;
340                 }
341                 switch (type) {
342                     case language:
343                         if (subtype == Status.private_use && code.compareTo("qfz") < 0) {
344                             subtype = Status.reserved;
345                         } else if (code.equals("root")) {
346                             continue;
347                         }
348                         break;
349                     case region:
350                         if (containment.contains(code)) {
351                             subtype = Validity.Status.macroregion;
352                         } else if (code.equals("XA") || code.equals("XB")) {
353                             subtype = Validity.Status.special;
354                         }
355                         switch (subtype) {
356                             case regular:
357                                 Info subInfo = Info.getInfo("subdivision");
358                                 subInfo.put(code.toLowerCase(Locale.ROOT) + "zzzz", Status.unknown);
359                                 break;
360                             case private_use:
361                                 if (code.compareTo("X") < 0) {
362                                     subtype = Status.reserved;
363                                 }
364                                 break;
365                             default:
366                                 break;
367                         }
368                         break;
369                     case script:
370                         switch (code) {
371                                 // extra specials
372                             case "Qaag":
373                             case "Zinh":
374                             case "Zsye":
375                             case "Zyyy":
376                                 subtype = Status.special;
377                                 break;
378                             default:
379                                 switch (subtype) {
380                                     case private_use:
381                                         if (code.compareTo("Qaaq") < 0) {
382                                             subtype = Validity.Status.reserved;
383                                         }
384                                         break;
385                                     case regular:
386                                         ScriptMetadata.Info scriptInfo =
387                                                 ScriptMetadata.getInfo(code);
388                                         if (scriptInfo == null && !VARIANTS.contains(code)) {
389                                             skippedScripts.add(code);
390                                             continue;
391                                         }
392                                         break;
393                                     default: // don't care about rest
394                                         break;
395                                 }
396                                 break;
397                         }
398                         break;
399                     case variant:
400                         if (VARIANT_EXTRAS.contains(code)) {
401                             continue;
402                         }
403                     default:
404                         break;
405                 }
406                 info.put(code, subtype);
407             }
408         }
409         System.out.println("Skipping non-Unicode scripts: " + Joiner.on(' ').join(skippedScripts));
410     }
411 
412     static final Set<String> VARIANT_EXTRAS = ImmutableSet.of("POSIX", "REVISED", "SAAHO");
413 }
414