xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/StandardCodes.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 /*
2  **********************************************************************
3  * Copyright (c) 2002-2011, International Business Machines
4  * Corporation and others.  All Rights Reserved.
5  **********************************************************************
6  * Author: Mark Davis
7  **********************************************************************
8  */
9 package org.unicode.cldr.util;
10 
11 import com.ibm.icu.impl.Relation;
12 import com.ibm.icu.lang.UCharacter;
13 import com.ibm.icu.text.UnicodeSet;
14 import com.ibm.icu.util.Output;
15 import java.io.BufferedReader;
16 import java.util.ArrayList;
17 import java.util.Arrays;
18 import java.util.Collections;
19 import java.util.Comparator;
20 import java.util.EnumMap;
21 import java.util.EnumSet;
22 import java.util.HashMap;
23 import java.util.HashSet;
24 import java.util.Iterator;
25 import java.util.LinkedHashMap;
26 import java.util.LinkedHashSet;
27 import java.util.List;
28 import java.util.Locale;
29 import java.util.Map;
30 import java.util.Map.Entry;
31 import java.util.Set;
32 import java.util.TreeMap;
33 import java.util.TreeSet;
34 import java.util.regex.Pattern;
35 import org.unicode.cldr.draft.ScriptMetadata;
36 import org.unicode.cldr.draft.ScriptMetadata.IdUsage;
37 import org.unicode.cldr.util.Iso639Data.Type;
38 import org.unicode.cldr.util.ZoneParser.ZoneLine;
39 
40 /** Provides access to various codes used by CLDR: RFC 3066, ISO 4217, Olson tzids */
41 public class StandardCodes {
42 
43     /**
44      * Convenient for testing whether a locale is at least at Basic level
45      *
46      * @param locale
47      * @return
48      */
isLocaleAtLeastBasic(String locale)49     public static boolean isLocaleAtLeastBasic(String locale) {
50         return CalculatedCoverageLevels.getInstance().isLocaleAtLeastBasic(locale);
51     }
52 
53     public enum CodeType {
54         language,
55         script,
56         territory,
57         extlang,
58         legacy,
59         redundant,
60         variant,
61         currency,
62         tzid;
63 
from(String name)64         public static CodeType from(String name) {
65             if ("region".equals(name)) {
66                 return territory;
67             }
68             return CodeType.valueOf(name);
69         }
70     }
71 
72     private static final Set<CodeType> TypeSet =
73             Collections.unmodifiableSet(EnumSet.allOf(CodeType.class));
74 
75     private static final Set<String> TypeStringSet;
76 
77     static {
78         LinkedHashSet<String> foo = new LinkedHashSet<>();
79         for (CodeType x : CodeType.values()) {
x.toString()80             foo.add(x.toString());
81         }
82         TypeStringSet = Collections.unmodifiableSet(foo);
83     }
84 
85     public static final String DESCRIPTION_SEPARATOR = "\u25AA";
86 
87     public static final String NO_COUNTRY = "001";
88 
89     private EnumMap<CodeType, Map<String, List<String>>> type_code_data =
90             new EnumMap<>(CodeType.class);
91 
92     private EnumMap<CodeType, Map<String, List<String>>> type_name_codes =
93             new EnumMap<>(CodeType.class);
94 
95     private EnumMap<CodeType, Map<String, String>> type_code_preferred =
96             new EnumMap<>(CodeType.class);
97 
98     private Map<String, Set<String>> country_modernCurrency = new TreeMap<>();
99 
100     private Map<CodeType, Set<String>> goodCodes = new TreeMap<>();
101 
102     private static final boolean DEBUG = false;
103 
104     private static final class StandardCodesHelper {
105         static final StandardCodes SINGLETON = new StandardCodes();
106     }
107     /** Get the singleton copy of the standard codes. */
make()108     public static synchronized StandardCodes make() {
109         return StandardCodesHelper.SINGLETON;
110     }
111 
112     /**
113      * The data is the name in the case of RFC3066 codes, and the country code in the case of TZIDs
114      * and ISO currency codes. If the country code is missing, uses ZZ.
115      */
getData(String type, String code)116     public String getData(String type, String code) {
117         Map<String, List<String>> code_data = getCodeData(type);
118         if (code_data == null) return null;
119         List<String> list = code_data.get(code);
120         if (list == null) return null;
121         return list.get(0);
122     }
123 
124     /**
125      * @return the full data for the type and code For the data in lstreg, it is description | date
126      *     | canonical_value | recommended_prefix # comments
127      */
getFullData(String type, String code)128     public List<String> getFullData(String type, String code) {
129         Map<String, List<String>> code_data = getCodeData(type);
130         if (code_data == null) return null;
131         return code_data.get(code);
132     }
133 
134     /**
135      * @return the full data for the type and code For the data in lstreg, it is description | date
136      *     | canonical_value | recommended_prefix # comments
137      */
getFullData(CodeType type, String code)138     public List<String> getFullData(CodeType type, String code) {
139         Map<String, List<String>> code_data = type_code_data.get(type);
140         if (code_data == null) return null;
141         return code_data.get(code);
142     }
143 
getCodeData(String type)144     private Map<String, List<String>> getCodeData(String type) {
145         return getCodeData(CodeType.from(type));
146     }
147 
getCodeData(CodeType type)148     private Map<String, List<String>> getCodeData(CodeType type) {
149         return type_code_data.get(type);
150     }
151 
getCodes(CodeType type)152     public Set<String> getCodes(CodeType type) {
153         return type_code_data.get(type).keySet();
154     }
155 
156     /**
157      * Get at the language registry values, as a Map from label to value.
158      *
159      * @param type
160      * @param code
161      * @return
162      */
getLangData(String type, String code)163     public Map<String, String> getLangData(String type, String code) {
164         try {
165             if (type.equals("territory")) type = "region";
166             else if (type.equals("variant")) code = code.toLowerCase(Locale.ENGLISH);
167             return (Map) ((Map) getLStreg().get(type)).get(code);
168         } catch (RuntimeException e) {
169             return null;
170         }
171     }
172 
173     /** Return a replacement code, if available. If not, return null. */
getReplacement(String type, String code)174     public String getReplacement(String type, String code) {
175         if (type.equals("currency")) return null; // no replacement codes for currencies
176         List<String> data = getFullData(type, code);
177         if (data == null) return null;
178         // if available, the replacement is a non-empty value other than --, in
179         // position 2.
180         if (data.size() < 3) return null;
181         String replacement = data.get(2);
182         if (!replacement.equals("") && !replacement.equals("--")) return replacement;
183         return null;
184     }
185 
186     /**
187      * Return the list of codes that have the same data. For example, returns all currency codes for
188      * a country. If there is a preferred one, it is first.
189      *
190      * @param type
191      * @param data
192      * @return
193      */
194     @Deprecated
getCodes(String type, String data)195     public List<String> getCodes(String type, String data) {
196         return getCodes(CodeType.from(type), data);
197     }
198 
199     /**
200      * Return the list of codes that have the same data. For example, returns all currency codes for
201      * a country. If there is a preferred one, it is first.
202      */
getCodes(CodeType type, String data)203     public List<String> getCodes(CodeType type, String data) {
204         Map<String, List<String>> data_codes = type_name_codes.get(type);
205         if (data_codes == null) return null;
206         return Collections.unmodifiableList(data_codes.get(data));
207     }
208 
209     /** Where there is a preferred code, return it. */
210     @Deprecated
getPreferred(String type, String code)211     public String getPreferred(String type, String code) {
212         return getPreferred(CodeType.from(type), code);
213     }
214 
215     /** Where there is a preferred code, return it. */
getPreferred(CodeType type, String code)216     public String getPreferred(CodeType type, String code) {
217         Map<String, String> code_preferred = type_code_preferred.get(type);
218         if (code_preferred == null) return code;
219         String newCode = code_preferred.get(code);
220         if (newCode == null) return code;
221         return newCode;
222     }
223 
224     /** Get all the available types */
getAvailableTypes()225     public Set<String> getAvailableTypes() {
226         return TypeStringSet;
227     }
228 
229     /** Get all the available types */
getAvailableTypesEnum()230     public Set<CodeType> getAvailableTypesEnum() {
231         return TypeSet;
232     }
233 
234     /**
235      * Get all the available codes for a given type
236      *
237      * @param type
238      * @return
239      */
getAvailableCodes(String type)240     public Set<String> getAvailableCodes(String type) {
241         return getAvailableCodes(CodeType.from(type));
242     }
243 
244     /**
245      * Get all the available codes for a given type
246      *
247      * @param type
248      * @return
249      */
getAvailableCodes(CodeType type)250     public Set<String> getAvailableCodes(CodeType type) {
251         Map<String, List<String>> code_name = type_code_data.get(type);
252         return Collections.unmodifiableSet(code_name.keySet());
253     }
254 
getGoodAvailableCodes(String stringType)255     public Set<String> getGoodAvailableCodes(String stringType) {
256         return getGoodAvailableCodes(CodeType.from(stringType));
257     }
258 
259     /**
260      * Get all the available "real" codes for a given type, excluding private use, but including
261      * some deprecated codes. Use SupplementalDataInfo getLocaleAliases to exclude others.
262      *
263      * @param type
264      * @return
265      */
getGoodAvailableCodes(CodeType type)266     public Set<String> getGoodAvailableCodes(CodeType type) {
267         Set<String> result = goodCodes.get(type);
268         if (result == null) {
269             synchronized (goodCodes) {
270                 Map<String, List<String>> code_name = getCodeData(type);
271                 SupplementalDataInfo sd = SupplementalDataInfo.getInstance();
272                 if (code_name == null) return null;
273                 result = new TreeSet<>(code_name.keySet());
274                 switch (type) {
275                     case currency:
276                         break; // nothing special
277                     case language:
278                         return sd.getCLDRLanguageCodes();
279                     case script:
280                         return sd.getCLDRScriptCodes();
281                     case tzid:
282                         return sd.getCLDRTimezoneCodes();
283                     default:
284                         for (Iterator<String> it = result.iterator(); it.hasNext(); ) {
285                             String code = it.next();
286                             if (code.equals(LocaleNames.ROOT) || code.equals("QO")) continue;
287                             List<String> data = getFullData(type, code);
288                             if (data.size() < 3) {
289                                 if (DEBUG) System.out.println(code + "\t" + data);
290                             }
291                             if ("PRIVATE USE".equalsIgnoreCase(data.get(0))
292                                     || (!data.get(2).equals("") && !data.get(2).equals("--"))) {
293                                 // System.out.println("Removing: " + code);
294                                 it.remove();
295                             }
296                         }
297                 }
298                 result = Collections.unmodifiableSet(result);
299                 goodCodes.put(type, result);
300             }
301         }
302         return result;
303     }
304 
305     private static Set<String> GOOD_COUNTRIES;
306 
getGoodCountries()307     public Set<String> getGoodCountries() {
308         synchronized (goodCodes) {
309             if (GOOD_COUNTRIES == null) {
310                 Set<String> temp = new LinkedHashSet<>();
311                 for (String s : getGoodAvailableCodes(CodeType.territory)) {
312                     if (isCountry(s)) {
313                         temp.add(s);
314                     }
315                 }
316                 GOOD_COUNTRIES = Collections.unmodifiableSet(temp);
317             }
318         }
319         return GOOD_COUNTRIES;
320     }
321 
322     /** Gets the modern currency. */
getMainCurrencies(String countryCode)323     public Set<String> getMainCurrencies(String countryCode) {
324         return country_modernCurrency.get(countryCode);
325     }
326 
327     //    /**
328     //     * Get rid of this
329     //     *
330     //     * @param type
331     //     * @return
332     //     * @throws IOException
333     //     * @deprecated
334     //     */
335     //    public String getEffectiveLocaleType(String type) throws IOException {
336     //        if ((type != null) &&
337     // (getLocaleCoverageOrganizations().contains(Organization.valueOf(type)))) {
338     //            return type;
339     //        } else {
340     //            return null; // the default.. for now..
341     //        }
342     //    }
343 
344     static Comparator caseless =
345             new Comparator() {
346 
347                 @Override
348                 public int compare(Object arg0, Object arg1) {
349                     String s1 = (String) arg0;
350                     String s2 = (String) arg1;
351                     return s1.compareToIgnoreCase(s2);
352                 }
353             };
354 
355     /** Used for Locales.txt to mean "all" */
356     public static final String ALL_LOCALES = "*";
357 
358     /**
359      * Returns locales according to status. It returns a Map of Maps, key 1 is either IBM or Java
360      * (perhaps more later), key 2 is the Level.
361      *
362      * @deprecated
363      */
364     @Deprecated
getLocaleTypes()365     public Map<Organization, Map<String, Level>> getLocaleTypes() {
366         synchronized (StandardCodes.class) {
367             return loadPlatformLocaleStatus().platform_locale_level;
368         }
369     }
370 
371     /**
372      * Return map of locales to levels
373      *
374      * @param org
375      * @return
376      */
getLocaleToLevel(Organization org)377     public Map<String, Level> getLocaleToLevel(Organization org) {
378         return getLocaleTypes().get(org);
379     }
380 
381     /** returns the highest level in the hierarchy, not including root. */
getHighestLocaleCoverageLevel(String organization, String locale)382     public Level getHighestLocaleCoverageLevel(String organization, String locale) {
383         // first get parent
384         final String parentId = LocaleIDParser.getParent(locale);
385         Level parentLevel = Level.UNDETERMINED;
386         if (parentId != null && !parentId.equals("root")) {
387             parentLevel = getHighestLocaleCoverageLevel(organization, parentId); // recurse
388         }
389         final Level ourLevel = getLocaleCoverageLevel(organization, locale);
390         if (parentLevel.getLevel() > ourLevel.getLevel()) {
391             // if parentLevel is higher
392             return parentLevel;
393         } else {
394             return ourLevel;
395         }
396     }
397 
getLocaleCoverageLevel(String organization, String desiredLocale)398     public Level getLocaleCoverageLevel(String organization, String desiredLocale) {
399         return getLocaleCoverageLevel(Organization.fromString(organization), desiredLocale);
400     }
401 
getLocaleCoverageLevel(Organization organization, String desiredLocale)402     public Level getLocaleCoverageLevel(Organization organization, String desiredLocale) {
403         return getLocaleCoverageLevel(
404                 organization, desiredLocale, new Output<LocaleCoverageType>());
405     }
406 
407     public enum LocaleCoverageType {
408         explicit,
409         parent,
410         star,
411         undetermined
412     }
413 
414     /**
415      * Returns coverage level of locale according to organization. Returns Level.UNDETERMINED if
416      * information is missing. A locale of "*" in the data means "everything else".
417      */
getLocaleCoverageLevel( Organization organization, String desiredLocale, Output<LocaleCoverageType> coverageType)418     public Level getLocaleCoverageLevel(
419             Organization organization,
420             String desiredLocale,
421             Output<LocaleCoverageType> coverageType) {
422         coverageType.value = LocaleCoverageType.undetermined;
423         if (organization == null) {
424             return Level.UNDETERMINED;
425         }
426         Map<String, Level> locale_status =
427                 loadPlatformLocaleStatus().platform_locale_level.get(organization);
428         if (locale_status == null) {
429             return Level.UNDETERMINED;
430         }
431         // see if there is a parent
432         String originalLocale = desiredLocale;
433         while (desiredLocale != null) {
434             Level status = locale_status.get(desiredLocale);
435             if (status != null && status != Level.UNDETERMINED) {
436                 coverageType.value =
437                         originalLocale == desiredLocale
438                                 ? LocaleCoverageType.explicit
439                                 : LocaleCoverageType.parent;
440                 return status;
441             }
442             desiredLocale = LocaleIDParser.getParent(desiredLocale);
443         }
444         Level status = locale_status.get(ALL_LOCALES);
445         if (status != null && status != Level.UNDETERMINED) {
446             coverageType.value = LocaleCoverageType.star;
447             return status;
448         }
449         return Level.UNDETERMINED;
450     }
451 
452     /**
453      * Returns coverage level of locale according to organization. Returns Level.UNDETERMINED if
454      * information is missing.
455      */
getDefaultLocaleCoverageLevel(Organization organization)456     public Level getDefaultLocaleCoverageLevel(Organization organization) {
457         return getLocaleCoverageLevel(organization, ALL_LOCALES);
458     }
459 
getLocaleCoverageOrganizations()460     public Set<Organization> getLocaleCoverageOrganizations() {
461         return loadPlatformLocaleStatus().platform_locale_level.keySet();
462     }
463 
getLocaleCoverageOrganizationStrings()464     public Set<String> getLocaleCoverageOrganizationStrings() {
465         return loadPlatformLocaleStatus().platform_locale_levelString.keySet();
466     }
467 
getLocaleCoverageLocales(String organization)468     public Set<String> getLocaleCoverageLocales(String organization) {
469         return getLocaleCoverageLocales(Organization.fromString(organization));
470     }
471 
getLocaleCoverageLocales(Organization organization)472     public Set<String> getLocaleCoverageLocales(Organization organization) {
473         return loadPlatformLocaleStatus().platform_locale_level.get(organization).keySet();
474     }
475 
getLocalesToLevelsFor(Organization organization)476     public Map<String, Level> getLocalesToLevelsFor(Organization organization) {
477         return loadPlatformLocaleStatus().platform_locale_level.get(organization);
478     }
479 
getLevelsToLocalesFor(Organization organization)480     public Relation<Level, String> getLevelsToLocalesFor(Organization organization) {
481         return loadPlatformLocaleStatus().platform_level_locale.get(organization);
482     }
483 
getLocaleCoverageLocales(Organization organization, Set<Level> choice)484     public Set<String> getLocaleCoverageLocales(Organization organization, Set<Level> choice) {
485         Set<String> result = new LinkedHashSet<>();
486         for (String locale : getLocaleCoverageLocales(organization)) {
487             if (choice.contains(getLocaleCoverageLevel(organization, locale))) {
488                 result.add(locale);
489             }
490         }
491         return result;
492     }
493 
494     /**
495      * "The target coverage level is set to: - The CLDR Org coverage level if it exists, - Otherise,
496      * the maximum of all the coverage levels for that locale across all Organizations (max Modern)
497      * in Locales.txt, if there is at least one. - Otherwise Basic. - That makes the number the same
498      * for all Organizations, which makes communicating the values less prone to misinterpretation,
499      * and gives all the vetters and managers a common metric for that locale.
500      */
getTargetCoverageLevel(String localeId)501     public Level getTargetCoverageLevel(String localeId) {
502         Level level;
503 
504         // First, try CLDR locale
505         level = getLocaleCoverageLevel(Organization.cldr, localeId);
506         if (level != Level.UNDETERMINED) {
507             return level;
508         }
509 
510         // Next, Find maximum coverage level
511         for (final Organization o : Organization.values()) {
512             if (o == Organization.cldr
513                     || // Already handled, above
514                     o == Organization.unaffiliated
515                     || o == Organization.surveytool) {
516                 continue; // Skip some 'special' orgs
517             }
518             final Output<StandardCodes.LocaleCoverageType> outputType = new Output<>();
519             final Level orgLevel = getLocaleCoverageLevel(o, localeId, outputType);
520             if (outputType.value == StandardCodes.LocaleCoverageType.undetermined
521                     || outputType.value == StandardCodes.LocaleCoverageType.star) {
522                 // Skip undetermined or star
523                 continue;
524             }
525             // Pin the level to MODERN
526             final Level pinnedOrgLevel = Level.min(Level.MODERN, orgLevel);
527             // Accumulate the maxiumum org level (up to MODERN)
528             level = Level.max(level, pinnedOrgLevel);
529         }
530         if (level != Level.UNDETERMINED) {
531             return level;
532         }
533 
534         // Otherwise, BASIC
535         level = Level.BASIC;
536         return level;
537     }
538 
539     private static final class LocalesTxtHelper {
540         static LocalesTxtHelper SINGLETON = new LocalesTxtHelper();
541 
542         public LocalesTxtReader reader;
543 
LocalesTxtHelper()544         LocalesTxtHelper() {
545             reader = new LocalesTxtReader().read(StandardCodes.make()); // circular dependency
546         }
547     }
548 
549     /**
550      * Get the 'platform locale status' (aka Locales.txt) Note, do not call this from the
551      * StandardCodes constructor!
552      *
553      * @return
554      */
loadPlatformLocaleStatus()555     private LocalesTxtReader loadPlatformLocaleStatus() {
556         return LocalesTxtHelper.SINGLETON.reader;
557     }
558 
validate(LocaleIDParser parser)559     String validate(LocaleIDParser parser) {
560         String message = "";
561         String lang = parser.getLanguage();
562         if (lang.length() == 0) {
563             message += ", Missing language";
564         } else if (!getAvailableCodes("language").contains(lang)) {
565             message += ", Invalid language code: " + lang;
566         }
567         String script = parser.getScript();
568         if (script.length() != 0 && !getAvailableCodes("script").contains(script)) {
569             message += ", Invalid script code: " + script;
570         }
571         String territory = parser.getRegion();
572         if (territory.length() != 0 && !getAvailableCodes("territory").contains(territory)) {
573             message += ", Invalid territory code: " + lang;
574         }
575         return message.length() == 0 ? message : message.substring(2);
576     }
577 
578     /**
579      * Ascertain that the given locale in in the given group specified by the organization
580      *
581      * @param locale
582      * @param group
583      * @param org
584      * @return boolean
585      */
isLocaleInGroup(String locale, String group, Organization org)586     public boolean isLocaleInGroup(String locale, String group, Organization org) {
587         return group.equals(getGroup(locale, org));
588     }
589 
isLocaleInGroup(String locale, String group, String org)590     public boolean isLocaleInGroup(String locale, String group, String org) {
591         return isLocaleInGroup(locale, group, Organization.fromString(org));
592     }
593 
getGroup(String locale, String org)594     public String getGroup(String locale, String org) {
595         return getGroup(locale, Organization.fromString(org));
596     }
597 
598     /**
599      * Gets the coverage group given a locale and org
600      *
601      * @param locale
602      * @param org
603      * @return group if availble, null if not
604      */
getGroup(String locale, Organization org)605     private String getGroup(String locale, Organization org) {
606         Level l = getLocaleCoverageLevel(org, locale);
607         if (l.equals(Level.UNDETERMINED)) {
608             return null;
609         } else {
610             return l.toString();
611         }
612     }
613 
614     // ========== PRIVATES ==========
615 
StandardCodes()616     private StandardCodes() {
617         String[] files = {"ISO4217.txt"}; // , "TZID.txt"
618         type_code_preferred.put(CodeType.tzid, new TreeMap<String, String>());
619         add(CodeType.language, "root", "Root");
620         String originalLine = null;
621         for (int fileIndex = 0; fileIndex < files.length; ++fileIndex) {
622             try {
623                 BufferedReader lstreg = CldrUtility.getUTF8Data(files[fileIndex]);
624                 while (true) {
625                     String line = originalLine = lstreg.readLine();
626                     if (line == null) break;
627                     if (line.startsWith("\uFEFF")) {
628                         line = line.substring(1);
629                     }
630                     line = line.trim();
631                     int commentPos = line.indexOf('#');
632                     String comment = "";
633                     if (commentPos >= 0) {
634                         comment = line.substring(commentPos + 1).trim();
635                         line = line.substring(0, commentPos);
636                     }
637                     if (line.length() == 0) continue;
638                     List<String> pieces =
639                             CldrUtility.splitList(line, '|', true, new ArrayList<String>());
640                     CodeType type = CodeType.from(pieces.get(0));
641                     pieces.remove(0);
642 
643                     String code = pieces.get(0);
644                     pieces.remove(0);
645                     if (type.equals("date")) {
646                         continue;
647                     }
648 
649                     String oldName = pieces.get(0);
650                     int pos = oldName.indexOf(';');
651                     if (pos >= 0) {
652                         oldName = oldName.substring(0, pos).trim();
653                         pieces.set(0, oldName);
654                     }
655 
656                     List<String> data = pieces;
657                     if (comment.indexOf("deprecated") >= 0) {
658                         // System.out.println(originalLine);
659                         if (data.get(2).toString().length() == 0) {
660                             data.set(2, "--");
661                         }
662                     }
663                     if (oldName.equalsIgnoreCase("PRIVATE USE")) {
664                         int separatorPos = code.indexOf("..");
665                         if (separatorPos < 0) {
666                             add(type, code, data);
667                         } else {
668                             String current = code.substring(0, separatorPos);
669                             String end = code.substring(separatorPos + 2);
670                             // System.out.println(">>" + code + "\t" + current + "\t" + end);
671                             for (; current.compareTo(end) <= 0; current = nextAlpha(current)) {
672                                 // System.out.println(">" + current);
673                                 add(type, current, data);
674                             }
675                         }
676                         continue;
677                     }
678                     if (!type.equals("tzid")) {
679                         add(type, code, data);
680                         if (type.equals("currency")) {
681                             // currency | TPE | Timor Escudo | TP | EAST TIMOR | O
682                             if (data.get(3).equals("C")) {
683                                 String country = data.get(1);
684                                 Set<String> codes = country_modernCurrency.get(country);
685                                 if (codes == null) {
686                                     country_modernCurrency.put(country, codes = new TreeSet<>());
687                                 }
688                                 codes.add(code);
689                             }
690                         }
691                         continue;
692                     }
693                     // type = tzid
694                     // List codes = (List) Utility.splitList(code, ',', true, new
695                     // ArrayList());
696                     String preferred = null;
697                     for (int i = 0; i < pieces.size(); ++i) {
698                         code = pieces.get(i);
699                         add(type, code, data);
700                         if (preferred == null) preferred = code;
701                         else {
702                             Map<String, String> code_preferred = type_code_preferred.get(type);
703                             code_preferred.put(code, preferred);
704                         }
705                     }
706                 }
707                 lstreg.close();
708             } catch (Exception e) {
709                 System.err.println(
710                         "WARNING: "
711                                 + files[fileIndex]
712                                 + " may be a corrupted UTF-8 file. Please check.");
713                 throw (IllegalArgumentException)
714                         new IllegalArgumentException(
715                                         "Can't read " + files[fileIndex] + "\t" + originalLine)
716                                 .initCause(e);
717             }
718             country_modernCurrency = CldrUtility.protectCollection(country_modernCurrency);
719         }
720 
721         // data is: description | date | canonical_value | recommended_prefix #
722         // comments
723         // HACK, just rework
724 
725         Map<String, Map<String, Map<String, String>>> languageRegistry = getLStreg();
726         // languageRegistry = CldrUtility.protectCollection(languageRegistry);
727 
728         for (String type : languageRegistry.keySet()) {
729             CodeType type2 = CodeType.from(type);
730             Map<String, Map<String, String>> m = languageRegistry.get(type);
731             for (String code : m.keySet()) {
732                 Map<String, String> mm = m.get(code);
733                 List<String> data = new ArrayList<>(0);
734                 data.add(mm.get("Description"));
735                 data.add(mm.get("Added"));
736                 String pref = mm.get("Preferred-Value");
737                 if (pref == null) {
738                     pref = mm.get("Deprecated");
739                     if (pref == null) pref = "";
740                     else pref = "deprecated";
741                 }
742                 data.add(pref);
743                 if (type.equals("variant")) {
744                     code = code.toUpperCase();
745                 }
746                 // data.add(mm.get("Recommended_Prefix"));
747                 // {"region", "BQ", "Description", "British Antarctic Territory",
748                 // "Preferred-Value", "AQ", "CLDR", "True", "Deprecated", "True"},
749                 add(type2, code, data);
750             }
751         }
752 
753         Map<String, List<String>> m = getZoneData();
754         for (Iterator<String> it = m.keySet().iterator(); it.hasNext(); ) {
755             String code = it.next();
756             add(CodeType.tzid, code, m.get(code).toString());
757         }
758     }
759 
760     /**
761      * @param current
762      * @return
763      */
nextAlpha(String current)764     private static String nextAlpha(String current) {
765         // Don't care that this is inefficient
766         int value = 0;
767         for (int i = 0; i < current.length(); ++i) {
768             char c = current.charAt(i);
769             c -= c < 'a' ? 'A' : 'a';
770             value = value * 26 + c;
771         }
772         value += 1;
773         String result = "";
774         for (int i = 0; i < current.length(); ++i) {
775             result = (char) ((value % 26) + 'A') + result;
776             value = value / 26;
777         }
778         if (UCharacter.toLowerCase(current).equals(current)) {
779             result = UCharacter.toLowerCase(result);
780         } else if (UCharacter.toUpperCase(current).equals(current)) {
781             // do nothing
782         } else {
783             result = UCharacter.toTitleCase(result, null);
784         }
785         return result;
786     }
787 
788     /**
789      * @param type
790      * @param string2
791      * @param string3
792      */
793     private void add(CodeType type, String string2, String string3) {
794         List<String> l = new ArrayList<>();
795         l.add(string3);
796         add(type, string2, l);
797     }
798 
799     private void add(CodeType type, String code, List<String> otherData) {
800         // hack
801         if (type == CodeType.script) {
802             if (code.equals("Qaai")) {
803                 otherData = new ArrayList<>(otherData);
804                 otherData.set(0, "Inherited");
805             } else if (code.equals("Zyyy")) {
806                 otherData = new ArrayList<>(otherData);
807                 otherData.set(0, "Common");
808             }
809         }
810 
811         // assume name is the first item
812 
813         String name = otherData.get(0);
814 
815         // add to main list
816         Map<String, List<String>> code_data = getCodeData(type);
817         if (code_data == null) {
818             code_data = new TreeMap<>();
819             type_code_data.put(type, code_data);
820         }
821         List<String> lastData = code_data.get(code);
822         if (lastData != null) {
823             lastData.addAll(otherData);
824         } else {
825             code_data.put(code, otherData);
826         }
827 
828         // now add mapping from name to codes
829         Map<String, List<String>> name_codes = type_name_codes.get(type);
830         if (name_codes == null) {
831             name_codes = new TreeMap<>();
832             type_name_codes.put(type, name_codes);
833         }
834         List<String> codes = name_codes.get(name);
835         if (codes == null) {
836             codes = new ArrayList<>();
837             name_codes.put(name, codes);
838         }
839         codes.add(code);
840     }
841 
842     private Map<String, List<String>> WorldBankInfo;
843 
844     public Map<String, List<String>> getWorldBankInfo() {
845         if (WorldBankInfo == null) {
846             List<String> temp = fillFromCommaFile("WorldBankInfo.txt", false);
847             WorldBankInfo = new HashMap<>();
848             for (String line : temp) {
849                 List<String> row = CldrUtility.splitList(line, ';', true);
850                 String key = row.get(0);
851                 row.remove(0);
852                 WorldBankInfo.put(key, row);
853             }
854             WorldBankInfo = CldrUtility.protectCollection(WorldBankInfo);
855         }
856         return WorldBankInfo;
857     }
858 
859     Set<String> moribundLanguages;
860 
861     public Set<String> getMoribundLanguages() {
862         if (moribundLanguages == null) {
863             List<String> temp = fillFromCommaFile("moribund_languages.txt", true);
864             moribundLanguages = new TreeSet<>();
865             moribundLanguages.addAll(temp);
866             moribundLanguages = CldrUtility.protectCollection(moribundLanguages);
867         }
868         return moribundLanguages;
869     }
870 
871     // produces a list of the 'clean' lines
872     private List<String> fillFromCommaFile(String filename, boolean trim) {
873         try {
874             List<String> result = new ArrayList<>();
875             String line;
876             BufferedReader lstreg = CldrUtility.getUTF8Data(filename);
877             while (true) {
878                 line = lstreg.readLine();
879                 if (line == null) break;
880                 int commentPos = line.indexOf('#');
881                 if (commentPos >= 0) {
882                     line = line.substring(0, commentPos);
883                 }
884                 if (trim) {
885                     line = line.trim();
886                 }
887                 if (line.length() == 0) continue;
888                 result.add(line);
889             }
890             return result;
891         } catch (Exception e) {
892             throw (RuntimeException)
893                     new IllegalArgumentException("Can't process file: data/" + filename)
894                             .initCause(e);
895         }
896     }
897 
    /**
     * CLDR-specific additions to the language subtag registry data. Each row has the form
     * {type, subtag, field, value, field, value, ...}. initLstr() adds these rows after the
     * registry file has been parsed; a row may only replace a parsed entry whose Description is
     * "Private use", otherwise initLstr() throws an IllegalArgumentException.
     *
     * <p>The commented-out rows are entries that are no longer added.
     */
    static String[][] extras = {
        {"language", "root", "Description", "Root", "CLDR", "True"},
        // { "language", "cch", "Description", "Atsam", "CLDR", "True" },
        // { "language", "kaj", "Description", "Jju", "CLDR", "True" },
        // { "language", "kcg", "Description", "Tyap", "CLDR", "True" },
        // { "language", "kfo", "Description", "Koro", "CLDR", "True" },
        // { "language", "mfe", "Description", "Morisyen", "CLDR", "True" },
        // { "region", "172", "Description", "Commonwealth of Independent States", "CLDR", "True" },
        // { "region", "062", "Description", "South-Central Asia", "CLDR", "True" },
        // { "region", "003", "Description", "North America", "CLDR", "True" },
        //        { "variant", "POLYTONI", "Description", "Polytonic Greek", "CLDR", "True",
        // "Preferred-Value", "POLYTON" },
        {"variant", "REVISED", "Description", "Revised Orthography", "CLDR", "True"},
        {"variant", "SAAHO", "Description", "Dialect", "CLDR", "True"},
        {"variant", "POSIX", "Description", "Computer-Style", "CLDR", "True"},
        // {"region", "172", "Description", "Commonwealth of Independent States",
        // "CLDR", "True"},
        // { "region", "", "Description", "European Union", "CLDR", "True" },
        {"region", "ZZ", "Description", "Unknown or Invalid Region", "CLDR", "True"},
        {"region", "QO", "Description", "Outlying Oceania", "CLDR", "True"},
        {"region", "XK", "Description", "Kosovo", "CLDR", "True"},
        {"script", "Qaai", "Description", "Inherited", "CLDR", "True"},
        // {"region", "003", "Description", "North America", "CLDR", "True"},
        // {"region", "062", "Description", "South-central Asia", "CLDR", "True"},
        // {"region", "200", "Description", "Czechoslovakia", "CLDR", "True"},
        // {"region", "830", "Description", "Channel Islands", "CLDR", "True"},
        // {"region", "833", "Description", "Isle of Man", "CLDR", "True"},

        // {"region", "NT", "Description", "Neutral Zone (formerly between Saudi
        // Arabia & Iraq)", "CLDR", "True", "Deprecated", "True"},
        // {"region", "SU", "Description", "Union of Soviet Socialist Republics",
        // "CLDR", "True", "Deprecated", "True"},
        // {"region", "BQ", "Description", "British Antarctic Territory",
        // "Preferred-Value", "AQ", "CLDR", "True", "Deprecated", "True"},
        // {"region", "CT", "Description", "Canton and Enderbury Islands",
        // "Preferred-Value", "KI", "CLDR", "True", "Deprecated", "True"},
        // {"region", "FQ", "Description", "French Southern and Antarctic Territories
        // (now split between AQ and TF)", "CLDR", "True", "Deprecated", "True"},
        // {"region", "JT", "Description", "Johnston Island", "Preferred-Value", "UM",
        // "CLDR", "True", "Deprecated", "True"},
        // {"region", "MI", "Description", "Midway Islands", "Preferred-Value", "UM",
        // "CLDR", "True", "Deprecated", "True"},
        // {"region", "NQ", "Description", "Dronning Maud Land", "Preferred-Value",
        // "AQ", "CLDR", "True", "Deprecated", "True"},
        // {"region", "PC", "Description", "Pacific Islands Trust Territory (divided
        // into FM, MH, MP, and PW)", "Preferred-Value", "AQ", "CLDR", "True",
        // "Deprecated", "True"},
        // {"region", "PU", "Description", "U.S. Miscellaneous Pacific Islands",
        // "Preferred-Value", "UM", "CLDR", "True", "Deprecated", "True"},
        // {"region", "PZ", "Description", "Panama Canal Zone", "Preferred-Value",
        // "PA", "CLDR", "True", "Deprecated", "True"},
        // {"region", "VD", "Description", "North Vietnam", "Preferred-Value", "VN",
        // "CLDR", "True", "Deprecated", "True"},
        // {"region", "WK", "Description", "Wake Island", "Preferred-Value", "UM",
        // "CLDR", "True", "Deprecated", "True"},
    };
956 
    /**
     * Name of the registry data file that initLstr() opens through CldrUtility.getUTF8Data. The
     * value is looked up with CldrUtility.getProperty under the key "registry"; presumably the
     * second argument is the default used when that property is not set (confirm in
     * CldrUtility).
     */
    static final String registryName =
            CldrUtility.getProperty("registry", "language-subtag-registry");
959 
960     public enum LstrType {
961         language(
962                 LocaleNames.UND,
963                 LocaleNames.ZXX,
964                 LocaleNames.MUL,
965                 LocaleNames.MIS,
966                 LocaleNames.ROOT),
967         script("Zzzz", "Zsym", "Zxxx", "Zmth"),
968         region("ZZ"),
969         variant(),
970         extension(),
971         extlang(true, false),
972         legacy(true, false),
973         redundant(true, false),
974         /** specialized codes for validity; TODO: rename LstrType * */
975         currency(false, true, "XXX"),
976         subdivision(false, true),
977         unit(false, true),
978         usage(false, true),
979         zone(false, true);
980 
981         public final Set<String> specials;
982         public final String unknown;
983         public final boolean isLstr;
984         public final boolean isUnicode;
985 
986         private LstrType(String... unknownValue) {
987             this(true, true, unknownValue);
988         }
989 
990         private LstrType(boolean lstr, boolean unicode, String... unknownValue) {
991             unknown = unknownValue.length == 0 ? null : unknownValue[0];
992             LinkedHashSet<String> set = new LinkedHashSet<>(Arrays.asList(unknownValue));
993             if (unknown != null) {
994                 set.remove(unknown);
995             }
996             specials = Collections.unmodifiableSet(set);
997             isLstr = lstr;
998             isUnicode = unicode;
999         }
1000 
1001         //
1002         static final Pattern WELLFORMED = Pattern.compile("([0-9]{3}|[a-zA-Z]{2})[a-zA-Z0-9]{1,4}");
1003 
1004         boolean isWellFormed(String candidate) {
1005             switch (this) {
1006                 case subdivision:
1007                     return WELLFORMED.matcher(candidate).matches();
1008                 default:
1009                     throw new UnsupportedOperationException();
1010             }
1011         }
1012 
1013         /** Generate compatibility string, returning 'territory' instead of 'region', etc. */
1014         public String toCompatString() {
1015             switch (this) {
1016                 case region:
1017                     return "territory";
1018                 case legacy:
1019                     return "language";
1020                 case redundant:
1021                     return "language";
1022                 default:
1023                     return toString();
1024             }
1025         }
1026 
1027         /** Create LstrType from string, allowing the compat string 'territory'. */
1028         public static LstrType fromString(String rawType) {
1029             try {
1030                 return valueOf(rawType);
1031             } catch (IllegalArgumentException e) {
1032                 if ("territory".equals(rawType)) {
1033                     return region;
1034                 }
1035                 throw e;
1036             }
1037         }
1038     }
1039 
1040     public enum LstrField {
1041         Type,
1042         Subtag,
1043         Description,
1044         Added,
1045         Scope,
1046         Tag,
1047         Suppress_Script,
1048         Macrolanguage,
1049         Deprecated,
1050         Preferred_Value,
1051         Comments,
1052         Prefix,
1053         CLDR;
1054 
1055         public static LstrField from(String s) {
1056             return LstrField.valueOf(s.trim().replace("-", "_"));
1057         }
1058     }
1059 
    // All three are null until initLstr() has run, which assigns protected copies.
    // String-keyed compatibility view of LSTREG_ENUM (type name -> subtag -> field name -> value).
    static Map<String, Map<String, Map<String, String>>> LSTREG;
    // Parsed registry plus the rows from 'extras'.
    static Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG_ENUM;
    // Copy of the parsed registry taken before the rows from 'extras' are added.
    static Map<LstrType, Map<String, Map<LstrField, String>>> LSTREG_RAW;
1063 
1064     /**
1065      * Returns a map like {extlang={aao={Added=2009-07-29, Description=Algerian Saharan Arabic, ...
1066      * <br>
1067      * That is, type => subtype => map<tag,value>. Descriptions are concatenated together, separated
1068      * by DESCRIPTION_SEPARATOR.
1069      *
1070      * @return
1071      */
1072     public static Map<String, Map<String, Map<String, String>>> getLStreg() {
1073         if (LSTREG == null) {
1074             initLstr();
1075         }
1076         return LSTREG;
1077     }
1078 
1079     /**
1080      * Returns a map like {extlang={aao={Added=2009-07-29, Description=Algerian Saharan Arabic, ...
1081      * <br>
1082      * That is, type => subtype => map<tag,value>. Descriptions are concatenated together, separated
1083      * by DESCRIPTION_SEPARATOR.
1084      *
1085      * @return
1086      */
1087     public static Map<LstrType, Map<String, Map<LstrField, String>>> getEnumLstreg() {
1088         if (LSTREG_ENUM == null) {
1089             initLstr();
1090         }
1091         return LSTREG_ENUM;
1092     }
1093 
1094     public static Map<LstrType, Map<String, Map<LstrField, String>>> getLstregEnumRaw() {
1095         if (LSTREG_ENUM == null) {
1096             initLstr();
1097         }
1098         return LSTREG_RAW;
1099     }
1100 
1101     private static void initLstr() {
1102         Map<LstrType, Map<String, Map<LstrField, String>>> result2 = new TreeMap<>();
1103 
1104         int lineNumber = 1;
1105 
1106         Set<String> funnyTags = new TreeSet<>();
1107         String line;
1108         try {
1109             BufferedReader lstreg = CldrUtility.getUTF8Data(registryName);
1110             LstrType lastType = null;
1111             String lastTag = null;
1112             Map<String, Map<LstrField, String>> subtagData = null;
1113             Map<LstrField, String> currentData = null;
1114             LstrField lastLabel = null;
1115             String lastRest = null;
1116             boolean inRealContent = false;
1117             //            Map<String, String> translitCache = new HashMap<String, String>();
1118             for (; ; ++lineNumber) {
1119                 line = lstreg.readLine();
1120                 if (line == null) break;
1121                 if (line.length() == 0) continue; // skip blanks
1122                 if (line.startsWith("File-Date: ")) {
1123                     if (DEBUG) System.out.println("Language Subtag Registry: " + line);
1124                     inRealContent = true;
1125                     continue;
1126                 }
1127                 if (!inRealContent) {
1128                     // skip until we get to real content
1129                     continue;
1130                 }
1131                 // skip cruft
1132                 if (line.startsWith("Internet-Draft")) {
1133                     continue;
1134                 }
1135                 if (line.startsWith("Ewell")) {
1136                     continue;
1137                 }
1138                 if (line.startsWith("\f")) {
1139                     continue;
1140                 }
1141                 if (line.startsWith("4.  Security Considerations")) {
1142                     break;
1143                 }
1144 
1145                 if (line.startsWith("%%"))
1146                     continue; // skip separators (ok, since data starts with Type:
1147                 if (line.startsWith(" ")) {
1148                     currentData.put(lastLabel, lastRest + " " + line.trim());
1149                     continue;
1150                 }
1151 
1152                 /*
1153                  * Type: language Subtag: aa Description: Afar Added: 2005-10-16
1154                  * Suppress-Script: Latn
1155                  */
1156                 int pos2 = line.indexOf(':');
1157                 LstrField label = LstrField.from(line.substring(0, pos2));
1158                 String rest = line.substring(pos2 + 1).trim();
1159                 if (label == LstrField.Type) {
1160                     lastType =
1161                             rest.equals("grandfathered")
1162                                     ? LstrType.legacy
1163                                     : LstrType.fromString(rest);
1164                     subtagData = CldrUtility.get(result2, lastType);
1165                     if (subtagData == null) {
1166                         result2.put(lastType, subtagData = new TreeMap<>());
1167                     }
1168                 } else if (label == LstrField.Subtag || label == LstrField.Tag) {
1169                     lastTag = rest;
1170                     String endTag = null;
1171                     // Subtag: qaa..qtz
1172                     int pos = lastTag.indexOf("..");
1173                     if (pos >= 0) {
1174                         endTag = lastTag.substring(pos + 2);
1175                         lastTag = lastTag.substring(0, pos);
1176                     }
1177                     currentData = new TreeMap<>();
1178                     if (endTag == null) {
1179                         putSubtagData(lastTag, subtagData, currentData);
1180                         languageCount.add(lastType, 1);
1181                         // System.out.println(languageCount.getCount(lastType) + "\t" + lastType +
1182                         // "\t" + lastTag);
1183                     } else {
1184                         for (; lastTag.compareTo(endTag) <= 0; lastTag = nextAlpha(lastTag)) {
1185                             // System.out.println(">" + current);
1186                             putSubtagData(lastTag, subtagData, currentData);
1187                             languageCount.add(lastType, 1);
1188                             // System.out.println(languageCount.getCount(lastType) + "\t" + lastType
1189                             // + "\t" + lastTag);
1190                         }
1191                     }
1192                     // label.equalsIgnoreCase("Added") || label.equalsIgnoreCase("Suppress-Script"))
1193                     // {
1194                     // skip
1195                     // } else if (pieces.length < 2) {
1196                     // System.out.println("Odd Line: " + lastType + "\t" + lastTag + "\t" + line);
1197                 } else {
1198                     lastLabel = label;
1199                     // The following code was removed because in the standard tests (TestAll) both
1200                     // lastRest and rest were always equal.
1201                     //                    if(!translitCache.containsKey(rest)) {
1202                     //                        lastRest =
1203                     // TransliteratorUtilities.fromXML.transliterate(rest);
1204                     //                        translitCache.put(rest, lastRest);
1205                     //                        if (!lastRest.equals(rest)) {
1206                     //                            System.out.println(System.currentTimeMillis()+"
1207                     // initLStr: LastRest: '"+lastRest+"' Rest: '"+rest+"'");
1208                     //                        }
1209                     //                    } else {
1210                     //                        lastRest = translitCache.get(rest);
1211                     //                    }
1212                     lastRest = rest;
1213                     String oldValue = CldrUtility.get(currentData, lastLabel);
1214                     if (oldValue != null) {
1215                         lastRest = oldValue + DESCRIPTION_SEPARATOR + lastRest;
1216                     }
1217                     currentData.put(lastLabel, lastRest);
1218                 }
1219             }
1220         } catch (Exception e) {
1221             throw (RuntimeException)
1222                     new IllegalArgumentException(
1223                                     "Can't process file: data/"
1224                                             + registryName
1225                                             + ";\t at line "
1226                                             + lineNumber)
1227                             .initCause(e);
1228         } finally {
1229             if (!funnyTags.isEmpty()) {
1230                 if (DEBUG) System.out.println("Funny tags: " + funnyTags);
1231             }
1232         }
1233         // copy raw
1234         Map<LstrType, Map<String, Map<LstrField, String>>> rawLstreg = new TreeMap<>();
1235         for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry1 : result2.entrySet()) {
1236             LstrType key1 = entry1.getKey();
1237             TreeMap<String, Map<LstrField, String>> raw1 = new TreeMap<>();
rawLstreg.put(key1, raw1)1238             rawLstreg.put(key1, raw1);
1239             for (Entry<String, Map<LstrField, String>> entry2 : entry1.getValue().entrySet()) {
1240                 String key2 = entry2.getKey();
1241                 final Map<LstrField, String> value2 = entry2.getValue();
1242                 TreeMap<LstrField, String> raw2 = new TreeMap<>();
1243                 raw2.putAll(value2);
raw1.put(key2, raw2)1244                 raw1.put(key2, raw2);
1245             }
1246         }
1247         LSTREG_RAW = CldrUtility.protectCollection(rawLstreg);
1248 
1249         // add extras
1250         for (int i = 0; i < extras.length; ++i) {
1251             Map<String, Map<LstrField, String>> subtagData =
1252                     CldrUtility.get(result2, LstrType.fromString(extras[i][0]));
1253             if (subtagData == null) {
LstrType.fromString(extras[i][0])1254                 result2.put(LstrType.fromString(extras[i][0]), subtagData = new TreeMap<>());
1255             }
1256             Map<LstrField, String> labelData = new TreeMap<>();
1257             for (int j = 2; j < extras[i].length; j += 2) {
LstrField.from(extras[i][j])1258                 labelData.put(LstrField.from(extras[i][j]), extras[i][j + 1]);
1259             }
1260             Map<LstrField, String> old = CldrUtility.get(subtagData, extras[i][1]);
1261             if (old != null) {
1262                 if (!"Private use".equals(CldrUtility.get(old, LstrField.Description))) {
1263                     throw new IllegalArgumentException(
1264                             "REPLACING data for "
1265                                     + extras[i][1]
1266                                     + "\t"
1267                                     + old
1268                                     + "\twith"
1269                                     + labelData);
1270                 }
1271             }
1272             if (false) {
1273                 System.out.println(
1274                         (old != null ? "REPLACING" + "\t" + old : "ADDING")
1275                                 + " data for "
1276                                 + extras[i][1]
1277                                 + "\twith"
1278                                 + labelData);
1279             }
subtagData.put(extras[i][1], labelData)1280             subtagData.put(extras[i][1], labelData);
1281         }
1282         // build compatibility map
1283         Map<String, Map<String, Map<String, String>>> result = new LinkedHashMap<>();
1284         for (Entry<LstrType, Map<String, Map<LstrField, String>>> entry : result2.entrySet()) {
1285             Map<String, Map<String, String>> copy2 = new LinkedHashMap<>();
1286             result.put(entry.getKey().toString(), copy2);
1287             for (Entry<String, Map<LstrField, String>> entry2 : entry.getValue().entrySet()) {
1288                 Map<String, String> copy3 = new LinkedHashMap<>();
entry2.getKey()1289                 copy2.put(entry2.getKey(), copy3);
1290                 for (Entry<LstrField, String> entry3 : entry2.getValue().entrySet()) {
entry3.getValue()1291                     copy3.put(entry3.getKey().toString(), entry3.getValue());
1292                 }
1293             }
1294         }
1295         LSTREG = CldrUtility.protectCollection(result);
1296         LSTREG_ENUM = CldrUtility.protectCollection(result2);
1297     }
1298 
1299     private static <K, K2, V> Map<K2, V> putSubtagData(
1300             K lastTag, Map<K, Map<K2, V>> subtagData, Map<K2, V> currentData) {
1301         Map<K2, V> oldData = subtagData.get(lastTag);
1302         if (oldData != null) {
1303             if (oldData.get("CLDR") != null) {
1304                 System.out.println("overriding: " + lastTag + ", " + oldData);
1305             } else {
1306                 throw new IllegalArgumentException("Duplicate tag: " + lastTag);
1307             }
1308         }
1309         return subtagData.put(lastTag, currentData);
1310     }
1311 
    // initLstr() adds 1 for the current type each time it registers a subtag or tag.
    static Counter<LstrType> languageCount = new Counter<>();

    /**
     * Returns the shared counter itself, not a copy. Nothing here triggers loading of the
     * registry, so the counts reflect only what initLstr() has registered so far.
     */
    public static Counter<LstrType> getLanguageCount() {
        return languageCount;
    }
1317 
    // Per-instance parser that the deprecated zone accessors of this class delegate to.
    ZoneParser zoneParser = new ZoneParser();
1319 
1320     // static public final Set<String> MODERN_SCRIPTS = Collections
1321     // .unmodifiableSet(new TreeSet(
1322     // // "Bali " +
1323     // // "Bugi " +
1324     // // "Copt " +
1325     // // "Hano " +
1326     // // "Osma " +
1327     // // "Qaai " +
1328     // // "Sylo " +
1329     // // "Syrc " +
1330     // // "Tagb " +
1331     // // "Tglg " +
1332     // Arrays
1333     // .asList("Hans Hant Jpan Hrkt Kore Arab Armn Bali Beng Bopo Cans Cham Cher Cyrl Deva Ethi Geor
1334     // Grek Gujr Guru Hani Hang Hebr Hira Knda Kana Kali Khmr Laoo Latn Lepc Limb Mlym Mong Mymr
1335     // Talu Nkoo Olck Orya Saur Sinh Tale Taml Telu Thaa Thai Tibt Tfng Vaii Yiii"
1336     // .split("\\s+"))));
1337 
1338     // updated to http://www.unicode.org/reports/tr31/tr31-9.html#Specific_Character_Adjustments
1339 
    /**
     * Returns the result of {@link ZoneParser#getZone_rules()} on this instance's parser,
     * unchanged.
     *
     * @deprecated
     */
    @Deprecated
    public Map<String, List<ZoneLine>> getZone_rules() {
        return zoneParser.getZone_rules();
    }
1347 
    /**
     * Returns the result of {@link ZoneParser#getZoneData()} on this instance's parser,
     * unchanged. Within this class the keys are used as time zone codes.
     *
     * @deprecated
     */
    @Deprecated
    public Map<String, List<String>> getZoneData() {
        return zoneParser.getZoneData();
    }
1355 
    /**
     * Returns the key set of {@link ZoneParser#getZoneData()} for this instance's parser.
     *
     * @deprecated
     */
    @Deprecated
    public Set<String> getCanonicalTimeZones() {
        return zoneParser.getZoneData().keySet();
    }
1363 
    /**
     * Returns the result of {@link ZoneParser#getCountryToZoneSet()} on this instance's parser,
     * unchanged.
     *
     * @deprecated
     */
    @Deprecated
    public Map<String, Set<String>> getCountryToZoneSet() {
        return zoneParser.getCountryToZoneSet();
    }
1371 
    /**
     * Returns the result of {@link ZoneParser#getDeprecatedZoneIDs()} on this instance's parser,
     * unchanged.
     *
     * @deprecated
     */
    @Deprecated
    public List<String> getDeprecatedZoneIDs() {
        return zoneParser.getDeprecatedZoneIDs();
    }
1379 
    /**
     * Returns the result of {@link ZoneParser#getTZIDComparator()} on this instance's parser,
     * unchanged.
     *
     * @deprecated
     */
    @Deprecated
    public Comparator<String> getTZIDComparator() {
        return zoneParser.getTZIDComparator();
    }
1387 
    /**
     * Returns the result of {@link ZoneParser#getZoneLinkNew_OldSet()} on this instance's parser,
     * unchanged.
     *
     * @deprecated
     */
    @Deprecated
    public Map<String, Set<String>> getZoneLinkNew_OldSet() {
        return zoneParser.getZoneLinkNew_OldSet();
    }
1395 
    /**
     * Returns the result of {@link ZoneParser#getZoneLinkold_new()} on this instance's parser,
     * unchanged.
     *
     * @deprecated
     */
    @Deprecated
    public Map<String, String> getZoneLinkold_new() {
        return zoneParser.getZoneLinkold_new();
    }
1403 
    /**
     * Returns the result of {@link ZoneParser#getZoneRuleID_rules()} on this instance's parser,
     * unchanged. The return type is a raw {@code Map}; the key and value types are not visible
     * here.
     *
     * @deprecated
     */
    @Deprecated
    public Map getZoneRuleID_rules() {
        return zoneParser.getZoneRuleID_rules();
    }
1411 
    /**
     * Returns the result of {@link ZoneParser#getZoneToCounty()} on this instance's parser,
     * unchanged. NOTE(review): "County" in the name is presumably a misspelling of "Country";
     * confirm against ZoneParser before relying on it.
     *
     * @deprecated
     */
    @Deprecated
    public Map<String, String> getZoneToCounty() {
        return zoneParser.getZoneToCounty();
    }
1419 
    /**
     * Returns the result of {@link ZoneParser#getVersion()} on this instance's parser, unchanged.
     *
     * @deprecated
     */
    @Deprecated
    public String getZoneVersion() {
        return zoneParser.getVersion();
    }
1427 
1428     public static String fixLanguageTag(String languageSubtag) {
1429         if (languageSubtag.equals("mo")) { // fix special cases
1430             return "ro";
1431         }
1432         return languageSubtag;
1433     }
1434 
1435     public boolean isModernLanguage(String languageCode) {
1436         if (getMoribundLanguages().contains(languageCode)) return false;
1437         Type type = Iso639Data.getType(languageCode);
1438         if (type == Type.Living) return true;
1439         if (languageCode.equals("eo")) return true; // exception for Esperanto
1440         // Scope scope = Iso639Data.getScope(languageCode);
1441         // if (scope == Scope.Collection) return false;
1442         return false;
1443     }
1444 
1445     public static boolean isScriptModern(String script) {
1446         ScriptMetadata.Info info = ScriptMetadata.getInfo(script);
1447         if (info == null) {
1448             if (false) throw new IllegalArgumentException("No script metadata for: " + script);
1449             return false;
1450         }
1451         IdUsage idUsage = info.idUsage;
1452         return idUsage != IdUsage.EXCLUSION && idUsage != IdUsage.UNKNOWN;
1453     }
1454 
    // Pattern for one or more whitespace characters, obtained from PatternCache.
    static final Pattern whitespace = PatternCache.get("\\s+");
    // Starts out null; nothing in this part of the file assigns it.
    static Set<String> filteredCurrencies = null;
1457 
    /**
     * Returns getGoodAvailableCodes(type) unchanged. Going by the method name, the result is
     * meant for display in the Survey Tool.
     *
     * @param type the code type, passed straight through to getGoodAvailableCodes
     */
    public Set<String> getSurveyToolDisplayCodes(String type) {
        return getGoodAvailableCodes(type);
    }
1461 
1462     static UnicodeSet COUNTRY = new UnicodeSet("[a-zA-Z]").freeze();
1463 
1464     /**
1465      * Quick check for whether valid country. Not complete: should use Validity
1466      *
1467      * @param territory
1468      * @return
1469      */
1470     public static boolean isCountry(String territory) {
1471         switch (territory) {
1472             case "ZZ":
1473             case "QO":
1474             case "EU":
1475             case "UN":
1476             case "EZ":
1477                 return false;
1478             default:
1479                 return territory.length() == 2 && COUNTRY.containsAll(territory);
1480         }
1481     }
1482 
1483     public boolean isLstregPrivateUse(String type, String code) {
1484         Map<String, String> lStregData = getLStreg().get(type).get(code);
1485         return lStregData.get("Description").equalsIgnoreCase("private use");
1486     }
1487 
1488     public boolean isLstregDeprecated(String type, String code) {
1489         Map<String, String> lStregData = getLStreg().get(type).get(code);
1490         return lStregData.get("Deprecated") != null;
1491     }
1492 
1493     /** get prospective currencies. Only needed for a few tests */
1494     public Set<String> getOncomingCurrencies() {
1495         Set<String> result = new HashSet<>();
1496         for (Entry<String, List<String>> entry : getCodeData(CodeType.currency).entrySet()) {
1497             if (entry.getValue().get(3).equals("P")) {
1498                 result.add(entry.getKey());
1499             }
1500         }
1501         return result;
1502     }
1503 }
1504