xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/test/SubmissionLocales.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.test;
2 
3 import com.google.common.cache.CacheBuilder;
4 import com.google.common.cache.CacheLoader;
5 import com.google.common.cache.LoadingCache;
6 import com.google.common.collect.ImmutableSet;
7 import com.google.common.collect.ImmutableSortedSet;
8 import java.util.Collections;
9 import java.util.EnumSet;
10 import java.util.HashSet;
11 import java.util.Set;
12 import java.util.concurrent.ExecutionException;
13 import java.util.regex.Matcher;
14 import java.util.regex.Pattern;
15 import org.unicode.cldr.util.GrammarInfo;
16 import org.unicode.cldr.util.Level;
17 import org.unicode.cldr.util.Organization;
18 import org.unicode.cldr.util.RegexUtilities;
19 import org.unicode.cldr.util.StandardCodes;
20 import org.unicode.cldr.util.SupplementalDataInfo;
21 import org.unicode.cldr.util.VoterReportStatus;
22 import org.unicode.cldr.util.VoterReportStatus.ReportId;
23 
24 /**
25  * This class manages the Limited Submission process.
26  *
27  * <p>TODO: see https://unicode-org.atlassian.net/browse/CLDR-15230 for TODOs here
28  *
29  * @see CheckCLDR#LIMITED_SUBMISSION
30  */
31 public final class SubmissionLocales {
32     // TODO:  On the use of Locales.txt here, see
33     // https://unicode-org.atlassian.net/browse/CLDR-14838
34     /** This is the 'raw' list from Locales.txt */
35     public static final Set<String> CLDR_LOCALES =
36             StandardCodes.make().getLocaleToLevel(Organization.cldr).keySet();
37 
38     /** This is the 'special' list from Locales.txt */
39     public static final Set<String> SPECIAL_ORG_LOCALES =
40             StandardCodes.make().getLocaleToLevel(Organization.special).keySet();
41 
42     /**
43      * Non-CLDR Locales, but consistently have high level of engagement from volunteers to keep at
44      * modern level. Reevaluate for each release based on meeting 95+% of modern, moderate, and
45      * basic coverage
46      */
47     public static Set<String> HIGH_LEVEL_LOCALES =
48             ImmutableSet.of(
49                     // Note: ALL of these were found in Locales.txt under cldr.
50                     "chr", // Cherokee
51                     "gd", // Scottish Gaelic, Gaelic
52                     "fo", // Faroese
53                     "kok", // Konkani
54                     "pcm", // Nigerian Pidgin
55                     "ha", // Hausa
56                     "hsb", // Upper Sorbian
57                     "dsb", // Lower Sorbian
58                     "yue_Hans", // Cantonese (Simplified)
59                     "to" //  Tongan
60                     );
61 
62     public static final Set<String> CLDR_OR_HIGH_LEVEL_LOCALES =
63             ImmutableSet.<String>builder().addAll(CLDR_LOCALES).addAll(HIGH_LEVEL_LOCALES).build();
64 
65     /** Subset of reports open for this release */
66     private static final Set<ReportId> LIMITED_SUBMISSION_REPORTS =
67             Collections.unmodifiableSet(EnumSet.of(VoterReportStatus.ReportId.personnames));
68 
69     /** Subset of CLDR_LOCALES, minus special which are only those which are TC orgs */
70     public static final Set<String> TC_ORG_LOCALES;
71 
72     /**
73      * Set to true iff ONLY grammar locales should be limited submission {@link
74      * GrammarInfo#getGrammarLocales()}
75      */
76     public static final boolean ONLY_GRAMMAR_LOCALES = false;
77 
78     /** Update this in each limited release. */
79     public static final Set<String> LOCALES_FOR_LIMITED;
80 
81     static {
82         Set<String> temp = new HashSet<>(CLDR_OR_HIGH_LEVEL_LOCALES);
83         if (ONLY_GRAMMAR_LOCALES) {
GrammarInfo.getGrammarLocales()84             temp.retainAll(GrammarInfo.getGrammarLocales());
85         }
86         LOCALES_FOR_LIMITED = ImmutableSortedSet.copyOf(temp);
87 
88         Set<String> temp2 = new HashSet<>(CLDR_LOCALES);
89         temp2.removeAll(SPECIAL_ORG_LOCALES);
90         TC_ORG_LOCALES = ImmutableSortedSet.copyOf(temp2);
91     }
92 
93     /**
94      * New locales in this release, where we want to allow any paths even if others are restricted
95      */
96     public static Set<String> ALLOW_ALL_PATHS_BASIC =
97             ImmutableSet.of(
98                     // locales open for v43:
99                     "apc", // Levantine Arabic; NB actual submission was "ajp" South Levantine
100                     // Arabic
101                     "lmo", // Lombardi
102                     "pap", // Papiamento
103                     "rif" // Riffian
104                     );
105 
106     public static Set<String> LOCALES_ALLOWED_IN_LIMITED =
107             ImmutableSet.<String>builder()
108                     .addAll(LOCALES_FOR_LIMITED)
109                     .addAll(ALLOW_ALL_PATHS_BASIC)
110                     .build();
111 
112     public static final Pattern PATHS_ALLOWED_IN_LIMITED =
113             Pattern.compile(
114                     "//ldml/"
115                             // v43: All person names
116                             + "(personNames/.*"
117                             // v43: Turkey and its alternate
118                             + "|localeDisplayNames/territories/territory\\[@type=\"TR\"\\].*"
119                             // v43: Exemplar city for America/Ciudad_Juarez
120                             + "|dates/timeZoneNames/zone[@type=\"America/Ciudad_Juarez\"]/exemplarCity"
121                             + ")");
122 
123     // Pattern.compile("//ldml/units/unitLength\\[@type=\"long\"]");
124 
125     /* Example of special paths
126     * Pattern.compile(
127        "//ldml/"
128            + "(listPatterns/listPattern\\[@type=\"standard"
129            + "|annotations/annotation\\[@cp=\"([©®‼⁉☑✅✔✖✨✳✴❇❌❎❓-❕❗❣ ➕-➗��-��������������������������⭕��������������⭕��������������������]|��‍♀|��‍♂)\""
130            + "|localeDisplayNames/"
131            +   "(scripts/script\\[@type=\"(Elym|Hmnp|Nand|Wcho)\""
132            +    "|territories/territory\\[@type=\"(MO|SZ)\"](\\[@alt=\"variant\"])?"
133            +    "|types/type\\[@key=\"numbers\"]\\[@type=\"(hmnp|wcho)\"]"
134            +   ")"
135            + "|dates/timeZoneNames/(metazone\\[@type=\"Macau\"]"
136            +   "|zone\\[@type=\"Asia/Macau\"]"
137            +   ")"
138            + ")"
139            );
140            */
141 
142     // ldml/dates/timeZoneNames/metazone[@type="Macau"]/long/daylight, old: Macau Summer Time, new:
143     // Macao Summer Time
144     // ldml/dates/timeZoneNames/metazone[@type="Macau"]/long/standard, old: Macau Standard Time,
145     // new: Macao Standard Time
146     // ldml/localeDisplayNames/territories/territory[@type="SZ"][@alt="variant"], old: SZ, new:
147     // Swaziland
148     // ldml/dates/timeZoneNames/zone[@type="Asia/Macau"]/exemplarCity, old: Macau, new: Macao
149     // ldml/dates/timeZoneNames/metazone[@type="Macau"]/long/generic, old: Macau Time, new: Macao
150     // Time
151     // ldml/localeDisplayNames/territories/territory[@type="SZ"], old: Swaziland, new: Eswatini
152 
153     private static final class SubmissionLocalesCache {
154         public static SubmissionLocalesCache INSTANCE = new SubmissionLocalesCache();
155         private LoadingCache<String, CoverageLevel2> covs;
156 
SubmissionLocalesCache()157         SubmissionLocalesCache() {
158             covs =
159                     CacheBuilder.newBuilder()
160                             .build(
161                                     new CacheLoader<String, CoverageLevel2>() {
162                                         @Override
163                                         public CoverageLevel2 load(String key) throws Exception {
164                                             return CoverageLevel2.getInstance(
165                                                     SupplementalDataInfo.getInstance(), key);
166                                         }
167                                     });
168         }
169 
getCoverageLevel(String localeString, String path)170         public static Enum<Level> getCoverageLevel(String localeString, String path) {
171             try {
172                 return INSTANCE.covs.get(localeString).getLevel(path);
173             } catch (ExecutionException e) {
174                 throw new RuntimeException(
175                         String.format("Could not fetch coverage for %s:%s", localeString, path), e);
176             }
177         }
178     }
179 
180     /**
181      * Only call this if {@link CheckCLDR#LIMITED_SUBMISSION}
182      *
183      * @param localeString
184      * @param path
185      * @param isError
186      * @param isMissing
187      * @return true if submission is allowed, else false
188      * @see CheckCLDR#LIMITED_SUBMISSION
189      */
allowEvenIfLimited( String localeString, String path, boolean isError, boolean isMissing)190     public static boolean allowEvenIfLimited(
191             String localeString, String path, boolean isError, boolean isMissing) {
192 
193         // Allow errors to be fixed
194         if (isError) {
195             return true;
196         }
197 
198         // for new locales, allow basic paths
199         if (SubmissionLocales.ALLOW_ALL_PATHS_BASIC.contains(localeString)
200                 &&
201                 // Only check coverage level for these locales
202                 isPathBasicOrLess(localeString, path)) {
203             return true;
204         }
205 
206         // all but specific locales are otherwise locked
207         if (!SubmissionLocales.LOCALES_ALLOWED_IN_LIMITED.contains(localeString)) {
208             return false;
209         }
210 
211         // in TC Org locales, lock all paths except missing and special
212         if (isMissing && TC_ORG_LOCALES.contains(localeString)) {
213             return true;
214         }
215 
216         if (pathAllowedInLimitedSubmission(path)) {
217             return true;
218         }
219 
220         return false; // skip
221     }
222 
isPathBasicOrLess(String localeString, String path)223     private static boolean isPathBasicOrLess(String localeString, String path) {
224         return SubmissionLocalesCache.getCoverageLevel(localeString, path).compareTo(Level.BASIC)
225                 <= 0;
226     }
227 
228     private static final boolean DEBUG_REGEX = false;
229 
230     /**
231      * Only public for testing
232      *
233      * @param path
234      * @return
235      */
pathAllowedInLimitedSubmission(String path)236     public static boolean pathAllowedInLimitedSubmission(String path) {
237         if (PATHS_ALLOWED_IN_LIMITED == null) {
238             return false;
239         }
240         final Matcher matcher = SubmissionLocales.PATHS_ALLOWED_IN_LIMITED.matcher(path);
241         boolean result = matcher.lookingAt();
242         if (DEBUG_REGEX && !result) {
243             System.out.println(RegexUtilities.showMismatch(matcher, path));
244         }
245         return result;
246     }
247 
getReportsAvailableInLimited()248     public static Set<ReportId> getReportsAvailableInLimited() {
249         return LIMITED_SUBMISSION_REPORTS;
250     }
251 }
252