1 package org.unicode.cldr.test; 2 3 import com.google.common.cache.CacheBuilder; 4 import com.google.common.cache.CacheLoader; 5 import com.google.common.cache.LoadingCache; 6 import com.google.common.collect.ImmutableSet; 7 import com.google.common.collect.ImmutableSortedSet; 8 import java.util.Collections; 9 import java.util.EnumSet; 10 import java.util.HashSet; 11 import java.util.Set; 12 import java.util.concurrent.ExecutionException; 13 import java.util.regex.Matcher; 14 import java.util.regex.Pattern; 15 import org.unicode.cldr.util.GrammarInfo; 16 import org.unicode.cldr.util.Level; 17 import org.unicode.cldr.util.Organization; 18 import org.unicode.cldr.util.RegexUtilities; 19 import org.unicode.cldr.util.StandardCodes; 20 import org.unicode.cldr.util.SupplementalDataInfo; 21 import org.unicode.cldr.util.VoterReportStatus; 22 import org.unicode.cldr.util.VoterReportStatus.ReportId; 23 24 /** 25 * This class manages the Limited Submission process. 26 * 27 * <p>TODO: see https://unicode-org.atlassian.net/browse/CLDR-15230 for TODOs here 28 * 29 * @see CheckCLDR#LIMITED_SUBMISSION 30 */ 31 public final class SubmissionLocales { 32 // TODO: On the use of Locales.txt here, see 33 // https://unicode-org.atlassian.net/browse/CLDR-14838 34 /** This is the 'raw' list from Locales.txt */ 35 public static final Set<String> CLDR_LOCALES = 36 StandardCodes.make().getLocaleToLevel(Organization.cldr).keySet(); 37 38 /** This is the 'special' list from Locales.txt */ 39 public static final Set<String> SPECIAL_ORG_LOCALES = 40 StandardCodes.make().getLocaleToLevel(Organization.special).keySet(); 41 42 /** 43 * Non-CLDR Locales, but consistently have high level of engagement from volunteers to keep at 44 * modern level. Reevaluate for each release based on meeting 95+% of modern, moderate, and 45 * basic coverage 46 */ 47 public static Set<String> HIGH_LEVEL_LOCALES = 48 ImmutableSet.of( 49 // Note: ALL of these were found in Locales.txt under cldr. 50 "chr", // Cherokee 51 "gd", // Scottish Gaelic, Gaelic 52 "fo", // Faroese 53 "kok", // Konkani 54 "pcm", // Nigerian Pidgin 55 "ha", // Hausa 56 "hsb", // Upper Sorbian 57 "dsb", // Lower Sorbian 58 "yue_Hans", // Cantonese (Simplified) 59 "to" // Tongan 60 ); 61 62 public static final Set<String> CLDR_OR_HIGH_LEVEL_LOCALES = 63 ImmutableSet.<String>builder().addAll(CLDR_LOCALES).addAll(HIGH_LEVEL_LOCALES).build(); 64 65 /** Subset of reports open for this release */ 66 private static final Set<ReportId> LIMITED_SUBMISSION_REPORTS = 67 Collections.unmodifiableSet(EnumSet.of(VoterReportStatus.ReportId.personnames)); 68 69 /** Subset of CLDR_LOCALES, minus special which are only those which are TC orgs */ 70 public static final Set<String> TC_ORG_LOCALES; 71 72 /** 73 * Set to true iff ONLY grammar locales should be limited submission {@link 74 * GrammarInfo#getGrammarLocales()} 75 */ 76 public static final boolean ONLY_GRAMMAR_LOCALES = false; 77 78 /** Update this in each limited release. */ 79 public static final Set<String> LOCALES_FOR_LIMITED; 80 81 static { 82 Set<String> temp = new HashSet<>(CLDR_OR_HIGH_LEVEL_LOCALES); 83 if (ONLY_GRAMMAR_LOCALES) { GrammarInfo.getGrammarLocales()84 temp.retainAll(GrammarInfo.getGrammarLocales()); 85 } 86 LOCALES_FOR_LIMITED = ImmutableSortedSet.copyOf(temp); 87 88 Set<String> temp2 = new HashSet<>(CLDR_LOCALES); 89 temp2.removeAll(SPECIAL_ORG_LOCALES); 90 TC_ORG_LOCALES = ImmutableSortedSet.copyOf(temp2); 91 } 92 93 /** 94 * New locales in this release, where we want to allow any paths even if others are restricted 95 */ 96 public static Set<String> ALLOW_ALL_PATHS_BASIC = 97 ImmutableSet.of( 98 // locales open for v43: 99 "apc", // Levantine Arabic; NB actual submission was "ajp" South Levantine 100 // Arabic 101 "lmo", // Lombardi 102 "pap", // Papiamento 103 "rif" // Riffian 104 ); 105 106 public static Set<String> LOCALES_ALLOWED_IN_LIMITED = 107 ImmutableSet.<String>builder() 108 .addAll(LOCALES_FOR_LIMITED) 109 .addAll(ALLOW_ALL_PATHS_BASIC) 110 .build(); 111 112 public static final Pattern PATHS_ALLOWED_IN_LIMITED = 113 Pattern.compile( 114 "//ldml/" 115 // v43: All person names 116 + "(personNames/.*" 117 // v43: Turkey and its alternate 118 + "|localeDisplayNames/territories/territory\\[@type=\"TR\"\\].*" 119 // v43: Exemplar city for America/Ciudad_Juarez 120 + "|dates/timeZoneNames/zone[@type=\"America/Ciudad_Juarez\"]/exemplarCity" 121 + ")"); 122 123 // Pattern.compile("//ldml/units/unitLength\\[@type=\"long\"]"); 124 125 /* Example of special paths 126 * Pattern.compile( 127 "//ldml/" 128 + "(listPatterns/listPattern\\[@type=\"standard" 129 + "|annotations/annotation\\[@cp=\"([©®‼⁉☑✅✔✖✨✳✴❇❌❎❓-❕❗❣ ➕-➗-⭕⭕]|♀|♂)\"" 130 + "|localeDisplayNames/" 131 + "(scripts/script\\[@type=\"(Elym|Hmnp|Nand|Wcho)\"" 132 + "|territories/territory\\[@type=\"(MO|SZ)\"](\\[@alt=\"variant\"])?" 133 + "|types/type\\[@key=\"numbers\"]\\[@type=\"(hmnp|wcho)\"]" 134 + ")" 135 + "|dates/timeZoneNames/(metazone\\[@type=\"Macau\"]" 136 + "|zone\\[@type=\"Asia/Macau\"]" 137 + ")" 138 + ")" 139 ); 140 */ 141 142 // ldml/dates/timeZoneNames/metazone[@type="Macau"]/long/daylight, old: Macau Summer Time, new: 143 // Macao Summer Time 144 // ldml/dates/timeZoneNames/metazone[@type="Macau"]/long/standard, old: Macau Standard Time, 145 // new: Macao Standard Time 146 // ldml/localeDisplayNames/territories/territory[@type="SZ"][@alt="variant"], old: SZ, new: 147 // Swaziland 148 // ldml/dates/timeZoneNames/zone[@type="Asia/Macau"]/exemplarCity, old: Macau, new: Macao 149 // ldml/dates/timeZoneNames/metazone[@type="Macau"]/long/generic, old: Macau Time, new: Macao 150 // Time 151 // ldml/localeDisplayNames/territories/territory[@type="SZ"], old: Swaziland, new: Eswatini 152 153 private static final class SubmissionLocalesCache { 154 public static SubmissionLocalesCache INSTANCE = new SubmissionLocalesCache(); 155 private LoadingCache<String, CoverageLevel2> covs; 156 SubmissionLocalesCache()157 SubmissionLocalesCache() { 158 covs = 159 CacheBuilder.newBuilder() 160 .build( 161 new CacheLoader<String, CoverageLevel2>() { 162 @Override 163 public CoverageLevel2 load(String key) throws Exception { 164 return CoverageLevel2.getInstance( 165 SupplementalDataInfo.getInstance(), key); 166 } 167 }); 168 } 169 getCoverageLevel(String localeString, String path)170 public static Enum<Level> getCoverageLevel(String localeString, String path) { 171 try { 172 return INSTANCE.covs.get(localeString).getLevel(path); 173 } catch (ExecutionException e) { 174 throw new RuntimeException( 175 String.format("Could not fetch coverage for %s:%s", localeString, path), e); 176 } 177 } 178 } 179 180 /** 181 * Only call this if {@link CheckCLDR#LIMITED_SUBMISSION} 182 * 183 * @param localeString 184 * @param path 185 * @param isError 186 * @param isMissing 187 * @return true if submission is allowed, else false 188 * @see CheckCLDR#LIMITED_SUBMISSION 189 */ allowEvenIfLimited( String localeString, String path, boolean isError, boolean isMissing)190 public static boolean allowEvenIfLimited( 191 String localeString, String path, boolean isError, boolean isMissing) { 192 193 // Allow errors to be fixed 194 if (isError) { 195 return true; 196 } 197 198 // for new locales, allow basic paths 199 if (SubmissionLocales.ALLOW_ALL_PATHS_BASIC.contains(localeString) 200 && 201 // Only check coverage level for these locales 202 isPathBasicOrLess(localeString, path)) { 203 return true; 204 } 205 206 // all but specific locales are otherwise locked 207 if (!SubmissionLocales.LOCALES_ALLOWED_IN_LIMITED.contains(localeString)) { 208 return false; 209 } 210 211 // in TC Org locales, lock all paths except missing and special 212 if (isMissing && TC_ORG_LOCALES.contains(localeString)) { 213 return true; 214 } 215 216 if (pathAllowedInLimitedSubmission(path)) { 217 return true; 218 } 219 220 return false; // skip 221 } 222 isPathBasicOrLess(String localeString, String path)223 private static boolean isPathBasicOrLess(String localeString, String path) { 224 return SubmissionLocalesCache.getCoverageLevel(localeString, path).compareTo(Level.BASIC) 225 <= 0; 226 } 227 228 private static final boolean DEBUG_REGEX = false; 229 230 /** 231 * Only public for testing 232 * 233 * @param path 234 * @return 235 */ pathAllowedInLimitedSubmission(String path)236 public static boolean pathAllowedInLimitedSubmission(String path) { 237 if (PATHS_ALLOWED_IN_LIMITED == null) { 238 return false; 239 } 240 final Matcher matcher = SubmissionLocales.PATHS_ALLOWED_IN_LIMITED.matcher(path); 241 boolean result = matcher.lookingAt(); 242 if (DEBUG_REGEX && !result) { 243 System.out.println(RegexUtilities.showMismatch(matcher, path)); 244 } 245 return result; 246 } 247 getReportsAvailableInLimited()248 public static Set<ReportId> getReportsAvailableInLimited() { 249 return LIMITED_SUBMISSION_REPORTS; 250 } 251 } 252