1 package org.unicode.cldr.test; 2 3 import com.google.common.cache.CacheBuilder; 4 import com.google.common.cache.CacheLoader; 5 import com.google.common.cache.LoadingCache; 6 import com.ibm.icu.util.ICUException; 7 import com.ibm.icu.util.Output; 8 import java.util.LinkedHashSet; 9 import java.util.List; 10 import java.util.Map.Entry; 11 import java.util.Set; 12 import java.util.concurrent.ExecutionException; 13 import java.util.regex.Matcher; 14 import java.util.regex.Pattern; 15 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype; 16 import org.unicode.cldr.util.ApproximateWidth; 17 import org.unicode.cldr.util.CLDRFile; 18 import org.unicode.cldr.util.Level; 19 import org.unicode.cldr.util.PatternCache; 20 import org.unicode.cldr.util.Rational; 21 import org.unicode.cldr.util.RegexLookup; 22 import org.unicode.cldr.util.StandardCodes.LstrType; 23 import org.unicode.cldr.util.SupplementalDataInfo; 24 import org.unicode.cldr.util.UnitConverter; 25 import org.unicode.cldr.util.UnitConverter.UnitId; 26 import org.unicode.cldr.util.Validity; 27 28 public class CheckWidths extends CheckCLDR { 29 // remember to add this class to the list in CheckCLDR.getCheckAll 30 // to run just this test, on just locales starting with 'nl', use CheckCLDR with -fnl.* 31 // -t.*CheckWidths.* 32 private static CoverageLevel2 coverageLevel; 33 private Level requiredLevel; 34 35 private static UnitWidthUtil UNIT_WIDTHS_UTIL = UnitWidthUtil.getInstance(); 36 37 /** Controls for the warning about too many components, and for when to cause error. */ 38 public static final int WARN_COMPONENTS_PER_ANNOTATION = 7; 39 40 public static final int MAX_COMPONENTS_PER_ANNOTATION = 16; 41 42 SupplementalDataInfo supplementalData; 43 44 private static final double EM = ApproximateWidth.getWidth("月"); 45 46 private static final boolean DEBUG = true; 47 48 private enum Measure { 49 CODE_POINTS, 50 DISPLAY_WIDTH, 51 SET_ELEMENTS 52 } 53 54 private enum LimitType { 55 MINIMUM, 56 MAXIMUM 57 } 58 59 private enum Special { 60 NONE, 61 QUOTES, 62 PLACEHOLDERS, 63 NUMBERSYMBOLS, 64 NUMBERFORMAT, 65 BARS, 66 PLACEHOLDER_UNITS 67 } 68 69 private static final Pattern PLACEHOLDER_PATTERN = PatternCache.get("\\{\\d\\}"); 70 71 private static class Limit { 72 final double warningReference; 73 final double errorReference; 74 final LimitType limit; 75 final Measure measure; 76 final Special special; 77 final String message; 78 final Subtype subtype; 79 final boolean debug; 80 Limit( double warningReference, double errorReference, Measure measure, LimitType limit, Special special, boolean debug)81 public Limit( 82 double warningReference, 83 double errorReference, 84 Measure measure, 85 LimitType limit, 86 Special special, 87 boolean debug) { 88 this.debug = debug; 89 this.warningReference = warningReference; 90 this.errorReference = errorReference; 91 this.limit = limit; 92 this.measure = measure; 93 this.special = special; 94 switch (limit) { 95 case MINIMUM: 96 this.subtype = Subtype.valueTooNarrow; 97 switch (measure) { 98 case CODE_POINTS: 99 this.message = "Expected no fewer than {0} character(s), but was {1}."; 100 break; 101 case DISPLAY_WIDTH: 102 this.message = "Too narrow by about {2}% (with common fonts)."; 103 break; 104 default: 105 throw new IllegalArgumentException(); 106 } 107 break; 108 case MAXIMUM: 109 switch (measure) { 110 case CODE_POINTS: 111 this.message = "Expected no more than {0} character(s), but was {1}."; 112 this.subtype = Subtype.valueTooWide; 113 break; 114 case DISPLAY_WIDTH: 115 this.message = "Too wide by about {2}% (with common fonts)."; 116 this.subtype = Subtype.valueTooWide; 117 break; 118 case SET_ELEMENTS: 119 this.message = "Expected no more than {0} items(s), but was {1}."; 120 this.subtype = Subtype.tooManyValues; 121 break; 122 default: 123 throw new IllegalArgumentException(); 124 } 125 break; 126 default: 127 throw new IllegalArgumentException(); 128 } 129 } 130 Limit( double d, double e, Measure displayWidth, LimitType maximum, Special placeholders)131 public Limit( 132 double d, double e, Measure displayWidth, LimitType maximum, Special placeholders) { 133 this(d, e, displayWidth, maximum, placeholders, false); 134 } 135 hasProblem( String path, String value, List<CheckStatus> result, CheckCLDR cause, Boolean aliasedAndComprehensive)136 boolean hasProblem( 137 String path, 138 String value, 139 List<CheckStatus> result, 140 CheckCLDR cause, 141 Boolean aliasedAndComprehensive) { 142 double factor = 1d; 143 switch (special) { 144 case NUMBERFORMAT: 145 String[] values = value.split(";", 2); 146 // If it's a number format with positive and negative subpatterns, just check 147 // the longer one. 148 value = 149 (values.length == 2 && values[1].length() > values[0].length()) 150 ? values[1] 151 : values[0]; 152 value = value.replace("'", ""); 153 break; 154 case QUOTES: 155 value = value.replace("'", ""); 156 break; 157 case PLACEHOLDER_UNITS: 158 factor = UNIT_WIDTHS_UTIL.getRoughComponentMax(path); 159 // fall through ok 160 case PLACEHOLDERS: 161 value = PLACEHOLDER_PATTERN.matcher(value).replaceAll(""); 162 break; 163 case NUMBERSYMBOLS: 164 value = 165 value.replaceAll( 166 "[\u200E\u200F\u061C]", 167 ""); // don't include LRM/RLM/ALM when checking length of number 168 // symbols 169 break; 170 case BARS: 171 value = 172 value.replaceAll("[^|]", "") 173 + "|"; // Check the number of items by counting separators. Bit 174 // of a hack... 175 break; 176 default: 177 } 178 double valueMeasure = 179 measure == Measure.DISPLAY_WIDTH 180 ? ApproximateWidth.getWidth(value) 181 : value.codePointCount(0, value.length()); 182 CheckStatus.Type errorType = CheckStatus.warningType; 183 switch (limit) { 184 case MINIMUM: 185 if (valueMeasure >= warningReference) { 186 return false; 187 } 188 if (valueMeasure < errorReference 189 && cause.getPhase() != Phase.BUILD 190 && !aliasedAndComprehensive) { 191 errorType = CheckStatus.errorType; 192 } 193 break; 194 case MAXIMUM: 195 if (valueMeasure <= warningReference * factor) { 196 return false; 197 } 198 if (valueMeasure > errorReference * factor 199 && cause.getPhase() != Phase.BUILD 200 && !aliasedAndComprehensive) { 201 // Workaround for ST submission phase only per TC discussion 2018-05-30 202 // Make too many keywords be only a warning until we decide policy (JCE) 203 if (cause.getPhase() == Phase.SUBMISSION 204 && measure.equals(Measure.SET_ELEMENTS)) { 205 errorType = CheckStatus.warningType; 206 } else { 207 errorType = CheckStatus.errorType; 208 } 209 } 210 break; 211 } 212 // the 115 is so that we don't show small percentages 213 // the /10 ...*10 is to round to multiples of 10% percent 214 double percent = 215 (int) (Math.abs(115 * valueMeasure / warningReference - 100.0d) / 10 + 0.49999d) 216 * 10; 217 result.add( 218 new CheckStatus() 219 .setCause(cause) 220 .setMainType(errorType) 221 .setSubtype(subtype) 222 .setMessage(message, warningReference, valueMeasure, percent)); 223 return true; 224 } 225 } 226 227 static RegexLookup<Limit[]> lookup = 228 new RegexLookup<Limit[]>() 229 .setPatternTransform(RegexLookup.RegexFinderTransformPath) 230 .addVariable("%A", "\"[^\"]+\"") 231 .addVariable("%P", "\"[ap]m\"") 232 .addVariable("%Q", "[^ap].*|[ap][^m].*") // Anything but am or pm 233 .add( 234 "//ldml/delimiters/(quotation|alternateQuotation)", 235 new Limit[] { 236 new Limit( 237 1, 1, Measure.CODE_POINTS, LimitType.MAXIMUM, Special.NONE) 238 }) 239 240 // Numeric items should be no more than a single character 241 242 .add( 243 "//ldml/numbers/symbols[@numberSystem=%A]/(decimal|group|minus|percent|perMille|plus)", 244 new Limit[] { 245 new Limit( 246 1, 247 1, 248 Measure.CODE_POINTS, 249 LimitType.MAXIMUM, 250 Special.NUMBERSYMBOLS) 251 }) 252 253 // Now widths 254 // The following are rough measures, just to check strange cases 255 256 .add( 257 "//ldml/characters/ellipsis[@type=\"(final|initial|medial)\"]", 258 new Limit[] { 259 new Limit( 260 2 * EM, 261 5 * EM, 262 Measure.DISPLAY_WIDTH, 263 LimitType.MAXIMUM, 264 Special.PLACEHOLDERS) 265 }) 266 .add( 267 "//ldml/localeDisplayNames/localeDisplayPattern/", 268 new Limit[] { // {0}: {1}, {0} ({1}), , 269 new Limit( 270 2 * EM, 271 3 * EM, 272 Measure.DISPLAY_WIDTH, 273 LimitType.MAXIMUM, 274 Special.PLACEHOLDERS) 275 }) 276 .add( 277 "//ldml/listPatterns/listPattern/listPatternPart[@type=%A]", 278 new Limit[] { // {0} and {1} 279 new Limit( 280 5 * EM, 281 10 * EM, 282 Measure.DISPLAY_WIDTH, 283 LimitType.MAXIMUM, 284 Special.PLACEHOLDERS) 285 }) 286 .add( 287 "//ldml/dates/timeZoneNames/fallbackFormat", 288 new Limit[] { // {1} ({0}) 289 new Limit( 290 2 * EM, 291 3 * EM, 292 Measure.DISPLAY_WIDTH, 293 LimitType.MAXIMUM, 294 Special.PLACEHOLDERS) 295 }) 296 .add( 297 "//ldml/dates/timeZoneNames/(regionFormat|hourFormat)", 298 new Limit[] { // {0} Time, 299 // +HH:mm;-HH:mm 300 new Limit( 301 10 * EM, 302 20 * EM, 303 Measure.DISPLAY_WIDTH, 304 LimitType.MAXIMUM, 305 Special.PLACEHOLDERS) 306 }) 307 .add( 308 "//ldml/dates/timeZoneNames/(gmtFormat|gmtZeroFormat)", 309 new Limit[] { // GMT{0}, GMT 310 new Limit( 311 5 * EM, 312 10 * EM, 313 Measure.DISPLAY_WIDTH, 314 LimitType.MAXIMUM, 315 Special.PLACEHOLDERS) 316 }) 317 318 // Era Abbreviations 319 320 // Allow longer for Japanese calendar eras 321 .add( 322 "//ldml/dates/calendars/calendar[@type=\"japanese\"]/.*/eraAbbr/era[@type=%A]", 323 new Limit[] { 324 new Limit( 325 12 * EM, 326 16 * EM, 327 Measure.DISPLAY_WIDTH, 328 LimitType.MAXIMUM, 329 Special.NONE) 330 }) 331 // Allow longer for ROC calendar eras 332 .add( 333 "//ldml/dates/calendars/calendar[@type=\"roc\"]/.*/eraAbbr/era[@type=%A]", 334 new Limit[] { 335 new Limit( 336 4 * EM, 337 8 * EM, 338 Measure.DISPLAY_WIDTH, 339 LimitType.MAXIMUM, 340 Special.NONE) 341 }) 342 .add( 343 "//ldml/dates/calendars/calendar.*/eraAbbr/era[@type=%A]", 344 new Limit[] { 345 new Limit( 346 3 * EM, 347 6 * EM, 348 Measure.DISPLAY_WIDTH, 349 LimitType.MAXIMUM, 350 Special.NONE) 351 }) 352 353 // am/pm abbreviated 354 .add( 355 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=%P]", 356 new Limit[] { 357 new Limit( 358 4 * EM, 359 6 * EM, 360 Measure.DISPLAY_WIDTH, 361 LimitType.MAXIMUM, 362 Special.NONE) 363 }) 364 // other day periods abbreviated 365 .add( 366 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=%Q]", 367 new Limit[] { 368 new Limit( 369 8 * EM, 370 12 * EM, 371 Measure.DISPLAY_WIDTH, 372 LimitType.MAXIMUM, 373 Special.NONE) 374 }) 375 // am/pm wide 376 .add( 377 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=%P]", 378 new Limit[] { 379 new Limit( 380 5 * EM, 381 10 * EM, 382 Measure.DISPLAY_WIDTH, 383 LimitType.MAXIMUM, 384 Special.NONE) 385 }) 386 // other day periods wide 387 .add( 388 "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/.*/dayPeriodWidth[@type=\"wide\"]/dayPeriod[@type=%Q]", 389 new Limit[] { 390 new Limit( 391 10 * EM, 392 20 * EM, 393 Measure.DISPLAY_WIDTH, 394 LimitType.MAXIMUM, 395 Special.NONE) 396 }) 397 398 // Narrow items 399 400 .add( 401 "//ldml/dates/calendars/calendar.*[@type=\"narrow\"](?!/cyclic|/dayPeriod|/monthPattern)", 402 new Limit[] { 403 new Limit( 404 1.5 * EM, 405 2.25 * EM, 406 Measure.DISPLAY_WIDTH, 407 LimitType.MAXIMUM, 408 Special.NONE) 409 }) 410 // \"(?!am|pm)[^\"]+\"\\ 411 412 // Compact number formats 413 // pattern[@type="100000000000000"] 414 .add( 415 "//ldml/numbers/decimalFormats[@numberSystem=%A]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=%A]/pattern[@type=\"100000000000000", 416 new Limit[] { 417 new Limit( 418 4 * EM, 419 6 * EM, 420 Measure.DISPLAY_WIDTH, 421 LimitType.MAXIMUM, 422 Special.NUMBERFORMAT) 423 }) 424 .add( 425 "//ldml/numbers/decimalFormats[@numberSystem=%A]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=%A]/pattern[@type=\"1", 426 new Limit[] { 427 new Limit( 428 4 * EM, 429 5 * EM, 430 Measure.DISPLAY_WIDTH, 431 LimitType.MAXIMUM, 432 Special.NUMBERFORMAT) 433 }) 434 435 // Short/Narrow units 436 // Note that the EM values are adjusted for units according to the number of 437 // components in the units 438 // See UnitWidthUtil for more information 439 .add( 440 "//ldml/units/unitLength[@type=\"(short|narrow)\"]/unit[@type=%A]/unitPattern", 441 new Limit[] { 442 new Limit( 443 3 * EM, 444 5 * EM, 445 Measure.DISPLAY_WIDTH, 446 LimitType.MAXIMUM, 447 Special.PLACEHOLDER_UNITS) 448 }) 449 450 // Currency Symbols 451 .add( 452 "//ldml/numbers/currencies/currency[@type=%A]/symbol", 453 new Limit[] { 454 new Limit( 455 3 * EM, 456 5 * EM, 457 Measure.DISPLAY_WIDTH, 458 LimitType.MAXIMUM, 459 Special.PLACEHOLDERS) 460 }) 461 462 // "grinning cat face with smiling eyes" should be normal max ~= 160 em 463 // emoji names (not keywords) 464 .add( 465 "//ldml/annotations/annotation[@cp=%A][@type=%A]", 466 new Limit[] { 467 new Limit( 468 20 * EM, 469 100 * EM, 470 Measure.DISPLAY_WIDTH, 471 LimitType.MAXIMUM, 472 Special.NONE), 473 }) 474 .add( 475 "//ldml/annotations/annotation[@cp=%A]", 476 new Limit[] { 477 new Limit( 478 WARN_COMPONENTS_PER_ANNOTATION, 479 MAX_COMPONENTS_PER_ANNOTATION, 480 Measure.SET_ELEMENTS, 481 LimitType.MAXIMUM, 482 Special.BARS) // Allow up to 5 with no warning, up to 7 483 // with no error. 484 }); 485 486 // Quell noisy printout 487 // static { 488 // System.out.println("EMs: " + ApproximateWidth.getWidth("grinning cat face with smiling 489 // eyes")); 490 // } 491 492 Set<Limit> found = new LinkedHashSet<>(); 493 494 @Override handleCheck( String path, String fullPath, String value, Options options, List<CheckStatus> result)495 public CheckCLDR handleCheck( 496 String path, String fullPath, String value, Options options, List<CheckStatus> result) { 497 if (value == null) { 498 return this; // skip 499 } 500 if (!accept(result)) return this; 501 // String testPrefix = "//ldml/units/unitLength[@type=\"narrow\"]"; 502 // if (path.startsWith(testPrefix)) { 503 // int i = 0; 504 // } 505 // Limits item0 = 506 // lookup.get("//ldml/numbers/decimalFormats[@numberSystem=\"latn\"]/decimalFormatLength[@type=\"short\"]/decimalFormat[@type=\"standard\"]/pattern[@type=\"1000000000\"][@count=\"other\"]"); 507 // item0.check("123456789", result, this); 508 509 Limit[] items = lookup.get(path); 510 if (items != null) { 511 CLDRFile.Status status = new CLDRFile.Status(); 512 this.getCldrFileToCheck().getSourceLocaleID(path, status); 513 // This was put in specifically to deal with the fact that we added a bunch of new units 514 // in CLDR 26 515 // and didn't put the narrow forms of them into modern coverage. If/when the narrow 516 // forms of all units 517 // are modern coverage, then we can safely remove the aliasedAndComprehensive check. 518 // Right now if an 519 // item is aliased and coverage is comprehensive, then it can't generate anything worse 520 // than a warning. 521 Boolean aliasedAndComprehensive = 522 (coverageLevel.getLevel(path).compareTo(Level.COMPREHENSIVE) == 0) 523 && (status.pathWhereFound.compareTo(path) != 0); 524 for (Limit item : items) { 525 if (item.hasProblem(path, value, result, this, aliasedAndComprehensive)) { 526 if (DEBUG && !found.contains(item)) { 527 found.add(item); 528 } 529 break; // only one error per item 530 } 531 } 532 } 533 return this; 534 } 535 536 @Override handleSetCldrFileToCheck( CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)537 public CheckCLDR handleSetCldrFileToCheck( 538 CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors) { 539 final String localeID = cldrFileToCheck.getLocaleID(); 540 supplementalData = 541 SupplementalDataInfo.getInstance(cldrFileToCheck.getSupplementalDirectory()); 542 coverageLevel = CoverageLevel2.getInstance(supplementalData, localeID); 543 544 super.handleSetCldrFileToCheck(cldrFileToCheck, options, possibleErrors); 545 return this; 546 } 547 548 /** 549 * Provide a rough measure of how many unit components there are for the purpose of establishing 550 * a maximum width, with an special factor for non-metric. 551 */ 552 public static class UnitWidthUtil { 553 static final Pattern UNIT_PREFIX = 554 Pattern.compile( 555 "//ldml/units/unitLength\\[@type=\"([^\"]*)\"]/unit\\[@type=\"([^\\\"]*)\"]"); 556 final UnitConverter CONVERTER = SupplementalDataInfo.getInstance().getUnitConverter(); 557 final Set<String> validLongUnitIDs = 558 Validity.getInstance().getCodeToStatus(LstrType.unit).keySet(); 559 560 LoadingCache<String, Double> pathToUnitComponents = 561 CacheBuilder.newBuilder() 562 .build( 563 new CacheLoader<String, Double>() { 564 @Override 565 public Double load(String path) throws ExecutionException { 566 final Matcher matcher = UNIT_PREFIX.matcher(path); 567 if (matcher.lookingAt()) { 568 // String length = matcher.group(1); 569 String longUnitId = matcher.group(2); 570 return unitToComponents.get(longUnitId); 571 } else { 572 throw new ICUException("Internal error"); 573 } 574 } 575 }); 576 577 LoadingCache<String, Double> unitToComponents = 578 CacheBuilder.newBuilder() 579 .build( 580 new CacheLoader<String, Double>() { 581 @Override 582 public Double load(String longUnitId) { 583 double components = 0; 584 String shortId = CONVERTER.getShortId(longUnitId); 585 586 Set<String> systems = CONVERTER.getSystems(shortId); 587 int widthFactor = 588 systems.contains("metric") 589 && !shortId.endsWith("-metric") 590 ? 1 591 : 3; 592 // NOTE: allow cup-metric and pint-metric to be longer, 593 // since they aren't standard metric 594 595 // walk thorough the numerator and denominator to get the 596 // values 597 UnitId unitId = CONVERTER.createUnitId(shortId); 598 for (Entry<String, Integer> entry : 599 unitId.numUnitsToPowers.entrySet()) { 600 components += 601 getComponentCount( 602 entry.getKey(), entry.getValue()); 603 } 604 for (Entry<String, Integer> entry : 605 unitId.denUnitsToPowers.entrySet()) { 606 components += 607 getComponentCount( 608 entry.getKey(), entry.getValue()); 609 } 610 return widthFactor * components; 611 } 612 613 public double getComponentCount(String unit, Integer power) { 614 int result = 1; 615 if (power > 1) { 616 ++result; // add one component for a power 617 } 618 // hack for number 619 if (unit.startsWith("100-")) { 620 ++result; 621 unit = unit.substring(4); 622 } 623 Output<Rational> deprefix = new Output<>(); 624 unit = UnitConverter.stripPrefix(unit, deprefix); 625 if (!deprefix.value.equals(Rational.ONE)) { 626 ++result; // add 1 component for kilo, mega, etc. 627 } 628 for (int i = 0; i < unit.length(); ++i) { 629 if (unit.charAt(i) == '-') { 630 ++result; // add one component for -imperial, etc. 631 } 632 } 633 return result; 634 } 635 }); 636 UnitWidthUtil()637 private UnitWidthUtil() {} 638 getInstance()639 public static UnitWidthUtil getInstance() { 640 return new UnitWidthUtil(); 641 } 642 getRoughComponentMax(String path)643 public double getRoughComponentMax(String path) { 644 try { 645 return pathToUnitComponents.get(path); 646 } catch (ExecutionException e) { 647 throw new ICUException(e); 648 } 649 } 650 } 651 } 652