1 package org.unicode.cldr.tool; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.collect.ComparisonChain; 5 import com.google.common.collect.ImmutableSet; 6 import com.google.common.collect.Iterables; 7 import com.google.common.collect.Multimap; 8 import com.google.common.collect.TreeMultimap; 9 import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap; 10 import com.ibm.icu.text.DecimalFormat; 11 import com.ibm.icu.text.MessageFormat; 12 import com.ibm.icu.text.PluralRules; 13 import com.ibm.icu.text.PluralRules.SampleType; 14 import com.ibm.icu.text.RuleBasedCollator; 15 import com.ibm.icu.util.Output; 16 import com.ibm.icu.util.ULocale; 17 import java.io.IOException; 18 import java.io.PrintWriter; 19 import java.util.Arrays; 20 import java.util.Collection; 21 import java.util.Collections; 22 import java.util.Comparator; 23 import java.util.HashMap; 24 import java.util.LinkedHashMap; 25 import java.util.LinkedHashSet; 26 import java.util.List; 27 import java.util.Map; 28 import java.util.Map.Entry; 29 import java.util.Set; 30 import java.util.TreeMap; 31 import java.util.TreeSet; 32 import java.util.regex.Matcher; 33 import java.util.stream.Collectors; 34 import org.unicode.cldr.draft.FileUtilities; 35 import org.unicode.cldr.test.ExampleGenerator; 36 import org.unicode.cldr.tool.FormattedFileWriter.Anchors; 37 import org.unicode.cldr.util.CLDRConfig; 38 import org.unicode.cldr.util.CLDRFile; 39 import org.unicode.cldr.util.CLDRLocale; 40 import org.unicode.cldr.util.CLDRPaths; 41 import org.unicode.cldr.util.CldrUtility; 42 import org.unicode.cldr.util.Factory; 43 import org.unicode.cldr.util.FileCopier; 44 import org.unicode.cldr.util.GrammarInfo; 45 import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature; 46 import org.unicode.cldr.util.GrammarInfo.GrammaticalScope; 47 import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget; 48 import org.unicode.cldr.util.ICUServiceBuilder; 49 import org.unicode.cldr.util.LanguageTagParser; 50 import 
org.unicode.cldr.util.Pair; 51 import org.unicode.cldr.util.PathHeader; 52 import org.unicode.cldr.util.Rational; 53 import org.unicode.cldr.util.Rational.FormatStyle; 54 import org.unicode.cldr.util.StandardCodes.LstrType; 55 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo; 56 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count; 57 import org.unicode.cldr.util.SupplementalDataInfo.PluralType; 58 import org.unicode.cldr.util.UnitConverter; 59 import org.unicode.cldr.util.UnitConverter.ConversionInfo; 60 import org.unicode.cldr.util.UnitConverter.PlaceholderLocation; 61 import org.unicode.cldr.util.UnitConverter.UnitId; 62 import org.unicode.cldr.util.UnitPathType; 63 import org.unicode.cldr.util.Validity; 64 import org.unicode.cldr.util.XPathParts; 65 66 /** Chart the grammatical forms, with unit examples */ 67 public class ChartGrammaticalForms extends Chart { 68 69 private static final String FORMATTED_SAMPLE = "Formatted Sample"; 70 71 private static final String INFO_ON_FEATURES = 72 "Current information is only for nominal forms. " 73 + "Where a Usage is present other than “general”, that means that a subset of the grammatical features are relevant to that Usage. " 74 + "For example, Feature=grammaticalGender and Usage=units might omit an ‘animate’ gender. 
" 75 + "For the meanings of the values, see " 76 + "<a target='spec' href='https://unicode.org/reports/tr35/tr35-general.html#Grammatical_Features'>LDML Grammatical Features</a>."; 77 78 private static final String MAIN_HEADER = "<h2>Grammatical Forms</h2>"; 79 private static final boolean DEBUG = false; 80 private static final String DIR = CLDRPaths.CHART_DIRECTORY + "grammar/"; 81 public static final PluralRules ENGLISH_PLURAL_RULES = SDI.getPlurals("en").getPluralRules(); 82 main(String[] args)83 public static void main(String[] args) { 84 new ChartGrammaticalForms().writeChart(null); 85 } 86 87 @Override getDirectory()88 public String getDirectory() { 89 return DIR; 90 } 91 92 @Override getTitle()93 public String getTitle() { 94 return "Grammatical Forms Charts"; 95 } 96 97 @Override getFileName()98 public String getFileName() { 99 return "index"; 100 } 101 102 @Override getExplanation()103 public String getExplanation() { 104 return MAIN_HEADER 105 + "<p>In this version a preliminary set of languages have additional grammatical information, as listed below.<p>"; 106 } 107 108 @Override writeContents(FormattedFileWriter pw)109 public void writeContents(FormattedFileWriter pw) throws IOException { 110 FileCopier.ensureDirectoryExists(DIR); 111 FileCopier.copy(Chart.class, "index.css", DIR); 112 FormattedFileWriter.copyIncludeHtmls(DIR); 113 114 FormattedFileWriter.Anchors anchors = new FormattedFileWriter.Anchors(); 115 writeSubcharts(anchors); 116 pw.setIndex("Main Chart Index", "../index.html"); 117 pw.write(anchors.toString()); 118 showInfo(pw); 119 } 120 showInfo(FormattedFileWriter pw)121 private void showInfo(FormattedFileWriter pw) throws IOException { 122 pw.append("<h2>Grammatical Features Info</h2>"); 123 pw.append( 124 "<p>The following lists the available information about grammatical features for locales. " 125 + "Note that only the above locales have localized data, at this time. 
" 126 + INFO_ON_FEATURES 127 + "</p>"); 128 if (GrammaticalTarget.values().length > 1) { 129 throw new IllegalArgumentException( 130 "Needs adjustment for additional GrammaticalTarget.values()"); 131 } 132 133 System.out.println(SDI.hasGrammarInfo()); 134 135 TablePrinter tablePrinter = getFormattedGrammarInfo(SDI.hasGrammarInfo()); 136 pw.append(tablePrinter.toString()); 137 } 138 getFormattedGrammarInfo(Set<String> localeIds)139 private TablePrinter getFormattedGrammarInfo(Set<String> localeIds) { 140 TablePrinter tablePrinter = 141 new TablePrinter() 142 .addColumn( 143 "Locale", "class='source' width='1%'", null, "class='source'", true) 144 .setSortPriority(0) 145 .setBreakSpans(true) 146 .addColumn( 147 "ID", 148 "class='source' width='1%'", 149 CldrUtility.getDoubleLinkMsg(), 150 "class='source'", 151 true) 152 .setBreakSpans(true) 153 .addColumn( 154 "Feature", 155 "class='source' width='1%'", 156 null, 157 "class='source'", 158 true) 159 .setSortPriority(1) 160 .setBreakSpans(true) 161 .addColumn("Usage", "class='source'", null, "class='source'", true) 162 .addColumn("Values", "class='source'", null, "class='source'", true); 163 for (String localeId : localeIds) { 164 if (localeId.equals("fi")) { 165 int debug = 0; 166 } 167 Set<String> failures = new LinkedHashSet<>(); 168 GrammarInfo grammarInfo = SDI.getGrammarInfo(localeId, false); 169 String localeName = CONFIG.getEnglish().getName(localeId); 170 for (GrammaticalFeature feature : GrammaticalFeature.values()) { 171 Map<GrammaticalScope, Set<String>> scopeToValues = 172 grammarInfo.get(GrammaticalTarget.nominal, feature); 173 if (scopeToValues.isEmpty()) { 174 continue; 175 } 176 177 Set<String> values = null; 178 boolean multiline = false; 179 for (Entry<GrammaticalScope, Set<String>> entry : scopeToValues.entrySet()) { 180 if (values == null) { 181 values = entry.getValue(); 182 } else if (!values.equals(entry.getValue())) { 183 multiline = true; 184 break; 185 } 186 } 187 Set<String> sortedValues = 
new TreeSet(feature.getValueComparator()); 188 if (multiline) { 189 for (GrammaticalScope usage : GrammaticalScope.values()) { 190 values = scopeToValues.get(usage); 191 if (values == null || values.isEmpty()) { 192 continue; 193 } 194 sortedValues.clear(); 195 sortedValues.addAll(values); 196 addRow( 197 tablePrinter, 198 localeName, 199 localeId, 200 feature, 201 usage.toString(), 202 Joiner.on(", ").join(sortedValues)); 203 } 204 } else { 205 try { 206 sortedValues.addAll(values); 207 addRow( 208 tablePrinter, 209 localeName, 210 localeId, 211 feature, 212 Joiner.on(", ").join(scopeToValues.keySet()), 213 Joiner.on(", ").join(sortedValues)); 214 } catch (Exception e) { 215 failures.add(e.getMessage()); 216 } 217 } 218 } 219 if (!failures.isEmpty()) { 220 System.out.println("# Failures, " + localeId + "\t" + failures); 221 } 222 } 223 return tablePrinter; 224 } 225 addRow( TablePrinter tablePrinter, String locale, String id, GrammaticalFeature feature, String usage, final String valueString)226 public void addRow( 227 TablePrinter tablePrinter, 228 String locale, 229 String id, 230 GrammaticalFeature feature, 231 String usage, 232 final String valueString) { 233 tablePrinter 234 .addRow() 235 .addCell(locale) 236 .addCell(id) 237 .addCell(feature) 238 .addCell(usage) 239 .addCell(valueString) 240 .finishRow(); 241 } 242 243 static final UnitConverter uc = SDI.getUnitConverter(); 244 static final Map<String, Map<Rational, String>> BASE_TO_FACTOR_TO_UNIT; 245 246 static { 247 Map<String, Map<Rational, String>> _BASE_TO_BEST = new TreeMap<>(); 248 ImmutableSet<String> skip = ImmutableSet.of("mile-scandinavian", "100-kilometer", "dunam"); 249 Output<String> baseOut = new Output<>(); 250 for (String longUnit : 251 Validity.getInstance() 252 .getStatusToCodes(LstrType.unit) 253 .get(Validity.Status.regular)) { 254 String shortUnit = uc.getShortId(longUnit); 255 System.out.println(shortUnit); 256 if (skip.contains(shortUnit)) { 257 continue; 258 } 259 if 
/**
 * Ranking key used to pick a representative unit for a grammatical gender. Instances sort
 * "best first": duration/length units before others, then metric units, then by quantity
 * name, then by closeness of the unit's size to 1, and finally by unit id.
 */
class BestUnitForGender implements Comparable<BestUnitForGender> {
    final boolean durationOrLength; // true is better
    final boolean metric; // true is better
    final double distanceFromOne; // zero is better
    final String quantity;
    final String shortUnit;

    /**
     * @param shortUnit the short unit id
     * @param quantity the unit's quantity, such as "length" or "duration"
     * @param systems the measurement systems the unit belongs to; checked for "metric"
     * @param baseSize the unit's size; its distance from 1 is measured as |ln(baseSize)|
     */
    public BestUnitForGender(
            String shortUnit, String quantity, Collection<String> systems, double baseSize) {
        super();
        this.shortUnit = shortUnit;
        this.quantity = quantity;
        this.durationOrLength = quantity.equals("duration") || quantity.equals("length");
        this.metric = systems.contains("metric");
        this.distanceFromOne = Math.abs(Math.log(baseSize));
    }

    @Override
    public int compareTo(BestUnitForGender o) {
        // The two boolean comparisons are reversed (other first) so that "true" sorts first.
        int result = Boolean.compare(o.durationOrLength, durationOrLength);
        if (result == 0) {
            result = Boolean.compare(o.metric, metric);
        }
        if (result == 0) {
            result = quantity.compareTo(o.quantity);
        }
        if (result == 0) {
            result = Double.compare(distanceFromOne, o.distanceFromOne);
        }
        if (result == 0) {
            result = shortUnit.compareTo(o.shortUnit);
        }
        return result;
    }

    @Override
    public int hashCode() {
        // Equal instances always share shortUnit, since it is the last key in compareTo.
        return shortUnit.hashCode();
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        // Return false for null or foreign types instead of throwing.
        if (!(obj instanceof BestUnitForGender)) {
            return false;
        }
        return compareTo((BestUnitForGender) obj) == 0;
    }

    @Override
    public String toString() {
        return shortUnit
                + "("
                + (durationOrLength ? "D" : "")
                + (metric ? "M" : "")
                + ":"
                + quantity
                + ":"
                + Math.round(distanceFromOne * 10)
                + ")";
    }
}
longUnit : rawUnitsToAddGrammar) { 371 final String shortUnit = uc.getShortId(longUnit); 372 if (shortUnit.equals("generic")) { 373 continue; 374 } 375 String unitCell = getBestBaseUnit(uc, shortUnit, sizeInBaseUnits); 376 String quantity = 377 shortUnit.contentEquals("generic") 378 ? "temperature" 379 : uc.getQuantityFromUnit(shortUnit, false); 380 381 Set<String> systems = uc.getSystems(shortUnit); 382 unitToBestUnit.put( 383 shortUnit, 384 new BestUnitForGender(shortUnit, quantity, systems, sizeInBaseUnits.value)); 385 } 386 unitToBestUnit = ImmutableMap.copyOf(unitToBestUnit); 387 // quick check 388 // final BestUnitForGender u1 = unitToBestUnit.get("meter"); 389 // final BestUnitForGender u2 = unitToBestUnit.get("square-centimeter"); 390 // int comp = u1.compareTo(u2); // should be less 391 392 Set<BestUnitForGender> sorted2 = new TreeSet<>(unitToBestUnit.values()); 393 System.out.println(sorted2); 394 395 PlaceholderLocation placeholderPosition = PlaceholderLocation.missing; 396 Matcher placeholderMatcher = UnitConverter.PLACEHOLDER.matcher(""); 397 Output<String> unitPatternOut = new Output<>(); 398 399 for (String locale : locales) { 400 if (locale.equals("root")) { 401 continue; 402 } 403 ltp.set(locale); 404 String region = ltp.getRegion(); 405 if (!region.isEmpty()) { 406 continue; 407 } 408 GrammarInfo grammarInfo = SDI.getGrammarInfo(locale, true); 409 if (grammarInfo == null || !grammarInfo.hasInfo(GrammaticalTarget.nominal)) { 410 continue; 411 } 412 CLDRFile cldrFile = factory.make(locale, true); 413 414 { 415 Collection<String> genders = 416 grammarInfo.get( 417 GrammaticalTarget.nominal, 418 GrammaticalFeature.grammaticalGender, 419 GrammaticalScope.units); 420 sortedGenders.clear(); 421 sortedGenders.addAll(genders); 422 } 423 { 424 Collection<String> rawCases = 425 grammarInfo.get( 426 GrammaticalTarget.nominal, 427 GrammaticalFeature.grammaticalCase, 428 GrammaticalScope.units); 429 if (rawCases.isEmpty()) { 430 rawCases = 
ImmutableSet.of(GrammaticalFeature.grammaticalCase.getDefault(null)); 431 } 432 sortedCases.clear(); 433 sortedCases.addAll(rawCases); 434 } 435 if (sortedCases.size() <= 1 && sortedGenders.size() <= 1) { 436 continue; 437 } 438 439 // Collection<String> nomCases = rawCases.isEmpty() ? casesNominativeOnly : rawCases; 440 441 PluralInfo plurals = SDI.getPlurals(PluralType.cardinal, locale); 442 if (plurals == null) { 443 System.err.println("No " + PluralType.cardinal + " plurals for " + locale); 444 } 445 Collection<Count> adjustedPlurals = plurals.getAdjustedCounts(); 446 ICUServiceBuilder isb = ICUServiceBuilder.forLocale(CLDRLocale.getInstance(locale)); 447 DecimalFormat decFormat = isb.getNumberFormat(1); 448 449 Map<String, TablePrinterWithHeader> info = new LinkedHashMap<>(); 450 451 TablePrinter tablePrinter = getFormattedGrammarInfo(Collections.singleton(locale)); 452 info.put( 453 "Grammatical Features", 454 new TablePrinterWithHeader( 455 "<p>The following lists the available information about grammatical features for this locale. " 456 + INFO_ON_FEATURES 457 + "</p>", 458 tablePrinter)); 459 460 // because some locales have more units with grammar, get the additional ones. 
Also grab 461 // the minimal pairs 462 463 Set<String> unitsToAddGrammar = new TreeSet<>(rawUnitsToAddGrammar); 464 Map<PathHeader, String> minimalInfo = new TreeMap<>(); 465 PathHeader.Factory phf = PathHeader.getFactory(); 466 for (String path : cldrFile) { 467 if (!path.startsWith("//ldml/units/unitLength[@type=\"long\"]/unit")) { 468 if (path.startsWith("//ldml/numbers/minimalPairs/")) { 469 if (!path.contains("ordinal")) { 470 minimalInfo.put( 471 phf.fromPath(path), cldrFile.getStringValueWithBailey(path)); 472 } 473 } 474 continue; 475 } 476 XPathParts parts = XPathParts.getFrozenInstance(path); 477 String foundUnit = parts.getAttributeValue(3, "type"); 478 if (unitsToAddGrammar.contains(foundUnit)) { 479 continue; 480 } 481 // ldml/units/unitLength[@type="long"]/unit[@type="duration-decade"]/gender 482 // ldml/units/unitLength[@type="long"]/unit[@type="duration-decade"]/unitPattern[@count="one"][@case="accusative"] 483 switch (parts.getElement(-1)) { 484 case "gender": 485 unitsToAddGrammar.add(foundUnit); 486 break; 487 case "unitPattern": 488 if (parts.getAttributeValue(4, "case") != null) { 489 unitsToAddGrammar.add(foundUnit); 490 } 491 break; 492 } 493 } 494 495 TablePrinter minimalPrinter = 496 new TablePrinter() 497 .addColumn( 498 "Type", 499 "class='source' width='1%'", 500 CldrUtility.getDoubleLinkMsg(), 501 "class='source'", 502 true) 503 .setRepeatHeader(true) 504 .addColumn( 505 "Size", 506 "class='source' width='1%'", 507 null, 508 "class='source'", 509 true) 510 .setSortPriority(0) 511 .setHidden(true) 512 .setBreakSpans(true) 513 .addColumn( 514 "Code", 515 "class='source' width='1%'", 516 null, 517 "class='source'", 518 true) 519 .addColumn("Pattern", "class='source'", null, "class='target'", true) 520 .addColumn( 521 "Formatted Sample", 522 "class='source'", 523 null, 524 "class='target'", 525 true); 526 527 int counter = 0; 528 ExampleGenerator exampleGenerator = new ExampleGenerator(cldrFile, CONFIG.getEnglish()); 529 for 
(Entry<PathHeader, String> entry : minimalInfo.entrySet()) { 530 PathHeader pathHeader = entry.getKey(); 531 String value = entry.getValue(); 532 minimalPrinter 533 .addRow() 534 .addCell(pathHeader.getHeader()) 535 .addCell(counter++) 536 .addCell(pathHeader.getCode()) 537 .addCell(value) 538 .addCell( 539 exampleGenerator.getExampleHtml( 540 pathHeader.getOriginalPath(), value)); 541 542 // finish the row 543 minimalPrinter.finishRow(); 544 } 545 info.put( 546 "Minimal Pairs", 547 new TablePrinterWithHeader( 548 "<p>This table has the minimal pairs used to test the appropriateness of different values.</p>\n", 549 minimalPrinter)); 550 551 final PluralRules pluralRules = plurals.getPluralRules(); 552 // set up the table and add the headers 553 554 TablePrinter caseTablePrinter = 555 new TablePrinter() 556 .addColumn( 557 "Quantity", 558 "class='source' width='1%'", 559 null, 560 "class='source'", 561 true) 562 .setSortPriority(0) 563 .setRepeatHeader(true) 564 .addColumn( 565 "Size", 566 "class='source' width='1%'", 567 null, 568 "class='source'", 569 true) 570 .setSortPriority(1) 571 .setHidden(true) 572 .addColumn( 573 "Unit", 574 "class='source' width='1%'", 575 CldrUtility.getDoubleLinkMsg(), 576 "class='source'", 577 true) 578 .setSortPriority(2) 579 .setBreakSpans(true); 580 if (sortedGenders.size() > 1) { 581 caseTablePrinter 582 .addColumn( 583 "Gender", "class='source' width='1%'", null, "class='source'", true) 584 .addColumn( 585 "Gender MP + unit", "class='target'", null, "class='source'", true); 586 } 587 if (sortedCases.size() > 1) { 588 caseTablePrinter.addColumn( 589 "Case", "class='source' width='1%'", null, "class='source'", true); 590 // double width = ((int) ((99.0 / (adjustedPlurals.size()*2 + 1)) * 1000)) / 1000.0; 591 // String widthStringTarget = "class='target' width='" + width + "%'"; 592 String widthStringTarget = "class='target'"; 593 594 addTwoColumns( 595 caseTablePrinter, widthStringTarget, adjustedPlurals, pluralRules, true); 596 } 
597 598 // now get the case and/or gender items 599 600 // also gather info on the "best power units" 601 602 for (String longUnit : unitsToAddGrammar) { 603 final String shortUnit = uc.getShortId(longUnit); 604 String unitCell = getBestBaseUnit(uc, shortUnit, sizeInBaseUnits); 605 String quantity = 606 shortUnit.contentEquals("generic") 607 ? "temperature" 608 : uc.getQuantityFromUnit(shortUnit, false); 609 String genderFormatted = "n/a"; 610 String gender = "n/a"; 611 612 if (sortedGenders.size() > 1) { 613 gender = 614 UnitPathType.gender.getTrans( 615 cldrFile, "long", shortUnit, null, null, null, null); 616 if (gender == null) { 617 gender = "n/a"; 618 } else { 619 String genderMinimalPair = 620 cldrFile.getStringValue( 621 "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\"" 622 + gender 623 + "\"]"); 624 if (genderMinimalPair != null) { 625 Count bestCount = 626 adjustedPlurals.contains(Count.one) ? Count.one : Count.other; 627 628 String unitPattern = 629 cldrFile.getStringValueWithBailey( 630 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"" 631 + longUnit 632 + "\"]/unitPattern" 633 + GrammarInfo.getGrammaticalInfoAttributes( 634 grammarInfo, 635 UnitPathType.unit, 636 bestCount.toString(), 637 null, 638 "nominative")); 639 String unit = 640 unitPattern.replace("\u00A0", "").replace("{0}", "").trim(); 641 genderFormatted = MessageFormat.format(genderMinimalPair, unit); 642 } 643 } 644 } 645 if (sortedCases.size() <= 1) { 646 caseTablePrinter 647 .addRow() 648 .addCell(quantity) 649 .addCell(sizeInBaseUnits.value) 650 .addCell(unitCell) 651 .addCell(gender) 652 .addCell(genderFormatted); 653 // finish the row 654 caseTablePrinter.finishRow(); 655 } else { 656 // Set<String> systems = uc.getSystems(shortUnit); 657 658 if (unitCell == null 659 || quantity == null 660 || gender == null 661 || sizeInBaseUnits.value == null) { 662 throw new IllegalArgumentException("No best base unit for: " + shortUnit); 663 } 664 665 for (String case1 : 
sortedCases) { // 666 // start a row, then add the cells in the row. 667 caseTablePrinter 668 .addRow() 669 .addCell(quantity) 670 .addCell(sizeInBaseUnits.value) 671 .addCell(unitCell); 672 if (sortedGenders.size() > 1) { 673 caseTablePrinter.addCell(gender).addCell(genderFormatted); 674 } 675 caseTablePrinter.addCell(case1); 676 677 for (Count plural : adjustedPlurals) { 678 Double sample = getBestSample(pluralRules, plural); 679 680 // <caseMinimalPairs case="nominative">{0} kostet 681 // €3,50.</caseMinimalPairs> 682 683 String unitPattern = 684 cldrFile.getStringValueWithBailey( 685 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"" 686 + longUnit 687 + "\"]/unitPattern" 688 + GrammarInfo.getGrammaticalInfoAttributes( 689 grammarInfo, 690 UnitPathType.unit, 691 plural.toString(), 692 null, 693 case1)); 694 unitPattern = unitPattern.replace("\u00A0", " "); 695 696 caseTablePrinter.addCell(unitPattern); 697 698 String numberPlusUnit = 699 MessageFormat.format(unitPattern, decFormat.format(sample)); 700 701 String caseMinimalPair = 702 cldrFile.getStringValue( 703 "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"" 704 + case1 705 + "\"]"); 706 String withContext = 707 caseMinimalPair == null 708 ? numberPlusUnit 709 : MessageFormat.format(caseMinimalPair, numberPlusUnit); 710 711 caseTablePrinter.addCell(withContext); 712 } 713 // finish the row 714 caseTablePrinter.finishRow(); 715 } 716 } 717 } 718 info.put( 719 "Unit Case and Gender Info", 720 new TablePrinterWithHeader( 721 "<p>This table has rows contains unit forms appropriate for different grammatical cases and plural forms. " 722 + "Each plural form has a sample value such as <i>(1.2)</i> or <i>(2)</i>. " 723 + "That value is used with the localized unit pattern to form a formatted measure, such as “2,0 Stunden”. 
" 724 + "That formatted measure is in turn substituted into a " 725 + "<b><a target='doc-minimal-pairs' href='http://cldr.unicode.org/translation/grammatical-inflection#TOC-Miscellaneous-Minimal-Pairs'>case minimal pair pattern</a> to get the " 726 + FORMATTED_SAMPLE 727 + "</b>. " 728 + "The <b>Gender</b> column is informative; it just supplies the supplied gender for the unit.</p>\n" 729 + "<ul><li>For clarity, conversion values are supplied for non-metric units. " 730 + "For more information, see <a target='unit_conversions' href='../supplemental/unit_conversions.html'>Unit Conversions</a>.</li>" 731 + "</ul>\n", 732 caseTablePrinter)); 733 734 // get best units for gender. 735 Multimap<String, BestUnitForGender> bestUnitForGender = TreeMultimap.create(); 736 737 for (String longUnit : unitsToAddGrammar) { 738 final String shortUnit = uc.getShortId(longUnit); 739 String gender = 740 UnitPathType.gender.getTrans( 741 cldrFile, "long", shortUnit, null, null, null, null); 742 final BestUnitForGender bestUnit = unitToBestUnit.get(shortUnit); 743 if (gender != null && bestUnit != null) { 744 bestUnitForGender.put(gender, bestUnit); 745 } 746 } 747 748 for (Entry<String, Collection<BestUnitForGender>> entry : 749 bestUnitForGender.asMap().entrySet()) { 750 List<String> items = 751 entry.getValue().stream() 752 .map(x -> x.shortUnit) 753 .collect(Collectors.toList()); 754 System.out.println(locale + "\t" + entry.getKey() + "\t" + items); 755 } 756 757 TablePrinter powerTable = 758 new TablePrinter() 759 .addColumn( 760 "Unit", 761 "class='source' width='1%'", 762 CldrUtility.getDoubleLinkMsg(), 763 "class='source'", 764 true) 765 .setSortPriority(2) 766 .setRepeatHeader(true) 767 .addColumn( 768 "Case", 769 "class='source' width='1%'", 770 null, 771 "class='source'", 772 true) 773 .addColumn( 774 "Gender", 775 "class='source' width='1%'", 776 null, 777 "class='source'", 778 true); 779 double width = ((int) ((99.0 / (adjustedPlurals.size() * 2 + 1)) * 1000)) / 1000.0; 
780 String widthStringTarget = "class='target' width='" + width + "%'"; 781 782 addTwoColumns(powerTable, widthStringTarget, adjustedPlurals, pluralRules, false); 783 784 // now get the items 785 for (String power : Arrays.asList("power2", "power3")) { 786 String unitCell = power; 787 788 for (String gender : sortedGenders) { 789 Collection<BestUnitForGender> bestUnits = bestUnitForGender.get(gender); 790 String bestUnit = null; 791 if (!bestUnits.isEmpty()) { 792 bestUnit = bestUnits.iterator().next().shortUnit; 793 } 794 795 for (String case1 : sortedCases) { // 796 // start a row, then add the cells in the row. 797 powerTable 798 .addRow() 799 .addCell(unitCell) 800 .addCell(case1) 801 .addCell(gender + (bestUnit == null ? "" : "\n(" + bestUnit + ")")); 802 803 for (Count plural : adjustedPlurals) { 804 String localizedPowerPattern = 805 UnitPathType.power.getTrans( 806 cldrFile, 807 "long", 808 power, 809 plural.toString(), 810 case1, 811 gender, 812 null); 813 localizedPowerPattern = localizedPowerPattern.replace("\u00A0", " "); 814 powerTable.addCell(localizedPowerPattern); 815 816 if (bestUnit == null) { 817 powerTable.addCell("n/a"); 818 } else { 819 Double samplePlural = getBestSample(pluralRules, plural); 820 String localizedUnitPattern = 821 UnitPathType.unit.getTrans( 822 cldrFile, 823 "long", 824 bestUnit, 825 plural.toString(), 826 case1, 827 gender, 828 null); 829 placeholderPosition = 830 UnitConverter.extractUnit( 831 placeholderMatcher, 832 localizedUnitPattern, 833 unitPatternOut); 834 if (placeholderPosition != PlaceholderLocation.middle) { 835 localizedUnitPattern = unitPatternOut.value; 836 localizedUnitPattern = 837 localizedUnitPattern.replace("\u00A0", " "); 838 String placeholderPattern = 839 placeholderPosition == PlaceholderLocation.missing 840 ? 
localizedUnitPattern 841 : placeholderMatcher.group(); 842 843 String combined; 844 try { 845 combined = 846 UnitConverter.combineLowercasing( 847 new ULocale(locale), 848 "long", 849 localizedPowerPattern, 850 localizedUnitPattern); 851 } catch (Exception e) { 852 throw new IllegalArgumentException( 853 locale 854 + ") Can't combine " 855 + "localizedPowerPattern=«" 856 + localizedPowerPattern 857 + "» with localizedUnitPattern=«" 858 + localizedUnitPattern 859 + "»"); 860 } 861 String combinedWithPlaceholder = 862 UnitConverter.addPlaceholder( 863 combined, 864 placeholderPattern, 865 placeholderPosition); 866 867 String sample = 868 MessageFormat.format( 869 combinedWithPlaceholder, 870 decFormat.format(samplePlural)); 871 872 String caseMinimalPair = 873 cldrFile.getStringValue( 874 "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\"" 875 + case1 876 + "\"]"); 877 String withContext = 878 caseMinimalPair == null 879 ? sample 880 : MessageFormat.format(caseMinimalPair, sample); 881 882 powerTable.addCell(withContext); 883 } else { 884 powerTable.addCell("n/a"); 885 } 886 } 887 } 888 // finish the row 889 powerTable.finishRow(); 890 } 891 } 892 } 893 info.put( 894 "Unit Power Components", 895 new TablePrinterWithHeader( 896 "<p>This table shows the square (power2) and cubic (power3) patterns, which may vary by case, gender, and plural forms. " 897 + "Each gender is illustrated with a unit where possible, such as <i>(second)</i> or <i>(meter)</i>. " 898 + "Each plural category is illustrated with a unit where possible, such as <i>(1)</i> or <i>(1.2)</i>. " 899 + "The patterns are first supplied, and then combined with the samples and " 900 + "<b><a target='doc-minimal-pairs' href='http://cldr.unicode.org/translation/grammatical-inflection#TOC-Miscellaneous-Minimal-Pairs'>case minimal pair patterns</a></b> " 901 + "in the next <b>" 902 + FORMATTED_SAMPLE 903 + "</b> column." 
904 + "</p>", 905 powerTable)); 906 907 if (!info.isEmpty()) { 908 String name = ENGLISH.getName(locale); 909 new Subchart(name + ": Unit Grammar Info", locale, info).writeChart(anchors); 910 } 911 } 912 } 913 addTwoColumns( TablePrinter caseTablePrinter, String widthStringTarget, Collection<Count> adjustedPlurals, final PluralRules pluralRules, boolean spanRows)914 public void addTwoColumns( 915 TablePrinter caseTablePrinter, 916 String widthStringTarget, 917 Collection<Count> adjustedPlurals, 918 final PluralRules pluralRules, 919 boolean spanRows) { 920 for (Count plural : adjustedPlurals) { 921 Double sample = getBestSample(pluralRules, plural); 922 // final String pluralHeader = plural.toString() + " (" + sample + ")"; 923 caseTablePrinter 924 .addColumn( 925 "Pattern for " + plural.toString(), 926 widthStringTarget, 927 null, 928 "class='target'", 929 true) 930 .setSpanRows(spanRows); 931 caseTablePrinter.addColumn( 932 "Case MP + pattern with " + sample, 933 widthStringTarget, 934 null, 935 "class='target'", 936 true); 937 } 938 } 939 940 static final Map<String, Pair<String, Double>> BEST_UNIT_CACHE = new HashMap<>(); 941 getBestBaseUnit( UnitConverter uc, final String shortUnit, Output<Double> sizeInBaseUnits)942 public static String getBestBaseUnit( 943 UnitConverter uc, final String shortUnit, Output<Double> sizeInBaseUnits) { 944 Pair<String, Double> cached = BEST_UNIT_CACHE.get(shortUnit); 945 if (cached != null) { 946 sizeInBaseUnits.value = cached.getSecond(); 947 return cached.getFirst(); 948 } 949 if (shortUnit.equals("square-mile")) { 950 int debug = 0; 951 } 952 String unitCell = 953 ENGLISH.getStringValue( 954 "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\"" 955 + uc.getLongId(shortUnit) 956 + "\"]/displayName"); 957 Output<String> baseUnit = new Output<>(); 958 ConversionInfo info = uc.parseUnitId(shortUnit, baseUnit, false); 959 960 if (info != null) { 961 sizeInBaseUnits.value = info.factor.doubleValue(); 962 Map<Rational, String> 
factorToUnit = BASE_TO_FACTOR_TO_UNIT.get(baseUnit.value); 963 if (factorToUnit == null) { 964 int debug = 0; 965 } 966 String bestUnit = null; 967 Rational bestFactor = null; 968 Rational inputBoundary = Rational.of(2).multiply(info.factor); 969 for (Entry<Rational, String> entry : factorToUnit.entrySet()) { 970 final String possibleUnit = entry.getValue(); 971 if (possibleUnit.equals("cup-jp")) { 972 continue; // skip odd unit 973 } 974 final Rational currentFactor = entry.getKey(); 975 if (bestUnit != null && currentFactor.compareTo(inputBoundary) >= 0) { 976 break; 977 } 978 bestFactor = currentFactor; 979 bestUnit = possibleUnit; 980 } 981 bestFactor = info.factor.divide(bestFactor); // scale for bestUnit 982 if (!bestFactor.equals(Rational.ONE) || !shortUnit.equals(bestUnit)) { 983 final String string = bestFactor.toString(FormatStyle.repeating); 984 final double bestDoubleFactor = bestFactor.doubleValue(); 985 String pluralCategory = ENGLISH_PLURAL_RULES.select(bestDoubleFactor); 986 final String unitPath = 987 "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\"" 988 + uc.getLongId(bestUnit) 989 + "\"]/unitPattern[@count=\"" 990 + pluralCategory 991 + "\"]"; 992 String unitPattern = ENGLISH.getStringValue(unitPath); 993 if (unitPattern == null) { 994 final UnitId unitId = uc.createUnitId(bestUnit); 995 unitPattern = 996 unitId.toString(ENGLISH, "long", pluralCategory, null, null, false); 997 if (unitPattern == null) { 998 return null; 999 } 1000 } 1001 String unitMeasure = 1002 MessageFormat.format( 1003 unitPattern, 1004 string.contains("/") ? 
"~" + bestDoubleFactor : string); 1005 unitCell = shortUnit + "\n( = " + unitMeasure + ")"; 1006 } 1007 } else { 1008 sizeInBaseUnits.value = -1d; 1009 } 1010 BEST_UNIT_CACHE.put(shortUnit, Pair.of(unitCell, sizeInBaseUnits.value)); 1011 return unitCell; 1012 } 1013 getBestSample(PluralRules pluralRules, Count plural)1014 private Double getBestSample(PluralRules pluralRules, Count plural) { 1015 Collection<Double> samples = pluralRules.getSamples(plural.toString()); 1016 if (samples.isEmpty()) { 1017 samples = pluralRules.getSamples(plural.toString(), SampleType.DECIMAL); 1018 } 1019 int size = samples.size(); 1020 switch (size) { 1021 case 0: 1022 throw new IllegalArgumentException("shouldn't happen"); 1023 case 1: 1024 return samples.iterator().next(); 1025 } 1026 return Iterables.skip(samples, 1).iterator().next(); 1027 } 1028 1029 private class Subchart extends Chart { 1030 private final String title; 1031 private final String file; 1032 private final Map<String, TablePrinterWithHeader> tablePrinter; 1033 1034 @Override getShowDate()1035 public boolean getShowDate() { 1036 return false; 1037 } 1038 Subchart(String title, String file, Map<String, TablePrinterWithHeader> info)1039 public Subchart(String title, String file, Map<String, TablePrinterWithHeader> info) { 1040 super(); 1041 this.title = title; 1042 this.file = file; 1043 this.tablePrinter = info; 1044 } 1045 1046 @Override getDirectory()1047 public String getDirectory() { 1048 return DIR; 1049 } 1050 1051 @Override getTitle()1052 public String getTitle() { 1053 return title; 1054 } 1055 1056 @Override getFileName()1057 public String getFileName() { 1058 return file; 1059 } 1060 1061 @Override getExplanation()1062 public String getExplanation() { 1063 return MAIN_HEADER 1064 + "<p><i>Unit Inflections, Phase 1:</i> The end goal is to add full case and gender support for formatted units. 
" 1065 + "During Phase 1, a limited number of locales and units of measurement are being handled in CLDR v38, " 1066 + "so that we can work kinks out of the process before expanding to all units for all locales.</p>\n" 1067 + "<p>This chart shows grammatical information available for certain unit and/or power patterns. These patterns are also illustrated with a <b>" 1068 + FORMATTED_SAMPLE 1069 + "</b> that combine the patterns with sample numbers and " 1070 + "<b><a target='doc-minimal-pairs' href='http://cldr.unicode.org/translation/grammatical-inflection#TOC-Miscellaneous-Minimal-Pairs'>case minimal pair patterns</a></b>. " 1071 + "For example, “… für {0} …” is a <i>case minimal pair pattern</i> that requires the placeholder {0} to be in the accusative case in German. By inserting into a minimal pair pattern, " 1072 + "it is easier to ensure that the original unit and/or power patterns are correctly inflected. </p>\n" 1073 + "<p><b>Notes</b>" 1074 + "<ul><li>We don't have the cross-product of minimal pairs for both case and plural forms, " 1075 + "so the <i>case minimal pair pattern</i> might not be correct for the row’s plural category, especially in the nominative.</li>" 1076 + "<li>Translators often have difficulties with the the minimal pair patterns, " 1077 + "since they are <i>transcreations</i> not translations. The Hindi minimal pair patterns for case and gender have been discarded because they were incorrectly translated.</li>" 1078 + "<li>We don't expect translators to supply minimal pair patterns that are natural for any kind of placeholder: " 1079 + "for example, it is probably not typical to use the vocative with 3.2 meters! 
So look at the <b>" 1080 + FORMATTED_SAMPLE 1081 + "</b> as an aid for helping to see the context for grammatical inflections, but one that has limitations.</li></ul>"; 1082 } 1083 1084 @Override writeContents(FormattedFileWriter pw)1085 public void writeContents(FormattedFileWriter pw) throws IOException { 1086 try (PrintWriter tsv = 1087 FileUtilities.openUTF8Writer(getDirectory() + "tsv/", file + ".tsv"); ) { 1088 if (tablePrinter.size() > 1) { 1089 pw.write("<h2>Table of Contents</h2>\n"); 1090 pw.append("<ol>\n"); 1091 for (String header : tablePrinter.keySet()) { 1092 pw.write(writeTOC(header)); 1093 } 1094 pw.append("</ol>\n"); 1095 } 1096 String sep = ""; 1097 for (Entry<String, TablePrinterWithHeader> entry : tablePrinter.entrySet()) { 1098 final String header = entry.getKey(); 1099 writeHeader(pw, header); 1100 final TablePrinterWithHeader explanation = entry.getValue(); 1101 pw.write(explanation.header); 1102 pw.write(explanation.tablePrinter.toTable()); 1103 tsv.write(sep + "# " + entry.getKey() + "\n"); 1104 explanation.tablePrinter.toTsv(tsv); 1105 sep = "\n"; 1106 } 1107 } 1108 } 1109 writeHeader(FormattedFileWriter pw, final String header)1110 private void writeHeader(FormattedFileWriter pw, final String header) throws IOException { 1111 pw.write( 1112 "<h2><a name='" 1113 + FileUtilities.anchorize(header) 1114 + "'>" 1115 + header 1116 + "</a></h2>\n"); 1117 } 1118 writeTOC(String header)1119 private String writeTOC(String header) { 1120 return "<li><b>" 1121 + "<a href='#" 1122 + FileUtilities.anchorize(header) 1123 + "'>" 1124 + header 1125 + "</a>" 1126 + "</b></li>\n"; 1127 } 1128 } 1129 1130 public static RuleBasedCollator RBC; 1131 1132 static { 1133 Factory cldrFactory = Factory.make(CLDRPaths.COMMON_DIRECTORY + "collation/", ".*"); 1134 CLDRFile root = cldrFactory.make("root", false); 1135 String rules = 1136 root.getStringValue( 1137 "//ldml/collations/collation[@type=\"emoji\"][@visibility=\"external\"]/cr"); 1138 1139 // if 
(!rules.contains("'#⃣'")) { 1140 // rules = rules.replace("#⃣", "'#⃣'").replace("*⃣", "'*⃣'"); //hack for 8288 1141 // } 1142 1143 try { 1144 RBC = new RuleBasedCollator(rules); 1145 } catch (Exception e) { 1146 throw new IllegalArgumentException( 1147 "Failure in rules for " + CLDRPaths.COMMON_DIRECTORY + "collation/" + "root", 1148 e); 1149 } 1150 } 1151 } 1152