xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartGrammaticalForms.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.google.common.base.Joiner;
4 import com.google.common.collect.ComparisonChain;
5 import com.google.common.collect.ImmutableSet;
6 import com.google.common.collect.Iterables;
7 import com.google.common.collect.Multimap;
8 import com.google.common.collect.TreeMultimap;
9 import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap;
10 import com.ibm.icu.text.DecimalFormat;
11 import com.ibm.icu.text.MessageFormat;
12 import com.ibm.icu.text.PluralRules;
13 import com.ibm.icu.text.PluralRules.SampleType;
14 import com.ibm.icu.text.RuleBasedCollator;
15 import com.ibm.icu.util.Output;
16 import com.ibm.icu.util.ULocale;
17 import java.io.IOException;
18 import java.io.PrintWriter;
19 import java.util.Arrays;
20 import java.util.Collection;
21 import java.util.Collections;
22 import java.util.Comparator;
23 import java.util.HashMap;
24 import java.util.LinkedHashMap;
25 import java.util.LinkedHashSet;
26 import java.util.List;
27 import java.util.Map;
28 import java.util.Map.Entry;
29 import java.util.Set;
30 import java.util.TreeMap;
31 import java.util.TreeSet;
32 import java.util.regex.Matcher;
33 import java.util.stream.Collectors;
34 import org.unicode.cldr.draft.FileUtilities;
35 import org.unicode.cldr.test.ExampleGenerator;
36 import org.unicode.cldr.tool.FormattedFileWriter.Anchors;
37 import org.unicode.cldr.util.CLDRConfig;
38 import org.unicode.cldr.util.CLDRFile;
39 import org.unicode.cldr.util.CLDRLocale;
40 import org.unicode.cldr.util.CLDRPaths;
41 import org.unicode.cldr.util.CldrUtility;
42 import org.unicode.cldr.util.Factory;
43 import org.unicode.cldr.util.FileCopier;
44 import org.unicode.cldr.util.GrammarInfo;
45 import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
46 import org.unicode.cldr.util.GrammarInfo.GrammaticalScope;
47 import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget;
48 import org.unicode.cldr.util.ICUServiceBuilder;
49 import org.unicode.cldr.util.LanguageTagParser;
50 import org.unicode.cldr.util.Pair;
51 import org.unicode.cldr.util.PathHeader;
52 import org.unicode.cldr.util.Rational;
53 import org.unicode.cldr.util.Rational.FormatStyle;
54 import org.unicode.cldr.util.StandardCodes.LstrType;
55 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo;
56 import org.unicode.cldr.util.SupplementalDataInfo.PluralInfo.Count;
57 import org.unicode.cldr.util.SupplementalDataInfo.PluralType;
58 import org.unicode.cldr.util.UnitConverter;
59 import org.unicode.cldr.util.UnitConverter.ConversionInfo;
60 import org.unicode.cldr.util.UnitConverter.PlaceholderLocation;
61 import org.unicode.cldr.util.UnitConverter.UnitId;
62 import org.unicode.cldr.util.UnitPathType;
63 import org.unicode.cldr.util.Validity;
64 import org.unicode.cldr.util.XPathParts;
65 
66 /** Chart the grammatical forms, with unit examples */
67 public class ChartGrammaticalForms extends Chart {
68 
69     private static final String FORMATTED_SAMPLE = "Formatted Sample";
70 
71     private static final String INFO_ON_FEATURES =
72             "Current information is only for nominal forms. "
73                     + "Where a Usage is present other than “general”, that means that a subset of the grammatical features are relevant to that Usage. "
74                     + "For example, Feature=grammaticalGender and Usage=units might omit an ‘animate’ gender. "
75                     + "For the meanings of the values, see "
76                     + "<a target='spec' href='https://unicode.org/reports/tr35/tr35-general.html#Grammatical_Features'>LDML Grammatical Features</a>.";
77 
78     private static final String MAIN_HEADER = "<h2>Grammatical Forms</h2>";
79     private static final boolean DEBUG = false;
80     private static final String DIR = CLDRPaths.CHART_DIRECTORY + "grammar/";
81     public static final PluralRules ENGLISH_PLURAL_RULES = SDI.getPlurals("en").getPluralRules();
82 
main(String[] args)83     public static void main(String[] args) {
84         new ChartGrammaticalForms().writeChart(null);
85     }
86 
87     @Override
getDirectory()88     public String getDirectory() {
89         return DIR;
90     }
91 
92     @Override
getTitle()93     public String getTitle() {
94         return "Grammatical Forms Charts";
95     }
96 
97     @Override
getFileName()98     public String getFileName() {
99         return "index";
100     }
101 
102     @Override
getExplanation()103     public String getExplanation() {
104         return MAIN_HEADER
105                 + "<p>In this version a preliminary set of languages have additional grammatical information, as listed below.<p>";
106     }
107 
108     @Override
writeContents(FormattedFileWriter pw)109     public void writeContents(FormattedFileWriter pw) throws IOException {
110         FileCopier.ensureDirectoryExists(DIR);
111         FileCopier.copy(Chart.class, "index.css", DIR);
112         FormattedFileWriter.copyIncludeHtmls(DIR);
113 
114         FormattedFileWriter.Anchors anchors = new FormattedFileWriter.Anchors();
115         writeSubcharts(anchors);
116         pw.setIndex("Main Chart Index", "../index.html");
117         pw.write(anchors.toString());
118         showInfo(pw);
119     }
120 
showInfo(FormattedFileWriter pw)121     private void showInfo(FormattedFileWriter pw) throws IOException {
122         pw.append("<h2>Grammatical Features Info</h2>");
123         pw.append(
124                 "<p>The following lists the available information about grammatical features for locales. "
125                         + "Note that only the above locales have localized data, at this time. "
126                         + INFO_ON_FEATURES
127                         + "</p>");
128         if (GrammaticalTarget.values().length > 1) {
129             throw new IllegalArgumentException(
130                     "Needs adjustment for additional GrammaticalTarget.values()");
131         }
132 
133         System.out.println(SDI.hasGrammarInfo());
134 
135         TablePrinter tablePrinter = getFormattedGrammarInfo(SDI.hasGrammarInfo());
136         pw.append(tablePrinter.toString());
137     }
138 
getFormattedGrammarInfo(Set<String> localeIds)139     private TablePrinter getFormattedGrammarInfo(Set<String> localeIds) {
140         TablePrinter tablePrinter =
141                 new TablePrinter()
142                         .addColumn(
143                                 "Locale", "class='source' width='1%'", null, "class='source'", true)
144                         .setSortPriority(0)
145                         .setBreakSpans(true)
146                         .addColumn(
147                                 "ID",
148                                 "class='source' width='1%'",
149                                 CldrUtility.getDoubleLinkMsg(),
150                                 "class='source'",
151                                 true)
152                         .setBreakSpans(true)
153                         .addColumn(
154                                 "Feature",
155                                 "class='source' width='1%'",
156                                 null,
157                                 "class='source'",
158                                 true)
159                         .setSortPriority(1)
160                         .setBreakSpans(true)
161                         .addColumn("Usage", "class='source'", null, "class='source'", true)
162                         .addColumn("Values", "class='source'", null, "class='source'", true);
163         for (String localeId : localeIds) {
164             if (localeId.equals("fi")) {
165                 int debug = 0;
166             }
167             Set<String> failures = new LinkedHashSet<>();
168             GrammarInfo grammarInfo = SDI.getGrammarInfo(localeId, false);
169             String localeName = CONFIG.getEnglish().getName(localeId);
170             for (GrammaticalFeature feature : GrammaticalFeature.values()) {
171                 Map<GrammaticalScope, Set<String>> scopeToValues =
172                         grammarInfo.get(GrammaticalTarget.nominal, feature);
173                 if (scopeToValues.isEmpty()) {
174                     continue;
175                 }
176 
177                 Set<String> values = null;
178                 boolean multiline = false;
179                 for (Entry<GrammaticalScope, Set<String>> entry : scopeToValues.entrySet()) {
180                     if (values == null) {
181                         values = entry.getValue();
182                     } else if (!values.equals(entry.getValue())) {
183                         multiline = true;
184                         break;
185                     }
186                 }
187                 Set<String> sortedValues = new TreeSet(feature.getValueComparator());
188                 if (multiline) {
189                     for (GrammaticalScope usage : GrammaticalScope.values()) {
190                         values = scopeToValues.get(usage);
191                         if (values == null || values.isEmpty()) {
192                             continue;
193                         }
194                         sortedValues.clear();
195                         sortedValues.addAll(values);
196                         addRow(
197                                 tablePrinter,
198                                 localeName,
199                                 localeId,
200                                 feature,
201                                 usage.toString(),
202                                 Joiner.on(", ").join(sortedValues));
203                     }
204                 } else {
205                     try {
206                         sortedValues.addAll(values);
207                         addRow(
208                                 tablePrinter,
209                                 localeName,
210                                 localeId,
211                                 feature,
212                                 Joiner.on(", ").join(scopeToValues.keySet()),
213                                 Joiner.on(", ").join(sortedValues));
214                     } catch (Exception e) {
215                         failures.add(e.getMessage());
216                     }
217                 }
218             }
219             if (!failures.isEmpty()) {
220                 System.out.println("# Failures, " + localeId + "\t" + failures);
221             }
222         }
223         return tablePrinter;
224     }
225 
addRow( TablePrinter tablePrinter, String locale, String id, GrammaticalFeature feature, String usage, final String valueString)226     public void addRow(
227             TablePrinter tablePrinter,
228             String locale,
229             String id,
230             GrammaticalFeature feature,
231             String usage,
232             final String valueString) {
233         tablePrinter
234                 .addRow()
235                 .addCell(locale)
236                 .addCell(id)
237                 .addCell(feature)
238                 .addCell(usage)
239                 .addCell(valueString)
240                 .finishRow();
241     }
242 
243     static final UnitConverter uc = SDI.getUnitConverter();
244     static final Map<String, Map<Rational, String>> BASE_TO_FACTOR_TO_UNIT;
245 
246     static {
247         Map<String, Map<Rational, String>> _BASE_TO_BEST = new TreeMap<>();
248         ImmutableSet<String> skip = ImmutableSet.of("mile-scandinavian", "100-kilometer", "dunam");
249         Output<String> baseOut = new Output<>();
250         for (String longUnit :
251                 Validity.getInstance()
252                         .getStatusToCodes(LstrType.unit)
253                         .get(Validity.Status.regular)) {
254             String shortUnit = uc.getShortId(longUnit);
255             System.out.println(shortUnit);
256             if (skip.contains(shortUnit)) {
257                 continue;
258             }
259             if ("mile-per-gallon".equals(shortUnit)) {
260                 int debug = 0;
261             }
262             // Set<String> systems = uc.getSystems(unit);
263             ConversionInfo info = uc.parseUnitId(shortUnit, baseOut, false);
264             if (info == null) {
265                 continue;
266             }
267             Map<Rational, String> factorToUnit = _BASE_TO_BEST.get(baseOut.value);
268             if (factorToUnit == null) {
_BASE_TO_BEST.put(baseOut.value, factorToUnit = new TreeMap<>())269                 _BASE_TO_BEST.put(baseOut.value, factorToUnit = new TreeMap<>());
factorToUnit.put(Rational.ONE, baseOut.value)270                 factorToUnit.put(Rational.ONE, baseOut.value);
271             }
272 
273             if (!info.factor.isPowerOfTen()) {
274                 continue;
275             }
276 
277             String old = factorToUnit.get(info.factor);
278             if (old == null || old.length() > shortUnit.length()) {
factorToUnit.put(info.factor, shortUnit)279                 factorToUnit.put(info.factor, shortUnit);
280             }
281         }
282         BASE_TO_FACTOR_TO_UNIT = CldrUtility.protectCollection(_BASE_TO_BEST);
283         for (Entry<String, Map<Rational, String>> entry : BASE_TO_FACTOR_TO_UNIT.entrySet()) {
284             System.out.println(entry);
285         }
286     }
287 
288     class BestUnitForGender implements Comparable<BestUnitForGender> {
289         final boolean durationOrLength; // true is better
290         final boolean metric; // true is better
291         final double distanceFromOne; // zero is better
292         final String quantity;
293         final String shortUnit;
294 
BestUnitForGender( String shortUnit, String quantity, Collection<String> systems, double baseSize)295         public BestUnitForGender(
296                 String shortUnit, String quantity, Collection<String> systems, double baseSize) {
297             super();
298             this.shortUnit = shortUnit;
299             this.quantity = quantity;
300             this.durationOrLength = quantity.equals("duration") || quantity.equals("length");
301             this.metric = systems.contains("metric");
302             this.distanceFromOne = Math.abs(Math.log(baseSize));
303         }
304 
305         @Override
compareTo(BestUnitForGender o)306         public int compareTo(BestUnitForGender o) {
307             // negation, because we want the best one first
308             return ComparisonChain.start()
309                     .compare(o.durationOrLength, durationOrLength)
310                     .compare(o.metric, metric)
311                     .compare(quantity, o.quantity)
312                     .compare(distanceFromOne, o.distanceFromOne)
313                     .compare(shortUnit, o.shortUnit)
314                     .result();
315         }
316 
317         @Override
hashCode()318         public int hashCode() {
319             return shortUnit.hashCode();
320         }
321 
322         @Override
equals(Object obj)323         public boolean equals(Object obj) {
324             return compareTo((BestUnitForGender) obj) == 0;
325         }
326 
327         @Override
toString()328         public String toString() {
329             return shortUnit
330                     + "("
331                     + (durationOrLength ? "D" : "")
332                     + (metric ? "M" : "")
333                     + ":"
334                     + quantity
335                     + ":"
336                     + Math.round(distanceFromOne * 10)
337                     + ")";
338         }
339     }
340 
341     public class TablePrinterWithHeader {
342         final String header;
343         final TablePrinter tablePrinter;
344 
TablePrinterWithHeader(String header, TablePrinter tablePrinter)345         public TablePrinterWithHeader(String header, TablePrinter tablePrinter) {
346             this.header = header;
347             this.tablePrinter = tablePrinter;
348         }
349     }
350 
writeSubcharts(Anchors anchors)351     public void writeSubcharts(Anchors anchors) throws IOException {
352         Set<String> locales = GrammarInfo.getGrammarLocales();
353 
354         LanguageTagParser ltp = new LanguageTagParser();
355         // ImmutableSet<String> casesNominativeOnly =
356         // ImmutableSet.of(GrammaticalFeature.grammaticalCase.getDefault(null));
357         Factory factory = CLDRConfig.getInstance().getCldrFactory();
358 
359         Comparator<String> caseOrder = GrammarInfo.CaseValues.COMPARATOR;
360         Set<String> sortedCases = new TreeSet<>(caseOrder);
361 
362         Comparator<String> genderOrder = GrammarInfo.GenderValues.COMPARATOR;
363         Set<String> sortedGenders = new TreeSet<>(genderOrder);
364 
365         Output<Double> sizeInBaseUnits = new Output<>();
366 
367         // collect the "best unit ordering"
368         Map<String, BestUnitForGender> unitToBestUnit = new TreeMap<>();
369         Set<String> rawUnitsToAddGrammar = GrammarInfo.getUnitsToAddGrammar();
370         for (String longUnit : rawUnitsToAddGrammar) {
371             final String shortUnit = uc.getShortId(longUnit);
372             if (shortUnit.equals("generic")) {
373                 continue;
374             }
375             String unitCell = getBestBaseUnit(uc, shortUnit, sizeInBaseUnits);
376             String quantity =
377                     shortUnit.contentEquals("generic")
378                             ? "temperature"
379                             : uc.getQuantityFromUnit(shortUnit, false);
380 
381             Set<String> systems = uc.getSystems(shortUnit);
382             unitToBestUnit.put(
383                     shortUnit,
384                     new BestUnitForGender(shortUnit, quantity, systems, sizeInBaseUnits.value));
385         }
386         unitToBestUnit = ImmutableMap.copyOf(unitToBestUnit);
387         // quick check
388         //        final BestUnitForGender u1 = unitToBestUnit.get("meter");
389         //        final BestUnitForGender u2 = unitToBestUnit.get("square-centimeter");
390         //        int comp = u1.compareTo(u2); // should be less
391 
392         Set<BestUnitForGender> sorted2 = new TreeSet<>(unitToBestUnit.values());
393         System.out.println(sorted2);
394 
395         PlaceholderLocation placeholderPosition = PlaceholderLocation.missing;
396         Matcher placeholderMatcher = UnitConverter.PLACEHOLDER.matcher("");
397         Output<String> unitPatternOut = new Output<>();
398 
399         for (String locale : locales) {
400             if (locale.equals("root")) {
401                 continue;
402             }
403             ltp.set(locale);
404             String region = ltp.getRegion();
405             if (!region.isEmpty()) {
406                 continue;
407             }
408             GrammarInfo grammarInfo = SDI.getGrammarInfo(locale, true);
409             if (grammarInfo == null || !grammarInfo.hasInfo(GrammaticalTarget.nominal)) {
410                 continue;
411             }
412             CLDRFile cldrFile = factory.make(locale, true);
413 
414             {
415                 Collection<String> genders =
416                         grammarInfo.get(
417                                 GrammaticalTarget.nominal,
418                                 GrammaticalFeature.grammaticalGender,
419                                 GrammaticalScope.units);
420                 sortedGenders.clear();
421                 sortedGenders.addAll(genders);
422             }
423             {
424                 Collection<String> rawCases =
425                         grammarInfo.get(
426                                 GrammaticalTarget.nominal,
427                                 GrammaticalFeature.grammaticalCase,
428                                 GrammaticalScope.units);
429                 if (rawCases.isEmpty()) {
430                     rawCases = ImmutableSet.of(GrammaticalFeature.grammaticalCase.getDefault(null));
431                 }
432                 sortedCases.clear();
433                 sortedCases.addAll(rawCases);
434             }
435             if (sortedCases.size() <= 1 && sortedGenders.size() <= 1) {
436                 continue;
437             }
438 
439             // Collection<String> nomCases = rawCases.isEmpty() ? casesNominativeOnly : rawCases;
440 
441             PluralInfo plurals = SDI.getPlurals(PluralType.cardinal, locale);
442             if (plurals == null) {
443                 System.err.println("No " + PluralType.cardinal + "  plurals for " + locale);
444             }
445             Collection<Count> adjustedPlurals = plurals.getAdjustedCounts();
446             ICUServiceBuilder isb = ICUServiceBuilder.forLocale(CLDRLocale.getInstance(locale));
447             DecimalFormat decFormat = isb.getNumberFormat(1);
448 
449             Map<String, TablePrinterWithHeader> info = new LinkedHashMap<>();
450 
451             TablePrinter tablePrinter = getFormattedGrammarInfo(Collections.singleton(locale));
452             info.put(
453                     "Grammatical Features",
454                     new TablePrinterWithHeader(
455                             "<p>The following lists the available information about grammatical features for this locale. "
456                                     + INFO_ON_FEATURES
457                                     + "</p>",
458                             tablePrinter));
459 
460             // because some locales have more units with grammar, get the additional ones. Also grab
461             // the minimal pairs
462 
463             Set<String> unitsToAddGrammar = new TreeSet<>(rawUnitsToAddGrammar);
464             Map<PathHeader, String> minimalInfo = new TreeMap<>();
465             PathHeader.Factory phf = PathHeader.getFactory();
466             for (String path : cldrFile) {
467                 if (!path.startsWith("//ldml/units/unitLength[@type=\"long\"]/unit")) {
468                     if (path.startsWith("//ldml/numbers/minimalPairs/")) {
469                         if (!path.contains("ordinal")) {
470                             minimalInfo.put(
471                                     phf.fromPath(path), cldrFile.getStringValueWithBailey(path));
472                         }
473                     }
474                     continue;
475                 }
476                 XPathParts parts = XPathParts.getFrozenInstance(path);
477                 String foundUnit = parts.getAttributeValue(3, "type");
478                 if (unitsToAddGrammar.contains(foundUnit)) {
479                     continue;
480                 }
481                 // ldml/units/unitLength[@type="long"]/unit[@type="duration-decade"]/gender
482                 // ldml/units/unitLength[@type="long"]/unit[@type="duration-decade"]/unitPattern[@count="one"][@case="accusative"]
483                 switch (parts.getElement(-1)) {
484                     case "gender":
485                         unitsToAddGrammar.add(foundUnit);
486                         break;
487                     case "unitPattern":
488                         if (parts.getAttributeValue(4, "case") != null) {
489                             unitsToAddGrammar.add(foundUnit);
490                         }
491                         break;
492                 }
493             }
494 
495             TablePrinter minimalPrinter =
496                     new TablePrinter()
497                             .addColumn(
498                                     "Type",
499                                     "class='source' width='1%'",
500                                     CldrUtility.getDoubleLinkMsg(),
501                                     "class='source'",
502                                     true)
503                             .setRepeatHeader(true)
504                             .addColumn(
505                                     "Size",
506                                     "class='source' width='1%'",
507                                     null,
508                                     "class='source'",
509                                     true)
510                             .setSortPriority(0)
511                             .setHidden(true)
512                             .setBreakSpans(true)
513                             .addColumn(
514                                     "Code",
515                                     "class='source' width='1%'",
516                                     null,
517                                     "class='source'",
518                                     true)
519                             .addColumn("Pattern", "class='source'", null, "class='target'", true)
520                             .addColumn(
521                                     "Formatted Sample",
522                                     "class='source'",
523                                     null,
524                                     "class='target'",
525                                     true);
526 
527             int counter = 0;
528             ExampleGenerator exampleGenerator = new ExampleGenerator(cldrFile, CONFIG.getEnglish());
529             for (Entry<PathHeader, String> entry : minimalInfo.entrySet()) {
530                 PathHeader pathHeader = entry.getKey();
531                 String value = entry.getValue();
532                 minimalPrinter
533                         .addRow()
534                         .addCell(pathHeader.getHeader())
535                         .addCell(counter++)
536                         .addCell(pathHeader.getCode())
537                         .addCell(value)
538                         .addCell(
539                                 exampleGenerator.getExampleHtml(
540                                         pathHeader.getOriginalPath(), value));
541 
542                 // finish the row
543                 minimalPrinter.finishRow();
544             }
545             info.put(
546                     "Minimal Pairs",
547                     new TablePrinterWithHeader(
548                             "<p>This table has the minimal pairs used to test the appropriateness of different values.</p>\n",
549                             minimalPrinter));
550 
551             final PluralRules pluralRules = plurals.getPluralRules();
552             // set up the table and add the headers
553 
554             TablePrinter caseTablePrinter =
555                     new TablePrinter()
556                             .addColumn(
557                                     "Quantity",
558                                     "class='source' width='1%'",
559                                     null,
560                                     "class='source'",
561                                     true)
562                             .setSortPriority(0)
563                             .setRepeatHeader(true)
564                             .addColumn(
565                                     "Size",
566                                     "class='source' width='1%'",
567                                     null,
568                                     "class='source'",
569                                     true)
570                             .setSortPriority(1)
571                             .setHidden(true)
572                             .addColumn(
573                                     "Unit",
574                                     "class='source' width='1%'",
575                                     CldrUtility.getDoubleLinkMsg(),
576                                     "class='source'",
577                                     true)
578                             .setSortPriority(2)
579                             .setBreakSpans(true);
580             if (sortedGenders.size() > 1) {
581                 caseTablePrinter
582                         .addColumn(
583                                 "Gender", "class='source' width='1%'", null, "class='source'", true)
584                         .addColumn(
585                                 "Gender MP + unit", "class='target'", null, "class='source'", true);
586             }
587             if (sortedCases.size() > 1) {
588                 caseTablePrinter.addColumn(
589                         "Case", "class='source' width='1%'", null, "class='source'", true);
590                 // double width = ((int) ((99.0 / (adjustedPlurals.size()*2 + 1)) * 1000)) / 1000.0;
591                 // String widthStringTarget = "class='target' width='" + width + "%'";
592                 String widthStringTarget = "class='target'";
593 
594                 addTwoColumns(
595                         caseTablePrinter, widthStringTarget, adjustedPlurals, pluralRules, true);
596             }
597 
598             // now get the case and/or gender items
599 
600             // also gather info on the "best power units"
601 
602             for (String longUnit : unitsToAddGrammar) {
603                 final String shortUnit = uc.getShortId(longUnit);
604                 String unitCell = getBestBaseUnit(uc, shortUnit, sizeInBaseUnits);
605                 String quantity =
606                         shortUnit.contentEquals("generic")
607                                 ? "temperature"
608                                 : uc.getQuantityFromUnit(shortUnit, false);
609                 String genderFormatted = "n/a";
610                 String gender = "n/a";
611 
612                 if (sortedGenders.size() > 1) {
613                     gender =
614                             UnitPathType.gender.getTrans(
615                                     cldrFile, "long", shortUnit, null, null, null, null);
616                     if (gender == null) {
617                         gender = "n/a";
618                     } else {
619                         String genderMinimalPair =
620                                 cldrFile.getStringValue(
621                                         "//ldml/numbers/minimalPairs/genderMinimalPairs[@gender=\""
622                                                 + gender
623                                                 + "\"]");
624                         if (genderMinimalPair != null) {
625                             Count bestCount =
626                                     adjustedPlurals.contains(Count.one) ? Count.one : Count.other;
627 
628                             String unitPattern =
629                                     cldrFile.getStringValueWithBailey(
630                                             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\""
631                                                     + longUnit
632                                                     + "\"]/unitPattern"
633                                                     + GrammarInfo.getGrammaticalInfoAttributes(
634                                                             grammarInfo,
635                                                             UnitPathType.unit,
636                                                             bestCount.toString(),
637                                                             null,
638                                                             "nominative"));
639                             String unit =
640                                     unitPattern.replace("\u00A0", "").replace("{0}", "").trim();
641                             genderFormatted = MessageFormat.format(genderMinimalPair, unit);
642                         }
643                     }
644                 }
645                 if (sortedCases.size() <= 1) {
646                     caseTablePrinter
647                             .addRow()
648                             .addCell(quantity)
649                             .addCell(sizeInBaseUnits.value)
650                             .addCell(unitCell)
651                             .addCell(gender)
652                             .addCell(genderFormatted);
653                     // finish the row
654                     caseTablePrinter.finishRow();
655                 } else {
656                     // Set<String> systems = uc.getSystems(shortUnit);
657 
658                     if (unitCell == null
659                             || quantity == null
660                             || gender == null
661                             || sizeInBaseUnits.value == null) {
662                         throw new IllegalArgumentException("No best base unit for: " + shortUnit);
663                     }
664 
665                     for (String case1 : sortedCases) { //
666                         // start a row, then add the cells in the row.
667                         caseTablePrinter
668                                 .addRow()
669                                 .addCell(quantity)
670                                 .addCell(sizeInBaseUnits.value)
671                                 .addCell(unitCell);
672                         if (sortedGenders.size() > 1) {
673                             caseTablePrinter.addCell(gender).addCell(genderFormatted);
674                         }
675                         caseTablePrinter.addCell(case1);
676 
677                         for (Count plural : adjustedPlurals) {
678                             Double sample = getBestSample(pluralRules, plural);
679 
680                             // <caseMinimalPairs case="nominative">{0} kostet
681                             // €3,50.</caseMinimalPairs>
682 
683                             String unitPattern =
684                                     cldrFile.getStringValueWithBailey(
685                                             "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\""
686                                                     + longUnit
687                                                     + "\"]/unitPattern"
688                                                     + GrammarInfo.getGrammaticalInfoAttributes(
689                                                             grammarInfo,
690                                                             UnitPathType.unit,
691                                                             plural.toString(),
692                                                             null,
693                                                             case1));
694                             unitPattern = unitPattern.replace("\u00A0", " ");
695 
696                             caseTablePrinter.addCell(unitPattern);
697 
698                             String numberPlusUnit =
699                                     MessageFormat.format(unitPattern, decFormat.format(sample));
700 
701                             String caseMinimalPair =
702                                     cldrFile.getStringValue(
703                                             "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\""
704                                                     + case1
705                                                     + "\"]");
706                             String withContext =
707                                     caseMinimalPair == null
708                                             ? numberPlusUnit
709                                             : MessageFormat.format(caseMinimalPair, numberPlusUnit);
710 
711                             caseTablePrinter.addCell(withContext);
712                         }
713                         // finish the row
714                         caseTablePrinter.finishRow();
715                     }
716                 }
717             }
718             info.put(
719                     "Unit Case and Gender Info",
720                     new TablePrinterWithHeader(
721                             "<p>This table has rows contains unit forms appropriate for different grammatical cases and plural forms. "
722                                     + "Each plural form has a sample value such as <i>(1.2)</i> or <i>(2)</i>. "
723                                     + "That value is used with the localized unit pattern to form a formatted measure, such as “2,0 Stunden”. "
724                                     + "That formatted measure is in turn substituted into a "
725                                     + "<b><a target='doc-minimal-pairs' href='http://cldr.unicode.org/translation/grammatical-inflection#TOC-Miscellaneous-Minimal-Pairs'>case minimal pair pattern</a> to get the "
726                                     + FORMATTED_SAMPLE
727                                     + "</b>. "
728                                     + "The <b>Gender</b> column is informative; it just supplies the supplied gender for the unit.</p>\n"
729                                     + "<ul><li>For clarity, conversion values are supplied for non-metric units. "
730                                     + "For more information, see <a target='unit_conversions' href='../supplemental/unit_conversions.html'>Unit Conversions</a>.</li>"
731                                     + "</ul>\n",
732                             caseTablePrinter));
733 
734             // get best units for gender.
735             Multimap<String, BestUnitForGender> bestUnitForGender = TreeMultimap.create();
736 
737             for (String longUnit : unitsToAddGrammar) {
738                 final String shortUnit = uc.getShortId(longUnit);
739                 String gender =
740                         UnitPathType.gender.getTrans(
741                                 cldrFile, "long", shortUnit, null, null, null, null);
742                 final BestUnitForGender bestUnit = unitToBestUnit.get(shortUnit);
743                 if (gender != null && bestUnit != null) {
744                     bestUnitForGender.put(gender, bestUnit);
745                 }
746             }
747 
748             for (Entry<String, Collection<BestUnitForGender>> entry :
749                     bestUnitForGender.asMap().entrySet()) {
750                 List<String> items =
751                         entry.getValue().stream()
752                                 .map(x -> x.shortUnit)
753                                 .collect(Collectors.toList());
754                 System.out.println(locale + "\t" + entry.getKey() + "\t" + items);
755             }
756 
757             TablePrinter powerTable =
758                     new TablePrinter()
759                             .addColumn(
760                                     "Unit",
761                                     "class='source' width='1%'",
762                                     CldrUtility.getDoubleLinkMsg(),
763                                     "class='source'",
764                                     true)
765                             .setSortPriority(2)
766                             .setRepeatHeader(true)
767                             .addColumn(
768                                     "Case",
769                                     "class='source' width='1%'",
770                                     null,
771                                     "class='source'",
772                                     true)
773                             .addColumn(
774                                     "Gender",
775                                     "class='source' width='1%'",
776                                     null,
777                                     "class='source'",
778                                     true);
779             double width = ((int) ((99.0 / (adjustedPlurals.size() * 2 + 1)) * 1000)) / 1000.0;
780             String widthStringTarget = "class='target' width='" + width + "%'";
781 
782             addTwoColumns(powerTable, widthStringTarget, adjustedPlurals, pluralRules, false);
783 
784             // now get the items
785             for (String power : Arrays.asList("power2", "power3")) {
786                 String unitCell = power;
787 
788                 for (String gender : sortedGenders) {
789                     Collection<BestUnitForGender> bestUnits = bestUnitForGender.get(gender);
790                     String bestUnit = null;
791                     if (!bestUnits.isEmpty()) {
792                         bestUnit = bestUnits.iterator().next().shortUnit;
793                     }
794 
795                     for (String case1 : sortedCases) { //
796                         // start a row, then add the cells in the row.
797                         powerTable
798                                 .addRow()
799                                 .addCell(unitCell)
800                                 .addCell(case1)
801                                 .addCell(gender + (bestUnit == null ? "" : "\n(" + bestUnit + ")"));
802 
803                         for (Count plural : adjustedPlurals) {
804                             String localizedPowerPattern =
805                                     UnitPathType.power.getTrans(
806                                             cldrFile,
807                                             "long",
808                                             power,
809                                             plural.toString(),
810                                             case1,
811                                             gender,
812                                             null);
813                             localizedPowerPattern = localizedPowerPattern.replace("\u00A0", " ");
814                             powerTable.addCell(localizedPowerPattern);
815 
816                             if (bestUnit == null) {
817                                 powerTable.addCell("n/a");
818                             } else {
819                                 Double samplePlural = getBestSample(pluralRules, plural);
820                                 String localizedUnitPattern =
821                                         UnitPathType.unit.getTrans(
822                                                 cldrFile,
823                                                 "long",
824                                                 bestUnit,
825                                                 plural.toString(),
826                                                 case1,
827                                                 gender,
828                                                 null);
829                                 placeholderPosition =
830                                         UnitConverter.extractUnit(
831                                                 placeholderMatcher,
832                                                 localizedUnitPattern,
833                                                 unitPatternOut);
834                                 if (placeholderPosition != PlaceholderLocation.middle) {
835                                     localizedUnitPattern = unitPatternOut.value;
836                                     localizedUnitPattern =
837                                             localizedUnitPattern.replace("\u00A0", " ");
838                                     String placeholderPattern =
839                                             placeholderPosition == PlaceholderLocation.missing
840                                                     ? localizedUnitPattern
841                                                     : placeholderMatcher.group();
842 
843                                     String combined;
844                                     try {
845                                         combined =
846                                                 UnitConverter.combineLowercasing(
847                                                         new ULocale(locale),
848                                                         "long",
849                                                         localizedPowerPattern,
850                                                         localizedUnitPattern);
851                                     } catch (Exception e) {
852                                         throw new IllegalArgumentException(
853                                                 locale
854                                                         + ") Can't combine "
855                                                         + "localizedPowerPattern=«"
856                                                         + localizedPowerPattern
857                                                         + "» with localizedUnitPattern=«"
858                                                         + localizedUnitPattern
859                                                         + "»");
860                                     }
861                                     String combinedWithPlaceholder =
862                                             UnitConverter.addPlaceholder(
863                                                     combined,
864                                                     placeholderPattern,
865                                                     placeholderPosition);
866 
867                                     String sample =
868                                             MessageFormat.format(
869                                                     combinedWithPlaceholder,
870                                                     decFormat.format(samplePlural));
871 
872                                     String caseMinimalPair =
873                                             cldrFile.getStringValue(
874                                                     "//ldml/numbers/minimalPairs/caseMinimalPairs[@case=\""
875                                                             + case1
876                                                             + "\"]");
877                                     String withContext =
878                                             caseMinimalPair == null
879                                                     ? sample
880                                                     : MessageFormat.format(caseMinimalPair, sample);
881 
882                                     powerTable.addCell(withContext);
883                                 } else {
884                                     powerTable.addCell("n/a");
885                                 }
886                             }
887                         }
888                         // finish the row
889                         powerTable.finishRow();
890                     }
891                 }
892             }
893             info.put(
894                     "Unit Power Components",
895                     new TablePrinterWithHeader(
896                             "<p>This table shows the square (power2) and cubic (power3) patterns, which may vary by case, gender, and plural forms. "
897                                     + "Each gender is illustrated with a unit where possible, such as <i>(second)</i> or <i>(meter)</i>. "
898                                     + "Each plural category is illustrated with a unit where possible, such as <i>(1)</i> or <i>(1.2)</i>. "
899                                     + "The patterns are first supplied, and then combined with the samples and "
900                                     + "<b><a target='doc-minimal-pairs' href='http://cldr.unicode.org/translation/grammatical-inflection#TOC-Miscellaneous-Minimal-Pairs'>case minimal pair patterns</a></b> "
901                                     + "in the next <b>"
902                                     + FORMATTED_SAMPLE
903                                     + "</b> column."
904                                     + "</p>",
905                             powerTable));
906 
907             if (!info.isEmpty()) {
908                 String name = ENGLISH.getName(locale);
909                 new Subchart(name + ": Unit Grammar Info", locale, info).writeChart(anchors);
910             }
911         }
912     }
913 
addTwoColumns( TablePrinter caseTablePrinter, String widthStringTarget, Collection<Count> adjustedPlurals, final PluralRules pluralRules, boolean spanRows)914     public void addTwoColumns(
915             TablePrinter caseTablePrinter,
916             String widthStringTarget,
917             Collection<Count> adjustedPlurals,
918             final PluralRules pluralRules,
919             boolean spanRows) {
920         for (Count plural : adjustedPlurals) {
921             Double sample = getBestSample(pluralRules, plural);
922             // final String pluralHeader = plural.toString() + " (" + sample + ")";
923             caseTablePrinter
924                     .addColumn(
925                             "Pattern for " + plural.toString(),
926                             widthStringTarget,
927                             null,
928                             "class='target'",
929                             true)
930                     .setSpanRows(spanRows);
931             caseTablePrinter.addColumn(
932                     "Case MP + pattern with " + sample,
933                     widthStringTarget,
934                     null,
935                     "class='target'",
936                     true);
937         }
938     }
939 
940     static final Map<String, Pair<String, Double>> BEST_UNIT_CACHE = new HashMap<>();
941 
getBestBaseUnit( UnitConverter uc, final String shortUnit, Output<Double> sizeInBaseUnits)942     public static String getBestBaseUnit(
943             UnitConverter uc, final String shortUnit, Output<Double> sizeInBaseUnits) {
944         Pair<String, Double> cached = BEST_UNIT_CACHE.get(shortUnit);
945         if (cached != null) {
946             sizeInBaseUnits.value = cached.getSecond();
947             return cached.getFirst();
948         }
949         if (shortUnit.equals("square-mile")) {
950             int debug = 0;
951         }
952         String unitCell =
953                 ENGLISH.getStringValue(
954                         "//ldml/units/unitLength[@type=\"long\"]/unit[@type=\""
955                                 + uc.getLongId(shortUnit)
956                                 + "\"]/displayName");
957         Output<String> baseUnit = new Output<>();
958         ConversionInfo info = uc.parseUnitId(shortUnit, baseUnit, false);
959 
960         if (info != null) {
961             sizeInBaseUnits.value = info.factor.doubleValue();
962             Map<Rational, String> factorToUnit = BASE_TO_FACTOR_TO_UNIT.get(baseUnit.value);
963             if (factorToUnit == null) {
964                 int debug = 0;
965             }
966             String bestUnit = null;
967             Rational bestFactor = null;
968             Rational inputBoundary = Rational.of(2).multiply(info.factor);
969             for (Entry<Rational, String> entry : factorToUnit.entrySet()) {
970                 final String possibleUnit = entry.getValue();
971                 if (possibleUnit.equals("cup-jp")) {
972                     continue; // skip odd unit
973                 }
974                 final Rational currentFactor = entry.getKey();
975                 if (bestUnit != null && currentFactor.compareTo(inputBoundary) >= 0) {
976                     break;
977                 }
978                 bestFactor = currentFactor;
979                 bestUnit = possibleUnit;
980             }
981             bestFactor = info.factor.divide(bestFactor); // scale for bestUnit
982             if (!bestFactor.equals(Rational.ONE) || !shortUnit.equals(bestUnit)) {
983                 final String string = bestFactor.toString(FormatStyle.repeating);
984                 final double bestDoubleFactor = bestFactor.doubleValue();
985                 String pluralCategory = ENGLISH_PLURAL_RULES.select(bestDoubleFactor);
986                 final String unitPath =
987                         "//ldml/units/unitLength[@type=\"short\"]/unit[@type=\""
988                                 + uc.getLongId(bestUnit)
989                                 + "\"]/unitPattern[@count=\""
990                                 + pluralCategory
991                                 + "\"]";
992                 String unitPattern = ENGLISH.getStringValue(unitPath);
993                 if (unitPattern == null) {
994                     final UnitId unitId = uc.createUnitId(bestUnit);
995                     unitPattern =
996                             unitId.toString(ENGLISH, "long", pluralCategory, null, null, false);
997                     if (unitPattern == null) {
998                         return null;
999                     }
1000                 }
1001                 String unitMeasure =
1002                         MessageFormat.format(
1003                                 unitPattern,
1004                                 string.contains("/") ? "~" + bestDoubleFactor : string);
1005                 unitCell = shortUnit + "\n( = " + unitMeasure + ")";
1006             }
1007         } else {
1008             sizeInBaseUnits.value = -1d;
1009         }
1010         BEST_UNIT_CACHE.put(shortUnit, Pair.of(unitCell, sizeInBaseUnits.value));
1011         return unitCell;
1012     }
1013 
getBestSample(PluralRules pluralRules, Count plural)1014     private Double getBestSample(PluralRules pluralRules, Count plural) {
1015         Collection<Double> samples = pluralRules.getSamples(plural.toString());
1016         if (samples.isEmpty()) {
1017             samples = pluralRules.getSamples(plural.toString(), SampleType.DECIMAL);
1018         }
1019         int size = samples.size();
1020         switch (size) {
1021             case 0:
1022                 throw new IllegalArgumentException("shouldn't happen");
1023             case 1:
1024                 return samples.iterator().next();
1025         }
1026         return Iterables.skip(samples, 1).iterator().next();
1027     }
1028 
1029     private class Subchart extends Chart {
1030         private final String title;
1031         private final String file;
1032         private final Map<String, TablePrinterWithHeader> tablePrinter;
1033 
1034         @Override
getShowDate()1035         public boolean getShowDate() {
1036             return false;
1037         }
1038 
Subchart(String title, String file, Map<String, TablePrinterWithHeader> info)1039         public Subchart(String title, String file, Map<String, TablePrinterWithHeader> info) {
1040             super();
1041             this.title = title;
1042             this.file = file;
1043             this.tablePrinter = info;
1044         }
1045 
1046         @Override
getDirectory()1047         public String getDirectory() {
1048             return DIR;
1049         }
1050 
1051         @Override
getTitle()1052         public String getTitle() {
1053             return title;
1054         }
1055 
1056         @Override
getFileName()1057         public String getFileName() {
1058             return file;
1059         }
1060 
1061         @Override
getExplanation()1062         public String getExplanation() {
1063             return MAIN_HEADER
1064                     + "<p><i>Unit Inflections, Phase 1:</i> The end goal is to add full case and gender support for formatted units. "
1065                     + "During Phase 1, a limited number of locales and units of measurement are being handled in CLDR v38, "
1066                     + "so that we can work kinks out of the process before expanding to all units for all locales.</p>\n"
1067                     + "<p>This chart shows grammatical information available for certain unit and/or power patterns. These patterns are also illustrated with a <b>"
1068                     + FORMATTED_SAMPLE
1069                     + "</b> that combine the patterns with sample numbers and "
1070                     + "<b><a target='doc-minimal-pairs' href='http://cldr.unicode.org/translation/grammatical-inflection#TOC-Miscellaneous-Minimal-Pairs'>case minimal pair patterns</a></b>. "
1071                     + "For example, “… für {0} …” is a <i>case minimal pair pattern</i> that requires the placeholder {0} to be in the accusative case in German. By inserting into a minimal pair pattern, "
1072                     + "it is easier to ensure that the original unit and/or power patterns are correctly inflected. </p>\n"
1073                     + "<p><b>Notes</b>"
1074                     + "<ul><li>We don't have the cross-product of minimal pairs for both case and plural forms, "
1075                     + "so the <i>case minimal pair pattern</i> might not be correct for the row’s plural category, especially in the nominative.</li>"
1076                     + "<li>Translators often have difficulties with the the minimal pair patterns, "
1077                     + "since they are <i>transcreations</i> not translations. The Hindi minimal pair patterns for case and gender have been discarded because they were incorrectly translated.</li>"
1078                     + "<li>We don't expect translators to supply minimal pair patterns that are natural for any kind of placeholder: "
1079                     + "for example, it is probably not typical to use the vocative with 3.2 meters! So look at the <b>"
1080                     + FORMATTED_SAMPLE
1081                     + "</b> as an aid for helping to see the context for grammatical inflections, but one that has limitations.</li></ul>";
1082         }
1083 
1084         @Override
writeContents(FormattedFileWriter pw)1085         public void writeContents(FormattedFileWriter pw) throws IOException {
1086             try (PrintWriter tsv =
1087                     FileUtilities.openUTF8Writer(getDirectory() + "tsv/", file + ".tsv"); ) {
1088                 if (tablePrinter.size() > 1) {
1089                     pw.write("<h2>Table of Contents</h2>\n");
1090                     pw.append("<ol>\n");
1091                     for (String header : tablePrinter.keySet()) {
1092                         pw.write(writeTOC(header));
1093                     }
1094                     pw.append("</ol>\n");
1095                 }
1096                 String sep = "";
1097                 for (Entry<String, TablePrinterWithHeader> entry : tablePrinter.entrySet()) {
1098                     final String header = entry.getKey();
1099                     writeHeader(pw, header);
1100                     final TablePrinterWithHeader explanation = entry.getValue();
1101                     pw.write(explanation.header);
1102                     pw.write(explanation.tablePrinter.toTable());
1103                     tsv.write(sep + "# " + entry.getKey() + "\n");
1104                     explanation.tablePrinter.toTsv(tsv);
1105                     sep = "\n";
1106                 }
1107             }
1108         }
1109 
writeHeader(FormattedFileWriter pw, final String header)1110         private void writeHeader(FormattedFileWriter pw, final String header) throws IOException {
1111             pw.write(
1112                     "<h2><a name='"
1113                             + FileUtilities.anchorize(header)
1114                             + "'>"
1115                             + header
1116                             + "</a></h2>\n");
1117         }
1118 
writeTOC(String header)1119         private String writeTOC(String header) {
1120             return "<li><b>"
1121                     + "<a href='#"
1122                     + FileUtilities.anchorize(header)
1123                     + "'>"
1124                     + header
1125                     + "</a>"
1126                     + "</b></li>\n";
1127         }
1128     }
1129 
/**
 * Collator built from the rules of the {@code emoji} collation in the root collation file;
 * assigned once by the static initializer below.
 */
public static RuleBasedCollator RBC;

static {
    final String collationDirectory = CLDRPaths.COMMON_DIRECTORY + "collation/";
    final CLDRFile rootCollation = Factory.make(collationDirectory, ".*").make("root", false);
    final String emojiRules =
            rootCollation.getStringValue(
                    "//ldml/collations/collation[@type=\"emoji\"][@visibility=\"external\"]/cr");

    try {
        RBC = new RuleBasedCollator(emojiRules);
    } catch (Exception e) {
        // Rethrow with the location of the rules so a failure at class load is traceable.
        throw new IllegalArgumentException(
                "Failure in rules for " + collationDirectory + "root", e);
    }
}
1151 }
1152