xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartLanguageMatching.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.tool;
2 
3 import com.ibm.icu.impl.Row.R4;
4 import java.io.IOException;
5 import java.util.List;
6 import org.unicode.cldr.util.CLDRFile;
7 
8 public class ChartLanguageMatching extends Chart {
9 
main(String[] args)10     public static void main(String[] args) {
11         new ChartLanguageMatching().writeChart(null);
12     }
13 
14     @Override
getDirectory()15     public String getDirectory() {
16         return FormattedFileWriter.CHART_TARGET_DIR;
17     }
18 
19     @Override
getTitle()20     public String getTitle() {
21         return "Language Matching";
22     }
23 
24     @Override
getExplanation()25     public String getExplanation() {
26         return "<p>Language Matching data is used to match the user’s desired language/locales against an application’s supported languages/locales. "
27                 + "For more information, see "
28                 + "<a href='http://unicode.org/reports/tr35/#LanguageMatching'>Language Matching</a>. "
29                 + "The latest release data for this chart is in "
30                 + "<a href='http://unicode.org/cldr/latest/common/supplemental/languageInfo.xml'>languageInfo.xml</a>. "
31                 + "The matching process is approximately:<p>"
32                 + "<ul>"
33                 + "<li>The rules are tested—in order—for matches, with the first one winning.</li>"
34                 + "<li>Any exact match between fields has zero distance.</li>"
35                 + "<li>The placeholder (*) matches any code (of that type). "
36                 + "For the last field in Supported, it must be different than Desired.</li>"
37                 + "<li>The <i>Distance</i> indicates how close the match is, where identical fields have distance = 0. </li>"
38                 + "<li>A ⬌︎ in the <i>Sym?</i> column indicates that the distance is symmetric, "
39                 + "and is thus used for both directions: Supported→Desired and Desired→Supported. "
40                 + "A → indicates that the distance is <i>not</i> symmetric: this is usually a <i>fallback</i> match.</li>"
41                 + "</ul>";
42     }
43 
44     @Override
writeContents(FormattedFileWriter pw)45     public void writeContents(FormattedFileWriter pw) throws IOException {
46         TablePrinter tablePrinter =
47                 new TablePrinter()
48                         .addColumn("Desired", "class='source'", null, "class='source'", true)
49                         .addColumn("Supported", "class='source'", null, "class='source'", true)
50                         .addColumn("D. Code", "class='source'", null, "class='source'", true)
51                         .setBreakSpans(true)
52                         .addColumn("S. Code", "class='source'", null, "class='source'", true)
53                         .setBreakSpans(true)
54                         .addColumn("Distance", "class='target'", null, "class='target'", true)
55                         .addColumn("Sym?", "class='target'", null, "class='target'", true);
56 
57         for (String type : SDI.getLanguageMatcherKeys()) {
58             pw.write("<h2>Type=" + type + "</h2>");
59             List<R4<String, String, Integer, Boolean>> data = SDI.getLanguageMatcherData(type);
60             for (R4<String, String, Integer, Boolean> row : data) {
61                 // <languageMatch desired="gsw" supported="de" percent="96" oneway="true" /> <!--
62                 // All Swiss speakers can read High German -->
63 
64                 tablePrinter
65                         .addRow()
66                         // .addCell(ENGLISH.getName(locale))
67                         .addCell(getName(row.get0(), true))
68                         .addCell(getName(row.get1(), false))
69                         .addCell(row.get0())
70                         .addCell(row.get1())
71                         .addCell((100 - row.get2()))
72                         .addCell(row.get3() ? "→" : "⬌")
73                         .finishRow();
74             }
75             pw.write(tablePrinter.toTable());
76             tablePrinter.clearRows();
77         }
78     }
79 
getName(String codeWithStars, boolean user)80     private String getName(String codeWithStars, boolean user) {
81         if (!codeWithStars.contains("*") && !codeWithStars.contains("$")) {
82             return ENGLISH.getName(codeWithStars, true, CLDRFile.SHORT_ALTS);
83         }
84         String[] parts = codeWithStars.split("_");
85         if (parts[0].equals("*")) {
86             parts[0] = "xxx";
87         }
88         if (parts.length > 1 && parts[1].equals("*")) {
89             parts[1] = "Xxxx";
90         }
91         String parts2orig = "XY";
92         if (parts.length > 2) {
93             parts2orig = parts[2];
94             if (parts[2].equals("*")) {
95                 parts[2] = "XX";
96             } else if (parts[2].startsWith("$")) {
97                 parts[2] = "XY";
98             }
99         }
100         String result = ENGLISH.getName(String.join("_", parts), true, CLDRFile.SHORT_ALTS);
101         if (user) {
102             result =
103                     result.replace("Xxxx", "any-script")
104                             .replace("xxx", "any-language")
105                             .replace("XX", "any-region")
106                             .replace("XY", parts2orig);
107         } else {
108             result = replaceStar(result);
109         }
110         return result;
111     }
112 
replaceStar(String result)113     private String replaceStar(String result) {
114         String temp = result.replace("XX", "any-other-region");
115         temp =
116                 temp.equals(result)
117                         ? temp.replace("Xxxx", "any-other-script")
118                         : temp.replace("Xxxx", "any-script");
119         temp =
120                 temp.equals(result)
121                         ? temp.replace("xxx", "any-other-language")
122                         : temp.replace("xxx", "any-language");
123         return temp;
124     }
125 }
126