xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/test/CheckPersonNames.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.test;
2 
3 import com.google.common.base.Joiner;
4 import com.ibm.icu.text.MessageFormat;
5 import com.ibm.icu.text.UnicodeSet;
6 import java.util.ArrayList;
7 import java.util.List;
8 import java.util.stream.Collectors;
9 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype;
10 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Type;
11 import org.unicode.cldr.tool.LikelySubtags;
12 import org.unicode.cldr.util.CLDRFile;
13 import org.unicode.cldr.util.CldrUtility;
14 import org.unicode.cldr.util.LocaleIDParser;
15 import org.unicode.cldr.util.XPathParts;
16 import org.unicode.cldr.util.personname.PersonNameFormatter;
17 import org.unicode.cldr.util.personname.PersonNameFormatter.Field;
18 import org.unicode.cldr.util.personname.PersonNameFormatter.Formality;
19 import org.unicode.cldr.util.personname.PersonNameFormatter.Length;
20 import org.unicode.cldr.util.personname.PersonNameFormatter.ModifiedField;
21 import org.unicode.cldr.util.personname.PersonNameFormatter.Modifier;
22 import org.unicode.cldr.util.personname.PersonNameFormatter.NamePattern;
23 import org.unicode.cldr.util.personname.PersonNameFormatter.Optionality;
24 import org.unicode.cldr.util.personname.PersonNameFormatter.SampleType;
25 
26 public class CheckPersonNames extends CheckCLDR {
27 
28     private static final String LengthValues =
29             Joiner.on(", ")
30                     .join(Length.ALL.stream().map(x -> x.toString()).collect(Collectors.toList()));
31     private static final String FormalityValues =
32             Joiner.on(", ")
33                     .join(
34                             Formality.ALL.stream()
35                                     .map(x -> x.toString())
36                                     .collect(Collectors.toList()));
37 
38     static final String MISSING = CldrUtility.NO_INHERITANCE_MARKER;
39 
40     private boolean isRoot = false;
41     private boolean hasRootParent = false;
42     private String initialSeparator = " ";
43 
44     private UnicodeSet allowedCharacters;
45     private boolean emptyNativeSpaceReplacement;
46 
47     static final UnicodeSet BASE_ALLOWED =
48             new UnicodeSet("[\\p{sc=Common}\\p{sc=Inherited}-\\p{N}-[❮❯∅<>∅0]]").freeze();
49     static final UnicodeSet HANI = new UnicodeSet("[\\p{sc=Hani}]").freeze();
50     static final UnicodeSet KORE = new UnicodeSet("[\\p{sc=Hang}]").addAll(HANI).freeze();
51     static final UnicodeSet JPAN =
52             new UnicodeSet("[\\p{sc=Kana}\\p{sc=Hira}]").addAll(HANI).freeze();
53 
54     @Override
handleSetCldrFileToCheck( CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)55     public CheckCLDR handleSetCldrFileToCheck(
56             CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors) {
57         String localeId = cldrFileToCheck.getLocaleID();
58         isRoot = localeId.equals("root");
59         hasRootParent = "root".equals(LocaleIDParser.getParent(localeId));
60 
61         // other characters are caught by CheckForExemplars
62         String script = new LikelySubtags().getLikelyScript(localeId);
63         allowedCharacters =
64                 new UnicodeSet(BASE_ALLOWED).addAll(getUnicodeSetForScript(script)).freeze();
65 
66         String initialPatternSequence =
67                 cldrFileToCheck.getStringValue(
68                         "//ldml/personNames/initialPattern[@type=\"initialSequence\"]");
69         initialSeparator = MessageFormat.format(initialPatternSequence, "", "");
70         //
71         emptyNativeSpaceReplacement =
72                 cldrFileToCheck
73                         .getStringValue("//ldml/personNames/nativeSpaceReplacement")
74                         .isEmpty();
75         return super.handleSetCldrFileToCheck(cldrFileToCheck, options, possibleErrors);
76     }
77 
getUnicodeSetForScript(String script)78     public UnicodeSet getUnicodeSetForScript(String script) {
79         switch (script) {
80             case "Jpan":
81                 return JPAN;
82             case "Kore":
83                 return KORE;
84             case "Hant":
85             case "Hans":
86                 return HANI;
87             default:
88                 return new UnicodeSet("[\\p{sc=" + script + "}]");
89         }
90     }
91 
92     static final UnicodeSet nativeSpaceReplacementValues = new UnicodeSet("[{}\\ ]").freeze();
93     static final UnicodeSet foreignSpaceReplacementValues = new UnicodeSet("[\\ ・·]").freeze();
94 
95     @Override
handleCheck( String path, String fullPath, String value, Options options, List<CheckStatus> result)96     public CheckCLDR handleCheck(
97             String path, String fullPath, String value, Options options, List<CheckStatus> result) {
98         if (isRoot || !path.startsWith("//ldml/personNames/")) {
99             return this;
100         }
101         if (!accept(result)) return this;
102 
103         XPathParts parts = XPathParts.getFrozenInstance(path);
104         switch (parts.getElement(2)) {
105             default:
106                 int debug = 0;
107                 break;
108             case "personName":
109                 NamePattern namePattern = NamePattern.from(0, value);
110                 checkAdjacentFields(namePattern, result);
111                 ArrayList<List<String>> failures =
112                         namePattern.findInitialFailures(initialSeparator);
113                 for (List<String> row : failures) {
114                     String previousField = row.get(0);
115                     String intermediateLiteral = row.get(1);
116                     String followingField = row.get(1);
117                     result.add(
118                             new CheckStatus()
119                                     .setCause(this)
120                                     .setMainType(CheckStatus.errorType)
121                                     .setSubtype(Subtype.illegalCharactersInPattern)
122                                     .setMessage(
123                                             "The gap between {0} and {2} must be the same as the pattern-initialSequence, =“{1}”",
124                                             previousField, intermediateLiteral, followingField));
125                 }
126 
127                 break;
128             case "nativeSpaceReplacement":
129                 if (!nativeSpaceReplacementValues.contains(value)) {
130                     result.add(
131                             new CheckStatus()
132                                     .setCause(this)
133                                     .setMainType(CheckStatus.errorType)
134                                     .setSubtype(Subtype.illegalCharactersInPattern)
135                                     .setMessage(
136                                             "NativeSpaceReplacement must be space if script requires spaces, and empty otherwise."));
137                 }
138                 break;
139             case "foreignSpaceReplacement":
140                 if (!foreignSpaceReplacementValues.contains(value)) {
141                     result.add(
142                             new CheckStatus()
143                                     .setCause(this)
144                                     .setMainType(CheckStatus.errorType)
145                                     .setSubtype(Subtype.illegalCharactersInPattern)
146                                     .setMessage(
147                                             "ForeignSpaceReplacement must be space if script requires spaces."));
148                 }
149                 break;
150             case "parameterDefault":
151                 checkParameterDefault(this, value, result, parts);
152                 break;
153             case "sampleName":
154                 if (value == null) {
155                     break;
156                 }
157                 if (!allowedCharacters.containsAll(value)
158                         && !value.equals(CldrUtility.NO_INHERITANCE_MARKER)) {
159                     UnicodeSet bad = new UnicodeSet().addAll(value).removeAll(allowedCharacters);
160                     final Type mainType =
161                             getPhase() != Phase.BUILD
162                                     ? CheckStatus.errorType
163                                     : CheckStatus
164                                             .warningType; // we need to be able to check this in
165                     // without error
166                     result.add(
167                             new CheckStatus()
168                                     .setCause(this)
169                                     .setMainType(mainType)
170                                     .setSubtype(Subtype.badSamplePersonName)
171                                     .setMessage(
172                                             "Illegal characters in sample name: "
173                                                     + bad.toPattern(false)));
174                 } else if (getCldrFileToCheck().getUnresolved().getStringValue(path) != null) {
175 
176                     // (Above) We only check for an error if there is a value in the UNresolved
177                     // file.
178                     // That is, we don't want MISSING that is inherited from root to cause an error,
179                     // unless it is explicitly inherited
180 
181                     // We also check that
182                     // - if there is a surname2, there must be either a surname or surname.core
183 
184                     String message = null;
185                     SampleType sampleType = SampleType.valueOf(parts.getAttributeValue(2, "item"));
186                     String modifiedField = parts.getAttributeValue(3, "type");
187                     boolean isMissingInUnresolved =
188                             value.equals(MISSING) || value.equals(CldrUtility.INHERITANCE_MARKER);
189 
190                     final Optionality optionality = sampleType.getOptionality(modifiedField);
191                     if (isMissingInUnresolved) {
192                         if (optionality == Optionality.required) {
193                             message = "This value must not be empty (" + MISSING + ")";
194                         }
195                     } else { // not missing, so...
196                         if (optionality == Optionality.disallowed) {
197                             message = "This value must be empty (" + MISSING + ")";
198                         } else if (modifiedField.equals("surname2")) {
199                             String surname =
200                                     getCldrFileToCheck()
201                                             .getStringValue(path)
202                                             .replace("surname2", "surname");
203                             String surnameCore =
204                                     getCldrFileToCheck()
205                                             .getStringValue(path)
206                                             .replace("surname2", "surname-core");
207                             if (surname.equals(MISSING) && surnameCore.equals(MISSING)) {
208                                 message =
209                                         "The value for '"
210                                                 + modifiedField
211                                                 + "' must not be empty ("
212                                                 + MISSING
213                                                 + ") unless 'surname2' is.";
214                             }
215                         }
216                     }
217                     if (message != null) {
218                         getPhase();
219                         final Type mainType =
220                                 getPhase() != Phase.BUILD
221                                         ? CheckStatus.errorType
222                                         : CheckStatus
223                                                 .warningType; // we need to be able to check this in
224                         // without error
225                         result.add(
226                                 new CheckStatus()
227                                         .setCause(this)
228                                         .setMainType(mainType)
229                                         .setSubtype(Subtype.badSamplePersonName)
230                                         .setMessage(message));
231                     }
232                 }
233                 break;
234         }
235         return this;
236     }
237 
checkAdjacentFields(NamePattern namePattern, List<CheckStatus> result)238     private void checkAdjacentFields(NamePattern namePattern, List<CheckStatus> result) {
239         ModifiedField lastModifiedField = null;
240         for (int i = 0; i < namePattern.getElementCount(); ++i) {
241             ModifiedField modifiedField = namePattern.getModifiedField(i);
242             if (modifiedField == null) { // literal
243                 lastModifiedField = null;
244             } else if (lastModifiedField != null) { // we have two adjacent fields
245                 // adjacent monograms are ok
246                 if (lastModifiedField.getModifiers().contains(Modifier.monogram)
247                         && modifiedField.getModifiers().contains(Modifier.monogram)) {
248                     continue;
249                 }
250                 // no gap after initials is ok (the check for consistency with the initials pattern
251                 // is elsewhere)
252                 if (lastModifiedField.getModifiers().contains(Modifier.initial)
253                         || lastModifiedField.getModifiers().contains(Modifier.initialCap)) {
254                     continue;
255                 }
256 
257                 // no gap before title is ok, for locales with no spaces
258                 if (modifiedField.getField() == Field.title && emptyNativeSpaceReplacement) {
259                     continue;
260                 }
261                 result.add(
262                         new CheckStatus()
263                                 .setCause(this)
264                                 .setMainType(
265                                         emptyNativeSpaceReplacement
266                                                 ? CheckStatus.warningType
267                                                 : CheckStatus.errorType)
268                                 .setSubtype(Subtype.missingSpaceBetweenNameFields)
269                                 .setMessage(
270                                         "Normally there should be a space or punctuation between name fields: '{'{0}'}{'{1}'}'",
271                                         lastModifiedField, modifiedField));
272             }
273             lastModifiedField = modifiedField;
274         }
275     }
276 
checkParameterDefault( CheckCLDR checkCldr, String value, List<CheckStatus> result, XPathParts parts)277     public static void checkParameterDefault(
278             CheckCLDR checkCldr, String value, List<CheckStatus> result, XPathParts parts) {
279         String okValues = null;
280         boolean succeed = false;
281         try {
282             switch (parts.getAttributeValue(-1, "parameter")) {
283                 case "length":
284                     okValues = LengthValues;
285                     PersonNameFormatter.Length.from(value);
286                     break;
287                 case "formality":
288                     okValues = FormalityValues;
289                     PersonNameFormatter.Formality.from(value);
290                     break;
291             }
292             succeed = true;
293         } catch (Exception e) {
294         }
295         if (value == null || !succeed) {
296             result.add(
297                     new CheckStatus()
298                             .setCause(checkCldr)
299                             .setMainType(CheckStatus.errorType)
300                             .setSubtype(Subtype.illegalParameterValue)
301                             .setMessage("Valid values are: {0}", okValues));
302         }
303     }
304 }
305