1 package org.unicode.cldr.test; 2 3 import com.google.common.base.Joiner; 4 import com.ibm.icu.text.MessageFormat; 5 import com.ibm.icu.text.UnicodeSet; 6 import java.util.ArrayList; 7 import java.util.List; 8 import java.util.stream.Collectors; 9 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Subtype; 10 import org.unicode.cldr.test.CheckCLDR.CheckStatus.Type; 11 import org.unicode.cldr.tool.LikelySubtags; 12 import org.unicode.cldr.util.CLDRFile; 13 import org.unicode.cldr.util.CldrUtility; 14 import org.unicode.cldr.util.LocaleIDParser; 15 import org.unicode.cldr.util.XPathParts; 16 import org.unicode.cldr.util.personname.PersonNameFormatter; 17 import org.unicode.cldr.util.personname.PersonNameFormatter.Field; 18 import org.unicode.cldr.util.personname.PersonNameFormatter.Formality; 19 import org.unicode.cldr.util.personname.PersonNameFormatter.Length; 20 import org.unicode.cldr.util.personname.PersonNameFormatter.ModifiedField; 21 import org.unicode.cldr.util.personname.PersonNameFormatter.Modifier; 22 import org.unicode.cldr.util.personname.PersonNameFormatter.NamePattern; 23 import org.unicode.cldr.util.personname.PersonNameFormatter.Optionality; 24 import org.unicode.cldr.util.personname.PersonNameFormatter.SampleType; 25 26 public class CheckPersonNames extends CheckCLDR { 27 28 private static final String LengthValues = 29 Joiner.on(", ") 30 .join(Length.ALL.stream().map(x -> x.toString()).collect(Collectors.toList())); 31 private static final String FormalityValues = 32 Joiner.on(", ") 33 .join( 34 Formality.ALL.stream() 35 .map(x -> x.toString()) 36 .collect(Collectors.toList())); 37 38 static final String MISSING = CldrUtility.NO_INHERITANCE_MARKER; 39 40 private boolean isRoot = false; 41 private boolean hasRootParent = false; 42 private String initialSeparator = " "; 43 44 private UnicodeSet allowedCharacters; 45 private boolean emptyNativeSpaceReplacement; 46 47 static final UnicodeSet BASE_ALLOWED = 48 new UnicodeSet("[\\p{sc=Common}\\p{sc=Inherited}-\\p{N}-[❮❯∅<>∅0]]").freeze(); 49 static final UnicodeSet HANI = new UnicodeSet("[\\p{sc=Hani}]").freeze(); 50 static final UnicodeSet KORE = new UnicodeSet("[\\p{sc=Hang}]").addAll(HANI).freeze(); 51 static final UnicodeSet JPAN = 52 new UnicodeSet("[\\p{sc=Kana}\\p{sc=Hira}]").addAll(HANI).freeze(); 53 54 @Override handleSetCldrFileToCheck( CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors)55 public CheckCLDR handleSetCldrFileToCheck( 56 CLDRFile cldrFileToCheck, Options options, List<CheckStatus> possibleErrors) { 57 String localeId = cldrFileToCheck.getLocaleID(); 58 isRoot = localeId.equals("root"); 59 hasRootParent = "root".equals(LocaleIDParser.getParent(localeId)); 60 61 // other characters are caught by CheckForExemplars 62 String script = new LikelySubtags().getLikelyScript(localeId); 63 allowedCharacters = 64 new UnicodeSet(BASE_ALLOWED).addAll(getUnicodeSetForScript(script)).freeze(); 65 66 String initialPatternSequence = 67 cldrFileToCheck.getStringValue( 68 "//ldml/personNames/initialPattern[@type=\"initialSequence\"]"); 69 initialSeparator = MessageFormat.format(initialPatternSequence, "", ""); 70 // 71 emptyNativeSpaceReplacement = 72 cldrFileToCheck 73 .getStringValue("//ldml/personNames/nativeSpaceReplacement") 74 .isEmpty(); 75 return super.handleSetCldrFileToCheck(cldrFileToCheck, options, possibleErrors); 76 } 77 getUnicodeSetForScript(String script)78 public UnicodeSet getUnicodeSetForScript(String script) { 79 switch (script) { 80 case "Jpan": 81 return JPAN; 82 case "Kore": 83 return KORE; 84 case "Hant": 85 case "Hans": 86 return HANI; 87 default: 88 return new UnicodeSet("[\\p{sc=" + script + "}]"); 89 } 90 } 91 92 static final UnicodeSet nativeSpaceReplacementValues = new UnicodeSet("[{}\\ ]").freeze(); 93 static final UnicodeSet foreignSpaceReplacementValues = new UnicodeSet("[\\ ・·]").freeze(); 94 95 @Override handleCheck( String path, String fullPath, String value, Options options, List<CheckStatus> result)96 public CheckCLDR handleCheck( 97 String path, String fullPath, String value, Options options, List<CheckStatus> result) { 98 if (isRoot || !path.startsWith("//ldml/personNames/")) { 99 return this; 100 } 101 if (!accept(result)) return this; 102 103 XPathParts parts = XPathParts.getFrozenInstance(path); 104 switch (parts.getElement(2)) { 105 default: 106 int debug = 0; 107 break; 108 case "personName": 109 NamePattern namePattern = NamePattern.from(0, value); 110 checkAdjacentFields(namePattern, result); 111 ArrayList<List<String>> failures = 112 namePattern.findInitialFailures(initialSeparator); 113 for (List<String> row : failures) { 114 String previousField = row.get(0); 115 String intermediateLiteral = row.get(1); 116 String followingField = row.get(1); 117 result.add( 118 new CheckStatus() 119 .setCause(this) 120 .setMainType(CheckStatus.errorType) 121 .setSubtype(Subtype.illegalCharactersInPattern) 122 .setMessage( 123 "The gap between {0} and {2} must be the same as the pattern-initialSequence, =“{1}”", 124 previousField, intermediateLiteral, followingField)); 125 } 126 127 break; 128 case "nativeSpaceReplacement": 129 if (!nativeSpaceReplacementValues.contains(value)) { 130 result.add( 131 new CheckStatus() 132 .setCause(this) 133 .setMainType(CheckStatus.errorType) 134 .setSubtype(Subtype.illegalCharactersInPattern) 135 .setMessage( 136 "NativeSpaceReplacement must be space if script requires spaces, and empty otherwise.")); 137 } 138 break; 139 case "foreignSpaceReplacement": 140 if (!foreignSpaceReplacementValues.contains(value)) { 141 result.add( 142 new CheckStatus() 143 .setCause(this) 144 .setMainType(CheckStatus.errorType) 145 .setSubtype(Subtype.illegalCharactersInPattern) 146 .setMessage( 147 "ForeignSpaceReplacement must be space if script requires spaces.")); 148 } 149 break; 150 case "parameterDefault": 151 checkParameterDefault(this, value, result, parts); 152 break; 153 case "sampleName": 154 if (value == null) { 155 break; 156 } 157 if (!allowedCharacters.containsAll(value) 158 && !value.equals(CldrUtility.NO_INHERITANCE_MARKER)) { 159 UnicodeSet bad = new UnicodeSet().addAll(value).removeAll(allowedCharacters); 160 final Type mainType = 161 getPhase() != Phase.BUILD 162 ? CheckStatus.errorType 163 : CheckStatus 164 .warningType; // we need to be able to check this in 165 // without error 166 result.add( 167 new CheckStatus() 168 .setCause(this) 169 .setMainType(mainType) 170 .setSubtype(Subtype.badSamplePersonName) 171 .setMessage( 172 "Illegal characters in sample name: " 173 + bad.toPattern(false))); 174 } else if (getCldrFileToCheck().getUnresolved().getStringValue(path) != null) { 175 176 // (Above) We only check for an error if there is a value in the UNresolved 177 // file. 178 // That is, we don't want MISSING that is inherited from root to cause an error, 179 // unless it is explicitly inherited 180 181 // We also check that 182 // - if there is a surname2, there must be either a surname or surname.core 183 184 String message = null; 185 SampleType sampleType = SampleType.valueOf(parts.getAttributeValue(2, "item")); 186 String modifiedField = parts.getAttributeValue(3, "type"); 187 boolean isMissingInUnresolved = 188 value.equals(MISSING) || value.equals(CldrUtility.INHERITANCE_MARKER); 189 190 final Optionality optionality = sampleType.getOptionality(modifiedField); 191 if (isMissingInUnresolved) { 192 if (optionality == Optionality.required) { 193 message = "This value must not be empty (" + MISSING + ")"; 194 } 195 } else { // not missing, so... 196 if (optionality == Optionality.disallowed) { 197 message = "This value must be empty (" + MISSING + ")"; 198 } else if (modifiedField.equals("surname2")) { 199 String surname = 200 getCldrFileToCheck() 201 .getStringValue(path) 202 .replace("surname2", "surname"); 203 String surnameCore = 204 getCldrFileToCheck() 205 .getStringValue(path) 206 .replace("surname2", "surname-core"); 207 if (surname.equals(MISSING) && surnameCore.equals(MISSING)) { 208 message = 209 "The value for '" 210 + modifiedField 211 + "' must not be empty (" 212 + MISSING 213 + ") unless 'surname2' is."; 214 } 215 } 216 } 217 if (message != null) { 218 getPhase(); 219 final Type mainType = 220 getPhase() != Phase.BUILD 221 ? CheckStatus.errorType 222 : CheckStatus 223 .warningType; // we need to be able to check this in 224 // without error 225 result.add( 226 new CheckStatus() 227 .setCause(this) 228 .setMainType(mainType) 229 .setSubtype(Subtype.badSamplePersonName) 230 .setMessage(message)); 231 } 232 } 233 break; 234 } 235 return this; 236 } 237 checkAdjacentFields(NamePattern namePattern, List<CheckStatus> result)238 private void checkAdjacentFields(NamePattern namePattern, List<CheckStatus> result) { 239 ModifiedField lastModifiedField = null; 240 for (int i = 0; i < namePattern.getElementCount(); ++i) { 241 ModifiedField modifiedField = namePattern.getModifiedField(i); 242 if (modifiedField == null) { // literal 243 lastModifiedField = null; 244 } else if (lastModifiedField != null) { // we have two adjacent fields 245 // adjacent monograms are ok 246 if (lastModifiedField.getModifiers().contains(Modifier.monogram) 247 && modifiedField.getModifiers().contains(Modifier.monogram)) { 248 continue; 249 } 250 // no gap after initials is ok (the check for consistency with the initials pattern 251 // is elsewhere) 252 if (lastModifiedField.getModifiers().contains(Modifier.initial) 253 || lastModifiedField.getModifiers().contains(Modifier.initialCap)) { 254 continue; 255 } 256 257 // no gap before title is ok, for locales with no spaces 258 if (modifiedField.getField() == Field.title && emptyNativeSpaceReplacement) { 259 continue; 260 } 261 result.add( 262 new CheckStatus() 263 .setCause(this) 264 .setMainType( 265 emptyNativeSpaceReplacement 266 ? CheckStatus.warningType 267 : CheckStatus.errorType) 268 .setSubtype(Subtype.missingSpaceBetweenNameFields) 269 .setMessage( 270 "Normally there should be a space or punctuation between name fields: '{'{0}'}{'{1}'}'", 271 lastModifiedField, modifiedField)); 272 } 273 lastModifiedField = modifiedField; 274 } 275 } 276 checkParameterDefault( CheckCLDR checkCldr, String value, List<CheckStatus> result, XPathParts parts)277 public static void checkParameterDefault( 278 CheckCLDR checkCldr, String value, List<CheckStatus> result, XPathParts parts) { 279 String okValues = null; 280 boolean succeed = false; 281 try { 282 switch (parts.getAttributeValue(-1, "parameter")) { 283 case "length": 284 okValues = LengthValues; 285 PersonNameFormatter.Length.from(value); 286 break; 287 case "formality": 288 okValues = FormalityValues; 289 PersonNameFormatter.Formality.from(value); 290 break; 291 } 292 succeed = true; 293 } catch (Exception e) { 294 } 295 if (value == null || !succeed) { 296 result.add( 297 new CheckStatus() 298 .setCause(checkCldr) 299 .setMainType(CheckStatus.errorType) 300 .setSubtype(Subtype.illegalParameterValue) 301 .setMessage("Valid values are: {0}", okValues)); 302 } 303 } 304 } 305