1 package org.unicode.cldr.util.personname; 2 3 import com.google.common.base.Joiner; 4 import com.google.common.base.Splitter; 5 import com.google.common.collect.Comparators; 6 import com.google.common.collect.ComparisonChain; 7 import com.google.common.collect.ImmutableBiMap; 8 import com.google.common.collect.ImmutableList; 9 import com.google.common.collect.ImmutableListMultimap; 10 import com.google.common.collect.ImmutableMap; 11 import com.google.common.collect.ImmutableSet; 12 import com.google.common.collect.LinkedListMultimap; 13 import com.google.common.collect.ListMultimap; 14 import com.google.common.collect.Multimap; 15 import com.google.common.collect.Multiset; 16 import com.google.common.collect.Sets; 17 import com.google.common.collect.TreeMultimap; 18 import com.google.common.collect.TreeMultiset; 19 import com.ibm.icu.lang.UCharacter; 20 import com.ibm.icu.text.BreakIterator; 21 import com.ibm.icu.text.CaseMap; 22 import com.ibm.icu.text.MessageFormat; 23 import com.ibm.icu.text.Transliterator; 24 import com.ibm.icu.text.UTF16; 25 import com.ibm.icu.text.UnicodeSet; 26 import com.ibm.icu.util.Output; 27 import com.ibm.icu.util.ULocale; 28 import java.util.ArrayList; 29 import java.util.Arrays; 30 import java.util.Collection; 31 import java.util.Collections; 32 import java.util.Comparator; 33 import java.util.EnumSet; 34 import java.util.HashSet; 35 import java.util.Iterator; 36 import java.util.LinkedHashSet; 37 import java.util.List; 38 import java.util.Map; 39 import java.util.Map.Entry; 40 import java.util.Objects; 41 import java.util.Set; 42 import java.util.TreeMap; 43 import java.util.TreeSet; 44 import java.util.function.Function; 45 import java.util.regex.Pattern; 46 import java.util.stream.Collectors; 47 import org.unicode.cldr.test.ExampleGenerator; 48 import org.unicode.cldr.tool.LikelySubtags; 49 import org.unicode.cldr.util.CLDRConfig; 50 import org.unicode.cldr.util.CLDRFile; 51 import org.unicode.cldr.util.CLDRLocale; 52 import org.unicode.cldr.util.ChainedMap; 53 import org.unicode.cldr.util.ChainedMap.M3; 54 import org.unicode.cldr.util.GrammarInfo; 55 import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature; 56 import org.unicode.cldr.util.GrammarInfo.GrammaticalScope; 57 import org.unicode.cldr.util.GrammarInfo.GrammaticalTarget; 58 import org.unicode.cldr.util.LanguageTagParser; 59 import org.unicode.cldr.util.LocaleIDParser; 60 import org.unicode.cldr.util.Pair; 61 import org.unicode.cldr.util.XMLSource; 62 import org.unicode.cldr.util.XPathParts; 63 64 /** 65 * Rough sketch for now TODO Mark Make classes/methods private that don't need to be public TODO 66 * Peter Check for invalid parameters 67 */ 68 public class PersonNameFormatter { 69 70 public static final boolean DEBUG = System.getProperty("PersonNameFormatter.DEBUG") != null; 71 72 public enum Field { 73 title, 74 given, 75 given2, 76 surname, 77 surname2, 78 generation, 79 credentials; 80 public static final Comparator<Iterable<Field>> ITERABLE_COMPARE = 81 Comparators.lexicographical(Comparator.<Field>naturalOrder()); 82 public static final Set<Field> ALL = ImmutableSet.copyOf(Field.values()); 83 } 84 85 public enum Order { 86 givenFirst, 87 surnameFirst, 88 sorting; 89 public static final Comparator<Iterable<Order>> ITERABLE_COMPARE = 90 Comparators.lexicographical(Comparator.<Order>naturalOrder()); 91 public static final Set<Order> ALL = ImmutableSet.copyOf(Order.values()); 92 /** Use this instead of valueOf if value might be null */ from(String item)93 public static Order from(String item) { 94 return item == null ? null : Order.valueOf(item); 95 } 96 } 97 98 public enum Length { 99 // There is a slight complication because 'long' collides with a keyword. 100 long_name, 101 medium, 102 short_name; 103 104 private static ImmutableBiMap<String, Length> exceptionNames = 105 ImmutableBiMap.of( 106 "long", long_name, 107 "short", short_name); 108 109 /** Use this instead of valueOf */ from(String item)110 public static Length from(String item) { 111 if (item == null) { 112 return null; 113 } 114 Length result = exceptionNames.get(item); 115 return result != null ? result : Length.valueOf(item); 116 } 117 118 @Override toString()119 public String toString() { 120 String result = exceptionNames.inverse().get(this); 121 return result != null ? result : name(); 122 } 123 124 public static final Comparator<Iterable<Length>> ITERABLE_COMPARE = 125 Comparators.lexicographical(Comparator.<Length>naturalOrder()); 126 public static final Set<Length> ALL = ImmutableSet.copyOf(Length.values()); 127 } 128 129 public enum Usage { 130 referring, 131 addressing, 132 monogram; 133 public static final Comparator<Iterable<Usage>> ITERABLE_COMPARE = 134 Comparators.lexicographical(Comparator.<Usage>naturalOrder()); 135 public static final Set<Usage> ALL = ImmutableSet.copyOf(Usage.values()); 136 /** Use this instead of valueOf if value might be null */ from(String item)137 public static Usage from(String item) { 138 return item == null ? null : Usage.valueOf(item); 139 } 140 } 141 142 public enum Formality { 143 formal, 144 informal; 145 public static final Comparator<Iterable<Formality>> ITERABLE_COMPARE = 146 Comparators.lexicographical(Comparator.<Formality>naturalOrder()); 147 public static final Set<Formality> ALL = ImmutableSet.copyOf(Formality.values()); 148 /** Use this instead of valueOf if value might be null */ from(String item)149 public static Formality from(String item) { 150 return item == null ? null : Formality.valueOf(item); 151 } 152 } 153 154 public enum Modifier { 155 informal, 156 allCaps, 157 initialCap, 158 initial, 159 retain, 160 monogram, 161 prefix, 162 core, 163 vocative, 164 genitive; 165 public static final Set<Modifier> INITIALS = ImmutableSet.of(initialCap, initial); 166 public static final Comparator<Iterable<Modifier>> ITERABLE_COMPARE = 167 Comparators.lexicographical(Comparator.<Modifier>naturalOrder()); 168 public static final Comparator<Collection<Modifier>> LONGEST_FIRST = 169 new Comparator<>() { 170 171 @Override 172 public int compare(Collection<Modifier> o1, Collection<Modifier> o2) { 173 return ComparisonChain.start() 174 .compare(o2.size(), o1.size()) // reversed order for longest first 175 .compare(o1, o2, ITERABLE_COMPARE) 176 .result(); 177 } 178 }; 179 public static final Set<Modifier> ALL = ImmutableSet.copyOf(Modifier.values()); 180 public static final Set<Modifier> EMPTY = ImmutableSet.of(); 181 public static final Set<String> ALL_STRINGS = 182 ALL.stream().map(x -> x.toString()).collect(Collectors.toUnmodifiableSet()); 183 184 public static final Set<Modifier> GRAMMAR = ImmutableSet.of(vocative, genitive); 185 public static final Set<Modifier> NON_GRAMMAR = 186 ImmutableSet.copyOf(Sets.difference(ALL, ImmutableSet.of(vocative, genitive))); 187 188 static final Set<Set<Modifier>> INCONSISTENT_SETS = 189 ImmutableSet.of( 190 ImmutableSet.of(Modifier.core, Modifier.prefix), 191 ImmutableSet.of(Modifier.initial, Modifier.monogram), 192 ImmutableSet.of(Modifier.allCaps, Modifier.initialCap)); 193 194 /** 195 * If the input modifiers are consistent, returns an ordered set; if not, returns null and 196 * sets an error message. 197 */ getCleanSet( Collection<Modifier> modifierList, Output<String> errorMessage)198 public static Set<Modifier> getCleanSet( 199 Collection<Modifier> modifierList, Output<String> errorMessage) { 200 if (modifierList.isEmpty()) { 201 return ImmutableSet.of(); 202 } 203 Set<Modifier> modifiers = EnumSet.copyOf(modifierList); 204 String errorMessage1 = null; 205 if (modifiers.size() != modifierList.size()) { 206 Multiset<Modifier> dupCheck = TreeMultiset.create(); 207 dupCheck.addAll(modifierList); 208 for (Modifier m : modifiers) { 209 dupCheck.remove(m); 210 } 211 errorMessage1 = "Duplicate modifiers: " + JOIN_COMMA.join(dupCheck); 212 } 213 String errorMessage2 = null; 214 for (Set<Modifier> inconsistentSet : INCONSISTENT_SETS) { 215 if (modifiers.containsAll(inconsistentSet)) { 216 if (errorMessage2 == null) { 217 errorMessage2 = "Inconsistent modifiers: "; 218 } else { 219 errorMessage2 += ", "; 220 } 221 errorMessage2 += inconsistentSet; 222 } 223 } 224 errorMessage.value = 225 errorMessage1 == null 226 ? errorMessage2 227 : errorMessage2 == null 228 ? errorMessage1 229 : errorMessage1 + "; " + errorMessage1; 230 return ImmutableSet.copyOf(modifiers); 231 } 232 233 /** 234 * Extracts grammar modifiers and adds to non-grammar modifiers 235 * 236 * @param allGrammarForLocale 237 * @return 238 */ extractFrom(Collection<String> allGrammarForLocale)239 public static Set<Modifier> extractFrom(Collection<String> allGrammarForLocale) { 240 return ImmutableSet.copyOf( 241 Sets.union( 242 Modifier.NON_GRAMMAR, 243 Modifier.GRAMMAR.stream() 244 .filter(x -> allGrammarForLocale.contains(x.toString())) 245 .collect(Collectors.toSet()))); 246 } 247 248 /** 249 * Verifies that the prefix, core, and plain values are consistent. Returns null if ok, 250 * otherwise error message. 251 */ inconsistentPrefixCorePlainValues( String prefixValue, String coreValue, String plainValue)252 public static String inconsistentPrefixCorePlainValues( 253 String prefixValue, String coreValue, String plainValue) { 254 String errorMessage2 = null; 255 if (prefixValue != null) { 256 if (coreValue != null) { 257 if (plainValue != null) { // prefix = X, core = Y, plain = Z 258 // ok: prefix = "van", core = "Berg", plain = "van Berg" 259 // bad: prefix = "van", core = "Berg", plain = "van Wolf" 260 if (!plainValue.replace(prefixValue, "").trim().equals(coreValue)) { 261 errorMessage2 = 262 "-core value and -prefix value are inconsistent with plain value"; 263 } 264 } 265 // otherwise prefix = "x", core = "y", plain = null, so OK 266 } else { // prefix = X, core = null, plain = ? 267 errorMessage2 = "cannot have -prefix without -core"; 268 } 269 } else if (coreValue != null && plainValue != null && !plainValue.equals(coreValue)) { 270 errorMessage2 = 271 "There is no -prefix, but there is a -core and plain that are unequal"; 272 } 273 return errorMessage2; 274 } 275 getAllowedModifiers(String locale)276 public static Set<Modifier> getAllowedModifiers(String locale) { 277 GrammarInfo grammarInfo = 278 CLDRConfig.getInstance().getSupplementalDataInfo().getGrammarInfo(locale); 279 return grammarInfo == null 280 ? Modifier.ALL 281 : Modifier.extractFrom( 282 grammarInfo.get( 283 GrammaticalTarget.nominal, 284 GrammaticalFeature.grammaticalCase, 285 GrammaticalScope.personNames)); 286 } 287 } 288 289 private static final ImmutableSet<String> G = ImmutableSet.of("given"); 290 private static final ImmutableSet<String> GS = ImmutableSet.of("given", "surname"); 291 private static final ImmutableSet<String> GGS = ImmutableSet.of("given", "given2", "surname"); 292 private static final ImmutableSet<String> GWithSurnameCore = 293 ImmutableSet.of("given", "surname-core"); 294 private static final ImmutableSet<String> Full = 295 ImmutableSet.of( 296 "title", 297 "given", 298 "given-informal", 299 "given2", 300 "surname-prefix", 301 "surname-core", 302 "surname2", 303 "generation", 304 "credentials"); 305 private static final ImmutableSet<String> FullMinusSurname2 = 306 ImmutableSet.copyOf(Sets.difference(Full, Collections.singleton("surname2"))); 307 308 public enum Optionality { 309 required, 310 optional, 311 disallowed 312 } 313 /** 314 * Types of samples, only for use by CLDR 315 * 316 * @internal 317 */ 318 public enum SampleType { 319 nativeG(G, G), 320 nativeGS(GS, GS), 321 nativeGGS(GGS, GS), 322 nativeFull(Full, GWithSurnameCore), 323 foreignG(G, G), 324 foreignGS(GS, GS), 325 foreignGGS(GGS, GGS), 326 foreignFull(Full, FullMinusSurname2), 327 ; 328 public static final Set<SampleType> ALL = ImmutableSet.copyOf(values()); 329 public static final List<String> ALL_STRINGS = 330 ALL.stream().map(x -> x.toString()).collect(Collectors.toUnmodifiableList()); 331 332 private final boolean isNative; 333 private final String abbreviation; 334 private final Set<String> allFields; 335 private final Set<String> requiredFields; 336 SampleType(ImmutableSet<String> allFields, ImmutableSet<String> requiredFields)337 private SampleType(ImmutableSet<String> allFields, ImmutableSet<String> requiredFields) { 338 if (!allFields.containsAll(requiredFields)) { 339 throw new IllegalArgumentException( 340 allFields + " must contain all of " + requiredFields); 341 } 342 this.allFields = allFields; 343 this.requiredFields = requiredFields; 344 345 String _abbreviation = null; 346 if (name().startsWith("native")) { 347 isNative = true; 348 _abbreviation = "N" + name().substring(6); 349 } else if (name().startsWith("foreign")) { 350 isNative = false; 351 _abbreviation = "F" + name().substring(7); 352 } else { 353 throw new IllegalArgumentException("Code needs adjustment!"); 354 } 355 abbreviation = _abbreviation.replace("Full", "F"); 356 } 357 isNative()358 public boolean isNative() { 359 return isNative; 360 } 361 toAbbreviation()362 public String toAbbreviation() { 363 return abbreviation; 364 } 365 getOptionality(String field)366 public Optionality getOptionality(String field) { 367 return requiredFields.contains(field) 368 ? Optionality.required 369 : allFields.contains(field) ? Optionality.optional : Optionality.disallowed; 370 } 371 getAllFields()372 public Set<String> getAllFields() { 373 return allFields; 374 } 375 getRequiredFields()376 public Set<String> getRequiredFields() { 377 return requiredFields; 378 } 379 } 380 381 /** 382 * @internal (all of these) 383 */ 384 public static final Splitter SPLIT_SPACE = Splitter.on(' ').trimResults(); 385 386 public static final Splitter SPLIT_DASH = Splitter.on('-').trimResults(); 387 public static final Splitter SPLIT_EQUALS = Splitter.on('=').trimResults(); 388 public static final Splitter SPLIT_COMMA = Splitter.on(',').trimResults(); 389 public static final Splitter SPLIT_SEMI = Splitter.on(';').trimResults(); 390 391 public static final Joiner JOIN_SPACE = Joiner.on(' '); 392 public static final Joiner JOIN_DASH = Joiner.on('-'); 393 public static final Joiner JOIN_SEMI = Joiner.on("; "); 394 public static final Joiner JOIN_COMMA = Joiner.on(", "); 395 public static final Joiner JOIN_LFTB = Joiner.on("\n\t\t"); 396 397 /** A Field and its modifiers, corresponding to a string form like {given-initial}. Immutable */ 398 public static class ModifiedField implements Comparable<ModifiedField> { 399 private final Field field; 400 private final Set<Modifier> modifiers; 401 getField()402 public Field getField() { 403 return field; 404 } 405 getModifiers()406 public Set<Modifier> getModifiers() { 407 return modifiers; 408 } 409 ModifiedField(Field field, Collection<Modifier> modifiers)410 public ModifiedField(Field field, Collection<Modifier> modifiers) { 411 this.field = field; 412 Output<String> errorMessage = new Output<>(); 413 this.modifiers = Modifier.getCleanSet(modifiers, errorMessage); 414 if (errorMessage.value != null) { 415 throw new IllegalArgumentException(errorMessage.value); 416 } 417 } 418 419 /** convenience method for testing */ ModifiedField(Field field, Modifier... modifiers)420 public ModifiedField(Field field, Modifier... modifiers) { 421 this(field, Arrays.asList(modifiers)); 422 } 423 424 /** convenience method for testing */ from(String string)425 public static ModifiedField from(String string) { 426 Field field = null; 427 List<Modifier> modifiers = new ArrayList<>(); 428 for (String item : SPLIT_DASH.split(string)) { 429 if (field == null) { 430 field = Field.valueOf(item); 431 } else { 432 modifiers.add(Modifier.valueOf(item)); 433 } 434 } 435 return new ModifiedField(field, modifiers); 436 } 437 438 @Override toString()439 public String toString() { 440 StringBuilder result = new StringBuilder(); 441 result.append(field); 442 if (!modifiers.isEmpty()) { 443 result.append('-').append(JOIN_DASH.join(modifiers)); 444 } 445 return result.toString(); 446 } 447 448 @Override equals(Object obj)449 public boolean equals(Object obj) { 450 ModifiedField that = (ModifiedField) obj; 451 return field == that.field && modifiers.equals(that.modifiers); 452 } 453 454 @Override hashCode()455 public int hashCode() { 456 return field.hashCode() ^ modifiers.hashCode(); 457 } 458 459 @Override compareTo(ModifiedField o)460 public int compareTo(ModifiedField o) { 461 return ComparisonChain.start() 462 .compare(field, o.field) 463 .compare(modifiers, o.modifiers, Modifier.ITERABLE_COMPARE) 464 .result(); 465 } 466 467 public static final Set<String> ALL_SAMPLES = 468 ImmutableSet.of( 469 "title", 470 "given", 471 "given-informal", 472 "given2", // 473 "surname", 474 "surname-prefix", 475 "surname-core", 476 "surname2", 477 "generation", 478 "generation", 479 "credentials"); 480 } 481 482 /** 483 * An element of a name pattern: either a literal string (like ", ") or a modified field (like 484 * {given-initial}) The literal is null IFF the modifiedField is not null Immutable 485 * 486 * @internal 487 */ 488 public static class NamePatternElement implements Comparable<NamePatternElement> { 489 private final String literal; 490 private final ModifiedField modifiedField; 491 getLiteral()492 public String getLiteral() { 493 return literal; 494 } 495 getModifiedField()496 public ModifiedField getModifiedField() { 497 return modifiedField; 498 } 499 500 /** 501 * @param literal 502 * @param field 503 * @param modifiers 504 */ NamePatternElement(ModifiedField modifiedField)505 public NamePatternElement(ModifiedField modifiedField) { 506 this.literal = null; 507 this.modifiedField = modifiedField; 508 } 509 NamePatternElement(String literal)510 public NamePatternElement(String literal) { 511 this.literal = literal; 512 this.modifiedField = null; 513 } 514 515 /** convenience method for testing */ from(Object element)516 public static NamePatternElement from(Object element) { 517 if (element instanceof ModifiedField) { 518 return new NamePatternElement((ModifiedField) element); 519 } else { 520 String string = element.toString(); 521 if (string.startsWith("{") && string.endsWith("}")) { 522 return new NamePatternElement( 523 ModifiedField.from(string.substring(1, string.length() - 1))); 524 } else { 525 return new NamePatternElement(string); 526 } 527 } 528 } 529 530 @Override toString()531 public String toString() { 532 return literal != null 533 ? literal.replace("\\", "\\\\").replace("{", "\\{") 534 : modifiedField.toString(); 535 } 536 537 public static final Comparator<Iterable<NamePatternElement>> ITERABLE_COMPARE = 538 Comparators.lexicographical(Comparator.<NamePatternElement>naturalOrder()); 539 540 @Override compareTo(NamePatternElement o)541 public int compareTo(NamePatternElement o) { 542 if (literal != null && o.literal != null) { 543 return literal.compareTo(o.literal); 544 } else if (modifiedField != null && o.modifiedField != null) { 545 return modifiedField.compareTo(o.modifiedField); 546 } else { 547 return literal != null ? -1 : 1; // all literals are less than all modified fields 548 } 549 } 550 } 551 552 /** 553 * Format fallback results, for when modifiers are not found NOTE: CLDR needs to be able to 554 * create from data. 555 * 556 * @internal 557 */ 558 public static class FallbackFormatter { 559 private static final LikelySubtags LIKELY_SUBTAGS = new LikelySubtags(); 560 private final ULocale formatterLocale; 561 private final String formatterLanguage; 562 private final String formatterScript; 563 private final BreakIterator characterBreakIterator; 564 private final BreakIterator wordBreakIterator; 565 private final MessageFormat initialFormatter; 566 private final MessageFormat initialSequenceFormatter; 567 private final String foreignSpaceReplacement; 568 private final String nativeSpaceReplacement; 569 getForeignSpaceReplacement()570 public String getForeignSpaceReplacement() { 571 return foreignSpaceReplacement; 572 } 573 574 private final boolean uppercaseSurnameIfSurnameFirst; 575 private final Map<String, Enum> parameterDefaults; 576 FallbackFormatter( ULocale uLocale, String initialPattern, String initialSequencePattern, String foreignSpaceReplacement, String nativeSpaceReplacement, Map<String, Enum> parameterDefaults, boolean uppercaseSurnameIfSurnameFirst)577 public FallbackFormatter( 578 ULocale uLocale, 579 String initialPattern, 580 String initialSequencePattern, 581 String foreignSpaceReplacement, 582 String nativeSpaceReplacement, 583 Map<String, Enum> parameterDefaults, 584 boolean uppercaseSurnameIfSurnameFirst) { 585 formatterLocale = uLocale; 586 LanguageTagParser ltp = new LanguageTagParser().set(uLocale.toString()); 587 LIKELY_SUBTAGS.maximizeInPlace(ltp); 588 formatterLanguage = ltp.getLanguage(); 589 formatterScript = ltp.getScript(); 590 characterBreakIterator = BreakIterator.getCharacterInstance(uLocale); 591 wordBreakIterator = BreakIterator.getWordInstance(); 592 initialFormatter = new MessageFormat(initialPattern); 593 initialSequenceFormatter = new MessageFormat(initialSequencePattern); 594 this.foreignSpaceReplacement = 595 foreignSpaceReplacement == null ? " " : foreignSpaceReplacement; 596 this.uppercaseSurnameIfSurnameFirst = uppercaseSurnameIfSurnameFirst; 597 this.nativeSpaceReplacement = 598 nativeSpaceReplacement == null ? " " : nativeSpaceReplacement; 599 this.parameterDefaults = 600 parameterDefaults == null 601 ? Collections.emptyMap() 602 : ImmutableMap.copyOf(parameterDefaults); 603 } 604 605 /** 606 * Is foreign language: determines whether the maximized (aka likely) respective language 607 * subtags and script subtags are identical. For purposes of language comparison, regional 608 * variants, etc are not consider relevant. TODO add this to the spec 609 */ sharesLanguageScript(ULocale nameLocale)610 public boolean sharesLanguageScript(ULocale nameLocale) { 611 LanguageTagParser ltp = new LanguageTagParser().set(nameLocale.toString()); 612 LIKELY_SUBTAGS.maximizeInPlace(ltp); 613 return formatterLanguage.equals(ltp.getLanguage()) 614 && formatterScript.equals(ltp.getScript()); 615 } 616 617 /** 618 * Apply the fallbacks for modifiers that are not handled. Public for testing. 619 * 620 * @internal 621 */ applyModifierFallbacks( FormatParameters nameFormatParameters, Set<Modifier> remainingModifers, String bestValue)622 public String applyModifierFallbacks( 623 FormatParameters nameFormatParameters, 624 Set<Modifier> remainingModifers, 625 String bestValue) { 626 // apply default algorithms 627 628 boolean isBackground = false; 629 630 // apply HACK special treatment for ExampleGenerator 631 if (bestValue.startsWith(ExampleGenerator.backgroundStartSymbol) 632 && bestValue.endsWith(ExampleGenerator.backgroundEndSymbol)) { 633 isBackground = true; 634 bestValue = bestValue.substring(1, bestValue.length() - 1); 635 } 636 637 for (Modifier modifier : remainingModifers) { 638 switch (modifier) { 639 case initial: 640 boolean retainPunctuation = remainingModifers.contains(Modifier.retain); 641 bestValue = 642 formatInitial(bestValue, nameFormatParameters, retainPunctuation); 643 break; 644 case retain: 645 // do nothing-- this is handled by "initial" above 646 break; 647 case monogram: 648 bestValue = formatMonogram(bestValue, nameFormatParameters); 649 break; 650 case initialCap: 651 bestValue = 652 TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply( 653 formatterLocale.toLocale(), null, bestValue); 654 break; 655 case allCaps: 656 bestValue = UCharacter.toUpperCase(formatterLocale, bestValue); 657 break; 658 case prefix: 659 bestValue = null; 660 // TODO Mark if there is no plain, but there is a prefix and core, use that; 661 // otherwise use core 662 break; 663 case core: 664 case informal: 665 // no option, just fall back 666 break; 667 // WARNING The following fallbacks are ONLY for the examples in CLDR, not 668 // for production software 669 case genitive: 670 bestValue = bestValue + "ᵍ"; 671 break; 672 case vocative: 673 bestValue = bestValue + "ᵛ"; 674 break; 675 default: 676 break; 677 } 678 } 679 return isBackground && bestValue != null 680 ? ExampleGenerator.backgroundStartSymbol 681 + bestValue 682 + ExampleGenerator.backgroundEndSymbol 683 : bestValue; 684 } 685 formatInitial( String bestValue, FormatParameters nameFormatParameters, boolean retainPunctuation)686 public String formatInitial( 687 String bestValue, 688 FormatParameters nameFormatParameters, 689 boolean retainPunctuation) { 690 // It is probably unusual to have multiple name fields, so this could be optimized for 691 // the simpler case. 692 693 // Employ both the initialFormatter and initialSequenceFormatter 694 695 String result = null; 696 String separator = null; 697 wordBreakIterator.setText(bestValue); 698 int lastBound = wordBreakIterator.first(); 699 int curBound = wordBreakIterator.next(); 700 while (curBound != BreakIterator.DONE) { 701 String part = bestValue.substring(lastBound, curBound); 702 if (Character.isLetter(part.codePointAt(0))) { 703 String partFirst = getFirstGrapheme(part); 704 String partFormatted = initialFormatter.format(new String[] {partFirst}); 705 if (separator != null) { 706 if (result == null) { 707 result = ""; 708 } 709 result = result + separator + partFormatted; 710 } else if (result == null) { 711 result = partFormatted; 712 } else { 713 result = 714 initialSequenceFormatter.format( 715 new String[] {result, partFormatted}); 716 } 717 } else if (retainPunctuation && !Character.isWhitespace(part.codePointAt(0))) { 718 if (separator == null) { 719 separator = part; 720 } else { 721 separator = separator + part; 722 } 723 } 724 lastBound = curBound; 725 curBound = wordBreakIterator.next(); 726 } 727 return result; 728 } 729 formatMonogram(String bestValue, FormatParameters nameFormatParameters)730 public String formatMonogram(String bestValue, FormatParameters nameFormatParameters) { 731 // It is probably unusual to have multiple name fields, so this could be optimized for 732 // the simpler case. 733 734 // For the case of monograms, don't use the initialFormatter or initialSequenceFormatter 735 // And just take the first grapheme. 736 737 // special case for Survey Tool ExampleGenerator 738 739 if (bestValue.startsWith(ExampleGenerator.backgroundStartSymbol) 740 && bestValue.endsWith(ExampleGenerator.backgroundEndSymbol)) { 741 bestValue = bestValue.substring(1, bestValue.length() - 1); 742 return ExampleGenerator.backgroundStartSymbol 743 + getFirstGrapheme(bestValue) 744 + ExampleGenerator.backgroundEndSymbol; 745 } 746 747 return getFirstGrapheme(bestValue); 748 } 749 getFirstGrapheme(String bestValue)750 private String getFirstGrapheme(String bestValue) { 751 characterBreakIterator.setText(bestValue); 752 bestValue = bestValue.substring(0, characterBreakIterator.next()); 753 return bestValue; 754 } 755 formatAllCaps(String bestValue)756 public String formatAllCaps(String bestValue) { 757 return UCharacter.toUpperCase(formatterLocale, bestValue); 758 } 759 760 /** 761 * Apply other modifications. Currently just the surname capitalization, but can be extended 762 * in the future. 763 * 764 * @param modifiedField 765 */ tweak( ModifiedField modifiedField, String bestValue, FormatParameters nameFormatParameters)766 public String tweak( 767 ModifiedField modifiedField, 768 String bestValue, 769 FormatParameters nameFormatParameters) { 770 if (uppercaseSurnameIfSurnameFirst 771 && nameFormatParameters.matchesOrder(Order.surnameFirst) 772 && (modifiedField.getField() == Field.surname 773 || modifiedField.getField() == Field.surname2)) { 774 bestValue = UCharacter.toUpperCase(formatterLocale, bestValue); 775 } 776 return bestValue; 777 } 778 } 779 780 /** 781 * A name pattern, corresponding to a string such as "{given-initial} {surname}" Immutable NOTE: 782 * CLDR needs to be able to create from data. 783 * 784 * @internal 785 */ 786 public static class NamePattern implements Comparable<NamePattern> { 787 private final int rank; 788 private final ImmutableList<NamePatternElement> elements; 789 private final ImmutableSet<Field> fields; 790 getFields()791 public Set<Field> getFields() { 792 return fields; 793 } 794 getFieldsSize()795 public int getFieldsSize() { 796 return fields.size(); 797 } 798 799 /** 800 * Return the rank order (0, 1, ...) in a list 801 * 802 * @return 803 */ getRank()804 public int getRank() { 805 return rank; 806 } 807 format( NameObject nameObject, FormatParameters nameFormatParameters, FallbackFormatter fallbackInfo)808 public String format( 809 NameObject nameObject, 810 FormatParameters nameFormatParameters, 811 FallbackFormatter fallbackInfo) { 812 StringBuilder result = new StringBuilder(); 813 /* 814 * We have a series of literals and placeholders. 815 * • The literals are never "", 816 * • while the placeholders may have a value or may be missing (null). 817 * If we have a missing placeholder at the start, we discard the literal (if any) before it. 818 * 1. Effectively, that means that we don't append it to result until we've seen the following field. 819 * If we have a missing placeholder at the end, we discard a literal (if any) after it. 820 * 2. Effectively, that means that we don't append it to result if the last placeholder was missing. 821 * If we have adjacent missing placeholders, then we discard the literal between them. 822 * 823 * We also coalesce literals A and B. This can only happen if we had one or more empty placeholders: 824 * 3. If A.endsWith(B) then we discard it. 825 * 4. Any sequence of multiple whitespace is reduced to the first. 826 * The following booleans represent the state we are in: 827 */ 828 boolean seenLeadingField = 829 false; // set to true with not missing (so we have at least 1 non-missing field) 830 boolean seenEmptyLeadingField = 831 false; // set to false with not missing; set to true with missing and 832 // !seenLeadingField 833 boolean seenEmptyField = 834 false; // set to false with seenEmptyField & not missing; set to true with 835 // missing & seenLeadingField 836 837 StringBuilder literalTextBefore = 838 new StringBuilder(); // literal right after a non-missing placeholder 839 StringBuilder literalTextAfter = 840 new StringBuilder(); // literal right after a missing placeholder 841 842 // Check that we either have a given value in the pattern or a surname value in the name 843 // object 844 if (!nameObject.getAvailableFields().contains(Field.surname) && !hasNonInitialGiven()) { 845 nameObject = new GivenToSurnameNameObject(nameObject); 846 } 847 for (NamePatternElement element : elements) { 848 final String literal = element.getLiteral(); 849 if (literal != null) { 850 if (seenEmptyLeadingField) { 851 // do nothing; throw away the literal text 852 } else if (seenEmptyField) { 853 literalTextAfter.append(literal); 854 } else { 855 literalTextBefore.append(literal); 856 } 857 } else { 858 String bestValue = 859 getBestValueForNameObject( 860 nameObject, element, nameFormatParameters, fallbackInfo); 861 if (bestValue == null) { 862 if (!seenLeadingField) { 863 seenEmptyLeadingField = true; 864 literalTextBefore.setLength(0); 865 } else { 866 seenEmptyField = true; 867 literalTextAfter.setLength(0); 868 } 869 } else { 870 seenLeadingField = true; 871 seenEmptyLeadingField = false; 872 if (seenEmptyField) { 873 result.append( 874 coalesceLiterals( 875 literalTextBefore, literalTextAfter)); // also clears 876 // literalTextBefore&After 877 result.append(bestValue); 878 seenEmptyField = false; 879 } else { 880 // discard literalTextAfter 881 result.append(literalTextBefore); 882 literalTextBefore.setLength(0); 883 result.append(bestValue); 884 } 885 } 886 } 887 } 888 if (!seenEmptyField) { 889 result.append(literalTextBefore); 890 } 891 if (!fallbackInfo.foreignSpaceReplacement.equals(" ") 892 || !fallbackInfo.nativeSpaceReplacement.equals(" ")) { 893 ULocale nameLocale = nameObject.getNameLocale(); 894 if (!fallbackInfo.sharesLanguageScript(nameLocale)) { 895 return SPACES.matcher(result).replaceAll(fallbackInfo.foreignSpaceReplacement); 896 } else { 897 return SPACES.matcher(result).replaceAll(fallbackInfo.nativeSpaceReplacement); 898 // TODO add this to the spec 899 } 900 } 901 return result.toString(); 902 } 903 904 static final ImmutableSet<Modifier> INITIALS = 905 ImmutableSet.of(Modifier.initialCap, Modifier.initial); 906 hasNonInitialGiven()907 public boolean hasNonInitialGiven() { 908 if (!getFields().contains(Field.given)) { 909 return false; 910 } 911 for (int index : getFieldPositions().get(Field.given)) { 912 ModifiedField modifiedField = getModifiedField(index); 913 if (Collections.disjoint(modifiedField.getModifiers(), INITIALS)) { 914 return true; // there is a given, and it doesn't have an initial modifier. 915 } 916 } 917 return false; 918 } 919 920 static final Pattern SPACES = Pattern.compile("\\s+"); // TODO pick whitespace 921 getBestValueForNameObject( NameObject nameObject, NamePatternElement element, FormatParameters nameFormatParameters, FallbackFormatter fallbackInfo)922 private String getBestValueForNameObject( 923 NameObject nameObject, 924 NamePatternElement element, 925 FormatParameters nameFormatParameters, 926 FallbackFormatter fallbackInfo) { 927 Set<Modifier> remainingModifers = EnumSet.noneOf(Modifier.class); 928 final ModifiedField modifiedField = element.getModifiedField(); 929 String bestValue = nameObject.getBestValue(modifiedField, remainingModifers); 930 if (bestValue == null) { 931 return null; 932 } 933 if (!remainingModifers.isEmpty()) { 934 bestValue = 935 fallbackInfo.applyModifierFallbacks( 936 nameFormatParameters, remainingModifers, bestValue); 937 } 938 return fallbackInfo.tweak(modifiedField, bestValue, nameFormatParameters); 939 } 940 coalesceLiterals(StringBuilder l1, StringBuilder l2)941 private String coalesceLiterals(StringBuilder l1, StringBuilder l2) { 942 if (endsWith(l1, l2)) { 943 l2.setLength(0); 944 } 945 // get the range of nonwhitespace characters at the beginning of l1 946 int p1 = 0; 947 while (p1 < l1.length() && !Character.isWhitespace(l1.charAt(p1))) { 948 ++p1; 949 } 950 951 // get the range of nonwhitespace characters at the end of l2 952 int p2 = l2.length() - 1; 953 while (p2 >= 0 && !Character.isWhitespace(l2.charAt(p2))) { 954 --p2; 955 } 956 957 // also include one whitespace character from l1 or, if there aren't 958 // any, one whitespace character from l2 959 if (p1 < l1.length()) { 960 ++p1; 961 } else if (p2 >= 0) { 962 --p2; 963 } 964 965 // concatenate those two ranges to get the coalesced literal text 966 String result = l1.substring(0, p1) + l2.substring(p2 + 1); 967 968 // clear out l1 and l2 (done here to improve readability in format() above)) 969 l1.setLength(0); 970 l2.setLength(0); 971 972 return result; 973 } 974 endsWith(StringBuilder l1, StringBuilder l2)975 private boolean endsWith(StringBuilder l1, StringBuilder l2) { 976 final int l2Length = l2.length(); 977 final int delta = l1.length() - l2Length; 978 if (delta < 0) { 979 return false; 980 } 981 for (int i = 0; i < l2Length; ++i) { 982 // don't have to worry about unpaired surrogates. 983 if (l1.charAt(i + delta) != l2.charAt(i)) { 984 return false; 985 } 986 } 987 return true; 988 } 989 NamePattern(int rank, List<NamePatternElement> elements)990 public NamePattern(int rank, List<NamePatternElement> elements) { 991 this.rank = rank; 992 this.elements = ImmutableList.copyOf(elements); 993 Set<Field> result = EnumSet.noneOf(Field.class); 994 for (NamePatternElement element : elements) { 995 ModifiedField modifiedField = element.getModifiedField(); 996 if (modifiedField != null) { 997 result.add(modifiedField.getField()); 998 } 999 } 1000 this.fields = ImmutableSet.copyOf(result); 1001 } 1002 1003 /** convenience method for testing */ from(int rank, Object... elements)1004 public static NamePattern from(int rank, Object... elements) { 1005 return new NamePattern(rank, makeList(elements)); 1006 } 1007 1008 /** convenience method for testing */ from(int rank, String patternString)1009 public static NamePattern from(int rank, String patternString) { 1010 return new NamePattern(rank, parse(patternString)); 1011 } 1012 1013 private static final Set<Character> ALLOWED_ESCAPED_CHARACTERS = 1014 new HashSet<>(Arrays.asList('\\', '{', '}')); 1015 parse(String patternString)1016 private static List<NamePatternElement> parse(String patternString) { 1017 List<NamePatternElement> result = new ArrayList<>(); 1018 1019 String rawValue = ""; 1020 Boolean curlyStarted = false; 1021 final int patternLength = patternString.length(); 1022 int i = 0; 1023 while (i < patternLength) { 1024 final Character currentCharacter = 1025 patternString.charAt(i); // this is safe, since syntax is ASCII 1026 1027 switch (currentCharacter) { 1028 case '\\': 1029 if (i + 1 < patternLength) { 1030 final Character nextCharacter = patternString.charAt(i + 1); 1031 if (!ALLOWED_ESCAPED_CHARACTERS.contains(nextCharacter)) { 1032 throwParseError( 1033 String.format( 1034 "Escaping character '%c' is not supported", 1035 nextCharacter), 1036 patternString, 1037 i); 1038 } 1039 1040 rawValue += nextCharacter; 1041 i += 2; 1042 continue; 1043 } else { 1044 throwParseError("Invalid character: ", patternString, i); 1045 } 1046 1047 case '{': 1048 if (curlyStarted) { 1049 throwParseError("Unexpected {: ", patternString, i); 1050 } 1051 curlyStarted = true; 1052 if (!rawValue.isEmpty()) { 1053 result.add(new NamePatternElement(rawValue)); 1054 rawValue = ""; 1055 } 1056 break; 1057 1058 case '}': 1059 if (!curlyStarted) { 1060 throwParseError("Unexpected }", patternString, i); 1061 } 1062 curlyStarted = false; 1063 if (rawValue.isEmpty()) { 1064 throwParseError("Empty field '{}' is not allowed ", patternString, i); 1065 } else { 1066 try { 1067 result.add(new NamePatternElement(ModifiedField.from(rawValue))); 1068 } catch (Exception e) { 1069 throwParseError("Invalid field: ", rawValue, 0); 1070 } 1071 rawValue = ""; 1072 } 1073 break; 1074 1075 default: 1076 rawValue += currentCharacter; 1077 break; 1078 } 1079 1080 i++; 1081 } 1082 1083 if (curlyStarted) { 1084 throwParseError("Unmatched {", patternString, patternString.length()); 1085 } 1086 if (!rawValue.isEmpty()) { 1087 result.add(new NamePatternElement(rawValue)); 1088 } 1089 1090 return result; 1091 } 1092 1093 private static String BAD_POSITION = "❌"; 1094 throwParseError(String message, String patternString, int i)1095 private static void throwParseError(String message, String patternString, int i) { 1096 throw new IllegalArgumentException( 1097 message 1098 + ": " 1099 + "«" 1100 + patternString.substring(0, i) 1101 + BAD_POSITION 1102 + patternString.substring(i) 1103 + "»"); 1104 } 1105 makeList(Object... elements2)1106 private static List<NamePatternElement> makeList(Object... elements2) { 1107 List<NamePatternElement> result = new ArrayList<>(); 1108 for (Object element : elements2) { 1109 result.add(NamePatternElement.from(element)); 1110 } 1111 return result; 1112 } 1113 1114 @Override toString()1115 public String toString() { 1116 StringBuilder result = new StringBuilder("\""); 1117 for (NamePatternElement element : elements) { 1118 if (element.literal != null) { 1119 for (final Character c : element.literal.toCharArray()) { 1120 if (ALLOWED_ESCAPED_CHARACTERS.contains(c)) { 1121 result.append('\\'); 1122 } 1123 result.append(c); 1124 } 1125 } else { 1126 result.append('{').append(element).append('}'); 1127 } 1128 } 1129 return result.append("\"").toString(); 1130 } 1131 1132 public static final Comparator<Iterable<NamePattern>> ITERABLE_COMPARE = 1133 Comparators.lexicographical(Comparator.<NamePattern>naturalOrder()); 1134 1135 @Override 1136 /** Compares first by fields, then by the string value (later case would be unusual) */ compareTo(NamePattern o)1137 public int compareTo(NamePattern o) { 1138 return ComparisonChain.start() 1139 .compare(rank, o.rank) 1140 .compare(fields, o.fields, Field.ITERABLE_COMPARE) 1141 .compare(elements, o.elements, NamePatternElement.ITERABLE_COMPARE) 1142 .result(); 1143 } 1144 1145 @Override equals(Object obj)1146 public boolean equals(Object obj) { 1147 return compareTo((NamePattern) obj) == 0; // no need to optimize 1148 } 1149 1150 @Override hashCode()1151 public int hashCode() { 1152 return Objects.hash(rank, fields, elements); 1153 } 1154 1155 /** 1156 * Utility for testing validity 1157 * 1158 * @return 1159 */ getFieldPositions()1160 public Multimap<Field, Integer> getFieldPositions() { 1161 Multimap<Field, Integer> result = TreeMultimap.create(); 1162 int i = -1; 1163 for (NamePatternElement element : elements) { 1164 ++i; 1165 if (element.literal == null) { 1166 result.put(element.modifiedField.field, i); 1167 } 1168 } 1169 return result; 1170 } 1171 1172 /** Get the number of elements (literals and modified fields) in the pattern. */ getElementCount()1173 public int getElementCount() { 1174 return elements.size(); 1175 } 1176 1177 /** Get the nth literal (or null if the nth element is a field) */ getLiteral(int index)1178 public String getLiteral(int index) { 1179 return elements.get(index).literal; 1180 } 1181 1182 /** Get the nth modified field (or null if the nth element is a literal) */ getModifiedField(int index)1183 public ModifiedField getModifiedField(int index) { 1184 return elements.get(index).modifiedField; 1185 } 1186 1187 /** 1188 * @internal 1189 */ firstLiteralContaining(String item)1190 public String firstLiteralContaining(String item) { 1191 for (NamePatternElement element : elements) { 1192 final String literal = element.literal; 1193 if (literal != null && literal.contains(item)) { 1194 return literal; 1195 } 1196 } 1197 return null; 1198 } 1199 1200 /** 1201 * Returns a list of (field, literal, field) that are inconsistent with the initialSeparator 1202 * (derived from the initialPattern) 1203 */ findInitialFailures(String _initialSeparator)1204 public ArrayList<List<String>> findInitialFailures(String _initialSeparator) { 1205 ArrayList<List<String>> failures; 1206 String initialSeparator = finalWhitespace(_initialSeparator); 1207 1208 // check that the literal between initial fields matches the initial pattern 1209 ModifiedField lastField = null; 1210 boolean lastFieldInitial = false; 1211 String lastLiteral = ""; 1212 failures = new ArrayList<>(); 1213 for (int i = 0; i < getElementCount(); ++i) { 1214 // we can have {field}<literal>{field} or {field}{field} 1215 ModifiedField field = getModifiedField(i); 1216 if (field == null) { 1217 lastLiteral = finalWhitespace(getLiteral(i)); 1218 } else { 1219 boolean currentFieldInitial = 1220 !Collections.disjoint(field.getModifiers(), Modifier.INITIALS); 1221 if (currentFieldInitial && lastFieldInitial) { 1222 if (!initialSeparator.equals(lastLiteral)) { 1223 failures.add( 1224 ImmutableList.of( 1225 lastField.toString(), lastLiteral, field.toString())); 1226 } 1227 } 1228 lastField = field; 1229 lastFieldInitial = currentFieldInitial; 1230 lastLiteral = ""; 1231 } 1232 } 1233 return failures; 1234 } 1235 1236 static final UnicodeSet WS = new UnicodeSet("\\p{whitespace}").freeze(); 1237 finalWhitespace(String string)1238 private String finalWhitespace(String string) { 1239 if (!string.isEmpty()) { 1240 int finalCp = string.codePointBefore(string.length()); 1241 if (WS.contains(finalCp)) { 1242 return UTF16.valueOf(finalCp); 1243 } 1244 } 1245 return ""; 1246 } 1247 } 1248 1249 /** 1250 * Input parameters, such as {length=long_name, formality=informal}. Unmentioned items are null, 1251 * and match any value. Passed in when formatting. 1252 */ 1253 public static class FormatParameters implements Comparable<FormatParameters> { 1254 private final Order order; 1255 private final Length length; 1256 private final Usage usage; 1257 private final Formality formality; 1258 1259 /** 1260 * Normally we don't often need to create one FormalParameters from another. The one 1261 * exception is the order, which comes from the NameObject. 1262 */ setOrder(Order order)1263 public FormatParameters setOrder(Order order) { 1264 return new FormatParameters(order, length, usage, formality); 1265 } 1266 1267 /** Get the order; null means "any order" */ getOrder()1268 public Order getOrder() { 1269 return order; 1270 } 1271 1272 /** Get the length; null means "any length" */ getLength()1273 public Length getLength() { 1274 return length; 1275 } 1276 1277 /** Get the usage; null means "any usage" */ getUsage()1278 public Usage getUsage() { 1279 return usage; 1280 } 1281 1282 /** Get the formality; null means "any formality" */ getFormality()1283 public Formality getFormality() { 1284 return formality; 1285 } 1286 matches(FormatParameters other)1287 public boolean matches(FormatParameters other) { 1288 return matchesOrder(other.order) 1289 && matchesLength(other.length) 1290 && matchesUsage(other.usage) 1291 && matchesFormality(other.formality); 1292 } 1293 1294 /** Utility methods for matching, taking into account that null matches anything */ matchesOrder(Order otherOrder)1295 public boolean matchesOrder(Order otherOrder) { 1296 return order == null || otherOrder == null || order == otherOrder; 1297 } 1298 matchesFormality(final Formality otherFormality)1299 public boolean matchesFormality(final Formality otherFormality) { 1300 return formality == null || otherFormality == null || formality == otherFormality; 1301 } 1302 matchesUsage(final Usage otherUsage)1303 public boolean matchesUsage(final Usage otherUsage) { 1304 return usage == null || otherUsage == null || usage == otherUsage; 1305 } 1306 matchesLength(final Length otherLength)1307 private boolean matchesLength(final Length otherLength) { 1308 return length == null || otherLength == null || length == otherLength; 1309 } 1310 FormatParameters(Order order, Length length, Usage usage, Formality formality)1311 public FormatParameters(Order order, Length length, Usage usage, Formality formality) { 1312 this.order = order; 1313 this.length = length; 1314 this.usage = usage; 1315 this.formality = formality; 1316 } 1317 1318 @Override toString()1319 public String toString() { 1320 List<String> items = new ArrayList<>(); 1321 if (order != null) { 1322 items.add("order='" + order + "'"); 1323 } 1324 if (length != null) { 1325 items.add("length='" + length + "'"); 1326 } 1327 if (usage != null) { 1328 items.add("usage='" + usage + "'"); 1329 } 1330 if (formality != null) { 1331 items.add("formality='" + formality + "'"); 1332 } 1333 return JOIN_SPACE.join(items); 1334 } 1335 abbreviated()1336 public String abbreviated() { 1337 List<String> items = new ArrayList<>(); 1338 if (order != null) { 1339 items.add(order.toString().substring(0, 3)); 1340 } 1341 if (length != null) { 1342 items.add(length.toString().substring(0, 3)); 1343 } 1344 if (usage != null) { 1345 items.add(usage.toString().substring(0, 3)); 1346 } 1347 if (formality != null) { 1348 items.add(formality.toString().substring(0, 3)); 1349 } 1350 return JOIN_DASH.join(items); 1351 } 1352 dashed()1353 public String dashed() { 1354 List<String> items = new ArrayList<>(); 1355 if (order != null) { 1356 items.add(order.toString()); 1357 } 1358 if (length != null) { 1359 items.add(length.toString()); 1360 } 1361 if (usage != null) { 1362 items.add(usage.toString()); 1363 } 1364 if (formality != null) { 1365 items.add(formality.toString()); 1366 } 1367 return JOIN_DASH.join(items); 1368 } 1369 from(String string)1370 public static FormatParameters from(String string) { 1371 Order order = null; 1372 Length length = null; 1373 Usage usage = null; 1374 Formality formality = null; 1375 for (String part : SPLIT_SEMI.split(string)) { 1376 if (part.isBlank()) { 1377 continue; 1378 } 1379 List<String> parts = SPLIT_EQUALS.splitToList(part); 1380 if (parts.size() != 2) { 1381 throw new IllegalArgumentException( 1382 "must be of form length=medium; formality=… : " + string); 1383 } 1384 final String key = parts.get(0); 1385 final String value = parts.get(1); 1386 switch (key) { 1387 case "order": 1388 order = Order.from(value); 1389 break; 1390 case "length": 1391 length = Length.from(value); 1392 break; 1393 case "usage": 1394 usage = Usage.from(value); 1395 break; 1396 case "formality": 1397 formality = Formality.from(value); 1398 break; 1399 default: 1400 throw new IllegalArgumentException( 1401 "Unknown key/value " + key + "=" + value + " in " + string); 1402 } 1403 } 1404 return new FormatParameters(order, length, usage, formality); 1405 } 1406 1407 // for thread-safe lazy evaluation 1408 private static class LazyEval { 1409 private static ImmutableSet<FormatParameters> DATA; 1410 private static ImmutableSet<FormatParameters> CLDR_DATA; 1411 1412 static { 1413 Set<FormatParameters> _data = new LinkedHashSet<>(); 1414 Set<FormatParameters> _cldrdata = new LinkedHashSet<>(); 1415 for (Order order : Order.values()) { 1416 for (Length length : Length.values()) { 1417 if (order == Order.sorting) { _cldrdata.add( new FormatParameters( order, length, Usage.referring, Formality.formal))1418 _cldrdata.add( 1419 new FormatParameters( 1420 order, length, Usage.referring, Formality.formal)); _cldrdata.add( new FormatParameters( order, length, Usage.referring, Formality.informal))1421 _cldrdata.add( 1422 new FormatParameters( 1423 order, length, Usage.referring, Formality.informal)); 1424 } 1425 for (Formality formality : Formality.values()) { 1426 for (Usage usage : Usage.values()) { _data.add(new FormatParameters(order, length, usage, formality))1427 _data.add(new FormatParameters(order, length, usage, formality)); 1428 if (order != Order.sorting) { _cldrdata.add( new FormatParameters(order, length, usage, formality))1429 _cldrdata.add( 1430 new FormatParameters(order, length, usage, formality)); 1431 } 1432 } 1433 } 1434 } 1435 } 1436 DATA = ImmutableSet.copyOf(_data); 1437 CLDR_DATA = ImmutableSet.copyOf(_cldrdata); 1438 } 1439 } 1440 1441 /** 1442 * Returns all possible combinations of fields. 1443 * 1444 * @return 1445 */ all()1446 public static ImmutableSet<FormatParameters> all() { 1447 return LazyEval.DATA; 1448 } 1449 1450 /** 1451 * Returns all possible combinations of fields supported by CLDR. (the order=sorting 1452 * combinations are abbreviated 1453 * 1454 * @return 1455 */ allCldr()1456 public static ImmutableSet<FormatParameters> allCldr() { 1457 return LazyEval.CLDR_DATA; 1458 } 1459 1460 @Override compareTo(FormatParameters other)1461 public int compareTo(FormatParameters other) { 1462 return ComparisonChain.start() 1463 .compare(order, other.order) 1464 .compare(length, other.length) 1465 .compare(usage, other.usage) 1466 .compare(formality, other.formality) 1467 .result(); 1468 } 1469 toLabel()1470 public String toLabel() { 1471 StringBuilder sb = new StringBuilder(); 1472 addToLabel(order, sb); 1473 addToLabel(length, sb); 1474 addToLabel(usage, sb); 1475 addToLabel(formality, sb); 1476 return sb.length() == 0 ? "any" : sb.toString(); 1477 } 1478 addToLabel(T item, StringBuilder sb)1479 private <T> void addToLabel(T item, StringBuilder sb) { 1480 if (item != null) { 1481 if (sb.length() != 0) { 1482 sb.append('-'); 1483 } 1484 sb.append(item.toString()); 1485 } 1486 } 1487 1488 /** 1489 * Only used to add missing CLDR fields. If an item is missing, get the best replacements. 1490 * 1491 * @return 1492 */ getFallbacks()1493 public Iterable<FormatParameters> getFallbacks() { 1494 return ImmutableList.of( 1495 new FormatParameters(order, length, null, formality), 1496 new FormatParameters(order, length, usage, null), 1497 new FormatParameters(order, length, null, null), 1498 new FormatParameters(order, null, null, null), 1499 new FormatParameters(null, null, null, null)); 1500 } 1501 1502 @Override equals(Object obj)1503 public boolean equals(Object obj) { 1504 FormatParameters that = (FormatParameters) obj; 1505 return Objects.equals(order, that.order) 1506 && Objects.equals(length, that.length) 1507 && Objects.equals(usage, that.usage) 1508 && Objects.equals(formality, that.formality); 1509 } 1510 1511 @Override hashCode()1512 public int hashCode() { 1513 return (length == null ? 0 : length.hashCode()) 1514 ^ (formality == null ? 0 : formality.hashCode()) 1515 ^ (usage == null ? 0 : usage.hashCode()) 1516 ^ (order == null ? 0 : order.hashCode()); 1517 } 1518 } 1519 1520 /** 1521 * Returns a match for the nameFormatParameters, or null if the parameterMatcherToNamePattern 1522 * has no match. 1523 */ getBestMatchSet( ListMultimap<FormatParameters, NamePattern> parameterMatcherToNamePattern, FormatParameters nameFormatParameters)1524 public static Collection<NamePattern> getBestMatchSet( 1525 ListMultimap<FormatParameters, NamePattern> parameterMatcherToNamePattern, 1526 FormatParameters nameFormatParameters) { 1527 for (Entry<FormatParameters, Collection<NamePattern>> parametersAndPatterns : 1528 parameterMatcherToNamePattern.asMap().entrySet()) { 1529 FormatParameters parameters = parametersAndPatterns.getKey(); 1530 if (parameters.matches(nameFormatParameters)) { 1531 return parametersAndPatterns.getValue(); 1532 } 1533 } 1534 return null; // This will only happen if the NamePatternData is incomplete 1535 } 1536 1537 /** 1538 * Data that maps from NameFormatParameters and a NameObject to the best NamePattern. It must be 1539 * complete: that is, it must match every possible value. Immutable 1540 * 1541 * @internal NOTE: CLDR needs access to this. 1542 */ 1543 public static class NamePatternData { 1544 private final ImmutableMap<ULocale, Order> localeToOrder; 1545 private final ImmutableListMultimap<FormatParameters, NamePattern> 1546 parameterMatcherToNamePattern; 1547 getBestMatch( NameObject nameObject, FormatParameters nameFormatParameters)1548 public NamePattern getBestMatch( 1549 NameObject nameObject, FormatParameters nameFormatParameters) { 1550 nameFormatParameters = deriveNameOrder(nameObject, nameFormatParameters); 1551 1552 NamePattern result = null; 1553 1554 Collection<NamePattern> namePatterns = 1555 getBestMatchSet(parameterMatcherToNamePattern, nameFormatParameters); 1556 if (namePatterns == null) { 1557 // Internal error, should never happen with valid data 1558 throw new IllegalArgumentException( 1559 "Can't find " 1560 + nameFormatParameters 1561 + " in " 1562 + parameterMatcherToNamePattern); 1563 } 1564 Set<Field> nameFields = nameObject.getAvailableFields(); 1565 int bestMatchSize = -1; 1566 1567 for (NamePattern pattern : namePatterns) { 1568 Set<Field> patternFields = pattern.getFields(); 1569 1570 int matchSize = getIntersectionSize(nameFields, patternFields); 1571 1572 if ((matchSize > bestMatchSize) /* better match */ 1573 || (matchSize == bestMatchSize 1574 && patternFields.size() 1575 < result 1576 .getFieldsSize()) /* equal match, but less "extra" fields */) { 1577 bestMatchSize = matchSize; 1578 result = pattern; 1579 } 1580 } 1581 1582 return result; 1583 } 1584 1585 /** 1586 * Follow the algorithm in tr35-personNames.md under ### Derive the name order 1587 * 1588 * @param nameObject 1589 * @param nameFormatParameters 1590 * @return nameFormatParameters, modified as necessary 1591 */ deriveNameOrder( NameObject nameObject, FormatParameters nameFormatParameters)1592 public FormatParameters deriveNameOrder( 1593 NameObject nameObject, FormatParameters nameFormatParameters) { 1594 if (nameFormatParameters.order != null) { 1595 return nameFormatParameters; 1596 } else { 1597 // Use CLDRLocale for getParent because we may update the getParent relation before 1598 // ICU has a chance to. 1599 Order mappedOrder = null; 1600 LanguageTagParser ltp = new LanguageTagParser(); 1601 CLDRLocale L1 = CLDRLocale.getInstance(nameObject.getNameLocale()); 1602 1603 while (true) { 1604 CLDRLocale L2 = 1605 CLDRLocale.getInstance( 1606 ltp.set(L1.toString()) 1607 // should be able to set an ltp from a CLDRLocale 1608 .setLanguage("und") 1609 .toString()); // should be able to create a CLDRLocale 1610 // from an ltp 1611 for (CLDRLocale L : Arrays.asList(L1, L2)) { 1612 // localeToOrder maps locales to orders, so is the equivalent of looking up 1613 // first in one 1614 // then in the other. Since the same string can't be in both, the order 1615 // actually doesn't matter. 1616 localeToOrder.get(new ULocale(L.toString())); 1617 if (mappedOrder != null) { 1618 break; 1619 } 1620 } 1621 1622 L1 = L1.getParent(); 1623 if (L1 == null) { 1624 mappedOrder = Order.givenFirst; 1625 break; 1626 } 1627 } 1628 return nameFormatParameters.setOrder(mappedOrder); 1629 } 1630 } 1631 1632 /** 1633 * Build the name pattern data. In the formatParametersToNamePattern: 1634 * 1635 * <ul> 1636 * <li>Every possible FormatParameters value must match at least one FormatParameters 1637 * <li>No FormatParameters is superfluous; the ones before it must not mask it. 1638 * </ul> 1639 * 1640 * The multimap values must retain the order they are built with! 1641 */ NamePatternData( ImmutableMap<ULocale, Order> localeToOrder, ListMultimap<FormatParameters, NamePattern> formatParametersToNamePattern)1642 public NamePatternData( 1643 ImmutableMap<ULocale, Order> localeToOrder, 1644 ListMultimap<FormatParameters, NamePattern> formatParametersToNamePattern) { 1645 1646 if (formatParametersToNamePattern == null || formatParametersToNamePattern.isEmpty()) { 1647 throw new IllegalArgumentException( 1648 "formatParametersToNamePattern must be non-null, non-empty"); 1649 } 1650 1651 this.localeToOrder = localeToOrder == null ? ImmutableMap.of() : localeToOrder; 1652 1653 FormatParameters lastKey = null; 1654 Set<FormatParameters> remaining = new LinkedHashSet<>(FormatParameters.all()); 1655 1656 // check that parameters are complete, and that nothing is masked by anything previous 1657 1658 for (Entry<FormatParameters, Collection<NamePattern>> entry : 1659 formatParametersToNamePattern.asMap().entrySet()) { 1660 FormatParameters key = entry.getKey(); 1661 Collection<NamePattern> values = entry.getValue(); 1662 1663 // TODO Mark No FormatParameters should be completely masked by any previous ones 1664 1665 // The following code starts with a list of all the items, and removes any that 1666 // match 1667 int matchCount = 0; 1668 for (Iterator<FormatParameters> rest = remaining.iterator(); rest.hasNext(); ) { 1669 FormatParameters item = rest.next(); 1670 if (key.matches(item)) { 1671 rest.remove(); 1672 ++matchCount; 1673 if (DEBUG) { 1674 System.out.println(" * " + item + " matches " + key); 1675 } 1676 } 1677 } 1678 if (matchCount == 0) { 1679 key.equals(lastKey); 1680 throw new IllegalArgumentException( 1681 "key is masked by previous values: " 1682 + key 1683 + ",\n\t" 1684 + JOIN_LFTB.join(formatParametersToNamePattern.entries())); 1685 } 1686 1687 // Each entry in FormatParameters must have at least one NamePattern 1688 if (values.isEmpty()) { 1689 throw new IllegalArgumentException("key has no values: " + key); 1690 } 1691 lastKey = key; 1692 } 1693 if (!remaining.isEmpty()) { 1694 throw new IllegalArgumentException( 1695 "values are not complete; they don't match:\n\t" 1696 + JOIN_LFTB.join(remaining)); 1697 } 1698 this.parameterMatcherToNamePattern = 1699 ImmutableListMultimap.copyOf(formatParametersToNamePattern); 1700 } 1701 getLocaleToOrder()1702 public Map<ULocale, Order> getLocaleToOrder() { 1703 return localeToOrder; 1704 } 1705 1706 /** Build from strings for ease of testing */ NamePatternData( ImmutableMap<ULocale, Order> localeToOrder, String... formatParametersToNamePatterns)1707 public NamePatternData( 1708 ImmutableMap<ULocale, Order> localeToOrder, 1709 String... formatParametersToNamePatterns) { 1710 this( 1711 localeToOrder, 1712 parseFormatParametersToNamePatterns(formatParametersToNamePatterns)); 1713 } 1714 1715 private static ListMultimap<FormatParameters, NamePattern> parseFormatParametersToNamePatterns(String... formatParametersToNamePatterns)1716 parseFormatParametersToNamePatterns(String... formatParametersToNamePatterns) { 1717 int count = formatParametersToNamePatterns.length; 1718 if ((count % 2) != 0) { 1719 throw new IllegalArgumentException( 1720 "Must have even number of strings, fields => pattern: " 1721 + Arrays.asList(formatParametersToNamePatterns)); 1722 } 1723 ListMultimap<FormatParameters, NamePattern> _formatParametersToNamePatterns = 1724 LinkedListMultimap.create(); 1725 int rank = 0; 1726 for (int i = 0; i < count; i += 2) { 1727 FormatParameters pm = FormatParameters.from(formatParametersToNamePatterns[i]); 1728 NamePattern np = NamePattern.from(rank++, formatParametersToNamePatterns[i + 1]); 1729 _formatParametersToNamePatterns.put(pm, np); 1730 } 1731 addMissing(_formatParametersToNamePatterns); 1732 1733 return _formatParametersToNamePatterns; 1734 } 1735 1736 @Override toString()1737 public String toString() { 1738 return "{" 1739 + (localeToOrder.isEmpty() ? "" : "localeToOrder=" + localeToOrder + "\n\t\t") 1740 + show(parameterMatcherToNamePattern) 1741 + "}"; 1742 } 1743 show(ImmutableListMultimap<FormatParameters, NamePattern> multimap)1744 private String show(ImmutableListMultimap<FormatParameters, NamePattern> multimap) { 1745 String result = multimap.asMap().toString(); 1746 return result.replace("], ", "],\n\t\t\t"); // for readability 1747 } 1748 1749 /** 1750 * For testing 1751 * 1752 * @internal 1753 */ getMatcherToPatterns()1754 public ImmutableListMultimap<FormatParameters, NamePattern> getMatcherToPatterns() { 1755 return parameterMatcherToNamePattern; 1756 } 1757 } 1758 1759 /** 1760 * Interface used by the person name formatter to access name field values. It provides access 1761 * not only to values for modified fields directly supported by the NameObject, but also to 1762 * values that may be produced or modified by the Name Object. 1763 */ 1764 public static interface NameObject { 1765 /** 1766 * Returns the locale of the name, or null if not available. NOTE: this is not the same as 1767 * the locale of the person name formatter. 1768 */ getNameLocale()1769 public ULocale getNameLocale(); 1770 /** Returns a mapping for the modified fields directly supported to their values. */ getModifiedFieldToValue()1771 public ImmutableMap<ModifiedField, String> getModifiedFieldToValue(); 1772 /** 1773 * Returns the set of fields directly supported. Should be overridden for speed. It returns 1774 * the same value as getModifiedFieldToValue().keySet().stream().map(x -> 1775 * x.field).collect(Collectors.toSet()), but may be optimized. 1776 */ getAvailableFields()1777 public Set<Field> getAvailableFields(); 1778 /** 1779 * Returns the best available value for the modified field, or null if nothing is available. 1780 * Null is returned in all and only those cases where 1781 * !getAvailableFields().contains(modifiedField.field) 1782 * 1783 * @param modifiedField the input modified field, for which the best value is fetched. 1784 * @param remainingModifers contains the set of modifiers that were not handled by this 1785 * method. The calling code may apply fallback algorithms based on these values. 1786 * @return 1787 */ getBestValue(ModifiedField modifiedField, Set<Modifier> remainingModifers)1788 public String getBestValue(ModifiedField modifiedField, Set<Modifier> remainingModifers); 1789 } 1790 1791 /** 1792 * Specialized NameObject that returns the given value instead of the surname value. Only used 1793 * for monograms. 1794 */ 1795 public static class GivenToSurnameNameObject implements NameObject { 1796 private final NameObject nameObject; 1797 GivenToSurnameNameObject(NameObject nameObject)1798 public GivenToSurnameNameObject(NameObject nameObject) { 1799 this.nameObject = nameObject; 1800 } 1801 1802 @Override getNameLocale()1803 public ULocale getNameLocale() { 1804 return nameObject.getNameLocale(); 1805 } 1806 1807 @Override getModifiedFieldToValue()1808 public ImmutableMap<ModifiedField, String> getModifiedFieldToValue() { 1809 throw new UnsupportedOperationException(); 1810 } 1811 1812 @Override getAvailableFields()1813 public Set<Field> getAvailableFields() { 1814 Set<Field> temp = EnumSet.copyOf(nameObject.getAvailableFields()); 1815 temp.add(Field.surname); 1816 return temp; 1817 } 1818 1819 @Override getBestValue(ModifiedField modifiedField, Set<Modifier> remainingModifers)1820 public String getBestValue(ModifiedField modifiedField, Set<Modifier> remainingModifers) { 1821 switch (modifiedField.getField()) { 1822 case surname: 1823 modifiedField = new ModifiedField(Field.given, modifiedField.getModifiers()); 1824 break; 1825 case given: 1826 return null; 1827 } 1828 return nameObject.getBestValue(modifiedField, remainingModifers); 1829 } 1830 } 1831 1832 /** Transforms the fields based upon a supplied function. */ 1833 public static class TransformingNameObject implements NameObject { 1834 NameObject other; 1835 Function<String, String> stringTransform; 1836 TransformingNameObject(NameObject other, Function<String, String> stringTransform)1837 public TransformingNameObject(NameObject other, Function<String, String> stringTransform) { 1838 this.other = other; 1839 this.stringTransform = stringTransform; 1840 } 1841 TransformingNameObject(NameObject other, Transliterator t)1842 public TransformingNameObject(NameObject other, Transliterator t) { 1843 this(other, x -> t.transform(x)); 1844 } 1845 1846 @Override getNameLocale()1847 public ULocale getNameLocale() { 1848 return other.getNameLocale(); 1849 } 1850 1851 @Override getModifiedFieldToValue()1852 public ImmutableMap<ModifiedField, String> getModifiedFieldToValue() { 1853 throw new IllegalArgumentException("Not needed"); 1854 } 1855 1856 @Override getAvailableFields()1857 public Set<Field> getAvailableFields() { 1858 return other.getAvailableFields(); 1859 } 1860 1861 @Override getBestValue(ModifiedField modifiedField, Set<Modifier> remainingModifers)1862 public String getBestValue(ModifiedField modifiedField, Set<Modifier> remainingModifers) { 1863 String best = other.getBestValue(modifiedField, remainingModifers); 1864 return best == null ? null : stringTransform.apply(best); 1865 } 1866 } 1867 1868 private final NamePatternData namePatternMap; 1869 private final FallbackFormatter fallbackFormatter; 1870 1871 @Override toString()1872 public String toString() { 1873 return namePatternMap.toString(); 1874 } 1875 1876 /** 1877 * @internal 1878 */ getNamePatternData()1879 public final NamePatternData getNamePatternData() { 1880 return namePatternMap; 1881 } 1882 1883 /** 1884 * Create a formatter directly from data. NOTE CLDR will need to have access to this creation 1885 * method. 1886 * 1887 * @internal 1888 */ PersonNameFormatter( NamePatternData namePatternMap, FallbackFormatter fallbackFormatter)1889 public PersonNameFormatter( 1890 NamePatternData namePatternMap, FallbackFormatter fallbackFormatter) { 1891 this.namePatternMap = namePatternMap; 1892 this.fallbackFormatter = fallbackFormatter; 1893 } 1894 1895 /** 1896 * Create a formatter from a CLDR file. 1897 * 1898 * @internal 1899 */ PersonNameFormatter(CLDRFile cldrFile)1900 public PersonNameFormatter(CLDRFile cldrFile) { 1901 ListMultimap<FormatParameters, NamePattern> formatParametersToNamePattern = 1902 LinkedListMultimap.create(); 1903 Set<Pair<FormatParameters, NamePattern>> ordered = new TreeSet<>(); 1904 String initialPattern = null; 1905 String initialSequencePattern = null; 1906 String foreignSpaceReplacement = " "; 1907 String nativeSpaceReplacement = " "; 1908 Map<String, Enum> parameterDefaults = new TreeMap<>(); 1909 Map<ULocale, Order> _localeToOrder = new TreeMap<>(); 1910 1911 // read out the data and order it properly 1912 for (String path : cldrFile) { 1913 if (path.startsWith("//ldml/personNames") && !path.endsWith("/alias")) { 1914 String value = cldrFile.getStringValue(path); 1915 // System.out.println(path + ",\t" + value); 1916 XPathParts parts = XPathParts.getFrozenInstance(path); 1917 switch (parts.getElement(2)) { 1918 case "personName": 1919 Pair<FormatParameters, NamePattern> pair = fromPathValue(parts, value); 1920 boolean added = ordered.add(pair); 1921 if (!added) { 1922 throw new IllegalArgumentException("Duplicate path/value " + pair); 1923 } 1924 break; 1925 case "initialPattern": 1926 // ldml/personNames/initialPattern[@type="initial"] 1927 String type = parts.getAttributeValue(-1, "type"); 1928 switch (type) { 1929 case "initial": 1930 initialPattern = value; 1931 break; 1932 case "initialSequence": 1933 initialSequencePattern = value; 1934 break; 1935 default: 1936 throw new IllegalArgumentException("Unexpected path: " + path); 1937 } 1938 break; 1939 case "nameOrderLocales": 1940 // ldml/personNames/nameOrderLocales[@order="givenFirst"], value = list of 1941 // locales 1942 for (String locale : SPLIT_SPACE.split(value)) { 1943 Order order = Order.from(parts.getAttributeValue(-1, "order")); 1944 _localeToOrder.put(new ULocale(locale), order); 1945 } 1946 break; 1947 case "foreignSpaceReplacement": 1948 foreignSpaceReplacement = value; 1949 break; 1950 case "nativeSpaceReplacement": 1951 nativeSpaceReplacement = value; 1952 break; 1953 case "sampleName": 1954 // skip 1955 break; 1956 case "parameterDefault": 1957 final String setting = parts.getAttributeValue(-1, "parameter"); 1958 Enum parameterDefault = null; 1959 switch (setting) { 1960 case "length": 1961 parameterDefault = Length.from(value); 1962 break; 1963 case "formality": 1964 parameterDefault = Formality.from(value); 1965 break; 1966 } 1967 parameterDefaults.put(setting, parameterDefault); 1968 break; 1969 default: 1970 throw new IllegalArgumentException("Unexpected path: " + path); 1971 } 1972 } 1973 } 1974 for (Pair<FormatParameters, NamePattern> entry : ordered) { 1975 formatParametersToNamePattern.put(entry.getFirst(), entry.getSecond()); 1976 } 1977 addMissing(formatParametersToNamePattern); 1978 1979 ImmutableMap<ULocale, Order> localeToOrder = ImmutableMap.copyOf(_localeToOrder); 1980 this.namePatternMap = new NamePatternData(localeToOrder, formatParametersToNamePattern); 1981 this.fallbackFormatter = 1982 new FallbackFormatter( 1983 new ULocale(cldrFile.getLocaleID()), 1984 initialPattern, 1985 initialSequencePattern, 1986 foreignSpaceReplacement, 1987 nativeSpaceReplacement, 1988 parameterDefaults, 1989 false); 1990 } 1991 1992 /** 1993 * Add items that are not in the pattern, using the fallbacks. TODO: can generalize; if we have 1994 * order=x ... formality=y, and a later value that matches except with formality=null, and 1995 * nothing in between matches, can drop the first 1996 */ addMissing( ListMultimap<FormatParameters, NamePattern> formatParametersToNamePattern)1997 private static void addMissing( 1998 ListMultimap<FormatParameters, NamePattern> formatParametersToNamePattern) { 1999 for (FormatParameters formatParameters : FormatParameters.all()) { 2000 Collection<NamePattern> namePatterns = 2001 getBestMatchSet(formatParametersToNamePattern, formatParameters); 2002 if (namePatterns == null) { 2003 for (FormatParameters fallback : formatParameters.getFallbacks()) { 2004 namePatterns = getBestMatchSet(formatParametersToNamePattern, fallback); 2005 if (namePatterns != null) { 2006 formatParametersToNamePattern.putAll(fallback, namePatterns); 2007 break; 2008 } 2009 } 2010 if (namePatterns == null) { 2011 throw new IllegalArgumentException("Missing fallback for " + formatParameters); 2012 } 2013 } 2014 } 2015 } 2016 2017 /** 2018 * Main function for formatting names. 2019 * 2020 * @param nameObject — A name object, which supplies data. 2021 * @param nameFormatParameters - The specification of which parameters are desired. 2022 * @return formatted string TODO make most public methods be @internal (public but just for 2023 * testing). The NameObject and FormatParameters are exceptions. TODO decide how to allow 2024 * clients to customize data in the name object. Options: a. Leave it to implementers (eg 2025 * they can write a FilteredNameObject that changes some fields). b. Pass in explicit 2026 * override parameters, like whether to uppercase the surname in surnameFirst. TODO decide 2027 * whether/how to allow clients to customize the built-in data (namePatternData, 2028 * fallbackFormatter) a. CLDR will need to be be able to customize it completely. b. Clients 2029 * may want to set the contextual uppercasing of surnames, the handling of which locales 2030 * cause surnameFirst, etc. 2031 */ format(NameObject nameObject, FormatParameters nameFormatParameters)2032 public String format(NameObject nameObject, FormatParameters nameFormatParameters) { 2033 // look through the namePatternMap to find the best match for the set of modifiers and the 2034 // available nameObject fields 2035 NamePattern bestPattern = namePatternMap.getBestMatch(nameObject, nameFormatParameters); 2036 // then format using it 2037 return bestPattern.format(nameObject, nameFormatParameters, fallbackFormatter); 2038 } 2039 formatWithoutSuperscripts( NameObject nameObject, FormatParameters nameFormatParameters)2040 public String formatWithoutSuperscripts( 2041 NameObject nameObject, FormatParameters nameFormatParameters) { 2042 return format(nameObject, nameFormatParameters) 2043 .replace("ᵛ", "") // remove two special CLDR ST hacks 2044 .replace("ᵍ", ""); 2045 } 2046 2047 /** 2048 * For testing 2049 * 2050 * @internal 2051 */ getBestMatchSet(FormatParameters nameFormatParameters)2052 public Collection<NamePattern> getBestMatchSet(FormatParameters nameFormatParameters) { 2053 return getBestMatchSet(namePatternMap.parameterMatcherToNamePattern, nameFormatParameters); 2054 } 2055 2056 /** 2057 * Utility for constructing data from path and value. 2058 * 2059 * @internal 2060 */ fromPathValue( XPathParts parts, String value)2061 public static Pair<FormatParameters, NamePattern> fromPathValue( 2062 XPathParts parts, String value) { 2063 // ldml/personNames/personName[@length="long"][@usage="referring"][@order="sorting"]/namePattern[alt="2"] 2064 // value = {surname}, {given} {given2} {suffix} 2065 final String altValue = parts.getAttributeValue(-1, "alt"); 2066 int rank = altValue == null ? 0 : Integer.parseInt(altValue); 2067 FormatParameters pm = 2068 new FormatParameters( 2069 Order.from(parts.getAttributeValue(-2, "order")), 2070 Length.from(parts.getAttributeValue(-2, "length")), 2071 Usage.from(parts.getAttributeValue(-2, "usage")), 2072 Formality.from(parts.getAttributeValue(-2, "formality"))); 2073 2074 NamePattern np = NamePattern.from(rank, value); 2075 if (np.toString().isBlank()) { 2076 throw new IllegalArgumentException("No empty patterns allowed: " + pm); 2077 } 2078 return Pair.of(pm, np); 2079 } 2080 getOrderFromLocale(ULocale inputLocale)2081 public Order getOrderFromLocale(ULocale inputLocale) { 2082 Map<ULocale, Order> localeToOrder = getNamePatternData().getLocaleToOrder(); 2083 ULocale myLocale = inputLocale; 2084 while (true) { 2085 Order result = localeToOrder.get(myLocale); 2086 if (result != null) { 2087 return result; 2088 } 2089 String parentLocaleString = LocaleIDParser.getParent(myLocale.toString()); 2090 if (XMLSource.ROOT_ID.equals(parentLocaleString)) { 2091 break; 2092 } 2093 myLocale = new ULocale(parentLocaleString); 2094 } 2095 // if my locale is not in the locale chain, it is probably because 2096 // we have a case like hi_Latn, which has a different base language. 2097 // So try the truncation: 2098 myLocale = inputLocale; 2099 while (true) { 2100 Order result = localeToOrder.get(myLocale); 2101 if (result != null) { 2102 return result; 2103 } 2104 String parentLocaleString = LanguageTagParser.getSimpleParent(myLocale.toString()); 2105 if (parentLocaleString.isEmpty()) { 2106 break; 2107 } 2108 myLocale = new ULocale(parentLocaleString); 2109 } 2110 return null; 2111 } 2112 2113 /** 2114 * Utility for getting sample names. DOES NOT CACHE 2115 * 2116 * @param cldrFile 2117 * @return 2118 * @internal 2119 */ loadSampleNames(CLDRFile cldrFile)2120 public static Map<SampleType, SimpleNameObject> loadSampleNames(CLDRFile cldrFile) { 2121 M3<SampleType, ModifiedField, String> names = 2122 ChainedMap.of( 2123 new TreeMap<SampleType, Object>(), 2124 new TreeMap<ModifiedField, Object>(), 2125 String.class); 2126 for (String path : cldrFile) { 2127 if (path.startsWith("//ldml/personNames/sampleName")) { 2128 // ldml/personNames/sampleName[@item="full"]/nameField[@type="prefix"] 2129 String value = cldrFile.getStringValue(path); 2130 if (value != null && !value.equals("∅∅∅")) { 2131 XPathParts parts = XPathParts.getFrozenInstance(path); 2132 names.put( 2133 SampleType.valueOf(parts.getAttributeValue(-2, "item")), 2134 ModifiedField.from(parts.getAttributeValue(-1, "type")), 2135 value); 2136 } 2137 } 2138 } 2139 2140 Map<SampleType, SimpleNameObject> result = new TreeMap<>(); 2141 final String fileLocale = cldrFile.getLocaleID(); 2142 final ULocale nativeLocale = new ULocale(fileLocale); 2143 final ULocale foreignLocale = 2144 new ULocale(fileLocale.equals("es") || fileLocale.startsWith("es_") ? "nl" : "es"); 2145 for (Entry<SampleType, Map<ModifiedField, String>> entry : names) { 2146 SampleType key = entry.getKey(); 2147 ULocale nameLocale = key.isNative() ? nativeLocale : foreignLocale; 2148 SimpleNameObject name = new SimpleNameObject(nameLocale, entry.getValue()); 2149 result.put(entry.getKey(), name); 2150 } 2151 2152 // // add special foreign name for non-spacing languages 2153 // LanguageTagParser ltp = new LanguageTagParser(); 2154 // SimpleNameObject extraName = 2155 // FOREIGN_NAME_FOR_NON_SPACING.get(ltp.set(cldrFile.getLocaleID()).getLanguageScript()); 2156 // if (extraName != null) { 2157 // result.put(SampleType.foreignGGS, extraName); 2158 // } 2159 return ImmutableMap.copyOf(result); 2160 } 2161 2162 /** General Utility Avoids object creation in Sets.intersection(a,b).size() */ getIntersectionSize(Set<T> set1, Set<T> set2)2163 public static <T> int getIntersectionSize(Set<T> set1, Set<T> set2) { 2164 int size = 0; 2165 for (T e : set1) { 2166 if (set2.contains(e)) { 2167 size++; 2168 } 2169 } 2170 return size; 2171 } 2172 2173 private static final CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE = 2174 CaseMap.toTitle().wholeString().noLowercase(); 2175 getFallbackInfo()2176 public FallbackFormatter getFallbackInfo() { 2177 return fallbackFormatter; 2178 } 2179 } 2180