1 /* 2 ******************************************************************************* 3 * Copyright (C) 1996-2014, International Business Machines Corporation and * 4 * others. All Rights Reserved. * 5 ******************************************************************************* 6 */ 7 package org.unicode.cldr.util.props; 8 9 import com.ibm.icu.dev.util.UnicodeMap; 10 import com.ibm.icu.impl.Utility; 11 import com.ibm.icu.text.SymbolTable; 12 import com.ibm.icu.text.UFormat; 13 import com.ibm.icu.text.UTF16; 14 import com.ibm.icu.text.UnicodeMatcher; 15 import com.ibm.icu.text.UnicodeSet; 16 import com.ibm.icu.text.UnicodeSetIterator; 17 import java.io.PrintWriter; 18 import java.io.StringWriter; 19 import java.text.ParsePosition; 20 import java.util.ArrayList; 21 import java.util.Arrays; 22 import java.util.Collection; 23 import java.util.Comparator; 24 import java.util.HashMap; 25 import java.util.Iterator; 26 import java.util.LinkedHashSet; 27 import java.util.List; 28 import java.util.Map; 29 import java.util.Set; 30 import java.util.TreeMap; 31 import java.util.function.Predicate; 32 import java.util.regex.Pattern; 33 34 public abstract class UnicodeProperty extends UnicodeLabel { 35 36 public static final UnicodeSet NONCHARACTERS = 37 new UnicodeSet("[:noncharactercodepoint:]").freeze(); 38 public static final UnicodeSet PRIVATE_USE = new UnicodeSet("[:gc=privateuse:]").freeze(); 39 public static final UnicodeSet SURROGATE = new UnicodeSet("[:gc=surrogate:]").freeze(); 40 41 public static final UnicodeSet HIGH_SURROGATES = new UnicodeSet("[\\uD800-\\uDB7F]").freeze(); 42 public static final int SAMPLE_HIGH_SURROGATE = HIGH_SURROGATES.charAt(0); 43 public static final UnicodeSet HIGH_PRIVATE_USE_SURROGATES = 44 new UnicodeSet("[\\uDB80-\\uDBFF]").freeze(); 45 public static final int SAMPLE_HIGH_PRIVATE_USE_SURROGATE = 46 HIGH_PRIVATE_USE_SURROGATES.charAt(0); 47 public static final UnicodeSet LOW_SURROGATES = new UnicodeSet("[\\uDC00-\\uDFFF]").freeze(); 48 public static final int SAMPLE_LOW_SURROGATE = LOW_SURROGATES.charAt(0); 49 50 public static final UnicodeSet PRIVATE_USE_AREA = new UnicodeSet("[\\uE000-\\uF8FF]").freeze(); 51 public static final int SAMPLE_PRIVATE_USE_AREA = PRIVATE_USE_AREA.charAt(0); 52 public static final UnicodeSet PRIVATE_USE_AREA_A = 53 new UnicodeSet("[\\U000F0000-\\U000FFFFD]").freeze(); 54 public static final int SAMPLE_PRIVATE_USE_AREA_A = PRIVATE_USE_AREA_A.charAt(0); 55 public static final UnicodeSet PRIVATE_USE_AREA_B = 56 new UnicodeSet("[\\U00100000-\\U0010FFFD]").freeze(); 57 public static final int SAMPLE_PRIVATE_USE_AREA_B = PRIVATE_USE_AREA_B.charAt(0); 58 59 // The following are special. They are used for performance, but must be changed if the version 60 // of Unicode for the UnicodeProperty changes. 61 private static UnicodeSet UNASSIGNED; 62 private static int SAMPLE_UNASSIGNED; 63 private static UnicodeSet SPECIALS; 64 private static UnicodeSet STUFF_TO_TEST; 65 private static UnicodeSet STUFF_TO_TEST_WITH_UNASSIGNED; 66 getUNASSIGNED()67 public static synchronized UnicodeSet getUNASSIGNED() { 68 if (UNASSIGNED == null) { 69 UNASSIGNED = new UnicodeSet("[:gc=unassigned:]").freeze(); 70 } 71 return UNASSIGNED; 72 } 73 contractUNASSIGNED(UnicodeSet toBeUnassigned)74 public static synchronized UnicodeSet contractUNASSIGNED(UnicodeSet toBeUnassigned) { 75 UnicodeSet temp = UNASSIGNED; 76 ResetCacheProperties(); 77 UNASSIGNED = 78 temp == null 79 ? toBeUnassigned.freeze() 80 : new UnicodeSet(temp).retainAll(toBeUnassigned).freeze(); 81 return UNASSIGNED; 82 } 83 getSAMPLE_UNASSIGNED()84 public static synchronized int getSAMPLE_UNASSIGNED() { 85 if (SAMPLE_UNASSIGNED == 0) { 86 SAMPLE_UNASSIGNED = getUNASSIGNED().charAt(0); 87 } 88 return SAMPLE_UNASSIGNED; 89 } 90 getSPECIALS()91 public static synchronized UnicodeSet getSPECIALS() { 92 if (SPECIALS == null) { 93 SPECIALS = 94 new UnicodeSet(getUNASSIGNED()).addAll(PRIVATE_USE).addAll(SURROGATE).freeze(); 95 } 96 return SPECIALS; 97 } 98 getSTUFF_TO_TEST()99 public static synchronized UnicodeSet getSTUFF_TO_TEST() { 100 if (STUFF_TO_TEST == null) { 101 STUFF_TO_TEST = 102 new UnicodeSet(getSPECIALS()) 103 .complement() 104 .addAll(NONCHARACTERS) 105 .add(getSAMPLE_UNASSIGNED()) 106 .add(SAMPLE_HIGH_SURROGATE) 107 .add(SAMPLE_HIGH_PRIVATE_USE_SURROGATE) 108 .add(SAMPLE_LOW_SURROGATE) 109 .add(SAMPLE_PRIVATE_USE_AREA) 110 .add(SAMPLE_PRIVATE_USE_AREA_A) 111 .add(SAMPLE_PRIVATE_USE_AREA_B) 112 .freeze(); 113 } 114 return STUFF_TO_TEST; 115 } 116 getSTUFF_TO_TEST_WITH_UNASSIGNED()117 public static synchronized UnicodeSet getSTUFF_TO_TEST_WITH_UNASSIGNED() { 118 if (STUFF_TO_TEST_WITH_UNASSIGNED == null) { 119 STUFF_TO_TEST_WITH_UNASSIGNED = 120 new UnicodeSet(getSTUFF_TO_TEST()).addAll(getUNASSIGNED()).freeze(); 121 } 122 return STUFF_TO_TEST_WITH_UNASSIGNED; 123 } 124 125 /** 126 * Reset the cache properties. Must be done if the version of Unicode is different than the ICU 127 * one, AND any UnicodeProperty has already been instantiated. TODO make this a bit more robust. 128 * 129 * @internal 130 */ ResetCacheProperties()131 public static synchronized void ResetCacheProperties() { 132 UNASSIGNED = null; 133 SAMPLE_UNASSIGNED = 0; 134 SPECIALS = null; 135 STUFF_TO_TEST = null; 136 STUFF_TO_TEST_WITH_UNASSIGNED = null; 137 } 138 139 public static boolean DEBUG = false; 140 141 public static String CHECK_NAME = "FC_NFKC_Closure"; 142 143 public static int CHECK_VALUE = 0x037A; 144 145 private String name; 146 147 private String firstNameAlias = null; 148 149 private int type; 150 151 private Map valueToFirstValueAlias = null; 152 153 private boolean hasUniformUnassigned = true; 154 155 /* 156 * Name: Unicode_1_Name Name: ISO_Comment Name: Name Name: Unicode_1_Name 157 * 158 */ 159 160 public static final int UNKNOWN = 0, 161 BINARY = 2, 162 EXTENDED_BINARY = 3, 163 ENUMERATED = 4, 164 EXTENDED_ENUMERATED = 5, 165 CATALOG = 6, 166 EXTENDED_CATALOG = 7, 167 MISC = 8, 168 EXTENDED_MISC = 9, 169 STRING = 10, 170 EXTENDED_STRING = 11, 171 NUMERIC = 12, 172 EXTENDED_NUMERIC = 13, 173 START_TYPE = 2, 174 LIMIT_TYPE = 14, 175 EXTENDED_MASK = 1, 176 CORE_MASK = ~EXTENDED_MASK, 177 BINARY_MASK = (1 << BINARY) | (1 << EXTENDED_BINARY), 178 STRING_MASK = (1 << STRING) | (1 << EXTENDED_STRING), 179 STRING_OR_MISC_MASK = 180 (1 << STRING) | (1 << EXTENDED_STRING) | (1 << MISC) | (1 << EXTENDED_MISC), 181 ENUMERATED_OR_CATALOG_MASK = 182 (1 << ENUMERATED) 183 | (1 << EXTENDED_ENUMERATED) 184 | (1 << CATALOG) 185 | (1 << EXTENDED_CATALOG); 186 187 private static final String[] TYPE_NAMES = { 188 "Unknown", 189 "Unknown", 190 "Binary", 191 "Extended Binary", 192 "Enumerated", 193 "Extended Enumerated", 194 "Catalog", 195 "Extended Catalog", 196 "Miscellaneous", 197 "Extended Miscellaneous", 198 "String", 199 "Extended String", 200 "Numeric", 201 "Extended Numeric", 202 }; 203 getTypeName(int propType)204 public static String getTypeName(int propType) { 205 return TYPE_NAMES[propType]; 206 } 207 getName()208 public final String getName() { 209 return name; 210 } 211 getType()212 public final int getType() { 213 return type; 214 } 215 getTypeName()216 public String getTypeName() { 217 return TYPE_NAMES[type]; 218 } 219 isType(int mask)220 public final boolean isType(int mask) { 221 return ((1 << type) & mask) != 0; 222 } 223 setName(String string)224 protected final void setName(String string) { 225 if (string == null) throw new IllegalArgumentException("Name must not be null"); 226 name = string; 227 } 228 setType(int i)229 protected final void setType(int i) { 230 type = i; 231 } 232 getVersion()233 public String getVersion() { 234 return _getVersion(); 235 } 236 getValue(int codepoint)237 public String getValue(int codepoint) { 238 if (DEBUG && CHECK_VALUE == codepoint && CHECK_NAME.equals(getName())) { 239 String value = _getValue(codepoint); 240 System.out.println( 241 getName() 242 + "(" 243 + Utility.hex(codepoint) 244 + "):" 245 + (getType() == STRING ? Utility.hex(value) : value)); 246 return value; 247 } 248 return _getValue(codepoint); 249 } 250 251 // public String getValue(int codepoint, boolean isShort) { 252 // return getValue(codepoint); 253 // } 254 getNameAliases(List<String> result)255 public List<String> getNameAliases(List<String> result) { 256 if (result == null) result = new ArrayList(1); 257 return _getNameAliases(result); 258 } 259 getValueAliases(String valueAlias, List<String> result)260 public List<String> getValueAliases(String valueAlias, List<String> result) { 261 if (result == null) result = new ArrayList(1); 262 result = _getValueAliases(valueAlias, result); 263 if (!result.contains(valueAlias)) { // FIX && type < NUMERIC 264 result = _getValueAliases(valueAlias, result); // for debugging 265 throw new IllegalArgumentException( 266 "Internal error: " 267 + getName() 268 + " doesn't contain " 269 + valueAlias 270 + ": " 271 + new BagFormatter().join(result)); 272 } 273 return result; 274 } 275 getAvailableValues(List<String> result)276 public List<String> getAvailableValues(List<String> result) { 277 if (result == null) result = new ArrayList(1); 278 return _getAvailableValues(result); 279 } 280 _getVersion()281 protected abstract String _getVersion(); 282 _getValue(int codepoint)283 protected abstract String _getValue(int codepoint); 284 _getNameAliases(List<String> result)285 protected abstract List<String> _getNameAliases(List<String> result); 286 _getValueAliases(String valueAlias, List<String> result)287 protected abstract List<String> _getValueAliases(String valueAlias, List<String> result); 288 _getAvailableValues(List<String> result)289 protected abstract List<String> _getAvailableValues(List<String> result); 290 291 // conveniences getNameAliases()292 public final List<String> getNameAliases() { 293 return getNameAliases(null); 294 } 295 getValueAliases(String valueAlias)296 public final List<String> getValueAliases(String valueAlias) { 297 return getValueAliases(valueAlias, null); 298 } 299 getAvailableValues()300 public final List<String> getAvailableValues() { 301 return getAvailableValues(null); 302 } 303 304 @Override getValue(int codepoint, boolean getShortest)305 public final String getValue(int codepoint, boolean getShortest) { 306 String result = getValue(codepoint); 307 if (type >= MISC || result == null || !getShortest) return result; 308 return getFirstValueAlias(result); 309 } 310 getFirstNameAlias()311 public final String getFirstNameAlias() { 312 if (firstNameAlias == null) { 313 firstNameAlias = getNameAliases().get(0); 314 } 315 return firstNameAlias; 316 } 317 getFirstValueAlias(String value)318 public final String getFirstValueAlias(String value) { 319 if (valueToFirstValueAlias == null) _getFirstValueAliasCache(); 320 return valueToFirstValueAlias.get(value).toString(); 321 } 322 _getFirstValueAliasCache()323 private void _getFirstValueAliasCache() { 324 maxValueWidth = 0; 325 maxFirstValueAliasWidth = 0; 326 valueToFirstValueAlias = new HashMap(1); 327 Iterator it = getAvailableValues().iterator(); 328 while (it.hasNext()) { 329 String value = (String) it.next(); 330 String first = getValueAliases(value).get(0); 331 if (first == null) { // internal error 332 throw new IllegalArgumentException("Value not in value aliases: " + value); 333 } 334 if (DEBUG && CHECK_NAME.equals(getName())) { 335 System.out.println( 336 "First Alias: " 337 + getName() 338 + ": " 339 + value 340 + " => " 341 + first 342 + new BagFormatter().join(getValueAliases(value))); 343 } 344 valueToFirstValueAlias.put(value, first); 345 if (value.length() > maxValueWidth) { 346 maxValueWidth = value.length(); 347 } 348 if (first.length() > maxFirstValueAliasWidth) { 349 maxFirstValueAliasWidth = first.length(); 350 } 351 } 352 } 353 354 private int maxValueWidth = -1; 355 356 private int maxFirstValueAliasWidth = -1; 357 358 @Override getMaxWidth(boolean getShortest)359 public int getMaxWidth(boolean getShortest) { 360 if (maxValueWidth < 0) _getFirstValueAliasCache(); 361 if (getShortest) return maxFirstValueAliasWidth; 362 return maxValueWidth; 363 } 364 getSet(String propertyValue)365 public final UnicodeSet getSet(String propertyValue) { 366 return getSet(propertyValue, null); 367 } 368 getSet(PatternMatcher matcher)369 public final UnicodeSet getSet(PatternMatcher matcher) { 370 return getSet(matcher, null); 371 } 372 373 /** 374 * Adds the property value set to the result. Clear the result first if you don't want to keep 375 * the original contents. 376 */ getSet(String propertyValue, UnicodeSet result)377 public final UnicodeSet getSet(String propertyValue, UnicodeSet result) { 378 if (isType(BINARY_MASK)) { 379 if (binaryYesSet == null) { 380 binaryYesSet = getSet(YES_MATCHER, null); 381 } 382 Boolean binValue = binaryValueOrNull(propertyValue); 383 if (binValue == Boolean.TRUE) { 384 if (result == null) { 385 return binaryYesSet.cloneAsThawed(); 386 } else { 387 return result.addAll(binaryYesSet); 388 } 389 } else if (binValue == Boolean.FALSE && !unicodeMapHasStringKeys) { 390 // We could cache this directly, too. 391 UnicodeSet inverse = binaryYesSet.cloneAsThawed().complement(); 392 if (result == null) { 393 return inverse; 394 } else { 395 return result.addAll(inverse); 396 } 397 } 398 } 399 Predicate<String> matcher = 400 isType(STRING_OR_MISC_MASK) 401 ? new StringEqualityMatcher(propertyValue) 402 : new NameMatcher(propertyValue); 403 return getSet(matcher, result); 404 } 405 binaryValueOrNull(String value)406 private static final Boolean binaryValueOrNull(String value) { 407 if ("Yes".equals(value)) { // fastpath 408 return Boolean.TRUE; 409 } 410 if (value == null) { 411 return null; 412 } 413 switch (toSkeleton(value)) { 414 case "n": 415 case "no": 416 case "f": 417 case "false": 418 return Boolean.FALSE; 419 case "y": 420 case "yes": 421 case "t": 422 case "true": 423 return Boolean.TRUE; 424 default: 425 return null; 426 } 427 } 428 429 private UnicodeMap unicodeMap = null; 430 private boolean unicodeMapHasStringKeys = false; 431 private UnicodeSet binaryYesSet = null; 432 433 public static final String UNUSED = "??"; 434 getSet(Predicate matcher, UnicodeSet result)435 public UnicodeSet getSet(Predicate matcher, UnicodeSet result) { 436 if (result == null) result = new UnicodeSet(); 437 boolean uniformUnassigned = hasUniformUnassigned(); 438 if (isType(STRING_OR_MISC_MASK)) { 439 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); 440 usi.next(); ) { // int i = 0; i <= 0x10FFFF; ++i 441 int i = usi.codepoint; 442 String value = getValue(i); 443 if (value != null && matcher.test(value)) { 444 result.add(i); 445 } 446 } 447 return addUntested(result, uniformUnassigned); 448 } 449 List temp = new ArrayList(1); // to avoid reallocating... 450 UnicodeMap um = getUnicodeMap_internal(); 451 Iterator it = um.getAvailableValues(null).iterator(); 452 main: 453 while (it.hasNext()) { 454 String value = (String) it.next(); 455 if (matcher.test(value)) { // fastpath 456 um.keySet(value, result); 457 continue main; 458 } 459 temp.clear(); 460 Iterator it2 = getValueAliases(value, temp).iterator(); 461 while (it2.hasNext()) { 462 String value2 = (String) it2.next(); 463 // System.out.println("Values:" + value2); 464 if (matcher.test(value2) || matcher.test(toSkeleton(value2))) { 465 um.keySet(value, result); 466 continue main; 467 } 468 } 469 } 470 return result; 471 } 472 473 /* 474 * public UnicodeSet getMatchSet(UnicodeSet result) { if (result == null) 475 * result = new UnicodeSet(); addAll(matchIterator, result); return result; } 476 * 477 * public void setMatchSet(UnicodeSet set) { matchIterator = new 478 * UnicodeSetIterator(set); } 479 */ 480 481 /** Utility for debugging */ getStack()482 public static String getStack() { 483 Exception e = new Exception(); 484 StringWriter sw = new StringWriter(); 485 PrintWriter pw = new PrintWriter(sw); 486 e.printStackTrace(pw); 487 pw.flush(); 488 return "Showing Stack with fake " + sw.getBuffer().toString(); 489 } 490 491 // TODO use this instead of plain strings 492 public static class Name implements Comparable { 493 private String skeleton; 494 495 private String pretty; 496 497 public final int RAW = 0, TITLE = 1, NORMAL = 2; 498 Name(String name, int style)499 public Name(String name, int style) { 500 if (name == null) name = ""; 501 if (style == RAW) { 502 skeleton = pretty = name; 503 } else { 504 pretty = regularize(name, style == TITLE); 505 skeleton = toSkeleton(pretty); 506 } 507 } 508 509 @Override compareTo(Object o)510 public int compareTo(Object o) { 511 return skeleton.compareTo(((Name) o).skeleton); 512 } 513 514 @Override equals(Object o)515 public boolean equals(Object o) { 516 return skeleton.equals(((Name) o).skeleton); 517 } 518 519 @Override hashCode()520 public int hashCode() { 521 return skeleton.hashCode(); 522 } 523 524 @Override toString()525 public String toString() { 526 return pretty; 527 } 528 } 529 530 /** 531 * @return the unicode map 532 */ getUnicodeMap()533 public UnicodeMap getUnicodeMap() { 534 return getUnicodeMap(false); 535 } 536 537 /** 538 * @return the unicode map 539 */ getUnicodeMap(boolean getShortest)540 public UnicodeMap getUnicodeMap(boolean getShortest) { 541 if (!getShortest) return getUnicodeMap_internal().cloneAsThawed(); 542 UnicodeMap result = new UnicodeMap(); 543 boolean uniformUnassigned = hasUniformUnassigned(); 544 545 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); 546 usi.next(); ) { // int i = 0; i <= 0x10FFFF; ++i 547 int i = usi.codepoint; 548 // if (DEBUG && i == 0x41) System.out.println(i + "\t" + 549 // getValue(i)); 550 String value = getValue(i, true); 551 result.put(i, value); 552 } 553 return addUntested(result, uniformUnassigned); 554 } 555 556 /** 557 * @return the unicode map 558 */ getUnicodeMap_internal()559 public UnicodeMap getUnicodeMap_internal() { 560 if (unicodeMap == null) { 561 unicodeMap = _getUnicodeMap(); 562 Set<String> stringKeys = unicodeMap.stringKeys(); 563 unicodeMapHasStringKeys = stringKeys != null && !stringKeys.isEmpty(); 564 } 565 return unicodeMap; 566 } 567 _getUnicodeMap()568 protected UnicodeMap _getUnicodeMap() { 569 UnicodeMap result = new UnicodeMap(); 570 HashMap myIntern = new HashMap(); 571 boolean uniformUnassigned = hasUniformUnassigned(); 572 573 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); 574 usi.next(); ) { // int i = 0; i <= 0x10FFFF; ++i 575 int i = usi.codepoint; 576 // if (DEBUG && i == 0x41) System.out.println(i + "\t" + 577 // getValue(i)); 578 String value = getValue(i); 579 String iValue = (String) myIntern.get(value); 580 if (iValue == null) myIntern.put(value, iValue = value); 581 result.put(i, iValue); 582 } 583 addUntested(result, uniformUnassigned); 584 585 if (DEBUG) { 586 for (UnicodeSetIterator usi = getStuffToTest(uniformUnassigned); 587 usi.next(); ) { // int i = 0; i <= 0x10FFFF; ++i 588 int i = usi.codepoint; 589 // if (DEBUG && i == 0x41) System.out.println(i + "\t" + 590 // getValue(i)); 591 String value = getValue(i); 592 String resultValue = (String) result.getValue(i); 593 if (!value.equals(resultValue)) { 594 throw new RuntimeException("Value failure at: " + Utility.hex(i)); 595 } 596 } 597 } 598 if (DEBUG && CHECK_NAME.equals(getName())) { 599 System.out.println(getName() + ":\t" + getClass().getName() + "\t" + getVersion()); 600 System.out.println(getStack()); 601 System.out.println(result); 602 } 603 return result; 604 } 605 getStuffToTest(boolean uniformUnassigned)606 private static UnicodeSetIterator getStuffToTest(boolean uniformUnassigned) { 607 return new UnicodeSetIterator( 608 uniformUnassigned ? getSTUFF_TO_TEST() : getSTUFF_TO_TEST_WITH_UNASSIGNED()); 609 } 610 611 /** Really ought to create a Collection UniqueList, that forces uniqueness. But for now... */ addUnique(Object obj, Collection result)612 public static Collection addUnique(Object obj, Collection result) { 613 if (obj != null && !result.contains(obj)) result.add(obj); 614 return result; 615 } 616 617 /** Utility for managing property & non-string value aliases */ 618 public static final Comparator PROPERTY_COMPARATOR = 619 new Comparator() { 620 @Override 621 public int compare(Object o1, Object o2) { 622 return compareNames((String) o1, (String) o2); 623 } 624 }; 625 626 /** Utility for managing property & non-string value aliases */ 627 // TODO optimize equalNames(String a, String b)628 public static boolean equalNames(String a, String b) { 629 if (a == b) return true; 630 if (a == null) return false; 631 return toSkeleton(a).equals(toSkeleton(b)); 632 } 633 634 /** Utility for managing property & non-string value aliases */ 635 // TODO optimize compareNames(String a, String b)636 public static int compareNames(String a, String b) { 637 if (a == b) return 0; 638 if (a == null) return -1; 639 if (b == null) return 1; 640 return toSkeleton(a).compareTo(toSkeleton(b)); 641 } 642 643 /** Utility for managing property & non-string value aliases */ 644 // TODO account for special names, tibetan, hangul toSkeleton(String source)645 public static String toSkeleton(String source) { 646 if (source == null) return null; 647 StringBuilder skeletonBuffer = new StringBuilder(); 648 boolean gotOne = false; 649 // remove spaces, '_', '-' 650 // we can do this with char, since no surrogates are involved 651 for (int i = 0; i < source.length(); ++i) { 652 char ch = source.charAt(i); 653 if (i > 0 && (ch == '_' || ch == ' ' || ch == '-')) { 654 gotOne = true; 655 } else { 656 char ch2 = Character.toLowerCase(ch); 657 if (ch2 != ch) { 658 gotOne = true; 659 skeletonBuffer.append(ch2); 660 } else { 661 skeletonBuffer.append(ch); 662 } 663 } 664 } 665 if (!gotOne) return source; // avoid string creation 666 return skeletonBuffer.toString(); 667 } 668 669 // get the name skeleton toNameSkeleton(String source)670 public static String toNameSkeleton(String source) { 671 if (source == null) return null; 672 StringBuffer result = new StringBuffer(); 673 // remove spaces, medial '-' 674 // we can do this with char, since no surrogates are involved 675 for (int i = 0; i < source.length(); ++i) { 676 char ch = source.charAt(i); 677 if (('0' <= ch && ch <= '9') || ('A' <= ch && ch <= 'Z') || ch == '<' || ch == '>') { 678 result.append(ch); 679 } else if (ch == ' ') { 680 // don't copy ever 681 } else if (ch == '-') { 682 // only copy non-medials AND trailing O-E 683 if (0 == i 684 || i == source.length() - 1 685 || source.charAt(i - 1) == ' ' 686 || source.charAt(i + 1) == ' ' 687 || (i == source.length() - 2 688 && source.charAt(i - 1) == 'O' 689 && source.charAt(i + 1) == 'E')) { 690 System.out.println("****** EXCEPTION " + source); 691 result.append(ch); 692 } 693 // otherwise don't copy 694 } else { 695 throw new IllegalArgumentException( 696 "Illegal Name Char: U+" + Utility.hex(ch) + ", " + ch); 697 } 698 } 699 return result.toString(); 700 } 701 702 /** 703 * These routines use the Java functions, because they only need to act on ASCII Changes space, 704 * - into _, inserts _ between lower and UPPER. 705 */ regularize(String source, boolean titlecaseStart)706 public static String regularize(String source, boolean titlecaseStart) { 707 if (source == null) return source; 708 /* 709 * if (source.equals("noBreak")) { // HACK if (titlecaseStart) return 710 * "NoBreak"; return source; } 711 */ 712 StringBuffer result = new StringBuffer(); 713 int lastCat = -1; 714 boolean haveFirstCased = true; 715 for (int i = 0; i < source.length(); ++i) { 716 char c = source.charAt(i); 717 if (c == ' ' || c == '-' || c == '_') { 718 c = '_'; 719 haveFirstCased = true; 720 } 721 if (c == '=') haveFirstCased = true; 722 int cat = Character.getType(c); 723 if (lastCat == Character.LOWERCASE_LETTER && cat == Character.UPPERCASE_LETTER) { 724 result.append('_'); 725 } 726 if (haveFirstCased 727 && (cat == Character.LOWERCASE_LETTER 728 || cat == Character.TITLECASE_LETTER 729 || cat == Character.UPPERCASE_LETTER)) { 730 if (titlecaseStart) { 731 c = Character.toUpperCase(c); 732 } 733 haveFirstCased = false; 734 } 735 result.append(c); 736 lastCat = cat; 737 } 738 return result.toString(); 739 } 740 741 /** 742 * Utility function for comparing codepoint to string without generating new string. 743 * 744 * @param codepoint 745 * @param other 746 * @return true if the codepoint equals the string 747 */ equals(int codepoint, String other)748 public static final boolean equals(int codepoint, String other) { 749 if (other == null) return false; 750 if (other.length() == 1) { 751 return codepoint == other.charAt(0); 752 } 753 if (other.length() == 2) { 754 return other.equals(UTF16.valueOf(codepoint)); 755 } 756 return false; 757 } 758 759 /** Utility function for comparing objects that may be null string. */ equals(T a, T b)760 public static final <T extends Object> boolean equals(T a, T b) { 761 return a == null ? b == null : b == null ? false : a.equals(b); 762 } 763 764 /** 765 * Utility that should be on UnicodeSet 766 * 767 * @param source 768 * @param result 769 */ addAll(UnicodeSetIterator source, UnicodeSet result)770 public static void addAll(UnicodeSetIterator source, UnicodeSet result) { 771 while (source.nextRange()) { 772 if (source.codepoint == UnicodeSetIterator.IS_STRING) { 773 result.add(source.string); 774 } else { 775 result.add(source.codepoint, source.codepointEnd); 776 } 777 } 778 } 779 780 /** Really ought to create a Collection UniqueList, that forces uniqueness. But for now... */ addAllUnique(Collection source, Collection result)781 public static Collection addAllUnique(Collection source, Collection result) { 782 for (Iterator it = source.iterator(); it.hasNext(); ) { 783 addUnique(it.next(), result); 784 } 785 return result; 786 } 787 788 /** Really ought to create a Collection UniqueList, that forces uniqueness. But for now... */ addAllUnique(Object[] source, Collection result)789 public static Collection addAllUnique(Object[] source, Collection result) { 790 for (int i = 0; i < source.length; ++i) { 791 addUnique(source[i], result); 792 } 793 return result; 794 } 795 796 public static class Factory { 797 static boolean DEBUG = false; 798 799 Map<String, UnicodeProperty> canonicalNames = new TreeMap<>(); 800 801 Map skeletonNames = new TreeMap(); 802 803 Map propertyCache = new HashMap(1); 804 add(UnicodeProperty sp)805 public final Factory add(UnicodeProperty sp) { 806 String name2 = sp.getName(); 807 if (name2.length() == 0) { 808 throw new IllegalArgumentException(); 809 } 810 canonicalNames.put(name2, sp); 811 skeletonNames.put(toSkeleton(name2), sp); 812 List c = sp.getNameAliases(new ArrayList(1)); 813 Iterator it = c.iterator(); 814 while (it.hasNext()) { 815 skeletonNames.put(toSkeleton((String) it.next()), sp); 816 } 817 return this; 818 } 819 getProperty(String propertyAlias)820 public UnicodeProperty getProperty(String propertyAlias) { 821 return (UnicodeProperty) skeletonNames.get(toSkeleton(propertyAlias)); 822 } 823 getAvailableNames()824 public final List<String> getAvailableNames() { 825 return getAvailableNames(null); 826 } 827 getAvailableNames(List<String> result)828 public final List<String> getAvailableNames(List<String> result) { 829 if (result == null) result = new ArrayList(1); 830 Iterator it = canonicalNames.keySet().iterator(); 831 while (it.hasNext()) { 832 addUnique(it.next(), result); 833 } 834 return result; 835 } 836 getAvailableNames(int propertyTypeMask)837 public final List getAvailableNames(int propertyTypeMask) { 838 return getAvailableNames(propertyTypeMask, null); 839 } 840 getAvailableNames(int propertyTypeMask, List result)841 public final List getAvailableNames(int propertyTypeMask, List result) { 842 if (result == null) result = new ArrayList(1); 843 Iterator it = canonicalNames.keySet().iterator(); 844 while (it.hasNext()) { 845 String item = (String) it.next(); 846 UnicodeProperty property = getProperty(item); 847 if (DEBUG) System.out.println("Properties: " + item + "," + property.getType()); 848 if (!property.isType(propertyTypeMask)) { 849 // System.out.println("Masking: " + property.getType() + "," 850 // + propertyTypeMask); 851 continue; 852 } 853 addUnique(property.getName(), result); 854 } 855 return result; 856 } 857 858 InversePatternMatcher inverseMatcher = new InversePatternMatcher(); 859 860 /** Format is: propname ('=' | '!=') propvalue ( '|' propValue )* */ getSet( String propAndValue, PatternMatcher matcher, UnicodeSet result)861 public final UnicodeSet getSet( 862 String propAndValue, PatternMatcher matcher, UnicodeSet result) { 863 int equalPos = propAndValue.indexOf('='); 864 String prop = propAndValue.substring(0, equalPos); 865 String value = propAndValue.substring(equalPos + 1); 866 boolean negative = false; 867 if (prop.endsWith("!")) { 868 prop = prop.substring(0, prop.length() - 1); 869 negative = true; 870 } 871 prop = prop.trim(); 872 UnicodeProperty up = getProperty(prop); 873 if (matcher == null) { 874 matcher = 875 new SimpleMatcher( 876 value, up.isType(STRING_OR_MISC_MASK) ? null : PROPERTY_COMPARATOR); 877 } 878 if (negative) { 879 inverseMatcher.set(matcher); 880 matcher = inverseMatcher; 881 } 882 return up.getSet(matcher.set(value), result); 883 } 884 getSet(String propAndValue, PatternMatcher matcher)885 public final UnicodeSet getSet(String propAndValue, PatternMatcher matcher) { 886 return getSet(propAndValue, matcher, null); 887 } 888 getSet(String propAndValue)889 public final UnicodeSet getSet(String propAndValue) { 890 return getSet(propAndValue, null, null); 891 } 892 getSymbolTable(String prefix)893 public final SymbolTable getSymbolTable(String prefix) { 894 return new PropertySymbolTable(prefix); 895 } 896 897 private class MyXSymbolTable extends UnicodeSet.XSymbolTable { 898 @Override applyPropertyAlias( String propertyName, String propertyValue, UnicodeSet result)899 public boolean applyPropertyAlias( 900 String propertyName, String propertyValue, UnicodeSet result) { 901 if (false) System.out.println(propertyName + "=" + propertyValue); 902 UnicodeProperty prop = getProperty(propertyName); 903 if (prop == null) return false; 904 result.clear(); 905 UnicodeSet x = prop.getSet(propertyValue, result); 906 return x.size() != 0; 907 } 908 } 909 getXSymbolTable()910 public final UnicodeSet.XSymbolTable getXSymbolTable() { 911 return new MyXSymbolTable(); 912 } 913 914 private class PropertySymbolTable implements SymbolTable { 915 static final boolean DEBUG = false; 916 917 private String prefix; 918 919 RegexMatcher regexMatcher = new RegexMatcher(); 920 PropertySymbolTable(String prefix)921 PropertySymbolTable(String prefix) { 922 this.prefix = prefix; 923 } 924 925 @Override lookup(String s)926 public char[] lookup(String s) { 927 if (DEBUG) System.out.println("\t(" + prefix + ")Looking up " + s); 928 // ensure, again, that prefix matches 929 int start = prefix.length(); 930 if (!s.regionMatches(true, 0, prefix, 0, start)) return null; 931 932 int pos = s.indexOf(':', start); 933 if (pos < 0) { // should never happen 934 throw new IllegalArgumentException("Internal Error: missing =: " + s + "\r\n"); 935 } 936 UnicodeProperty prop = getProperty(s.substring(start, pos)); 937 if (prop == null) { 938 throw new IllegalArgumentException( 939 "Invalid Property in: " 940 + s 941 + "\r\nUse " 942 + showSet(getAvailableNames())); 943 } 944 String value = s.substring(pos + 1); 945 UnicodeSet set; 946 if (value.startsWith("\u00AB")) { // regex! 947 set = prop.getSet(regexMatcher.set(value.substring(1, value.length() - 1))); 948 } else { 949 set = prop.getSet(value); 950 } 951 if (set.size() == 0) { 952 throw new IllegalArgumentException( 953 "Empty Property-Value in: " 954 + s 955 + "\r\nUse " 956 + showSet(prop.getAvailableValues())); 957 } 958 if (DEBUG) System.out.println("\t(" + prefix + ")Returning " + set.toPattern(true)); 959 return set.toPattern(true).toCharArray(); // really ugly 960 } 961 showSet(List list)962 private String showSet(List list) { 963 StringBuffer result = new StringBuffer("["); 964 boolean first = true; 965 for (Iterator it = list.iterator(); it.hasNext(); ) { 966 if (!first) result.append(", "); 967 else first = false; 968 result.append(it.next().toString()); 969 } 970 result.append("]"); 971 return result.toString(); 972 } 973 974 @Override lookupMatcher(int ch)975 public UnicodeMatcher lookupMatcher(int ch) { 976 return null; 977 } 978 979 @Override parseReference(String text, ParsePosition pos, int limit)980 public String parseReference(String text, ParsePosition pos, int limit) { 981 if (DEBUG) 982 System.out.println( 983 "\t(" 984 + prefix 985 + ")Parsing <" 986 + text.substring(pos.getIndex(), limit) 987 + ">"); 988 int start = pos.getIndex(); 989 // ensure that it starts with 'prefix' 990 if (!text.regionMatches(true, start, prefix, 0, prefix.length())) return null; 991 start += prefix.length(); 992 // now see if it is of the form identifier:identifier 993 int i = getIdentifier(text, start, limit); 994 if (i == start) return null; 995 String prop = text.substring(start, i); 996 String value = "true"; 997 if (i < limit) { 998 if (text.charAt(i) == ':') { 999 int j; 1000 if (text.charAt(i + 1) == '\u00AB') { // regular 1001 // expression 1002 j = text.indexOf('\u00BB', i + 2) + 1; // include 1003 // last 1004 // character 1005 if (j <= 0) return null; 1006 } else { 1007 j = getIdentifier(text, i + 1, limit); 1008 } 1009 value = text.substring(i + 1, j); 1010 i = j; 1011 } 1012 } 1013 pos.setIndex(i); 1014 if (DEBUG) 1015 System.out.println("\t(" + prefix + ")Parsed <" + prop + ">=<" + value + ">"); 1016 return prefix + prop + ":" + value; 1017 } 1018 getIdentifier(String text, int start, int limit)1019 private int getIdentifier(String text, int start, int limit) { 1020 if (DEBUG) System.out.println("\tGetID <" + text.substring(start, limit) + ">"); 1021 int cp = 0; 1022 int i; 1023 for (i = start; i < limit; i += UTF16.getCharCount(cp)) { 1024 cp = UTF16.charAt(text, i); 1025 if (!com.ibm.icu.lang.UCharacter.isUnicodeIdentifierPart(cp) && cp != '.') { 1026 break; 1027 } 1028 } 1029 if (DEBUG) System.out.println("\tGotID <" + text.substring(start, i) + ">"); 1030 return i; 1031 } 1032 } 1033 } 1034 1035 public static class FilteredProperty extends UnicodeProperty { 1036 private UnicodeProperty property; 1037 1038 protected StringFilter filter; 1039 1040 protected UnicodeSetIterator matchIterator = 1041 new UnicodeSetIterator(new UnicodeSet(0, 0x10FFFF)); 1042 1043 protected HashMap backmap; 1044 1045 boolean allowValueAliasCollisions = false; 1046 FilteredProperty(UnicodeProperty property, StringFilter filter)1047 public FilteredProperty(UnicodeProperty property, StringFilter filter) { 1048 this.property = property; 1049 this.filter = filter; 1050 } 1051 getFilter()1052 public StringFilter getFilter() { 1053 return filter; 1054 } 1055 setFilter(StringFilter filter)1056 public UnicodeProperty setFilter(StringFilter filter) { 1057 this.filter = filter; 1058 return this; 1059 } 1060 1061 List temp = new ArrayList(1); 1062 1063 @Override _getAvailableValues(List result)1064 public List _getAvailableValues(List result) { 1065 temp.clear(); 1066 return filter.addUnique(property.getAvailableValues(temp), result); 1067 } 1068 1069 @Override _getNameAliases(List result)1070 public List _getNameAliases(List result) { 1071 temp.clear(); 1072 return filter.addUnique(property.getNameAliases(temp), result); 1073 } 1074 1075 @Override _getValue(int codepoint)1076 public String _getValue(int codepoint) { 1077 return filter.remap(property.getValue(codepoint)); 1078 } 1079 1080 @Override _getValueAliases(String valueAlias, List result)1081 public List _getValueAliases(String valueAlias, List result) { 1082 if (backmap == null) { 1083 backmap = new HashMap(1); 1084 temp.clear(); 1085 Iterator it = property.getAvailableValues(temp).iterator(); 1086 while (it.hasNext()) { 1087 String item = (String) it.next(); 1088 String mappedItem = filter.remap(item); 1089 if (backmap.get(mappedItem) != null && !allowValueAliasCollisions) { 1090 throw new IllegalArgumentException( 1091 "Filter makes values collide! " + item + ", " + mappedItem); 1092 } 1093 backmap.put(mappedItem, item); 1094 } 1095 } 1096 valueAlias = (String) backmap.get(valueAlias); 1097 temp.clear(); 1098 return filter.addUnique(property.getValueAliases(valueAlias, temp), result); 1099 } 1100 1101 @Override _getVersion()1102 public String _getVersion() { 1103 return property.getVersion(); 1104 } 1105 isAllowValueAliasCollisions()1106 public boolean isAllowValueAliasCollisions() { 1107 return allowValueAliasCollisions; 1108 } 1109 setAllowValueAliasCollisions(boolean b)1110 public FilteredProperty setAllowValueAliasCollisions(boolean b) { 1111 allowValueAliasCollisions = b; 1112 return this; 1113 } 1114 } 1115 1116 public abstract static class StringFilter implements Cloneable { remap(String original)1117 public abstract String remap(String original); 1118 addUnique(Collection source, List result)1119 public final List addUnique(Collection source, List result) { 1120 if (result == null) result = new ArrayList(1); 1121 Iterator it = source.iterator(); 1122 while (it.hasNext()) { 1123 UnicodeProperty.addUnique(remap((String) it.next()), result); 1124 } 1125 return result; 1126 } 1127 /* 1128 * public Object clone() { try { return super.clone(); } catch 1129 * (CloneNotSupportedException e) { throw new 1130 * IllegalStateException("Should never happen."); } } 1131 */ 1132 } 1133 1134 public static class MapFilter extends StringFilter { 1135 private Map valueMap; 1136 MapFilter(Map valueMap)1137 public MapFilter(Map valueMap) { 1138 this.valueMap = valueMap; 1139 } 1140 1141 @Override remap(String original)1142 public String remap(String original) { 1143 Object changed = valueMap.get(original); 1144 return changed == null ? original : (String) changed; 1145 } 1146 getMap()1147 public Map getMap() { 1148 return valueMap; 1149 } 1150 } 1151 1152 /** Matches using .equals(). */ 1153 private static final class StringEqualityMatcher implements Predicate<String> { 1154 private final String pattern; 1155 StringEqualityMatcher(String pattern)1156 StringEqualityMatcher(String pattern) { 1157 this.pattern = pattern; 1158 } 1159 1160 @Override test(String value)1161 public boolean test(String value) { 1162 return pattern.equals(value); 1163 } 1164 } 1165 1166 /** Matches skeleton strings. Computes the pattern skeleton only once. */ 1167 private static final class NameMatcher implements Predicate<String> { 1168 private final String pattern; 1169 private final String skeleton; 1170 NameMatcher(String pattern)1171 NameMatcher(String pattern) { 1172 this.pattern = pattern; 1173 this.skeleton = toSkeleton(pattern); 1174 } 1175 1176 @Override test(String value)1177 public boolean test(String value) { 1178 return pattern.equals(value) || skeleton.equals(toSkeleton(value)); 1179 } 1180 } 1181 1182 private static final NameMatcher YES_MATCHER = new NameMatcher("Yes"); 1183 1184 public interface PatternMatcher extends Predicate { set(String pattern)1185 PatternMatcher set(String pattern); 1186 } 1187 1188 public static class InversePatternMatcher implements PatternMatcher { 1189 PatternMatcher other; 1190 set(PatternMatcher toInverse)1191 public PatternMatcher set(PatternMatcher toInverse) { 1192 other = toInverse; 1193 return this; 1194 } 1195 1196 @Override test(Object value)1197 public boolean test(Object value) { 1198 return !other.test(value); 1199 } 1200 1201 @Override set(String pattern)1202 public PatternMatcher set(String pattern) { 1203 other.set(pattern); 1204 return this; 1205 } 1206 } 1207 1208 public static class SimpleMatcher implements PatternMatcher { 1209 Comparator comparator; 1210 1211 String pattern; 1212 SimpleMatcher(String pattern, Comparator comparator)1213 public SimpleMatcher(String pattern, Comparator comparator) { 1214 this.comparator = comparator; 1215 this.pattern = pattern; 1216 } 1217 1218 @Override test(Object value)1219 public boolean test(Object value) { 1220 if (comparator == null) return pattern.equals(value); 1221 return comparator.compare(pattern, value) == 0; 1222 } 1223 1224 @Override set(String pattern)1225 public PatternMatcher set(String pattern) { 1226 this.pattern = pattern; 1227 return this; 1228 } 1229 } 1230 1231 public static class RegexMatcher implements UnicodeProperty.PatternMatcher { 1232 private java.util.regex.Matcher matcher; 1233 1234 @Override set(String pattern)1235 public UnicodeProperty.PatternMatcher set(String pattern) { 1236 matcher = Pattern.compile(pattern).matcher(""); 1237 return this; 1238 } 1239 1240 UFormat foo; 1241 1242 @Override test(Object value)1243 public boolean test(Object value) { 1244 matcher.reset(value.toString()); 1245 return matcher.find(); 1246 } 1247 } 1248 1249 public enum AliasAddAction { 1250 IGNORE_IF_MISSING, 1251 REQUIRE_MAIN_ALIAS, 1252 ADD_MAIN_ALIAS 1253 } 1254 1255 public abstract static class BaseProperty extends UnicodeProperty { 1256 private static final String[] NO_VALUES = {"No", "N", "F", "False"}; 1257 1258 private static final String[] YES_VALUES = {"Yes", "Y", "T", "True"}; 1259 1260 /** */ 1261 private static final String[][] YES_NO_ALIASES = new String[][] {YES_VALUES, NO_VALUES}; 1262 1263 protected List propertyAliases = new ArrayList(1); 1264 1265 protected Map toValueAliases; 1266 1267 protected String version; 1268 setMain( String alias, String shortAlias, int propertyType, String version)1269 public BaseProperty setMain( 1270 String alias, String shortAlias, int propertyType, String version) { 1271 setName(alias); 1272 setType(propertyType); 1273 propertyAliases.add(shortAlias); 1274 propertyAliases.add(alias); 1275 if (propertyType == BINARY) { 1276 addValueAliases(YES_NO_ALIASES, AliasAddAction.ADD_MAIN_ALIAS); 1277 } 1278 this.version = version; 1279 return this; 1280 } 1281 1282 @Override _getVersion()1283 public String _getVersion() { 1284 return version; 1285 } 1286 1287 @Override _getNameAliases(List result)1288 public List _getNameAliases(List result) { 1289 addAllUnique(propertyAliases, result); 1290 return result; 1291 } 1292 addValueAliases( String[][] valueAndAlternates, AliasAddAction aliasAddAction)1293 public BaseProperty addValueAliases( 1294 String[][] valueAndAlternates, AliasAddAction aliasAddAction) { 1295 if (toValueAliases == null) _fixValueAliases(); 1296 for (int i = 0; i < valueAndAlternates.length; ++i) { 1297 for (int j = 1; j < valueAndAlternates[0].length; ++j) { 1298 addValueAlias( 1299 valueAndAlternates[i][0], valueAndAlternates[i][j], aliasAddAction); 1300 } 1301 } 1302 return this; 1303 } 1304 addValueAlias(String value, String valueAlias, AliasAddAction aliasAddAction)1305 public void addValueAlias(String value, String valueAlias, AliasAddAction aliasAddAction) { 1306 List result = (List) toValueAliases.get(value); 1307 if (result == null) { 1308 switch (aliasAddAction) { 1309 case IGNORE_IF_MISSING: 1310 return; 1311 case REQUIRE_MAIN_ALIAS: 1312 throw new IllegalArgumentException( 1313 "Can't add alias for mising value: " + value); 1314 case ADD_MAIN_ALIAS: 1315 toValueAliases.put(value, result = new ArrayList(0)); 1316 break; 1317 } 1318 } 1319 addUnique(value, result); 1320 addUnique(valueAlias, result); 1321 } 1322 1323 @Override _getValueAliases(String valueAlias, List result)1324 protected List _getValueAliases(String valueAlias, List result) { 1325 if (toValueAliases == null) _fixValueAliases(); 1326 List a = (List) toValueAliases.get(valueAlias); 1327 if (a != null) addAllUnique(a, result); 1328 return result; 1329 } 1330 _fixValueAliases()1331 protected void _fixValueAliases() { 1332 if (toValueAliases == null) toValueAliases = new HashMap(1); 1333 for (Iterator it = getAvailableValues().iterator(); it.hasNext(); ) { 1334 Object value = it.next(); 1335 _ensureValueInAliases(value); 1336 } 1337 } 1338 _ensureValueInAliases(Object value)1339 protected void _ensureValueInAliases(Object value) { 1340 List result = (List) toValueAliases.get(value); 1341 if (result == null) toValueAliases.put(value, result = new ArrayList(1)); 1342 addUnique(value, result); 1343 } 1344 swapFirst2ValueAliases()1345 public BaseProperty swapFirst2ValueAliases() { 1346 for (Iterator it = toValueAliases.keySet().iterator(); it.hasNext(); ) { 1347 List list = (List) toValueAliases.get(it.next()); 1348 if (list.size() < 2) continue; 1349 Object first = list.get(0); 1350 list.set(0, list.get(1)); 1351 list.set(1, first); 1352 } 1353 return this; 1354 } 1355 1356 /** 1357 * @param string 1358 * @return 1359 */ addName(String string)1360 public UnicodeProperty addName(String string) { 1361 throw new UnsupportedOperationException(); 1362 } 1363 } 1364 1365 public abstract static class SimpleProperty extends BaseProperty { 1366 LinkedHashSet values; 1367 1368 @Override addName(String alias)1369 public UnicodeProperty addName(String alias) { 1370 propertyAliases.add(alias); 1371 return this; 1372 } 1373 setValues(String valueAlias)1374 public SimpleProperty setValues(String valueAlias) { 1375 _addToValues(valueAlias, null); 1376 return this; 1377 } 1378 addAliases(String valueAlias, String... aliases)1379 public SimpleProperty addAliases(String valueAlias, String... aliases) { 1380 _addToValues(valueAlias, null); 1381 return this; 1382 } 1383 setValues(String[] valueAliases, String[] alternateValueAliases)1384 public SimpleProperty setValues(String[] valueAliases, String[] alternateValueAliases) { 1385 for (int i = 0; i < valueAliases.length; ++i) { 1386 if (valueAliases[i].equals(UNUSED)) continue; 1387 _addToValues( 1388 valueAliases[i], 1389 alternateValueAliases != null ? alternateValueAliases[i] : null); 1390 } 1391 return this; 1392 } 1393 setValues(List valueAliases)1394 public SimpleProperty setValues(List valueAliases) { 1395 this.values = new LinkedHashSet(valueAliases); 1396 for (Iterator it = this.values.iterator(); it.hasNext(); ) { 1397 _addToValues((String) it.next(), null); 1398 } 1399 return this; 1400 } 1401 1402 @Override _getAvailableValues(List result)1403 public List _getAvailableValues(List result) { 1404 if (values == null) _fillValues(); 1405 result.addAll(values); 1406 return result; 1407 } 1408 _fillValues()1409 protected void _fillValues() { 1410 List newvalues = (List) getUnicodeMap_internal().getAvailableValues(new ArrayList()); 1411 for (Iterator it = newvalues.iterator(); it.hasNext(); ) { 1412 _addToValues((String) it.next(), null); 1413 } 1414 } 1415 _addToValues(String item, String alias)1416 private void _addToValues(String item, String alias) { 1417 if (values == null) values = new LinkedHashSet(); 1418 if (toValueAliases == null) _fixValueAliases(); 1419 addUnique(item, values); 1420 _ensureValueInAliases(item); 1421 addValueAlias(item, alias, AliasAddAction.REQUIRE_MAIN_ALIAS); 1422 } 1423 /* public String _getVersion() { 1424 return version; 1425 } 1426 */ 1427 } 1428 1429 public static class UnicodeMapProperty extends BaseProperty { 1430 /* 1431 * Example of usage: 1432 * new UnicodeProperty.UnicodeMapProperty() { 1433 { 1434 unicodeMap = new UnicodeMap(); 1435 unicodeMap.setErrorOnReset(true); 1436 unicodeMap.put(0xD, "CR"); 1437 unicodeMap.put(0xA, "LF"); 1438 UnicodeProperty cat = getProperty("General_Category"); 1439 UnicodeSet temp = cat.getSet("Line_Separator") 1440 .addAll(cat.getSet("Paragraph_Separator")) 1441 .addAll(cat.getSet("Control")) 1442 .addAll(cat.getSet("Format")) 1443 .remove(0xD).remove(0xA).remove(0x200C).remove(0x200D); 1444 unicodeMap.putAll(temp, "Control"); 1445 UnicodeSet graphemeExtend = getProperty("Grapheme_Extend").getSet("true"); 1446 unicodeMap.putAll(graphemeExtend,"Extend"); 1447 UnicodeProperty hangul = getProperty("Hangul_Syllable_Type"); 1448 unicodeMap.putAll(hangul.getSet("L"),"L"); 1449 unicodeMap.putAll(hangul.getSet("V"),"V"); 1450 unicodeMap.putAll(hangul.getSet("T"),"T"); 1451 unicodeMap.putAll(hangul.getSet("LV"),"LV"); 1452 unicodeMap.putAll(hangul.getSet("LVT"),"LVT"); 1453 unicodeMap.setMissing("Other"); 1454 } 1455 }.setMain("Grapheme_Cluster_Break", "GCB", UnicodeProperty.ENUMERATED, version) 1456 */ 1457 protected UnicodeMap unicodeMap; 1458 1459 @Override _getUnicodeMap()1460 protected UnicodeMap _getUnicodeMap() { 1461 return unicodeMap; 1462 } 1463 set(UnicodeMap map)1464 public UnicodeMapProperty set(UnicodeMap map) { 1465 unicodeMap = map.freeze(); 1466 return this; 1467 } 1468 1469 @Override _getValue(int codepoint)1470 protected String _getValue(int codepoint) { 1471 return (String) unicodeMap.getValue(codepoint); 1472 } 1473 1474 /* protected List _getValueAliases(String valueAlias, List result) { 1475 if (!unicodeMap.getAvailableValues().contains(valueAlias)) return result; 1476 result.add(valueAlias); 1477 return result; // no other aliases 1478 } 1479 */ @Override _getAvailableValues(List result)1480 protected List _getAvailableValues(List result) { 1481 unicodeMap.getAvailableValues(result); 1482 if (toValueAliases != null) { 1483 for (Object s : toValueAliases.keySet()) { 1484 if (!result.contains(s)) { 1485 result.add(s); 1486 } 1487 } 1488 } 1489 return result; 1490 } 1491 } 1492 isValidValue(String propertyValue)1493 public boolean isValidValue(String propertyValue) { 1494 if (isType(STRING_OR_MISC_MASK)) { 1495 return true; 1496 } 1497 Collection<String> values = getAvailableValues(); 1498 for (String valueAlias : values) { 1499 if (UnicodeProperty.compareNames(valueAlias, propertyValue) == 0) { 1500 return true; 1501 } 1502 for (String valueAlias2 : (Collection<String>) getValueAliases(valueAlias)) { 1503 if (UnicodeProperty.compareNames(valueAlias2, propertyValue) == 0) { 1504 return true; 1505 } 1506 } 1507 } 1508 return false; 1509 } 1510 getValueAliases()1511 public List<String> getValueAliases() { 1512 List<String> result = new ArrayList(); 1513 if (isType(STRING_OR_MISC_MASK)) { 1514 return result; 1515 } 1516 Collection<String> values = getAvailableValues(); 1517 for (String valueAlias : values) { 1518 UnicodeProperty.addAllUnique(getValueAliases(valueAlias), result); 1519 } 1520 result.removeAll(values); 1521 return result; 1522 } 1523 addUntested(UnicodeSet result, boolean uniformUnassigned)1524 public static UnicodeSet addUntested(UnicodeSet result, boolean uniformUnassigned) { 1525 if (uniformUnassigned && result.contains(UnicodeProperty.getSAMPLE_UNASSIGNED())) { 1526 result.addAll(UnicodeProperty.getUNASSIGNED()); 1527 } 1528 1529 if (result.contains(UnicodeProperty.SAMPLE_HIGH_SURROGATE)) { 1530 result.addAll(UnicodeProperty.HIGH_SURROGATES); 1531 } 1532 if (result.contains(UnicodeProperty.SAMPLE_HIGH_PRIVATE_USE_SURROGATE)) { 1533 result.addAll(UnicodeProperty.HIGH_PRIVATE_USE_SURROGATES); 1534 } 1535 if (result.contains(UnicodeProperty.SAMPLE_LOW_SURROGATE)) { 1536 result.addAll(UnicodeProperty.LOW_SURROGATES); 1537 } 1538 1539 if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA)) { 1540 result.addAll(UnicodeProperty.PRIVATE_USE_AREA); 1541 } 1542 if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_A)) { 1543 result.addAll(UnicodeProperty.PRIVATE_USE_AREA_A); 1544 } 1545 if (result.contains(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_B)) { 1546 result.addAll(UnicodeProperty.PRIVATE_USE_AREA_B); 1547 } 1548 1549 return result; 1550 } 1551 addUntested(UnicodeMap result, boolean uniformUnassigned)1552 public static UnicodeMap addUntested(UnicodeMap result, boolean uniformUnassigned) { 1553 Object temp; 1554 if (uniformUnassigned 1555 && null != (temp = result.get(UnicodeProperty.getSAMPLE_UNASSIGNED()))) { 1556 result.putAll(UnicodeProperty.getUNASSIGNED(), temp); 1557 } 1558 1559 if (null != (temp = result.get(UnicodeProperty.SAMPLE_HIGH_SURROGATE))) { 1560 result.putAll(UnicodeProperty.HIGH_SURROGATES, temp); 1561 } 1562 if (null != (temp = result.get(UnicodeProperty.SAMPLE_HIGH_PRIVATE_USE_SURROGATE))) { 1563 result.putAll(UnicodeProperty.HIGH_PRIVATE_USE_SURROGATES, temp); 1564 } 1565 if (null != (temp = result.get(UnicodeProperty.SAMPLE_LOW_SURROGATE))) { 1566 result.putAll(UnicodeProperty.LOW_SURROGATES, temp); 1567 } 1568 1569 if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA))) { 1570 result.putAll(UnicodeProperty.PRIVATE_USE_AREA, temp); 1571 } 1572 if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_A))) { 1573 result.putAll(UnicodeProperty.PRIVATE_USE_AREA_A, temp); 1574 } 1575 if (null != (temp = result.get(UnicodeProperty.SAMPLE_PRIVATE_USE_AREA_B))) { 1576 result.putAll(UnicodeProperty.PRIVATE_USE_AREA_B, temp); 1577 } 1578 return result; 1579 } 1580 isDefault(int cp)1581 public boolean isDefault(int cp) { 1582 String value = getValue(cp); 1583 if (isType(STRING_OR_MISC_MASK)) { 1584 return equals(cp, value); 1585 } 1586 String defaultValue = getValue(getSAMPLE_UNASSIGNED()); 1587 return defaultValue == null ? value == null : defaultValue.equals(value); 1588 } 1589 hasUniformUnassigned()1590 public boolean hasUniformUnassigned() { 1591 return hasUniformUnassigned; 1592 } 1593 setUniformUnassigned(boolean hasUniformUnassigned)1594 protected UnicodeProperty setUniformUnassigned(boolean hasUniformUnassigned) { 1595 this.hasUniformUnassigned = hasUniformUnassigned; 1596 return this; 1597 } 1598 1599 public static class UnicodeSetProperty extends BaseProperty { 1600 protected UnicodeSet unicodeSet; 1601 private static final String[] YESNO_ARRAY = new String[] {"Yes", "No"}; 1602 private static final List YESNO = Arrays.asList(YESNO_ARRAY); 1603 set(UnicodeSet set)1604 public UnicodeSetProperty set(UnicodeSet set) { 1605 unicodeSet = set.freeze(); 1606 return this; 1607 } 1608 set(String string)1609 public UnicodeSetProperty set(String string) { 1610 // TODO Auto-generated method stub 1611 return set(new UnicodeSet(string).freeze()); 1612 } 1613 1614 @Override _getValue(int codepoint)1615 protected String _getValue(int codepoint) { 1616 return YESNO_ARRAY[unicodeSet.contains(codepoint) ? 0 : 1]; 1617 } 1618 1619 @Override _getAvailableValues(List result)1620 protected List _getAvailableValues(List result) { 1621 return YESNO; 1622 } 1623 } 1624 1625 // private static class StringTransformProperty extends SimpleProperty { 1626 // Transform<String,String> transform; 1627 // 1628 // public StringTransformProperty(Transform<String,String> transform, boolean 1629 // hasUniformUnassigned) { 1630 // this.transform = transform; 1631 // setUniformUnassigned(hasUniformUnassigned); 1632 // } 1633 // protected String _getValue(int codepoint) { 1634 // return transform.transform(UTF16.valueOf(codepoint)); 1635 // } 1636 // } 1637 // 1638 // private static class CodepointTransformProperty extends SimpleProperty { 1639 // Transform<Integer,String> transform; 1640 // 1641 // public CodepointTransformProperty(Transform<Integer,String> transform, boolean 1642 // hasUniformUnassigned) { 1643 // this.transform = transform; 1644 // setUniformUnassigned(hasUniformUnassigned); 1645 // } 1646 // protected String _getValue(int codepoint) { 1647 // return transform.transform(codepoint); 1648 // } 1649 // } 1650 } 1651