package org.unicode.cldr.util;

import java.util.Collection;
import java.util.Set;
import java.util.stream.Collectors;

/** Static helpers for working with {@link CharSequence} and {@link CharSource} text. */
public class CharUtilities {
    /**
     * Simple wrapper that exposes a {@link CharSequence} through the {@link CharSource} interface.
     * Offsets in the wrapper and offsets in the wrapped sequence are identical.
     *
     * @author markdavis
     */
    public static class CharSourceWrapper<T extends CharSequence> implements CharSource {
        /** The wrapped character sequence. */
        protected T source;

        /**
         * Wraps the given sequence.
         *
         * @param source the sequence that all calls are delegated to
         */
        public CharSourceWrapper(T source) {
            this.source = source;
        }

        /** Returns true if {@code index} is below the length of the wrapped sequence. */
        @Override
        public boolean hasCharAt(int index) {
            return index < source.length();
        }

        /** Returns the char of the wrapped sequence at {@code index}. */
        @Override
        public char charAt(int index) {
            return source.charAt(index);
        }

        /** Returns {@code index} unchanged: wrapper and source offsets are the same. */
        @Override
        public int toSourceOffset(int index) {
            return index;
        }

        /** Returns a new wrapper around the subsequence {@code [start, end)}. */
        @Override
        public CharSource sublist(int start, int end) {
            return new CharSourceWrapper<>(source.subSequence(start, end));
        }

        /** Returns a new wrapper around the subsequence from {@code start} to the end. */
        @Override
        public CharSource sublist(int start) {
            return new CharSourceWrapper<>(source.subSequence(start, source.length()));
        }

        /** Returns the length of the wrapped sequence. */
        @Override
        public int getKnownLength() {
            return source.length();
        }

        /** Returns the subsequence {@code [start, end)} of the wrapped sequence. */
        @Override
        public CharSequence subSequence(int start, int end) {
            return source.subSequence(start, end);
        }

        /** Returns the string form of the wrapped sequence. */
        @Override
        public String toString() {
            return source.toString();
        }

        /**
         * Returns the subsequence of the wrapped sequence between the source offsets that
         * correspond to {@code start} and {@code end}.
         */
        public CharSequence sourceSubSequence(int start, int end) {
            return source.subSequence(toSourceOffset(start), toSourceOffset(end));
        }

        /** Returns {@code index} unchanged: wrapper and source offsets are the same. */
        @Override
        public int fromSourceOffset(int index) {
            return index;
        }

        /** Ignores {@code index} and returns this wrapper unchanged. */
        @Override
        public CharSource setStart(int index) {
            return this;
        }

        /** Always returns 0. */
        @Override
        public int getStart() {
            return 0;
        }
    }

    /**
     * Return the code point order of two CharSources. If the text has isolated surrogates, they
     * will not sort correctly.
     *
     * @param text1 the first text
     * @param text2 the second text
     * @return a negative value if text1 sorts before text2, zero if both contain the same chars,
     *     a positive value if text1 sorts after text2
     */
    public static int compare(CharSource text1, CharSource text2) {
        int i1 = 0;
        int i2 = 0;

        while (true) {
            // handle running out of room: a text that ends first sorts first,
            // and two texts that end together are equal
            if (!text1.hasCharAt(i1)) {
                if (!text2.hasCharAt(i2)) {
                    return 0;
                }
                return -1;
            } else if (!text2.hasCharAt(i2)) {
                return 1;
            }
            int cp1 = text1.charAt(i1++);
            int cp2 = text2.charAt(i2++);

            // if they are different, do a fixup
            if (cp1 != cp2) {
                return codePointOrderKey(cp1) - codePointOrderKey(cp2);
            }
        }
    }

    /**
     * Offsets indexed by the top five bits of a UTF-16 code unit. Adding the offset modulo 0x10000
     * moves surrogates (D800..DFFF) to F800..FFFF and moves E000..FFFF down to D800..F7FF, so
     * that comparing the adjusted code units gives code point order.
     */
    private static final char[] utf16Fixup = {
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0x2000, 0xf800, 0xf800, 0xf800, 0xf800
    };

    /**
     * Maps a UTF-16 code unit to a 16-bit key whose numeric order matches code point order.
     *
     * @param codeUnit a UTF-16 code unit in the range 0..0xFFFF
     * @return the adjusted value, in the range 0..0xFFFF
     */
    private static int codePointOrderKey(int codeUnit) {
        // The mask is essential: the table relies on 16-bit wraparound. Without it the sum is
        // computed as an int, E000..FFFF lands above the surrogates, and the result is plain
        // UTF-16 code unit order.
        return (codeUnit + utf16Fixup[codeUnit >> 11]) & 0xFFFF;
    }

    /**
     * Return the code point order of two CharSequences. If the text has isolated surrogates, they
     * will not sort correctly.
     *
     * @param text1 the first text
     * @param text2 the second text
     * @return a negative value if text1 sorts before text2, zero if both contain the same chars,
     *     a positive value if text1 sorts after text2
     */
    public static int compare(CharSequence text1, CharSequence text2) {
        int i1 = 0;
        int i2 = 0;

        while (true) {
            // handle running out of room
            if (i1 >= text1.length()) {
                if (i2 >= text2.length()) {
                    return 0;
                }
                return -1;
            } else if (i2 >= text2.length()) {
                return 1;
            }
            int cp1 = text1.charAt(i1++);
            int cp2 = text2.charAt(i2++);

            // if they are different, do a fixup
            if (cp1 != cp2) {
                return codePointOrderKey(cp1) - codePointOrderKey(cp2);
            }
        }
    }

    /**
     * Interns each element of the collection and returns the results as a new unmodifiable Set.
     *
     * @param s the strings to intern
     * @return an unmodifiable set of the interned strings
     */
    public static Set<String> internImmutableSet(Collection<String> s) {
        return s.stream().map(String::intern).collect(Collectors.toUnmodifiableSet());
    }
}