xref: /aosp_15_r20/external/cldr/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java (revision 912701f9769bb47905792267661f0baf2b85bed5)
1 package org.unicode.cldr.util;
2 
3 import java.util.Collection;
4 import java.util.Set;
5 import java.util.stream.Collectors;
6 
7 public class CharUtilities {
8     /**
9      * Simple wrapper for CharSequence
10      *
11      * @author markdavis
12      */
13     public static class CharSourceWrapper<T extends CharSequence> implements CharSource {
14         protected T source;
15 
CharSourceWrapper(T source)16         public CharSourceWrapper(T source) {
17             this.source = source;
18         }
19 
20         @Override
hasCharAt(int index)21         public boolean hasCharAt(int index) {
22             return index < source.length();
23         }
24 
25         @Override
charAt(int index)26         public char charAt(int index) {
27             return source.charAt(index);
28         }
29 
30         @Override
toSourceOffset(int index)31         public int toSourceOffset(int index) {
32             return index;
33         }
34 
35         @Override
sublist(int start, int end)36         public CharSource sublist(int start, int end) {
37             return new CharSourceWrapper<>(source.subSequence(start, end));
38         }
39 
40         @Override
sublist(int start)41         public CharSource sublist(int start) {
42             return new CharSourceWrapper<>(source.subSequence(start, source.length()));
43         }
44 
45         @Override
getKnownLength()46         public int getKnownLength() {
47             return source.length();
48         }
49 
50         @Override
subSequence(int start, int end)51         public CharSequence subSequence(int start, int end) {
52             return source.subSequence(start, end);
53         }
54 
55         @Override
toString()56         public String toString() {
57             return source.toString();
58         }
59 
sourceSubSequence(int start, int end)60         public CharSequence sourceSubSequence(int start, int end) {
61             return source.subSequence(toSourceOffset(start), toSourceOffset(end));
62         }
63 
64         @Override
fromSourceOffset(int index)65         public int fromSourceOffset(int index) {
66             return index;
67         }
68 
69         @Override
setStart(int index)70         public CharSource setStart(int index) {
71             return this;
72         }
73 
74         @Override
getStart()75         public int getStart() {
76             return 0;
77         }
78     }
79 
80     /**
81      * Return the code point order of two CharSequences. If the text has isolated surrogates, they
82      * will not sort correctly.
83      *
84      * @param text1
85      * @param text2
86      * @return
87      */
compare(CharSource text1, CharSource text2)88     public static int compare(CharSource text1, CharSource text2) {
89         int i1 = 0;
90         int i2 = 0;
91 
92         while (true) {
93             // handle running out of room
94             if (!text1.hasCharAt(i1)) {
95                 if (text2.hasCharAt(i2)) {
96                     return 0;
97                 }
98                 return -1;
99             } else if (text2.hasCharAt(i2)) {
100                 return 1;
101             }
102             int cp1 = text1.charAt(i1++);
103             int cp2 = text2.charAt(i2++);
104             // if they are different, do a fixup
105 
106             if (cp1 != cp2) {
107                 return (cp1 + utf16Fixup[cp1 >> 11]) - (cp2 + utf16Fixup[cp2 >> 11]);
108             }
109         }
110     }
111 
112     private static final char utf16Fixup[] = {
113         0, 0, 0, 0, 0, 0, 0, 0,
114         0, 0, 0, 0, 0, 0, 0, 0,
115         0, 0, 0, 0, 0, 0, 0, 0,
116         0, 0, 0, 0x2000, 0xf800, 0xf800, 0xf800, 0xf800
117     };
118 
119     /**
120      * Return the code point order of two CharSequences. If the text has isolated surrogates, they
121      * will not sort correctly.
122      *
123      * @param text1
124      * @param text2
125      * @return
126      */
compare(CharSequence text1, CharSequence text2)127     public static int compare(CharSequence text1, CharSequence text2) {
128         int i1 = 0;
129         int i2 = 0;
130 
131         while (true) {
132             // handle running out of room
133             if (i1 >= text1.length()) {
134                 if (i2 >= text2.length()) {
135                     return 0;
136                 }
137                 return -1;
138             } else if (i2 >= text2.length()) {
139                 return 1;
140             }
141             int cp1 = text1.charAt(i1++);
142             int cp2 = text2.charAt(i2++);
143             // if they are different, do a fixup
144 
145             if (cp1 != cp2) {
146                 return (cp1 + utf16Fixup[cp1 >> 11]) - (cp2 + utf16Fixup[cp2 >> 11]);
147             }
148         }
149     }
150 
151     /** intern each element in the string and return a new unmodifiable Set */
internImmutableSet(Collection<String> s)152     public static Set<String> internImmutableSet(Collection<String> s) {
153         return s.stream().map(String::intern).collect(Collectors.toUnmodifiableSet());
154     }
155 }
156