xref: /aosp_15_r20/external/icu/libicu/cts_headers/uniquecharstr.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
// © 2020 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

// uniquecharstr.h
// created: 2020sep01 Frank Yung-Fong Tang
6*0e209d39SAndroid Build Coastguard Worker 
7*0e209d39SAndroid Build Coastguard Worker #ifndef __UNIQUECHARSTR_H__
8*0e209d39SAndroid Build Coastguard Worker #define __UNIQUECHARSTR_H__
9*0e209d39SAndroid Build Coastguard Worker 
10*0e209d39SAndroid Build Coastguard Worker #include "charstr.h"
11*0e209d39SAndroid Build Coastguard Worker #include "uassert.h"
12*0e209d39SAndroid Build Coastguard Worker #include "uhash.h"
13*0e209d39SAndroid Build Coastguard Worker #include "cmemory.h"
14*0e209d39SAndroid Build Coastguard Worker 
15*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
16*0e209d39SAndroid Build Coastguard Worker 
17*0e209d39SAndroid Build Coastguard Worker /**
18*0e209d39SAndroid Build Coastguard Worker  * Stores NUL-terminated strings with duplicate elimination.
19*0e209d39SAndroid Build Coastguard Worker  * Checks for unique UTF-16 string pointers and converts to invariant characters.
20*0e209d39SAndroid Build Coastguard Worker  *
21*0e209d39SAndroid Build Coastguard Worker  * Intended to be stack-allocated. Add strings, get a unique number for each,
22*0e209d39SAndroid Build Coastguard Worker  * freeze the object, get a char * pointer for each string,
23*0e209d39SAndroid Build Coastguard Worker  * call orphanCharStrings() to capture the string storage, and let this object go out of scope.
24*0e209d39SAndroid Build Coastguard Worker  */
25*0e209d39SAndroid Build Coastguard Worker class UniqueCharStrings {
26*0e209d39SAndroid Build Coastguard Worker public:
UniqueCharStrings(UErrorCode & errorCode)27*0e209d39SAndroid Build Coastguard Worker     UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
28*0e209d39SAndroid Build Coastguard Worker         // Note: We hash on string contents but store stable char16_t * pointers.
29*0e209d39SAndroid Build Coastguard Worker         // If the strings are stored in resource bundles which should be built with
30*0e209d39SAndroid Build Coastguard Worker         // duplicate elimination, then we should be able to hash on just the pointer values.
31*0e209d39SAndroid Build Coastguard Worker         uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
32*0e209d39SAndroid Build Coastguard Worker         if (U_FAILURE(errorCode)) { return; }
33*0e209d39SAndroid Build Coastguard Worker         strings = new CharString();
34*0e209d39SAndroid Build Coastguard Worker         if (strings == nullptr) {
35*0e209d39SAndroid Build Coastguard Worker             errorCode = U_MEMORY_ALLOCATION_ERROR;
36*0e209d39SAndroid Build Coastguard Worker         }
37*0e209d39SAndroid Build Coastguard Worker     }
~UniqueCharStrings()38*0e209d39SAndroid Build Coastguard Worker     ~UniqueCharStrings() {
39*0e209d39SAndroid Build Coastguard Worker         uhash_close(&map);
40*0e209d39SAndroid Build Coastguard Worker         delete strings;
41*0e209d39SAndroid Build Coastguard Worker     }
42*0e209d39SAndroid Build Coastguard Worker 
43*0e209d39SAndroid Build Coastguard Worker     /** Returns/orphans the CharString that contains all strings. */
orphanCharStrings()44*0e209d39SAndroid Build Coastguard Worker     CharString *orphanCharStrings() {
45*0e209d39SAndroid Build Coastguard Worker         CharString *result = strings;
46*0e209d39SAndroid Build Coastguard Worker         strings = nullptr;
47*0e209d39SAndroid Build Coastguard Worker         return result;
48*0e209d39SAndroid Build Coastguard Worker     }
49*0e209d39SAndroid Build Coastguard Worker 
50*0e209d39SAndroid Build Coastguard Worker     /**
51*0e209d39SAndroid Build Coastguard Worker      * Adds a NUL-terminated string and returns a unique number for it.
52*0e209d39SAndroid Build Coastguard Worker      * The string must not change, nor move around in memory,
53*0e209d39SAndroid Build Coastguard Worker      * while this UniqueCharStrings is in use.
54*0e209d39SAndroid Build Coastguard Worker      *
55*0e209d39SAndroid Build Coastguard Worker      * Best used with string data in a stable storage, such as strings returned
56*0e209d39SAndroid Build Coastguard Worker      * by resource bundle functions.
57*0e209d39SAndroid Build Coastguard Worker      */
add(const char16_t * p,UErrorCode & errorCode)58*0e209d39SAndroid Build Coastguard Worker     int32_t add(const char16_t*p, UErrorCode &errorCode) {
59*0e209d39SAndroid Build Coastguard Worker         if (U_FAILURE(errorCode)) { return -1; }
60*0e209d39SAndroid Build Coastguard Worker         if (isFrozen) {
61*0e209d39SAndroid Build Coastguard Worker             errorCode = U_NO_WRITE_PERMISSION;
62*0e209d39SAndroid Build Coastguard Worker             return -1;
63*0e209d39SAndroid Build Coastguard Worker         }
64*0e209d39SAndroid Build Coastguard Worker         // The string points into the resource bundle.
65*0e209d39SAndroid Build Coastguard Worker         int32_t oldIndex = uhash_geti(&map, p);
66*0e209d39SAndroid Build Coastguard Worker         if (oldIndex != 0) {  // found duplicate
67*0e209d39SAndroid Build Coastguard Worker             return oldIndex;
68*0e209d39SAndroid Build Coastguard Worker         }
69*0e209d39SAndroid Build Coastguard Worker         // Explicit NUL terminator for the previous string.
70*0e209d39SAndroid Build Coastguard Worker         // The strings object is also terminated with one implicit NUL.
71*0e209d39SAndroid Build Coastguard Worker         strings->append(0, errorCode);
72*0e209d39SAndroid Build Coastguard Worker         int32_t newIndex = strings->length();
73*0e209d39SAndroid Build Coastguard Worker         strings->appendInvariantChars(p, u_strlen(p), errorCode);
74*0e209d39SAndroid Build Coastguard Worker         uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
75*0e209d39SAndroid Build Coastguard Worker         return newIndex;
76*0e209d39SAndroid Build Coastguard Worker     }
77*0e209d39SAndroid Build Coastguard Worker 
78*0e209d39SAndroid Build Coastguard Worker     /**
79*0e209d39SAndroid Build Coastguard Worker      * Adds a unicode string by value and returns a unique number for it.
80*0e209d39SAndroid Build Coastguard Worker      */
addByValue(UnicodeString s,UErrorCode & errorCode)81*0e209d39SAndroid Build Coastguard Worker     int32_t addByValue(UnicodeString s, UErrorCode &errorCode) {
82*0e209d39SAndroid Build Coastguard Worker         if (U_FAILURE(errorCode)) { return -1; }
83*0e209d39SAndroid Build Coastguard Worker         if (isFrozen) {
84*0e209d39SAndroid Build Coastguard Worker             errorCode = U_NO_WRITE_PERMISSION;
85*0e209d39SAndroid Build Coastguard Worker             return -1;
86*0e209d39SAndroid Build Coastguard Worker         }
87*0e209d39SAndroid Build Coastguard Worker         int32_t oldIndex = uhash_geti(&map, s.getTerminatedBuffer());
88*0e209d39SAndroid Build Coastguard Worker         if (oldIndex != 0) {  // found duplicate
89*0e209d39SAndroid Build Coastguard Worker             return oldIndex;
90*0e209d39SAndroid Build Coastguard Worker         }
91*0e209d39SAndroid Build Coastguard Worker         // We need to store the string content of the UnicodeString.
92*0e209d39SAndroid Build Coastguard Worker         UnicodeString *key = keyStore.create(s);
93*0e209d39SAndroid Build Coastguard Worker         if (key == nullptr) {
94*0e209d39SAndroid Build Coastguard Worker             errorCode = U_MEMORY_ALLOCATION_ERROR;
95*0e209d39SAndroid Build Coastguard Worker             return -1;
96*0e209d39SAndroid Build Coastguard Worker         }
97*0e209d39SAndroid Build Coastguard Worker         return add(key->getTerminatedBuffer(), errorCode);
98*0e209d39SAndroid Build Coastguard Worker     }
99*0e209d39SAndroid Build Coastguard Worker 
freeze()100*0e209d39SAndroid Build Coastguard Worker     void freeze() { isFrozen = true; }
101*0e209d39SAndroid Build Coastguard Worker 
102*0e209d39SAndroid Build Coastguard Worker     /**
103*0e209d39SAndroid Build Coastguard Worker      * Returns a string pointer for its unique number, if this object is frozen.
104*0e209d39SAndroid Build Coastguard Worker      * Otherwise nullptr.
105*0e209d39SAndroid Build Coastguard Worker      */
get(int32_t i)106*0e209d39SAndroid Build Coastguard Worker     const char *get(int32_t i) const {
107*0e209d39SAndroid Build Coastguard Worker         U_ASSERT(isFrozen);
108*0e209d39SAndroid Build Coastguard Worker         return isFrozen && i > 0 ? strings->data() + i : nullptr;
109*0e209d39SAndroid Build Coastguard Worker     }
110*0e209d39SAndroid Build Coastguard Worker 
111*0e209d39SAndroid Build Coastguard Worker private:
112*0e209d39SAndroid Build Coastguard Worker     UHashtable map;
113*0e209d39SAndroid Build Coastguard Worker     CharString *strings;
114*0e209d39SAndroid Build Coastguard Worker     MemoryPool<UnicodeString> keyStore;
115*0e209d39SAndroid Build Coastguard Worker     bool isFrozen = false;
116*0e209d39SAndroid Build Coastguard Worker };
117*0e209d39SAndroid Build Coastguard Worker 
118*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
119*0e209d39SAndroid Build Coastguard Worker 
120*0e209d39SAndroid Build Coastguard Worker #endif  // __UNIQUECHARSTR_H__
121