xref: /aosp_15_r20/external/icu/libicu/cts_headers/uniquecharstr.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // uniquecharstr.h
5 // created: 2020sep01 Frank Yung-Fong Tang
6 
7 #ifndef __UNIQUECHARSTR_H__
8 #define __UNIQUECHARSTR_H__
9 
10 #include "charstr.h"
11 #include "uassert.h"
12 #include "uhash.h"
13 #include "cmemory.h"
14 
15 U_NAMESPACE_BEGIN
16 
17 /**
18  * Stores NUL-terminated strings with duplicate elimination.
19  * Checks for unique UTF-16 string pointers and converts to invariant characters.
20  *
21  * Intended to be stack-allocated. Add strings, get a unique number for each,
22  * freeze the object, get a char * pointer for each string,
23  * call orphanCharStrings() to capture the string storage, and let this object go out of scope.
24  */
25 class UniqueCharStrings {
26 public:
UniqueCharStrings(UErrorCode & errorCode)27     UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
28         // Note: We hash on string contents but store stable char16_t * pointers.
29         // If the strings are stored in resource bundles which should be built with
30         // duplicate elimination, then we should be able to hash on just the pointer values.
31         uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
32         if (U_FAILURE(errorCode)) { return; }
33         strings = new CharString();
34         if (strings == nullptr) {
35             errorCode = U_MEMORY_ALLOCATION_ERROR;
36         }
37     }
~UniqueCharStrings()38     ~UniqueCharStrings() {
39         uhash_close(&map);
40         delete strings;
41     }
42 
43     /** Returns/orphans the CharString that contains all strings. */
orphanCharStrings()44     CharString *orphanCharStrings() {
45         CharString *result = strings;
46         strings = nullptr;
47         return result;
48     }
49 
50     /**
51      * Adds a NUL-terminated string and returns a unique number for it.
52      * The string must not change, nor move around in memory,
53      * while this UniqueCharStrings is in use.
54      *
55      * Best used with string data in a stable storage, such as strings returned
56      * by resource bundle functions.
57      */
add(const char16_t * p,UErrorCode & errorCode)58     int32_t add(const char16_t*p, UErrorCode &errorCode) {
59         if (U_FAILURE(errorCode)) { return -1; }
60         if (isFrozen) {
61             errorCode = U_NO_WRITE_PERMISSION;
62             return -1;
63         }
64         // The string points into the resource bundle.
65         int32_t oldIndex = uhash_geti(&map, p);
66         if (oldIndex != 0) {  // found duplicate
67             return oldIndex;
68         }
69         // Explicit NUL terminator for the previous string.
70         // The strings object is also terminated with one implicit NUL.
71         strings->append(0, errorCode);
72         int32_t newIndex = strings->length();
73         strings->appendInvariantChars(p, u_strlen(p), errorCode);
74         uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
75         return newIndex;
76     }
77 
78     /**
79      * Adds a unicode string by value and returns a unique number for it.
80      */
addByValue(UnicodeString s,UErrorCode & errorCode)81     int32_t addByValue(UnicodeString s, UErrorCode &errorCode) {
82         if (U_FAILURE(errorCode)) { return -1; }
83         if (isFrozen) {
84             errorCode = U_NO_WRITE_PERMISSION;
85             return -1;
86         }
87         int32_t oldIndex = uhash_geti(&map, s.getTerminatedBuffer());
88         if (oldIndex != 0) {  // found duplicate
89             return oldIndex;
90         }
91         // We need to store the string content of the UnicodeString.
92         UnicodeString *key = keyStore.create(s);
93         if (key == nullptr) {
94             errorCode = U_MEMORY_ALLOCATION_ERROR;
95             return -1;
96         }
97         return add(key->getTerminatedBuffer(), errorCode);
98     }
99 
freeze()100     void freeze() { isFrozen = true; }
101 
102     /**
103      * Returns a string pointer for its unique number, if this object is frozen.
104      * Otherwise nullptr.
105      */
get(int32_t i)106     const char *get(int32_t i) const {
107         U_ASSERT(isFrozen);
108         return isFrozen && i > 0 ? strings->data() + i : nullptr;
109     }
110 
111 private:
112     UHashtable map;
113     CharString *strings;
114     MemoryPool<UnicodeString> keyStore;
115     bool isFrozen = false;
116 };
117 
118 U_NAMESPACE_END
119 
120 #endif  // __UNIQUECHARSTR_H__
121