1*0e209d39SAndroid Build Coastguard Worker // © 2020 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker 4*0e209d39SAndroid Build Coastguard Worker // uniquecharstr.h 5*0e209d39SAndroid Build Coastguard Worker // created: 2020sep01 Frank Yung-Fong Tang 6*0e209d39SAndroid Build Coastguard Worker 7*0e209d39SAndroid Build Coastguard Worker #ifndef __UNIQUECHARSTR_H__ 8*0e209d39SAndroid Build Coastguard Worker #define __UNIQUECHARSTR_H__ 9*0e209d39SAndroid Build Coastguard Worker 10*0e209d39SAndroid Build Coastguard Worker #include "charstr.h" 11*0e209d39SAndroid Build Coastguard Worker #include "uassert.h" 12*0e209d39SAndroid Build Coastguard Worker #include "uhash.h" 13*0e209d39SAndroid Build Coastguard Worker #include "cmemory.h" 14*0e209d39SAndroid Build Coastguard Worker 15*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 16*0e209d39SAndroid Build Coastguard Worker 17*0e209d39SAndroid Build Coastguard Worker /** 18*0e209d39SAndroid Build Coastguard Worker * Stores NUL-terminated strings with duplicate elimination. 19*0e209d39SAndroid Build Coastguard Worker * Checks for unique UTF-16 string pointers and converts to invariant characters. 20*0e209d39SAndroid Build Coastguard Worker * 21*0e209d39SAndroid Build Coastguard Worker * Intended to be stack-allocated. Add strings, get a unique number for each, 22*0e209d39SAndroid Build Coastguard Worker * freeze the object, get a char * pointer for each string, 23*0e209d39SAndroid Build Coastguard Worker * call orphanCharStrings() to capture the string storage, and let this object go out of scope. 24*0e209d39SAndroid Build Coastguard Worker */ 25*0e209d39SAndroid Build Coastguard Worker class UniqueCharStrings { 26*0e209d39SAndroid Build Coastguard Worker public: UniqueCharStrings(UErrorCode & errorCode)27*0e209d39SAndroid Build Coastguard Worker UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) { 28*0e209d39SAndroid Build Coastguard Worker // Note: We hash on string contents but store stable char16_t * pointers. 29*0e209d39SAndroid Build Coastguard Worker // If the strings are stored in resource bundles which should be built with 30*0e209d39SAndroid Build Coastguard Worker // duplicate elimination, then we should be able to hash on just the pointer values. 31*0e209d39SAndroid Build Coastguard Worker uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode); 32*0e209d39SAndroid Build Coastguard Worker if (U_FAILURE(errorCode)) { return; } 33*0e209d39SAndroid Build Coastguard Worker strings = new CharString(); 34*0e209d39SAndroid Build Coastguard Worker if (strings == nullptr) { 35*0e209d39SAndroid Build Coastguard Worker errorCode = U_MEMORY_ALLOCATION_ERROR; 36*0e209d39SAndroid Build Coastguard Worker } 37*0e209d39SAndroid Build Coastguard Worker } ~UniqueCharStrings()38*0e209d39SAndroid Build Coastguard Worker ~UniqueCharStrings() { 39*0e209d39SAndroid Build Coastguard Worker uhash_close(&map); 40*0e209d39SAndroid Build Coastguard Worker delete strings; 41*0e209d39SAndroid Build Coastguard Worker } 42*0e209d39SAndroid Build Coastguard Worker 43*0e209d39SAndroid Build Coastguard Worker /** Returns/orphans the CharString that contains all strings. */ orphanCharStrings()44*0e209d39SAndroid Build Coastguard Worker CharString *orphanCharStrings() { 45*0e209d39SAndroid Build Coastguard Worker CharString *result = strings; 46*0e209d39SAndroid Build Coastguard Worker strings = nullptr; 47*0e209d39SAndroid Build Coastguard Worker return result; 48*0e209d39SAndroid Build Coastguard Worker } 49*0e209d39SAndroid Build Coastguard Worker 50*0e209d39SAndroid Build Coastguard Worker /** 51*0e209d39SAndroid Build Coastguard Worker * Adds a NUL-terminated string and returns a unique number for it. 52*0e209d39SAndroid Build Coastguard Worker * The string must not change, nor move around in memory, 53*0e209d39SAndroid Build Coastguard Worker * while this UniqueCharStrings is in use. 54*0e209d39SAndroid Build Coastguard Worker * 55*0e209d39SAndroid Build Coastguard Worker * Best used with string data in a stable storage, such as strings returned 56*0e209d39SAndroid Build Coastguard Worker * by resource bundle functions. 57*0e209d39SAndroid Build Coastguard Worker */ add(const char16_t * p,UErrorCode & errorCode)58*0e209d39SAndroid Build Coastguard Worker int32_t add(const char16_t*p, UErrorCode &errorCode) { 59*0e209d39SAndroid Build Coastguard Worker if (U_FAILURE(errorCode)) { return -1; } 60*0e209d39SAndroid Build Coastguard Worker if (isFrozen) { 61*0e209d39SAndroid Build Coastguard Worker errorCode = U_NO_WRITE_PERMISSION; 62*0e209d39SAndroid Build Coastguard Worker return -1; 63*0e209d39SAndroid Build Coastguard Worker } 64*0e209d39SAndroid Build Coastguard Worker // The string points into the resource bundle. 65*0e209d39SAndroid Build Coastguard Worker int32_t oldIndex = uhash_geti(&map, p); 66*0e209d39SAndroid Build Coastguard Worker if (oldIndex != 0) { // found duplicate 67*0e209d39SAndroid Build Coastguard Worker return oldIndex; 68*0e209d39SAndroid Build Coastguard Worker } 69*0e209d39SAndroid Build Coastguard Worker // Explicit NUL terminator for the previous string. 70*0e209d39SAndroid Build Coastguard Worker // The strings object is also terminated with one implicit NUL. 71*0e209d39SAndroid Build Coastguard Worker strings->append(0, errorCode); 72*0e209d39SAndroid Build Coastguard Worker int32_t newIndex = strings->length(); 73*0e209d39SAndroid Build Coastguard Worker strings->appendInvariantChars(p, u_strlen(p), errorCode); 74*0e209d39SAndroid Build Coastguard Worker uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode); 75*0e209d39SAndroid Build Coastguard Worker return newIndex; 76*0e209d39SAndroid Build Coastguard Worker } 77*0e209d39SAndroid Build Coastguard Worker 78*0e209d39SAndroid Build Coastguard Worker /** 79*0e209d39SAndroid Build Coastguard Worker * Adds a unicode string by value and returns a unique number for it. 80*0e209d39SAndroid Build Coastguard Worker */ addByValue(UnicodeString s,UErrorCode & errorCode)81*0e209d39SAndroid Build Coastguard Worker int32_t addByValue(UnicodeString s, UErrorCode &errorCode) { 82*0e209d39SAndroid Build Coastguard Worker if (U_FAILURE(errorCode)) { return -1; } 83*0e209d39SAndroid Build Coastguard Worker if (isFrozen) { 84*0e209d39SAndroid Build Coastguard Worker errorCode = U_NO_WRITE_PERMISSION; 85*0e209d39SAndroid Build Coastguard Worker return -1; 86*0e209d39SAndroid Build Coastguard Worker } 87*0e209d39SAndroid Build Coastguard Worker int32_t oldIndex = uhash_geti(&map, s.getTerminatedBuffer()); 88*0e209d39SAndroid Build Coastguard Worker if (oldIndex != 0) { // found duplicate 89*0e209d39SAndroid Build Coastguard Worker return oldIndex; 90*0e209d39SAndroid Build Coastguard Worker } 91*0e209d39SAndroid Build Coastguard Worker // We need to store the string content of the UnicodeString. 92*0e209d39SAndroid Build Coastguard Worker UnicodeString *key = keyStore.create(s); 93*0e209d39SAndroid Build Coastguard Worker if (key == nullptr) { 94*0e209d39SAndroid Build Coastguard Worker errorCode = U_MEMORY_ALLOCATION_ERROR; 95*0e209d39SAndroid Build Coastguard Worker return -1; 96*0e209d39SAndroid Build Coastguard Worker } 97*0e209d39SAndroid Build Coastguard Worker return add(key->getTerminatedBuffer(), errorCode); 98*0e209d39SAndroid Build Coastguard Worker } 99*0e209d39SAndroid Build Coastguard Worker freeze()100*0e209d39SAndroid Build Coastguard Worker void freeze() { isFrozen = true; } 101*0e209d39SAndroid Build Coastguard Worker 102*0e209d39SAndroid Build Coastguard Worker /** 103*0e209d39SAndroid Build Coastguard Worker * Returns a string pointer for its unique number, if this object is frozen. 104*0e209d39SAndroid Build Coastguard Worker * Otherwise nullptr. 105*0e209d39SAndroid Build Coastguard Worker */ get(int32_t i)106*0e209d39SAndroid Build Coastguard Worker const char *get(int32_t i) const { 107*0e209d39SAndroid Build Coastguard Worker U_ASSERT(isFrozen); 108*0e209d39SAndroid Build Coastguard Worker return isFrozen && i > 0 ? strings->data() + i : nullptr; 109*0e209d39SAndroid Build Coastguard Worker } 110*0e209d39SAndroid Build Coastguard Worker 111*0e209d39SAndroid Build Coastguard Worker private: 112*0e209d39SAndroid Build Coastguard Worker UHashtable map; 113*0e209d39SAndroid Build Coastguard Worker CharString *strings; 114*0e209d39SAndroid Build Coastguard Worker MemoryPool<UnicodeString> keyStore; 115*0e209d39SAndroid Build Coastguard Worker bool isFrozen = false; 116*0e209d39SAndroid Build Coastguard Worker }; 117*0e209d39SAndroid Build Coastguard Worker 118*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 119*0e209d39SAndroid Build Coastguard Worker 120*0e209d39SAndroid Build Coastguard Worker #endif // __UNIQUECHARSTR_H__ 121