xref: /aosp_15_r20/external/cronet/third_party/icu/source/common/loadednormalizer2impl.cpp (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 * Copyright (C) 2014, International Business Machines
6 * Corporation and others.  All Rights Reserved.
7 *******************************************************************************
8 * loadednormalizer2impl.cpp
9 *
10 * created on: 2014sep03
11 * created by: Markus W. Scherer
12 */
13 
14 #include "unicode/utypes.h"
15 
16 #if !UCONFIG_NO_NORMALIZATION
17 
18 #include "unicode/udata.h"
19 #include "unicode/localpointer.h"
20 #include "unicode/normalizer2.h"
21 #include "unicode/ucptrie.h"
22 #include "unicode/unistr.h"
23 #include "unicode/unorm.h"
24 #include "cstring.h"
25 #include "mutex.h"
26 #include "norm2allmodes.h"
27 #include "normalizer2impl.h"
28 #include "uassert.h"
29 #include "ucln_cmn.h"
30 #include "uhash.h"
31 
32 U_NAMESPACE_BEGIN
33 
34 class LoadedNormalizer2Impl : public Normalizer2Impl {
35 public:
LoadedNormalizer2Impl()36     LoadedNormalizer2Impl() : memory(nullptr), ownedTrie(nullptr) {}
37     virtual ~LoadedNormalizer2Impl();
38 
39     void load(const char *packageName, const char *name, UErrorCode &errorCode);
40 
41 private:
42     static UBool U_CALLCONV
43     isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
44 
45     UDataMemory *memory;
46     UCPTrie *ownedTrie;
47 };
48 
// Releases what load() acquired: the data handle and the code point trie.
// Both members start out as nullptr, so this also runs when load() was never
// called or failed part-way.
LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
    udata_close(memory);
    ucptrie_close(ownedTrie);
}
53 
54 UBool U_CALLCONV
isAcceptable(void *,const char *,const char *,const UDataInfo * pInfo)55 LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
56                                     const char * /* type */, const char * /*name*/,
57                                     const UDataInfo *pInfo) {
58     if(
59         pInfo->size>=20 &&
60         pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
61         pInfo->charsetFamily==U_CHARSET_FAMILY &&
62         pInfo->dataFormat[0]==0x4e &&    /* dataFormat="Nrm2" */
63         pInfo->dataFormat[1]==0x72 &&
64         pInfo->dataFormat[2]==0x6d &&
65         pInfo->dataFormat[3]==0x32 &&
66         pInfo->formatVersion[0]==4
67     ) {
68         // Normalizer2Impl *me=(Normalizer2Impl *)context;
69         // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
70         return true;
71     } else {
72         return false;
73     }
74 }
75 
76 void
load(const char * packageName,const char * name,UErrorCode & errorCode)77 LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
78     if(U_FAILURE(errorCode)) {
79         return;
80     }
81     memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
82     if(U_FAILURE(errorCode)) {
83         return;
84     }
85     const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
86     const int32_t *inIndexes=(const int32_t *)inBytes;
87     int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
88     if(indexesLength<=IX_MIN_LCCC_CP) {
89         errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
90         return;
91     }
92 
93     int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
94     int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
95     ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
96                                      inBytes+offset, nextOffset-offset, nullptr,
97                                      &errorCode);
98     if(U_FAILURE(errorCode)) {
99         return;
100     }
101 
102     offset=nextOffset;
103     nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
104     const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
105 
106     // smallFCD: new in formatVersion 2
107     offset=nextOffset;
108     const uint8_t *inSmallFCD=inBytes+offset;
109 
110     init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
111 }
112 
113 // instance cache ---------------------------------------------------------- ***
114 
115 Norm2AllModes *
createInstance(const char * packageName,const char * name,UErrorCode & errorCode)116 Norm2AllModes::createInstance(const char *packageName,
117                               const char *name,
118                               UErrorCode &errorCode) {
119     if(U_FAILURE(errorCode)) {
120         return nullptr;
121     }
122     LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
123     if(impl==nullptr) {
124         errorCode=U_MEMORY_ALLOCATION_ERROR;
125         return nullptr;
126     }
127     impl->load(packageName, name, errorCode);
128     return createInstance(impl, errorCode);
129 }
130 
// Forward declaration: the cleanup function is registered by code that
// appears before its definition further down in this file.
U_CDECL_BEGIN
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
U_CDECL_END

// Each built-in data set has one lazily created instance plus the UInitOnce
// that guards its creation. The NFC pair only exists when NFC data is loaded
// from a file rather than hardcoded.
#if !NORM2_HARDCODE_NFC_DATA
static Norm2AllModes *nfcSingleton;
static icu::UInitOnce nfcInitOnce {};
#endif

static Norm2AllModes *nfkcSingleton;
static icu::UInitOnce nfkcInitOnce {};

static Norm2AllModes *nfkc_cfSingleton;
static icu::UInitOnce nfkc_cfInitOnce {};

static Norm2AllModes *nfkc_scfSingleton;
static icu::UInitOnce nfkc_scfInitOnce {};

// Instances created by Normalizer2::getInstance() for other data files,
// keyed by data item name. Created on first insertion.
static UHashtable    *cache=nullptr;
150 
151 // UInitOnce singleton initialization function
initSingletons(const char * what,UErrorCode & errorCode)152 static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
153 #if !NORM2_HARDCODE_NFC_DATA
154     if (uprv_strcmp(what, "nfc") == 0) {
155         nfcSingleton    = Norm2AllModes::createInstance(nullptr, "nfc", errorCode);
156     } else
157 #endif
158     if (uprv_strcmp(what, "nfkc") == 0) {
159         nfkcSingleton    = Norm2AllModes::createInstance(nullptr, "nfkc", errorCode);
160     } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
161         nfkc_cfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_cf", errorCode);
162     } else if (uprv_strcmp(what, "nfkc_scf") == 0) {
163         nfkc_scfSingleton = Norm2AllModes::createInstance(nullptr, "nfkc_scf", errorCode);
164     } else {
165         UPRV_UNREACHABLE_EXIT;   // Unknown singleton
166     }
167     ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
168 }
169 
170 U_CDECL_BEGIN
171 
deleteNorm2AllModes(void * allModes)172 static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
173     delete (Norm2AllModes *)allModes;
174 }
175 
uprv_loaded_normalizer2_cleanup()176 static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
177 #if !NORM2_HARDCODE_NFC_DATA
178     delete nfcSingleton;
179     nfcSingleton = nullptr;
180     nfcInitOnce.reset();
181 #endif
182 
183     delete nfkcSingleton;
184     nfkcSingleton = nullptr;
185     nfkcInitOnce.reset();
186 
187     delete nfkc_cfSingleton;
188     nfkc_cfSingleton = nullptr;
189     nfkc_cfInitOnce.reset();
190 
191     delete nfkc_scfSingleton;
192     nfkc_scfSingleton = nullptr;
193     nfkc_scfInitOnce.reset();
194 
195     uhash_close(cache);
196     cache=nullptr;
197     return true;
198 }
199 
200 U_CDECL_END
201 
202 #if !NORM2_HARDCODE_NFC_DATA
203 const Norm2AllModes *
getNFCInstance(UErrorCode & errorCode)204 Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
205     if(U_FAILURE(errorCode)) { return nullptr; }
206     umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
207     return nfcSingleton;
208 }
209 #endif
210 
211 const Norm2AllModes *
getNFKCInstance(UErrorCode & errorCode)212 Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
213     if(U_FAILURE(errorCode)) { return nullptr; }
214     umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
215     return nfkcSingleton;
216 }
217 
218 const Norm2AllModes *
getNFKC_CFInstance(UErrorCode & errorCode)219 Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
220     if(U_FAILURE(errorCode)) { return nullptr; }
221     umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
222     return nfkc_cfSingleton;
223 }
224 
225 const Norm2AllModes *
getNFKC_SCFInstance(UErrorCode & errorCode)226 Norm2AllModes::getNFKC_SCFInstance(UErrorCode &errorCode) {
227     if(U_FAILURE(errorCode)) { return nullptr; }
228     umtx_initOnce(nfkc_scfInitOnce, &initSingletons, "nfkc_scf", errorCode);
229     return nfkc_scfSingleton;
230 }
231 
232 #if !NORM2_HARDCODE_NFC_DATA
233 const Normalizer2 *
getNFCInstance(UErrorCode & errorCode)234 Normalizer2::getNFCInstance(UErrorCode &errorCode) {
235     const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
236     return allModes!=nullptr ? &allModes->comp : nullptr;
237 }
238 
239 const Normalizer2 *
getNFDInstance(UErrorCode & errorCode)240 Normalizer2::getNFDInstance(UErrorCode &errorCode) {
241     const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
242     return allModes!=nullptr ? &allModes->decomp : nullptr;
243 }
244 
getFCDInstance(UErrorCode & errorCode)245 const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
246     const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
247     return allModes!=nullptr ? &allModes->fcd : nullptr;
248 }
249 
getFCCInstance(UErrorCode & errorCode)250 const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
251     const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
252     return allModes!=nullptr ? &allModes->fcc : nullptr;
253 }
254 
255 const Normalizer2Impl *
getNFCImpl(UErrorCode & errorCode)256 Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
257     const Norm2AllModes *allModes=Norm2AllModes::getNFCInstance(errorCode);
258     return allModes!=nullptr ? allModes->impl : nullptr;
259 }
260 #endif
261 
262 const Normalizer2 *
getNFKCInstance(UErrorCode & errorCode)263 Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
264     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
265     return allModes!=nullptr ? &allModes->comp : nullptr;
266 }
267 
268 const Normalizer2 *
getNFKDInstance(UErrorCode & errorCode)269 Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
270     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
271     return allModes!=nullptr ? &allModes->decomp : nullptr;
272 }
273 
274 const Normalizer2 *
getNFKCCasefoldInstance(UErrorCode & errorCode)275 Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
276     const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
277     return allModes!=nullptr ? &allModes->comp : nullptr;
278 }
279 
280 const Normalizer2 *
getNFKCSimpleCasefoldInstance(UErrorCode & errorCode)281 Normalizer2::getNFKCSimpleCasefoldInstance(UErrorCode &errorCode) {
282     const Norm2AllModes *allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode);
283     return allModes!=nullptr ? &allModes->comp : nullptr;
284 }
285 
286 const Normalizer2 *
getInstance(const char * packageName,const char * name,UNormalization2Mode mode,UErrorCode & errorCode)287 Normalizer2::getInstance(const char *packageName,
288                          const char *name,
289                          UNormalization2Mode mode,
290                          UErrorCode &errorCode) {
291     if(U_FAILURE(errorCode)) {
292         return nullptr;
293     }
294     if(name==nullptr || *name==0) {
295         errorCode=U_ILLEGAL_ARGUMENT_ERROR;
296         return nullptr;
297     }
298     const Norm2AllModes *allModes=nullptr;
299     if(packageName==nullptr) {
300         if(0==uprv_strcmp(name, "nfc")) {
301             allModes=Norm2AllModes::getNFCInstance(errorCode);
302         } else if(0==uprv_strcmp(name, "nfkc")) {
303             allModes=Norm2AllModes::getNFKCInstance(errorCode);
304         } else if(0==uprv_strcmp(name, "nfkc_cf")) {
305             allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
306         } else if(0==uprv_strcmp(name, "nfkc_scf")) {
307             allModes=Norm2AllModes::getNFKC_SCFInstance(errorCode);
308         }
309     }
310     if(allModes==nullptr && U_SUCCESS(errorCode)) {
311         {
312             Mutex lock;
313             if(cache!=nullptr) {
314                 allModes=(Norm2AllModes *)uhash_get(cache, name);
315             }
316         }
317         if(allModes==nullptr) {
318             ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
319             LocalPointer<Norm2AllModes> localAllModes(
320                 Norm2AllModes::createInstance(packageName, name, errorCode));
321             if(U_SUCCESS(errorCode)) {
322                 Mutex lock;
323                 if(cache==nullptr) {
324                     cache=uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &errorCode);
325                     if(U_FAILURE(errorCode)) {
326                         return nullptr;
327                     }
328                     uhash_setKeyDeleter(cache, uprv_free);
329                     uhash_setValueDeleter(cache, deleteNorm2AllModes);
330                 }
331                 void *temp=uhash_get(cache, name);
332                 if(temp==nullptr) {
333                     int32_t keyLength= static_cast<int32_t>(uprv_strlen(name)+1);
334                     char *nameCopy=(char *)uprv_malloc(keyLength);
335                     if(nameCopy==nullptr) {
336                         errorCode=U_MEMORY_ALLOCATION_ERROR;
337                         return nullptr;
338                     }
339                     uprv_memcpy(nameCopy, name, keyLength);
340                     allModes=localAllModes.getAlias();
341                     uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
342                 } else {
343                     // race condition
344                     allModes=(Norm2AllModes *)temp;
345                 }
346             }
347         }
348     }
349     if(allModes!=nullptr && U_SUCCESS(errorCode)) {
350         switch(mode) {
351         case UNORM2_COMPOSE:
352             return &allModes->comp;
353         case UNORM2_DECOMPOSE:
354             return &allModes->decomp;
355         case UNORM2_FCD:
356             return &allModes->fcd;
357         case UNORM2_COMPOSE_CONTIGUOUS:
358             return &allModes->fcc;
359         default:
360             break;  // do nothing
361         }
362     }
363     return nullptr;
364 }
365 
366 const Normalizer2 *
getInstance(UNormalizationMode mode,UErrorCode & errorCode)367 Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
368     if(U_FAILURE(errorCode)) {
369         return nullptr;
370     }
371     switch(mode) {
372     case UNORM_NFD:
373         return Normalizer2::getNFDInstance(errorCode);
374     case UNORM_NFKD:
375         return Normalizer2::getNFKDInstance(errorCode);
376     case UNORM_NFC:
377         return Normalizer2::getNFCInstance(errorCode);
378     case UNORM_NFKC:
379         return Normalizer2::getNFKCInstance(errorCode);
380     case UNORM_FCD:
381         return getFCDInstance(errorCode);
382     default:  // UNORM_NONE
383         return getNoopInstance(errorCode);
384     }
385 }
386 
387 const Normalizer2Impl *
getNFKCImpl(UErrorCode & errorCode)388 Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
389     const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
390     return allModes!=nullptr ? allModes->impl : nullptr;
391 }
392 
393 const Normalizer2Impl *
getNFKC_CFImpl(UErrorCode & errorCode)394 Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
395     const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
396     return allModes!=nullptr ? allModes->impl : nullptr;
397 }
398 
399 U_NAMESPACE_END
400 
401 // C API ------------------------------------------------------------------- ***
402 
403 U_NAMESPACE_USE
404 
405 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCInstance(UErrorCode * pErrorCode)406 unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
407     return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
408 }
409 
410 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode * pErrorCode)411 unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
412     return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
413 }
414 
415 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode * pErrorCode)416 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
417     return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
418 }
419 
420 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCSimpleCasefoldInstance(UErrorCode * pErrorCode)421 unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode) {
422     return (const UNormalizer2 *)Normalizer2::getNFKCSimpleCasefoldInstance(*pErrorCode);
423 }
424 
425 U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getInstance(const char * packageName,const char * name,UNormalization2Mode mode,UErrorCode * pErrorCode)426 unorm2_getInstance(const char *packageName,
427                    const char *name,
428                    UNormalization2Mode mode,
429                    UErrorCode *pErrorCode) {
430     return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
431 }
432 
433 U_CFUNC UNormalizationCheckResult
unorm_getQuickCheck(UChar32 c,UNormalizationMode mode)434 unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
435     if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
436         return UNORM_YES;
437     }
438     UErrorCode errorCode=U_ZERO_ERROR;
439     const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
440     if(U_SUCCESS(errorCode)) {
441         return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
442     } else {
443         return UNORM_MAYBE;
444     }
445 }
446 
447 #endif  // !UCONFIG_NO_NORMALIZATION
448