// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 1996-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
* rulebasedcollator.cpp
*
* (replaced the former tblcoll.cpp)
*
* created on: 2012feb14 with new and old collation code
* created by: Markus W. Scherer
*/
15
16 #include "unicode/utypes.h"
17
18 #if !UCONFIG_NO_COLLATION
19
20 #include "unicode/coll.h"
21 #include "unicode/coleitr.h"
22 #include "unicode/localpointer.h"
23 #include "unicode/locid.h"
24 #include "unicode/sortkey.h"
25 #include "unicode/tblcoll.h"
26 #include "unicode/ucol.h"
27 #include "unicode/uiter.h"
28 #include "unicode/uloc.h"
29 #include "unicode/uniset.h"
30 #include "unicode/unistr.h"
31 #include "unicode/usetiter.h"
32 #include "unicode/utf8.h"
33 #include "unicode/uversion.h"
34 #include "bocsu.h"
35 #include "bytesinkutil.h"
36 #include "charstr.h"
37 #include "cmemory.h"
38 #include "collation.h"
39 #include "collationcompare.h"
40 #include "collationdata.h"
41 #include "collationdatareader.h"
42 #include "collationfastlatin.h"
43 #include "collationiterator.h"
44 #include "collationkeys.h"
45 #include "collationroot.h"
46 #include "collationsets.h"
47 #include "collationsettings.h"
48 #include "collationtailoring.h"
49 #include "cstring.h"
50 #include "uassert.h"
51 #include "ucol_imp.h"
52 #include "uhash.h"
53 #include "uitercollationiterator.h"
54 #include "ulocimp.h"
55 #include "ustr_imp.h"
56 #include "utf16collationiterator.h"
57 #include "utf8collationiterator.h"
58 #include "uvectr64.h"
59
60 U_NAMESPACE_BEGIN
61
62 namespace {
63
64 class FixedSortKeyByteSink : public SortKeyByteSink {
65 public:
FixedSortKeyByteSink(char * dest,int32_t destCapacity)66 FixedSortKeyByteSink(char *dest, int32_t destCapacity)
67 : SortKeyByteSink(dest, destCapacity) {}
68 virtual ~FixedSortKeyByteSink();
69
70 private:
71 virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override;
72 virtual UBool Resize(int32_t appendCapacity, int32_t length) override;
73 };
74
~FixedSortKeyByteSink()75 FixedSortKeyByteSink::~FixedSortKeyByteSink() {}
76
77 void
AppendBeyondCapacity(const char * bytes,int32_t,int32_t length)78 FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
79 // buffer_ != nullptr && bytes != nullptr && n > 0 && appended_ > capacity_
80 // Fill the buffer completely.
81 int32_t available = capacity_ - length;
82 if (available > 0) {
83 uprv_memcpy(buffer_ + length, bytes, available);
84 }
85 }
86
87 UBool
Resize(int32_t,int32_t)88 FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
89 return false;
90 }
91
92 } // namespace
93
94 // Not in an anonymous namespace, so that it can be a friend of CollationKey.
95 class CollationKeyByteSink : public SortKeyByteSink {
96 public:
CollationKeyByteSink(CollationKey & key)97 CollationKeyByteSink(CollationKey &key)
98 : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
99 key_(key) {}
100 virtual ~CollationKeyByteSink();
101
102 private:
103 virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override;
104 virtual UBool Resize(int32_t appendCapacity, int32_t length) override;
105
106 CollationKey &key_;
107 };
108
~CollationKeyByteSink()109 CollationKeyByteSink::~CollationKeyByteSink() {}
110
111 void
AppendBeyondCapacity(const char * bytes,int32_t n,int32_t length)112 CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
113 // buffer_ != nullptr && bytes != nullptr && n > 0 && appended_ > capacity_
114 if (Resize(n, length)) {
115 uprv_memcpy(buffer_ + length, bytes, n);
116 }
117 }
118
119 UBool
Resize(int32_t appendCapacity,int32_t length)120 CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
121 if (buffer_ == nullptr) {
122 return false; // allocation failed before already
123 }
124 int32_t newCapacity = 2 * capacity_;
125 int32_t altCapacity = length + 2 * appendCapacity;
126 if (newCapacity < altCapacity) {
127 newCapacity = altCapacity;
128 }
129 if (newCapacity < 200) {
130 newCapacity = 200;
131 }
132 uint8_t *newBuffer = key_.reallocate(newCapacity, length);
133 if (newBuffer == nullptr) {
134 SetNotOk();
135 return false;
136 }
137 buffer_ = reinterpret_cast<char *>(newBuffer);
138 capacity_ = newCapacity;
139 return true;
140 }
141
/**
 * Copy constructor.
 * Copies all pointers and flags from the other collator and takes an additional
 * reference on the settings and on the cache entry.
 */
RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
        : Collator(other),
          data(other.data),
          settings(other.settings),
          tailoring(other.tailoring),
          cacheEntry(other.cacheEntry),
          validLocale(other.validLocale),
          explicitlySetAttributes(other.explicitlySetAttributes),
          actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid) {
    // Both objects are reference-counted; this collator now holds its own reference.
    settings->addRef();
    cacheEntry->addRef();
}
154
/**
 * Builds a collator from binary tailoring data.
 *
 * @param bin       the binary data; must not be nullptr
 * @param length    the length passed on to CollationDataReader::read(); must not be 0
 * @param base      the base collator; must not be nullptr, and its tailoring must be the root tailoring
 * @param errorCode in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for missing input,
 *                  U_UNSUPPORTED_ERROR for a non-root base, and
 *                  U_MEMORY_ALLOCATION_ERROR if the new tailoring cannot be created
 */
RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
                                     const RuleBasedCollator *base, UErrorCode &errorCode)
        : data(nullptr),
          settings(nullptr),
          tailoring(nullptr),
          cacheEntry(nullptr),
          validLocale(""),
          explicitlySetAttributes(0),
          actualLocaleIsSameAsValid(false) {
    if(U_FAILURE(errorCode)) { return; }
    if(bin == nullptr || length == 0 || base == nullptr) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    const CollationTailoring *root = CollationRoot::getRoot(errorCode);
    if(U_FAILURE(errorCode)) { return; }
    // Only the root collator is accepted as the base.
    if(base->tailoring != root) {
        errorCode = U_UNSUPPORTED_ERROR;
        return;
    }
    LocalPointer<CollationTailoring> fresh(new CollationTailoring(base->tailoring->settings));
    if(fresh.isNull() || fresh->isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    CollationDataReader::read(base->tailoring, bin, length, *fresh, errorCode);
    if(U_FAILURE(errorCode)) { return; }
    fresh->actualLocale.setToBogus();
    // Ownership of the tailoring moves to adoptTailoring().
    adoptTailoring(fresh.orphan(), errorCode);
}
185
RuleBasedCollator(const CollationCacheEntry * entry)186 RuleBasedCollator::RuleBasedCollator(const CollationCacheEntry *entry)
187 : data(entry->tailoring->data),
188 settings(entry->tailoring->settings),
189 tailoring(entry->tailoring),
190 cacheEntry(entry),
191 validLocale(entry->validLocale),
192 explicitlySetAttributes(0),
193 actualLocaleIsSameAsValid(false) {
194 settings->addRef();
195 cacheEntry->addRef();
196 }
197
~RuleBasedCollator()198 RuleBasedCollator::~RuleBasedCollator() {
199 SharedObject::clearPtr(settings);
200 SharedObject::clearPtr(cacheEntry);
201 }
202
203 void
adoptTailoring(CollationTailoring * t,UErrorCode & errorCode)204 RuleBasedCollator::adoptTailoring(CollationTailoring *t, UErrorCode &errorCode) {
205 if(U_FAILURE(errorCode)) {
206 t->deleteIfZeroRefCount();
207 return;
208 }
209 U_ASSERT(settings == nullptr && data == nullptr && tailoring == nullptr && cacheEntry == nullptr);
210 cacheEntry = new CollationCacheEntry(t->actualLocale, t);
211 if(cacheEntry == nullptr) {
212 errorCode = U_MEMORY_ALLOCATION_ERROR;
213 t->deleteIfZeroRefCount();
214 return;
215 }
216 data = t->data;
217 settings = t->settings;
218 settings->addRef();
219 tailoring = t;
220 cacheEntry->addRef();
221 validLocale = t->actualLocale;
222 actualLocaleIsSameAsValid = false;
223 }
224
225 RuleBasedCollator *
clone() const226 RuleBasedCollator::clone() const {
227 return new RuleBasedCollator(*this);
228 }
229
/**
 * Assignment operator.
 * Shares the other collator's settings and cache entry via SharedObject::copyPtr(),
 * and copies its tailoring pointer, locale and attribute flags.
 * Self-assignment is a no-op.
 */
RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
    if(this != &other) {
        SharedObject::copyPtr(other.settings, settings);
        tailoring = other.tailoring;
        SharedObject::copyPtr(other.cacheEntry, cacheEntry);
        // data always follows the tailoring that was just assigned.
        data = tailoring->data;
        validLocale = other.validLocale;
        explicitlySetAttributes = other.explicitlySetAttributes;
        actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
    }
    return *this;
}
241
242 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
243
244 bool
245 RuleBasedCollator::operator==(const Collator& other) const {
246 if(this == &other) { return true; }
247 if(!Collator::operator==(other)) { return false; }
248 const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
249 if(*settings != *o.settings) { return false; }
250 if(data == o.data) { return true; }
251 UBool thisIsRoot = data->base == nullptr;
252 UBool otherIsRoot = o.data->base == nullptr;
253 U_ASSERT(!thisIsRoot || !otherIsRoot); // otherwise their data pointers should be ==
254 if(thisIsRoot != otherIsRoot) { return false; }
255 if((thisIsRoot || !tailoring->rules.isEmpty()) &&
256 (otherIsRoot || !o.tailoring->rules.isEmpty())) {
257 // Shortcut: If both collators have valid rule strings, then compare those.
258 if(tailoring->rules == o.tailoring->rules) { return true; }
259 }
260 // Different rule strings can result in the same or equivalent tailoring.
261 // The rule strings are optional in ICU resource bundles, although included by default.
262 // cloneBinary() drops the rule string.
263 UErrorCode errorCode = U_ZERO_ERROR;
264 LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
265 LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
266 if(U_FAILURE(errorCode)) { return false; }
267 if(*thisTailored != *otherTailored) { return false; }
268 // For completeness, we should compare all of the mappings;
269 // or we should create a list of strings, sort it with one collator,
270 // and check if both collators compare adjacent strings the same
271 // (order & strength, down to quaternary); or similar.
272 // Testing equality of collators seems unusual.
273 return true;
274 }
275
276 int32_t
hashCode() const277 RuleBasedCollator::hashCode() const {
278 int32_t h = settings->hashCode();
279 if(data->base == nullptr) { return h; } // root collator
280 // Do not rely on the rule string, see comments in operator==().
281 UErrorCode errorCode = U_ZERO_ERROR;
282 LocalPointer<UnicodeSet> set(getTailoredSet(errorCode));
283 if(U_FAILURE(errorCode)) { return 0; }
284 UnicodeSetIterator iter(*set);
285 while(iter.next() && !iter.isString()) {
286 h ^= data->getCE32(iter.getCodepoint());
287 }
288 return h;
289 }
290
291 void
setLocales(const Locale & requested,const Locale & valid,const Locale & actual)292 RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
293 const Locale &actual) {
294 if(actual == tailoring->actualLocale) {
295 actualLocaleIsSameAsValid = false;
296 } else {
297 U_ASSERT(actual == valid);
298 actualLocaleIsSameAsValid = true;
299 }
300 // Do not modify tailoring.actualLocale:
301 // We cannot be sure that that would be thread-safe.
302 validLocale = valid;
303 (void)requested; // Ignore, see also ticket #10477.
304 }
305
306 Locale
getLocale(ULocDataLocaleType type,UErrorCode & errorCode) const307 RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
308 if(U_FAILURE(errorCode)) {
309 return Locale::getRoot();
310 }
311 switch(type) {
312 case ULOC_ACTUAL_LOCALE:
313 return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
314 case ULOC_VALID_LOCALE:
315 return validLocale;
316 case ULOC_REQUESTED_LOCALE:
317 default:
318 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
319 return Locale::getRoot();
320 }
321 }
322
323 const char *
internalGetLocaleID(ULocDataLocaleType type,UErrorCode & errorCode) const324 RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
325 if(U_FAILURE(errorCode)) {
326 return nullptr;
327 }
328 const Locale *result;
329 switch(type) {
330 case ULOC_ACTUAL_LOCALE:
331 result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
332 break;
333 case ULOC_VALID_LOCALE:
334 result = &validLocale;
335 break;
336 case ULOC_REQUESTED_LOCALE:
337 default:
338 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
339 return nullptr;
340 }
341 if(result->isBogus()) { return nullptr; }
342 const char *id = result->getName();
343 return id[0] == 0 ? "root" : id;
344 }
345
346 const UnicodeString&
getRules() const347 RuleBasedCollator::getRules() const {
348 return tailoring->rules;
349 }
350
351 void
getRules(UColRuleOption delta,UnicodeString & buffer) const352 RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
353 if(delta == UCOL_TAILORING_ONLY) {
354 buffer = tailoring->rules;
355 return;
356 }
357 // UCOL_FULL_RULES
358 buffer.remove();
359 CollationLoader::appendRootRules(buffer);
360 buffer.append(tailoring->rules).getTerminatedBuffer();
361 }
362
363 void
getVersion(UVersionInfo version) const364 RuleBasedCollator::getVersion(UVersionInfo version) const {
365 uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);
366 version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);
367 }
368
369 UnicodeSet *
getTailoredSet(UErrorCode & errorCode) const370 RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
371 if(U_FAILURE(errorCode)) { return nullptr; }
372 UnicodeSet *tailored = new UnicodeSet();
373 if(tailored == nullptr) {
374 errorCode = U_MEMORY_ALLOCATION_ERROR;
375 return nullptr;
376 }
377 if(data->base != nullptr) {
378 TailoredSet(tailored).forData(data, errorCode);
379 if(U_FAILURE(errorCode)) {
380 delete tailored;
381 return nullptr;
382 }
383 }
384 return tailored;
385 }
386
387 void
internalGetContractionsAndExpansions(UnicodeSet * contractions,UnicodeSet * expansions,UBool addPrefixes,UErrorCode & errorCode) const388 RuleBasedCollator::internalGetContractionsAndExpansions(
389 UnicodeSet *contractions, UnicodeSet *expansions,
390 UBool addPrefixes, UErrorCode &errorCode) const {
391 if(U_FAILURE(errorCode)) { return; }
392 if(contractions != nullptr) {
393 contractions->clear();
394 }
395 if(expansions != nullptr) {
396 expansions->clear();
397 }
398 ContractionsAndExpansions(contractions, expansions, nullptr, addPrefixes).forData(data, errorCode);
399 }
400
401 void
internalAddContractions(UChar32 c,UnicodeSet & set,UErrorCode & errorCode) const402 RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
403 if(U_FAILURE(errorCode)) { return; }
404 ContractionsAndExpansions(&set, nullptr, nullptr, false).forCodePoint(data, c, errorCode);
405 }
406
407 const CollationSettings &
getDefaultSettings() const408 RuleBasedCollator::getDefaultSettings() const {
409 return *tailoring->settings;
410 }
411
412 UColAttributeValue
getAttribute(UColAttribute attr,UErrorCode & errorCode) const413 RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
414 if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
415 int32_t option;
416 switch(attr) {
417 case UCOL_FRENCH_COLLATION:
418 option = CollationSettings::BACKWARD_SECONDARY;
419 break;
420 case UCOL_ALTERNATE_HANDLING:
421 return settings->getAlternateHandling();
422 case UCOL_CASE_FIRST:
423 return settings->getCaseFirst();
424 case UCOL_CASE_LEVEL:
425 option = CollationSettings::CASE_LEVEL;
426 break;
427 case UCOL_NORMALIZATION_MODE:
428 option = CollationSettings::CHECK_FCD;
429 break;
430 case UCOL_STRENGTH:
431 return (UColAttributeValue)settings->getStrength();
432 case UCOL_HIRAGANA_QUATERNARY_MODE:
433 // Deprecated attribute, unsettable.
434 return UCOL_OFF;
435 case UCOL_NUMERIC_COLLATION:
436 option = CollationSettings::NUMERIC;
437 break;
438 default:
439 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
440 return UCOL_DEFAULT;
441 }
442 return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON;
443 }
444
445 void
setAttribute(UColAttribute attr,UColAttributeValue value,UErrorCode & errorCode)446 RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
447 UErrorCode &errorCode) {
448 UColAttributeValue oldValue = getAttribute(attr, errorCode);
449 if(U_FAILURE(errorCode)) { return; }
450 if(value == oldValue) {
451 setAttributeExplicitly(attr);
452 return;
453 }
454 const CollationSettings &defaultSettings = getDefaultSettings();
455 if(settings == &defaultSettings) {
456 if(value == UCOL_DEFAULT) {
457 setAttributeDefault(attr);
458 return;
459 }
460 }
461 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
462 if(ownedSettings == nullptr) {
463 errorCode = U_MEMORY_ALLOCATION_ERROR;
464 return;
465 }
466
467 switch(attr) {
468 case UCOL_FRENCH_COLLATION:
469 ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
470 defaultSettings.options, errorCode);
471 break;
472 case UCOL_ALTERNATE_HANDLING:
473 ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
474 break;
475 case UCOL_CASE_FIRST:
476 ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
477 break;
478 case UCOL_CASE_LEVEL:
479 ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
480 defaultSettings.options, errorCode);
481 break;
482 case UCOL_NORMALIZATION_MODE:
483 ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
484 defaultSettings.options, errorCode);
485 break;
486 case UCOL_STRENGTH:
487 ownedSettings->setStrength(value, defaultSettings.options, errorCode);
488 break;
489 case UCOL_HIRAGANA_QUATERNARY_MODE:
490 // Deprecated attribute. Check for valid values but do not change anything.
491 if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
492 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
493 }
494 break;
495 case UCOL_NUMERIC_COLLATION:
496 ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
497 break;
498 default:
499 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
500 break;
501 }
502 if(U_FAILURE(errorCode)) { return; }
503 setFastLatinOptions(*ownedSettings);
504 if(value == UCOL_DEFAULT) {
505 setAttributeDefault(attr);
506 } else {
507 setAttributeExplicitly(attr);
508 }
509 }
510
511 Collator &
setMaxVariable(UColReorderCode group,UErrorCode & errorCode)512 RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
513 if(U_FAILURE(errorCode)) { return *this; }
514 // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
515 int32_t value;
516 if(group == UCOL_REORDER_CODE_DEFAULT) {
517 value = UCOL_DEFAULT;
518 } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CURRENCY) {
519 value = group - UCOL_REORDER_CODE_FIRST;
520 } else {
521 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
522 return *this;
523 }
524 CollationSettings::MaxVariable oldValue = settings->getMaxVariable();
525 if(value == oldValue) {
526 setAttributeExplicitly(ATTR_VARIABLE_TOP);
527 return *this;
528 }
529 const CollationSettings &defaultSettings = getDefaultSettings();
530 if(settings == &defaultSettings) {
531 if(value == UCOL_DEFAULT) {
532 setAttributeDefault(ATTR_VARIABLE_TOP);
533 return *this;
534 }
535 }
536 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
537 if(ownedSettings == nullptr) {
538 errorCode = U_MEMORY_ALLOCATION_ERROR;
539 return *this;
540 }
541
542 if(group == UCOL_REORDER_CODE_DEFAULT) {
543 group = (UColReorderCode)(
544 UCOL_REORDER_CODE_FIRST + int32_t{defaultSettings.getMaxVariable()});
545 }
546 uint32_t varTop = data->getLastPrimaryForGroup(group);
547 U_ASSERT(varTop != 0);
548 ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);
549 if(U_FAILURE(errorCode)) { return *this; }
550 ownedSettings->variableTop = varTop;
551 setFastLatinOptions(*ownedSettings);
552 if(value == UCOL_DEFAULT) {
553 setAttributeDefault(ATTR_VARIABLE_TOP);
554 } else {
555 setAttributeExplicitly(ATTR_VARIABLE_TOP);
556 }
557 return *this;
558 }
559
560 UColReorderCode
getMaxVariable() const561 RuleBasedCollator::getMaxVariable() const {
562 return (UColReorderCode)(UCOL_REORDER_CODE_FIRST + int32_t{settings->getMaxVariable()});
563 }
564
565 uint32_t
getVariableTop(UErrorCode &) const566 RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
567 return settings->variableTop;
568 }
569
570 uint32_t
setVariableTop(const char16_t * varTop,int32_t len,UErrorCode & errorCode)571 RuleBasedCollator::setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &errorCode) {
572 if(U_FAILURE(errorCode)) { return 0; }
573 if(varTop == nullptr && len !=0) {
574 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
575 return 0;
576 }
577 if(len < 0) { len = u_strlen(varTop); }
578 if(len == 0) {
579 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
580 return 0;
581 }
582 UBool numeric = settings->isNumeric();
583 int64_t ce1, ce2;
584 if(settings->dontCheckFCD()) {
585 UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
586 ce1 = ci.nextCE(errorCode);
587 ce2 = ci.nextCE(errorCode);
588 } else {
589 FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
590 ce1 = ci.nextCE(errorCode);
591 ce2 = ci.nextCE(errorCode);
592 }
593 if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) {
594 errorCode = U_CE_NOT_FOUND_ERROR;
595 return 0;
596 }
597 setVariableTop((uint32_t)(ce1 >> 32), errorCode);
598 return settings->variableTop;
599 }
600
601 uint32_t
setVariableTop(const UnicodeString & varTop,UErrorCode & errorCode)602 RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
603 return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode);
604 }
605
606 void
setVariableTop(uint32_t varTop,UErrorCode & errorCode)607 RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
608 if(U_FAILURE(errorCode)) { return; }
609 if(varTop != settings->variableTop) {
610 // Pin the variable top to the end of the reordering group which contains it.
611 // Only a few special groups are supported.
612 int32_t group = data->getGroupForPrimary(varTop);
613 if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
614 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
615 return;
616 }
617 uint32_t v = data->getLastPrimaryForGroup(group);
618 U_ASSERT(v != 0 && v >= varTop);
619 varTop = v;
620 if(varTop != settings->variableTop) {
621 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
622 if(ownedSettings == nullptr) {
623 errorCode = U_MEMORY_ALLOCATION_ERROR;
624 return;
625 }
626 ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,
627 getDefaultSettings().options, errorCode);
628 if(U_FAILURE(errorCode)) { return; }
629 ownedSettings->variableTop = varTop;
630 setFastLatinOptions(*ownedSettings);
631 }
632 }
633 if(varTop == getDefaultSettings().variableTop) {
634 setAttributeDefault(ATTR_VARIABLE_TOP);
635 } else {
636 setAttributeExplicitly(ATTR_VARIABLE_TOP);
637 }
638 }
639
640 int32_t
getReorderCodes(int32_t * dest,int32_t capacity,UErrorCode & errorCode) const641 RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
642 UErrorCode &errorCode) const {
643 if(U_FAILURE(errorCode)) { return 0; }
644 if(capacity < 0 || (dest == nullptr && capacity > 0)) {
645 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
646 return 0;
647 }
648 int32_t length = settings->reorderCodesLength;
649 if(length == 0) { return 0; }
650 if(length > capacity) {
651 errorCode = U_BUFFER_OVERFLOW_ERROR;
652 return length;
653 }
654 uprv_memcpy(dest, settings->reorderCodes, length * 4);
655 return length;
656 }
657
658 void
setReorderCodes(const int32_t * reorderCodes,int32_t length,UErrorCode & errorCode)659 RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
660 UErrorCode &errorCode) {
661 if(U_FAILURE(errorCode)) { return; }
662 if(length < 0 || (reorderCodes == nullptr && length > 0)) {
663 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
664 return;
665 }
666 if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_NONE) {
667 length = 0;
668 }
669 if(length == settings->reorderCodesLength &&
670 uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) {
671 return;
672 }
673 const CollationSettings &defaultSettings = getDefaultSettings();
674 if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
675 if(settings != &defaultSettings) {
676 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
677 if(ownedSettings == nullptr) {
678 errorCode = U_MEMORY_ALLOCATION_ERROR;
679 return;
680 }
681 ownedSettings->copyReorderingFrom(defaultSettings, errorCode);
682 setFastLatinOptions(*ownedSettings);
683 }
684 return;
685 }
686 CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
687 if(ownedSettings == nullptr) {
688 errorCode = U_MEMORY_ALLOCATION_ERROR;
689 return;
690 }
691 ownedSettings->setReordering(*data, reorderCodes, length, errorCode);
692 setFastLatinOptions(*ownedSettings);
693 }
694
695 void
setFastLatinOptions(CollationSettings & ownedSettings) const696 RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {
697 ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
698 data, ownedSettings,
699 ownedSettings.fastLatinPrimaries, UPRV_LENGTHOF(ownedSettings.fastLatinPrimaries));
700 }
701
702 UCollationResult
compare(const UnicodeString & left,const UnicodeString & right,UErrorCode & errorCode) const703 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
704 UErrorCode &errorCode) const {
705 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
706 return doCompare(left.getBuffer(), left.length(),
707 right.getBuffer(), right.length(), errorCode);
708 }
709
710 UCollationResult
compare(const UnicodeString & left,const UnicodeString & right,int32_t length,UErrorCode & errorCode) const711 RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
712 int32_t length, UErrorCode &errorCode) const {
713 if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; }
714 if(length < 0) {
715 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
716 return UCOL_EQUAL;
717 }
718 int32_t leftLength = left.length();
719 int32_t rightLength = right.length();
720 if(leftLength > length) { leftLength = length; }
721 if(rightLength > length) { rightLength = length; }
722 return doCompare(left.getBuffer(), leftLength,
723 right.getBuffer(), rightLength, errorCode);
724 }
725
726 UCollationResult
compare(const char16_t * left,int32_t leftLength,const char16_t * right,int32_t rightLength,UErrorCode & errorCode) const727 RuleBasedCollator::compare(const char16_t *left, int32_t leftLength,
728 const char16_t *right, int32_t rightLength,
729 UErrorCode &errorCode) const {
730 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
731 if((left == nullptr && leftLength != 0) || (right == nullptr && rightLength != 0)) {
732 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
733 return UCOL_EQUAL;
734 }
735 // Make sure both or neither strings have a known length.
736 // We do not optimize for mixed length/termination.
737 if(leftLength >= 0) {
738 if(rightLength < 0) { rightLength = u_strlen(right); }
739 } else {
740 if(rightLength >= 0) { leftLength = u_strlen(left); }
741 }
742 return doCompare(left, leftLength, right, rightLength, errorCode);
743 }
744
745 UCollationResult
compareUTF8(const StringPiece & left,const StringPiece & right,UErrorCode & errorCode) const746 RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
747 UErrorCode &errorCode) const {
748 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
749 const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());
750 const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
751 if((leftBytes == nullptr && !left.empty()) || (rightBytes == nullptr && !right.empty())) {
752 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
753 return UCOL_EQUAL;
754 }
755 return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
756 }
757
758 UCollationResult
internalCompareUTF8(const char * left,int32_t leftLength,const char * right,int32_t rightLength,UErrorCode & errorCode) const759 RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
760 const char *right, int32_t rightLength,
761 UErrorCode &errorCode) const {
762 if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
763 if((left == nullptr && leftLength != 0) || (right == nullptr && rightLength != 0)) {
764 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
765 return UCOL_EQUAL;
766 }
767 // Make sure both or neither strings have a known length.
768 // We do not optimize for mixed length/termination.
769 if(leftLength >= 0) {
770 if(rightLength < 0) { rightLength = static_cast<int32_t>(uprv_strlen(right)); }
771 } else {
772 if(rightLength >= 0) { leftLength = static_cast<int32_t>(uprv_strlen(left)); }
773 }
774 return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength,
775 reinterpret_cast<const uint8_t *>(right), rightLength, errorCode);
776 }
777
778 namespace {
779
780 /**
781 * Abstract iterator for identical-level string comparisons.
782 * Returns FCD code points and handles temporary switching to NFD.
783 */
784 class NFDIterator : public UObject {
785 public:
NFDIterator()786 NFDIterator() : index(-1), length(0) {}
~NFDIterator()787 virtual ~NFDIterator() {}
788 /**
789 * Returns the next code point from the internal normalization buffer,
790 * or else the next text code point.
791 * Returns -1 at the end of the text.
792 */
nextCodePoint()793 UChar32 nextCodePoint() {
794 if(index >= 0) {
795 if(index == length) {
796 index = -1;
797 } else {
798 UChar32 c;
799 U16_NEXT_UNSAFE(decomp, index, c);
800 return c;
801 }
802 }
803 return nextRawCodePoint();
804 }
805 /**
806 * @param nfcImpl
807 * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
808 * @return the first code point in c's decomposition,
809 * or c itself if it was decomposed already or if it does not decompose
810 */
nextDecomposedCodePoint(const Normalizer2Impl & nfcImpl,UChar32 c)811 UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
812 if(index >= 0) { return c; }
813 decomp = nfcImpl.getDecomposition(c, buffer, length);
814 if(decomp == nullptr) { return c; }
815 index = 0;
816 U16_NEXT_UNSAFE(decomp, index, c);
817 return c;
818 }
819 protected:
820 /**
821 * Returns the next text code point in FCD order.
822 * Returns -1 at the end of the text.
823 */
824 virtual UChar32 nextRawCodePoint() = 0;
825 private:
826 const char16_t *decomp;
827 char16_t buffer[4];
828 int32_t index;
829 int32_t length;
830 };
831
832 class UTF16NFDIterator : public NFDIterator {
833 public:
UTF16NFDIterator(const char16_t * text,const char16_t * textLimit)834 UTF16NFDIterator(const char16_t *text, const char16_t *textLimit) : s(text), limit(textLimit) {}
835 protected:
nextRawCodePoint()836 virtual UChar32 nextRawCodePoint() override {
837 if(s == limit) { return U_SENTINEL; }
838 UChar32 c = *s++;
839 if(limit == nullptr && c == 0) {
840 s = nullptr;
841 return U_SENTINEL;
842 }
843 char16_t trail;
844 if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) {
845 ++s;
846 c = U16_GET_SUPPLEMENTARY(c, trail);
847 }
848 return c;
849 }
850
851 const char16_t *s;
852 const char16_t *limit;
853 };
854
855 class FCDUTF16NFDIterator : public UTF16NFDIterator {
856 public:
FCDUTF16NFDIterator(const Normalizer2Impl & nfcImpl,const char16_t * text,const char16_t * textLimit)857 FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const char16_t *text, const char16_t *textLimit)
858 : UTF16NFDIterator(nullptr, nullptr) {
859 UErrorCode errorCode = U_ZERO_ERROR;
860 const char16_t *spanLimit = nfcImpl.makeFCD(text, textLimit, nullptr, errorCode);
861 if(U_FAILURE(errorCode)) { return; }
862 if(spanLimit == textLimit || (textLimit == nullptr && *spanLimit == 0)) {
863 s = text;
864 limit = spanLimit;
865 } else {
866 str.setTo(text, (int32_t)(spanLimit - text));
867 {
868 ReorderingBuffer r_buffer(nfcImpl, str);
869 if(r_buffer.init(str.length(), errorCode)) {
870 nfcImpl.makeFCD(spanLimit, textLimit, &r_buffer, errorCode);
871 }
872 }
873 if(U_SUCCESS(errorCode)) {
874 s = str.getBuffer();
875 limit = s + str.length();
876 }
877 }
878 }
879 private:
880 UnicodeString str;
881 };
882
883 class UTF8NFDIterator : public NFDIterator {
884 public:
UTF8NFDIterator(const uint8_t * text,int32_t textLength)885 UTF8NFDIterator(const uint8_t *text, int32_t textLength)
886 : s(text), pos(0), length(textLength) {}
887 protected:
nextRawCodePoint()888 virtual UChar32 nextRawCodePoint() override {
889 if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; }
890 UChar32 c;
891 U8_NEXT_OR_FFFD(s, pos, length, c);
892 return c;
893 }
894
895 const uint8_t *s;
896 int32_t pos;
897 int32_t length;
898 };
899
900 class FCDUTF8NFDIterator : public NFDIterator {
901 public:
FCDUTF8NFDIterator(const CollationData * data,const uint8_t * text,int32_t textLength)902 FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
903 : u8ci(data, false, text, 0, textLength) {}
904 protected:
nextRawCodePoint()905 virtual UChar32 nextRawCodePoint() override {
906 UErrorCode errorCode = U_ZERO_ERROR;
907 return u8ci.nextCodePoint(errorCode);
908 }
909 private:
910 FCDUTF8CollationIterator u8ci;
911 };
912
913 class UIterNFDIterator : public NFDIterator {
914 public:
UIterNFDIterator(UCharIterator & it)915 UIterNFDIterator(UCharIterator &it) : iter(it) {}
916 protected:
nextRawCodePoint()917 virtual UChar32 nextRawCodePoint() override {
918 return uiter_next32(&iter);
919 }
920 private:
921 UCharIterator &iter;
922 };
923
924 class FCDUIterNFDIterator : public NFDIterator {
925 public:
FCDUIterNFDIterator(const CollationData * data,UCharIterator & it,int32_t startIndex)926 FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
927 : uici(data, false, it, startIndex) {}
928 protected:
nextRawCodePoint()929 virtual UChar32 nextRawCodePoint() override {
930 UErrorCode errorCode = U_ZERO_ERROR;
931 return uici.nextCodePoint(errorCode);
932 }
933 private:
934 FCDUIterCollationIterator uici;
935 };
936
compareNFDIter(const Normalizer2Impl & nfcImpl,NFDIterator & left,NFDIterator & right)937 UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,
938 NFDIterator &left, NFDIterator &right) {
939 for(;;) {
940 // Fetch the next FCD code point from each string.
941 UChar32 leftCp = left.nextCodePoint();
942 UChar32 rightCp = right.nextCodePoint();
943 if(leftCp == rightCp) {
944 if(leftCp < 0) { break; }
945 continue;
946 }
947 // If they are different, then decompose each and compare again.
948 if(leftCp < 0) {
949 leftCp = -2; // end of string
950 } else if(leftCp == 0xfffe) {
951 leftCp = -1; // U+FFFE: merge separator
952 } else {
953 leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
954 }
955 if(rightCp < 0) {
956 rightCp = -2; // end of string
957 } else if(rightCp == 0xfffe) {
958 rightCp = -1; // U+FFFE: merge separator
959 } else {
960 rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
961 }
962 if(leftCp < rightCp) { return UCOL_LESS; }
963 if(leftCp > rightCp) { return UCOL_GREATER; }
964 }
965 return UCOL_EQUAL;
966 }
967
968 } // namespace
969
970 UCollationResult
doCompare(const char16_t * left,int32_t leftLength,const char16_t * right,int32_t rightLength,UErrorCode & errorCode) const971 RuleBasedCollator::doCompare(const char16_t *left, int32_t leftLength,
972 const char16_t *right, int32_t rightLength,
973 UErrorCode &errorCode) const {
974 // U_FAILURE(errorCode) checked by caller.
975 if(left == right && leftLength == rightLength) {
976 return UCOL_EQUAL;
977 }
978
979 // Identical-prefix test.
980 const char16_t *leftLimit;
981 const char16_t *rightLimit;
982 int32_t equalPrefixLength = 0;
983 if(leftLength < 0) {
984 leftLimit = nullptr;
985 rightLimit = nullptr;
986 char16_t c;
987 while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
988 if(c == 0) { return UCOL_EQUAL; }
989 ++equalPrefixLength;
990 }
991 } else {
992 leftLimit = left + leftLength;
993 rightLimit = right + rightLength;
994 for(;;) {
995 if(equalPrefixLength == leftLength) {
996 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
997 break;
998 } else if(equalPrefixLength == rightLength ||
999 left[equalPrefixLength] != right[equalPrefixLength]) {
1000 break;
1001 }
1002 ++equalPrefixLength;
1003 }
1004 }
1005
1006 UBool numeric = settings->isNumeric();
1007 if(equalPrefixLength > 0) {
1008 if((equalPrefixLength != leftLength &&
1009 data->isUnsafeBackward(left[equalPrefixLength], numeric)) ||
1010 (equalPrefixLength != rightLength &&
1011 data->isUnsafeBackward(right[equalPrefixLength], numeric))) {
1012 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1013 while(--equalPrefixLength > 0 &&
1014 data->isUnsafeBackward(left[equalPrefixLength], numeric)) {}
1015 }
1016 // Notes:
1017 // - A longer string can compare equal to a prefix of it if only ignorables follow.
1018 // - With a backward level, a longer string can compare less-than a prefix of it.
1019
1020 // Pass the actual start of each string into the CollationIterators,
1021 // plus the equalPrefixLength position,
1022 // so that prefix matches back into the equal prefix work.
1023 }
1024
1025 int32_t result;
1026 int32_t fastLatinOptions = settings->fastLatinOptions;
1027 if(fastLatinOptions >= 0 &&
1028 (equalPrefixLength == leftLength ||
1029 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&
1030 (equalPrefixLength == rightLength ||
1031 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) {
1032 if(leftLength >= 0) {
1033 result = CollationFastLatin::compareUTF16(data->fastLatinTable,
1034 settings->fastLatinPrimaries,
1035 fastLatinOptions,
1036 left + equalPrefixLength,
1037 leftLength - equalPrefixLength,
1038 right + equalPrefixLength,
1039 rightLength - equalPrefixLength);
1040 } else {
1041 result = CollationFastLatin::compareUTF16(data->fastLatinTable,
1042 settings->fastLatinPrimaries,
1043 fastLatinOptions,
1044 left + equalPrefixLength, -1,
1045 right + equalPrefixLength, -1);
1046 }
1047 } else {
1048 result = CollationFastLatin::BAIL_OUT_RESULT;
1049 }
1050
1051 if(result == CollationFastLatin::BAIL_OUT_RESULT) {
1052 if(settings->dontCheckFCD()) {
1053 UTF16CollationIterator leftIter(data, numeric,
1054 left, left + equalPrefixLength, leftLimit);
1055 UTF16CollationIterator rightIter(data, numeric,
1056 right, right + equalPrefixLength, rightLimit);
1057 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1058 } else {
1059 FCDUTF16CollationIterator leftIter(data, numeric,
1060 left, left + equalPrefixLength, leftLimit);
1061 FCDUTF16CollationIterator rightIter(data, numeric,
1062 right, right + equalPrefixLength, rightLimit);
1063 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1064 }
1065 }
1066 if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1067 return (UCollationResult)result;
1068 }
1069
1070 // Note: If NUL-terminated, we could get the actual limits from the iterators now.
1071 // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
1072 // and the benefit seems unlikely to be measurable.
1073
1074 // Compare identical level.
1075 const Normalizer2Impl &nfcImpl = data->nfcImpl;
1076 left += equalPrefixLength;
1077 right += equalPrefixLength;
1078 if(settings->dontCheckFCD()) {
1079 UTF16NFDIterator leftIter(left, leftLimit);
1080 UTF16NFDIterator rightIter(right, rightLimit);
1081 return compareNFDIter(nfcImpl, leftIter, rightIter);
1082 } else {
1083 FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit);
1084 FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit);
1085 return compareNFDIter(nfcImpl, leftIter, rightIter);
1086 }
1087 }
1088
1089 UCollationResult
doCompare(const uint8_t * left,int32_t leftLength,const uint8_t * right,int32_t rightLength,UErrorCode & errorCode) const1090 RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
1091 const uint8_t *right, int32_t rightLength,
1092 UErrorCode &errorCode) const {
1093 // U_FAILURE(errorCode) checked by caller.
1094 if(left == right && leftLength == rightLength) {
1095 return UCOL_EQUAL;
1096 }
1097
1098 // Identical-prefix test.
1099 int32_t equalPrefixLength = 0;
1100 if(leftLength < 0) {
1101 uint8_t c;
1102 while((c = left[equalPrefixLength]) == right[equalPrefixLength]) {
1103 if(c == 0) { return UCOL_EQUAL; }
1104 ++equalPrefixLength;
1105 }
1106 } else {
1107 for(;;) {
1108 if(equalPrefixLength == leftLength) {
1109 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; }
1110 break;
1111 } else if(equalPrefixLength == rightLength ||
1112 left[equalPrefixLength] != right[equalPrefixLength]) {
1113 break;
1114 }
1115 ++equalPrefixLength;
1116 }
1117 }
1118 // Back up to the start of a partially-equal code point.
1119 if(equalPrefixLength > 0 &&
1120 ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength])) ||
1121 (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength])))) {
1122 while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])) {}
1123 }
1124
1125 UBool numeric = settings->isNumeric();
1126 if(equalPrefixLength > 0) {
1127 UBool unsafe = false;
1128 if(equalPrefixLength != leftLength) {
1129 int32_t i = equalPrefixLength;
1130 UChar32 c;
1131 U8_NEXT_OR_FFFD(left, i, leftLength, c);
1132 unsafe = data->isUnsafeBackward(c, numeric);
1133 }
1134 if(!unsafe && equalPrefixLength != rightLength) {
1135 int32_t i = equalPrefixLength;
1136 UChar32 c;
1137 U8_NEXT_OR_FFFD(right, i, rightLength, c);
1138 unsafe = data->isUnsafeBackward(c, numeric);
1139 }
1140 if(unsafe) {
1141 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1142 UChar32 c;
1143 do {
1144 U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c);
1145 } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric));
1146 }
1147 // See the notes in the UTF-16 version.
1148
1149 // Pass the actual start of each string into the CollationIterators,
1150 // plus the equalPrefixLength position,
1151 // so that prefix matches back into the equal prefix work.
1152 }
1153
1154 int32_t result;
1155 int32_t fastLatinOptions = settings->fastLatinOptions;
1156 if(fastLatinOptions >= 0 &&
1157 (equalPrefixLength == leftLength ||
1158 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
1159 (equalPrefixLength == rightLength ||
1160 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
1161 if(leftLength >= 0) {
1162 result = CollationFastLatin::compareUTF8(data->fastLatinTable,
1163 settings->fastLatinPrimaries,
1164 fastLatinOptions,
1165 left + equalPrefixLength,
1166 leftLength - equalPrefixLength,
1167 right + equalPrefixLength,
1168 rightLength - equalPrefixLength);
1169 } else {
1170 result = CollationFastLatin::compareUTF8(data->fastLatinTable,
1171 settings->fastLatinPrimaries,
1172 fastLatinOptions,
1173 left + equalPrefixLength, -1,
1174 right + equalPrefixLength, -1);
1175 }
1176 } else {
1177 result = CollationFastLatin::BAIL_OUT_RESULT;
1178 }
1179
1180 if(result == CollationFastLatin::BAIL_OUT_RESULT) {
1181 if(settings->dontCheckFCD()) {
1182 UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
1183 UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
1184 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1185 } else {
1186 FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);
1187 FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);
1188 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1189 }
1190 }
1191 if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1192 return (UCollationResult)result;
1193 }
1194
1195 // Note: If NUL-terminated, we could get the actual limits from the iterators now.
1196 // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
1197 // and the benefit seems unlikely to be measurable.
1198
1199 // Compare identical level.
1200 const Normalizer2Impl &nfcImpl = data->nfcImpl;
1201 left += equalPrefixLength;
1202 right += equalPrefixLength;
1203 if(leftLength > 0) {
1204 leftLength -= equalPrefixLength;
1205 rightLength -= equalPrefixLength;
1206 }
1207 if(settings->dontCheckFCD()) {
1208 UTF8NFDIterator leftIter(left, leftLength);
1209 UTF8NFDIterator rightIter(right, rightLength);
1210 return compareNFDIter(nfcImpl, leftIter, rightIter);
1211 } else {
1212 FCDUTF8NFDIterator leftIter(data, left, leftLength);
1213 FCDUTF8NFDIterator rightIter(data, right, rightLength);
1214 return compareNFDIter(nfcImpl, leftIter, rightIter);
1215 }
1216 }
1217
1218 UCollationResult
compare(UCharIterator & left,UCharIterator & right,UErrorCode & errorCode) const1219 RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
1220 UErrorCode &errorCode) const {
1221 if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; }
1222 UBool numeric = settings->isNumeric();
1223
1224 // Identical-prefix test.
1225 int32_t equalPrefixLength = 0;
1226 {
1227 UChar32 leftUnit;
1228 UChar32 rightUnit;
1229 while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right))) {
1230 if(leftUnit < 0) { return UCOL_EQUAL; }
1231 ++equalPrefixLength;
1232 }
1233
1234 // Back out the code units that differed, for the real collation comparison.
1235 if(leftUnit >= 0) { left.previous(&left); }
1236 if(rightUnit >= 0) { right.previous(&right); }
1237
1238 if(equalPrefixLength > 0) {
1239 if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||
1240 (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) {
1241 // Identical prefix: Back up to the start of a contraction or reordering sequence.
1242 do {
1243 --equalPrefixLength;
1244 leftUnit = left.previous(&left);
1245 right.previous(&right);
1246 } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));
1247 }
1248 // See the notes in the UTF-16 version.
1249 }
1250 }
1251
1252 UCollationResult result;
1253 if(settings->dontCheckFCD()) {
1254 UIterCollationIterator leftIter(data, numeric, left);
1255 UIterCollationIterator rightIter(data, numeric, right);
1256 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1257 } else {
1258 FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLength);
1259 FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLength);
1260 result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
1261 }
1262 if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
1263 return result;
1264 }
1265
1266 // Compare identical level.
1267 left.move(&left, equalPrefixLength, UITER_ZERO);
1268 right.move(&right, equalPrefixLength, UITER_ZERO);
1269 const Normalizer2Impl &nfcImpl = data->nfcImpl;
1270 if(settings->dontCheckFCD()) {
1271 UIterNFDIterator leftIter(left);
1272 UIterNFDIterator rightIter(right);
1273 return compareNFDIter(nfcImpl, leftIter, rightIter);
1274 } else {
1275 FCDUIterNFDIterator leftIter(data, left, equalPrefixLength);
1276 FCDUIterNFDIterator rightIter(data, right, equalPrefixLength);
1277 return compareNFDIter(nfcImpl, leftIter, rightIter);
1278 }
1279 }
1280
1281 CollationKey &
getCollationKey(const UnicodeString & s,CollationKey & key,UErrorCode & errorCode) const1282 RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
1283 UErrorCode &errorCode) const {
1284 return getCollationKey(s.getBuffer(), s.length(), key, errorCode);
1285 }
1286
1287 CollationKey &
getCollationKey(const char16_t * s,int32_t length,CollationKey & key,UErrorCode & errorCode) const1288 RuleBasedCollator::getCollationKey(const char16_t *s, int32_t length, CollationKey& key,
1289 UErrorCode &errorCode) const {
1290 if(U_FAILURE(errorCode)) {
1291 return key.setToBogus();
1292 }
1293 if(s == nullptr && length != 0) {
1294 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1295 return key.setToBogus();
1296 }
1297 key.reset(); // resets the "bogus" state
1298 CollationKeyByteSink sink(key);
1299 writeSortKey(s, length, sink, errorCode);
1300 if(U_FAILURE(errorCode)) {
1301 key.setToBogus();
1302 } else if(key.isBogus()) {
1303 errorCode = U_MEMORY_ALLOCATION_ERROR;
1304 } else {
1305 key.setLength(sink.NumberOfBytesAppended());
1306 }
1307 return key;
1308 }
1309
1310 int32_t
getSortKey(const UnicodeString & s,uint8_t * dest,int32_t capacity) const1311 RuleBasedCollator::getSortKey(const UnicodeString &s,
1312 uint8_t *dest, int32_t capacity) const {
1313 return getSortKey(s.getBuffer(), s.length(), dest, capacity);
1314 }
1315
1316 int32_t
getSortKey(const char16_t * s,int32_t length,uint8_t * dest,int32_t capacity) const1317 RuleBasedCollator::getSortKey(const char16_t *s, int32_t length,
1318 uint8_t *dest, int32_t capacity) const {
1319 if((s == nullptr && length != 0) || capacity < 0 || (dest == nullptr && capacity > 0)) {
1320 return 0;
1321 }
1322 uint8_t noDest[1] = { 0 };
1323 if(dest == nullptr) {
1324 // Distinguish pure preflighting from an allocation error.
1325 dest = noDest;
1326 capacity = 0;
1327 }
1328 FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);
1329 UErrorCode errorCode = U_ZERO_ERROR;
1330 writeSortKey(s, length, sink, errorCode);
1331 return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0;
1332 }
1333
1334 void
writeSortKey(const char16_t * s,int32_t length,SortKeyByteSink & sink,UErrorCode & errorCode) const1335 RuleBasedCollator::writeSortKey(const char16_t *s, int32_t length,
1336 SortKeyByteSink &sink, UErrorCode &errorCode) const {
1337 if(U_FAILURE(errorCode)) { return; }
1338 const char16_t *limit = (length >= 0) ? s + length : nullptr;
1339 UBool numeric = settings->isNumeric();
1340 CollationKeys::LevelCallback callback;
1341 if(settings->dontCheckFCD()) {
1342 UTF16CollationIterator iter(data, numeric, s, s, limit);
1343 CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1344 sink, Collation::PRIMARY_LEVEL,
1345 callback, true, errorCode);
1346 } else {
1347 FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1348 CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1349 sink, Collation::PRIMARY_LEVEL,
1350 callback, true, errorCode);
1351 }
1352 if(settings->getStrength() == UCOL_IDENTICAL) {
1353 writeIdenticalLevel(s, limit, sink, errorCode);
1354 }
1355 static const char terminator = 0; // TERMINATOR_BYTE
1356 sink.Append(&terminator, 1);
1357 }
1358
1359 void
writeIdenticalLevel(const char16_t * s,const char16_t * limit,SortKeyByteSink & sink,UErrorCode & errorCode) const1360 RuleBasedCollator::writeIdenticalLevel(const char16_t *s, const char16_t *limit,
1361 SortKeyByteSink &sink, UErrorCode &errorCode) const {
1362 // NFD quick check
1363 const char16_t *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, nullptr, errorCode);
1364 if(U_FAILURE(errorCode)) { return; }
1365 sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
1366 UChar32 prev = 0;
1367 if(nfdQCYesLimit != s) {
1368 prev = u_writeIdenticalLevelRun(prev, s, (int32_t)(nfdQCYesLimit - s), sink);
1369 }
1370 // Is there non-NFD text?
1371 int32_t destLengthEstimate;
1372 if(limit != nullptr) {
1373 if(nfdQCYesLimit == limit) { return; }
1374 destLengthEstimate = (int32_t)(limit - nfdQCYesLimit);
1375 } else {
1376 // s is NUL-terminated
1377 if(*nfdQCYesLimit == 0) { return; }
1378 destLengthEstimate = -1;
1379 }
1380 UnicodeString nfd;
1381 data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
1382 u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);
1383 }
1384
1385 namespace {
1386
1387 /**
1388 * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
1389 * with an instance of this callback class.
1390 * When another level is about to be written, the callback
1391 * records the level and the number of bytes that will be written until
1392 * the sink (which is actually a FixedSortKeyByteSink) fills up.
1393 *
1394 * When internalNextSortKeyPart() is called again, it restarts with the last level
1395 * and ignores as many bytes as were written previously for that level.
1396 */
1397 class PartLevelCallback : public CollationKeys::LevelCallback {
1398 public:
PartLevelCallback(const SortKeyByteSink & s)1399 PartLevelCallback(const SortKeyByteSink &s)
1400 : sink(s), level(Collation::PRIMARY_LEVEL) {
1401 levelCapacity = sink.GetRemainingCapacity();
1402 }
~PartLevelCallback()1403 virtual ~PartLevelCallback() {}
needToWrite(Collation::Level l)1404 virtual UBool needToWrite(Collation::Level l) override {
1405 if(!sink.Overflowed()) {
1406 // Remember a level that will be at least partially written.
1407 level = l;
1408 levelCapacity = sink.GetRemainingCapacity();
1409 return true;
1410 } else {
1411 return false;
1412 }
1413 }
getLevel() const1414 Collation::Level getLevel() const { return level; }
getLevelCapacity() const1415 int32_t getLevelCapacity() const { return levelCapacity; }
1416
1417 private:
1418 const SortKeyByteSink &sink;
1419 Collation::Level level;
1420 int32_t levelCapacity;
1421 };
1422
1423 } // namespace
1424
1425 int32_t
internalNextSortKeyPart(UCharIterator * iter,uint32_t state[2],uint8_t * dest,int32_t count,UErrorCode & errorCode) const1426 RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
1427 uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
1428 if(U_FAILURE(errorCode)) { return 0; }
1429 if(iter == nullptr || state == nullptr || count < 0 || (count > 0 && dest == nullptr)) {
1430 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1431 return 0;
1432 }
1433 if(count == 0) { return 0; }
1434
1435 FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
1436 sink.IgnoreBytes((int32_t)state[1]);
1437 iter->move(iter, 0, UITER_START);
1438
1439 Collation::Level level = (Collation::Level)state[0];
1440 if(level <= Collation::QUATERNARY_LEVEL) {
1441 UBool numeric = settings->isNumeric();
1442 PartLevelCallback callback(sink);
1443 if(settings->dontCheckFCD()) {
1444 UIterCollationIterator ci(data, numeric, *iter);
1445 CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
1446 sink, level, callback, false, errorCode);
1447 } else {
1448 FCDUIterCollationIterator ci(data, numeric, *iter, 0);
1449 CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
1450 sink, level, callback, false, errorCode);
1451 }
1452 if(U_FAILURE(errorCode)) { return 0; }
1453 if(sink.NumberOfBytesAppended() > count) {
1454 state[0] = (uint32_t)callback.getLevel();
1455 state[1] = (uint32_t)callback.getLevelCapacity();
1456 return count;
1457 }
1458 // All of the normal levels are done.
1459 if(settings->getStrength() == UCOL_IDENTICAL) {
1460 level = Collation::IDENTICAL_LEVEL;
1461 iter->move(iter, 0, UITER_START);
1462 }
1463 // else fall through to setting ZERO_LEVEL
1464 }
1465
1466 if(level == Collation::IDENTICAL_LEVEL) {
1467 int32_t levelCapacity = sink.GetRemainingCapacity();
1468 UnicodeString s;
1469 for(;;) {
1470 UChar32 c = iter->next(iter);
1471 if(c < 0) { break; }
1472 s.append((char16_t)c);
1473 }
1474 const char16_t *sArray = s.getBuffer();
1475 writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode);
1476 if(U_FAILURE(errorCode)) { return 0; }
1477 if(sink.NumberOfBytesAppended() > count) {
1478 state[0] = (uint32_t)level;
1479 state[1] = (uint32_t)levelCapacity;
1480 return count;
1481 }
1482 }
1483
1484 // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
1485 state[0] = (uint32_t)Collation::ZERO_LEVEL;
1486 state[1] = 0;
1487 int32_t length = sink.NumberOfBytesAppended();
1488 int32_t i = length;
1489 while(i < count) { dest[i++] = 0; }
1490 return length;
1491 }
1492
1493 void
internalGetCEs(const UnicodeString & str,UVector64 & ces,UErrorCode & errorCode) const1494 RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
1495 UErrorCode &errorCode) const {
1496 if(U_FAILURE(errorCode)) { return; }
1497 const char16_t *s = str.getBuffer();
1498 const char16_t *limit = s + str.length();
1499 UBool numeric = settings->isNumeric();
1500 if(settings->dontCheckFCD()) {
1501 UTF16CollationIterator iter(data, numeric, s, s, limit);
1502 int64_t ce;
1503 while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1504 ces.addElement(ce, errorCode);
1505 }
1506 } else {
1507 FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1508 int64_t ce;
1509 while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1510 ces.addElement(ce, errorCode);
1511 }
1512 }
1513 }
1514
1515 namespace {
1516
appendSubtag(CharString & s,char letter,const char * subtag,int32_t length,UErrorCode & errorCode)1517 void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
1518 UErrorCode &errorCode) {
1519 if(U_FAILURE(errorCode) || length == 0) { return; }
1520 if(!s.isEmpty()) {
1521 s.append('_', errorCode);
1522 }
1523 s.append(letter, errorCode);
1524 for(int32_t i = 0; i < length; ++i) {
1525 s.append(uprv_toupper(subtag[i]), errorCode);
1526 }
1527 }
1528
appendAttribute(CharString & s,char letter,UColAttributeValue value,UErrorCode & errorCode)1529 void appendAttribute(CharString &s, char letter, UColAttributeValue value,
1530 UErrorCode &errorCode) {
1531 if(U_FAILURE(errorCode)) { return; }
1532 if(!s.isEmpty()) {
1533 s.append('_', errorCode);
1534 }
1535 static const char *valueChars = "1234...........IXO..SN..LU......";
1536 s.append(letter, errorCode);
1537 s.append(valueChars[value], errorCode);
1538 }
1539
1540 } // namespace
1541
1542 int32_t
internalGetShortDefinitionString(const char * locale,char * buffer,int32_t capacity,UErrorCode & errorCode) const1543 RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
1544 char *buffer, int32_t capacity,
1545 UErrorCode &errorCode) const {
1546 if(U_FAILURE(errorCode)) { return 0; }
1547 if(buffer == nullptr ? capacity != 0 : capacity < 0) {
1548 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1549 return 0;
1550 }
1551 if(locale == nullptr) {
1552 locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
1553 }
1554
1555 char resultLocale[ULOC_FULLNAME_CAPACITY + 1];
1556 int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CAPACITY,
1557 "collation", locale,
1558 nullptr, &errorCode);
1559 if(U_FAILURE(errorCode)) { return 0; }
1560 resultLocale[length] = 0;
1561
1562 // Append items in alphabetic order of their short definition letters.
1563 CharString result;
1564 char subtag[ULOC_KEYWORD_AND_VALUES_CAPACITY];
1565
1566 if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) {
1567 appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);
1568 }
1569 // ATTR_VARIABLE_TOP not supported because 'B' was broken.
1570 // See ICU tickets #10372 and #10386.
1571 if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) {
1572 appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);
1573 }
1574 if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) {
1575 appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);
1576 }
1577 if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) {
1578 appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);
1579 }
1580 if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) {
1581 appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
1582 }
1583 // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.
1584 {
1585 CharString collation;
1586 CharStringByteSink sink(&collation);
1587 ulocimp_getKeywordValue(resultLocale, "collation", sink, &errorCode);
1588 appendSubtag(result, 'K', collation.data(), collation.length(), errorCode);
1589 }
1590 length = uloc_getLanguage(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1591 if (length == 0) {
1592 appendSubtag(result, 'L', "root", 4, errorCode);
1593 } else {
1594 appendSubtag(result, 'L', subtag, length, errorCode);
1595 }
1596 if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
1597 appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
1598 }
1599 length = uloc_getCountry(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1600 appendSubtag(result, 'R', subtag, length, errorCode);
1601 if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) {
1602 appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), errorCode);
1603 }
1604 length = uloc_getVariant(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1605 appendSubtag(result, 'V', subtag, length, errorCode);
1606 length = uloc_getScript(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);
1607 appendSubtag(result, 'Z', subtag, length, errorCode);
1608
1609 if(U_FAILURE(errorCode)) { return 0; }
1610 return result.extract(buffer, capacity, errorCode);
1611 }
1612
1613 UBool
isUnsafe(UChar32 c) const1614 RuleBasedCollator::isUnsafe(UChar32 c) const {
1615 return data->isUnsafeBackward(c, settings->isNumeric());
1616 }
1617
1618 void U_CALLCONV
computeMaxExpansions(const CollationTailoring * t,UErrorCode & errorCode)1619 RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
1620 t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
1621 }
1622
1623 UBool
initMaxExpansions(UErrorCode & errorCode) const1624 RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
1625 umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
1626 return U_SUCCESS(errorCode);
1627 }
1628
1629 CollationElementIterator *
createCollationElementIterator(const UnicodeString & source) const1630 RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
1631 UErrorCode errorCode = U_ZERO_ERROR;
1632 if(!initMaxExpansions(errorCode)) { return nullptr; }
1633 CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1634 if(U_FAILURE(errorCode)) {
1635 delete cei;
1636 return nullptr;
1637 }
1638 return cei;
1639 }
1640
1641 CollationElementIterator *
createCollationElementIterator(const CharacterIterator & source) const1642 RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
1643 UErrorCode errorCode = U_ZERO_ERROR;
1644 if(!initMaxExpansions(errorCode)) { return nullptr; }
1645 CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);
1646 if(U_FAILURE(errorCode)) {
1647 delete cei;
1648 return nullptr;
1649 }
1650 return cei;
1651 }
1652
1653 int32_t
getMaxExpansion(int32_t order) const1654 RuleBasedCollator::getMaxExpansion(int32_t order) const {
1655 UErrorCode errorCode = U_ZERO_ERROR;
1656 (void)initMaxExpansions(errorCode);
1657 return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);
1658 }
1659
1660 U_NAMESPACE_END
1661
1662 #endif // !UCONFIG_NO_COLLATION
1663