xref: /aosp_15_r20/external/cronet/third_party/icu/source/i18n/number_modifiers.cpp (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include "umutex.h"
9 #include "ucln_cmn.h"
10 #include "ucln_in.h"
11 #include "number_modifiers.h"
12 
13 using namespace icu;
14 using namespace icu::number;
15 using namespace icu::number::impl;
16 
17 namespace {
18 
19 // TODO: This is copied from simpleformatter.cpp
20 const int32_t ARG_NUM_LIMIT = 0x100;
21 
22 // These are the default currency spacing UnicodeSets in CLDR.
23 // Pre-compute them for performance.
24 // The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR.
25 icu::UInitOnce gDefaultCurrencySpacingInitOnce {};
26 
27 UnicodeSet *UNISET_DIGIT = nullptr;
28 UnicodeSet *UNISET_NOTSZ = nullptr;
29 
cleanupDefaultCurrencySpacing()30 UBool U_CALLCONV cleanupDefaultCurrencySpacing() {
31     delete UNISET_DIGIT;
32     UNISET_DIGIT = nullptr;
33     delete UNISET_NOTSZ;
34     UNISET_NOTSZ = nullptr;
35     gDefaultCurrencySpacingInitOnce.reset();
36     return true;
37 }
38 
initDefaultCurrencySpacing(UErrorCode & status)39 void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) {
40     ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing);
41     UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status);
42     UNISET_NOTSZ = new UnicodeSet(UnicodeString(u"[[:^S:]&[:^Z:]]"), status);
43     if (UNISET_DIGIT == nullptr || UNISET_NOTSZ == nullptr) {
44         status = U_MEMORY_ALLOCATION_ERROR;
45         return;
46     }
47     UNISET_DIGIT->freeze();
48     UNISET_NOTSZ->freeze();
49 }
50 
51 }  // namespace
52 
53 
// Out-of-line definition of Modifier's destructor, using the compiler-generated body.
54 Modifier::~Modifier() = default;
55 
Parameters()56 Modifier::Parameters::Parameters()
57         : obj(nullptr) {}
58 
Parameters(const ModifierStore * _obj,Signum _signum,StandardPlural::Form _plural)59 Modifier::Parameters::Parameters(
60     const ModifierStore* _obj, Signum _signum, StandardPlural::Form _plural)
61         : obj(_obj), signum(_signum), plural(_plural) {}
62 
// Out-of-line definition of ModifierStore's destructor, using the compiler-generated body.
63 ModifierStore::~ModifierStore() = default;
64 
~AdoptingSignumModifierStore()65 AdoptingSignumModifierStore::~AdoptingSignumModifierStore()  {
66     for (const Modifier *mod : mods) {
67         delete mod;
68     }
69 }
70 
71 AdoptingSignumModifierStore&
operator =(AdoptingSignumModifierStore && other)72 AdoptingSignumModifierStore::operator=(AdoptingSignumModifierStore&& other) noexcept {
73     for (size_t i=0; i<SIGNUM_COUNT; i++) {
74         this->mods[i] = other.mods[i];
75         other.mods[i] = nullptr;
76     }
77     return *this;
78 }
79 
80 
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const81 int32_t ConstantAffixModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
82                                      UErrorCode &status) const {
83     // Insert the suffix first since inserting the prefix will change the rightIndex
84     int length = output.insert(rightIndex, fSuffix, fField, status);
85     length += output.insert(leftIndex, fPrefix, fField, status);
86     return length;
87 }
88 
getPrefixLength() const89 int32_t ConstantAffixModifier::getPrefixLength() const {
90     return fPrefix.length();
91 }
92 
getCodePointCount() const93 int32_t ConstantAffixModifier::getCodePointCount() const {
94     return fPrefix.countChar32() + fSuffix.countChar32();
95 }
96 
isStrong() const97 bool ConstantAffixModifier::isStrong() const {
98     return fStrong;
99 }
100 
containsField(Field field) const101 bool ConstantAffixModifier::containsField(Field field) const {
102     (void)field;
103     // This method is not currently used.
104     UPRV_UNREACHABLE_EXIT;
105 }
106 
getParameters(Parameters & output) const107 void ConstantAffixModifier::getParameters(Parameters& output) const {
108     (void)output;
109     // This method is not currently used.
110     UPRV_UNREACHABLE_EXIT;
111 }
112 
semanticallyEquivalent(const Modifier & other) const113 bool ConstantAffixModifier::semanticallyEquivalent(const Modifier& other) const {
114     auto* _other = dynamic_cast<const ConstantAffixModifier*>(&other);
115     if (_other == nullptr) {
116         return false;
117     }
118     return fPrefix == _other->fPrefix
119         && fSuffix == _other->fSuffix
120         && fField == _other->fField
121         && fStrong == _other->fStrong;
122 }
123 
124 
SimpleModifier(const SimpleFormatter & simpleFormatter,Field field,bool strong)125 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong)
126         : SimpleModifier(simpleFormatter, field, strong, {}) {}
127 
SimpleModifier(const SimpleFormatter & simpleFormatter,Field field,bool strong,const Modifier::Parameters parameters)128 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong,
129                                const Modifier::Parameters parameters)
130         : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong),
131           fParameters(parameters) {
132     int32_t argLimit = SimpleFormatter::getArgumentLimit(
133             fCompiledPattern.getBuffer(), fCompiledPattern.length());
134     if (argLimit == 0) {
135         // No arguments in compiled pattern
136         fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
137         U_ASSERT(2 + fPrefixLength == fCompiledPattern.length());
138         // Set suffixOffset = -1 to indicate no arguments in compiled pattern.
139         fSuffixOffset = -1;
140         fSuffixLength = 0;
141     } else {
142         U_ASSERT(argLimit == 1);
143         if (fCompiledPattern.charAt(1) != 0) {
144             // Found prefix
145             fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
146             fSuffixOffset = 3 + fPrefixLength;
147         } else {
148             // No prefix
149             fPrefixLength = 0;
150             fSuffixOffset = 2;
151         }
152         if (3 + fPrefixLength < fCompiledPattern.length()) {
153             // Found suffix
154             fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT;
155         } else {
156             // No suffix
157             fSuffixLength = 0;
158         }
159     }
160 }
161 
SimpleModifier()162 SimpleModifier::SimpleModifier()
163         : fField(kUndefinedField), fStrong(false), fPrefixLength(0), fSuffixLength(0) {
164 }
165 
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const166 int32_t SimpleModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
167                               UErrorCode &status) const {
168     return formatAsPrefixSuffix(output, leftIndex, rightIndex, status);
169 }
170 
getPrefixLength() const171 int32_t SimpleModifier::getPrefixLength() const {
172     return fPrefixLength;
173 }
174 
getCodePointCount() const175 int32_t SimpleModifier::getCodePointCount() const {
176     int32_t count = 0;
177     if (fPrefixLength > 0) {
178         count += fCompiledPattern.countChar32(2, fPrefixLength);
179     }
180     if (fSuffixLength > 0) {
181         count += fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength);
182     }
183     return count;
184 }
185 
isStrong() const186 bool SimpleModifier::isStrong() const {
187     return fStrong;
188 }
189 
containsField(Field field) const190 bool SimpleModifier::containsField(Field field) const {
191     (void)field;
192     // This method is not currently used.
193     UPRV_UNREACHABLE_EXIT;
194 }
195 
getParameters(Parameters & output) const196 void SimpleModifier::getParameters(Parameters& output) const {
197     output = fParameters;
198 }
199 
semanticallyEquivalent(const Modifier & other) const200 bool SimpleModifier::semanticallyEquivalent(const Modifier& other) const {
201     auto* _other = dynamic_cast<const SimpleModifier*>(&other);
202     if (_other == nullptr) {
203         return false;
204     }
205     if (fParameters.obj != nullptr) {
206         return fParameters.obj == _other->fParameters.obj;
207     }
208     return fCompiledPattern == _other->fCompiledPattern
209         && fField == _other->fField
210         && fStrong == _other->fStrong;
211 }
212 
213 
214 int32_t
formatAsPrefixSuffix(FormattedStringBuilder & result,int32_t startIndex,int32_t endIndex,UErrorCode & status) const215 SimpleModifier::formatAsPrefixSuffix(FormattedStringBuilder &result, int32_t startIndex, int32_t endIndex,
216                                      UErrorCode &status) const {
217     if (fSuffixOffset == -1 && fPrefixLength + fSuffixLength > 0) {
218         // There is no argument for the inner number; overwrite the entire segment with our string.
219         return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
220     } else {
221         if (fPrefixLength > 0) {
222             result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
223         }
224         if (fSuffixLength > 0) {
225             result.insert(
226                     endIndex + fPrefixLength,
227                     fCompiledPattern,
228                     1 + fSuffixOffset,
229                     1 + fSuffixOffset + fSuffixLength,
230                     fField,
231                     status);
232         }
233         return fPrefixLength + fSuffixLength;
234     }
235 }
236 
237 
238 int32_t
formatTwoArgPattern(const SimpleFormatter & compiled,FormattedStringBuilder & result,int32_t index,int32_t * outPrefixLength,int32_t * outSuffixLength,Field field,UErrorCode & status)239 SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, FormattedStringBuilder& result,
240                                     int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength,
241                                     Field field, UErrorCode& status) {
242     const UnicodeString& compiledPattern = compiled.compiledPattern;
243     int32_t argLimit = SimpleFormatter::getArgumentLimit(
244             compiledPattern.getBuffer(), compiledPattern.length());
245     if (argLimit != 2) {
246         status = U_INTERNAL_PROGRAM_ERROR;
247         return 0;
248     }
249     int32_t offset = 1; // offset into compiledPattern
250     int32_t length = 0; // chars added to result
251 
252     int32_t prefixLength = compiledPattern.charAt(offset);
253     offset++;
254     if (prefixLength < ARG_NUM_LIMIT) {
255         // No prefix
256         prefixLength = 0;
257     } else {
258         prefixLength -= ARG_NUM_LIMIT;
259         result.insert(index + length, compiledPattern, offset, offset + prefixLength, field, status);
260         offset += prefixLength;
261         length += prefixLength;
262         offset++;
263     }
264 
265     int32_t infixLength = compiledPattern.charAt(offset);
266     offset++;
267     if (infixLength < ARG_NUM_LIMIT) {
268         // No infix
269         infixLength = 0;
270     } else {
271         infixLength -= ARG_NUM_LIMIT;
272         result.insert(index + length, compiledPattern, offset, offset + infixLength, field, status);
273         offset += infixLength;
274         length += infixLength;
275         offset++;
276     }
277 
278     int32_t suffixLength;
279     if (offset == compiledPattern.length()) {
280         // No suffix
281         suffixLength = 0;
282     } else {
283         suffixLength = compiledPattern.charAt(offset) -  ARG_NUM_LIMIT;
284         offset++;
285         result.insert(index + length, compiledPattern, offset, offset + suffixLength, field, status);
286         length += suffixLength;
287     }
288 
289     *outPrefixLength = prefixLength;
290     *outSuffixLength = suffixLength;
291 
292     return length;
293 }
294 
295 
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const296 int32_t ConstantMultiFieldModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
297                                           UErrorCode &status) const {
298     int32_t length = output.insert(leftIndex, fPrefix, status);
299     if (fOverwrite) {
300         length += output.splice(
301             leftIndex + length,
302             rightIndex + length,
303             UnicodeString(), 0, 0,
304             kUndefinedField, status);
305     }
306     length += output.insert(rightIndex + length, fSuffix, status);
307     return length;
308 }
309 
getPrefixLength() const310 int32_t ConstantMultiFieldModifier::getPrefixLength() const {
311     return fPrefix.length();
312 }
313 
getCodePointCount() const314 int32_t ConstantMultiFieldModifier::getCodePointCount() const {
315     return fPrefix.codePointCount() + fSuffix.codePointCount();
316 }
317 
isStrong() const318 bool ConstantMultiFieldModifier::isStrong() const {
319     return fStrong;
320 }
321 
containsField(Field field) const322 bool ConstantMultiFieldModifier::containsField(Field field) const {
323     return fPrefix.containsField(field) || fSuffix.containsField(field);
324 }
325 
getParameters(Parameters & output) const326 void ConstantMultiFieldModifier::getParameters(Parameters& output) const {
327     output = fParameters;
328 }
329 
semanticallyEquivalent(const Modifier & other) const330 bool ConstantMultiFieldModifier::semanticallyEquivalent(const Modifier& other) const {
331     auto* _other = dynamic_cast<const ConstantMultiFieldModifier*>(&other);
332     if (_other == nullptr) {
333         return false;
334     }
335     if (fParameters.obj != nullptr) {
336         return fParameters.obj == _other->fParameters.obj;
337     }
338     return fPrefix.contentEquals(_other->fPrefix)
339         && fSuffix.contentEquals(_other->fSuffix)
340         && fOverwrite == _other->fOverwrite
341         && fStrong == _other->fStrong;
342 }
343 
344 
CurrencySpacingEnabledModifier(const FormattedStringBuilder & prefix,const FormattedStringBuilder & suffix,bool overwrite,bool strong,const DecimalFormatSymbols & symbols,UErrorCode & status)345 CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const FormattedStringBuilder &prefix,
346                                                                const FormattedStringBuilder &suffix,
347                                                                bool overwrite,
348                                                                bool strong,
349                                                                const DecimalFormatSymbols &symbols,
350                                                                UErrorCode &status)
351         : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) {
352     // Check for currency spacing. Do not build the UnicodeSets unless there is
353     // a currency code point at a boundary.
354     if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
355         int prefixCp = prefix.getLastCodePoint();
356         UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status);
357         if (prefixUnicodeSet.contains(prefixCp)) {
358             fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status);
359             fAfterPrefixUnicodeSet.freeze();
360             fAfterPrefixInsert = getInsertString(symbols, PREFIX, status);
361         } else {
362             fAfterPrefixUnicodeSet.setToBogus();
363             fAfterPrefixInsert.setToBogus();
364         }
365     } else {
366         fAfterPrefixUnicodeSet.setToBogus();
367         fAfterPrefixInsert.setToBogus();
368     }
369     if (suffix.length() > 0 && suffix.fieldAt(0) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
370         int suffixCp = suffix.getFirstCodePoint();
371         UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status);
372         if (suffixUnicodeSet.contains(suffixCp)) {
373             fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status);
374             fBeforeSuffixUnicodeSet.freeze();
375             fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status);
376         } else {
377             fBeforeSuffixUnicodeSet.setToBogus();
378             fBeforeSuffixInsert.setToBogus();
379         }
380     } else {
381         fBeforeSuffixUnicodeSet.setToBogus();
382         fBeforeSuffixInsert.setToBogus();
383     }
384 }
385 
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const386 int32_t CurrencySpacingEnabledModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
387                                               UErrorCode &status) const {
388     // Currency spacing logic
389     int length = 0;
390     if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() &&
391         fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) {
392         // TODO: Should we use the CURRENCY field here?
393         length += output.insert(
394             leftIndex,
395             fAfterPrefixInsert,
396             kUndefinedField,
397             status);
398     }
399     if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() &&
400         fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex))) {
401         // TODO: Should we use the CURRENCY field here?
402         length += output.insert(
403             rightIndex + length,
404             fBeforeSuffixInsert,
405             kUndefinedField,
406             status);
407     }
408 
409     // Call super for the remaining logic
410     length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status);
411     return length;
412 }
413 
414 int32_t
applyCurrencySpacing(FormattedStringBuilder & output,int32_t prefixStart,int32_t prefixLen,int32_t suffixStart,int32_t suffixLen,const DecimalFormatSymbols & symbols,UErrorCode & status)415 CurrencySpacingEnabledModifier::applyCurrencySpacing(FormattedStringBuilder &output, int32_t prefixStart,
416                                                      int32_t prefixLen, int32_t suffixStart,
417                                                      int32_t suffixLen,
418                                                      const DecimalFormatSymbols &symbols,
419                                                      UErrorCode &status) {
420     int length = 0;
421     bool hasPrefix = (prefixLen > 0);
422     bool hasSuffix = (suffixLen > 0);
423     bool hasNumber = (suffixStart - prefixStart - prefixLen > 0); // could be empty string
424     if (hasPrefix && hasNumber) {
425         length += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status);
426     }
427     if (hasSuffix && hasNumber) {
428         length += applyCurrencySpacingAffix(output, suffixStart + length, SUFFIX, symbols, status);
429     }
430     return length;
431 }
432 
433 int32_t
applyCurrencySpacingAffix(FormattedStringBuilder & output,int32_t index,EAffix affix,const DecimalFormatSymbols & symbols,UErrorCode & status)434 CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(FormattedStringBuilder &output, int32_t index,
435                                                           EAffix affix,
436                                                           const DecimalFormatSymbols &symbols,
437                                                           UErrorCode &status) {
438     // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix.
439     // This works even if the last code point in the prefix is 2 code units because the
440     // field value gets populated to both indices in the field array.
441     Field affixField = (affix == PREFIX) ? output.fieldAt(index - 1) : output.fieldAt(index);
442     if (affixField != Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
443         return 0;
444     }
445     int affixCp = (affix == PREFIX) ? output.codePointBefore(index) : output.codePointAt(index);
446     UnicodeSet affixUniset = getUnicodeSet(symbols, IN_CURRENCY, affix, status);
447     if (!affixUniset.contains(affixCp)) {
448         return 0;
449     }
450     int numberCp = (affix == PREFIX) ? output.codePointAt(index) : output.codePointBefore(index);
451     UnicodeSet numberUniset = getUnicodeSet(symbols, IN_NUMBER, affix, status);
452     if (!numberUniset.contains(numberCp)) {
453         return 0;
454     }
455     UnicodeString spacingString = getInsertString(symbols, affix, status);
456 
457     // NOTE: This next line *inserts* the spacing string, triggering an arraycopy.
458     // It would be more efficient if this could be done before affixes were attached,
459     // so that it could be prepended/appended instead of inserted.
460     // However, the build code path is more efficient, and this is the most natural
461     // place to put currency spacing in the non-build code path.
462     // TODO: Should we use the CURRENCY field here?
463     return output.insert(index, spacingString, kUndefinedField, status);
464 }
465 
466 UnicodeSet
getUnicodeSet(const DecimalFormatSymbols & symbols,EPosition position,EAffix affix,UErrorCode & status)467 CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position,
468                                               EAffix affix, UErrorCode &status) {
469     // Ensure the static defaults are initialized:
470     umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status);
471     if (U_FAILURE(status)) {
472         return UnicodeSet();
473     }
474 
475     const UnicodeString& pattern = symbols.getPatternForCurrencySpacing(
476             position == IN_CURRENCY ? UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH,
477             affix == SUFFIX,
478             status);
479     if (pattern.compare(u"[:digit:]", -1) == 0) {
480         return *UNISET_DIGIT;
481     } else if (pattern.compare(u"[[:^S:]&[:^Z:]]", -1) == 0) {
482         return *UNISET_NOTSZ;
483     } else {
484         return UnicodeSet(pattern, status);
485     }
486 }
487 
488 UnicodeString
getInsertString(const DecimalFormatSymbols & symbols,EAffix affix,UErrorCode & status)489 CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix,
490                                                 UErrorCode &status) {
491     return symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, affix == SUFFIX, status);
492 }
493 
494 #endif /* #if !UCONFIG_NO_FORMATTING */
495