1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "umutex.h"
9 #include "ucln_cmn.h"
10 #include "ucln_in.h"
11 #include "number_modifiers.h"
12
13 using namespace icu;
14 using namespace icu::number;
15 using namespace icu::number::impl;
16
17 namespace {
18
19 // TODO: This is copied from simpleformatter.cpp
20 const int32_t ARG_NUM_LIMIT = 0x100;
21
22 // These are the default currency spacing UnicodeSets in CLDR.
23 // Pre-compute them for performance.
24 // The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR.
25 icu::UInitOnce gDefaultCurrencySpacingInitOnce {};
26
27 UnicodeSet *UNISET_DIGIT = nullptr;
28 UnicodeSet *UNISET_NOTSZ = nullptr;
29
cleanupDefaultCurrencySpacing()30 UBool U_CALLCONV cleanupDefaultCurrencySpacing() {
31 delete UNISET_DIGIT;
32 UNISET_DIGIT = nullptr;
33 delete UNISET_NOTSZ;
34 UNISET_NOTSZ = nullptr;
35 gDefaultCurrencySpacingInitOnce.reset();
36 return true;
37 }
38
initDefaultCurrencySpacing(UErrorCode & status)39 void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) {
40 ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing);
41 UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status);
42 UNISET_NOTSZ = new UnicodeSet(UnicodeString(u"[[:^S:]&[:^Z:]]"), status);
43 if (UNISET_DIGIT == nullptr || UNISET_NOTSZ == nullptr) {
44 status = U_MEMORY_ALLOCATION_ERROR;
45 return;
46 }
47 UNISET_DIGIT->freeze();
48 UNISET_NOTSZ->freeze();
49 }
50
51 } // namespace
52
53
54 Modifier::~Modifier() = default;
55
Parameters()56 Modifier::Parameters::Parameters()
57 : obj(nullptr) {}
58
Parameters(const ModifierStore * _obj,Signum _signum,StandardPlural::Form _plural)59 Modifier::Parameters::Parameters(
60 const ModifierStore* _obj, Signum _signum, StandardPlural::Form _plural)
61 : obj(_obj), signum(_signum), plural(_plural) {}
62
63 ModifierStore::~ModifierStore() = default;
64
~AdoptingSignumModifierStore()65 AdoptingSignumModifierStore::~AdoptingSignumModifierStore() {
66 for (const Modifier *mod : mods) {
67 delete mod;
68 }
69 }
70
71 AdoptingSignumModifierStore&
operator =(AdoptingSignumModifierStore && other)72 AdoptingSignumModifierStore::operator=(AdoptingSignumModifierStore&& other) noexcept {
73 for (size_t i=0; i<SIGNUM_COUNT; i++) {
74 this->mods[i] = other.mods[i];
75 other.mods[i] = nullptr;
76 }
77 return *this;
78 }
79
80
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const81 int32_t ConstantAffixModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
82 UErrorCode &status) const {
83 // Insert the suffix first since inserting the prefix will change the rightIndex
84 int length = output.insert(rightIndex, fSuffix, fField, status);
85 length += output.insert(leftIndex, fPrefix, fField, status);
86 return length;
87 }
88
getPrefixLength() const89 int32_t ConstantAffixModifier::getPrefixLength() const {
90 return fPrefix.length();
91 }
92
getCodePointCount() const93 int32_t ConstantAffixModifier::getCodePointCount() const {
94 return fPrefix.countChar32() + fSuffix.countChar32();
95 }
96
isStrong() const97 bool ConstantAffixModifier::isStrong() const {
98 return fStrong;
99 }
100
containsField(Field field) const101 bool ConstantAffixModifier::containsField(Field field) const {
102 (void)field;
103 // This method is not currently used.
104 UPRV_UNREACHABLE_EXIT;
105 }
106
getParameters(Parameters & output) const107 void ConstantAffixModifier::getParameters(Parameters& output) const {
108 (void)output;
109 // This method is not currently used.
110 UPRV_UNREACHABLE_EXIT;
111 }
112
semanticallyEquivalent(const Modifier & other) const113 bool ConstantAffixModifier::semanticallyEquivalent(const Modifier& other) const {
114 auto* _other = dynamic_cast<const ConstantAffixModifier*>(&other);
115 if (_other == nullptr) {
116 return false;
117 }
118 return fPrefix == _other->fPrefix
119 && fSuffix == _other->fSuffix
120 && fField == _other->fField
121 && fStrong == _other->fStrong;
122 }
123
124
SimpleModifier(const SimpleFormatter & simpleFormatter,Field field,bool strong)125 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong)
126 : SimpleModifier(simpleFormatter, field, strong, {}) {}
127
SimpleModifier(const SimpleFormatter & simpleFormatter,Field field,bool strong,const Modifier::Parameters parameters)128 SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong,
129 const Modifier::Parameters parameters)
130 : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong),
131 fParameters(parameters) {
132 int32_t argLimit = SimpleFormatter::getArgumentLimit(
133 fCompiledPattern.getBuffer(), fCompiledPattern.length());
134 if (argLimit == 0) {
135 // No arguments in compiled pattern
136 fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
137 U_ASSERT(2 + fPrefixLength == fCompiledPattern.length());
138 // Set suffixOffset = -1 to indicate no arguments in compiled pattern.
139 fSuffixOffset = -1;
140 fSuffixLength = 0;
141 } else {
142 U_ASSERT(argLimit == 1);
143 if (fCompiledPattern.charAt(1) != 0) {
144 // Found prefix
145 fPrefixLength = fCompiledPattern.charAt(1) - ARG_NUM_LIMIT;
146 fSuffixOffset = 3 + fPrefixLength;
147 } else {
148 // No prefix
149 fPrefixLength = 0;
150 fSuffixOffset = 2;
151 }
152 if (3 + fPrefixLength < fCompiledPattern.length()) {
153 // Found suffix
154 fSuffixLength = fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT;
155 } else {
156 // No suffix
157 fSuffixLength = 0;
158 }
159 }
160 }
161
SimpleModifier()162 SimpleModifier::SimpleModifier()
163 : fField(kUndefinedField), fStrong(false), fPrefixLength(0), fSuffixLength(0) {
164 }
165
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const166 int32_t SimpleModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
167 UErrorCode &status) const {
168 return formatAsPrefixSuffix(output, leftIndex, rightIndex, status);
169 }
170
getPrefixLength() const171 int32_t SimpleModifier::getPrefixLength() const {
172 return fPrefixLength;
173 }
174
getCodePointCount() const175 int32_t SimpleModifier::getCodePointCount() const {
176 int32_t count = 0;
177 if (fPrefixLength > 0) {
178 count += fCompiledPattern.countChar32(2, fPrefixLength);
179 }
180 if (fSuffixLength > 0) {
181 count += fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength);
182 }
183 return count;
184 }
185
isStrong() const186 bool SimpleModifier::isStrong() const {
187 return fStrong;
188 }
189
containsField(Field field) const190 bool SimpleModifier::containsField(Field field) const {
191 (void)field;
192 // This method is not currently used.
193 UPRV_UNREACHABLE_EXIT;
194 }
195
getParameters(Parameters & output) const196 void SimpleModifier::getParameters(Parameters& output) const {
197 output = fParameters;
198 }
199
semanticallyEquivalent(const Modifier & other) const200 bool SimpleModifier::semanticallyEquivalent(const Modifier& other) const {
201 auto* _other = dynamic_cast<const SimpleModifier*>(&other);
202 if (_other == nullptr) {
203 return false;
204 }
205 if (fParameters.obj != nullptr) {
206 return fParameters.obj == _other->fParameters.obj;
207 }
208 return fCompiledPattern == _other->fCompiledPattern
209 && fField == _other->fField
210 && fStrong == _other->fStrong;
211 }
212
213
214 int32_t
formatAsPrefixSuffix(FormattedStringBuilder & result,int32_t startIndex,int32_t endIndex,UErrorCode & status) const215 SimpleModifier::formatAsPrefixSuffix(FormattedStringBuilder &result, int32_t startIndex, int32_t endIndex,
216 UErrorCode &status) const {
217 if (fSuffixOffset == -1 && fPrefixLength + fSuffixLength > 0) {
218 // There is no argument for the inner number; overwrite the entire segment with our string.
219 return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
220 } else {
221 if (fPrefixLength > 0) {
222 result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
223 }
224 if (fSuffixLength > 0) {
225 result.insert(
226 endIndex + fPrefixLength,
227 fCompiledPattern,
228 1 + fSuffixOffset,
229 1 + fSuffixOffset + fSuffixLength,
230 fField,
231 status);
232 }
233 return fPrefixLength + fSuffixLength;
234 }
235 }
236
237
238 int32_t
formatTwoArgPattern(const SimpleFormatter & compiled,FormattedStringBuilder & result,int32_t index,int32_t * outPrefixLength,int32_t * outSuffixLength,Field field,UErrorCode & status)239 SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, FormattedStringBuilder& result,
240 int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength,
241 Field field, UErrorCode& status) {
242 const UnicodeString& compiledPattern = compiled.compiledPattern;
243 int32_t argLimit = SimpleFormatter::getArgumentLimit(
244 compiledPattern.getBuffer(), compiledPattern.length());
245 if (argLimit != 2) {
246 status = U_INTERNAL_PROGRAM_ERROR;
247 return 0;
248 }
249 int32_t offset = 1; // offset into compiledPattern
250 int32_t length = 0; // chars added to result
251
252 int32_t prefixLength = compiledPattern.charAt(offset);
253 offset++;
254 if (prefixLength < ARG_NUM_LIMIT) {
255 // No prefix
256 prefixLength = 0;
257 } else {
258 prefixLength -= ARG_NUM_LIMIT;
259 result.insert(index + length, compiledPattern, offset, offset + prefixLength, field, status);
260 offset += prefixLength;
261 length += prefixLength;
262 offset++;
263 }
264
265 int32_t infixLength = compiledPattern.charAt(offset);
266 offset++;
267 if (infixLength < ARG_NUM_LIMIT) {
268 // No infix
269 infixLength = 0;
270 } else {
271 infixLength -= ARG_NUM_LIMIT;
272 result.insert(index + length, compiledPattern, offset, offset + infixLength, field, status);
273 offset += infixLength;
274 length += infixLength;
275 offset++;
276 }
277
278 int32_t suffixLength;
279 if (offset == compiledPattern.length()) {
280 // No suffix
281 suffixLength = 0;
282 } else {
283 suffixLength = compiledPattern.charAt(offset) - ARG_NUM_LIMIT;
284 offset++;
285 result.insert(index + length, compiledPattern, offset, offset + suffixLength, field, status);
286 length += suffixLength;
287 }
288
289 *outPrefixLength = prefixLength;
290 *outSuffixLength = suffixLength;
291
292 return length;
293 }
294
295
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const296 int32_t ConstantMultiFieldModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
297 UErrorCode &status) const {
298 int32_t length = output.insert(leftIndex, fPrefix, status);
299 if (fOverwrite) {
300 length += output.splice(
301 leftIndex + length,
302 rightIndex + length,
303 UnicodeString(), 0, 0,
304 kUndefinedField, status);
305 }
306 length += output.insert(rightIndex + length, fSuffix, status);
307 return length;
308 }
309
getPrefixLength() const310 int32_t ConstantMultiFieldModifier::getPrefixLength() const {
311 return fPrefix.length();
312 }
313
getCodePointCount() const314 int32_t ConstantMultiFieldModifier::getCodePointCount() const {
315 return fPrefix.codePointCount() + fSuffix.codePointCount();
316 }
317
isStrong() const318 bool ConstantMultiFieldModifier::isStrong() const {
319 return fStrong;
320 }
321
containsField(Field field) const322 bool ConstantMultiFieldModifier::containsField(Field field) const {
323 return fPrefix.containsField(field) || fSuffix.containsField(field);
324 }
325
getParameters(Parameters & output) const326 void ConstantMultiFieldModifier::getParameters(Parameters& output) const {
327 output = fParameters;
328 }
329
semanticallyEquivalent(const Modifier & other) const330 bool ConstantMultiFieldModifier::semanticallyEquivalent(const Modifier& other) const {
331 auto* _other = dynamic_cast<const ConstantMultiFieldModifier*>(&other);
332 if (_other == nullptr) {
333 return false;
334 }
335 if (fParameters.obj != nullptr) {
336 return fParameters.obj == _other->fParameters.obj;
337 }
338 return fPrefix.contentEquals(_other->fPrefix)
339 && fSuffix.contentEquals(_other->fSuffix)
340 && fOverwrite == _other->fOverwrite
341 && fStrong == _other->fStrong;
342 }
343
344
CurrencySpacingEnabledModifier(const FormattedStringBuilder & prefix,const FormattedStringBuilder & suffix,bool overwrite,bool strong,const DecimalFormatSymbols & symbols,UErrorCode & status)345 CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const FormattedStringBuilder &prefix,
346 const FormattedStringBuilder &suffix,
347 bool overwrite,
348 bool strong,
349 const DecimalFormatSymbols &symbols,
350 UErrorCode &status)
351 : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) {
352 // Check for currency spacing. Do not build the UnicodeSets unless there is
353 // a currency code point at a boundary.
354 if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
355 int prefixCp = prefix.getLastCodePoint();
356 UnicodeSet prefixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status);
357 if (prefixUnicodeSet.contains(prefixCp)) {
358 fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status);
359 fAfterPrefixUnicodeSet.freeze();
360 fAfterPrefixInsert = getInsertString(symbols, PREFIX, status);
361 } else {
362 fAfterPrefixUnicodeSet.setToBogus();
363 fAfterPrefixInsert.setToBogus();
364 }
365 } else {
366 fAfterPrefixUnicodeSet.setToBogus();
367 fAfterPrefixInsert.setToBogus();
368 }
369 if (suffix.length() > 0 && suffix.fieldAt(0) == Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
370 int suffixCp = suffix.getFirstCodePoint();
371 UnicodeSet suffixUnicodeSet = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status);
372 if (suffixUnicodeSet.contains(suffixCp)) {
373 fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status);
374 fBeforeSuffixUnicodeSet.freeze();
375 fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status);
376 } else {
377 fBeforeSuffixUnicodeSet.setToBogus();
378 fBeforeSuffixInsert.setToBogus();
379 }
380 } else {
381 fBeforeSuffixUnicodeSet.setToBogus();
382 fBeforeSuffixInsert.setToBogus();
383 }
384 }
385
apply(FormattedStringBuilder & output,int leftIndex,int rightIndex,UErrorCode & status) const386 int32_t CurrencySpacingEnabledModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
387 UErrorCode &status) const {
388 // Currency spacing logic
389 int length = 0;
390 if (rightIndex - leftIndex > 0 && !fAfterPrefixUnicodeSet.isBogus() &&
391 fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) {
392 // TODO: Should we use the CURRENCY field here?
393 length += output.insert(
394 leftIndex,
395 fAfterPrefixInsert,
396 kUndefinedField,
397 status);
398 }
399 if (rightIndex - leftIndex > 0 && !fBeforeSuffixUnicodeSet.isBogus() &&
400 fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex))) {
401 // TODO: Should we use the CURRENCY field here?
402 length += output.insert(
403 rightIndex + length,
404 fBeforeSuffixInsert,
405 kUndefinedField,
406 status);
407 }
408
409 // Call super for the remaining logic
410 length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status);
411 return length;
412 }
413
414 int32_t
applyCurrencySpacing(FormattedStringBuilder & output,int32_t prefixStart,int32_t prefixLen,int32_t suffixStart,int32_t suffixLen,const DecimalFormatSymbols & symbols,UErrorCode & status)415 CurrencySpacingEnabledModifier::applyCurrencySpacing(FormattedStringBuilder &output, int32_t prefixStart,
416 int32_t prefixLen, int32_t suffixStart,
417 int32_t suffixLen,
418 const DecimalFormatSymbols &symbols,
419 UErrorCode &status) {
420 int length = 0;
421 bool hasPrefix = (prefixLen > 0);
422 bool hasSuffix = (suffixLen > 0);
423 bool hasNumber = (suffixStart - prefixStart - prefixLen > 0); // could be empty string
424 if (hasPrefix && hasNumber) {
425 length += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status);
426 }
427 if (hasSuffix && hasNumber) {
428 length += applyCurrencySpacingAffix(output, suffixStart + length, SUFFIX, symbols, status);
429 }
430 return length;
431 }
432
433 int32_t
applyCurrencySpacingAffix(FormattedStringBuilder & output,int32_t index,EAffix affix,const DecimalFormatSymbols & symbols,UErrorCode & status)434 CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(FormattedStringBuilder &output, int32_t index,
435 EAffix affix,
436 const DecimalFormatSymbols &symbols,
437 UErrorCode &status) {
438 // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix.
439 // This works even if the last code point in the prefix is 2 code units because the
440 // field value gets populated to both indices in the field array.
441 Field affixField = (affix == PREFIX) ? output.fieldAt(index - 1) : output.fieldAt(index);
442 if (affixField != Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
443 return 0;
444 }
445 int affixCp = (affix == PREFIX) ? output.codePointBefore(index) : output.codePointAt(index);
446 UnicodeSet affixUniset = getUnicodeSet(symbols, IN_CURRENCY, affix, status);
447 if (!affixUniset.contains(affixCp)) {
448 return 0;
449 }
450 int numberCp = (affix == PREFIX) ? output.codePointAt(index) : output.codePointBefore(index);
451 UnicodeSet numberUniset = getUnicodeSet(symbols, IN_NUMBER, affix, status);
452 if (!numberUniset.contains(numberCp)) {
453 return 0;
454 }
455 UnicodeString spacingString = getInsertString(symbols, affix, status);
456
457 // NOTE: This next line *inserts* the spacing string, triggering an arraycopy.
458 // It would be more efficient if this could be done before affixes were attached,
459 // so that it could be prepended/appended instead of inserted.
460 // However, the build code path is more efficient, and this is the most natural
461 // place to put currency spacing in the non-build code path.
462 // TODO: Should we use the CURRENCY field here?
463 return output.insert(index, spacingString, kUndefinedField, status);
464 }
465
466 UnicodeSet
getUnicodeSet(const DecimalFormatSymbols & symbols,EPosition position,EAffix affix,UErrorCode & status)467 CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position,
468 EAffix affix, UErrorCode &status) {
469 // Ensure the static defaults are initialized:
470 umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status);
471 if (U_FAILURE(status)) {
472 return UnicodeSet();
473 }
474
475 const UnicodeString& pattern = symbols.getPatternForCurrencySpacing(
476 position == IN_CURRENCY ? UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH,
477 affix == SUFFIX,
478 status);
479 if (pattern.compare(u"[:digit:]", -1) == 0) {
480 return *UNISET_DIGIT;
481 } else if (pattern.compare(u"[[:^S:]&[:^Z:]]", -1) == 0) {
482 return *UNISET_NOTSZ;
483 } else {
484 return UnicodeSet(pattern, status);
485 }
486 }
487
488 UnicodeString
getInsertString(const DecimalFormatSymbols & symbols,EAffix affix,UErrorCode & status)489 CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix,
490 UErrorCode &status) {
491 return symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, affix == SUFFIX, status);
492 }
493
494 #endif /* #if !UCONFIG_NO_FORMATTING */
495