xref: /aosp_15_r20/external/icu/icu4c/source/i18n/formattedval_sbimpl.cpp (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 // This file contains one implementation of FormattedValue.
9 // Other independent implementations should go into their own cpp file for
10 // better dependency modularization.
11 
12 #include "unicode/ustring.h"
13 #include "formattedval_impl.h"
14 #include "number_types.h"
15 #include "formatted_string_builder.h"
16 #include "number_utils.h"
17 #include "static_unicode_sets.h"
18 #include "unicode/listformatter.h"
19 
20 U_NAMESPACE_BEGIN
21 
22 
23 typedef FormattedStringBuilder::Field Field;
24 
25 
FormattedValueStringBuilderImpl(Field numericField)26 FormattedValueStringBuilderImpl::FormattedValueStringBuilderImpl(Field numericField)
27         : fNumericField(numericField) {
28 }
29 
~FormattedValueStringBuilderImpl()30 FormattedValueStringBuilderImpl::~FormattedValueStringBuilderImpl() {
31 }
32 
33 
toString(UErrorCode &) const34 UnicodeString FormattedValueStringBuilderImpl::toString(UErrorCode&) const {
35     return fString.toUnicodeString();
36 }
37 
toTempString(UErrorCode &) const38 UnicodeString FormattedValueStringBuilderImpl::toTempString(UErrorCode&) const {
39     return fString.toTempUnicodeString();
40 }
41 
appendTo(Appendable & appendable,UErrorCode &) const42 Appendable& FormattedValueStringBuilderImpl::appendTo(Appendable& appendable, UErrorCode&) const {
43     appendable.appendString(fString.chars(), fString.length());
44     return appendable;
45 }
46 
nextPosition(ConstrainedFieldPosition & cfpos,UErrorCode & status) const47 UBool FormattedValueStringBuilderImpl::nextPosition(ConstrainedFieldPosition& cfpos, UErrorCode& status) const {
48     // NOTE: MSVC sometimes complains when implicitly converting between bool and UBool
49     return nextPositionImpl(cfpos, fNumericField, status) ? true : false;
50 }
51 
nextFieldPosition(FieldPosition & fp,UErrorCode & status) const52 UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErrorCode& status) const {
53     int32_t rawField = fp.getField();
54 
55     if (rawField == FieldPosition::DONT_CARE) {
56         return false;
57     }
58 
59     if (rawField < 0 || rawField >= UNUM_FIELD_COUNT) {
60         status = U_ILLEGAL_ARGUMENT_ERROR;
61         return false;
62     }
63 
64     ConstrainedFieldPosition cfpos;
65     cfpos.constrainField(UFIELD_CATEGORY_NUMBER, rawField);
66     cfpos.setState(UFIELD_CATEGORY_NUMBER, rawField, fp.getBeginIndex(), fp.getEndIndex());
67     if (nextPositionImpl(cfpos, kUndefinedField, status)) {
68         fp.setBeginIndex(cfpos.getStart());
69         fp.setEndIndex(cfpos.getLimit());
70         return true;
71     }
72 
73     // Special case: fraction should start after integer if fraction is not present
74     if (rawField == UNUM_FRACTION_FIELD && fp.getEndIndex() == 0) {
75         bool inside = false;
76         int32_t i = fString.fZero;
77         for (; i < fString.fZero + fString.fLength; i++) {
78             if (isIntOrGroup(fString.getFieldPtr()[i]) || fString.getFieldPtr()[i] == Field(UFIELD_CATEGORY_NUMBER, UNUM_DECIMAL_SEPARATOR_FIELD)) {
79                 inside = true;
80             } else if (inside) {
81                 break;
82             }
83         }
84         fp.setBeginIndex(i - fString.fZero);
85         fp.setEndIndex(i - fString.fZero);
86     }
87 
88     return false;
89 }
90 
getAllFieldPositions(FieldPositionIteratorHandler & fpih,UErrorCode & status) const91 void FormattedValueStringBuilderImpl::getAllFieldPositions(FieldPositionIteratorHandler& fpih,
92                                                UErrorCode& status) const {
93     ConstrainedFieldPosition cfpos;
94     while (nextPositionImpl(cfpos, kUndefinedField, status)) {
95         fpih.addAttribute(cfpos.getField(), cfpos.getStart(), cfpos.getLimit());
96     }
97 }
98 
resetString()99 void FormattedValueStringBuilderImpl::resetString() {
100     fString.clear();
101     spanIndicesCount = 0;
102 }
103 
104 // Signal the end of the string using a field that doesn't exist and that is
105 // different from kUndefinedField, which is used for "null field".
106 static constexpr Field kEndField = Field(0xf, 0xf);
107 
nextPositionImpl(ConstrainedFieldPosition & cfpos,Field numericField,UErrorCode &) const108 bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& /*status*/) const {
109     int32_t fieldStart = -1;
110     Field currField = kUndefinedField;
111     bool prevIsSpan = false;
112     int32_t nextSpanStart = -1;
113     if (spanIndicesCount > 0) {
114         int64_t si = cfpos.getInt64IterationContext();
115         U_ASSERT(si <= spanIndicesCount);
116         if (si < spanIndicesCount) {
117             nextSpanStart = spanIndices[si].start;
118         }
119         if (si > 0) {
120             prevIsSpan = cfpos.getCategory() == spanIndices[si-1].category
121                 && cfpos.getField() == spanIndices[si-1].spanValue;
122         }
123     }
124     bool prevIsNumeric = false;
125     if (numericField != kUndefinedField) {
126         prevIsNumeric = cfpos.getCategory() == numericField.getCategory()
127             && cfpos.getField() == numericField.getField();
128     }
129     bool prevIsInteger = cfpos.getCategory() == UFIELD_CATEGORY_NUMBER
130         && cfpos.getField() == UNUM_INTEGER_FIELD;
131 
132     for (int32_t i = fString.fZero + cfpos.getLimit(); i <= fString.fZero + fString.fLength; i++) {
133         Field _field = (i < fString.fZero + fString.fLength) ? fString.getFieldPtr()[i] : kEndField;
134         // Case 1: currently scanning a field.
135         if (currField != kUndefinedField) {
136             if (currField != _field) {
137                 int32_t end = i - fString.fZero;
138                 // Grouping separators can be whitespace; don't throw them out!
139                 if (isTrimmable(currField)) {
140                     end = trimBack(i - fString.fZero);
141                 }
142                 if (end <= fieldStart) {
143                     // Entire field position is ignorable; skip.
144                     fieldStart = -1;
145                     currField = kUndefinedField;
146                     i--;  // look at this index again
147                     continue;
148                 }
149                 int32_t start = fieldStart;
150                 if (isTrimmable(currField)) {
151                     start = trimFront(start);
152                 }
153                 cfpos.setState(currField.getCategory(), currField.getField(), start, end);
154                 return true;
155             }
156             continue;
157         }
158         // Special case: emit normalField if we are pointing at the end of spanField.
159         if (i > fString.fZero && prevIsSpan) {
160             int64_t si = cfpos.getInt64IterationContext() - 1;
161             U_ASSERT(si >= 0);
162             int32_t previ = i - spanIndices[si].length;
163             U_ASSERT(previ >= fString.fZero);
164             Field prevField = fString.getFieldPtr()[previ];
165             if (prevField == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) {
166                 // Special handling for ULISTFMT_ELEMENT_FIELD
167                 if (cfpos.matchesField(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) {
168                     fieldStart = i - fString.fZero - spanIndices[si].length;
169                     int32_t end = fieldStart + spanIndices[si].length;
170                     cfpos.setState(
171                         UFIELD_CATEGORY_LIST,
172                         ULISTFMT_ELEMENT_FIELD,
173                         fieldStart,
174                         end);
175                     return true;
176                 } else {
177                     prevIsSpan = false;
178                 }
179             } else {
180                 // Re-wind, since there may be multiple fields in the span.
181                 i = previ;
182                 _field = prevField;
183             }
184         }
185         // Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER.
186         if (cfpos.matchesField(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)
187                 && i > fString.fZero
188                 && !prevIsInteger
189                 && !prevIsNumeric
190                 && isIntOrGroup(fString.getFieldPtr()[i - 1])
191                 && !isIntOrGroup(_field)) {
192             int j = i - 1;
193             for (; j >= fString.fZero && isIntOrGroup(fString.getFieldPtr()[j]); j--) {}
194             cfpos.setState(
195                 UFIELD_CATEGORY_NUMBER,
196                 UNUM_INTEGER_FIELD,
197                 j - fString.fZero + 1,
198                 i - fString.fZero);
199             return true;
200         }
201         // Special case: coalesce NUMERIC if we are pointing at the end of the NUMERIC.
202         if (numericField != kUndefinedField
203                 && cfpos.matchesField(numericField.getCategory(), numericField.getField())
204                 && i > fString.fZero
205                 && !prevIsNumeric
206                 && fString.getFieldPtr()[i - 1].isNumeric()
207                 && !_field.isNumeric()) {
208             // Re-wind to the beginning of the field and then emit it
209             int32_t j = i - 1;
210             for (; j >= fString.fZero && fString.getFieldPtr()[j].isNumeric(); j--) {}
211             cfpos.setState(
212                 numericField.getCategory(),
213                 numericField.getField(),
214                 j - fString.fZero + 1,
215                 i - fString.fZero);
216             return true;
217         }
218         // Check for span field
219         if (!prevIsSpan && (
220                 _field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD) ||
221                 i - fString.fZero == nextSpanStart)) {
222             int64_t si = cfpos.getInt64IterationContext();
223             if (si >= spanIndicesCount) {
224                 break;
225             }
226             UFieldCategory spanCategory = spanIndices[si].category;
227             int32_t spanValue = spanIndices[si].spanValue;
228             int32_t length = spanIndices[si].length;
229             cfpos.setInt64IterationContext(si + 1);
230             if (si + 1 < spanIndicesCount) {
231                 nextSpanStart = spanIndices[si + 1].start;
232             }
233             if (length == 0) {
234                 // ICU-21871: Don't return fields on empty spans
235                 i--;
236                 continue;
237             }
238             if (cfpos.matchesField(spanCategory, spanValue)) {
239                 fieldStart = i - fString.fZero;
240                 int32_t end = fieldStart + length;
241                 cfpos.setState(
242                     spanCategory,
243                     spanValue,
244                     fieldStart,
245                     end);
246                 return true;
247             } else if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) {
248                 // Special handling for ULISTFMT_ELEMENT_FIELD
249                 if (cfpos.matchesField(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) {
250                     fieldStart = i - fString.fZero;
251                     int32_t end = fieldStart + length;
252                     cfpos.setState(
253                         UFIELD_CATEGORY_LIST,
254                         ULISTFMT_ELEMENT_FIELD,
255                         fieldStart,
256                         end);
257                     return true;
258                 } else {
259                     // Failed to match; jump ahead
260                     i += length - 1;
261                     // goto loopend
262                 }
263             }
264         }
265         // Special case: skip over INTEGER; will be coalesced later.
266         else if (_field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)) {
267             _field = kUndefinedField;
268         }
269         // No field starting at this position.
270         else if (_field.isUndefined() || _field == kEndField) {
271             // goto loopend
272         }
273         // No SpanField
274         else if (cfpos.matchesField(_field.getCategory(), _field.getField())) {
275             fieldStart = i - fString.fZero;
276             currField = _field;
277         }
278         // loopend:
279         prevIsSpan = false;
280         prevIsNumeric = false;
281         prevIsInteger = false;
282     }
283 
284     U_ASSERT(currField == kUndefinedField);
285     // Always set the position to the end so that we don't revisit previous sections
286     cfpos.setState(
287         cfpos.getCategory(),
288         cfpos.getField(),
289         fString.fLength,
290         fString.fLength);
291     return false;
292 }
293 
appendSpanInfo(UFieldCategory category,int32_t spanValue,int32_t start,int32_t length,UErrorCode & status)294 void FormattedValueStringBuilderImpl::appendSpanInfo(UFieldCategory category, int32_t spanValue, int32_t start, int32_t length, UErrorCode& status) {
295     if (U_FAILURE(status)) { return; }
296     U_ASSERT(spanIndices.getCapacity() >= spanIndicesCount);
297     if (spanIndices.getCapacity() == spanIndicesCount) {
298         if (!spanIndices.resize(spanIndicesCount * 2, spanIndicesCount)) {
299             status = U_MEMORY_ALLOCATION_ERROR;
300             return;
301         }
302     }
303     spanIndices[spanIndicesCount] = {category, spanValue, start, length};
304     spanIndicesCount++;
305 }
306 
prependSpanInfo(UFieldCategory category,int32_t spanValue,int32_t start,int32_t length,UErrorCode & status)307 void FormattedValueStringBuilderImpl::prependSpanInfo(UFieldCategory category, int32_t spanValue, int32_t start, int32_t length, UErrorCode& status) {
308     if (U_FAILURE(status)) { return; }
309     U_ASSERT(spanIndices.getCapacity() >= spanIndicesCount);
310     if (spanIndices.getCapacity() == spanIndicesCount) {
311         if (!spanIndices.resize(spanIndicesCount * 2, spanIndicesCount)) {
312             status = U_MEMORY_ALLOCATION_ERROR;
313             return;
314         }
315     }
316     for (int32_t i = spanIndicesCount - 1; i >= 0; i--) {
317         spanIndices[i+1] = spanIndices[i];
318     }
319     spanIndices[0] = {category, spanValue, start, length};
320     spanIndicesCount++;
321 }
322 
isIntOrGroup(Field field)323 bool FormattedValueStringBuilderImpl::isIntOrGroup(Field field) {
324     return field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)
325         || field == Field(UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD);
326 }
327 
isTrimmable(Field field)328 bool FormattedValueStringBuilderImpl::isTrimmable(Field field) {
329     return field != Field(UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD)
330         && field.getCategory() != UFIELD_CATEGORY_LIST;
331 }
332 
trimBack(int32_t limit) const333 int32_t FormattedValueStringBuilderImpl::trimBack(int32_t limit) const {
334     return unisets::get(unisets::DEFAULT_IGNORABLES)->spanBack(
335         fString.getCharPtr() + fString.fZero,
336         limit,
337         USET_SPAN_CONTAINED);
338 }
339 
trimFront(int32_t start) const340 int32_t FormattedValueStringBuilderImpl::trimFront(int32_t start) const {
341     return start + unisets::get(unisets::DEFAULT_IGNORABLES)->span(
342         fString.getCharPtr() + fString.fZero + start,
343         fString.fLength - start,
344         USET_SPAN_CONTAINED);
345 }
346 
347 
348 U_NAMESPACE_END
349 
350 #endif /* #if !UCONFIG_NO_FORMATTING */
351