xref: /aosp_15_r20/external/cronet/third_party/icu/source/i18n/measunit_extra.cpp (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 // Extra functions for MeasureUnit not needed for all clients.
5 // Separate .o file so that it can be removed for modularity.
6 
7 #include "unicode/utypes.h"
8 
9 #if !UCONFIG_NO_FORMATTING
10 
11 // Allow implicit conversion from char16_t* to UnicodeString for this file:
12 // Helpful in toString methods and elsewhere.
13 #define UNISTR_FROM_STRING_EXPLICIT
14 
15 #include "charstr.h"
16 #include "cmemory.h"
17 #include "cstring.h"
18 #include "measunit_impl.h"
19 #include "resource.h"
20 #include "uarrsort.h"
21 #include "uassert.h"
22 #include "ucln_in.h"
23 #include "umutex.h"
24 #include "unicode/bytestrie.h"
25 #include "unicode/bytestriebuilder.h"
26 #include "unicode/localpointer.h"
27 #include "unicode/stringpiece.h"
28 #include "unicode/stringtriebuilder.h"
29 #include "unicode/ures.h"
30 #include "unicode/ustringtrie.h"
31 #include "uresimp.h"
32 #include "util.h"
33 #include <cstdlib>
34 
35 U_NAMESPACE_BEGIN
36 
37 
38 namespace {
39 
40 // TODO: Propose a new error code for this?
41 constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR;
42 
43 // Trie value offset for SI or binary prefixes. This is big enough to ensure we only
44 // insert positive integers into the trie.
45 constexpr int32_t kPrefixOffset = 64;
46 static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_BIN > 0,
47               "kPrefixOffset is too small for minimum UMeasurePrefix value");
48 static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_SI > 0,
49               "kPrefixOffset is too small for minimum UMeasurePrefix value");
50 
51 // Trie value offset for compound parts, e.g. "-per-", "-", "-and-".
52 constexpr int32_t kCompoundPartOffset = 128;
53 static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_BIN,
54               "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens");
55 static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_SI,
56               "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens");
57 
58 enum CompoundPart {
59     // Represents "-per-"
60     COMPOUND_PART_PER = kCompoundPartOffset,
61     // Represents "-"
62     COMPOUND_PART_TIMES,
63     // Represents "-and-"
64     COMPOUND_PART_AND,
65 };
66 
67 // Trie value offset for "per-".
68 constexpr int32_t kInitialCompoundPartOffset = 192;
69 
70 enum InitialCompoundPart {
71     // Represents "per-", the only compound part that can appear at the start of
72     // an identifier.
73     INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset,
74 };
75 
// Trie value offset for powers like "square-", "cubic-", "pow2-" etc.
constexpr int32_t kPowerPartOffset = 256;

// Trie values for power prefixes. Each value is kPowerPartOffset plus the
// exponent, which is how Token::getPower() recovers the exponent again.
enum PowerPart {
    POWER_PART_P2 = kPowerPartOffset + 2,
    POWER_PART_P3 = kPowerPartOffset + 3,
    POWER_PART_P4 = kPowerPartOffset + 4,
    POWER_PART_P5 = kPowerPartOffset + 5,
    POWER_PART_P6 = kPowerPartOffset + 6,
    POWER_PART_P7 = kPowerPartOffset + 7,
    POWER_PART_P8 = kPowerPartOffset + 8,
    POWER_PART_P9 = kPowerPartOffset + 9,
    POWER_PART_P10 = kPowerPartOffset + 10,
    POWER_PART_P11 = kPowerPartOffset + 11,
    POWER_PART_P12 = kPowerPartOffset + 12,
    POWER_PART_P13 = kPowerPartOffset + 13,
    POWER_PART_P14 = kPowerPartOffset + 14,
    POWER_PART_P15 = kPowerPartOffset + 15,
};

// Trie value offset for simple units, e.g. "gram", "nautical-mile",
// "fluid-ounce-imperial".
constexpr int32_t kSimpleUnitOffset = 512;
// Token types are told apart by value range only, so the largest power token
// has to stay below the first simple unit token.
static_assert(static_cast<int32_t>(POWER_PART_P15) < kSimpleUnitOffset,
              "Ambiguous token values: PowerPart tokens are overlapping with simple unit tokens");
99 
100 const struct UnitPrefixStrings {
101     const char* const string;
102     UMeasurePrefix value;
103 } gUnitPrefixStrings[] = {
104     // SI prefixes
105     { "yotta", UMEASURE_PREFIX_YOTTA },
106     { "zetta", UMEASURE_PREFIX_ZETTA },
107     { "exa", UMEASURE_PREFIX_EXA },
108     { "peta", UMEASURE_PREFIX_PETA },
109     { "tera", UMEASURE_PREFIX_TERA },
110     { "giga", UMEASURE_PREFIX_GIGA },
111     { "mega", UMEASURE_PREFIX_MEGA },
112     { "kilo", UMEASURE_PREFIX_KILO },
113     { "hecto", UMEASURE_PREFIX_HECTO },
114     { "deka", UMEASURE_PREFIX_DEKA },
115     { "deci", UMEASURE_PREFIX_DECI },
116     { "centi", UMEASURE_PREFIX_CENTI },
117     { "milli", UMEASURE_PREFIX_MILLI },
118     { "micro", UMEASURE_PREFIX_MICRO },
119     { "nano", UMEASURE_PREFIX_NANO },
120     { "pico", UMEASURE_PREFIX_PICO },
121     { "femto", UMEASURE_PREFIX_FEMTO },
122     { "atto", UMEASURE_PREFIX_ATTO },
123     { "zepto", UMEASURE_PREFIX_ZEPTO },
124     { "yocto", UMEASURE_PREFIX_YOCTO },
125     // Binary prefixes
126     { "yobi", UMEASURE_PREFIX_YOBI },
127     { "zebi", UMEASURE_PREFIX_ZEBI },
128     { "exbi", UMEASURE_PREFIX_EXBI },
129     { "pebi", UMEASURE_PREFIX_PEBI },
130     { "tebi", UMEASURE_PREFIX_TEBI },
131     { "gibi", UMEASURE_PREFIX_GIBI },
132     { "mebi", UMEASURE_PREFIX_MEBI },
133     { "kibi", UMEASURE_PREFIX_KIBI },
134 };
135 
136 /**
137  * A ResourceSink that collects simple unit identifiers from the keys of the
138  * convertUnits table into an array, and adds these values to a TrieBuilder,
139  * with associated values being their index into this array plus a specified
140  * offset.
141  *
142  * Example code:
143  *
144  *     UErrorCode status = U_ZERO_ERROR;
145  *     BytesTrieBuilder b(status);
146  *     int32_t ARR_SIZE = 200;
147  *     const char *unitIdentifiers[ARR_SIZE];
148  *     int32_t *unitCategories[ARR_SIZE];
149  *     SimpleUnitIdentifiersSink identifierSink(gSerializedUnitCategoriesTrie, unitIdentifiers,
150  *                                              unitCategories, ARR_SIZE, b, kTrieValueOffset);
151  *     LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
152  *     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
153  */
154 class SimpleUnitIdentifiersSink : public icu::ResourceSink {
155   public:
156     /**
157      * Constructor.
158      * @param quantitiesTrieData The data for constructing a quantitiesTrie,
159      *     which maps from a simple unit identifier to an index into the
160      *     gCategories array.
161      * @param out Array of char* to which pointers to the simple unit
162      *     identifiers will be saved. (Does not take ownership.)
163      * @param outCategories Array of int32_t to which category indexes will be
164      *     saved: this corresponds to simple unit IDs saved to `out`, mapping
165      *     from the ID to the value produced by the quantitiesTrie (which is an
166      *     index into the gCategories array).
167      * @param outSize The size of `out` and `outCategories`.
168      * @param trieBuilder The trie builder to which the simple unit identifier
169      *     should be added. The trie builder must outlive this resource sink.
170      * @param trieValueOffset This is added to the index of the identifier in
171      *     the `out` array, before adding to `trieBuilder` as the value
172      *     associated with the identifier.
173      */
SimpleUnitIdentifiersSink(StringPiece quantitiesTrieData,const char ** out,int32_t * outCategories,int32_t outSize,BytesTrieBuilder & trieBuilder,int32_t trieValueOffset)174     explicit SimpleUnitIdentifiersSink(StringPiece quantitiesTrieData, const char **out,
175                                        int32_t *outCategories, int32_t outSize,
176                                        BytesTrieBuilder &trieBuilder, int32_t trieValueOffset)
177         : outArray(out), outCategories(outCategories), outSize(outSize), trieBuilder(trieBuilder),
178           trieValueOffset(trieValueOffset), quantitiesTrieData(quantitiesTrieData), outIndex(0) {}
179 
180     /**
181      * Adds the table keys found in value to the output vector.
182      * @param key The key of the resource passed to `value`: the second
183      *     parameter of the ures_getAllItemsWithFallback() call.
184      * @param value Should be a ResourceTable value, if
185      *     ures_getAllItemsWithFallback() was called correctly for this sink.
186      * @param noFallback Ignored.
187      * @param status The standard ICU error code output parameter.
188      */
put(const char *,ResourceValue & value,UBool,UErrorCode & status)189     void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
190         ResourceTable table = value.getTable(status);
191         if (U_FAILURE(status)) return;
192 
193         if (outIndex + table.getSize() > outSize) {
194             status = U_INDEX_OUTOFBOUNDS_ERROR;
195             return;
196         }
197 
198         BytesTrie quantitiesTrie(quantitiesTrieData.data());
199 
200         // Collect keys from the table resource.
201         const char *simpleUnitID;
202         for (int32_t i = 0; table.getKeyAndValue(i, simpleUnitID, value); ++i) {
203             U_ASSERT(i < table.getSize());
204             U_ASSERT(outIndex < outSize);
205             if (uprv_strcmp(simpleUnitID, "kilogram") == 0) {
206                 // For parsing, we use "gram", the prefixless metric mass unit. We
207                 // thus ignore the SI Base Unit of Mass: it exists due to being the
208                 // mass conversion target unit, but not needed for MeasureUnit
209                 // parsing.
210                 continue;
211             }
212             outArray[outIndex] = simpleUnitID;
213             trieBuilder.add(simpleUnitID, trieValueOffset + outIndex, status);
214 
215             // Find the base target unit for this simple unit
216             ResourceTable table = value.getTable(status);
217             if (U_FAILURE(status)) { return; }
218             if (!table.findValue("target", value)) {
219                 status = U_INVALID_FORMAT_ERROR;
220                 break;
221             }
222             int32_t len;
223             const char16_t* uTarget = value.getString(len, status);
224             CharString target;
225             target.appendInvariantChars(uTarget, len, status);
226             if (U_FAILURE(status)) { return; }
227             quantitiesTrie.reset();
228             UStringTrieResult result = quantitiesTrie.next(target.data(), target.length());
229             if (!USTRINGTRIE_HAS_VALUE(result)) {
230                 status = U_INVALID_FORMAT_ERROR;
231                 break;
232             }
233             outCategories[outIndex] = quantitiesTrie.getValue();
234 
235             outIndex++;
236         }
237     }
238 
239   private:
240     const char **outArray;
241     int32_t *outCategories;
242     int32_t outSize;
243     BytesTrieBuilder &trieBuilder;
244     int32_t trieValueOffset;
245 
246     StringPiece quantitiesTrieData;
247 
248     int32_t outIndex;
249 };
250 
251 /**
252  * A ResourceSink that collects information from `unitQuantities` in the `units`
253  * resource to provide key->value lookups from base unit to category, as well as
254  * preserving ordering information for these categories. See `units.txt`.
255  *
256  * For example: "kilogram" -> "mass", "meter-per-second" -> "speed".
257  *
258  * In C++ unitQuantity values are collected in order into a char16_t* array, while
259  * unitQuantity keys are added added to a TrieBuilder, with associated values
260  * being the index into the aforementioned char16_t* array.
261  */
262 class CategoriesSink : public icu::ResourceSink {
263   public:
264     /**
265      * Constructor.
266      * @param out Array of char16_t* to which unitQuantity values will be saved.
267      *     The pointers returned  not owned: they point directly at the resource
268      *     strings in static memory.
269      * @param outSize The size of the `out` array.
270      * @param trieBuilder The trie builder to which the keys (base units) of
271      *     each unitQuantity will be added, each with value being the offset
272      *     into `out`.
273      */
CategoriesSink(const char16_t ** out,int32_t & outSize,BytesTrieBuilder & trieBuilder)274     explicit CategoriesSink(const char16_t **out, int32_t &outSize, BytesTrieBuilder &trieBuilder)
275         : outQuantitiesArray(out), outSize(outSize), trieBuilder(trieBuilder), outIndex(0) {}
276 
put(const char *,ResourceValue & value,UBool,UErrorCode & status)277     void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
278         ResourceArray array = value.getArray(status);
279         if (U_FAILURE(status)) {
280             return;
281         }
282 
283         if (outIndex + array.getSize() > outSize) {
284             status = U_INDEX_OUTOFBOUNDS_ERROR;
285             return;
286         }
287 
288         for (int32_t i = 0; array.getValue(i, value); ++i) {
289             U_ASSERT(outIndex < outSize);
290             ResourceTable table = value.getTable(status);
291             if (U_FAILURE(status)) {
292                 return;
293             }
294             if (table.getSize() != 1) {
295                 status = U_INVALID_FORMAT_ERROR;
296                 return;
297             }
298             const char *key;
299             table.getKeyAndValue(0, key, value);
300             int32_t uTmpLen;
301             outQuantitiesArray[outIndex] = value.getString(uTmpLen, status);
302             trieBuilder.add(key, outIndex, status);
303             outIndex++;
304         }
305     }
306 
307   private:
308     const char16_t **outQuantitiesArray;
309     int32_t &outSize;
310     BytesTrieBuilder &trieBuilder;
311 
312     int32_t outIndex;
313 };
314 
315 icu::UInitOnce gUnitExtrasInitOnce {};
316 
317 // Array of simple unit IDs.
318 //
319 // The array memory itself is owned by this pointer, but the individual char* in
320 // that array point at static memory. (Note that these char* are also returned
321 // by SingleUnitImpl::getSimpleUnitID().)
322 const char **gSimpleUnits = nullptr;
323 
324 // Maps from the value associated with each simple unit ID to an index into the
325 // gCategories array.
326 int32_t *gSimpleUnitCategories = nullptr;
327 
328 char *gSerializedUnitExtrasStemTrie = nullptr;
329 
330 // Array of char16_t* pointing at the unit categories (aka "quantities", aka
331 // "types"), as found in the `unitQuantities` resource. The array memory itself
332 // is owned by this pointer, but the individual char16_t* in that array point at
333 // static memory.
334 const char16_t **gCategories = nullptr;
335 // Number of items in `gCategories`.
336 int32_t gCategoriesCount = 0;
337 // Serialized BytesTrie for mapping from base units to indices into gCategories.
338 char *gSerializedUnitCategoriesTrie = nullptr;
339 
cleanupUnitExtras()340 UBool U_CALLCONV cleanupUnitExtras() {
341     uprv_free(gSerializedUnitCategoriesTrie);
342     gSerializedUnitCategoriesTrie = nullptr;
343     uprv_free(gCategories);
344     gCategories = nullptr;
345     uprv_free(gSerializedUnitExtrasStemTrie);
346     gSerializedUnitExtrasStemTrie = nullptr;
347     uprv_free(gSimpleUnitCategories);
348     gSimpleUnitCategories = nullptr;
349     uprv_free(gSimpleUnits);
350     gSimpleUnits = nullptr;
351     gUnitExtrasInitOnce.reset();
352     return true;
353 }
354 
initUnitExtras(UErrorCode & status)355 void U_CALLCONV initUnitExtras(UErrorCode& status) {
356     ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras);
357     LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
358 
359     // Collect unitQuantities information into gSerializedUnitCategoriesTrie and gCategories.
360     const char *CATEGORY_TABLE_NAME = "unitQuantities";
361     LocalUResourceBundlePointer unitQuantities(
362         ures_getByKey(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, nullptr, &status));
363     if (U_FAILURE(status)) { return; }
364     gCategoriesCount = unitQuantities.getAlias()->fSize;
365     size_t quantitiesMallocSize = sizeof(char16_t *) * gCategoriesCount;
366     gCategories = static_cast<const char16_t **>(uprv_malloc(quantitiesMallocSize));
367     if (gCategories == nullptr) {
368         status = U_MEMORY_ALLOCATION_ERROR;
369         return;
370     }
371     uprv_memset(gCategories, 0, quantitiesMallocSize);
372     BytesTrieBuilder quantitiesBuilder(status);
373     CategoriesSink categoriesSink(gCategories, gCategoriesCount, quantitiesBuilder);
374     ures_getAllItemsWithFallback(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, categoriesSink, status);
375     StringPiece resultQuantities = quantitiesBuilder.buildStringPiece(USTRINGTRIE_BUILD_FAST, status);
376     if (U_FAILURE(status)) { return; }
377     // Copy the result into the global constant pointer
378     size_t numBytesQuantities = resultQuantities.length();
379     gSerializedUnitCategoriesTrie = static_cast<char *>(uprv_malloc(numBytesQuantities));
380     if (gSerializedUnitCategoriesTrie == nullptr) {
381         status = U_MEMORY_ALLOCATION_ERROR;
382         return;
383     }
384     uprv_memcpy(gSerializedUnitCategoriesTrie, resultQuantities.data(), numBytesQuantities);
385 
386     // Build the BytesTrie that Parser needs for parsing unit identifiers.
387 
388     BytesTrieBuilder b(status);
389     if (U_FAILURE(status)) { return; }
390 
391     // Add SI and binary prefixes
392     for (const auto& unitPrefixInfo : gUnitPrefixStrings) {
393         b.add(unitPrefixInfo.string, unitPrefixInfo.value + kPrefixOffset, status);
394     }
395     if (U_FAILURE(status)) { return; }
396 
397     // Add syntax parts (compound, power prefixes)
398     b.add("-per-", COMPOUND_PART_PER, status);
399     b.add("-", COMPOUND_PART_TIMES, status);
400     b.add("-and-", COMPOUND_PART_AND, status);
401     b.add("per-", INITIAL_COMPOUND_PART_PER, status);
402     b.add("square-", POWER_PART_P2, status);
403     b.add("cubic-", POWER_PART_P3, status);
404     b.add("pow2-", POWER_PART_P2, status);
405     b.add("pow3-", POWER_PART_P3, status);
406     b.add("pow4-", POWER_PART_P4, status);
407     b.add("pow5-", POWER_PART_P5, status);
408     b.add("pow6-", POWER_PART_P6, status);
409     b.add("pow7-", POWER_PART_P7, status);
410     b.add("pow8-", POWER_PART_P8, status);
411     b.add("pow9-", POWER_PART_P9, status);
412     b.add("pow10-", POWER_PART_P10, status);
413     b.add("pow11-", POWER_PART_P11, status);
414     b.add("pow12-", POWER_PART_P12, status);
415     b.add("pow13-", POWER_PART_P13, status);
416     b.add("pow14-", POWER_PART_P14, status);
417     b.add("pow15-", POWER_PART_P15, status);
418     if (U_FAILURE(status)) { return; }
419 
420     // Add sanctioned simple units by offset: simple units all have entries in
421     // units/convertUnits resources.
422     LocalUResourceBundlePointer convertUnits(
423         ures_getByKey(unitsBundle.getAlias(), "convertUnits", nullptr, &status));
424     if (U_FAILURE(status)) { return; }
425 
426     // Allocate enough space: with identifierSink below skipping kilogram, we're
427     // probably allocating one more than needed.
428     int32_t simpleUnitsCount = convertUnits.getAlias()->fSize;
429     int32_t arrayMallocSize = sizeof(char *) * simpleUnitsCount;
430     gSimpleUnits = static_cast<const char **>(uprv_malloc(arrayMallocSize));
431     if (gSimpleUnits == nullptr) {
432         status = U_MEMORY_ALLOCATION_ERROR;
433         return;
434     }
435     uprv_memset(gSimpleUnits, 0, arrayMallocSize);
436     arrayMallocSize = sizeof(int32_t) * simpleUnitsCount;
437     gSimpleUnitCategories = static_cast<int32_t *>(uprv_malloc(arrayMallocSize));
438     if (gSimpleUnitCategories == nullptr) {
439         status = U_MEMORY_ALLOCATION_ERROR;
440         return;
441     }
442     uprv_memset(gSimpleUnitCategories, 0, arrayMallocSize);
443 
444     // Populate gSimpleUnits and build the associated trie.
445     SimpleUnitIdentifiersSink identifierSink(resultQuantities, gSimpleUnits, gSimpleUnitCategories,
446                                              simpleUnitsCount, b, kSimpleUnitOffset);
447     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
448 
449     // Build the CharsTrie
450     // TODO: Use SLOW or FAST here?
451     StringPiece result = b.buildStringPiece(USTRINGTRIE_BUILD_FAST, status);
452     if (U_FAILURE(status)) { return; }
453 
454     // Copy the result into the global constant pointer
455     size_t numBytes = result.length();
456     gSerializedUnitExtrasStemTrie = static_cast<char *>(uprv_malloc(numBytes));
457     if (gSerializedUnitExtrasStemTrie == nullptr) {
458         status = U_MEMORY_ALLOCATION_ERROR;
459         return;
460     }
461     uprv_memcpy(gSerializedUnitExtrasStemTrie, result.data(), numBytes);
462 }
463 
464 class Token {
465 public:
Token(int32_t match)466     Token(int32_t match) : fMatch(match) {}
467 
468     enum Type {
469         TYPE_UNDEFINED,
470         TYPE_PREFIX,
471         // Token type for "-per-", "-", and "-and-".
472         TYPE_COMPOUND_PART,
473         // Token type for "per-".
474         TYPE_INITIAL_COMPOUND_PART,
475         TYPE_POWER_PART,
476         TYPE_SIMPLE_UNIT,
477     };
478 
479     // Calling getType() is invalid, resulting in an assertion failure, if Token
480     // value isn't positive.
getType() const481     Type getType() const {
482         U_ASSERT(fMatch > 0);
483         if (fMatch < kCompoundPartOffset) {
484             return TYPE_PREFIX;
485         }
486         if (fMatch < kInitialCompoundPartOffset) {
487             return TYPE_COMPOUND_PART;
488         }
489         if (fMatch < kPowerPartOffset) {
490             return TYPE_INITIAL_COMPOUND_PART;
491         }
492         if (fMatch < kSimpleUnitOffset) {
493             return TYPE_POWER_PART;
494         }
495         return TYPE_SIMPLE_UNIT;
496     }
497 
getUnitPrefix() const498     UMeasurePrefix getUnitPrefix() const {
499         U_ASSERT(getType() == TYPE_PREFIX);
500         return static_cast<UMeasurePrefix>(fMatch - kPrefixOffset);
501     }
502 
503     // Valid only for tokens with type TYPE_COMPOUND_PART.
getMatch() const504     int32_t getMatch() const {
505         U_ASSERT(getType() == TYPE_COMPOUND_PART);
506         return fMatch;
507     }
508 
getInitialCompoundPart() const509     int32_t getInitialCompoundPart() const {
510         // Even if there is only one InitialCompoundPart value, we have this
511         // function for the simplicity of code consistency.
512         U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART);
513         // Defensive: if this assert fails, code using this function also needs
514         // to change.
515         U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER);
516         return fMatch;
517     }
518 
getPower() const519     int8_t getPower() const {
520         U_ASSERT(getType() == TYPE_POWER_PART);
521         return static_cast<int8_t>(fMatch - kPowerPartOffset);
522     }
523 
getSimpleUnitIndex() const524     int32_t getSimpleUnitIndex() const {
525         U_ASSERT(getType() == TYPE_SIMPLE_UNIT);
526         return fMatch - kSimpleUnitOffset;
527     }
528 
529 private:
530     int32_t fMatch;
531 };
532 
533 class Parser {
534 public:
535     /**
536      * Factory function for parsing the given identifier.
537      *
538      * @param source The identifier to parse. This function does not make a copy
539      * of source: the underlying string that source points at, must outlive the
540      * parser.
541      * @param status ICU error code.
542      */
from(StringPiece source,UErrorCode & status)543     static Parser from(StringPiece source, UErrorCode& status) {
544         if (U_FAILURE(status)) {
545             return Parser();
546         }
547         umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
548         if (U_FAILURE(status)) {
549             return Parser();
550         }
551         return Parser(source);
552     }
553 
parse(UErrorCode & status)554     MeasureUnitImpl parse(UErrorCode& status) {
555         MeasureUnitImpl result;
556 
557         if (U_FAILURE(status)) {
558             return result;
559         }
560         if (fSource.empty()) {
561             // The dimenionless unit: nothing to parse. leave result as is.
562             return result;
563         }
564 
565         while (hasNext()) {
566             bool sawAnd = false;
567 
568             SingleUnitImpl singleUnit = nextSingleUnit(sawAnd, status);
569             if (U_FAILURE(status)) {
570                 return result;
571             }
572 
573             bool added = result.appendSingleUnit(singleUnit, status);
574             if (U_FAILURE(status)) {
575                 return result;
576             }
577 
578             if (sawAnd && !added) {
579                 // Two similar units are not allowed in a mixed unit.
580                 status = kUnitIdentifierSyntaxError;
581                 return result;
582             }
583 
584             if (result.singleUnits.length() >= 2) {
585                 // nextSingleUnit fails appropriately for "per" and "and" in the
586                 // same identifier. It doesn't fail for other compound units
587                 // (COMPOUND_PART_TIMES). Consequently we take care of that
588                 // here.
589                 UMeasureUnitComplexity complexity =
590                     sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND;
591                 if (result.singleUnits.length() == 2) {
592                     // After appending two singleUnits, the complexity will be `UMEASURE_UNIT_COMPOUND`
593                     U_ASSERT(result.complexity == UMEASURE_UNIT_COMPOUND);
594                     result.complexity = complexity;
595                 } else if (result.complexity != complexity) {
596                     // Can't have mixed compound units
597                     status = kUnitIdentifierSyntaxError;
598                     return result;
599                 }
600             }
601         }
602 
603         return result;
604     }
605 
606 private:
607     // Tracks parser progress: the offset into fSource.
608     int32_t fIndex = 0;
609 
610     // Since we're not owning this memory, whatever is passed to the constructor
611     // should live longer than this Parser - and the parser shouldn't return any
612     // references to that string.
613     StringPiece fSource;
614     BytesTrie fTrie;
615 
616     // Set to true when we've seen a "-per-" or a "per-", after which all units
617     // are in the denominator. Until we find an "-and-", at which point the
618     // identifier is invalid pending TODO(CLDR-13701).
619     bool fAfterPer = false;
620 
Parser()621     Parser() : fSource(""), fTrie(u"") {}
622 
Parser(StringPiece source)623     Parser(StringPiece source)
624         : fSource(source), fTrie(gSerializedUnitExtrasStemTrie) {}
625 
hasNext() const626     inline bool hasNext() const {
627         return fIndex < fSource.length();
628     }
629 
630     // Returns the next Token parsed from fSource, advancing fIndex to the end
631     // of that token in fSource. In case of U_FAILURE(status), the token
632     // returned will cause an abort if getType() is called on it.
nextToken(UErrorCode & status)633     Token nextToken(UErrorCode& status) {
634         fTrie.reset();
635         int32_t match = -1;
636         // Saves the position in the fSource string for the end of the most
637         // recent matching token.
638         int32_t previ = -1;
639         // Find the longest token that matches a value in the trie:
640         while (fIndex < fSource.length()) {
641             auto result = fTrie.next(fSource.data()[fIndex++]);
642             if (result == USTRINGTRIE_NO_MATCH) {
643                 break;
644             } else if (result == USTRINGTRIE_NO_VALUE) {
645                 continue;
646             }
647             U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
648             match = fTrie.getValue();
649             previ = fIndex;
650             if (result == USTRINGTRIE_FINAL_VALUE) {
651                 break;
652             }
653             U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE);
654             // continue;
655         }
656 
657         if (match < 0) {
658             status = kUnitIdentifierSyntaxError;
659         } else {
660             fIndex = previ;
661         }
662         return Token(match);
663     }
664 
665     /**
666      * Returns the next "single unit" via result.
667      *
668      * If a "-per-" was parsed, the result will have appropriate negative
669      * dimensionality.
670      *
671      * Returns an error if we parse both compound units and "-and-", since mixed
672      * compound units are not yet supported - TODO(CLDR-13701).
673      *
674      * @param result Will be overwritten by the result, if status shows success.
675      * @param sawAnd If an "-and-" was parsed prior to finding the "single
676      * unit", sawAnd is set to true. If not, it is left as is.
677      * @param status ICU error code.
678      */
nextSingleUnit(bool & sawAnd,UErrorCode & status)679     SingleUnitImpl nextSingleUnit(bool &sawAnd, UErrorCode &status) {
680         SingleUnitImpl result;
681         if (U_FAILURE(status)) {
682             return result;
683         }
684 
685         // state:
686         // 0 = no tokens seen yet (will accept power, SI or binary prefix, or simple unit)
687         // 1 = power token seen (will not accept another power token)
688         // 2 = SI or binary prefix token seen (will not accept a power, or SI or binary prefix token)
689         int32_t state = 0;
690 
691         bool atStart = fIndex == 0;
692         Token token = nextToken(status);
693         if (U_FAILURE(status)) {
694             return result;
695         }
696 
697         if (atStart) {
698             // Identifiers optionally start with "per-".
699             if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) {
700                 U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER);
701                 fAfterPer = true;
702                 result.dimensionality = -1;
703 
704                 token = nextToken(status);
705                 if (U_FAILURE(status)) {
706                     return result;
707                 }
708             }
709         } else {
710             // All other SingleUnit's are separated from previous SingleUnit's
711             // via a compound part:
712             if (token.getType() != Token::TYPE_COMPOUND_PART) {
713                 status = kUnitIdentifierSyntaxError;
714                 return result;
715             }
716 
717             switch (token.getMatch()) {
718             case COMPOUND_PART_PER:
719                 if (sawAnd) {
720                     // Mixed compound units not yet supported,
721                     // TODO(CLDR-13701).
722                     status = kUnitIdentifierSyntaxError;
723                     return result;
724                 }
725                 fAfterPer = true;
726                 result.dimensionality = -1;
727                 break;
728 
729             case COMPOUND_PART_TIMES:
730                 if (fAfterPer) {
731                     result.dimensionality = -1;
732                 }
733                 break;
734 
735             case COMPOUND_PART_AND:
736                 if (fAfterPer) {
737                     // Can't start with "-and-", and mixed compound units
738                     // not yet supported, TODO(CLDR-13701).
739                     status = kUnitIdentifierSyntaxError;
740                     return result;
741                 }
742                 sawAnd = true;
743                 break;
744             }
745 
746             token = nextToken(status);
747             if (U_FAILURE(status)) {
748                 return result;
749             }
750         }
751 
752         // Read tokens until we have a complete SingleUnit or we reach the end.
753         while (true) {
754             switch (token.getType()) {
755                 case Token::TYPE_POWER_PART:
756                     if (state > 0) {
757                         status = kUnitIdentifierSyntaxError;
758                         return result;
759                     }
760                     result.dimensionality *= token.getPower();
761                     state = 1;
762                     break;
763 
764                 case Token::TYPE_PREFIX:
765                     if (state > 1) {
766                         status = kUnitIdentifierSyntaxError;
767                         return result;
768                     }
769                     result.unitPrefix = token.getUnitPrefix();
770                     state = 2;
771                     break;
772 
773                 case Token::TYPE_SIMPLE_UNIT:
774                     result.index = token.getSimpleUnitIndex();
775                     return result;
776 
777                 default:
778                     status = kUnitIdentifierSyntaxError;
779                     return result;
780             }
781 
782             if (!hasNext()) {
783                 // We ran out of tokens before finding a complete single unit.
784                 status = kUnitIdentifierSyntaxError;
785                 return result;
786             }
787             token = nextToken(status);
788             if (U_FAILURE(status)) {
789                 return result;
790             }
791         }
792 
793         return result;
794     }
795 };
796 
797 // Sorting function wrapping SingleUnitImpl::compareTo for use with uprv_sortArray.
798 int32_t U_CALLCONV
compareSingleUnits(const void *,const void * left,const void * right)799 compareSingleUnits(const void* /*context*/, const void* left, const void* right) {
800     auto realLeft = static_cast<const SingleUnitImpl* const*>(left);
801     auto realRight = static_cast<const SingleUnitImpl* const*>(right);
802     return (*realLeft)->compareTo(**realRight);
803 }
804 
805 // Returns an index into the gCategories array, for the "unitQuantity" (aka
806 // "type" or "category") associated with the given base unit identifier. Returns
807 // -1 on failure, together with U_UNSUPPORTED_ERROR.
getUnitCategoryIndex(BytesTrie & trie,StringPiece baseUnitIdentifier,UErrorCode & status)808 int32_t getUnitCategoryIndex(BytesTrie &trie, StringPiece baseUnitIdentifier, UErrorCode &status) {
809     UStringTrieResult result = trie.reset().next(baseUnitIdentifier.data(), baseUnitIdentifier.length());
810     if (!USTRINGTRIE_HAS_VALUE(result)) {
811         status = U_UNSUPPORTED_ERROR;
812         return -1;
813     }
814 
815     return trie.getValue();
816 }
817 
818 } // namespace
819 
820 U_CAPI int32_t U_EXPORT2
umeas_getPrefixPower(UMeasurePrefix unitPrefix)821 umeas_getPrefixPower(UMeasurePrefix unitPrefix) {
822     if (unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN &&
823         unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN) {
824         return unitPrefix - UMEASURE_PREFIX_INTERNAL_ONE_BIN;
825     }
826     U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI &&
827              unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI);
828     return unitPrefix - UMEASURE_PREFIX_ONE;
829 }
830 
831 U_CAPI int32_t U_EXPORT2
umeas_getPrefixBase(UMeasurePrefix unitPrefix)832 umeas_getPrefixBase(UMeasurePrefix unitPrefix) {
833     if (unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN &&
834         unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN) {
835         return 1024;
836     }
837     U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI &&
838              unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI);
839     return 10;
840 }
841 
getUnitQuantity(const MeasureUnitImpl & baseMeasureUnitImpl,UErrorCode & status)842 CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status) {
843     CharString result;
844     MeasureUnitImpl baseUnitImpl = baseMeasureUnitImpl.copy(status);
845     UErrorCode localStatus = U_ZERO_ERROR;
846     umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
847     if (U_FAILURE(status)) {
848         return result;
849     }
850     BytesTrie trie(gSerializedUnitCategoriesTrie);
851 
852     baseUnitImpl.serialize(status);
853     StringPiece identifier = baseUnitImpl.identifier.data();
854     int32_t idx = getUnitCategoryIndex(trie, identifier, localStatus);
855     if (U_FAILURE(status)) {
856         return result;
857     }
858 
859     // In case the base unit identifier did not match any entry.
860     if (U_FAILURE(localStatus)) {
861         localStatus = U_ZERO_ERROR;
862         baseUnitImpl.takeReciprocal(status);
863         baseUnitImpl.serialize(status);
864         identifier.set(baseUnitImpl.identifier.data());
865         idx = getUnitCategoryIndex(trie, identifier, localStatus);
866 
867         if (U_FAILURE(status)) {
868             return result;
869         }
870     }
871 
872     // In case the reciprocal of the base unit identifier did not match any entry.
873     MeasureUnitImpl simplifiedUnit = baseMeasureUnitImpl.copyAndSimplify(status);
874     if (U_FAILURE(status)) {
875         return result;
876     }
877     if (U_FAILURE(localStatus)) {
878         localStatus = U_ZERO_ERROR;
879         simplifiedUnit.serialize(status);
880         identifier.set(simplifiedUnit.identifier.data());
881         idx = getUnitCategoryIndex(trie, identifier, localStatus);
882 
883         if (U_FAILURE(status)) {
884             return result;
885         }
886     }
887 
888     // In case the simplified base unit identifier did not match any entry.
889     if (U_FAILURE(localStatus)) {
890         localStatus = U_ZERO_ERROR;
891         simplifiedUnit.takeReciprocal(status);
892         simplifiedUnit.serialize(status);
893         identifier.set(simplifiedUnit.identifier.data());
894         idx = getUnitCategoryIndex(trie, identifier, localStatus);
895 
896         if (U_FAILURE(status)) {
897             return result;
898         }
899     }
900 
901     // If there is no match at all, throw an exception.
902     if (U_FAILURE(localStatus)) {
903         status = U_INVALID_FORMAT_ERROR;
904         return result;
905     }
906 
907     if (idx < 0 || idx >= gCategoriesCount) {
908         status = U_INVALID_FORMAT_ERROR;
909         return result;
910     }
911 
912     result.appendInvariantChars(gCategories[idx], u_strlen(gCategories[idx]), status);
913     return result;
914 }
915 
916 // In ICU4J, this is MeasureUnit.getSingleUnitImpl().
forMeasureUnit(const MeasureUnit & measureUnit,UErrorCode & status)917 SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) {
918     MeasureUnitImpl temp;
919     const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status);
920     if (U_FAILURE(status)) {
921         return {};
922     }
923     if (impl.singleUnits.length() == 0) {
924         return {};
925     }
926     if (impl.singleUnits.length() == 1) {
927         return *impl.singleUnits[0];
928     }
929     status = U_ILLEGAL_ARGUMENT_ERROR;
930     return {};
931 }
932 
build(UErrorCode & status) const933 MeasureUnit SingleUnitImpl::build(UErrorCode& status) const {
934     MeasureUnitImpl temp;
935     temp.appendSingleUnit(*this, status);
936     // TODO(icu-units#28): the MeasureUnitImpl::build() method uses
937     // findBySubtype, which is relatively slow.
938     // - At the time of loading the simple unit IDs, we could also save a
939     //   mapping to the builtin MeasureUnit type and subtype they correspond to.
940     // - This method could then check dimensionality and index, and if both are
941     //   1, directly return MeasureUnit instances very quickly.
942     return std::move(temp).build(status);
943 }
944 
getSimpleUnitID() const945 const char *SingleUnitImpl::getSimpleUnitID() const {
946     return gSimpleUnits[index];
947 }
948 
appendNeutralIdentifier(CharString & result,UErrorCode & status) const949 void SingleUnitImpl::appendNeutralIdentifier(CharString &result, UErrorCode &status) const UPRV_NO_SANITIZE_UNDEFINED {
950     int32_t absPower = std::abs(this->dimensionality);
951 
952     U_ASSERT(absPower > 0); // "this function does not support the dimensionless single units";
953 
954     if (absPower == 1) {
955         // no-op
956     } else if (absPower == 2) {
957         result.append(StringPiece("square-"), status);
958     } else if (absPower == 3) {
959         result.append(StringPiece("cubic-"), status);
960     } else if (absPower <= 15) {
961         result.append(StringPiece("pow"), status);
962         result.appendNumber(absPower, status);
963         result.append(StringPiece("-"), status);
964     } else {
965         status = U_ILLEGAL_ARGUMENT_ERROR; // Unit Identifier Syntax Error
966         return;
967     }
968 
969     if (U_FAILURE(status)) {
970         return;
971     }
972 
973     if (this->unitPrefix != UMEASURE_PREFIX_ONE) {
974         bool found = false;
975         for (const auto &unitPrefixInfo : gUnitPrefixStrings) {
976             // TODO: consider using binary search? If we do this, add a unit
977             // test to ensure gUnitPrefixStrings is sorted?
978             if (unitPrefixInfo.value == this->unitPrefix) {
979                 result.append(unitPrefixInfo.string, status);
980                 found = true;
981                 break;
982             }
983         }
984         if (!found) {
985             status = U_UNSUPPORTED_ERROR;
986             return;
987         }
988     }
989 
990     result.append(StringPiece(this->getSimpleUnitID()), status);
991 }
992 
getUnitCategoryIndex() const993 int32_t SingleUnitImpl::getUnitCategoryIndex() const {
994     return gSimpleUnitCategories[index];
995 }
996 
MeasureUnitImpl(const SingleUnitImpl & singleUnit,UErrorCode & status)997 MeasureUnitImpl::MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status) {
998     this->appendSingleUnit(singleUnit, status);
999 }
1000 
forIdentifier(StringPiece identifier,UErrorCode & status)1001 MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) {
1002     return Parser::from(identifier, status).parse(status);
1003 }
1004 
forMeasureUnit(const MeasureUnit & measureUnit,MeasureUnitImpl & memory,UErrorCode & status)1005 const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit(
1006         const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) {
1007     if (measureUnit.fImpl) {
1008         return *measureUnit.fImpl;
1009     } else {
1010         memory = Parser::from(measureUnit.getIdentifier(), status).parse(status);
1011         return memory;
1012     }
1013 }
1014 
forMeasureUnitMaybeCopy(const MeasureUnit & measureUnit,UErrorCode & status)1015 MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy(
1016         const MeasureUnit& measureUnit, UErrorCode& status) {
1017     if (measureUnit.fImpl) {
1018         return measureUnit.fImpl->copy(status);
1019     } else {
1020         return Parser::from(measureUnit.getIdentifier(), status).parse(status);
1021     }
1022 }
1023 
takeReciprocal(UErrorCode &)1024 void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) {
1025     identifier.clear();
1026     for (int32_t i = 0; i < singleUnits.length(); i++) {
1027         singleUnits[i]->dimensionality *= -1;
1028     }
1029 }
1030 
copyAndSimplify(UErrorCode & status) const1031 MeasureUnitImpl MeasureUnitImpl::copyAndSimplify(UErrorCode &status) const {
1032     MeasureUnitImpl result;
1033     for (int32_t i = 0; i < singleUnits.length(); i++) {
1034         const SingleUnitImpl &singleUnit = *this->singleUnits[i];
1035 
1036         // The following `for` loop will cause time complexity to be O(n^2).
1037         // However, n is very small (number of units, generally, at maximum equal to 10)
1038         bool unitExist = false;
1039         for (int32_t j = 0; j < result.singleUnits.length(); j++) {
1040             if (uprv_strcmp(result.singleUnits[j]->getSimpleUnitID(), singleUnit.getSimpleUnitID()) ==
1041                     0 &&
1042                 result.singleUnits[j]->unitPrefix == singleUnit.unitPrefix) {
1043                 unitExist = true;
1044                 result.singleUnits[j]->dimensionality =
1045                     result.singleUnits[j]->dimensionality + singleUnit.dimensionality;
1046                 break;
1047             }
1048         }
1049 
1050         if (!unitExist) {
1051             result.appendSingleUnit(singleUnit, status);
1052         }
1053     }
1054 
1055     return result;
1056 }
1057 
appendSingleUnit(const SingleUnitImpl & singleUnit,UErrorCode & status)1058 bool MeasureUnitImpl::appendSingleUnit(const SingleUnitImpl &singleUnit, UErrorCode &status) {
1059     identifier.clear();
1060 
1061     if (singleUnit.isDimensionless()) {
1062         // Do not append dimensionless units.
1063         return false;
1064     }
1065 
1066     // Find a similar unit that already exists, to attempt to coalesce
1067     SingleUnitImpl *oldUnit = nullptr;
1068     for (int32_t i = 0; i < this->singleUnits.length(); i++) {
1069         auto *candidate = this->singleUnits[i];
1070         if (candidate->isCompatibleWith(singleUnit)) {
1071             oldUnit = candidate;
1072         }
1073     }
1074 
1075     if (oldUnit) {
1076         // Both dimensionalities will be positive, or both will be negative, by
1077         // virtue of isCompatibleWith().
1078         oldUnit->dimensionality += singleUnit.dimensionality;
1079 
1080         return false;
1081     }
1082 
1083     // Add a copy of singleUnit
1084     // NOTE: MaybeStackVector::emplaceBackAndCheckErrorCode creates new copy of  singleUnit.
1085     this->singleUnits.emplaceBackAndCheckErrorCode(status, singleUnit);
1086     if (U_FAILURE(status)) {
1087         return false;
1088     }
1089 
1090     // If the MeasureUnitImpl is `UMEASURE_UNIT_SINGLE` and after the appending a unit, the `singleUnits`
1091     // contains more than one. thus means the complexity should be `UMEASURE_UNIT_COMPOUND`
1092     if (this->singleUnits.length() > 1 &&
1093         this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_SINGLE) {
1094         this->complexity = UMeasureUnitComplexity::UMEASURE_UNIT_COMPOUND;
1095     }
1096 
1097     return true;
1098 }
1099 
1100 MaybeStackVector<MeasureUnitImplWithIndex>
extractIndividualUnitsWithIndices(UErrorCode & status) const1101 MeasureUnitImpl::extractIndividualUnitsWithIndices(UErrorCode &status) const {
1102     MaybeStackVector<MeasureUnitImplWithIndex> result;
1103 
1104     if (this->complexity != UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) {
1105         result.emplaceBackAndCheckErrorCode(status, 0, *this, status);
1106         return result;
1107     }
1108 
1109     for (int32_t i = 0; i < singleUnits.length(); ++i) {
1110         result.emplaceBackAndCheckErrorCode(status, i, *singleUnits[i], status);
1111         if (U_FAILURE(status)) {
1112             return result;
1113         }
1114     }
1115 
1116     return result;
1117 }
1118 
1119 /**
1120  * Normalize a MeasureUnitImpl and generate the identifier string in place.
1121  */
serialize(UErrorCode & status)1122 void MeasureUnitImpl::serialize(UErrorCode &status) {
1123     if (U_FAILURE(status)) {
1124         return;
1125     }
1126 
1127     if (this->singleUnits.length() == 0) {
1128         // Dimensionless, constructed by the default constructor.
1129         return;
1130     }
1131 
1132     if (this->complexity == UMEASURE_UNIT_COMPOUND) {
1133         // Note: don't sort a MIXED unit
1134         uprv_sortArray(this->singleUnits.getAlias(), this->singleUnits.length(),
1135                        sizeof(this->singleUnits[0]), compareSingleUnits, nullptr, false, &status);
1136         if (U_FAILURE(status)) {
1137             return;
1138         }
1139     }
1140 
1141     CharString result;
1142     bool beforePer = true;
1143     bool firstTimeNegativeDimension = false;
1144     for (int32_t i = 0; i < this->singleUnits.length(); i++) {
1145         if (beforePer && (*this->singleUnits[i]).dimensionality < 0) {
1146             beforePer = false;
1147             firstTimeNegativeDimension = true;
1148         } else if ((*this->singleUnits[i]).dimensionality < 0) {
1149             firstTimeNegativeDimension = false;
1150         }
1151 
1152         if (U_FAILURE(status)) {
1153             return;
1154         }
1155 
1156         if (this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) {
1157             if (result.length() != 0) {
1158                 result.append(StringPiece("-and-"), status);
1159             }
1160         } else {
1161             if (firstTimeNegativeDimension) {
1162                 if (result.length() == 0) {
1163                     result.append(StringPiece("per-"), status);
1164                 } else {
1165                     result.append(StringPiece("-per-"), status);
1166                 }
1167             } else {
1168                 if (result.length() != 0) {
1169                     result.append(StringPiece("-"), status);
1170                 }
1171             }
1172         }
1173 
1174         this->singleUnits[i]->appendNeutralIdentifier(result, status);
1175     }
1176 
1177     this->identifier = CharString(result, status);
1178 }
1179 
build(UErrorCode & status)1180 MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && {
1181     this->serialize(status);
1182     return MeasureUnit(std::move(*this));
1183 }
1184 
forIdentifier(StringPiece identifier,UErrorCode & status)1185 MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) {
1186     return Parser::from(identifier, status).parse(status).build(status);
1187 }
1188 
getComplexity(UErrorCode & status) const1189 UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const {
1190     MeasureUnitImpl temp;
1191     return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity;
1192 }
1193 
getPrefix(UErrorCode & status) const1194 UMeasurePrefix MeasureUnit::getPrefix(UErrorCode& status) const {
1195     return SingleUnitImpl::forMeasureUnit(*this, status).unitPrefix;
1196 }
1197 
withPrefix(UMeasurePrefix prefix,UErrorCode & status) const1198 MeasureUnit MeasureUnit::withPrefix(UMeasurePrefix prefix, UErrorCode& status) const UPRV_NO_SANITIZE_UNDEFINED {
1199     SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
1200     singleUnit.unitPrefix = prefix;
1201     return singleUnit.build(status);
1202 }
1203 
getDimensionality(UErrorCode & status) const1204 int32_t MeasureUnit::getDimensionality(UErrorCode& status) const {
1205     SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
1206     if (U_FAILURE(status)) { return 0; }
1207     if (singleUnit.isDimensionless()) {
1208         return 0;
1209     }
1210     return singleUnit.dimensionality;
1211 }
1212 
withDimensionality(int32_t dimensionality,UErrorCode & status) const1213 MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const {
1214     SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
1215     singleUnit.dimensionality = dimensionality;
1216     return singleUnit.build(status);
1217 }
1218 
reciprocal(UErrorCode & status) const1219 MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const {
1220     MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
1221     impl.takeReciprocal(status);
1222     return std::move(impl).build(status);
1223 }
1224 
// Returns the product of this unit and `other`. Every single unit of `other`
// is appended to an owned copy of this unit's impl (coalescing where
// possible). MIXED operands are rejected with U_ILLEGAL_ARGUMENT_ERROR and a
// default-constructed MeasureUnit.
MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const {
    MeasureUnitImpl combined = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
    MeasureUnitImpl scratch;
    const MeasureUnitImpl& rhs = MeasureUnitImpl::forMeasureUnit(other, scratch, status);

    const bool anyMixed =
        combined.complexity == UMEASURE_UNIT_MIXED || rhs.complexity == UMEASURE_UNIT_MIXED;
    if (anyMixed) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return {};
    }

    const int32_t rhsCount = rhs.singleUnits.length();
    for (int32_t pos = 0; pos < rhsCount; ++pos) {
        combined.appendSingleUnit(*rhs.singleUnits[pos], status);
    }

    // More than one single unit means the result is a compound unit.
    if (combined.singleUnits.length() > 1) {
        combined.complexity = UMEASURE_UNIT_COMPOUND;
    }
    return std::move(combined).build(status);
}
1241 
splitToSingleUnitsImpl(int32_t & outCount,UErrorCode & status) const1242 LocalArray<MeasureUnit> MeasureUnit::splitToSingleUnitsImpl(int32_t& outCount, UErrorCode& status) const {
1243     MeasureUnitImpl temp;
1244     const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status);
1245     outCount = impl.singleUnits.length();
1246     MeasureUnit* arr = new MeasureUnit[outCount];
1247     if (arr == nullptr) {
1248         status = U_MEMORY_ALLOCATION_ERROR;
1249         return LocalArray<MeasureUnit>();
1250     }
1251     for (int32_t i = 0; i < outCount; i++) {
1252         arr[i] = impl.singleUnits[i]->build(status);
1253     }
1254     return LocalArray<MeasureUnit>(arr, status);
1255 }
1256 
1257 
1258 U_NAMESPACE_END
1259 
#endif /* !UCONFIG_NO_FORMATTING */
1261