1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 // Extra functions for MeasureUnit not needed for all clients.
5 // Separate .o file so that it can be removed for modularity.
6
7 #include "unicode/utypes.h"
8
9 #if !UCONFIG_NO_FORMATTING
10
11 // Allow implicit conversion from char16_t* to UnicodeString for this file:
12 // Helpful in toString methods and elsewhere.
13 #define UNISTR_FROM_STRING_EXPLICIT
14
15 #include "charstr.h"
16 #include "cmemory.h"
17 #include "cstring.h"
18 #include "measunit_impl.h"
19 #include "resource.h"
20 #include "uarrsort.h"
21 #include "uassert.h"
22 #include "ucln_in.h"
23 #include "umutex.h"
24 #include "unicode/bytestrie.h"
25 #include "unicode/bytestriebuilder.h"
26 #include "unicode/localpointer.h"
27 #include "unicode/stringpiece.h"
28 #include "unicode/stringtriebuilder.h"
29 #include "unicode/ures.h"
30 #include "unicode/ustringtrie.h"
31 #include "uresimp.h"
32 #include "util.h"
33 #include <cstdlib>
34
35 U_NAMESPACE_BEGIN
36
37
38 namespace {
39
// Error code that the parser in this file sets when a unit identifier is
// syntactically invalid.
// TODO: Propose a new error code for this?
constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR;

// Trie value offset for SI or binary prefixes. This is big enough to ensure we only
// insert positive integers into the trie.
// A prefix is stored in the trie as its UMeasurePrefix value plus this offset;
// the asserts below check that even the smallest prefix value stays positive.
constexpr int32_t kPrefixOffset = 64;
static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_BIN > 0,
              "kPrefixOffset is too small for minimum UMeasurePrefix value");
static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_SI > 0,
              "kPrefixOffset is too small for minimum UMeasurePrefix value");

// Trie value offset for compound parts, e.g. "-per-", "-", "-and-".
// The asserts below check that the largest prefix trie value is still below
// this offset, so a trie value identifies its token kind unambiguously.
constexpr int32_t kCompoundPartOffset = 128;
static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_BIN,
              "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens");
static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_SI,
              "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens");
57
58 enum CompoundPart {
59 // Represents "-per-"
60 COMPOUND_PART_PER = kCompoundPartOffset,
61 // Represents "-"
62 COMPOUND_PART_TIMES,
63 // Represents "-and-"
64 COMPOUND_PART_AND,
65 };
66
// Trie value offset for "per-".
constexpr int32_t kInitialCompoundPartOffset = 192;

// Trie values of the compound parts that are only valid as the very first
// token of an identifier.
enum InitialCompoundPart {
    // Represents "per-", the only compound part that can appear at the start of
    // an identifier.
    INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset,
};
75
76 // Trie value offset for powers like "square-", "cubic-", "pow2-" etc.
77 constexpr int32_t kPowerPartOffset = 256;
78
79 enum PowerPart {
80 POWER_PART_P2 = kPowerPartOffset + 2,
81 POWER_PART_P3,
82 POWER_PART_P4,
83 POWER_PART_P5,
84 POWER_PART_P6,
85 POWER_PART_P7,
86 POWER_PART_P8,
87 POWER_PART_P9,
88 POWER_PART_P10,
89 POWER_PART_P11,
90 POWER_PART_P12,
91 POWER_PART_P13,
92 POWER_PART_P14,
93 POWER_PART_P15,
94 };
95
// Trie value offset for simple units, e.g. "gram", "nautical-mile",
// "fluid-ounce-imperial".
// A simple unit is stored in the trie as its index into the simple-unit array
// plus this offset; any trie value at or above this offset is a simple unit.
constexpr int32_t kSimpleUnitOffset = 512;
99
// Table pairing the textual form of each SI and binary prefix, as it appears
// in a unit identifier, with its UMeasurePrefix value. initUnitExtras() walks
// this table to add every prefix to the parser trie.
const struct UnitPrefixStrings {
    // Prefix text as written in an identifier, e.g. "kilo".
    const char* const string;
    // The prefix value that the text stands for.
    UMeasurePrefix value;
} gUnitPrefixStrings[] = {
    // SI prefixes
    { "yotta", UMEASURE_PREFIX_YOTTA },
    { "zetta", UMEASURE_PREFIX_ZETTA },
    { "exa", UMEASURE_PREFIX_EXA },
    { "peta", UMEASURE_PREFIX_PETA },
    { "tera", UMEASURE_PREFIX_TERA },
    { "giga", UMEASURE_PREFIX_GIGA },
    { "mega", UMEASURE_PREFIX_MEGA },
    { "kilo", UMEASURE_PREFIX_KILO },
    { "hecto", UMEASURE_PREFIX_HECTO },
    { "deka", UMEASURE_PREFIX_DEKA },
    { "deci", UMEASURE_PREFIX_DECI },
    { "centi", UMEASURE_PREFIX_CENTI },
    { "milli", UMEASURE_PREFIX_MILLI },
    { "micro", UMEASURE_PREFIX_MICRO },
    { "nano", UMEASURE_PREFIX_NANO },
    { "pico", UMEASURE_PREFIX_PICO },
    { "femto", UMEASURE_PREFIX_FEMTO },
    { "atto", UMEASURE_PREFIX_ATTO },
    { "zepto", UMEASURE_PREFIX_ZEPTO },
    { "yocto", UMEASURE_PREFIX_YOCTO },
    // Binary prefixes
    { "yobi", UMEASURE_PREFIX_YOBI },
    { "zebi", UMEASURE_PREFIX_ZEBI },
    { "exbi", UMEASURE_PREFIX_EXBI },
    { "pebi", UMEASURE_PREFIX_PEBI },
    { "tebi", UMEASURE_PREFIX_TEBI },
    { "gibi", UMEASURE_PREFIX_GIBI },
    { "mebi", UMEASURE_PREFIX_MEBI },
    { "kibi", UMEASURE_PREFIX_KIBI },
};
135
136 /**
137 * A ResourceSink that collects simple unit identifiers from the keys of the
138 * convertUnits table into an array, and adds these values to a TrieBuilder,
139 * with associated values being their index into this array plus a specified
140 * offset.
141 *
142 * Example code:
143 *
144 * UErrorCode status = U_ZERO_ERROR;
145 * BytesTrieBuilder b(status);
146 * int32_t ARR_SIZE = 200;
147 * const char *unitIdentifiers[ARR_SIZE];
148 * int32_t *unitCategories[ARR_SIZE];
149 * SimpleUnitIdentifiersSink identifierSink(gSerializedUnitCategoriesTrie, unitIdentifiers,
150 * unitCategories, ARR_SIZE, b, kTrieValueOffset);
151 * LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
152 * ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
153 */
154 class SimpleUnitIdentifiersSink : public icu::ResourceSink {
155 public:
156 /**
157 * Constructor.
158 * @param quantitiesTrieData The data for constructing a quantitiesTrie,
159 * which maps from a simple unit identifier to an index into the
160 * gCategories array.
161 * @param out Array of char* to which pointers to the simple unit
162 * identifiers will be saved. (Does not take ownership.)
163 * @param outCategories Array of int32_t to which category indexes will be
164 * saved: this corresponds to simple unit IDs saved to `out`, mapping
165 * from the ID to the value produced by the quantitiesTrie (which is an
166 * index into the gCategories array).
167 * @param outSize The size of `out` and `outCategories`.
168 * @param trieBuilder The trie builder to which the simple unit identifier
169 * should be added. The trie builder must outlive this resource sink.
170 * @param trieValueOffset This is added to the index of the identifier in
171 * the `out` array, before adding to `trieBuilder` as the value
172 * associated with the identifier.
173 */
SimpleUnitIdentifiersSink(StringPiece quantitiesTrieData,const char ** out,int32_t * outCategories,int32_t outSize,BytesTrieBuilder & trieBuilder,int32_t trieValueOffset)174 explicit SimpleUnitIdentifiersSink(StringPiece quantitiesTrieData, const char **out,
175 int32_t *outCategories, int32_t outSize,
176 BytesTrieBuilder &trieBuilder, int32_t trieValueOffset)
177 : outArray(out), outCategories(outCategories), outSize(outSize), trieBuilder(trieBuilder),
178 trieValueOffset(trieValueOffset), quantitiesTrieData(quantitiesTrieData), outIndex(0) {}
179
180 /**
181 * Adds the table keys found in value to the output vector.
182 * @param key The key of the resource passed to `value`: the second
183 * parameter of the ures_getAllItemsWithFallback() call.
184 * @param value Should be a ResourceTable value, if
185 * ures_getAllItemsWithFallback() was called correctly for this sink.
186 * @param noFallback Ignored.
187 * @param status The standard ICU error code output parameter.
188 */
put(const char *,ResourceValue & value,UBool,UErrorCode & status)189 void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
190 ResourceTable table = value.getTable(status);
191 if (U_FAILURE(status)) return;
192
193 if (outIndex + table.getSize() > outSize) {
194 status = U_INDEX_OUTOFBOUNDS_ERROR;
195 return;
196 }
197
198 BytesTrie quantitiesTrie(quantitiesTrieData.data());
199
200 // Collect keys from the table resource.
201 const char *simpleUnitID;
202 for (int32_t i = 0; table.getKeyAndValue(i, simpleUnitID, value); ++i) {
203 U_ASSERT(i < table.getSize());
204 U_ASSERT(outIndex < outSize);
205 if (uprv_strcmp(simpleUnitID, "kilogram") == 0) {
206 // For parsing, we use "gram", the prefixless metric mass unit. We
207 // thus ignore the SI Base Unit of Mass: it exists due to being the
208 // mass conversion target unit, but not needed for MeasureUnit
209 // parsing.
210 continue;
211 }
212 outArray[outIndex] = simpleUnitID;
213 trieBuilder.add(simpleUnitID, trieValueOffset + outIndex, status);
214
215 // Find the base target unit for this simple unit
216 ResourceTable table = value.getTable(status);
217 if (U_FAILURE(status)) { return; }
218 if (!table.findValue("target", value)) {
219 status = U_INVALID_FORMAT_ERROR;
220 break;
221 }
222 int32_t len;
223 const char16_t* uTarget = value.getString(len, status);
224 CharString target;
225 target.appendInvariantChars(uTarget, len, status);
226 if (U_FAILURE(status)) { return; }
227 quantitiesTrie.reset();
228 UStringTrieResult result = quantitiesTrie.next(target.data(), target.length());
229 if (!USTRINGTRIE_HAS_VALUE(result)) {
230 status = U_INVALID_FORMAT_ERROR;
231 break;
232 }
233 outCategories[outIndex] = quantitiesTrie.getValue();
234
235 outIndex++;
236 }
237 }
238
239 private:
240 const char **outArray;
241 int32_t *outCategories;
242 int32_t outSize;
243 BytesTrieBuilder &trieBuilder;
244 int32_t trieValueOffset;
245
246 StringPiece quantitiesTrieData;
247
248 int32_t outIndex;
249 };
250
251 /**
252 * A ResourceSink that collects information from `unitQuantities` in the `units`
253 * resource to provide key->value lookups from base unit to category, as well as
254 * preserving ordering information for these categories. See `units.txt`.
255 *
256 * For example: "kilogram" -> "mass", "meter-per-second" -> "speed".
257 *
258 * In C++ unitQuantity values are collected in order into a char16_t* array, while
259 * unitQuantity keys are added added to a TrieBuilder, with associated values
260 * being the index into the aforementioned char16_t* array.
261 */
262 class CategoriesSink : public icu::ResourceSink {
263 public:
264 /**
265 * Constructor.
266 * @param out Array of char16_t* to which unitQuantity values will be saved.
267 * The pointers returned not owned: they point directly at the resource
268 * strings in static memory.
269 * @param outSize The size of the `out` array.
270 * @param trieBuilder The trie builder to which the keys (base units) of
271 * each unitQuantity will be added, each with value being the offset
272 * into `out`.
273 */
CategoriesSink(const char16_t ** out,int32_t & outSize,BytesTrieBuilder & trieBuilder)274 explicit CategoriesSink(const char16_t **out, int32_t &outSize, BytesTrieBuilder &trieBuilder)
275 : outQuantitiesArray(out), outSize(outSize), trieBuilder(trieBuilder), outIndex(0) {}
276
put(const char *,ResourceValue & value,UBool,UErrorCode & status)277 void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
278 ResourceArray array = value.getArray(status);
279 if (U_FAILURE(status)) {
280 return;
281 }
282
283 if (outIndex + array.getSize() > outSize) {
284 status = U_INDEX_OUTOFBOUNDS_ERROR;
285 return;
286 }
287
288 for (int32_t i = 0; array.getValue(i, value); ++i) {
289 U_ASSERT(outIndex < outSize);
290 ResourceTable table = value.getTable(status);
291 if (U_FAILURE(status)) {
292 return;
293 }
294 if (table.getSize() != 1) {
295 status = U_INVALID_FORMAT_ERROR;
296 return;
297 }
298 const char *key;
299 table.getKeyAndValue(0, key, value);
300 int32_t uTmpLen;
301 outQuantitiesArray[outIndex] = value.getString(uTmpLen, status);
302 trieBuilder.add(key, outIndex, status);
303 outIndex++;
304 }
305 }
306
307 private:
308 const char16_t **outQuantitiesArray;
309 int32_t &outSize;
310 BytesTrieBuilder &trieBuilder;
311
312 int32_t outIndex;
313 };
314
// Guards the one-time run of initUnitExtras(), which fills in all of the
// globals below. cleanupUnitExtras() frees them and resets this guard.
icu::UInitOnce gUnitExtrasInitOnce {};

// Array of simple unit IDs.
//
// The array memory itself is owned by this pointer, but the individual char* in
// that array point at static memory. (Note that these char* are also returned
// by SingleUnitImpl::getSimpleUnitID().)
const char **gSimpleUnits = nullptr;

// Maps from the value associated with each simple unit ID to an index into the
// gCategories array.
int32_t *gSimpleUnitCategories = nullptr;

// Serialized BytesTrie used by the Parser to tokenize unit identifiers. It
// contains the SI and binary prefixes, the compound and power parts, and the
// simple unit IDs. The memory is owned by this pointer.
char *gSerializedUnitExtrasStemTrie = nullptr;

// Array of char16_t* pointing at the unit categories (aka "quantities", aka
// "types"), as found in the `unitQuantities` resource. The array memory itself
// is owned by this pointer, but the individual char16_t* in that array point at
// static memory.
const char16_t **gCategories = nullptr;
// Number of items in `gCategories`.
int32_t gCategoriesCount = 0;
// Serialized BytesTrie for mapping from base units to indices into gCategories.
char *gSerializedUnitCategoriesTrie = nullptr;
339
cleanupUnitExtras()340 UBool U_CALLCONV cleanupUnitExtras() {
341 uprv_free(gSerializedUnitCategoriesTrie);
342 gSerializedUnitCategoriesTrie = nullptr;
343 uprv_free(gCategories);
344 gCategories = nullptr;
345 uprv_free(gSerializedUnitExtrasStemTrie);
346 gSerializedUnitExtrasStemTrie = nullptr;
347 uprv_free(gSimpleUnitCategories);
348 gSimpleUnitCategories = nullptr;
349 uprv_free(gSimpleUnits);
350 gSimpleUnits = nullptr;
351 gUnitExtrasInitOnce.reset();
352 return true;
353 }
354
initUnitExtras(UErrorCode & status)355 void U_CALLCONV initUnitExtras(UErrorCode& status) {
356 ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras);
357 LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
358
359 // Collect unitQuantities information into gSerializedUnitCategoriesTrie and gCategories.
360 const char *CATEGORY_TABLE_NAME = "unitQuantities";
361 LocalUResourceBundlePointer unitQuantities(
362 ures_getByKey(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, nullptr, &status));
363 if (U_FAILURE(status)) { return; }
364 gCategoriesCount = unitQuantities.getAlias()->fSize;
365 size_t quantitiesMallocSize = sizeof(char16_t *) * gCategoriesCount;
366 gCategories = static_cast<const char16_t **>(uprv_malloc(quantitiesMallocSize));
367 if (gCategories == nullptr) {
368 status = U_MEMORY_ALLOCATION_ERROR;
369 return;
370 }
371 uprv_memset(gCategories, 0, quantitiesMallocSize);
372 BytesTrieBuilder quantitiesBuilder(status);
373 CategoriesSink categoriesSink(gCategories, gCategoriesCount, quantitiesBuilder);
374 ures_getAllItemsWithFallback(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, categoriesSink, status);
375 StringPiece resultQuantities = quantitiesBuilder.buildStringPiece(USTRINGTRIE_BUILD_FAST, status);
376 if (U_FAILURE(status)) { return; }
377 // Copy the result into the global constant pointer
378 size_t numBytesQuantities = resultQuantities.length();
379 gSerializedUnitCategoriesTrie = static_cast<char *>(uprv_malloc(numBytesQuantities));
380 if (gSerializedUnitCategoriesTrie == nullptr) {
381 status = U_MEMORY_ALLOCATION_ERROR;
382 return;
383 }
384 uprv_memcpy(gSerializedUnitCategoriesTrie, resultQuantities.data(), numBytesQuantities);
385
386 // Build the BytesTrie that Parser needs for parsing unit identifiers.
387
388 BytesTrieBuilder b(status);
389 if (U_FAILURE(status)) { return; }
390
391 // Add SI and binary prefixes
392 for (const auto& unitPrefixInfo : gUnitPrefixStrings) {
393 b.add(unitPrefixInfo.string, unitPrefixInfo.value + kPrefixOffset, status);
394 }
395 if (U_FAILURE(status)) { return; }
396
397 // Add syntax parts (compound, power prefixes)
398 b.add("-per-", COMPOUND_PART_PER, status);
399 b.add("-", COMPOUND_PART_TIMES, status);
400 b.add("-and-", COMPOUND_PART_AND, status);
401 b.add("per-", INITIAL_COMPOUND_PART_PER, status);
402 b.add("square-", POWER_PART_P2, status);
403 b.add("cubic-", POWER_PART_P3, status);
404 b.add("pow2-", POWER_PART_P2, status);
405 b.add("pow3-", POWER_PART_P3, status);
406 b.add("pow4-", POWER_PART_P4, status);
407 b.add("pow5-", POWER_PART_P5, status);
408 b.add("pow6-", POWER_PART_P6, status);
409 b.add("pow7-", POWER_PART_P7, status);
410 b.add("pow8-", POWER_PART_P8, status);
411 b.add("pow9-", POWER_PART_P9, status);
412 b.add("pow10-", POWER_PART_P10, status);
413 b.add("pow11-", POWER_PART_P11, status);
414 b.add("pow12-", POWER_PART_P12, status);
415 b.add("pow13-", POWER_PART_P13, status);
416 b.add("pow14-", POWER_PART_P14, status);
417 b.add("pow15-", POWER_PART_P15, status);
418 if (U_FAILURE(status)) { return; }
419
420 // Add sanctioned simple units by offset: simple units all have entries in
421 // units/convertUnits resources.
422 LocalUResourceBundlePointer convertUnits(
423 ures_getByKey(unitsBundle.getAlias(), "convertUnits", nullptr, &status));
424 if (U_FAILURE(status)) { return; }
425
426 // Allocate enough space: with identifierSink below skipping kilogram, we're
427 // probably allocating one more than needed.
428 int32_t simpleUnitsCount = convertUnits.getAlias()->fSize;
429 int32_t arrayMallocSize = sizeof(char *) * simpleUnitsCount;
430 gSimpleUnits = static_cast<const char **>(uprv_malloc(arrayMallocSize));
431 if (gSimpleUnits == nullptr) {
432 status = U_MEMORY_ALLOCATION_ERROR;
433 return;
434 }
435 uprv_memset(gSimpleUnits, 0, arrayMallocSize);
436 arrayMallocSize = sizeof(int32_t) * simpleUnitsCount;
437 gSimpleUnitCategories = static_cast<int32_t *>(uprv_malloc(arrayMallocSize));
438 if (gSimpleUnitCategories == nullptr) {
439 status = U_MEMORY_ALLOCATION_ERROR;
440 return;
441 }
442 uprv_memset(gSimpleUnitCategories, 0, arrayMallocSize);
443
444 // Populate gSimpleUnits and build the associated trie.
445 SimpleUnitIdentifiersSink identifierSink(resultQuantities, gSimpleUnits, gSimpleUnitCategories,
446 simpleUnitsCount, b, kSimpleUnitOffset);
447 ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
448
449 // Build the CharsTrie
450 // TODO: Use SLOW or FAST here?
451 StringPiece result = b.buildStringPiece(USTRINGTRIE_BUILD_FAST, status);
452 if (U_FAILURE(status)) { return; }
453
454 // Copy the result into the global constant pointer
455 size_t numBytes = result.length();
456 gSerializedUnitExtrasStemTrie = static_cast<char *>(uprv_malloc(numBytes));
457 if (gSerializedUnitExtrasStemTrie == nullptr) {
458 status = U_MEMORY_ALLOCATION_ERROR;
459 return;
460 }
461 uprv_memcpy(gSerializedUnitExtrasStemTrie, result.data(), numBytes);
462 }
463
464 class Token {
465 public:
Token(int32_t match)466 Token(int32_t match) : fMatch(match) {}
467
468 enum Type {
469 TYPE_UNDEFINED,
470 TYPE_PREFIX,
471 // Token type for "-per-", "-", and "-and-".
472 TYPE_COMPOUND_PART,
473 // Token type for "per-".
474 TYPE_INITIAL_COMPOUND_PART,
475 TYPE_POWER_PART,
476 TYPE_SIMPLE_UNIT,
477 };
478
479 // Calling getType() is invalid, resulting in an assertion failure, if Token
480 // value isn't positive.
getType() const481 Type getType() const {
482 U_ASSERT(fMatch > 0);
483 if (fMatch < kCompoundPartOffset) {
484 return TYPE_PREFIX;
485 }
486 if (fMatch < kInitialCompoundPartOffset) {
487 return TYPE_COMPOUND_PART;
488 }
489 if (fMatch < kPowerPartOffset) {
490 return TYPE_INITIAL_COMPOUND_PART;
491 }
492 if (fMatch < kSimpleUnitOffset) {
493 return TYPE_POWER_PART;
494 }
495 return TYPE_SIMPLE_UNIT;
496 }
497
getUnitPrefix() const498 UMeasurePrefix getUnitPrefix() const {
499 U_ASSERT(getType() == TYPE_PREFIX);
500 return static_cast<UMeasurePrefix>(fMatch - kPrefixOffset);
501 }
502
503 // Valid only for tokens with type TYPE_COMPOUND_PART.
getMatch() const504 int32_t getMatch() const {
505 U_ASSERT(getType() == TYPE_COMPOUND_PART);
506 return fMatch;
507 }
508
getInitialCompoundPart() const509 int32_t getInitialCompoundPart() const {
510 // Even if there is only one InitialCompoundPart value, we have this
511 // function for the simplicity of code consistency.
512 U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART);
513 // Defensive: if this assert fails, code using this function also needs
514 // to change.
515 U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER);
516 return fMatch;
517 }
518
getPower() const519 int8_t getPower() const {
520 U_ASSERT(getType() == TYPE_POWER_PART);
521 return static_cast<int8_t>(fMatch - kPowerPartOffset);
522 }
523
getSimpleUnitIndex() const524 int32_t getSimpleUnitIndex() const {
525 U_ASSERT(getType() == TYPE_SIMPLE_UNIT);
526 return fMatch - kSimpleUnitOffset;
527 }
528
529 private:
530 int32_t fMatch;
531 };
532
// Parses a unit identifier (e.g. "kilometer-per-hour") into a MeasureUnitImpl,
// tokenizing the text with the trie built by initUnitExtras().
class Parser {
public:
    /**
     * Factory function for parsing the given identifier.
     *
     * Makes sure that the unit-extras data is initialized. On failure, an
     * empty Parser is returned and status is set.
     *
     * @param source The identifier to parse. This function does not make a copy
     * of source: the underlying string that source points at, must outlive the
     * parser.
     * @param status ICU error code.
     */
    static Parser from(StringPiece source, UErrorCode& status) {
        if (U_FAILURE(status)) {
            return Parser();
        }
        umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
        if (U_FAILURE(status)) {
            return Parser();
        }
        return Parser(source);
    }

    /**
     * Parses the whole identifier.
     *
     * @param status ICU error code; set to kUnitIdentifierSyntaxError if the
     *     identifier is malformed.
     * @return The parsed unit. If status indicates a failure, the returned
     *     value may be only partially filled in.
     */
    MeasureUnitImpl parse(UErrorCode& status) {
        MeasureUnitImpl result;

        if (U_FAILURE(status)) {
            return result;
        }
        if (fSource.empty()) {
            // The dimensionless unit: nothing to parse. Leave result as is.
            return result;
        }

        while (hasNext()) {
            // Re-initialized for every single unit: after nextSingleUnit() it
            // tells whether *this* unit was introduced by "-and-".
            bool sawAnd = false;

            SingleUnitImpl singleUnit = nextSingleUnit(sawAnd, status);
            if (U_FAILURE(status)) {
                return result;
            }

            bool added = result.appendSingleUnit(singleUnit, status);
            if (U_FAILURE(status)) {
                return result;
            }

            if (sawAnd && !added) {
                // Two similar units are not allowed in a mixed unit.
                status = kUnitIdentifierSyntaxError;
                return result;
            }

            if (result.singleUnits.length() >= 2) {
                // nextSingleUnit fails appropriately for "per" and "and" in the
                // same identifier. It doesn't fail for other compound units
                // (COMPOUND_PART_TIMES). Consequently we take care of that
                // here.
                UMeasureUnitComplexity complexity =
                    sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND;
                if (result.singleUnits.length() == 2) {
                    // After appending two singleUnits, the complexity will be `UMEASURE_UNIT_COMPOUND`
                    U_ASSERT(result.complexity == UMEASURE_UNIT_COMPOUND);
                    result.complexity = complexity;
                } else if (result.complexity != complexity) {
                    // Can't have mixed compound units
                    status = kUnitIdentifierSyntaxError;
                    return result;
                }
            }
        }

        return result;
    }

private:
    // Tracks parser progress: the offset into fSource.
    int32_t fIndex = 0;

    // Since we're not owning this memory, whatever is passed to the constructor
    // should live longer than this Parser - and the parser shouldn't return any
    // references to that string.
    StringPiece fSource;
    // Trie over the tokens of the identifier syntax; reset before each token.
    BytesTrie fTrie;

    // Set to true when we've seen a "-per-" or a "per-", after which all units
    // are in the denominator. Until we find an "-and-", at which point the
    // identifier is invalid pending TODO(CLDR-13701).
    bool fAfterPer = false;

    // Creates the empty parser that from() returns on failure.
    Parser() : fSource(""), fTrie(u"") {}

    // Creates a parser over source; requires the unit-extras data to be
    // initialized, since it reads gSerializedUnitExtrasStemTrie.
    Parser(StringPiece source)
        : fSource(source), fTrie(gSerializedUnitExtrasStemTrie) {}

    // Returns true while there is unconsumed input.
    inline bool hasNext() const {
        return fIndex < fSource.length();
    }

    // Returns the next Token parsed from fSource, advancing fIndex to the end
    // of that token in fSource. In case of U_FAILURE(status), the token
    // returned will cause an abort if getType() is called on it.
    Token nextToken(UErrorCode& status) {
        fTrie.reset();
        int32_t match = -1;
        // Saves the position in the fSource string for the end of the most
        // recent matching token.
        int32_t previ = -1;
        // Find the longest token that matches a value in the trie:
        while (fIndex < fSource.length()) {
            auto result = fTrie.next(fSource.data()[fIndex++]);
            if (result == USTRINGTRIE_NO_MATCH) {
                break;
            } else if (result == USTRINGTRIE_NO_VALUE) {
                continue;
            }
            U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
            match = fTrie.getValue();
            previ = fIndex;
            if (result == USTRINGTRIE_FINAL_VALUE) {
                break;
            }
            U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE);
            // An intermediate value: keep going, a longer token may match.
        }

        if (match < 0) {
            status = kUnitIdentifierSyntaxError;
        } else {
            // Rewind to the end of the longest match; the loop above may have
            // consumed further bytes while looking for a longer token.
            fIndex = previ;
        }
        return Token(match);
    }

    /**
     * Parses and returns the next "single unit".
     *
     * If a "-per-" was parsed, the result will have appropriate negative
     * dimensionality.
     *
     * Returns an error if we parse both compound units and "-and-", since mixed
     * compound units are not yet supported - TODO(CLDR-13701).
     *
     * @param sawAnd If an "-and-" was parsed prior to finding the "single
     *     unit", sawAnd is set to true. If not, it is left as is.
     * @param status ICU error code.
     * @return The parsed single unit; meaningful only if status shows success.
     */
    SingleUnitImpl nextSingleUnit(bool &sawAnd, UErrorCode &status) {
        SingleUnitImpl result;
        if (U_FAILURE(status)) {
            return result;
        }

        // state:
        // 0 = no tokens seen yet (will accept power, SI or binary prefix, or simple unit)
        // 1 = power token seen (will not accept another power token)
        // 2 = SI or binary prefix token seen (will not accept a power, or SI or binary prefix token)
        int32_t state = 0;

        bool atStart = fIndex == 0;
        Token token = nextToken(status);
        if (U_FAILURE(status)) {
            return result;
        }

        if (atStart) {
            // Identifiers optionally start with "per-".
            if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) {
                U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER);
                fAfterPer = true;
                result.dimensionality = -1;

                token = nextToken(status);
                if (U_FAILURE(status)) {
                    return result;
                }
            }
        } else {
            // All other SingleUnit's are separated from previous SingleUnit's
            // via a compound part:
            if (token.getType() != Token::TYPE_COMPOUND_PART) {
                status = kUnitIdentifierSyntaxError;
                return result;
            }

            switch (token.getMatch()) {
                case COMPOUND_PART_PER:
                    if (sawAnd) {
                        // Mixed compound units not yet supported,
                        // TODO(CLDR-13701).
                        status = kUnitIdentifierSyntaxError;
                        return result;
                    }
                    fAfterPer = true;
                    result.dimensionality = -1;
                    break;

                case COMPOUND_PART_TIMES:
                    // Units multiplied after a "per" stay in the denominator.
                    if (fAfterPer) {
                        result.dimensionality = -1;
                    }
                    break;

                case COMPOUND_PART_AND:
                    if (fAfterPer) {
                        // Can't start with "-and-", and mixed compound units
                        // not yet supported, TODO(CLDR-13701).
                        status = kUnitIdentifierSyntaxError;
                        return result;
                    }
                    sawAnd = true;
                    break;
            }

            token = nextToken(status);
            if (U_FAILURE(status)) {
                return result;
            }
        }

        // Read tokens until we have a complete SingleUnit or we reach the end.
        while (true) {
            switch (token.getType()) {
                case Token::TYPE_POWER_PART:
                    if (state > 0) {
                        status = kUnitIdentifierSyntaxError;
                        return result;
                    }
                    // Multiplying keeps the sign set by a preceding "per".
                    result.dimensionality *= token.getPower();
                    state = 1;
                    break;

                case Token::TYPE_PREFIX:
                    if (state > 1) {
                        status = kUnitIdentifierSyntaxError;
                        return result;
                    }
                    result.unitPrefix = token.getUnitPrefix();
                    state = 2;
                    break;

                case Token::TYPE_SIMPLE_UNIT:
                    // The simple unit completes the single unit.
                    result.index = token.getSimpleUnitIndex();
                    return result;

                default:
                    status = kUnitIdentifierSyntaxError;
                    return result;
            }

            if (!hasNext()) {
                // We ran out of tokens before finding a complete single unit.
                status = kUnitIdentifierSyntaxError;
                return result;
            }
            token = nextToken(status);
            if (U_FAILURE(status)) {
                return result;
            }
        }

        // Not reached: every path through the loop above returns.
        return result;
    }
};
796
797 // Sorting function wrapping SingleUnitImpl::compareTo for use with uprv_sortArray.
798 int32_t U_CALLCONV
compareSingleUnits(const void *,const void * left,const void * right)799 compareSingleUnits(const void* /*context*/, const void* left, const void* right) {
800 auto realLeft = static_cast<const SingleUnitImpl* const*>(left);
801 auto realRight = static_cast<const SingleUnitImpl* const*>(right);
802 return (*realLeft)->compareTo(**realRight);
803 }
804
805 // Returns an index into the gCategories array, for the "unitQuantity" (aka
806 // "type" or "category") associated with the given base unit identifier. Returns
807 // -1 on failure, together with U_UNSUPPORTED_ERROR.
getUnitCategoryIndex(BytesTrie & trie,StringPiece baseUnitIdentifier,UErrorCode & status)808 int32_t getUnitCategoryIndex(BytesTrie &trie, StringPiece baseUnitIdentifier, UErrorCode &status) {
809 UStringTrieResult result = trie.reset().next(baseUnitIdentifier.data(), baseUnitIdentifier.length());
810 if (!USTRINGTRIE_HAS_VALUE(result)) {
811 status = U_UNSUPPORTED_ERROR;
812 return -1;
813 }
814
815 return trie.getValue();
816 }
817
818 } // namespace
819
820 U_CAPI int32_t U_EXPORT2
umeas_getPrefixPower(UMeasurePrefix unitPrefix)821 umeas_getPrefixPower(UMeasurePrefix unitPrefix) {
822 if (unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN &&
823 unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN) {
824 return unitPrefix - UMEASURE_PREFIX_INTERNAL_ONE_BIN;
825 }
826 U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI &&
827 unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI);
828 return unitPrefix - UMEASURE_PREFIX_ONE;
829 }
830
831 U_CAPI int32_t U_EXPORT2
umeas_getPrefixBase(UMeasurePrefix unitPrefix)832 umeas_getPrefixBase(UMeasurePrefix unitPrefix) {
833 if (unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN &&
834 unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN) {
835 return 1024;
836 }
837 U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI &&
838 unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI);
839 return 10;
840 }
841
getUnitQuantity(const MeasureUnitImpl & baseMeasureUnitImpl,UErrorCode & status)842 CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status) {
843 CharString result;
844 MeasureUnitImpl baseUnitImpl = baseMeasureUnitImpl.copy(status);
845 UErrorCode localStatus = U_ZERO_ERROR;
846 umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
847 if (U_FAILURE(status)) {
848 return result;
849 }
850 BytesTrie trie(gSerializedUnitCategoriesTrie);
851
852 baseUnitImpl.serialize(status);
853 StringPiece identifier = baseUnitImpl.identifier.data();
854 int32_t idx = getUnitCategoryIndex(trie, identifier, localStatus);
855 if (U_FAILURE(status)) {
856 return result;
857 }
858
859 // In case the base unit identifier did not match any entry.
860 if (U_FAILURE(localStatus)) {
861 localStatus = U_ZERO_ERROR;
862 baseUnitImpl.takeReciprocal(status);
863 baseUnitImpl.serialize(status);
864 identifier.set(baseUnitImpl.identifier.data());
865 idx = getUnitCategoryIndex(trie, identifier, localStatus);
866
867 if (U_FAILURE(status)) {
868 return result;
869 }
870 }
871
872 // In case the reciprocal of the base unit identifier did not match any entry.
873 MeasureUnitImpl simplifiedUnit = baseMeasureUnitImpl.copyAndSimplify(status);
874 if (U_FAILURE(status)) {
875 return result;
876 }
877 if (U_FAILURE(localStatus)) {
878 localStatus = U_ZERO_ERROR;
879 simplifiedUnit.serialize(status);
880 identifier.set(simplifiedUnit.identifier.data());
881 idx = getUnitCategoryIndex(trie, identifier, localStatus);
882
883 if (U_FAILURE(status)) {
884 return result;
885 }
886 }
887
888 // In case the simplified base unit identifier did not match any entry.
889 if (U_FAILURE(localStatus)) {
890 localStatus = U_ZERO_ERROR;
891 simplifiedUnit.takeReciprocal(status);
892 simplifiedUnit.serialize(status);
893 identifier.set(simplifiedUnit.identifier.data());
894 idx = getUnitCategoryIndex(trie, identifier, localStatus);
895
896 if (U_FAILURE(status)) {
897 return result;
898 }
899 }
900
901 // If there is no match at all, throw an exception.
902 if (U_FAILURE(localStatus)) {
903 status = U_INVALID_FORMAT_ERROR;
904 return result;
905 }
906
907 if (idx < 0 || idx >= gCategoriesCount) {
908 status = U_INVALID_FORMAT_ERROR;
909 return result;
910 }
911
912 result.appendInvariantChars(gCategories[idx], u_strlen(gCategories[idx]), status);
913 return result;
914 }
915
916 // In ICU4J, this is MeasureUnit.getSingleUnitImpl().
forMeasureUnit(const MeasureUnit & measureUnit,UErrorCode & status)917 SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) {
918 MeasureUnitImpl temp;
919 const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status);
920 if (U_FAILURE(status)) {
921 return {};
922 }
923 if (impl.singleUnits.length() == 0) {
924 return {};
925 }
926 if (impl.singleUnits.length() == 1) {
927 return *impl.singleUnits[0];
928 }
929 status = U_ILLEGAL_ARGUMENT_ERROR;
930 return {};
931 }
932
build(UErrorCode & status) const933 MeasureUnit SingleUnitImpl::build(UErrorCode& status) const {
934 MeasureUnitImpl temp;
935 temp.appendSingleUnit(*this, status);
936 // TODO(icu-units#28): the MeasureUnitImpl::build() method uses
937 // findBySubtype, which is relatively slow.
938 // - At the time of loading the simple unit IDs, we could also save a
939 // mapping to the builtin MeasureUnit type and subtype they correspond to.
940 // - This method could then check dimensionality and index, and if both are
941 // 1, directly return MeasureUnit instances very quickly.
942 return std::move(temp).build(status);
943 }
944
getSimpleUnitID() const945 const char *SingleUnitImpl::getSimpleUnitID() const {
946 return gSimpleUnits[index];
947 }
948
appendNeutralIdentifier(CharString & result,UErrorCode & status) const949 void SingleUnitImpl::appendNeutralIdentifier(CharString &result, UErrorCode &status) const UPRV_NO_SANITIZE_UNDEFINED {
950 int32_t absPower = std::abs(this->dimensionality);
951
952 U_ASSERT(absPower > 0); // "this function does not support the dimensionless single units";
953
954 if (absPower == 1) {
955 // no-op
956 } else if (absPower == 2) {
957 result.append(StringPiece("square-"), status);
958 } else if (absPower == 3) {
959 result.append(StringPiece("cubic-"), status);
960 } else if (absPower <= 15) {
961 result.append(StringPiece("pow"), status);
962 result.appendNumber(absPower, status);
963 result.append(StringPiece("-"), status);
964 } else {
965 status = U_ILLEGAL_ARGUMENT_ERROR; // Unit Identifier Syntax Error
966 return;
967 }
968
969 if (U_FAILURE(status)) {
970 return;
971 }
972
973 if (this->unitPrefix != UMEASURE_PREFIX_ONE) {
974 bool found = false;
975 for (const auto &unitPrefixInfo : gUnitPrefixStrings) {
976 // TODO: consider using binary search? If we do this, add a unit
977 // test to ensure gUnitPrefixStrings is sorted?
978 if (unitPrefixInfo.value == this->unitPrefix) {
979 result.append(unitPrefixInfo.string, status);
980 found = true;
981 break;
982 }
983 }
984 if (!found) {
985 status = U_UNSUPPORTED_ERROR;
986 return;
987 }
988 }
989
990 result.append(StringPiece(this->getSimpleUnitID()), status);
991 }
992
getUnitCategoryIndex() const993 int32_t SingleUnitImpl::getUnitCategoryIndex() const {
994 return gSimpleUnitCategories[index];
995 }
996
MeasureUnitImpl(const SingleUnitImpl & singleUnit,UErrorCode & status)997 MeasureUnitImpl::MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status) {
998 this->appendSingleUnit(singleUnit, status);
999 }
1000
forIdentifier(StringPiece identifier,UErrorCode & status)1001 MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) {
1002 return Parser::from(identifier, status).parse(status);
1003 }
1004
forMeasureUnit(const MeasureUnit & measureUnit,MeasureUnitImpl & memory,UErrorCode & status)1005 const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit(
1006 const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) {
1007 if (measureUnit.fImpl) {
1008 return *measureUnit.fImpl;
1009 } else {
1010 memory = Parser::from(measureUnit.getIdentifier(), status).parse(status);
1011 return memory;
1012 }
1013 }
1014
forMeasureUnitMaybeCopy(const MeasureUnit & measureUnit,UErrorCode & status)1015 MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy(
1016 const MeasureUnit& measureUnit, UErrorCode& status) {
1017 if (measureUnit.fImpl) {
1018 return measureUnit.fImpl->copy(status);
1019 } else {
1020 return Parser::from(measureUnit.getIdentifier(), status).parse(status);
1021 }
1022 }
1023
takeReciprocal(UErrorCode &)1024 void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) {
1025 identifier.clear();
1026 for (int32_t i = 0; i < singleUnits.length(); i++) {
1027 singleUnits[i]->dimensionality *= -1;
1028 }
1029 }
1030
copyAndSimplify(UErrorCode & status) const1031 MeasureUnitImpl MeasureUnitImpl::copyAndSimplify(UErrorCode &status) const {
1032 MeasureUnitImpl result;
1033 for (int32_t i = 0; i < singleUnits.length(); i++) {
1034 const SingleUnitImpl &singleUnit = *this->singleUnits[i];
1035
1036 // The following `for` loop will cause time complexity to be O(n^2).
1037 // However, n is very small (number of units, generally, at maximum equal to 10)
1038 bool unitExist = false;
1039 for (int32_t j = 0; j < result.singleUnits.length(); j++) {
1040 if (uprv_strcmp(result.singleUnits[j]->getSimpleUnitID(), singleUnit.getSimpleUnitID()) ==
1041 0 &&
1042 result.singleUnits[j]->unitPrefix == singleUnit.unitPrefix) {
1043 unitExist = true;
1044 result.singleUnits[j]->dimensionality =
1045 result.singleUnits[j]->dimensionality + singleUnit.dimensionality;
1046 break;
1047 }
1048 }
1049
1050 if (!unitExist) {
1051 result.appendSingleUnit(singleUnit, status);
1052 }
1053 }
1054
1055 return result;
1056 }
1057
appendSingleUnit(const SingleUnitImpl & singleUnit,UErrorCode & status)1058 bool MeasureUnitImpl::appendSingleUnit(const SingleUnitImpl &singleUnit, UErrorCode &status) {
1059 identifier.clear();
1060
1061 if (singleUnit.isDimensionless()) {
1062 // Do not append dimensionless units.
1063 return false;
1064 }
1065
1066 // Find a similar unit that already exists, to attempt to coalesce
1067 SingleUnitImpl *oldUnit = nullptr;
1068 for (int32_t i = 0; i < this->singleUnits.length(); i++) {
1069 auto *candidate = this->singleUnits[i];
1070 if (candidate->isCompatibleWith(singleUnit)) {
1071 oldUnit = candidate;
1072 }
1073 }
1074
1075 if (oldUnit) {
1076 // Both dimensionalities will be positive, or both will be negative, by
1077 // virtue of isCompatibleWith().
1078 oldUnit->dimensionality += singleUnit.dimensionality;
1079
1080 return false;
1081 }
1082
1083 // Add a copy of singleUnit
1084 // NOTE: MaybeStackVector::emplaceBackAndCheckErrorCode creates new copy of singleUnit.
1085 this->singleUnits.emplaceBackAndCheckErrorCode(status, singleUnit);
1086 if (U_FAILURE(status)) {
1087 return false;
1088 }
1089
1090 // If the MeasureUnitImpl is `UMEASURE_UNIT_SINGLE` and after the appending a unit, the `singleUnits`
1091 // contains more than one. thus means the complexity should be `UMEASURE_UNIT_COMPOUND`
1092 if (this->singleUnits.length() > 1 &&
1093 this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_SINGLE) {
1094 this->complexity = UMeasureUnitComplexity::UMEASURE_UNIT_COMPOUND;
1095 }
1096
1097 return true;
1098 }
1099
1100 MaybeStackVector<MeasureUnitImplWithIndex>
extractIndividualUnitsWithIndices(UErrorCode & status) const1101 MeasureUnitImpl::extractIndividualUnitsWithIndices(UErrorCode &status) const {
1102 MaybeStackVector<MeasureUnitImplWithIndex> result;
1103
1104 if (this->complexity != UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) {
1105 result.emplaceBackAndCheckErrorCode(status, 0, *this, status);
1106 return result;
1107 }
1108
1109 for (int32_t i = 0; i < singleUnits.length(); ++i) {
1110 result.emplaceBackAndCheckErrorCode(status, i, *singleUnits[i], status);
1111 if (U_FAILURE(status)) {
1112 return result;
1113 }
1114 }
1115
1116 return result;
1117 }
1118
1119 /**
1120 * Normalize a MeasureUnitImpl and generate the identifier string in place.
1121 */
serialize(UErrorCode & status)1122 void MeasureUnitImpl::serialize(UErrorCode &status) {
1123 if (U_FAILURE(status)) {
1124 return;
1125 }
1126
1127 if (this->singleUnits.length() == 0) {
1128 // Dimensionless, constructed by the default constructor.
1129 return;
1130 }
1131
1132 if (this->complexity == UMEASURE_UNIT_COMPOUND) {
1133 // Note: don't sort a MIXED unit
1134 uprv_sortArray(this->singleUnits.getAlias(), this->singleUnits.length(),
1135 sizeof(this->singleUnits[0]), compareSingleUnits, nullptr, false, &status);
1136 if (U_FAILURE(status)) {
1137 return;
1138 }
1139 }
1140
1141 CharString result;
1142 bool beforePer = true;
1143 bool firstTimeNegativeDimension = false;
1144 for (int32_t i = 0; i < this->singleUnits.length(); i++) {
1145 if (beforePer && (*this->singleUnits[i]).dimensionality < 0) {
1146 beforePer = false;
1147 firstTimeNegativeDimension = true;
1148 } else if ((*this->singleUnits[i]).dimensionality < 0) {
1149 firstTimeNegativeDimension = false;
1150 }
1151
1152 if (U_FAILURE(status)) {
1153 return;
1154 }
1155
1156 if (this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) {
1157 if (result.length() != 0) {
1158 result.append(StringPiece("-and-"), status);
1159 }
1160 } else {
1161 if (firstTimeNegativeDimension) {
1162 if (result.length() == 0) {
1163 result.append(StringPiece("per-"), status);
1164 } else {
1165 result.append(StringPiece("-per-"), status);
1166 }
1167 } else {
1168 if (result.length() != 0) {
1169 result.append(StringPiece("-"), status);
1170 }
1171 }
1172 }
1173
1174 this->singleUnits[i]->appendNeutralIdentifier(result, status);
1175 }
1176
1177 this->identifier = CharString(result, status);
1178 }
1179
build(UErrorCode & status)1180 MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && {
1181 this->serialize(status);
1182 return MeasureUnit(std::move(*this));
1183 }
1184
forIdentifier(StringPiece identifier,UErrorCode & status)1185 MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) {
1186 return Parser::from(identifier, status).parse(status).build(status);
1187 }
1188
getComplexity(UErrorCode & status) const1189 UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const {
1190 MeasureUnitImpl temp;
1191 return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity;
1192 }
1193
getPrefix(UErrorCode & status) const1194 UMeasurePrefix MeasureUnit::getPrefix(UErrorCode& status) const {
1195 return SingleUnitImpl::forMeasureUnit(*this, status).unitPrefix;
1196 }
1197
withPrefix(UMeasurePrefix prefix,UErrorCode & status) const1198 MeasureUnit MeasureUnit::withPrefix(UMeasurePrefix prefix, UErrorCode& status) const UPRV_NO_SANITIZE_UNDEFINED {
1199 SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
1200 singleUnit.unitPrefix = prefix;
1201 return singleUnit.build(status);
1202 }
1203
getDimensionality(UErrorCode & status) const1204 int32_t MeasureUnit::getDimensionality(UErrorCode& status) const {
1205 SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
1206 if (U_FAILURE(status)) { return 0; }
1207 if (singleUnit.isDimensionless()) {
1208 return 0;
1209 }
1210 return singleUnit.dimensionality;
1211 }
1212
withDimensionality(int32_t dimensionality,UErrorCode & status) const1213 MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const {
1214 SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);
1215 singleUnit.dimensionality = dimensionality;
1216 return singleUnit.build(status);
1217 }
1218
reciprocal(UErrorCode & status) const1219 MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const {
1220 MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
1221 impl.takeReciprocal(status);
1222 return std::move(impl).build(status);
1223 }
1224
// Returns the product of this unit and `other`: every single unit of `other`
// is appended to an owned copy of this unit's impl, which is then built into
// a MeasureUnit.
//
// If either operand is a MIXED unit, status is set to
// U_ILLEGAL_ARGUMENT_ERROR and a default-constructed MeasureUnit is returned.
MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const {
    MeasureUnitImpl combined = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
    MeasureUnitImpl scratch;
    const MeasureUnitImpl& rhs = MeasureUnitImpl::forMeasureUnit(other, scratch, status);

    const bool anyMixed =
        combined.complexity == UMEASURE_UNIT_MIXED || rhs.complexity == UMEASURE_UNIT_MIXED;
    if (anyMixed) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return {};
    }

    const int32_t rhsCount = rhs.singleUnits.length();
    for (int32_t i = 0; i < rhsCount; ++i) {
        combined.appendSingleUnit(*rhs.singleUnits[i], status);
    }
    if (combined.singleUnits.length() > 1) {
        combined.complexity = UMEASURE_UNIT_COMPOUND;
    }
    return std::move(combined).build(status);
}
1241
splitToSingleUnitsImpl(int32_t & outCount,UErrorCode & status) const1242 LocalArray<MeasureUnit> MeasureUnit::splitToSingleUnitsImpl(int32_t& outCount, UErrorCode& status) const {
1243 MeasureUnitImpl temp;
1244 const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status);
1245 outCount = impl.singleUnits.length();
1246 MeasureUnit* arr = new MeasureUnit[outCount];
1247 if (arr == nullptr) {
1248 status = U_MEMORY_ALLOCATION_ERROR;
1249 return LocalArray<MeasureUnit>();
1250 }
1251 for (int32_t i = 0; i < outCount; i++) {
1252 arr[i] = impl.singleUnits[i]->build(status);
1253 }
1254 return LocalArray<MeasureUnit>(arr, status);
1255 }
1256
1257
1258 U_NAMESPACE_END
1259
#endif /* !UCONFIG_NO_FORMATTING */
1261