xref: /aosp_15_r20/external/cronet/third_party/icu/source/i18n/number_longnames.cpp (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // © 2017 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include <cstdlib>
9 
10 #include "unicode/simpleformatter.h"
11 #include "unicode/ures.h"
12 #include "ureslocs.h"
13 #include "charstr.h"
14 #include "uresimp.h"
15 #include "measunit_impl.h"
16 #include "number_longnames.h"
17 #include "number_microprops.h"
18 #include <algorithm>
19 #include "cstring.h"
20 #include "util.h"
21 
22 using namespace icu;
23 using namespace icu::number;
24 using namespace icu::number::impl;
25 
26 namespace {
27 
28 /**
29  * Display Name (this format has no placeholder).
30  *
31  * Used as an index into the LongNameHandler::simpleFormats array. Units
32  * resources cover the normal set of PluralRules keys, as well as `dnam` and
33  * `per` forms.
34  */
35 constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT;
36 /**
37  * "per" form (e.g. "{0} per day" is day's "per" form).
38  *
39  * Used as an index into the LongNameHandler::simpleFormats array. Units
40  * resources cover the normal set of PluralRules keys, as well as `dnam` and
41  * `per` forms.
42  */
43 constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1;
44 /**
45  * Gender of the word, in languages with grammatical gender.
46  */
47 constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2;
48 // Number of keys in the array populated by PluralTableSink.
49 constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 3;
50 
// TODO(icu-units#28): load this list from resources, after creating a "&set"
// function for use in ldml2icu rules.
//
// Names of the grammatical genders recognised by getGenderString(). The
// entries MUST stay in ascending strcmp order, because the lookup in
// getGenderString() is a binary search over this table.
constexpr int32_t GENDER_COUNT = 7;
// Both the characters and the pointers are const: the table is read-only.
const char *const gGenders[GENDER_COUNT] = {"animate",   "common", "feminine", "inanimate",
                                            "masculine", "neuter", "personal"};
56 
57 // Converts a UnicodeString to a const char*, either pointing to a string in
58 // gGenders, or pointing to an empty string if an appropriate string was not
59 // found.
getGenderString(UnicodeString uGender,UErrorCode status)60 const char *getGenderString(UnicodeString uGender, UErrorCode status) {
61     if (uGender.length() == 0) {
62         return "";
63     }
64     CharString gender;
65     gender.appendInvariantChars(uGender, status);
66     if (U_FAILURE(status)) {
67         return "";
68     }
69     int32_t first = 0;
70     int32_t last = GENDER_COUNT;
71     while (first < last) {
72         int32_t mid = (first + last) / 2;
73         int32_t cmp = uprv_strcmp(gender.data(), gGenders[mid]);
74         if (cmp == 0) {
75             return gGenders[mid];
76         } else if (cmp > 0) {
77             first = mid + 1;
78         } else if (cmp < 0) {
79             last = mid;
80         }
81     }
82     // We don't return an error in case our gGenders list is incomplete in
83     // production.
84     //
85     // TODO(icu-units#28): a unit test checking all locales' genders are covered
86     // by gGenders? Else load a complete list of genders found in
87     // grammaticalFeatures in an initOnce.
88     return "";
89 }
90 
91 // Returns the array index that corresponds to the given pluralKeyword.
getIndex(const char * pluralKeyword,UErrorCode & status)92 static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) {
93     // pluralKeyword can also be "dnam", "per", or "gender"
94     switch (*pluralKeyword) {
95     case 'd':
96         if (uprv_strcmp(pluralKeyword + 1, "nam") == 0) {
97             return DNAM_INDEX;
98         }
99         break;
100     case 'g':
101         if (uprv_strcmp(pluralKeyword + 1, "ender") == 0) {
102             return GENDER_INDEX;
103         }
104         break;
105     case 'p':
106         if (uprv_strcmp(pluralKeyword + 1, "er") == 0) {
107             return PER_INDEX;
108         }
109         break;
110     default:
111         break;
112     }
113     StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status);
114     return plural;
115 }
116 
117 // Selects a string out of the `strings` array which corresponds to the
118 // specified plural form, with fallback to the OTHER form.
119 //
120 // The `strings` array must have ARRAY_LENGTH items: one corresponding to each
121 // of the plural forms, plus a display name ("dnam") and a "per" form.
getWithPlural(const UnicodeString * strings,StandardPlural::Form plural,UErrorCode & status)122 static UnicodeString getWithPlural(
123         const UnicodeString* strings,
124         StandardPlural::Form plural,
125         UErrorCode& status) {
126     UnicodeString result = strings[plural];
127     if (result.isBogus()) {
128         result = strings[StandardPlural::Form::OTHER];
129     }
130     if (result.isBogus()) {
131         // There should always be data in the "other" plural variant.
132         status = U_INTERNAL_PROGRAM_ERROR;
133     }
134     return result;
135 }
136 
// Where the "{0}" placeholder sits inside a unit pattern.
enum PlaceholderPosition {
    // NOTE(review): never assigned by extractCorePattern; presumably marks an
    // empty pattern - confirm against the users of this enum.
    PH_EMPTY,
    // The pattern contains no placeholder.
    PH_NONE,
    // The pattern starts with the placeholder.
    PH_BEGINNING,
    // The placeholder is neither at the start nor at the end.
    PH_MIDDLE,
    // The pattern ends with the placeholder.
    PH_END
};
138 
139 /**
140  * Returns three outputs extracted from pattern.
141  *
142  * @param coreUnit is extracted as per Extract(...) in the spec:
143  *   https://unicode.org/reports/tr35/tr35-general.html#compound-units
144  * @param PlaceholderPosition indicates where in the string the placeholder was
145  *   found.
146  * @param joinerChar Iff the placeholder was at the beginning or end, joinerChar
147  *   contains the space character (if any) that separated the placeholder from
148  *   the rest of the pattern. Otherwise, joinerChar is set to NUL. Only one
149  *   space character is considered.
150  */
extractCorePattern(const UnicodeString & pattern,UnicodeString & coreUnit,PlaceholderPosition & placeholderPosition,char16_t & joinerChar)151 void extractCorePattern(const UnicodeString &pattern,
152                         UnicodeString &coreUnit,
153                         PlaceholderPosition &placeholderPosition,
154                         char16_t &joinerChar) {
155     joinerChar = 0;
156     int32_t len = pattern.length();
157     if (pattern.startsWith(u"{0}", 3)) {
158         placeholderPosition = PH_BEGINNING;
159         if (u_isJavaSpaceChar(pattern[3])) {
160             joinerChar = pattern[3];
161             coreUnit.setTo(pattern, 4, len - 4);
162         } else {
163             coreUnit.setTo(pattern, 3, len - 3);
164         }
165     } else if (pattern.endsWith(u"{0}", 3)) {
166         placeholderPosition = PH_END;
167         if (u_isJavaSpaceChar(pattern[len - 4])) {
168             coreUnit.setTo(pattern, 0, len - 4);
169             joinerChar = pattern[len - 4];
170         } else {
171             coreUnit.setTo(pattern, 0, len - 3);
172         }
173     } else if (pattern.indexOf(u"{0}", 3, 1, len - 2) == -1) {
174         placeholderPosition = PH_NONE;
175         coreUnit = pattern;
176     } else {
177         placeholderPosition = PH_MIDDLE;
178         coreUnit = pattern;
179     }
180 }
181 
182 //////////////////////////
183 /// BEGIN DATA LOADING ///
184 //////////////////////////
185 
186 // Gets the gender of a built-in unit: unit must be a built-in. Returns an empty
187 // string both in case of unknown gender and in case of unknown unit.
188 UnicodeString
getGenderForBuiltin(const Locale & locale,const MeasureUnit & builtinUnit,UErrorCode & status)189 getGenderForBuiltin(const Locale &locale, const MeasureUnit &builtinUnit, UErrorCode &status) {
190     LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
191     if (U_FAILURE(status)) { return {}; }
192 
193     // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
194     // TODO(ICU-20400): Get duration-*-person data properly with aliases.
195     StringPiece subtypeForResource;
196     int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(builtinUnit.getSubtype()));
197     if (subtypeLen > 7 && uprv_strcmp(builtinUnit.getSubtype() + subtypeLen - 7, "-person") == 0) {
198         subtypeForResource = {builtinUnit.getSubtype(), subtypeLen - 7};
199     } else {
200         subtypeForResource = builtinUnit.getSubtype();
201     }
202 
203     CharString key;
204     key.append("units/", status);
205     key.append(builtinUnit.getType(), status);
206     key.append("/", status);
207     key.append(subtypeForResource, status);
208     key.append("/gender", status);
209 
210     UErrorCode localStatus = status;
211     int32_t resultLen = 0;
212     const char16_t *result =
213         ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &resultLen, &localStatus);
214     if (U_SUCCESS(localStatus)) {
215         status = localStatus;
216         return UnicodeString(true, result, resultLen);
217     } else {
218         // TODO(icu-units#28): "$unitRes/gender" does not exist. Do we want to
219         // check whether the parent "$unitRes" exists? Then we could return
220         // U_MISSING_RESOURCE_ERROR for incorrect usage (e.g. builtinUnit not
221         // being a builtin).
222         return {};
223     }
224 }
225 
226 // Loads data from a resource tree with paths matching
227 // $key/$pluralForm/$gender/$case, with lateral inheritance for missing cases
228 // and genders.
229 //
230 // An InflectedPluralSink is configured to load data for a specific gender and
231 // case. It loads all plural forms, because selection between plural forms is
232 // dependent upon the value being formatted.
233 //
234 // See data/unit/de.txt and data/unit/fr.txt for examples - take a look at
235 // units/compound/power2: German has case, French has differences for gender,
236 // but no case.
237 //
238 // TODO(icu-units#138): Conceptually similar to PluralTableSink, however the
239 // tree structures are different. After homogenizing the structures, we may be
240 // able to unify the two classes.
241 //
242 // TODO: Spec violation: expects presence of "count" - does not fallback to an
243 // absent "count"! If this fallback were added, getCompoundValue could be
244 // superseded?
245 class InflectedPluralSink : public ResourceSink {
246   public:
247     // Accepts `char*` rather than StringPiece because
248     // ResourceTable::findValue(...) requires a null-terminated `char*`.
249     //
250     // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
251     // checking is performed.
InflectedPluralSink(const char * gender,const char * caseVariant,UnicodeString * outArray)252     explicit InflectedPluralSink(const char *gender, const char *caseVariant, UnicodeString *outArray)
253         : gender(gender), caseVariant(caseVariant), outArray(outArray) {
254         // Initialize the array to bogus strings.
255         for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
256             outArray[i].setToBogus();
257         }
258     }
259 
260     // See ResourceSink::put().
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)261     void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
262         int32_t pluralIndex = getIndex(key, status);
263         if (U_FAILURE(status)) { return; }
264         if (!outArray[pluralIndex].isBogus()) {
265             // We already have a pattern
266             return;
267         }
268         ResourceTable genderTable = value.getTable(status);
269         ResourceTable caseTable; // This instance has to outlive `value`
270         if (loadForPluralForm(genderTable, caseTable, value, status)) {
271             outArray[pluralIndex] = value.getUnicodeString(status);
272         }
273     }
274 
275   private:
276     // Tries to load data for the configured gender from `genderTable`. Returns
277     // true if found, returning the data in `value`. The returned data will be
278     // for the configured gender if found, falling back to "neuter" and
279     // no-gender if not. The caseTable parameter holds the intermediate
280     // ResourceTable for the sake of lifetime management.
loadForPluralForm(const ResourceTable & genderTable,ResourceTable & caseTable,ResourceValue & value,UErrorCode & status)281     bool loadForPluralForm(const ResourceTable &genderTable,
282                            ResourceTable &caseTable,
283                            ResourceValue &value,
284                            UErrorCode &status) {
285         if (uprv_strcmp(gender, "") != 0) {
286             if (loadForGender(genderTable, gender, caseTable, value, status)) {
287                 return true;
288             }
289             if (uprv_strcmp(gender, "neuter") != 0 &&
290                 loadForGender(genderTable, "neuter", caseTable, value, status)) {
291                 return true;
292             }
293         }
294         if (loadForGender(genderTable, "_", caseTable, value, status)) {
295             return true;
296         }
297         return false;
298     }
299 
300     // Tries to load data for the given gender from `genderTable`. Returns true
301     // if found, returning the data in `value`. The returned data will be for
302     // the configured case if found, falling back to "nominative" and no-case if
303     // not.
loadForGender(const ResourceTable & genderTable,const char * genderVal,ResourceTable & caseTable,ResourceValue & value,UErrorCode & status)304     bool loadForGender(const ResourceTable &genderTable,
305                        const char *genderVal,
306                        ResourceTable &caseTable,
307                        ResourceValue &value,
308                        UErrorCode &status) {
309         if (!genderTable.findValue(genderVal, value)) {
310             return false;
311         }
312         caseTable = value.getTable(status);
313         if (uprv_strcmp(caseVariant, "") != 0) {
314             if (loadForCase(caseTable, caseVariant, value)) {
315                 return true;
316             }
317             if (uprv_strcmp(caseVariant, "nominative") != 0 &&
318                 loadForCase(caseTable, "nominative", value)) {
319                 return true;
320             }
321         }
322         if (loadForCase(caseTable, "_", value)) {
323             return true;
324         }
325         return false;
326     }
327 
328     // Tries to load data for the given case from `caseTable`. Returns true if
329     // found, returning the data in `value`.
loadForCase(const ResourceTable & caseTable,const char * caseValue,ResourceValue & value)330     bool loadForCase(const ResourceTable &caseTable, const char *caseValue, ResourceValue &value) {
331         if (!caseTable.findValue(caseValue, value)) {
332             return false;
333         }
334         return true;
335     }
336 
337     const char *gender;
338     const char *caseVariant;
339     UnicodeString *outArray;
340 };
341 
342 // Fetches localised formatting patterns for the given subKey. See documentation
343 // for InflectedPluralSink for details.
344 //
345 // Data is loaded for the appropriate unit width, with missing data filled in
346 // from unitsShort.
getInflectedMeasureData(StringPiece subKey,const Locale & locale,const UNumberUnitWidth & width,const char * gender,const char * caseVariant,UnicodeString * outArray,UErrorCode & status)347 void getInflectedMeasureData(StringPiece subKey,
348                              const Locale &locale,
349                              const UNumberUnitWidth &width,
350                              const char *gender,
351                              const char *caseVariant,
352                              UnicodeString *outArray,
353                              UErrorCode &status) {
354     InflectedPluralSink sink(gender, caseVariant, outArray);
355     LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
356     if (U_FAILURE(status)) { return; }
357 
358     CharString key;
359     key.append("units", status);
360     if (width == UNUM_UNIT_WIDTH_NARROW) {
361         key.append("Narrow", status);
362     } else if (width == UNUM_UNIT_WIDTH_SHORT) {
363         key.append("Short", status);
364     }
365     key.append("/", status);
366     key.append(subKey, status);
367 
368     UErrorCode localStatus = status;
369     ures_getAllChildrenWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
370     if (width == UNUM_UNIT_WIDTH_SHORT) {
371         status = localStatus;
372         return;
373     }
374 }
375 
376 class PluralTableSink : public ResourceSink {
377   public:
378     // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
379     // checking is performed.
PluralTableSink(UnicodeString * outArray)380     explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) {
381         // Initialize the array to bogus strings.
382         for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
383             outArray[i].setToBogus();
384         }
385     }
386 
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)387     void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
388         if (uprv_strcmp(key, "case") == 0) {
389             return;
390         }
391         int32_t index = getIndex(key, status);
392         if (U_FAILURE(status)) { return; }
393         if (!outArray[index].isBogus()) {
394             return;
395         }
396         outArray[index] = value.getUnicodeString(status);
397         if (U_FAILURE(status)) { return; }
398     }
399 
400   private:
401     UnicodeString *outArray;
402 };
403 
404 /**
405  * Populates outArray with `locale`-specific values for `unit` through use of
406  * PluralTableSink. Only the set of basic units are supported!
407  *
408  * Reading from resources *unitsNarrow* and *unitsShort* (for width
409  * UNUM_UNIT_WIDTH_NARROW), or just *unitsShort* (for width
410  * UNUM_UNIT_WIDTH_SHORT). For other widths, it reads just "units".
411  *
412  * @param unit must be a built-in unit, i.e. must have a type and subtype,
413  *     listed in gTypes and gSubTypes in measunit.cpp.
414  * @param unitDisplayCase the empty string and "nominative" are treated the
415  *     same. For other cases, strings for the requested case are used if found.
416  *     (For any missing case-specific data, we fall back to nominative.)
417  * @param outArray must be of fixed length ARRAY_LENGTH.
418  */
getMeasureData(const Locale & locale,const MeasureUnit & unit,const UNumberUnitWidth & width,const char * unitDisplayCase,UnicodeString * outArray,UErrorCode & status)419 void getMeasureData(const Locale &locale,
420                     const MeasureUnit &unit,
421                     const UNumberUnitWidth &width,
422                     const char *unitDisplayCase,
423                     UnicodeString *outArray,
424                     UErrorCode &status) {
425     PluralTableSink sink(outArray);
426     LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
427     if (U_FAILURE(status)) { return; }
428 
429     CharString subKey;
430     subKey.append("/", status);
431     subKey.append(unit.getType(), status);
432     subKey.append("/", status);
433 
434     // Check if unitSubType is an alias or not.
435     LocalUResourceBundlePointer aliasBundle(ures_open(U_ICUDATA_ALIAS, "metadata", &status));
436 
437     UErrorCode aliasStatus = status;
438     StackUResourceBundle aliasFillIn;
439     CharString aliasKey;
440     aliasKey.append("alias/unit/", aliasStatus);
441     aliasKey.append(unit.getSubtype(), aliasStatus);
442     aliasKey.append("/replacement", aliasStatus);
443     ures_getByKeyWithFallback(aliasBundle.getAlias(), aliasKey.data(), aliasFillIn.getAlias(),
444                               &aliasStatus);
445     CharString unitSubType;
446     if (!U_FAILURE(aliasStatus)) {
447         // This means the subType is an alias. Then, replace unitSubType with the replacement.
448         auto replacement = ures_getUnicodeString(aliasFillIn.getAlias(), &status);
449         unitSubType.appendInvariantChars(replacement, status);
450     } else {
451         unitSubType.append(unit.getSubtype(), status);
452     }
453 
454     // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
455     // TODO(ICU-20400): Get duration-*-person data properly with aliases.
456     int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(unitSubType.data()));
457     if (subtypeLen > 7 && uprv_strcmp(unitSubType.data() + subtypeLen - 7, "-person") == 0) {
458         subKey.append({unitSubType.data(), subtypeLen - 7}, status);
459     } else {
460         subKey.append({unitSubType.data(), subtypeLen}, status);
461     }
462 
463     if (width != UNUM_UNIT_WIDTH_FULL_NAME) {
464         UErrorCode localStatus = status;
465         CharString genderKey;
466         genderKey.append("units", localStatus);
467         genderKey.append(subKey, localStatus);
468         genderKey.append("/gender", localStatus);
469         StackUResourceBundle fillIn;
470         ures_getByKeyWithFallback(unitsBundle.getAlias(), genderKey.data(), fillIn.getAlias(),
471                                   &localStatus);
472         outArray[GENDER_INDEX] = ures_getUnicodeString(fillIn.getAlias(), &localStatus);
473     }
474 
475     CharString key;
476     key.append("units", status);
477     if (width == UNUM_UNIT_WIDTH_NARROW) {
478         key.append("Narrow", status);
479     } else if (width == UNUM_UNIT_WIDTH_SHORT) {
480         key.append("Short", status);
481     }
482     key.append(subKey, status);
483 
484     // Grab desired case first, if available. Then grab no-case data to fill in
485     // the gaps.
486     if (width == UNUM_UNIT_WIDTH_FULL_NAME && unitDisplayCase[0] != 0) {
487         CharString caseKey;
488         caseKey.append(key, status);
489         caseKey.append("/case/", status);
490         caseKey.append(unitDisplayCase, status);
491 
492         UErrorCode localStatus = U_ZERO_ERROR;
493         // TODO(icu-units#138): our fallback logic is not spec-compliant:
494         // lateral fallback should happen before locale fallback. Switch to
495         // getInflectedMeasureData after homogenizing data format? Find a unit
496         // test case that demonstrates the incorrect fallback logic (via
497         // regional variant of an inflected language?)
498         ures_getAllChildrenWithFallback(unitsBundle.getAlias(), caseKey.data(), sink, localStatus);
499     }
500 
501     // TODO(icu-units#138): our fallback logic is not spec-compliant: we
502     // check the given case, then go straight to the no-case data. The spec
503     // states we should first look for case="nominative". As part of #138,
504     // either get the spec changed, or add unit tests that warn us if
505     // case="nominative" data differs from no-case data?
506     UErrorCode localStatus = U_ZERO_ERROR;
507     ures_getAllChildrenWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
508     if (width == UNUM_UNIT_WIDTH_SHORT) {
509         if (U_FAILURE(localStatus)) {
510             status = localStatus;
511         }
512         return;
513     }
514 }
515 
516 // NOTE: outArray MUST have a length of at least ARRAY_LENGTH.
getCurrencyLongNameData(const Locale & locale,const CurrencyUnit & currency,UnicodeString * outArray,UErrorCode & status)517 void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit &currency, UnicodeString *outArray,
518                              UErrorCode &status) {
519     // In ICU4J, this method gets a CurrencyData from CurrencyData.provider.
520     // TODO(ICU4J): Implement this without going through CurrencyData, like in ICU4C?
521     PluralTableSink sink(outArray);
522     LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_CURR, locale.getName(), &status));
523     if (U_FAILURE(status)) { return; }
524     ures_getAllChildrenWithFallback(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status);
525     if (U_FAILURE(status)) { return; }
526     for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
527         UnicodeString &pattern = outArray[i];
528         if (pattern.isBogus()) {
529             continue;
530         }
531         int32_t longNameLen = 0;
532         const char16_t *longName = ucurr_getPluralName(
533                 currency.getISOCurrency(),
534                 locale.getName(),
535                 nullptr /* isChoiceFormat */,
536                 StandardPlural::getKeyword(static_cast<StandardPlural::Form>(i)),
537                 &longNameLen,
538                 &status);
539         // Example pattern from data: "{0} {1}"
540         // Example output after find-and-replace: "{0} US dollars"
541         pattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(longName, longNameLen));
542     }
543 }
544 
getCompoundValue(StringPiece compoundKey,const Locale & locale,const UNumberUnitWidth & width,UErrorCode & status)545 UnicodeString getCompoundValue(StringPiece compoundKey,
546                                const Locale &locale,
547                                const UNumberUnitWidth &width,
548                                UErrorCode &status) {
549     LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
550     if (U_FAILURE(status)) { return {}; }
551     CharString key;
552     key.append("units", status);
553     if (width == UNUM_UNIT_WIDTH_NARROW) {
554         key.append("Narrow", status);
555     } else if (width == UNUM_UNIT_WIDTH_SHORT) {
556         key.append("Short", status);
557     }
558     key.append("/compound/", status);
559     key.append(compoundKey, status);
560 
561     UErrorCode localStatus = status;
562     int32_t len = 0;
563     const char16_t *ptr =
564         ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &localStatus);
565     if (U_FAILURE(localStatus) && width != UNUM_UNIT_WIDTH_SHORT) {
566         // Fall back to short, which contains more compound data
567         key.clear();
568         key.append("unitsShort/compound/", status);
569         key.append(compoundKey, status);
570         ptr = ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &len, &status);
571     } else {
572         status = localStatus;
573     }
574     if (U_FAILURE(status)) {
575         return {};
576     }
577     return UnicodeString(ptr, len);
578 }
579 
580 /**
581  * Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml.
582  *
583  * Consider a deriveComponent rule that looks like this:
584  *
585  *     <deriveComponent feature="case" structure="per" value0="compound" value1="nominative"/>
586  *
587  * Instantiating an instance as follows:
588  *
589  *     DerivedComponents d(loc, "case", "per");
590  *
591  * Applying the rule in the XML element above, `d.value0("foo")` will be "foo",
592  * and `d.value1("foo")` will be "nominative".
593  *
594  * The values returned by value0(...) and value1(...) are valid only while the
595  * instance exists. In case of any kind of failure, value0(...) and value1(...)
596  * will return "".
597  */
598 class DerivedComponents {
599   public:
600     /**
601      * Constructor.
602      *
603      * The feature and structure parameters must be null-terminated. The string
604      * referenced by compoundValue must exist for longer than the
605      * DerivedComponents instance.
606      */
DerivedComponents(const Locale & locale,const char * feature,const char * structure)607     DerivedComponents(const Locale &locale, const char *feature, const char *structure) {
608         StackUResourceBundle derivationsBundle, stackBundle;
609         ures_openDirectFillIn(derivationsBundle.getAlias(), nullptr, "grammaticalFeatures", &status);
610         ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
611                       &status);
612         ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(),
613                       &status);
614         if (U_FAILURE(status)) {
615             return;
616         }
617         UErrorCode localStatus = U_ZERO_ERROR;
618         // TODO(icu-units#28): use standard normal locale resolution algorithms
619         // rather than just grabbing language:
620         ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(),
621                       &localStatus);
622         // TODO(icu-units#28):
623         // - code currently assumes if the locale exists, the rules are there -
624         //   instead of falling back to root when the requested rule is missing.
625         // - investigate ures.h functions, see if one that uses res_findResource()
626         //   might be better (or use res_findResource directly), or maybe help
627         //   improve ures documentation to guide function selection?
628         if (localStatus == U_MISSING_RESOURCE_ERROR) {
629             ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
630         } else {
631             status = localStatus;
632         }
633         ures_getByKey(stackBundle.getAlias(), "component", stackBundle.getAlias(), &status);
634         ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
635         ures_getByKey(stackBundle.getAlias(), structure, stackBundle.getAlias(), &status);
636         UnicodeString val0 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 0, &status);
637         UnicodeString val1 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 1, &status);
638         if (U_SUCCESS(status)) {
639             if (val0.compare(UnicodeString(u"compound")) == 0) {
640                 compound0_ = true;
641             } else {
642                 compound0_ = false;
643                 value0_.appendInvariantChars(val0, status);
644             }
645             if (val1.compare(UnicodeString(u"compound")) == 0) {
646                 compound1_ = true;
647             } else {
648                 compound1_ = false;
649                 value1_.appendInvariantChars(val1, status);
650             }
651         }
652     }
653 
654     // Returns a StringPiece that is only valid as long as the instance exists.
value0(const StringPiece compoundValue) const655     StringPiece value0(const StringPiece compoundValue) const {
656         return compound0_ ? compoundValue : value0_.toStringPiece();
657     }
658 
659     // Returns a StringPiece that is only valid as long as the instance exists.
value1(const StringPiece compoundValue) const660     StringPiece value1(const StringPiece compoundValue) const {
661         return compound1_ ? compoundValue : value1_.toStringPiece();
662     }
663 
664     // Returns a char* that is only valid as long as the instance exists.
value0(const char * compoundValue) const665     const char *value0(const char *compoundValue) const {
666         return compound0_ ? compoundValue : value0_.data();
667     }
668 
669     // Returns a char* that is only valid as long as the instance exists.
value1(const char * compoundValue) const670     const char *value1(const char *compoundValue) const {
671         return compound1_ ? compoundValue : value1_.data();
672     }
673 
674   private:
675     UErrorCode status = U_ZERO_ERROR;
676 
677     // Holds strings referred to by value0 and value1;
678     bool compound0_ = false, compound1_ = false;
679     CharString value0_, value1_;
680 };
681 
682 // TODO(icu-units#28): test somehow? Associate with an ICU ticket for adding
683 // testsuite support for testing with synthetic data?
684 /**
685  * Loads and returns the value in rules that look like these:
686  *
687  * <deriveCompound feature="gender" structure="per" value="0"/>
688  * <deriveCompound feature="gender" structure="times" value="1"/>
689  *
690  * Currently a fake example, but spec compliant:
691  * <deriveCompound feature="gender" structure="power" value="feminine"/>
692  *
693  * NOTE: If U_FAILURE(status), returns an empty string.
694  */
695 UnicodeString
getDeriveCompoundRule(Locale locale,const char * feature,const char * structure,UErrorCode & status)696 getDeriveCompoundRule(Locale locale, const char *feature, const char *structure, UErrorCode &status) {
697     StackUResourceBundle derivationsBundle, stackBundle;
698     ures_openDirectFillIn(derivationsBundle.getAlias(), nullptr, "grammaticalFeatures", &status);
699     ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
700                   &status);
701     ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status);
702     // TODO: use standard normal locale resolution algorithms rather than just grabbing language:
703     ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), &status);
704     // TODO:
705     // - code currently assumes if the locale exists, the rules are there -
706     //   instead of falling back to root when the requested rule is missing.
707     // - investigate ures.h functions, see if one that uses res_findResource()
708     //   might be better (or use res_findResource directly), or maybe help
709     //   improve ures documentation to guide function selection?
710     if (status == U_MISSING_RESOURCE_ERROR) {
711         status = U_ZERO_ERROR;
712         ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
713     }
714     ures_getByKey(stackBundle.getAlias(), "compound", stackBundle.getAlias(), &status);
715     ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
716     UnicodeString uVal = ures_getUnicodeStringByKey(stackBundle.getAlias(), structure, &status);
717     if (U_FAILURE(status)) {
718         return {};
719     }
720     U_ASSERT(!uVal.isBogus());
721     return uVal;
722 }
723 
724 // Returns the gender string for structures following these rules:
725 //
726 // <deriveCompound feature="gender" structure="per" value="0"/>
727 // <deriveCompound feature="gender" structure="times" value="1"/>
728 //
729 // Fake example:
730 // <deriveCompound feature="gender" structure="power" value="feminine"/>
731 //
732 // data0 and data1 should be pattern arrays (UnicodeString[ARRAY_SIZE]) that
733 // correspond to value="0" and value="1".
734 //
735 // Pass a nullptr to data1 if the structure has no concept of value="1" (e.g.
736 // "prefix" doesn't).
getDerivedGender(Locale locale,const char * structure,UnicodeString * data0,UnicodeString * data1,UErrorCode & status)737 UnicodeString getDerivedGender(Locale locale,
738                                const char *structure,
739                                UnicodeString *data0,
740                                UnicodeString *data1,
741                                UErrorCode &status) {
742     UnicodeString val = getDeriveCompoundRule(locale, "gender", structure, status);
743     if (val.length() == 1) {
744         switch (val[0]) {
745         case u'0':
746             return data0[GENDER_INDEX];
747         case u'1':
748             if (data1 == nullptr) {
749                 return {};
750             }
751             return data1[GENDER_INDEX];
752         }
753     }
754     return val;
755 }
756 
757 ////////////////////////
758 /// END DATA LOADING ///
759 ////////////////////////
760 
761 // TODO: promote this somewhere? It's based on patternprops.cpp' trimWhitespace
trimSpaceChars(const char16_t * s,int32_t & length)762 const char16_t *trimSpaceChars(const char16_t *s, int32_t &length) {
763     if (length <= 0 || (!u_isJavaSpaceChar(s[0]) && !u_isJavaSpaceChar(s[length - 1]))) {
764         return s;
765     }
766     int32_t start = 0;
767     int32_t limit = length;
768     while (start < limit && u_isJavaSpaceChar(s[start])) {
769         ++start;
770     }
771     if (start < limit) {
772         // There is non-white space at start; we will not move limit below that,
773         // so we need not test start<limit in the loop.
774         while (u_isJavaSpaceChar(s[limit - 1])) {
775             --limit;
776         }
777     }
778     length = limit - start;
779     return s + start;
780 }
781 
782 /**
783  * Calculates the gender of an arbitrary unit: this is the *second*
784  * implementation of an algorithm to do this:
785  *
786  * Gender is also calculated in "processPatternTimes": that code path is "bottom
787  * up", loading the gender for every component of a compound unit (at the same
788  * time as loading the Long Names formatting patterns), even if the gender is
789  * unneeded, then combining the single units' genders into the compound unit's
790  * gender, according to the rules. This algorithm does a lazier "top-down"
791  * evaluation, starting with the compound unit, calculating which single unit's
792  * gender is needed by breaking it down according to the rules, and then loading
793  * only the gender of the one single unit who's gender is needed.
794  *
795  * For future refactorings:
796  * 1. we could drop processPatternTimes' gender calculation and just call this
797  *    function: for UNUM_UNIT_WIDTH_FULL_NAME, the unit gender is in the very
798  *    same table as the formatting patterns, so loading it then may be
799  *    efficient. For other unit widths however, it needs to be explicitly looked
800  *    up anyway.
801  * 2. alternatively, if CLDR is providing all the genders we need such that we
802  *    don't need to calculate them in ICU anymore, we could drop this function
803  *    and keep only processPatternTimes' calculation. (And optimise it a bit?)
804  *
805  * @param locale The desired locale.
806  * @param unit The measure unit to calculate the gender for.
807  * @return The gender string for the unit, or an empty string if unknown or
808  *     ungendered.
809  */
calculateGenderForUnit(const Locale & locale,const MeasureUnit & unit,UErrorCode & status)810 UnicodeString calculateGenderForUnit(const Locale &locale, const MeasureUnit &unit, UErrorCode &status) {
811     MeasureUnitImpl impl;
812     const MeasureUnitImpl& mui = MeasureUnitImpl::forMeasureUnit(unit, impl, status);
813     int32_t singleUnitIndex = 0;
814     if (mui.complexity == UMEASURE_UNIT_COMPOUND) {
815         int32_t startSlice = 0;
816         // inclusive
817         int32_t endSlice = mui.singleUnits.length()-1;
818         U_ASSERT(endSlice > 0); // Else it would not be COMPOUND
819         if (mui.singleUnits[endSlice]->dimensionality < 0) {
820             // We have a -per- construct
821             UnicodeString perRule = getDeriveCompoundRule(locale, "gender", "per", status);
822             if (perRule.length() != 1) {
823                 // Fixed gender for -per- units
824                 return perRule;
825             }
826             if (perRule[0] == u'1') {
827                 // Find the start of the denominator. We already know there is one.
828                 while (mui.singleUnits[startSlice]->dimensionality >= 0) {
829                     startSlice++;
830                 }
831             } else {
832                 // Find the end of the numerator
833                 while (endSlice >= 0 && mui.singleUnits[endSlice]->dimensionality < 0) {
834                     endSlice--;
835                 }
836                 if (endSlice < 0) {
837                     // We have only a denominator, e.g. "per-second".
838                     // TODO(icu-units#28): find out what gender to use in the
839                     // absence of a first value - mentioned in CLDR-14253.
840                     return {};
841                 }
842             }
843         }
844         if (endSlice > startSlice) {
845             // We have a -times- construct
846             UnicodeString timesRule = getDeriveCompoundRule(locale, "gender", "times", status);
847             if (timesRule.length() != 1) {
848                 // Fixed gender for -times- units
849                 return timesRule;
850             }
851             if (timesRule[0] == u'0') {
852                 endSlice = startSlice;
853             } else {
854                 // We assume timesRule[0] == u'1'
855                 startSlice = endSlice;
856             }
857         }
858         U_ASSERT(startSlice == endSlice);
859         singleUnitIndex = startSlice;
860     } else if (mui.complexity == UMEASURE_UNIT_MIXED) {
861         status = U_INTERNAL_PROGRAM_ERROR;
862         return {};
863     } else {
864         U_ASSERT(mui.complexity == UMEASURE_UNIT_SINGLE);
865         U_ASSERT(mui.singleUnits.length() == 1);
866     }
867 
868     // Now we know which singleUnit's gender we want
869     const SingleUnitImpl *singleUnit = mui.singleUnits[singleUnitIndex];
870     // Check for any power-prefix gender override:
871     if (std::abs(singleUnit->dimensionality) != 1) {
872         UnicodeString powerRule = getDeriveCompoundRule(locale, "gender", "power", status);
873         if (powerRule.length() != 1) {
874             // Fixed gender for -powN- units
875             return powerRule;
876         }
877         // powerRule[0] == u'0'; u'1' not currently in spec.
878     }
879     // Check for any SI and binary prefix gender override:
880     if (std::abs(singleUnit->dimensionality) != 1) {
881         UnicodeString prefixRule = getDeriveCompoundRule(locale, "gender", "prefix", status);
882         if (prefixRule.length() != 1) {
883             // Fixed gender for -powN- units
884             return prefixRule;
885         }
886         // prefixRule[0] == u'0'; u'1' not currently in spec.
887     }
888     // Now we've boiled it down to the gender of one simple unit identifier:
889     return getGenderForBuiltin(locale, MeasureUnit::forIdentifier(singleUnit->getSimpleUnitID(), status),
890                                status);
891 }
892 
maybeCalculateGender(const Locale & locale,const MeasureUnit & unitRef,UnicodeString * outArray,UErrorCode & status)893 void maybeCalculateGender(const Locale &locale,
894                           const MeasureUnit &unitRef,
895                           UnicodeString *outArray,
896                           UErrorCode &status) {
897     if (outArray[GENDER_INDEX].isBogus()) {
898         UnicodeString meterGender = getGenderForBuiltin(locale, MeasureUnit::getMeter(), status);
899         if (meterGender.isEmpty()) {
900             // No gender for meter: assume ungendered language
901             return;
902         }
903         // We have a gendered language, but are lacking gender for unitRef.
904         outArray[GENDER_INDEX] = calculateGenderForUnit(locale, unitRef, status);
905     }
906 }
907 
908 } // namespace
909 
forMeasureUnit(const Locale & loc,const MeasureUnit & unitRef,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,LongNameHandler * fillIn,UErrorCode & status)910 void LongNameHandler::forMeasureUnit(const Locale &loc,
911                                      const MeasureUnit &unitRef,
912                                      const UNumberUnitWidth &width,
913                                      const char *unitDisplayCase,
914                                      const PluralRules *rules,
915                                      const MicroPropsGenerator *parent,
916                                      LongNameHandler *fillIn,
917                                      UErrorCode &status) {
918     // From https://unicode.org/reports/tr35/tr35-general.html#compound-units -
919     // Points 1 and 2 are mostly handled by MeasureUnit:
920     //
921     // 1. If the unitId is empty or invalid, fail
922     // 2. Put the unitId into normalized order
923     U_ASSERT(fillIn != nullptr);
924 
925     if (uprv_strcmp(unitRef.getType(), "") != 0) {
926         // Handling built-in units:
927         //
928         // 3. Set result to be getValue(unitId with length, pluralCategory, caseVariant)
929         //    - If result is not empty, return it
930         UnicodeString simpleFormats[ARRAY_LENGTH];
931         getMeasureData(loc, unitRef, width, unitDisplayCase, simpleFormats, status);
932         maybeCalculateGender(loc, unitRef, simpleFormats, status);
933         if (U_FAILURE(status)) {
934             return;
935         }
936         fillIn->rules = rules;
937         fillIn->parent = parent;
938         fillIn->simpleFormatsToModifiers(simpleFormats,
939                                          {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
940         if (!simpleFormats[GENDER_INDEX].isBogus()) {
941             fillIn->gender = getGenderString(simpleFormats[GENDER_INDEX], status);
942         }
943         return;
944 
945         // TODO(icu-units#145): figure out why this causes a failure in
946         // format/MeasureFormatTest/TestIndividualPluralFallback and other
947         // tests, when it should have been an alternative for the lines above:
948 
949         // forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
950         // fillIn->rules = rules;
951         // fillIn->parent = parent;
952         // return;
953     } else {
954         // Check if it is a MeasureUnit this constructor handles: this
955         // constructor does not handle mixed units
956         U_ASSERT(unitRef.getComplexity(status) != UMEASURE_UNIT_MIXED);
957         forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
958         fillIn->rules = rules;
959         fillIn->parent = parent;
960         return;
961     }
962 }
963 
forArbitraryUnit(const Locale & loc,const MeasureUnit & unitRef,const UNumberUnitWidth & width,const char * unitDisplayCase,LongNameHandler * fillIn,UErrorCode & status)964 void LongNameHandler::forArbitraryUnit(const Locale &loc,
965                                        const MeasureUnit &unitRef,
966                                        const UNumberUnitWidth &width,
967                                        const char *unitDisplayCase,
968                                        LongNameHandler *fillIn,
969                                        UErrorCode &status) {
970     if (U_FAILURE(status)) {
971         return;
972     }
973     if (fillIn == nullptr) {
974         status = U_INTERNAL_PROGRAM_ERROR;
975         return;
976     }
977 
978     // Numbered list items are from the algorithms at
979     // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
980     //
981     // 4. Divide the unitId into numerator (the part before the "-per-") and
982     //    denominator (the part after the "-per-). If both are empty, fail
983     MeasureUnitImpl unit;
984     MeasureUnitImpl perUnit;
985     {
986         MeasureUnitImpl fullUnit = MeasureUnitImpl::forMeasureUnitMaybeCopy(unitRef, status);
987         if (U_FAILURE(status)) {
988             return;
989         }
990         for (int32_t i = 0; i < fullUnit.singleUnits.length(); i++) {
991             SingleUnitImpl *subUnit = fullUnit.singleUnits[i];
992             if (subUnit->dimensionality > 0) {
993                 unit.appendSingleUnit(*subUnit, status);
994             } else {
995                 subUnit->dimensionality *= -1;
996                 perUnit.appendSingleUnit(*subUnit, status);
997             }
998         }
999     }
1000 
1001     // TODO(icu-units#28): check placeholder logic, see if it needs to be
1002     // present here instead of only in processPatternTimes:
1003     //
1004     // 5. Set both globalPlaceholder and globalPlaceholderPosition to be empty
1005 
1006     DerivedComponents derivedPerCases(loc, "case", "per");
1007 
1008     // 6. numeratorUnitString
1009     UnicodeString numeratorUnitData[ARRAY_LENGTH];
1010     processPatternTimes(std::move(unit), loc, width, derivedPerCases.value0(unitDisplayCase),
1011                         numeratorUnitData, status);
1012 
1013     // 7. denominatorUnitString
1014     UnicodeString denominatorUnitData[ARRAY_LENGTH];
1015     processPatternTimes(std::move(perUnit), loc, width, derivedPerCases.value1(unitDisplayCase),
1016                         denominatorUnitData, status);
1017 
1018     // TODO(icu-units#139):
1019     // - implement DerivedComponents for "plural/times" and "plural/power":
1020     //   French has different rules, we'll be producing the wrong results
1021     //   currently. (Prove via tests!)
1022     // - implement DerivedComponents for "plural/per", "plural/prefix",
1023     //   "case/times", "case/power", and "case/prefix" - although they're
1024     //   currently hardcoded. Languages with different rules are surely on the
1025     //   way.
1026     //
1027     // Currently we only use "case/per", "plural/times", "case/times", and
1028     // "case/power".
1029     //
1030     // This may have impact on multiSimpleFormatsToModifiers(...) below too?
1031     // These rules are currently (ICU 69) all the same and hard-coded below.
1032     UnicodeString perUnitPattern;
1033     if (!denominatorUnitData[PER_INDEX].isBogus()) {
1034         // If we have no denominator, we obtain the empty string:
1035         perUnitPattern = denominatorUnitData[PER_INDEX];
1036     } else {
1037         // 8. Set perPattern to be getValue([per], locale, length)
1038         UnicodeString rawPerUnitFormat = getCompoundValue("per", loc, width, status);
1039         // rawPerUnitFormat is something like "{0} per {1}"; we need to substitute in the secondary unit.
1040         SimpleFormatter perPatternFormatter(rawPerUnitFormat, 2, 2, status);
1041         if (U_FAILURE(status)) {
1042             return;
1043         }
1044         // Plural and placeholder handling for 7. denominatorUnitString:
1045         // TODO(icu-units#139): hardcoded:
1046         // <deriveComponent feature="plural" structure="per" value0="compound" value1="one"/>
1047         UnicodeString denominatorFormat =
1048             getWithPlural(denominatorUnitData, StandardPlural::Form::ONE, status);
1049         // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale.
1050         SimpleFormatter denominatorFormatter(denominatorFormat, 0, 1, status);
1051         if (U_FAILURE(status)) {
1052             return;
1053         }
1054         UnicodeString denominatorPattern = denominatorFormatter.getTextWithNoArguments();
1055         int32_t trimmedLen = denominatorPattern.length();
1056         const char16_t *trimmed = trimSpaceChars(denominatorPattern.getBuffer(), trimmedLen);
1057         UnicodeString denominatorString(false, trimmed, trimmedLen);
1058         // 9. If the denominatorString is empty, set result to
1059         //    [numeratorString], otherwise set result to format(perPattern,
1060         //    numeratorString, denominatorString)
1061         //
1062         // TODO(icu-units#28): Why does UnicodeString need to be explicit in the
1063         // following line?
1064         perPatternFormatter.format(UnicodeString(u"{0}"), denominatorString, perUnitPattern, status);
1065         if (U_FAILURE(status)) {
1066             return;
1067         }
1068     }
1069     if (perUnitPattern.length() == 0) {
1070         fillIn->simpleFormatsToModifiers(numeratorUnitData,
1071                                          {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
1072     } else {
1073         fillIn->multiSimpleFormatsToModifiers(numeratorUnitData, perUnitPattern,
1074                                               {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
1075     }
1076 
1077     // Gender
1078     //
1079     // TODO(icu-units#28): find out what gender to use in the absence of a first
1080     // value - e.g. what's the gender of "per-second"? Mentioned in CLDR-14253.
1081     //
1082     // gender/per deriveCompound rules don't say:
1083     // <deriveCompound feature="gender" structure="per" value="0"/> <!-- gender(gram-per-meter) ←  gender(gram) -->
1084     fillIn->gender = getGenderString(
1085         getDerivedGender(loc, "per", numeratorUnitData, denominatorUnitData, status), status);
1086 }
1087 
processPatternTimes(MeasureUnitImpl && productUnit,Locale loc,const UNumberUnitWidth & width,const char * caseVariant,UnicodeString * outArray,UErrorCode & status)1088 void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit,
1089                                           Locale loc,
1090                                           const UNumberUnitWidth &width,
1091                                           const char *caseVariant,
1092                                           UnicodeString *outArray,
1093                                           UErrorCode &status) {
1094     if (U_FAILURE(status)) {
1095         return;
1096     }
1097     if (productUnit.complexity == UMEASURE_UNIT_MIXED) {
1098         // These are handled by MixedUnitLongNameHandler
1099         status = U_UNSUPPORTED_ERROR;
1100         return;
1101     }
1102 
1103 #if U_DEBUG
1104     for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
1105         U_ASSERT(outArray[pluralIndex].length() == 0);
1106         U_ASSERT(!outArray[pluralIndex].isBogus());
1107     }
1108 #endif
1109 
1110     if (productUnit.identifier.isEmpty()) {
1111         // TODO(icu-units#28): consider when serialize should be called.
1112         // identifier might also be empty for MeasureUnit().
1113         productUnit.serialize(status);
1114     }
1115     if (U_FAILURE(status)) {
1116         return;
1117     }
1118     if (productUnit.identifier.length() == 0) {
1119         // MeasureUnit(): no units: return empty strings.
1120         return;
1121     }
1122 
1123     MeasureUnit builtinUnit;
1124     if (MeasureUnit::findBySubType(productUnit.identifier.toStringPiece(), &builtinUnit)) {
1125         // TODO(icu-units#145): spec doesn't cover builtin-per-builtin, it
1126         // breaks them all down. Do we want to drop this?
1127         // - findBySubType isn't super efficient, if we skip it and go to basic
1128         //   singles, we don't have to construct MeasureUnit's anymore.
1129         // - Check all the existing unit tests that fail without this: is it due
1130         //   to incorrect fallback via getMeasureData?
1131         // - Do those unit tests cover this code path representatively?
1132         if (builtinUnit != MeasureUnit()) {
1133             getMeasureData(loc, builtinUnit, width, caseVariant, outArray, status);
1134             maybeCalculateGender(loc, builtinUnit, outArray, status);
1135         }
1136         return;
1137     }
1138 
1139     // 2. Set timesPattern to be getValue(times, locale, length)
1140     UnicodeString timesPattern = getCompoundValue("times", loc, width, status);
1141     SimpleFormatter timesPatternFormatter(timesPattern, 2, 2, status);
1142     if (U_FAILURE(status)) {
1143         return;
1144     }
1145 
1146     PlaceholderPosition globalPlaceholder[ARRAY_LENGTH];
1147     char16_t globalJoinerChar = 0;
1148     // Numbered list items are from the algorithms at
1149     // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
1150     //
1151     // pattern(...) point 5:
1152     // - Set both globalPlaceholder and globalPlaceholderPosition to be empty
1153     //
1154     // 3. Set result to be empty
1155     for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
1156         // Initial state: empty string pattern, via all falling back to OTHER:
1157         if (pluralIndex == StandardPlural::Form::OTHER) {
1158             outArray[pluralIndex].remove();
1159         } else {
1160             outArray[pluralIndex].setToBogus();
1161         }
1162         globalPlaceholder[pluralIndex] = PH_EMPTY;
1163     }
1164 
1165     // Empty string represents "compound" (propagate the plural form).
1166     const char *pluralCategory = "";
1167     DerivedComponents derivedTimesPlurals(loc, "plural", "times");
1168     DerivedComponents derivedTimesCases(loc, "case", "times");
1169     DerivedComponents derivedPowerCases(loc, "case", "power");
1170 
1171     // 4. For each single_unit in product_unit
1172     for (int32_t singleUnitIndex = 0; singleUnitIndex < productUnit.singleUnits.length();
1173          singleUnitIndex++) {
1174         SingleUnitImpl *singleUnit = productUnit.singleUnits[singleUnitIndex];
1175         const char *singlePluralCategory;
1176         const char *singleCaseVariant;
1177         // TODO(icu-units#28): ensure we have unit tests that change/fail if we
1178         // assign incorrect case variants here:
1179         if (singleUnitIndex < productUnit.singleUnits.length() - 1) {
1180             // 4.1. If hasMultiple
1181             singlePluralCategory = derivedTimesPlurals.value0(pluralCategory);
1182             singleCaseVariant = derivedTimesCases.value0(caseVariant);
1183             pluralCategory = derivedTimesPlurals.value1(pluralCategory);
1184             caseVariant = derivedTimesCases.value1(caseVariant);
1185         } else {
1186             singlePluralCategory = derivedTimesPlurals.value1(pluralCategory);
1187             singleCaseVariant = derivedTimesCases.value1(caseVariant);
1188         }
1189 
1190         // 4.2. Get the gender of that single_unit
1191         MeasureUnit simpleUnit;
1192         if (!MeasureUnit::findBySubType(singleUnit->getSimpleUnitID(), &simpleUnit)) {
1193             // Ideally all simple units should be known, but they're not:
1194             // 100-kilometer is internally treated as a simple unit, but it is
1195             // not a built-in unit and does not have formatting data in CLDR 39.
1196             //
1197             // TODO(icu-units#28): test (desirable) invariants in unit tests.
1198             status = U_UNSUPPORTED_ERROR;
1199             return;
1200         }
1201         const char *gender = getGenderString(getGenderForBuiltin(loc, simpleUnit, status), status);
1202 
1203         // 4.3. If singleUnit starts with a dimensionality_prefix, such as 'square-'
1204         U_ASSERT(singleUnit->dimensionality > 0);
1205         int32_t dimensionality = singleUnit->dimensionality;
1206         UnicodeString dimensionalityPrefixPatterns[ARRAY_LENGTH];
1207         if (dimensionality != 1) {
1208             // 4.3.1. set dimensionalityPrefixPattern to be
1209             //   getValue(that dimensionality_prefix, locale, length, singlePluralCategory, singleCaseVariant, gender),
1210             //   such as "{0} kwadratowym"
1211             CharString dimensionalityKey("compound/power", status);
1212             dimensionalityKey.appendNumber(dimensionality, status);
1213             getInflectedMeasureData(dimensionalityKey.toStringPiece(), loc, width, gender,
1214                                     singleCaseVariant, dimensionalityPrefixPatterns, status);
1215             if (U_FAILURE(status)) {
1216                 // At the time of writing, only pow2 and pow3 are supported.
1217                 // Attempting to format other powers results in a
1218                 // U_RESOURCE_TYPE_MISMATCH. We convert the error if we
1219                 // understand it:
1220                 if (status == U_RESOURCE_TYPE_MISMATCH && dimensionality > 3) {
1221                     status = U_UNSUPPORTED_ERROR;
1222                 }
1223                 return;
1224             }
1225 
1226             // TODO(icu-units#139):
1227             // 4.3.2. set singlePluralCategory to be power0(singlePluralCategory)
1228 
1229             // 4.3.3. set singleCaseVariant to be power0(singleCaseVariant)
1230             singleCaseVariant = derivedPowerCases.value0(singleCaseVariant);
1231             // 4.3.4. remove the dimensionality_prefix from singleUnit
1232             singleUnit->dimensionality = 1;
1233         }
1234 
1235         // 4.4. if singleUnit starts with an si_prefix, such as 'centi'
1236         UMeasurePrefix prefix = singleUnit->unitPrefix;
1237         UnicodeString prefixPattern;
1238         if (prefix != UMEASURE_PREFIX_ONE) {
1239             // 4.4.1. set siPrefixPattern to be getValue(that si_prefix, locale,
1240             //        length), such as "centy{0}"
1241             CharString prefixKey;
1242             // prefixKey looks like "1024p3" or "10p-2":
1243             prefixKey.appendNumber(umeas_getPrefixBase(prefix), status);
1244             prefixKey.append('p', status);
1245             prefixKey.appendNumber(umeas_getPrefixPower(prefix), status);
1246             // Contains a pattern like "centy{0}".
1247             prefixPattern = getCompoundValue(prefixKey.toStringPiece(), loc, width, status);
1248 
1249             // 4.4.2. set singlePluralCategory to be prefix0(singlePluralCategory)
1250             //
1251             // TODO(icu-units#139): that refers to these rules:
1252             // <deriveComponent feature="plural" structure="prefix" value0="one" value1="compound"/>
1253             // though I'm not sure what other value they might end up having.
1254             //
1255             // 4.4.3. set singleCaseVariant to be prefix0(singleCaseVariant)
1256             //
1257             // TODO(icu-units#139): that refers to:
1258             // <deriveComponent feature="case" structure="prefix" value0="nominative"
1259             // value1="compound"/> but the prefix (value0) doesn't have case, the rest simply
1260             // propagates.
1261 
1262             // 4.4.4. remove the si_prefix from singleUnit
1263             singleUnit->unitPrefix = UMEASURE_PREFIX_ONE;
1264         }
1265 
1266         // 4.5. Set corePattern to be the getValue(singleUnit, locale, length,
1267         //      singlePluralCategory, singleCaseVariant), such as "{0} metrem"
1268         UnicodeString singleUnitArray[ARRAY_LENGTH];
1269         // At this point we are left with a Simple Unit:
1270         U_ASSERT(uprv_strcmp(singleUnit->build(status).getIdentifier(), singleUnit->getSimpleUnitID()) ==
1271                  0);
1272         getMeasureData(loc, singleUnit->build(status), width, singleCaseVariant, singleUnitArray,
1273                        status);
1274         if (U_FAILURE(status)) {
1275             // Shouldn't happen if we have data for all single units
1276             return;
1277         }
1278 
1279         // Calculate output gender
1280         if (!singleUnitArray[GENDER_INDEX].isBogus()) {
1281             U_ASSERT(!singleUnitArray[GENDER_INDEX].isEmpty());
1282             UnicodeString uVal;
1283 
1284             if (prefix != UMEASURE_PREFIX_ONE) {
1285                 singleUnitArray[GENDER_INDEX] =
1286                     getDerivedGender(loc, "prefix", singleUnitArray, nullptr, status);
1287             }
1288 
1289             if (dimensionality != 1) {
1290                 singleUnitArray[GENDER_INDEX] =
1291                     getDerivedGender(loc, "power", singleUnitArray, nullptr, status);
1292             }
1293 
1294             UnicodeString timesGenderRule = getDeriveCompoundRule(loc, "gender", "times", status);
1295             if (timesGenderRule.length() == 1) {
1296                 switch (timesGenderRule[0]) {
1297                 case u'0':
1298                     if (singleUnitIndex == 0) {
1299                         U_ASSERT(outArray[GENDER_INDEX].isBogus());
1300                         outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
1301                     }
1302                     break;
1303                 case u'1':
1304                     if (singleUnitIndex == productUnit.singleUnits.length() - 1) {
1305                         U_ASSERT(outArray[GENDER_INDEX].isBogus());
1306                         outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
1307                     }
1308                 }
1309             } else {
1310                 if (outArray[GENDER_INDEX].isBogus()) {
1311                     outArray[GENDER_INDEX] = timesGenderRule;
1312                 }
1313             }
1314         }
1315 
1316         // Calculate resulting patterns for each plural form
1317         for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
1318             StandardPlural::Form plural = static_cast<StandardPlural::Form>(pluralIndex);
1319 
1320             // singleUnitArray[pluralIndex] looks something like "{0} Meter"
1321             if (outArray[pluralIndex].isBogus()) {
1322                 if (singleUnitArray[pluralIndex].isBogus()) {
1323                     // Let the usual plural fallback mechanism take care of this
1324                     // plural form
1325                     continue;
1326                 } else {
1327                     // Since our singleUnit can have a plural form that outArray
1328                     // doesn't yet have (relying on fallback to OTHER), we start
1329                     // by grabbing it with the normal plural fallback mechanism
1330                     outArray[pluralIndex] = getWithPlural(outArray, plural, status);
1331                     if (U_FAILURE(status)) {
1332                         return;
1333                     }
1334                 }
1335             }
1336 
1337             if (uprv_strcmp(singlePluralCategory, "") != 0) {
1338                 plural = static_cast<StandardPlural::Form>(getIndex(singlePluralCategory, status));
1339             }
1340 
1341             // 4.6. Extract(corePattern, coreUnit, placeholder, placeholderPosition) from that pattern.
1342             UnicodeString coreUnit;
1343             PlaceholderPosition placeholderPosition;
1344             char16_t joinerChar;
1345             extractCorePattern(getWithPlural(singleUnitArray, plural, status), coreUnit,
1346                                placeholderPosition, joinerChar);
1347 
1348             // 4.7 If the position is middle, then fail
1349             if (placeholderPosition == PH_MIDDLE) {
1350                 status = U_UNSUPPORTED_ERROR;
1351                 return;
1352             }
1353 
1354             // 4.8. If globalPlaceholder is empty
1355             if (globalPlaceholder[pluralIndex] == PH_EMPTY) {
1356                 globalPlaceholder[pluralIndex] = placeholderPosition;
1357                 globalJoinerChar = joinerChar;
1358             } else {
1359                 // Expect all units involved to have the same placeholder position
1360                 U_ASSERT(globalPlaceholder[pluralIndex] == placeholderPosition);
1361                 // TODO(icu-units#28): Do we want to add a unit test that checks
1362                 // for consistent joiner chars? Probably not, given how
1363                 // inconsistent they are. File a CLDR ticket with examples?
1364             }
1365             // Now coreUnit would be just "Meter"
1366 
1367             // 4.9. If siPrefixPattern is not empty
1368             if (prefix != UMEASURE_PREFIX_ONE) {
1369                 SimpleFormatter prefixCompiled(prefixPattern, 1, 1, status);
1370                 if (U_FAILURE(status)) {
1371                     return;
1372                 }
1373 
1374                 // 4.9.1. Set coreUnit to be the combineLowercasing(locale, length, siPrefixPattern,
1375                 //        coreUnit)
1376                 UnicodeString tmp;
1377                 // combineLowercasing(locale, length, prefixPattern, coreUnit)
1378                 //
1379                 // TODO(icu-units#28): run this only if prefixPattern does not
1380                 // contain space characters - do languages "as", "bn", "hi",
1381                 // "kk", etc have concepts of upper and lower case?:
1382                 if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1383                     coreUnit.toLower(loc);
1384                 }
1385                 prefixCompiled.format(coreUnit, tmp, status);
1386                 if (U_FAILURE(status)) {
1387                     return;
1388                 }
1389                 coreUnit = tmp;
1390             }
1391 
1392             // 4.10. If dimensionalityPrefixPattern is not empty
1393             if (dimensionality != 1) {
1394                 SimpleFormatter dimensionalityCompiled(
1395                     getWithPlural(dimensionalityPrefixPatterns, plural, status), 1, 1, status);
1396                 if (U_FAILURE(status)) {
1397                     return;
1398                 }
1399 
1400                 // 4.10.1. Set coreUnit to be the combineLowercasing(locale, length,
1401                 //         dimensionalityPrefixPattern, coreUnit)
1402                 UnicodeString tmp;
1403                 // combineLowercasing(locale, length, prefixPattern, coreUnit)
1404                 //
1405                 // TODO(icu-units#28): run this only if prefixPattern does not
1406                 // contain space characters - do languages "as", "bn", "hi",
1407                 // "kk", etc have concepts of upper and lower case?:
1408                 if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1409                     coreUnit.toLower(loc);
1410                 }
1411                 dimensionalityCompiled.format(coreUnit, tmp, status);
1412                 if (U_FAILURE(status)) {
1413                     return;
1414                 }
1415                 coreUnit = tmp;
1416             }
1417 
1418             if (outArray[pluralIndex].length() == 0) {
1419                 // 4.11. If the result is empty, set result to be coreUnit
1420                 outArray[pluralIndex] = coreUnit;
1421             } else {
1422                 // 4.12. Otherwise set result to be format(timesPattern, result, coreUnit)
1423                 UnicodeString tmp;
1424                 timesPatternFormatter.format(outArray[pluralIndex], coreUnit, tmp, status);
1425                 outArray[pluralIndex] = tmp;
1426             }
1427         }
1428     }
1429     for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
1430         if (globalPlaceholder[pluralIndex] == PH_BEGINNING) {
1431             UnicodeString tmp;
1432             tmp.append(u"{0}", 3);
1433             if (globalJoinerChar != 0) {
1434                 tmp.append(globalJoinerChar);
1435             }
1436             tmp.append(outArray[pluralIndex]);
1437             outArray[pluralIndex] = tmp;
1438         } else if (globalPlaceholder[pluralIndex] == PH_END) {
1439             if (globalJoinerChar != 0) {
1440                 outArray[pluralIndex].append(globalJoinerChar);
1441             }
1442             outArray[pluralIndex].append(u"{0}", 3);
1443         }
1444     }
1445 }
1446 
getUnitDisplayName(const Locale & loc,const MeasureUnit & unit,UNumberUnitWidth width,UErrorCode & status)1447 UnicodeString LongNameHandler::getUnitDisplayName(
1448         const Locale& loc,
1449         const MeasureUnit& unit,
1450         UNumberUnitWidth width,
1451         UErrorCode& status) {
1452     if (U_FAILURE(status)) {
1453         return ICU_Utility::makeBogusString();
1454     }
1455     UnicodeString simpleFormats[ARRAY_LENGTH];
1456     getMeasureData(loc, unit, width, "", simpleFormats, status);
1457     return simpleFormats[DNAM_INDEX];
1458 }
1459 
getUnitPattern(const Locale & loc,const MeasureUnit & unit,UNumberUnitWidth width,StandardPlural::Form pluralForm,UErrorCode & status)1460 UnicodeString LongNameHandler::getUnitPattern(
1461         const Locale& loc,
1462         const MeasureUnit& unit,
1463         UNumberUnitWidth width,
1464         StandardPlural::Form pluralForm,
1465         UErrorCode& status) {
1466     if (U_FAILURE(status)) {
1467         return ICU_Utility::makeBogusString();
1468     }
1469     UnicodeString simpleFormats[ARRAY_LENGTH];
1470     getMeasureData(loc, unit, width, "", simpleFormats, status);
1471     // The above already handles fallback from other widths to short
1472     if (U_FAILURE(status)) {
1473         return ICU_Utility::makeBogusString();
1474     }
1475     // Now handle fallback from other plural forms to OTHER
1476     return (!(simpleFormats[pluralForm]).isBogus())? simpleFormats[pluralForm]:
1477             simpleFormats[StandardPlural::Form::OTHER];
1478 }
1479 
forCurrencyLongNames(const Locale & loc,const CurrencyUnit & currency,const PluralRules * rules,const MicroPropsGenerator * parent,UErrorCode & status)1480 LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit &currency,
1481                                                       const PluralRules *rules,
1482                                                       const MicroPropsGenerator *parent,
1483                                                       UErrorCode &status) {
1484     auto* result = new LongNameHandler(rules, parent);
1485     if (result == nullptr) {
1486         status = U_MEMORY_ALLOCATION_ERROR;
1487         return nullptr;
1488     }
1489     UnicodeString simpleFormats[ARRAY_LENGTH];
1490     getCurrencyLongNameData(loc, currency, simpleFormats, status);
1491     if (U_FAILURE(status)) { return nullptr; }
1492     result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status);
1493     // TODO(icu-units#28): currency gender?
1494     return result;
1495 }
1496 
simpleFormatsToModifiers(const UnicodeString * simpleFormats,Field field,UErrorCode & status)1497 void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field,
1498                                                UErrorCode &status) {
1499     for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
1500         StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
1501         UnicodeString simpleFormat = getWithPlural(simpleFormats, plural, status);
1502         if (U_FAILURE(status)) { return; }
1503         SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
1504         if (U_FAILURE(status)) { return; }
1505         fModifiers[i] = SimpleModifier(compiledFormatter, field, false, {this, SIGNUM_POS_ZERO, plural});
1506     }
1507 }
1508 
multiSimpleFormatsToModifiers(const UnicodeString * leadFormats,UnicodeString trailFormat,Field field,UErrorCode & status)1509 void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat,
1510                                                     Field field, UErrorCode &status) {
1511     SimpleFormatter trailCompiled(trailFormat, 1, 1, status);
1512     if (U_FAILURE(status)) { return; }
1513     for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
1514         StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
1515         UnicodeString leadFormat = getWithPlural(leadFormats, plural, status);
1516         if (U_FAILURE(status)) { return; }
1517         UnicodeString compoundFormat;
1518         if (leadFormat.length() == 0) {
1519             compoundFormat = trailFormat;
1520         } else {
1521             trailCompiled.format(leadFormat, compoundFormat, status);
1522             if (U_FAILURE(status)) { return; }
1523         }
1524         SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status);
1525         if (U_FAILURE(status)) { return; }
1526         fModifiers[i] = SimpleModifier(compoundCompiled, field, false, {this, SIGNUM_POS_ZERO, plural});
1527     }
1528 }
1529 
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1530 void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
1531                                       UErrorCode &status) const {
1532     if (parent != nullptr) {
1533         parent->processQuantity(quantity, micros, status);
1534     }
1535     StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status);
1536     micros.modOuter = &fModifiers[pluralForm];
1537     micros.gender = gender;
1538 }
1539 
getModifier(Signum,StandardPlural::Form plural) const1540 const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const {
1541     return &fModifiers[plural];
1542 }
1543 
forMeasureUnit(const Locale & loc,const MeasureUnit & mixedUnit,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,MixedUnitLongNameHandler * fillIn,UErrorCode & status)1544 void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc,
1545                                               const MeasureUnit &mixedUnit,
1546                                               const UNumberUnitWidth &width,
1547                                               const char *unitDisplayCase,
1548                                               const PluralRules *rules,
1549                                               const MicroPropsGenerator *parent,
1550                                               MixedUnitLongNameHandler *fillIn,
1551                                               UErrorCode &status) {
1552     U_ASSERT(mixedUnit.getComplexity(status) == UMEASURE_UNIT_MIXED);
1553     U_ASSERT(fillIn != nullptr);
1554     if (U_FAILURE(status)) {
1555         return;
1556     }
1557 
1558     MeasureUnitImpl temp;
1559     const MeasureUnitImpl &impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status);
1560     // Defensive, for production code:
1561     if (impl.complexity != UMEASURE_UNIT_MIXED) {
1562         // Should be using the normal LongNameHandler
1563         status = U_UNSUPPORTED_ERROR;
1564         return;
1565     }
1566 
1567     fillIn->fMixedUnitCount = impl.singleUnits.length();
1568     fillIn->fMixedUnitData.adoptInstead(new UnicodeString[fillIn->fMixedUnitCount * ARRAY_LENGTH]);
1569     for (int32_t i = 0; i < fillIn->fMixedUnitCount; i++) {
1570         // Grab data for each of the components.
1571         UnicodeString *unitData = &fillIn->fMixedUnitData[i * ARRAY_LENGTH];
1572         // TODO(CLDR-14582): check from the CLDR-14582 ticket whether this
1573         // propagation of unitDisplayCase is correct:
1574         getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitDisplayCase, unitData,
1575                        status);
1576         // TODO(ICU-21494): if we add support for gender for mixed units, we may
1577         // need maybeCalculateGender() here.
1578     }
1579 
1580     // TODO(icu-units#120): Make sure ICU doesn't output zero-valued
1581     // high-magnitude fields
1582     // * for mixed units count N, produce N listFormatters, one for each subset
1583     //   that might be formatted.
1584     UListFormatterWidth listWidth = ULISTFMT_WIDTH_SHORT;
1585     if (width == UNUM_UNIT_WIDTH_NARROW) {
1586         listWidth = ULISTFMT_WIDTH_NARROW;
1587     } else if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1588         // This might be the same as SHORT in most languages:
1589         listWidth = ULISTFMT_WIDTH_WIDE;
1590     }
1591     fillIn->fListFormatter.adoptInsteadAndCheckErrorCode(
1592         ListFormatter::createInstance(loc, ULISTFMT_TYPE_UNITS, listWidth, status), status);
1593     // TODO(ICU-21494): grab gender of each unit, calculate the gender
1594     // associated with this list formatter, save it for later.
1595     fillIn->rules = rules;
1596     fillIn->parent = parent;
1597 
1598     // We need a localised NumberFormatter for the numbers of the bigger units
1599     // (providing Arabic numerals, for example).
1600     fillIn->fNumberFormatter = NumberFormatter::withLocale(loc);
1601 }
1602 
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1603 void MixedUnitLongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
1604                                                UErrorCode &status) const {
1605     U_ASSERT(fMixedUnitCount > 1);
1606     if (parent != nullptr) {
1607         parent->processQuantity(quantity, micros, status);
1608     }
1609     micros.modOuter = getMixedUnitModifier(quantity, micros, status);
1610 }
1611 
getMixedUnitModifier(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1612 const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity &quantity,
1613                                                                MicroProps &micros,
1614                                                                UErrorCode &status) const {
1615     if (micros.mixedMeasuresCount == 0) {
1616         U_ASSERT(micros.mixedMeasuresCount > 0); // Mixed unit: we must have more than one unit value
1617         status = U_UNSUPPORTED_ERROR;
1618         return &micros.helpers.emptyWeakModifier;
1619     }
1620 
1621     // Algorithm:
1622     //
1623     // For the mixed-units measurement of: "3 yard, 1 foot, 2.6 inch", we should
1624     // find "3 yard" and "1 foot" in micros.mixedMeasures.
1625     //
1626     // Obtain long-names with plural forms corresponding to measure values:
1627     //   * {0} yards, {0} foot, {0} inches
1628     //
1629     // Format the integer values appropriately and modify with the format
1630     // strings:
1631     //   - 3 yards, 1 foot
1632     //
1633     // Use ListFormatter to combine, with one placeholder:
1634     //   - 3 yards, 1 foot and {0} inches
1635     //
1636     // Return a SimpleModifier for this pattern, letting the rest of the
1637     // pipeline take care of the remaining inches.
1638 
1639     LocalArray<UnicodeString> outputMeasuresList(new UnicodeString[fMixedUnitCount], status);
1640     if (U_FAILURE(status)) {
1641         return &micros.helpers.emptyWeakModifier;
1642     }
1643 
1644     StandardPlural::Form quantityPlural = StandardPlural::Form::OTHER;
1645     for (int32_t i = 0; i < micros.mixedMeasuresCount; i++) {
1646         DecimalQuantity fdec;
1647 
1648         // If numbers are negative, only the first number needs to have its
1649         // negative sign formatted.
1650         int64_t number = i > 0 ? std::abs(micros.mixedMeasures[i]) : micros.mixedMeasures[i];
1651 
1652         if (micros.indexOfQuantity == i) { // Insert placeholder for `quantity`
1653             // If quantity is not the first value and quantity is negative
1654             if (micros.indexOfQuantity > 0 && quantity.isNegative()) {
1655                 quantity.negate();
1656             }
1657 
1658             StandardPlural::Form quantityPlural =
1659                 utils::getPluralSafe(micros.rounder, rules, quantity, status);
1660             UnicodeString quantityFormatWithPlural =
1661                 getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], quantityPlural, status);
1662             SimpleFormatter quantityFormatter(quantityFormatWithPlural, 0, 1, status);
1663             quantityFormatter.format(UnicodeString(u"{0}"), outputMeasuresList[i], status);
1664         } else {
1665             fdec.setToLong(number);
1666             StandardPlural::Form pluralForm = utils::getStandardPlural(rules, fdec);
1667             UnicodeString simpleFormat =
1668                 getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], pluralForm, status);
1669             SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
1670             UnicodeString num;
1671             auto appendable = UnicodeStringAppendable(num);
1672 
1673             fNumberFormatter.formatDecimalQuantity(fdec, status).appendTo(appendable, status);
1674             compiledFormatter.format(num, outputMeasuresList[i], status);
1675         }
1676     }
1677 
1678     // TODO(ICU-21494): implement gender for lists of mixed units. Presumably we
1679     // can set micros.gender to the gender associated with the list formatter in
1680     // use below (once we have correct support for that). And then document this
1681     // appropriately? "getMixedUnitModifier" doesn't sound like it would do
1682     // something like this.
1683 
1684     // Combine list into a "premixed" pattern
1685     UnicodeString premixedFormatPattern;
1686     fListFormatter->format(outputMeasuresList.getAlias(), fMixedUnitCount, premixedFormatPattern,
1687                            status);
1688     SimpleFormatter premixedCompiled(premixedFormatPattern, 0, 1, status);
1689     if (U_FAILURE(status)) {
1690         return &micros.helpers.emptyWeakModifier;
1691     }
1692 
1693     micros.helpers.mixedUnitModifier =
1694         SimpleModifier(premixedCompiled, kUndefinedField, false, {this, SIGNUM_POS_ZERO, quantityPlural});
1695     return &micros.helpers.mixedUnitModifier;
1696 }
1697 
getModifier(Signum,StandardPlural::Form) const1698 const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/,
1699                                                       StandardPlural::Form /*plural*/) const {
1700     // TODO(icu-units#28): investigate this method when investigating where
1701     // ModifierStore::getModifier() gets used. To be sure it remains
1702     // unreachable:
1703     UPRV_UNREACHABLE_EXIT;
1704     return nullptr;
1705 }
1706 
forMeasureUnits(const Locale & loc,const MaybeStackVector<MeasureUnit> & units,const UNumberUnitWidth & width,const char * unitDisplayCase,const PluralRules * rules,const MicroPropsGenerator * parent,UErrorCode & status)1707 LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc,
1708                                                           const MaybeStackVector<MeasureUnit> &units,
1709                                                           const UNumberUnitWidth &width,
1710                                                           const char *unitDisplayCase,
1711                                                           const PluralRules *rules,
1712                                                           const MicroPropsGenerator *parent,
1713                                                           UErrorCode &status) {
1714     LocalPointer<LongNameMultiplexer> result(new LongNameMultiplexer(parent), status);
1715     if (U_FAILURE(status)) {
1716         return nullptr;
1717     }
1718     U_ASSERT(units.length() > 0);
1719     if (result->fHandlers.resize(units.length()) == nullptr) {
1720         status = U_MEMORY_ALLOCATION_ERROR;
1721         return nullptr;
1722     }
1723     result->fMeasureUnits.adoptInstead(new MeasureUnit[units.length()]);
1724     for (int32_t i = 0, length = units.length(); i < length; i++) {
1725         const MeasureUnit &unit = *units[i];
1726         result->fMeasureUnits[i] = unit;
1727         if (unit.getComplexity(status) == UMEASURE_UNIT_MIXED) {
1728             MixedUnitLongNameHandler *mlnh = result->fMixedUnitHandlers.createAndCheckErrorCode(status);
1729             MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, nullptr,
1730                                                      mlnh, status);
1731             result->fHandlers[i] = mlnh;
1732         } else {
1733             LongNameHandler *lnh = result->fLongNameHandlers.createAndCheckErrorCode(status);
1734             LongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, nullptr, lnh, status);
1735             result->fHandlers[i] = lnh;
1736         }
1737         if (U_FAILURE(status)) {
1738             return nullptr;
1739         }
1740     }
1741     return result.orphan();
1742 }
1743 
processQuantity(DecimalQuantity & quantity,MicroProps & micros,UErrorCode & status) const1744 void LongNameMultiplexer::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
1745                                           UErrorCode &status) const {
1746     // We call parent->processQuantity() from the Multiplexer, instead of
1747     // letting LongNameHandler handle it: we don't know which LongNameHandler to
1748     // call until we've called the parent!
1749     fParent->processQuantity(quantity, micros, status);
1750 
1751     // Call the correct LongNameHandler based on outputUnit
1752     for (int i = 0; i < fHandlers.getCapacity(); i++) {
1753         if (fMeasureUnits[i] == micros.outputUnit) {
1754             fHandlers[i]->processQuantity(quantity, micros, status);
1755             return;
1756         }
1757     }
1758     if (U_FAILURE(status)) {
1759         return;
1760     }
1761     // We shouldn't receive any outputUnit for which we haven't already got a
1762     // LongNameHandler:
1763     status = U_INTERNAL_PROGRAM_ERROR;
1764 }
1765 
1766 #endif /* #if !UCONFIG_NO_FORMATTING */
1767