xref: /aosp_15_r20/external/cronet/third_party/icu/source/i18n/units_data.cpp (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #include "bytesinkutil.h"
9 #include "cstring.h"
10 #include "measunit_impl.h"
11 #include "number_decimalquantity.h"
12 #include "resource.h"
13 #include "uassert.h"
14 #include "ulocimp.h"
15 #include "unicode/locid.h"
16 #include "unicode/unistr.h"
17 #include "unicode/ures.h"
18 #include "units_data.h"
19 #include "uresimp.h"
20 #include "util.h"
21 #include <utility>
22 
23 U_NAMESPACE_BEGIN
24 namespace units {
25 
26 namespace {
27 
28 using icu::number::impl::DecimalQuantity;
29 
trimSpaces(CharString & factor,UErrorCode & status)30 void trimSpaces(CharString& factor, UErrorCode& status){
31    CharString trimmed;
32    for (int i = 0 ; i < factor.length(); i++) {
33        if (factor[i] == ' ') continue;
34 
35        trimmed.append(factor[i], status);
36    }
37 
38    factor = std::move(trimmed);
39 }
40 
41 /**
42  * A ResourceSink that collects conversion rate information.
43  *
44  * This class is for use by ures_getAllItemsWithFallback.
45  */
46 class ConversionRateDataSink : public ResourceSink {
47   public:
48     /**
49      * Constructor.
50      * @param out The vector to which ConversionRateInfo instances are to be
51      * added. This vector must outlive the use of the ResourceSink.
52      */
ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> * out)53     explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {}
54 
55     /**
56      * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
57      * conversion rates that are found in `value` to the output vector.
58      *
59      * @param source This string must be "convertUnits": the resource that this
60      * class supports reading.
61      * @param value The "convertUnits" resource, containing unit conversion rate
62      * information.
63      * @param noFallback Ignored.
64      * @param status The standard ICU error code output parameter.
65      */
put(const char * source,ResourceValue & value,UBool,UErrorCode & status)66     void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
67         if (U_FAILURE(status)) { return; }
68         if (uprv_strcmp(source, "convertUnits") != 0) {
69             // This is very strict, however it is the cheapest way to be sure
70             // that with `value`, we're looking at the convertUnits table.
71             status = U_ILLEGAL_ARGUMENT_ERROR;
72             return;
73         }
74         ResourceTable conversionRateTable = value.getTable(status);
75         const char *srcUnit;
76         // We're reusing `value`, which seems to be a common pattern:
77         for (int32_t unit = 0; conversionRateTable.getKeyAndValue(unit, srcUnit, value); unit++) {
78             ResourceTable unitTable = value.getTable(status);
79             const char *key;
80             UnicodeString baseUnit = ICU_Utility::makeBogusString();
81             UnicodeString factor = ICU_Utility::makeBogusString();
82             UnicodeString offset = ICU_Utility::makeBogusString();
83             UnicodeString systems = ICU_Utility::makeBogusString();
84             for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) {
85                 if (uprv_strcmp(key, "target") == 0) {
86                     baseUnit = value.getUnicodeString(status);
87                 } else if (uprv_strcmp(key, "factor") == 0) {
88                     factor = value.getUnicodeString(status);
89                 } else if (uprv_strcmp(key, "offset") == 0) {
90                     offset = value.getUnicodeString(status);
91                 } else if (uprv_strcmp(key, "systems") == 0) {
92                     systems = value.getUnicodeString(status);
93                 }
94             }
95             if (U_FAILURE(status)) { return; }
96             if (baseUnit.isBogus() || factor.isBogus()) {
97                 // We could not find a usable conversion rate: bad resource.
98                 status = U_MISSING_RESOURCE_ERROR;
99                 return;
100             }
101 
102             // We don't have this ConversionRateInfo yet: add it.
103             ConversionRateInfo *cr = outVector->emplaceBack();
104             if (!cr) {
105                 status = U_MEMORY_ALLOCATION_ERROR;
106                 return;
107             } else {
108                 cr->sourceUnit.append(srcUnit, status);
109                 cr->baseUnit.appendInvariantChars(baseUnit, status);
110                 cr->factor.appendInvariantChars(factor, status);
111                 cr->systems.appendInvariantChars(systems, status);
112                 trimSpaces(cr->factor, status);
113                 if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status);
114             }
115         }
116         return;
117     }
118 
119   private:
120     MaybeStackVector<ConversionRateInfo> *outVector;
121 };
122 
operator <(const UnitPreferenceMetadata & a,const UnitPreferenceMetadata & b)123 bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) {
124     return a.compareTo(b) < 0;
125 }
126 
127 /**
128  * A ResourceSink that collects unit preferences information.
129  *
130  * This class is for use by ures_getAllItemsWithFallback.
131  */
132 class UnitPreferencesSink : public ResourceSink {
133   public:
134     /**
135      * Constructor.
136      * @param outPrefs The vector to which UnitPreference instances are to be
137      * added. This vector must outlive the use of the ResourceSink.
138      * @param outMetadata  The vector to which UnitPreferenceMetadata instances
139      * are to be added. This vector must outlive the use of the ResourceSink.
140      */
UnitPreferencesSink(MaybeStackVector<UnitPreference> * outPrefs,MaybeStackVector<UnitPreferenceMetadata> * outMetadata)141     explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs,
142                                  MaybeStackVector<UnitPreferenceMetadata> *outMetadata)
143         : preferences(outPrefs), metadata(outMetadata) {}
144 
145     /**
146      * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
147      * preferences info that are found in `value` to the output vector.
148      *
149      * @param source This string must be "unitPreferenceData": the resource that
150      * this class supports reading.
151      * @param value The "unitPreferenceData" resource, containing unit
152      * preferences data.
153      * @param noFallback Ignored.
154      * @param status The standard ICU error code output parameter. Note: if an
155      * error is returned, outPrefs and outMetadata may be inconsistent.
156      */
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)157     void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
158         if (U_FAILURE(status)) { return; }
159         if (uprv_strcmp(key, "unitPreferenceData") != 0) {
160             // This is very strict, however it is the cheapest way to be sure
161             // that with `value`, we're looking at the convertUnits table.
162             status = U_ILLEGAL_ARGUMENT_ERROR;
163             return;
164         }
165         // The unitPreferenceData structure (see data/misc/units.txt) contains a
166         // hierarchy of category/usage/region, within which are a set of
167         // preferences. Hence three for-loops and another loop for the
168         // preferences themselves:
169         ResourceTable unitPreferenceDataTable = value.getTable(status);
170         const char *category;
171         for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) {
172             ResourceTable categoryTable = value.getTable(status);
173             const char *usage;
174             for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) {
175                 ResourceTable regionTable = value.getTable(status);
176                 const char *region;
177                 for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) {
178                     // `value` now contains the set of preferences for
179                     // category/usage/region.
180                     ResourceArray unitPrefs = value.getArray(status);
181                     if (U_FAILURE(status)) { return; }
182                     int32_t prefLen = unitPrefs.getSize();
183 
184                     // Update metadata for this set of preferences.
185                     UnitPreferenceMetadata *meta = metadata->emplaceBack(
186                         category, usage, region, preferences->length(), prefLen, status);
187                     if (!meta) {
188                         status = U_MEMORY_ALLOCATION_ERROR;
189                         return;
190                     }
191                     if (U_FAILURE(status)) { return; }
192                     if (metadata->length() > 1) {
193                         // Verify that unit preferences are sorted and
194                         // without duplicates.
195                         if (!(*(*metadata)[metadata->length() - 2] <
196                               *(*metadata)[metadata->length() - 1])) {
197                             status = U_INVALID_FORMAT_ERROR;
198                             return;
199                         }
200                     }
201 
202                     // Collect the individual preferences.
203                     for (int32_t i = 0; unitPrefs.getValue(i, value); i++) {
204                         UnitPreference *up = preferences->emplaceBack();
205                         if (!up) {
206                             status = U_MEMORY_ALLOCATION_ERROR;
207                             return;
208                         }
209                         ResourceTable unitPref = value.getTable(status);
210                         if (U_FAILURE(status)) { return; }
211                         for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) {
212                             if (uprv_strcmp(key, "unit") == 0) {
213                                 int32_t length;
214                                 const char16_t *u = value.getString(length, status);
215                                 up->unit.appendInvariantChars(u, length, status);
216                             } else if (uprv_strcmp(key, "geq") == 0) {
217                                 int32_t length;
218                                 const char16_t *g = value.getString(length, status);
219                                 CharString geq;
220                                 geq.appendInvariantChars(g, length, status);
221                                 DecimalQuantity dq;
222                                 dq.setToDecNumber(geq.data(), status);
223                                 up->geq = dq.toDouble();
224                             } else if (uprv_strcmp(key, "skeleton") == 0) {
225                                 up->skeleton = value.getUnicodeString(status);
226                             }
227                         }
228                     }
229                 }
230             }
231         }
232     }
233 
234   private:
235     MaybeStackVector<UnitPreference> *preferences;
236     MaybeStackVector<UnitPreferenceMetadata> *metadata;
237 };
238 
binarySearch(const MaybeStackVector<UnitPreferenceMetadata> * metadata,const UnitPreferenceMetadata & desired,bool * foundCategory,bool * foundUsage,bool * foundRegion,UErrorCode & status)239 int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
240                      const UnitPreferenceMetadata &desired, bool *foundCategory, bool *foundUsage,
241                      bool *foundRegion, UErrorCode &status) {
242     if (U_FAILURE(status)) { return -1; }
243     int32_t start = 0;
244     int32_t end = metadata->length();
245     *foundCategory = false;
246     *foundUsage = false;
247     *foundRegion = false;
248     while (start < end) {
249         int32_t mid = (start + end) / 2;
250         int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion);
251         if (cmp < 0) {
252             start = mid + 1;
253         } else if (cmp > 0) {
254             end = mid;
255         } else {
256             return mid;
257         }
258     }
259     return -1;
260 }
261 
262 /**
263  * Finds the UnitPreferenceMetadata instance that matches the given category,
264  * usage and region: if missing, region falls back to "001", and usage
265  * repeatedly drops tailing components, eventually trying "default"
266  * ("land-agriculture-grain" -> "land-agriculture" -> "land" -> "default").
267  *
268  * @param metadata The full list of UnitPreferenceMetadata instances.
269  * @param category The category to search for. See getUnitCategory().
270  * @param usage The usage for which formatting preferences is needed. If the
271  * given usage is not known, automatic fallback occurs, see function description
272  * above.
273  * @param region The region for which preferences are needed. If there are no
274  * region-specific preferences, this function automatically falls back to the
275  * "001" region (global).
276  * @param status The standard ICU error code output parameter.
277  *   * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR.
278  *   * If fallback to "default" or "001" didn't resolve, status will be
279  *     U_MISSING_RESOURCE.
280  * @return The index into the metadata vector which represents the appropriate
281  * preferences. If appropriate preferences are not found, -1 is returned.
282  */
getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> * metadata,StringPiece category,StringPiece usage,StringPiece region,UErrorCode & status)283 int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
284                                    StringPiece category, StringPiece usage, StringPiece region,
285                                    UErrorCode &status) {
286     if (U_FAILURE(status)) { return -1; }
287     bool foundCategory, foundUsage, foundRegion;
288     UnitPreferenceMetadata desired(category, usage, region, -1, -1, status);
289     int32_t idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
290     if (U_FAILURE(status)) { return -1; }
291     if (idx >= 0) { return idx; }
292     if (!foundCategory) {
293         // TODO: failures can happen if units::getUnitCategory returns a category
294         // that does not appear in unitPreferenceData. Do we want a unit test that
295         // checks unitPreferenceData has full coverage of categories? Or just trust
296         // CLDR?
297         status = U_ILLEGAL_ARGUMENT_ERROR;
298         return -1;
299     }
300     U_ASSERT(foundCategory);
301     while (!foundUsage) {
302         int32_t lastDashIdx = desired.usage.lastIndexOf('-');
303         if (lastDashIdx > 0) {
304             desired.usage.truncate(lastDashIdx);
305         } else if (uprv_strcmp(desired.usage.data(), "default") != 0) {
306             desired.usage.truncate(0).append("default", status);
307         } else {
308             // "default" is not supposed to be missing for any valid category.
309             status = U_MISSING_RESOURCE_ERROR;
310             return -1;
311         }
312         idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
313         if (U_FAILURE(status)) { return -1; }
314     }
315     U_ASSERT(foundCategory);
316     U_ASSERT(foundUsage);
317     if (!foundRegion) {
318         if (uprv_strcmp(desired.region.data(), "001") != 0) {
319             desired.region.truncate(0).append("001", status);
320             idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
321         }
322         if (!foundRegion) {
323             // "001" is not supposed to be missing for any valid usage.
324             status = U_MISSING_RESOURCE_ERROR;
325             return -1;
326         }
327     }
328     U_ASSERT(foundCategory);
329     U_ASSERT(foundUsage);
330     U_ASSERT(foundRegion);
331     U_ASSERT(idx >= 0);
332     return idx;
333 }
334 
335 } // namespace
336 
UnitPreferenceMetadata(StringPiece category,StringPiece usage,StringPiece region,int32_t prefsOffset,int32_t prefsCount,UErrorCode & status)337 UnitPreferenceMetadata::UnitPreferenceMetadata(StringPiece category, StringPiece usage,
338                                                StringPiece region, int32_t prefsOffset,
339                                                int32_t prefsCount, UErrorCode &status) {
340     this->category.append(category, status);
341     this->usage.append(usage, status);
342     this->region.append(region, status);
343     this->prefsOffset = prefsOffset;
344     this->prefsCount = prefsCount;
345 }
346 
compareTo(const UnitPreferenceMetadata & other) const347 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const {
348     int32_t cmp = uprv_strcmp(category.data(), other.category.data());
349     if (cmp == 0) {
350         cmp = uprv_strcmp(usage.data(), other.usage.data());
351     }
352     if (cmp == 0) {
353         cmp = uprv_strcmp(region.data(), other.region.data());
354     }
355     return cmp;
356 }
357 
compareTo(const UnitPreferenceMetadata & other,bool * foundCategory,bool * foundUsage,bool * foundRegion) const358 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory,
359                                           bool *foundUsage, bool *foundRegion) const {
360     int32_t cmp = uprv_strcmp(category.data(), other.category.data());
361     if (cmp == 0) {
362         *foundCategory = true;
363         cmp = uprv_strcmp(usage.data(), other.usage.data());
364     }
365     if (cmp == 0) {
366         *foundUsage = true;
367         cmp = uprv_strcmp(region.data(), other.region.data());
368     }
369     if (cmp == 0) {
370         *foundRegion = true;
371     }
372     return cmp;
373 }
374 
375 // TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace?
getAllConversionRates(MaybeStackVector<ConversionRateInfo> & result,UErrorCode & status)376 void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) {
377     LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
378     ConversionRateDataSink sink(&result);
379     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", sink, status);
380 }
381 
extractConversionInfo(StringPiece source,UErrorCode & status) const382 const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece source,
383                                                                  UErrorCode &status) const {
384     for (size_t i = 0, n = conversionInfo_.length(); i < n; ++i) {
385         if (conversionInfo_[i]->sourceUnit.toStringPiece() == source) return conversionInfo_[i];
386     }
387 
388     status = U_INTERNAL_PROGRAM_ERROR;
389     return nullptr;
390 }
391 
UnitPreferences(UErrorCode & status)392 U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) {
393     LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
394     UnitPreferencesSink sink(&unitPrefs_, &metadata_);
395     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status);
396 }
397 
getKeyWordValue(const Locale & locale,StringPiece kw,UErrorCode & status)398 CharString getKeyWordValue(const Locale &locale, StringPiece kw, UErrorCode &status) {
399     CharString result;
400     if (U_FAILURE(status)) { return result; }
401     {
402         CharStringByteSink sink(&result);
403         locale.getKeywordValue(kw, sink, status);
404     }
405     if (U_SUCCESS(status) && result.isEmpty()) {
406         status = U_MISSING_RESOURCE_ERROR;
407     }
408     return result;
409 }
410 
411 MaybeStackVector<UnitPreference>
getPreferencesFor(StringPiece category,StringPiece usage,const Locale & locale,UErrorCode & status) const412     U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringPiece usage,
413                                                   const Locale &locale, UErrorCode &status) const {
414 
415     MaybeStackVector<UnitPreference> result;
416 
417     // TODO: remove this once all the categories are allowed.
418     // WARNING: when this is removed please make sure to keep the "fahrenhe" => "fahrenheit" mapping
419     UErrorCode internalMuStatus = U_ZERO_ERROR;
420     if (category.compare("temperature") == 0) {
421         CharString localeUnitCharString = getKeyWordValue(locale, "mu", internalMuStatus);
422         if (U_SUCCESS(internalMuStatus)) {
423             // The value for -u-mu- is `fahrenhe`, but CLDR and everything else uses `fahrenheit`
424             if (localeUnitCharString == "fahrenhe") {
425                 localeUnitCharString = CharString("fahrenheit", status);
426             }
427             // TODO: use the unit category as Java especially when all the categories are allowed..
428             if (localeUnitCharString == "celsius"
429                 || localeUnitCharString == "fahrenheit"
430                 || localeUnitCharString == "kelvin"
431             ) {
432                 UnitPreference unitPref;
433                 unitPref.unit.append(localeUnitCharString, status);
434                 result.emplaceBackAndCheckErrorCode(status, unitPref);
435                 return result;
436             }
437         }
438     }
439 
440     char regionBuf[8];
441     ulocimp_getRegionForSupplementalData(locale.getName(), false, regionBuf, 8, &status);
442     CharString region(regionBuf, status);
443 
444     // Check the locale system tag, e.g `ms=metric`.
445     UErrorCode internalMeasureTagStatus = U_ZERO_ERROR;
446     CharString localeSystem = getKeyWordValue(locale, "measure", internalMeasureTagStatus);
447     bool isLocaleSystem = false;
448     if (U_SUCCESS(internalMeasureTagStatus) && (localeSystem == "metric" || localeSystem == "ussystem" || localeSystem == "uksystem")) {
449         isLocaleSystem = true;
450     }
451 
452     int32_t idx =
453         getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status);
454     if (U_FAILURE(status)) {
455         return result;
456     }
457 
458     U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`.
459     const UnitPreferenceMetadata *m = metadata_[idx];
460 
461     if (isLocaleSystem) {
462         // if the locale ID specifies a measurment system, check if ALL of the units we got back
463         // are members of that system (or are "metric_adjacent", which we consider to match all
464         // the systems)
465         bool unitsMatchSystem = true;
466         ConversionRates rates(status);
467         for (int32_t i = 0; unitsMatchSystem && i < m->prefsCount; i++) {
468             const UnitPreference& unitPref = *(unitPrefs_[i + m->prefsOffset]);
469             MeasureUnitImpl measureUnit = MeasureUnitImpl::forIdentifier(unitPref.unit.data(), status);
470             for (int32_t j = 0; unitsMatchSystem && j < measureUnit.singleUnits.length(); j++) {
471                 const SingleUnitImpl* singleUnit = measureUnit.singleUnits[j];
472                 const ConversionRateInfo* rateInfo = rates.extractConversionInfo(singleUnit->getSimpleUnitID(), status);
473                 CharString systems(rateInfo->systems, status);
474                 if (!systems.contains("metric_adjacent")) { // "metric-adjacent" is considered to match all the locale systems
475                     if (!systems.contains(localeSystem.data())) {
476                         unitsMatchSystem = false;
477                     }
478                 }
479             }
480         }
481 
482         // if any of the units we got back above don't match the mearurement system the locale ID asked for,
483         // throw out the region and just load the units for the base region for the requested measurement system
484         if (!unitsMatchSystem) {
485             region.clear();
486             if (localeSystem == "ussystem") {
487                 region.append("US", status);
488             } else if (localeSystem == "uksystem") {
489                 region.append("GB", status);
490             } else {
491                 region.append("001", status);
492             }
493             idx = getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status);
494             if (U_FAILURE(status)) {
495                 return result;
496             }
497 
498             m = metadata_[idx];
499         }
500     }
501 
502     for (int32_t i = 0; i < m->prefsCount; i++) {
503         result.emplaceBackAndCheckErrorCode(status, *(unitPrefs_[i + m->prefsOffset]));
504     }
505     return result;
506 }
507 
508 } // namespace units
509 U_NAMESPACE_END
510 
511 #endif /* #if !UCONFIG_NO_FORMATTING */
512