1 // © 2020 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #include "bytesinkutil.h"
9 #include "cstring.h"
10 #include "measunit_impl.h"
11 #include "number_decimalquantity.h"
12 #include "resource.h"
13 #include "uassert.h"
14 #include "ulocimp.h"
15 #include "unicode/locid.h"
16 #include "unicode/unistr.h"
17 #include "unicode/ures.h"
18 #include "units_data.h"
19 #include "uresimp.h"
20 #include "util.h"
21 #include <utility>
22
23 U_NAMESPACE_BEGIN
24 namespace units {
25
26 namespace {
27
28 using icu::number::impl::DecimalQuantity;
29
trimSpaces(CharString & factor,UErrorCode & status)30 void trimSpaces(CharString& factor, UErrorCode& status){
31 CharString trimmed;
32 for (int i = 0 ; i < factor.length(); i++) {
33 if (factor[i] == ' ') continue;
34
35 trimmed.append(factor[i], status);
36 }
37
38 factor = std::move(trimmed);
39 }
40
41 /**
42 * A ResourceSink that collects conversion rate information.
43 *
44 * This class is for use by ures_getAllItemsWithFallback.
45 */
46 class ConversionRateDataSink : public ResourceSink {
47 public:
48 /**
49 * Constructor.
50 * @param out The vector to which ConversionRateInfo instances are to be
51 * added. This vector must outlive the use of the ResourceSink.
52 */
ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> * out)53 explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {}
54
55 /**
56 * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
57 * conversion rates that are found in `value` to the output vector.
58 *
59 * @param source This string must be "convertUnits": the resource that this
60 * class supports reading.
61 * @param value The "convertUnits" resource, containing unit conversion rate
62 * information.
63 * @param noFallback Ignored.
64 * @param status The standard ICU error code output parameter.
65 */
put(const char * source,ResourceValue & value,UBool,UErrorCode & status)66 void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
67 if (U_FAILURE(status)) { return; }
68 if (uprv_strcmp(source, "convertUnits") != 0) {
69 // This is very strict, however it is the cheapest way to be sure
70 // that with `value`, we're looking at the convertUnits table.
71 status = U_ILLEGAL_ARGUMENT_ERROR;
72 return;
73 }
74 ResourceTable conversionRateTable = value.getTable(status);
75 const char *srcUnit;
76 // We're reusing `value`, which seems to be a common pattern:
77 for (int32_t unit = 0; conversionRateTable.getKeyAndValue(unit, srcUnit, value); unit++) {
78 ResourceTable unitTable = value.getTable(status);
79 const char *key;
80 UnicodeString baseUnit = ICU_Utility::makeBogusString();
81 UnicodeString factor = ICU_Utility::makeBogusString();
82 UnicodeString offset = ICU_Utility::makeBogusString();
83 UnicodeString systems = ICU_Utility::makeBogusString();
84 for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) {
85 if (uprv_strcmp(key, "target") == 0) {
86 baseUnit = value.getUnicodeString(status);
87 } else if (uprv_strcmp(key, "factor") == 0) {
88 factor = value.getUnicodeString(status);
89 } else if (uprv_strcmp(key, "offset") == 0) {
90 offset = value.getUnicodeString(status);
91 } else if (uprv_strcmp(key, "systems") == 0) {
92 systems = value.getUnicodeString(status);
93 }
94 }
95 if (U_FAILURE(status)) { return; }
96 if (baseUnit.isBogus() || factor.isBogus()) {
97 // We could not find a usable conversion rate: bad resource.
98 status = U_MISSING_RESOURCE_ERROR;
99 return;
100 }
101
102 // We don't have this ConversionRateInfo yet: add it.
103 ConversionRateInfo *cr = outVector->emplaceBack();
104 if (!cr) {
105 status = U_MEMORY_ALLOCATION_ERROR;
106 return;
107 } else {
108 cr->sourceUnit.append(srcUnit, status);
109 cr->baseUnit.appendInvariantChars(baseUnit, status);
110 cr->factor.appendInvariantChars(factor, status);
111 cr->systems.appendInvariantChars(systems, status);
112 trimSpaces(cr->factor, status);
113 if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status);
114 }
115 }
116 return;
117 }
118
119 private:
120 MaybeStackVector<ConversionRateInfo> *outVector;
121 };
122
operator <(const UnitPreferenceMetadata & a,const UnitPreferenceMetadata & b)123 bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) {
124 return a.compareTo(b) < 0;
125 }
126
127 /**
128 * A ResourceSink that collects unit preferences information.
129 *
130 * This class is for use by ures_getAllItemsWithFallback.
131 */
132 class UnitPreferencesSink : public ResourceSink {
133 public:
134 /**
135 * Constructor.
136 * @param outPrefs The vector to which UnitPreference instances are to be
137 * added. This vector must outlive the use of the ResourceSink.
138 * @param outMetadata The vector to which UnitPreferenceMetadata instances
139 * are to be added. This vector must outlive the use of the ResourceSink.
140 */
UnitPreferencesSink(MaybeStackVector<UnitPreference> * outPrefs,MaybeStackVector<UnitPreferenceMetadata> * outMetadata)141 explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs,
142 MaybeStackVector<UnitPreferenceMetadata> *outMetadata)
143 : preferences(outPrefs), metadata(outMetadata) {}
144
145 /**
146 * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
147 * preferences info that are found in `value` to the output vector.
148 *
149 * @param source This string must be "unitPreferenceData": the resource that
150 * this class supports reading.
151 * @param value The "unitPreferenceData" resource, containing unit
152 * preferences data.
153 * @param noFallback Ignored.
154 * @param status The standard ICU error code output parameter. Note: if an
155 * error is returned, outPrefs and outMetadata may be inconsistent.
156 */
put(const char * key,ResourceValue & value,UBool,UErrorCode & status)157 void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
158 if (U_FAILURE(status)) { return; }
159 if (uprv_strcmp(key, "unitPreferenceData") != 0) {
160 // This is very strict, however it is the cheapest way to be sure
161 // that with `value`, we're looking at the convertUnits table.
162 status = U_ILLEGAL_ARGUMENT_ERROR;
163 return;
164 }
165 // The unitPreferenceData structure (see data/misc/units.txt) contains a
166 // hierarchy of category/usage/region, within which are a set of
167 // preferences. Hence three for-loops and another loop for the
168 // preferences themselves:
169 ResourceTable unitPreferenceDataTable = value.getTable(status);
170 const char *category;
171 for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) {
172 ResourceTable categoryTable = value.getTable(status);
173 const char *usage;
174 for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) {
175 ResourceTable regionTable = value.getTable(status);
176 const char *region;
177 for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) {
178 // `value` now contains the set of preferences for
179 // category/usage/region.
180 ResourceArray unitPrefs = value.getArray(status);
181 if (U_FAILURE(status)) { return; }
182 int32_t prefLen = unitPrefs.getSize();
183
184 // Update metadata for this set of preferences.
185 UnitPreferenceMetadata *meta = metadata->emplaceBack(
186 category, usage, region, preferences->length(), prefLen, status);
187 if (!meta) {
188 status = U_MEMORY_ALLOCATION_ERROR;
189 return;
190 }
191 if (U_FAILURE(status)) { return; }
192 if (metadata->length() > 1) {
193 // Verify that unit preferences are sorted and
194 // without duplicates.
195 if (!(*(*metadata)[metadata->length() - 2] <
196 *(*metadata)[metadata->length() - 1])) {
197 status = U_INVALID_FORMAT_ERROR;
198 return;
199 }
200 }
201
202 // Collect the individual preferences.
203 for (int32_t i = 0; unitPrefs.getValue(i, value); i++) {
204 UnitPreference *up = preferences->emplaceBack();
205 if (!up) {
206 status = U_MEMORY_ALLOCATION_ERROR;
207 return;
208 }
209 ResourceTable unitPref = value.getTable(status);
210 if (U_FAILURE(status)) { return; }
211 for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) {
212 if (uprv_strcmp(key, "unit") == 0) {
213 int32_t length;
214 const char16_t *u = value.getString(length, status);
215 up->unit.appendInvariantChars(u, length, status);
216 } else if (uprv_strcmp(key, "geq") == 0) {
217 int32_t length;
218 const char16_t *g = value.getString(length, status);
219 CharString geq;
220 geq.appendInvariantChars(g, length, status);
221 DecimalQuantity dq;
222 dq.setToDecNumber(geq.data(), status);
223 up->geq = dq.toDouble();
224 } else if (uprv_strcmp(key, "skeleton") == 0) {
225 up->skeleton = value.getUnicodeString(status);
226 }
227 }
228 }
229 }
230 }
231 }
232 }
233
234 private:
235 MaybeStackVector<UnitPreference> *preferences;
236 MaybeStackVector<UnitPreferenceMetadata> *metadata;
237 };
238
binarySearch(const MaybeStackVector<UnitPreferenceMetadata> * metadata,const UnitPreferenceMetadata & desired,bool * foundCategory,bool * foundUsage,bool * foundRegion,UErrorCode & status)239 int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
240 const UnitPreferenceMetadata &desired, bool *foundCategory, bool *foundUsage,
241 bool *foundRegion, UErrorCode &status) {
242 if (U_FAILURE(status)) { return -1; }
243 int32_t start = 0;
244 int32_t end = metadata->length();
245 *foundCategory = false;
246 *foundUsage = false;
247 *foundRegion = false;
248 while (start < end) {
249 int32_t mid = (start + end) / 2;
250 int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion);
251 if (cmp < 0) {
252 start = mid + 1;
253 } else if (cmp > 0) {
254 end = mid;
255 } else {
256 return mid;
257 }
258 }
259 return -1;
260 }
261
262 /**
263 * Finds the UnitPreferenceMetadata instance that matches the given category,
264 * usage and region: if missing, region falls back to "001", and usage
265 * repeatedly drops tailing components, eventually trying "default"
266 * ("land-agriculture-grain" -> "land-agriculture" -> "land" -> "default").
267 *
268 * @param metadata The full list of UnitPreferenceMetadata instances.
269 * @param category The category to search for. See getUnitCategory().
270 * @param usage The usage for which formatting preferences is needed. If the
271 * given usage is not known, automatic fallback occurs, see function description
272 * above.
273 * @param region The region for which preferences are needed. If there are no
274 * region-specific preferences, this function automatically falls back to the
275 * "001" region (global).
276 * @param status The standard ICU error code output parameter.
277 * * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR.
278 * * If fallback to "default" or "001" didn't resolve, status will be
279 * U_MISSING_RESOURCE.
280 * @return The index into the metadata vector which represents the appropriate
281 * preferences. If appropriate preferences are not found, -1 is returned.
282 */
getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> * metadata,StringPiece category,StringPiece usage,StringPiece region,UErrorCode & status)283 int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
284 StringPiece category, StringPiece usage, StringPiece region,
285 UErrorCode &status) {
286 if (U_FAILURE(status)) { return -1; }
287 bool foundCategory, foundUsage, foundRegion;
288 UnitPreferenceMetadata desired(category, usage, region, -1, -1, status);
289 int32_t idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
290 if (U_FAILURE(status)) { return -1; }
291 if (idx >= 0) { return idx; }
292 if (!foundCategory) {
293 // TODO: failures can happen if units::getUnitCategory returns a category
294 // that does not appear in unitPreferenceData. Do we want a unit test that
295 // checks unitPreferenceData has full coverage of categories? Or just trust
296 // CLDR?
297 status = U_ILLEGAL_ARGUMENT_ERROR;
298 return -1;
299 }
300 U_ASSERT(foundCategory);
301 while (!foundUsage) {
302 int32_t lastDashIdx = desired.usage.lastIndexOf('-');
303 if (lastDashIdx > 0) {
304 desired.usage.truncate(lastDashIdx);
305 } else if (uprv_strcmp(desired.usage.data(), "default") != 0) {
306 desired.usage.truncate(0).append("default", status);
307 } else {
308 // "default" is not supposed to be missing for any valid category.
309 status = U_MISSING_RESOURCE_ERROR;
310 return -1;
311 }
312 idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
313 if (U_FAILURE(status)) { return -1; }
314 }
315 U_ASSERT(foundCategory);
316 U_ASSERT(foundUsage);
317 if (!foundRegion) {
318 if (uprv_strcmp(desired.region.data(), "001") != 0) {
319 desired.region.truncate(0).append("001", status);
320 idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
321 }
322 if (!foundRegion) {
323 // "001" is not supposed to be missing for any valid usage.
324 status = U_MISSING_RESOURCE_ERROR;
325 return -1;
326 }
327 }
328 U_ASSERT(foundCategory);
329 U_ASSERT(foundUsage);
330 U_ASSERT(foundRegion);
331 U_ASSERT(idx >= 0);
332 return idx;
333 }
334
335 } // namespace
336
UnitPreferenceMetadata(StringPiece category,StringPiece usage,StringPiece region,int32_t prefsOffset,int32_t prefsCount,UErrorCode & status)337 UnitPreferenceMetadata::UnitPreferenceMetadata(StringPiece category, StringPiece usage,
338 StringPiece region, int32_t prefsOffset,
339 int32_t prefsCount, UErrorCode &status) {
340 this->category.append(category, status);
341 this->usage.append(usage, status);
342 this->region.append(region, status);
343 this->prefsOffset = prefsOffset;
344 this->prefsCount = prefsCount;
345 }
346
compareTo(const UnitPreferenceMetadata & other) const347 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const {
348 int32_t cmp = uprv_strcmp(category.data(), other.category.data());
349 if (cmp == 0) {
350 cmp = uprv_strcmp(usage.data(), other.usage.data());
351 }
352 if (cmp == 0) {
353 cmp = uprv_strcmp(region.data(), other.region.data());
354 }
355 return cmp;
356 }
357
compareTo(const UnitPreferenceMetadata & other,bool * foundCategory,bool * foundUsage,bool * foundRegion) const358 int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory,
359 bool *foundUsage, bool *foundRegion) const {
360 int32_t cmp = uprv_strcmp(category.data(), other.category.data());
361 if (cmp == 0) {
362 *foundCategory = true;
363 cmp = uprv_strcmp(usage.data(), other.usage.data());
364 }
365 if (cmp == 0) {
366 *foundUsage = true;
367 cmp = uprv_strcmp(region.data(), other.region.data());
368 }
369 if (cmp == 0) {
370 *foundRegion = true;
371 }
372 return cmp;
373 }
374
375 // TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace?
getAllConversionRates(MaybeStackVector<ConversionRateInfo> & result,UErrorCode & status)376 void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) {
377 LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
378 ConversionRateDataSink sink(&result);
379 ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", sink, status);
380 }
381
extractConversionInfo(StringPiece source,UErrorCode & status) const382 const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece source,
383 UErrorCode &status) const {
384 for (size_t i = 0, n = conversionInfo_.length(); i < n; ++i) {
385 if (conversionInfo_[i]->sourceUnit.toStringPiece() == source) return conversionInfo_[i];
386 }
387
388 status = U_INTERNAL_PROGRAM_ERROR;
389 return nullptr;
390 }
391
UnitPreferences(UErrorCode & status)392 U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) {
393 LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
394 UnitPreferencesSink sink(&unitPrefs_, &metadata_);
395 ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status);
396 }
397
getKeyWordValue(const Locale & locale,StringPiece kw,UErrorCode & status)398 CharString getKeyWordValue(const Locale &locale, StringPiece kw, UErrorCode &status) {
399 CharString result;
400 if (U_FAILURE(status)) { return result; }
401 {
402 CharStringByteSink sink(&result);
403 locale.getKeywordValue(kw, sink, status);
404 }
405 if (U_SUCCESS(status) && result.isEmpty()) {
406 status = U_MISSING_RESOURCE_ERROR;
407 }
408 return result;
409 }
410
411 MaybeStackVector<UnitPreference>
getPreferencesFor(StringPiece category,StringPiece usage,const Locale & locale,UErrorCode & status) const412 U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringPiece usage,
413 const Locale &locale, UErrorCode &status) const {
414
415 MaybeStackVector<UnitPreference> result;
416
417 // TODO: remove this once all the categories are allowed.
418 // WARNING: when this is removed please make sure to keep the "fahrenhe" => "fahrenheit" mapping
419 UErrorCode internalMuStatus = U_ZERO_ERROR;
420 if (category.compare("temperature") == 0) {
421 CharString localeUnitCharString = getKeyWordValue(locale, "mu", internalMuStatus);
422 if (U_SUCCESS(internalMuStatus)) {
423 // The value for -u-mu- is `fahrenhe`, but CLDR and everything else uses `fahrenheit`
424 if (localeUnitCharString == "fahrenhe") {
425 localeUnitCharString = CharString("fahrenheit", status);
426 }
427 // TODO: use the unit category as Java especially when all the categories are allowed..
428 if (localeUnitCharString == "celsius"
429 || localeUnitCharString == "fahrenheit"
430 || localeUnitCharString == "kelvin"
431 ) {
432 UnitPreference unitPref;
433 unitPref.unit.append(localeUnitCharString, status);
434 result.emplaceBackAndCheckErrorCode(status, unitPref);
435 return result;
436 }
437 }
438 }
439
440 char regionBuf[8];
441 ulocimp_getRegionForSupplementalData(locale.getName(), false, regionBuf, 8, &status);
442 CharString region(regionBuf, status);
443
444 // Check the locale system tag, e.g `ms=metric`.
445 UErrorCode internalMeasureTagStatus = U_ZERO_ERROR;
446 CharString localeSystem = getKeyWordValue(locale, "measure", internalMeasureTagStatus);
447 bool isLocaleSystem = false;
448 if (U_SUCCESS(internalMeasureTagStatus) && (localeSystem == "metric" || localeSystem == "ussystem" || localeSystem == "uksystem")) {
449 isLocaleSystem = true;
450 }
451
452 int32_t idx =
453 getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status);
454 if (U_FAILURE(status)) {
455 return result;
456 }
457
458 U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`.
459 const UnitPreferenceMetadata *m = metadata_[idx];
460
461 if (isLocaleSystem) {
462 // if the locale ID specifies a measurment system, check if ALL of the units we got back
463 // are members of that system (or are "metric_adjacent", which we consider to match all
464 // the systems)
465 bool unitsMatchSystem = true;
466 ConversionRates rates(status);
467 for (int32_t i = 0; unitsMatchSystem && i < m->prefsCount; i++) {
468 const UnitPreference& unitPref = *(unitPrefs_[i + m->prefsOffset]);
469 MeasureUnitImpl measureUnit = MeasureUnitImpl::forIdentifier(unitPref.unit.data(), status);
470 for (int32_t j = 0; unitsMatchSystem && j < measureUnit.singleUnits.length(); j++) {
471 const SingleUnitImpl* singleUnit = measureUnit.singleUnits[j];
472 const ConversionRateInfo* rateInfo = rates.extractConversionInfo(singleUnit->getSimpleUnitID(), status);
473 CharString systems(rateInfo->systems, status);
474 if (!systems.contains("metric_adjacent")) { // "metric-adjacent" is considered to match all the locale systems
475 if (!systems.contains(localeSystem.data())) {
476 unitsMatchSystem = false;
477 }
478 }
479 }
480 }
481
482 // if any of the units we got back above don't match the mearurement system the locale ID asked for,
483 // throw out the region and just load the units for the base region for the requested measurement system
484 if (!unitsMatchSystem) {
485 region.clear();
486 if (localeSystem == "ussystem") {
487 region.append("US", status);
488 } else if (localeSystem == "uksystem") {
489 region.append("GB", status);
490 } else {
491 region.append("001", status);
492 }
493 idx = getPreferenceMetadataIndex(&metadata_, category, usage, region.toStringPiece(), status);
494 if (U_FAILURE(status)) {
495 return result;
496 }
497
498 m = metadata_[idx];
499 }
500 }
501
502 for (int32_t i = 0; i < m->prefsCount; i++) {
503 result.emplaceBackAndCheckErrorCode(status, *(unitPrefs_[i + m->prefsOffset]));
504 }
505 return result;
506 }
507
508 } // namespace units
509 U_NAMESPACE_END
510
511 #endif /* #if !UCONFIG_NO_FORMATTING */
512