1 // © 2024 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #if !UCONFIG_NO_MF2
9
10 #include "unicode/dtptngen.h"
11 #include "unicode/messageformat2_data_model_names.h"
12 #include "unicode/messageformat2_function_registry.h"
13 #include "unicode/smpdtfmt.h"
14 #include "messageformat2_allocation.h"
15 #include "messageformat2_function_registry_internal.h"
16 #include "messageformat2_macros.h"
17 #include "hash.h"
18 #include "number_types.h"
19 #include "uvector.h" // U_ASSERT
20
21 #include <inttypes.h>
22 #include <math.h>
23
24 U_NAMESPACE_BEGIN
25
26 namespace message2 {
27
28 // Function registry implementation
29
~Formatter()30 Formatter::~Formatter() {}
~Selector()31 Selector::~Selector() {}
~FormatterFactory()32 FormatterFactory::~FormatterFactory() {}
~SelectorFactory()33 SelectorFactory::~SelectorFactory() {}
34
build()35 MFFunctionRegistry MFFunctionRegistry::Builder::build() {
36 U_ASSERT(formatters != nullptr && selectors != nullptr && formattersByType != nullptr);
37 MFFunctionRegistry result = MFFunctionRegistry(formatters, selectors, formattersByType);
38 formatters = nullptr;
39 selectors = nullptr;
40 formattersByType = nullptr;
41 return result;
42 }
43
adoptSelector(const FunctionName & selectorName,SelectorFactory * selectorFactory,UErrorCode & errorCode)44 MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::adoptSelector(const FunctionName& selectorName, SelectorFactory* selectorFactory, UErrorCode& errorCode) {
45 if (U_SUCCESS(errorCode)) {
46 U_ASSERT(selectors != nullptr);
47 selectors->put(selectorName, selectorFactory, errorCode);
48 }
49 return *this;
50 }
51
adoptFormatter(const FunctionName & formatterName,FormatterFactory * formatterFactory,UErrorCode & errorCode)52 MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::adoptFormatter(const FunctionName& formatterName, FormatterFactory* formatterFactory, UErrorCode& errorCode) {
53 if (U_SUCCESS(errorCode)) {
54 U_ASSERT(formatters != nullptr);
55 formatters->put(formatterName, formatterFactory, errorCode);
56 }
57 return *this;
58 }
59
setDefaultFormatterNameByType(const UnicodeString & type,const FunctionName & functionName,UErrorCode & errorCode)60 MFFunctionRegistry::Builder& MFFunctionRegistry::Builder::setDefaultFormatterNameByType(const UnicodeString& type, const FunctionName& functionName, UErrorCode& errorCode) {
61 if (U_SUCCESS(errorCode)) {
62 U_ASSERT(formattersByType != nullptr);
63 FunctionName* f = create<FunctionName>(FunctionName(functionName), errorCode);
64 formattersByType->put(type, f, errorCode);
65 }
66 return *this;
67 }
68
Builder(UErrorCode & errorCode)69 MFFunctionRegistry::Builder::Builder(UErrorCode& errorCode) {
70 CHECK_ERROR(errorCode);
71
72 formatters = new Hashtable();
73 selectors = new Hashtable();
74 formattersByType = new Hashtable();
75 if (!(formatters != nullptr && selectors != nullptr && formattersByType != nullptr)) {
76 errorCode = U_MEMORY_ALLOCATION_ERROR;
77 }
78 formatters->setValueDeleter(uprv_deleteUObject);
79 selectors->setValueDeleter(uprv_deleteUObject);
80 formattersByType->setValueDeleter(uprv_deleteUObject);
81 }
82
~Builder()83 MFFunctionRegistry::Builder::~Builder() {
84 if (formatters != nullptr) {
85 delete formatters;
86 }
87 if (selectors != nullptr) {
88 delete selectors;
89 }
90 if (formattersByType != nullptr) {
91 delete formattersByType;
92 }
93 }
94
95 // Returns non-owned pointer. Returns pointer rather than reference because it can fail.
96 // Returns non-const because FormatterFactory is mutable.
97 // TODO: This is unsafe because of the cached-formatters map
98 // (the caller could delete the resulting pointer)
getFormatter(const FunctionName & formatterName) const99 FormatterFactory* MFFunctionRegistry::getFormatter(const FunctionName& formatterName) const {
100 U_ASSERT(formatters != nullptr);
101 return static_cast<FormatterFactory*>(formatters->get(formatterName));
102 }
103
getDefaultFormatterNameByType(const UnicodeString & type,FunctionName & name) const104 UBool MFFunctionRegistry::getDefaultFormatterNameByType(const UnicodeString& type, FunctionName& name) const {
105 U_ASSERT(formatters != nullptr);
106 const FunctionName* f = static_cast<FunctionName*>(formattersByType->get(type));
107 if (f != nullptr) {
108 name = *f;
109 return true;
110 }
111 return false;
112 }
113
getSelector(const FunctionName & selectorName) const114 const SelectorFactory* MFFunctionRegistry::getSelector(const FunctionName& selectorName) const {
115 U_ASSERT(selectors != nullptr);
116 return static_cast<const SelectorFactory*>(selectors->get(selectorName));
117 }
118
hasFormatter(const FunctionName & f) const119 bool MFFunctionRegistry::hasFormatter(const FunctionName& f) const {
120 return getFormatter(f) != nullptr;
121 }
122
hasSelector(const FunctionName & s) const123 bool MFFunctionRegistry::hasSelector(const FunctionName& s) const {
124 return getSelector(s) != nullptr;
125 }
126
checkFormatter(const char * s) const127 void MFFunctionRegistry::checkFormatter(const char* s) const {
128 #if U_DEBUG
129 U_ASSERT(hasFormatter(FunctionName(UnicodeString(s))));
130 #else
131 (void) s;
132 #endif
133 }
134
checkSelector(const char * s) const135 void MFFunctionRegistry::checkSelector(const char* s) const {
136 #if U_DEBUG
137 U_ASSERT(hasSelector(FunctionName(UnicodeString(s))));
138 #else
139 (void) s;
140 #endif
141 }
142
143 // Debugging
checkStandard() const144 void MFFunctionRegistry::checkStandard() const {
145 checkFormatter("datetime");
146 checkFormatter("date");
147 checkFormatter("time");
148 checkFormatter("number");
149 checkFormatter("integer");
150 checkSelector("number");
151 checkSelector("integer");
152 checkSelector("string");
153 }
154
155 // Formatter/selector helpers
156
157 // Converts `s` to a double, indicating failure via `errorCode`
strToDouble(const UnicodeString & s,double & result,UErrorCode & errorCode)158 static void strToDouble(const UnicodeString& s, double& result, UErrorCode& errorCode) {
159 CHECK_ERROR(errorCode);
160
161 // Using en-US locale because it happens to correspond to the spec:
162 // https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#number-operands
163 // Ideally, this should re-use the code for parsing number literals (Parser::parseUnquotedLiteral())
164 // It's hard to reuse the same code because of how parse errors work.
165 // TODO: Refactor
166 LocalPointer<NumberFormat> numberFormat(NumberFormat::createInstance(Locale("en-US"), errorCode));
167 CHECK_ERROR(errorCode);
168 icu::Formattable asNumber;
169 numberFormat->parse(s, asNumber, errorCode);
170 CHECK_ERROR(errorCode);
171 result = asNumber.getDouble(errorCode);
172 }
173
tryStringAsNumber(const Locale & locale,const Formattable & val,UErrorCode & errorCode)174 static double tryStringAsNumber(const Locale& locale, const Formattable& val, UErrorCode& errorCode) {
175 // Check for a string option, try to parse it as a number if present
176 UnicodeString tempString = val.getString(errorCode);
177 LocalPointer<NumberFormat> numberFormat(NumberFormat::createInstance(locale, errorCode));
178 if (U_SUCCESS(errorCode)) {
179 icu::Formattable asNumber;
180 numberFormat->parse(tempString, asNumber, errorCode);
181 if (U_SUCCESS(errorCode)) {
182 return asNumber.getDouble(errorCode);
183 }
184 }
185 return 0;
186 }
187
getInt64Value(const Locale & locale,const Formattable & value,UErrorCode & errorCode)188 static int64_t getInt64Value(const Locale& locale, const Formattable& value, UErrorCode& errorCode) {
189 if (U_SUCCESS(errorCode)) {
190 if (!value.isNumeric()) {
191 double doubleResult = tryStringAsNumber(locale, value, errorCode);
192 if (U_SUCCESS(errorCode)) {
193 return static_cast<int64_t>(doubleResult);
194 }
195 }
196 else {
197 int64_t result = value.getInt64(errorCode);
198 if (U_SUCCESS(errorCode)) {
199 return result;
200 }
201 }
202 }
203 // Option was numeric but couldn't be converted to int64_t -- could be overflow
204 return 0;
205 }
206
207 // Adopts its arguments
MFFunctionRegistry(FormatterMap * f,SelectorMap * s,Hashtable * byType)208 MFFunctionRegistry::MFFunctionRegistry(FormatterMap* f, SelectorMap* s, Hashtable* byType) : formatters(f), selectors(s), formattersByType(byType) {
209 U_ASSERT(f != nullptr && s != nullptr && byType != nullptr);
210 }
211
operator =(MFFunctionRegistry && other)212 MFFunctionRegistry& MFFunctionRegistry::operator=(MFFunctionRegistry&& other) noexcept {
213 cleanup();
214
215 formatters = other.formatters;
216 selectors = other.selectors;
217 formattersByType = other.formattersByType;
218 other.formatters = nullptr;
219 other.selectors = nullptr;
220 other.formattersByType = nullptr;
221
222 return *this;
223 }
224
cleanup()225 void MFFunctionRegistry::cleanup() noexcept {
226 if (formatters != nullptr) {
227 delete formatters;
228 }
229 if (selectors != nullptr) {
230 delete selectors;
231 }
232 if (formattersByType != nullptr) {
233 delete formattersByType;
234 }
235 }
236
237
~MFFunctionRegistry()238 MFFunctionRegistry::~MFFunctionRegistry() {
239 cleanup();
240 }
241
242 // Specific formatter implementations
243
244 // --------- Number
245
formatterForOptions(const Number & number,const FunctionOptions & opts,UErrorCode & status)246 /* static */ number::LocalizedNumberFormatter StandardFunctions::formatterForOptions(const Number& number,
247 const FunctionOptions& opts,
248 UErrorCode& status) {
249 number::UnlocalizedNumberFormatter nf;
250
251 using namespace number;
252
253 if (U_SUCCESS(status)) {
254 Formattable opt;
255 nf = NumberFormatter::with();
256 bool isInteger = number.isInteger;
257
258 if (isInteger) {
259 nf = nf.precision(Precision::integer());
260 }
261
262 // Notation options
263 if (!isInteger) {
264 // These options only apply to `:number`
265
266 // Default notation is simple
267 Notation notation = Notation::simple();
268 UnicodeString notationOpt = opts.getStringFunctionOption(UnicodeString("notation"));
269 if (notationOpt == UnicodeString("scientific")) {
270 notation = Notation::scientific();
271 } else if (notationOpt == UnicodeString("engineering")) {
272 notation = Notation::engineering();
273 } else if (notationOpt == UnicodeString("compact")) {
274 UnicodeString displayOpt = opts.getStringFunctionOption(UnicodeString("compactDisplay"));
275 if (displayOpt == UnicodeString("long")) {
276 notation = Notation::compactLong();
277 } else {
278 // Default is short
279 notation = Notation::compactShort();
280 }
281 } else {
282 // Already set to default
283 }
284 nf = nf.notation(notation);
285 }
286
287 // Style options -- specific to `:number`
288 if (!isInteger) {
289 if (number.usePercent(opts)) {
290 nf = nf.unit(NoUnit::percent());
291 }
292 }
293
294 int32_t maxSignificantDigits = number.maximumSignificantDigits(opts);
295 if (!isInteger) {
296 int32_t minFractionDigits = number.minimumFractionDigits(opts);
297 int32_t maxFractionDigits = number.maximumFractionDigits(opts);
298 int32_t minSignificantDigits = number.minimumSignificantDigits(opts);
299 Precision p = Precision::minMaxFraction(minFractionDigits, maxFractionDigits);
300 if (minSignificantDigits > 0) {
301 p = p.minSignificantDigits(minSignificantDigits);
302 }
303 if (maxSignificantDigits > 0) {
304 p = p.maxSignificantDigits(maxSignificantDigits);
305 }
306 nf = nf.precision(p);
307 } else {
308 // maxSignificantDigits applies to `:integer`, but the other precision options don't
309 Precision p = Precision::integer();
310 if (maxSignificantDigits > 0) {
311 p = p.maxSignificantDigits(maxSignificantDigits);
312 }
313 nf = nf.precision(p);
314 }
315
316 // All other options apply to both `:number` and `:integer`
317 int32_t minIntegerDigits = number.minimumIntegerDigits(opts);
318 nf = nf.integerWidth(IntegerWidth::zeroFillTo(minIntegerDigits));
319
320 // signDisplay
321 UnicodeString sd = opts.getStringFunctionOption(UnicodeString("signDisplay"));
322 UNumberSignDisplay signDisplay;
323 if (sd == UnicodeString("always")) {
324 signDisplay = UNumberSignDisplay::UNUM_SIGN_ALWAYS;
325 } else if (sd == UnicodeString("exceptZero")) {
326 signDisplay = UNumberSignDisplay::UNUM_SIGN_EXCEPT_ZERO;
327 } else if (sd == UnicodeString("negative")) {
328 signDisplay = UNumberSignDisplay::UNUM_SIGN_NEGATIVE;
329 } else if (sd == UnicodeString("never")) {
330 signDisplay = UNumberSignDisplay::UNUM_SIGN_NEVER;
331 } else {
332 signDisplay = UNumberSignDisplay::UNUM_SIGN_AUTO;
333 }
334 nf = nf.sign(signDisplay);
335
336 // useGrouping
337 UnicodeString ug = opts.getStringFunctionOption(UnicodeString("useGrouping"));
338 UNumberGroupingStrategy grp;
339 if (ug == UnicodeString("always")) {
340 grp = UNumberGroupingStrategy::UNUM_GROUPING_ON_ALIGNED;
341 } else if (ug == UnicodeString("never")) {
342 grp = UNumberGroupingStrategy::UNUM_GROUPING_OFF;
343 } else if (ug == UnicodeString("min2")) {
344 grp = UNumberGroupingStrategy::UNUM_GROUPING_MIN2;
345 } else {
346 // Default is "auto"
347 grp = UNumberGroupingStrategy::UNUM_GROUPING_AUTO;
348 }
349 nf = nf.grouping(grp);
350 }
351 return LocalizedNumberFormatter(nf.locale(number.locale));
352 }
353
createFormatter(const Locale & locale,UErrorCode & errorCode)354 Formatter* StandardFunctions::NumberFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) {
355 NULL_ON_ERROR(errorCode);
356
357 Formatter* result = new Number(locale);
358 if (result == nullptr) {
359 errorCode = U_MEMORY_ALLOCATION_ERROR;
360 }
361 return result;
362 }
363
createFormatter(const Locale & locale,UErrorCode & errorCode)364 Formatter* StandardFunctions::IntegerFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) {
365 NULL_ON_ERROR(errorCode);
366
367 Formatter* result = new Number(Number::integer(locale));
368 if (result == nullptr) {
369 errorCode = U_MEMORY_ALLOCATION_ERROR;
370 }
371 return result;
372 }
373
~IntegerFactory()374 StandardFunctions::IntegerFactory::~IntegerFactory() {}
375
// Produces the placeholder used when an operand can't be treated as a number:
// built from `input`, with the string "NaN" as its formatted value.
static FormattedPlaceholder notANumber(const FormattedPlaceholder& input) {
    FormattedValue nanValue(UnicodeString("NaN"));
    return FormattedPlaceholder(input, std::move(nanValue));
}
379
stringAsNumber(const number::LocalizedNumberFormatter & nf,const FormattedPlaceholder & input,UErrorCode & errorCode)380 static FormattedPlaceholder stringAsNumber(const number::LocalizedNumberFormatter& nf, const FormattedPlaceholder& input, UErrorCode& errorCode) {
381 if (U_FAILURE(errorCode)) {
382 return {};
383 }
384
385 double numberValue;
386 // Copying string to avoid GCC dangling-reference warning
387 // (although the reference is safe)
388 UnicodeString inputStr = input.asFormattable().getString(errorCode);
389 // Precondition: `input`'s source Formattable has type string
390 if (U_FAILURE(errorCode)) {
391 return {};
392 }
393 UErrorCode localErrorCode = U_ZERO_ERROR;
394 strToDouble(inputStr, numberValue, localErrorCode);
395 if (U_FAILURE(localErrorCode)) {
396 errorCode = U_MF_OPERAND_MISMATCH_ERROR;
397 return notANumber(input);
398 }
399 UErrorCode savedStatus = errorCode;
400 number::FormattedNumber result = nf.formatDouble(numberValue, errorCode);
401 // Ignore U_USING_DEFAULT_WARNING
402 if (errorCode == U_USING_DEFAULT_WARNING) {
403 errorCode = savedStatus;
404 }
405 return FormattedPlaceholder(input, FormattedValue(std::move(result)));
406 }
407
maximumFractionDigits(const FunctionOptions & opts) const408 int32_t StandardFunctions::Number::maximumFractionDigits(const FunctionOptions& opts) const {
409 Formattable opt;
410
411 if (isInteger) {
412 return 0;
413 }
414
415 if (opts.getFunctionOption(UnicodeString("maximumFractionDigits"), opt)) {
416 UErrorCode localErrorCode = U_ZERO_ERROR;
417 int64_t val = getInt64Value(locale, opt, localErrorCode);
418 if (U_SUCCESS(localErrorCode)) {
419 return static_cast<int32_t>(val);
420 }
421 }
422 return number::impl::kMaxIntFracSig;
423 }
424
minimumFractionDigits(const FunctionOptions & opts) const425 int32_t StandardFunctions::Number::minimumFractionDigits(const FunctionOptions& opts) const {
426 Formattable opt;
427
428 if (!isInteger) {
429 if (opts.getFunctionOption(UnicodeString("minimumFractionDigits"), opt)) {
430 UErrorCode localErrorCode = U_ZERO_ERROR;
431 int64_t val = getInt64Value(locale, opt, localErrorCode);
432 if (U_SUCCESS(localErrorCode)) {
433 return static_cast<int32_t>(val);
434 }
435 }
436 }
437 return 0;
438 }
439
minimumIntegerDigits(const FunctionOptions & opts) const440 int32_t StandardFunctions::Number::minimumIntegerDigits(const FunctionOptions& opts) const {
441 Formattable opt;
442
443 if (opts.getFunctionOption(UnicodeString("minimumIntegerDigits"), opt)) {
444 UErrorCode localErrorCode = U_ZERO_ERROR;
445 int64_t val = getInt64Value(locale, opt, localErrorCode);
446 if (U_SUCCESS(localErrorCode)) {
447 return static_cast<int32_t>(val);
448 }
449 }
450 return 0;
451 }
452
minimumSignificantDigits(const FunctionOptions & opts) const453 int32_t StandardFunctions::Number::minimumSignificantDigits(const FunctionOptions& opts) const {
454 Formattable opt;
455
456 if (!isInteger) {
457 if (opts.getFunctionOption(UnicodeString("minimumSignificantDigits"), opt)) {
458 UErrorCode localErrorCode = U_ZERO_ERROR;
459 int64_t val = getInt64Value(locale, opt, localErrorCode);
460 if (U_SUCCESS(localErrorCode)) {
461 return static_cast<int32_t>(val);
462 }
463 }
464 }
465 // Returning 0 indicates that the option wasn't provided or was a non-integer.
466 // The caller needs to check for that case, since passing 0 to Precision::minSignificantDigits()
467 // is an error.
468 return 0;
469 }
470
maximumSignificantDigits(const FunctionOptions & opts) const471 int32_t StandardFunctions::Number::maximumSignificantDigits(const FunctionOptions& opts) const {
472 Formattable opt;
473
474 if (opts.getFunctionOption(UnicodeString("maximumSignificantDigits"), opt)) {
475 UErrorCode localErrorCode = U_ZERO_ERROR;
476 int64_t val = getInt64Value(locale, opt, localErrorCode);
477 if (U_SUCCESS(localErrorCode)) {
478 return static_cast<int32_t>(val);
479 }
480 }
481 // Returning 0 indicates that the option wasn't provided or was a non-integer.
482 // The caller needs to check for that case, since passing 0 to Precision::maxSignificantDigits()
483 // is an error.
484 return 0; // Not a valid value for Precision; has to be checked
485 }
486
usePercent(const FunctionOptions & opts) const487 bool StandardFunctions::Number::usePercent(const FunctionOptions& opts) const {
488 Formattable opt;
489 if (isInteger
490 || !opts.getFunctionOption(UnicodeString("style"), opt)
491 || opt.getType() != UFMT_STRING) {
492 return false;
493 }
494 UErrorCode localErrorCode = U_ZERO_ERROR;
495 const UnicodeString& style = opt.getString(localErrorCode);
496 U_ASSERT(U_SUCCESS(localErrorCode));
497 return (style == UnicodeString("percent"));
498 }
499
integer(const Locale & loc)500 /* static */ StandardFunctions::Number StandardFunctions::Number::integer(const Locale& loc) {
501 return StandardFunctions::Number(loc, true);
502 }
503
format(FormattedPlaceholder && arg,FunctionOptions && opts,UErrorCode & errorCode) const504 FormattedPlaceholder StandardFunctions::Number::format(FormattedPlaceholder&& arg, FunctionOptions&& opts, UErrorCode& errorCode) const {
505 if (U_FAILURE(errorCode)) {
506 return {};
507 }
508
509 // No argument => return "NaN"
510 if (!arg.canFormat()) {
511 errorCode = U_MF_OPERAND_MISMATCH_ERROR;
512 return notANumber(arg);
513 }
514
515 number::LocalizedNumberFormatter realFormatter;
516 realFormatter = formatterForOptions(*this, opts, errorCode);
517
518 number::FormattedNumber numberResult;
519 if (U_SUCCESS(errorCode)) {
520 // Already checked that contents can be formatted
521 const Formattable& toFormat = arg.asFormattable();
522 switch (toFormat.getType()) {
523 case UFMT_DOUBLE: {
524 double d = toFormat.getDouble(errorCode);
525 U_ASSERT(U_SUCCESS(errorCode));
526 numberResult = realFormatter.formatDouble(d, errorCode);
527 break;
528 }
529 case UFMT_LONG: {
530 int32_t l = toFormat.getLong(errorCode);
531 U_ASSERT(U_SUCCESS(errorCode));
532 numberResult = realFormatter.formatInt(l, errorCode);
533 break;
534 }
535 case UFMT_INT64: {
536 int64_t i = toFormat.getInt64(errorCode);
537 U_ASSERT(U_SUCCESS(errorCode));
538 numberResult = realFormatter.formatInt(i, errorCode);
539 break;
540 }
541 case UFMT_STRING: {
542 // Try to parse the string as a number
543 return stringAsNumber(realFormatter, arg, errorCode);
544 }
545 default: {
546 // Other types can't be parsed as a number
547 errorCode = U_MF_OPERAND_MISMATCH_ERROR;
548 return notANumber(arg);
549 }
550 }
551 }
552
553 return FormattedPlaceholder(arg, FormattedValue(std::move(numberResult)));
554 }
555
~Number()556 StandardFunctions::Number::~Number() {}
~NumberFactory()557 StandardFunctions::NumberFactory::~NumberFactory() {}
558
559 // --------- PluralFactory
560
561
pluralType(const FunctionOptions & opts) const562 StandardFunctions::Plural::PluralType StandardFunctions::Plural::pluralType(const FunctionOptions& opts) const {
563 Formattable opt;
564
565 if (opts.getFunctionOption(UnicodeString("select"), opt)) {
566 UErrorCode localErrorCode = U_ZERO_ERROR;
567 UnicodeString val = opt.getString(localErrorCode);
568 if (U_SUCCESS(localErrorCode)) {
569 if (val == UnicodeString("ordinal")) {
570 return PluralType::PLURAL_ORDINAL;
571 }
572 if (val == UnicodeString("exact")) {
573 return PluralType::PLURAL_EXACT;
574 }
575 }
576 }
577 return PluralType::PLURAL_CARDINAL;
578 }
579
createSelector(const Locale & locale,UErrorCode & errorCode) const580 Selector* StandardFunctions::PluralFactory::createSelector(const Locale& locale, UErrorCode& errorCode) const {
581 NULL_ON_ERROR(errorCode);
582
583 Selector* result;
584 if (isInteger) {
585 result = new Plural(Plural::integer(locale));
586 } else {
587 result = new Plural(locale);
588 }
589 if (result == nullptr) {
590 errorCode = U_MEMORY_ALLOCATION_ERROR;
591 return nullptr;
592 }
593 return result;
594 }
595
tryAsString(const UnicodeString & s,UErrorCode & errorCode)596 static double tryAsString(const UnicodeString& s, UErrorCode& errorCode) {
597 if (U_FAILURE(errorCode)) {
598 return 0;
599 }
600 // Try parsing the inputString as a double
601 double valToCheck;
602 strToDouble(s, valToCheck, errorCode);
603 return valToCheck;
604 }
605
tryWithFormattable(const Formattable & value,UErrorCode & errorCode)606 static double tryWithFormattable(const Formattable& value, UErrorCode& errorCode) {
607 if (U_FAILURE(errorCode)) {
608 return 0;
609 }
610 double valToCheck;
611 switch (value.getType()) {
612 case UFMT_DOUBLE: {
613 valToCheck = value.getDouble(errorCode);
614 break;
615 }
616 case UFMT_LONG: {
617 valToCheck = (double) value.getLong(errorCode);
618 break;
619 }
620 case UFMT_INT64: {
621 valToCheck = (double) value.getInt64(errorCode);
622 break;
623 }
624 case UFMT_STRING: {
625 const UnicodeString& s = value.getString(errorCode);
626 U_ASSERT(U_SUCCESS(errorCode));
627 return tryAsString(s, errorCode);
628 }
629 default: {
630 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
631 return 0;
632 }
633 }
634 U_ASSERT(U_SUCCESS(errorCode));
635 return valToCheck;
636 }
637
toJSONString(double d)638 static UnicodeString toJSONString(double d) {
639 // TODO :(
640 char buffer[512];
641 // "Only integer matching is required in the Technical Preview."
642 snprintf(buffer, 512, "%" PRId64, static_cast<int64_t>(d));
643 return UnicodeString(buffer);
644 }
645
selectKey(FormattedPlaceholder && toFormat,FunctionOptions && opts,const UnicodeString * keys,int32_t keysLen,UnicodeString * prefs,int32_t & prefsLen,UErrorCode & errorCode) const646 void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat,
647 FunctionOptions&& opts,
648 const UnicodeString* keys,
649 int32_t keysLen,
650 UnicodeString* prefs,
651 int32_t& prefsLen,
652 UErrorCode& errorCode) const {
653 CHECK_ERROR(errorCode);
654
655 // No argument => return "NaN"
656 if (!toFormat.canFormat()) {
657 errorCode = U_MF_SELECTOR_ERROR;
658 return;
659 }
660
661 // Only doubles and integers can match
662 double valToCheck;
663
664 bool isFormattedString = toFormat.isEvaluated() && toFormat.output().isString();
665 bool isFormattedNumber = toFormat.isEvaluated() && toFormat.output().isNumber();
666
667 if (isFormattedString) {
668 // Formatted string: try parsing it as a number
669 valToCheck = tryAsString(toFormat.output().getString(), errorCode);
670 } else {
671 // Already checked that contents can be formatted
672 valToCheck = tryWithFormattable(toFormat.asFormattable(), errorCode);
673 }
674
675 if (U_FAILURE(errorCode)) {
676 // Non-number => selector error
677 errorCode = U_MF_SELECTOR_ERROR;
678 return;
679 }
680 // TODO: This needs to be checked against https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#number-selection
681 // Determine `exact`, per step 1 under "Number Selection"
682 UnicodeString exact = toJSONString(valToCheck);
683
684 // Generate the matches
685 // -----------------------
686
687 prefsLen = 0;
688
689 // First, check for an exact match
690 double keyAsDouble = 0;
691 for (int32_t i = 0; i < keysLen; i++) {
692 // Try parsing the key as a double
693 UErrorCode localErrorCode = U_ZERO_ERROR;
694 strToDouble(keys[i], keyAsDouble, localErrorCode);
695 if (U_SUCCESS(localErrorCode)) {
696 if (exact == keys[i]) {
697 prefs[prefsLen] = keys[i];
698 prefsLen++;
699 break;
700 }
701 }
702 }
703
704 PluralType type = pluralType(opts);
705 // Return immediately if exact matching was requested
706 if (prefsLen == keysLen || type == PluralType::PLURAL_EXACT) {
707 return;
708 }
709
710 UPluralType t = type == PluralType::PLURAL_ORDINAL ? UPLURAL_TYPE_ORDINAL : UPLURAL_TYPE_CARDINAL;
711 // Look up plural rules by locale and type
712 LocalPointer<PluralRules> rules(PluralRules::forLocale(locale, t, errorCode));
713 CHECK_ERROR(errorCode);
714
715
716 // Check for a match based on the plural category
717 UnicodeString match;
718 if (isFormattedNumber) {
719 match = rules->select(toFormat.output().getNumber(), errorCode);
720 } else {
721 if (isInteger) {
722 match = rules->select(static_cast<int32_t>(trunc(valToCheck)));
723 } else {
724 match = rules->select(valToCheck);
725 }
726 }
727 CHECK_ERROR(errorCode);
728
729 for (int32_t i = 0; i < keysLen; i ++) {
730 if (prefsLen >= keysLen) {
731 break;
732 }
733 if (match == keys[i]) {
734 prefs[prefsLen] = keys[i];
735 prefsLen++;
736 }
737 }
738 }
739
~Plural()740 StandardFunctions::Plural::~Plural() {}
~PluralFactory()741 StandardFunctions::PluralFactory::~PluralFactory() {}
742
743 // --------- DateTimeFactory
744
// Returns the value of option `optionName` in `opts` as a string.
// If the option is absent, `errorCode` is set to U_ILLEGAL_ARGUMENT_ERROR and
// the empty string is returned; if the option isn't a string, the error code
// is set by the string conversion.
/* static */ UnicodeString StandardFunctions::getStringOption(const FunctionOptions& opts,
                                                              const UnicodeString& optionName,
                                                              UErrorCode& errorCode) {
    if (U_FAILURE(errorCode)) {
        // Default is empty string
        return {};
    }

    Formattable optionValue;
    if (!opts.getFunctionOption(optionName, optionValue)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return {};
    }
    // In case it's not a string, error code will be set
    return optionValue.getString(errorCode);
}
759
760 // Date/time options only
defaultForOption(const UnicodeString & optionName)761 static UnicodeString defaultForOption(const UnicodeString& optionName) {
762 if (optionName == UnicodeString("dateStyle")
763 || optionName == UnicodeString("timeStyle")
764 || optionName == UnicodeString("style")) {
765 return UnicodeString("short");
766 }
767 return {}; // Empty string is default
768 }
769
770 // TODO
771 // Only DateTime currently uses the function options stored in the placeholder.
772 // It also doesn't use them very consistently (it looks at the previous set of options,
773 // and others aren't preserved). This needs to be generalized,
774 // but that depends on https://github.com/unicode-org/message-format-wg/issues/515
775 // Finally, the option value is assumed to be a string,
776 // which works for datetime options but not necessarily in general.
getFunctionOption(const FormattedPlaceholder & toFormat,const FunctionOptions & opts,const UnicodeString & optionName) const777 UnicodeString StandardFunctions::DateTime::getFunctionOption(const FormattedPlaceholder& toFormat,
778 const FunctionOptions& opts,
779 const UnicodeString& optionName) const {
780 // Options passed to the current function invocation take priority
781 Formattable opt;
782 UnicodeString s;
783 UErrorCode localErrorCode = U_ZERO_ERROR;
784 s = getStringOption(opts, optionName, localErrorCode);
785 if (U_SUCCESS(localErrorCode)) {
786 return s;
787 }
788 // Next try the set of options used to construct `toFormat`
789 localErrorCode = U_ZERO_ERROR;
790 s = getStringOption(toFormat.options(), optionName, localErrorCode);
791 if (U_SUCCESS(localErrorCode)) {
792 return s;
793 }
794 // Finally, use default
795 return defaultForOption(optionName);
796 }
797
798 // Used for options that don't have defaults
getFunctionOption(const FormattedPlaceholder & toFormat,const FunctionOptions & opts,const UnicodeString & optionName,UErrorCode & errorCode) const799 UnicodeString StandardFunctions::DateTime::getFunctionOption(const FormattedPlaceholder& toFormat,
800 const FunctionOptions& opts,
801 const UnicodeString& optionName,
802 UErrorCode& errorCode) const {
803 if (U_SUCCESS(errorCode)) {
804 // Options passed to the current function invocation take priority
805 Formattable opt;
806 UnicodeString s;
807 UErrorCode localErrorCode = U_ZERO_ERROR;
808 s = getStringOption(opts, optionName, localErrorCode);
809 if (U_SUCCESS(localErrorCode)) {
810 return s;
811 }
812 // Next try the set of options used to construct `toFormat`
813 localErrorCode = U_ZERO_ERROR;
814 s = getStringOption(toFormat.options(), optionName, localErrorCode);
815 if (U_SUCCESS(localErrorCode)) {
816 return s;
817 }
818 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
819 }
820 return {};
821 }
822
stringToStyle(UnicodeString option,UErrorCode & errorCode)823 static DateFormat::EStyle stringToStyle(UnicodeString option, UErrorCode& errorCode) {
824 if (U_SUCCESS(errorCode)) {
825 UnicodeString upper = option.toUpper();
826 if (upper == UnicodeString("FULL")) {
827 return DateFormat::EStyle::kFull;
828 }
829 if (upper == UnicodeString("LONG")) {
830 return DateFormat::EStyle::kLong;
831 }
832 if (upper == UnicodeString("MEDIUM")) {
833 return DateFormat::EStyle::kMedium;
834 }
835 if (upper == UnicodeString("SHORT")) {
836 return DateFormat::EStyle::kShort;
837 }
838 if (upper.isEmpty() || upper == UnicodeString("DEFAULT")) {
839 return DateFormat::EStyle::kDefault;
840 }
841 errorCode = U_ILLEGAL_ARGUMENT_ERROR;
842 }
843 return DateFormat::EStyle::kNone;
844 }
845
dateTime(UErrorCode & errorCode)846 /* static */ StandardFunctions::DateTimeFactory* StandardFunctions::DateTimeFactory::dateTime(UErrorCode& errorCode) {
847 NULL_ON_ERROR(errorCode);
848
849 DateTimeFactory* result = new StandardFunctions::DateTimeFactory(DateTimeType::DateTime);
850 if (result == nullptr) {
851 errorCode = U_MEMORY_ALLOCATION_ERROR;
852 }
853 return result;
854 }
855
date(UErrorCode & errorCode)856 /* static */ StandardFunctions::DateTimeFactory* StandardFunctions::DateTimeFactory::date(UErrorCode& errorCode) {
857 NULL_ON_ERROR(errorCode);
858
859 DateTimeFactory* result = new DateTimeFactory(DateTimeType::Date);
860 if (result == nullptr) {
861 errorCode = U_MEMORY_ALLOCATION_ERROR;
862 }
863 return result;
864 }
865
time(UErrorCode & errorCode)866 /* static */ StandardFunctions::DateTimeFactory* StandardFunctions::DateTimeFactory::time(UErrorCode& errorCode) {
867 NULL_ON_ERROR(errorCode);
868
869 DateTimeFactory* result = new DateTimeFactory(DateTimeType::Time);
870 if (result == nullptr) {
871 errorCode = U_MEMORY_ALLOCATION_ERROR;
872 }
873 return result;
874 }
875
createFormatter(const Locale & locale,UErrorCode & errorCode)876 Formatter* StandardFunctions::DateTimeFactory::createFormatter(const Locale& locale, UErrorCode& errorCode) {
877 NULL_ON_ERROR(errorCode);
878
879 Formatter* result = new StandardFunctions::DateTime(locale, type);
880 if (result == nullptr) {
881 errorCode = U_MEMORY_ALLOCATION_ERROR;
882 }
883 return result;
884 }
885
format(FormattedPlaceholder && toFormat,FunctionOptions && opts,UErrorCode & errorCode) const886 FormattedPlaceholder StandardFunctions::DateTime::format(FormattedPlaceholder&& toFormat,
887 FunctionOptions&& opts,
888 UErrorCode& errorCode) const {
889 if (U_FAILURE(errorCode)) {
890 return {};
891 }
892
893 // Argument must be present
894 if (!toFormat.canFormat()) {
895 errorCode = U_MF_OPERAND_MISMATCH_ERROR;
896 return std::move(toFormat);
897 }
898
899 LocalPointer<DateFormat> df;
900 Formattable opt;
901
902 DateFormat::EStyle dateStyle = DateFormat::kShort;
903 DateFormat::EStyle timeStyle = DateFormat::kShort;
904
905 UnicodeString dateStyleName("dateStyle");
906 UnicodeString timeStyleName("timeStyle");
907 UnicodeString styleName("style");
908
909 bool hasDateStyleOption = opts.getFunctionOption(dateStyleName, opt);
910 bool hasTimeStyleOption = opts.getFunctionOption(timeStyleName, opt);
911 bool noOptions = opts.optionsCount() == 0;
912
913 bool useStyle = (type == DateTimeFactory::DateTimeType::DateTime
914 && (hasDateStyleOption || hasTimeStyleOption
915 || noOptions))
916 || (type != DateTimeFactory::DateTimeType::DateTime);
917
918 bool useDate = type == DateTimeFactory::DateTimeType::Date
919 || (type == DateTimeFactory::DateTimeType::DateTime
920 && hasDateStyleOption);
921 bool useTime = type == DateTimeFactory::DateTimeType::Time
922 || (type == DateTimeFactory::DateTimeType::DateTime
923 && hasTimeStyleOption);
924
925 if (useStyle) {
926 // Extract style options
927 if (type == DateTimeFactory::DateTimeType::DateTime) {
928 // Note that the options-getting has to be repeated across the three cases,
929 // since `:datetime` uses "dateStyle"/"timeStyle" and `:date` and `:time`
930 // use "style"
931 dateStyle = stringToStyle(getFunctionOption(toFormat, opts, dateStyleName), errorCode);
932 timeStyle = stringToStyle(getFunctionOption(toFormat, opts, timeStyleName), errorCode);
933
934 if (useDate && !useTime) {
935 df.adoptInstead(DateFormat::createDateInstance(dateStyle, locale));
936 } else if (useTime && !useDate) {
937 df.adoptInstead(DateFormat::createTimeInstance(timeStyle, locale));
938 } else {
939 df.adoptInstead(DateFormat::createDateTimeInstance(dateStyle, timeStyle, locale));
940 }
941 } else if (type == DateTimeFactory::DateTimeType::Date) {
942 dateStyle = stringToStyle(getFunctionOption(toFormat, opts, styleName), errorCode);
943 df.adoptInstead(DateFormat::createDateInstance(dateStyle, locale));
944 } else {
945 // :time
946 timeStyle = stringToStyle(getFunctionOption(toFormat, opts, styleName), errorCode);
947 df.adoptInstead(DateFormat::createTimeInstance(timeStyle, locale));
948 }
949 } else {
950 // Build up a skeleton based on the field options, then use that to
951 // create the date formatter
952
953 UnicodeString skeleton;
954 #define ADD_PATTERN(s) skeleton += UnicodeString(s)
955 if (U_SUCCESS(errorCode)) {
956 // Year
957 UnicodeString year = getFunctionOption(toFormat, opts, UnicodeString("year"), errorCode);
958 if (U_FAILURE(errorCode)) {
959 errorCode = U_ZERO_ERROR;
960 } else {
961 useDate = true;
962 if (year == UnicodeString("2-digit")) {
963 ADD_PATTERN("YY");
964 } else if (year == UnicodeString("numeric")) {
965 ADD_PATTERN("YYYY");
966 }
967 }
968 // Month
969 UnicodeString month = getFunctionOption(toFormat, opts, UnicodeString("month"), errorCode);
970 if (U_FAILURE(errorCode)) {
971 errorCode = U_ZERO_ERROR;
972 } else {
973 useDate = true;
974 /* numeric, 2-digit, long, short, narrow */
975 if (month == UnicodeString("long")) {
976 ADD_PATTERN("MMMM");
977 } else if (month == UnicodeString("short")) {
978 ADD_PATTERN("MMM");
979 } else if (month == UnicodeString("narrow")) {
980 ADD_PATTERN("MMMMM");
981 } else if (month == UnicodeString("numeric")) {
982 ADD_PATTERN("M");
983 } else if (month == UnicodeString("2-digit")) {
984 ADD_PATTERN("MM");
985 }
986 }
987 // Weekday
988 UnicodeString weekday = getFunctionOption(toFormat, opts, UnicodeString("weekday"), errorCode);
989 if (U_FAILURE(errorCode)) {
990 errorCode = U_ZERO_ERROR;
991 } else {
992 useDate = true;
993 if (weekday == UnicodeString("long")) {
994 ADD_PATTERN("EEEE");
995 } else if (weekday == UnicodeString("short")) {
996 ADD_PATTERN("EEEEE");
997 } else if (weekday == UnicodeString("narrow")) {
998 ADD_PATTERN("EEEEE");
999 }
1000 }
1001 // Day
1002 UnicodeString day = getFunctionOption(toFormat, opts, UnicodeString("day"), errorCode);
1003 if (U_FAILURE(errorCode)) {
1004 errorCode = U_ZERO_ERROR;
1005 } else {
1006 useDate = true;
1007 if (day == UnicodeString("numeric")) {
1008 ADD_PATTERN("d");
1009 } else if (day == UnicodeString("2-digit")) {
1010 ADD_PATTERN("dd");
1011 }
1012 }
1013 // Hour
1014 UnicodeString hour = getFunctionOption(toFormat, opts, UnicodeString("hour"), errorCode);
1015 if (U_FAILURE(errorCode)) {
1016 errorCode = U_ZERO_ERROR;
1017 } else {
1018 useTime = true;
1019 if (hour == UnicodeString("numeric")) {
1020 ADD_PATTERN("h");
1021 } else if (hour == UnicodeString("2-digit")) {
1022 ADD_PATTERN("hh");
1023 }
1024 }
1025 // Minute
1026 UnicodeString minute = getFunctionOption(toFormat, opts, UnicodeString("minute"), errorCode);
1027 if (U_FAILURE(errorCode)) {
1028 errorCode = U_ZERO_ERROR;
1029 } else {
1030 useTime = true;
1031 if (minute == UnicodeString("numeric")) {
1032 ADD_PATTERN("m");
1033 } else if (minute == UnicodeString("2-digit")) {
1034 ADD_PATTERN("mm");
1035 }
1036 }
1037 // Second
1038 UnicodeString second = getFunctionOption(toFormat, opts, UnicodeString("second"), errorCode);
1039 if (U_FAILURE(errorCode)) {
1040 errorCode = U_ZERO_ERROR;
1041 } else {
1042 useTime = true;
1043 if (second == UnicodeString("numeric")) {
1044 ADD_PATTERN("s");
1045 } else if (second == UnicodeString("2-digit")) {
1046 ADD_PATTERN("ss");
1047 }
1048 }
1049 }
1050 /*
1051 TODO
1052 fractionalSecondDigits
1053 hourCycle
1054 timeZoneName
1055 era
1056 */
1057 df.adoptInstead(DateFormat::createInstanceForSkeleton(skeleton, errorCode));
1058 }
1059
1060 if (U_FAILURE(errorCode)) {
1061 return {};
1062 }
1063 if (!df.isValid()) {
1064 errorCode = U_MEMORY_ALLOCATION_ERROR;
1065 return {};
1066 }
1067
1068 UnicodeString result;
1069 const Formattable& source = toFormat.asFormattable();
1070 switch (source.getType()) {
1071 case UFMT_STRING: {
1072 const UnicodeString& sourceStr = source.getString(errorCode);
1073 U_ASSERT(U_SUCCESS(errorCode));
1074 // Pattern for ISO 8601 format - datetime
1075 UnicodeString pattern("YYYY-MM-dd'T'HH:mm:ss");
1076 LocalPointer<DateFormat> dateParser(new SimpleDateFormat(pattern, errorCode));
1077 if (U_FAILURE(errorCode)) {
1078 errorCode = U_MF_FORMATTING_ERROR;
1079 } else {
1080 // Parse the date
1081 UDate d = dateParser->parse(sourceStr, errorCode);
1082 if (U_FAILURE(errorCode)) {
1083 // Pattern for ISO 8601 format - date
1084 UnicodeString pattern("YYYY-MM-dd");
1085 errorCode = U_ZERO_ERROR;
1086 dateParser.adoptInstead(new SimpleDateFormat(pattern, errorCode));
1087 if (U_FAILURE(errorCode)) {
1088 errorCode = U_MF_FORMATTING_ERROR;
1089 } else {
1090 d = dateParser->parse(sourceStr, errorCode);
1091 if (U_FAILURE(errorCode)) {
1092 errorCode = U_MF_OPERAND_MISMATCH_ERROR;
1093 }
1094 }
1095 }
1096 // Use the parsed date as the source value
1097 // in the returned FormattedPlaceholder; this is necessary
1098 // so the date can be re-formatted
1099 toFormat = FormattedPlaceholder(message2::Formattable::forDate(d),
1100 toFormat.getFallback());
1101 df->format(d, result, 0, errorCode);
1102 }
1103 break;
1104 }
1105 case UFMT_DATE: {
1106 df->format(source.asICUFormattable(errorCode), result, 0, errorCode);
1107 if (U_FAILURE(errorCode)) {
1108 if (errorCode == U_ILLEGAL_ARGUMENT_ERROR) {
1109 errorCode = U_MF_OPERAND_MISMATCH_ERROR;
1110 }
1111 }
1112 break;
1113 }
1114 // Any other cases are an error
1115 default: {
1116 errorCode = U_MF_OPERAND_MISMATCH_ERROR;
1117 break;
1118 }
1119 }
1120 if (U_FAILURE(errorCode)) {
1121 return {};
1122 }
1123 return FormattedPlaceholder(toFormat, std::move(opts), FormattedValue(std::move(result)));
1124 }
1125
~DateTimeFactory()1126 StandardFunctions::DateTimeFactory::~DateTimeFactory() {}
~DateTime()1127 StandardFunctions::DateTime::~DateTime() {}
1128
1129 // --------- TextFactory
1130
createSelector(const Locale & locale,UErrorCode & errorCode) const1131 Selector* StandardFunctions::TextFactory::createSelector(const Locale& locale, UErrorCode& errorCode) const {
1132 Selector* result = new TextSelector(locale);
1133 if (result == nullptr) {
1134 errorCode = U_MEMORY_ALLOCATION_ERROR;
1135 return nullptr;
1136 }
1137 return result;
1138 }
1139
selectKey(FormattedPlaceholder && toFormat,FunctionOptions && opts,const UnicodeString * keys,int32_t keysLen,UnicodeString * prefs,int32_t & prefsLen,UErrorCode & errorCode) const1140 void StandardFunctions::TextSelector::selectKey(FormattedPlaceholder&& toFormat,
1141 FunctionOptions&& opts,
1142 const UnicodeString* keys,
1143 int32_t keysLen,
1144 UnicodeString* prefs,
1145 int32_t& prefsLen,
1146 UErrorCode& errorCode) const {
1147 // No options
1148 (void) opts;
1149
1150 CHECK_ERROR(errorCode);
1151
1152 // Just compares the key and value as strings
1153
1154 // Argument must be present
1155 if (!toFormat.canFormat()) {
1156 errorCode = U_MF_SELECTOR_ERROR;
1157 return;
1158 }
1159
1160 prefsLen = 0;
1161
1162 // Convert to string
1163 const UnicodeString& formattedValue = toFormat.formatToString(locale, errorCode);
1164 if (U_FAILURE(errorCode)) {
1165 return;
1166 }
1167
1168 for (int32_t i = 0; i < keysLen; i++) {
1169 if (keys[i] == formattedValue) {
1170 prefs[0] = keys[i];
1171 prefsLen = 1;
1172 break;
1173 }
1174 }
1175 }
1176
~TextFactory()1177 StandardFunctions::TextFactory::~TextFactory() {}
~TextSelector()1178 StandardFunctions::TextSelector::~TextSelector() {}
1179
1180 } // namespace message2
1181 U_NAMESPACE_END
1182
1183 #endif /* #if !UCONFIG_NO_MF2 */
1184
1185 #endif /* #if !UCONFIG_NO_FORMATTING */
1186
1187