xref: /aosp_15_r20/external/icu/icu4c/source/i18n/messageformat2.cpp (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2024 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #if !UCONFIG_NO_FORMATTING
7 
8 #if !UCONFIG_NO_MF2
9 
10 #include "unicode/messageformat2_arguments.h"
11 #include "unicode/messageformat2_data_model.h"
12 #include "unicode/messageformat2_formattable.h"
13 #include "unicode/messageformat2.h"
14 #include "unicode/unistr.h"
15 #include "messageformat2_allocation.h"
16 #include "messageformat2_evaluation.h"
17 #include "messageformat2_macros.h"
18 
19 
20 U_NAMESPACE_BEGIN
21 
22 namespace message2 {
23 
24 using namespace data_model;
25 
26 // ------------------------------------------------------
27 // Formatting
28 
29 // The result of formatting a literal is just itself.
evalLiteral(const Literal & lit)30 static Formattable evalLiteral(const Literal& lit) {
31     return Formattable(lit.unquoted());
32 }
33 
34 // Assumes that `var` is a message argument; returns the argument's value.
evalArgument(const VariableName & var,MessageContext & context,UErrorCode & errorCode) const35 [[nodiscard]] FormattedPlaceholder MessageFormatter::evalArgument(const VariableName& var, MessageContext& context, UErrorCode& errorCode) const {
36     if (U_SUCCESS(errorCode)) {
37         // The fallback for a variable name is itself.
38         UnicodeString str(DOLLAR);
39         str += var;
40         const Formattable* val = context.getGlobal(var, errorCode);
41         if (U_SUCCESS(errorCode)) {
42             return (FormattedPlaceholder(*val, str));
43         }
44     }
45     return {};
46 }
47 
48 // Returns the contents of the literal
formatLiteral(const Literal & lit) const49 [[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const Literal& lit) const {
50     // The fallback for a literal is itself.
51     return FormattedPlaceholder(evalLiteral(lit), lit.quoted());
52 }
53 
formatOperand(const Environment & env,const Operand & rand,MessageContext & context,UErrorCode & status) const54 [[nodiscard]] FormattedPlaceholder MessageFormatter::formatOperand(const Environment& env,
55                                                              const Operand& rand,
56                                                              MessageContext& context,
57                                                              UErrorCode &status) const {
58     if (U_FAILURE(status)) {
59         return {};
60     }
61 
62     if (rand.isNull()) {
63         return FormattedPlaceholder();
64     }
65     if (rand.isVariable()) {
66         // Check if it's local or global
67         // Note: there is no name shadowing; this is enforced by the parser
68         const VariableName& var = rand.asVariable();
69         // TODO: Currently, this code implements lazy evaluation of locals.
70         // That is, the environment binds names to a closure, not a resolved value.
71         // Eager vs. lazy evaluation is an open issue:
72         // see https://github.com/unicode-org/message-format-wg/issues/299
73 
74         // Look up the variable in the environment
75         if (env.has(var)) {
76           // `var` is a local -- look it up
77           const Closure& rhs = env.lookup(var);
78           // Format the expression using the environment from the closure
79           return formatExpression(rhs.getEnv(), rhs.getExpr(), context, status);
80         }
81         // Variable wasn't found in locals -- check if it's global
82         FormattedPlaceholder result = evalArgument(var, context, status);
83         if (status == U_ILLEGAL_ARGUMENT_ERROR) {
84             status = U_ZERO_ERROR;
85             // Unbound variable -- set a resolution error
86             context.getErrors().setUnresolvedVariable(var, status);
87             // Use fallback per
88             // https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution
89             UnicodeString str(DOLLAR);
90             str += var;
91             return FormattedPlaceholder(str);
92         }
93         return result;
94     } else {
95         U_ASSERT(rand.isLiteral());
96         return formatLiteral(rand.asLiteral());
97     }
98 }
99 
100 // Resolves a function's options
resolveOptions(const Environment & env,const OptionMap & options,MessageContext & context,UErrorCode & status) const101 FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const OptionMap& options, MessageContext& context, UErrorCode& status) const {
102     LocalPointer<UVector> optionsVector(createUVector(status));
103     if (U_FAILURE(status)) {
104         return {};
105     }
106     LocalPointer<ResolvedFunctionOption> resolvedOpt;
107     for (int i = 0; i < options.size(); i++) {
108         const Option& opt = options.getOption(i, status);
109         if (U_FAILURE(status)) {
110             return {};
111         }
112         const UnicodeString& k = opt.getName();
113         const Operand& v = opt.getValue();
114 
115         // Options are fully evaluated before calling the function
116         // Format the operand
117         FormattedPlaceholder rhsVal = formatOperand(env, v, context, status);
118         if (U_FAILURE(status)) {
119             return {};
120         }
121         if (!rhsVal.isFallback()) {
122             resolvedOpt.adoptInstead(create<ResolvedFunctionOption>(ResolvedFunctionOption(k, rhsVal.asFormattable()), status));
123             if (U_FAILURE(status)) {
124                 return {};
125             }
126             optionsVector->adoptElement(resolvedOpt.orphan(), status);
127         }
128     }
129 
130     return FunctionOptions(std::move(*optionsVector), status);
131 }
132 
133 // Overload that dispatches on argument type. Syntax doesn't provide for options in this case.
evalFormatterCall(FormattedPlaceholder && argument,MessageContext & context,UErrorCode & status) const134 [[nodiscard]] FormattedPlaceholder MessageFormatter::evalFormatterCall(FormattedPlaceholder&& argument,
135                                                                        MessageContext& context,
136                                                                        UErrorCode& status) const {
137     if (U_FAILURE(status)) {
138         return {};
139     }
140 
141     // These cases should have been checked for already
142     U_ASSERT(!argument.isFallback() && !argument.isNullOperand());
143 
144     const Formattable& toFormat = argument.asFormattable();
145     switch (toFormat.getType()) {
146     case UFMT_OBJECT: {
147         const FormattableObject* obj = toFormat.getObject(status);
148         U_ASSERT(U_SUCCESS(status));
149         U_ASSERT(obj != nullptr);
150         const UnicodeString& type = obj->tag();
151         FunctionName functionName;
152         if (!getDefaultFormatterNameByType(type, functionName)) {
153             // No formatter for this type -- follow default behavior
154             break;
155         }
156         return evalFormatterCall(functionName,
157                                  std::move(argument),
158                                  FunctionOptions(),
159                                  context,
160                                  status);
161     }
162     default: {
163         // TODO: The array case isn't handled yet; not sure whether it's desirable
164         // to have a default list formatter
165         break;
166     }
167     }
168     // No formatter for this type, or it's a primitive type (which will be formatted later)
169     // -- just return the argument itself
170     return std::move(argument);
171 }
172 
173 // Overload that dispatches on function name
evalFormatterCall(const FunctionName & functionName,FormattedPlaceholder && argument,FunctionOptions && options,MessageContext & context,UErrorCode & status) const174 [[nodiscard]] FormattedPlaceholder MessageFormatter::evalFormatterCall(const FunctionName& functionName,
175                                                                  FormattedPlaceholder&& argument,
176                                                                  FunctionOptions&& options,
177                                                                  MessageContext& context,
178                                                                  UErrorCode& status) const {
179     if (U_FAILURE(status)) {
180         return {};
181     }
182 
183     DynamicErrors& errs = context.getErrors();
184 
185     UnicodeString fallback(COLON);
186     fallback += functionName;
187     if (!argument.isNullOperand()) {
188         fallback = argument.fallback;
189     }
190 
191     if (isFormatter(functionName)) {
192         LocalPointer<Formatter> formatterImpl(getFormatter(functionName, status));
193         if (U_FAILURE(status)) {
194             if (status == U_MF_FORMATTING_ERROR) {
195                 errs.setFormattingError(functionName, status);
196                 status = U_ZERO_ERROR;
197                 return {};
198             }
199             if (status == U_MF_UNKNOWN_FUNCTION_ERROR) {
200                 errs.setUnknownFunction(functionName, status);
201                 status = U_ZERO_ERROR;
202                 return {};
203             }
204             // Other errors are non-recoverable
205             return {};
206         }
207         U_ASSERT(formatterImpl != nullptr);
208 
209         UErrorCode savedStatus = status;
210         FormattedPlaceholder result = formatterImpl->format(std::move(argument), std::move(options), status);
211         // Update errors
212         if (savedStatus != status) {
213             if (U_FAILURE(status)) {
214                 if (status == U_MF_OPERAND_MISMATCH_ERROR) {
215                     status = U_ZERO_ERROR;
216                     errs.setOperandMismatchError(functionName, status);
217                 } else {
218                     status = U_ZERO_ERROR;
219                     // Convey any error generated by the formatter
220                     // as a formatting error, except for operand mismatch errors
221                     errs.setFormattingError(functionName, status);
222                 }
223                 return FormattedPlaceholder(fallback);
224             } else {
225                 // Ignore warnings
226                 status = savedStatus;
227             }
228         }
229         // Ignore the output if any errors occurred
230         if (errs.hasFormattingError()) {
231             return FormattedPlaceholder(fallback);
232         }
233         return result;
234     }
235     // No formatter with this name -- set error
236     if (isSelector(functionName)) {
237         errs.setFormattingError(functionName, status);
238     } else {
239         errs.setUnknownFunction(functionName, status);
240     }
241     return FormattedPlaceholder(fallback);
242 }
243 
244 // Per https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution
reservedFallback(const Expression & e)245 static UnicodeString reservedFallback (const Expression& e) {
246     UErrorCode localErrorCode = U_ZERO_ERROR;
247     const Operator* rator = e.getOperator(localErrorCode);
248     U_ASSERT(U_SUCCESS(localErrorCode));
249     const Reserved& r = rator->asReserved();
250 
251     // An empty Reserved isn't representable in the syntax
252     U_ASSERT(r.numParts() > 0);
253 
254     const UnicodeString& contents = r.getPart(0).unquoted();
255     // Parts should never be empty
256     U_ASSERT(contents.length() > 0);
257 
258     // Return first character of string
259     return UnicodeString(contents, 0, 1);
260 }
261 
262 // Formats an expression using `globalEnv` for the values of variables
formatExpression(const Environment & globalEnv,const Expression & expr,MessageContext & context,UErrorCode & status) const263 [[nodiscard]] FormattedPlaceholder MessageFormatter::formatExpression(const Environment& globalEnv,
264                                                                 const Expression& expr,
265                                                                 MessageContext& context,
266                                                                 UErrorCode &status) const {
267     if (U_FAILURE(status)) {
268         return {};
269     }
270 
271     // Formatting error
272     if (expr.isReserved()) {
273         context.getErrors().setReservedError(status);
274         return FormattedPlaceholder(reservedFallback(expr));
275     }
276 
277     const Operand& rand = expr.getOperand();
278     // Format the operand (formatOperand handles the case of a null operand)
279     FormattedPlaceholder randVal = formatOperand(globalEnv, rand, context, status);
280 
281     // Don't call the function on error values
282     if (randVal.isFallback()) {
283         return randVal;
284     }
285 
286     if (!expr.isFunctionCall()) {
287         // Dispatch based on type of `randVal`
288         return evalFormatterCall(std::move(randVal),
289                                  context,
290                                  status);
291     } else {
292         const Operator* rator = expr.getOperator(status);
293         U_ASSERT(U_SUCCESS(status));
294         const FunctionName& functionName = rator->getFunctionName();
295         const OptionMap& options = rator->getOptionsInternal();
296         // Resolve the options
297         FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status);
298 
299         // Call the formatter function
300         // The fallback for a nullary function call is the function name
301         UnicodeString fallback;
302         if (rand.isNull()) {
303             fallback = UnicodeString(COLON);
304             fallback += functionName;
305         } else {
306             fallback = randVal.fallback;
307         }
308         return evalFormatterCall(functionName,
309                                  std::move(randVal),
310                                  std::move(resolvedOptions),
311                                  context,
312                                  status);
313     }
314 }
315 
316 // Formats each text and expression part of a pattern, appending the results to `result`
formatPattern(MessageContext & context,const Environment & globalEnv,const Pattern & pat,UErrorCode & status,UnicodeString & result) const317 void MessageFormatter::formatPattern(MessageContext& context, const Environment& globalEnv, const Pattern& pat, UErrorCode &status, UnicodeString& result) const {
318     CHECK_ERROR(status);
319 
320     for (int32_t i = 0; i < pat.numParts(); i++) {
321         const PatternPart& part = pat.getPart(i);
322         if (part.isText()) {
323             result += part.asText();
324         } else if (part.isMarkup()) {
325             // Markup is ignored
326         } else {
327 	      // Format the expression
328 	      FormattedPlaceholder partVal = formatExpression(globalEnv, part.contents(), context, status);
329 	      // Force full evaluation, e.g. applying default formatters to
330 	      // unformatted input (or formatting numbers as strings)
331               UnicodeString partResult = partVal.formatToString(locale, status);
332               result += partResult;
333               // Handle formatting errors. `formatToString()` can't take a context and thus can't
334               // register an error directly
335               if (status == U_MF_FORMATTING_ERROR) {
336                   status = U_ZERO_ERROR;
337                   // TODO: The name of the formatter that failed is unavailable.
338                   // Not ideal, but it's hard for `formatToString()`
339                   // to pass along more detailed diagnostics
340                   context.getErrors().setFormattingError(status);
341               }
342         }
343     }
344 }
345 
346 // ------------------------------------------------------
347 // Selection
348 
349 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-selectors
350 // `res` is a vector of ResolvedSelectors
resolveSelectors(MessageContext & context,const Environment & env,UErrorCode & status,UVector & res) const351 void MessageFormatter::resolveSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UVector& res) const {
352     CHECK_ERROR(status);
353     U_ASSERT(!dataModel.hasPattern());
354 
355     const Expression* selectors = dataModel.getSelectorsInternal();
356     // 1. Let res be a new empty list of resolved values that support selection.
357     // (Implicit, since `res` is an out-parameter)
358     // 2. For each expression exp of the message's selectors
359     for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
360         // 2i. Let rv be the resolved value of exp.
361         ResolvedSelector rv = formatSelectorExpression(env, selectors[i], context, status);
362         if (rv.hasSelector()) {
363             // 2ii. If selection is supported for rv:
364             // (True if this code has been reached)
365         } else {
366             // 2iii. Else:
367             // Let nomatch be a resolved value for which selection always fails.
368             // Append nomatch as the last element of the list res.
369             // Emit a Selection Error.
370             // (Note: in this case, rv, being a fallback, serves as `nomatch`)
371             #if U_DEBUG
372             const DynamicErrors& err = context.getErrors();
373             U_ASSERT(err.hasError());
374             U_ASSERT(rv.argument().isFallback());
375             #endif
376         }
377         // 2ii(a). Append rv as the last element of the list res.
378         // (Also fulfills 2iii)
379         LocalPointer<ResolvedSelector> v(create<ResolvedSelector>(std::move(rv), status));
380         CHECK_ERROR(status);
381         res.adoptElement(v.orphan(), status);
382     }
383 }
384 
385 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences
386 // `keys` and `matches` are vectors of strings
matchSelectorKeys(const UVector & keys,MessageContext & context,ResolvedSelector && rv,UVector & keysOut,UErrorCode & status) const387 void MessageFormatter::matchSelectorKeys(const UVector& keys,
388                                          MessageContext& context,
389 					 ResolvedSelector&& rv,
390 					 UVector& keysOut,
391 					 UErrorCode& status) const {
392     CHECK_ERROR(status);
393 
394     if (!rv.hasSelector()) {
395         // Return an empty list of matches
396         return;
397     }
398 
399     auto selectorImpl = rv.getSelector();
400     U_ASSERT(selectorImpl != nullptr);
401     UErrorCode savedStatus = status;
402 
403     // Convert `keys` to an array
404     int32_t keysLen = keys.size();
405     UnicodeString* keysArr = new UnicodeString[keysLen];
406     if (keysArr == nullptr) {
407         status = U_MEMORY_ALLOCATION_ERROR;
408         return;
409     }
410     for (int32_t i = 0; i < keysLen; i++) {
411         const UnicodeString* k = static_cast<UnicodeString*>(keys[i]);
412         U_ASSERT(k != nullptr);
413         keysArr[i] = *k;
414     }
415     LocalArray<UnicodeString> adoptedKeys(keysArr);
416 
417     // Create an array to hold the output
418     UnicodeString* prefsArr = new UnicodeString[keysLen];
419     if (prefsArr == nullptr) {
420         status = U_MEMORY_ALLOCATION_ERROR;
421         return;
422     }
423     LocalArray<UnicodeString> adoptedPrefs(prefsArr);
424     int32_t prefsLen = 0;
425 
426     // Call the selector
427     selectorImpl->selectKey(rv.takeArgument(), rv.takeOptions(),
428                             adoptedKeys.getAlias(), keysLen, adoptedPrefs.getAlias(), prefsLen,
429                             status);
430 
431     // Update errors
432     if (savedStatus != status) {
433         if (U_FAILURE(status)) {
434             status = U_ZERO_ERROR;
435             context.getErrors().setSelectorError(rv.getSelectorName(), status);
436         } else {
437             // Ignore warnings
438             status = savedStatus;
439         }
440     }
441 
442     CHECK_ERROR(status);
443 
444     // Copy the resulting keys (if there was no error)
445     keysOut.removeAllElements();
446     for (int32_t i = 0; i < prefsLen; i++) {
447         UnicodeString* k = message2::create<UnicodeString>(std::move(prefsArr[i]), status);
448         if (k == nullptr) {
449             status = U_MEMORY_ALLOCATION_ERROR;
450             return;
451         }
452         keysOut.adoptElement(k, status);
453         CHECK_ERROR(status);
454     }
455 }
456 
457 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences
458 // `res` is a vector of FormattedPlaceholders;
459 // `pref` is a vector of vectors of strings
resolvePreferences(MessageContext & context,UVector & res,UVector & pref,UErrorCode & status) const460 void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res, UVector& pref, UErrorCode &status) const {
461     CHECK_ERROR(status);
462 
463     // 1. Let pref be a new empty list of lists of strings.
464     // (Implicit, since `pref` is an out-parameter)
465     UnicodeString ks;
466     LocalPointer<UnicodeString> ksP;
467     int32_t numVariants = dataModel.numVariants();
468     const Variant* variants = dataModel.getVariantsInternal();
469     // 2. For each index i in res
470     for (int32_t i = 0; i < (int32_t) res.size(); i++) {
471         // 2i. Let keys be a new empty list of strings.
472         LocalPointer<UVector> keys(createUVector(status));
473         CHECK_ERROR(status);
474         // 2ii. For each variant `var` of the message
475         for (int32_t variantNum = 0; variantNum < numVariants; variantNum++) {
476             const SelectorKeys& selectorKeys = variants[variantNum].getKeys();
477 
478             // Note: Here, `var` names the key list of `var`,
479             // not a Variant itself
480             const Key* var = selectorKeys.getKeysInternal();
481             // 2ii(a). Let `key` be the `var` key at position i.
482             U_ASSERT(i < selectorKeys.len); // established by semantic check in formatSelectors()
483             const Key& key = var[i];
484             // 2ii(b). If `key` is not the catch-all key '*'
485             if (!key.isWildcard()) {
486                 // 2ii(b)(a) Assert that key is a literal.
487                 // (Not needed)
488                 // 2ii(b)(b) Let `ks` be the resolved value of `key`.
489                 ks = key.asLiteral().unquoted();
490                 // 2ii(b)(c) Append `ks` as the last element of the list `keys`.
491                 ksP.adoptInstead(create<UnicodeString>(std::move(ks), status));
492                 CHECK_ERROR(status);
493                 keys->adoptElement(ksP.orphan(), status);
494             }
495         }
496         // 2iii. Let `rv` be the resolved value at index `i` of `res`.
497         U_ASSERT(i < res.size());
498         ResolvedSelector rv = std::move(*(static_cast<ResolvedSelector*>(res[i])));
499         // 2iv. Let matches be the result of calling the method MatchSelectorKeys(rv, keys)
500         LocalPointer<UVector> matches(createUVector(status));
501         matchSelectorKeys(*keys, context, std::move(rv), *matches, status);
502         // 2v. Append `matches` as the last element of the list `pref`
503         pref.adoptElement(matches.orphan(), status);
504     }
505 }
506 
507 // `v` is assumed to be a vector of strings
vectorFind(const UVector & v,const UnicodeString & k)508 static int32_t vectorFind(const UVector& v, const UnicodeString& k) {
509     for (int32_t i = 0; i < v.size(); i++) {
510         if (*static_cast<UnicodeString*>(v[i]) == k) {
511             return i;
512         }
513     }
514     return -1;
515 }
516 
vectorContains(const UVector & v,const UnicodeString & k)517 static UBool vectorContains(const UVector& v, const UnicodeString& k) {
518     return (vectorFind(v, k) != -1);
519 }
520 
521 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#filter-variants
522 // `pref` is a vector of vectors of strings. `vars` is a vector of PrioritizedVariants
filterVariants(const UVector & pref,UVector & vars,UErrorCode & status) const523 void MessageFormatter::filterVariants(const UVector& pref, UVector& vars, UErrorCode& status) const {
524     const Variant* variants = dataModel.getVariantsInternal();
525 
526     // 1. Let `vars` be a new empty list of variants.
527     // (Not needed since `vars` is an out-parameter)
528     // 2. For each variant `var` of the message:
529     for (int32_t j = 0; j < dataModel.numVariants(); j++) {
530         const SelectorKeys& selectorKeys = variants[j].getKeys();
531         const Pattern& p = variants[j].getPattern();
532 
533         // Note: Here, `var` names the key list of `var`,
534         // not a Variant itself
535         const Key* var = selectorKeys.getKeysInternal();
536         // 2i. For each index `i` in `pref`:
537         bool noMatch = false;
538         for (int32_t i = 0; i < (int32_t) pref.size(); i++) {
539             // 2i(a). Let `key` be the `var` key at position `i`.
540             U_ASSERT(i < selectorKeys.len);
541             const Key& key = var[i];
542             // 2i(b). If key is the catch-all key '*':
543             if (key.isWildcard()) {
544                 // 2i(b)(a). Continue the inner loop on pref.
545                 continue;
546             }
547             // 2i(c). Assert that `key` is a literal.
548             // (Not needed)
549             // 2i(d). Let `ks` be the resolved value of `key`.
550             UnicodeString ks = key.asLiteral().unquoted();
551             // 2i(e). Let `matches` be the list of strings at index `i` of `pref`.
552             const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings
553             // 2i(f). If `matches` includes `ks`
554             if (vectorContains(matches, ks)) {
555                 // 2i(f)(a). Continue the inner loop on `pref`.
556                 continue;
557             }
558             // 2i(g). Else:
559             // 2i(g)(a). Continue the outer loop on message variants.
560             noMatch = true;
561             break;
562         }
563         if (!noMatch) {
564             // Append `var` as the last element of the list `vars`.
565 	    PrioritizedVariant* tuple = create<PrioritizedVariant>(PrioritizedVariant(-1, selectorKeys, p), status);
566             CHECK_ERROR(status);
567             vars.adoptElement(tuple, status);
568         }
569     }
570 }
571 
572 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#sort-variants
573 // Leaves the preferred variant as element 0 in `sortable`
574 // Note: this sorts in-place, so `sortable` is just `vars`
575 // `pref` is a vector of vectors of strings; `vars` is a vector of PrioritizedVariants
sortVariants(const UVector & pref,UVector & vars,UErrorCode & status) const576 void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCode& status) const {
577     CHECK_ERROR(status);
578 
579 // Note: steps 1 and 2 are omitted since we use `vars` as `sortable` (we sort in-place)
580     // 1. Let `sortable` be a new empty list of (integer, variant) tuples.
581     // (Not needed since `sortable` is an out-parameter)
582     // 2. For each variant `var` of `vars`
583     // 2i. Let tuple be a new tuple (-1, var).
584     // 2ii. Append `tuple` as the last element of the list `sortable`.
585 
586     // 3. Let `len` be the integer count of items in `pref`.
587     int32_t len = pref.size();
588     // 4. Let `i` be `len` - 1.
589     int32_t i = len - 1;
590     // 5. While i >= 0:
591     while (i >= 0) {
592         // 5i. Let `matches` be the list of strings at index `i` of `pref`.
593         U_ASSERT(pref[i] != nullptr);
594 	const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings
595         // 5ii. Let `minpref` be the integer count of items in `matches`.
596         int32_t minpref = matches.size();
597         // 5iii. For each tuple `tuple` of `sortable`:
598         for (int32_t j = 0; j < vars.size(); j++) {
599             U_ASSERT(vars[j] != nullptr);
600             PrioritizedVariant& tuple = *(static_cast<PrioritizedVariant*>(vars[j]));
601             // 5iii(a). Let matchpref be an integer with the value minpref.
602             int32_t matchpref = minpref;
603             // 5iii(b). Let `key` be the tuple variant key at position `i`.
604             const Key* tupleVariantKeys = tuple.keys.getKeysInternal();
605             U_ASSERT(i < tuple.keys.len); // Given by earlier semantic checking
606             const Key& key = tupleVariantKeys[i];
607             // 5iii(c) If `key` is not the catch-all key '*':
608             if (!key.isWildcard()) {
609                 // 5iii(c)(a). Assert that `key` is a literal.
610                 // (Not needed)
611                 // 5iii(c)(b). Let `ks` be the resolved value of `key`.
612                 UnicodeString ks = key.asLiteral().unquoted();
613                 // 5iii(c)(c) Let matchpref be the integer position of ks in `matches`.
614                 matchpref = vectorFind(matches, ks);
615                 U_ASSERT(matchpref >= 0);
616             }
617             // 5iii(d) Set the `tuple` integer value as matchpref.
618             tuple.priority = matchpref;
619         }
620         // 5iv. Set `sortable` to be the result of calling the method SortVariants(`sortable`)
621         vars.sort(comparePrioritizedVariants, status);
622         CHECK_ERROR(status);
623         // 5v. Set `i` to be `i` - 1.
624         i--;
625     }
626     // The caller is responsible for steps 6 and 7
627     // 6. Let `var` be the `variant` element of the first element of `sortable`.
628     // 7. Select the pattern of `var`
629 }
630 
631 
632 // Evaluate the operand
resolveVariables(const Environment & env,const Operand & rand,MessageContext & context,UErrorCode & status) const633 ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, const Operand& rand, MessageContext& context, UErrorCode &status) const {
634     if (U_FAILURE(status)) {
635         return {};
636     }
637 
638     if (rand.isNull()) {
639         return ResolvedSelector(FormattedPlaceholder());
640     }
641 
642     if (rand.isLiteral()) {
643         return ResolvedSelector(formatLiteral(rand.asLiteral()));
644     }
645 
646     // Must be variable
647     const VariableName& var = rand.asVariable();
648     // Resolve the variable
649     if (env.has(var)) {
650         const Closure& referent = env.lookup(var);
651         // Resolve the referent
652         return resolveVariables(referent.getEnv(), referent.getExpr(), context, status);
653     }
654     // Either this is a global var or an unbound var --
655     // either way, it can't be bound to a function call.
656     // Check globals
657     FormattedPlaceholder val = evalArgument(var, context, status);
658     if (status == U_ILLEGAL_ARGUMENT_ERROR) {
659         status = U_ZERO_ERROR;
660         // Unresolved variable -- could be a previous warning. Nothing to resolve
661         U_ASSERT(context.getErrors().hasUnresolvedVariableError());
662         return ResolvedSelector(FormattedPlaceholder(var));
663     }
664     // Pass through other errors
665     return ResolvedSelector(std::move(val));
666 }
667 
668 // Evaluate the expression except for not performing the top-level function call
669 // (which is expected to be a selector, but may not be, in error cases)
resolveVariables(const Environment & env,const Expression & expr,MessageContext & context,UErrorCode & status) const670 ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
671                                                     const Expression& expr,
672                                                     MessageContext& context,
673                                                     UErrorCode &status) const {
674     if (U_FAILURE(status)) {
675         return {};
676     }
677 
678     // A `reserved` is an error
679     if (expr.isReserved()) {
680         context.getErrors().setReservedError(status);
681         return ResolvedSelector(FormattedPlaceholder(reservedFallback(expr)));
682     }
683 
684     // Function call -- resolve the operand and options
685     if (expr.isFunctionCall()) {
686         const Operator* rator = expr.getOperator(status);
687         U_ASSERT(U_SUCCESS(status));
688         // Already checked that rator is non-reserved
689         const FunctionName& selectorName = rator->getFunctionName();
690         if (isSelector(selectorName)) {
691             auto selector = getSelector(context, selectorName, status);
692             if (U_SUCCESS(status)) {
693                 FunctionOptions resolvedOptions = resolveOptions(env, rator->getOptionsInternal(), context, status);
694                 // Operand may be the null argument, but resolveVariables() handles that
695                 FormattedPlaceholder argument = formatOperand(env, expr.getOperand(), context, status);
696                 return ResolvedSelector(selectorName, selector, std::move(resolvedOptions), std::move(argument));
697             }
698         } else if (isFormatter(selectorName)) {
699             context.getErrors().setSelectorError(selectorName, status);
700         } else {
701             context.getErrors().setUnknownFunction(selectorName, status);
702         }
703         // Non-selector used as selector; an error would have been recorded earlier
704         UnicodeString fallback(COLON);
705         fallback += selectorName;
706         if (!expr.getOperand().isNull()) {
707             fallback = formatOperand(env, expr.getOperand(), context, status).fallback;
708         }
709         return ResolvedSelector(FormattedPlaceholder(fallback));
710     } else {
711         // Might be a variable reference, so expand one more level of variable
712         return resolveVariables(env, expr.getOperand(), context, status);
713     }
714 }
715 
formatSelectorExpression(const Environment & globalEnv,const Expression & expr,MessageContext & context,UErrorCode & status) const716 ResolvedSelector MessageFormatter::formatSelectorExpression(const Environment& globalEnv, const Expression& expr, MessageContext& context, UErrorCode &status) const {
717     if (U_FAILURE(status)) {
718         return {};
719     }
720 
721     // Resolve expression to determine if it's a function call
722     ResolvedSelector exprResult = resolveVariables(globalEnv, expr, context, status);
723 
724     DynamicErrors& err = context.getErrors();
725 
726     // If there is a selector, then `resolveVariables()` recorded it in the context
727     if (exprResult.hasSelector()) {
728         // Check if there was an error
729         if (exprResult.argument().isFallback()) {
730             // Use a null expression if it's a syntax or data model warning;
731             // create a valid (non-fallback) formatted placeholder from the
732             // fallback string otherwise
733             if (err.hasSyntaxError() || err.hasDataModelError()) {
734                 return ResolvedSelector(FormattedPlaceholder()); // Null operand
735             } else {
736                 return ResolvedSelector(exprResult.takeArgument());
737             }
738         }
739         return exprResult;
740     }
741 
742     // No selector was found; error should already have been set
743     U_ASSERT(err.hasMissingSelectorAnnotationError() || err.hasUnknownFunctionError() || err.hasSelectorError());
744     return ResolvedSelector(FormattedPlaceholder(exprResult.argument().fallback));
745 }
746 
formatSelectors(MessageContext & context,const Environment & env,UErrorCode & status,UnicodeString & result) const747 void MessageFormatter::formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const {
748     CHECK_ERROR(status);
749 
750     // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection
751 
752     // Resolve Selectors
753     // res is a vector of FormattedPlaceholders
754     LocalPointer<UVector> res(createUVector(status));
755     CHECK_ERROR(status);
756     resolveSelectors(context, env, status, *res);
757 
758     // Resolve Preferences
759     // pref is a vector of vectors of strings
760     LocalPointer<UVector> pref(createUVector(status));
761     CHECK_ERROR(status);
762     resolvePreferences(context, *res, *pref, status);
763 
764     // Filter Variants
765     // vars is a vector of PrioritizedVariants
766     LocalPointer<UVector> vars(createUVector(status));
767     CHECK_ERROR(status);
768     filterVariants(*pref, *vars, status);
769 
770     // Sort Variants and select the final pattern
771     // Note: `sortable` in the spec is just `vars` here,
772     // which is sorted in-place
773     sortVariants(*pref, *vars, status);
774 
775     CHECK_ERROR(status);
776 
777     // 6. Let `var` be the `variant` element of the first element of `sortable`.
778     U_ASSERT(vars->size() > 0); // This should have been checked earlier (having 0 variants would be a data model error)
779     const PrioritizedVariant& var = *(static_cast<PrioritizedVariant*>(vars->elementAt(0)));
780     // 7. Select the pattern of `var`
781     const Pattern& pat = var.pat;
782 
783     // Format the pattern
784     formatPattern(context, env, pat, status, result);
785 }
786 
787 // Note: this is non-const due to the function registry being non-const, which is in turn
788 // due to the values (`FormatterFactory` objects in the map) having mutable state.
789 // In other words, formatting a message can mutate the underlying `MessageFormatter` by changing
790 // state within the factory objects that represent custom formatters.
formatToString(const MessageArguments & arguments,UErrorCode & status)791 UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments, UErrorCode &status) {
792     EMPTY_ON_ERROR(status);
793 
794     // Create a new environment that will store closures for all local variables
795     Environment* env = Environment::create(status);
796     // Create a new context with the given arguments and the `errors` structure
797     MessageContext context(arguments, *errors, status);
798 
799     // Check for unresolved variable errors
800     checkDeclarations(context, env, status);
801     LocalPointer<Environment> globalEnv(env);
802 
803     UnicodeString result;
804     if (dataModel.hasPattern()) {
805         formatPattern(context, *globalEnv, dataModel.getPattern(), status, result);
806     } else {
807         // Check for errors/warnings -- if so, then the result of pattern selection is the fallback value
808         // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection
809         const DynamicErrors& err = context.getErrors();
810         if (err.hasSyntaxError() || err.hasDataModelError()) {
811             result += REPLACEMENT;
812         } else {
813             formatSelectors(context, *globalEnv, status, result);
814         }
815     }
816     // Update status according to all errors seen while formatting
817     context.checkErrors(status);
818     return result;
819 }
820 
821 // ----------------------------------------
822 // Checking for resolution errors
823 
check(MessageContext & context,const Environment & localEnv,const OptionMap & options,UErrorCode & status) const824 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const OptionMap& options, UErrorCode& status) const {
825     // Check the RHS of each option
826     for (int32_t i = 0; i < options.size(); i++) {
827         const Option& opt = options.getOption(i, status);
828         CHECK_ERROR(status);
829         check(context, localEnv, opt.getValue(), status);
830     }
831 }
832 
check(MessageContext & context,const Environment & localEnv,const Operand & rand,UErrorCode & status) const833 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const Operand& rand, UErrorCode& status) const {
834     // Nothing to check for literals
835     if (rand.isLiteral() || rand.isNull()) {
836         return;
837     }
838 
839     // Check that variable is in scope
840     const VariableName& var = rand.asVariable();
841     // Check local scope
842     if (localEnv.has(var)) {
843         return;
844     }
845     // Check global scope
846     context.getGlobal(var, status);
847     if (status == U_ILLEGAL_ARGUMENT_ERROR) {
848         status = U_ZERO_ERROR;
849         context.getErrors().setUnresolvedVariable(var, status);
850     }
851     // Either `var` is a global, or some other error occurred.
852     // Nothing more to do either way
853     return;
854 }
855 
check(MessageContext & context,const Environment & localEnv,const Expression & expr,UErrorCode & status) const856 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const Expression& expr, UErrorCode& status) const {
857     // Check for unresolved variable errors
858     if (expr.isFunctionCall()) {
859         const Operator* rator = expr.getOperator(status);
860         U_ASSERT(U_SUCCESS(status));
861         const Operand& rand = expr.getOperand();
862         check(context, localEnv, rand, status);
863         check(context, localEnv, rator->getOptionsInternal(), status);
864     }
865 }
866 
867 // Check for resolution errors
checkDeclarations(MessageContext & context,Environment * & env,UErrorCode & status) const868 void MessageFormatter::checkDeclarations(MessageContext& context, Environment*& env, UErrorCode &status) const {
869     CHECK_ERROR(status);
870 
871     const Binding* decls = getDataModel().getLocalVariablesInternal();
872     U_ASSERT(env != nullptr && decls != nullptr);
873 
874     for (int32_t i = 0; i < getDataModel().bindingsLen; i++) {
875         const Binding& decl = decls[i];
876         const Expression& rhs = decl.getValue();
877         check(context, *env, rhs, status);
878 
879         // Add a closure to the global environment,
880         // memoizing the value of localEnv up to this point
881 
882         // Add the LHS to the environment for checking the next declaration
883         env = Environment::create(decl.getVariable(), Closure(rhs, *env), env, status);
884         CHECK_ERROR(status);
885     }
886 }
887 } // namespace message2
888 
889 U_NAMESPACE_END
890 
891 #endif /* #if !UCONFIG_NO_MF2 */
892 
893 #endif /* #if !UCONFIG_NO_FORMATTING */
894