1 // © 2024 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include "unicode/utypes.h"
5
6 #if !UCONFIG_NO_FORMATTING
7
8 #if !UCONFIG_NO_MF2
9
10 #include "unicode/messageformat2_arguments.h"
11 #include "unicode/messageformat2_data_model.h"
12 #include "unicode/messageformat2_formattable.h"
13 #include "unicode/messageformat2.h"
14 #include "unicode/unistr.h"
15 #include "messageformat2_allocation.h"
16 #include "messageformat2_evaluation.h"
17 #include "messageformat2_macros.h"
18
19
20 U_NAMESPACE_BEGIN
21
22 namespace message2 {
23
24 using namespace data_model;
25
26 // ------------------------------------------------------
27 // Formatting
28
29 // The result of formatting a literal is just itself.
evalLiteral(const Literal & lit)30 static Formattable evalLiteral(const Literal& lit) {
31 return Formattable(lit.unquoted());
32 }
33
34 // Assumes that `var` is a message argument; returns the argument's value.
evalArgument(const VariableName & var,MessageContext & context,UErrorCode & errorCode) const35 [[nodiscard]] FormattedPlaceholder MessageFormatter::evalArgument(const VariableName& var, MessageContext& context, UErrorCode& errorCode) const {
36 if (U_SUCCESS(errorCode)) {
37 // The fallback for a variable name is itself.
38 UnicodeString str(DOLLAR);
39 str += var;
40 const Formattable* val = context.getGlobal(var, errorCode);
41 if (U_SUCCESS(errorCode)) {
42 return (FormattedPlaceholder(*val, str));
43 }
44 }
45 return {};
46 }
47
48 // Returns the contents of the literal
formatLiteral(const Literal & lit) const49 [[nodiscard]] FormattedPlaceholder MessageFormatter::formatLiteral(const Literal& lit) const {
50 // The fallback for a literal is itself.
51 return FormattedPlaceholder(evalLiteral(lit), lit.quoted());
52 }
53
formatOperand(const Environment & env,const Operand & rand,MessageContext & context,UErrorCode & status) const54 [[nodiscard]] FormattedPlaceholder MessageFormatter::formatOperand(const Environment& env,
55 const Operand& rand,
56 MessageContext& context,
57 UErrorCode &status) const {
58 if (U_FAILURE(status)) {
59 return {};
60 }
61
62 if (rand.isNull()) {
63 return FormattedPlaceholder();
64 }
65 if (rand.isVariable()) {
66 // Check if it's local or global
67 // Note: there is no name shadowing; this is enforced by the parser
68 const VariableName& var = rand.asVariable();
69 // TODO: Currently, this code implements lazy evaluation of locals.
70 // That is, the environment binds names to a closure, not a resolved value.
71 // Eager vs. lazy evaluation is an open issue:
72 // see https://github.com/unicode-org/message-format-wg/issues/299
73
74 // Look up the variable in the environment
75 if (env.has(var)) {
76 // `var` is a local -- look it up
77 const Closure& rhs = env.lookup(var);
78 // Format the expression using the environment from the closure
79 return formatExpression(rhs.getEnv(), rhs.getExpr(), context, status);
80 }
81 // Variable wasn't found in locals -- check if it's global
82 FormattedPlaceholder result = evalArgument(var, context, status);
83 if (status == U_ILLEGAL_ARGUMENT_ERROR) {
84 status = U_ZERO_ERROR;
85 // Unbound variable -- set a resolution error
86 context.getErrors().setUnresolvedVariable(var, status);
87 // Use fallback per
88 // https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution
89 UnicodeString str(DOLLAR);
90 str += var;
91 return FormattedPlaceholder(str);
92 }
93 return result;
94 } else {
95 U_ASSERT(rand.isLiteral());
96 return formatLiteral(rand.asLiteral());
97 }
98 }
99
100 // Resolves a function's options
resolveOptions(const Environment & env,const OptionMap & options,MessageContext & context,UErrorCode & status) const101 FunctionOptions MessageFormatter::resolveOptions(const Environment& env, const OptionMap& options, MessageContext& context, UErrorCode& status) const {
102 LocalPointer<UVector> optionsVector(createUVector(status));
103 if (U_FAILURE(status)) {
104 return {};
105 }
106 LocalPointer<ResolvedFunctionOption> resolvedOpt;
107 for (int i = 0; i < options.size(); i++) {
108 const Option& opt = options.getOption(i, status);
109 if (U_FAILURE(status)) {
110 return {};
111 }
112 const UnicodeString& k = opt.getName();
113 const Operand& v = opt.getValue();
114
115 // Options are fully evaluated before calling the function
116 // Format the operand
117 FormattedPlaceholder rhsVal = formatOperand(env, v, context, status);
118 if (U_FAILURE(status)) {
119 return {};
120 }
121 if (!rhsVal.isFallback()) {
122 resolvedOpt.adoptInstead(create<ResolvedFunctionOption>(ResolvedFunctionOption(k, rhsVal.asFormattable()), status));
123 if (U_FAILURE(status)) {
124 return {};
125 }
126 optionsVector->adoptElement(resolvedOpt.orphan(), status);
127 }
128 }
129
130 return FunctionOptions(std::move(*optionsVector), status);
131 }
132
133 // Overload that dispatches on argument type. Syntax doesn't provide for options in this case.
evalFormatterCall(FormattedPlaceholder && argument,MessageContext & context,UErrorCode & status) const134 [[nodiscard]] FormattedPlaceholder MessageFormatter::evalFormatterCall(FormattedPlaceholder&& argument,
135 MessageContext& context,
136 UErrorCode& status) const {
137 if (U_FAILURE(status)) {
138 return {};
139 }
140
141 // These cases should have been checked for already
142 U_ASSERT(!argument.isFallback() && !argument.isNullOperand());
143
144 const Formattable& toFormat = argument.asFormattable();
145 switch (toFormat.getType()) {
146 case UFMT_OBJECT: {
147 const FormattableObject* obj = toFormat.getObject(status);
148 U_ASSERT(U_SUCCESS(status));
149 U_ASSERT(obj != nullptr);
150 const UnicodeString& type = obj->tag();
151 FunctionName functionName;
152 if (!getDefaultFormatterNameByType(type, functionName)) {
153 // No formatter for this type -- follow default behavior
154 break;
155 }
156 return evalFormatterCall(functionName,
157 std::move(argument),
158 FunctionOptions(),
159 context,
160 status);
161 }
162 default: {
163 // TODO: The array case isn't handled yet; not sure whether it's desirable
164 // to have a default list formatter
165 break;
166 }
167 }
168 // No formatter for this type, or it's a primitive type (which will be formatted later)
169 // -- just return the argument itself
170 return std::move(argument);
171 }
172
173 // Overload that dispatches on function name
evalFormatterCall(const FunctionName & functionName,FormattedPlaceholder && argument,FunctionOptions && options,MessageContext & context,UErrorCode & status) const174 [[nodiscard]] FormattedPlaceholder MessageFormatter::evalFormatterCall(const FunctionName& functionName,
175 FormattedPlaceholder&& argument,
176 FunctionOptions&& options,
177 MessageContext& context,
178 UErrorCode& status) const {
179 if (U_FAILURE(status)) {
180 return {};
181 }
182
183 DynamicErrors& errs = context.getErrors();
184
185 UnicodeString fallback(COLON);
186 fallback += functionName;
187 if (!argument.isNullOperand()) {
188 fallback = argument.fallback;
189 }
190
191 if (isFormatter(functionName)) {
192 LocalPointer<Formatter> formatterImpl(getFormatter(functionName, status));
193 if (U_FAILURE(status)) {
194 if (status == U_MF_FORMATTING_ERROR) {
195 errs.setFormattingError(functionName, status);
196 status = U_ZERO_ERROR;
197 return {};
198 }
199 if (status == U_MF_UNKNOWN_FUNCTION_ERROR) {
200 errs.setUnknownFunction(functionName, status);
201 status = U_ZERO_ERROR;
202 return {};
203 }
204 // Other errors are non-recoverable
205 return {};
206 }
207 U_ASSERT(formatterImpl != nullptr);
208
209 UErrorCode savedStatus = status;
210 FormattedPlaceholder result = formatterImpl->format(std::move(argument), std::move(options), status);
211 // Update errors
212 if (savedStatus != status) {
213 if (U_FAILURE(status)) {
214 if (status == U_MF_OPERAND_MISMATCH_ERROR) {
215 status = U_ZERO_ERROR;
216 errs.setOperandMismatchError(functionName, status);
217 } else {
218 status = U_ZERO_ERROR;
219 // Convey any error generated by the formatter
220 // as a formatting error, except for operand mismatch errors
221 errs.setFormattingError(functionName, status);
222 }
223 return FormattedPlaceholder(fallback);
224 } else {
225 // Ignore warnings
226 status = savedStatus;
227 }
228 }
229 // Ignore the output if any errors occurred
230 if (errs.hasFormattingError()) {
231 return FormattedPlaceholder(fallback);
232 }
233 return result;
234 }
235 // No formatter with this name -- set error
236 if (isSelector(functionName)) {
237 errs.setFormattingError(functionName, status);
238 } else {
239 errs.setUnknownFunction(functionName, status);
240 }
241 return FormattedPlaceholder(fallback);
242 }
243
244 // Per https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#fallback-resolution
reservedFallback(const Expression & e)245 static UnicodeString reservedFallback (const Expression& e) {
246 UErrorCode localErrorCode = U_ZERO_ERROR;
247 const Operator* rator = e.getOperator(localErrorCode);
248 U_ASSERT(U_SUCCESS(localErrorCode));
249 const Reserved& r = rator->asReserved();
250
251 // An empty Reserved isn't representable in the syntax
252 U_ASSERT(r.numParts() > 0);
253
254 const UnicodeString& contents = r.getPart(0).unquoted();
255 // Parts should never be empty
256 U_ASSERT(contents.length() > 0);
257
258 // Return first character of string
259 return UnicodeString(contents, 0, 1);
260 }
261
262 // Formats an expression using `globalEnv` for the values of variables
formatExpression(const Environment & globalEnv,const Expression & expr,MessageContext & context,UErrorCode & status) const263 [[nodiscard]] FormattedPlaceholder MessageFormatter::formatExpression(const Environment& globalEnv,
264 const Expression& expr,
265 MessageContext& context,
266 UErrorCode &status) const {
267 if (U_FAILURE(status)) {
268 return {};
269 }
270
271 // Formatting error
272 if (expr.isReserved()) {
273 context.getErrors().setReservedError(status);
274 return FormattedPlaceholder(reservedFallback(expr));
275 }
276
277 const Operand& rand = expr.getOperand();
278 // Format the operand (formatOperand handles the case of a null operand)
279 FormattedPlaceholder randVal = formatOperand(globalEnv, rand, context, status);
280
281 // Don't call the function on error values
282 if (randVal.isFallback()) {
283 return randVal;
284 }
285
286 if (!expr.isFunctionCall()) {
287 // Dispatch based on type of `randVal`
288 return evalFormatterCall(std::move(randVal),
289 context,
290 status);
291 } else {
292 const Operator* rator = expr.getOperator(status);
293 U_ASSERT(U_SUCCESS(status));
294 const FunctionName& functionName = rator->getFunctionName();
295 const OptionMap& options = rator->getOptionsInternal();
296 // Resolve the options
297 FunctionOptions resolvedOptions = resolveOptions(globalEnv, options, context, status);
298
299 // Call the formatter function
300 // The fallback for a nullary function call is the function name
301 UnicodeString fallback;
302 if (rand.isNull()) {
303 fallback = UnicodeString(COLON);
304 fallback += functionName;
305 } else {
306 fallback = randVal.fallback;
307 }
308 return evalFormatterCall(functionName,
309 std::move(randVal),
310 std::move(resolvedOptions),
311 context,
312 status);
313 }
314 }
315
316 // Formats each text and expression part of a pattern, appending the results to `result`
formatPattern(MessageContext & context,const Environment & globalEnv,const Pattern & pat,UErrorCode & status,UnicodeString & result) const317 void MessageFormatter::formatPattern(MessageContext& context, const Environment& globalEnv, const Pattern& pat, UErrorCode &status, UnicodeString& result) const {
318 CHECK_ERROR(status);
319
320 for (int32_t i = 0; i < pat.numParts(); i++) {
321 const PatternPart& part = pat.getPart(i);
322 if (part.isText()) {
323 result += part.asText();
324 } else if (part.isMarkup()) {
325 // Markup is ignored
326 } else {
327 // Format the expression
328 FormattedPlaceholder partVal = formatExpression(globalEnv, part.contents(), context, status);
329 // Force full evaluation, e.g. applying default formatters to
330 // unformatted input (or formatting numbers as strings)
331 UnicodeString partResult = partVal.formatToString(locale, status);
332 result += partResult;
333 // Handle formatting errors. `formatToString()` can't take a context and thus can't
334 // register an error directly
335 if (status == U_MF_FORMATTING_ERROR) {
336 status = U_ZERO_ERROR;
337 // TODO: The name of the formatter that failed is unavailable.
338 // Not ideal, but it's hard for `formatToString()`
339 // to pass along more detailed diagnostics
340 context.getErrors().setFormattingError(status);
341 }
342 }
343 }
344 }
345
346 // ------------------------------------------------------
347 // Selection
348
349 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-selectors
350 // `res` is a vector of ResolvedSelectors
resolveSelectors(MessageContext & context,const Environment & env,UErrorCode & status,UVector & res) const351 void MessageFormatter::resolveSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UVector& res) const {
352 CHECK_ERROR(status);
353 U_ASSERT(!dataModel.hasPattern());
354
355 const Expression* selectors = dataModel.getSelectorsInternal();
356 // 1. Let res be a new empty list of resolved values that support selection.
357 // (Implicit, since `res` is an out-parameter)
358 // 2. For each expression exp of the message's selectors
359 for (int32_t i = 0; i < dataModel.numSelectors(); i++) {
360 // 2i. Let rv be the resolved value of exp.
361 ResolvedSelector rv = formatSelectorExpression(env, selectors[i], context, status);
362 if (rv.hasSelector()) {
363 // 2ii. If selection is supported for rv:
364 // (True if this code has been reached)
365 } else {
366 // 2iii. Else:
367 // Let nomatch be a resolved value for which selection always fails.
368 // Append nomatch as the last element of the list res.
369 // Emit a Selection Error.
370 // (Note: in this case, rv, being a fallback, serves as `nomatch`)
371 #if U_DEBUG
372 const DynamicErrors& err = context.getErrors();
373 U_ASSERT(err.hasError());
374 U_ASSERT(rv.argument().isFallback());
375 #endif
376 }
377 // 2ii(a). Append rv as the last element of the list res.
378 // (Also fulfills 2iii)
379 LocalPointer<ResolvedSelector> v(create<ResolvedSelector>(std::move(rv), status));
380 CHECK_ERROR(status);
381 res.adoptElement(v.orphan(), status);
382 }
383 }
384
385 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences
386 // `keys` and `matches` are vectors of strings
matchSelectorKeys(const UVector & keys,MessageContext & context,ResolvedSelector && rv,UVector & keysOut,UErrorCode & status) const387 void MessageFormatter::matchSelectorKeys(const UVector& keys,
388 MessageContext& context,
389 ResolvedSelector&& rv,
390 UVector& keysOut,
391 UErrorCode& status) const {
392 CHECK_ERROR(status);
393
394 if (!rv.hasSelector()) {
395 // Return an empty list of matches
396 return;
397 }
398
399 auto selectorImpl = rv.getSelector();
400 U_ASSERT(selectorImpl != nullptr);
401 UErrorCode savedStatus = status;
402
403 // Convert `keys` to an array
404 int32_t keysLen = keys.size();
405 UnicodeString* keysArr = new UnicodeString[keysLen];
406 if (keysArr == nullptr) {
407 status = U_MEMORY_ALLOCATION_ERROR;
408 return;
409 }
410 for (int32_t i = 0; i < keysLen; i++) {
411 const UnicodeString* k = static_cast<UnicodeString*>(keys[i]);
412 U_ASSERT(k != nullptr);
413 keysArr[i] = *k;
414 }
415 LocalArray<UnicodeString> adoptedKeys(keysArr);
416
417 // Create an array to hold the output
418 UnicodeString* prefsArr = new UnicodeString[keysLen];
419 if (prefsArr == nullptr) {
420 status = U_MEMORY_ALLOCATION_ERROR;
421 return;
422 }
423 LocalArray<UnicodeString> adoptedPrefs(prefsArr);
424 int32_t prefsLen = 0;
425
426 // Call the selector
427 selectorImpl->selectKey(rv.takeArgument(), rv.takeOptions(),
428 adoptedKeys.getAlias(), keysLen, adoptedPrefs.getAlias(), prefsLen,
429 status);
430
431 // Update errors
432 if (savedStatus != status) {
433 if (U_FAILURE(status)) {
434 status = U_ZERO_ERROR;
435 context.getErrors().setSelectorError(rv.getSelectorName(), status);
436 } else {
437 // Ignore warnings
438 status = savedStatus;
439 }
440 }
441
442 CHECK_ERROR(status);
443
444 // Copy the resulting keys (if there was no error)
445 keysOut.removeAllElements();
446 for (int32_t i = 0; i < prefsLen; i++) {
447 UnicodeString* k = message2::create<UnicodeString>(std::move(prefsArr[i]), status);
448 if (k == nullptr) {
449 status = U_MEMORY_ALLOCATION_ERROR;
450 return;
451 }
452 keysOut.adoptElement(k, status);
453 CHECK_ERROR(status);
454 }
455 }
456
457 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#resolve-preferences
458 // `res` is a vector of FormattedPlaceholders;
459 // `pref` is a vector of vectors of strings
resolvePreferences(MessageContext & context,UVector & res,UVector & pref,UErrorCode & status) const460 void MessageFormatter::resolvePreferences(MessageContext& context, UVector& res, UVector& pref, UErrorCode &status) const {
461 CHECK_ERROR(status);
462
463 // 1. Let pref be a new empty list of lists of strings.
464 // (Implicit, since `pref` is an out-parameter)
465 UnicodeString ks;
466 LocalPointer<UnicodeString> ksP;
467 int32_t numVariants = dataModel.numVariants();
468 const Variant* variants = dataModel.getVariantsInternal();
469 // 2. For each index i in res
470 for (int32_t i = 0; i < (int32_t) res.size(); i++) {
471 // 2i. Let keys be a new empty list of strings.
472 LocalPointer<UVector> keys(createUVector(status));
473 CHECK_ERROR(status);
474 // 2ii. For each variant `var` of the message
475 for (int32_t variantNum = 0; variantNum < numVariants; variantNum++) {
476 const SelectorKeys& selectorKeys = variants[variantNum].getKeys();
477
478 // Note: Here, `var` names the key list of `var`,
479 // not a Variant itself
480 const Key* var = selectorKeys.getKeysInternal();
481 // 2ii(a). Let `key` be the `var` key at position i.
482 U_ASSERT(i < selectorKeys.len); // established by semantic check in formatSelectors()
483 const Key& key = var[i];
484 // 2ii(b). If `key` is not the catch-all key '*'
485 if (!key.isWildcard()) {
486 // 2ii(b)(a) Assert that key is a literal.
487 // (Not needed)
488 // 2ii(b)(b) Let `ks` be the resolved value of `key`.
489 ks = key.asLiteral().unquoted();
490 // 2ii(b)(c) Append `ks` as the last element of the list `keys`.
491 ksP.adoptInstead(create<UnicodeString>(std::move(ks), status));
492 CHECK_ERROR(status);
493 keys->adoptElement(ksP.orphan(), status);
494 }
495 }
496 // 2iii. Let `rv` be the resolved value at index `i` of `res`.
497 U_ASSERT(i < res.size());
498 ResolvedSelector rv = std::move(*(static_cast<ResolvedSelector*>(res[i])));
499 // 2iv. Let matches be the result of calling the method MatchSelectorKeys(rv, keys)
500 LocalPointer<UVector> matches(createUVector(status));
501 matchSelectorKeys(*keys, context, std::move(rv), *matches, status);
502 // 2v. Append `matches` as the last element of the list `pref`
503 pref.adoptElement(matches.orphan(), status);
504 }
505 }
506
507 // `v` is assumed to be a vector of strings
vectorFind(const UVector & v,const UnicodeString & k)508 static int32_t vectorFind(const UVector& v, const UnicodeString& k) {
509 for (int32_t i = 0; i < v.size(); i++) {
510 if (*static_cast<UnicodeString*>(v[i]) == k) {
511 return i;
512 }
513 }
514 return -1;
515 }
516
vectorContains(const UVector & v,const UnicodeString & k)517 static UBool vectorContains(const UVector& v, const UnicodeString& k) {
518 return (vectorFind(v, k) != -1);
519 }
520
521 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#filter-variants
522 // `pref` is a vector of vectors of strings. `vars` is a vector of PrioritizedVariants
filterVariants(const UVector & pref,UVector & vars,UErrorCode & status) const523 void MessageFormatter::filterVariants(const UVector& pref, UVector& vars, UErrorCode& status) const {
524 const Variant* variants = dataModel.getVariantsInternal();
525
526 // 1. Let `vars` be a new empty list of variants.
527 // (Not needed since `vars` is an out-parameter)
528 // 2. For each variant `var` of the message:
529 for (int32_t j = 0; j < dataModel.numVariants(); j++) {
530 const SelectorKeys& selectorKeys = variants[j].getKeys();
531 const Pattern& p = variants[j].getPattern();
532
533 // Note: Here, `var` names the key list of `var`,
534 // not a Variant itself
535 const Key* var = selectorKeys.getKeysInternal();
536 // 2i. For each index `i` in `pref`:
537 bool noMatch = false;
538 for (int32_t i = 0; i < (int32_t) pref.size(); i++) {
539 // 2i(a). Let `key` be the `var` key at position `i`.
540 U_ASSERT(i < selectorKeys.len);
541 const Key& key = var[i];
542 // 2i(b). If key is the catch-all key '*':
543 if (key.isWildcard()) {
544 // 2i(b)(a). Continue the inner loop on pref.
545 continue;
546 }
547 // 2i(c). Assert that `key` is a literal.
548 // (Not needed)
549 // 2i(d). Let `ks` be the resolved value of `key`.
550 UnicodeString ks = key.asLiteral().unquoted();
551 // 2i(e). Let `matches` be the list of strings at index `i` of `pref`.
552 const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings
553 // 2i(f). If `matches` includes `ks`
554 if (vectorContains(matches, ks)) {
555 // 2i(f)(a). Continue the inner loop on `pref`.
556 continue;
557 }
558 // 2i(g). Else:
559 // 2i(g)(a). Continue the outer loop on message variants.
560 noMatch = true;
561 break;
562 }
563 if (!noMatch) {
564 // Append `var` as the last element of the list `vars`.
565 PrioritizedVariant* tuple = create<PrioritizedVariant>(PrioritizedVariant(-1, selectorKeys, p), status);
566 CHECK_ERROR(status);
567 vars.adoptElement(tuple, status);
568 }
569 }
570 }
571
572 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#sort-variants
573 // Leaves the preferred variant as element 0 in `sortable`
574 // Note: this sorts in-place, so `sortable` is just `vars`
575 // `pref` is a vector of vectors of strings; `vars` is a vector of PrioritizedVariants
sortVariants(const UVector & pref,UVector & vars,UErrorCode & status) const576 void MessageFormatter::sortVariants(const UVector& pref, UVector& vars, UErrorCode& status) const {
577 CHECK_ERROR(status);
578
579 // Note: steps 1 and 2 are omitted since we use `vars` as `sortable` (we sort in-place)
580 // 1. Let `sortable` be a new empty list of (integer, variant) tuples.
581 // (Not needed since `sortable` is an out-parameter)
582 // 2. For each variant `var` of `vars`
583 // 2i. Let tuple be a new tuple (-1, var).
584 // 2ii. Append `tuple` as the last element of the list `sortable`.
585
586 // 3. Let `len` be the integer count of items in `pref`.
587 int32_t len = pref.size();
588 // 4. Let `i` be `len` - 1.
589 int32_t i = len - 1;
590 // 5. While i >= 0:
591 while (i >= 0) {
592 // 5i. Let `matches` be the list of strings at index `i` of `pref`.
593 U_ASSERT(pref[i] != nullptr);
594 const UVector& matches = *(static_cast<UVector*>(pref[i])); // `matches` is a vector of strings
595 // 5ii. Let `minpref` be the integer count of items in `matches`.
596 int32_t minpref = matches.size();
597 // 5iii. For each tuple `tuple` of `sortable`:
598 for (int32_t j = 0; j < vars.size(); j++) {
599 U_ASSERT(vars[j] != nullptr);
600 PrioritizedVariant& tuple = *(static_cast<PrioritizedVariant*>(vars[j]));
601 // 5iii(a). Let matchpref be an integer with the value minpref.
602 int32_t matchpref = minpref;
603 // 5iii(b). Let `key` be the tuple variant key at position `i`.
604 const Key* tupleVariantKeys = tuple.keys.getKeysInternal();
605 U_ASSERT(i < tuple.keys.len); // Given by earlier semantic checking
606 const Key& key = tupleVariantKeys[i];
607 // 5iii(c) If `key` is not the catch-all key '*':
608 if (!key.isWildcard()) {
609 // 5iii(c)(a). Assert that `key` is a literal.
610 // (Not needed)
611 // 5iii(c)(b). Let `ks` be the resolved value of `key`.
612 UnicodeString ks = key.asLiteral().unquoted();
613 // 5iii(c)(c) Let matchpref be the integer position of ks in `matches`.
614 matchpref = vectorFind(matches, ks);
615 U_ASSERT(matchpref >= 0);
616 }
617 // 5iii(d) Set the `tuple` integer value as matchpref.
618 tuple.priority = matchpref;
619 }
620 // 5iv. Set `sortable` to be the result of calling the method SortVariants(`sortable`)
621 vars.sort(comparePrioritizedVariants, status);
622 CHECK_ERROR(status);
623 // 5v. Set `i` to be `i` - 1.
624 i--;
625 }
626 // The caller is responsible for steps 6 and 7
627 // 6. Let `var` be the `variant` element of the first element of `sortable`.
628 // 7. Select the pattern of `var`
629 }
630
631
632 // Evaluate the operand
resolveVariables(const Environment & env,const Operand & rand,MessageContext & context,UErrorCode & status) const633 ResolvedSelector MessageFormatter::resolveVariables(const Environment& env, const Operand& rand, MessageContext& context, UErrorCode &status) const {
634 if (U_FAILURE(status)) {
635 return {};
636 }
637
638 if (rand.isNull()) {
639 return ResolvedSelector(FormattedPlaceholder());
640 }
641
642 if (rand.isLiteral()) {
643 return ResolvedSelector(formatLiteral(rand.asLiteral()));
644 }
645
646 // Must be variable
647 const VariableName& var = rand.asVariable();
648 // Resolve the variable
649 if (env.has(var)) {
650 const Closure& referent = env.lookup(var);
651 // Resolve the referent
652 return resolveVariables(referent.getEnv(), referent.getExpr(), context, status);
653 }
654 // Either this is a global var or an unbound var --
655 // either way, it can't be bound to a function call.
656 // Check globals
657 FormattedPlaceholder val = evalArgument(var, context, status);
658 if (status == U_ILLEGAL_ARGUMENT_ERROR) {
659 status = U_ZERO_ERROR;
660 // Unresolved variable -- could be a previous warning. Nothing to resolve
661 U_ASSERT(context.getErrors().hasUnresolvedVariableError());
662 return ResolvedSelector(FormattedPlaceholder(var));
663 }
664 // Pass through other errors
665 return ResolvedSelector(std::move(val));
666 }
667
668 // Evaluate the expression except for not performing the top-level function call
669 // (which is expected to be a selector, but may not be, in error cases)
resolveVariables(const Environment & env,const Expression & expr,MessageContext & context,UErrorCode & status) const670 ResolvedSelector MessageFormatter::resolveVariables(const Environment& env,
671 const Expression& expr,
672 MessageContext& context,
673 UErrorCode &status) const {
674 if (U_FAILURE(status)) {
675 return {};
676 }
677
678 // A `reserved` is an error
679 if (expr.isReserved()) {
680 context.getErrors().setReservedError(status);
681 return ResolvedSelector(FormattedPlaceholder(reservedFallback(expr)));
682 }
683
684 // Function call -- resolve the operand and options
685 if (expr.isFunctionCall()) {
686 const Operator* rator = expr.getOperator(status);
687 U_ASSERT(U_SUCCESS(status));
688 // Already checked that rator is non-reserved
689 const FunctionName& selectorName = rator->getFunctionName();
690 if (isSelector(selectorName)) {
691 auto selector = getSelector(context, selectorName, status);
692 if (U_SUCCESS(status)) {
693 FunctionOptions resolvedOptions = resolveOptions(env, rator->getOptionsInternal(), context, status);
694 // Operand may be the null argument, but resolveVariables() handles that
695 FormattedPlaceholder argument = formatOperand(env, expr.getOperand(), context, status);
696 return ResolvedSelector(selectorName, selector, std::move(resolvedOptions), std::move(argument));
697 }
698 } else if (isFormatter(selectorName)) {
699 context.getErrors().setSelectorError(selectorName, status);
700 } else {
701 context.getErrors().setUnknownFunction(selectorName, status);
702 }
703 // Non-selector used as selector; an error would have been recorded earlier
704 UnicodeString fallback(COLON);
705 fallback += selectorName;
706 if (!expr.getOperand().isNull()) {
707 fallback = formatOperand(env, expr.getOperand(), context, status).fallback;
708 }
709 return ResolvedSelector(FormattedPlaceholder(fallback));
710 } else {
711 // Might be a variable reference, so expand one more level of variable
712 return resolveVariables(env, expr.getOperand(), context, status);
713 }
714 }
715
formatSelectorExpression(const Environment & globalEnv,const Expression & expr,MessageContext & context,UErrorCode & status) const716 ResolvedSelector MessageFormatter::formatSelectorExpression(const Environment& globalEnv, const Expression& expr, MessageContext& context, UErrorCode &status) const {
717 if (U_FAILURE(status)) {
718 return {};
719 }
720
721 // Resolve expression to determine if it's a function call
722 ResolvedSelector exprResult = resolveVariables(globalEnv, expr, context, status);
723
724 DynamicErrors& err = context.getErrors();
725
726 // If there is a selector, then `resolveVariables()` recorded it in the context
727 if (exprResult.hasSelector()) {
728 // Check if there was an error
729 if (exprResult.argument().isFallback()) {
730 // Use a null expression if it's a syntax or data model warning;
731 // create a valid (non-fallback) formatted placeholder from the
732 // fallback string otherwise
733 if (err.hasSyntaxError() || err.hasDataModelError()) {
734 return ResolvedSelector(FormattedPlaceholder()); // Null operand
735 } else {
736 return ResolvedSelector(exprResult.takeArgument());
737 }
738 }
739 return exprResult;
740 }
741
742 // No selector was found; error should already have been set
743 U_ASSERT(err.hasMissingSelectorAnnotationError() || err.hasUnknownFunctionError() || err.hasSelectorError());
744 return ResolvedSelector(FormattedPlaceholder(exprResult.argument().fallback));
745 }
746
formatSelectors(MessageContext & context,const Environment & env,UErrorCode & status,UnicodeString & result) const747 void MessageFormatter::formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const {
748 CHECK_ERROR(status);
749
750 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection
751
752 // Resolve Selectors
753 // res is a vector of FormattedPlaceholders
754 LocalPointer<UVector> res(createUVector(status));
755 CHECK_ERROR(status);
756 resolveSelectors(context, env, status, *res);
757
758 // Resolve Preferences
759 // pref is a vector of vectors of strings
760 LocalPointer<UVector> pref(createUVector(status));
761 CHECK_ERROR(status);
762 resolvePreferences(context, *res, *pref, status);
763
764 // Filter Variants
765 // vars is a vector of PrioritizedVariants
766 LocalPointer<UVector> vars(createUVector(status));
767 CHECK_ERROR(status);
768 filterVariants(*pref, *vars, status);
769
770 // Sort Variants and select the final pattern
771 // Note: `sortable` in the spec is just `vars` here,
772 // which is sorted in-place
773 sortVariants(*pref, *vars, status);
774
775 CHECK_ERROR(status);
776
777 // 6. Let `var` be the `variant` element of the first element of `sortable`.
778 U_ASSERT(vars->size() > 0); // This should have been checked earlier (having 0 variants would be a data model error)
779 const PrioritizedVariant& var = *(static_cast<PrioritizedVariant*>(vars->elementAt(0)));
780 // 7. Select the pattern of `var`
781 const Pattern& pat = var.pat;
782
783 // Format the pattern
784 formatPattern(context, env, pat, status, result);
785 }
786
787 // Note: this is non-const due to the function registry being non-const, which is in turn
788 // due to the values (`FormatterFactory` objects in the map) having mutable state.
789 // In other words, formatting a message can mutate the underlying `MessageFormatter` by changing
790 // state within the factory objects that represent custom formatters.
formatToString(const MessageArguments & arguments,UErrorCode & status)791 UnicodeString MessageFormatter::formatToString(const MessageArguments& arguments, UErrorCode &status) {
792 EMPTY_ON_ERROR(status);
793
794 // Create a new environment that will store closures for all local variables
795 Environment* env = Environment::create(status);
796 // Create a new context with the given arguments and the `errors` structure
797 MessageContext context(arguments, *errors, status);
798
799 // Check for unresolved variable errors
800 checkDeclarations(context, env, status);
801 LocalPointer<Environment> globalEnv(env);
802
803 UnicodeString result;
804 if (dataModel.hasPattern()) {
805 formatPattern(context, *globalEnv, dataModel.getPattern(), status, result);
806 } else {
807 // Check for errors/warnings -- if so, then the result of pattern selection is the fallback value
808 // See https://github.com/unicode-org/message-format-wg/blob/main/spec/formatting.md#pattern-selection
809 const DynamicErrors& err = context.getErrors();
810 if (err.hasSyntaxError() || err.hasDataModelError()) {
811 result += REPLACEMENT;
812 } else {
813 formatSelectors(context, *globalEnv, status, result);
814 }
815 }
816 // Update status according to all errors seen while formatting
817 context.checkErrors(status);
818 return result;
819 }
820
821 // ----------------------------------------
822 // Checking for resolution errors
823
check(MessageContext & context,const Environment & localEnv,const OptionMap & options,UErrorCode & status) const824 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const OptionMap& options, UErrorCode& status) const {
825 // Check the RHS of each option
826 for (int32_t i = 0; i < options.size(); i++) {
827 const Option& opt = options.getOption(i, status);
828 CHECK_ERROR(status);
829 check(context, localEnv, opt.getValue(), status);
830 }
831 }
832
check(MessageContext & context,const Environment & localEnv,const Operand & rand,UErrorCode & status) const833 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const Operand& rand, UErrorCode& status) const {
834 // Nothing to check for literals
835 if (rand.isLiteral() || rand.isNull()) {
836 return;
837 }
838
839 // Check that variable is in scope
840 const VariableName& var = rand.asVariable();
841 // Check local scope
842 if (localEnv.has(var)) {
843 return;
844 }
845 // Check global scope
846 context.getGlobal(var, status);
847 if (status == U_ILLEGAL_ARGUMENT_ERROR) {
848 status = U_ZERO_ERROR;
849 context.getErrors().setUnresolvedVariable(var, status);
850 }
851 // Either `var` is a global, or some other error occurred.
852 // Nothing more to do either way
853 return;
854 }
855
check(MessageContext & context,const Environment & localEnv,const Expression & expr,UErrorCode & status) const856 void MessageFormatter::check(MessageContext& context, const Environment& localEnv, const Expression& expr, UErrorCode& status) const {
857 // Check for unresolved variable errors
858 if (expr.isFunctionCall()) {
859 const Operator* rator = expr.getOperator(status);
860 U_ASSERT(U_SUCCESS(status));
861 const Operand& rand = expr.getOperand();
862 check(context, localEnv, rand, status);
863 check(context, localEnv, rator->getOptionsInternal(), status);
864 }
865 }
866
867 // Check for resolution errors
checkDeclarations(MessageContext & context,Environment * & env,UErrorCode & status) const868 void MessageFormatter::checkDeclarations(MessageContext& context, Environment*& env, UErrorCode &status) const {
869 CHECK_ERROR(status);
870
871 const Binding* decls = getDataModel().getLocalVariablesInternal();
872 U_ASSERT(env != nullptr && decls != nullptr);
873
874 for (int32_t i = 0; i < getDataModel().bindingsLen; i++) {
875 const Binding& decl = decls[i];
876 const Expression& rhs = decl.getValue();
877 check(context, *env, rhs, status);
878
879 // Add a closure to the global environment,
880 // memoizing the value of localEnv up to this point
881
882 // Add the LHS to the environment for checking the next declaration
883 env = Environment::create(decl.getVariable(), Closure(rhs, *env), env, status);
884 CHECK_ERROR(status);
885 }
886 }
887 } // namespace message2
888
889 U_NAMESPACE_END
890
891 #endif /* #if !UCONFIG_NO_MF2 */
892
893 #endif /* #if !UCONFIG_NO_FORMATTING */
894