ICU 76.1  76.1
messageformat2.h
Go to the documentation of this file.
1 // © 2024 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 
4 #include "unicode/utypes.h"
5 
6 #ifndef MESSAGEFORMAT2_H
7 #define MESSAGEFORMAT2_H
8 
9 #if U_SHOW_CPLUSPLUS_API
10 
11 #if !UCONFIG_NO_FORMATTING
12 
13 #if !UCONFIG_NO_MF2
14 
21 #include "unicode/messageformat2_data_model.h"
22 #include "unicode/messageformat2_function_registry.h"
23 #include "unicode/unistr.h"
24 
25 #ifndef U_HIDE_DEPRECATED_API
26 
27 U_NAMESPACE_BEGIN
28 
29 namespace message2 {
30 
31  class Environment;
32  class MessageContext;
33  class ResolvedSelector;
34  class StaticErrors;
35 
52  // Note: This class does not currently inherit from the existing
53  // `Format` class.
54  public:
62  MessageFormatter& operator=(MessageFormatter&&) noexcept;
69  virtual ~MessageFormatter();
70 
85  UnicodeString formatToString(const MessageArguments& arguments, UErrorCode &status);
86 
// Not yet implemented. This inline stub ignores `arguments`: if `status`
// indicates success on entry it is set to U_UNSUPPORTED_ERROR; a failure code
// already present in `status` is left unchanged. The returned FormattedMessage
// is constructed from the resulting `status`.
102  FormattedMessage format(const MessageArguments& arguments, UErrorCode &status) const {
103  (void) arguments; // explicitly discard the parameter; it is not otherwise used in this body
104  if (U_SUCCESS(status)) {
105  status = U_UNSUPPORTED_ERROR;
106  }
107  return FormattedMessage(status);
108  }
109 
// Returns a const reference to the `locale` member (the locale this
// MessageFormatter was created with); no copy is made.
118  const Locale& getLocale() const { return locale; }
119 
129  UnicodeString getPattern() const;
130 
140  const MFDataModel& getDataModel() const;
141 
156  U_MF_BEST_EFFORT = 0,
164  U_MF_STRICT
165  } UMFErrorHandlingBehavior;
166 
// Mutable builder for MessageFormatter: each part of the formatter (locale,
// pattern or data model, custom function registry, error-handling behavior)
// is set through its own setter, and build() then produces the formatter.
173  class U_I18N_API Builder : public UObject {
174  private:
175  friend class MessageFormatter;
176 
177  // The pattern to be parsed to generate the formatted message
178  UnicodeString pattern;
// NOTE(review): presumably these two flags record whether `setPattern()` or
// `setDataModel()` supplied the message -- confirm against the implementation
179  bool hasPattern = false;
180  bool hasDataModel = false;
181  // The data model to be used to generate the formatted message
182  // Initialized either by `setDataModel()`, or by the parser
183  // through a call to `setPattern()`
184  MFDataModel dataModel;
185  // Normalized representation of the pattern;
186  // ignored if `setPattern()` wasn't called
187  UnicodeString normalizedInput;
188  // Errors (internal representation of parse errors)
189  // Ignored if `setPattern()` wasn't called
// No in-class initializer here; initialization is left to code outside this listing
190  StaticErrors* errors;
191  Locale locale;
192  // Not owned
// No in-class initializer here either; NOTE(review): because the registry is not
// owned, callers presumably must keep it alive while it is in use -- confirm
193  const MFFunctionRegistry* customMFFunctionRegistry;
194  // Error behavior; see comment in `MessageFormatter` class
195  bool signalErrors = false;
196 
197  void clearState();
198  public:
// Every setter below returns `Builder&` (presumably `*this`, to allow chaining -- confirm)
208  Builder& setLocale(const Locale& locale);
// Takes the pattern text plus two out-parameters: `parseError` and `status`
224  Builder& setPattern(const UnicodeString& pattern, UParseError& parseError, UErrorCode& status);
// Takes the registry by const reference; the corresponding member is a non-owned pointer
238  Builder& setFunctionRegistry(const MFFunctionRegistry& functionRegistry);
// Takes the data model by rvalue reference
248  Builder& setDataModel(MFDataModel&& dataModel);
// `type` is one of the UMFErrorHandlingBehavior values (U_MF_BEST_EFFORT, U_MF_STRICT)
278  Builder& setErrorHandlingBehavior(UMFErrorHandlingBehavior type);
// Declared const: building a MessageFormatter does not modify the Builder
293  MessageFormatter build(UErrorCode& status) const;
305  Builder(UErrorCode& status);
312  virtual ~Builder();
313  }; // class MessageFormatter::Builder
314 
315  // TODO: Shouldn't be public; only used for testing
// Returns a const reference to `normalizedInput`, the normalized version of
// the input string (optional whitespace removed); no copy is made.
324  const UnicodeString& getNormalizedPattern() const { return normalizedInput; }
325 
326  private:
327  friend class Builder;
328  friend class MessageContext;
329 
330  MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &status);
331 
332  MessageFormatter() = delete; // default constructor not implemented
333 
334  // Do not define default assignment operator
335  const MessageFormatter &operator=(const MessageFormatter &) = delete;
336 
337  ResolvedSelector resolveVariables(const Environment& env, const data_model::Operand&, MessageContext&, UErrorCode &) const;
338  ResolvedSelector resolveVariables(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode &) const;
339 
340  // Selection methods
341 
342  // Takes a vector of FormattedPlaceholders
343  void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const;
344  // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output)
345  void filterVariants(const UVector&, UVector&, UErrorCode&) const;
346  // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output)
347  void sortVariants(const UVector&, UVector&, UErrorCode&) const;
348  // Takes a vector of strings (input) and a vector of strings (output)
349  void matchSelectorKeys(const UVector&, MessageContext&, ResolvedSelector&& rv, UVector&, UErrorCode&) const;
350  // Takes a vector of FormattedPlaceholders (input),
351  // and a vector of vectors of strings (output)
352  void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const;
353 
354  // Formatting methods
355  [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const;
356  void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
357  // Formats a call to a formatting function
358  // Dispatches on argument type
359  [[nodiscard]] FormattedPlaceholder evalFormatterCall(FormattedPlaceholder&& argument,
360  MessageContext& context,
361  UErrorCode& status) const;
362  // Dispatches on function name
363  [[nodiscard]] FormattedPlaceholder evalFormatterCall(const FunctionName& functionName,
364  FormattedPlaceholder&& argument,
365  FunctionOptions&& options,
366  MessageContext& context,
367  UErrorCode& status) const;
368  // Formats an expression that appears as a selector
369  ResolvedSelector formatSelectorExpression(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode&) const;
370  // Formats an expression that appears in a pattern or as the definition of a local variable
371  [[nodiscard]] FormattedPlaceholder formatExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const;
372  [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const;
373  [[nodiscard]] FormattedPlaceholder formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const;
374  [[nodiscard]] FormattedPlaceholder evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const;
375  void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const;
376 
377  // Function registry methods
// Returns true when `customMFFunctionRegistry` is non-null, i.e. a custom
// registry was supplied (the pointer may be null if none was).
378  bool hasCustomMFFunctionRegistry() const {
379  return (customMFFunctionRegistry != nullptr);
380  }
381 
382  // Precondition: custom function registry exists
383  // Note: this is a const method returning a const reference, even though the values
384  // in the MFFunctionRegistry are mutable (a FormatterFactory can have mutable state)
385  const MFFunctionRegistry& getCustomMFFunctionRegistry() const;
386 
387  bool isCustomFormatter(const FunctionName&) const;
388  FormatterFactory* lookupFormatterFactory(const FunctionName&, UErrorCode& status) const;
389  bool isBuiltInSelector(const FunctionName&) const;
390  bool isBuiltInFormatter(const FunctionName&) const;
391  bool isCustomSelector(const FunctionName&) const;
392  const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const;
// True if isBuiltInSelector(fn) or isCustomSelector(fn) holds; the custom
// check is only evaluated when the built-in check fails (short-circuit ||).
393  bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); }
// True if isBuiltInFormatter(fn) or isCustomFormatter(fn) holds; the custom
// check is only evaluated when the built-in check fails (short-circuit ||).
394  bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); }
395  const Formatter* lookupFormatter(const FunctionName&, UErrorCode&) const;
396 
397  Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const;
398  Formatter* getFormatter(const FunctionName&, UErrorCode&) const;
399  bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const;
400 
401  // Checking for resolution errors
402  void checkDeclarations(MessageContext&, Environment*&, UErrorCode&) const;
403  void check(MessageContext&, const Environment&, const data_model::Expression&, UErrorCode&) const;
404  void check(MessageContext&, const Environment&, const data_model::Operand&, UErrorCode&) const;
405  void check(MessageContext&, const Environment&, const OptionMap&, UErrorCode&) const;
406 
407  void initErrors(UErrorCode&);
408  void clearErrors() const;
409  void cleanup() noexcept;
410 
411  // The locale this MessageFormatter was created with
412  /* const */ Locale locale;
413 
414  // Registry for built-in functions
415  MFFunctionRegistry standardMFFunctionRegistry;
416  // Registry for custom functions; may be null if no custom registry supplied
417  // Note: this is *not* owned by the MessageFormatter object
418  // The reason for this choice is to have a non-destructive MessageFormatter::Builder,
419  // while also not requiring the function registry to be deeply-copyable. Making the
420  // function registry copyable would impose a requirement on any implementations
421  // of the FormatterFactory and SelectorFactory interfaces to implement a custom
422  // clone() method, which is necessary to avoid sharing between copies of the
423  // function registry (and thus double-frees)
424  // Not deeply immutable (the values in the function registry are mutable,
425  // as a FormatterFactory can have mutable state)
426  const MFFunctionRegistry* customMFFunctionRegistry;
427 
428  // Data model, representing the parsed message
429  MFDataModel dataModel;
430 
431  // Normalized version of the input string (optional whitespace removed)
432  UnicodeString normalizedInput;
433 
434  // Errors -- only used while parsing and checking for data model errors; then
435  // the MessageContext keeps track of errors
436  // Must be a raw pointer to avoid including the internal header file
437  // defining StaticErrors
438  // Owned by `this`
439  StaticErrors* errors = nullptr;
440 
441  // Error handling behavior.
442  // If true, then formatting methods set their UErrorCode arguments
443  // to signal MessageFormat errors, and no useful output is returned.
444  // If false, then MessageFormat errors are not signaled and the
445  // formatting methods return best-effort output.
446  // The default is false.
447  bool signalErrors = false;
448  }; // class MessageFormatter
449 
450 } // namespace message2
451 
452 U_NAMESPACE_END
453 
454 #endif // U_HIDE_DEPRECATED_API
455 
456 #endif /* #if !UCONFIG_NO_MF2 */
457 
458 #endif /* #if !UCONFIG_NO_FORMATTING */
459 
460 #endif /* U_SHOW_CPLUSPLUS_API */
461 
462 #endif // MESSAGEFORMAT2_H
463 
464 // eof
The mutable Builder class allows each part of the MessageFormatter to be initialized separately; call...
#define U_SUCCESS(x)
Does the error code indicate success?
Definition: utypes.h:742
const UnicodeString & getNormalizedPattern() const
Returns a string consisting of the input with optional spaces removed.
C++ API: Unicode String.
The MessageArguments class represents the named arguments to a message.
C++ API: Formats messages using the draft MessageFormat 2.0.
The Literal class corresponds to the literal nonterminal in the MessageFormat 2 grammar, https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf and the Literal interface defined in https://github.com/unicode-org/message-format-wg/blob/main/spec/data-model.md#expressions.
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:316
Requested operation not supported in current context.
Definition: utypes.h:482
const Locale & getLocale() const
Accesses the locale that this MessageFormatter object was created with.
The MFDataModel class describes a parsed representation of the text of a message. ...
FormattedMessage format(const MessageArguments &arguments, UErrorCode &status) const
Not yet implemented; formats the message to a FormattedMessage object, using the data model that was ...
UMFErrorHandlingBehavior
Used in conjunction with the MessageFormatter::Builder::setErrorHandlingBehavior() method...
The Operand class corresponds to the operand nonterminal in the MessageFormat 2 grammar, https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf .
The Expression class corresponds to the expression nonterminal in the MessageFormat 2 grammar and the...
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:430
Not yet implemented: The result of a message formatting operation.
Structure encapsulating named options passed to a custom selector or formatter.
A FormattedPlaceholder encapsulates an input value (a message2::Formattable) together with an optio...
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
Basic definitions for ICU, for both C and C++ APIs.
A Pattern is a sequence of formattable parts.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:295
Defines mappings from names of formatters and selectors to functions implementing them...
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:195