ICU 76.1
coll.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 * Copyright (C) 1996-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
8 */
9 
52 #ifndef COLL_H
53 #define COLL_H
54 
55 #include "unicode/utypes.h"
56 
57 #if U_SHOW_CPLUSPLUS_API
58 
59 #if !UCONFIG_NO_COLLATION
60 
61 #include <functional>
62 #include <string_view>
63 #include <type_traits>
64 
65 #include "unicode/char16ptr.h"
66 #include "unicode/uobject.h"
67 #include "unicode/ucol.h"
68 #include "unicode/unorm.h"
69 #include "unicode/locid.h"
70 #include "unicode/uniset.h"
71 #include "unicode/umisc.h"
72 #include "unicode/unistr.h"
73 #include "unicode/uiter.h"
74 #include "unicode/stringpiece.h"
75 
76 U_NAMESPACE_BEGIN
77 
78 class StringEnumeration;
79 
80 #if !UCONFIG_NO_SERVICE
81 
84 class CollatorFactory;
85 #endif
86 
90 class CollationKey;
91 
173 class U_I18N_API Collator : public UObject {
174 public:
175 
176  // Collator public enums -----------------------------------------------
177 
203  enum ECollationStrength  // C++-side names for the collation strength levels; each enumerator aliases the matching UCOL_* constant
204  {
205  PRIMARY = UCOL_PRIMARY, // 0
206  SECONDARY = UCOL_SECONDARY, // 1
207  TERTIARY = UCOL_TERTIARY, // 2
208  QUATERNARY = UCOL_QUATERNARY, // 3
209  IDENTICAL = UCOL_IDENTICAL // 15
210  };
211 
212 
213  // Cannot use #ifndef U_HIDE_DEPRECATED_API for the following, it is
214  // used by virtual methods that cannot have that conditional.
215 #ifndef U_FORCE_HIDE_DEPRECATED_API
216 
226  enum EComparisonResult  // comparison outcomes for the compare() overloads guarded by U_FORCE_HIDE_DEPRECATED_API; each enumerator aliases the matching UCOL_* result
227  {
228  LESS = UCOL_LESS, // -1
229  EQUAL = UCOL_EQUAL, // 0
230  GREATER = UCOL_GREATER // 1
231  };
232 #endif // U_FORCE_HIDE_DEPRECATED_API
233 
234  // Collator public destructor -----------------------------------------
235 
240  virtual ~Collator();
241 
242  // Collator public methods --------------------------------------------
243 
262  virtual bool operator==(const Collator& other) const;
263 
271  virtual bool operator!=(const Collator& other) const;
272 
278  virtual Collator* clone() const = 0;
279 
299  static Collator* U_EXPORT2 createInstance(UErrorCode& err);
300 
334  static Collator* U_EXPORT2 createInstance(const Locale& loc, UErrorCode& err);
335 
336 #ifndef U_FORCE_HIDE_DEPRECATED_API
337 
348  virtual EComparisonResult compare(const UnicodeString& source,
349  const UnicodeString& target) const;
350 #endif // U_FORCE_HIDE_DEPRECATED_API
351 
364  virtual UCollationResult compare(const UnicodeString& source,
365  const UnicodeString& target,
366  UErrorCode &status) const = 0;
367 
368 #ifndef U_FORCE_HIDE_DEPRECATED_API
369 
381  virtual EComparisonResult compare(const UnicodeString& source,
382  const UnicodeString& target,
383  int32_t length) const;
384 #endif // U_FORCE_HIDE_DEPRECATED_API
385 
399  virtual UCollationResult compare(const UnicodeString& source,
400  const UnicodeString& target,
401  int32_t length,
402  UErrorCode &status) const = 0;
403 
404 #ifndef U_FORCE_HIDE_DEPRECATED_API
405 
438  virtual EComparisonResult compare(const char16_t* source, int32_t sourceLength,
439  const char16_t* target, int32_t targetLength)
440  const;
441 #endif // U_FORCE_HIDE_DEPRECATED_API
442 
459  virtual UCollationResult compare(const char16_t* source, int32_t sourceLength,
460  const char16_t* target, int32_t targetLength,
461  UErrorCode &status) const = 0;
462 
474  virtual UCollationResult compare(UCharIterator &sIter,
475  UCharIterator &tIter,
476  UErrorCode &status) const;
477 
491  virtual UCollationResult compareUTF8(const StringPiece &source,
492  const StringPiece &target,
493  UErrorCode &status) const;
494 
513  virtual CollationKey& getCollationKey(const UnicodeString& source,
514  CollationKey& key,
515  UErrorCode& status) const = 0;
516 
536  virtual CollationKey& getCollationKey(const char16_t*source,
537  int32_t sourceLength,
538  CollationKey& key,
539  UErrorCode& status) const = 0;
544  virtual int32_t hashCode() const = 0;
545 
546 #ifndef U_FORCE_HIDE_DEPRECATED_API
547 
559  virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const = 0;
560 #endif // U_FORCE_HIDE_DEPRECATED_API
561 
571  UBool greater(const UnicodeString& source, const UnicodeString& target)
572  const;
573 
583  UBool greaterOrEqual(const UnicodeString& source,
584  const UnicodeString& target) const;
585 
595  UBool equals(const UnicodeString& source, const UnicodeString& target) const;
596 
597 #ifndef U_HIDE_DRAFT_API
598 
604  inline auto equal_to() const { return Predicate<std::equal_to, UCOL_EQUAL>(*this); }  // creates a comparison function object bound to this collator; it yields true when the collation result == UCOL_EQUAL
605 
611  inline auto greater() const { return Predicate<std::equal_to, UCOL_GREATER>(*this); }  // creates a comparison function object bound to this collator; it yields true when the collation result == UCOL_GREATER
612 
618  inline auto less() const { return Predicate<std::equal_to, UCOL_LESS>(*this); }  // creates a comparison function object bound to this collator; it yields true when the collation result == UCOL_LESS
619 
625  inline auto not_equal_to() const { return Predicate<std::not_equal_to, UCOL_EQUAL>(*this); }  // creates a comparison function object bound to this collator; it yields true when the collation result != UCOL_EQUAL
626 
632  inline auto greater_equal() const { return Predicate<std::not_equal_to, UCOL_LESS>(*this); }  // creates a comparison function object bound to this collator; "greater or equal" is expressed as collation result != UCOL_LESS
633 
639  inline auto less_equal() const { return Predicate<std::not_equal_to, UCOL_GREATER>(*this); }  // creates a comparison function object bound to this collator; "less or equal" is expressed as collation result != UCOL_GREATER
640 
641 #endif // U_HIDE_DRAFT_API
642 
643 #ifndef U_FORCE_HIDE_DEPRECATED_API
644 
654  virtual ECollationStrength getStrength() const;
655 
674  virtual void setStrength(ECollationStrength newStrength);
675 #endif // U_FORCE_HIDE_DEPRECATED_API
676 
692  virtual int32_t getReorderCodes(int32_t *dest,
693  int32_t destCapacity,
694  UErrorCode& status) const;
695 
711  virtual void setReorderCodes(const int32_t* reorderCodes,
712  int32_t reorderCodesLength,
713  UErrorCode& status) ;
714 
735  static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
736  int32_t* dest,
737  int32_t destCapacity,
738  UErrorCode& status);
739 
749  static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
750  const Locale& displayLocale,
751  UnicodeString& name);
752 
761  static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
762  UnicodeString& name);
763 
775  static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
776 
785  static StringEnumeration* U_EXPORT2 getAvailableLocales();
786 
796  static StringEnumeration* U_EXPORT2 getKeywords(UErrorCode& status);
797 
809  static StringEnumeration* U_EXPORT2 getKeywordValues(const char *keyword, UErrorCode& status);
810 
827  static StringEnumeration* U_EXPORT2 getKeywordValuesForLocale(const char* keyword, const Locale& locale,
828  UBool commonlyUsed, UErrorCode& status);
829 
857  static Locale U_EXPORT2 getFunctionalEquivalent(const char* keyword, const Locale& locale,
858  UBool& isAvailable, UErrorCode& status);
859 
860 #if !UCONFIG_NO_SERVICE
861 
872  static URegistryKey U_EXPORT2 registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status);
873 
884  static URegistryKey U_EXPORT2 registerFactory(CollatorFactory* toAdopt, UErrorCode& status);
885 
899  static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
900 #endif /* UCONFIG_NO_SERVICE */
901 
907  virtual void getVersion(UVersionInfo info) const = 0;
908 
919  virtual UClassID getDynamicClassID() const override = 0;
920 
929  virtual void setAttribute(UColAttribute attr, UColAttributeValue value,
930  UErrorCode &status) = 0;
931 
940  virtual UColAttributeValue getAttribute(UColAttribute attr,
941  UErrorCode &status) const = 0;
942 
961  virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode);
962 
971  virtual UColReorderCode getMaxVariable() const;
972 
973 #ifndef U_FORCE_HIDE_DEPRECATED_API
974 
990  virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status) = 0;
991 
1007  virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status) = 0;
1008 
1020  virtual void setVariableTop(uint32_t varTop, UErrorCode &status) = 0;
1021 #endif // U_FORCE_HIDE_DEPRECATED_API
1022 
1030  virtual uint32_t getVariableTop(UErrorCode &status) const = 0;
1031 
1041  virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
1042 
1043 #ifndef U_FORCE_HIDE_DEPRECATED_API
1044 
1051  virtual Collator* safeClone() const;
1052 #endif // U_FORCE_HIDE_DEPRECATED_API
1053 
1070  virtual int32_t getSortKey(const UnicodeString& source,
1071  uint8_t* result,
1072  int32_t resultLength) const = 0;
1073 
1093  virtual int32_t getSortKey(const char16_t*source, int32_t sourceLength,
1094  uint8_t*result, int32_t resultLength) const = 0;
1095 
1133  static int32_t U_EXPORT2 getBound(const uint8_t *source,
1134  int32_t sourceLength,
1135  UColBoundMode boundType,
1136  uint32_t noOfLevels,
1137  uint8_t *result,
1138  int32_t resultLength,
1139  UErrorCode &status);
1140 
1141 
1142 protected:
1143 
1144  // Collator protected constructors -------------------------------------
1145 
1153  Collator();
1154 
1155 #ifndef U_HIDE_DEPRECATED_API
1156 
1167  Collator(UCollationStrength collationStrength,
1168  UNormalizationMode decompositionMode);
1169 #endif /* U_HIDE_DEPRECATED_API */
1170 
1176  Collator(const Collator& other);
1177 
1178 public:
1186  virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
1187 
1211  virtual int32_t internalGetShortDefinitionString(const char *locale,
1212  char *buffer,
1213  int32_t capacity,
1214  UErrorCode &status) const;
1215 
1220  virtual UCollationResult internalCompareUTF8(
1221  const char *left, int32_t leftLength,
1222  const char *right, int32_t rightLength,
1223  UErrorCode &errorCode) const;
1224 
1229  virtual int32_t
1230  internalNextSortKeyPart(
1231  UCharIterator *iter, uint32_t state[2],
1232  uint8_t *dest, int32_t count, UErrorCode &errorCode) const;
1233 
1234 #ifndef U_HIDE_INTERNAL_API
1235 
1236  static inline Collator *fromUCollator(UCollator *uc) {  // views a C-API UCollator pointer as a C++ Collator pointer; only compiled when U_HIDE_INTERNAL_API is not defined
1237  return reinterpret_cast<Collator *>(uc);  // pointer reinterpretation only: no object is created or copied, and no null/type check is done here
1238  }
1240  static inline const Collator *fromUCollator(const UCollator *uc) {  // const overload: views a const C-API UCollator pointer as a const C++ Collator pointer
1241  return reinterpret_cast<const Collator *>(uc);  // pointer reinterpretation only; constness is preserved
1242  }
1244  inline UCollator *toUCollator() {  // inverse of fromUCollator(): exposes this Collator as a C-API UCollator pointer
1245  return reinterpret_cast<UCollator *>(this);  // pointer reinterpretation only: no object is created or copied
1246  }
1248  inline const UCollator *toUCollator() const {  // const overload: exposes this Collator as a const C-API UCollator pointer
1249  return reinterpret_cast<const UCollator *>(this);  // pointer reinterpretation only; constness is preserved
1250  }
1251 #endif // U_HIDE_INTERNAL_API
1252 
1253 private:
1257  Collator& operator=(const Collator& other) = delete;
1258 
1259  friend class CFactory;
1260  friend class SimpleCFactory;
1261  friend class ICUCollatorFactory;
1262  friend class ICUCollatorService;
1263  static Collator* makeInstance(const Locale& desiredLocale,
1264  UErrorCode& status);
1265 
1266 #ifndef U_HIDE_DRAFT_API
1267 
1271  template <template <typename...> typename Compare, UCollationResult result>  // Compare: comparison functor template (this class instantiates it with std::equal_to and std::not_equal_to); result: the UCollationResult constant each comparison outcome is tested against
1272  class Predicate {  // binary predicate over two strings: collates them with the bound Collator and returns Compare(collation outcome, result)
1273  public:
1274  explicit Predicate(const Collator& parent) : collator(parent) {}  // keeps a reference to parent; the Collator is not copied
1275 
1276  template <
1277  typename T, typename U,
1278  typename = std::enable_if_t<ConvertibleToU16StringView<T> && ConvertibleToU16StringView<U>>>  // this overload participates only when both argument types satisfy ConvertibleToU16StringView
1279  bool operator()(const T& lhs, const U& rhs) const {
1280  UErrorCode status = U_ZERO_ERROR;  // passed to compare() but never inspected afterwards, so an error is not reported to the caller
1281  return compare(
1282  collator.compare(
1283  UnicodeString::readOnlyAlias(lhs),  // both operands are wrapped via UnicodeString::readOnlyAlias before the three-argument compare() is called
1284  UnicodeString::readOnlyAlias(rhs),
1285  status),
1286  result);
1287  }
1288 
1289  bool operator()(std::string_view lhs, std::string_view rhs) const {  // std::string_view operands are routed to compareUTF8()
1290  UErrorCode status = U_ZERO_ERROR;  // not inspected after the call
1291  return compare(collator.compareUTF8(lhs, rhs, status), result);
1292  }
1293 
1294 #if defined(__cpp_char8_t)  // the u8string_view overload exists only when the compiler provides char8_t
1295  bool operator()(std::u8string_view lhs, std::u8string_view rhs) const {  // std::u8string_view operands are routed to compareUTF8() as well
1296  UErrorCode status = U_ZERO_ERROR;  // not inspected after the call
1297  return compare(collator.compareUTF8(lhs, rhs, status), result);
1298  }
1299 #endif
1300 
1301  private:
1302  const Collator& collator;  // the Collator every call delegates to; held by reference
1303  static constexpr Compare<UCollationResult> compare{};  // the comparison functor, e.g. std::equal_to<UCollationResult>
1304  };
1305 #endif // U_HIDE_DRAFT_API
1306 };
1307 
1308 #if !UCONFIG_NO_SERVICE
1309 
1325 class U_I18N_API CollatorFactory : public UObject {  // factory, used with Collator::registerFactory, that creates multiple collators and provides display names for them
1326 public:
1327 
1332  virtual ~CollatorFactory();  // virtual destructor: instances are handled through CollatorFactory pointers (see registerFactory(CollatorFactory* toAdopt, ...))
1333 
1341  virtual UBool visible() const;
1342 
1350  virtual Collator* createCollator(const Locale& loc) = 0;  // pure virtual: every concrete factory must supply the Collator for loc
1351 
1362  virtual UnicodeString& getDisplayName(const Locale& objectLocale,  // writes into and returns result; objectLocale is the locale being named, displayLocale the locale to name it in
1363  const Locale& displayLocale,
1364  UnicodeString& result);
1365 
1375  virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) = 0;  // pure virtual: returns a pointer to UnicodeString IDs; count and status are output parameters
1376 };
1377 #endif /* UCONFIG_NO_SERVICE */
1378 
1379 // Collator inline methods -----------------------------------------------
1380 
1381 U_NAMESPACE_END
1382 
1383 #endif /* #if !UCONFIG_NO_COLLATION */
1384 
1385 #endif /* U_SHOW_CPLUSPLUS_API */
1386 
1387 #endif
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
Definition: uversion.h:59
string a < string b
Definition: ucol.h:82
The Collator class performs locale-sensitive string comparison.
Definition: coll.h:173
const UCollator * toUCollator() const
Definition: coll.h:1248
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:346
UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll(...
Definition: ucol.h:76
UNormalizationMode
Constants for normalization modes.
Definition: unorm.h:140
C++ API: Unicode String.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
A factory, used with registerFactory, that creates multiple collators and provides display names for t...
Definition: coll.h:1325
C API: Miscellaneous definitions.
C API for code unit iteration.
Definition: uiter.h:341
Base class for 'pure' C++ implementations of uenum api.
Definition: strenum.h:61
string a == string b
Definition: ucol.h:78
No error, no warning.
Definition: utypes.h:465
static const Collator * fromUCollator(const UCollator *uc)
Definition: coll.h:1240
ECollationStrength
Base letter represents a primary difference.
Definition: coll.h:203
UColReorderCode
Enum containing the codes for reordering segments of the collation table that are not script codes...
Definition: ucol.h:149
UColAttribute
Attributes that collation service understands.
Definition: ucol.h:245
auto less_equal() const
Creates a comparison function object that uses this collator.
Definition: coll.h:639
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:316
UCollator * toUCollator()
Definition: coll.h:1244
C++ API: StringPiece: Read-only byte string wrapper class.
Secondary collation strength.
Definition: ucol.h:99
auto not_equal_to() const
Creates a comparison function object that uses this collator.
Definition: coll.h:625
C API: Unicode Normalization.
auto less() const
Creates a comparison function object that uses this collator.
Definition: coll.h:618
virtual UClassID getDynamicClassID() const
ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
EComparisonResult
LESS is returned if source string is compared to be less than target string in the compare() method...
Definition: coll.h:226
C API: Collator.
Collation keys are generated by the Collator class.
Definition: sortkey.h:101
Tertiary collation strength.
Definition: ucol.h:101
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:285
C API: Unicode Character Iteration.
C++ API: Common ICU base class UObject.
const void * URegistryKey
Opaque type returned by registerInstance, registerFactory and unregister for service registration...
Definition: umisc.h:57
string a > string b
Definition: ucol.h:80
auto equal_to() const
Creates a comparison function object that uses this collator.
Definition: coll.h:604
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:430
ULocDataLocaleType
Constants for *_getLocale(). Allows the user to select whether she wants information on requested...
Definition: uloc.h:338
UColBoundMode
Enum that is taken by the ucol_getBound API. See below for explanation; do not change the values assigned ...
Definition: ucol.h:1071
C++ API: Locale ID object.
auto greater_equal() const
Creates a comparison function object that uses this collator.
Definition: coll.h:632
auto greater() const
Creates a comparison function object that uses this collator.
Definition: coll.h:611
struct UCollator UCollator
structure representing a collator object instance
Definition: ucol.h:61
Basic definitions for ICU, for both C and C++ APIs.
static Collator * fromUCollator(UCollator *uc)
Definition: coll.h:1236
Identical collation strength.
Definition: ucol.h:108
Quaternary collation strength.
Definition: ucol.h:106
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:295
UColAttributeValue
Enum containing attribute values for controlling collation behavior.
Definition: ucol.h:92
A string-like object that points to a sized piece of memory.
Definition: stringpiece.h:61
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
Primary collation strength.
Definition: ucol.h:97
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247
C++ API: char16_t pointer wrappers with implicit conversion from bit-compatible raw pointer types...
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:195
C++ API: Unicode Set.