ICU 76.1  76.1
unorm2.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2009-2015, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: unorm2.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2009dec15
16 * created by: Markus W. Scherer
17 */
18 
19 #ifndef __UNORM2_H__
20 #define __UNORM2_H__
21 
33 #include "unicode/utypes.h"
34 #include "unicode/stringoptions.h"
35 #include "unicode/uset.h"
36 
37 #if U_SHOW_CPLUSPLUS_API
38 #include "unicode/localpointer.h"
39 #endif // U_SHOW_CPLUSPLUS_API
40 
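48 typedef enum {
57  UNORM2_COMPOSE,
66  UNORM2_DECOMPOSE,
79  UNORM2_FCD,
89  UNORM2_COMPOSE_CONTIGUOUS
90 } UNormalization2Mode;
91 
97 typedef enum UNormalizationCheckResult {
102  UNORM_NO,
107  UNORM_YES,
115  UNORM_MAYBE
116 } UNormalizationCheckResult;
117 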
122 struct UNormalizer2;
123 typedef struct UNormalizer2 UNormalizer2;
125 #if !UCONFIG_NO_NORMALIZATION
126 
138 U_CAPI const UNormalizer2 * U_EXPORT2
139 unorm2_getNFCInstance(UErrorCode *pErrorCode);
140 
152 U_CAPI const UNormalizer2 * U_EXPORT2
153 unorm2_getNFDInstance(UErrorCode *pErrorCode);
154 
166 U_CAPI const UNormalizer2 * U_EXPORT2
167 unorm2_getNFKCInstance(UErrorCode *pErrorCode);
168 
180 U_CAPI const UNormalizer2 * U_EXPORT2
181 unorm2_getNFKDInstance(UErrorCode *pErrorCode);
182 
197 U_CAPI const UNormalizer2 * U_EXPORT2
198 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
199 
214 U_CAPI const UNormalizer2 * U_EXPORT2
215 unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode);
216 
238 U_CAPI const UNormalizer2 * U_EXPORT2
239 unorm2_getInstance(const char *packageName,
240  const char *name,
241  UNormalization2Mode mode,
242  UErrorCode *pErrorCode);
243 
259 U_CAPI UNormalizer2 * U_EXPORT2
260 unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
261 
268 U_CAPI void U_EXPORT2
269 unorm2_close(UNormalizer2 *norm2);
270 
271 #if U_SHOW_CPLUSPLUS_API
272 
273 U_NAMESPACE_BEGIN
274 
284 U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close);
285 
286 U_NAMESPACE_END
287 
288 #endif
289 
306 U_CAPI int32_t U_EXPORT2
307 unorm2_normalize(const UNormalizer2 *norm2,
308  const UChar *src, int32_t length,
309  UChar *dest, int32_t capacity,
310  UErrorCode *pErrorCode);
329 U_CAPI int32_t U_EXPORT2
330 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
331  UChar *first, int32_t firstLength, int32_t firstCapacity,
332  const UChar *second, int32_t secondLength,
333  UErrorCode *pErrorCode);
352 U_CAPI int32_t U_EXPORT2
353 unorm2_append(const UNormalizer2 *norm2,
354  UChar *first, int32_t firstLength, int32_t firstCapacity,
355  const UChar *second, int32_t secondLength,
356  UErrorCode *pErrorCode);
357 
377 U_CAPI int32_t U_EXPORT2
378 unorm2_getDecomposition(const UNormalizer2 *norm2,
379  UChar32 c, UChar *decomposition, int32_t capacity,
380  UErrorCode *pErrorCode);
381 
411 U_CAPI int32_t U_EXPORT2
412 unorm2_getRawDecomposition(const UNormalizer2 *norm2,
413  UChar32 c, UChar *decomposition, int32_t capacity,
414  UErrorCode *pErrorCode);
415 
431 U_CAPI UChar32 U_EXPORT2
432 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b);
433 
443 U_CAPI uint8_t U_EXPORT2
444 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
445 
462 U_CAPI UBool U_EXPORT2
463 unorm2_isNormalized(const UNormalizer2 *norm2,
464  const UChar *s, int32_t length,
465  UErrorCode *pErrorCode);
466 
484 U_CAPI UNormalizationCheckResult U_EXPORT2
485 unorm2_quickCheck(const UNormalizer2 *norm2,
486  const UChar *s, int32_t length,
487  UErrorCode *pErrorCode);
488 
513 U_CAPI int32_t U_EXPORT2
514 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
515  const UChar *s, int32_t length,
516  UErrorCode *pErrorCode);
517 
527 U_CAPI UBool U_EXPORT2
528 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
529 
539 U_CAPI UBool U_EXPORT2
540 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
541 
550 U_CAPI UBool U_EXPORT2
551 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
552 
619 U_CAPI int32_t U_EXPORT2
620 unorm_compare(const UChar *s1, int32_t length1,
621  const UChar *s2, int32_t length2,
622  uint32_t options,
623  UErrorCode *pErrorCode);
624 
625 #endif /* !UCONFIG_NO_NORMALIZATION */
626 #endif /* __UNORM2_H__ */
U_CAPI int32_t unorm2_getRawDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
Gets the raw decomposition mapping of c.
U_CAPI int32_t unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Returns the end of the normalized substring of the input string.
UNORM2_FCD
"Fast C or D" form.
Definition: unorm2.h:79
UNORM_NO
The input string is not in the normalization form.
Definition: unorm2.h:102
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
UNORM2_DECOMPOSE
Map, and reorder canonically.
Definition: unorm2.h:66
stringoptions.h
C API: Bit set option bit constants for various string and character processing functions.
U_CAPI int32_t unorm2_getDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
Gets the decomposition mapping of c.
U_CAPI UNormalizationCheckResult unorm2_quickCheck(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Tests if the string is normalized.
U_CAPI int32_t unorm2_normalize(const UNormalizer2 *norm2, const UChar *src, int32_t length, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)
Writes the normalized form of the source string to the destination string (replacing its contents) an...
U_CAPI void unorm2_close(UNormalizer2 *norm2)
Closes a UNormalizer2 instance from unorm2_openFiltered().
UNORM_YES
The input string is in the normalization form.
Definition: unorm2.h:107
U_CAPI int32_t unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
Appends the normalized form of the second string to the first string (merging them at the boundary) a...
UNORM_MAYBE
The input string may or may not be in the normalization form.
Definition: unorm2.h:115
U_CAPI UBool unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c)
Tests if the character always has a normalization boundary after it, regardless of context...
UNORM2_COMPOSE_CONTIGUOUS
Compose only contiguously.
Definition: unorm2.h:89
U_CAPI const UNormalizer2 * unorm2_getInstance(const char *packageName, const char *name, UNormalization2Mode mode, UErrorCode *pErrorCode)
Returns a UNormalizer2 instance which uses the specified data file (packageName/name similar to ucnv_...
uset.h
C API: Unicode Set.
U_CAPI UBool unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c)
Tests if the character always has a normalization boundary before it, regardless of context...
U_CAPI UBool unorm2_isNormalized(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Tests if the string is normalized.
U_CAPI const UNormalizer2 * unorm2_getNFKDInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFKD normalization.
U_CAPI const UNormalizer2 * unorm2_getNFDInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFD normalization.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:550
localpointer.h
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
U_CAPI const UNormalizer2 * unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization which is equ...
U_CAPI int32_t unorm2_append(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
Appends the second string to the first string (merging them at the boundary) and returns the length o...
U_CAPI const UNormalizer2 * unorm2_getNFCInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFC normalization.
struct UNormalizer2 UNormalizer2
C typedef for struct UNormalizer2.
Definition: unorm2.h:123
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:427
U_CAPI UChar32 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b)
Performs pairwise composition of a & b and returns the composite if there is one. ...
U_CAPI uint8_t unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c)
Gets the combining class of c.
U_CAPI UNormalizer2 * unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode)
Constructs a filtered normalizer wrapping any UNormalizer2 instance and a filter set.
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:378
U_CAPI const UNormalizer2 * unorm2_getNFKCInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFKC normalization.
UNormalization2Mode
Constants for normalization modes.
Definition: unorm2.h:48
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:430
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition: uset.h:53
U_CAPI const UNormalizer2 * unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode toNFKC_Casefold() normalization which is equivalent to ap...
utypes.h
Basic definitions for ICU, for both C and C++ APIs.
U_CAPI int32_t unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compares two strings for canonical equivalence.
U_CAPI UBool unorm2_isInert(const UNormalizer2 *norm2, UChar32 c)
Tests if the character is normalization-inert.
UNORM2_COMPOSE
Decomposition followed by composition.
Definition: unorm2.h:57
LocalUNormalizer2Pointer
"Smart pointer" class, closes a UNormalizer2 via unorm2_close().
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition: unorm2.h:97
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247