xref: /aosp_15_r20/external/icu/libandroidicu/include/unicode/unorm2.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
5*0e209d39SAndroid Build Coastguard Worker *
6*0e209d39SAndroid Build Coastguard Worker *   Copyright (C) 2009-2015, International Business Machines
7*0e209d39SAndroid Build Coastguard Worker *   Corporation and others.  All Rights Reserved.
8*0e209d39SAndroid Build Coastguard Worker *
9*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
10*0e209d39SAndroid Build Coastguard Worker *   file name:  unorm2.h
11*0e209d39SAndroid Build Coastguard Worker *   encoding:   UTF-8
12*0e209d39SAndroid Build Coastguard Worker *   tab size:   8 (not used)
13*0e209d39SAndroid Build Coastguard Worker *   indentation:4
14*0e209d39SAndroid Build Coastguard Worker *
15*0e209d39SAndroid Build Coastguard Worker *   created on: 2009dec15
16*0e209d39SAndroid Build Coastguard Worker *   created by: Markus W. Scherer
17*0e209d39SAndroid Build Coastguard Worker */
18*0e209d39SAndroid Build Coastguard Worker 
19*0e209d39SAndroid Build Coastguard Worker #ifndef __UNORM2_H__
20*0e209d39SAndroid Build Coastguard Worker #define __UNORM2_H__
21*0e209d39SAndroid Build Coastguard Worker 
22*0e209d39SAndroid Build Coastguard Worker /**
23*0e209d39SAndroid Build Coastguard Worker  * \file
24*0e209d39SAndroid Build Coastguard Worker  * \brief C API: New API for Unicode Normalization.
25*0e209d39SAndroid Build Coastguard Worker  *
26*0e209d39SAndroid Build Coastguard Worker  * Unicode normalization functionality for standard Unicode normalization or
27*0e209d39SAndroid Build Coastguard Worker  * for using custom mapping tables.
28*0e209d39SAndroid Build Coastguard Worker  * All instances of UNormalizer2 are unmodifiable/immutable.
29*0e209d39SAndroid Build Coastguard Worker  * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller.
30*0e209d39SAndroid Build Coastguard Worker  * For more details see the Normalizer2 C++ class.
31*0e209d39SAndroid Build Coastguard Worker  */
32*0e209d39SAndroid Build Coastguard Worker 
33*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
34*0e209d39SAndroid Build Coastguard Worker #include "unicode/stringoptions.h"
35*0e209d39SAndroid Build Coastguard Worker #include "unicode/uset.h"
36*0e209d39SAndroid Build Coastguard Worker 
37*0e209d39SAndroid Build Coastguard Worker #if U_SHOW_CPLUSPLUS_API
38*0e209d39SAndroid Build Coastguard Worker #include "unicode/localpointer.h"
39*0e209d39SAndroid Build Coastguard Worker #endif   // U_SHOW_CPLUSPLUS_API
40*0e209d39SAndroid Build Coastguard Worker 
41*0e209d39SAndroid Build Coastguard Worker /**
42*0e209d39SAndroid Build Coastguard Worker  * Constants for normalization modes.
43*0e209d39SAndroid Build Coastguard Worker  * For details about standard Unicode normalization forms
44*0e209d39SAndroid Build Coastguard Worker  * and about the algorithms which are also used with custom mapping tables
45*0e209d39SAndroid Build Coastguard Worker  * see https://www.unicode.org/reports/tr15/
46*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.4
47*0e209d39SAndroid Build Coastguard Worker  */
48*0e209d39SAndroid Build Coastguard Worker typedef enum {
49*0e209d39SAndroid Build Coastguard Worker     /**
50*0e209d39SAndroid Build Coastguard Worker      * Decomposition followed by composition.
51*0e209d39SAndroid Build Coastguard Worker      * Same as standard NFC when using an "nfc" instance.
52*0e209d39SAndroid Build Coastguard Worker      * Same as standard NFKC when using an "nfkc" instance.
53*0e209d39SAndroid Build Coastguard Worker      * For details about standard Unicode normalization forms
54*0e209d39SAndroid Build Coastguard Worker      * see https://www.unicode.org/reports/tr15/
55*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.4
56*0e209d39SAndroid Build Coastguard Worker      */
57*0e209d39SAndroid Build Coastguard Worker     UNORM2_COMPOSE,
58*0e209d39SAndroid Build Coastguard Worker     /**
59*0e209d39SAndroid Build Coastguard Worker      * Map, and reorder canonically.
60*0e209d39SAndroid Build Coastguard Worker      * Same as standard NFD when using an "nfc" instance.
61*0e209d39SAndroid Build Coastguard Worker      * Same as standard NFKD when using an "nfkc" instance.
62*0e209d39SAndroid Build Coastguard Worker      * For details about standard Unicode normalization forms
63*0e209d39SAndroid Build Coastguard Worker      * see https://www.unicode.org/reports/tr15/
64*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.4
65*0e209d39SAndroid Build Coastguard Worker      */
66*0e209d39SAndroid Build Coastguard Worker     UNORM2_DECOMPOSE,
67*0e209d39SAndroid Build Coastguard Worker     /**
68*0e209d39SAndroid Build Coastguard Worker      * "Fast C or D" form.
69*0e209d39SAndroid Build Coastguard Worker      * If a string is in this form, then further decomposition <i>without reordering</i>
70*0e209d39SAndroid Build Coastguard Worker      * would yield the same form as DECOMPOSE.
71*0e209d39SAndroid Build Coastguard Worker      * Text in "Fast C or D" form can be processed efficiently with data tables
72*0e209d39SAndroid Build Coastguard Worker      * that are "canonically closed", that is, that provide equivalent data for
73*0e209d39SAndroid Build Coastguard Worker      * equivalent text, without having to be fully normalized.
74*0e209d39SAndroid Build Coastguard Worker      * Not a standard Unicode normalization form.
75*0e209d39SAndroid Build Coastguard Worker      * Not a unique form: Different FCD strings can be canonically equivalent.
76*0e209d39SAndroid Build Coastguard Worker      * For details see http://www.unicode.org/notes/tn5/#FCD
77*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.4
78*0e209d39SAndroid Build Coastguard Worker      */
79*0e209d39SAndroid Build Coastguard Worker     UNORM2_FCD,
80*0e209d39SAndroid Build Coastguard Worker     /**
81*0e209d39SAndroid Build Coastguard Worker      * Compose only contiguously.
82*0e209d39SAndroid Build Coastguard Worker      * Also known as "FCC" or "Fast C Contiguous".
83*0e209d39SAndroid Build Coastguard Worker      * The result will often but not always be in NFC.
84*0e209d39SAndroid Build Coastguard Worker      * The result will conform to FCD which is useful for processing.
85*0e209d39SAndroid Build Coastguard Worker      * Not a standard Unicode normalization form.
86*0e209d39SAndroid Build Coastguard Worker      * For details see http://www.unicode.org/notes/tn5/#FCC
87*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.4
88*0e209d39SAndroid Build Coastguard Worker      */
89*0e209d39SAndroid Build Coastguard Worker     UNORM2_COMPOSE_CONTIGUOUS
90*0e209d39SAndroid Build Coastguard Worker } UNormalization2Mode;
91*0e209d39SAndroid Build Coastguard Worker 
92*0e209d39SAndroid Build Coastguard Worker /**
93*0e209d39SAndroid Build Coastguard Worker  * Result values for normalization quick check functions.
94*0e209d39SAndroid Build Coastguard Worker  * For details see https://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
95*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 2.0
96*0e209d39SAndroid Build Coastguard Worker  */
97*0e209d39SAndroid Build Coastguard Worker typedef enum UNormalizationCheckResult {
98*0e209d39SAndroid Build Coastguard Worker   /**
99*0e209d39SAndroid Build Coastguard Worker    * The input string is not in the normalization form.
100*0e209d39SAndroid Build Coastguard Worker    * @stable ICU 2.0
101*0e209d39SAndroid Build Coastguard Worker    */
102*0e209d39SAndroid Build Coastguard Worker   UNORM_NO,
103*0e209d39SAndroid Build Coastguard Worker   /**
104*0e209d39SAndroid Build Coastguard Worker    * The input string is in the normalization form.
105*0e209d39SAndroid Build Coastguard Worker    * @stable ICU 2.0
106*0e209d39SAndroid Build Coastguard Worker    */
107*0e209d39SAndroid Build Coastguard Worker   UNORM_YES,
108*0e209d39SAndroid Build Coastguard Worker   /**
109*0e209d39SAndroid Build Coastguard Worker    * The input string may or may not be in the normalization form.
110*0e209d39SAndroid Build Coastguard Worker    * This value is only returned for composition forms like NFC and FCC,
111*0e209d39SAndroid Build Coastguard Worker    * when a backward-combining character is found for which the surrounding text
112*0e209d39SAndroid Build Coastguard Worker    * would have to be analyzed further.
113*0e209d39SAndroid Build Coastguard Worker    * @stable ICU 2.0
114*0e209d39SAndroid Build Coastguard Worker    */
115*0e209d39SAndroid Build Coastguard Worker   UNORM_MAYBE
116*0e209d39SAndroid Build Coastguard Worker } UNormalizationCheckResult;
117*0e209d39SAndroid Build Coastguard Worker 
118*0e209d39SAndroid Build Coastguard Worker /**
119*0e209d39SAndroid Build Coastguard Worker  * Opaque (forward-declared only) C service object type for the new normalization API.
120*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.4
121*0e209d39SAndroid Build Coastguard Worker  */
122*0e209d39SAndroid Build Coastguard Worker struct UNormalizer2;
123*0e209d39SAndroid Build Coastguard Worker typedef struct UNormalizer2 UNormalizer2;  /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */
124*0e209d39SAndroid Build Coastguard Worker 
125*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_NORMALIZATION
126*0e209d39SAndroid Build Coastguard Worker 
127*0e209d39SAndroid Build Coastguard Worker /**
128*0e209d39SAndroid Build Coastguard Worker  * Returns a UNormalizer2 instance for Unicode NFC normalization.
129*0e209d39SAndroid Build Coastguard Worker  * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode).
130*0e209d39SAndroid Build Coastguard Worker  * Returns an unmodifiable singleton instance. Do not delete it.
131*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode Standard ICU error code. Its input value must
132*0e209d39SAndroid Build Coastguard Worker  *                  pass the U_SUCCESS() test, or else the function returns
133*0e209d39SAndroid Build Coastguard Worker  *                  immediately. Check for U_FAILURE() on output or use with
134*0e209d39SAndroid Build Coastguard Worker  *                  function chaining. (See User Guide for details.)
135*0e209d39SAndroid Build Coastguard Worker  * @return the requested UNormalizer2 singleton, if successful
136*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 49
137*0e209d39SAndroid Build Coastguard Worker  */
138*0e209d39SAndroid Build Coastguard Worker U_CAPI const UNormalizer2 * U_EXPORT2
139*0e209d39SAndroid Build Coastguard Worker unorm2_getNFCInstance(UErrorCode *pErrorCode);
140*0e209d39SAndroid Build Coastguard Worker 
141*0e209d39SAndroid Build Coastguard Worker /**
142*0e209d39SAndroid Build Coastguard Worker  * Returns a UNormalizer2 instance for Unicode NFD normalization.
143*0e209d39SAndroid Build Coastguard Worker  * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode).
144*0e209d39SAndroid Build Coastguard Worker  * Returns an unmodifiable singleton instance. Do not delete it.
145*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode Standard ICU error code. Its input value must
146*0e209d39SAndroid Build Coastguard Worker  *                  pass the U_SUCCESS() test, or else the function returns
147*0e209d39SAndroid Build Coastguard Worker  *                  immediately. Check for U_FAILURE() on output or use with
148*0e209d39SAndroid Build Coastguard Worker  *                  function chaining. (See User Guide for details.)
149*0e209d39SAndroid Build Coastguard Worker  * @return the requested UNormalizer2 singleton, if successful
150*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 49
151*0e209d39SAndroid Build Coastguard Worker  */
152*0e209d39SAndroid Build Coastguard Worker U_CAPI const UNormalizer2 * U_EXPORT2
153*0e209d39SAndroid Build Coastguard Worker unorm2_getNFDInstance(UErrorCode *pErrorCode);
154*0e209d39SAndroid Build Coastguard Worker 
155*0e209d39SAndroid Build Coastguard Worker /**
156*0e209d39SAndroid Build Coastguard Worker  * Returns a UNormalizer2 instance for Unicode NFKC normalization.
157*0e209d39SAndroid Build Coastguard Worker  * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode).
158*0e209d39SAndroid Build Coastguard Worker  * Returns an unmodifiable singleton instance. Do not delete it.
159*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode Standard ICU error code. Its input value must
160*0e209d39SAndroid Build Coastguard Worker  *                  pass the U_SUCCESS() test, or else the function returns
161*0e209d39SAndroid Build Coastguard Worker  *                  immediately. Check for U_FAILURE() on output or use with
162*0e209d39SAndroid Build Coastguard Worker  *                  function chaining. (See User Guide for details.)
163*0e209d39SAndroid Build Coastguard Worker  * @return the requested UNormalizer2 singleton, if successful
164*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 49
165*0e209d39SAndroid Build Coastguard Worker  */
166*0e209d39SAndroid Build Coastguard Worker U_CAPI const UNormalizer2 * U_EXPORT2
167*0e209d39SAndroid Build Coastguard Worker unorm2_getNFKCInstance(UErrorCode *pErrorCode);
168*0e209d39SAndroid Build Coastguard Worker 
169*0e209d39SAndroid Build Coastguard Worker /**
170*0e209d39SAndroid Build Coastguard Worker  * Returns a UNormalizer2 instance for Unicode NFKD normalization.
171*0e209d39SAndroid Build Coastguard Worker  * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode).
172*0e209d39SAndroid Build Coastguard Worker  * Returns an unmodifiable singleton instance. Do not delete it.
173*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode Standard ICU error code. Its input value must
174*0e209d39SAndroid Build Coastguard Worker  *                  pass the U_SUCCESS() test, or else the function returns
175*0e209d39SAndroid Build Coastguard Worker  *                  immediately. Check for U_FAILURE() on output or use with
176*0e209d39SAndroid Build Coastguard Worker  *                  function chaining. (See User Guide for details.)
177*0e209d39SAndroid Build Coastguard Worker  * @return the requested UNormalizer2 singleton, if successful
178*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 49
179*0e209d39SAndroid Build Coastguard Worker  */
180*0e209d39SAndroid Build Coastguard Worker U_CAPI const UNormalizer2 * U_EXPORT2
181*0e209d39SAndroid Build Coastguard Worker unorm2_getNFKDInstance(UErrorCode *pErrorCode);
182*0e209d39SAndroid Build Coastguard Worker 
183*0e209d39SAndroid Build Coastguard Worker /**
184*0e209d39SAndroid Build Coastguard Worker  * Returns a UNormalizer2 instance for Unicode toNFKC_Casefold() normalization
185*0e209d39SAndroid Build Coastguard Worker  * which is equivalent to applying the NFKC_Casefold mappings and then NFC.
186*0e209d39SAndroid Build Coastguard Worker  * See https://www.unicode.org/reports/tr44/#NFKC_Casefold
187*0e209d39SAndroid Build Coastguard Worker  *
188*0e209d39SAndroid Build Coastguard Worker  * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode).
189*0e209d39SAndroid Build Coastguard Worker  * Returns an unmodifiable singleton instance. Do not delete it.
190*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode Standard ICU error code. Its input value must
191*0e209d39SAndroid Build Coastguard Worker  *                  pass the U_SUCCESS() test, or else the function returns
192*0e209d39SAndroid Build Coastguard Worker  *                  immediately. Check for U_FAILURE() on output or use with
193*0e209d39SAndroid Build Coastguard Worker  *                  function chaining. (See User Guide for details.)
194*0e209d39SAndroid Build Coastguard Worker  * @return the requested UNormalizer2 singleton, if successful
195*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 49
196*0e209d39SAndroid Build Coastguard Worker  */
197*0e209d39SAndroid Build Coastguard Worker U_CAPI const UNormalizer2 * U_EXPORT2
198*0e209d39SAndroid Build Coastguard Worker unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
199*0e209d39SAndroid Build Coastguard Worker 
200*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_DRAFT_API
201*0e209d39SAndroid Build Coastguard Worker /**
202*0e209d39SAndroid Build Coastguard Worker  * Returns a UNormalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization
203*0e209d39SAndroid Build Coastguard Worker  * which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC.
204*0e209d39SAndroid Build Coastguard Worker  * See https://www.unicode.org/reports/tr44/#NFKC_Simple_Casefold
205*0e209d39SAndroid Build Coastguard Worker  *
206*0e209d39SAndroid Build Coastguard Worker  * Same as unorm2_getInstance(NULL, "nfkc_scf", UNORM2_COMPOSE, pErrorCode).
207*0e209d39SAndroid Build Coastguard Worker  * Returns an unmodifiable singleton instance. Do not delete it.
208*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode Standard ICU error code. Its input value must
209*0e209d39SAndroid Build Coastguard Worker  *                  pass the U_SUCCESS() test, or else the function returns
210*0e209d39SAndroid Build Coastguard Worker  *                  immediately. Check for U_FAILURE() on output or use with
211*0e209d39SAndroid Build Coastguard Worker  *                  function chaining. (See User Guide for details.)
212*0e209d39SAndroid Build Coastguard Worker  * @return the requested UNormalizer2 singleton, if successful
213*0e209d39SAndroid Build Coastguard Worker  * @draft ICU 74
214*0e209d39SAndroid Build Coastguard Worker  */
215*0e209d39SAndroid Build Coastguard Worker U_CAPI const UNormalizer2 * U_EXPORT2
216*0e209d39SAndroid Build Coastguard Worker unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode);
217*0e209d39SAndroid Build Coastguard Worker #endif  // U_HIDE_DRAFT_API
218*0e209d39SAndroid Build Coastguard Worker 
219*0e209d39SAndroid Build Coastguard Worker /**
220*0e209d39SAndroid Build Coastguard Worker  * Returns a UNormalizer2 instance which uses the specified data file
221*0e209d39SAndroid Build Coastguard Worker  * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
222*0e209d39SAndroid Build Coastguard Worker  * and which composes or decomposes text according to the specified mode.
223*0e209d39SAndroid Build Coastguard Worker  * Returns an unmodifiable singleton instance. Do not delete it.
224*0e209d39SAndroid Build Coastguard Worker  *
225*0e209d39SAndroid Build Coastguard Worker  * Use packageName=NULL for data files that are part of ICU's own data.
226*0e209d39SAndroid Build Coastguard Worker  * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
227*0e209d39SAndroid Build Coastguard Worker  * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
228*0e209d39SAndroid Build Coastguard Worker  * Use name="nfkc_cf" and UNORM2_COMPOSE for NFKC_CF=NFKC_Casefold; name="nfkc_scf" for the NFKC_Simple_Casefold variant.
229*0e209d39SAndroid Build Coastguard Worker  *
230*0e209d39SAndroid Build Coastguard Worker  * @param packageName NULL for ICU built-in data, otherwise application data package name
231*0e209d39SAndroid Build Coastguard Worker  * @param name "nfc" or "nfkc" or "nfkc_cf" or "nfkc_scf" or name of custom data file
232*0e209d39SAndroid Build Coastguard Worker  * @param mode normalization mode (compose or decompose etc.)
233*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode Standard ICU error code. Its input value must
234*0e209d39SAndroid Build Coastguard Worker  *                  pass the U_SUCCESS() test, or else the function returns
235*0e209d39SAndroid Build Coastguard Worker  *                  immediately. Check for U_FAILURE() on output or use with
236*0e209d39SAndroid Build Coastguard Worker  *                  function chaining. (See User Guide for details.)
237*0e209d39SAndroid Build Coastguard Worker  * @return the requested UNormalizer2, if successful
238*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.4
239*0e209d39SAndroid Build Coastguard Worker  */
240*0e209d39SAndroid Build Coastguard Worker U_CAPI const UNormalizer2 * U_EXPORT2
241*0e209d39SAndroid Build Coastguard Worker unorm2_getInstance(const char *packageName,
242*0e209d39SAndroid Build Coastguard Worker                    const char *name,
243*0e209d39SAndroid Build Coastguard Worker                    UNormalization2Mode mode,
244*0e209d39SAndroid Build Coastguard Worker                    UErrorCode *pErrorCode);
245*0e209d39SAndroid Build Coastguard Worker 
246*0e209d39SAndroid Build Coastguard Worker /**
247*0e209d39SAndroid Build Coastguard Worker  * Constructs a filtered normalizer wrapping any UNormalizer2 instance
248*0e209d39SAndroid Build Coastguard Worker  * and a filter set.
249*0e209d39SAndroid Build Coastguard Worker  * Both are aliased and must not be modified or deleted while this object
250*0e209d39SAndroid Build Coastguard Worker  * is used.
251*0e209d39SAndroid Build Coastguard Worker  * The filter set should be frozen; otherwise the performance will suffer greatly.
252*0e209d39SAndroid Build Coastguard Worker  * @param norm2 wrapped UNormalizer2 instance
253*0e209d39SAndroid Build Coastguard Worker  * @param filterSet USet which determines the characters to be normalized
254*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode Standard ICU error code. Its input value must
255*0e209d39SAndroid Build Coastguard Worker  *                   pass the U_SUCCESS() test, or else the function returns
256*0e209d39SAndroid Build Coastguard Worker  *                   immediately. Check for U_FAILURE() on output or use with
257*0e209d39SAndroid Build Coastguard Worker  *                   function chaining. (See User Guide for details.)
258*0e209d39SAndroid Build Coastguard Worker  * @return the requested UNormalizer2, if successful; the caller closes it with unorm2_close()
259*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.4
260*0e209d39SAndroid Build Coastguard Worker  */
261*0e209d39SAndroid Build Coastguard Worker U_CAPI UNormalizer2 * U_EXPORT2
262*0e209d39SAndroid Build Coastguard Worker unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
263*0e209d39SAndroid Build Coastguard Worker 
264*0e209d39SAndroid Build Coastguard Worker /**
265*0e209d39SAndroid Build Coastguard Worker  * Closes a UNormalizer2 instance from unorm2_openFiltered().
266*0e209d39SAndroid Build Coastguard Worker  * Do not close the singleton instances from unorm2_getInstance(), unorm2_getNFCInstance() etc.!
267*0e209d39SAndroid Build Coastguard Worker  * @param norm2 UNormalizer2 instance to be closed
268*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.4
269*0e209d39SAndroid Build Coastguard Worker  */
270*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2
271*0e209d39SAndroid Build Coastguard Worker unorm2_close(UNormalizer2 *norm2);
272*0e209d39SAndroid Build Coastguard Worker 
273*0e209d39SAndroid Build Coastguard Worker #if U_SHOW_CPLUSPLUS_API
274*0e209d39SAndroid Build Coastguard Worker 
275*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
276*0e209d39SAndroid Build Coastguard Worker 
277*0e209d39SAndroid Build Coastguard Worker /**
278*0e209d39SAndroid Build Coastguard Worker  * \class LocalUNormalizer2Pointer
279*0e209d39SAndroid Build Coastguard Worker  * "Smart pointer" class, closes a UNormalizer2 from unorm2_openFiltered() via unorm2_close().
280*0e209d39SAndroid Build Coastguard Worker  * For most methods see the LocalPointerBase base class.
281*0e209d39SAndroid Build Coastguard Worker  *
282*0e209d39SAndroid Build Coastguard Worker  * @see LocalPointerBase
283*0e209d39SAndroid Build Coastguard Worker  * @see LocalPointer
284*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.4
285*0e209d39SAndroid Build Coastguard Worker  */
286*0e209d39SAndroid Build Coastguard Worker U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close);
287*0e209d39SAndroid Build Coastguard Worker 
288*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
289*0e209d39SAndroid Build Coastguard Worker 
290*0e209d39SAndroid Build Coastguard Worker #endif  // U_SHOW_CPLUSPLUS_API
291*0e209d39SAndroid Build Coastguard Worker 
292*0e209d39SAndroid Build Coastguard Worker /**
293*0e209d39SAndroid Build Coastguard Worker  * Writes the normalized form of the source string to the destination string
294*0e209d39SAndroid Build Coastguard Worker  * (replacing its contents) and returns the length of the destination string.
295*0e209d39SAndroid Build Coastguard Worker  * The source and destination strings must be different buffers.
296*0e209d39SAndroid Build Coastguard Worker  * @param norm2 UNormalizer2 instance
297*0e209d39SAndroid Build Coastguard Worker  * @param src source string
298*0e209d39SAndroid Build Coastguard Worker  * @param length length of the source string, or -1 if NUL-terminated
299*0e209d39SAndroid Build Coastguard Worker  * @param dest destination string; its contents are replaced with normalized src
300*0e209d39SAndroid Build Coastguard Worker  * @param capacity number of UChars that can be written to dest
301*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode Standard ICU error code. Its input value must
302*0e209d39SAndroid Build Coastguard Worker  *                   pass the U_SUCCESS() test, or else the function returns
303*0e209d39SAndroid Build Coastguard Worker  *                   immediately. Check for U_FAILURE() on output or use with
304*0e209d39SAndroid Build Coastguard Worker  *                   function chaining. (See User Guide for details.)
305*0e209d39SAndroid Build Coastguard Worker  * @return the length of the destination string (the normalized form of src)
306*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.4
307*0e209d39SAndroid Build Coastguard Worker  */
308*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2
309*0e209d39SAndroid Build Coastguard Worker unorm2_normalize(const UNormalizer2 *norm2,
310*0e209d39SAndroid Build Coastguard Worker                  const UChar *src, int32_t length,
311*0e209d39SAndroid Build Coastguard Worker                  UChar *dest, int32_t capacity,
312*0e209d39SAndroid Build Coastguard Worker                  UErrorCode *pErrorCode);
313*0e209d39SAndroid Build Coastguard Worker /**
314*0e209d39SAndroid Build Coastguard Worker  * Appends the normalized form of the second string to the first string
315*0e209d39SAndroid Build Coastguard Worker  * (merging them at the boundary) and returns the length of the first string.
316*0e209d39SAndroid Build Coastguard Worker  * The result is normalized if the first string was normalized.
317*0e209d39SAndroid Build Coastguard Worker  * The first and second strings must be different buffers.
318*0e209d39SAndroid Build Coastguard Worker  * @param norm2 UNormalizer2 instance
319*0e209d39SAndroid Build Coastguard Worker  * @param first string, should be normalized
320*0e209d39SAndroid Build Coastguard Worker  * @param firstLength length of the first string, or -1 if NUL-terminated
321*0e209d39SAndroid Build Coastguard Worker  * @param firstCapacity number of UChars that can be written to first
322*0e209d39SAndroid Build Coastguard Worker  * @param second string, will be normalized
323*0e209d39SAndroid Build Coastguard Worker  * @param secondLength length of the second string, or -1 if NUL-terminated
324*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode Standard ICU error code. Its input value must
325*0e209d39SAndroid Build Coastguard Worker  *                   pass the U_SUCCESS() test, or else the function returns
326*0e209d39SAndroid Build Coastguard Worker  *                   immediately. Check for U_FAILURE() on output or use with
327*0e209d39SAndroid Build Coastguard Worker  *                   function chaining. (See User Guide for details.)
328*0e209d39SAndroid Build Coastguard Worker  * @return the length of the first string after appending
329*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.4
330*0e209d39SAndroid Build Coastguard Worker  */
331*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2
332*0e209d39SAndroid Build Coastguard Worker unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
333*0e209d39SAndroid Build Coastguard Worker                                 UChar *first, int32_t firstLength, int32_t firstCapacity,
334*0e209d39SAndroid Build Coastguard Worker                                 const UChar *second, int32_t secondLength,
335*0e209d39SAndroid Build Coastguard Worker                                 UErrorCode *pErrorCode);
336*0e209d39SAndroid Build Coastguard Worker /**
337*0e209d39SAndroid Build Coastguard Worker  * Appends the second string to the first string
338*0e209d39SAndroid Build Coastguard Worker  * (merging them at the boundary) and returns the length of the first string.
339*0e209d39SAndroid Build Coastguard Worker  * The result is normalized if both the strings were normalized.
340*0e209d39SAndroid Build Coastguard Worker  * The first and second strings must be different buffers.
341*0e209d39SAndroid Build Coastguard Worker  * @param norm2 UNormalizer2 instance
342*0e209d39SAndroid Build Coastguard Worker  * @param first string, should be normalized
343*0e209d39SAndroid Build Coastguard Worker  * @param firstLength length of the first string, or -1 if NUL-terminated
344*0e209d39SAndroid Build Coastguard Worker  * @param firstCapacity number of UChars that can be written to first
345*0e209d39SAndroid Build Coastguard Worker  * @param second string, should be normalized
346*0e209d39SAndroid Build Coastguard Worker  * @param secondLength length of the second string, or -1 if NUL-terminated
347*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode Standard ICU error code. Its input value must
348*0e209d39SAndroid Build Coastguard Worker  *                   pass the U_SUCCESS() test, or else the function returns
349*0e209d39SAndroid Build Coastguard Worker  *                   immediately. Check for U_FAILURE() on output or use with
350*0e209d39SAndroid Build Coastguard Worker  *                   function chaining. (See User Guide for details.)
351*0e209d39SAndroid Build Coastguard Worker  * @return the length of the first string after appending
352*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.4
353*0e209d39SAndroid Build Coastguard Worker  */
354*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2
355*0e209d39SAndroid Build Coastguard Worker unorm2_append(const UNormalizer2 *norm2,
356*0e209d39SAndroid Build Coastguard Worker               UChar *first, int32_t firstLength, int32_t firstCapacity,
357*0e209d39SAndroid Build Coastguard Worker               const UChar *second, int32_t secondLength,
358*0e209d39SAndroid Build Coastguard Worker               UErrorCode *pErrorCode);
359*0e209d39SAndroid Build Coastguard Worker 
360*0e209d39SAndroid Build Coastguard Worker /**
361*0e209d39SAndroid Build Coastguard Worker  * Gets the decomposition mapping of c.
362*0e209d39SAndroid Build Coastguard Worker  * Roughly equivalent to normalizing the String form of c
363*0e209d39SAndroid Build Coastguard Worker  * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function
364*0e209d39SAndroid Build Coastguard Worker  * returns a negative value and does not write a string
365*0e209d39SAndroid Build Coastguard Worker  * if c does not have a decomposition mapping in this instance's data.
366*0e209d39SAndroid Build Coastguard Worker  * This function is independent of the mode of the UNormalizer2.
367*0e209d39SAndroid Build Coastguard Worker  * @param norm2 UNormalizer2 instance
368*0e209d39SAndroid Build Coastguard Worker  * @param c code point whose decomposition mapping is requested
369*0e209d39SAndroid Build Coastguard Worker  * @param decomposition String buffer which will be set to c's
370*0e209d39SAndroid Build Coastguard Worker  *                      decomposition mapping, if there is one.
371*0e209d39SAndroid Build Coastguard Worker  * @param capacity number of UChars that can be written to decomposition
372*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode Standard ICU error code. Its input value must
373*0e209d39SAndroid Build Coastguard Worker  *                   pass the U_SUCCESS() test, or else the function returns
374*0e209d39SAndroid Build Coastguard Worker  *                   immediately. Check for U_FAILURE() on output or use with
375*0e209d39SAndroid Build Coastguard Worker  *                   function chaining. (See User Guide for details.)
376*0e209d39SAndroid Build Coastguard Worker  * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value
377*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.6
378*0e209d39SAndroid Build Coastguard Worker  */
379*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2
380*0e209d39SAndroid Build Coastguard Worker unorm2_getDecomposition(const UNormalizer2 *norm2,
381*0e209d39SAndroid Build Coastguard Worker                         UChar32 c, UChar *decomposition, int32_t capacity,
382*0e209d39SAndroid Build Coastguard Worker                         UErrorCode *pErrorCode);
383*0e209d39SAndroid Build Coastguard Worker 
384*0e209d39SAndroid Build Coastguard Worker /**
385*0e209d39SAndroid Build Coastguard Worker  * Gets the raw decomposition mapping of the code point c.
386*0e209d39SAndroid Build Coastguard Worker  *
387*0e209d39SAndroid Build Coastguard Worker  * This is similar to the unorm2_getDecomposition() function but returns the
388*0e209d39SAndroid Build Coastguard Worker  * raw decomposition mapping as specified in UnicodeData.txt or
389*0e209d39SAndroid Build Coastguard Worker  * (for custom data) in the mapping files processed by the gennorm2 tool.
390*0e209d39SAndroid Build Coastguard Worker  * By contrast, unorm2_getDecomposition() returns the processed,
391*0e209d39SAndroid Build Coastguard Worker  * recursively-decomposed version of this mapping.
392*0e209d39SAndroid Build Coastguard Worker  *
393*0e209d39SAndroid Build Coastguard Worker  * When used on a standard NFKC Normalizer2 instance,
394*0e209d39SAndroid Build Coastguard Worker  * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
395*0e209d39SAndroid Build Coastguard Worker  *
396*0e209d39SAndroid Build Coastguard Worker  * When used on a standard NFC Normalizer2 instance,
397*0e209d39SAndroid Build Coastguard Worker  * unorm2_getRawDecomposition() returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
398*0e209d39SAndroid Build Coastguard Worker  * in this case, the result contains either one or two code points (=1..4 UChars).
399*0e209d39SAndroid Build Coastguard Worker  *
400*0e209d39SAndroid Build Coastguard Worker  * This function is independent of the mode of the UNormalizer2.
401*0e209d39SAndroid Build Coastguard Worker  * @param norm2 UNormalizer2 instance
402*0e209d39SAndroid Build Coastguard Worker  * @param c code point whose raw decomposition mapping is requested
403*0e209d39SAndroid Build Coastguard Worker  * @param decomposition Output UChar buffer which will be set to c's
404*0e209d39SAndroid Build Coastguard Worker  *                      raw decomposition mapping, if there is one.
405*0e209d39SAndroid Build Coastguard Worker  * @param capacity number of UChars that can be written to decomposition
406*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode Standard ICU error code. Its input value must
407*0e209d39SAndroid Build Coastguard Worker  *                   pass the U_SUCCESS() test, or else the function returns
408*0e209d39SAndroid Build Coastguard Worker  *                   immediately. Check for U_FAILURE() on output or use with
409*0e209d39SAndroid Build Coastguard Worker  *                   function chaining. (See User Guide for details.)
410*0e209d39SAndroid Build Coastguard Worker  * @return the non-negative length (in UChars) of c's raw decomposition, if there is one; otherwise a negative value
411*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 49
412*0e209d39SAndroid Build Coastguard Worker  */
413*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2
414*0e209d39SAndroid Build Coastguard Worker unorm2_getRawDecomposition(const UNormalizer2 *norm2,
415*0e209d39SAndroid Build Coastguard Worker                            UChar32 c, UChar *decomposition, int32_t capacity,
416*0e209d39SAndroid Build Coastguard Worker                            UErrorCode *pErrorCode);
417*0e209d39SAndroid Build Coastguard Worker 
418*0e209d39SAndroid Build Coastguard Worker /**
419*0e209d39SAndroid Build Coastguard Worker  * Performs pairwise composition of the code points a and b and returns the composite if there is one.
420*0e209d39SAndroid Build Coastguard Worker  *
421*0e209d39SAndroid Build Coastguard Worker  * Returns a composite code point c only if c has a two-way mapping to a+b.
422*0e209d39SAndroid Build Coastguard Worker  * In standard Unicode normalization, this means that
423*0e209d39SAndroid Build Coastguard Worker  * c has a canonical decomposition to a+b
424*0e209d39SAndroid Build Coastguard Worker  * and c does not have the Full_Composition_Exclusion property.
425*0e209d39SAndroid Build Coastguard Worker  *
426*0e209d39SAndroid Build Coastguard Worker  * This function is independent of the mode of the UNormalizer2.
427*0e209d39SAndroid Build Coastguard Worker  * @param norm2 UNormalizer2 instance
428*0e209d39SAndroid Build Coastguard Worker  * @param a The first code point of the pair (a normalization starter).
429*0e209d39SAndroid Build Coastguard Worker  * @param b The second code point of the pair.
430*0e209d39SAndroid Build Coastguard Worker  * @return The non-negative composite code point if there is one; otherwise a negative value.
431*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 49
432*0e209d39SAndroid Build Coastguard Worker  */
433*0e209d39SAndroid Build Coastguard Worker U_CAPI UChar32 U_EXPORT2
434*0e209d39SAndroid Build Coastguard Worker unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b);
435*0e209d39SAndroid Build Coastguard Worker 
436*0e209d39SAndroid Build Coastguard Worker /**
437*0e209d39SAndroid Build Coastguard Worker  * Gets the combining class of the code point c.
438*0e209d39SAndroid Build Coastguard Worker  * The default implementation returns 0,
439*0e209d39SAndroid Build Coastguard Worker  * but all standard implementations return the Unicode Canonical_Combining_Class value.
440*0e209d39SAndroid Build Coastguard Worker  * @param norm2 UNormalizer2 instance
441*0e209d39SAndroid Build Coastguard Worker  * @param c code point to look up
442*0e209d39SAndroid Build Coastguard Worker  * @return c's combining class
443*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 49
444*0e209d39SAndroid Build Coastguard Worker  */
445*0e209d39SAndroid Build Coastguard Worker U_CAPI uint8_t U_EXPORT2
446*0e209d39SAndroid Build Coastguard Worker unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
447*0e209d39SAndroid Build Coastguard Worker 
448*0e209d39SAndroid Build Coastguard Worker /**
449*0e209d39SAndroid Build Coastguard Worker  * Tests if the string is normalized.
450*0e209d39SAndroid Build Coastguard Worker  * Internally, in cases where unorm2_quickCheck() would return "maybe"
451*0e209d39SAndroid Build Coastguard Worker  * (which is only possible for the two COMPOSE modes) this function
452*0e209d39SAndroid Build Coastguard Worker  * resolves to "yes" or "no" to provide a definitive result,
453*0e209d39SAndroid Build Coastguard Worker  * at the cost of doing more work in those cases.
454*0e209d39SAndroid Build Coastguard Worker  * @param norm2 UNormalizer2 instance
455*0e209d39SAndroid Build Coastguard Worker  * @param s input string
456*0e209d39SAndroid Build Coastguard Worker  * @param length length of the string, or -1 if NUL-terminated
457*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode Standard ICU error code. Its input value must
458*0e209d39SAndroid Build Coastguard Worker  *                   pass the U_SUCCESS() test, or else the function returns
459*0e209d39SAndroid Build Coastguard Worker  *                   immediately. Check for U_FAILURE() on output or use with
460*0e209d39SAndroid Build Coastguard Worker  *                   function chaining. (See User Guide for details.)
461*0e209d39SAndroid Build Coastguard Worker  * @return true if s is normalized
462*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.4
463*0e209d39SAndroid Build Coastguard Worker  */
464*0e209d39SAndroid Build Coastguard Worker U_CAPI UBool U_EXPORT2
465*0e209d39SAndroid Build Coastguard Worker unorm2_isNormalized(const UNormalizer2 *norm2,
466*0e209d39SAndroid Build Coastguard Worker                     const UChar *s, int32_t length,
467*0e209d39SAndroid Build Coastguard Worker                     UErrorCode *pErrorCode);
468*0e209d39SAndroid Build Coastguard Worker 
469*0e209d39SAndroid Build Coastguard Worker /**
470*0e209d39SAndroid Build Coastguard Worker  * Quick-checks whether the string is normalized.
471*0e209d39SAndroid Build Coastguard Worker  * For the two COMPOSE modes, the result could be "maybe" in cases that
472*0e209d39SAndroid Build Coastguard Worker  * would take a little more work to resolve definitively.
473*0e209d39SAndroid Build Coastguard Worker  * Use unorm2_spanQuickCheckYes() and unorm2_normalizeSecondAndAppend() for a faster
474*0e209d39SAndroid Build Coastguard Worker  * combination of quick check + normalization, to avoid
475*0e209d39SAndroid Build Coastguard Worker  * re-checking the "yes" prefix.
476*0e209d39SAndroid Build Coastguard Worker  * @param norm2 UNormalizer2 instance
477*0e209d39SAndroid Build Coastguard Worker  * @param s input string
478*0e209d39SAndroid Build Coastguard Worker  * @param length length of the string, or -1 if NUL-terminated
479*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode Standard ICU error code. Its input value must
480*0e209d39SAndroid Build Coastguard Worker  *                   pass the U_SUCCESS() test, or else the function returns
481*0e209d39SAndroid Build Coastguard Worker  *                   immediately. Check for U_FAILURE() on output or use with
482*0e209d39SAndroid Build Coastguard Worker  *                   function chaining. (See User Guide for details.)
483*0e209d39SAndroid Build Coastguard Worker  * @return the UNormalizationCheckResult for s
484*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.4
485*0e209d39SAndroid Build Coastguard Worker  */
486*0e209d39SAndroid Build Coastguard Worker U_CAPI UNormalizationCheckResult U_EXPORT2
487*0e209d39SAndroid Build Coastguard Worker unorm2_quickCheck(const UNormalizer2 *norm2,
488*0e209d39SAndroid Build Coastguard Worker                   const UChar *s, int32_t length,
489*0e209d39SAndroid Build Coastguard Worker                   UErrorCode *pErrorCode);
490*0e209d39SAndroid Build Coastguard Worker 
491*0e209d39SAndroid Build Coastguard Worker /**
492*0e209d39SAndroid Build Coastguard Worker  * Returns the end of the normalized substring of the input string.
493*0e209d39SAndroid Build Coastguard Worker  * In other words, with <code>end=unorm2_spanQuickCheckYes(norm2, s, length, &ec);</code>
494*0e209d39SAndroid Build Coastguard Worker  * the substring consisting of the first <code>end</code> UChars of s
495*0e209d39SAndroid Build Coastguard Worker  * will pass the quick check with a "yes" result.
496*0e209d39SAndroid Build Coastguard Worker  *
497*0e209d39SAndroid Build Coastguard Worker  * The returned end index is usually one or more characters before the
498*0e209d39SAndroid Build Coastguard Worker  * "no" or "maybe" character: The end index is at a normalization boundary.
499*0e209d39SAndroid Build Coastguard Worker  * (See the Normalizer2 class documentation for more about normalization boundaries.)
500*0e209d39SAndroid Build Coastguard Worker  *
501*0e209d39SAndroid Build Coastguard Worker  * When the goal is a normalized string and most input strings are expected
502*0e209d39SAndroid Build Coastguard Worker  * to be normalized already, then call this function,
503*0e209d39SAndroid Build Coastguard Worker  * and if it returns a prefix shorter than the input string,
504*0e209d39SAndroid Build Coastguard Worker  * copy that prefix and use unorm2_normalizeSecondAndAppend() for the remainder.
505*0e209d39SAndroid Build Coastguard Worker  * @param norm2 UNormalizer2 instance
506*0e209d39SAndroid Build Coastguard Worker  * @param s input string
507*0e209d39SAndroid Build Coastguard Worker  * @param length length of the string, or -1 if NUL-terminated
508*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode Standard ICU error code. Its input value must
509*0e209d39SAndroid Build Coastguard Worker  *                   pass the U_SUCCESS() test, or else the function returns
510*0e209d39SAndroid Build Coastguard Worker  *                   immediately. Check for U_FAILURE() on output or use with
511*0e209d39SAndroid Build Coastguard Worker  *                   function chaining. (See User Guide for details.)
512*0e209d39SAndroid Build Coastguard Worker  * @return "yes" span end index
513*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.4
514*0e209d39SAndroid Build Coastguard Worker  */
515*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2
516*0e209d39SAndroid Build Coastguard Worker unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
517*0e209d39SAndroid Build Coastguard Worker                          const UChar *s, int32_t length,
518*0e209d39SAndroid Build Coastguard Worker                          UErrorCode *pErrorCode);
519*0e209d39SAndroid Build Coastguard Worker 
520*0e209d39SAndroid Build Coastguard Worker /**
521*0e209d39SAndroid Build Coastguard Worker  * Tests if the character always has a normalization boundary before it,
522*0e209d39SAndroid Build Coastguard Worker  * regardless of context.
523*0e209d39SAndroid Build Coastguard Worker  * For details see the documentation of the C++ Normalizer2 base class.
524*0e209d39SAndroid Build Coastguard Worker  * @param norm2 UNormalizer2 instance
525*0e209d39SAndroid Build Coastguard Worker  * @param c character (code point) to test
526*0e209d39SAndroid Build Coastguard Worker  * @return true if c has a normalization boundary before it
527*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.4
528*0e209d39SAndroid Build Coastguard Worker  */
529*0e209d39SAndroid Build Coastguard Worker U_CAPI UBool U_EXPORT2
530*0e209d39SAndroid Build Coastguard Worker unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
531*0e209d39SAndroid Build Coastguard Worker 
532*0e209d39SAndroid Build Coastguard Worker /**
533*0e209d39SAndroid Build Coastguard Worker  * Tests if the character always has a normalization boundary after it,
534*0e209d39SAndroid Build Coastguard Worker  * regardless of context.
535*0e209d39SAndroid Build Coastguard Worker  * For details see the documentation of the C++ Normalizer2 base class.
536*0e209d39SAndroid Build Coastguard Worker  * @param norm2 UNormalizer2 instance
537*0e209d39SAndroid Build Coastguard Worker  * @param c character (code point) to test
538*0e209d39SAndroid Build Coastguard Worker  * @return true if c has a normalization boundary after it
539*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.4
540*0e209d39SAndroid Build Coastguard Worker  */
541*0e209d39SAndroid Build Coastguard Worker U_CAPI UBool U_EXPORT2
542*0e209d39SAndroid Build Coastguard Worker unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
543*0e209d39SAndroid Build Coastguard Worker 
544*0e209d39SAndroid Build Coastguard Worker /**
545*0e209d39SAndroid Build Coastguard Worker  * Tests if the character is normalization-inert.
546*0e209d39SAndroid Build Coastguard Worker  * For details see the documentation of the C++ Normalizer2 base class.
547*0e209d39SAndroid Build Coastguard Worker  * @param norm2 UNormalizer2 instance
548*0e209d39SAndroid Build Coastguard Worker  * @param c character (code point) to test
549*0e209d39SAndroid Build Coastguard Worker  * @return true if c is normalization-inert
550*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 4.4
551*0e209d39SAndroid Build Coastguard Worker  */
552*0e209d39SAndroid Build Coastguard Worker U_CAPI UBool U_EXPORT2
553*0e209d39SAndroid Build Coastguard Worker unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
554*0e209d39SAndroid Build Coastguard Worker 
555*0e209d39SAndroid Build Coastguard Worker /**
556*0e209d39SAndroid Build Coastguard Worker  * Compares two strings for canonical equivalence.
557*0e209d39SAndroid Build Coastguard Worker  * Further options include case-insensitive comparison and
558*0e209d39SAndroid Build Coastguard Worker  * code point order (as opposed to code unit order).
559*0e209d39SAndroid Build Coastguard Worker  *
560*0e209d39SAndroid Build Coastguard Worker  * Canonical equivalence between two strings is defined as their normalized
561*0e209d39SAndroid Build Coastguard Worker  * forms (NFD or NFC) being identical.
562*0e209d39SAndroid Build Coastguard Worker  * This function compares strings incrementally instead of normalizing
563*0e209d39SAndroid Build Coastguard Worker  * (and optionally case-folding) both strings entirely,
564*0e209d39SAndroid Build Coastguard Worker  * improving performance significantly.
565*0e209d39SAndroid Build Coastguard Worker  *
566*0e209d39SAndroid Build Coastguard Worker  * Bulk normalization is only necessary if the strings do not fulfill the FCD
567*0e209d39SAndroid Build Coastguard Worker  * conditions. Only in this case, and only if the strings are relatively long,
568*0e209d39SAndroid Build Coastguard Worker  * is memory allocated temporarily.
569*0e209d39SAndroid Build Coastguard Worker  * For FCD strings and short non-FCD strings there is no memory allocation.
570*0e209d39SAndroid Build Coastguard Worker  *
571*0e209d39SAndroid Build Coastguard Worker  * Semantically, this is equivalent to
572*0e209d39SAndroid Build Coastguard Worker  *   strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2))))
573*0e209d39SAndroid Build Coastguard Worker  * where code point order and foldCase are both optional.
574*0e209d39SAndroid Build Coastguard Worker  *
575*0e209d39SAndroid Build Coastguard Worker  * UAX #21 section 2.5 (Caseless Matching) specifies that for a canonical caseless match
576*0e209d39SAndroid Build Coastguard Worker  * the case folding must be performed first, then the normalization.
577*0e209d39SAndroid Build Coastguard Worker  *
578*0e209d39SAndroid Build Coastguard Worker  * @param s1 First source string.
579*0e209d39SAndroid Build Coastguard Worker  * @param length1 Length of first source string, or -1 if NUL-terminated.
580*0e209d39SAndroid Build Coastguard Worker  *
581*0e209d39SAndroid Build Coastguard Worker  * @param s2 Second source string.
582*0e209d39SAndroid Build Coastguard Worker  * @param length2 Length of second source string, or -1 if NUL-terminated.
583*0e209d39SAndroid Build Coastguard Worker  *
584*0e209d39SAndroid Build Coastguard Worker  * @param options A bit set of options:
585*0e209d39SAndroid Build Coastguard Worker  *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
586*0e209d39SAndroid Build Coastguard Worker  *     Case-sensitive comparison in code unit order, and the input strings
587*0e209d39SAndroid Build Coastguard Worker  *     are quick-checked for FCD.
588*0e209d39SAndroid Build Coastguard Worker  *
589*0e209d39SAndroid Build Coastguard Worker  *   - UNORM_INPUT_IS_FCD
590*0e209d39SAndroid Build Coastguard Worker  *     Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
591*0e209d39SAndroid Build Coastguard Worker  *     If not set, the function will quickCheck for FCD
592*0e209d39SAndroid Build Coastguard Worker  *     and normalize if necessary.
593*0e209d39SAndroid Build Coastguard Worker  *
594*0e209d39SAndroid Build Coastguard Worker  *   - U_COMPARE_CODE_POINT_ORDER
595*0e209d39SAndroid Build Coastguard Worker  *     Set to choose code point order instead of code unit order
596*0e209d39SAndroid Build Coastguard Worker  *     (see u_strCompare for details).
597*0e209d39SAndroid Build Coastguard Worker  *
598*0e209d39SAndroid Build Coastguard Worker  *   - U_COMPARE_IGNORE_CASE
599*0e209d39SAndroid Build Coastguard Worker  *     Set to compare strings case-insensitively using case folding,
600*0e209d39SAndroid Build Coastguard Worker  *     instead of case-sensitively.
601*0e209d39SAndroid Build Coastguard Worker  *     If set, then the following case folding options are used.
602*0e209d39SAndroid Build Coastguard Worker  *
603*0e209d39SAndroid Build Coastguard Worker  *   - Options as used with case-insensitive comparisons, currently:
604*0e209d39SAndroid Build Coastguard Worker  *
605*0e209d39SAndroid Build Coastguard Worker  *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
606*0e209d39SAndroid Build Coastguard Worker  *     (see u_strCaseCompare for details)
607*0e209d39SAndroid Build Coastguard Worker  *
608*0e209d39SAndroid Build Coastguard Worker  *   - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
609*0e209d39SAndroid Build Coastguard Worker  *
610*0e209d39SAndroid Build Coastguard Worker  * @param pErrorCode ICU error code in/out parameter.
611*0e209d39SAndroid Build Coastguard Worker  *                   Must pass the U_SUCCESS() test before the function call.
612*0e209d39SAndroid Build Coastguard Worker  * @return a value <0, 0, or >0, as usual for string comparisons
613*0e209d39SAndroid Build Coastguard Worker  *
614*0e209d39SAndroid Build Coastguard Worker  * @see unorm_normalize
615*0e209d39SAndroid Build Coastguard Worker  * @see UNORM_FCD
616*0e209d39SAndroid Build Coastguard Worker  * @see u_strCompare
617*0e209d39SAndroid Build Coastguard Worker  * @see u_strCaseCompare
618*0e209d39SAndroid Build Coastguard Worker  *
619*0e209d39SAndroid Build Coastguard Worker  * @stable ICU 2.2
620*0e209d39SAndroid Build Coastguard Worker  */
621*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2
622*0e209d39SAndroid Build Coastguard Worker unorm_compare(const UChar *s1, int32_t length1,
623*0e209d39SAndroid Build Coastguard Worker               const UChar *s2, int32_t length2,
624*0e209d39SAndroid Build Coastguard Worker               uint32_t options,
625*0e209d39SAndroid Build Coastguard Worker               UErrorCode *pErrorCode);
626*0e209d39SAndroid Build Coastguard Worker 
627*0e209d39SAndroid Build Coastguard Worker #endif  /* !UCONFIG_NO_NORMALIZATION */
628*0e209d39SAndroid Build Coastguard Worker #endif  /* __UNORM2_H__ */
629