xref: /aosp_15_r20/external/icu/libandroidicu/include/unicode/uset.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 2002-2014, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  uset.h
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2002mar07
16 *   created by: Markus W. Scherer
17 *
18 *   C version of UnicodeSet.
19 */
20 
21 
22 /**
23  * \file
24  * \brief C API: Unicode Set
25  *
26  * <p>This is a C wrapper around the C++ UnicodeSet class.</p>
27  */
28 
29 #ifndef __USET_H__
30 #define __USET_H__
31 
32 #include "unicode/utypes.h"
33 #include "unicode/uchar.h"
34 
35 #if U_SHOW_CPLUSPLUS_API
36 #include "unicode/localpointer.h"
37 #endif   // U_SHOW_CPLUSPLUS_API
38 
39 #ifndef USET_DEFINED
40 
41 #ifndef U_IN_DOXYGEN
42 #define USET_DEFINED
43 #endif
44 /**
45  * USet is the C API type corresponding to C++ class UnicodeSet.
46  * Use the uset_* API to manipulate.  Create with
47  * uset_open*, and destroy with uset_close.
48  * @stable ICU 2.4
49  */
50 typedef struct USet USet;
51 #endif
52 
53 /**
54  * Bitmask values to be passed to uset_openPatternOptions() or
55  * uset_applyPattern() taking an option parameter.
56  *
57  * Use at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
58  * These case options are mutually exclusive: USET_SIMPLE_CASE_INSENSITIVE (6) has the same bits as the other two combined (2|4).
59  *
60  * Undefined option bits are ignored, and reserved for future use.
61  *
62  * @stable ICU 2.4
63  */
64 enum {
65     /**
66      * Ignore white space within patterns unless quoted or escaped.
67      * @stable ICU 2.4
68      */
69     USET_IGNORE_SPACE = 1,
70 
71     /**
72      * Enable case insensitive matching.  E.g., "[ab]" with this flag
73      * will match 'a', 'A', 'b', and 'B'.  "[^ab]" with this flag will
74      * match all except 'a', 'A', 'b', and 'B'. This performs a full
75      * closure over case mappings, e.g. 'ſ' (U+017F long s) for 's'.
76      *
77      * The resulting set is a superset of the input for the code points but
78      * not for the strings.
79      * It performs a case mapping closure of the code points and adds
80      * full case folding strings for the code points, and reduces strings of
81      * the original set to their full case folding equivalents.
82      *
83      * This is designed for case-insensitive matches, for example
84      * in regular expressions. The full code point case closure allows checking of
85      * an input character directly against the closure set.
86      * Strings are matched by comparing the case-folded form from the closure
87      * set with an incremental case folding of the string in question.
88      *
89      * The closure set will also contain single code points if the original
90      * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
91      * This is not necessary (that is, redundant) for the above matching method
92      * but results in the same closure sets regardless of whether the original
93      * set contained the code point or a string.
94      *
95      * @stable ICU 2.4
96      */
97     USET_CASE_INSENSITIVE = 2,
98 
99     /**
100      * Adds all case mappings for each element in the set.
101      * This adds the full lower-, title-, and uppercase mappings as well as the full case folding
102      * of each existing element in the set.
103      *
104      * Unlike the “case insensitive” options, this does not perform a closure.
105      * For example, it does not add 'ſ' (U+017F long s) for 's',
106      * 'K' (U+212A Kelvin sign) for 'k', or replace set strings by their case-folded versions.
107      *
108      * @stable ICU 3.2
109      */
110     USET_ADD_CASE_MAPPINGS = 4,
111 
112     /**
113      * Enable case insensitive matching.
114      * Same as USET_CASE_INSENSITIVE but using only Simple_Case_Folding (scf) mappings,
115      * which map each code point to one code point,
116      * not full Case_Folding (cf) mappings, which map some code points to multiple code points.
117      *
118      * This is designed for case-insensitive matches, for example in certain
119      * regular expression implementations where only Simple_Case_Folding mappings are used,
120      * such as in ECMAScript (JavaScript) regular expressions.
121      *
122      * @stable ICU 73
123      */
124     USET_SIMPLE_CASE_INSENSITIVE = 6
125 };
126 
127 /**
128  * Argument values for whether span() and similar functions continue while
129  * the current character is contained vs. not contained in the set.
130  *
131  * The functionality is straightforward for sets with only single code points,
132  * without strings (which is the common case):
133  * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same.
134  * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED.
135  * - span() and spanBack() partition any string the same way when
136  *   alternating between span(USET_SPAN_NOT_CONTAINED) and
137  *   span(either "contained" condition).
138  * - Using a complemented (inverted) set and the opposite span conditions
139  *   yields the same results.
140  *
141  * When a set contains multi-code point strings, then these statements may not
142  * be true, depending on the strings in the set (for example, whether they
143  * overlap with each other) and the string that is processed.
144  * For a set with strings:
145  * - The complement of the set contains the opposite set of code points,
146  *   but the same set of strings.
147  *   Therefore, complementing both the set and the span conditions
148  *   may yield different results.
149  * - When starting spans at different positions in a string
150  *   (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different
151  *   because a set string may start before the later position.
152  * - span(USET_SPAN_SIMPLE) may be shorter than
153  *   span(USET_SPAN_CONTAINED) because it will not recursively try
154  *   all possible paths.
155  *   For example, with a set which contains the three strings "xy", "xya" and "ax",
156  *   span("xyax", USET_SPAN_CONTAINED) will return 4 but
157  *   span("xyax", USET_SPAN_SIMPLE) will return 3.
158  *   span(USET_SPAN_SIMPLE) will never be longer than
159  *   span(USET_SPAN_CONTAINED).
160  * - With either "contained" condition, span() and spanBack() may partition
161  *   a string in different ways.
162  *   For example, with a set which contains the two strings "ab" and "ba",
163  *   and when processing the string "aba",
164  *   span() will yield contained/not-contained boundaries of { 0, 2, 3 }
165  *   while spanBack() will yield boundaries of { 0, 1, 3 }.
166  *
167  * Note: If it is important to get the same boundaries whether iterating forward
168  * or backward through a string, then either only span() should be used and
169  * the boundaries cached for backward operation, or an ICU BreakIterator
170  * could be used.
171  *
172  * Note: Unpaired surrogates are treated like surrogate code points.
173  * Similarly, set strings match only on code point boundaries,
174  * never in the middle of a surrogate pair.
175  * Illegal UTF-8 sequences are treated like U+FFFD.
176  * When processing UTF-8 strings, malformed set strings
177  * (strings with unpaired surrogates which cannot be converted to UTF-8)
178  * are ignored.
179  *
180  * @stable ICU 3.8
181  */
182 typedef enum USetSpanCondition {
183     /**
184      * Continues a span() while there is no set element at the current position.
185      * Increments by one code point at a time.
186      * Stops before the first set element (character or string).
187      * (For code points only, this is like while contains(current)==false).
188      *
189      * When span() returns, the substring between where it started and the position
190      * it returned consists only of characters that are not in the set,
191      * and none of its strings overlap with the span.
192      *
193      * @stable ICU 3.8
194      */
195     USET_SPAN_NOT_CONTAINED = 0,
196     /**
197      * Spans the longest substring that is a concatenation of set elements (characters or strings).
198      * (For characters only, this is like while contains(current)==true).
199      *
200      * When span() returns, the substring between where it started and the position
201      * it returned consists only of set elements (characters or strings) that are in the set.
202      *
203      * If a set contains strings, then the span will be the longest substring for which there
204      * exists at least one non-overlapping concatenation of set elements (characters or strings).
205      * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>.
206      * (Java/ICU/Perl regex stops at the first match of an OR.)
207      *
208      * @stable ICU 3.8
209      */
210     USET_SPAN_CONTAINED = 1,
211     /**
212      * Continues a span() while there is a set element at the current position.
213      * Increments by the longest matching element at each position.
214      * (For characters only, this is like while contains(current)==true).
215      *
216      * When span() returns, the substring between where it started and the position
217      * it returned consists only of set elements (characters or strings) that are in the set.
218      *
219      * If a set only contains single characters, then this is the same
220      * as USET_SPAN_CONTAINED.
221      *
222      * If a set contains strings, then the span will be the longest substring
223      * with a match at each position with the longest single set element (character or string).
224      *
225      * Use this span condition together with other longest-match algorithms,
226      * such as ICU converters (ucnv_getUnicodeSet()).
227      *
228      * @stable ICU 3.8
229      */
230     USET_SPAN_SIMPLE = 2,
231 #ifndef U_HIDE_DEPRECATED_API
232     /**
233      * One more than the last span condition.
234      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
235      */
236     USET_SPAN_CONDITION_COUNT
237 #endif  // U_HIDE_DEPRECATED_API
238 } USetSpanCondition;
239 
240 enum {
241     /**
242      * Capacity of USerializedSet::staticArray.
243      * Enough for any single-code point set.
244      * Also provides padding for nice sizeof(USerializedSet).
245      * @stable ICU 2.4
246      */
247     USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
248 };
249 
250 /**
251  * A serialized form of a Unicode set.  Limited manipulations are
252  * possible directly on a serialized set.  See below.
253  * @stable ICU 2.4
254  */
255 typedef struct USerializedSet {
256     /**
257      * The serialized Unicode Set.
258      * @stable ICU 2.4
259      */
260     const uint16_t *array;
261     /**
262      * The length of the array that contains BMP characters.
263      * @stable ICU 2.4
264      */
265     int32_t bmpLength;
266     /**
267      * The total length of the array.
268      * @stable ICU 2.4
269      */
270     int32_t length;
271     /**
272      * A small buffer for the array to reduce memory allocations.
273      * @stable ICU 2.4
274      */
275     uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
276 } USerializedSet;
277 
278 /*********************************************************************
279  * USet API
280  *********************************************************************/
281 
282 /**
283  * Create an empty USet object.
284  * Equivalent to uset_open(1, 0).
285  * @return a newly created USet.  The caller must call uset_close() on
286  * it when done.
287  * @stable ICU 4.2
288  */
289 U_CAPI USet* U_EXPORT2
290 uset_openEmpty(void);
291 
292 /**
293  * Creates a USet object that contains the range of characters
294  * start..end, inclusive.  If <code>start > end</code>
295  * then an empty set is created (same as using uset_openEmpty()).
296  * @param start first character of the range, inclusive
297  * @param end last character of the range, inclusive
298  * @return a newly created USet.  The caller must call uset_close() on
299  * it when done.
300  * @stable ICU 2.4
301  */
302 U_CAPI USet* U_EXPORT2
303 uset_open(UChar32 start, UChar32 end);
304 
305 /**
306  * Creates a set from the given pattern.  See the UnicodeSet class
307  * description for the syntax of the pattern language.
308  * @param pattern a string specifying what characters are in the set
309  * @param patternLength the length of the pattern, or -1 if null
310  * terminated
311  * @param ec the error code
312  * @stable ICU 2.4
313  */
314 U_CAPI USet* U_EXPORT2
315 uset_openPattern(const UChar* pattern, int32_t patternLength,
316                  UErrorCode* ec);
317 
318 /**
319  * Creates a set from the given pattern.  See the UnicodeSet class
320  * description for the syntax of the pattern language.
321  * @param pattern a string specifying what characters are in the set
322  * @param patternLength the length of the pattern, or -1 if null
323  * terminated
324  * @param options bitmask for options to apply to the pattern.
325  * Valid options are USET_IGNORE_SPACE and
326  * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
327  * These case options are mutually exclusive.
328  * @param ec the error code
329  * @stable ICU 2.4
330  */
331 U_CAPI USet* U_EXPORT2
332 uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
333                  uint32_t options,
334                  UErrorCode* ec);
335 
336 /**
337  * Disposes of the storage used by a USet object.  This function should
338  * be called exactly once for objects returned by the uset_open*() functions.
339  * @param set the object to dispose of
340  * @stable ICU 2.4
341  */
342 U_CAPI void U_EXPORT2
343 uset_close(USet* set);
344 
345 #if U_SHOW_CPLUSPLUS_API
346 
347 U_NAMESPACE_BEGIN
348 
349 /**
350  * \class LocalUSetPointer
351  * "Smart pointer" class, closes a USet via uset_close().
352  * For most methods see the LocalPointerBase base class.
353  *
354  * @see LocalPointerBase
355  * @see LocalPointer
356  * @stable ICU 4.4
357  */
358 U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close);
359 
360 U_NAMESPACE_END
361 
362 #endif
363 
364 /**
365  * Returns a copy of this object.
366  * If this set is frozen, then the clone will be frozen as well.
367  * Use uset_cloneAsThawed() for a mutable clone of a frozen set.
368  * @param set the original set
369  * @return the newly allocated copy of the set
370  * @see uset_cloneAsThawed
371  * @stable ICU 3.8
372  */
373 U_CAPI USet * U_EXPORT2
374 uset_clone(const USet *set);
375 
376 /**
377  * Determines whether the set has been frozen (made immutable) or not.
378  * See the ICU4J Freezable interface for details.
379  * @param set the set
380  * @return true/false for whether the set has been frozen
381  * @see uset_freeze
382  * @see uset_cloneAsThawed
383  * @stable ICU 3.8
384  */
385 U_CAPI UBool U_EXPORT2
386 uset_isFrozen(const USet *set);
387 
388 /**
389  * Freeze the set (make it immutable).
390  * Once frozen, it cannot be unfrozen and is therefore thread-safe
391  * until it is deleted.
392  * See the ICU4J Freezable interface for details.
393  * Freezing the set may also make some operations faster, for example
394  * uset_contains() and uset_span().
395  * A frozen set will not be modified. (It remains frozen.)
396  * @param set the set
397  * (This function returns no value; the set passed in is itself frozen.)
398  * @see uset_isFrozen
399  * @see uset_cloneAsThawed
400  * @stable ICU 3.8
401  */
402 U_CAPI void U_EXPORT2
403 uset_freeze(USet *set);
404 
405 /**
406  * Clone the set and make the clone mutable.
407  * See the ICU4J Freezable interface for details.
408  * @param set the set
409  * @return the mutable clone
410  * @see uset_freeze
411  * @see uset_isFrozen
412  * @see uset_clone
413  * @stable ICU 3.8
414  */
415 U_CAPI USet * U_EXPORT2
416 uset_cloneAsThawed(const USet *set);
417 
418 /**
419  * Causes the USet object to represent the range <code>start - end</code>.
420  * If <code>start > end</code> then this USet is set to an empty range.
421  * A frozen set will not be modified.
422  * @param set the object to set to the given range
423  * @param start first character in the set, inclusive
424  * @param end last character in the set, inclusive
425  * @stable ICU 3.2
426  */
427 U_CAPI void U_EXPORT2
428 uset_set(USet* set,
429          UChar32 start, UChar32 end);
430 
431 /**
432  * Modifies the set to represent the set specified by the given
433  * pattern. See the UnicodeSet class description for the syntax of
434  * the pattern language. See also the User Guide chapter about UnicodeSet.
435  * <em>Empties the set passed before applying the pattern.</em>
436  * A frozen set will not be modified.
437  * @param set               The set to which the pattern is to be applied.
438  * @param pattern           A pointer to UChar string specifying what characters are in the set.
439  *                          The character at pattern[0] must be a '['.
440  * @param patternLength     The length of the UChar string. -1 if NUL terminated.
441  * @param options           A bitmask for options to apply to the pattern.
442  *                          Valid options are USET_IGNORE_SPACE and
443  *                          at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS,
444  *                          USET_SIMPLE_CASE_INSENSITIVE.
445  *                          These case options are mutually exclusive.
446  * @param status            Returns an error if the pattern cannot be parsed.
447  * @return                  Upon successful parse, the value is
448  *                          the index of the character after the closing ']'
449  *                          of the parsed pattern.
450  *                          If the status code indicates failure, then the return value
451  *                          is the index of the error in the source.
452  *
453  * @stable ICU 2.8
454  */
455 U_CAPI int32_t U_EXPORT2
456 uset_applyPattern(USet *set,
457                   const UChar *pattern, int32_t patternLength,
458                   uint32_t options,
459                   UErrorCode *status);
460 
461 /**
462  * Modifies the set to contain those code points which have the given value
463  * for the given binary or enumerated property, as returned by
464  * u_getIntPropertyValue.  Prior contents of this set are lost.
465  * A frozen set will not be modified.
466  *
467  * @param set the object to contain the code points defined by the property
468  *
469  * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
470  * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
471  * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
472  *
473  * @param value a value in the range u_getIntPropertyMinValue(prop)..
474  * u_getIntPropertyMaxValue(prop), with one exception.  If prop is
475  * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
476  * rather a mask value produced by U_GET_GC_MASK().  This allows grouped
477  * categories such as [:L:] to be represented.
478  *
479  * @param ec error code input/output parameter
480  *
481  * @stable ICU 3.2
482  */
483 U_CAPI void U_EXPORT2
484 uset_applyIntPropertyValue(USet* set,
485                            UProperty prop, int32_t value, UErrorCode* ec);
486 
487 /**
488  * Modifies the set to contain those code points which have the
489  * given value for the given property.  Prior contents of this
490  * set are lost.
491  * A frozen set will not be modified.
492  *
493  * @param set the object to contain the code points defined by the given
494  * property and value alias
495  *
496  * @param prop a string specifying a property alias, either short or long.
497  * The name is matched loosely.  See PropertyAliases.txt for names and a
498  * description of loose matching.  If the value string is empty, then this
499  * string is interpreted as either a General_Category value alias, a Script
500  * value alias, a binary property alias, or a special ID.  Special IDs are
501  * matched loosely and correspond to the following sets:
502  *
503  * "ANY" = [\\u0000-\\U0010FFFF],
504  * "ASCII" = [\\u0000-\\u007F],
505  * "Assigned" = [:^Cn:].
506  *
507  * @param propLength the length of the prop, or -1 if NUL-terminated
508  *
509  * @param value a string specifying a value alias, either short or long.
510  * The name is matched loosely.  See PropertyValueAliases.txt for names
511  * and a description of loose matching.  In addition to aliases listed,
512  * numeric values and canonical combining classes may be expressed
513  * numerically, e.g., ("nv", "0.5") or ("ccc", "220").  The value string
514  * may also be empty.
515  *
516  * @param valueLength the length of the value, or -1 if NUL-terminated
517  *
518  * @param ec error code input/output parameter
519  *
520  * @stable ICU 3.2
521  */
522 U_CAPI void U_EXPORT2
523 uset_applyPropertyAlias(USet* set,
524                         const UChar *prop, int32_t propLength,
525                         const UChar *value, int32_t valueLength,
526                         UErrorCode* ec);
527 
528 /**
529  * Return true if the given position, in the given pattern, appears
530  * to be the start of a UnicodeSet pattern.
531  *
532  * @param pattern a string specifying the pattern
533  * @param patternLength the length of the pattern, or -1 if NUL-terminated
534  * @param pos the given position
535  * @stable ICU 3.2
536  */
537 U_CAPI UBool U_EXPORT2
538 uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
539                       int32_t pos);
540 
541 /**
542  * Returns a string representation of this set.  If the result of
543  * calling this function is passed to a uset_openPattern(), it
544  * will produce another set that is equal to this one.
545  * @param set the set
546  * @param result the string to receive the rules, may be NULL
547  * @param resultCapacity the capacity of result, may be 0 if result is NULL
548  * @param escapeUnprintable if true then convert unprintable
549  * characters to their hex escape representations, \\uxxxx or
550  * \\Uxxxxxxxx.  Unprintable characters are those other than
551  * U+000A, U+0020..U+007E.
552  * @param ec error code.
553  * @return length of string, possibly larger than resultCapacity
554  * @stable ICU 2.4
555  */
556 U_CAPI int32_t U_EXPORT2
557 uset_toPattern(const USet* set,
558                UChar* result, int32_t resultCapacity,
559                UBool escapeUnprintable,
560                UErrorCode* ec);
561 
562 /**
563  * Adds the given character to the given USet.  After this call,
564  * uset_contains(set, c) will return true.
565  * A frozen set will not be modified.
566  * @param set the object to which to add the character
567  * @param c the character to add
568  * @stable ICU 2.4
569  */
570 U_CAPI void U_EXPORT2
571 uset_add(USet* set, UChar32 c);
572 
573 /**
574  * Adds all of the elements in the specified set to this set if
575  * they're not already present.  This operation effectively
576  * modifies this set so that its value is the <i>union</i> of the two
577  * sets.  The behavior of this operation is unspecified if the specified
578  * set is modified while the operation is in progress.
579  * A frozen set will not be modified.
580  *
581  * @param set the object to which to add the set
582  * @param additionalSet the source set whose elements are to be added to this set.
583  * @stable ICU 2.6
584  */
585 U_CAPI void U_EXPORT2
586 uset_addAll(USet* set, const USet *additionalSet);
587 
588 /**
589  * Adds the given range of characters to the given USet.  After this call,
590  * uset_contains(set, c) will return true for every code point c in start..end.
591  * A frozen set will not be modified.
592  * @param set the object to which to add the range
593  * @param start the first character of the range to add, inclusive
594  * @param end the last character of the range to add, inclusive
595  * @stable ICU 2.2
596  */
597 U_CAPI void U_EXPORT2
598 uset_addRange(USet* set, UChar32 start, UChar32 end);
599 
600 /**
601  * Adds the given string to the given USet.  After this call,
602  * uset_containsString(set, str, strLen) will return true.
603  * A frozen set will not be modified.
604  * @param set the object to which to add the string
605  * @param str the string to add
606  * @param strLen the length of the string or -1 if null terminated.
607  * @stable ICU 2.4
608  */
609 U_CAPI void U_EXPORT2
610 uset_addString(USet* set, const UChar* str, int32_t strLen);
611 
612 /**
613  * Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
614  * If this set already contains any particular character, it has no effect on that character.
615  * A frozen set will not be modified.
616  * @param set the object to which to add the characters
617  * @param str the source string
618  * @param strLen the length of the string or -1 if null terminated.
619  * @stable ICU 3.4
620  */
621 U_CAPI void U_EXPORT2
622 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
623 
624 /**
625  * Removes the given character from the given USet.  After this call,
626  * uset_contains(set, c) will return false.
627  * A frozen set will not be modified.
628  * @param set the object from which to remove the character
629  * @param c the character to remove
630  * @stable ICU 2.4
631  */
632 U_CAPI void U_EXPORT2
633 uset_remove(USet* set, UChar32 c);
634 
635 /**
636  * Removes the given range of characters from the given USet.  After this call,
637  * uset_contains(set, c) will return false for every code point c in start..end.
638  * A frozen set will not be modified.
639  * @param set the object from which to remove the range
640  * @param start the first character of the range to remove, inclusive
641  * @param end the last character of the range to remove, inclusive
642  * @stable ICU 2.2
643  */
644 U_CAPI void U_EXPORT2
645 uset_removeRange(USet* set, UChar32 start, UChar32 end);
646 
647 /**
648  * Removes the given string from the given USet.  After this call,
649  * uset_containsString(set, str, strLen) will return false.
650  * A frozen set will not be modified.
651  * @param set the object from which to remove the string
652  * @param str the string to remove
653  * @param strLen the length of the string or -1 if null terminated.
654  * @stable ICU 2.4
655  */
656 U_CAPI void U_EXPORT2
657 uset_removeString(USet* set, const UChar* str, int32_t strLen);
658 
659 /**
660  * Removes EACH of the characters in this string. Note: "ch" == {"c", "h"}
661  * A frozen set will not be modified.
662  *
663  * @param set the object to be modified
664  * @param str the string
665  * @param length the length of the string, or -1 if NUL-terminated
666  * @stable ICU 69
667  */
668 U_CAPI void U_EXPORT2
669 uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
670 
671 /**
672  * Removes from this set all of its elements that are contained in the
673  * specified set.  This operation effectively modifies this
674  * set so that its value is the <i>asymmetric set difference</i> of
675  * the two sets.
676  * A frozen set will not be modified.
677  * @param set the object from which the elements are to be removed
678  * @param removeSet the object that defines which elements will be
679  * removed from this set
680  * @stable ICU 3.2
681  */
682 U_CAPI void U_EXPORT2
683 uset_removeAll(USet* set, const USet* removeSet);
684 
685 /**
686  * Retain only the elements in this set that are contained in the
687  * specified range.  If <code>start > end</code> then an empty range is
688  * retained, leaving the set empty.  This is equivalent to
689  * a boolean logic AND, or a set INTERSECTION.
690  * A frozen set will not be modified.
691  *
692  * @param set the object for which to retain only the specified range
693  * @param start first character, inclusive, of range
694  * @param end last character, inclusive, of range
695  * @stable ICU 3.2
696  */
697 U_CAPI void U_EXPORT2
698 uset_retain(USet* set, UChar32 start, UChar32 end);
699 
700 /**
701  * Retains only the specified string from this set if it is present.
702  * Upon return this set will be empty if it did not contain str, or
703  * will only contain str if it did contain str.
704  * A frozen set will not be modified.
705  *
706  * @param set the object to be modified
707  * @param str the string
708  * @param length the length of the string, or -1 if NUL-terminated
709  * @stable ICU 69
710  */
711 U_CAPI void U_EXPORT2
712 uset_retainString(USet *set, const UChar *str, int32_t length);
713 
714 /**
715  * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
716  * A frozen set will not be modified.
717  *
718  * @param set the object to be modified
719  * @param str the string
720  * @param length the length of the string, or -1 if NUL-terminated
721  * @stable ICU 69
722  */
723 U_CAPI void U_EXPORT2
724 uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
725 
726 /**
727  * Retains only the elements in this set that are contained in the
728  * specified set.  In other words, removes from this set all of
729  * its elements that are not contained in the specified set.  This
730  * operation effectively modifies this set so that its value is
731  * the <i>intersection</i> of the two sets.
732  * A frozen set will not be modified.
733  *
734  * @param set the object on which to perform the retain
735  * @param retain set that defines which elements this set will retain
736  * @stable ICU 3.2
737  */
738 U_CAPI void U_EXPORT2
739 uset_retainAll(USet* set, const USet* retain);
740 
741 /**
742  * Reallocate this object's internal structures to take up the least
743  * possible space, without changing this object's value.
744  * A frozen set will not be modified.
745  *
746  * @param set the object on which to perform the compact
747  * @stable ICU 3.2
748  */
749 U_CAPI void U_EXPORT2
750 uset_compact(USet* set);
751 
752 /**
753  * This is equivalent to
754  * <code>uset_complementRange(set, 0, 0x10FFFF)</code>.
755  *
756  * <strong>Note:</strong> This performs a symmetric difference with all code points
757  * <em>and thus retains all multicharacter strings</em>.
758  * In order to achieve a “code point complement” (all code points minus this set),
759  * the easiest is to <code>uset_complement(set); uset_removeAllStrings(set);</code>.
760  *
761  * A frozen set will not be modified.
762  * @param set the set
763  * @stable ICU 2.4
764  */
765 U_CAPI void U_EXPORT2
766 uset_complement(USet* set);
767 
768 /**
769  * Complements the specified range in this set.  Any character in
770  * the range will be removed if it is in this set, or will be
771  * added if it is not in this set.  If <code>start > end</code>
772  * then an empty range is complemented, leaving the set unchanged.
773  * This is equivalent to a boolean logic XOR.
774  * A frozen set will not be modified.
775  *
776  * @param set the object to be modified
777  * @param start first character, inclusive, of range
778  * @param end last character, inclusive, of range
779  * @stable ICU 69
780  */
781 U_CAPI void U_EXPORT2
782 uset_complementRange(USet *set, UChar32 start, UChar32 end);
783 
784 /**
785  * Complements the specified string in this set.
786  * The string will be removed if it is in this set, or will be added if it is not in this set.
787  * A frozen set will not be modified.
788  *
789  * @param set the object to be modified
790  * @param str the string
791  * @param length the length of the string, or -1 if NUL-terminated
792  * @stable ICU 69
793  */
794 U_CAPI void U_EXPORT2
795 uset_complementString(USet *set, const UChar *str, int32_t length);
796 
797 /**
798  * Complements EACH of the characters in the given string. Note: "ch" is treated as {"c", "h"}, not as the string "ch".
799  * A frozen set will not be modified.
800  *
801  * @param set the object to be modified
802  * @param str the string
803  * @param length the length of the string, or -1 if NUL-terminated
804  * @stable ICU 69
805  */
806 U_CAPI void U_EXPORT2
807 uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
808 
809 /**
810  * Complements in this set all elements contained in the specified
811  * set.  Any character in the other set will be removed if it is
812  * in this set, or will be added if it is not in this set.
813  * A frozen set will not be modified.
814  *
815  * @param set the set to be modified
816  * @param complement set that defines which elements will be xor'ed
817  * with this set.
818  * @stable ICU 3.2
819  */
820 U_CAPI void U_EXPORT2
821 uset_complementAll(USet* set, const USet* complement);
822 
823 /**
824  * Removes all of the elements from this set.  This set will be
825  * empty after this call returns.
826  * A frozen set will not be modified.
827  * @param set the set
828  * @stable ICU 2.4
829  */
830 U_CAPI void U_EXPORT2
831 uset_clear(USet* set);
832 
833 /**
834  * Close this set over the given attribute.  For the attribute
835  * USET_CASE_INSENSITIVE, the result is to modify this set so that:
836  *
837  * 1. For each character or string 'a' in this set, all strings or
838  * characters 'b' such that foldCase(a) == foldCase(b) are added
839  * to this set.
840  *
841  * 2. For each string 'e' in the resulting set, if e !=
842  * foldCase(e), 'e' will be removed.
843  *
844  * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
845  *
846  * (Here foldCase(x) refers to the operation u_strFoldCase, and a
847  * == b denotes that the contents are the same, not pointer
848  * comparison.)
849  *
850  * A frozen set will not be modified.
851  *
852  * @param set the set
853  *
854  * @param attributes bitmask for attributes to close over.
855  * Valid options:
856  * At most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
857  * These case options are mutually exclusive.
858  * Unrelated option bits are ignored.
859  * @stable ICU 4.2
860  */
861 U_CAPI void U_EXPORT2
862 uset_closeOver(USet* set, int32_t attributes);
863 
864 /**
865  * Remove all strings from this set.
866  *
867  * @param set the set
868  * @stable ICU 4.2
869  */
870 U_CAPI void U_EXPORT2
871 uset_removeAllStrings(USet* set);
872 
873 /**
874  * Returns true if the given USet contains no characters and no
875  * strings.
876  * @param set the set
877  * @return true if set is empty
878  * @stable ICU 2.4
879  */
880 U_CAPI UBool U_EXPORT2
881 uset_isEmpty(const USet* set);
882 
883 /**
884  * @param set the set
885  * @return true if this set contains multi-character strings or the empty string.
886  * @stable ICU 70
887  */
888 U_CAPI UBool U_EXPORT2
889 uset_hasStrings(const USet *set);
890 
891 /**
892  * Returns true if the given USet contains the given character.
893  * This function works faster with a frozen set.
894  * @param set the set
895  * @param c The codepoint to check for within the set
896  * @return true if set contains c
897  * @stable ICU 2.4
898  */
899 U_CAPI UBool U_EXPORT2
900 uset_contains(const USet* set, UChar32 c);
901 
902 /**
903  * Returns true if the given USet contains all characters c
904  * where start <= c && c <= end.
905  * @param set the set
906  * @param start the first character of the range to test, inclusive
907  * @param end the last character of the range to test, inclusive
908  * @return true if set contains the range
909  * @stable ICU 2.2
910  */
911 U_CAPI UBool U_EXPORT2
912 uset_containsRange(const USet* set, UChar32 start, UChar32 end);
913 
914 /**
915  * Returns true if the given USet contains the given string.
916  * @param set the set
917  * @param str the string
918  * @param strLen the length of the string, or -1 if NUL-terminated.
919  * @return true if set contains str
920  * @stable ICU 2.4
921  */
922 U_CAPI UBool U_EXPORT2
923 uset_containsString(const USet* set, const UChar* str, int32_t strLen);
924 
925 /**
926  * Returns the index of the given character within this set, where
927  * the set is ordered by ascending code point.  If the character
928  * is not in this set, return -1.  The inverse of this function is
929  * <code>uset_charAt()</code>.
930  * @param set the set
931  * @param c the character to obtain the index for
932  * @return an index from 0..size()-1, or -1
933  * @stable ICU 3.2
934  */
935 U_CAPI int32_t U_EXPORT2
936 uset_indexOf(const USet* set, UChar32 c);
937 
938 /**
939  * Returns the character at the given index within this set, where
940  * the set is ordered by ascending code point.  If the index is
941  * out of range for characters, returns (UChar32)-1.
942  * The inverse of this function is <code>uset_indexOf()</code>.
943  *
944  * For iteration, this is slower than uset_getRangeCount()/uset_getItemCount()
945  * with uset_getItem(), because for each call it skips linearly over <code>index</code>
946  * characters in the ranges.
947  *
948  * @param set the set
949  * @param charIndex an index from 0..size()-1 to obtain the char for
950  * @return the character at the given index, or (UChar32)-1.
951  * @stable ICU 3.2
952  */
953 U_CAPI UChar32 U_EXPORT2
954 uset_charAt(const USet* set, int32_t charIndex);
955 
956 /**
957  * Returns the number of characters and strings contained in this set.
958  * The last (uset_getItemCount() - uset_getRangeCount()) items are strings.
959  *
960  * This is slower than uset_getRangeCount() and uset_getItemCount() because
961  * it counts the code points of all ranges.
962  *
963  * @param set the set
964  * @return a non-negative integer counting the characters and strings
965  * contained in set
966  * @stable ICU 2.4
967  * @see uset_getRangeCount
968  */
969 U_CAPI int32_t U_EXPORT2
970 uset_size(const USet* set);
971 
972 /**
973  * @param set the set
974  * @return the number of ranges in this set.
975  * @stable ICU 70
976  * @see uset_getItemCount
977  * @see uset_getItem
978  * @see uset_size
979  */
980 U_CAPI int32_t U_EXPORT2
981 uset_getRangeCount(const USet *set);
982 
983 /**
984  * Returns the number of items in this set.  An item is either a range
985  * of characters or a single multicharacter string.
986  * @param set the set
987  * @return a non-negative integer counting the character ranges
988  * and/or strings contained in set
989  * @stable ICU 2.4
990  */
991 U_CAPI int32_t U_EXPORT2
992 uset_getItemCount(const USet* set);
993 
994 /**
995  * Returns an item of this set.  An item is either a range of
996  * characters or a single multicharacter string (which can be the empty string).
997  *
998  * If <code>itemIndex</code> is less than uset_getRangeCount(), then this function returns 0,
999  * and the range is <code>*start</code>..<code>*end</code>.
1000  *
1001  * If <code>itemIndex</code> is at least uset_getRangeCount() and less than uset_getItemCount(), then
1002  * this function copies the string into <code>str[strCapacity]</code> and
1003  * returns the length of the string (0 for the empty string).
1004  *
1005  * If <code>itemIndex</code> is out of range, then this function returns -1.
1006  *
1007  * Note that 0 is returned for each range as well as for the empty string.
1008  *
1009  * @param set the set
1010  * @param itemIndex a non-negative integer in the range 0..uset_getItemCount(set)-1
1011  * @param start pointer to variable to receive first character in range, inclusive;
1012  *              can be NULL for a string item
1013  * @param end pointer to variable to receive last character in range, inclusive;
1014  *            can be NULL for a string item
1015  * @param str buffer to receive the string, may be NULL
1016  * @param strCapacity capacity of str, or 0 if str is NULL
1017  * @param ec error code; U_INDEX_OUTOFBOUNDS_ERROR if the itemIndex is out of range
1018  * @return the length of the string (0 or >= 2), or 0 if the item is a range,
1019  *         or -1 if the itemIndex is out of range
1020  * @stable ICU 2.4
1021  */
1022 U_CAPI int32_t U_EXPORT2
1023 uset_getItem(const USet* set, int32_t itemIndex,
1024              UChar32* start, UChar32* end,
1025              UChar* str, int32_t strCapacity,
1026              UErrorCode* ec);
1027 
1028 /**
1029  * Returns true if set1 contains all the characters and strings
1030  * of set2. It answers the question, 'Is set1 a superset of set2?'
1031  * @param set1 the candidate superset
1032  * @param set2 the set whose characters and strings are looked for in set1
1033  * @return true if the test condition is met
1034  * @stable ICU 3.2
1035  */
1036 U_CAPI UBool U_EXPORT2
1037 uset_containsAll(const USet* set1, const USet* set2);
1038 
1039 /**
1040  * Returns true if this set contains all the characters
1041  * of the given string. This does not check containment of grapheme
1042  * clusters, like uset_containsString.
1043  * @param set set of characters to be checked for containment
1044  * @param str string containing codepoints to be checked for containment
1045  * @param strLen the length of the string, or -1 if NUL-terminated.
1046  * @return true if the test condition is met
1047  * @stable ICU 3.4
1048  */
1049 U_CAPI UBool U_EXPORT2
1050 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
1051 
1052 /**
1053  * Returns true if set1 contains none of the characters and strings
1054  * of set2. It answers the question, 'Are set1 and set2 disjoint?'
1055  * @param set1 the set that is searched
1056  * @param set2 the set whose characters and strings are looked for in set1
1057  * @return true if the test condition is met
1058  * @stable ICU 3.2
1059  */
1060 U_CAPI UBool U_EXPORT2
1061 uset_containsNone(const USet* set1, const USet* set2);
1062 
1063 /**
1064  * Returns true if set1 contains some of the characters and strings
1065  * of set2. It answers the question, 'Do set1 and set2 have an intersection?'
1066  * @param set1 the set that is searched
1067  * @param set2 the set whose characters and strings are looked for in set1
1068  * @return true if the test condition is met
1069  * @stable ICU 3.2
1070  */
1071 U_CAPI UBool U_EXPORT2
1072 uset_containsSome(const USet* set1, const USet* set2);
1073 
1074 /**
1075  * Returns the length of the initial substring of the input string which
1076  * consists only of characters and strings that are contained in this set
1077  * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
1078  * or only of characters and strings that are not contained
1079  * in this set (USET_SPAN_NOT_CONTAINED).
1080  * See USetSpanCondition for details.
1081  * Similar to the strspn() C library function.
1082  * Unpaired surrogates are treated according to contains() of their surrogate code points.
1083  * This function works faster with a frozen set and with a non-negative string length argument.
1084  * @param set the set
1085  * @param s start of the string
1086  * @param length the length of the string; can be -1 for NUL-terminated
1087  * @param spanCondition specifies the containment condition
1088  * @return the length of the initial substring according to the spanCondition;
1089  *         0 if the start of the string does not fit the spanCondition
1090  * @stable ICU 3.8
1091  * @see USetSpanCondition
1092  */
1093 U_CAPI int32_t U_EXPORT2
1094 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1095 
1096 /**
1097  * Returns the start of the trailing substring of the input string which
1098  * consists only of characters and strings that are contained in this set
1099  * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
1100  * or only of characters and strings that are not contained
1101  * in this set (USET_SPAN_NOT_CONTAINED).
1102  * See USetSpanCondition for details.
1103  * Unpaired surrogates are treated according to contains() of their surrogate code points.
1104  * This function works faster with a frozen set and with a non-negative string length argument.
1105  * @param set the set
1106  * @param s start of the string
1107  * @param length the length of the string; can be -1 for NUL-terminated
1108  * @param spanCondition specifies the containment condition
1109  * @return the start of the trailing substring according to the spanCondition;
1110  *         the string length if the end of the string does not fit the spanCondition
1111  * @stable ICU 3.8
1112  * @see USetSpanCondition
1113  */
1114 U_CAPI int32_t U_EXPORT2
1115 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1116 
1117 /**
1118  * Returns the length of the initial substring of the input string which
1119  * consists only of characters and strings that are contained in this set
1120  * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
1121  * or only of characters and strings that are not contained
1122  * in this set (USET_SPAN_NOT_CONTAINED).
1123  * See USetSpanCondition for details.
1124  * Similar to the strspn() C library function.
1125  * Malformed byte sequences are treated according to contains(0xfffd).
1126  * This function works faster with a frozen set and with a non-negative string length argument.
1127  * @param set the set
1128  * @param s start of the string (UTF-8)
1129  * @param length the length of the string; can be -1 for NUL-terminated
1130  * @param spanCondition specifies the containment condition
1131  * @return the length of the initial substring according to the spanCondition;
1132  *         0 if the start of the string does not fit the spanCondition
1133  * @stable ICU 3.8
1134  * @see USetSpanCondition
1135  */
1136 U_CAPI int32_t U_EXPORT2
1137 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1138 
1139 /**
1140  * Returns the start of the trailing substring of the input string which
1141  * consists only of characters and strings that are contained in this set
1142  * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
1143  * or only of characters and strings that are not contained
1144  * in this set (USET_SPAN_NOT_CONTAINED).
1145  * See USetSpanCondition for details.
1146  * Malformed byte sequences are treated according to contains(0xfffd).
1147  * This function works faster with a frozen set and with a non-negative string length argument.
1148  * @param set the set
1149  * @param s start of the string (UTF-8)
1150  * @param length the length of the string; can be -1 for NUL-terminated
1151  * @param spanCondition specifies the containment condition
1152  * @return the start of the trailing substring according to the spanCondition;
1153  *         the string length if the end of the string does not fit the spanCondition
1154  * @stable ICU 3.8
1155  * @see USetSpanCondition
1156  */
1157 U_CAPI int32_t U_EXPORT2
1158 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1159 
1160 /**
1161  * Returns true if set1 contains all of the characters and strings
1162  * of set2, and vice versa. It answers the question, 'Is set1 equal to set2?'
1163  * @param set1 first set to compare
1164  * @param set2 second set to compare
1165  * @return true if the test condition is met
1166  * @stable ICU 3.2
1167  */
1168 U_CAPI UBool U_EXPORT2
1169 uset_equals(const USet* set1, const USet* set2);
1170 
1171 /*********************************************************************
1172  * Serialized set API
1173  *********************************************************************/
1174 
1175 /**
1176  * Serializes this set into an array of 16-bit integers.  Serialization
1177  * (currently) only records the characters in the set; multicharacter
1178  * strings are ignored.
1179  *
1180  * The array
1181  * has the following format (each line is one 16-bit integer):
1182  *
1183  *  length     = (n+2*m) | (m!=0?0x8000:0)
1184  *  bmpLength  = n; present if m!=0
1185  *  bmp[0]
1186  *  bmp[1]
1187  *  ...
1188  *  bmp[n-1]
1189  *  supp-high[0]
1190  *  supp-low[0]
1191  *  supp-high[1]
1192  *  supp-low[1]
1193  *  ...
1194  *  supp-high[m-1]
1195  *  supp-low[m-1]
1196  *
1197  * The array starts with a header.  After the header are n bmp
1198  * code points, then m supplementary code points.  Either n or m
1199  * or both may be zero.  n+2*m is always <= 0x7FFF.
1200  *
1201  * If there are no supplementary characters (if m==0) then the
1202  * header is one 16-bit integer, 'length', with value n.
1203  *
1204  * If there are supplementary characters (if m!=0) then the header
1205  * is two 16-bit integers.  The first, 'length', has value
1206  * (n+2*m)|0x8000.  The second, 'bmpLength', has value n.
1207  *
1208  * After the header the code points are stored in ascending order.
1209  * Supplementary code points are stored as most significant 16
1210  * bits followed by least significant 16 bits.
1211  *
1212  * @param set the set
1213  * @param dest pointer to buffer of destCapacity 16-bit integers.
1214  * May be NULL only if destCapacity is zero.
1215  * @param destCapacity size of dest, or zero.  Must not be negative.
1216  * @param pErrorCode pointer to the error code.  Will be set to
1217  * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF.  Will be set to
1218  * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
1219  * @return the total length of the serialized format, including
1220  * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
1221  * than U_BUFFER_OVERFLOW_ERROR.
1222  * @stable ICU 2.4
1223  */
1224 U_CAPI int32_t U_EXPORT2
1225 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1226 
1227 /**
1228  * Given a serialized array, fill in the given serialized set object.
1229  * @param fillSet pointer to result
1230  * @param src pointer to start of array
1231  * @param srcLength length of array
1232  * @return true if the given array is valid, otherwise false
1233  * @stable ICU 2.4
1234  */
1235 U_CAPI UBool U_EXPORT2
1236 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1237 
1238 /**
1239  * Set the USerializedSet to contain the given character (and nothing
1240  * else).
1241  * @param fillSet pointer to result
1242  * @param c The codepoint to set
1243  * @stable ICU 2.4
1244  */
1245 U_CAPI void U_EXPORT2
1246 uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
1247 
1248 /**
1249  * Returns true if the given USerializedSet contains the given
1250  * character.
1251  * @param set the serialized set
1252  * @param c The codepoint to check for within the set
1253  * @return true if set contains c
1254  * @stable ICU 2.4
1255  */
1256 U_CAPI UBool U_EXPORT2
1257 uset_serializedContains(const USerializedSet* set, UChar32 c);
1258 
1259 /**
1260  * Returns the number of disjoint ranges of characters contained in
1261  * the given serialized set.  Ignores any strings contained in the
1262  * set.
1263  * @param set the serialized set
1264  * @return a non-negative integer counting the character ranges
1265  * contained in set
1266  * @stable ICU 2.4
1267  */
1268 U_CAPI int32_t U_EXPORT2
1269 uset_getSerializedRangeCount(const USerializedSet* set);
1270 
1271 /**
1272  * Returns a range of characters contained in the given serialized
1273  * set.
1274  * @param set the serialized set
1275  * @param rangeIndex a non-negative integer in the range 0..
1276  * uset_getSerializedRangeCount(set)-1
1277  * @param pStart pointer to variable to receive first character
1278  * in range, inclusive
1279  * @param pEnd pointer to variable to receive last character in range,
1280  * inclusive
1281  * @return true if rangeIndex is valid, otherwise false
1282  * @stable ICU 2.4
1283  */
1284 U_CAPI UBool U_EXPORT2
1285 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1286                         UChar32* pStart, UChar32* pEnd);
1287 
1288 #endif
1289