1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ****************************************************************************** 5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 1996-2015, International Business Machines Corporation and others. 6*0e209d39SAndroid Build Coastguard Worker * All Rights Reserved. 7*0e209d39SAndroid Build Coastguard Worker ****************************************************************************** 8*0e209d39SAndroid Build Coastguard Worker */ 9*0e209d39SAndroid Build Coastguard Worker 10*0e209d39SAndroid Build Coastguard Worker #ifndef UBRK_H 11*0e209d39SAndroid Build Coastguard Worker #define UBRK_H 12*0e209d39SAndroid Build Coastguard Worker 13*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 14*0e209d39SAndroid Build Coastguard Worker #include "unicode/uloc.h" 15*0e209d39SAndroid Build Coastguard Worker #include "unicode/utext.h" 16*0e209d39SAndroid Build Coastguard Worker 17*0e209d39SAndroid Build Coastguard Worker #if U_SHOW_CPLUSPLUS_API 18*0e209d39SAndroid Build Coastguard Worker #include "unicode/localpointer.h" 19*0e209d39SAndroid Build Coastguard Worker #endif // U_SHOW_CPLUSPLUS_API 20*0e209d39SAndroid Build Coastguard Worker 21*0e209d39SAndroid Build Coastguard Worker /** 22*0e209d39SAndroid Build Coastguard Worker * A text-break iterator. 23*0e209d39SAndroid Build Coastguard Worker * For usage in C programs. 
24*0e209d39SAndroid Build Coastguard Worker */ 25*0e209d39SAndroid Build Coastguard Worker #ifndef UBRK_TYPEDEF_UBREAK_ITERATOR 26*0e209d39SAndroid Build Coastguard Worker # define UBRK_TYPEDEF_UBREAK_ITERATOR 27*0e209d39SAndroid Build Coastguard Worker /** 28*0e209d39SAndroid Build Coastguard Worker * Opaque type representing an ICU Break iterator object. 29*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.0 30*0e209d39SAndroid Build Coastguard Worker */ 31*0e209d39SAndroid Build Coastguard Worker typedef struct UBreakIterator UBreakIterator; 32*0e209d39SAndroid Build Coastguard Worker #endif 33*0e209d39SAndroid Build Coastguard Worker 34*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_BREAK_ITERATION 35*0e209d39SAndroid Build Coastguard Worker 36*0e209d39SAndroid Build Coastguard Worker 37*0e209d39SAndroid Build Coastguard Worker /** 38*0e209d39SAndroid Build Coastguard Worker * @addtogroup icu4c ICU4C 39*0e209d39SAndroid Build Coastguard Worker * @{ 40*0e209d39SAndroid Build Coastguard Worker * \file 41*0e209d39SAndroid Build Coastguard Worker * \brief C API: BreakIterator 42*0e209d39SAndroid Build Coastguard Worker * 43*0e209d39SAndroid Build Coastguard Worker * <h2> BreakIterator C API </h2> 44*0e209d39SAndroid Build Coastguard Worker * 45*0e209d39SAndroid Build Coastguard Worker * The BreakIterator C API defines methods for finding the location 46*0e209d39SAndroid Build Coastguard Worker * of boundaries in text. A UBreakIterator maintains a 47*0e209d39SAndroid Build Coastguard Worker * current position and scans over text, returning the index of characters 48*0e209d39SAndroid Build Coastguard Worker * where boundaries occur. 49*0e209d39SAndroid Build Coastguard Worker * <p> 50*0e209d39SAndroid Build Coastguard Worker * Line boundary analysis determines where a text string can be broken 51*0e209d39SAndroid Build Coastguard Worker * when line-wrapping.
The mechanism correctly handles punctuation and 52*0e209d39SAndroid Build Coastguard Worker * hyphenated words. 53*0e209d39SAndroid Build Coastguard Worker * <p> 54*0e209d39SAndroid Build Coastguard Worker * Note: The locale keyword "lb" can be used to modify line break 55*0e209d39SAndroid Build Coastguard Worker * behavior according to the CSS level 3 line-break options, see 56*0e209d39SAndroid Build Coastguard Worker * <http://dev.w3.org/csswg/css-text/#line-breaking>. For example: 57*0e209d39SAndroid Build Coastguard Worker * "ja@lb=strict", "zh@lb=loose". 58*0e209d39SAndroid Build Coastguard Worker * <p> 59*0e209d39SAndroid Build Coastguard Worker * Sentence boundary analysis allows selection with correct 60*0e209d39SAndroid Build Coastguard Worker * interpretation of periods within numbers and abbreviations, and 61*0e209d39SAndroid Build Coastguard Worker * trailing punctuation marks such as quotation marks and parentheses. 62*0e209d39SAndroid Build Coastguard Worker * <p> 63*0e209d39SAndroid Build Coastguard Worker * Note: The locale keyword "ss" can be used to enable use of 64*0e209d39SAndroid Build Coastguard Worker * segmentation suppression data (preventing breaks in English after 65*0e209d39SAndroid Build Coastguard Worker * abbreviations such as "Mr." or "Est.", for example), as follows: 66*0e209d39SAndroid Build Coastguard Worker * "en@ss=standard". 67*0e209d39SAndroid Build Coastguard Worker * <p> 68*0e209d39SAndroid Build Coastguard Worker * Word boundary analysis is used by search and replace functions, as 69*0e209d39SAndroid Build Coastguard Worker * well as within text editing applications that allow the user to 70*0e209d39SAndroid Build Coastguard Worker * select words with a double click. Word selection provides correct 71*0e209d39SAndroid Build Coastguard Worker * interpretation of punctuation marks within and following 72*0e209d39SAndroid Build Coastguard Worker * words. 
Characters that are not part of a word, such as symbols or 73*0e209d39SAndroid Build Coastguard Worker * punctuation marks, have word-breaks on both sides. 74*0e209d39SAndroid Build Coastguard Worker * <p> 75*0e209d39SAndroid Build Coastguard Worker * Character boundary analysis identifies the boundaries of 76*0e209d39SAndroid Build Coastguard Worker * "Extended Grapheme Clusters", which are groupings of codepoints 77*0e209d39SAndroid Build Coastguard Worker * that should be treated as character-like units for many text operations. 78*0e209d39SAndroid Build Coastguard Worker * Please see Unicode Standard Annex #29, Unicode Text Segmentation, 79*0e209d39SAndroid Build Coastguard Worker * http://www.unicode.org/reports/tr29/ for additional information 80*0e209d39SAndroid Build Coastguard Worker * on grapheme clusters and guidelines on their use. 81*0e209d39SAndroid Build Coastguard Worker * <p> 82*0e209d39SAndroid Build Coastguard Worker * Title boundary analysis locates all positions, 83*0e209d39SAndroid Build Coastguard Worker * typically starts of words, that should be set to Title Case 84*0e209d39SAndroid Build Coastguard Worker * when title casing the text. 85*0e209d39SAndroid Build Coastguard Worker * <p> 86*0e209d39SAndroid Build Coastguard Worker * The text boundary positions are found according to the rules 87*0e209d39SAndroid Build Coastguard Worker * described in Unicode Standard Annex #29, Text Boundaries, and 88*0e209d39SAndroid Build Coastguard Worker * Unicode Standard Annex #14, Line Breaking Properties. These 89*0e209d39SAndroid Build Coastguard Worker * are available at http://www.unicode.org/reports/tr14/ and 90*0e209d39SAndroid Build Coastguard Worker * http://www.unicode.org/reports/tr29/. 
91*0e209d39SAndroid Build Coastguard Worker * <p> 92*0e209d39SAndroid Build Coastguard Worker * In addition to the plain C API defined in this header file, an 93*0e209d39SAndroid Build Coastguard Worker * object oriented C++ API with equivalent functionality is defined in the 94*0e209d39SAndroid Build Coastguard Worker * file brkiter.h. 95*0e209d39SAndroid Build Coastguard Worker * <p> 96*0e209d39SAndroid Build Coastguard Worker * Code snippets illustrating the use of the Break Iterator APIs 97*0e209d39SAndroid Build Coastguard Worker * are available in the ICU User Guide, 98*0e209d39SAndroid Build Coastguard Worker * https://unicode-org.github.io/icu/userguide/boundaryanalysis/ 99*0e209d39SAndroid Build Coastguard Worker * and in the sample program icu/source/samples/break/break.cpp 100*0e209d39SAndroid Build Coastguard Worker */ 101*0e209d39SAndroid Build Coastguard Worker 102*0e209d39SAndroid Build Coastguard Worker /** The possible types of text boundaries. \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 103*0e209d39SAndroid Build Coastguard Worker typedef enum UBreakIteratorType { 104*0e209d39SAndroid Build Coastguard Worker /** Character breaks \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 105*0e209d39SAndroid Build Coastguard Worker UBRK_CHARACTER = 0, 106*0e209d39SAndroid Build Coastguard Worker /** Word breaks \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 107*0e209d39SAndroid Build Coastguard Worker UBRK_WORD = 1, 108*0e209d39SAndroid Build Coastguard Worker /** Line breaks \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 109*0e209d39SAndroid Build Coastguard Worker UBRK_LINE = 2, 110*0e209d39SAndroid Build Coastguard Worker /** Sentence breaks \xrefitem stable "Stable" "Stable List" ICU 2.0 */ 111*0e209d39SAndroid Build Coastguard Worker UBRK_SENTENCE = 3, 112*0e209d39SAndroid Build Coastguard Worker 113*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_DEPRECATED_API 114*0e209d39SAndroid Build Coastguard Worker /** 
115*0e209d39SAndroid Build Coastguard Worker * Title Case breaks 116*0e209d39SAndroid Build Coastguard Worker * The iterator created using this type locates title boundaries as described for 117*0e209d39SAndroid Build Coastguard Worker * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration, 118*0e209d39SAndroid Build Coastguard Worker * please use Word Boundary iterator. 119*0e209d39SAndroid Build Coastguard Worker * 120*0e209d39SAndroid Build Coastguard Worker * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later. 121*0e209d39SAndroid Build Coastguard Worker */ 122*0e209d39SAndroid Build Coastguard Worker UBRK_TITLE = 4, 123*0e209d39SAndroid Build Coastguard Worker /** 124*0e209d39SAndroid Build Coastguard Worker * One more than the highest normal UBreakIteratorType value. 125*0e209d39SAndroid Build Coastguard Worker * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 58 The numeric value may change over time, see ICU ticket #12420. 126*0e209d39SAndroid Build Coastguard Worker */ 127*0e209d39SAndroid Build Coastguard Worker UBRK_COUNT = 5 128*0e209d39SAndroid Build Coastguard Worker #endif // U_HIDE_DEPRECATED_API 129*0e209d39SAndroid Build Coastguard Worker } UBreakIteratorType; 130*0e209d39SAndroid Build Coastguard Worker 131*0e209d39SAndroid Build Coastguard Worker /** Value indicating all text boundaries have been returned. 132*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.0 133*0e209d39SAndroid Build Coastguard Worker */ 134*0e209d39SAndroid Build Coastguard Worker #define UBRK_DONE ((int32_t) -1) 135*0e209d39SAndroid Build Coastguard Worker 136*0e209d39SAndroid Build Coastguard Worker 137*0e209d39SAndroid Build Coastguard Worker /** 138*0e209d39SAndroid Build Coastguard Worker * Enum constants for the word break tags returned by 139*0e209d39SAndroid Build Coastguard Worker * getRuleStatus(). 
A range of values is defined for each category of 140*0e209d39SAndroid Build Coastguard Worker * word, to allow for further subdivisions of a category in future releases. 141*0e209d39SAndroid Build Coastguard Worker * Applications should check for tag values falling within the range, rather 142*0e209d39SAndroid Build Coastguard Worker * than for single individual values. 143*0e209d39SAndroid Build Coastguard Worker * 144*0e209d39SAndroid Build Coastguard Worker * The numeric values of all of these constants are stable (will not change). 145*0e209d39SAndroid Build Coastguard Worker * 146*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.2 147*0e209d39SAndroid Build Coastguard Worker */ 148*0e209d39SAndroid Build Coastguard Worker typedef enum UWordBreak { 149*0e209d39SAndroid Build Coastguard Worker /** Tag value for "words" that do not fit into any of other categories. 150*0e209d39SAndroid Build Coastguard Worker * Includes spaces and most punctuation. */ 151*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_NONE = 0, 152*0e209d39SAndroid Build Coastguard Worker /** Upper bound for tags for uncategorized words. */ 153*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_NONE_LIMIT = 100, 154*0e209d39SAndroid Build Coastguard Worker /** Tag value for words that appear to be numbers, lower limit. */ 155*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_NUMBER = 100, 156*0e209d39SAndroid Build Coastguard Worker /** Tag value for words that appear to be numbers, upper limit. */ 157*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_NUMBER_LIMIT = 200, 158*0e209d39SAndroid Build Coastguard Worker /** Tag value for words that contain letters, excluding 159*0e209d39SAndroid Build Coastguard Worker * hiragana, katakana or ideographic characters, lower limit. 
*/ 160*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_LETTER = 200, 161*0e209d39SAndroid Build Coastguard Worker /** Tag value for words containing letters, upper limit */ 162*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_LETTER_LIMIT = 300, 163*0e209d39SAndroid Build Coastguard Worker /** Tag value for words containing kana characters, lower limit */ 164*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_KANA = 300, 165*0e209d39SAndroid Build Coastguard Worker /** Tag value for words containing kana characters, upper limit */ 166*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_KANA_LIMIT = 400, 167*0e209d39SAndroid Build Coastguard Worker /** Tag value for words containing ideographic characters, lower limit */ 168*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_IDEO = 400, 169*0e209d39SAndroid Build Coastguard Worker /** Tag value for words containing ideographic characters, upper limit */ 170*0e209d39SAndroid Build Coastguard Worker UBRK_WORD_IDEO_LIMIT = 500 171*0e209d39SAndroid Build Coastguard Worker } UWordBreak; 172*0e209d39SAndroid Build Coastguard Worker 173*0e209d39SAndroid Build Coastguard Worker /** 174*0e209d39SAndroid Build Coastguard Worker * Enum constants for the line break tags returned by getRuleStatus(). 175*0e209d39SAndroid Build Coastguard Worker * A range of values is defined for each category of 176*0e209d39SAndroid Build Coastguard Worker * line break, to allow for further subdivisions of a category in future releases. 177*0e209d39SAndroid Build Coastguard Worker * Applications should check for tag values falling within the range, rather 178*0e209d39SAndroid Build Coastguard Worker * than for single individual values. 179*0e209d39SAndroid Build Coastguard Worker * 180*0e209d39SAndroid Build Coastguard Worker * The numeric values of all of these constants are stable (will not change).
181*0e209d39SAndroid Build Coastguard Worker * 182*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.8 183*0e209d39SAndroid Build Coastguard Worker */ 184*0e209d39SAndroid Build Coastguard Worker typedef enum ULineBreakTag { 185*0e209d39SAndroid Build Coastguard Worker /** Tag value for soft line breaks, positions at which a line break 186*0e209d39SAndroid Build Coastguard Worker * is acceptable but not required */ 187*0e209d39SAndroid Build Coastguard Worker UBRK_LINE_SOFT = 0, 188*0e209d39SAndroid Build Coastguard Worker /** Upper bound for soft line breaks. */ 189*0e209d39SAndroid Build Coastguard Worker UBRK_LINE_SOFT_LIMIT = 100, 190*0e209d39SAndroid Build Coastguard Worker /** Tag value for a hard, or mandatory line break */ 191*0e209d39SAndroid Build Coastguard Worker UBRK_LINE_HARD = 100, 192*0e209d39SAndroid Build Coastguard Worker /** Upper bound for hard line breaks. */ 193*0e209d39SAndroid Build Coastguard Worker UBRK_LINE_HARD_LIMIT = 200 194*0e209d39SAndroid Build Coastguard Worker } ULineBreakTag; 195*0e209d39SAndroid Build Coastguard Worker 196*0e209d39SAndroid Build Coastguard Worker 197*0e209d39SAndroid Build Coastguard Worker 198*0e209d39SAndroid Build Coastguard Worker /** 199*0e209d39SAndroid Build Coastguard Worker * Enum constants for the sentence break tags returned by getRuleStatus(). 200*0e209d39SAndroid Build Coastguard Worker * A range of values is defined for each category of 201*0e209d39SAndroid Build Coastguard Worker * sentence, to allow for further subdivisions of a category in future releases. 202*0e209d39SAndroid Build Coastguard Worker * Applications should check for tag values falling within the range, rather 203*0e209d39SAndroid Build Coastguard Worker * than for single individual values. 204*0e209d39SAndroid Build Coastguard Worker * 205*0e209d39SAndroid Build Coastguard Worker * The numeric values of all of these constants are stable (will not change). 
206*0e209d39SAndroid Build Coastguard Worker * 207*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.8 208*0e209d39SAndroid Build Coastguard Worker */ 209*0e209d39SAndroid Build Coastguard Worker typedef enum USentenceBreakTag { 210*0e209d39SAndroid Build Coastguard Worker /** Tag value for sentences ending with a sentence terminator 211*0e209d39SAndroid Build Coastguard Worker * ('.', '?', '!', etc.) character, possibly followed by a 212*0e209d39SAndroid Build Coastguard Worker * hard separator (CR, LF, PS, etc.) 213*0e209d39SAndroid Build Coastguard Worker */ 214*0e209d39SAndroid Build Coastguard Worker UBRK_SENTENCE_TERM = 0, 215*0e209d39SAndroid Build Coastguard Worker /** Upper bound for tags for sentences ended by sentence terminators. */ 216*0e209d39SAndroid Build Coastguard Worker UBRK_SENTENCE_TERM_LIMIT = 100, 217*0e209d39SAndroid Build Coastguard Worker /** Tag value for sentences that do not contain an ending 218*0e209d39SAndroid Build Coastguard Worker * sentence terminator ('.', '?', '!', etc.) character, but 219*0e209d39SAndroid Build Coastguard Worker * are ended only by a hard separator (CR, LF, PS, etc.) or end of input. 220*0e209d39SAndroid Build Coastguard Worker */ 221*0e209d39SAndroid Build Coastguard Worker UBRK_SENTENCE_SEP = 100, 222*0e209d39SAndroid Build Coastguard Worker /** Upper bound for tags for sentences ended by a separator. */ 223*0e209d39SAndroid Build Coastguard Worker UBRK_SENTENCE_SEP_LIMIT = 200 224*0e209d39SAndroid Build Coastguard Worker 225*0e209d39SAndroid Build Coastguard Worker } USentenceBreakTag; 226*0e209d39SAndroid Build Coastguard Worker 227*0e209d39SAndroid Build Coastguard Worker 228*0e209d39SAndroid Build Coastguard Worker /** 229*0e209d39SAndroid Build Coastguard Worker * Open a new UBreakIterator for locating text boundaries for a specified locale.
230*0e209d39SAndroid Build Coastguard Worker * A UBreakIterator may be used for detecting character, line, word, 231*0e209d39SAndroid Build Coastguard Worker * and sentence breaks in text. 232*0e209d39SAndroid Build Coastguard Worker * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD, 233*0e209d39SAndroid Build Coastguard Worker * UBRK_LINE, UBRK_SENTENCE 234*0e209d39SAndroid Build Coastguard Worker * @param locale The locale specifying the text-breaking conventions. Note that 235*0e209d39SAndroid Build Coastguard Worker * locale keys such as "lb" and "ss" may be used to modify text break behavior, 236*0e209d39SAndroid Build Coastguard Worker * see general discussion of BreakIterator C API. 237*0e209d39SAndroid Build Coastguard Worker * @param text The text to be iterated over. May be null, in which case ubrk_setText() is 238*0e209d39SAndroid Build Coastguard Worker * used to specify the text to be iterated. 239*0e209d39SAndroid Build Coastguard Worker * @param textLength The number of characters in text, or -1 if null-terminated. 240*0e209d39SAndroid Build Coastguard Worker * @param status A UErrorCode to receive any errors. 241*0e209d39SAndroid Build Coastguard Worker * @return A UBreakIterator for the specified locale. 
242*0e209d39SAndroid Build Coastguard Worker * @see ubrk_openRules 243*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.0 244*0e209d39SAndroid Build Coastguard Worker */ 245*0e209d39SAndroid Build Coastguard Worker U_CAPI UBreakIterator* U_EXPORT2 246*0e209d39SAndroid Build Coastguard Worker ubrk_open(UBreakIteratorType type, 247*0e209d39SAndroid Build Coastguard Worker const char *locale, 248*0e209d39SAndroid Build Coastguard Worker const UChar *text, 249*0e209d39SAndroid Build Coastguard Worker int32_t textLength, 250*0e209d39SAndroid Build Coastguard Worker UErrorCode *status) __INTRODUCED_IN(31); 251*0e209d39SAndroid Build Coastguard Worker 252*0e209d39SAndroid Build Coastguard Worker 253*0e209d39SAndroid Build Coastguard Worker 254*0e209d39SAndroid Build Coastguard Worker 255*0e209d39SAndroid Build Coastguard Worker 256*0e209d39SAndroid Build Coastguard Worker 257*0e209d39SAndroid Build Coastguard Worker 258*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_DEPRECATED_API 259*0e209d39SAndroid Build Coastguard Worker 260*0e209d39SAndroid Build Coastguard Worker 261*0e209d39SAndroid Build Coastguard Worker 262*0e209d39SAndroid Build Coastguard Worker #endif /* U_HIDE_DEPRECATED_API */ 263*0e209d39SAndroid Build Coastguard Worker 264*0e209d39SAndroid Build Coastguard Worker /** 265*0e209d39SAndroid Build Coastguard Worker * Thread safe cloning operation. 
266*0e209d39SAndroid Build Coastguard Worker * @param bi iterator to be cloned 267*0e209d39SAndroid Build Coastguard Worker * @param status to indicate whether the operation went on smoothly or there were errors 268*0e209d39SAndroid Build Coastguard Worker * @return pointer to the new clone 269*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 69 270*0e209d39SAndroid Build Coastguard Worker */ 271*0e209d39SAndroid Build Coastguard Worker U_CAPI UBreakIterator * U_EXPORT2 272*0e209d39SAndroid Build Coastguard Worker ubrk_clone(const UBreakIterator *bi, 273*0e209d39SAndroid Build Coastguard Worker UErrorCode *status) __INTRODUCED_IN(31); 274*0e209d39SAndroid Build Coastguard Worker 275*0e209d39SAndroid Build Coastguard Worker 276*0e209d39SAndroid Build Coastguard Worker 277*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_DEPRECATED_API 278*0e209d39SAndroid Build Coastguard Worker 279*0e209d39SAndroid Build Coastguard Worker /** 280*0e209d39SAndroid Build Coastguard Worker * A recommended size (in bytes) for the memory buffer to be passed to ubrk_safeClone(). 281*0e209d39SAndroid Build Coastguard Worker * \xrefitem deprecated "Deprecated" "Deprecated List" ICU 52. Do not rely on ubrk_safeClone() cloning into any provided buffer. 282*0e209d39SAndroid Build Coastguard Worker */ 283*0e209d39SAndroid Build Coastguard Worker #define U_BRK_SAFECLONE_BUFFERSIZE 1 284*0e209d39SAndroid Build Coastguard Worker 285*0e209d39SAndroid Build Coastguard Worker #endif /* U_HIDE_DEPRECATED_API */ 286*0e209d39SAndroid Build Coastguard Worker 287*0e209d39SAndroid Build Coastguard Worker /** 288*0e209d39SAndroid Build Coastguard Worker * Close a UBreakIterator. 289*0e209d39SAndroid Build Coastguard Worker * Once closed, a UBreakIterator may no longer be used. 290*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to close.
291*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.0 292*0e209d39SAndroid Build Coastguard Worker */ 293*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2 294*0e209d39SAndroid Build Coastguard Worker ubrk_close(UBreakIterator *bi) __INTRODUCED_IN(31); 295*0e209d39SAndroid Build Coastguard Worker 296*0e209d39SAndroid Build Coastguard Worker 297*0e209d39SAndroid Build Coastguard Worker 298*0e209d39SAndroid Build Coastguard Worker #if U_SHOW_CPLUSPLUS_API 299*0e209d39SAndroid Build Coastguard Worker 300*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 301*0e209d39SAndroid Build Coastguard Worker 302*0e209d39SAndroid Build Coastguard Worker /** 303*0e209d39SAndroid Build Coastguard Worker * \class LocalUBreakIteratorPointer 304*0e209d39SAndroid Build Coastguard Worker * "Smart pointer" class, closes a UBreakIterator via ubrk_close(). 305*0e209d39SAndroid Build Coastguard Worker * For most methods see the LocalPointerBase base class. 306*0e209d39SAndroid Build Coastguard Worker * 307*0e209d39SAndroid Build Coastguard Worker * @see LocalPointerBase 308*0e209d39SAndroid Build Coastguard Worker * @see LocalPointer 309*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 4.4 310*0e209d39SAndroid Build Coastguard Worker */ 311*0e209d39SAndroid Build Coastguard Worker U_DEFINE_LOCAL_OPEN_POINTER(LocalUBreakIteratorPointer, UBreakIterator, ubrk_close); 312*0e209d39SAndroid Build Coastguard Worker 313*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 314*0e209d39SAndroid Build Coastguard Worker 315*0e209d39SAndroid Build Coastguard Worker #endif 316*0e209d39SAndroid Build Coastguard Worker 317*0e209d39SAndroid Build Coastguard Worker /** 318*0e209d39SAndroid Build Coastguard Worker * Sets an existing iterator to point to a new piece of text. 319*0e209d39SAndroid Build Coastguard Worker * The break iterator retains a pointer to the supplied text. 
320*0e209d39SAndroid Build Coastguard Worker * The caller must not modify or delete the text while the BreakIterator 321*0e209d39SAndroid Build Coastguard Worker * retains the reference. 322*0e209d39SAndroid Build Coastguard Worker * 323*0e209d39SAndroid Build Coastguard Worker * @param bi The iterator to use 324*0e209d39SAndroid Build Coastguard Worker * @param text The text to be set 325*0e209d39SAndroid Build Coastguard Worker * @param textLength The length of the text 326*0e209d39SAndroid Build Coastguard Worker * @param status The error code 327*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.0 328*0e209d39SAndroid Build Coastguard Worker */ 329*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2 330*0e209d39SAndroid Build Coastguard Worker ubrk_setText(UBreakIterator* bi, 331*0e209d39SAndroid Build Coastguard Worker const UChar* text, 332*0e209d39SAndroid Build Coastguard Worker int32_t textLength, 333*0e209d39SAndroid Build Coastguard Worker UErrorCode* status) __INTRODUCED_IN(31); 334*0e209d39SAndroid Build Coastguard Worker 335*0e209d39SAndroid Build Coastguard Worker 336*0e209d39SAndroid Build Coastguard Worker 337*0e209d39SAndroid Build Coastguard Worker 338*0e209d39SAndroid Build Coastguard Worker /** 339*0e209d39SAndroid Build Coastguard Worker * Sets an existing iterator to point to a new piece of text. 340*0e209d39SAndroid Build Coastguard Worker * 341*0e209d39SAndroid Build Coastguard Worker * All index positions returned by break iterator functions are 342*0e209d39SAndroid Build Coastguard Worker * native indices from the UText. For example, when breaking UTF-8 343*0e209d39SAndroid Build Coastguard Worker * encoded text, the break positions returned by \ref ubrk_next, \ref ubrk_previous, etc. 344*0e209d39SAndroid Build Coastguard Worker * will be UTF-8 string indices, not UTF-16 positions. 
345*0e209d39SAndroid Build Coastguard Worker * 346*0e209d39SAndroid Build Coastguard Worker * @param bi The iterator to use 347*0e209d39SAndroid Build Coastguard Worker * @param text The text to be set. 348*0e209d39SAndroid Build Coastguard Worker * This function makes a shallow clone of the supplied UText. This means 349*0e209d39SAndroid Build Coastguard Worker * that the caller is free to immediately close or otherwise reuse the 350*0e209d39SAndroid Build Coastguard Worker * UText that was passed as a parameter, but that the underlying text itself 351*0e209d39SAndroid Build Coastguard Worker * must not be altered while being referenced by the break iterator. 352*0e209d39SAndroid Build Coastguard Worker * @param status The error code 353*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 3.4 354*0e209d39SAndroid Build Coastguard Worker */ 355*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2 356*0e209d39SAndroid Build Coastguard Worker ubrk_setUText(UBreakIterator* bi, 357*0e209d39SAndroid Build Coastguard Worker UText* text, 358*0e209d39SAndroid Build Coastguard Worker UErrorCode* status) __INTRODUCED_IN(31); 359*0e209d39SAndroid Build Coastguard Worker 360*0e209d39SAndroid Build Coastguard Worker 361*0e209d39SAndroid Build Coastguard Worker 362*0e209d39SAndroid Build Coastguard Worker 363*0e209d39SAndroid Build Coastguard Worker 364*0e209d39SAndroid Build Coastguard Worker /** 365*0e209d39SAndroid Build Coastguard Worker * Determine the most recently-returned text boundary. 366*0e209d39SAndroid Build Coastguard Worker * 367*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 368*0e209d39SAndroid Build Coastguard Worker * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous, 369*0e209d39SAndroid Build Coastguard Worker * \ref ubrk_first, or \ref ubrk_last. 
370*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.0 371*0e209d39SAndroid Build Coastguard Worker */ 372*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 373*0e209d39SAndroid Build Coastguard Worker ubrk_current(const UBreakIterator *bi) __INTRODUCED_IN(31); 374*0e209d39SAndroid Build Coastguard Worker 375*0e209d39SAndroid Build Coastguard Worker 376*0e209d39SAndroid Build Coastguard Worker 377*0e209d39SAndroid Build Coastguard Worker /** 378*0e209d39SAndroid Build Coastguard Worker * Advance the iterator to the boundary following the current boundary. 379*0e209d39SAndroid Build Coastguard Worker * 380*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 381*0e209d39SAndroid Build Coastguard Worker * @return The character index of the next text boundary, or UBRK_DONE 382*0e209d39SAndroid Build Coastguard Worker * if all text boundaries have been returned. 383*0e209d39SAndroid Build Coastguard Worker * @see ubrk_previous 384*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.0 385*0e209d39SAndroid Build Coastguard Worker */ 386*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 387*0e209d39SAndroid Build Coastguard Worker ubrk_next(UBreakIterator *bi) __INTRODUCED_IN(31); 388*0e209d39SAndroid Build Coastguard Worker 389*0e209d39SAndroid Build Coastguard Worker 390*0e209d39SAndroid Build Coastguard Worker 391*0e209d39SAndroid Build Coastguard Worker /** 392*0e209d39SAndroid Build Coastguard Worker * Set the iterator position to the boundary preceding the current boundary. 393*0e209d39SAndroid Build Coastguard Worker * 394*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 395*0e209d39SAndroid Build Coastguard Worker * @return The character index of the preceding text boundary, or UBRK_DONE 396*0e209d39SAndroid Build Coastguard Worker * if all text boundaries have been returned. 
397*0e209d39SAndroid Build Coastguard Worker * @see ubrk_next 398*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.0 399*0e209d39SAndroid Build Coastguard Worker */ 400*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 401*0e209d39SAndroid Build Coastguard Worker ubrk_previous(UBreakIterator *bi) __INTRODUCED_IN(31); 402*0e209d39SAndroid Build Coastguard Worker 403*0e209d39SAndroid Build Coastguard Worker 404*0e209d39SAndroid Build Coastguard Worker 405*0e209d39SAndroid Build Coastguard Worker /** 406*0e209d39SAndroid Build Coastguard Worker * Set the iterator position to zero, the start of the text being scanned. 407*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 408*0e209d39SAndroid Build Coastguard Worker * @return The new iterator position (zero). 409*0e209d39SAndroid Build Coastguard Worker * @see ubrk_last 410*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.0 411*0e209d39SAndroid Build Coastguard Worker */ 412*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 413*0e209d39SAndroid Build Coastguard Worker ubrk_first(UBreakIterator *bi) __INTRODUCED_IN(31); 414*0e209d39SAndroid Build Coastguard Worker 415*0e209d39SAndroid Build Coastguard Worker 416*0e209d39SAndroid Build Coastguard Worker 417*0e209d39SAndroid Build Coastguard Worker /** 418*0e209d39SAndroid Build Coastguard Worker * Set the iterator position to the index immediately <EM>beyond</EM> the last character in the text being scanned. 419*0e209d39SAndroid Build Coastguard Worker * This is not the same as the last character. 420*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 421*0e209d39SAndroid Build Coastguard Worker * @return The character offset immediately <EM>beyond</EM> the last character in the 422*0e209d39SAndroid Build Coastguard Worker * text being scanned. 
423*0e209d39SAndroid Build Coastguard Worker * @see ubrk_first 424*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.0 425*0e209d39SAndroid Build Coastguard Worker */ 426*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 427*0e209d39SAndroid Build Coastguard Worker ubrk_last(UBreakIterator *bi) __INTRODUCED_IN(31); 428*0e209d39SAndroid Build Coastguard Worker 429*0e209d39SAndroid Build Coastguard Worker 430*0e209d39SAndroid Build Coastguard Worker 431*0e209d39SAndroid Build Coastguard Worker /** 432*0e209d39SAndroid Build Coastguard Worker * Set the iterator position to the first boundary preceding the specified offset. 433*0e209d39SAndroid Build Coastguard Worker * The new position is always smaller than offset, or UBRK_DONE. 434*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 435*0e209d39SAndroid Build Coastguard Worker * @param offset The offset to begin scanning. 436*0e209d39SAndroid Build Coastguard Worker * @return The text boundary preceding offset, or UBRK_DONE. 437*0e209d39SAndroid Build Coastguard Worker * @see ubrk_following 438*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.0 439*0e209d39SAndroid Build Coastguard Worker */ 440*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 441*0e209d39SAndroid Build Coastguard Worker ubrk_preceding(UBreakIterator *bi, 442*0e209d39SAndroid Build Coastguard Worker int32_t offset) __INTRODUCED_IN(31); 443*0e209d39SAndroid Build Coastguard Worker 444*0e209d39SAndroid Build Coastguard Worker 445*0e209d39SAndroid Build Coastguard Worker 446*0e209d39SAndroid Build Coastguard Worker /** 447*0e209d39SAndroid Build Coastguard Worker * Advance the iterator to the first boundary following the specified offset. 448*0e209d39SAndroid Build Coastguard Worker * The value returned is always greater than offset, or UBRK_DONE. 
449*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 450*0e209d39SAndroid Build Coastguard Worker * @param offset The offset to begin scanning. 451*0e209d39SAndroid Build Coastguard Worker * @return The text boundary following offset, or UBRK_DONE. 452*0e209d39SAndroid Build Coastguard Worker * @see ubrk_preceding 453*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.0 454*0e209d39SAndroid Build Coastguard Worker */ 455*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 456*0e209d39SAndroid Build Coastguard Worker ubrk_following(UBreakIterator *bi, 457*0e209d39SAndroid Build Coastguard Worker int32_t offset) __INTRODUCED_IN(31); 458*0e209d39SAndroid Build Coastguard Worker 459*0e209d39SAndroid Build Coastguard Worker 460*0e209d39SAndroid Build Coastguard Worker 461*0e209d39SAndroid Build Coastguard Worker /** 462*0e209d39SAndroid Build Coastguard Worker * Get a locale for which text breaking information is available. 463*0e209d39SAndroid Build Coastguard Worker * A UBreakIterator in a locale returned by this function will perform the correct 464*0e209d39SAndroid Build Coastguard Worker * text breaking for the locale. 465*0e209d39SAndroid Build Coastguard Worker * @param index The index of the desired locale. 466*0e209d39SAndroid Build Coastguard Worker * @return A locale for which text breaking information is available, or 0 if none. 
467*0e209d39SAndroid Build Coastguard Worker * @see ubrk_countAvailable 468*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.0 469*0e209d39SAndroid Build Coastguard Worker */ 470*0e209d39SAndroid Build Coastguard Worker U_CAPI const char* U_EXPORT2 471*0e209d39SAndroid Build Coastguard Worker ubrk_getAvailable(int32_t index) __INTRODUCED_IN(31); 472*0e209d39SAndroid Build Coastguard Worker 473*0e209d39SAndroid Build Coastguard Worker 474*0e209d39SAndroid Build Coastguard Worker 475*0e209d39SAndroid Build Coastguard Worker /** 476*0e209d39SAndroid Build Coastguard Worker * Determine how many locales have text breaking information available. 477*0e209d39SAndroid Build Coastguard Worker * This function is most useful for determining the loop ending condition for 478*0e209d39SAndroid Build Coastguard Worker * calls to \ref ubrk_getAvailable. 479*0e209d39SAndroid Build Coastguard Worker * @return The number of locales for which text breaking information is available. 480*0e209d39SAndroid Build Coastguard Worker * @see ubrk_getAvailable 481*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.0 482*0e209d39SAndroid Build Coastguard Worker */ 483*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 484*0e209d39SAndroid Build Coastguard Worker ubrk_countAvailable(void) __INTRODUCED_IN(31); 485*0e209d39SAndroid Build Coastguard Worker 486*0e209d39SAndroid Build Coastguard Worker 487*0e209d39SAndroid Build Coastguard Worker 488*0e209d39SAndroid Build Coastguard Worker 489*0e209d39SAndroid Build Coastguard Worker /** 490*0e209d39SAndroid Build Coastguard Worker * Returns true if the specified position is a boundary position. As a side 491*0e209d39SAndroid Build Coastguard Worker * effect, leaves the iterator pointing to the first boundary position at 492*0e209d39SAndroid Build Coastguard Worker * or after "offset". 
493*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 494*0e209d39SAndroid Build Coastguard Worker * @param offset the offset to check. 495*0e209d39SAndroid Build Coastguard Worker * @return True if "offset" is a boundary position. 496*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.0 497*0e209d39SAndroid Build Coastguard Worker */ 498*0e209d39SAndroid Build Coastguard Worker U_CAPI UBool U_EXPORT2 499*0e209d39SAndroid Build Coastguard Worker ubrk_isBoundary(UBreakIterator *bi, int32_t offset) __INTRODUCED_IN(31); 500*0e209d39SAndroid Build Coastguard Worker 501*0e209d39SAndroid Build Coastguard Worker 502*0e209d39SAndroid Build Coastguard Worker 503*0e209d39SAndroid Build Coastguard Worker /** 504*0e209d39SAndroid Build Coastguard Worker * Return the status from the break rule that determined the most recently 505*0e209d39SAndroid Build Coastguard Worker * returned break position. The values appear in the rule source 506*0e209d39SAndroid Build Coastguard Worker * within brackets, {123}, for example. For rules that do not specify a 507*0e209d39SAndroid Build Coastguard Worker * status, a default value of 0 is returned. 508*0e209d39SAndroid Build Coastguard Worker * <p> 509*0e209d39SAndroid Build Coastguard Worker * For word break iterators, the possible values are defined in enum UWordBreak. * @param bi The break iterator to use. * @return The status from the break rule that determined the most recently returned break position, or 0 if that rule does not specify a status. 
510*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 2.2 511*0e209d39SAndroid Build Coastguard Worker */ 512*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 513*0e209d39SAndroid Build Coastguard Worker ubrk_getRuleStatus(UBreakIterator *bi) __INTRODUCED_IN(31); 514*0e209d39SAndroid Build Coastguard Worker 515*0e209d39SAndroid Build Coastguard Worker 516*0e209d39SAndroid Build Coastguard Worker 517*0e209d39SAndroid Build Coastguard Worker /** 518*0e209d39SAndroid Build Coastguard Worker * Get the statuses from the break rules that determined the most recently 519*0e209d39SAndroid Build Coastguard Worker * returned break position. The values appear in the rule source 520*0e209d39SAndroid Build Coastguard Worker * within brackets, {123}, for example. The default status value for rules 521*0e209d39SAndroid Build Coastguard Worker * that do not explicitly provide one is zero. 522*0e209d39SAndroid Build Coastguard Worker * <p> 523*0e209d39SAndroid Build Coastguard Worker * For word break iterators, the possible values are defined in enum UWordBreak. 524*0e209d39SAndroid Build Coastguard Worker * @param bi The break iterator to use. 525*0e209d39SAndroid Build Coastguard Worker * @param fillInVec an array to be filled in with the status values. 526*0e209d39SAndroid Build Coastguard Worker * @param capacity the length of the supplied vector. A length of zero causes 527*0e209d39SAndroid Build Coastguard Worker * the function to return the number of status values, in the 528*0e209d39SAndroid Build Coastguard Worker * normal way, without attempting to store any values. 529*0e209d39SAndroid Build Coastguard Worker * @param status receives error codes. 530*0e209d39SAndroid Build Coastguard Worker * @return The number of rule status values from rules that determined 531*0e209d39SAndroid Build Coastguard Worker * the most recent boundary returned by the break iterator. 
532*0e209d39SAndroid Build Coastguard Worker * \xrefitem stable "Stable" "Stable List" ICU 3.0 533*0e209d39SAndroid Build Coastguard Worker */ 534*0e209d39SAndroid Build Coastguard Worker U_CAPI int32_t U_EXPORT2 535*0e209d39SAndroid Build Coastguard Worker ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status) __INTRODUCED_IN(31); 536*0e209d39SAndroid Build Coastguard Worker 537*0e209d39SAndroid Build Coastguard Worker 538*0e209d39SAndroid Build Coastguard Worker 539*0e209d39SAndroid Build Coastguard Worker 540*0e209d39SAndroid Build Coastguard Worker 541*0e209d39SAndroid Build Coastguard Worker 542*0e209d39SAndroid Build Coastguard Worker 543*0e209d39SAndroid Build Coastguard Worker 544*0e209d39SAndroid Build Coastguard Worker 545*0e209d39SAndroid Build Coastguard Worker 546*0e209d39SAndroid Build Coastguard Worker #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 547*0e209d39SAndroid Build Coastguard Worker 548*0e209d39SAndroid Build Coastguard Worker #endif /* UBRK_H */ 549*0e209d39SAndroid Build Coastguard Worker 550*0e209d39SAndroid Build Coastguard Worker /** @} */ // addtogroup 551