xref: /aosp_15_r20/external/icu/libicu/cts_headers/unicode/rbbi.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker ***************************************************************************
5*0e209d39SAndroid Build Coastguard Worker *   Copyright (C) 1999-2016 International Business Machines Corporation   *
6*0e209d39SAndroid Build Coastguard Worker *   and others. All rights reserved.                                      *
7*0e209d39SAndroid Build Coastguard Worker ***************************************************************************
8*0e209d39SAndroid Build Coastguard Worker 
9*0e209d39SAndroid Build Coastguard Worker **********************************************************************
10*0e209d39SAndroid Build Coastguard Worker *   Date        Name        Description
11*0e209d39SAndroid Build Coastguard Worker *   10/22/99    alan        Creation.
12*0e209d39SAndroid Build Coastguard Worker *   11/11/99    rgillam     Complete port from Java.
13*0e209d39SAndroid Build Coastguard Worker **********************************************************************
14*0e209d39SAndroid Build Coastguard Worker */
15*0e209d39SAndroid Build Coastguard Worker 
16*0e209d39SAndroid Build Coastguard Worker #ifndef RBBI_H
17*0e209d39SAndroid Build Coastguard Worker #define RBBI_H
18*0e209d39SAndroid Build Coastguard Worker 
19*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
20*0e209d39SAndroid Build Coastguard Worker 
21*0e209d39SAndroid Build Coastguard Worker #if U_SHOW_CPLUSPLUS_API
22*0e209d39SAndroid Build Coastguard Worker 
23*0e209d39SAndroid Build Coastguard Worker /**
24*0e209d39SAndroid Build Coastguard Worker  * \file
25*0e209d39SAndroid Build Coastguard Worker  * \brief C++ API: Rule Based Break Iterator
26*0e209d39SAndroid Build Coastguard Worker  */
27*0e209d39SAndroid Build Coastguard Worker 
28*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_BREAK_ITERATION
29*0e209d39SAndroid Build Coastguard Worker 
30*0e209d39SAndroid Build Coastguard Worker #include "unicode/brkiter.h"
31*0e209d39SAndroid Build Coastguard Worker #include "unicode/udata.h"
32*0e209d39SAndroid Build Coastguard Worker #include "unicode/parseerr.h"
33*0e209d39SAndroid Build Coastguard Worker #include "unicode/schriter.h"
34*0e209d39SAndroid Build Coastguard Worker 
35*0e209d39SAndroid Build Coastguard Worker struct UCPTrie;
36*0e209d39SAndroid Build Coastguard Worker 
37*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
38*0e209d39SAndroid Build Coastguard Worker 
39*0e209d39SAndroid Build Coastguard Worker /** @internal */
40*0e209d39SAndroid Build Coastguard Worker class  LanguageBreakEngine;
41*0e209d39SAndroid Build Coastguard Worker struct RBBIDataHeader;
42*0e209d39SAndroid Build Coastguard Worker class  RBBIDataWrapper;
43*0e209d39SAndroid Build Coastguard Worker class  UnhandledEngine;
44*0e209d39SAndroid Build Coastguard Worker class  UStack;
45*0e209d39SAndroid Build Coastguard Worker 
46*0e209d39SAndroid Build Coastguard Worker 
47*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_INTERNAL_API
48*0e209d39SAndroid Build Coastguard Worker /**
 * The ExternalBreakEngine class defines an abstract interface through which the host
 * environment provides a low-level facility for breaking Unicode text in scripts whose text
 * boundaries cannot be handled by the upper-level rule-based logic, for example Chinese and
 * Japanese word breaking, and Thai, Khmer, Burmese, Lao and other Southeast Asian scripts.
 * The host environment implements one or more subclasses of ExternalBreakEngine and
 * registers them at initialization time by calling
 * RuleBasedBreakIterator::registerExternalBreakEngine(). ICU adopts and owns the engine and
 * will delete the registered external engine at the proper time during the clean-up
 * event.
58*0e209d39SAndroid Build Coastguard Worker  * @internal ICU 74 technology preview
59*0e209d39SAndroid Build Coastguard Worker  */
60*0e209d39SAndroid Build Coastguard Worker class ExternalBreakEngine : public UObject {
61*0e209d39SAndroid Build Coastguard Worker   public:
62*0e209d39SAndroid Build Coastguard Worker     /**
63*0e209d39SAndroid Build Coastguard Worker      * destructor
64*0e209d39SAndroid Build Coastguard Worker      * @internal ICU 74 technology preview
65*0e209d39SAndroid Build Coastguard Worker      */
~ExternalBreakEngine()66*0e209d39SAndroid Build Coastguard Worker     virtual ~ExternalBreakEngine() {}
67*0e209d39SAndroid Build Coastguard Worker 
68*0e209d39SAndroid Build Coastguard Worker     /**
69*0e209d39SAndroid Build Coastguard Worker      * <p>Indicate whether this engine handles a particular character when
70*0e209d39SAndroid Build Coastguard Worker      * the RuleBasedBreakIterator is used for a particular locale. This method is used
71*0e209d39SAndroid Build Coastguard Worker      * by the RuleBasedBreakIterator to find a break engine.</p>
72*0e209d39SAndroid Build Coastguard Worker      * @param c A character which begins a run that the engine might handle.
73*0e209d39SAndroid Build Coastguard Worker      * @param locale    The locale.
74*0e209d39SAndroid Build Coastguard Worker      * @return true if this engine handles the particular character for that locale.
75*0e209d39SAndroid Build Coastguard Worker      * @internal ICU 74 technology preview
76*0e209d39SAndroid Build Coastguard Worker      */
77*0e209d39SAndroid Build Coastguard Worker     virtual bool isFor(UChar32 c, const char* locale) const = 0;
78*0e209d39SAndroid Build Coastguard Worker 
79*0e209d39SAndroid Build Coastguard Worker     /**
80*0e209d39SAndroid Build Coastguard Worker      * <p>Indicate whether this engine handles a particular character.This method is
81*0e209d39SAndroid Build Coastguard Worker      * used by the RuleBasedBreakIterator after it already find a break engine to see which
82*0e209d39SAndroid Build Coastguard Worker      * characters after the first one can be handled by this break engine.</p>
83*0e209d39SAndroid Build Coastguard Worker      * @param c A character that the engine might handle.
84*0e209d39SAndroid Build Coastguard Worker      * @return true if this engine handles the particular character.
85*0e209d39SAndroid Build Coastguard Worker      * @internal ICU 74 technology preview
86*0e209d39SAndroid Build Coastguard Worker      */
87*0e209d39SAndroid Build Coastguard Worker     virtual bool handles(UChar32 c) const = 0;
88*0e209d39SAndroid Build Coastguard Worker 
89*0e209d39SAndroid Build Coastguard Worker     /**
90*0e209d39SAndroid Build Coastguard Worker      * <p>Divide up a range of text handled by this break engine.</p>
91*0e209d39SAndroid Build Coastguard Worker      *
92*0e209d39SAndroid Build Coastguard Worker      * @param text A UText representing the text
93*0e209d39SAndroid Build Coastguard Worker      * @param start The start of the range of known characters
94*0e209d39SAndroid Build Coastguard Worker      * @param end The end of the range of known characters
95*0e209d39SAndroid Build Coastguard Worker      * @param foundBreaks Output of C array of int32_t break positions, or
96*0e209d39SAndroid Build Coastguard Worker      * nullptr
97*0e209d39SAndroid Build Coastguard Worker      * @param foundBreaksCapacity The capacity of foundBreaks
98*0e209d39SAndroid Build Coastguard Worker      * @param status Information on any errors encountered.
99*0e209d39SAndroid Build Coastguard Worker      * @return The number of breaks found
100*0e209d39SAndroid Build Coastguard Worker      * @internal ICU 74 technology preview
101*0e209d39SAndroid Build Coastguard Worker      */
102*0e209d39SAndroid Build Coastguard Worker      virtual int32_t fillBreaks(UText* text,  int32_t start, int32_t end,
103*0e209d39SAndroid Build Coastguard Worker                                int32_t* foundBreaks, int32_t foundBreaksCapacity,
104*0e209d39SAndroid Build Coastguard Worker                                UErrorCode& status) const = 0;
105*0e209d39SAndroid Build Coastguard Worker };
106*0e209d39SAndroid Build Coastguard Worker #endif  /* U_HIDE_INTERNAL_API */
107*0e209d39SAndroid Build Coastguard Worker 
108*0e209d39SAndroid Build Coastguard Worker 
109*0e209d39SAndroid Build Coastguard Worker /**
110*0e209d39SAndroid Build Coastguard Worker  *
111*0e209d39SAndroid Build Coastguard Worker  * A subclass of BreakIterator whose behavior is specified using a list of rules.
112*0e209d39SAndroid Build Coastguard Worker  * <p>Instances of this class are most commonly created by the factory methods of
113*0e209d39SAndroid Build Coastguard Worker  *  BreakIterator::createWordInstance(), BreakIterator::createLineInstance(), etc.,
114*0e209d39SAndroid Build Coastguard Worker  *  and then used via the abstract API in class BreakIterator</p>
115*0e209d39SAndroid Build Coastguard Worker  *
116*0e209d39SAndroid Build Coastguard Worker  * <p>See the ICU User Guide for information on Break Iterator Rules.</p>
117*0e209d39SAndroid Build Coastguard Worker  *
118*0e209d39SAndroid Build Coastguard Worker  * <p>This class is not intended to be subclassed.</p>
119*0e209d39SAndroid Build Coastguard Worker  */
120*0e209d39SAndroid Build Coastguard Worker class U_COMMON_API RuleBasedBreakIterator /*final*/ : public BreakIterator {
121*0e209d39SAndroid Build Coastguard Worker 
122*0e209d39SAndroid Build Coastguard Worker private:
123*0e209d39SAndroid Build Coastguard Worker     /**
124*0e209d39SAndroid Build Coastguard Worker      * The UText through which this BreakIterator accesses the text
125*0e209d39SAndroid Build Coastguard Worker      * @internal (private)
126*0e209d39SAndroid Build Coastguard Worker      */
127*0e209d39SAndroid Build Coastguard Worker     UText  fText = UTEXT_INITIALIZER;
128*0e209d39SAndroid Build Coastguard Worker 
129*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_INTERNAL_API
130*0e209d39SAndroid Build Coastguard Worker public:
131*0e209d39SAndroid Build Coastguard Worker #endif /* U_HIDE_INTERNAL_API */
132*0e209d39SAndroid Build Coastguard Worker     /**
133*0e209d39SAndroid Build Coastguard Worker      * The rule data for this BreakIterator instance.
134*0e209d39SAndroid Build Coastguard Worker      * Not for general use; Public only for testing purposes.
135*0e209d39SAndroid Build Coastguard Worker      * @internal
136*0e209d39SAndroid Build Coastguard Worker      */
137*0e209d39SAndroid Build Coastguard Worker     RBBIDataWrapper    *fData = nullptr;
138*0e209d39SAndroid Build Coastguard Worker 
139*0e209d39SAndroid Build Coastguard Worker private:
140*0e209d39SAndroid Build Coastguard Worker     /**
141*0e209d39SAndroid Build Coastguard Worker       * The saved error code associated with this break iterator.
142*0e209d39SAndroid Build Coastguard Worker       * This is the value to be returned by copyErrorTo().
143*0e209d39SAndroid Build Coastguard Worker       */
144*0e209d39SAndroid Build Coastguard Worker     UErrorCode      fErrorCode = U_ZERO_ERROR;
145*0e209d39SAndroid Build Coastguard Worker 
146*0e209d39SAndroid Build Coastguard Worker     /**
      * The current position of the iterator. Pinned, 0 <= fPosition <= text.length.
148*0e209d39SAndroid Build Coastguard Worker       * Never has the value UBRK_DONE (-1).
149*0e209d39SAndroid Build Coastguard Worker       */
150*0e209d39SAndroid Build Coastguard Worker     int32_t         fPosition = 0;
151*0e209d39SAndroid Build Coastguard Worker 
152*0e209d39SAndroid Build Coastguard Worker     /**
153*0e209d39SAndroid Build Coastguard Worker       * TODO:
154*0e209d39SAndroid Build Coastguard Worker       */
155*0e209d39SAndroid Build Coastguard Worker     int32_t         fRuleStatusIndex = 0;
156*0e209d39SAndroid Build Coastguard Worker 
157*0e209d39SAndroid Build Coastguard Worker     /**
158*0e209d39SAndroid Build Coastguard Worker      *   Cache of previously determined boundary positions.
159*0e209d39SAndroid Build Coastguard Worker      */
160*0e209d39SAndroid Build Coastguard Worker     class BreakCache;
161*0e209d39SAndroid Build Coastguard Worker     BreakCache         *fBreakCache = nullptr;
162*0e209d39SAndroid Build Coastguard Worker 
163*0e209d39SAndroid Build Coastguard Worker     /**
164*0e209d39SAndroid Build Coastguard Worker      *  Cache of boundary positions within a region of text that has been
165*0e209d39SAndroid Build Coastguard Worker      *  sub-divided by dictionary based breaking.
166*0e209d39SAndroid Build Coastguard Worker      */
167*0e209d39SAndroid Build Coastguard Worker     class DictionaryCache;
168*0e209d39SAndroid Build Coastguard Worker     DictionaryCache *fDictionaryCache = nullptr;
169*0e209d39SAndroid Build Coastguard Worker 
170*0e209d39SAndroid Build Coastguard Worker     /**
171*0e209d39SAndroid Build Coastguard Worker      *
172*0e209d39SAndroid Build Coastguard Worker      * If present, UStack of LanguageBreakEngine objects that might handle
173*0e209d39SAndroid Build Coastguard Worker      * dictionary characters. Searched from top to bottom to find an object to
174*0e209d39SAndroid Build Coastguard Worker      * handle a given character.
175*0e209d39SAndroid Build Coastguard Worker      * @internal (private)
176*0e209d39SAndroid Build Coastguard Worker      */
177*0e209d39SAndroid Build Coastguard Worker     UStack              *fLanguageBreakEngines = nullptr;
178*0e209d39SAndroid Build Coastguard Worker 
179*0e209d39SAndroid Build Coastguard Worker     /**
180*0e209d39SAndroid Build Coastguard Worker      *
181*0e209d39SAndroid Build Coastguard Worker      * If present, the special LanguageBreakEngine used for handling
182*0e209d39SAndroid Build Coastguard Worker      * characters that are in the dictionary set, but not handled by any
183*0e209d39SAndroid Build Coastguard Worker      * LanguageBreakEngine.
184*0e209d39SAndroid Build Coastguard Worker      * @internal (private)
185*0e209d39SAndroid Build Coastguard Worker      */
186*0e209d39SAndroid Build Coastguard Worker     UnhandledEngine     *fUnhandledBreakEngine = nullptr;
187*0e209d39SAndroid Build Coastguard Worker 
188*0e209d39SAndroid Build Coastguard Worker     /**
189*0e209d39SAndroid Build Coastguard Worker      * Counter for the number of characters encountered with the "dictionary"
190*0e209d39SAndroid Build Coastguard Worker      *   flag set.
191*0e209d39SAndroid Build Coastguard Worker      * @internal (private)
192*0e209d39SAndroid Build Coastguard Worker      */
193*0e209d39SAndroid Build Coastguard Worker     uint32_t            fDictionaryCharCount = 0;
194*0e209d39SAndroid Build Coastguard Worker 
195*0e209d39SAndroid Build Coastguard Worker     /**
196*0e209d39SAndroid Build Coastguard Worker      *   A character iterator that refers to the same text as the UText, above.
197*0e209d39SAndroid Build Coastguard Worker      *   Only included for compatibility with old API, which was based on CharacterIterators.
198*0e209d39SAndroid Build Coastguard Worker      *   Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
199*0e209d39SAndroid Build Coastguard Worker      */
200*0e209d39SAndroid Build Coastguard Worker     CharacterIterator  *fCharIter = &fSCharIter;
201*0e209d39SAndroid Build Coastguard Worker 
202*0e209d39SAndroid Build Coastguard Worker     /**
203*0e209d39SAndroid Build Coastguard Worker      *   When the input text is provided by a UnicodeString, this will point to
204*0e209d39SAndroid Build Coastguard Worker      *    a characterIterator that wraps that data.  Needed only for the
205*0e209d39SAndroid Build Coastguard Worker      *    implementation of getText(), a backwards compatibility issue.
206*0e209d39SAndroid Build Coastguard Worker      */
207*0e209d39SAndroid Build Coastguard Worker     UCharCharacterIterator fSCharIter {u"", 0};
208*0e209d39SAndroid Build Coastguard Worker 
209*0e209d39SAndroid Build Coastguard Worker     /**
210*0e209d39SAndroid Build Coastguard Worker       * True when iteration has run off the end, and iterator functions should return UBRK_DONE.
211*0e209d39SAndroid Build Coastguard Worker       */
212*0e209d39SAndroid Build Coastguard Worker     bool           fDone = false;
213*0e209d39SAndroid Build Coastguard Worker 
214*0e209d39SAndroid Build Coastguard Worker     /**
215*0e209d39SAndroid Build Coastguard Worker      *  Array of look-ahead tentative results.
216*0e209d39SAndroid Build Coastguard Worker      */
217*0e209d39SAndroid Build Coastguard Worker     int32_t *fLookAheadMatches = nullptr;
218*0e209d39SAndroid Build Coastguard Worker 
219*0e209d39SAndroid Build Coastguard Worker     /**
220*0e209d39SAndroid Build Coastguard Worker      *  A flag to indicate if phrase based breaking is enabled.
221*0e209d39SAndroid Build Coastguard Worker      */
222*0e209d39SAndroid Build Coastguard Worker     UBool fIsPhraseBreaking = false;
223*0e209d39SAndroid Build Coastguard Worker 
224*0e209d39SAndroid Build Coastguard Worker     //=======================================================================
225*0e209d39SAndroid Build Coastguard Worker     // constructors
226*0e209d39SAndroid Build Coastguard Worker     //=======================================================================
227*0e209d39SAndroid Build Coastguard Worker 
228*0e209d39SAndroid Build Coastguard Worker     /**
229*0e209d39SAndroid Build Coastguard Worker      * Constructor from a flattened set of RBBI data in malloced memory.
     *             RuleBasedBreakIterators built from a custom set of rules
231*0e209d39SAndroid Build Coastguard Worker      *             are created via this constructor; the rules are compiled
232*0e209d39SAndroid Build Coastguard Worker      *             into memory, then the break iterator is constructed here.
233*0e209d39SAndroid Build Coastguard Worker      *
234*0e209d39SAndroid Build Coastguard Worker      *             The break iterator adopts the memory, and will
235*0e209d39SAndroid Build Coastguard Worker      *             free it when done.
236*0e209d39SAndroid Build Coastguard Worker      * @internal (private)
237*0e209d39SAndroid Build Coastguard Worker      */
238*0e209d39SAndroid Build Coastguard Worker     RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
239*0e209d39SAndroid Build Coastguard Worker 
240*0e209d39SAndroid Build Coastguard Worker     /**
241*0e209d39SAndroid Build Coastguard Worker      * This constructor uses the udata interface to create a BreakIterator
242*0e209d39SAndroid Build Coastguard Worker      * whose internal tables live in a memory-mapped file.  "image" is an
243*0e209d39SAndroid Build Coastguard Worker      * ICU UDataMemory handle for the pre-compiled break iterator tables.
244*0e209d39SAndroid Build Coastguard Worker      * @param image handle to the memory image for the break iterator data.
245*0e209d39SAndroid Build Coastguard Worker      *        Ownership of the UDataMemory handle passes to the Break Iterator,
246*0e209d39SAndroid Build Coastguard Worker      *        which will be responsible for closing it when it is no longer needed.
247*0e209d39SAndroid Build Coastguard Worker      * @param status Information on any errors encountered.
248*0e209d39SAndroid Build Coastguard Worker      * @param isPhraseBreaking true if phrase based breaking is required, otherwise false.
249*0e209d39SAndroid Build Coastguard Worker      * @see udata_open
250*0e209d39SAndroid Build Coastguard Worker      * @see #getBinaryRules
251*0e209d39SAndroid Build Coastguard Worker      * @internal (private)
252*0e209d39SAndroid Build Coastguard Worker      */
253*0e209d39SAndroid Build Coastguard Worker     RuleBasedBreakIterator(UDataMemory* image, UBool isPhraseBreaking, UErrorCode &status);
254*0e209d39SAndroid Build Coastguard Worker 
255*0e209d39SAndroid Build Coastguard Worker     /** @internal */
256*0e209d39SAndroid Build Coastguard Worker     friend class RBBIRuleBuilder;
257*0e209d39SAndroid Build Coastguard Worker     /** @internal */
258*0e209d39SAndroid Build Coastguard Worker     friend class BreakIterator;
259*0e209d39SAndroid Build Coastguard Worker 
260*0e209d39SAndroid Build Coastguard Worker     /**
261*0e209d39SAndroid Build Coastguard Worker      * Default constructor with an error code parameter.
262*0e209d39SAndroid Build Coastguard Worker      * Aside from error handling, otherwise identical to the default constructor.
263*0e209d39SAndroid Build Coastguard Worker      * Internally, handles common initialization for other constructors.
264*0e209d39SAndroid Build Coastguard Worker      * @internal (private)
265*0e209d39SAndroid Build Coastguard Worker      */
266*0e209d39SAndroid Build Coastguard Worker     RuleBasedBreakIterator(UErrorCode *status);
267*0e209d39SAndroid Build Coastguard Worker 
268*0e209d39SAndroid Build Coastguard Worker public:
269*0e209d39SAndroid Build Coastguard Worker 
270*0e209d39SAndroid Build Coastguard Worker     /** Default constructor.  Creates an empty shell of an iterator, with no
271*0e209d39SAndroid Build Coastguard Worker      *  rules or text to iterate over.   Object can subsequently be assigned to,
272*0e209d39SAndroid Build Coastguard Worker      *  but is otherwise unusable.
273*0e209d39SAndroid Build Coastguard Worker      *  @stable ICU 2.2
274*0e209d39SAndroid Build Coastguard Worker      */
275*0e209d39SAndroid Build Coastguard Worker     RuleBasedBreakIterator();
276*0e209d39SAndroid Build Coastguard Worker 
277*0e209d39SAndroid Build Coastguard Worker     /**
278*0e209d39SAndroid Build Coastguard Worker      * Copy constructor.  Will produce a break iterator with the same behavior,
279*0e209d39SAndroid Build Coastguard Worker      * and which iterates over the same text, as the one passed in.
280*0e209d39SAndroid Build Coastguard Worker      * @param that The RuleBasedBreakIterator passed to be copied
281*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
282*0e209d39SAndroid Build Coastguard Worker      */
283*0e209d39SAndroid Build Coastguard Worker     RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
284*0e209d39SAndroid Build Coastguard Worker 
285*0e209d39SAndroid Build Coastguard Worker     /**
286*0e209d39SAndroid Build Coastguard Worker      * Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
287*0e209d39SAndroid Build Coastguard Worker      * @param rules The break rules to be used.
288*0e209d39SAndroid Build Coastguard Worker      * @param parseError  In the event of a syntax error in the rules, provides the location
289*0e209d39SAndroid Build Coastguard Worker      *                    within the rules of the problem.
290*0e209d39SAndroid Build Coastguard Worker      * @param status Information on any errors encountered.
291*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.2
292*0e209d39SAndroid Build Coastguard Worker      */
293*0e209d39SAndroid Build Coastguard Worker     RuleBasedBreakIterator( const UnicodeString    &rules,
294*0e209d39SAndroid Build Coastguard Worker                              UParseError           &parseError,
295*0e209d39SAndroid Build Coastguard Worker                              UErrorCode            &status);
296*0e209d39SAndroid Build Coastguard Worker 
297*0e209d39SAndroid Build Coastguard Worker     /**
298*0e209d39SAndroid Build Coastguard Worker      * Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
     * Binary rules are obtained from RuleBasedBreakIterator::getBinaryRules().
300*0e209d39SAndroid Build Coastguard Worker      * Construction of a break iterator in this way is substantially faster than
301*0e209d39SAndroid Build Coastguard Worker      * construction from source rules.
302*0e209d39SAndroid Build Coastguard Worker      *
303*0e209d39SAndroid Build Coastguard Worker      * Ownership of the storage containing the compiled rules remains with the
304*0e209d39SAndroid Build Coastguard Worker      * caller of this function.  The compiled rules must not be  modified or
305*0e209d39SAndroid Build Coastguard Worker      * deleted during the life of the break iterator.
306*0e209d39SAndroid Build Coastguard Worker      *
307*0e209d39SAndroid Build Coastguard Worker      * The compiled rules are not compatible across different major versions of ICU.
308*0e209d39SAndroid Build Coastguard Worker      * The compiled rules are compatible only between machines with the same
309*0e209d39SAndroid Build Coastguard Worker      * byte ordering (little or big endian) and the same base character set family
310*0e209d39SAndroid Build Coastguard Worker      * (ASCII or EBCDIC).
311*0e209d39SAndroid Build Coastguard Worker      *
312*0e209d39SAndroid Build Coastguard Worker      * @see #getBinaryRules
313*0e209d39SAndroid Build Coastguard Worker      * @param compiledRules A pointer to the compiled break rules to be used.
314*0e209d39SAndroid Build Coastguard Worker      * @param ruleLength The length of the compiled break rules, in bytes.  This
315*0e209d39SAndroid Build Coastguard Worker      *   corresponds to the length value produced by getBinaryRules().
316*0e209d39SAndroid Build Coastguard Worker      * @param status Information on any errors encountered, including invalid
317*0e209d39SAndroid Build Coastguard Worker      *   binary rules.
318*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
319*0e209d39SAndroid Build Coastguard Worker      */
320*0e209d39SAndroid Build Coastguard Worker     RuleBasedBreakIterator(const uint8_t *compiledRules,
321*0e209d39SAndroid Build Coastguard Worker                            uint32_t       ruleLength,
322*0e209d39SAndroid Build Coastguard Worker                            UErrorCode    &status);
323*0e209d39SAndroid Build Coastguard Worker 
324*0e209d39SAndroid Build Coastguard Worker     /**
325*0e209d39SAndroid Build Coastguard Worker      * This constructor uses the udata interface to create a BreakIterator
326*0e209d39SAndroid Build Coastguard Worker      * whose internal tables live in a memory-mapped file.  "image" is an
327*0e209d39SAndroid Build Coastguard Worker      * ICU UDataMemory handle for the pre-compiled break iterator tables.
328*0e209d39SAndroid Build Coastguard Worker      * @param image handle to the memory image for the break iterator data.
329*0e209d39SAndroid Build Coastguard Worker      *        Ownership of the UDataMemory handle passes to the Break Iterator,
330*0e209d39SAndroid Build Coastguard Worker      *        which will be responsible for closing it when it is no longer needed.
331*0e209d39SAndroid Build Coastguard Worker      * @param status Information on any errors encountered.
332*0e209d39SAndroid Build Coastguard Worker      * @see udata_open
333*0e209d39SAndroid Build Coastguard Worker      * @see #getBinaryRules
334*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.8
335*0e209d39SAndroid Build Coastguard Worker      */
336*0e209d39SAndroid Build Coastguard Worker     RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
337*0e209d39SAndroid Build Coastguard Worker 
338*0e209d39SAndroid Build Coastguard Worker     /**
339*0e209d39SAndroid Build Coastguard Worker      * Destructor
340*0e209d39SAndroid Build Coastguard Worker      *  @stable ICU 2.0
341*0e209d39SAndroid Build Coastguard Worker      */
342*0e209d39SAndroid Build Coastguard Worker     virtual ~RuleBasedBreakIterator();
343*0e209d39SAndroid Build Coastguard Worker 
344*0e209d39SAndroid Build Coastguard Worker     /**
345*0e209d39SAndroid Build Coastguard Worker      * Assignment operator.  Sets this iterator to have the same behavior,
346*0e209d39SAndroid Build Coastguard Worker      * and iterate over the same text, as the one passed in.
     * @param that The RuleBasedBreakIterator passed in
348*0e209d39SAndroid Build Coastguard Worker      * @return the newly created RuleBasedBreakIterator
349*0e209d39SAndroid Build Coastguard Worker      *  @stable ICU 2.0
350*0e209d39SAndroid Build Coastguard Worker      */
351*0e209d39SAndroid Build Coastguard Worker     RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
352*0e209d39SAndroid Build Coastguard Worker 
353*0e209d39SAndroid Build Coastguard Worker     /**
354*0e209d39SAndroid Build Coastguard Worker      * Equality operator.  Returns true if both BreakIterators are of the
355*0e209d39SAndroid Build Coastguard Worker      * same class, have the same behavior, and iterate over the same text.
356*0e209d39SAndroid Build Coastguard Worker      * @param that The BreakIterator to be compared for equality
357*0e209d39SAndroid Build Coastguard Worker      * @return true if both BreakIterators are of the
358*0e209d39SAndroid Build Coastguard Worker      * same class, have the same behavior, and iterate over the same text.
359*0e209d39SAndroid Build Coastguard Worker      *  @stable ICU 2.0
360*0e209d39SAndroid Build Coastguard Worker      */
361*0e209d39SAndroid Build Coastguard Worker     virtual bool operator==(const BreakIterator& that) const override;
362*0e209d39SAndroid Build Coastguard Worker 
363*0e209d39SAndroid Build Coastguard Worker     /**
364*0e209d39SAndroid Build Coastguard Worker      * Not-equal operator.  If operator== returns true, this returns false,
365*0e209d39SAndroid Build Coastguard Worker      * and vice versa.
366*0e209d39SAndroid Build Coastguard Worker      * @param that The BreakIterator to be compared for inequality
     * @return true if both BreakIterators are not the same.
368*0e209d39SAndroid Build Coastguard Worker      *  @stable ICU 2.0
369*0e209d39SAndroid Build Coastguard Worker      */
370*0e209d39SAndroid Build Coastguard Worker     inline bool operator!=(const BreakIterator& that) const {
371*0e209d39SAndroid Build Coastguard Worker         return !operator==(that);
372*0e209d39SAndroid Build Coastguard Worker     }
373*0e209d39SAndroid Build Coastguard Worker 
374*0e209d39SAndroid Build Coastguard Worker     /**
375*0e209d39SAndroid Build Coastguard Worker      * Returns a newly-constructed RuleBasedBreakIterator with the same
376*0e209d39SAndroid Build Coastguard Worker      * behavior, and iterating over the same text, as this one.
377*0e209d39SAndroid Build Coastguard Worker      * Differs from the copy constructor in that it is polymorphic, and
378*0e209d39SAndroid Build Coastguard Worker      * will correctly clone (copy) a derived class.
379*0e209d39SAndroid Build Coastguard Worker      * clone() is thread safe.  Multiple threads may simultaneously
380*0e209d39SAndroid Build Coastguard Worker      * clone the same source break iterator.
381*0e209d39SAndroid Build Coastguard Worker      * @return a newly-constructed RuleBasedBreakIterator
382*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
383*0e209d39SAndroid Build Coastguard Worker      */
384*0e209d39SAndroid Build Coastguard Worker     virtual RuleBasedBreakIterator* clone() const override;
385*0e209d39SAndroid Build Coastguard Worker 
386*0e209d39SAndroid Build Coastguard Worker     /**
387*0e209d39SAndroid Build Coastguard Worker      * Computes a hash code for this BreakIterator.
388*0e209d39SAndroid Build Coastguard Worker      * @return A hash code
389*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
390*0e209d39SAndroid Build Coastguard Worker      */
391*0e209d39SAndroid Build Coastguard Worker     virtual int32_t hashCode() const;
392*0e209d39SAndroid Build Coastguard Worker 
393*0e209d39SAndroid Build Coastguard Worker     /**
394*0e209d39SAndroid Build Coastguard Worker      * Returns the description (the break rules) used to create this iterator.
395*0e209d39SAndroid Build Coastguard Worker      * @return the description (the break rules) used to create this iterator
396*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
397*0e209d39SAndroid Build Coastguard Worker      */
398*0e209d39SAndroid Build Coastguard Worker     virtual const UnicodeString& getRules() const;
399*0e209d39SAndroid Build Coastguard Worker 
400*0e209d39SAndroid Build Coastguard Worker     //=======================================================================
401*0e209d39SAndroid Build Coastguard Worker     // BreakIterator overrides
402*0e209d39SAndroid Build Coastguard Worker     //=======================================================================
403*0e209d39SAndroid Build Coastguard Worker 
404*0e209d39SAndroid Build Coastguard Worker     /**
405*0e209d39SAndroid Build Coastguard Worker      * <p>
406*0e209d39SAndroid Build Coastguard Worker      * Returns a CharacterIterator over the text being analyzed.
407*0e209d39SAndroid Build Coastguard Worker      * The returned character iterator is owned by the break iterator, and must
408*0e209d39SAndroid Build Coastguard Worker      * not be deleted by the caller.  Repeated calls to this function may
409*0e209d39SAndroid Build Coastguard Worker      * return the same CharacterIterator.
410*0e209d39SAndroid Build Coastguard Worker      * </p>
411*0e209d39SAndroid Build Coastguard Worker      * <p>
412*0e209d39SAndroid Build Coastguard Worker      * The returned character iterator must not be used concurrently with
413*0e209d39SAndroid Build Coastguard Worker      * the break iterator.  If concurrent operation is needed, clone the
414*0e209d39SAndroid Build Coastguard Worker      * returned character iterator first and operate on the clone.
415*0e209d39SAndroid Build Coastguard Worker      * </p>
416*0e209d39SAndroid Build Coastguard Worker      * <p>
417*0e209d39SAndroid Build Coastguard Worker      * When the break iterator is operating on text supplied via a UText,
418*0e209d39SAndroid Build Coastguard Worker      * this function will fail, returning a CharacterIterator containing no text.
419*0e209d39SAndroid Build Coastguard Worker      * Prefer getUText(), which provides similar functionality,
420*0e209d39SAndroid Build Coastguard Worker      * is reliable, and is more efficient.
421*0e209d39SAndroid Build Coastguard Worker      * </p>
422*0e209d39SAndroid Build Coastguard Worker      *
423*0e209d39SAndroid Build Coastguard Worker      * TODO: deprecate this function?
424*0e209d39SAndroid Build Coastguard Worker      *
425*0e209d39SAndroid Build Coastguard Worker      * @return An iterator over the text being analyzed.
426*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
427*0e209d39SAndroid Build Coastguard Worker      */
428*0e209d39SAndroid Build Coastguard Worker     virtual CharacterIterator& getText() const override;
429*0e209d39SAndroid Build Coastguard Worker 
430*0e209d39SAndroid Build Coastguard Worker     /**
431*0e209d39SAndroid Build Coastguard Worker       *  Get a UText for the text being analyzed.
432*0e209d39SAndroid Build Coastguard Worker       *  The returned UText is a shallow clone of the UText used internally
433*0e209d39SAndroid Build Coastguard Worker       *  by the break iterator implementation.  It can safely be used to
434*0e209d39SAndroid Build Coastguard Worker       *  access the text without impacting any break iterator operations,
435*0e209d39SAndroid Build Coastguard Worker       *  but the underlying text itself must not be altered.
436*0e209d39SAndroid Build Coastguard Worker       *
437*0e209d39SAndroid Build Coastguard Worker       * @param fillIn A UText to be filled in.  If nullptr, a new UText will be
438*0e209d39SAndroid Build Coastguard Worker       *           allocated to hold the result.
439*0e209d39SAndroid Build Coastguard Worker       * @param status Receives any error codes.
440*0e209d39SAndroid Build Coastguard Worker       * @return   The current UText for this break iterator.  If a fillIn
441*0e209d39SAndroid Build Coastguard Worker       *           UText was provided, it will always be the one returned.
442*0e209d39SAndroid Build Coastguard Worker       * @stable ICU 3.4
443*0e209d39SAndroid Build Coastguard Worker       */
444*0e209d39SAndroid Build Coastguard Worker      virtual UText *getUText(UText *fillIn, UErrorCode &status) const override;
445*0e209d39SAndroid Build Coastguard Worker 
446*0e209d39SAndroid Build Coastguard Worker     /**
447*0e209d39SAndroid Build Coastguard Worker      * Set the iterator to analyze a new piece of text.  This function resets
448*0e209d39SAndroid Build Coastguard Worker      * the current iteration position to the beginning of the text.
449*0e209d39SAndroid Build Coastguard Worker      * @param newText An iterator over the text to analyze.  The BreakIterator
450*0e209d39SAndroid Build Coastguard Worker      * adopts (takes ownership of) the character iterator.  The caller MUST NOT delete it!
451*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
452*0e209d39SAndroid Build Coastguard Worker      */
453*0e209d39SAndroid Build Coastguard Worker     virtual void adoptText(CharacterIterator* newText) override;
454*0e209d39SAndroid Build Coastguard Worker 
455*0e209d39SAndroid Build Coastguard Worker     /**
456*0e209d39SAndroid Build Coastguard Worker      * Set the iterator to analyze a new piece of text.  This function resets
457*0e209d39SAndroid Build Coastguard Worker      * the current iteration position to the beginning of the text.
458*0e209d39SAndroid Build Coastguard Worker      *
459*0e209d39SAndroid Build Coastguard Worker      * The BreakIterator will retain a reference to the supplied string.
460*0e209d39SAndroid Build Coastguard Worker      * The caller must not modify or delete the text while the BreakIterator
461*0e209d39SAndroid Build Coastguard Worker      * retains the reference.
462*0e209d39SAndroid Build Coastguard Worker      *
463*0e209d39SAndroid Build Coastguard Worker      * @param newText The text to analyze; it is referenced, not copied.
464*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
465*0e209d39SAndroid Build Coastguard Worker      */
466*0e209d39SAndroid Build Coastguard Worker     virtual void setText(const UnicodeString& newText) override;
467*0e209d39SAndroid Build Coastguard Worker 
468*0e209d39SAndroid Build Coastguard Worker     /**
469*0e209d39SAndroid Build Coastguard Worker      * Reset the break iterator to operate over the text represented by
470*0e209d39SAndroid Build Coastguard Worker      * the UText.  The iterator position is reset to the start.
471*0e209d39SAndroid Build Coastguard Worker      *
472*0e209d39SAndroid Build Coastguard Worker      * This function makes a shallow clone of the supplied UText.  This means
473*0e209d39SAndroid Build Coastguard Worker      * that the caller is free to immediately close or otherwise reuse the
474*0e209d39SAndroid Build Coastguard Worker      * UText that was passed as a parameter, but that the underlying text itself
475*0e209d39SAndroid Build Coastguard Worker      * must not be altered while being referenced by the break iterator.
476*0e209d39SAndroid Build Coastguard Worker      *
477*0e209d39SAndroid Build Coastguard Worker      * @param text    The UText used to change the text.
478*0e209d39SAndroid Build Coastguard Worker      * @param status  Receives any error codes.
479*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 3.4
480*0e209d39SAndroid Build Coastguard Worker      */
481*0e209d39SAndroid Build Coastguard Worker     virtual void  setText(UText *text, UErrorCode &status) override;
482*0e209d39SAndroid Build Coastguard Worker 
483*0e209d39SAndroid Build Coastguard Worker     /**
484*0e209d39SAndroid Build Coastguard Worker      * Sets the current iteration position to the beginning of the text (position zero).
485*0e209d39SAndroid Build Coastguard Worker      * @return The offset of the beginning of the text, which is zero.
486*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
487*0e209d39SAndroid Build Coastguard Worker      */
488*0e209d39SAndroid Build Coastguard Worker     virtual int32_t first() override;
489*0e209d39SAndroid Build Coastguard Worker 
490*0e209d39SAndroid Build Coastguard Worker     /**
491*0e209d39SAndroid Build Coastguard Worker      * Sets the current iteration position to the end of the text (its past-the-end offset).
492*0e209d39SAndroid Build Coastguard Worker      * @return The text's past-the-end offset.
493*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
494*0e209d39SAndroid Build Coastguard Worker      */
495*0e209d39SAndroid Build Coastguard Worker     virtual int32_t last() override;
496*0e209d39SAndroid Build Coastguard Worker 
497*0e209d39SAndroid Build Coastguard Worker     /**
498*0e209d39SAndroid Build Coastguard Worker      * Moves the iterator either forward or backward by the specified number of steps.
499*0e209d39SAndroid Build Coastguard Worker      * Negative values move backward, and positive values move forward.  This is
500*0e209d39SAndroid Build Coastguard Worker      * equivalent to repeatedly calling next() or previous().
501*0e209d39SAndroid Build Coastguard Worker      * @param n The number of steps to move.  The sign indicates the direction
502*0e209d39SAndroid Build Coastguard Worker      * (negative is backwards, and positive is forwards).
503*0e209d39SAndroid Build Coastguard Worker      * @return The character offset of the boundary position n boundaries away from
504*0e209d39SAndroid Build Coastguard Worker      * the current one.
505*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
506*0e209d39SAndroid Build Coastguard Worker      */
507*0e209d39SAndroid Build Coastguard Worker     virtual int32_t next(int32_t n) override;
508*0e209d39SAndroid Build Coastguard Worker 
509*0e209d39SAndroid Build Coastguard Worker     /**
510*0e209d39SAndroid Build Coastguard Worker      * Advances the iterator to the next boundary position.
511*0e209d39SAndroid Build Coastguard Worker      * @return The position of the first boundary after the current one.
512*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
513*0e209d39SAndroid Build Coastguard Worker      */
514*0e209d39SAndroid Build Coastguard Worker     virtual int32_t next() override;
515*0e209d39SAndroid Build Coastguard Worker 
516*0e209d39SAndroid Build Coastguard Worker     /**
517*0e209d39SAndroid Build Coastguard Worker      * Moves the iterator backwards, to the last boundary preceding this one.
518*0e209d39SAndroid Build Coastguard Worker      * @return The position of the last boundary preceding this one.
519*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
520*0e209d39SAndroid Build Coastguard Worker      */
521*0e209d39SAndroid Build Coastguard Worker     virtual int32_t previous() override;
522*0e209d39SAndroid Build Coastguard Worker 
523*0e209d39SAndroid Build Coastguard Worker     /**
524*0e209d39SAndroid Build Coastguard Worker      * Sets the iterator to refer to the first boundary position following
525*0e209d39SAndroid Build Coastguard Worker      * the specified position.
526*0e209d39SAndroid Build Coastguard Worker      * @param offset The position from which to begin searching for a break position.
527*0e209d39SAndroid Build Coastguard Worker      * @return The position of the first break after the specified offset.
528*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
529*0e209d39SAndroid Build Coastguard Worker      */
530*0e209d39SAndroid Build Coastguard Worker     virtual int32_t following(int32_t offset) override;
531*0e209d39SAndroid Build Coastguard Worker 
532*0e209d39SAndroid Build Coastguard Worker     /**
533*0e209d39SAndroid Build Coastguard Worker      * Sets the iterator to refer to the last boundary position before the
534*0e209d39SAndroid Build Coastguard Worker      * specified position.
535*0e209d39SAndroid Build Coastguard Worker      * @param offset The position from which to begin searching for a break position.
536*0e209d39SAndroid Build Coastguard Worker      * @return The position of the last boundary before the specified offset.
537*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
538*0e209d39SAndroid Build Coastguard Worker      */
539*0e209d39SAndroid Build Coastguard Worker     virtual int32_t preceding(int32_t offset) override;
540*0e209d39SAndroid Build Coastguard Worker 
541*0e209d39SAndroid Build Coastguard Worker     /**
542*0e209d39SAndroid Build Coastguard Worker      * Returns true if the specified position is a boundary position.  As a side
543*0e209d39SAndroid Build Coastguard Worker      * effect, leaves the iterator pointing to the first boundary position at
544*0e209d39SAndroid Build Coastguard Worker      * or after "offset".
545*0e209d39SAndroid Build Coastguard Worker      * @param offset The offset to check.
546*0e209d39SAndroid Build Coastguard Worker      * @return True if "offset" is a boundary position.
547*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
548*0e209d39SAndroid Build Coastguard Worker      */
549*0e209d39SAndroid Build Coastguard Worker     virtual UBool isBoundary(int32_t offset) override;
550*0e209d39SAndroid Build Coastguard Worker 
551*0e209d39SAndroid Build Coastguard Worker     /**
552*0e209d39SAndroid Build Coastguard Worker      * Returns the current iteration position. Note that UBRK_DONE is never
553*0e209d39SAndroid Build Coastguard Worker      * returned from this function; if iteration has run to the end of a
554*0e209d39SAndroid Build Coastguard Worker      * string, current() will return the length of the string while
555*0e209d39SAndroid Build Coastguard Worker      * next() will return UBRK_DONE.
556*0e209d39SAndroid Build Coastguard Worker      * @return The current iteration position.
557*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
558*0e209d39SAndroid Build Coastguard Worker      */
559*0e209d39SAndroid Build Coastguard Worker     virtual int32_t current() const override;
560*0e209d39SAndroid Build Coastguard Worker 
561*0e209d39SAndroid Build Coastguard Worker     /**
562*0e209d39SAndroid Build Coastguard Worker      * Return the status tag from the break rule that determined the boundary at
563*0e209d39SAndroid Build Coastguard Worker      * the current iteration position.  For break rules that do not specify a
564*0e209d39SAndroid Build Coastguard Worker      * status, a default value of 0 is returned.  If more than one break rule
565*0e209d39SAndroid Build Coastguard Worker      * would cause a boundary to be located at some position in the text,
566*0e209d39SAndroid Build Coastguard Worker      * the numerically largest of the applicable status values is returned.
567*0e209d39SAndroid Build Coastguard Worker      * <p>
568*0e209d39SAndroid Build Coastguard Worker      * Of the standard types of ICU break iterators, only word break and
569*0e209d39SAndroid Build Coastguard Worker      * line break provide status values.  The values are defined in
570*0e209d39SAndroid Build Coastguard Worker      * the header file ubrk.h.  For Word breaks, the status allows distinguishing between words
571*0e209d39SAndroid Build Coastguard Worker      * that contain alphabetic letters, "words" that appear to be numbers,
572*0e209d39SAndroid Build Coastguard Worker      * punctuation and spaces, words containing ideographic characters, and
573*0e209d39SAndroid Build Coastguard Worker      * more.  For Line Break, the status distinguishes between hard (mandatory) breaks
574*0e209d39SAndroid Build Coastguard Worker      * and soft (potential) break positions.
575*0e209d39SAndroid Build Coastguard Worker      * <p>
576*0e209d39SAndroid Build Coastguard Worker      * <code>getRuleStatus()</code> can be called after obtaining a boundary
577*0e209d39SAndroid Build Coastguard Worker      * position from <code>next()</code>, <code>previous()</code>, or
578*0e209d39SAndroid Build Coastguard Worker      * any other break iterator function that returns a boundary position.
579*0e209d39SAndroid Build Coastguard Worker      * <p>
580*0e209d39SAndroid Build Coastguard Worker      * Note that <code>getRuleStatus()</code> returns the value corresponding to
581*0e209d39SAndroid Build Coastguard Worker      * <code>current()</code> index even after <code>next()</code> has returned DONE.
582*0e209d39SAndroid Build Coastguard Worker      * <p>
583*0e209d39SAndroid Build Coastguard Worker      * When creating custom break rules, one is free to define whatever
584*0e209d39SAndroid Build Coastguard Worker      * status values may be convenient for the application.
585*0e209d39SAndroid Build Coastguard Worker      *
586*0e209d39SAndroid Build Coastguard Worker      * @return the status from the break rule that determined the boundary
587*0e209d39SAndroid Build Coastguard Worker      * at the current iteration position.
588*0e209d39SAndroid Build Coastguard Worker      *
589*0e209d39SAndroid Build Coastguard Worker      * @see UWordBreak
590*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.2
591*0e209d39SAndroid Build Coastguard Worker      */
592*0e209d39SAndroid Build Coastguard Worker     virtual int32_t getRuleStatus() const override;
593*0e209d39SAndroid Build Coastguard Worker 
594*0e209d39SAndroid Build Coastguard Worker    /**
595*0e209d39SAndroid Build Coastguard Worker     * Get the status (tag) values from the break rule(s) that determined the boundary
596*0e209d39SAndroid Build Coastguard Worker     * at the current iteration position.
597*0e209d39SAndroid Build Coastguard Worker     * <p>
598*0e209d39SAndroid Build Coastguard Worker     * The returned status value(s) are stored into an array provided by the caller.
599*0e209d39SAndroid Build Coastguard Worker     * The values are stored in sorted (ascending) order.
600*0e209d39SAndroid Build Coastguard Worker     * If the capacity of the output array is insufficient to hold the data,
601*0e209d39SAndroid Build Coastguard Worker     *  the output will be truncated to the available length, and a
602*0e209d39SAndroid Build Coastguard Worker     *  U_BUFFER_OVERFLOW_ERROR will be signaled.
603*0e209d39SAndroid Build Coastguard Worker     *
604*0e209d39SAndroid Build Coastguard Worker     * @param fillInVec an array to be filled in with the status values.
605*0e209d39SAndroid Build Coastguard Worker     * @param capacity  the length of the supplied array.  A length of zero causes
606*0e209d39SAndroid Build Coastguard Worker     *                  the function to return the number of status values, in the
607*0e209d39SAndroid Build Coastguard Worker     *                  normal way, without attempting to store any values.
608*0e209d39SAndroid Build Coastguard Worker     * @param status    receives error codes.
609*0e209d39SAndroid Build Coastguard Worker     * @return          The number of rule status values from the rules that determined
610*0e209d39SAndroid Build Coastguard Worker     *                  the boundary at the current iteration position.
611*0e209d39SAndroid Build Coastguard Worker     *                  In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
612*0e209d39SAndroid Build Coastguard Worker     *                  is the total number of status values that were available,
613*0e209d39SAndroid Build Coastguard Worker     *                  not the reduced number that were actually returned.
614*0e209d39SAndroid Build Coastguard Worker     * @see getRuleStatus
615*0e209d39SAndroid Build Coastguard Worker     * @stable ICU 3.0
616*0e209d39SAndroid Build Coastguard Worker     */
617*0e209d39SAndroid Build Coastguard Worker     virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) override;
618*0e209d39SAndroid Build Coastguard Worker 
619*0e209d39SAndroid Build Coastguard Worker     /**
620*0e209d39SAndroid Build Coastguard Worker      * Returns a unique class ID POLYMORPHICALLY.  Overrides a pure virtual method.
621*0e209d39SAndroid Build Coastguard Worker      * This method implements a simple version of RTTI, since not all
622*0e209d39SAndroid Build Coastguard Worker      * C++ compilers support genuine RTTI.  Polymorphic operator==() and
623*0e209d39SAndroid Build Coastguard Worker      * clone() methods call this method.
624*0e209d39SAndroid Build Coastguard Worker      *
625*0e209d39SAndroid Build Coastguard Worker      * @return          The class ID for this object. All objects of a
626*0e209d39SAndroid Build Coastguard Worker      *                  given class have the same class ID.  Objects of
627*0e209d39SAndroid Build Coastguard Worker      *                  other classes have different class IDs.
628*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
629*0e209d39SAndroid Build Coastguard Worker      */
630*0e209d39SAndroid Build Coastguard Worker     virtual UClassID getDynamicClassID() const override;
631*0e209d39SAndroid Build Coastguard Worker 
632*0e209d39SAndroid Build Coastguard Worker     /**
633*0e209d39SAndroid Build Coastguard Worker      * Returns the class ID for this class.  This is useful only for
634*0e209d39SAndroid Build Coastguard Worker      * comparing to a return value from getDynamicClassID().  For example:
635*0e209d39SAndroid Build Coastguard Worker      * <pre>
636*0e209d39SAndroid Build Coastguard Worker      *      Base* polymorphic_pointer = createPolymorphicObject();
637*0e209d39SAndroid Build Coastguard Worker      *      if (polymorphic_pointer->getDynamicClassID() ==
638*0e209d39SAndroid Build Coastguard Worker      *          Derived::getStaticClassID()) ...
639*0e209d39SAndroid Build Coastguard Worker      * </pre>
640*0e209d39SAndroid Build Coastguard Worker      * @return          The class ID for all objects of this class.
641*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 2.0
642*0e209d39SAndroid Build Coastguard Worker      */
643*0e209d39SAndroid Build Coastguard Worker     static UClassID U_EXPORT2 getStaticClassID();
644*0e209d39SAndroid Build Coastguard Worker 
645*0e209d39SAndroid Build Coastguard Worker #ifndef U_FORCE_HIDE_DEPRECATED_API
646*0e209d39SAndroid Build Coastguard Worker     /**
647*0e209d39SAndroid Build Coastguard Worker      * Deprecated functionality. Use clone() instead.
648*0e209d39SAndroid Build Coastguard Worker      *
649*0e209d39SAndroid Build Coastguard Worker      * Create a clone (copy) of this break iterator in memory provided
650*0e209d39SAndroid Build Coastguard Worker      *  by the caller.  The idea is to increase performance by avoiding
651*0e209d39SAndroid Build Coastguard Worker      *  a storage allocation.  Use of this function is NOT RECOMMENDED.
652*0e209d39SAndroid Build Coastguard Worker      *  Performance gains are minimal, and correct buffer management is
653*0e209d39SAndroid Build Coastguard Worker      *  tricky.  Use clone() instead.
654*0e209d39SAndroid Build Coastguard Worker      *
655*0e209d39SAndroid Build Coastguard Worker      * @param stackBuffer  The pointer to the memory into which the cloned object
656*0e209d39SAndroid Build Coastguard Worker      *                     should be placed.  If nullptr, allocate heap memory
657*0e209d39SAndroid Build Coastguard Worker      *                     for the cloned object.
658*0e209d39SAndroid Build Coastguard Worker      * @param BufferSize   The size of the buffer.  If zero, return the required
659*0e209d39SAndroid Build Coastguard Worker      *                     buffer size, but do not clone the object.  If the
660*0e209d39SAndroid Build Coastguard Worker      *                     size was too small (but not zero), allocate heap
661*0e209d39SAndroid Build Coastguard Worker      *                     storage for the cloned object.
662*0e209d39SAndroid Build Coastguard Worker      *
663*0e209d39SAndroid Build Coastguard Worker      * @param status       Error status.  U_SAFECLONE_ALLOCATED_WARNING will be
664*0e209d39SAndroid Build Coastguard Worker      *                     set if the provided buffer was too small, and
665*0e209d39SAndroid Build Coastguard Worker      *                     the clone was therefore put on the heap.
666*0e209d39SAndroid Build Coastguard Worker      *
667*0e209d39SAndroid Build Coastguard Worker      * @return  Pointer to the clone object.  This may differ from the stackBuffer
668*0e209d39SAndroid Build Coastguard Worker      *          address if the byte alignment of the stack buffer was not suitable
669*0e209d39SAndroid Build Coastguard Worker      *          or if the stackBuffer was too small to hold the clone.
670*0e209d39SAndroid Build Coastguard Worker      * @deprecated ICU 52. Use clone() instead.
671*0e209d39SAndroid Build Coastguard Worker      */
672*0e209d39SAndroid Build Coastguard Worker     virtual RuleBasedBreakIterator *createBufferClone(void *stackBuffer,
673*0e209d39SAndroid Build Coastguard Worker                                                       int32_t &BufferSize,
674*0e209d39SAndroid Build Coastguard Worker                                                       UErrorCode &status) override;
675*0e209d39SAndroid Build Coastguard Worker #endif  // U_FORCE_HIDE_DEPRECATED_API
676*0e209d39SAndroid Build Coastguard Worker 
677*0e209d39SAndroid Build Coastguard Worker     /**
678*0e209d39SAndroid Build Coastguard Worker      * Return the binary form of compiled break rules,
679*0e209d39SAndroid Build Coastguard Worker      * which can then be used to create a new break iterator at some
680*0e209d39SAndroid Build Coastguard Worker      * time in the future.  Creating a break iterator from pre-compiled rules
681*0e209d39SAndroid Build Coastguard Worker      * is much faster than building one from the source form of the
682*0e209d39SAndroid Build Coastguard Worker      * break rules.
683*0e209d39SAndroid Build Coastguard Worker      *
684*0e209d39SAndroid Build Coastguard Worker      * The binary data can only be used with the same version of ICU
685*0e209d39SAndroid Build Coastguard Worker      *  and on the same platform type (processor endian-ness).
686*0e209d39SAndroid Build Coastguard Worker      *
687*0e209d39SAndroid Build Coastguard Worker      * @param length Returns the length of the binary data.  (Out parameter.)
688*0e209d39SAndroid Build Coastguard Worker      *
689*0e209d39SAndroid Build Coastguard Worker      * @return   A pointer to the binary (compiled) rule data.  The storage
690*0e209d39SAndroid Build Coastguard Worker      *           belongs to the RuleBasedBreakIterator object, not the
691*0e209d39SAndroid Build Coastguard Worker      *           caller, and must not be modified or deleted.
692*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 4.8
693*0e209d39SAndroid Build Coastguard Worker      */
694*0e209d39SAndroid Build Coastguard Worker     virtual const uint8_t *getBinaryRules(uint32_t &length);
695*0e209d39SAndroid Build Coastguard Worker 
696*0e209d39SAndroid Build Coastguard Worker     /**
697*0e209d39SAndroid Build Coastguard Worker      *  Set the subject text string upon which the break iterator is operating
698*0e209d39SAndroid Build Coastguard Worker      *  without changing any other aspect of the matching state.
699*0e209d39SAndroid Build Coastguard Worker      *  The new and previous text strings must have the same content.
700*0e209d39SAndroid Build Coastguard Worker      *
701*0e209d39SAndroid Build Coastguard Worker      *  This function is intended for use in environments where ICU is operating on
702*0e209d39SAndroid Build Coastguard Worker      *  strings that may move around in memory.  It provides a mechanism for notifying
703*0e209d39SAndroid Build Coastguard Worker      *  ICU that the string has been relocated, and providing a new UText to access the
704*0e209d39SAndroid Build Coastguard Worker      *  string in its new position.
705*0e209d39SAndroid Build Coastguard Worker      *
706*0e209d39SAndroid Build Coastguard Worker      *  Note that the break iterator implementation never copies the underlying text
707*0e209d39SAndroid Build Coastguard Worker      *  of a string being processed, but always operates directly on the original text
708*0e209d39SAndroid Build Coastguard Worker      *  provided by the user. Refreshing simply drops the references to the old text
709*0e209d39SAndroid Build Coastguard Worker      *  and replaces them with references to the new.
710*0e209d39SAndroid Build Coastguard Worker      *
711*0e209d39SAndroid Build Coastguard Worker      *  Caution:  this function is normally used only by very specialized,
712*0e209d39SAndroid Build Coastguard Worker      *  system-level code.  One example use case is with garbage collection that moves
713*0e209d39SAndroid Build Coastguard Worker      *  the text in memory.
714*0e209d39SAndroid Build Coastguard Worker      *
715*0e209d39SAndroid Build Coastguard Worker      * @param input      A UText providing access to the new (moved) text string.
716*0e209d39SAndroid Build Coastguard Worker      * @param status     Receives errors detected by this function.
717*0e209d39SAndroid Build Coastguard Worker      * @return           *this
718*0e209d39SAndroid Build Coastguard Worker      *
719*0e209d39SAndroid Build Coastguard Worker      * @stable ICU 49
720*0e209d39SAndroid Build Coastguard Worker      */
721*0e209d39SAndroid Build Coastguard Worker     virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status) override;
722*0e209d39SAndroid Build Coastguard Worker 
723*0e209d39SAndroid Build Coastguard Worker 
724*0e209d39SAndroid Build Coastguard Worker private:
725*0e209d39SAndroid Build Coastguard Worker     //=======================================================================
726*0e209d39SAndroid Build Coastguard Worker     // implementation
727*0e209d39SAndroid Build Coastguard Worker     //=======================================================================
728*0e209d39SAndroid Build Coastguard Worker     /**
729*0e209d39SAndroid Build Coastguard Worker      * Iterate backwards from an arbitrary position in the input text using the
730*0e209d39SAndroid Build Coastguard Worker      * synthesized Safe Reverse rules.
731*0e209d39SAndroid Build Coastguard Worker      * This locates (and presumably returns; TODO confirm) a "Safe Position" from which the forward break rules
732*0e209d39SAndroid Build Coastguard Worker      * will operate correctly. A Safe Position is not necessarily a boundary itself.
733*0e209d39SAndroid Build Coastguard Worker      *
734*0e209d39SAndroid Build Coastguard Worker      * @param fromPosition the position in the input text to begin the iteration.
735*0e209d39SAndroid Build Coastguard Worker      * @internal (private)
736*0e209d39SAndroid Build Coastguard Worker      */
737*0e209d39SAndroid Build Coastguard Worker     int32_t handleSafePrevious(int32_t fromPosition);
738*0e209d39SAndroid Build Coastguard Worker 
739*0e209d39SAndroid Build Coastguard Worker     /**
740*0e209d39SAndroid Build Coastguard Worker      * Find a rule-based boundary by running the state machine.
741*0e209d39SAndroid Build Coastguard Worker      * Input:
742*0e209d39SAndroid Build Coastguard Worker      *    fPosition:            the position in the text to begin from.
743*0e209d39SAndroid Build Coastguard Worker      * Output:
744*0e209d39SAndroid Build Coastguard Worker      *    fPosition:            the boundary following the starting position.
745*0e209d39SAndroid Build Coastguard Worker      *    fDictionaryCharCount: the number of dictionary characters encountered.
746*0e209d39SAndroid Build Coastguard Worker      *                          If > 0, the segment will be further subdivided.
747*0e209d39SAndroid Build Coastguard Worker      *    fRuleStatusIndex:     Info from the state table indicating which rules caused the boundary.
748*0e209d39SAndroid Build Coastguard Worker      *
749*0e209d39SAndroid Build Coastguard Worker      * @internal (private)
750*0e209d39SAndroid Build Coastguard Worker      */
751*0e209d39SAndroid Build Coastguard Worker     int32_t handleNext();
752*0e209d39SAndroid Build Coastguard Worker 
753*0e209d39SAndroid Build Coastguard Worker     /*
754*0e209d39SAndroid Build Coastguard Worker      * Templated versions of handleNext() and handleSafePrevious().
755*0e209d39SAndroid Build Coastguard Worker      *
756*0e209d39SAndroid Build Coastguard Worker      * There will be exactly four instantiations, one for each combination of
757*0e209d39SAndroid Build Coastguard Worker      * an 8 or 16 bit table with an 8 or 16 bit trie.
758*0e209d39SAndroid Build Coastguard Worker      * Having separate instantiations for the table types keeps conditional tests of
759*0e209d39SAndroid Build Coastguard Worker      * the table type out of the inner loops, at the expense of replicated code.
760*0e209d39SAndroid Build Coastguard Worker      *
761*0e209d39SAndroid Build Coastguard Worker      * The template parameter for the Trie access function is a value, not a type.
762*0e209d39SAndroid Build Coastguard Worker      * Doing it this way, the compiler will inline the Trie function in the
763*0e209d39SAndroid Build Coastguard Worker      * expanded functions. (Both the 8 and 16 bit access functions have the same type
764*0e209d39SAndroid Build Coastguard Worker      * signature.)
     *
     * PTrieFunc is the pointer type of that access function: it takes a
     * const UCPTrie pointer and a UChar32 and returns a uint16_t.
     * Template parameters of the two member templates below:
     *    RowType   NOTE(review): presumably selects the 8 or 16 bit table row
     *              layout - confirm against the implementation.
     *    trieFunc  the Trie access function, passed as a compile-time value.
765*0e209d39SAndroid Build Coastguard Worker      */
766*0e209d39SAndroid Build Coastguard Worker 
767*0e209d39SAndroid Build Coastguard Worker     typedef uint16_t (*PTrieFunc)(const UCPTrie *, UChar32);
768*0e209d39SAndroid Build Coastguard Worker 
769*0e209d39SAndroid Build Coastguard Worker     template<typename RowType, PTrieFunc trieFunc>
770*0e209d39SAndroid Build Coastguard Worker     int32_t handleSafePrevious(int32_t fromPosition);
771*0e209d39SAndroid Build Coastguard Worker 
772*0e209d39SAndroid Build Coastguard Worker     template<typename RowType, PTrieFunc trieFunc>
773*0e209d39SAndroid Build Coastguard Worker     int32_t handleNext();
774*0e209d39SAndroid Build Coastguard Worker 
775*0e209d39SAndroid Build Coastguard Worker 
776*0e209d39SAndroid Build Coastguard Worker     /**
777*0e209d39SAndroid Build Coastguard Worker      * This function returns the appropriate LanguageBreakEngine for a
778*0e209d39SAndroid Build Coastguard Worker      * given character c.
779*0e209d39SAndroid Build Coastguard Worker      * @param c         A character in the dictionary set
780*0e209d39SAndroid Build Coastguard Worker      * @param locale    The locale.
     * @return          A pointer to the const LanguageBreakEngine chosen for c.
     *                  NOTE(review): whether this can be null, and who owns the
     *                  engine, is not visible here - confirm against the
     *                  implementation.
781*0e209d39SAndroid Build Coastguard Worker      * @internal (private)
782*0e209d39SAndroid Build Coastguard Worker      */
783*0e209d39SAndroid Build Coastguard Worker     const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c, const char* locale);
784*0e209d39SAndroid Build Coastguard Worker 
785*0e209d39SAndroid Build Coastguard Worker   public:
786*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_INTERNAL_API
787*0e209d39SAndroid Build Coastguard Worker     /**
788*0e209d39SAndroid Build Coastguard Worker      *   Debugging function only.
     *   Declared only when U_HIDE_INTERNAL_API is not defined.
789*0e209d39SAndroid Build Coastguard Worker      *   @internal
790*0e209d39SAndroid Build Coastguard Worker      */
791*0e209d39SAndroid Build Coastguard Worker      void dumpCache();
792*0e209d39SAndroid Build Coastguard Worker 
793*0e209d39SAndroid Build Coastguard Worker     /**
794*0e209d39SAndroid Build Coastguard Worker      * Debugging function only.
     * Declared only when U_HIDE_INTERNAL_API is not defined.
795*0e209d39SAndroid Build Coastguard Worker      * @internal
796*0e209d39SAndroid Build Coastguard Worker      */
797*0e209d39SAndroid Build Coastguard Worker     void dumpTables();
798*0e209d39SAndroid Build Coastguard Worker #endif  /* U_HIDE_INTERNAL_API */
799*0e209d39SAndroid Build Coastguard Worker 
800*0e209d39SAndroid Build Coastguard Worker #ifndef U_HIDE_INTERNAL_API
801*0e209d39SAndroid Build Coastguard Worker     /**
802*0e209d39SAndroid Build Coastguard Worker      * Register a new external break engine. The external break engine will be adopted.
803*0e209d39SAndroid Build Coastguard Worker      * Because ICU may choose to cache break engines internally, this must
804*0e209d39SAndroid Build Coastguard Worker      * be called at application startup, prior to any calls to
805*0e209d39SAndroid Build Coastguard Worker      * object methods of RuleBasedBreakIterator to avoid undefined behavior.
     * This is a static member function, declared only when
     * U_HIDE_INTERNAL_API is not defined.
806*0e209d39SAndroid Build Coastguard Worker      * @param toAdopt the ExternalBreakEngine instance to be adopted
807*0e209d39SAndroid Build Coastguard Worker      * @param status the in/out status code, no special meanings are assigned
808*0e209d39SAndroid Build Coastguard Worker      * @internal ICU 74 technology preview
809*0e209d39SAndroid Build Coastguard Worker      */
810*0e209d39SAndroid Build Coastguard Worker     static void U_EXPORT2 registerExternalBreakEngine(
811*0e209d39SAndroid Build Coastguard Worker                   ExternalBreakEngine* toAdopt, UErrorCode& status);
812*0e209d39SAndroid Build Coastguard Worker #endif  /* U_HIDE_INTERNAL_API */
813*0e209d39SAndroid Build Coastguard Worker 
814*0e209d39SAndroid Build Coastguard Worker };
815*0e209d39SAndroid Build Coastguard Worker 
816*0e209d39SAndroid Build Coastguard Worker 
817*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
818*0e209d39SAndroid Build Coastguard Worker 
819*0e209d39SAndroid Build Coastguard Worker #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
820*0e209d39SAndroid Build Coastguard Worker 
821*0e209d39SAndroid Build Coastguard Worker #endif /* U_SHOW_CPLUSPLUS_API */
822*0e209d39SAndroid Build Coastguard Worker 
823*0e209d39SAndroid Build Coastguard Worker #endif
824