xref: /aosp_15_r20/external/icu/libicu/cts_headers/dictbe.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /**
4*0e209d39SAndroid Build Coastguard Worker  *******************************************************************************
5*0e209d39SAndroid Build Coastguard Worker  * Copyright (C) 2006-2014, International Business Machines Corporation   *
6*0e209d39SAndroid Build Coastguard Worker  * and others. All Rights Reserved.                                            *
7*0e209d39SAndroid Build Coastguard Worker  *******************************************************************************
8*0e209d39SAndroid Build Coastguard Worker  */
9*0e209d39SAndroid Build Coastguard Worker 
10*0e209d39SAndroid Build Coastguard Worker #ifndef DICTBE_H
11*0e209d39SAndroid Build Coastguard Worker #define DICTBE_H
12*0e209d39SAndroid Build Coastguard Worker 
13*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
14*0e209d39SAndroid Build Coastguard Worker #include "unicode/uniset.h"
15*0e209d39SAndroid Build Coastguard Worker #include "unicode/utext.h"
16*0e209d39SAndroid Build Coastguard Worker 
17*0e209d39SAndroid Build Coastguard Worker #include "brkeng.h"
18*0e209d39SAndroid Build Coastguard Worker #include "hash.h"
19*0e209d39SAndroid Build Coastguard Worker #include "mlbe.h"
20*0e209d39SAndroid Build Coastguard Worker #include "uvectr32.h"
21*0e209d39SAndroid Build Coastguard Worker 
22*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
23*0e209d39SAndroid Build Coastguard Worker 
24*0e209d39SAndroid Build Coastguard Worker class DictionaryMatcher;  // forward declaration; referenced in this header only through pointers
25*0e209d39SAndroid Build Coastguard Worker class MlBreakEngine;      // forward declaration; referenced in this header only through a pointer member
26*0e209d39SAndroid Build Coastguard Worker class Normalizer2;        // forward declaration; referenced in this header only through a const pointer member
27*0e209d39SAndroid Build Coastguard Worker 
28*0e209d39SAndroid Build Coastguard Worker /*******************************************************************
29*0e209d39SAndroid Build Coastguard Worker  * DictionaryBreakEngine
30*0e209d39SAndroid Build Coastguard Worker  */
31*0e209d39SAndroid Build Coastguard Worker 
32*0e209d39SAndroid Build Coastguard Worker /**
33*0e209d39SAndroid Build Coastguard Worker  * <p>DictionaryBreakEngine is a kind of LanguageBreakEngine that uses a
34*0e209d39SAndroid Build Coastguard Worker  * dictionary to determine language-specific breaks.</p>
35*0e209d39SAndroid Build Coastguard Worker  *
36*0e209d39SAndroid Build Coastguard Worker  * <p>After it is constructed a DictionaryBreakEngine may be shared between
37*0e209d39SAndroid Build Coastguard Worker  * threads without synchronization.</p>
38*0e209d39SAndroid Build Coastguard Worker  */
39*0e209d39SAndroid Build Coastguard Worker class DictionaryBreakEngine : public LanguageBreakEngine {
40*0e209d39SAndroid Build Coastguard Worker  private:
41*0e209d39SAndroid Build Coastguard Worker     /**
42*0e209d39SAndroid Build Coastguard Worker      * The set of characters handled by this engine
43*0e209d39SAndroid Build Coastguard Worker      * @internal
44*0e209d39SAndroid Build Coastguard Worker      */
45*0e209d39SAndroid Build Coastguard Worker 
46*0e209d39SAndroid Build Coastguard Worker   UnicodeSet    fSet;  // private, so derived engines cannot access it directly; see setCharacters()
47*0e209d39SAndroid Build Coastguard Worker 
48*0e209d39SAndroid Build Coastguard Worker  public:
49*0e209d39SAndroid Build Coastguard Worker 
50*0e209d39SAndroid Build Coastguard Worker   /**
51*0e209d39SAndroid Build Coastguard Worker    * <p>Default constructor; takes no arguments.</p>
52*0e209d39SAndroid Build Coastguard Worker    */
53*0e209d39SAndroid Build Coastguard Worker   DictionaryBreakEngine();
54*0e209d39SAndroid Build Coastguard Worker 
55*0e209d39SAndroid Build Coastguard Worker   /**
56*0e209d39SAndroid Build Coastguard Worker    * <p>Virtual destructor.</p>
57*0e209d39SAndroid Build Coastguard Worker    */
58*0e209d39SAndroid Build Coastguard Worker   virtual ~DictionaryBreakEngine();
59*0e209d39SAndroid Build Coastguard Worker 
60*0e209d39SAndroid Build Coastguard Worker   /**
61*0e209d39SAndroid Build Coastguard Worker    * <p>Indicate whether this engine handles a particular character.</p>
62*0e209d39SAndroid Build Coastguard Worker    * NOTE(review): how (or whether) locale affects the result is not visible in this header.
63*0e209d39SAndroid Build Coastguard Worker    *
64*0e209d39SAndroid Build Coastguard Worker    * @param c A character which begins a run that the engine might handle
65*0e209d39SAndroid Build Coastguard Worker    * @param locale The locale.
66*0e209d39SAndroid Build Coastguard Worker    * @return true if this engine handles the particular character; the
67*0e209d39SAndroid Build Coastguard Worker    * signature has no break-type argument.
68*0e209d39SAndroid Build Coastguard Worker    */
69*0e209d39SAndroid Build Coastguard Worker   virtual UBool handles(UChar32 c, const char* locale) const override;
70*0e209d39SAndroid Build Coastguard Worker 
71*0e209d39SAndroid Build Coastguard Worker   /**
72*0e209d39SAndroid Build Coastguard Worker    * <p>Find any breaks within a run in the supplied text.</p>
73*0e209d39SAndroid Build Coastguard Worker    *
74*0e209d39SAndroid Build Coastguard Worker    * @param text A UText representing the text. The iterator is left at
75*0e209d39SAndroid Build Coastguard Worker    * the end of the run of characters which the engine is capable of handling
76*0e209d39SAndroid Build Coastguard Worker    * that starts from the first character in the range.
77*0e209d39SAndroid Build Coastguard Worker    * @param startPos The start of the run within the supplied text.
78*0e209d39SAndroid Build Coastguard Worker    * @param endPos The end of the run within the supplied text.
79*0e209d39SAndroid Build Coastguard Worker    * @param foundBreaks UVector32 (vector of int32_t) to receive the break positions
80*0e209d39SAndroid Build Coastguard Worker    * @param status Information on any errors encountered.
81*0e209d39SAndroid Build Coastguard Worker    * @return The number of breaks found.
82*0e209d39SAndroid Build Coastguard Worker    */
83*0e209d39SAndroid Build Coastguard Worker   virtual int32_t findBreaks( UText *text,
84*0e209d39SAndroid Build Coastguard Worker                               int32_t startPos,
85*0e209d39SAndroid Build Coastguard Worker                               int32_t endPos,
86*0e209d39SAndroid Build Coastguard Worker                               UVector32 &foundBreaks,
87*0e209d39SAndroid Build Coastguard Worker                               UBool isPhraseBreaking,  // not in the @param list above; presumably enables phrase-based breaking - TODO confirm
88*0e209d39SAndroid Build Coastguard Worker                               UErrorCode& status ) const override;
89*0e209d39SAndroid Build Coastguard Worker 
90*0e209d39SAndroid Build Coastguard Worker  protected:
91*0e209d39SAndroid Build Coastguard Worker 
92*0e209d39SAndroid Build Coastguard Worker  /**
93*0e209d39SAndroid Build Coastguard Worker   * <p>Set the character set handled by this engine.</p>
94*0e209d39SAndroid Build Coastguard Worker   *
95*0e209d39SAndroid Build Coastguard Worker   * @param set A UnicodeSet of the set of characters handled by the engine
96*0e209d39SAndroid Build Coastguard Worker   */
97*0e209d39SAndroid Build Coastguard Worker   virtual void setCharacters( const UnicodeSet &set );
98*0e209d39SAndroid Build Coastguard Worker 
99*0e209d39SAndroid Build Coastguard Worker  /**
100*0e209d39SAndroid Build Coastguard Worker   * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
101*0e209d39SAndroid Build Coastguard Worker   *
102*0e209d39SAndroid Build Coastguard Worker   * @param text A UText representing the text
103*0e209d39SAndroid Build Coastguard Worker   * @param rangeStart The start of the range of dictionary characters
104*0e209d39SAndroid Build Coastguard Worker   * @param rangeEnd The end of the range of dictionary characters
105*0e209d39SAndroid Build Coastguard Worker   * @param foundBreaks UVector32 that receives the break positions (passed by reference)
106*0e209d39SAndroid Build Coastguard Worker   * @param status Information on any errors encountered.
107*0e209d39SAndroid Build Coastguard Worker   * @return The number of breaks found
108*0e209d39SAndroid Build Coastguard Worker   */
109*0e209d39SAndroid Build Coastguard Worker   virtual int32_t divideUpDictionaryRange( UText *text,
110*0e209d39SAndroid Build Coastguard Worker                                            int32_t rangeStart,
111*0e209d39SAndroid Build Coastguard Worker                                            int32_t rangeEnd,
112*0e209d39SAndroid Build Coastguard Worker                                            UVector32 &foundBreaks,
113*0e209d39SAndroid Build Coastguard Worker                                            UBool isPhraseBreaking,  // not in the @param list above; presumably enables phrase-based breaking - TODO confirm
114*0e209d39SAndroid Build Coastguard Worker                                            UErrorCode& status) const = 0;  // pure virtual: each concrete engine below overrides it
115*0e209d39SAndroid Build Coastguard Worker 
116*0e209d39SAndroid Build Coastguard Worker };
117*0e209d39SAndroid Build Coastguard Worker 
118*0e209d39SAndroid Build Coastguard Worker /*******************************************************************
119*0e209d39SAndroid Build Coastguard Worker  * ThaiBreakEngine
120*0e209d39SAndroid Build Coastguard Worker  */
121*0e209d39SAndroid Build Coastguard Worker 
122*0e209d39SAndroid Build Coastguard Worker /**
123*0e209d39SAndroid Build Coastguard Worker  * <p>ThaiBreakEngine is a kind of DictionaryBreakEngine that uses a
124*0e209d39SAndroid Build Coastguard Worker  * dictionary and heuristics to determine Thai-specific breaks.</p>
125*0e209d39SAndroid Build Coastguard Worker  *
126*0e209d39SAndroid Build Coastguard Worker  * <p>After it is constructed a ThaiBreakEngine may be shared between
127*0e209d39SAndroid Build Coastguard Worker  * threads without synchronization.</p>
128*0e209d39SAndroid Build Coastguard Worker  */
129*0e209d39SAndroid Build Coastguard Worker class ThaiBreakEngine : public DictionaryBreakEngine {
130*0e209d39SAndroid Build Coastguard Worker  private:
131*0e209d39SAndroid Build Coastguard Worker     /**
132*0e209d39SAndroid Build Coastguard Worker      * Helper character sets (roles suggested by the member names) and the dictionary matcher.
133*0e209d39SAndroid Build Coastguard Worker      * @internal
134*0e209d39SAndroid Build Coastguard Worker      */
135*0e209d39SAndroid Build Coastguard Worker 
136*0e209d39SAndroid Build Coastguard Worker   UnicodeSet                fEndWordSet;
137*0e209d39SAndroid Build Coastguard Worker   UnicodeSet                fBeginWordSet;
138*0e209d39SAndroid Build Coastguard Worker   UnicodeSet                fSuffixSet;
139*0e209d39SAndroid Build Coastguard Worker   UnicodeSet                fMarkSet;
140*0e209d39SAndroid Build Coastguard Worker   DictionaryMatcher  *fDictionary;  // presumably the matcher adopted by the constructor - TODO confirm
141*0e209d39SAndroid Build Coastguard Worker 
142*0e209d39SAndroid Build Coastguard Worker  public:
143*0e209d39SAndroid Build Coastguard Worker 
144*0e209d39SAndroid Build Coastguard Worker   /**
145*0e209d39SAndroid Build Coastguard Worker    * <p>Constructor.</p>
146*0e209d39SAndroid Build Coastguard Worker    * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
147*0e209d39SAndroid Build Coastguard Worker    * engine is deleted.
148*0e209d39SAndroid Build Coastguard Worker    * @param status Information on any errors encountered.
149*0e209d39SAndroid Build Coastguard Worker    */
150*0e209d39SAndroid Build Coastguard Worker   ThaiBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
151*0e209d39SAndroid Build Coastguard Worker 
152*0e209d39SAndroid Build Coastguard Worker   /**
153*0e209d39SAndroid Build Coastguard Worker    * <p>Virtual destructor.</p>
154*0e209d39SAndroid Build Coastguard Worker    */
155*0e209d39SAndroid Build Coastguard Worker   virtual ~ThaiBreakEngine();
156*0e209d39SAndroid Build Coastguard Worker 
157*0e209d39SAndroid Build Coastguard Worker  protected:
158*0e209d39SAndroid Build Coastguard Worker  /**
159*0e209d39SAndroid Build Coastguard Worker   * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
160*0e209d39SAndroid Build Coastguard Worker   *
161*0e209d39SAndroid Build Coastguard Worker   * @param text A UText representing the text
162*0e209d39SAndroid Build Coastguard Worker   * @param rangeStart The start of the range of dictionary characters
163*0e209d39SAndroid Build Coastguard Worker   * @param rangeEnd The end of the range of dictionary characters
164*0e209d39SAndroid Build Coastguard Worker   * @param foundBreaks UVector32 that receives the break positions (passed by reference)
165*0e209d39SAndroid Build Coastguard Worker   * @param status Information on any errors encountered.
166*0e209d39SAndroid Build Coastguard Worker   * @return The number of breaks found
167*0e209d39SAndroid Build Coastguard Worker   */
168*0e209d39SAndroid Build Coastguard Worker   virtual int32_t divideUpDictionaryRange( UText *text,
169*0e209d39SAndroid Build Coastguard Worker                                            int32_t rangeStart,
170*0e209d39SAndroid Build Coastguard Worker                                            int32_t rangeEnd,
171*0e209d39SAndroid Build Coastguard Worker                                            UVector32 &foundBreaks,
172*0e209d39SAndroid Build Coastguard Worker                                            UBool isPhraseBreaking,  // not in the @param list above; presumably enables phrase-based breaking - TODO confirm
173*0e209d39SAndroid Build Coastguard Worker                                            UErrorCode& status) const override;
174*0e209d39SAndroid Build Coastguard Worker 
175*0e209d39SAndroid Build Coastguard Worker };
176*0e209d39SAndroid Build Coastguard Worker 
177*0e209d39SAndroid Build Coastguard Worker /*******************************************************************
178*0e209d39SAndroid Build Coastguard Worker  * LaoBreakEngine
179*0e209d39SAndroid Build Coastguard Worker  */
180*0e209d39SAndroid Build Coastguard Worker 
181*0e209d39SAndroid Build Coastguard Worker /**
182*0e209d39SAndroid Build Coastguard Worker  * <p>LaoBreakEngine is a kind of DictionaryBreakEngine that uses a
183*0e209d39SAndroid Build Coastguard Worker  * dictionary and heuristics to determine Lao-specific breaks.</p>
184*0e209d39SAndroid Build Coastguard Worker  *
185*0e209d39SAndroid Build Coastguard Worker  * <p>After it is constructed a LaoBreakEngine may be shared between
186*0e209d39SAndroid Build Coastguard Worker  * threads without synchronization.</p>
187*0e209d39SAndroid Build Coastguard Worker  */
188*0e209d39SAndroid Build Coastguard Worker class LaoBreakEngine : public DictionaryBreakEngine {
189*0e209d39SAndroid Build Coastguard Worker  private:
190*0e209d39SAndroid Build Coastguard Worker     /**
191*0e209d39SAndroid Build Coastguard Worker      * Helper character sets (roles suggested by the member names) and the dictionary matcher.
192*0e209d39SAndroid Build Coastguard Worker      * @internal
193*0e209d39SAndroid Build Coastguard Worker      */
194*0e209d39SAndroid Build Coastguard Worker 
195*0e209d39SAndroid Build Coastguard Worker   UnicodeSet                fEndWordSet;
196*0e209d39SAndroid Build Coastguard Worker   UnicodeSet                fBeginWordSet;
197*0e209d39SAndroid Build Coastguard Worker   UnicodeSet                fMarkSet;
198*0e209d39SAndroid Build Coastguard Worker   DictionaryMatcher  *fDictionary;  // presumably the matcher adopted by the constructor - TODO confirm
199*0e209d39SAndroid Build Coastguard Worker 
200*0e209d39SAndroid Build Coastguard Worker  public:
201*0e209d39SAndroid Build Coastguard Worker 
202*0e209d39SAndroid Build Coastguard Worker   /**
203*0e209d39SAndroid Build Coastguard Worker    * <p>Constructor.</p>
204*0e209d39SAndroid Build Coastguard Worker    * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
205*0e209d39SAndroid Build Coastguard Worker    * engine is deleted.
206*0e209d39SAndroid Build Coastguard Worker    * @param status Information on any errors encountered.
207*0e209d39SAndroid Build Coastguard Worker    */
208*0e209d39SAndroid Build Coastguard Worker   LaoBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
209*0e209d39SAndroid Build Coastguard Worker 
210*0e209d39SAndroid Build Coastguard Worker   /**
211*0e209d39SAndroid Build Coastguard Worker    * <p>Virtual destructor.</p>
212*0e209d39SAndroid Build Coastguard Worker    */
213*0e209d39SAndroid Build Coastguard Worker   virtual ~LaoBreakEngine();
214*0e209d39SAndroid Build Coastguard Worker 
215*0e209d39SAndroid Build Coastguard Worker  protected:
216*0e209d39SAndroid Build Coastguard Worker  /**
217*0e209d39SAndroid Build Coastguard Worker   * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
218*0e209d39SAndroid Build Coastguard Worker   *
219*0e209d39SAndroid Build Coastguard Worker   * @param text A UText representing the text
220*0e209d39SAndroid Build Coastguard Worker   * @param rangeStart The start of the range of dictionary characters
221*0e209d39SAndroid Build Coastguard Worker   * @param rangeEnd The end of the range of dictionary characters
222*0e209d39SAndroid Build Coastguard Worker   * @param foundBreaks UVector32 that receives the break positions (passed by reference)
223*0e209d39SAndroid Build Coastguard Worker   * @param status Information on any errors encountered.
224*0e209d39SAndroid Build Coastguard Worker   * @return The number of breaks found
225*0e209d39SAndroid Build Coastguard Worker   */
226*0e209d39SAndroid Build Coastguard Worker   virtual int32_t divideUpDictionaryRange( UText *text,
227*0e209d39SAndroid Build Coastguard Worker                                            int32_t rangeStart,
228*0e209d39SAndroid Build Coastguard Worker                                            int32_t rangeEnd,
229*0e209d39SAndroid Build Coastguard Worker                                            UVector32 &foundBreaks,
230*0e209d39SAndroid Build Coastguard Worker                                            UBool isPhraseBreaking,  // not in the @param list above; presumably enables phrase-based breaking - TODO confirm
231*0e209d39SAndroid Build Coastguard Worker                                            UErrorCode& status) const override;
232*0e209d39SAndroid Build Coastguard Worker 
233*0e209d39SAndroid Build Coastguard Worker };
234*0e209d39SAndroid Build Coastguard Worker 
235*0e209d39SAndroid Build Coastguard Worker /*******************************************************************
236*0e209d39SAndroid Build Coastguard Worker  * BurmeseBreakEngine
237*0e209d39SAndroid Build Coastguard Worker  */
238*0e209d39SAndroid Build Coastguard Worker 
239*0e209d39SAndroid Build Coastguard Worker /**
240*0e209d39SAndroid Build Coastguard Worker  * <p>BurmeseBreakEngine is a kind of DictionaryBreakEngine that uses a
241*0e209d39SAndroid Build Coastguard Worker  * DictionaryMatcher and heuristics to determine Burmese-specific breaks.</p>
242*0e209d39SAndroid Build Coastguard Worker  *
243*0e209d39SAndroid Build Coastguard Worker  * <p>After it is constructed a BurmeseBreakEngine may be shared between
244*0e209d39SAndroid Build Coastguard Worker  * threads without synchronization.</p>
245*0e209d39SAndroid Build Coastguard Worker  */
246*0e209d39SAndroid Build Coastguard Worker class BurmeseBreakEngine : public DictionaryBreakEngine {
247*0e209d39SAndroid Build Coastguard Worker  private:
248*0e209d39SAndroid Build Coastguard Worker     /**
249*0e209d39SAndroid Build Coastguard Worker      * Helper character sets (roles suggested by the member names) and the dictionary matcher.
250*0e209d39SAndroid Build Coastguard Worker      * @internal
251*0e209d39SAndroid Build Coastguard Worker      */
252*0e209d39SAndroid Build Coastguard Worker 
253*0e209d39SAndroid Build Coastguard Worker   UnicodeSet                fEndWordSet;
254*0e209d39SAndroid Build Coastguard Worker   UnicodeSet                fBeginWordSet;
255*0e209d39SAndroid Build Coastguard Worker   UnicodeSet                fMarkSet;
256*0e209d39SAndroid Build Coastguard Worker   DictionaryMatcher  *fDictionary;  // presumably the matcher adopted by the constructor - TODO confirm
257*0e209d39SAndroid Build Coastguard Worker 
258*0e209d39SAndroid Build Coastguard Worker  public:
259*0e209d39SAndroid Build Coastguard Worker 
260*0e209d39SAndroid Build Coastguard Worker   /**
261*0e209d39SAndroid Build Coastguard Worker    * <p>Constructor.</p>
262*0e209d39SAndroid Build Coastguard Worker    * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
263*0e209d39SAndroid Build Coastguard Worker    * engine is deleted.
264*0e209d39SAndroid Build Coastguard Worker    * @param status Information on any errors encountered.
265*0e209d39SAndroid Build Coastguard Worker    */
266*0e209d39SAndroid Build Coastguard Worker   BurmeseBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
267*0e209d39SAndroid Build Coastguard Worker 
268*0e209d39SAndroid Build Coastguard Worker   /**
269*0e209d39SAndroid Build Coastguard Worker    * <p>Virtual destructor.</p>
270*0e209d39SAndroid Build Coastguard Worker    */
271*0e209d39SAndroid Build Coastguard Worker   virtual ~BurmeseBreakEngine();
272*0e209d39SAndroid Build Coastguard Worker 
273*0e209d39SAndroid Build Coastguard Worker  protected:
274*0e209d39SAndroid Build Coastguard Worker  /**
275*0e209d39SAndroid Build Coastguard Worker   * <p>Divide up a range of known dictionary characters.</p>
276*0e209d39SAndroid Build Coastguard Worker   *
277*0e209d39SAndroid Build Coastguard Worker   * @param text A UText representing the text
278*0e209d39SAndroid Build Coastguard Worker   * @param rangeStart The start of the range of dictionary characters
279*0e209d39SAndroid Build Coastguard Worker   * @param rangeEnd The end of the range of dictionary characters
280*0e209d39SAndroid Build Coastguard Worker   * @param foundBreaks UVector32 that receives the break positions (passed by reference)
281*0e209d39SAndroid Build Coastguard Worker   * @param status Information on any errors encountered.
282*0e209d39SAndroid Build Coastguard Worker   * @return The number of breaks found
283*0e209d39SAndroid Build Coastguard Worker   */
284*0e209d39SAndroid Build Coastguard Worker   virtual int32_t divideUpDictionaryRange( UText *text,
285*0e209d39SAndroid Build Coastguard Worker                                            int32_t rangeStart,
286*0e209d39SAndroid Build Coastguard Worker                                            int32_t rangeEnd,
287*0e209d39SAndroid Build Coastguard Worker                                            UVector32 &foundBreaks,
288*0e209d39SAndroid Build Coastguard Worker                                            UBool isPhraseBreaking,  // not in the @param list above; presumably enables phrase-based breaking - TODO confirm
289*0e209d39SAndroid Build Coastguard Worker                                            UErrorCode& status) const override;
290*0e209d39SAndroid Build Coastguard Worker 
291*0e209d39SAndroid Build Coastguard Worker };
292*0e209d39SAndroid Build Coastguard Worker 
293*0e209d39SAndroid Build Coastguard Worker /*******************************************************************
294*0e209d39SAndroid Build Coastguard Worker  * KhmerBreakEngine
295*0e209d39SAndroid Build Coastguard Worker  */
296*0e209d39SAndroid Build Coastguard Worker 
297*0e209d39SAndroid Build Coastguard Worker /**
298*0e209d39SAndroid Build Coastguard Worker  * <p>KhmerBreakEngine is a kind of DictionaryBreakEngine that uses a
299*0e209d39SAndroid Build Coastguard Worker  * DictionaryMatcher and heuristics to determine Khmer-specific breaks.</p>
300*0e209d39SAndroid Build Coastguard Worker  *
301*0e209d39SAndroid Build Coastguard Worker  * <p>After it is constructed a KhmerBreakEngine may be shared between
302*0e209d39SAndroid Build Coastguard Worker  * threads without synchronization.</p>
303*0e209d39SAndroid Build Coastguard Worker  */
304*0e209d39SAndroid Build Coastguard Worker class KhmerBreakEngine : public DictionaryBreakEngine {
305*0e209d39SAndroid Build Coastguard Worker  private:
306*0e209d39SAndroid Build Coastguard Worker     /**
307*0e209d39SAndroid Build Coastguard Worker      * Helper character sets (roles suggested by the member names) and the dictionary matcher.
308*0e209d39SAndroid Build Coastguard Worker      * @internal
309*0e209d39SAndroid Build Coastguard Worker      */
310*0e209d39SAndroid Build Coastguard Worker 
311*0e209d39SAndroid Build Coastguard Worker   UnicodeSet                fEndWordSet;
312*0e209d39SAndroid Build Coastguard Worker   UnicodeSet                fBeginWordSet;
313*0e209d39SAndroid Build Coastguard Worker   UnicodeSet                fMarkSet;
314*0e209d39SAndroid Build Coastguard Worker   DictionaryMatcher  *fDictionary;  // presumably the matcher adopted by the constructor - TODO confirm
315*0e209d39SAndroid Build Coastguard Worker 
316*0e209d39SAndroid Build Coastguard Worker  public:
317*0e209d39SAndroid Build Coastguard Worker 
318*0e209d39SAndroid Build Coastguard Worker   /**
319*0e209d39SAndroid Build Coastguard Worker    * <p>Constructor.</p>
320*0e209d39SAndroid Build Coastguard Worker    * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the
321*0e209d39SAndroid Build Coastguard Worker    * engine is deleted.
322*0e209d39SAndroid Build Coastguard Worker    * @param status Information on any errors encountered.
323*0e209d39SAndroid Build Coastguard Worker    */
324*0e209d39SAndroid Build Coastguard Worker   KhmerBreakEngine(DictionaryMatcher *adoptDictionary, UErrorCode &status);
325*0e209d39SAndroid Build Coastguard Worker 
326*0e209d39SAndroid Build Coastguard Worker   /**
327*0e209d39SAndroid Build Coastguard Worker    * <p>Virtual destructor.</p>
328*0e209d39SAndroid Build Coastguard Worker    */
329*0e209d39SAndroid Build Coastguard Worker   virtual ~KhmerBreakEngine();
330*0e209d39SAndroid Build Coastguard Worker 
331*0e209d39SAndroid Build Coastguard Worker  protected:
332*0e209d39SAndroid Build Coastguard Worker  /**
333*0e209d39SAndroid Build Coastguard Worker   * <p>Divide up a range of known dictionary characters.</p>
334*0e209d39SAndroid Build Coastguard Worker   *
335*0e209d39SAndroid Build Coastguard Worker   * @param text A UText representing the text
336*0e209d39SAndroid Build Coastguard Worker   * @param rangeStart The start of the range of dictionary characters
337*0e209d39SAndroid Build Coastguard Worker   * @param rangeEnd The end of the range of dictionary characters
338*0e209d39SAndroid Build Coastguard Worker   * @param foundBreaks UVector32 that receives the break positions (passed by reference)
339*0e209d39SAndroid Build Coastguard Worker   * @param status Information on any errors encountered.
340*0e209d39SAndroid Build Coastguard Worker   * @return The number of breaks found
341*0e209d39SAndroid Build Coastguard Worker   */
342*0e209d39SAndroid Build Coastguard Worker   virtual int32_t divideUpDictionaryRange( UText *text,
343*0e209d39SAndroid Build Coastguard Worker                                            int32_t rangeStart,
344*0e209d39SAndroid Build Coastguard Worker                                            int32_t rangeEnd,
345*0e209d39SAndroid Build Coastguard Worker                                            UVector32 &foundBreaks,
346*0e209d39SAndroid Build Coastguard Worker                                            UBool isPhraseBreaking,  // not in the @param list above; presumably enables phrase-based breaking - TODO confirm
347*0e209d39SAndroid Build Coastguard Worker                                            UErrorCode& status) const override;
348*0e209d39SAndroid Build Coastguard Worker 
349*0e209d39SAndroid Build Coastguard Worker };
350*0e209d39SAndroid Build Coastguard Worker 
351*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_NORMALIZATION
352*0e209d39SAndroid Build Coastguard Worker 
353*0e209d39SAndroid Build Coastguard Worker /*******************************************************************
354*0e209d39SAndroid Build Coastguard Worker  * CjkBreakEngine
355*0e209d39SAndroid Build Coastguard Worker  */
356*0e209d39SAndroid Build Coastguard Worker 
357*0e209d39SAndroid Build Coastguard Worker // Indicates the language/script that a CjkBreakEngine will handle; passed to the CjkBreakEngine constructor.
358*0e209d39SAndroid Build Coastguard Worker enum LanguageType {
359*0e209d39SAndroid Build Coastguard Worker     kKorean,
360*0e209d39SAndroid Build Coastguard Worker     kChineseJapanese  // Chinese and Japanese share a single enumerator
361*0e209d39SAndroid Build Coastguard Worker };
362*0e209d39SAndroid Build Coastguard Worker 
363*0e209d39SAndroid Build Coastguard Worker /**
364*0e209d39SAndroid Build Coastguard Worker  * <p>CjkBreakEngine is a kind of DictionaryBreakEngine that uses a
365*0e209d39SAndroid Build Coastguard Worker  * dictionary with costs associated with each word and
366*0e209d39SAndroid Build Coastguard Worker  * Viterbi decoding to determine CJK-specific breaks.</p>
367*0e209d39SAndroid Build Coastguard Worker  */
368*0e209d39SAndroid Build Coastguard Worker class CjkBreakEngine : public DictionaryBreakEngine {
369*0e209d39SAndroid Build Coastguard Worker  protected:
370*0e209d39SAndroid Build Coastguard Worker     /**
371*0e209d39SAndroid Build Coastguard Worker      * Helper character sets (roles suggested by the member names), followed by the engine's other state.
372*0e209d39SAndroid Build Coastguard Worker      * @internal
373*0e209d39SAndroid Build Coastguard Worker      */
374*0e209d39SAndroid Build Coastguard Worker   UnicodeSet                fHangulWordSet;
375*0e209d39SAndroid Build Coastguard Worker   UnicodeSet                fDigitOrOpenPunctuationOrAlphabetSet;
376*0e209d39SAndroid Build Coastguard Worker   UnicodeSet                fClosePunctuationSet;
377*0e209d39SAndroid Build Coastguard Worker 
378*0e209d39SAndroid Build Coastguard Worker   DictionaryMatcher        *fDictionary;  // presumably the matcher adopted by the constructor - TODO confirm
379*0e209d39SAndroid Build Coastguard Worker   const Normalizer2        *nfkcNorm2;  // NOTE(review): ownership is not visible in this header
380*0e209d39SAndroid Build Coastguard Worker   MlBreakEngine            *fMlBreakEngine;  // NOTE(review): ownership is not visible in this header
381*0e209d39SAndroid Build Coastguard Worker   bool                      isCj;  // presumably true when constructed with kChineseJapanese - TODO confirm
382*0e209d39SAndroid Build Coastguard Worker 
383*0e209d39SAndroid Build Coastguard Worker  private:
384*0e209d39SAndroid Build Coastguard Worker   // Load Japanese extensions.
385*0e209d39SAndroid Build Coastguard Worker   void loadJapaneseExtensions(UErrorCode& error);
386*0e209d39SAndroid Build Coastguard Worker   // Load Japanese Hiragana.
387*0e209d39SAndroid Build Coastguard Worker   void loadHiragana(UErrorCode& error);
388*0e209d39SAndroid Build Coastguard Worker   // Initialize fSkipSet by loading Japanese Hiragana and extensions.
389*0e209d39SAndroid Build Coastguard Worker   void initJapanesePhraseParameter(UErrorCode& error);
390*0e209d39SAndroid Build Coastguard Worker 
391*0e209d39SAndroid Build Coastguard Worker   Hashtable fSkipSet;  // initialized by initJapanesePhraseParameter(), per the comment on that method
392*0e209d39SAndroid Build Coastguard Worker 
393*0e209d39SAndroid Build Coastguard Worker  public:
394*0e209d39SAndroid Build Coastguard Worker 
395*0e209d39SAndroid Build Coastguard Worker     /**
396*0e209d39SAndroid Build Coastguard Worker      * <p>Constructor.</p>
397*0e209d39SAndroid Build Coastguard Worker      * @param adoptDictionary A DictionaryMatcher to adopt. Deleted when the engine
398*0e209d39SAndroid Build Coastguard Worker      * is deleted. Must contain costs for each word for the dictionary to work properly.
399*0e209d39SAndroid Build Coastguard Worker      * @param type The language/script (LanguageType) that this engine will handle.
400*0e209d39SAndroid Build Coastguard Worker      * @param status Information on any errors encountered.
401*0e209d39SAndroid Build Coastguard Worker      */
402*0e209d39SAndroid Build Coastguard Worker   CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status);
403*0e209d39SAndroid Build Coastguard Worker 
404*0e209d39SAndroid Build Coastguard Worker     /**
405*0e209d39SAndroid Build Coastguard Worker      * <p>Virtual destructor.</p>
406*0e209d39SAndroid Build Coastguard Worker      */
407*0e209d39SAndroid Build Coastguard Worker   virtual ~CjkBreakEngine();
408*0e209d39SAndroid Build Coastguard Worker 
409*0e209d39SAndroid Build Coastguard Worker  protected:
410*0e209d39SAndroid Build Coastguard Worker     /**
411*0e209d39SAndroid Build Coastguard Worker      * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
412*0e209d39SAndroid Build Coastguard Worker      *
413*0e209d39SAndroid Build Coastguard Worker      * @param text A UText representing the text
414*0e209d39SAndroid Build Coastguard Worker      * @param rangeStart The start of the range of dictionary characters
415*0e209d39SAndroid Build Coastguard Worker      * @param rangeEnd The end of the range of dictionary characters
416*0e209d39SAndroid Build Coastguard Worker      * @param foundBreaks UVector32 that receives the break positions (passed by reference)
417*0e209d39SAndroid Build Coastguard Worker      * @param status Information on any errors encountered.
418*0e209d39SAndroid Build Coastguard Worker      * @return The number of breaks found
419*0e209d39SAndroid Build Coastguard Worker      */
420*0e209d39SAndroid Build Coastguard Worker   virtual int32_t divideUpDictionaryRange( UText *text,
421*0e209d39SAndroid Build Coastguard Worker           int32_t rangeStart,
422*0e209d39SAndroid Build Coastguard Worker           int32_t rangeEnd,
423*0e209d39SAndroid Build Coastguard Worker           UVector32 &foundBreaks,
424*0e209d39SAndroid Build Coastguard Worker           UBool isPhraseBreaking,  // not in the @param list above; presumably enables phrase-based breaking - TODO confirm
425*0e209d39SAndroid Build Coastguard Worker           UErrorCode& status) const override;
426*0e209d39SAndroid Build Coastguard Worker 
427*0e209d39SAndroid Build Coastguard Worker };
428*0e209d39SAndroid Build Coastguard Worker 
429*0e209d39SAndroid Build Coastguard Worker #endif
430*0e209d39SAndroid Build Coastguard Worker 
431*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
432*0e209d39SAndroid Build Coastguard Worker 
433*0e209d39SAndroid Build Coastguard Worker     /* DICTBE_H */
434*0e209d39SAndroid Build Coastguard Worker #endif
435