xref: /aosp_15_r20/external/icu/libicu/cts_headers/lstmbe.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2021 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker 
4*0e209d39SAndroid Build Coastguard Worker #ifndef LSTMBE_H
5*0e209d39SAndroid Build Coastguard Worker #define LSTMBE_H
6*0e209d39SAndroid Build Coastguard Worker 
7*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
8*0e209d39SAndroid Build Coastguard Worker 
9*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_BREAK_ITERATION
10*0e209d39SAndroid Build Coastguard Worker 
11*0e209d39SAndroid Build Coastguard Worker #include "unicode/uniset.h"
12*0e209d39SAndroid Build Coastguard Worker #include "unicode/ures.h"
13*0e209d39SAndroid Build Coastguard Worker #include "unicode/utext.h"
14*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
15*0e209d39SAndroid Build Coastguard Worker 
16*0e209d39SAndroid Build Coastguard Worker #include "brkeng.h"
17*0e209d39SAndroid Build Coastguard Worker #include "dictbe.h"
18*0e209d39SAndroid Build Coastguard Worker #include "uvectr32.h"
19*0e209d39SAndroid Build Coastguard Worker 
20*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
21*0e209d39SAndroid Build Coastguard Worker 
22*0e209d39SAndroid Build Coastguard Worker class Vectorizer;
23*0e209d39SAndroid Build Coastguard Worker struct LSTMData;
24*0e209d39SAndroid Build Coastguard Worker 
25*0e209d39SAndroid Build Coastguard Worker /*******************************************************************
26*0e209d39SAndroid Build Coastguard Worker  * LSTMBreakEngine
27*0e209d39SAndroid Build Coastguard Worker  */
28*0e209d39SAndroid Build Coastguard Worker 
29*0e209d39SAndroid Build Coastguard Worker /**
30*0e209d39SAndroid Build Coastguard Worker  * <p>LSTMBreakEngine is a kind of DictionaryBreakEngine that uses a
31*0e209d39SAndroid Build Coastguard Worker  * LSTM to determine language-specific breaks.</p>
32*0e209d39SAndroid Build Coastguard Worker  *
33*0e209d39SAndroid Build Coastguard Worker  * <p>After it is constructed a LSTMBreakEngine may be shared between
34*0e209d39SAndroid Build Coastguard Worker  * threads without synchronization.</p>
35*0e209d39SAndroid Build Coastguard Worker  */
36*0e209d39SAndroid Build Coastguard Worker class LSTMBreakEngine : public DictionaryBreakEngine {
37*0e209d39SAndroid Build Coastguard Worker public:
38*0e209d39SAndroid Build Coastguard Worker     /**
39*0e209d39SAndroid Build Coastguard Worker      * <p>Constructor.</p>
40*0e209d39SAndroid Build Coastguard Worker      */
41*0e209d39SAndroid Build Coastguard Worker     LSTMBreakEngine(const LSTMData* data, const UnicodeSet& set, UErrorCode &status);
42*0e209d39SAndroid Build Coastguard Worker 
43*0e209d39SAndroid Build Coastguard Worker     /**
44*0e209d39SAndroid Build Coastguard Worker      * <p>Virtual destructor.</p>
45*0e209d39SAndroid Build Coastguard Worker      */
46*0e209d39SAndroid Build Coastguard Worker     virtual ~LSTMBreakEngine();
47*0e209d39SAndroid Build Coastguard Worker 
48*0e209d39SAndroid Build Coastguard Worker     virtual const char16_t* name() const;
49*0e209d39SAndroid Build Coastguard Worker 
50*0e209d39SAndroid Build Coastguard Worker protected:
51*0e209d39SAndroid Build Coastguard Worker     /**
52*0e209d39SAndroid Build Coastguard Worker      * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
53*0e209d39SAndroid Build Coastguard Worker      *
54*0e209d39SAndroid Build Coastguard Worker      * @param text A UText representing the text
55*0e209d39SAndroid Build Coastguard Worker      * @param rangeStart The start of the range of dictionary characters
56*0e209d39SAndroid Build Coastguard Worker      * @param rangeEnd The end of the range of dictionary characters
57*0e209d39SAndroid Build Coastguard Worker      * @param foundBreaks Output of C array of int32_t break positions, or 0
58*0e209d39SAndroid Build Coastguard Worker      * @param status Information on any errors encountered.
59*0e209d39SAndroid Build Coastguard Worker      * @return The number of breaks found
60*0e209d39SAndroid Build Coastguard Worker      */
61*0e209d39SAndroid Build Coastguard Worker      virtual int32_t divideUpDictionaryRange(UText *text,
62*0e209d39SAndroid Build Coastguard Worker                                              int32_t rangeStart,
63*0e209d39SAndroid Build Coastguard Worker                                              int32_t rangeEnd,
64*0e209d39SAndroid Build Coastguard Worker                                              UVector32 &foundBreaks,
65*0e209d39SAndroid Build Coastguard Worker                                              UBool isPhraseBreaking,
66*0e209d39SAndroid Build Coastguard Worker                                              UErrorCode& status) const override;
67*0e209d39SAndroid Build Coastguard Worker private:
68*0e209d39SAndroid Build Coastguard Worker     const LSTMData* fData;
69*0e209d39SAndroid Build Coastguard Worker     const Vectorizer* fVectorizer;
70*0e209d39SAndroid Build Coastguard Worker };
71*0e209d39SAndroid Build Coastguard Worker 
72*0e209d39SAndroid Build Coastguard Worker U_CAPI const LanguageBreakEngine* U_EXPORT2 CreateLSTMBreakEngine(
73*0e209d39SAndroid Build Coastguard Worker     UScriptCode script, const LSTMData* data, UErrorCode& status);
74*0e209d39SAndroid Build Coastguard Worker 
75*0e209d39SAndroid Build Coastguard Worker U_CAPI const LSTMData* U_EXPORT2 CreateLSTMData(
76*0e209d39SAndroid Build Coastguard Worker     UResourceBundle* rb, UErrorCode& status);
77*0e209d39SAndroid Build Coastguard Worker 
78*0e209d39SAndroid Build Coastguard Worker U_CAPI const LSTMData* U_EXPORT2 CreateLSTMDataForScript(
79*0e209d39SAndroid Build Coastguard Worker     UScriptCode script, UErrorCode& status);
80*0e209d39SAndroid Build Coastguard Worker 
81*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2 DeleteLSTMData(const LSTMData* data);
82*0e209d39SAndroid Build Coastguard Worker U_CAPI const char16_t* U_EXPORT2 LSTMDataName(const LSTMData* data);
83*0e209d39SAndroid Build Coastguard Worker 
84*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
85*0e209d39SAndroid Build Coastguard Worker 
86*0e209d39SAndroid Build Coastguard Worker #endif /* #if !UCONFIG_NO_BREAK_ITERATION */
87*0e209d39SAndroid Build Coastguard Worker 
88*0e209d39SAndroid Build Coastguard Worker #endif  /* LSTMBE_H */
89