1*0e209d39SAndroid Build Coastguard Worker // © 2021 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker 4*0e209d39SAndroid Build Coastguard Worker #ifndef LSTMBE_H 5*0e209d39SAndroid Build Coastguard Worker #define LSTMBE_H 6*0e209d39SAndroid Build Coastguard Worker 7*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 8*0e209d39SAndroid Build Coastguard Worker 9*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_BREAK_ITERATION 10*0e209d39SAndroid Build Coastguard Worker 11*0e209d39SAndroid Build Coastguard Worker #include "unicode/uniset.h" 12*0e209d39SAndroid Build Coastguard Worker #include "unicode/ures.h" 13*0e209d39SAndroid Build Coastguard Worker #include "unicode/utext.h" 14*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 15*0e209d39SAndroid Build Coastguard Worker 16*0e209d39SAndroid Build Coastguard Worker #include "brkeng.h" 17*0e209d39SAndroid Build Coastguard Worker #include "dictbe.h" 18*0e209d39SAndroid Build Coastguard Worker #include "uvectr32.h" 19*0e209d39SAndroid Build Coastguard Worker 20*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 21*0e209d39SAndroid Build Coastguard Worker 22*0e209d39SAndroid Build Coastguard Worker class Vectorizer; 23*0e209d39SAndroid Build Coastguard Worker struct LSTMData; 24*0e209d39SAndroid Build Coastguard Worker 25*0e209d39SAndroid Build Coastguard Worker /******************************************************************* 26*0e209d39SAndroid Build Coastguard Worker * LSTMBreakEngine 27*0e209d39SAndroid Build Coastguard Worker */ 28*0e209d39SAndroid Build Coastguard Worker 29*0e209d39SAndroid Build Coastguard Worker /** 30*0e209d39SAndroid Build Coastguard Worker * <p>LSTMBreakEngine is a kind of DictionaryBreakEngine that uses a 31*0e209d39SAndroid Build Coastguard Worker * LSTM to determine language-specific breaks.</p> 32*0e209d39SAndroid Build Coastguard Worker * 33*0e209d39SAndroid Build Coastguard Worker * <p>After it is constructed a LSTMBreakEngine may be shared between 34*0e209d39SAndroid Build Coastguard Worker * threads without synchronization.</p> 35*0e209d39SAndroid Build Coastguard Worker */ 36*0e209d39SAndroid Build Coastguard Worker class LSTMBreakEngine : public DictionaryBreakEngine { 37*0e209d39SAndroid Build Coastguard Worker public: 38*0e209d39SAndroid Build Coastguard Worker /** 39*0e209d39SAndroid Build Coastguard Worker * <p>Constructor.</p> 40*0e209d39SAndroid Build Coastguard Worker */ 41*0e209d39SAndroid Build Coastguard Worker LSTMBreakEngine(const LSTMData* data, const UnicodeSet& set, UErrorCode &status); 42*0e209d39SAndroid Build Coastguard Worker 43*0e209d39SAndroid Build Coastguard Worker /** 44*0e209d39SAndroid Build Coastguard Worker * <p>Virtual destructor.</p> 45*0e209d39SAndroid Build Coastguard Worker */ 46*0e209d39SAndroid Build Coastguard Worker virtual ~LSTMBreakEngine(); 47*0e209d39SAndroid Build Coastguard Worker 48*0e209d39SAndroid Build Coastguard Worker virtual const char16_t* name() const; 49*0e209d39SAndroid Build Coastguard Worker 50*0e209d39SAndroid Build Coastguard Worker protected: 51*0e209d39SAndroid Build Coastguard Worker /** 52*0e209d39SAndroid Build Coastguard Worker * <p>Divide up a range of known dictionary characters handled by this break engine.</p> 53*0e209d39SAndroid Build Coastguard Worker * 54*0e209d39SAndroid Build Coastguard Worker * @param text A UText representing the text 55*0e209d39SAndroid Build Coastguard Worker * @param rangeStart The start of the range of dictionary characters 56*0e209d39SAndroid Build Coastguard Worker * @param rangeEnd The end of the range of dictionary characters 57*0e209d39SAndroid Build Coastguard Worker * @param foundBreaks Output of C array of int32_t break positions, or 0 58*0e209d39SAndroid Build Coastguard Worker * @param status Information on any errors encountered. 59*0e209d39SAndroid Build Coastguard Worker * @return The number of breaks found 60*0e209d39SAndroid Build Coastguard Worker */ 61*0e209d39SAndroid Build Coastguard Worker virtual int32_t divideUpDictionaryRange(UText *text, 62*0e209d39SAndroid Build Coastguard Worker int32_t rangeStart, 63*0e209d39SAndroid Build Coastguard Worker int32_t rangeEnd, 64*0e209d39SAndroid Build Coastguard Worker UVector32 &foundBreaks, 65*0e209d39SAndroid Build Coastguard Worker UBool isPhraseBreaking, 66*0e209d39SAndroid Build Coastguard Worker UErrorCode& status) const override; 67*0e209d39SAndroid Build Coastguard Worker private: 68*0e209d39SAndroid Build Coastguard Worker const LSTMData* fData; 69*0e209d39SAndroid Build Coastguard Worker const Vectorizer* fVectorizer; 70*0e209d39SAndroid Build Coastguard Worker }; 71*0e209d39SAndroid Build Coastguard Worker 72*0e209d39SAndroid Build Coastguard Worker U_CAPI const LanguageBreakEngine* U_EXPORT2 CreateLSTMBreakEngine( 73*0e209d39SAndroid Build Coastguard Worker UScriptCode script, const LSTMData* data, UErrorCode& status); 74*0e209d39SAndroid Build Coastguard Worker 75*0e209d39SAndroid Build Coastguard Worker U_CAPI const LSTMData* U_EXPORT2 CreateLSTMData( 76*0e209d39SAndroid Build Coastguard Worker UResourceBundle* rb, UErrorCode& status); 77*0e209d39SAndroid Build Coastguard Worker 78*0e209d39SAndroid Build Coastguard Worker U_CAPI const LSTMData* U_EXPORT2 CreateLSTMDataForScript( 79*0e209d39SAndroid Build Coastguard Worker UScriptCode script, UErrorCode& status); 80*0e209d39SAndroid Build Coastguard Worker 81*0e209d39SAndroid Build Coastguard Worker U_CAPI void U_EXPORT2 DeleteLSTMData(const LSTMData* data); 82*0e209d39SAndroid Build Coastguard Worker U_CAPI const char16_t* U_EXPORT2 LSTMDataName(const LSTMData* data); 83*0e209d39SAndroid Build Coastguard Worker 84*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 85*0e209d39SAndroid Build Coastguard Worker 86*0e209d39SAndroid Build Coastguard Worker #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 87*0e209d39SAndroid Build Coastguard Worker 88*0e209d39SAndroid Build Coastguard Worker #endif /* LSTMBE_H */ 89