// © 2021 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html

#ifndef LSTMBE_H
#define LSTMBE_H

#include "unicode/utypes.h"

#if !UCONFIG_NO_BREAK_ITERATION

#include "unicode/uniset.h"
#include "unicode/ures.h"
#include "unicode/utext.h"
#include "unicode/utypes.h"

#include "brkeng.h"
#include "dictbe.h"
#include "uvectr32.h"

U_NAMESPACE_BEGIN

// Forward declarations only; neither type is defined in this header.
class Vectorizer;
struct LSTMData;

/*******************************************************************
 * LSTMBreakEngine
 */

/**
 * <p>LSTMBreakEngine is a kind of DictionaryBreakEngine that uses a
 * LSTM to determine language-specific breaks.</p>
 *
 * <p>After it is constructed a LSTMBreakEngine may be shared between
 * threads without synchronization.</p>
 */
class LSTMBreakEngine : public DictionaryBreakEngine {
public:
    /**
     * <p>Constructor.</p>
     *
     * @param data The LSTM model data for this engine. NOTE(review): whether
     *             the engine takes ownership of <code>data</code> is not
     *             visible in this header; confirm in the implementation.
     * @param set A UnicodeSet; presumably the set of characters this engine
     *            handles (TODO confirm against the implementation).
     * @param status Information on any errors encountered.
     */
    LSTMBreakEngine(const LSTMData* data, const UnicodeSet& set, UErrorCode &status);

    /**
     * <p>Virtual destructor.</p>
     */
    virtual ~LSTMBreakEngine();

    /**
     * <p>Returns a pointer to a char16_t string naming this engine.</p>
     *
     * NOTE(review): the lifetime of the returned string and its exact
     * contents are determined by the implementation, not by this header.
     */
    virtual const char16_t* name() const;

protected:
    /**
     * <p>Divide up a range of known dictionary characters handled by this break engine.</p>
     *
     * @param text A UText representing the text
     * @param rangeStart The start of the range of dictionary characters
     * @param rangeEnd The end of the range of dictionary characters
     * @param foundBreaks Output UVector32 that receives the break positions
     * @param isPhraseBreaking Flag supplied by the caller; presumably selects
     *                         phrase-based breaking. Its effect on this engine
     *                         is not visible in this header (TODO confirm).
     * @param status Information on any errors encountered.
     * @return The number of breaks found
     */
    virtual int32_t divideUpDictionaryRange(UText *text,
                                            int32_t rangeStart,
                                            int32_t rangeEnd,
                                            UVector32 &foundBreaks,
                                            UBool isPhraseBreaking,
                                            UErrorCode& status) const override;

private:
    // LSTM model data; presumably the pointer passed to the constructor.
    // NOTE(review): ownership is not established by this header.
    const LSTMData* fData;
    // Vectorizer used by this engine; how it is created and who owns it
    // is defined in the implementation file.
    const Vectorizer* fVectorizer;
};

/*
 * C-linkage helper functions for LSTM data and LSTM-based break engines.
 *
 * NOTE(review): the descriptions below are derived from the signatures only.
 * Ownership of returned pointers, behavior on failure, and the pairing of
 * the Create* functions with DeleteLSTMData must be confirmed against the
 * implementation.
 */

// Returns a LanguageBreakEngine for the given script and LSTM data,
// reporting errors through status.
U_CAPI const LanguageBreakEngine* U_EXPORT2 CreateLSTMBreakEngine(
    UScriptCode script, const LSTMData* data, UErrorCode& status);

// Returns LSTMData obtained from the given resource bundle, reporting
// errors through status.
U_CAPI const LSTMData* U_EXPORT2 CreateLSTMData(
    UResourceBundle* rb, UErrorCode& status);

// Returns LSTMData for the given script, reporting errors through status.
U_CAPI const LSTMData* U_EXPORT2 CreateLSTMDataForScript(
    UScriptCode script, UErrorCode& status);

// Presumably releases LSTMData obtained from one of the Create* functions
// above (TODO confirm).
U_CAPI void U_EXPORT2 DeleteLSTMData(const LSTMData* data);

// Returns a char16_t string associated with the given LSTMData; presumably
// the model name (TODO confirm).
U_CAPI const char16_t* U_EXPORT2 LSTMDataName(const LSTMData* data);

U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_BREAK_ITERATION */

#endif /* LSTMBE_H */