1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ******************************************************************************* 5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 2010-2016, International Business Machines 6*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved. 7*0e209d39SAndroid Build Coastguard Worker ******************************************************************************* 8*0e209d39SAndroid Build Coastguard Worker * file name: bytestriebuilder.h 9*0e209d39SAndroid Build Coastguard Worker * encoding: UTF-8 10*0e209d39SAndroid Build Coastguard Worker * tab size: 8 (not used) 11*0e209d39SAndroid Build Coastguard Worker * indentation:4 12*0e209d39SAndroid Build Coastguard Worker * 13*0e209d39SAndroid Build Coastguard Worker * created on: 2010sep25 14*0e209d39SAndroid Build Coastguard Worker * created by: Markus W. Scherer 15*0e209d39SAndroid Build Coastguard Worker */ 16*0e209d39SAndroid Build Coastguard Worker 17*0e209d39SAndroid Build Coastguard Worker /** 18*0e209d39SAndroid Build Coastguard Worker * \file 19*0e209d39SAndroid Build Coastguard Worker * \brief C++ API: Builder for icu::BytesTrie 20*0e209d39SAndroid Build Coastguard Worker */ 21*0e209d39SAndroid Build Coastguard Worker 22*0e209d39SAndroid Build Coastguard Worker #ifndef __BYTESTRIEBUILDER_H__ 23*0e209d39SAndroid Build Coastguard Worker #define __BYTESTRIEBUILDER_H__ 24*0e209d39SAndroid Build Coastguard Worker 25*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 26*0e209d39SAndroid Build Coastguard Worker 27*0e209d39SAndroid Build Coastguard Worker #if U_SHOW_CPLUSPLUS_API 28*0e209d39SAndroid Build Coastguard Worker 29*0e209d39SAndroid Build Coastguard Worker #include "unicode/bytestrie.h" 30*0e209d39SAndroid Build Coastguard Worker #include "unicode/stringpiece.h" 31*0e209d39SAndroid Build Coastguard Worker #include "unicode/stringtriebuilder.h" 32*0e209d39SAndroid Build Coastguard Worker 33*0e209d39SAndroid Build Coastguard Worker class BytesTrieTest; 34*0e209d39SAndroid Build Coastguard Worker 35*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 36*0e209d39SAndroid Build Coastguard Worker 37*0e209d39SAndroid Build Coastguard Worker class BytesTrieElement; 38*0e209d39SAndroid Build Coastguard Worker class CharString; 39*0e209d39SAndroid Build Coastguard Worker /** 40*0e209d39SAndroid Build Coastguard Worker * Builder class for BytesTrie. 41*0e209d39SAndroid Build Coastguard Worker * 42*0e209d39SAndroid Build Coastguard Worker * This class is not intended for public subclassing. 43*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.8 44*0e209d39SAndroid Build Coastguard Worker */ 45*0e209d39SAndroid Build Coastguard Worker class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder { 46*0e209d39SAndroid Build Coastguard Worker public: 47*0e209d39SAndroid Build Coastguard Worker /** 48*0e209d39SAndroid Build Coastguard Worker * Constructs an empty builder. 49*0e209d39SAndroid Build Coastguard Worker * @param errorCode Standard ICU error code. 50*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.8 51*0e209d39SAndroid Build Coastguard Worker */ 52*0e209d39SAndroid Build Coastguard Worker BytesTrieBuilder(UErrorCode &errorCode); 53*0e209d39SAndroid Build Coastguard Worker 54*0e209d39SAndroid Build Coastguard Worker /** 55*0e209d39SAndroid Build Coastguard Worker * Destructor. 56*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.8 57*0e209d39SAndroid Build Coastguard Worker */ 58*0e209d39SAndroid Build Coastguard Worker virtual ~BytesTrieBuilder(); 59*0e209d39SAndroid Build Coastguard Worker 60*0e209d39SAndroid Build Coastguard Worker /** 61*0e209d39SAndroid Build Coastguard Worker * Adds a (byte sequence, value) pair. 62*0e209d39SAndroid Build Coastguard Worker * The byte sequence must be unique. 63*0e209d39SAndroid Build Coastguard Worker * The bytes will be copied; the builder does not keep 64*0e209d39SAndroid Build Coastguard Worker * a reference to the input StringPiece or its data(). 65*0e209d39SAndroid Build Coastguard Worker * @param s The input byte sequence. 66*0e209d39SAndroid Build Coastguard Worker * @param value The value associated with this byte sequence. 67*0e209d39SAndroid Build Coastguard Worker * @param errorCode Standard ICU error code. Its input value must 68*0e209d39SAndroid Build Coastguard Worker * pass the U_SUCCESS() test, or else the function returns 69*0e209d39SAndroid Build Coastguard Worker * immediately. Check for U_FAILURE() on output or use with 70*0e209d39SAndroid Build Coastguard Worker * function chaining. (See User Guide for details.) 71*0e209d39SAndroid Build Coastguard Worker * @return *this 72*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.8 73*0e209d39SAndroid Build Coastguard Worker */ 74*0e209d39SAndroid Build Coastguard Worker BytesTrieBuilder &add(StringPiece s, int32_t value, UErrorCode &errorCode); 75*0e209d39SAndroid Build Coastguard Worker 76*0e209d39SAndroid Build Coastguard Worker /** 77*0e209d39SAndroid Build Coastguard Worker * Builds a BytesTrie for the add()ed data. 78*0e209d39SAndroid Build Coastguard Worker * Once built, no further data can be add()ed until clear() is called. 79*0e209d39SAndroid Build Coastguard Worker * 80*0e209d39SAndroid Build Coastguard Worker * A BytesTrie cannot be empty. At least one (byte sequence, value) pair 81*0e209d39SAndroid Build Coastguard Worker * must have been add()ed. 82*0e209d39SAndroid Build Coastguard Worker * 83*0e209d39SAndroid Build Coastguard Worker * This method passes ownership of the builder's internal result array to the new trie object. 84*0e209d39SAndroid Build Coastguard Worker * Another call to any build() variant will re-serialize the trie. 85*0e209d39SAndroid Build Coastguard Worker * After clear() has been called, a new array will be used as well. 86*0e209d39SAndroid Build Coastguard Worker * @param buildOption Build option, see UStringTrieBuildOption. 87*0e209d39SAndroid Build Coastguard Worker * @param errorCode Standard ICU error code. Its input value must 88*0e209d39SAndroid Build Coastguard Worker * pass the U_SUCCESS() test, or else the function returns 89*0e209d39SAndroid Build Coastguard Worker * immediately. Check for U_FAILURE() on output or use with 90*0e209d39SAndroid Build Coastguard Worker * function chaining. (See User Guide for details.) 91*0e209d39SAndroid Build Coastguard Worker * @return A new BytesTrie for the add()ed data. 92*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.8 93*0e209d39SAndroid Build Coastguard Worker */ 94*0e209d39SAndroid Build Coastguard Worker BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 95*0e209d39SAndroid Build Coastguard Worker 96*0e209d39SAndroid Build Coastguard Worker /** 97*0e209d39SAndroid Build Coastguard Worker * Builds a BytesTrie for the add()ed data and byte-serializes it. 98*0e209d39SAndroid Build Coastguard Worker * Once built, no further data can be add()ed until clear() is called. 99*0e209d39SAndroid Build Coastguard Worker * 100*0e209d39SAndroid Build Coastguard Worker * A BytesTrie cannot be empty. At least one (byte sequence, value) pair 101*0e209d39SAndroid Build Coastguard Worker * must have been add()ed. 102*0e209d39SAndroid Build Coastguard Worker * 103*0e209d39SAndroid Build Coastguard Worker * Multiple calls to buildStringPiece() return StringPieces referring to the 104*0e209d39SAndroid Build Coastguard Worker * builder's same byte array, without rebuilding. 105*0e209d39SAndroid Build Coastguard Worker * If buildStringPiece() is called after build(), the trie will be 106*0e209d39SAndroid Build Coastguard Worker * re-serialized into a new array (because build() passes on ownership). 107*0e209d39SAndroid Build Coastguard Worker * If build() is called after buildStringPiece(), the trie object returned 108*0e209d39SAndroid Build Coastguard Worker * by build() will become the owner of the underlying string for the 109*0e209d39SAndroid Build Coastguard Worker * previously returned StringPiece. 110*0e209d39SAndroid Build Coastguard Worker * After clear() has been called, a new array will be used as well. 111*0e209d39SAndroid Build Coastguard Worker * @param buildOption Build option, see UStringTrieBuildOption. 112*0e209d39SAndroid Build Coastguard Worker * @param errorCode Standard ICU error code. Its input value must 113*0e209d39SAndroid Build Coastguard Worker * pass the U_SUCCESS() test, or else the function returns 114*0e209d39SAndroid Build Coastguard Worker * immediately. Check for U_FAILURE() on output or use with 115*0e209d39SAndroid Build Coastguard Worker * function chaining. (See User Guide for details.) 116*0e209d39SAndroid Build Coastguard Worker * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data. 117*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.8 118*0e209d39SAndroid Build Coastguard Worker */ 119*0e209d39SAndroid Build Coastguard Worker StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 120*0e209d39SAndroid Build Coastguard Worker 121*0e209d39SAndroid Build Coastguard Worker /** 122*0e209d39SAndroid Build Coastguard Worker * Removes all (byte sequence, value) pairs. 123*0e209d39SAndroid Build Coastguard Worker * New data can then be add()ed and a new trie can be built. 124*0e209d39SAndroid Build Coastguard Worker * @return *this 125*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.8 126*0e209d39SAndroid Build Coastguard Worker */ 127*0e209d39SAndroid Build Coastguard Worker BytesTrieBuilder &clear(); 128*0e209d39SAndroid Build Coastguard Worker 129*0e209d39SAndroid Build Coastguard Worker private: 130*0e209d39SAndroid Build Coastguard Worker friend class ::BytesTrieTest; 131*0e209d39SAndroid Build Coastguard Worker 132*0e209d39SAndroid Build Coastguard Worker BytesTrieBuilder(const BytesTrieBuilder &other) = delete; // no copy constructor 133*0e209d39SAndroid Build Coastguard Worker BytesTrieBuilder &operator=(const BytesTrieBuilder &other) = delete; // no assignment operator 134*0e209d39SAndroid Build Coastguard Worker 135*0e209d39SAndroid Build Coastguard Worker void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 136*0e209d39SAndroid Build Coastguard Worker 137*0e209d39SAndroid Build Coastguard Worker virtual int32_t getElementStringLength(int32_t i) const override; 138*0e209d39SAndroid Build Coastguard Worker virtual char16_t getElementUnit(int32_t i, int32_t byteIndex) const override; 139*0e209d39SAndroid Build Coastguard Worker virtual int32_t getElementValue(int32_t i) const override; 140*0e209d39SAndroid Build Coastguard Worker 141*0e209d39SAndroid Build Coastguard Worker virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const override; 142*0e209d39SAndroid Build Coastguard Worker 143*0e209d39SAndroid Build Coastguard Worker virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const override; 144*0e209d39SAndroid Build Coastguard Worker virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const override; 145*0e209d39SAndroid Build Coastguard Worker virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, char16_t byte) const override; 146*0e209d39SAndroid Build Coastguard Worker matchNodesCanHaveValues()147*0e209d39SAndroid Build Coastguard Worker virtual UBool matchNodesCanHaveValues() const override { return false; } 148*0e209d39SAndroid Build Coastguard Worker getMaxBranchLinearSubNodeLength()149*0e209d39SAndroid Build Coastguard Worker virtual int32_t getMaxBranchLinearSubNodeLength() const override { return BytesTrie::kMaxBranchLinearSubNodeLength; } getMinLinearMatch()150*0e209d39SAndroid Build Coastguard Worker virtual int32_t getMinLinearMatch() const override { return BytesTrie::kMinLinearMatch; } getMaxLinearMatchLength()151*0e209d39SAndroid Build Coastguard Worker virtual int32_t getMaxLinearMatchLength() const override { return BytesTrie::kMaxLinearMatchLength; } 152*0e209d39SAndroid Build Coastguard Worker 153*0e209d39SAndroid Build Coastguard Worker /** 154*0e209d39SAndroid Build Coastguard Worker * @internal (private) 155*0e209d39SAndroid Build Coastguard Worker */ 156*0e209d39SAndroid Build Coastguard Worker class BTLinearMatchNode : public LinearMatchNode { 157*0e209d39SAndroid Build Coastguard Worker public: 158*0e209d39SAndroid Build Coastguard Worker BTLinearMatchNode(const char *units, int32_t len, Node *nextNode); 159*0e209d39SAndroid Build Coastguard Worker virtual bool operator==(const Node &other) const override; 160*0e209d39SAndroid Build Coastguard Worker virtual void write(StringTrieBuilder &builder) override; 161*0e209d39SAndroid Build Coastguard Worker private: 162*0e209d39SAndroid Build Coastguard Worker const char *s; 163*0e209d39SAndroid Build Coastguard Worker }; 164*0e209d39SAndroid Build Coastguard Worker 165*0e209d39SAndroid Build Coastguard Worker virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, 166*0e209d39SAndroid Build Coastguard Worker Node *nextNode) const override; 167*0e209d39SAndroid Build Coastguard Worker 168*0e209d39SAndroid Build Coastguard Worker UBool ensureCapacity(int32_t length); 169*0e209d39SAndroid Build Coastguard Worker virtual int32_t write(int32_t byte) override; 170*0e209d39SAndroid Build Coastguard Worker int32_t write(const char *b, int32_t length); 171*0e209d39SAndroid Build Coastguard Worker virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) override; 172*0e209d39SAndroid Build Coastguard Worker virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) override; 173*0e209d39SAndroid Build Coastguard Worker virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) override; 174*0e209d39SAndroid Build Coastguard Worker virtual int32_t writeDeltaTo(int32_t jumpTarget) override; 175*0e209d39SAndroid Build Coastguard Worker static int32_t internalEncodeDelta(int32_t i, char intBytes[]); 176*0e209d39SAndroid Build Coastguard Worker 177*0e209d39SAndroid Build Coastguard Worker CharString *strings; // Pointer not object so we need not #include internal charstr.h. 178*0e209d39SAndroid Build Coastguard Worker BytesTrieElement *elements; 179*0e209d39SAndroid Build Coastguard Worker int32_t elementsCapacity; 180*0e209d39SAndroid Build Coastguard Worker int32_t elementsLength; 181*0e209d39SAndroid Build Coastguard Worker 182*0e209d39SAndroid Build Coastguard Worker // Byte serialization of the trie. 183*0e209d39SAndroid Build Coastguard Worker // Grows from the back: bytesLength measures from the end of the buffer! 184*0e209d39SAndroid Build Coastguard Worker char *bytes; 185*0e209d39SAndroid Build Coastguard Worker int32_t bytesCapacity; 186*0e209d39SAndroid Build Coastguard Worker int32_t bytesLength; 187*0e209d39SAndroid Build Coastguard Worker }; 188*0e209d39SAndroid Build Coastguard Worker 189*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 190*0e209d39SAndroid Build Coastguard Worker 191*0e209d39SAndroid Build Coastguard Worker #endif /* U_SHOW_CPLUSPLUS_API */ 192*0e209d39SAndroid Build Coastguard Worker 193*0e209d39SAndroid Build Coastguard Worker #endif // __BYTESTRIEBUILDER_H__ 194