1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker // 4*0e209d39SAndroid Build Coastguard Worker // rbbitblb.h 5*0e209d39SAndroid Build Coastguard Worker // 6*0e209d39SAndroid Build Coastguard Worker 7*0e209d39SAndroid Build Coastguard Worker /* 8*0e209d39SAndroid Build Coastguard Worker ********************************************************************** 9*0e209d39SAndroid Build Coastguard Worker * Copyright (c) 2002-2016, International Business Machines 10*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved. 11*0e209d39SAndroid Build Coastguard Worker ********************************************************************** 12*0e209d39SAndroid Build Coastguard Worker */ 13*0e209d39SAndroid Build Coastguard Worker 14*0e209d39SAndroid Build Coastguard Worker #ifndef RBBITBLB_H 15*0e209d39SAndroid Build Coastguard Worker #define RBBITBLB_H 16*0e209d39SAndroid Build Coastguard Worker 17*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 18*0e209d39SAndroid Build Coastguard Worker 19*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_BREAK_ITERATION 20*0e209d39SAndroid Build Coastguard Worker 21*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h" 22*0e209d39SAndroid Build Coastguard Worker #include "unicode/rbbi.h" 23*0e209d39SAndroid Build Coastguard Worker #include "rbbidata.h" 24*0e209d39SAndroid Build Coastguard Worker #include "rbbirb.h" 25*0e209d39SAndroid Build Coastguard Worker #include "rbbinode.h" 26*0e209d39SAndroid Build Coastguard Worker 27*0e209d39SAndroid Build Coastguard Worker 28*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 29*0e209d39SAndroid Build Coastguard Worker 30*0e209d39SAndroid Build Coastguard Worker class RBBIRuleScanner; 31*0e209d39SAndroid Build Coastguard Worker class RBBIRuleBuilder; 32*0e209d39SAndroid Build Coastguard Worker class UVector32; 33*0e209d39SAndroid Build Coastguard Worker 34*0e209d39SAndroid Build Coastguard Worker // 35*0e209d39SAndroid Build Coastguard Worker // class RBBITableBuilder is part of the RBBI rule compiler. 36*0e209d39SAndroid Build Coastguard Worker // It builds the state transition table used by the RBBI runtime 37*0e209d39SAndroid Build Coastguard Worker // from the expression syntax tree generated by the rule scanner. 38*0e209d39SAndroid Build Coastguard Worker // 39*0e209d39SAndroid Build Coastguard Worker // This class is part of the RBBI implementation only. 40*0e209d39SAndroid Build Coastguard Worker // There is no user-visible public API here. 41*0e209d39SAndroid Build Coastguard Worker // 42*0e209d39SAndroid Build Coastguard Worker 43*0e209d39SAndroid Build Coastguard Worker class RBBITableBuilder : public UMemory { 44*0e209d39SAndroid Build Coastguard Worker public: 45*0e209d39SAndroid Build Coastguard Worker RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status); 46*0e209d39SAndroid Build Coastguard Worker ~RBBITableBuilder(); 47*0e209d39SAndroid Build Coastguard Worker 48*0e209d39SAndroid Build Coastguard Worker void buildForwardTable(); 49*0e209d39SAndroid Build Coastguard Worker 50*0e209d39SAndroid Build Coastguard Worker /** Return the runtime size in bytes of the built state table. */ 51*0e209d39SAndroid Build Coastguard Worker int32_t getTableSize() const; 52*0e209d39SAndroid Build Coastguard Worker 53*0e209d39SAndroid Build Coastguard Worker /** Fill in the runtime state table. Sufficient memory must exist at the specified location. 54*0e209d39SAndroid Build Coastguard Worker */ 55*0e209d39SAndroid Build Coastguard Worker void exportTable(void *where); 56*0e209d39SAndroid Build Coastguard Worker 57*0e209d39SAndroid Build Coastguard Worker /** Use 8 bits to encode the forward table */ 58*0e209d39SAndroid Build Coastguard Worker bool use8BitsForTable() const; 59*0e209d39SAndroid Build Coastguard Worker 60*0e209d39SAndroid Build Coastguard Worker /** 61*0e209d39SAndroid Build Coastguard Worker * Find duplicate (redundant) character classes. Begin looking with categories.first. 62*0e209d39SAndroid Build Coastguard Worker * Duplicate, if found are returned in the categories parameter. 63*0e209d39SAndroid Build Coastguard Worker * This is an iterator-like function, used to identify character classes 64*0e209d39SAndroid Build Coastguard Worker * (state table columns) that can be eliminated. 65*0e209d39SAndroid Build Coastguard Worker * @param categories in/out parameter, specifies where to start looking for duplicates, 66*0e209d39SAndroid Build Coastguard Worker * and returns the first pair of duplicates found, if any. 67*0e209d39SAndroid Build Coastguard Worker * @return true if duplicate char classes were found, false otherwise. 68*0e209d39SAndroid Build Coastguard Worker */ 69*0e209d39SAndroid Build Coastguard Worker bool findDuplCharClassFrom(IntPair *categories); 70*0e209d39SAndroid Build Coastguard Worker 71*0e209d39SAndroid Build Coastguard Worker /** Remove a column from the state table. Used when two character categories 72*0e209d39SAndroid Build Coastguard Worker * have been found equivalent, and merged together, to eliminate the unneeded table column. 73*0e209d39SAndroid Build Coastguard Worker */ 74*0e209d39SAndroid Build Coastguard Worker void removeColumn(int32_t column); 75*0e209d39SAndroid Build Coastguard Worker 76*0e209d39SAndroid Build Coastguard Worker /** 77*0e209d39SAndroid Build Coastguard Worker * Check for, and remove duplicate states (table rows). 78*0e209d39SAndroid Build Coastguard Worker * @return the number of states removed. 79*0e209d39SAndroid Build Coastguard Worker */ 80*0e209d39SAndroid Build Coastguard Worker int32_t removeDuplicateStates(); 81*0e209d39SAndroid Build Coastguard Worker 82*0e209d39SAndroid Build Coastguard Worker /** Build the safe reverse table from the already-constructed forward table. */ 83*0e209d39SAndroid Build Coastguard Worker void buildSafeReverseTable(UErrorCode &status); 84*0e209d39SAndroid Build Coastguard Worker 85*0e209d39SAndroid Build Coastguard Worker /** Return the runtime size in bytes of the built safe reverse state table. */ 86*0e209d39SAndroid Build Coastguard Worker int32_t getSafeTableSize() const; 87*0e209d39SAndroid Build Coastguard Worker 88*0e209d39SAndroid Build Coastguard Worker /** Fill in the runtime safe state table. Sufficient memory must exist at the specified location. 89*0e209d39SAndroid Build Coastguard Worker */ 90*0e209d39SAndroid Build Coastguard Worker void exportSafeTable(void *where); 91*0e209d39SAndroid Build Coastguard Worker 92*0e209d39SAndroid Build Coastguard Worker /** Use 8 bits to encode the safe reverse table */ 93*0e209d39SAndroid Build Coastguard Worker bool use8BitsForSafeTable() const; 94*0e209d39SAndroid Build Coastguard Worker 95*0e209d39SAndroid Build Coastguard Worker private: 96*0e209d39SAndroid Build Coastguard Worker void calcNullable(RBBINode *n); 97*0e209d39SAndroid Build Coastguard Worker void calcFirstPos(RBBINode *n); 98*0e209d39SAndroid Build Coastguard Worker void calcLastPos(RBBINode *n); 99*0e209d39SAndroid Build Coastguard Worker void calcFollowPos(RBBINode *n); 100*0e209d39SAndroid Build Coastguard Worker void calcChainedFollowPos(RBBINode *n, RBBINode *endMarkNode); 101*0e209d39SAndroid Build Coastguard Worker void bofFixup(); 102*0e209d39SAndroid Build Coastguard Worker void buildStateTable(); 103*0e209d39SAndroid Build Coastguard Worker void mapLookAheadRules(); 104*0e209d39SAndroid Build Coastguard Worker void flagAcceptingStates(); 105*0e209d39SAndroid Build Coastguard Worker void flagLookAheadStates(); 106*0e209d39SAndroid Build Coastguard Worker void flagTaggedStates(); 107*0e209d39SAndroid Build Coastguard Worker void mergeRuleStatusVals(); 108*0e209d39SAndroid Build Coastguard Worker 109*0e209d39SAndroid Build Coastguard Worker /** 110*0e209d39SAndroid Build Coastguard Worker * Merge redundant state table columns, eliminating character classes with identical behavior. 111*0e209d39SAndroid Build Coastguard Worker * Done after the state tables are generated, just before converting to their run-time format. 112*0e209d39SAndroid Build Coastguard Worker */ 113*0e209d39SAndroid Build Coastguard Worker int32_t mergeColumns(); 114*0e209d39SAndroid Build Coastguard Worker 115*0e209d39SAndroid Build Coastguard Worker void addRuleRootNodes(UVector *dest, RBBINode *node); 116*0e209d39SAndroid Build Coastguard Worker 117*0e209d39SAndroid Build Coastguard Worker /** 118*0e209d39SAndroid Build Coastguard Worker * Find duplicate (redundant) states, beginning at the specified pair, 119*0e209d39SAndroid Build Coastguard Worker * within this state table. This is an iterator-like function, used to 120*0e209d39SAndroid Build Coastguard Worker * identify states (state table rows) that can be eliminated. 121*0e209d39SAndroid Build Coastguard Worker * @param states in/out parameter, specifies where to start looking for duplicates, 122*0e209d39SAndroid Build Coastguard Worker * and returns the first pair of duplicates found, if any. 123*0e209d39SAndroid Build Coastguard Worker * @return true if duplicate states were found, false otherwise. 124*0e209d39SAndroid Build Coastguard Worker */ 125*0e209d39SAndroid Build Coastguard Worker bool findDuplicateState(IntPair *states); 126*0e209d39SAndroid Build Coastguard Worker 127*0e209d39SAndroid Build Coastguard Worker /** Remove a duplicate state. 128*0e209d39SAndroid Build Coastguard Worker * @param duplStates The duplicate states. The first is kept, the second is removed. 129*0e209d39SAndroid Build Coastguard Worker * All references to the second in the state table are retargeted 130*0e209d39SAndroid Build Coastguard Worker * to the first. 131*0e209d39SAndroid Build Coastguard Worker */ 132*0e209d39SAndroid Build Coastguard Worker void removeState(IntPair duplStates); 133*0e209d39SAndroid Build Coastguard Worker 134*0e209d39SAndroid Build Coastguard Worker /** Find the next duplicate state in the safe reverse table. An iterator function. 135*0e209d39SAndroid Build Coastguard Worker * @param states in/out parameter, specifies where to start looking for duplicates, 136*0e209d39SAndroid Build Coastguard Worker * and returns the first pair of duplicates found, if any. 137*0e209d39SAndroid Build Coastguard Worker * @return true if a duplicate pair of states was found. 138*0e209d39SAndroid Build Coastguard Worker */ 139*0e209d39SAndroid Build Coastguard Worker bool findDuplicateSafeState(IntPair *states); 140*0e209d39SAndroid Build Coastguard Worker 141*0e209d39SAndroid Build Coastguard Worker /** Remove a duplicate state from the safe table. 142*0e209d39SAndroid Build Coastguard Worker * @param duplStates The duplicate states. The first is kept, the second is removed. 143*0e209d39SAndroid Build Coastguard Worker * All references to the second in the state table are retargeted 144*0e209d39SAndroid Build Coastguard Worker * to the first. 145*0e209d39SAndroid Build Coastguard Worker */ 146*0e209d39SAndroid Build Coastguard Worker void removeSafeState(IntPair duplStates); 147*0e209d39SAndroid Build Coastguard Worker 148*0e209d39SAndroid Build Coastguard Worker // Set functions for UVector. 149*0e209d39SAndroid Build Coastguard Worker // TODO: make a USet subclass of UVector 150*0e209d39SAndroid Build Coastguard Worker 151*0e209d39SAndroid Build Coastguard Worker void setAdd(UVector *dest, UVector *source); 152*0e209d39SAndroid Build Coastguard Worker UBool setEquals(UVector *a, UVector *b); 153*0e209d39SAndroid Build Coastguard Worker 154*0e209d39SAndroid Build Coastguard Worker void sortedAdd(UVector **dest, int32_t val); 155*0e209d39SAndroid Build Coastguard Worker 156*0e209d39SAndroid Build Coastguard Worker public: 157*0e209d39SAndroid Build Coastguard Worker #ifdef RBBI_DEBUG 158*0e209d39SAndroid Build Coastguard Worker void printSet(UVector *s); 159*0e209d39SAndroid Build Coastguard Worker void printPosSets(RBBINode *n /* = nullptr */); 160*0e209d39SAndroid Build Coastguard Worker void printStates(); 161*0e209d39SAndroid Build Coastguard Worker void printRuleStatusTable(); 162*0e209d39SAndroid Build Coastguard Worker void printReverseTable(); 163*0e209d39SAndroid Build Coastguard Worker #else 164*0e209d39SAndroid Build Coastguard Worker #define printSet(s) 165*0e209d39SAndroid Build Coastguard Worker #define printPosSets(n) 166*0e209d39SAndroid Build Coastguard Worker #define printStates() 167*0e209d39SAndroid Build Coastguard Worker #define printRuleStatusTable() 168*0e209d39SAndroid Build Coastguard Worker #define printReverseTable() 169*0e209d39SAndroid Build Coastguard Worker #endif 170*0e209d39SAndroid Build Coastguard Worker 171*0e209d39SAndroid Build Coastguard Worker private: 172*0e209d39SAndroid Build Coastguard Worker RBBIRuleBuilder *fRB; 173*0e209d39SAndroid Build Coastguard Worker RBBINode *&fTree; // The root node of the parse tree to build a 174*0e209d39SAndroid Build Coastguard Worker // table for. 175*0e209d39SAndroid Build Coastguard Worker UErrorCode *fStatus; 176*0e209d39SAndroid Build Coastguard Worker 177*0e209d39SAndroid Build Coastguard Worker /** State Descriptors, UVector<RBBIStateDescriptor> */ 178*0e209d39SAndroid Build Coastguard Worker UVector *fDStates; // D states (Aho's terminology) 179*0e209d39SAndroid Build Coastguard Worker // Index is state number 180*0e209d39SAndroid Build Coastguard Worker // Contents are RBBIStateDescriptor pointers. 181*0e209d39SAndroid Build Coastguard Worker 182*0e209d39SAndroid Build Coastguard Worker /** Synthesized safe table, UVector of UnicodeString, one string per table row. */ 183*0e209d39SAndroid Build Coastguard Worker UVector *fSafeTable; 184*0e209d39SAndroid Build Coastguard Worker 185*0e209d39SAndroid Build Coastguard Worker /** Map from rule number (fVal in look ahead nodes) to sequential lookahead index. */ 186*0e209d39SAndroid Build Coastguard Worker UVector32 *fLookAheadRuleMap = nullptr; 187*0e209d39SAndroid Build Coastguard Worker 188*0e209d39SAndroid Build Coastguard Worker /* Counter used when assigning lookahead rule numbers. 189*0e209d39SAndroid Build Coastguard Worker * Contains the last look-ahead number already in use. 190*0e209d39SAndroid Build Coastguard Worker * The first look-ahead number is 2; Number 1 (ACCEPTING_UNCONDITIONAL) is reserved 191*0e209d39SAndroid Build Coastguard Worker * for non-lookahead accepting states. See the declarations of RBBIStateTableRowT. */ 192*0e209d39SAndroid Build Coastguard Worker int32_t fLASlotsInUse = ACCEPTING_UNCONDITIONAL; 193*0e209d39SAndroid Build Coastguard Worker 194*0e209d39SAndroid Build Coastguard Worker 195*0e209d39SAndroid Build Coastguard Worker RBBITableBuilder(const RBBITableBuilder &other) = delete; // forbid copying of this class 196*0e209d39SAndroid Build Coastguard Worker RBBITableBuilder &operator=(const RBBITableBuilder &other) = delete; // forbid copying of this class 197*0e209d39SAndroid Build Coastguard Worker }; 198*0e209d39SAndroid Build Coastguard Worker 199*0e209d39SAndroid Build Coastguard Worker // 200*0e209d39SAndroid Build Coastguard Worker // RBBIStateDescriptor - The DFA is constructed as a set of these descriptors, 201*0e209d39SAndroid Build Coastguard Worker // one for each state. 202*0e209d39SAndroid Build Coastguard Worker class RBBIStateDescriptor : public UMemory { 203*0e209d39SAndroid Build Coastguard Worker public: 204*0e209d39SAndroid Build Coastguard Worker UBool fMarked; 205*0e209d39SAndroid Build Coastguard Worker uint32_t fAccepting; 206*0e209d39SAndroid Build Coastguard Worker uint32_t fLookAhead; 207*0e209d39SAndroid Build Coastguard Worker UVector *fTagVals; 208*0e209d39SAndroid Build Coastguard Worker int32_t fTagsIdx; 209*0e209d39SAndroid Build Coastguard Worker UVector *fPositions; // Set of parse tree positions associated 210*0e209d39SAndroid Build Coastguard Worker // with this state. Unordered (it's a set). 211*0e209d39SAndroid Build Coastguard Worker // UVector contents are RBBINode * 212*0e209d39SAndroid Build Coastguard Worker 213*0e209d39SAndroid Build Coastguard Worker UVector32 *fDtran; // Transitions out of this state. 214*0e209d39SAndroid Build Coastguard Worker // indexed by input character 215*0e209d39SAndroid Build Coastguard Worker // contents is int index of dest state 216*0e209d39SAndroid Build Coastguard Worker // in RBBITableBuilder.fDStates 217*0e209d39SAndroid Build Coastguard Worker 218*0e209d39SAndroid Build Coastguard Worker RBBIStateDescriptor(int maxInputSymbol, UErrorCode *fStatus); 219*0e209d39SAndroid Build Coastguard Worker ~RBBIStateDescriptor(); 220*0e209d39SAndroid Build Coastguard Worker 221*0e209d39SAndroid Build Coastguard Worker private: 222*0e209d39SAndroid Build Coastguard Worker RBBIStateDescriptor(const RBBIStateDescriptor &other) = delete; // forbid copying of this class 223*0e209d39SAndroid Build Coastguard Worker RBBIStateDescriptor &operator=(const RBBIStateDescriptor &other) = delete; // forbid copying of this class 224*0e209d39SAndroid Build Coastguard Worker }; 225*0e209d39SAndroid Build Coastguard Worker 226*0e209d39SAndroid Build Coastguard Worker 227*0e209d39SAndroid Build Coastguard Worker 228*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 229*0e209d39SAndroid Build Coastguard Worker 230*0e209d39SAndroid Build Coastguard Worker #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ 231*0e209d39SAndroid Build Coastguard Worker 232*0e209d39SAndroid Build Coastguard Worker #endif 233