1*0e209d39SAndroid Build Coastguard Worker // Copyright (C) 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker 4*0e209d39SAndroid Build Coastguard Worker // file: rbbi_cache.h 5*0e209d39SAndroid Build Coastguard Worker // 6*0e209d39SAndroid Build Coastguard Worker #ifndef RBBI_CACHE_H 7*0e209d39SAndroid Build Coastguard Worker #define RBBI_CACHE_H 8*0e209d39SAndroid Build Coastguard Worker 9*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 10*0e209d39SAndroid Build Coastguard Worker 11*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_BREAK_ITERATION 12*0e209d39SAndroid Build Coastguard Worker 13*0e209d39SAndroid Build Coastguard Worker #include "unicode/rbbi.h" 14*0e209d39SAndroid Build Coastguard Worker #include "unicode/uobject.h" 15*0e209d39SAndroid Build Coastguard Worker 16*0e209d39SAndroid Build Coastguard Worker #include "uvectr32.h" 17*0e209d39SAndroid Build Coastguard Worker 18*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 19*0e209d39SAndroid Build Coastguard Worker 20*0e209d39SAndroid Build Coastguard Worker /* DictionaryCache stores the boundaries obtained from a run of dictionary characters. 21*0e209d39SAndroid Build Coastguard Worker * Dictionary boundaries are moved first to this cache, then from here 22*0e209d39SAndroid Build Coastguard Worker * to the main BreakCache, where they may inter-leave with non-dictionary 23*0e209d39SAndroid Build Coastguard Worker * boundaries. The public BreakIterator API always fetches directly 24*0e209d39SAndroid Build Coastguard Worker * from the main BreakCache, not from here. 25*0e209d39SAndroid Build Coastguard Worker * 26*0e209d39SAndroid Build Coastguard Worker * In common situations, the number of boundaries in a single dictionary run 27*0e209d39SAndroid Build Coastguard Worker * should be quite small, it will be terminated by punctuation, spaces, 28*0e209d39SAndroid Build Coastguard Worker * or any other non-dictionary characters. The main BreakCache may end 29*0e209d39SAndroid Build Coastguard Worker * up with boundaries from multiple dictionary based runs. 30*0e209d39SAndroid Build Coastguard Worker * 31*0e209d39SAndroid Build Coastguard Worker * The boundaries are stored in a simple ArrayList (vector), with the 32*0e209d39SAndroid Build Coastguard Worker * assumption that they will be accessed sequentially. 33*0e209d39SAndroid Build Coastguard Worker */ 34*0e209d39SAndroid Build Coastguard Worker class RuleBasedBreakIterator::DictionaryCache: public UMemory { 35*0e209d39SAndroid Build Coastguard Worker public: 36*0e209d39SAndroid Build Coastguard Worker DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status); 37*0e209d39SAndroid Build Coastguard Worker ~DictionaryCache(); 38*0e209d39SAndroid Build Coastguard Worker 39*0e209d39SAndroid Build Coastguard Worker void reset(); 40*0e209d39SAndroid Build Coastguard Worker 41*0e209d39SAndroid Build Coastguard Worker UBool following(int32_t fromPos, int32_t *pos, int32_t *statusIndex); 42*0e209d39SAndroid Build Coastguard Worker UBool preceding(int32_t fromPos, int32_t *pos, int32_t *statusIndex); 43*0e209d39SAndroid Build Coastguard Worker 44*0e209d39SAndroid Build Coastguard Worker /** 45*0e209d39SAndroid Build Coastguard Worker * Populate the cache with the dictionary based boundaries within a region of text. 46*0e209d39SAndroid Build Coastguard Worker * @param startPos The start position of a range of text 47*0e209d39SAndroid Build Coastguard Worker * @param endPos The end position of a range of text 48*0e209d39SAndroid Build Coastguard Worker * @param firstRuleStatus The rule status index that applies to the break at startPos 49*0e209d39SAndroid Build Coastguard Worker * @param otherRuleStatus The rule status index that applies to boundaries other than startPos 50*0e209d39SAndroid Build Coastguard Worker * @internal 51*0e209d39SAndroid Build Coastguard Worker */ 52*0e209d39SAndroid Build Coastguard Worker void populateDictionary(int32_t startPos, int32_t endPos, 53*0e209d39SAndroid Build Coastguard Worker int32_t firstRuleStatus, int32_t otherRuleStatus); 54*0e209d39SAndroid Build Coastguard Worker 55*0e209d39SAndroid Build Coastguard Worker 56*0e209d39SAndroid Build Coastguard Worker 57*0e209d39SAndroid Build Coastguard Worker RuleBasedBreakIterator *fBI; 58*0e209d39SAndroid Build Coastguard Worker 59*0e209d39SAndroid Build Coastguard Worker UVector32 fBreaks; // A vector containing the boundaries. 60*0e209d39SAndroid Build Coastguard Worker int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following() 61*0e209d39SAndroid Build Coastguard Worker // or preceding(). Optimizes sequential access. 62*0e209d39SAndroid Build Coastguard Worker int32_t fStart; // Text position of first boundary in cache. 63*0e209d39SAndroid Build Coastguard Worker int32_t fLimit; // Last boundary in cache. Which is the limit of the 64*0e209d39SAndroid Build Coastguard Worker // text segment being handled by the dictionary. 65*0e209d39SAndroid Build Coastguard Worker int32_t fFirstRuleStatusIndex; // Rule status info for first boundary. 66*0e209d39SAndroid Build Coastguard Worker int32_t fOtherRuleStatusIndex; // Rule status info for 2nd through last boundaries. 67*0e209d39SAndroid Build Coastguard Worker }; 68*0e209d39SAndroid Build Coastguard Worker 69*0e209d39SAndroid Build Coastguard Worker 70*0e209d39SAndroid Build Coastguard Worker /* 71*0e209d39SAndroid Build Coastguard Worker * class BreakCache 72*0e209d39SAndroid Build Coastguard Worker * 73*0e209d39SAndroid Build Coastguard Worker * Cache of break boundary positions and rule status values. 74*0e209d39SAndroid Build Coastguard Worker * Break iterator API functions, next(), previous(), etc., will use cached results 75*0e209d39SAndroid Build Coastguard Worker * when possible, and otherwise cache new results as they are obtained. 76*0e209d39SAndroid Build Coastguard Worker * 77*0e209d39SAndroid Build Coastguard Worker * Uniformly caches both dictionary and rule based (non-dictionary) boundaries. 78*0e209d39SAndroid Build Coastguard Worker * 79*0e209d39SAndroid Build Coastguard Worker * The cache is implemented as a single circular buffer. 80*0e209d39SAndroid Build Coastguard Worker */ 81*0e209d39SAndroid Build Coastguard Worker 82*0e209d39SAndroid Build Coastguard Worker /* 83*0e209d39SAndroid Build Coastguard Worker * size of the circular cache buffer. 84*0e209d39SAndroid Build Coastguard Worker */ 85*0e209d39SAndroid Build Coastguard Worker 86*0e209d39SAndroid Build Coastguard Worker class RuleBasedBreakIterator::BreakCache: public UMemory { 87*0e209d39SAndroid Build Coastguard Worker public: 88*0e209d39SAndroid Build Coastguard Worker BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status); 89*0e209d39SAndroid Build Coastguard Worker virtual ~BreakCache(); 90*0e209d39SAndroid Build Coastguard Worker void reset(int32_t pos = 0, int32_t ruleStatus = 0); next()91*0e209d39SAndroid Build Coastguard Worker void next() { if (fBufIdx == fEndBufIdx) { 92*0e209d39SAndroid Build Coastguard Worker nextOL(); 93*0e209d39SAndroid Build Coastguard Worker } else { 94*0e209d39SAndroid Build Coastguard Worker fBufIdx = modChunkSize(fBufIdx + 1); 95*0e209d39SAndroid Build Coastguard Worker fTextIdx = fBI->fPosition = fBoundaries[fBufIdx]; 96*0e209d39SAndroid Build Coastguard Worker fBI->fRuleStatusIndex = fStatuses[fBufIdx]; 97*0e209d39SAndroid Build Coastguard Worker } 98*0e209d39SAndroid Build Coastguard Worker } 99*0e209d39SAndroid Build Coastguard Worker 100*0e209d39SAndroid Build Coastguard Worker 101*0e209d39SAndroid Build Coastguard Worker void nextOL(); 102*0e209d39SAndroid Build Coastguard Worker void previous(UErrorCode &status); 103*0e209d39SAndroid Build Coastguard Worker 104*0e209d39SAndroid Build Coastguard Worker // Move the iteration state to the position following the startPosition. 105*0e209d39SAndroid Build Coastguard Worker // Input position must be pinned to the input length. 106*0e209d39SAndroid Build Coastguard Worker void following(int32_t startPosition, UErrorCode &status); 107*0e209d39SAndroid Build Coastguard Worker 108*0e209d39SAndroid Build Coastguard Worker void preceding(int32_t startPosition, UErrorCode &status); 109*0e209d39SAndroid Build Coastguard Worker 110*0e209d39SAndroid Build Coastguard Worker /* 111*0e209d39SAndroid Build Coastguard Worker * Update the state of the public BreakIterator (fBI) to reflect the 112*0e209d39SAndroid Build Coastguard Worker * current state of the break iterator cache (this). 113*0e209d39SAndroid Build Coastguard Worker */ 114*0e209d39SAndroid Build Coastguard Worker int32_t current(); 115*0e209d39SAndroid Build Coastguard Worker 116*0e209d39SAndroid Build Coastguard Worker /** 117*0e209d39SAndroid Build Coastguard Worker * Add boundaries to the cache near the specified position. 118*0e209d39SAndroid Build Coastguard Worker * The given position need not be a boundary itself. 119*0e209d39SAndroid Build Coastguard Worker * The input position must be within the range of the text, and 120*0e209d39SAndroid Build Coastguard Worker * on a code point boundary. 121*0e209d39SAndroid Build Coastguard Worker * If the requested position is a break boundary, leave the iteration 122*0e209d39SAndroid Build Coastguard Worker * position on it. 123*0e209d39SAndroid Build Coastguard Worker * If the requested position is not a boundary, leave the iteration 124*0e209d39SAndroid Build Coastguard Worker * position on the preceding boundary and include both the 125*0e209d39SAndroid Build Coastguard Worker * preceding and following boundaries in the cache. 126*0e209d39SAndroid Build Coastguard Worker * Additional boundaries, either preceding or following, may be added 127*0e209d39SAndroid Build Coastguard Worker * to the cache as a side effect. 128*0e209d39SAndroid Build Coastguard Worker * 129*0e209d39SAndroid Build Coastguard Worker * Return false if the operation failed. 130*0e209d39SAndroid Build Coastguard Worker */ 131*0e209d39SAndroid Build Coastguard Worker UBool populateNear(int32_t position, UErrorCode &status); 132*0e209d39SAndroid Build Coastguard Worker 133*0e209d39SAndroid Build Coastguard Worker /** 134*0e209d39SAndroid Build Coastguard Worker * Add boundary(s) to the cache following the current last boundary. 135*0e209d39SAndroid Build Coastguard Worker * Return false if at the end of the text, and no more boundaries can be added. 136*0e209d39SAndroid Build Coastguard Worker * Leave iteration position at the first newly added boundary, or unchanged if no boundary was added. 137*0e209d39SAndroid Build Coastguard Worker */ 138*0e209d39SAndroid Build Coastguard Worker UBool populateFollowing(); 139*0e209d39SAndroid Build Coastguard Worker 140*0e209d39SAndroid Build Coastguard Worker /** 141*0e209d39SAndroid Build Coastguard Worker * Add one or more boundaries to the cache preceding the first currently cached boundary. 142*0e209d39SAndroid Build Coastguard Worker * Leave the iteration position on the first added boundary. 143*0e209d39SAndroid Build Coastguard Worker * Return false if no boundaries could be added (if at the start of the text.) 144*0e209d39SAndroid Build Coastguard Worker */ 145*0e209d39SAndroid Build Coastguard Worker UBool populatePreceding(UErrorCode &status); 146*0e209d39SAndroid Build Coastguard Worker 147*0e209d39SAndroid Build Coastguard Worker enum UpdatePositionValues { 148*0e209d39SAndroid Build Coastguard Worker RetainCachePosition = 0, 149*0e209d39SAndroid Build Coastguard Worker UpdateCachePosition = 1 150*0e209d39SAndroid Build Coastguard Worker }; 151*0e209d39SAndroid Build Coastguard Worker 152*0e209d39SAndroid Build Coastguard Worker /* 153*0e209d39SAndroid Build Coastguard Worker * Add the boundary following the current position. 154*0e209d39SAndroid Build Coastguard Worker * The current position can be left as it was, or changed to the newly added boundary, 155*0e209d39SAndroid Build Coastguard Worker * as specified by the update parameter. 156*0e209d39SAndroid Build Coastguard Worker */ 157*0e209d39SAndroid Build Coastguard Worker void addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update); 158*0e209d39SAndroid Build Coastguard Worker 159*0e209d39SAndroid Build Coastguard Worker 160*0e209d39SAndroid Build Coastguard Worker /* 161*0e209d39SAndroid Build Coastguard Worker * Add the boundary preceding the current position. 162*0e209d39SAndroid Build Coastguard Worker * The current position can be left as it was, or changed to the newly added boundary, 163*0e209d39SAndroid Build Coastguard Worker * as specified by the update parameter. 164*0e209d39SAndroid Build Coastguard Worker */ 165*0e209d39SAndroid Build Coastguard Worker bool addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update); 166*0e209d39SAndroid Build Coastguard Worker 167*0e209d39SAndroid Build Coastguard Worker /** 168*0e209d39SAndroid Build Coastguard Worker * Set the cache position to the specified position, or, if the position 169*0e209d39SAndroid Build Coastguard Worker * falls between to cached boundaries, to the preceding boundary. 170*0e209d39SAndroid Build Coastguard Worker * Fails if the requested position is outside of the range of boundaries currently held by the cache. 171*0e209d39SAndroid Build Coastguard Worker * The startPosition must be on a code point boundary. 172*0e209d39SAndroid Build Coastguard Worker * 173*0e209d39SAndroid Build Coastguard Worker * Return true if successful, false if the specified position is after 174*0e209d39SAndroid Build Coastguard Worker * the last cached boundary or before the first. 175*0e209d39SAndroid Build Coastguard Worker */ 176*0e209d39SAndroid Build Coastguard Worker UBool seek(int32_t startPosition); 177*0e209d39SAndroid Build Coastguard Worker 178*0e209d39SAndroid Build Coastguard Worker void dumpCache(); 179*0e209d39SAndroid Build Coastguard Worker 180*0e209d39SAndroid Build Coastguard Worker private: modChunkSize(int index)181*0e209d39SAndroid Build Coastguard Worker static inline int32_t modChunkSize(int index) { return index & (CACHE_SIZE - 1); } 182*0e209d39SAndroid Build Coastguard Worker 183*0e209d39SAndroid Build Coastguard Worker static constexpr int32_t CACHE_SIZE = 128; 184*0e209d39SAndroid Build Coastguard Worker static_assert((CACHE_SIZE & (CACHE_SIZE-1)) == 0, "CACHE_SIZE must be power of two."); 185*0e209d39SAndroid Build Coastguard Worker 186*0e209d39SAndroid Build Coastguard Worker RuleBasedBreakIterator *fBI; 187*0e209d39SAndroid Build Coastguard Worker int32_t fStartBufIdx; 188*0e209d39SAndroid Build Coastguard Worker int32_t fEndBufIdx; // inclusive 189*0e209d39SAndroid Build Coastguard Worker 190*0e209d39SAndroid Build Coastguard Worker int32_t fTextIdx; 191*0e209d39SAndroid Build Coastguard Worker int32_t fBufIdx; 192*0e209d39SAndroid Build Coastguard Worker 193*0e209d39SAndroid Build Coastguard Worker int32_t fBoundaries[CACHE_SIZE]; 194*0e209d39SAndroid Build Coastguard Worker uint16_t fStatuses[CACHE_SIZE]; 195*0e209d39SAndroid Build Coastguard Worker 196*0e209d39SAndroid Build Coastguard Worker UVector32 fSideBuffer; 197*0e209d39SAndroid Build Coastguard Worker }; 198*0e209d39SAndroid Build Coastguard Worker 199*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 200*0e209d39SAndroid Build Coastguard Worker 201*0e209d39SAndroid Build Coastguard Worker #endif // #if !UCONFIG_NO_BREAK_ITERATION 202*0e209d39SAndroid Build Coastguard Worker 203*0e209d39SAndroid Build Coastguard Worker #endif // RBBI_CACHE_H 204