1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others. 2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html 3*0e209d39SAndroid Build Coastguard Worker /* 4*0e209d39SAndroid Build Coastguard Worker ******************************************************************************* 5*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 2010-2014, International Business Machines 6*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved. 7*0e209d39SAndroid Build Coastguard Worker ******************************************************************************* 8*0e209d39SAndroid Build Coastguard Worker * utf16collationiterator.h 9*0e209d39SAndroid Build Coastguard Worker * 10*0e209d39SAndroid Build Coastguard Worker * created on: 2010oct27 11*0e209d39SAndroid Build Coastguard Worker * created by: Markus W. Scherer 12*0e209d39SAndroid Build Coastguard Worker */ 13*0e209d39SAndroid Build Coastguard Worker 14*0e209d39SAndroid Build Coastguard Worker #ifndef __UTF16COLLATIONITERATOR_H__ 15*0e209d39SAndroid Build Coastguard Worker #define __UTF16COLLATIONITERATOR_H__ 16*0e209d39SAndroid Build Coastguard Worker 17*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h" 18*0e209d39SAndroid Build Coastguard Worker 19*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_COLLATION 20*0e209d39SAndroid Build Coastguard Worker 21*0e209d39SAndroid Build Coastguard Worker #include "cmemory.h" 22*0e209d39SAndroid Build Coastguard Worker #include "collation.h" 23*0e209d39SAndroid Build Coastguard Worker #include "collationdata.h" 24*0e209d39SAndroid Build Coastguard Worker #include "collationiterator.h" 25*0e209d39SAndroid Build Coastguard Worker #include "normalizer2impl.h" 26*0e209d39SAndroid Build Coastguard Worker 27*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN 28*0e209d39SAndroid Build Coastguard Worker 29*0e209d39SAndroid Build Coastguard Worker /** 30*0e209d39SAndroid Build Coastguard Worker * UTF-16 collation element and character iterator. 31*0e209d39SAndroid Build Coastguard Worker * Handles normalized UTF-16 text inline, with length or NUL-terminated. 32*0e209d39SAndroid Build Coastguard Worker * Unnormalized text is handled by a subclass. 33*0e209d39SAndroid Build Coastguard Worker */ 34*0e209d39SAndroid Build Coastguard Worker class U_I18N_API UTF16CollationIterator : public CollationIterator { 35*0e209d39SAndroid Build Coastguard Worker public: UTF16CollationIterator(const CollationData * d,UBool numeric,const char16_t * s,const char16_t * p,const char16_t * lim)36*0e209d39SAndroid Build Coastguard Worker UTF16CollationIterator(const CollationData *d, UBool numeric, 37*0e209d39SAndroid Build Coastguard Worker const char16_t *s, const char16_t *p, const char16_t *lim) 38*0e209d39SAndroid Build Coastguard Worker : CollationIterator(d, numeric), 39*0e209d39SAndroid Build Coastguard Worker start(s), pos(p), limit(lim) {} 40*0e209d39SAndroid Build Coastguard Worker 41*0e209d39SAndroid Build Coastguard Worker UTF16CollationIterator(const UTF16CollationIterator &other, const char16_t *newText); 42*0e209d39SAndroid Build Coastguard Worker 43*0e209d39SAndroid Build Coastguard Worker virtual ~UTF16CollationIterator(); 44*0e209d39SAndroid Build Coastguard Worker 45*0e209d39SAndroid Build Coastguard Worker virtual bool operator==(const CollationIterator &other) const override; 46*0e209d39SAndroid Build Coastguard Worker 47*0e209d39SAndroid Build Coastguard Worker virtual void resetToOffset(int32_t newOffset) override; 48*0e209d39SAndroid Build Coastguard Worker 49*0e209d39SAndroid Build Coastguard Worker virtual int32_t getOffset() const override; 50*0e209d39SAndroid Build Coastguard Worker setText(const char16_t * s,const char16_t * lim)51*0e209d39SAndroid Build Coastguard Worker void setText(const char16_t *s, const char16_t *lim) { 52*0e209d39SAndroid Build Coastguard Worker reset(); 53*0e209d39SAndroid Build Coastguard Worker start = pos = s; 54*0e209d39SAndroid Build Coastguard Worker limit = lim; 55*0e209d39SAndroid Build Coastguard Worker } 56*0e209d39SAndroid Build Coastguard Worker 57*0e209d39SAndroid Build Coastguard Worker virtual UChar32 nextCodePoint(UErrorCode &errorCode) override; 58*0e209d39SAndroid Build Coastguard Worker 59*0e209d39SAndroid Build Coastguard Worker virtual UChar32 previousCodePoint(UErrorCode &errorCode) override; 60*0e209d39SAndroid Build Coastguard Worker 61*0e209d39SAndroid Build Coastguard Worker protected: 62*0e209d39SAndroid Build Coastguard Worker // Copy constructor only for subclasses which set the pointers. UTF16CollationIterator(const UTF16CollationIterator & other)63*0e209d39SAndroid Build Coastguard Worker UTF16CollationIterator(const UTF16CollationIterator &other) 64*0e209d39SAndroid Build Coastguard Worker : CollationIterator(other), 65*0e209d39SAndroid Build Coastguard Worker start(nullptr), pos(nullptr), limit(nullptr) {} 66*0e209d39SAndroid Build Coastguard Worker 67*0e209d39SAndroid Build Coastguard Worker virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override; 68*0e209d39SAndroid Build Coastguard Worker 69*0e209d39SAndroid Build Coastguard Worker virtual char16_t handleGetTrailSurrogate() override; 70*0e209d39SAndroid Build Coastguard Worker 71*0e209d39SAndroid Build Coastguard Worker virtual UBool foundNULTerminator() override; 72*0e209d39SAndroid Build Coastguard Worker 73*0e209d39SAndroid Build Coastguard Worker virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; 74*0e209d39SAndroid Build Coastguard Worker 75*0e209d39SAndroid Build Coastguard Worker virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; 76*0e209d39SAndroid Build Coastguard Worker 77*0e209d39SAndroid Build Coastguard Worker // UTF-16 string pointers. 78*0e209d39SAndroid Build Coastguard Worker // limit can be nullptr for NUL-terminated strings. 79*0e209d39SAndroid Build Coastguard Worker const char16_t *start, *pos, *limit; 80*0e209d39SAndroid Build Coastguard Worker }; 81*0e209d39SAndroid Build Coastguard Worker 82*0e209d39SAndroid Build Coastguard Worker /** 83*0e209d39SAndroid Build Coastguard Worker * Incrementally checks the input text for FCD and normalizes where necessary. 84*0e209d39SAndroid Build Coastguard Worker */ 85*0e209d39SAndroid Build Coastguard Worker class U_I18N_API FCDUTF16CollationIterator : public UTF16CollationIterator { 86*0e209d39SAndroid Build Coastguard Worker public: FCDUTF16CollationIterator(const CollationData * data,UBool numeric,const char16_t * s,const char16_t * p,const char16_t * lim)87*0e209d39SAndroid Build Coastguard Worker FCDUTF16CollationIterator(const CollationData *data, UBool numeric, 88*0e209d39SAndroid Build Coastguard Worker const char16_t *s, const char16_t *p, const char16_t *lim) 89*0e209d39SAndroid Build Coastguard Worker : UTF16CollationIterator(data, numeric, s, p, lim), 90*0e209d39SAndroid Build Coastguard Worker rawStart(s), segmentStart(p), segmentLimit(nullptr), rawLimit(lim), 91*0e209d39SAndroid Build Coastguard Worker nfcImpl(data->nfcImpl), 92*0e209d39SAndroid Build Coastguard Worker checkDir(1) {} 93*0e209d39SAndroid Build Coastguard Worker 94*0e209d39SAndroid Build Coastguard Worker FCDUTF16CollationIterator(const FCDUTF16CollationIterator &other, const char16_t *newText); 95*0e209d39SAndroid Build Coastguard Worker 96*0e209d39SAndroid Build Coastguard Worker virtual ~FCDUTF16CollationIterator(); 97*0e209d39SAndroid Build Coastguard Worker 98*0e209d39SAndroid Build Coastguard Worker virtual bool operator==(const CollationIterator &other) const override; 99*0e209d39SAndroid Build Coastguard Worker 100*0e209d39SAndroid Build Coastguard Worker virtual void resetToOffset(int32_t newOffset) override; 101*0e209d39SAndroid Build Coastguard Worker 102*0e209d39SAndroid Build Coastguard Worker virtual int32_t getOffset() const override; 103*0e209d39SAndroid Build Coastguard Worker 104*0e209d39SAndroid Build Coastguard Worker virtual UChar32 nextCodePoint(UErrorCode &errorCode) override; 105*0e209d39SAndroid Build Coastguard Worker 106*0e209d39SAndroid Build Coastguard Worker virtual UChar32 previousCodePoint(UErrorCode &errorCode) override; 107*0e209d39SAndroid Build Coastguard Worker 108*0e209d39SAndroid Build Coastguard Worker protected: 109*0e209d39SAndroid Build Coastguard Worker virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override; 110*0e209d39SAndroid Build Coastguard Worker 111*0e209d39SAndroid Build Coastguard Worker virtual UBool foundNULTerminator() override; 112*0e209d39SAndroid Build Coastguard Worker 113*0e209d39SAndroid Build Coastguard Worker virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; 114*0e209d39SAndroid Build Coastguard Worker 115*0e209d39SAndroid Build Coastguard Worker virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; 116*0e209d39SAndroid Build Coastguard Worker 117*0e209d39SAndroid Build Coastguard Worker private: 118*0e209d39SAndroid Build Coastguard Worker /** 119*0e209d39SAndroid Build Coastguard Worker * Switches to forward checking if possible. 120*0e209d39SAndroid Build Coastguard Worker * To be called when checkDir < 0 || (checkDir == 0 && pos == limit). 121*0e209d39SAndroid Build Coastguard Worker * Returns with checkDir > 0 || (checkDir == 0 && pos != limit). 122*0e209d39SAndroid Build Coastguard Worker */ 123*0e209d39SAndroid Build Coastguard Worker void switchToForward(); 124*0e209d39SAndroid Build Coastguard Worker 125*0e209d39SAndroid Build Coastguard Worker /** 126*0e209d39SAndroid Build Coastguard Worker * Extend the FCD text segment forward or normalize around pos. 127*0e209d39SAndroid Build Coastguard Worker * To be called when checkDir > 0 && pos != limit. 128*0e209d39SAndroid Build Coastguard Worker * @return true if success, checkDir == 0 and pos != limit 129*0e209d39SAndroid Build Coastguard Worker */ 130*0e209d39SAndroid Build Coastguard Worker UBool nextSegment(UErrorCode &errorCode); 131*0e209d39SAndroid Build Coastguard Worker 132*0e209d39SAndroid Build Coastguard Worker /** 133*0e209d39SAndroid Build Coastguard Worker * Switches to backward checking. 134*0e209d39SAndroid Build Coastguard Worker * To be called when checkDir > 0 || (checkDir == 0 && pos == start). 135*0e209d39SAndroid Build Coastguard Worker * Returns with checkDir < 0 || (checkDir == 0 && pos != start). 136*0e209d39SAndroid Build Coastguard Worker */ 137*0e209d39SAndroid Build Coastguard Worker void switchToBackward(); 138*0e209d39SAndroid Build Coastguard Worker 139*0e209d39SAndroid Build Coastguard Worker /** 140*0e209d39SAndroid Build Coastguard Worker * Extend the FCD text segment backward or normalize around pos. 141*0e209d39SAndroid Build Coastguard Worker * To be called when checkDir < 0 && pos != start. 142*0e209d39SAndroid Build Coastguard Worker * @return true if success, checkDir == 0 and pos != start 143*0e209d39SAndroid Build Coastguard Worker */ 144*0e209d39SAndroid Build Coastguard Worker UBool previousSegment(UErrorCode &errorCode); 145*0e209d39SAndroid Build Coastguard Worker 146*0e209d39SAndroid Build Coastguard Worker UBool normalize(const char16_t *from, const char16_t *to, UErrorCode &errorCode); 147*0e209d39SAndroid Build Coastguard Worker 148*0e209d39SAndroid Build Coastguard Worker // Text pointers: The input text is [rawStart, rawLimit[ 149*0e209d39SAndroid Build Coastguard Worker // where rawLimit can be nullptr for NUL-terminated text. 150*0e209d39SAndroid Build Coastguard Worker // 151*0e209d39SAndroid Build Coastguard Worker // checkDir > 0: 152*0e209d39SAndroid Build Coastguard Worker // 153*0e209d39SAndroid Build Coastguard Worker // The input text [segmentStart..pos[ passes the FCD check. 154*0e209d39SAndroid Build Coastguard Worker // Moving forward checks incrementally. 155*0e209d39SAndroid Build Coastguard Worker // segmentLimit is undefined. limit == rawLimit. 156*0e209d39SAndroid Build Coastguard Worker // 157*0e209d39SAndroid Build Coastguard Worker // checkDir < 0: 158*0e209d39SAndroid Build Coastguard Worker // The input text [pos..segmentLimit[ passes the FCD check. 159*0e209d39SAndroid Build Coastguard Worker // Moving backward checks incrementally. 160*0e209d39SAndroid Build Coastguard Worker // segmentStart is undefined, start == rawStart. 161*0e209d39SAndroid Build Coastguard Worker // 162*0e209d39SAndroid Build Coastguard Worker // checkDir == 0: 163*0e209d39SAndroid Build Coastguard Worker // 164*0e209d39SAndroid Build Coastguard Worker // The input text [segmentStart..segmentLimit[ is being processed. 165*0e209d39SAndroid Build Coastguard Worker // These pointers are at FCD boundaries. 166*0e209d39SAndroid Build Coastguard Worker // Either this text segment already passes the FCD check 167*0e209d39SAndroid Build Coastguard Worker // and segmentStart==start<=pos<=limit==segmentLimit, 168*0e209d39SAndroid Build Coastguard Worker // or the current segment had to be normalized so that 169*0e209d39SAndroid Build Coastguard Worker // [segmentStart..segmentLimit[ turned into the normalized string, 170*0e209d39SAndroid Build Coastguard Worker // corresponding to normalized.getBuffer()==start<=pos<=limit==start+normalized.length(). 171*0e209d39SAndroid Build Coastguard Worker const char16_t *rawStart; 172*0e209d39SAndroid Build Coastguard Worker const char16_t *segmentStart; 173*0e209d39SAndroid Build Coastguard Worker const char16_t *segmentLimit; 174*0e209d39SAndroid Build Coastguard Worker // rawLimit==nullptr for a NUL-terminated string. 175*0e209d39SAndroid Build Coastguard Worker const char16_t *rawLimit; 176*0e209d39SAndroid Build Coastguard Worker 177*0e209d39SAndroid Build Coastguard Worker const Normalizer2Impl &nfcImpl; 178*0e209d39SAndroid Build Coastguard Worker UnicodeString normalized; 179*0e209d39SAndroid Build Coastguard Worker // Direction of incremental FCD check. See comments before rawStart. 180*0e209d39SAndroid Build Coastguard Worker int8_t checkDir; 181*0e209d39SAndroid Build Coastguard Worker }; 182*0e209d39SAndroid Build Coastguard Worker 183*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END 184*0e209d39SAndroid Build Coastguard Worker 185*0e209d39SAndroid Build Coastguard Worker #endif // !UCONFIG_NO_COLLATION 186*0e209d39SAndroid Build Coastguard Worker #endif // __UTF16COLLATIONITERATOR_H__ 187