1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (C) 2012-2016, International Business Machines 6 * Corporation and others. All Rights Reserved. 7 ******************************************************************************* 8 * uitercollationiterator.h 9 * 10 * created on: 2012sep23 (from utf16collationiterator.h) 11 * created by: Markus W. Scherer 12 */ 13 14 #ifndef __UITERCOLLATIONITERATOR_H__ 15 #define __UITERCOLLATIONITERATOR_H__ 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_COLLATION 20 21 #include "unicode/uiter.h" 22 #include "cmemory.h" 23 #include "collation.h" 24 #include "collationdata.h" 25 #include "collationiterator.h" 26 #include "normalizer2impl.h" 27 28 U_NAMESPACE_BEGIN 29 30 /** 31 * UCharIterator-based collation element and character iterator. 32 * Handles normalized text inline, with length or NUL-terminated. 33 * Unnormalized text is handled by a subclass. 34 */ 35 class U_I18N_API UIterCollationIterator : public CollationIterator { 36 public: UIterCollationIterator(const CollationData * d,UBool numeric,UCharIterator & ui)37 UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui) 38 : CollationIterator(d, numeric), iter(ui) {} 39 40 virtual ~UIterCollationIterator(); 41 42 virtual void resetToOffset(int32_t newOffset) override; 43 44 virtual int32_t getOffset() const override; 45 46 virtual UChar32 nextCodePoint(UErrorCode &errorCode) override; 47 48 virtual UChar32 previousCodePoint(UErrorCode &errorCode) override; 49 50 protected: 51 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override; 52 53 virtual char16_t handleGetTrailSurrogate() override; 54 55 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; 56 57 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; 58 59 UCharIterator &iter; 60 }; 61 62 /** 63 * Incrementally checks the input text for FCD and normalizes where necessary. 64 */ 65 class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator { 66 public: FCDUIterCollationIterator(const CollationData * data,UBool numeric,UCharIterator & ui,int32_t startIndex)67 FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex) 68 : UIterCollationIterator(data, numeric, ui), 69 state(ITER_CHECK_FWD), start(startIndex), 70 nfcImpl(data->nfcImpl) {} 71 72 virtual ~FCDUIterCollationIterator(); 73 74 virtual void resetToOffset(int32_t newOffset) override; 75 76 virtual int32_t getOffset() const override; 77 78 virtual UChar32 nextCodePoint(UErrorCode &errorCode) override; 79 80 virtual UChar32 previousCodePoint(UErrorCode &errorCode) override; 81 82 protected: 83 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override; 84 85 virtual char16_t handleGetTrailSurrogate() override; 86 87 88 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; 89 90 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override; 91 92 private: 93 /** 94 * Switches to forward checking if possible. 95 */ 96 void switchToForward(); 97 98 /** 99 * Extends the FCD text segment forward or normalizes around pos. 100 * @return true if success 101 */ 102 UBool nextSegment(UErrorCode &errorCode); 103 104 /** 105 * Switches to backward checking. 106 */ 107 void switchToBackward(); 108 109 /** 110 * Extends the FCD text segment backward or normalizes around pos. 111 * @return true if success 112 */ 113 UBool previousSegment(UErrorCode &errorCode); 114 115 UBool normalize(const UnicodeString &s, UErrorCode &errorCode); 116 117 enum State { 118 /** 119 * The input text [start..(iter index)[ passes the FCD check. 120 * Moving forward checks incrementally. 121 * pos & limit are undefined. 122 */ 123 ITER_CHECK_FWD, 124 /** 125 * The input text [(iter index)..limit[ passes the FCD check. 126 * Moving backward checks incrementally. 127 * start & pos are undefined. 128 */ 129 ITER_CHECK_BWD, 130 /** 131 * The input text [start..limit[ passes the FCD check. 132 * pos tracks the current text index. 133 */ 134 ITER_IN_FCD_SEGMENT, 135 /** 136 * The input text [start..limit[ failed the FCD check and was normalized. 137 * pos tracks the current index in the normalized string. 138 * The text iterator is at the limit index. 139 */ 140 IN_NORM_ITER_AT_LIMIT, 141 /** 142 * The input text [start..limit[ failed the FCD check and was normalized. 143 * pos tracks the current index in the normalized string. 144 * The text iterator is at the start index. 145 */ 146 IN_NORM_ITER_AT_START 147 }; 148 149 State state; 150 151 int32_t start; 152 int32_t pos; 153 int32_t limit; 154 155 const Normalizer2Impl &nfcImpl; 156 UnicodeString normalized; 157 }; 158 159 U_NAMESPACE_END 160 161 #endif // !UCONFIG_NO_COLLATION 162 #endif // __UITERCOLLATIONITERATOR_H__ 163