// xref: /aosp_15_r20/external/icu/libicu/cts_headers/uitercollationiterator.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2012-2016, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* uitercollationiterator.h
*
* created on: 2012sep23 (from utf16collationiterator.h)
* created by: Markus W. Scherer
*/
13 
14 #ifndef __UITERCOLLATIONITERATOR_H__
15 #define __UITERCOLLATIONITERATOR_H__
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_COLLATION
20 
21 #include "unicode/uiter.h"
22 #include "cmemory.h"
23 #include "collation.h"
24 #include "collationdata.h"
25 #include "collationiterator.h"
26 #include "normalizer2impl.h"
27 
28 U_NAMESPACE_BEGIN
29 
30 /**
31  * UCharIterator-based collation element and character iterator.
32  * Handles normalized text inline, with length or NUL-terminated.
33  * Unnormalized text is handled by a subclass.
34  */
35 class U_I18N_API UIterCollationIterator : public CollationIterator {
36 public:
UIterCollationIterator(const CollationData * d,UBool numeric,UCharIterator & ui)37     UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui)
38             : CollationIterator(d, numeric), iter(ui) {}
39 
40     virtual ~UIterCollationIterator();
41 
42     virtual void resetToOffset(int32_t newOffset) override;
43 
44     virtual int32_t getOffset() const override;
45 
46     virtual UChar32 nextCodePoint(UErrorCode &errorCode) override;
47 
48     virtual UChar32 previousCodePoint(UErrorCode &errorCode) override;
49 
50 protected:
51     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override;
52 
53     virtual char16_t handleGetTrailSurrogate() override;
54 
55     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
56 
57     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
58 
59     UCharIterator &iter;
60 };
61 
62 /**
63  * Incrementally checks the input text for FCD and normalizes where necessary.
64  */
65 class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator {
66 public:
FCDUIterCollationIterator(const CollationData * data,UBool numeric,UCharIterator & ui,int32_t startIndex)67     FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex)
68             : UIterCollationIterator(data, numeric, ui),
69               state(ITER_CHECK_FWD), start(startIndex),
70               nfcImpl(data->nfcImpl) {}
71 
72     virtual ~FCDUIterCollationIterator();
73 
74     virtual void resetToOffset(int32_t newOffset) override;
75 
76     virtual int32_t getOffset() const override;
77 
78     virtual UChar32 nextCodePoint(UErrorCode &errorCode) override;
79 
80     virtual UChar32 previousCodePoint(UErrorCode &errorCode) override;
81 
82 protected:
83     virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override;
84 
85     virtual char16_t handleGetTrailSurrogate() override;
86 
87 
88     virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
89 
90     virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
91 
92 private:
93     /**
94      * Switches to forward checking if possible.
95      */
96     void switchToForward();
97 
98     /**
99      * Extends the FCD text segment forward or normalizes around pos.
100      * @return true if success
101      */
102     UBool nextSegment(UErrorCode &errorCode);
103 
104     /**
105      * Switches to backward checking.
106      */
107     void switchToBackward();
108 
109     /**
110      * Extends the FCD text segment backward or normalizes around pos.
111      * @return true if success
112      */
113     UBool previousSegment(UErrorCode &errorCode);
114 
115     UBool normalize(const UnicodeString &s, UErrorCode &errorCode);
116 
117     enum State {
118         /**
119          * The input text [start..(iter index)[ passes the FCD check.
120          * Moving forward checks incrementally.
121          * pos & limit are undefined.
122          */
123         ITER_CHECK_FWD,
124         /**
125          * The input text [(iter index)..limit[ passes the FCD check.
126          * Moving backward checks incrementally.
127          * start & pos are undefined.
128          */
129         ITER_CHECK_BWD,
130         /**
131          * The input text [start..limit[ passes the FCD check.
132          * pos tracks the current text index.
133          */
134         ITER_IN_FCD_SEGMENT,
135         /**
136          * The input text [start..limit[ failed the FCD check and was normalized.
137          * pos tracks the current index in the normalized string.
138          * The text iterator is at the limit index.
139          */
140         IN_NORM_ITER_AT_LIMIT,
141         /**
142          * The input text [start..limit[ failed the FCD check and was normalized.
143          * pos tracks the current index in the normalized string.
144          * The text iterator is at the start index.
145          */
146         IN_NORM_ITER_AT_START
147     };
148 
149     State state;
150 
151     int32_t start;
152     int32_t pos;
153     int32_t limit;
154 
155     const Normalizer2Impl &nfcImpl;
156     UnicodeString normalized;
157 };
158 
159 U_NAMESPACE_END
160 
161 #endif  // !UCONFIG_NO_COLLATION
162 #endif  // __UITERCOLLATIONITERATOR_H__
163