xref: /aosp_15_r20/external/icu/libandroidicu/include/unicode/utf16.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
5*0e209d39SAndroid Build Coastguard Worker *
6*0e209d39SAndroid Build Coastguard Worker *   Copyright (C) 1999-2012, International Business Machines
7*0e209d39SAndroid Build Coastguard Worker *   Corporation and others.  All Rights Reserved.
8*0e209d39SAndroid Build Coastguard Worker *
9*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
10*0e209d39SAndroid Build Coastguard Worker *   file name:  utf16.h
11*0e209d39SAndroid Build Coastguard Worker *   encoding:   UTF-8
12*0e209d39SAndroid Build Coastguard Worker *   tab size:   8 (not used)
13*0e209d39SAndroid Build Coastguard Worker *   indentation:4
14*0e209d39SAndroid Build Coastguard Worker *
15*0e209d39SAndroid Build Coastguard Worker *   created on: 1999sep09
16*0e209d39SAndroid Build Coastguard Worker *   created by: Markus W. Scherer
17*0e209d39SAndroid Build Coastguard Worker */
18*0e209d39SAndroid Build Coastguard Worker 
19*0e209d39SAndroid Build Coastguard Worker /**
20*0e209d39SAndroid Build Coastguard Worker  * \file
21*0e209d39SAndroid Build Coastguard Worker  * \brief C API: 16-bit Unicode handling macros
22*0e209d39SAndroid Build Coastguard Worker  *
23*0e209d39SAndroid Build Coastguard Worker  * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
24*0e209d39SAndroid Build Coastguard Worker  *
25*0e209d39SAndroid Build Coastguard Worker  * For more information see utf.h and the ICU User Guide Strings chapter
26*0e209d39SAndroid Build Coastguard Worker  * (https://unicode-org.github.io/icu/userguide/strings).
27*0e209d39SAndroid Build Coastguard Worker  *
28*0e209d39SAndroid Build Coastguard Worker  * <em>Usage:</em>
 * ICU coding guidelines for if() statements should be followed when using these macros.
 * Compound statements (curly braces {}) must be used for if-else-while...
 * bodies, and all macro statements should be terminated with a semicolon.
32*0e209d39SAndroid Build Coastguard Worker  */
33*0e209d39SAndroid Build Coastguard Worker 
34*0e209d39SAndroid Build Coastguard Worker #ifndef __UTF16_H__
35*0e209d39SAndroid Build Coastguard Worker #define __UTF16_H__
36*0e209d39SAndroid Build Coastguard Worker 
37*0e209d39SAndroid Build Coastguard Worker #include <stdbool.h>
38*0e209d39SAndroid Build Coastguard Worker #include "unicode/umachine.h"
39*0e209d39SAndroid Build Coastguard Worker #ifndef __UTF_H__
40*0e209d39SAndroid Build Coastguard Worker #   include "unicode/utf.h"
41*0e209d39SAndroid Build Coastguard Worker #endif
42*0e209d39SAndroid Build Coastguard Worker 
43*0e209d39SAndroid Build Coastguard Worker /* single-code point definitions -------------------------------------------- */
44*0e209d39SAndroid Build Coastguard Worker 
/**
 * Tests whether a single 16-bit code unit is a complete code point by itself,
 * i.e. it is a BMP value that is not a surrogate.
 * Defined as the logical negation of U_IS_SURROGATE(c).
 * The whole expansion is parenthesized so that it behaves as one operand
 * in any surrounding expression.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
52*0e209d39SAndroid Build Coastguard Worker 
/**
 * Tests whether a code unit is a lead (high) surrogate, U+d800..U+dbff.
 * All bits above the low 10 are compared against 0xd800, so values
 * outside the 16-bit range never match.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_LEAD(c) (0xd800==((c)&0xfffffc00))
60*0e209d39SAndroid Build Coastguard Worker 
/**
 * Tests whether a code unit is a trail (low) surrogate, U+dc00..U+dfff.
 * All bits above the low 10 are compared against 0xdc00, so values
 * outside the 16-bit range never match.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_TRAIL(c) (0xdc00==((c)&0xfffffc00))
68*0e209d39SAndroid Build Coastguard Worker 
/**
 * Tests whether a code unit is any surrogate (U+d800..U+dfff), lead or trail.
 * This is the UTF-16-specific name for U_IS_SURROGATE(c), to which it delegates.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE(c) (U_IS_SURROGATE(c))
76*0e209d39SAndroid Build Coastguard Worker 
/**
 * Given that c is already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tests whether it is a lead surrogate.
 * Only bit 0x400 is inspected, so the answer is meaningless for
 * values that are not surrogates.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 2.4
 */
#define U16_IS_SURROGATE_LEAD(c) (!((c)&0x400))
85*0e209d39SAndroid Build Coastguard Worker 
/**
 * Given that c is already known to be a surrogate (U16_IS_SURROGATE(c)),
 * tests whether it is a trail surrogate.
 * Only bit 0x400 is inspected, so the answer is meaningless for
 * values that are not surrogates.
 * @param c 16-bit code unit
 * @return true or false
 * @stable ICU 4.2
 */
#define U16_IS_SURROGATE_TRAIL(c) (0!=((c)&0x400))
94*0e209d39SAndroid Build Coastguard Worker 
/**
 * Helper constant for U16_GET_SUPPLEMENTARY.
 * It combines the lowest lead surrogate shifted left by 10, the lowest
 * trail surrogate, and the 0x10000 base of the supplementary range, so
 * that one subtraction turns (lead<<10)+trail into the code point.
 * @internal
 */
#define U16_SURROGATE_OFFSET (0xdc00-0x10000+(0xd800<<10UL))
100*0e209d39SAndroid Build Coastguard Worker 
/**
 * Combines a lead and a trail surrogate into the supplementary
 * code point (U+10000..U+10ffff) that the pair encodes.
 * Both arguments are converted to UChar32 before the arithmetic.
 * The result is undefined if the inputs are not a lead surrogate
 * followed by a trail surrogate.
 *
 * @param lead lead surrogate (U+d800..U+dbff)
 * @param trail trail surrogate (U+dc00..U+dfff)
 * @return supplementary code point (U+10000..U+10ffff)
 * @stable ICU 2.4
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    ((((UChar32)(lead))<<10UL) + ((UChar32)(trail)) - U16_SURROGATE_OFFSET)
114*0e209d39SAndroid Build Coastguard Worker 
115*0e209d39SAndroid Build Coastguard Worker 
/**
 * Computes the lead surrogate (0xd800..0xdbff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The value is (supplementary>>10)+0xd7c0, cast to UChar.
 * The whole expansion is parenthesized so that the cast expression
 * behaves as one operand in any context (for example after sizeof).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return lead surrogate (U+d800..U+dbff) for supplementary
 * @stable ICU 2.4
 */
#define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
124*0e209d39SAndroid Build Coastguard Worker 
/**
 * Computes the trail surrogate (0xdc00..0xdfff) for a
 * supplementary code point (0x10000..0x10ffff).
 * The value is the low 10 bits of the code point OR-ed with 0xdc00, cast to UChar.
 * The whole expansion is parenthesized so that the cast expression
 * behaves as one operand in any context (for example after sizeof).
 * @param supplementary 32-bit code point (U+10000..U+10ffff)
 * @return trail surrogate (U+dc00..U+dfff) for supplementary
 * @stable ICU 2.4
 */
#define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))
133*0e209d39SAndroid Build Coastguard Worker 
/**
 * Number of 16-bit code units (1 or 2) needed to encode a code point.
 * Values up to 0xffff take one unit; everything else is reported as two.
 * The argument is compared as uint32_t, so negative values yield 2.
 * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
 * @param c 32-bit code point
 * @return 1 or 2
 * @stable ICU 2.4
 */
#define U16_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)
142*0e209d39SAndroid Build Coastguard Worker 
/**
 * Upper bound on the number of 16-bit code units that encode
 * one Unicode code point (U+0000..U+10ffff): a surrogate pair.
 * @return 2
 * @stable ICU 2.4
 */
#define U16_MAX_LENGTH 2
149*0e209d39SAndroid Build Coastguard Worker 
/**
 * Random-access read of the code point at offset i; the offset is not modified.
 * "Unsafe" macro: the text is assumed to be well-formed UTF-16 and
 * no string boundaries are checked.
 *
 * If the unit at i is a lead surrogate, the unit at i+1 is read as its trail;
 * if it is a trail surrogate, the unit at i-1 is read as its lead.
 * The result is undefined if the offset points to a single, unpaired surrogate.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_GET
 * @stable ICU 2.4
 */
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        /* pick the neighbor according to which half of the pair c is */ \
        (c)= U16_IS_SURROGATE_LEAD(c) ? \
            U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]) : \
            U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
177*0e209d39SAndroid Build Coastguard Worker 
/**
 * Random-access read of the code point at offset i; the offset is not modified.
 * "Safe" macro: tolerates unpaired surrogates and respects the string boundaries.
 *
 * If the unit at i is a lead surrogate and i+1 is not the end of the string,
 * the unit at i+1 is examined; if the unit at i is a trail surrogate and
 * i is past start, the unit at i-1 is examined. When the neighbor is the
 * matching surrogate, c becomes the supplementary code point.
 *
 * The length can be negative for a NUL-terminated string: the end test is
 * an inequality, so it never stops the lookahead, and the terminating NUL
 * is not a trail surrogate.
 *
 * If the offset points to a single, unpaired surrogate, then
 * c is left set to that surrogate code unit.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 2.4
 */
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(!U16_IS_SURROGATE_LEAD(c)) { \
            /* trail surrogate: look backward, but not before start */ \
            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } \
        } else if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
            /* lead surrogate with a matching trail before the end */ \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
216*0e209d39SAndroid Build Coastguard Worker 
/**
 * Random-access read of the code point at offset i; the offset is not modified.
 * "Safe" macro: tolerates unpaired surrogates and respects the string boundaries.
 *
 * If the unit at i is a lead surrogate and i+1 is not the end of the string,
 * the unit at i+1 is examined; if the unit at i is a trail surrogate and
 * i is past start, the unit at i-1 is examined. When the neighbor is the
 * matching surrogate, c becomes the supplementary code point.
 *
 * The length can be negative for a NUL-terminated string: the end test is
 * an inequality, so it never stops the lookahead, and the terminating NUL
 * is not a trail surrogate.
 *
 * If the offset points to a single, unpaired surrogate, then
 * c is set to U+FFFD.
 * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_GET_UNSAFE
 * @stable ICU 60
 */
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(!U16_IS_SURROGATE_LEAD(c)) { \
            /* trail surrogate: needs a lead just before it, at or after start */ \
            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } else { \
                (c)=0xfffd; \
            } \
        } else if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
            /* lead surrogate with a matching trail before the end */ \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } else { \
            (c)=0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
259*0e209d39SAndroid Build Coastguard Worker 
260*0e209d39SAndroid Build Coastguard Worker /* definitions with forward iteration --------------------------------------- */
261*0e209d39SAndroid Build Coastguard Worker 
/**
 * Reads the code point that starts at offset i and moves i past it.
 * (Post-incrementing forward iteration.)
 * "Unsafe" macro: the text is assumed to be well-formed UTF-16 and
 * no string boundaries are checked.
 *
 * If the unit read is a lead surrogate, the following unit is consumed
 * as its trail surrogate without being tested.
 * If the offset points to a trail surrogate, then that itself
 * will be returned as the code point.
 * The result is undefined if the offset points to a single, unpaired lead surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_NEXT
 * @stable ICU 2.4
 */
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        /* combine with the next unit, then step over it */ \
        (c)=U16_GET_SUPPLEMENTARY((c), (s)[i]); \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
287*0e209d39SAndroid Build Coastguard Worker 
/**
 * Reads the code point that starts at offset i and moves i past it.
 * (Post-incrementing forward iteration.)
 * "Safe" macro: tolerates unpaired surrogates and respects the end of the string.
 *
 * The length can be negative for a NUL-terminated string: the end test is
 * an inequality, so it never stops the lookahead, and the terminating NUL
 * is not a trail surrogate.
 *
 * If the unit read is a lead surrogate and the next unit exists and is a
 * trail surrogate, both are consumed and c is the supplementary code point.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate
 * and only that one unit is consumed.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            (i)++; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
319*0e209d39SAndroid Build Coastguard Worker 
/**
 * Reads the code point that starts at offset i and moves i past it.
 * (Post-incrementing forward iteration.)
 * "Safe" macro: tolerates unpaired surrogates and respects the end of the string.
 *
 * The length can be negative for a NUL-terminated string: the end test is
 * an equality check, so it never stops the lookahead, and the terminating NUL
 * is not a trail surrogate.
 *
 * If the unit read is a lead surrogate and the next unit exists and is a
 * trail surrogate, both are consumed and c is the supplementary code point.
 * If the offset points to a trail surrogate or
 * to a single, unpaired lead surrogate, then c is set to U+FFFD
 * and only that one unit is consumed.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @param c output UChar32 variable
 * @see U16_NEXT_UNSAFE
 * @stable ICU 60
 */
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(!U16_IS_SURROGATE_LEAD(c) || (i)==(length) || !U16_IS_TRAIL(__c2=(s)[(i)])) { \
            /* trail first, lead at the end, or lead without a trail */ \
            (c)=0xfffd; \
        } else { \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
353*0e209d39SAndroid Build Coastguard Worker 
/**
 * Writes a code point at offset i as 1 or 2 code units and advances i
 * past what was written (post-increment).
 * The offset is expected to point to the current end of the string contents.
 * "Unsafe" macro: neither the code point value nor the available space
 * is checked. If c is not a valid code point or the buffer is too small,
 * the result is undefined.
 *
 * @param s UChar * string buffer (written to)
 * @param i string offset
 * @param c code point to append
 * @see U16_APPEND
 * @stable ICU 2.4
 */
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)>0xffff) { \
        /* supplementary: lead surrogate, then trail surrogate */ \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else { \
        (s)[(i)++]=(uint16_t)(c); \
    } \
} UPRV_BLOCK_MACRO_END
375*0e209d39SAndroid Build Coastguard Worker 
/**
 * Writes a code point at offset i as 1 or 2 code units and advances i
 * past what was written (post-increment).
 * The offset is expected to point to the current end of the string contents.
 * "Safe" macro: rejects values above 0x10ffff and, when a surrogate pair
 * is needed, checks that both units fit below capacity.
 * On rejection nothing is written, i is unchanged and isError is set to true.
 *
 * A value up to 0xffff is stored as a single unit without further checks,
 * which is why the caller must guarantee i<capacity.
 *
 * @param s UChar * string buffer (written to)
 * @param i string offset, must be i<capacity
 * @param capacity size of the string buffer
 * @param c code point to append
 * @param isError output UBool set to true if an error occurs, otherwise not modified
 * @see U16_APPEND_UNSAFE
 * @stable ICU 2.4
 */
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)>0xffff) { \
        if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
            (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
            (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
        } else /* c>0x10ffff or not enough space */ { \
            (isError)=true; \
        } \
    } else { \
        (s)[(i)++]=(uint16_t)(c); \
    } \
} UPRV_BLOCK_MACRO_END
403*0e209d39SAndroid Build Coastguard Worker 
/**
 * Moves the string offset forward from one code point boundary to the next.
 * (Post-incrementing iteration.)
 * "Unsafe" macro: the text is assumed to be well-formed UTF-16 and
 * no string boundaries are checked.
 * A lead surrogate at i advances the offset by two units, anything else by one.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_FWD_1
 * @stable ICU 2.4
 */
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[i])) { \
        (i)+=2; \
    } else { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
419*0e209d39SAndroid Build Coastguard Worker 
/**
 * Step the string offset forward over one code point,
 * from one code point boundary to the following one.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset is always incremented once. It is incremented a second time
 * only if the unit just passed is a lead surrogate, the offset has not
 * reached the length, and the unit at the new offset is a trail surrogate.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @see U16_FWD_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)++])) { \
        /* only consume the next unit if it exists and completes the pair */ \
        if((i)!=(length) && U16_IS_TRAIL((s)[(i)])) { \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
438*0e209d39SAndroid Build Coastguard Worker 
/**
 * Move the string offset forward by n code points,
 * from one code point boundary to the n-th one after it.
 * (Post-incrementing iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * Applies U16_FWD_1_UNSAFE n times; nothing happens if n is not positive.
 * n is evaluated once and copied into an int32_t counter.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_FWD_N
 * @stable ICU 2.4
 */
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    for(int32_t __N=(n); __N>0; --__N) { \
        U16_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
458*0e209d39SAndroid Build Coastguard Worker 
/**
 * Move the string offset forward by n code points,
 * from one code point boundary to the n-th one after it.
 * (Post-incrementing iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * Applies U16_FWD_1 up to n times. The loop ends early when the offset
 * reaches the length or, for a negative length, when the unit at the
 * offset is 0.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param n number of code points to skip
 * @see U16_FWD_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    for(int32_t __N=(n); \
        __N>0 && ((i)<(length) || ((length)<0 && (s)[(i)]!=0)); \
        --__N) { \
        U16_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
481*0e209d39SAndroid Build Coastguard Worker 
/**
 * Snap a random-access offset back to the start of the code point
 * that it points into.
 * If the unit at the offset is a trail surrogate, the offset is decremented
 * by one; in every other case the offset is left alone.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The unit before the offset is not inspected and no start boundary is checked.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_START
 * @stable ICU 2.4
 */
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[(i)])) { \
        (i)--; \
    } \
} UPRV_BLOCK_MACRO_END
500*0e209d39SAndroid Build Coastguard Worker 
/**
 * Snap a random-access offset back to the start of the code point
 * that it points into.
 * The offset is decremented by one only if the unit at the offset is a
 * trail surrogate, the offset is greater than start, and the preceding unit
 * is a lead surrogate. Otherwise the offset is left alone.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i
 * @see U16_SET_CP_START_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[(i)])) { \
        /* step back only onto a lead surrogate that lies inside the string */ \
        if((i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
520*0e209d39SAndroid Build Coastguard Worker 
521*0e209d39SAndroid Build Coastguard Worker /* definitions with backward iteration -------------------------------------- */
522*0e209d39SAndroid Build Coastguard Worker 
/**
 * Step the string offset back to the previous code point boundary
 * and deliver the code point that was stepped over.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The input offset may be the same as the string length.
 * When the unit before the offset is a trail surrogate, the unit before
 * that one is read as its lead surrogate without any further check, and
 * the two are combined into a supplementary code point.
 * When the unit before the offset is a lead surrogate, that unit itself
 * is delivered as the code point.
 * The result is undefined if the offset is behind a single, unpaired trail surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_PREV
 * @stable ICU 2.4
 */
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* fetch the unit right before the offset */ \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        /* assume a lead surrogate precedes it and assemble the pair */ \
        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
549*0e209d39SAndroid Build Coastguard Worker 
/**
 * Step the string offset back to the previous code point boundary
 * and deliver the code point that was stepped over.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * When the unit before the offset is a trail surrogate and is preceded,
 * at or after start, by a lead surrogate, both units are consumed and
 * combined into a supplementary code point.
 * When the unit before the offset is a lead surrogate or a single, unpaired
 * trail surrogate, then c is set to that unpaired surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 2.4
 */
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c) && (i)>(start)) { \
        /* there is a preceding unit inside the string: look at it */ \
        uint16_t __c2=(s)[(i)-1]; \
        if(U16_IS_LEAD(__c2)) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
580*0e209d39SAndroid Build Coastguard Worker 
/**
 * Step the string offset back to the previous code point boundary
 * and deliver the code point that was stepped over.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The input offset may be the same as the string length.
 * When the unit before the offset is a trail surrogate and is preceded,
 * at or after start, by a lead surrogate, both units are consumed and
 * combined into a supplementary code point.
 * When the unit before the offset is a lead surrogate or a single, unpaired
 * trail surrogate, then c is set to U+FFFD and only that one unit is consumed.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * @stable ICU 60
 */
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(!U16_IS_SURROGATE_TRAIL(c) || (i)<=(start) || !U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            /* unpaired surrogate: report the replacement character */ \
            (c)=0xfffd; \
        } else { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
613*0e209d39SAndroid Build Coastguard Worker 
/**
 * Step the string offset back over one code point,
 * from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The offset is always decremented once. It is decremented a second time
 * when the unit at the new offset is a trail surrogate; the unit before it
 * and the start of the string are not checked.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_BACK_1
 * @stable ICU 2.4
 */
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)])) { \
        (i)--; \
    } \
} UPRV_BLOCK_MACRO_END
630*0e209d39SAndroid Build Coastguard Worker 
/**
 * Step the string offset back over one code point,
 * from one code point boundary to the previous one.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The offset is always decremented once. It is decremented a second time
 * only if the unit at the new offset is a trail surrogate, the new offset
 * is still greater than start, and the preceding unit is a lead surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @see U16_BACK_1_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)])) { \
        /* also skip the lead surrogate if one precedes inside the string */ \
        if((i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
            --(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
648*0e209d39SAndroid Build Coastguard Worker 
/**
 * Move the string offset backward by n code points,
 * from one code point boundary to the n-th one before it.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * Applies U16_BACK_1_UNSAFE n times; nothing happens if n is not positive.
 * n is evaluated once and copied into an int32_t counter.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_BACK_N
 * @stable ICU 2.4
 */
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    for(int32_t __N=(n); __N>0; --__N) { \
        U16_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
669*0e209d39SAndroid Build Coastguard Worker 
/**
 * Move the string offset backward by n code points,
 * from one code point boundary to the n-th one before it.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * Applies U16_BACK_1 up to n times. The loop ends early once the offset
 * is no longer greater than start.
 *
 * @param s const UChar * string
 * @param start start of string
 * @param i string offset, must be start<i
 * @param n number of code points to skip
 * @see U16_BACK_N_UNSAFE
 * @stable ICU 2.4
 */
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    for(int32_t __N=(n); __N>0 && (i)>(start); --__N) { \
        U16_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
691*0e209d39SAndroid Build Coastguard Worker 
/**
 * Snap a random-access offset forward to the boundary behind the code point
 * that it points into.
 * If the unit before the offset is a lead surrogate, the offset is
 * incremented by one; in every other case the offset is left alone.
 * The input offset may be the same as the string length.
 * "Unsafe" macro, assumes well-formed UTF-16.
 *
 * The unit at the offset is not inspected and no string boundary is checked.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_LIMIT
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        (i)++; \
    } \
} UPRV_BLOCK_MACRO_END
710*0e209d39SAndroid Build Coastguard Worker 
/**
 * Snap a random-access offset forward to the boundary behind the code point
 * that it points into.
 * The offset is incremented by one only if it lies strictly between start
 * and the length (or the length is negative), the unit before it is a lead
 * surrogate, and the unit at it is a trail surrogate.
 * Otherwise the offset is left alone.
 * The input offset may be the same as the string length.
 * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, start<=i<=length
 * @param length int32_t string length
 * @see U16_SET_CP_LIMIT_UNSAFE
 * @stable ICU 2.4
 */
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* both neighbours must be readable before they are examined */ \
    if((start)<(i) && ((i)<(length) || (length)<0)) { \
        if(U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[(i)])) { \
            ++(i); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
733*0e209d39SAndroid Build Coastguard Worker 
734*0e209d39SAndroid Build Coastguard Worker #endif
735