xref: /aosp_15_r20/external/icu/libicu/ndk_headers/unicode/utf16.h (revision 0e209d3975ff4a8c132096b14b0e9364a753506e)
1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
5*0e209d39SAndroid Build Coastguard Worker *
6*0e209d39SAndroid Build Coastguard Worker *   Copyright (C) 1999-2012, International Business Machines
7*0e209d39SAndroid Build Coastguard Worker *   Corporation and others.  All Rights Reserved.
8*0e209d39SAndroid Build Coastguard Worker *
9*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
10*0e209d39SAndroid Build Coastguard Worker *   file name:  utf16.h
11*0e209d39SAndroid Build Coastguard Worker *   encoding:   UTF-8
12*0e209d39SAndroid Build Coastguard Worker *   tab size:   8 (not used)
13*0e209d39SAndroid Build Coastguard Worker *   indentation:4
14*0e209d39SAndroid Build Coastguard Worker *
15*0e209d39SAndroid Build Coastguard Worker *   created on: 1999sep09
16*0e209d39SAndroid Build Coastguard Worker *   created by: Markus W. Scherer
17*0e209d39SAndroid Build Coastguard Worker */
18*0e209d39SAndroid Build Coastguard Worker 
19*0e209d39SAndroid Build Coastguard Worker /**
20*0e209d39SAndroid Build Coastguard Worker  * @addtogroup icu4c ICU4C
21*0e209d39SAndroid Build Coastguard Worker  * @{
22*0e209d39SAndroid Build Coastguard Worker  * \file
23*0e209d39SAndroid Build Coastguard Worker  * \brief C API: 16-bit Unicode handling macros
24*0e209d39SAndroid Build Coastguard Worker  *
25*0e209d39SAndroid Build Coastguard Worker  * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
26*0e209d39SAndroid Build Coastguard Worker  *
27*0e209d39SAndroid Build Coastguard Worker  * For more information see utf.h and the ICU User Guide Strings chapter
28*0e209d39SAndroid Build Coastguard Worker  * (https://unicode-org.github.io/icu/userguide/strings).
29*0e209d39SAndroid Build Coastguard Worker  *
30*0e209d39SAndroid Build Coastguard Worker  * <em>Usage:</em>
31*0e209d39SAndroid Build Coastguard Worker  * ICU coding guidelines for if() statements should be followed when using these macros.
32*0e209d39SAndroid Build Coastguard Worker  * Compound statements (curly braces {}) must be used  for if-else-while...
33*0e209d39SAndroid Build Coastguard Worker  * bodies and all macro statements should be terminated with semicolon.
34*0e209d39SAndroid Build Coastguard Worker  */
35*0e209d39SAndroid Build Coastguard Worker 
36*0e209d39SAndroid Build Coastguard Worker #ifndef __UTF16_H__
37*0e209d39SAndroid Build Coastguard Worker #define __UTF16_H__
38*0e209d39SAndroid Build Coastguard Worker 
39*0e209d39SAndroid Build Coastguard Worker #include <stdbool.h>
40*0e209d39SAndroid Build Coastguard Worker #include "unicode/umachine.h"
41*0e209d39SAndroid Build Coastguard Worker #ifndef __UTF_H__
42*0e209d39SAndroid Build Coastguard Worker #   include "unicode/utf.h"
43*0e209d39SAndroid Build Coastguard Worker #endif
44*0e209d39SAndroid Build Coastguard Worker 
45*0e209d39SAndroid Build Coastguard Worker /* single-code point definitions -------------------------------------------- */
46*0e209d39SAndroid Build Coastguard Worker 
47*0e209d39SAndroid Build Coastguard Worker /**
48*0e209d39SAndroid Build Coastguard Worker  * Does this code unit alone encode a code point (BMP, not a surrogate)?
49*0e209d39SAndroid Build Coastguard Worker  * @param c 16-bit code unit
50*0e209d39SAndroid Build Coastguard Worker  * @return true or false; the negation of U_IS_SURROGATE(c), parenthesized as a whole so it composes safely in any expression
51*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 2.4
52*0e209d39SAndroid Build Coastguard Worker  */
53*0e209d39SAndroid Build Coastguard Worker #define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
54*0e209d39SAndroid Build Coastguard Worker 
55*0e209d39SAndroid Build Coastguard Worker /**
56*0e209d39SAndroid Build Coastguard Worker  * Is this code unit a lead surrogate (U+d800..U+dbff)?
57*0e209d39SAndroid Build Coastguard Worker  * @param c 16-bit code unit (expanded once)
58*0e209d39SAndroid Build Coastguard Worker  * @return true or false; the low 10 bits are masked off and every remaining bit of the mask must match 0xd800, so e.g. 0x1d800 tests false
59*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 2.4
60*0e209d39SAndroid Build Coastguard Worker  */
61*0e209d39SAndroid Build Coastguard Worker #define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
62*0e209d39SAndroid Build Coastguard Worker 
63*0e209d39SAndroid Build Coastguard Worker /**
64*0e209d39SAndroid Build Coastguard Worker  * Is this code unit a trail surrogate (U+dc00..U+dfff)?
65*0e209d39SAndroid Build Coastguard Worker  * @param c 16-bit code unit (expanded once)
66*0e209d39SAndroid Build Coastguard Worker  * @return true or false; the low 10 bits are masked off and every remaining bit of the mask must match 0xdc00, so e.g. 0x1dc00 tests false
67*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 2.4
68*0e209d39SAndroid Build Coastguard Worker  */
69*0e209d39SAndroid Build Coastguard Worker #define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
70*0e209d39SAndroid Build Coastguard Worker 
71*0e209d39SAndroid Build Coastguard Worker /**
72*0e209d39SAndroid Build Coastguard Worker  * Is this code unit a surrogate (U+d800..U+dfff)?
73*0e209d39SAndroid Build Coastguard Worker  * @param c 16-bit code unit
74*0e209d39SAndroid Build Coastguard Worker  * @return true or false; this is a plain alias that expands to U_IS_SURROGATE(c), which is not defined in this file (presumably in unicode/utf.h)
75*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 2.4
76*0e209d39SAndroid Build Coastguard Worker  */
77*0e209d39SAndroid Build Coastguard Worker #define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
78*0e209d39SAndroid Build Coastguard Worker 
79*0e209d39SAndroid Build Coastguard Worker /**
80*0e209d39SAndroid Build Coastguard Worker  * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
81*0e209d39SAndroid Build Coastguard Worker  * is it a lead surrogate? Only bit 0x400 is inspected, so the answer is meaningless for non-surrogates.
82*0e209d39SAndroid Build Coastguard Worker  * @param c 16-bit code unit (expanded once)
83*0e209d39SAndroid Build Coastguard Worker  * @return true or false; true when bit 0x400 of c is clear
84*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 2.4
85*0e209d39SAndroid Build Coastguard Worker  */
86*0e209d39SAndroid Build Coastguard Worker #define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
87*0e209d39SAndroid Build Coastguard Worker 
88*0e209d39SAndroid Build Coastguard Worker /**
89*0e209d39SAndroid Build Coastguard Worker  * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
90*0e209d39SAndroid Build Coastguard Worker  * is it a trail surrogate? Only bit 0x400 is inspected, so the answer is meaningless for non-surrogates.
91*0e209d39SAndroid Build Coastguard Worker  * @param c 16-bit code unit (expanded once)
92*0e209d39SAndroid Build Coastguard Worker  * @return true or false; true when bit 0x400 of c is set
93*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 4.2
94*0e209d39SAndroid Build Coastguard Worker  */
95*0e209d39SAndroid Build Coastguard Worker #define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
96*0e209d39SAndroid Build Coastguard Worker 
97*0e209d39SAndroid Build Coastguard Worker /**
98*0e209d39SAndroid Build Coastguard Worker  * Helper constant for U16_GET_SUPPLEMENTARY: subtracting it from (lead<<10)+trail removes the 0xd800 and 0xdc00 surrogate bases and adds the 0x10000 supplementary offset in one step.
99*0e209d39SAndroid Build Coastguard Worker  * \xrefitem internal "Internal"  "Internal List"  Do not use. This API is for internal use only.
100*0e209d39SAndroid Build Coastguard Worker  */
101*0e209d39SAndroid Build Coastguard Worker #define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
102*0e209d39SAndroid Build Coastguard Worker 
103*0e209d39SAndroid Build Coastguard Worker /**
104*0e209d39SAndroid Build Coastguard Worker  * Get a supplementary code point value (U+10000..U+10ffff)
105*0e209d39SAndroid Build Coastguard Worker  * from its lead and trail surrogates, computed as (lead<<10)+trail-U16_SURROGATE_OFFSET.
106*0e209d39SAndroid Build Coastguard Worker  * The result is undefined if the input values are not
107*0e209d39SAndroid Build Coastguard Worker  * lead and trail surrogates; the macro itself performs no validation.
108*0e209d39SAndroid Build Coastguard Worker  *
109*0e209d39SAndroid Build Coastguard Worker  * @param lead lead surrogate (U+d800..U+dbff); cast to UChar32 before shifting, expanded once
110*0e209d39SAndroid Build Coastguard Worker  * @param trail trail surrogate (U+dc00..U+dfff); cast to UChar32, expanded once
111*0e209d39SAndroid Build Coastguard Worker  * @return supplementary code point (U+10000..U+10ffff)
112*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 2.4
113*0e209d39SAndroid Build Coastguard Worker  */
114*0e209d39SAndroid Build Coastguard Worker #define U16_GET_SUPPLEMENTARY(lead, trail) \
115*0e209d39SAndroid Build Coastguard Worker     (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
116*0e209d39SAndroid Build Coastguard Worker 
117*0e209d39SAndroid Build Coastguard Worker 
118*0e209d39SAndroid Build Coastguard Worker /**
119*0e209d39SAndroid Build Coastguard Worker  * Get the lead surrogate (0xd800..0xdbff) for a
120*0e209d39SAndroid Build Coastguard Worker  * supplementary code point (0x10000..0x10ffff). The constant 0xd7c0 is 0xd800-(0x10000>>10), folding the offset removal into one addition.
121*0e209d39SAndroid Build Coastguard Worker  * @param supplementary 32-bit code point (U+10000..U+10ffff); not range-checked, expanded once
122*0e209d39SAndroid Build Coastguard Worker  * @return lead surrogate (U+d800..U+dbff) for supplementary, as a UChar; the expansion is parenthesized as a whole
123*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 2.4
124*0e209d39SAndroid Build Coastguard Worker  */
125*0e209d39SAndroid Build Coastguard Worker #define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
126*0e209d39SAndroid Build Coastguard Worker 
127*0e209d39SAndroid Build Coastguard Worker /**
128*0e209d39SAndroid Build Coastguard Worker  * Get the trail surrogate (0xdc00..0xdfff) for a
129*0e209d39SAndroid Build Coastguard Worker  * supplementary code point (0x10000..0x10ffff): the low 10 bits of the code point OR-ed with 0xdc00.
130*0e209d39SAndroid Build Coastguard Worker  * @param supplementary 32-bit code point (U+10000..U+10ffff); not range-checked, expanded once
131*0e209d39SAndroid Build Coastguard Worker  * @return trail surrogate (U+dc00..U+dfff) for supplementary, as a UChar; the expansion is parenthesized as a whole
132*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 2.4
133*0e209d39SAndroid Build Coastguard Worker  */
134*0e209d39SAndroid Build Coastguard Worker #define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))
135*0e209d39SAndroid Build Coastguard Worker 
136*0e209d39SAndroid Build Coastguard Worker /**
137*0e209d39SAndroid Build Coastguard Worker  * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
138*0e209d39SAndroid Build Coastguard Worker  * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
139*0e209d39SAndroid Build Coastguard Worker  * @param c 32-bit code point; compared as uint32_t, so a negative value converts to a large number and yields 2
140*0e209d39SAndroid Build Coastguard Worker  * @return 1 or 2 (1 exactly when (uint32_t)c<=0xffff)
141*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 2.4
142*0e209d39SAndroid Build Coastguard Worker  */
143*0e209d39SAndroid Build Coastguard Worker #define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
144*0e209d39SAndroid Build Coastguard Worker 
145*0e209d39SAndroid Build Coastguard Worker /**
146*0e209d39SAndroid Build Coastguard Worker  * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
147*0e209d39SAndroid Build Coastguard Worker  * @return 2 (expands to the plain integer literal, i.e. the larger of the two values U16_LENGTH can produce)
148*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 2.4
149*0e209d39SAndroid Build Coastguard Worker  */
150*0e209d39SAndroid Build Coastguard Worker #define U16_MAX_LENGTH 2
151*0e209d39SAndroid Build Coastguard Worker 
152*0e209d39SAndroid Build Coastguard Worker /**
153*0e209d39SAndroid Build Coastguard Worker  * Get a code point from a string at a random-access offset,
154*0e209d39SAndroid Build Coastguard Worker  * without changing the offset.
155*0e209d39SAndroid Build Coastguard Worker  * "Unsafe" macro, assumes well-formed UTF-16 and performs no bounds checks on s.
156*0e209d39SAndroid Build Coastguard Worker  *
157*0e209d39SAndroid Build Coastguard Worker  * The offset may point to either the lead or trail surrogate unit
158*0e209d39SAndroid Build Coastguard Worker  * for a supplementary code point, in which case the macro will read
159*0e209d39SAndroid Build Coastguard Worker  * the adjacent matching surrogate as well (s[i+1] after a lead, s[i-1] before a trail).
160*0e209d39SAndroid Build Coastguard Worker  * The result is undefined if the offset points to a single, unpaired surrogate.
161*0e209d39SAndroid Build Coastguard Worker  * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
162*0e209d39SAndroid Build Coastguard Worker  *
163*0e209d39SAndroid Build Coastguard Worker  * @param s const UChar * string (expanded more than once; pass an expression without side effects)
164*0e209d39SAndroid Build Coastguard Worker  * @param i string offset (expanded more than once, never modified)
165*0e209d39SAndroid Build Coastguard Worker  * @param c output UChar32 variable; first receives the raw code unit, then the combined code point if that unit is a surrogate
166*0e209d39SAndroid Build Coastguard Worker  * @see U16_GET
167*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 2.4
168*0e209d39SAndroid Build Coastguard Worker  */
169*0e209d39SAndroid Build Coastguard Worker #define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
170*0e209d39SAndroid Build Coastguard Worker     (c)=(s)[i]; \
171*0e209d39SAndroid Build Coastguard Worker     if(U16_IS_SURROGATE(c)) { \
172*0e209d39SAndroid Build Coastguard Worker         if(U16_IS_SURROGATE_LEAD(c)) { \
173*0e209d39SAndroid Build Coastguard Worker             (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
174*0e209d39SAndroid Build Coastguard Worker         } else { \
175*0e209d39SAndroid Build Coastguard Worker             (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
176*0e209d39SAndroid Build Coastguard Worker         } \
177*0e209d39SAndroid Build Coastguard Worker     } \
178*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END
179*0e209d39SAndroid Build Coastguard Worker 
180*0e209d39SAndroid Build Coastguard Worker /**
181*0e209d39SAndroid Build Coastguard Worker  * Get a code point from a string at a random-access offset,
182*0e209d39SAndroid Build Coastguard Worker  * without changing the offset.
183*0e209d39SAndroid Build Coastguard Worker  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
184*0e209d39SAndroid Build Coastguard Worker  *
185*0e209d39SAndroid Build Coastguard Worker  * The offset may point to either the lead or trail surrogate unit
186*0e209d39SAndroid Build Coastguard Worker  * for a supplementary code point, in which case the macro will read
187*0e209d39SAndroid Build Coastguard Worker  * the adjacent matching surrogate as well (only if i+1!=length after a lead, or i>start before a trail).
188*0e209d39SAndroid Build Coastguard Worker  *
189*0e209d39SAndroid Build Coastguard Worker  * The length can be negative for a NUL-terminated string: for non-negative i the test i+1!=length then always passes, and a terminating NUL is never a trail surrogate.
190*0e209d39SAndroid Build Coastguard Worker  *
191*0e209d39SAndroid Build Coastguard Worker  * If the offset points to a single, unpaired surrogate, then
192*0e209d39SAndroid Build Coastguard Worker  * c is set to that unpaired surrogate.
193*0e209d39SAndroid Build Coastguard Worker  * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
194*0e209d39SAndroid Build Coastguard Worker  *
195*0e209d39SAndroid Build Coastguard Worker  * @param s const UChar * string (expanded more than once; pass an expression without side effects)
196*0e209d39SAndroid Build Coastguard Worker  * @param start starting string offset (usually 0); s[i-1] is read only when i>start
197*0e209d39SAndroid Build Coastguard Worker  * @param i string offset, must be start<=i<length (expanded more than once, never modified)
198*0e209d39SAndroid Build Coastguard Worker  * @param length string length
199*0e209d39SAndroid Build Coastguard Worker  * @param c output UChar32 variable; the neighboring unit is held in a block-local uint16_t named __c2
200*0e209d39SAndroid Build Coastguard Worker  * @see U16_GET_UNSAFE
201*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 2.4
202*0e209d39SAndroid Build Coastguard Worker  */
203*0e209d39SAndroid Build Coastguard Worker #define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
204*0e209d39SAndroid Build Coastguard Worker     (c)=(s)[i]; \
205*0e209d39SAndroid Build Coastguard Worker     if(U16_IS_SURROGATE(c)) { \
206*0e209d39SAndroid Build Coastguard Worker         uint16_t __c2; \
207*0e209d39SAndroid Build Coastguard Worker         if(U16_IS_SURROGATE_LEAD(c)) { \
208*0e209d39SAndroid Build Coastguard Worker             if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
209*0e209d39SAndroid Build Coastguard Worker                 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
210*0e209d39SAndroid Build Coastguard Worker             } \
211*0e209d39SAndroid Build Coastguard Worker         } else { \
212*0e209d39SAndroid Build Coastguard Worker             if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
213*0e209d39SAndroid Build Coastguard Worker                 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
214*0e209d39SAndroid Build Coastguard Worker             } \
215*0e209d39SAndroid Build Coastguard Worker         } \
216*0e209d39SAndroid Build Coastguard Worker     } \
217*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END
218*0e209d39SAndroid Build Coastguard Worker 
219*0e209d39SAndroid Build Coastguard Worker /**
220*0e209d39SAndroid Build Coastguard Worker  * Get a code point from a string at a random-access offset,
221*0e209d39SAndroid Build Coastguard Worker  * without changing the offset.
222*0e209d39SAndroid Build Coastguard Worker  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
223*0e209d39SAndroid Build Coastguard Worker  *
224*0e209d39SAndroid Build Coastguard Worker  * The offset may point to either the lead or trail surrogate unit
225*0e209d39SAndroid Build Coastguard Worker  * for a supplementary code point, in which case the macro will read
226*0e209d39SAndroid Build Coastguard Worker  * the adjacent matching surrogate as well (only if i+1!=length after a lead, or i>start before a trail).
227*0e209d39SAndroid Build Coastguard Worker  *
228*0e209d39SAndroid Build Coastguard Worker  * The length can be negative for a NUL-terminated string: for non-negative i the test i+1!=length then always passes, and a terminating NUL is never a trail surrogate.
229*0e209d39SAndroid Build Coastguard Worker  *
230*0e209d39SAndroid Build Coastguard Worker  * If the offset points to a single, unpaired surrogate, then
231*0e209d39SAndroid Build Coastguard Worker  * c is set to U+FFFD; this is the only difference from U16_GET, which leaves the unpaired surrogate in c.
232*0e209d39SAndroid Build Coastguard Worker  * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
233*0e209d39SAndroid Build Coastguard Worker  *
234*0e209d39SAndroid Build Coastguard Worker  * @param s const UChar * string (expanded more than once; pass an expression without side effects)
235*0e209d39SAndroid Build Coastguard Worker  * @param start starting string offset (usually 0); s[i-1] is read only when i>start
236*0e209d39SAndroid Build Coastguard Worker  * @param i string offset, must be start<=i<length (expanded more than once, never modified)
237*0e209d39SAndroid Build Coastguard Worker  * @param length string length
238*0e209d39SAndroid Build Coastguard Worker  * @param c output UChar32 variable; the neighboring unit is held in a block-local uint16_t named __c2
239*0e209d39SAndroid Build Coastguard Worker  * @see U16_GET_UNSAFE
240*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 60
241*0e209d39SAndroid Build Coastguard Worker  */
242*0e209d39SAndroid Build Coastguard Worker #define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
243*0e209d39SAndroid Build Coastguard Worker     (c)=(s)[i]; \
244*0e209d39SAndroid Build Coastguard Worker     if(U16_IS_SURROGATE(c)) { \
245*0e209d39SAndroid Build Coastguard Worker         uint16_t __c2; \
246*0e209d39SAndroid Build Coastguard Worker         if(U16_IS_SURROGATE_LEAD(c)) { \
247*0e209d39SAndroid Build Coastguard Worker             if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
248*0e209d39SAndroid Build Coastguard Worker                 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
249*0e209d39SAndroid Build Coastguard Worker             } else { \
250*0e209d39SAndroid Build Coastguard Worker                 (c)=0xfffd; \
251*0e209d39SAndroid Build Coastguard Worker             } \
252*0e209d39SAndroid Build Coastguard Worker         } else { \
253*0e209d39SAndroid Build Coastguard Worker             if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
254*0e209d39SAndroid Build Coastguard Worker                 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
255*0e209d39SAndroid Build Coastguard Worker             } else { \
256*0e209d39SAndroid Build Coastguard Worker                 (c)=0xfffd; \
257*0e209d39SAndroid Build Coastguard Worker             } \
258*0e209d39SAndroid Build Coastguard Worker         } \
259*0e209d39SAndroid Build Coastguard Worker     } \
260*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END
261*0e209d39SAndroid Build Coastguard Worker 
262*0e209d39SAndroid Build Coastguard Worker /* definitions with forward iteration --------------------------------------- */
263*0e209d39SAndroid Build Coastguard Worker 
264*0e209d39SAndroid Build Coastguard Worker /**
265*0e209d39SAndroid Build Coastguard Worker  * Get a code point from a string at a code point boundary offset,
266*0e209d39SAndroid Build Coastguard Worker  * and advance the offset to the next code point boundary.
267*0e209d39SAndroid Build Coastguard Worker  * (Post-incrementing forward iteration.)
268*0e209d39SAndroid Build Coastguard Worker  * "Unsafe" macro, assumes well-formed UTF-16 and performs no bounds checks on s.
269*0e209d39SAndroid Build Coastguard Worker  *
270*0e209d39SAndroid Build Coastguard Worker  * The offset may point to the lead surrogate unit
271*0e209d39SAndroid Build Coastguard Worker  * for a supplementary code point, in which case the macro will read
272*0e209d39SAndroid Build Coastguard Worker  * the following trail surrogate as well, without testing that it really is a trail surrogate.
273*0e209d39SAndroid Build Coastguard Worker  * If the offset points to a trail surrogate, then that itself
274*0e209d39SAndroid Build Coastguard Worker  * will be returned as the code point.
275*0e209d39SAndroid Build Coastguard Worker  * The result is undefined if the offset points to a single, unpaired lead surrogate.
276*0e209d39SAndroid Build Coastguard Worker  *
277*0e209d39SAndroid Build Coastguard Worker  * @param s const UChar * string (expanded more than once; pass an expression without side effects)
278*0e209d39SAndroid Build Coastguard Worker  * @param i string offset; must be a modifiable lvalue, incremented by 1, or by 2 after a lead surrogate
279*0e209d39SAndroid Build Coastguard Worker  * @param c output UChar32 variable
280*0e209d39SAndroid Build Coastguard Worker  * @see U16_NEXT
281*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 2.4
282*0e209d39SAndroid Build Coastguard Worker  */
283*0e209d39SAndroid Build Coastguard Worker #define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
284*0e209d39SAndroid Build Coastguard Worker     (c)=(s)[(i)++]; \
285*0e209d39SAndroid Build Coastguard Worker     if(U16_IS_LEAD(c)) { \
286*0e209d39SAndroid Build Coastguard Worker         (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
287*0e209d39SAndroid Build Coastguard Worker     } \
288*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END
289*0e209d39SAndroid Build Coastguard Worker 
290*0e209d39SAndroid Build Coastguard Worker /**
291*0e209d39SAndroid Build Coastguard Worker  * Get a code point from a string at a code point boundary offset,
292*0e209d39SAndroid Build Coastguard Worker  * and advance the offset to the next code point boundary.
293*0e209d39SAndroid Build Coastguard Worker  * (Post-incrementing forward iteration.)
294*0e209d39SAndroid Build Coastguard Worker  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
295*0e209d39SAndroid Build Coastguard Worker  *
296*0e209d39SAndroid Build Coastguard Worker  * The length can be negative for a NUL-terminated string: for non-negative i the test i!=length then always passes, and a terminating NUL is never a trail surrogate.
297*0e209d39SAndroid Build Coastguard Worker  *
298*0e209d39SAndroid Build Coastguard Worker  * The offset may point to the lead surrogate unit
299*0e209d39SAndroid Build Coastguard Worker  * for a supplementary code point, in which case the macro will read
300*0e209d39SAndroid Build Coastguard Worker  * the following trail surrogate as well (only after checking i!=length and that the unit is a trail surrogate).
301*0e209d39SAndroid Build Coastguard Worker  * If the offset points to a trail surrogate or
302*0e209d39SAndroid Build Coastguard Worker  * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate and i advances by one unit only.
303*0e209d39SAndroid Build Coastguard Worker  *
304*0e209d39SAndroid Build Coastguard Worker  * @param s const UChar * string (expanded more than once; pass an expression without side effects)
305*0e209d39SAndroid Build Coastguard Worker  * @param i string offset, must be i<length; must be a modifiable lvalue, incremented by 1, or by 2 for a surrogate pair
306*0e209d39SAndroid Build Coastguard Worker  * @param length string length
307*0e209d39SAndroid Build Coastguard Worker  * @param c output UChar32 variable; the candidate trail unit is held in a block-local uint16_t named __c2
308*0e209d39SAndroid Build Coastguard Worker  * @see U16_NEXT_UNSAFE
309*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 2.4
310*0e209d39SAndroid Build Coastguard Worker  */
311*0e209d39SAndroid Build Coastguard Worker #define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
312*0e209d39SAndroid Build Coastguard Worker     (c)=(s)[(i)++]; \
313*0e209d39SAndroid Build Coastguard Worker     if(U16_IS_LEAD(c)) { \
314*0e209d39SAndroid Build Coastguard Worker         uint16_t __c2; \
315*0e209d39SAndroid Build Coastguard Worker         if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
316*0e209d39SAndroid Build Coastguard Worker             ++(i); \
317*0e209d39SAndroid Build Coastguard Worker             (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
318*0e209d39SAndroid Build Coastguard Worker         } \
319*0e209d39SAndroid Build Coastguard Worker     } \
320*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END
321*0e209d39SAndroid Build Coastguard Worker 
322*0e209d39SAndroid Build Coastguard Worker /**
323*0e209d39SAndroid Build Coastguard Worker  * Get a code point from a string at a code point boundary offset,
324*0e209d39SAndroid Build Coastguard Worker  * and advance the offset to the next code point boundary.
325*0e209d39SAndroid Build Coastguard Worker  * (Post-incrementing forward iteration.)
326*0e209d39SAndroid Build Coastguard Worker  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
327*0e209d39SAndroid Build Coastguard Worker  *
328*0e209d39SAndroid Build Coastguard Worker  * The length can be negative for a NUL-terminated string: for non-negative i the test i!=length then always passes, and a terminating NUL is never a trail surrogate.
329*0e209d39SAndroid Build Coastguard Worker  *
330*0e209d39SAndroid Build Coastguard Worker  * The offset may point to the lead surrogate unit
331*0e209d39SAndroid Build Coastguard Worker  * for a supplementary code point, in which case the macro will read
332*0e209d39SAndroid Build Coastguard Worker  * the following trail surrogate as well (only after checking i!=length and that the unit is a trail surrogate).
333*0e209d39SAndroid Build Coastguard Worker  * If the offset points to a trail surrogate or
334*0e209d39SAndroid Build Coastguard Worker  * to a single, unpaired lead surrogate, then c is set to U+FFFD and i advances by one unit only.
335*0e209d39SAndroid Build Coastguard Worker  *
336*0e209d39SAndroid Build Coastguard Worker  * @param s const UChar * string (expanded more than once; pass an expression without side effects)
337*0e209d39SAndroid Build Coastguard Worker  * @param i string offset, must be i<length; must be a modifiable lvalue, incremented by 1, or by 2 for a surrogate pair
338*0e209d39SAndroid Build Coastguard Worker  * @param length string length
339*0e209d39SAndroid Build Coastguard Worker  * @param c output UChar32 variable; the candidate trail unit is held in a block-local uint16_t named __c2
340*0e209d39SAndroid Build Coastguard Worker  * @see U16_NEXT_UNSAFE
341*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 60
342*0e209d39SAndroid Build Coastguard Worker  */
343*0e209d39SAndroid Build Coastguard Worker #define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
344*0e209d39SAndroid Build Coastguard Worker     (c)=(s)[(i)++]; \
345*0e209d39SAndroid Build Coastguard Worker     if(U16_IS_SURROGATE(c)) { \
346*0e209d39SAndroid Build Coastguard Worker         uint16_t __c2; \
347*0e209d39SAndroid Build Coastguard Worker         if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
348*0e209d39SAndroid Build Coastguard Worker             ++(i); \
349*0e209d39SAndroid Build Coastguard Worker             (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
350*0e209d39SAndroid Build Coastguard Worker         } else { \
351*0e209d39SAndroid Build Coastguard Worker             (c)=0xfffd; \
352*0e209d39SAndroid Build Coastguard Worker         } \
353*0e209d39SAndroid Build Coastguard Worker     } \
354*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END
355*0e209d39SAndroid Build Coastguard Worker 
356*0e209d39SAndroid Build Coastguard Worker /**
357*0e209d39SAndroid Build Coastguard Worker  * Append a code point to a string, overwriting 1 or 2 code units.
358*0e209d39SAndroid Build Coastguard Worker  * The offset points to the current end of the string contents
359*0e209d39SAndroid Build Coastguard Worker  * and is advanced (post-increment).
360*0e209d39SAndroid Build Coastguard Worker  * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
361*0e209d39SAndroid Build Coastguard Worker  * Otherwise, the result is undefined. One unit is written when (uint32_t)c<=0xffff, otherwise a lead/trail surrogate pair.
362*0e209d39SAndroid Build Coastguard Worker  *
363*0e209d39SAndroid Build Coastguard Worker  * @param s UChar * string buffer (written to, so it must not be const; expanded more than once)
364*0e209d39SAndroid Build Coastguard Worker  * @param i string offset; must be a modifiable lvalue, incremented once per code unit written
365*0e209d39SAndroid Build Coastguard Worker  * @param c code point to append (expanded more than once; pass an expression without side effects)
366*0e209d39SAndroid Build Coastguard Worker  * @see U16_APPEND
367*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 2.4
368*0e209d39SAndroid Build Coastguard Worker  */
369*0e209d39SAndroid Build Coastguard Worker #define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
370*0e209d39SAndroid Build Coastguard Worker     if((uint32_t)(c)<=0xffff) { \
371*0e209d39SAndroid Build Coastguard Worker         (s)[(i)++]=(uint16_t)(c); \
372*0e209d39SAndroid Build Coastguard Worker     } else { \
373*0e209d39SAndroid Build Coastguard Worker         (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
374*0e209d39SAndroid Build Coastguard Worker         (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
375*0e209d39SAndroid Build Coastguard Worker     } \
376*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END
377*0e209d39SAndroid Build Coastguard Worker 
378*0e209d39SAndroid Build Coastguard Worker /**
379*0e209d39SAndroid Build Coastguard Worker  * Append a code point to a string, overwriting 1 or 2 code units.
380*0e209d39SAndroid Build Coastguard Worker  * The offset points to the current end of the string contents
381*0e209d39SAndroid Build Coastguard Worker  * and is advanced (post-increment).
382*0e209d39SAndroid Build Coastguard Worker  * "Safe" macro, checks for a valid code point: only (uint32_t)c>0x10ffff is rejected, values in 0xd800..0xdfff are written unchanged as one unit.
383*0e209d39SAndroid Build Coastguard Worker  * If a surrogate pair is written, checks for sufficient space in the string (i+1<capacity); the single-unit case relies on the i<capacity precondition.
384*0e209d39SAndroid Build Coastguard Worker  * If the code point is not valid or a trail surrogate does not fit,
385*0e209d39SAndroid Build Coastguard Worker  * then isError is set to true, nothing is written and i is left unchanged.
386*0e209d39SAndroid Build Coastguard Worker  *
387*0e209d39SAndroid Build Coastguard Worker  * @param s UChar * string buffer (written to, so it must not be const; expanded more than once)
388*0e209d39SAndroid Build Coastguard Worker  * @param i string offset, must be i<capacity; must be a modifiable lvalue, incremented once per code unit written
389*0e209d39SAndroid Build Coastguard Worker  * @param capacity size of the string buffer
390*0e209d39SAndroid Build Coastguard Worker  * @param c code point to append (expanded more than once; pass an expression without side effects)
391*0e209d39SAndroid Build Coastguard Worker  * @param isError output UBool set to true if an error occurs, otherwise not modified
392*0e209d39SAndroid Build Coastguard Worker  * @see U16_APPEND_UNSAFE
393*0e209d39SAndroid Build Coastguard Worker  * \xrefitem stable "Stable" "Stable List" ICU 2.4
394*0e209d39SAndroid Build Coastguard Worker  */
395*0e209d39SAndroid Build Coastguard Worker #define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
396*0e209d39SAndroid Build Coastguard Worker     if((uint32_t)(c)<=0xffff) { \
397*0e209d39SAndroid Build Coastguard Worker         (s)[(i)++]=(uint16_t)(c); \
398*0e209d39SAndroid Build Coastguard Worker     } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
399*0e209d39SAndroid Build Coastguard Worker         (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
400*0e209d39SAndroid Build Coastguard Worker         (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
401*0e209d39SAndroid Build Coastguard Worker     } else /* c>0x10ffff or not enough space */ { \
402*0e209d39SAndroid Build Coastguard Worker         (isError)=true; \
403*0e209d39SAndroid Build Coastguard Worker     } \
404*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END
405*0e209d39SAndroid Build Coastguard Worker 
/**
 * Step the string offset forward over exactly one code point.
 * (Post-incrementing iteration.)
 * "Unsafe" macro: the text is assumed to be well-formed UTF-16, so a lead
 * surrogate is taken to be followed by its trail surrogate. Neither the next
 * code unit nor any string limit is checked.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_FWD_1
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* always consume one unit; a lead surrogate drags its trail along */ \
    if(U16_IS_LEAD((s)[(i)++])) { \
        (i)+=1; \
    } \
} UPRV_BLOCK_MACRO_END
421*0e209d39SAndroid Build Coastguard Worker 
/**
 * Step the string offset forward over exactly one code point.
 * (Post-incrementing iteration.)
 * "Safe" macro: copes with unpaired surrogates and respects the string limit.
 * A lead surrogate that is the last unit of the string, or that is not
 * followed by a trail surrogate, counts as a code point of its own.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param i string offset, must be i<length
 * @param length string length
 * @see U16_FWD_1_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)++])) { \
        /* only swallow the next unit if it exists and completes the pair */ \
        if((i)!=(length) && U16_IS_TRAIL((s)[i])) { \
            (i)+=1; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
440*0e209d39SAndroid Build Coastguard Worker 
/**
 * Move the string offset forward by n code points, i.e., from one code point
 * boundary to the n-th boundary after it.
 * (Post-incrementing iteration.)
 * "Unsafe" macro: assumes well-formed UTF-16 and performs no bounds checks.
 * Nothing happens if n is zero or negative.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_FWD_N
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    /* n is evaluated once; __N counts the code points still to skip */ \
    for(; __N>0; --__N) { \
        U16_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
460*0e209d39SAndroid Build Coastguard Worker 
/**
 * Move the string offset forward by n code points, i.e., from one code point
 * boundary to the n-th boundary after it.
 * (Post-incrementing iteration.)
 * "Safe" macro: copes with unpaired surrogates and respects the string limit.
 * Iteration ends early when the end of the string is reached before n code
 * points have been skipped. Nothing happens if n is zero or negative.
 *
 * The length can be negative for a NUL-terminated string; in that case the
 * terminating NUL marks the end of the string.
 *
 * @param s const UChar * string
 * @param i int32_t string offset, must be i<length
 * @param length int32_t string length
 * @param n number of code points to skip
 * @see U16_FWD_N_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    /* continue while code points remain to be skipped and the string has not ended */ \
    for(; __N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); --__N) { \
        U16_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
483*0e209d39SAndroid Build Coastguard Worker 
/**
 * Snap a random-access offset back to the start of the code point it is in.
 * If the code unit at the offset is a trail surrogate, the offset is
 * decremented by one; otherwise it is left alone.
 * "Unsafe" macro: assumes well-formed UTF-16, so the unit before a trail
 * surrogate is not inspected and no string start is checked.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_START
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* a trail surrogate is assumed to have its lead right before it */ \
    if(U16_IS_TRAIL((s)[i])) { \
        (i)-=1; \
    } \
} UPRV_BLOCK_MACRO_END
502*0e209d39SAndroid Build Coastguard Worker 
/**
 * Snap a random-access offset back to the start of the code point it is in.
 * The offset is decremented by one only if it points to a trail surrogate
 * that is directly preceded, within the string, by a lead surrogate.
 * In every other case it is left alone.
 * "Safe" macro: copes with unpaired surrogates and respects the string start.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<=i
 * @see U16_SET_CP_START_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { \
        /* look behind only if there is a unit between start and i */ \
        if((i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
            (i)-=1; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
522*0e209d39SAndroid Build Coastguard Worker 
523*0e209d39SAndroid Build Coastguard Worker /* definitions with backward iteration -------------------------------------- */
524*0e209d39SAndroid Build Coastguard Worker 
/**
 * Step the string offset back over one code point and fetch that code point.
 * (Pre-decrementing backward iteration.)
 * "Unsafe" macro: assumes well-formed UTF-16 and performs no bounds checks.
 *
 * The input offset may be the same as the string length.
 * If the unit before the offset is a trail surrogate, the unit before that
 * is read as its lead surrogate and the two are combined into a
 * supplementary code point.
 * If the unit before the offset is a lead surrogate, that surrogate itself
 * is returned as the code point.
 * The result is undefined if the offset is behind a single, unpaired trail surrogate.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param c output UChar32 variable
 * @see U16_PREV
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    --(i); \
    (c)=(s)[i]; \
    if(U16_IS_TRAIL(c)) { \
        /* the preceding unit is taken to be the matching lead, unchecked */ \
        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
551*0e209d39SAndroid Build Coastguard Worker 
/**
 * Step the string offset back over one code point and fetch that code point.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro: copes with unpaired surrogates and respects the string start.
 *
 * The input offset may be the same as the string length.
 * If the unit before the offset is a trail surrogate and the unit before
 * that (still within the string) is a lead surrogate, both are consumed and
 * combined into a supplementary code point.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to that unpaired surrogate.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c) && (i)>(start)) { \
        /* there is a unit in front of the trail: check whether it is the lead */ \
        uint16_t __c2=(s)[(i)-1]; \
        if(U16_IS_LEAD(__c2)) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
582*0e209d39SAndroid Build Coastguard Worker 
/**
 * Step the string offset back over one code point and fetch that code point.
 * (Pre-decrementing backward iteration.)
 * "Safe" macro: copes with unpaired surrogates and respects the string start.
 *
 * The input offset may be the same as the string length.
 * If the unit before the offset is a trail surrogate and the unit before
 * that (still within the string) is a lead surrogate, both are consumed and
 * combined into a supplementary code point.
 * If the offset is behind a lead surrogate or behind a single, unpaired
 * trail surrogate, then c is set to U+FFFD and only that one unit is consumed.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @param c output UChar32 variable
 * @see U16_PREV_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 60
 */
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        /* not a trail, nothing before it, or no lead before it: unpaired */ \
        if(!U16_IS_SURROGATE_TRAIL(c) || (i)<=(start) || !U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            (c)=0xfffd; \
        } else { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
615*0e209d39SAndroid Build Coastguard Worker 
/**
 * Step the string offset back over exactly one code point.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro: the text is assumed to be well-formed UTF-16, so a trail
 * surrogate is taken to be preceded by its lead surrogate. Neither the
 * preceding code unit nor any string start is checked.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_BACK_1
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* always step back one unit; a trail surrogate drags its lead along */ \
    if(U16_IS_TRAIL((s)[--(i)])) { \
        (i)-=1; \
    } \
} UPRV_BLOCK_MACRO_END
632*0e209d39SAndroid Build Coastguard Worker 
/**
 * Step the string offset back over exactly one code point.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro: copes with unpaired surrogates and respects the string start.
 * A trail surrogate that is the first unit of the string, or that is not
 * preceded by a lead surrogate, counts as a code point of its own.
 *
 * @param s const UChar * string
 * @param start starting string offset (usually 0)
 * @param i string offset, must be start<i
 * @see U16_BACK_1_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)])) { \
        /* only step over the previous unit if it exists and is the lead */ \
        if((i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
            (i)-=1; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
650*0e209d39SAndroid Build Coastguard Worker 
/**
 * Move the string offset backward by n code points, i.e., from one code point
 * boundary to the n-th boundary before it.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Unsafe" macro: assumes well-formed UTF-16 and performs no bounds checks.
 * Nothing happens if n is zero or negative.
 *
 * @param s const UChar * string
 * @param i string offset
 * @param n number of code points to skip
 * @see U16_BACK_N
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    /* n is evaluated once; __N counts the code points still to skip */ \
    for(; __N>0; --__N) { \
        U16_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
671*0e209d39SAndroid Build Coastguard Worker 
/**
 * Move the string offset backward by n code points, i.e., from one code point
 * boundary to the n-th boundary before it.
 * (Pre-decrementing backward iteration.)
 * The input offset may be the same as the string length.
 * "Safe" macro: copes with unpaired surrogates and respects the string start.
 * Iteration ends early when the offset reaches start before n code points
 * have been skipped. Nothing happens if n is zero or negative.
 *
 * @param s const UChar * string
 * @param start start of string
 * @param i string offset, must be start<i
 * @param n number of code points to skip
 * @see U16_BACK_N_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    /* continue while code points remain to be skipped and start is not reached */ \
    for(; __N>0 && (i)>(start); --__N) { \
        U16_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
693*0e209d39SAndroid Build Coastguard Worker 
/**
 * Snap a random-access offset forward to the boundary after a code point.
 * If the code unit just before the offset is a lead surrogate, the offset is
 * incremented by one; otherwise it is left alone.
 * The input offset may be the same as the string length.
 * "Unsafe" macro: assumes well-formed UTF-16, so the unit at the offset is
 * not inspected and no string limit is checked.
 *
 * @param s const UChar * string
 * @param i string offset
 * @see U16_SET_CP_LIMIT
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* a lead surrogate is assumed to have its trail right after it */ \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        (i)+=1; \
    } \
} UPRV_BLOCK_MACRO_END
712*0e209d39SAndroid Build Coastguard Worker 
/**
 * Snap a random-access offset forward to the boundary after a code point.
 * The offset is incremented by one only if it lies strictly inside the
 * string, between a lead surrogate and the trail surrogate that follows it.
 * In every other case it is left alone.
 * The input offset may be the same as the string length.
 * "Safe" macro: copes with unpaired surrogates and respects string boundaries.
 *
 * The length can be negative for a NUL-terminated string.
 *
 * @param s const UChar * string
 * @param start int32_t starting string offset (usually 0)
 * @param i int32_t string offset, start<=i<=length
 * @param length int32_t string length
 * @see U16_SET_CP_LIMIT_UNSAFE
 * \xrefitem stable "Stable" "Stable List" ICU 2.4
 */
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* both neighbours must exist before either is read */ \
    if((start)<(i) && ((i)<(length) || (length)<0)) { \
        if(U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
            (i)+=1; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
735*0e209d39SAndroid Build Coastguard Worker 
736*0e209d39SAndroid Build Coastguard Worker #endif
737*0e209d39SAndroid Build Coastguard Worker 
738*0e209d39SAndroid Build Coastguard Worker /** @} */ // addtogroup
739