// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
*   Copyright (C) 1999-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  utf16.h
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999sep09
*   created by: Markus W. Scherer
*/

/**
 * \file
 * \brief C API: 16-bit Unicode handling macros
 *
 * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
 *
 * For more information see utf.h and the ICU User Guide Strings chapter
 * (https://unicode-org.github.io/icu/userguide/strings).
27*0e209d39SAndroid Build Coastguard Worker * 28*0e209d39SAndroid Build Coastguard Worker * <em>Usage:</em> 29*0e209d39SAndroid Build Coastguard Worker * ICU coding guidelines for if() statements should be followed when using these macros. 30*0e209d39SAndroid Build Coastguard Worker * Compound statements (curly braces {}) must be used for if-else-while... 31*0e209d39SAndroid Build Coastguard Worker * bodies and all macro statements should be terminated with semicolon. 32*0e209d39SAndroid Build Coastguard Worker */ 33*0e209d39SAndroid Build Coastguard Worker 34*0e209d39SAndroid Build Coastguard Worker #ifndef __UTF16_H__ 35*0e209d39SAndroid Build Coastguard Worker #define __UTF16_H__ 36*0e209d39SAndroid Build Coastguard Worker 37*0e209d39SAndroid Build Coastguard Worker #include <stdbool.h> 38*0e209d39SAndroid Build Coastguard Worker #include "unicode/umachine.h" 39*0e209d39SAndroid Build Coastguard Worker #ifndef __UTF_H__ 40*0e209d39SAndroid Build Coastguard Worker # include "unicode/utf.h" 41*0e209d39SAndroid Build Coastguard Worker #endif 42*0e209d39SAndroid Build Coastguard Worker 43*0e209d39SAndroid Build Coastguard Worker /* single-code point definitions -------------------------------------------- */ 44*0e209d39SAndroid Build Coastguard Worker 45*0e209d39SAndroid Build Coastguard Worker /** 46*0e209d39SAndroid Build Coastguard Worker * Does this code unit alone encode a code point (BMP, not a surrogate)? 47*0e209d39SAndroid Build Coastguard Worker * @param c 16-bit code unit 48*0e209d39SAndroid Build Coastguard Worker * @return true or false 49*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 50*0e209d39SAndroid Build Coastguard Worker */ 51*0e209d39SAndroid Build Coastguard Worker #define U16_IS_SINGLE(c) !U_IS_SURROGATE(c) 52*0e209d39SAndroid Build Coastguard Worker 53*0e209d39SAndroid Build Coastguard Worker /** 54*0e209d39SAndroid Build Coastguard Worker * Is this code unit a lead surrogate (U+d800..U+dbff)? 
55*0e209d39SAndroid Build Coastguard Worker * @param c 16-bit code unit 56*0e209d39SAndroid Build Coastguard Worker * @return true or false 57*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 58*0e209d39SAndroid Build Coastguard Worker */ 59*0e209d39SAndroid Build Coastguard Worker #define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800) 60*0e209d39SAndroid Build Coastguard Worker 61*0e209d39SAndroid Build Coastguard Worker /** 62*0e209d39SAndroid Build Coastguard Worker * Is this code unit a trail surrogate (U+dc00..U+dfff)? 63*0e209d39SAndroid Build Coastguard Worker * @param c 16-bit code unit 64*0e209d39SAndroid Build Coastguard Worker * @return true or false 65*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 66*0e209d39SAndroid Build Coastguard Worker */ 67*0e209d39SAndroid Build Coastguard Worker #define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00) 68*0e209d39SAndroid Build Coastguard Worker 69*0e209d39SAndroid Build Coastguard Worker /** 70*0e209d39SAndroid Build Coastguard Worker * Is this code unit a surrogate (U+d800..U+dfff)? 71*0e209d39SAndroid Build Coastguard Worker * @param c 16-bit code unit 72*0e209d39SAndroid Build Coastguard Worker * @return true or false 73*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 74*0e209d39SAndroid Build Coastguard Worker */ 75*0e209d39SAndroid Build Coastguard Worker #define U16_IS_SURROGATE(c) U_IS_SURROGATE(c) 76*0e209d39SAndroid Build Coastguard Worker 77*0e209d39SAndroid Build Coastguard Worker /** 78*0e209d39SAndroid Build Coastguard Worker * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), 79*0e209d39SAndroid Build Coastguard Worker * is it a lead surrogate? 
80*0e209d39SAndroid Build Coastguard Worker * @param c 16-bit code unit 81*0e209d39SAndroid Build Coastguard Worker * @return true or false 82*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 83*0e209d39SAndroid Build Coastguard Worker */ 84*0e209d39SAndroid Build Coastguard Worker #define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0) 85*0e209d39SAndroid Build Coastguard Worker 86*0e209d39SAndroid Build Coastguard Worker /** 87*0e209d39SAndroid Build Coastguard Worker * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), 88*0e209d39SAndroid Build Coastguard Worker * is it a trail surrogate? 89*0e209d39SAndroid Build Coastguard Worker * @param c 16-bit code unit 90*0e209d39SAndroid Build Coastguard Worker * @return true or false 91*0e209d39SAndroid Build Coastguard Worker * @stable ICU 4.2 92*0e209d39SAndroid Build Coastguard Worker */ 93*0e209d39SAndroid Build Coastguard Worker #define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0) 94*0e209d39SAndroid Build Coastguard Worker 95*0e209d39SAndroid Build Coastguard Worker /** 96*0e209d39SAndroid Build Coastguard Worker * Helper constant for U16_GET_SUPPLEMENTARY. 97*0e209d39SAndroid Build Coastguard Worker * @internal 98*0e209d39SAndroid Build Coastguard Worker */ 99*0e209d39SAndroid Build Coastguard Worker #define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000) 100*0e209d39SAndroid Build Coastguard Worker 101*0e209d39SAndroid Build Coastguard Worker /** 102*0e209d39SAndroid Build Coastguard Worker * Get a supplementary code point value (U+10000..U+10ffff) 103*0e209d39SAndroid Build Coastguard Worker * from its lead and trail surrogates. 104*0e209d39SAndroid Build Coastguard Worker * The result is undefined if the input values are not 105*0e209d39SAndroid Build Coastguard Worker * lead and trail surrogates. 
106*0e209d39SAndroid Build Coastguard Worker * 107*0e209d39SAndroid Build Coastguard Worker * @param lead lead surrogate (U+d800..U+dbff) 108*0e209d39SAndroid Build Coastguard Worker * @param trail trail surrogate (U+dc00..U+dfff) 109*0e209d39SAndroid Build Coastguard Worker * @return supplementary code point (U+10000..U+10ffff) 110*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 111*0e209d39SAndroid Build Coastguard Worker */ 112*0e209d39SAndroid Build Coastguard Worker #define U16_GET_SUPPLEMENTARY(lead, trail) \ 113*0e209d39SAndroid Build Coastguard Worker (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET) 114*0e209d39SAndroid Build Coastguard Worker 115*0e209d39SAndroid Build Coastguard Worker 116*0e209d39SAndroid Build Coastguard Worker /** 117*0e209d39SAndroid Build Coastguard Worker * Get the lead surrogate (0xd800..0xdbff) for a 118*0e209d39SAndroid Build Coastguard Worker * supplementary code point (0x10000..0x10ffff). 119*0e209d39SAndroid Build Coastguard Worker * @param supplementary 32-bit code point (U+10000..U+10ffff) 120*0e209d39SAndroid Build Coastguard Worker * @return lead surrogate (U+d800..U+dbff) for supplementary 121*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 122*0e209d39SAndroid Build Coastguard Worker */ 123*0e209d39SAndroid Build Coastguard Worker #define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0) 124*0e209d39SAndroid Build Coastguard Worker 125*0e209d39SAndroid Build Coastguard Worker /** 126*0e209d39SAndroid Build Coastguard Worker * Get the trail surrogate (0xdc00..0xdfff) for a 127*0e209d39SAndroid Build Coastguard Worker * supplementary code point (0x10000..0x10ffff). 
128*0e209d39SAndroid Build Coastguard Worker * @param supplementary 32-bit code point (U+10000..U+10ffff) 129*0e209d39SAndroid Build Coastguard Worker * @return trail surrogate (U+dc00..U+dfff) for supplementary 130*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 131*0e209d39SAndroid Build Coastguard Worker */ 132*0e209d39SAndroid Build Coastguard Worker #define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00) 133*0e209d39SAndroid Build Coastguard Worker 134*0e209d39SAndroid Build Coastguard Worker /** 135*0e209d39SAndroid Build Coastguard Worker * How many 16-bit code units are used to encode this Unicode code point? (1 or 2) 136*0e209d39SAndroid Build Coastguard Worker * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff). 137*0e209d39SAndroid Build Coastguard Worker * @param c 32-bit code point 138*0e209d39SAndroid Build Coastguard Worker * @return 1 or 2 139*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 140*0e209d39SAndroid Build Coastguard Worker */ 141*0e209d39SAndroid Build Coastguard Worker #define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2) 142*0e209d39SAndroid Build Coastguard Worker 143*0e209d39SAndroid Build Coastguard Worker /** 144*0e209d39SAndroid Build Coastguard Worker * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff). 145*0e209d39SAndroid Build Coastguard Worker * @return 2 146*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 147*0e209d39SAndroid Build Coastguard Worker */ 148*0e209d39SAndroid Build Coastguard Worker #define U16_MAX_LENGTH 2 149*0e209d39SAndroid Build Coastguard Worker 150*0e209d39SAndroid Build Coastguard Worker /** 151*0e209d39SAndroid Build Coastguard Worker * Get a code point from a string at a random-access offset, 152*0e209d39SAndroid Build Coastguard Worker * without changing the offset. 153*0e209d39SAndroid Build Coastguard Worker * "Unsafe" macro, assumes well-formed UTF-16. 
154*0e209d39SAndroid Build Coastguard Worker * 155*0e209d39SAndroid Build Coastguard Worker * The offset may point to either the lead or trail surrogate unit 156*0e209d39SAndroid Build Coastguard Worker * for a supplementary code point, in which case the macro will read 157*0e209d39SAndroid Build Coastguard Worker * the adjacent matching surrogate as well. 158*0e209d39SAndroid Build Coastguard Worker * The result is undefined if the offset points to a single, unpaired surrogate. 159*0e209d39SAndroid Build Coastguard Worker * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. 160*0e209d39SAndroid Build Coastguard Worker * 161*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 162*0e209d39SAndroid Build Coastguard Worker * @param i string offset 163*0e209d39SAndroid Build Coastguard Worker * @param c output UChar32 variable 164*0e209d39SAndroid Build Coastguard Worker * @see U16_GET 165*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 166*0e209d39SAndroid Build Coastguard Worker */ 167*0e209d39SAndroid Build Coastguard Worker #define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 168*0e209d39SAndroid Build Coastguard Worker (c)=(s)[i]; \ 169*0e209d39SAndroid Build Coastguard Worker if(U16_IS_SURROGATE(c)) { \ 170*0e209d39SAndroid Build Coastguard Worker if(U16_IS_SURROGATE_LEAD(c)) { \ 171*0e209d39SAndroid Build Coastguard Worker (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \ 172*0e209d39SAndroid Build Coastguard Worker } else { \ 173*0e209d39SAndroid Build Coastguard Worker (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \ 174*0e209d39SAndroid Build Coastguard Worker } \ 175*0e209d39SAndroid Build Coastguard Worker } \ 176*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 177*0e209d39SAndroid Build Coastguard Worker 178*0e209d39SAndroid Build Coastguard Worker /** 179*0e209d39SAndroid Build Coastguard Worker * Get a code point from a string at a random-access offset, 180*0e209d39SAndroid Build 
Coastguard Worker * without changing the offset. 181*0e209d39SAndroid Build Coastguard Worker * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 182*0e209d39SAndroid Build Coastguard Worker * 183*0e209d39SAndroid Build Coastguard Worker * The offset may point to either the lead or trail surrogate unit 184*0e209d39SAndroid Build Coastguard Worker * for a supplementary code point, in which case the macro will read 185*0e209d39SAndroid Build Coastguard Worker * the adjacent matching surrogate as well. 186*0e209d39SAndroid Build Coastguard Worker * 187*0e209d39SAndroid Build Coastguard Worker * The length can be negative for a NUL-terminated string. 188*0e209d39SAndroid Build Coastguard Worker * 189*0e209d39SAndroid Build Coastguard Worker * If the offset points to a single, unpaired surrogate, then 190*0e209d39SAndroid Build Coastguard Worker * c is set to that unpaired surrogate. 191*0e209d39SAndroid Build Coastguard Worker * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. 
192*0e209d39SAndroid Build Coastguard Worker * 193*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 194*0e209d39SAndroid Build Coastguard Worker * @param start starting string offset (usually 0) 195*0e209d39SAndroid Build Coastguard Worker * @param i string offset, must be start<=i<length 196*0e209d39SAndroid Build Coastguard Worker * @param length string length 197*0e209d39SAndroid Build Coastguard Worker * @param c output UChar32 variable 198*0e209d39SAndroid Build Coastguard Worker * @see U16_GET_UNSAFE 199*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 200*0e209d39SAndroid Build Coastguard Worker */ 201*0e209d39SAndroid Build Coastguard Worker #define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ 202*0e209d39SAndroid Build Coastguard Worker (c)=(s)[i]; \ 203*0e209d39SAndroid Build Coastguard Worker if(U16_IS_SURROGATE(c)) { \ 204*0e209d39SAndroid Build Coastguard Worker uint16_t __c2; \ 205*0e209d39SAndroid Build Coastguard Worker if(U16_IS_SURROGATE_LEAD(c)) { \ 206*0e209d39SAndroid Build Coastguard Worker if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ 207*0e209d39SAndroid Build Coastguard Worker (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ 208*0e209d39SAndroid Build Coastguard Worker } \ 209*0e209d39SAndroid Build Coastguard Worker } else { \ 210*0e209d39SAndroid Build Coastguard Worker if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ 211*0e209d39SAndroid Build Coastguard Worker (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ 212*0e209d39SAndroid Build Coastguard Worker } \ 213*0e209d39SAndroid Build Coastguard Worker } \ 214*0e209d39SAndroid Build Coastguard Worker } \ 215*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 216*0e209d39SAndroid Build Coastguard Worker 217*0e209d39SAndroid Build Coastguard Worker /** 218*0e209d39SAndroid Build Coastguard Worker * Get a code point from a string at a random-access offset, 219*0e209d39SAndroid Build Coastguard Worker * without changing the offset. 
220*0e209d39SAndroid Build Coastguard Worker * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 221*0e209d39SAndroid Build Coastguard Worker * 222*0e209d39SAndroid Build Coastguard Worker * The offset may point to either the lead or trail surrogate unit 223*0e209d39SAndroid Build Coastguard Worker * for a supplementary code point, in which case the macro will read 224*0e209d39SAndroid Build Coastguard Worker * the adjacent matching surrogate as well. 225*0e209d39SAndroid Build Coastguard Worker * 226*0e209d39SAndroid Build Coastguard Worker * The length can be negative for a NUL-terminated string. 227*0e209d39SAndroid Build Coastguard Worker * 228*0e209d39SAndroid Build Coastguard Worker * If the offset points to a single, unpaired surrogate, then 229*0e209d39SAndroid Build Coastguard Worker * c is set to U+FFFD. 230*0e209d39SAndroid Build Coastguard Worker * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD. 231*0e209d39SAndroid Build Coastguard Worker * 232*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 233*0e209d39SAndroid Build Coastguard Worker * @param start starting string offset (usually 0) 234*0e209d39SAndroid Build Coastguard Worker * @param i string offset, must be start<=i<length 235*0e209d39SAndroid Build Coastguard Worker * @param length string length 236*0e209d39SAndroid Build Coastguard Worker * @param c output UChar32 variable 237*0e209d39SAndroid Build Coastguard Worker * @see U16_GET_UNSAFE 238*0e209d39SAndroid Build Coastguard Worker * @stable ICU 60 239*0e209d39SAndroid Build Coastguard Worker */ 240*0e209d39SAndroid Build Coastguard Worker #define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ 241*0e209d39SAndroid Build Coastguard Worker (c)=(s)[i]; \ 242*0e209d39SAndroid Build Coastguard Worker if(U16_IS_SURROGATE(c)) { \ 243*0e209d39SAndroid Build Coastguard Worker uint16_t __c2; \ 244*0e209d39SAndroid Build Coastguard Worker 
if(U16_IS_SURROGATE_LEAD(c)) { \ 245*0e209d39SAndroid Build Coastguard Worker if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ 246*0e209d39SAndroid Build Coastguard Worker (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ 247*0e209d39SAndroid Build Coastguard Worker } else { \ 248*0e209d39SAndroid Build Coastguard Worker (c)=0xfffd; \ 249*0e209d39SAndroid Build Coastguard Worker } \ 250*0e209d39SAndroid Build Coastguard Worker } else { \ 251*0e209d39SAndroid Build Coastguard Worker if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ 252*0e209d39SAndroid Build Coastguard Worker (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ 253*0e209d39SAndroid Build Coastguard Worker } else { \ 254*0e209d39SAndroid Build Coastguard Worker (c)=0xfffd; \ 255*0e209d39SAndroid Build Coastguard Worker } \ 256*0e209d39SAndroid Build Coastguard Worker } \ 257*0e209d39SAndroid Build Coastguard Worker } \ 258*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 259*0e209d39SAndroid Build Coastguard Worker 260*0e209d39SAndroid Build Coastguard Worker /* definitions with forward iteration --------------------------------------- */ 261*0e209d39SAndroid Build Coastguard Worker 262*0e209d39SAndroid Build Coastguard Worker /** 263*0e209d39SAndroid Build Coastguard Worker * Get a code point from a string at a code point boundary offset, 264*0e209d39SAndroid Build Coastguard Worker * and advance the offset to the next code point boundary. 265*0e209d39SAndroid Build Coastguard Worker * (Post-incrementing forward iteration.) 266*0e209d39SAndroid Build Coastguard Worker * "Unsafe" macro, assumes well-formed UTF-16. 267*0e209d39SAndroid Build Coastguard Worker * 268*0e209d39SAndroid Build Coastguard Worker * The offset may point to the lead surrogate unit 269*0e209d39SAndroid Build Coastguard Worker * for a supplementary code point, in which case the macro will read 270*0e209d39SAndroid Build Coastguard Worker * the following trail surrogate as well. 
271*0e209d39SAndroid Build Coastguard Worker * If the offset points to a trail surrogate, then that itself 272*0e209d39SAndroid Build Coastguard Worker * will be returned as the code point. 273*0e209d39SAndroid Build Coastguard Worker * The result is undefined if the offset points to a single, unpaired lead surrogate. 274*0e209d39SAndroid Build Coastguard Worker * 275*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 276*0e209d39SAndroid Build Coastguard Worker * @param i string offset 277*0e209d39SAndroid Build Coastguard Worker * @param c output UChar32 variable 278*0e209d39SAndroid Build Coastguard Worker * @see U16_NEXT 279*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 280*0e209d39SAndroid Build Coastguard Worker */ 281*0e209d39SAndroid Build Coastguard Worker #define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 282*0e209d39SAndroid Build Coastguard Worker (c)=(s)[(i)++]; \ 283*0e209d39SAndroid Build Coastguard Worker if(U16_IS_LEAD(c)) { \ 284*0e209d39SAndroid Build Coastguard Worker (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \ 285*0e209d39SAndroid Build Coastguard Worker } \ 286*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 287*0e209d39SAndroid Build Coastguard Worker 288*0e209d39SAndroid Build Coastguard Worker /** 289*0e209d39SAndroid Build Coastguard Worker * Get a code point from a string at a code point boundary offset, 290*0e209d39SAndroid Build Coastguard Worker * and advance the offset to the next code point boundary. 291*0e209d39SAndroid Build Coastguard Worker * (Post-incrementing forward iteration.) 292*0e209d39SAndroid Build Coastguard Worker * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 293*0e209d39SAndroid Build Coastguard Worker * 294*0e209d39SAndroid Build Coastguard Worker * The length can be negative for a NUL-terminated string. 
295*0e209d39SAndroid Build Coastguard Worker * 296*0e209d39SAndroid Build Coastguard Worker * The offset may point to the lead surrogate unit 297*0e209d39SAndroid Build Coastguard Worker * for a supplementary code point, in which case the macro will read 298*0e209d39SAndroid Build Coastguard Worker * the following trail surrogate as well. 299*0e209d39SAndroid Build Coastguard Worker * If the offset points to a trail surrogate or 300*0e209d39SAndroid Build Coastguard Worker * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate. 301*0e209d39SAndroid Build Coastguard Worker * 302*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 303*0e209d39SAndroid Build Coastguard Worker * @param i string offset, must be i<length 304*0e209d39SAndroid Build Coastguard Worker * @param length string length 305*0e209d39SAndroid Build Coastguard Worker * @param c output UChar32 variable 306*0e209d39SAndroid Build Coastguard Worker * @see U16_NEXT_UNSAFE 307*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 308*0e209d39SAndroid Build Coastguard Worker */ 309*0e209d39SAndroid Build Coastguard Worker #define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ 310*0e209d39SAndroid Build Coastguard Worker (c)=(s)[(i)++]; \ 311*0e209d39SAndroid Build Coastguard Worker if(U16_IS_LEAD(c)) { \ 312*0e209d39SAndroid Build Coastguard Worker uint16_t __c2; \ 313*0e209d39SAndroid Build Coastguard Worker if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ 314*0e209d39SAndroid Build Coastguard Worker ++(i); \ 315*0e209d39SAndroid Build Coastguard Worker (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ 316*0e209d39SAndroid Build Coastguard Worker } \ 317*0e209d39SAndroid Build Coastguard Worker } \ 318*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 319*0e209d39SAndroid Build Coastguard Worker 320*0e209d39SAndroid Build Coastguard Worker /** 321*0e209d39SAndroid Build Coastguard Worker * Get a code point from a string at a code point 
boundary offset, 322*0e209d39SAndroid Build Coastguard Worker * and advance the offset to the next code point boundary. 323*0e209d39SAndroid Build Coastguard Worker * (Post-incrementing forward iteration.) 324*0e209d39SAndroid Build Coastguard Worker * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 325*0e209d39SAndroid Build Coastguard Worker * 326*0e209d39SAndroid Build Coastguard Worker * The length can be negative for a NUL-terminated string. 327*0e209d39SAndroid Build Coastguard Worker * 328*0e209d39SAndroid Build Coastguard Worker * The offset may point to the lead surrogate unit 329*0e209d39SAndroid Build Coastguard Worker * for a supplementary code point, in which case the macro will read 330*0e209d39SAndroid Build Coastguard Worker * the following trail surrogate as well. 331*0e209d39SAndroid Build Coastguard Worker * If the offset points to a trail surrogate or 332*0e209d39SAndroid Build Coastguard Worker * to a single, unpaired lead surrogate, then c is set to U+FFFD. 
333*0e209d39SAndroid Build Coastguard Worker * 334*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 335*0e209d39SAndroid Build Coastguard Worker * @param i string offset, must be i<length 336*0e209d39SAndroid Build Coastguard Worker * @param length string length 337*0e209d39SAndroid Build Coastguard Worker * @param c output UChar32 variable 338*0e209d39SAndroid Build Coastguard Worker * @see U16_NEXT_UNSAFE 339*0e209d39SAndroid Build Coastguard Worker * @stable ICU 60 340*0e209d39SAndroid Build Coastguard Worker */ 341*0e209d39SAndroid Build Coastguard Worker #define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \ 342*0e209d39SAndroid Build Coastguard Worker (c)=(s)[(i)++]; \ 343*0e209d39SAndroid Build Coastguard Worker if(U16_IS_SURROGATE(c)) { \ 344*0e209d39SAndroid Build Coastguard Worker uint16_t __c2; \ 345*0e209d39SAndroid Build Coastguard Worker if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ 346*0e209d39SAndroid Build Coastguard Worker ++(i); \ 347*0e209d39SAndroid Build Coastguard Worker (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ 348*0e209d39SAndroid Build Coastguard Worker } else { \ 349*0e209d39SAndroid Build Coastguard Worker (c)=0xfffd; \ 350*0e209d39SAndroid Build Coastguard Worker } \ 351*0e209d39SAndroid Build Coastguard Worker } \ 352*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 353*0e209d39SAndroid Build Coastguard Worker 354*0e209d39SAndroid Build Coastguard Worker /** 355*0e209d39SAndroid Build Coastguard Worker * Append a code point to a string, overwriting 1 or 2 code units. 356*0e209d39SAndroid Build Coastguard Worker * The offset points to the current end of the string contents 357*0e209d39SAndroid Build Coastguard Worker * and is advanced (post-increment). 358*0e209d39SAndroid Build Coastguard Worker * "Unsafe" macro, assumes a valid code point and sufficient space in the string. 
359*0e209d39SAndroid Build Coastguard Worker * Otherwise, the result is undefined. 360*0e209d39SAndroid Build Coastguard Worker * 361*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string buffer 362*0e209d39SAndroid Build Coastguard Worker * @param i string offset 363*0e209d39SAndroid Build Coastguard Worker * @param c code point to append 364*0e209d39SAndroid Build Coastguard Worker * @see U16_APPEND 365*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 366*0e209d39SAndroid Build Coastguard Worker */ 367*0e209d39SAndroid Build Coastguard Worker #define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 368*0e209d39SAndroid Build Coastguard Worker if((uint32_t)(c)<=0xffff) { \ 369*0e209d39SAndroid Build Coastguard Worker (s)[(i)++]=(uint16_t)(c); \ 370*0e209d39SAndroid Build Coastguard Worker } else { \ 371*0e209d39SAndroid Build Coastguard Worker (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 372*0e209d39SAndroid Build Coastguard Worker (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 373*0e209d39SAndroid Build Coastguard Worker } \ 374*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 375*0e209d39SAndroid Build Coastguard Worker 376*0e209d39SAndroid Build Coastguard Worker /** 377*0e209d39SAndroid Build Coastguard Worker * Append a code point to a string, overwriting 1 or 2 code units. 378*0e209d39SAndroid Build Coastguard Worker * The offset points to the current end of the string contents 379*0e209d39SAndroid Build Coastguard Worker * and is advanced (post-increment). 380*0e209d39SAndroid Build Coastguard Worker * "Safe" macro, checks for a valid code point. 381*0e209d39SAndroid Build Coastguard Worker * If a surrogate pair is written, checks for sufficient space in the string. 382*0e209d39SAndroid Build Coastguard Worker * If the code point is not valid or a trail surrogate does not fit, 383*0e209d39SAndroid Build Coastguard Worker * then isError is set to true. 
384*0e209d39SAndroid Build Coastguard Worker * 385*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string buffer 386*0e209d39SAndroid Build Coastguard Worker * @param i string offset, must be i<capacity 387*0e209d39SAndroid Build Coastguard Worker * @param capacity size of the string buffer 388*0e209d39SAndroid Build Coastguard Worker * @param c code point to append 389*0e209d39SAndroid Build Coastguard Worker * @param isError output UBool set to true if an error occurs, otherwise not modified 390*0e209d39SAndroid Build Coastguard Worker * @see U16_APPEND_UNSAFE 391*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 392*0e209d39SAndroid Build Coastguard Worker */ 393*0e209d39SAndroid Build Coastguard Worker #define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \ 394*0e209d39SAndroid Build Coastguard Worker if((uint32_t)(c)<=0xffff) { \ 395*0e209d39SAndroid Build Coastguard Worker (s)[(i)++]=(uint16_t)(c); \ 396*0e209d39SAndroid Build Coastguard Worker } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \ 397*0e209d39SAndroid Build Coastguard Worker (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \ 398*0e209d39SAndroid Build Coastguard Worker (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \ 399*0e209d39SAndroid Build Coastguard Worker } else /* c>0x10ffff or not enough space */ { \ 400*0e209d39SAndroid Build Coastguard Worker (isError)=true; \ 401*0e209d39SAndroid Build Coastguard Worker } \ 402*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 403*0e209d39SAndroid Build Coastguard Worker 404*0e209d39SAndroid Build Coastguard Worker /** 405*0e209d39SAndroid Build Coastguard Worker * Advance the string offset from one code point boundary to the next. 406*0e209d39SAndroid Build Coastguard Worker * (Post-incrementing iteration.) 407*0e209d39SAndroid Build Coastguard Worker * "Unsafe" macro, assumes well-formed UTF-16. 
408*0e209d39SAndroid Build Coastguard Worker * 409*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 410*0e209d39SAndroid Build Coastguard Worker * @param i string offset 411*0e209d39SAndroid Build Coastguard Worker * @see U16_FWD_1 412*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 413*0e209d39SAndroid Build Coastguard Worker */ 414*0e209d39SAndroid Build Coastguard Worker #define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 415*0e209d39SAndroid Build Coastguard Worker if(U16_IS_LEAD((s)[(i)++])) { \ 416*0e209d39SAndroid Build Coastguard Worker ++(i); \ 417*0e209d39SAndroid Build Coastguard Worker } \ 418*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 419*0e209d39SAndroid Build Coastguard Worker 420*0e209d39SAndroid Build Coastguard Worker /** 421*0e209d39SAndroid Build Coastguard Worker * Advance the string offset from one code point boundary to the next. 422*0e209d39SAndroid Build Coastguard Worker * (Post-incrementing iteration.) 423*0e209d39SAndroid Build Coastguard Worker * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 424*0e209d39SAndroid Build Coastguard Worker * 425*0e209d39SAndroid Build Coastguard Worker * The length can be negative for a NUL-terminated string. 
426*0e209d39SAndroid Build Coastguard Worker * 427*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 428*0e209d39SAndroid Build Coastguard Worker * @param i string offset, must be i<length 429*0e209d39SAndroid Build Coastguard Worker * @param length string length 430*0e209d39SAndroid Build Coastguard Worker * @see U16_FWD_1_UNSAFE 431*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 432*0e209d39SAndroid Build Coastguard Worker */ 433*0e209d39SAndroid Build Coastguard Worker #define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \ 434*0e209d39SAndroid Build Coastguard Worker if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \ 435*0e209d39SAndroid Build Coastguard Worker ++(i); \ 436*0e209d39SAndroid Build Coastguard Worker } \ 437*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 438*0e209d39SAndroid Build Coastguard Worker 439*0e209d39SAndroid Build Coastguard Worker /** 440*0e209d39SAndroid Build Coastguard Worker * Advance the string offset from one code point boundary to the n-th next one, 441*0e209d39SAndroid Build Coastguard Worker * i.e., move forward by n code points. 442*0e209d39SAndroid Build Coastguard Worker * (Post-incrementing iteration.) 443*0e209d39SAndroid Build Coastguard Worker * "Unsafe" macro, assumes well-formed UTF-16. 
444*0e209d39SAndroid Build Coastguard Worker * 445*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 446*0e209d39SAndroid Build Coastguard Worker * @param i string offset 447*0e209d39SAndroid Build Coastguard Worker * @param n number of code points to skip 448*0e209d39SAndroid Build Coastguard Worker * @see U16_FWD_N 449*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 450*0e209d39SAndroid Build Coastguard Worker */ 451*0e209d39SAndroid Build Coastguard Worker #define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ 452*0e209d39SAndroid Build Coastguard Worker int32_t __N=(n); \ 453*0e209d39SAndroid Build Coastguard Worker while(__N>0) { \ 454*0e209d39SAndroid Build Coastguard Worker U16_FWD_1_UNSAFE(s, i); \ 455*0e209d39SAndroid Build Coastguard Worker --__N; \ 456*0e209d39SAndroid Build Coastguard Worker } \ 457*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 458*0e209d39SAndroid Build Coastguard Worker 459*0e209d39SAndroid Build Coastguard Worker /** 460*0e209d39SAndroid Build Coastguard Worker * Advance the string offset from one code point boundary to the n-th next one, 461*0e209d39SAndroid Build Coastguard Worker * i.e., move forward by n code points. 462*0e209d39SAndroid Build Coastguard Worker * (Post-incrementing iteration.) 463*0e209d39SAndroid Build Coastguard Worker * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 464*0e209d39SAndroid Build Coastguard Worker * 465*0e209d39SAndroid Build Coastguard Worker * The length can be negative for a NUL-terminated string. 
466*0e209d39SAndroid Build Coastguard Worker * 467*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 468*0e209d39SAndroid Build Coastguard Worker * @param i int32_t string offset, must be i<length 469*0e209d39SAndroid Build Coastguard Worker * @param length int32_t string length 470*0e209d39SAndroid Build Coastguard Worker * @param n number of code points to skip 471*0e209d39SAndroid Build Coastguard Worker * @see U16_FWD_N_UNSAFE 472*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 473*0e209d39SAndroid Build Coastguard Worker */ 474*0e209d39SAndroid Build Coastguard Worker #define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \ 475*0e209d39SAndroid Build Coastguard Worker int32_t __N=(n); \ 476*0e209d39SAndroid Build Coastguard Worker while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \ 477*0e209d39SAndroid Build Coastguard Worker U16_FWD_1(s, i, length); \ 478*0e209d39SAndroid Build Coastguard Worker --__N; \ 479*0e209d39SAndroid Build Coastguard Worker } \ 480*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 481*0e209d39SAndroid Build Coastguard Worker 482*0e209d39SAndroid Build Coastguard Worker /** 483*0e209d39SAndroid Build Coastguard Worker * Adjust a random-access offset to a code point boundary 484*0e209d39SAndroid Build Coastguard Worker * at the start of a code point. 485*0e209d39SAndroid Build Coastguard Worker * If the offset points to the trail surrogate of a surrogate pair, 486*0e209d39SAndroid Build Coastguard Worker * then the offset is decremented. 487*0e209d39SAndroid Build Coastguard Worker * Otherwise, it is not modified. 488*0e209d39SAndroid Build Coastguard Worker * "Unsafe" macro, assumes well-formed UTF-16. 
489*0e209d39SAndroid Build Coastguard Worker * 490*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 491*0e209d39SAndroid Build Coastguard Worker * @param i string offset 492*0e209d39SAndroid Build Coastguard Worker * @see U16_SET_CP_START 493*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 494*0e209d39SAndroid Build Coastguard Worker */ 495*0e209d39SAndroid Build Coastguard Worker #define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 496*0e209d39SAndroid Build Coastguard Worker if(U16_IS_TRAIL((s)[i])) { \ 497*0e209d39SAndroid Build Coastguard Worker --(i); \ 498*0e209d39SAndroid Build Coastguard Worker } \ 499*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 500*0e209d39SAndroid Build Coastguard Worker 501*0e209d39SAndroid Build Coastguard Worker /** 502*0e209d39SAndroid Build Coastguard Worker * Adjust a random-access offset to a code point boundary 503*0e209d39SAndroid Build Coastguard Worker * at the start of a code point. 504*0e209d39SAndroid Build Coastguard Worker * If the offset points to the trail surrogate of a surrogate pair, 505*0e209d39SAndroid Build Coastguard Worker * then the offset is decremented. 506*0e209d39SAndroid Build Coastguard Worker * Otherwise, it is not modified. 507*0e209d39SAndroid Build Coastguard Worker * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 
508*0e209d39SAndroid Build Coastguard Worker * 509*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 510*0e209d39SAndroid Build Coastguard Worker * @param start starting string offset (usually 0) 511*0e209d39SAndroid Build Coastguard Worker * @param i string offset, must be start<=i 512*0e209d39SAndroid Build Coastguard Worker * @see U16_SET_CP_START_UNSAFE 513*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 514*0e209d39SAndroid Build Coastguard Worker */ 515*0e209d39SAndroid Build Coastguard Worker #define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ 516*0e209d39SAndroid Build Coastguard Worker if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ 517*0e209d39SAndroid Build Coastguard Worker --(i); \ 518*0e209d39SAndroid Build Coastguard Worker } \ 519*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 520*0e209d39SAndroid Build Coastguard Worker 521*0e209d39SAndroid Build Coastguard Worker /* definitions with backward iteration -------------------------------------- */ 522*0e209d39SAndroid Build Coastguard Worker 523*0e209d39SAndroid Build Coastguard Worker /** 524*0e209d39SAndroid Build Coastguard Worker * Move the string offset from one code point boundary to the previous one 525*0e209d39SAndroid Build Coastguard Worker * and get the code point between them. 526*0e209d39SAndroid Build Coastguard Worker * (Pre-decrementing backward iteration.) 527*0e209d39SAndroid Build Coastguard Worker * "Unsafe" macro, assumes well-formed UTF-16. 528*0e209d39SAndroid Build Coastguard Worker * 529*0e209d39SAndroid Build Coastguard Worker * The input offset may be the same as the string length. 530*0e209d39SAndroid Build Coastguard Worker * If the offset is behind a trail surrogate unit 531*0e209d39SAndroid Build Coastguard Worker * for a supplementary code point, then the macro will read 532*0e209d39SAndroid Build Coastguard Worker * the preceding lead surrogate as well. 
533*0e209d39SAndroid Build Coastguard Worker * If the offset is behind a lead surrogate, then that itself 534*0e209d39SAndroid Build Coastguard Worker * will be returned as the code point. 535*0e209d39SAndroid Build Coastguard Worker * The result is undefined if the offset is behind a single, unpaired trail surrogate. 536*0e209d39SAndroid Build Coastguard Worker * 537*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 538*0e209d39SAndroid Build Coastguard Worker * @param i string offset 539*0e209d39SAndroid Build Coastguard Worker * @param c output UChar32 variable 540*0e209d39SAndroid Build Coastguard Worker * @see U16_PREV 541*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 542*0e209d39SAndroid Build Coastguard Worker */ 543*0e209d39SAndroid Build Coastguard Worker #define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 544*0e209d39SAndroid Build Coastguard Worker (c)=(s)[--(i)]; \ 545*0e209d39SAndroid Build Coastguard Worker if(U16_IS_TRAIL(c)) { \ 546*0e209d39SAndroid Build Coastguard Worker (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \ 547*0e209d39SAndroid Build Coastguard Worker } \ 548*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 549*0e209d39SAndroid Build Coastguard Worker 550*0e209d39SAndroid Build Coastguard Worker /** 551*0e209d39SAndroid Build Coastguard Worker * Move the string offset from one code point boundary to the previous one 552*0e209d39SAndroid Build Coastguard Worker * and get the code point between them. 553*0e209d39SAndroid Build Coastguard Worker * (Pre-decrementing backward iteration.) 554*0e209d39SAndroid Build Coastguard Worker * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 555*0e209d39SAndroid Build Coastguard Worker * 556*0e209d39SAndroid Build Coastguard Worker * The input offset may be the same as the string length. 
557*0e209d39SAndroid Build Coastguard Worker * If the offset is behind a trail surrogate unit 558*0e209d39SAndroid Build Coastguard Worker * for a supplementary code point, then the macro will read 559*0e209d39SAndroid Build Coastguard Worker * the preceding lead surrogate as well. 560*0e209d39SAndroid Build Coastguard Worker * If the offset is behind a lead surrogate or behind a single, unpaired 561*0e209d39SAndroid Build Coastguard Worker * trail surrogate, then c is set to that unpaired surrogate. 562*0e209d39SAndroid Build Coastguard Worker * 563*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 564*0e209d39SAndroid Build Coastguard Worker * @param start starting string offset (usually 0) 565*0e209d39SAndroid Build Coastguard Worker * @param i string offset, must be start<i 566*0e209d39SAndroid Build Coastguard Worker * @param c output UChar32 variable 567*0e209d39SAndroid Build Coastguard Worker * @see U16_PREV_UNSAFE 568*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 569*0e209d39SAndroid Build Coastguard Worker */ 570*0e209d39SAndroid Build Coastguard Worker #define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 571*0e209d39SAndroid Build Coastguard Worker (c)=(s)[--(i)]; \ 572*0e209d39SAndroid Build Coastguard Worker if(U16_IS_TRAIL(c)) { \ 573*0e209d39SAndroid Build Coastguard Worker uint16_t __c2; \ 574*0e209d39SAndroid Build Coastguard Worker if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ 575*0e209d39SAndroid Build Coastguard Worker --(i); \ 576*0e209d39SAndroid Build Coastguard Worker (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ 577*0e209d39SAndroid Build Coastguard Worker } \ 578*0e209d39SAndroid Build Coastguard Worker } \ 579*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 580*0e209d39SAndroid Build Coastguard Worker 581*0e209d39SAndroid Build Coastguard Worker /** 582*0e209d39SAndroid Build Coastguard Worker * Move the string offset from one code point boundary to the previous one 
583*0e209d39SAndroid Build Coastguard Worker * and get the code point between them. 584*0e209d39SAndroid Build Coastguard Worker * (Pre-decrementing backward iteration.) 585*0e209d39SAndroid Build Coastguard Worker * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 586*0e209d39SAndroid Build Coastguard Worker * 587*0e209d39SAndroid Build Coastguard Worker * The input offset may be the same as the string length. 588*0e209d39SAndroid Build Coastguard Worker * If the offset is behind a trail surrogate unit 589*0e209d39SAndroid Build Coastguard Worker * for a supplementary code point, then the macro will read 590*0e209d39SAndroid Build Coastguard Worker * the preceding lead surrogate as well. 591*0e209d39SAndroid Build Coastguard Worker * If the offset is behind a lead surrogate or behind a single, unpaired 592*0e209d39SAndroid Build Coastguard Worker * trail surrogate, then c is set to U+FFFD. 593*0e209d39SAndroid Build Coastguard Worker * 594*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 595*0e209d39SAndroid Build Coastguard Worker * @param start starting string offset (usually 0) 596*0e209d39SAndroid Build Coastguard Worker * @param i string offset, must be start<i 597*0e209d39SAndroid Build Coastguard Worker * @param c output UChar32 variable 598*0e209d39SAndroid Build Coastguard Worker * @see U16_PREV_UNSAFE 599*0e209d39SAndroid Build Coastguard Worker * @stable ICU 60 600*0e209d39SAndroid Build Coastguard Worker */ 601*0e209d39SAndroid Build Coastguard Worker #define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \ 602*0e209d39SAndroid Build Coastguard Worker (c)=(s)[--(i)]; \ 603*0e209d39SAndroid Build Coastguard Worker if(U16_IS_SURROGATE(c)) { \ 604*0e209d39SAndroid Build Coastguard Worker uint16_t __c2; \ 605*0e209d39SAndroid Build Coastguard Worker if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ 606*0e209d39SAndroid Build Coastguard Worker --(i); \ 
607*0e209d39SAndroid Build Coastguard Worker (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ 608*0e209d39SAndroid Build Coastguard Worker } else { \ 609*0e209d39SAndroid Build Coastguard Worker (c)=0xfffd; \ 610*0e209d39SAndroid Build Coastguard Worker } \ 611*0e209d39SAndroid Build Coastguard Worker } \ 612*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 613*0e209d39SAndroid Build Coastguard Worker 614*0e209d39SAndroid Build Coastguard Worker /** 615*0e209d39SAndroid Build Coastguard Worker * Move the string offset from one code point boundary to the previous one. 616*0e209d39SAndroid Build Coastguard Worker * (Pre-decrementing backward iteration.) 617*0e209d39SAndroid Build Coastguard Worker * The input offset may be the same as the string length. 618*0e209d39SAndroid Build Coastguard Worker * "Unsafe" macro, assumes well-formed UTF-16. 619*0e209d39SAndroid Build Coastguard Worker * 620*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 621*0e209d39SAndroid Build Coastguard Worker * @param i string offset 622*0e209d39SAndroid Build Coastguard Worker * @see U16_BACK_1 623*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 624*0e209d39SAndroid Build Coastguard Worker */ 625*0e209d39SAndroid Build Coastguard Worker #define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 626*0e209d39SAndroid Build Coastguard Worker if(U16_IS_TRAIL((s)[--(i)])) { \ 627*0e209d39SAndroid Build Coastguard Worker --(i); \ 628*0e209d39SAndroid Build Coastguard Worker } \ 629*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 630*0e209d39SAndroid Build Coastguard Worker 631*0e209d39SAndroid Build Coastguard Worker /** 632*0e209d39SAndroid Build Coastguard Worker * Move the string offset from one code point boundary to the previous one. 633*0e209d39SAndroid Build Coastguard Worker * (Pre-decrementing backward iteration.) 634*0e209d39SAndroid Build Coastguard Worker * The input offset may be the same as the string length. 
635*0e209d39SAndroid Build Coastguard Worker * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 636*0e209d39SAndroid Build Coastguard Worker * 637*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 638*0e209d39SAndroid Build Coastguard Worker * @param start starting string offset (usually 0) 639*0e209d39SAndroid Build Coastguard Worker * @param i string offset, must be start<i 640*0e209d39SAndroid Build Coastguard Worker * @see U16_BACK_1_UNSAFE 641*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 642*0e209d39SAndroid Build Coastguard Worker */ 643*0e209d39SAndroid Build Coastguard Worker #define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \ 644*0e209d39SAndroid Build Coastguard Worker if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \ 645*0e209d39SAndroid Build Coastguard Worker --(i); \ 646*0e209d39SAndroid Build Coastguard Worker } \ 647*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 648*0e209d39SAndroid Build Coastguard Worker 649*0e209d39SAndroid Build Coastguard Worker /** 650*0e209d39SAndroid Build Coastguard Worker * Move the string offset from one code point boundary to the n-th one before it, 651*0e209d39SAndroid Build Coastguard Worker * i.e., move backward by n code points. 652*0e209d39SAndroid Build Coastguard Worker * (Pre-decrementing backward iteration.) 653*0e209d39SAndroid Build Coastguard Worker * The input offset may be the same as the string length. 654*0e209d39SAndroid Build Coastguard Worker * "Unsafe" macro, assumes well-formed UTF-16. 
655*0e209d39SAndroid Build Coastguard Worker * 656*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 657*0e209d39SAndroid Build Coastguard Worker * @param i string offset 658*0e209d39SAndroid Build Coastguard Worker * @param n number of code points to skip 659*0e209d39SAndroid Build Coastguard Worker * @see U16_BACK_N 660*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 661*0e209d39SAndroid Build Coastguard Worker */ 662*0e209d39SAndroid Build Coastguard Worker #define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \ 663*0e209d39SAndroid Build Coastguard Worker int32_t __N=(n); \ 664*0e209d39SAndroid Build Coastguard Worker while(__N>0) { \ 665*0e209d39SAndroid Build Coastguard Worker U16_BACK_1_UNSAFE(s, i); \ 666*0e209d39SAndroid Build Coastguard Worker --__N; \ 667*0e209d39SAndroid Build Coastguard Worker } \ 668*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 669*0e209d39SAndroid Build Coastguard Worker 670*0e209d39SAndroid Build Coastguard Worker /** 671*0e209d39SAndroid Build Coastguard Worker * Move the string offset from one code point boundary to the n-th one before it, 672*0e209d39SAndroid Build Coastguard Worker * i.e., move backward by n code points. 673*0e209d39SAndroid Build Coastguard Worker * (Pre-decrementing backward iteration.) 674*0e209d39SAndroid Build Coastguard Worker * The input offset may be the same as the string length. 675*0e209d39SAndroid Build Coastguard Worker * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 
676*0e209d39SAndroid Build Coastguard Worker * 677*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 678*0e209d39SAndroid Build Coastguard Worker * @param start start of string 679*0e209d39SAndroid Build Coastguard Worker * @param i string offset, must be start<i 680*0e209d39SAndroid Build Coastguard Worker * @param n number of code points to skip 681*0e209d39SAndroid Build Coastguard Worker * @see U16_BACK_N_UNSAFE 682*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 683*0e209d39SAndroid Build Coastguard Worker */ 684*0e209d39SAndroid Build Coastguard Worker #define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \ 685*0e209d39SAndroid Build Coastguard Worker int32_t __N=(n); \ 686*0e209d39SAndroid Build Coastguard Worker while(__N>0 && (i)>(start)) { \ 687*0e209d39SAndroid Build Coastguard Worker U16_BACK_1(s, start, i); \ 688*0e209d39SAndroid Build Coastguard Worker --__N; \ 689*0e209d39SAndroid Build Coastguard Worker } \ 690*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 691*0e209d39SAndroid Build Coastguard Worker 692*0e209d39SAndroid Build Coastguard Worker /** 693*0e209d39SAndroid Build Coastguard Worker * Adjust a random-access offset to a code point boundary after a code point. 694*0e209d39SAndroid Build Coastguard Worker * If the offset is behind the lead surrogate of a surrogate pair, 695*0e209d39SAndroid Build Coastguard Worker * then the offset is incremented. 696*0e209d39SAndroid Build Coastguard Worker * Otherwise, it is not modified. 697*0e209d39SAndroid Build Coastguard Worker * The input offset may be the same as the string length. 698*0e209d39SAndroid Build Coastguard Worker * "Unsafe" macro, assumes well-formed UTF-16. 
699*0e209d39SAndroid Build Coastguard Worker * 700*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 701*0e209d39SAndroid Build Coastguard Worker * @param i string offset 702*0e209d39SAndroid Build Coastguard Worker * @see U16_SET_CP_LIMIT 703*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 704*0e209d39SAndroid Build Coastguard Worker */ 705*0e209d39SAndroid Build Coastguard Worker #define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \ 706*0e209d39SAndroid Build Coastguard Worker if(U16_IS_LEAD((s)[(i)-1])) { \ 707*0e209d39SAndroid Build Coastguard Worker ++(i); \ 708*0e209d39SAndroid Build Coastguard Worker } \ 709*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 710*0e209d39SAndroid Build Coastguard Worker 711*0e209d39SAndroid Build Coastguard Worker /** 712*0e209d39SAndroid Build Coastguard Worker * Adjust a random-access offset to a code point boundary after a code point. 713*0e209d39SAndroid Build Coastguard Worker * If the offset is behind the lead surrogate of a surrogate pair, 714*0e209d39SAndroid Build Coastguard Worker * then the offset is incremented. 715*0e209d39SAndroid Build Coastguard Worker * Otherwise, it is not modified. 716*0e209d39SAndroid Build Coastguard Worker * The input offset may be the same as the string length. 717*0e209d39SAndroid Build Coastguard Worker * "Safe" macro, handles unpaired surrogates and checks for string boundaries. 718*0e209d39SAndroid Build Coastguard Worker * 719*0e209d39SAndroid Build Coastguard Worker * The length can be negative for a NUL-terminated string. 
720*0e209d39SAndroid Build Coastguard Worker * 721*0e209d39SAndroid Build Coastguard Worker * @param s const UChar * string 722*0e209d39SAndroid Build Coastguard Worker * @param start int32_t starting string offset (usually 0) 723*0e209d39SAndroid Build Coastguard Worker * @param i int32_t string offset, start<=i<=length 724*0e209d39SAndroid Build Coastguard Worker * @param length int32_t string length 725*0e209d39SAndroid Build Coastguard Worker * @see U16_SET_CP_LIMIT_UNSAFE 726*0e209d39SAndroid Build Coastguard Worker * @stable ICU 2.4 727*0e209d39SAndroid Build Coastguard Worker */ 728*0e209d39SAndroid Build Coastguard Worker #define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \ 729*0e209d39SAndroid Build Coastguard Worker if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \ 730*0e209d39SAndroid Build Coastguard Worker ++(i); \ 731*0e209d39SAndroid Build Coastguard Worker } \ 732*0e209d39SAndroid Build Coastguard Worker } UPRV_BLOCK_MACRO_END 733*0e209d39SAndroid Build Coastguard Worker 734*0e209d39SAndroid Build Coastguard Worker #endif 735