1*f6dc9357SAndroid Build Coastguard Worker // UTFConvert.cpp
2*f6dc9357SAndroid Build Coastguard Worker
3*f6dc9357SAndroid Build Coastguard Worker #include "StdAfx.h"
4*f6dc9357SAndroid Build Coastguard Worker
5*f6dc9357SAndroid Build Coastguard Worker // #include <stdio.h>
6*f6dc9357SAndroid Build Coastguard Worker
7*f6dc9357SAndroid Build Coastguard Worker #include "MyTypes.h"
8*f6dc9357SAndroid Build Coastguard Worker #include "UTFConvert.h"
9*f6dc9357SAndroid Build Coastguard Worker
10*f6dc9357SAndroid Build Coastguard Worker
11*f6dc9357SAndroid Build Coastguard Worker #ifndef Z7_WCHART_IS_16BIT
12*f6dc9357SAndroid Build Coastguard Worker #ifndef __APPLE__
13*f6dc9357SAndroid Build Coastguard Worker // we define it if the system supports files with non-utf8 symbols:
14*f6dc9357SAndroid Build Coastguard Worker #define MY_UTF8_RAW_NON_UTF8_SUPPORTED
15*f6dc9357SAndroid Build Coastguard Worker #endif
16*f6dc9357SAndroid Build Coastguard Worker #endif
17*f6dc9357SAndroid Build Coastguard Worker
18*f6dc9357SAndroid Build Coastguard Worker /*
19*f6dc9357SAndroid Build Coastguard Worker MY_UTF8_START(n) - is a base value for start byte (head), if there are (n) additional bytes after start byte
20*f6dc9357SAndroid Build Coastguard Worker
21*f6dc9357SAndroid Build Coastguard Worker n : MY_UTF8_START(n) : Bits of code point
22*f6dc9357SAndroid Build Coastguard Worker
23*f6dc9357SAndroid Build Coastguard Worker 0 : 0x80 : : unused
24*f6dc9357SAndroid Build Coastguard Worker 1 : 0xC0 : 11 :
25*f6dc9357SAndroid Build Coastguard Worker 2 : 0xE0 : 16 : Basic Multilingual Plane
26*f6dc9357SAndroid Build Coastguard Worker 3 : 0xF0 : 21 : Unicode space
27*f6dc9357SAndroid Build Coastguard Worker 4 : 0xF8 : 26 :
28*f6dc9357SAndroid Build Coastguard Worker 5 : 0xFC : 31 : UCS-4 : wcstombs() in ubuntu is limited to that value
29*f6dc9357SAndroid Build Coastguard Worker 6 : 0xFE : 36 : We can use it, if we want to encode any 32-bit value
30*f6dc9357SAndroid Build Coastguard Worker 7 : 0xFF :
31*f6dc9357SAndroid Build Coastguard Worker */
32*f6dc9357SAndroid Build Coastguard Worker
33*f6dc9357SAndroid Build Coastguard Worker #define MY_UTF8_START(n) (0x100 - (1 << (7 - (n))))
34*f6dc9357SAndroid Build Coastguard Worker
35*f6dc9357SAndroid Build Coastguard Worker #define MY_UTF8_HEAD_PARSE2(n) \
36*f6dc9357SAndroid Build Coastguard Worker if (c < MY_UTF8_START((n) + 1)) \
37*f6dc9357SAndroid Build Coastguard Worker { numBytes = (n); val -= MY_UTF8_START(n); }
38*f6dc9357SAndroid Build Coastguard Worker
39*f6dc9357SAndroid Build Coastguard Worker #ifndef Z7_WCHART_IS_16BIT
40*f6dc9357SAndroid Build Coastguard Worker
41*f6dc9357SAndroid Build Coastguard Worker /*
42*f6dc9357SAndroid Build Coastguard Worker if (wchar_t is 32-bit), we can support large points in long UTF-8 sequence,
43*f6dc9357SAndroid Build Coastguard Worker when we convert wchar_t strings to UTF-8:
44*f6dc9357SAndroid Build Coastguard Worker (_UTF8_NUM_TAIL_BYTES_MAX == 3) : (21-bits points) - Unicode
45*f6dc9357SAndroid Build Coastguard Worker (_UTF8_NUM_TAIL_BYTES_MAX == 5) : (31-bits points) - UCS-4
46*f6dc9357SAndroid Build Coastguard Worker (_UTF8_NUM_TAIL_BYTES_MAX == 6) : (36-bit hack)
47*f6dc9357SAndroid Build Coastguard Worker */
48*f6dc9357SAndroid Build Coastguard Worker
49*f6dc9357SAndroid Build Coastguard Worker #define MY_UTF8_NUM_TAIL_BYTES_MAX 5
50*f6dc9357SAndroid Build Coastguard Worker #endif
51*f6dc9357SAndroid Build Coastguard Worker
52*f6dc9357SAndroid Build Coastguard Worker /*
53*f6dc9357SAndroid Build Coastguard Worker #define MY_UTF8_HEAD_PARSE \
54*f6dc9357SAndroid Build Coastguard Worker UInt32 val = c; \
55*f6dc9357SAndroid Build Coastguard Worker MY_UTF8_HEAD_PARSE2(1) \
56*f6dc9357SAndroid Build Coastguard Worker else MY_UTF8_HEAD_PARSE2(2) \
57*f6dc9357SAndroid Build Coastguard Worker else MY_UTF8_HEAD_PARSE2(3) \
58*f6dc9357SAndroid Build Coastguard Worker else MY_UTF8_HEAD_PARSE2(4) \
59*f6dc9357SAndroid Build Coastguard Worker else MY_UTF8_HEAD_PARSE2(5) \
60*f6dc9357SAndroid Build Coastguard Worker #if MY_UTF8_NUM_TAIL_BYTES_MAX >= 6
61*f6dc9357SAndroid Build Coastguard Worker else MY_UTF8_HEAD_PARSE2(6)
62*f6dc9357SAndroid Build Coastguard Worker #endif
63*f6dc9357SAndroid Build Coastguard Worker */
64*f6dc9357SAndroid Build Coastguard Worker
65*f6dc9357SAndroid Build Coastguard Worker #define MY_UTF8_HEAD_PARSE_MAX_3_BYTES \
66*f6dc9357SAndroid Build Coastguard Worker UInt32 val = c; \
67*f6dc9357SAndroid Build Coastguard Worker MY_UTF8_HEAD_PARSE2(1) \
68*f6dc9357SAndroid Build Coastguard Worker else MY_UTF8_HEAD_PARSE2(2) \
69*f6dc9357SAndroid Build Coastguard Worker else { numBytes = 3; val -= MY_UTF8_START(3); }
70*f6dc9357SAndroid Build Coastguard Worker
71*f6dc9357SAndroid Build Coastguard Worker
72*f6dc9357SAndroid Build Coastguard Worker #define MY_UTF8_RANGE(n) (((UInt32)1) << ((n) * 5 + 6))
73*f6dc9357SAndroid Build Coastguard Worker
74*f6dc9357SAndroid Build Coastguard Worker
75*f6dc9357SAndroid Build Coastguard Worker #define START_POINT_FOR_SURROGATE 0x10000
76*f6dc9357SAndroid Build Coastguard Worker
77*f6dc9357SAndroid Build Coastguard Worker
/* we use 128 bytes block in 16-bit BMP-PLANE to encode non-UTF-8 Escapes
   Also we can use additional HIGH-PLANE (we use 21-bit points above 0x1f0000)
   to simplify internal intermediate conversion in Linux:
   RAW-UTF-8 <-> internal wchar_t utf-16 strings <-> RAW-UTF-8
*/
83*f6dc9357SAndroid Build Coastguard Worker
84*f6dc9357SAndroid Build Coastguard Worker
85*f6dc9357SAndroid Build Coastguard Worker #if defined(Z7_WCHART_IS_16BIT)
86*f6dc9357SAndroid Build Coastguard Worker
87*f6dc9357SAndroid Build Coastguard Worker #define UTF_ESCAPE_PLANE 0
88*f6dc9357SAndroid Build Coastguard Worker
89*f6dc9357SAndroid Build Coastguard Worker #else
90*f6dc9357SAndroid Build Coastguard Worker
91*f6dc9357SAndroid Build Coastguard Worker /*
92*f6dc9357SAndroid Build Coastguard Worker we can place 128 ESCAPE chars to
93*f6dc9357SAndroid Build Coastguard Worker ef 80 - ee be 80 (3-bytes utf-8) : similar to WSL
94*f6dc9357SAndroid Build Coastguard Worker ef ff - ee bf bf
95*f6dc9357SAndroid Build Coastguard Worker
96*f6dc9357SAndroid Build Coastguard Worker 1f ef 80 - f7 be be 80 (4-bytes utf-8) : last 4-bytes utf-8 plane (out of Unicode)
97*f6dc9357SAndroid Build Coastguard Worker 1f ef ff - f7 be bf bf (4-bytes utf-8) : last 4-bytes utf-8 plane (out of Unicode)
98*f6dc9357SAndroid Build Coastguard Worker */
99*f6dc9357SAndroid Build Coastguard Worker
100*f6dc9357SAndroid Build Coastguard Worker // #define UTF_ESCAPE_PLANE_HIGH (0x1f << 16)
101*f6dc9357SAndroid Build Coastguard Worker // #define UTF_ESCAPE_PLANE UTF_ESCAPE_PLANE_HIGH
102*f6dc9357SAndroid Build Coastguard Worker #define UTF_ESCAPE_PLANE 0
103*f6dc9357SAndroid Build Coastguard Worker
/*
  if (Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE is set)
  {
    if (UTF_ESCAPE_PLANE is UTF_ESCAPE_PLANE_HIGH)
    {
      we can restore any 8-bit Escape from ESCAPE-PLANE-21 plane.
      But ESCAPE-PLANE-21 point cannot be stored to utf-16 (7z archive)
      So we still need a way to extract 8-bit Escapes and BMP-Escapes-8
      from same BMP-Escapes-16 stored in 7z.
      And if we want to restore any 8-bit from 7z archive,
      we still must use Z7_UTF_FLAG_FROM_UTF8_BMP_ESCAPE_CONVERT for (utf-8 -> utf-16)
      Also we need additional Conversions to transform from utf-16 to utf-16-With-Escapes-21
    }
    else (UTF_ESCAPE_PLANE == 0)
    {
      we must convert original 3-bytes utf-8 BMP-Escape point to sequence
      of 3 BMP-Escape-16 points with Z7_UTF_FLAG_FROM_UTF8_BMP_ESCAPE_CONVERT
      so we can extract original RAW-UTF-8 from UTF-16 later.
    }
  }
*/
125*f6dc9357SAndroid Build Coastguard Worker
126*f6dc9357SAndroid Build Coastguard Worker #endif
127*f6dc9357SAndroid Build Coastguard Worker
128*f6dc9357SAndroid Build Coastguard Worker
129*f6dc9357SAndroid Build Coastguard Worker
130*f6dc9357SAndroid Build Coastguard Worker #define UTF_ESCAPE_BASE 0xef00
131*f6dc9357SAndroid Build Coastguard Worker
132*f6dc9357SAndroid Build Coastguard Worker
133*f6dc9357SAndroid Build Coastguard Worker #ifdef UTF_ESCAPE_BASE
134*f6dc9357SAndroid Build Coastguard Worker #define IS_ESCAPE_POINT(v, plane) (((v) & (UInt32)0xffffff80) == (plane) + UTF_ESCAPE_BASE + 0x80)
135*f6dc9357SAndroid Build Coastguard Worker #endif
136*f6dc9357SAndroid Build Coastguard Worker
137*f6dc9357SAndroid Build Coastguard Worker #define IS_SURROGATE_POINT(v) (((v) & (UInt32)0xfffff800) == 0xd800)
138*f6dc9357SAndroid Build Coastguard Worker #define IS_LOW_SURROGATE_POINT(v) (((v) & (UInt32)0xfffffc00) == 0xdc00)
139*f6dc9357SAndroid Build Coastguard Worker
140*f6dc9357SAndroid Build Coastguard Worker
141*f6dc9357SAndroid Build Coastguard Worker #define UTF_ERROR_UTF8_CHECK \
142*f6dc9357SAndroid Build Coastguard Worker { NonUtf = true; continue; }
143*f6dc9357SAndroid Build Coastguard Worker
Check_Buf(const char * src,size_t size)144*f6dc9357SAndroid Build Coastguard Worker void CUtf8Check::Check_Buf(const char *src, size_t size) throw()
145*f6dc9357SAndroid Build Coastguard Worker {
146*f6dc9357SAndroid Build Coastguard Worker Clear();
147*f6dc9357SAndroid Build Coastguard Worker // Byte maxByte = 0;
148*f6dc9357SAndroid Build Coastguard Worker
149*f6dc9357SAndroid Build Coastguard Worker for (;;)
150*f6dc9357SAndroid Build Coastguard Worker {
151*f6dc9357SAndroid Build Coastguard Worker if (size == 0)
152*f6dc9357SAndroid Build Coastguard Worker break;
153*f6dc9357SAndroid Build Coastguard Worker
154*f6dc9357SAndroid Build Coastguard Worker const Byte c = (Byte)(*src++);
155*f6dc9357SAndroid Build Coastguard Worker size--;
156*f6dc9357SAndroid Build Coastguard Worker
157*f6dc9357SAndroid Build Coastguard Worker if (c == 0)
158*f6dc9357SAndroid Build Coastguard Worker {
159*f6dc9357SAndroid Build Coastguard Worker ZeroChar = true;
160*f6dc9357SAndroid Build Coastguard Worker continue;
161*f6dc9357SAndroid Build Coastguard Worker }
162*f6dc9357SAndroid Build Coastguard Worker
163*f6dc9357SAndroid Build Coastguard Worker /*
164*f6dc9357SAndroid Build Coastguard Worker if (c > maxByte)
165*f6dc9357SAndroid Build Coastguard Worker maxByte = c;
166*f6dc9357SAndroid Build Coastguard Worker */
167*f6dc9357SAndroid Build Coastguard Worker
168*f6dc9357SAndroid Build Coastguard Worker if (c < 0x80)
169*f6dc9357SAndroid Build Coastguard Worker continue;
170*f6dc9357SAndroid Build Coastguard Worker
171*f6dc9357SAndroid Build Coastguard Worker if (c < 0xc0 + 2)
172*f6dc9357SAndroid Build Coastguard Worker UTF_ERROR_UTF8_CHECK
173*f6dc9357SAndroid Build Coastguard Worker
174*f6dc9357SAndroid Build Coastguard Worker unsigned numBytes;
175*f6dc9357SAndroid Build Coastguard Worker UInt32 val = c;
176*f6dc9357SAndroid Build Coastguard Worker MY_UTF8_HEAD_PARSE2(1)
177*f6dc9357SAndroid Build Coastguard Worker else MY_UTF8_HEAD_PARSE2(2)
178*f6dc9357SAndroid Build Coastguard Worker else MY_UTF8_HEAD_PARSE2(3)
179*f6dc9357SAndroid Build Coastguard Worker else MY_UTF8_HEAD_PARSE2(4)
180*f6dc9357SAndroid Build Coastguard Worker else MY_UTF8_HEAD_PARSE2(5)
181*f6dc9357SAndroid Build Coastguard Worker else
182*f6dc9357SAndroid Build Coastguard Worker {
183*f6dc9357SAndroid Build Coastguard Worker UTF_ERROR_UTF8_CHECK
184*f6dc9357SAndroid Build Coastguard Worker }
185*f6dc9357SAndroid Build Coastguard Worker
186*f6dc9357SAndroid Build Coastguard Worker unsigned pos = 0;
187*f6dc9357SAndroid Build Coastguard Worker do
188*f6dc9357SAndroid Build Coastguard Worker {
189*f6dc9357SAndroid Build Coastguard Worker if (pos == size)
190*f6dc9357SAndroid Build Coastguard Worker break;
191*f6dc9357SAndroid Build Coastguard Worker unsigned c2 = (Byte)src[pos];
192*f6dc9357SAndroid Build Coastguard Worker c2 -= 0x80;
193*f6dc9357SAndroid Build Coastguard Worker if (c2 >= 0x40)
194*f6dc9357SAndroid Build Coastguard Worker break;
195*f6dc9357SAndroid Build Coastguard Worker val <<= 6;
196*f6dc9357SAndroid Build Coastguard Worker val |= c2;
197*f6dc9357SAndroid Build Coastguard Worker if (pos == 0)
198*f6dc9357SAndroid Build Coastguard Worker if (val < (((unsigned)1 << 7) >> numBytes))
199*f6dc9357SAndroid Build Coastguard Worker break;
200*f6dc9357SAndroid Build Coastguard Worker pos++;
201*f6dc9357SAndroid Build Coastguard Worker }
202*f6dc9357SAndroid Build Coastguard Worker while (--numBytes);
203*f6dc9357SAndroid Build Coastguard Worker
204*f6dc9357SAndroid Build Coastguard Worker if (numBytes != 0)
205*f6dc9357SAndroid Build Coastguard Worker {
206*f6dc9357SAndroid Build Coastguard Worker if (pos == size)
207*f6dc9357SAndroid Build Coastguard Worker Truncated = true;
208*f6dc9357SAndroid Build Coastguard Worker else
209*f6dc9357SAndroid Build Coastguard Worker UTF_ERROR_UTF8_CHECK
210*f6dc9357SAndroid Build Coastguard Worker }
211*f6dc9357SAndroid Build Coastguard Worker
212*f6dc9357SAndroid Build Coastguard Worker #ifdef UTF_ESCAPE_BASE
213*f6dc9357SAndroid Build Coastguard Worker if (IS_ESCAPE_POINT(val, 0))
214*f6dc9357SAndroid Build Coastguard Worker Escape = true;
215*f6dc9357SAndroid Build Coastguard Worker #endif
216*f6dc9357SAndroid Build Coastguard Worker
217*f6dc9357SAndroid Build Coastguard Worker if (MaxHighPoint < val)
218*f6dc9357SAndroid Build Coastguard Worker MaxHighPoint = val;
219*f6dc9357SAndroid Build Coastguard Worker
220*f6dc9357SAndroid Build Coastguard Worker if (IS_SURROGATE_POINT(val))
221*f6dc9357SAndroid Build Coastguard Worker SingleSurrogate = true;
222*f6dc9357SAndroid Build Coastguard Worker
223*f6dc9357SAndroid Build Coastguard Worker src += pos;
224*f6dc9357SAndroid Build Coastguard Worker size -= pos;
225*f6dc9357SAndroid Build Coastguard Worker }
226*f6dc9357SAndroid Build Coastguard Worker
227*f6dc9357SAndroid Build Coastguard Worker // MaxByte = maxByte;
228*f6dc9357SAndroid Build Coastguard Worker }
229*f6dc9357SAndroid Build Coastguard Worker
Check_UTF8_Buf(const char * src,size_t size,bool allowReduced)230*f6dc9357SAndroid Build Coastguard Worker bool Check_UTF8_Buf(const char *src, size_t size, bool allowReduced) throw()
231*f6dc9357SAndroid Build Coastguard Worker {
232*f6dc9357SAndroid Build Coastguard Worker CUtf8Check check;
233*f6dc9357SAndroid Build Coastguard Worker check.Check_Buf(src, size);
234*f6dc9357SAndroid Build Coastguard Worker return check.IsOK(allowReduced);
235*f6dc9357SAndroid Build Coastguard Worker }
236*f6dc9357SAndroid Build Coastguard Worker
237*f6dc9357SAndroid Build Coastguard Worker /*
238*f6dc9357SAndroid Build Coastguard Worker bool CheckUTF8_chars(const char *src, bool allowReduced) throw()
239*f6dc9357SAndroid Build Coastguard Worker {
240*f6dc9357SAndroid Build Coastguard Worker CUtf8Check check;
241*f6dc9357SAndroid Build Coastguard Worker check.CheckBuf(src, strlen(src));
242*f6dc9357SAndroid Build Coastguard Worker return check.IsOK(allowReduced);
243*f6dc9357SAndroid Build Coastguard Worker }
244*f6dc9357SAndroid Build Coastguard Worker */
245*f6dc9357SAndroid Build Coastguard Worker
CheckUTF8_AString(const AString & s)246*f6dc9357SAndroid Build Coastguard Worker bool CheckUTF8_AString(const AString &s) throw()
247*f6dc9357SAndroid Build Coastguard Worker {
248*f6dc9357SAndroid Build Coastguard Worker CUtf8Check check;
249*f6dc9357SAndroid Build Coastguard Worker check.Check_AString(s);
250*f6dc9357SAndroid Build Coastguard Worker return check.IsOK();
251*f6dc9357SAndroid Build Coastguard Worker }
252*f6dc9357SAndroid Build Coastguard Worker
253*f6dc9357SAndroid Build Coastguard Worker
254*f6dc9357SAndroid Build Coastguard Worker /*
255*f6dc9357SAndroid Build Coastguard Worker bool CheckUTF8(const char *src, bool allowReduced) throw()
256*f6dc9357SAndroid Build Coastguard Worker {
257*f6dc9357SAndroid Build Coastguard Worker // return Check_UTF8_Buf(src, strlen(src), allowReduced);
258*f6dc9357SAndroid Build Coastguard Worker
259*f6dc9357SAndroid Build Coastguard Worker for (;;)
260*f6dc9357SAndroid Build Coastguard Worker {
261*f6dc9357SAndroid Build Coastguard Worker const Byte c = (Byte)(*src++);
262*f6dc9357SAndroid Build Coastguard Worker if (c == 0)
263*f6dc9357SAndroid Build Coastguard Worker return true;
264*f6dc9357SAndroid Build Coastguard Worker
265*f6dc9357SAndroid Build Coastguard Worker if (c < 0x80)
266*f6dc9357SAndroid Build Coastguard Worker continue;
267*f6dc9357SAndroid Build Coastguard Worker if (c < 0xC0 + 2 || c >= 0xf5)
268*f6dc9357SAndroid Build Coastguard Worker return false;
269*f6dc9357SAndroid Build Coastguard Worker
270*f6dc9357SAndroid Build Coastguard Worker unsigned numBytes;
271*f6dc9357SAndroid Build Coastguard Worker MY_UTF8_HEAD_PARSE
272*f6dc9357SAndroid Build Coastguard Worker else
273*f6dc9357SAndroid Build Coastguard Worker return false;
274*f6dc9357SAndroid Build Coastguard Worker
275*f6dc9357SAndroid Build Coastguard Worker unsigned pos = 0;
276*f6dc9357SAndroid Build Coastguard Worker
277*f6dc9357SAndroid Build Coastguard Worker do
278*f6dc9357SAndroid Build Coastguard Worker {
279*f6dc9357SAndroid Build Coastguard Worker Byte c2 = (Byte)(*src++);
280*f6dc9357SAndroid Build Coastguard Worker if (c2 < 0x80 || c2 >= 0xC0)
281*f6dc9357SAndroid Build Coastguard Worker return allowReduced && c2 == 0;
282*f6dc9357SAndroid Build Coastguard Worker val <<= 6;
283*f6dc9357SAndroid Build Coastguard Worker val |= (c2 - 0x80);
284*f6dc9357SAndroid Build Coastguard Worker pos++;
285*f6dc9357SAndroid Build Coastguard Worker }
286*f6dc9357SAndroid Build Coastguard Worker while (--numBytes);
287*f6dc9357SAndroid Build Coastguard Worker
288*f6dc9357SAndroid Build Coastguard Worker if (val < MY_UTF8_RANGE(pos - 1))
289*f6dc9357SAndroid Build Coastguard Worker return false;
290*f6dc9357SAndroid Build Coastguard Worker
291*f6dc9357SAndroid Build Coastguard Worker if (val >= 0x110000)
292*f6dc9357SAndroid Build Coastguard Worker return false;
293*f6dc9357SAndroid Build Coastguard Worker }
294*f6dc9357SAndroid Build Coastguard Worker }
295*f6dc9357SAndroid Build Coastguard Worker */
296*f6dc9357SAndroid Build Coastguard Worker
297*f6dc9357SAndroid Build Coastguard Worker // in case of UTF-8 error we have two ways:
298*f6dc9357SAndroid Build Coastguard Worker // 21.01- : old : 0xfffd: REPLACEMENT CHARACTER : old version
299*f6dc9357SAndroid Build Coastguard Worker // 21.02+ : new : 0xef00 + (c) : similar to WSL scheme for low symbols
300*f6dc9357SAndroid Build Coastguard Worker
301*f6dc9357SAndroid Build Coastguard Worker #define UTF_REPLACEMENT_CHAR 0xfffd
302*f6dc9357SAndroid Build Coastguard Worker
303*f6dc9357SAndroid Build Coastguard Worker
304*f6dc9357SAndroid Build Coastguard Worker
305*f6dc9357SAndroid Build Coastguard Worker #define UTF_ESCAPE(c) \
306*f6dc9357SAndroid Build Coastguard Worker ((flags & Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE) ? \
307*f6dc9357SAndroid Build Coastguard Worker UTF_ESCAPE_PLANE + UTF_ESCAPE_BASE + (c) : UTF_REPLACEMENT_CHAR)
308*f6dc9357SAndroid Build Coastguard Worker
309*f6dc9357SAndroid Build Coastguard Worker /*
310*f6dc9357SAndroid Build Coastguard Worker #define UTF_HARD_ERROR_UTF8
311*f6dc9357SAndroid Build Coastguard Worker { if (dest) dest[destPos] = (wchar_t)UTF_ESCAPE(c); \
312*f6dc9357SAndroid Build Coastguard Worker destPos++; ok = false; continue; }
313*f6dc9357SAndroid Build Coastguard Worker */
314*f6dc9357SAndroid Build Coastguard Worker
315*f6dc9357SAndroid Build Coastguard Worker // we ignore utf errors, and don't change (ok) variable!
316*f6dc9357SAndroid Build Coastguard Worker
317*f6dc9357SAndroid Build Coastguard Worker #define UTF_ERROR_UTF8 \
318*f6dc9357SAndroid Build Coastguard Worker { if (dest) dest[destPos] = (wchar_t)UTF_ESCAPE(c); \
319*f6dc9357SAndroid Build Coastguard Worker destPos++; continue; }
320*f6dc9357SAndroid Build Coastguard Worker
321*f6dc9357SAndroid Build Coastguard Worker // we store UTF-16 in wchar_t strings. So we use surrogates for big unicode points:
322*f6dc9357SAndroid Build Coastguard Worker
// for debug purposes only we can store UTF-32 in wchar_t:
324*f6dc9357SAndroid Build Coastguard Worker // #define START_POINT_FOR_SURROGATE ((UInt32)0 - 1)
325*f6dc9357SAndroid Build Coastguard Worker
326*f6dc9357SAndroid Build Coastguard Worker
/*
  WIN32 MultiByteToWideChar(CP_UTF8) emits 0xfffd point, if utf-8 error was found.
  And it can emit single 0xfffd from 2 src bytes.
  It doesn't emit single 0xfffd from 3-4 src bytes.
  We can
    1) emit Escape point for each incorrect byte. So we can recover data later
    2) emit 0xfffd for each incorrect byte.
       That scheme is similar to Escape scheme, but we emit 0xfffd
       instead of each Escape point.
    3) emit single 0xfffd from 1-2 incorrect bytes, as WIN32 MultiByteToWideChar scheme
*/
338*f6dc9357SAndroid Build Coastguard Worker
Utf8_To_Utf16(wchar_t * dest,size_t * destLen,const char * src,const char * srcLim,unsigned flags)339*f6dc9357SAndroid Build Coastguard Worker static bool Utf8_To_Utf16(wchar_t *dest, size_t *destLen, const char *src, const char *srcLim, unsigned flags) throw()
340*f6dc9357SAndroid Build Coastguard Worker {
341*f6dc9357SAndroid Build Coastguard Worker size_t destPos = 0;
342*f6dc9357SAndroid Build Coastguard Worker bool ok = true;
343*f6dc9357SAndroid Build Coastguard Worker
344*f6dc9357SAndroid Build Coastguard Worker for (;;)
345*f6dc9357SAndroid Build Coastguard Worker {
346*f6dc9357SAndroid Build Coastguard Worker if (src == srcLim)
347*f6dc9357SAndroid Build Coastguard Worker {
348*f6dc9357SAndroid Build Coastguard Worker *destLen = destPos;
349*f6dc9357SAndroid Build Coastguard Worker return ok;
350*f6dc9357SAndroid Build Coastguard Worker }
351*f6dc9357SAndroid Build Coastguard Worker
352*f6dc9357SAndroid Build Coastguard Worker const Byte c = (Byte)(*src++);
353*f6dc9357SAndroid Build Coastguard Worker
354*f6dc9357SAndroid Build Coastguard Worker if (c < 0x80)
355*f6dc9357SAndroid Build Coastguard Worker {
356*f6dc9357SAndroid Build Coastguard Worker if (dest)
357*f6dc9357SAndroid Build Coastguard Worker dest[destPos] = (wchar_t)c;
358*f6dc9357SAndroid Build Coastguard Worker destPos++;
359*f6dc9357SAndroid Build Coastguard Worker continue;
360*f6dc9357SAndroid Build Coastguard Worker }
361*f6dc9357SAndroid Build Coastguard Worker
362*f6dc9357SAndroid Build Coastguard Worker if (c < 0xc0 + 2
363*f6dc9357SAndroid Build Coastguard Worker || c >= 0xf5) // it's limit for 0x140000 unicode codes : win32 compatibility
364*f6dc9357SAndroid Build Coastguard Worker {
365*f6dc9357SAndroid Build Coastguard Worker UTF_ERROR_UTF8
366*f6dc9357SAndroid Build Coastguard Worker }
367*f6dc9357SAndroid Build Coastguard Worker
368*f6dc9357SAndroid Build Coastguard Worker unsigned numBytes;
369*f6dc9357SAndroid Build Coastguard Worker
370*f6dc9357SAndroid Build Coastguard Worker MY_UTF8_HEAD_PARSE_MAX_3_BYTES
371*f6dc9357SAndroid Build Coastguard Worker
372*f6dc9357SAndroid Build Coastguard Worker unsigned pos = 0;
373*f6dc9357SAndroid Build Coastguard Worker do
374*f6dc9357SAndroid Build Coastguard Worker {
375*f6dc9357SAndroid Build Coastguard Worker if (src + pos == srcLim)
376*f6dc9357SAndroid Build Coastguard Worker break;
377*f6dc9357SAndroid Build Coastguard Worker unsigned c2 = (Byte)src[pos];
378*f6dc9357SAndroid Build Coastguard Worker c2 -= 0x80;
379*f6dc9357SAndroid Build Coastguard Worker if (c2 >= 0x40)
380*f6dc9357SAndroid Build Coastguard Worker break;
381*f6dc9357SAndroid Build Coastguard Worker val <<= 6;
382*f6dc9357SAndroid Build Coastguard Worker val |= c2;
383*f6dc9357SAndroid Build Coastguard Worker pos++;
384*f6dc9357SAndroid Build Coastguard Worker if (pos == 1)
385*f6dc9357SAndroid Build Coastguard Worker {
386*f6dc9357SAndroid Build Coastguard Worker if (val < (((unsigned)1 << 7) >> numBytes))
387*f6dc9357SAndroid Build Coastguard Worker break;
388*f6dc9357SAndroid Build Coastguard Worker if (numBytes == 2)
389*f6dc9357SAndroid Build Coastguard Worker {
390*f6dc9357SAndroid Build Coastguard Worker if (flags & Z7_UTF_FLAG_FROM_UTF8_SURROGATE_ERROR)
391*f6dc9357SAndroid Build Coastguard Worker if ((val & (0xF800 >> 6)) == (0xd800 >> 6))
392*f6dc9357SAndroid Build Coastguard Worker break;
393*f6dc9357SAndroid Build Coastguard Worker }
394*f6dc9357SAndroid Build Coastguard Worker else if (numBytes == 3 && val >= (0x110000 >> 12))
395*f6dc9357SAndroid Build Coastguard Worker break;
396*f6dc9357SAndroid Build Coastguard Worker }
397*f6dc9357SAndroid Build Coastguard Worker }
398*f6dc9357SAndroid Build Coastguard Worker while (--numBytes);
399*f6dc9357SAndroid Build Coastguard Worker
400*f6dc9357SAndroid Build Coastguard Worker if (numBytes != 0)
401*f6dc9357SAndroid Build Coastguard Worker {
402*f6dc9357SAndroid Build Coastguard Worker if ((flags & Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE) == 0)
403*f6dc9357SAndroid Build Coastguard Worker {
404*f6dc9357SAndroid Build Coastguard Worker // the following code to emit the 0xfffd chars as win32 Utf8 function.
405*f6dc9357SAndroid Build Coastguard Worker // disable the folling line, if you need 0xfffd for each incorrect byte as in Escape mode
406*f6dc9357SAndroid Build Coastguard Worker src += pos;
407*f6dc9357SAndroid Build Coastguard Worker }
408*f6dc9357SAndroid Build Coastguard Worker UTF_ERROR_UTF8
409*f6dc9357SAndroid Build Coastguard Worker }
410*f6dc9357SAndroid Build Coastguard Worker
411*f6dc9357SAndroid Build Coastguard Worker /*
412*f6dc9357SAndroid Build Coastguard Worker if (val < MY_UTF8_RANGE(pos - 1))
413*f6dc9357SAndroid Build Coastguard Worker UTF_ERROR_UTF8
414*f6dc9357SAndroid Build Coastguard Worker */
415*f6dc9357SAndroid Build Coastguard Worker
416*f6dc9357SAndroid Build Coastguard Worker #ifdef UTF_ESCAPE_BASE
417*f6dc9357SAndroid Build Coastguard Worker
418*f6dc9357SAndroid Build Coastguard Worker if ((flags & Z7_UTF_FLAG_FROM_UTF8_BMP_ESCAPE_CONVERT)
419*f6dc9357SAndroid Build Coastguard Worker && IS_ESCAPE_POINT(val, 0))
420*f6dc9357SAndroid Build Coastguard Worker {
421*f6dc9357SAndroid Build Coastguard Worker // We will emit 3 utf16-Escape-16-21 points from one Escape-16 point (3 bytes)
422*f6dc9357SAndroid Build Coastguard Worker UTF_ERROR_UTF8
423*f6dc9357SAndroid Build Coastguard Worker }
424*f6dc9357SAndroid Build Coastguard Worker
425*f6dc9357SAndroid Build Coastguard Worker #endif
426*f6dc9357SAndroid Build Coastguard Worker
427*f6dc9357SAndroid Build Coastguard Worker /*
428*f6dc9357SAndroid Build Coastguard Worker We don't expect virtual Escape-21 points in UTF-8 stream.
429*f6dc9357SAndroid Build Coastguard Worker And we don't check for Escape-21.
430*f6dc9357SAndroid Build Coastguard Worker So utf8-Escape-21 will be converted to another 3 utf16-Escape-21 points.
431*f6dc9357SAndroid Build Coastguard Worker Maybe we could convert virtual utf8-Escape-21 to one utf16-Escape-21 point in some cases?
432*f6dc9357SAndroid Build Coastguard Worker */
433*f6dc9357SAndroid Build Coastguard Worker
434*f6dc9357SAndroid Build Coastguard Worker if (val < START_POINT_FOR_SURROGATE)
435*f6dc9357SAndroid Build Coastguard Worker {
436*f6dc9357SAndroid Build Coastguard Worker /*
437*f6dc9357SAndroid Build Coastguard Worker if ((flags & Z7_UTF_FLAG_FROM_UTF8_SURROGATE_ERROR)
438*f6dc9357SAndroid Build Coastguard Worker && IS_SURROGATE_POINT(val))
439*f6dc9357SAndroid Build Coastguard Worker {
440*f6dc9357SAndroid Build Coastguard Worker // We will emit 3 utf16-Escape-16-21 points from one Surrogate-16 point (3 bytes)
441*f6dc9357SAndroid Build Coastguard Worker UTF_ERROR_UTF8
442*f6dc9357SAndroid Build Coastguard Worker }
443*f6dc9357SAndroid Build Coastguard Worker */
444*f6dc9357SAndroid Build Coastguard Worker if (dest)
445*f6dc9357SAndroid Build Coastguard Worker dest[destPos] = (wchar_t)val;
446*f6dc9357SAndroid Build Coastguard Worker destPos++;
447*f6dc9357SAndroid Build Coastguard Worker }
448*f6dc9357SAndroid Build Coastguard Worker else
449*f6dc9357SAndroid Build Coastguard Worker {
450*f6dc9357SAndroid Build Coastguard Worker /*
451*f6dc9357SAndroid Build Coastguard Worker if (val >= 0x110000)
452*f6dc9357SAndroid Build Coastguard Worker {
453*f6dc9357SAndroid Build Coastguard Worker // We will emit utf16-Escape-16-21 point from each source byte
454*f6dc9357SAndroid Build Coastguard Worker UTF_ERROR_UTF8
455*f6dc9357SAndroid Build Coastguard Worker }
456*f6dc9357SAndroid Build Coastguard Worker */
457*f6dc9357SAndroid Build Coastguard Worker if (dest)
458*f6dc9357SAndroid Build Coastguard Worker {
459*f6dc9357SAndroid Build Coastguard Worker dest[destPos + 0] = (wchar_t)(0xd800 - (0x10000 >> 10) + (val >> 10));
460*f6dc9357SAndroid Build Coastguard Worker dest[destPos + 1] = (wchar_t)(0xdc00 + (val & 0x3ff));
461*f6dc9357SAndroid Build Coastguard Worker }
462*f6dc9357SAndroid Build Coastguard Worker destPos += 2;
463*f6dc9357SAndroid Build Coastguard Worker }
464*f6dc9357SAndroid Build Coastguard Worker src += pos;
465*f6dc9357SAndroid Build Coastguard Worker }
466*f6dc9357SAndroid Build Coastguard Worker }
467*f6dc9357SAndroid Build Coastguard Worker
468*f6dc9357SAndroid Build Coastguard Worker
469*f6dc9357SAndroid Build Coastguard Worker
// MY_UTF8_HEAD(n, val) : start (head) byte of UTF-8 sequence with (n) tail bytes:
//     MY_UTF8_START(n) + the bits of (val) above the low (6 * n) bits.
// MY_UTF8_CHAR(n, val) : tail byte: 0x80 + 6 bits of (val) taken at bit offset (6 * n).
// (val) is parenthesized in both macros, so an expression argument (a | b) expands correctly.
#define MY_UTF8_HEAD(n, val) ((char)(MY_UTF8_START(n) + ((val) >> (6 * (n)))))
#define MY_UTF8_CHAR(n, val) ((char)(0x80 + (((val) >> (6 * (n))) & 0x3F)))
472*f6dc9357SAndroid Build Coastguard Worker
Utf16_To_Utf8_Calc(const wchar_t * src,const wchar_t * srcLim,unsigned flags)473*f6dc9357SAndroid Build Coastguard Worker static size_t Utf16_To_Utf8_Calc(const wchar_t *src, const wchar_t *srcLim, unsigned flags)
474*f6dc9357SAndroid Build Coastguard Worker {
475*f6dc9357SAndroid Build Coastguard Worker size_t size = (size_t)(srcLim - src);
476*f6dc9357SAndroid Build Coastguard Worker for (;;)
477*f6dc9357SAndroid Build Coastguard Worker {
478*f6dc9357SAndroid Build Coastguard Worker if (src == srcLim)
479*f6dc9357SAndroid Build Coastguard Worker return size;
480*f6dc9357SAndroid Build Coastguard Worker
481*f6dc9357SAndroid Build Coastguard Worker UInt32 val = (UInt32)(*src++);
482*f6dc9357SAndroid Build Coastguard Worker
483*f6dc9357SAndroid Build Coastguard Worker if (val < 0x80)
484*f6dc9357SAndroid Build Coastguard Worker continue;
485*f6dc9357SAndroid Build Coastguard Worker
486*f6dc9357SAndroid Build Coastguard Worker if (val < MY_UTF8_RANGE(1))
487*f6dc9357SAndroid Build Coastguard Worker {
488*f6dc9357SAndroid Build Coastguard Worker size++;
489*f6dc9357SAndroid Build Coastguard Worker continue;
490*f6dc9357SAndroid Build Coastguard Worker }
491*f6dc9357SAndroid Build Coastguard Worker
492*f6dc9357SAndroid Build Coastguard Worker #ifdef UTF_ESCAPE_BASE
493*f6dc9357SAndroid Build Coastguard Worker
494*f6dc9357SAndroid Build Coastguard Worker #if UTF_ESCAPE_PLANE != 0
495*f6dc9357SAndroid Build Coastguard Worker if (flags & Z7_UTF_FLAG_TO_UTF8_PARSE_HIGH_ESCAPE)
496*f6dc9357SAndroid Build Coastguard Worker if (IS_ESCAPE_POINT(val, UTF_ESCAPE_PLANE))
497*f6dc9357SAndroid Build Coastguard Worker continue;
498*f6dc9357SAndroid Build Coastguard Worker #endif
499*f6dc9357SAndroid Build Coastguard Worker
500*f6dc9357SAndroid Build Coastguard Worker if (flags & Z7_UTF_FLAG_TO_UTF8_EXTRACT_BMP_ESCAPE)
501*f6dc9357SAndroid Build Coastguard Worker if (IS_ESCAPE_POINT(val, 0))
502*f6dc9357SAndroid Build Coastguard Worker continue;
503*f6dc9357SAndroid Build Coastguard Worker
504*f6dc9357SAndroid Build Coastguard Worker #endif
505*f6dc9357SAndroid Build Coastguard Worker
506*f6dc9357SAndroid Build Coastguard Worker if (IS_SURROGATE_POINT(val))
507*f6dc9357SAndroid Build Coastguard Worker {
508*f6dc9357SAndroid Build Coastguard Worker // it's hack to UTF-8 encoding
509*f6dc9357SAndroid Build Coastguard Worker
510*f6dc9357SAndroid Build Coastguard Worker if (val < 0xdc00 && src != srcLim)
511*f6dc9357SAndroid Build Coastguard Worker {
512*f6dc9357SAndroid Build Coastguard Worker const UInt32 c2 = (UInt32)*src;
513*f6dc9357SAndroid Build Coastguard Worker if (c2 >= 0xdc00 && c2 < 0xe000)
514*f6dc9357SAndroid Build Coastguard Worker src++;
515*f6dc9357SAndroid Build Coastguard Worker }
516*f6dc9357SAndroid Build Coastguard Worker size += 2;
517*f6dc9357SAndroid Build Coastguard Worker continue;
518*f6dc9357SAndroid Build Coastguard Worker }
519*f6dc9357SAndroid Build Coastguard Worker
520*f6dc9357SAndroid Build Coastguard Worker #ifdef Z7_WCHART_IS_16BIT
521*f6dc9357SAndroid Build Coastguard Worker
522*f6dc9357SAndroid Build Coastguard Worker size += 2;
523*f6dc9357SAndroid Build Coastguard Worker
524*f6dc9357SAndroid Build Coastguard Worker #else
525*f6dc9357SAndroid Build Coastguard Worker
526*f6dc9357SAndroid Build Coastguard Worker if (val < MY_UTF8_RANGE(2)) size += 2;
527*f6dc9357SAndroid Build Coastguard Worker else if (val < MY_UTF8_RANGE(3)) size += 3;
528*f6dc9357SAndroid Build Coastguard Worker else if (val < MY_UTF8_RANGE(4)) size += 4;
529*f6dc9357SAndroid Build Coastguard Worker else if (val < MY_UTF8_RANGE(5)) size += 5;
530*f6dc9357SAndroid Build Coastguard Worker else
531*f6dc9357SAndroid Build Coastguard Worker #if MY_UTF8_NUM_TAIL_BYTES_MAX >= 6
532*f6dc9357SAndroid Build Coastguard Worker size += 6;
533*f6dc9357SAndroid Build Coastguard Worker #else
534*f6dc9357SAndroid Build Coastguard Worker size += 3;
535*f6dc9357SAndroid Build Coastguard Worker #endif
536*f6dc9357SAndroid Build Coastguard Worker
537*f6dc9357SAndroid Build Coastguard Worker #endif
538*f6dc9357SAndroid Build Coastguard Worker }
539*f6dc9357SAndroid Build Coastguard Worker }
540*f6dc9357SAndroid Build Coastguard Worker
541*f6dc9357SAndroid Build Coastguard Worker
Utf16_To_Utf8(char * dest,const wchar_t * src,const wchar_t * srcLim,unsigned flags)542*f6dc9357SAndroid Build Coastguard Worker static char *Utf16_To_Utf8(char *dest, const wchar_t *src, const wchar_t *srcLim, unsigned flags)
543*f6dc9357SAndroid Build Coastguard Worker {
544*f6dc9357SAndroid Build Coastguard Worker for (;;)
545*f6dc9357SAndroid Build Coastguard Worker {
546*f6dc9357SAndroid Build Coastguard Worker if (src == srcLim)
547*f6dc9357SAndroid Build Coastguard Worker return dest;
548*f6dc9357SAndroid Build Coastguard Worker
549*f6dc9357SAndroid Build Coastguard Worker UInt32 val = (UInt32)*src++;
550*f6dc9357SAndroid Build Coastguard Worker
551*f6dc9357SAndroid Build Coastguard Worker if (val < 0x80)
552*f6dc9357SAndroid Build Coastguard Worker {
553*f6dc9357SAndroid Build Coastguard Worker *dest++ = (char)val;
554*f6dc9357SAndroid Build Coastguard Worker continue;
555*f6dc9357SAndroid Build Coastguard Worker }
556*f6dc9357SAndroid Build Coastguard Worker
557*f6dc9357SAndroid Build Coastguard Worker if (val < MY_UTF8_RANGE(1))
558*f6dc9357SAndroid Build Coastguard Worker {
559*f6dc9357SAndroid Build Coastguard Worker dest[0] = MY_UTF8_HEAD(1, val);
560*f6dc9357SAndroid Build Coastguard Worker dest[1] = MY_UTF8_CHAR(0, val);
561*f6dc9357SAndroid Build Coastguard Worker dest += 2;
562*f6dc9357SAndroid Build Coastguard Worker continue;
563*f6dc9357SAndroid Build Coastguard Worker }
564*f6dc9357SAndroid Build Coastguard Worker
565*f6dc9357SAndroid Build Coastguard Worker #ifdef UTF_ESCAPE_BASE
566*f6dc9357SAndroid Build Coastguard Worker
567*f6dc9357SAndroid Build Coastguard Worker #if UTF_ESCAPE_PLANE != 0
568*f6dc9357SAndroid Build Coastguard Worker /*
569*f6dc9357SAndroid Build Coastguard Worker if (wchar_t is 32-bit)
570*f6dc9357SAndroid Build Coastguard Worker && (Z7_UTF_FLAG_TO_UTF8_PARSE_HIGH_ESCAPE is set)
571*f6dc9357SAndroid Build Coastguard Worker && (point is virtual escape plane)
572*f6dc9357SAndroid Build Coastguard Worker we extract 8-bit byte from virtual HIGH-ESCAPE PLANE.
573*f6dc9357SAndroid Build Coastguard Worker */
574*f6dc9357SAndroid Build Coastguard Worker if (flags & Z7_UTF_FLAG_TO_UTF8_PARSE_HIGH_ESCAPE)
575*f6dc9357SAndroid Build Coastguard Worker if (IS_ESCAPE_POINT(val, UTF_ESCAPE_PLANE))
576*f6dc9357SAndroid Build Coastguard Worker {
577*f6dc9357SAndroid Build Coastguard Worker *dest++ = (char)(val);
578*f6dc9357SAndroid Build Coastguard Worker continue;
579*f6dc9357SAndroid Build Coastguard Worker }
580*f6dc9357SAndroid Build Coastguard Worker #endif // UTF_ESCAPE_PLANE != 0
581*f6dc9357SAndroid Build Coastguard Worker
582*f6dc9357SAndroid Build Coastguard Worker /* if (Z7_UTF_FLAG_TO_UTF8_EXTRACT_BMP_ESCAPE is defined)
583*f6dc9357SAndroid Build Coastguard Worker we extract 8-bit byte from BMP-ESCAPE PLANE. */
584*f6dc9357SAndroid Build Coastguard Worker
585*f6dc9357SAndroid Build Coastguard Worker if (flags & Z7_UTF_FLAG_TO_UTF8_EXTRACT_BMP_ESCAPE)
586*f6dc9357SAndroid Build Coastguard Worker if (IS_ESCAPE_POINT(val, 0))
587*f6dc9357SAndroid Build Coastguard Worker {
588*f6dc9357SAndroid Build Coastguard Worker *dest++ = (char)(val);
589*f6dc9357SAndroid Build Coastguard Worker continue;
590*f6dc9357SAndroid Build Coastguard Worker }
591*f6dc9357SAndroid Build Coastguard Worker
592*f6dc9357SAndroid Build Coastguard Worker #endif // UTF_ESCAPE_BASE
593*f6dc9357SAndroid Build Coastguard Worker
594*f6dc9357SAndroid Build Coastguard Worker if (IS_SURROGATE_POINT(val))
595*f6dc9357SAndroid Build Coastguard Worker {
596*f6dc9357SAndroid Build Coastguard Worker // it's hack to UTF-8 encoding
597*f6dc9357SAndroid Build Coastguard Worker if (val < 0xdc00 && src != srcLim)
598*f6dc9357SAndroid Build Coastguard Worker {
599*f6dc9357SAndroid Build Coastguard Worker const UInt32 c2 = (UInt32)*src;
600*f6dc9357SAndroid Build Coastguard Worker if (IS_LOW_SURROGATE_POINT(c2))
601*f6dc9357SAndroid Build Coastguard Worker {
602*f6dc9357SAndroid Build Coastguard Worker src++;
603*f6dc9357SAndroid Build Coastguard Worker val = (((val - 0xd800) << 10) | (c2 - 0xdc00)) + 0x10000;
604*f6dc9357SAndroid Build Coastguard Worker dest[0] = MY_UTF8_HEAD(3, val);
605*f6dc9357SAndroid Build Coastguard Worker dest[1] = MY_UTF8_CHAR(2, val);
606*f6dc9357SAndroid Build Coastguard Worker dest[2] = MY_UTF8_CHAR(1, val);
607*f6dc9357SAndroid Build Coastguard Worker dest[3] = MY_UTF8_CHAR(0, val);
608*f6dc9357SAndroid Build Coastguard Worker dest += 4;
609*f6dc9357SAndroid Build Coastguard Worker continue;
610*f6dc9357SAndroid Build Coastguard Worker }
611*f6dc9357SAndroid Build Coastguard Worker }
612*f6dc9357SAndroid Build Coastguard Worker if (flags & Z7_UTF_FLAG_TO_UTF8_SURROGATE_ERROR)
613*f6dc9357SAndroid Build Coastguard Worker val = UTF_REPLACEMENT_CHAR; // WIN32 function does it
614*f6dc9357SAndroid Build Coastguard Worker }
615*f6dc9357SAndroid Build Coastguard Worker
616*f6dc9357SAndroid Build Coastguard Worker #ifndef Z7_WCHART_IS_16BIT
617*f6dc9357SAndroid Build Coastguard Worker if (val < MY_UTF8_RANGE(2))
618*f6dc9357SAndroid Build Coastguard Worker #endif
619*f6dc9357SAndroid Build Coastguard Worker {
620*f6dc9357SAndroid Build Coastguard Worker dest[0] = MY_UTF8_HEAD(2, val);
621*f6dc9357SAndroid Build Coastguard Worker dest[1] = MY_UTF8_CHAR(1, val);
622*f6dc9357SAndroid Build Coastguard Worker dest[2] = MY_UTF8_CHAR(0, val);
623*f6dc9357SAndroid Build Coastguard Worker dest += 3;
624*f6dc9357SAndroid Build Coastguard Worker continue;
625*f6dc9357SAndroid Build Coastguard Worker }
626*f6dc9357SAndroid Build Coastguard Worker
627*f6dc9357SAndroid Build Coastguard Worker #ifndef Z7_WCHART_IS_16BIT
628*f6dc9357SAndroid Build Coastguard Worker
629*f6dc9357SAndroid Build Coastguard Worker // we don't expect this case. so we can throw exception
630*f6dc9357SAndroid Build Coastguard Worker // throw 20210407;
631*f6dc9357SAndroid Build Coastguard Worker
632*f6dc9357SAndroid Build Coastguard Worker char b;
633*f6dc9357SAndroid Build Coastguard Worker unsigned numBits;
634*f6dc9357SAndroid Build Coastguard Worker if (val < MY_UTF8_RANGE(3)) { numBits = 6 * 3; b = MY_UTF8_HEAD(3, val); }
635*f6dc9357SAndroid Build Coastguard Worker else if (val < MY_UTF8_RANGE(4)) { numBits = 6 * 4; b = MY_UTF8_HEAD(4, val); }
636*f6dc9357SAndroid Build Coastguard Worker else if (val < MY_UTF8_RANGE(5)) { numBits = 6 * 5; b = MY_UTF8_HEAD(5, val); }
637*f6dc9357SAndroid Build Coastguard Worker #if MY_UTF8_NUM_TAIL_BYTES_MAX >= 6
638*f6dc9357SAndroid Build Coastguard Worker else { numBits = 6 * 6; b = (char)MY_UTF8_START(6); }
639*f6dc9357SAndroid Build Coastguard Worker #else
640*f6dc9357SAndroid Build Coastguard Worker else
641*f6dc9357SAndroid Build Coastguard Worker {
642*f6dc9357SAndroid Build Coastguard Worker val = UTF_REPLACEMENT_CHAR;
643*f6dc9357SAndroid Build Coastguard Worker { numBits = 6 * 3; b = MY_UTF8_HEAD(3, val); }
644*f6dc9357SAndroid Build Coastguard Worker }
645*f6dc9357SAndroid Build Coastguard Worker #endif
646*f6dc9357SAndroid Build Coastguard Worker
647*f6dc9357SAndroid Build Coastguard Worker *dest++ = b;
648*f6dc9357SAndroid Build Coastguard Worker
649*f6dc9357SAndroid Build Coastguard Worker do
650*f6dc9357SAndroid Build Coastguard Worker {
651*f6dc9357SAndroid Build Coastguard Worker numBits -= 6;
652*f6dc9357SAndroid Build Coastguard Worker *dest++ = (char)(0x80 + ((val >> numBits) & 0x3F));
653*f6dc9357SAndroid Build Coastguard Worker }
654*f6dc9357SAndroid Build Coastguard Worker while (numBits != 0);
655*f6dc9357SAndroid Build Coastguard Worker
656*f6dc9357SAndroid Build Coastguard Worker #endif
657*f6dc9357SAndroid Build Coastguard Worker }
658*f6dc9357SAndroid Build Coastguard Worker }
659*f6dc9357SAndroid Build Coastguard Worker
Convert_UTF8_Buf_To_Unicode(const char * src,size_t srcSize,UString & dest,unsigned flags)660*f6dc9357SAndroid Build Coastguard Worker bool Convert_UTF8_Buf_To_Unicode(const char *src, size_t srcSize, UString &dest, unsigned flags)
661*f6dc9357SAndroid Build Coastguard Worker {
662*f6dc9357SAndroid Build Coastguard Worker dest.Empty();
663*f6dc9357SAndroid Build Coastguard Worker size_t destLen = 0;
664*f6dc9357SAndroid Build Coastguard Worker Utf8_To_Utf16(NULL, &destLen, src, src + srcSize, flags);
665*f6dc9357SAndroid Build Coastguard Worker bool res = Utf8_To_Utf16(dest.GetBuf((unsigned)destLen), &destLen, src, src + srcSize, flags);
666*f6dc9357SAndroid Build Coastguard Worker dest.ReleaseBuf_SetEnd((unsigned)destLen);
667*f6dc9357SAndroid Build Coastguard Worker return res;
668*f6dc9357SAndroid Build Coastguard Worker }
669*f6dc9357SAndroid Build Coastguard Worker
ConvertUTF8ToUnicode_Flags(const AString & src,UString & dest,unsigned flags)670*f6dc9357SAndroid Build Coastguard Worker bool ConvertUTF8ToUnicode_Flags(const AString &src, UString &dest, unsigned flags)
671*f6dc9357SAndroid Build Coastguard Worker {
672*f6dc9357SAndroid Build Coastguard Worker return Convert_UTF8_Buf_To_Unicode(src, src.Len(), dest, flags);
673*f6dc9357SAndroid Build Coastguard Worker }
674*f6dc9357SAndroid Build Coastguard Worker
675*f6dc9357SAndroid Build Coastguard Worker
676*f6dc9357SAndroid Build Coastguard Worker static
677*f6dc9357SAndroid Build Coastguard Worker unsigned g_UTF8_To_Unicode_Flags =
678*f6dc9357SAndroid Build Coastguard Worker Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE
679*f6dc9357SAndroid Build Coastguard Worker #ifndef Z7_WCHART_IS_16BIT
680*f6dc9357SAndroid Build Coastguard Worker | Z7_UTF_FLAG_FROM_UTF8_SURROGATE_ERROR
681*f6dc9357SAndroid Build Coastguard Worker #ifdef MY_UTF8_RAW_NON_UTF8_SUPPORTED
682*f6dc9357SAndroid Build Coastguard Worker | Z7_UTF_FLAG_FROM_UTF8_BMP_ESCAPE_CONVERT
683*f6dc9357SAndroid Build Coastguard Worker #endif
684*f6dc9357SAndroid Build Coastguard Worker #endif
685*f6dc9357SAndroid Build Coastguard Worker ;
686*f6dc9357SAndroid Build Coastguard Worker
687*f6dc9357SAndroid Build Coastguard Worker
688*f6dc9357SAndroid Build Coastguard Worker /*
689*f6dc9357SAndroid Build Coastguard Worker bool ConvertUTF8ToUnicode_boolRes(const AString &src, UString &dest)
690*f6dc9357SAndroid Build Coastguard Worker {
691*f6dc9357SAndroid Build Coastguard Worker return ConvertUTF8ToUnicode_Flags(src, dest, g_UTF8_To_Unicode_Flags);
692*f6dc9357SAndroid Build Coastguard Worker }
693*f6dc9357SAndroid Build Coastguard Worker */
694*f6dc9357SAndroid Build Coastguard Worker
ConvertUTF8ToUnicode(const AString & src,UString & dest)695*f6dc9357SAndroid Build Coastguard Worker bool ConvertUTF8ToUnicode(const AString &src, UString &dest)
696*f6dc9357SAndroid Build Coastguard Worker {
697*f6dc9357SAndroid Build Coastguard Worker return ConvertUTF8ToUnicode_Flags(src, dest, g_UTF8_To_Unicode_Flags);
698*f6dc9357SAndroid Build Coastguard Worker }
699*f6dc9357SAndroid Build Coastguard Worker
700*f6dc9357SAndroid Build Coastguard Worker void Print_UString(const UString &a);
701*f6dc9357SAndroid Build Coastguard Worker
ConvertUnicodeToUTF8_Flags(const UString & src,AString & dest,unsigned flags)702*f6dc9357SAndroid Build Coastguard Worker void ConvertUnicodeToUTF8_Flags(const UString &src, AString &dest, unsigned flags)
703*f6dc9357SAndroid Build Coastguard Worker {
704*f6dc9357SAndroid Build Coastguard Worker /*
705*f6dc9357SAndroid Build Coastguard Worker if (src.Len()== 24)
706*f6dc9357SAndroid Build Coastguard Worker throw "202104";
707*f6dc9357SAndroid Build Coastguard Worker */
708*f6dc9357SAndroid Build Coastguard Worker dest.Empty();
709*f6dc9357SAndroid Build Coastguard Worker const size_t destLen = Utf16_To_Utf8_Calc(src, src.Ptr(src.Len()), flags);
710*f6dc9357SAndroid Build Coastguard Worker char *destStart = dest.GetBuf((unsigned)destLen);
711*f6dc9357SAndroid Build Coastguard Worker const char *destEnd = Utf16_To_Utf8(destStart, src, src.Ptr(src.Len()), flags);
712*f6dc9357SAndroid Build Coastguard Worker dest.ReleaseBuf_SetEnd((unsigned)destLen);
713*f6dc9357SAndroid Build Coastguard Worker // printf("\nlen = %d\n", src.Len());
714*f6dc9357SAndroid Build Coastguard Worker if (destLen != (size_t)(destEnd - destStart))
715*f6dc9357SAndroid Build Coastguard Worker {
716*f6dc9357SAndroid Build Coastguard Worker /*
717*f6dc9357SAndroid Build Coastguard Worker // dest.ReleaseBuf_SetEnd((unsigned)(destEnd - destStart));
718*f6dc9357SAndroid Build Coastguard Worker printf("\nlen = %d\n", (unsigned)destLen);
719*f6dc9357SAndroid Build Coastguard Worker printf("\n(destEnd - destStart) = %d\n", (unsigned)(destEnd - destStart));
720*f6dc9357SAndroid Build Coastguard Worker printf("\n");
721*f6dc9357SAndroid Build Coastguard Worker // Print_UString(src);
722*f6dc9357SAndroid Build Coastguard Worker printf("\n");
723*f6dc9357SAndroid Build Coastguard Worker // printf("\nlen = %d\n", destLen);
724*f6dc9357SAndroid Build Coastguard Worker */
725*f6dc9357SAndroid Build Coastguard Worker throw 20210406;
726*f6dc9357SAndroid Build Coastguard Worker }
727*f6dc9357SAndroid Build Coastguard Worker }
728*f6dc9357SAndroid Build Coastguard Worker
729*f6dc9357SAndroid Build Coastguard Worker
730*f6dc9357SAndroid Build Coastguard Worker
731*f6dc9357SAndroid Build Coastguard Worker unsigned g_Unicode_To_UTF8_Flags =
732*f6dc9357SAndroid Build Coastguard Worker // Z7_UTF_FLAG_TO_UTF8_PARSE_HIGH_ESCAPE
733*f6dc9357SAndroid Build Coastguard Worker 0
734*f6dc9357SAndroid Build Coastguard Worker #ifndef _WIN32
735*f6dc9357SAndroid Build Coastguard Worker #ifdef MY_UTF8_RAW_NON_UTF8_SUPPORTED
736*f6dc9357SAndroid Build Coastguard Worker | Z7_UTF_FLAG_TO_UTF8_EXTRACT_BMP_ESCAPE
737*f6dc9357SAndroid Build Coastguard Worker #else
738*f6dc9357SAndroid Build Coastguard Worker | Z7_UTF_FLAG_TO_UTF8_SURROGATE_ERROR
739*f6dc9357SAndroid Build Coastguard Worker #endif
740*f6dc9357SAndroid Build Coastguard Worker #endif
741*f6dc9357SAndroid Build Coastguard Worker ;
742*f6dc9357SAndroid Build Coastguard Worker
ConvertUnicodeToUTF8(const UString & src,AString & dest)743*f6dc9357SAndroid Build Coastguard Worker void ConvertUnicodeToUTF8(const UString &src, AString &dest)
744*f6dc9357SAndroid Build Coastguard Worker {
745*f6dc9357SAndroid Build Coastguard Worker ConvertUnicodeToUTF8_Flags(src, dest, g_Unicode_To_UTF8_Flags);
746*f6dc9357SAndroid Build Coastguard Worker }
747*f6dc9357SAndroid Build Coastguard Worker
Convert_Unicode_To_UTF8_Buf(const UString & src,CByteBuffer & dest)748*f6dc9357SAndroid Build Coastguard Worker void Convert_Unicode_To_UTF8_Buf(const UString &src, CByteBuffer &dest)
749*f6dc9357SAndroid Build Coastguard Worker {
750*f6dc9357SAndroid Build Coastguard Worker const unsigned flags = g_Unicode_To_UTF8_Flags;
751*f6dc9357SAndroid Build Coastguard Worker dest.Free();
752*f6dc9357SAndroid Build Coastguard Worker const size_t destLen = Utf16_To_Utf8_Calc(src, src.Ptr(src.Len()), flags);
753*f6dc9357SAndroid Build Coastguard Worker dest.Alloc(destLen);
754*f6dc9357SAndroid Build Coastguard Worker const char *destEnd = Utf16_To_Utf8((char *)(void *)(Byte *)dest, src, src.Ptr(src.Len()), flags);
755*f6dc9357SAndroid Build Coastguard Worker if (destLen != (size_t)(destEnd - (char *)(void *)(Byte *)dest))
756*f6dc9357SAndroid Build Coastguard Worker throw 202104;
757*f6dc9357SAndroid Build Coastguard Worker }
758*f6dc9357SAndroid Build Coastguard Worker
759*f6dc9357SAndroid Build Coastguard Worker /*
760*f6dc9357SAndroid Build Coastguard Worker
761*f6dc9357SAndroid Build Coastguard Worker #ifndef _WIN32
762*f6dc9357SAndroid Build Coastguard Worker void Convert_UTF16_To_UTF32(const UString &src, UString &dest)
763*f6dc9357SAndroid Build Coastguard Worker {
764*f6dc9357SAndroid Build Coastguard Worker dest.Empty();
765*f6dc9357SAndroid Build Coastguard Worker for (size_t i = 0; i < src.Len();)
766*f6dc9357SAndroid Build Coastguard Worker {
767*f6dc9357SAndroid Build Coastguard Worker wchar_t c = src[i++];
768*f6dc9357SAndroid Build Coastguard Worker if (c >= 0xd800 && c < 0xdc00 && i < src.Len())
769*f6dc9357SAndroid Build Coastguard Worker {
770*f6dc9357SAndroid Build Coastguard Worker const wchar_t c2 = src[i];
771*f6dc9357SAndroid Build Coastguard Worker if (c2 >= 0xdc00 && c2 < 0xe000)
772*f6dc9357SAndroid Build Coastguard Worker {
773*f6dc9357SAndroid Build Coastguard Worker // printf("\nSurragate [%d]: %4x %4x -> ", i, (int)c, (int)c2);
774*f6dc9357SAndroid Build Coastguard Worker c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff);
775*f6dc9357SAndroid Build Coastguard Worker // printf("%4x\n", (int)c);
776*f6dc9357SAndroid Build Coastguard Worker i++;
777*f6dc9357SAndroid Build Coastguard Worker }
778*f6dc9357SAndroid Build Coastguard Worker }
779*f6dc9357SAndroid Build Coastguard Worker dest += c;
780*f6dc9357SAndroid Build Coastguard Worker }
781*f6dc9357SAndroid Build Coastguard Worker }
782*f6dc9357SAndroid Build Coastguard Worker
783*f6dc9357SAndroid Build Coastguard Worker void Convert_UTF32_To_UTF16(const UString &src, UString &dest)
784*f6dc9357SAndroid Build Coastguard Worker {
785*f6dc9357SAndroid Build Coastguard Worker dest.Empty();
786*f6dc9357SAndroid Build Coastguard Worker for (size_t i = 0; i < src.Len();)
787*f6dc9357SAndroid Build Coastguard Worker {
788*f6dc9357SAndroid Build Coastguard Worker wchar_t w = src[i++];
789*f6dc9357SAndroid Build Coastguard Worker if (w >= 0x10000 && w < 0x110000)
790*f6dc9357SAndroid Build Coastguard Worker {
791*f6dc9357SAndroid Build Coastguard Worker w -= 0x10000;
792*f6dc9357SAndroid Build Coastguard Worker dest += (wchar_t)((unsigned)0xd800 + (((unsigned)w >> 10) & 0x3ff));
793*f6dc9357SAndroid Build Coastguard Worker w = 0xdc00 + (w & 0x3ff);
794*f6dc9357SAndroid Build Coastguard Worker }
795*f6dc9357SAndroid Build Coastguard Worker dest += w;
796*f6dc9357SAndroid Build Coastguard Worker }
797*f6dc9357SAndroid Build Coastguard Worker }
798*f6dc9357SAndroid Build Coastguard Worker
799*f6dc9357SAndroid Build Coastguard Worker bool UTF32_IsThere_BigPoint(const UString &src)
800*f6dc9357SAndroid Build Coastguard Worker {
801*f6dc9357SAndroid Build Coastguard Worker for (size_t i = 0; i < src.Len();)
802*f6dc9357SAndroid Build Coastguard Worker {
803*f6dc9357SAndroid Build Coastguard Worker const UInt32 c = (UInt32)src[i++];
804*f6dc9357SAndroid Build Coastguard Worker if (c >= 0x110000)
805*f6dc9357SAndroid Build Coastguard Worker return true;
806*f6dc9357SAndroid Build Coastguard Worker }
807*f6dc9357SAndroid Build Coastguard Worker return false;
808*f6dc9357SAndroid Build Coastguard Worker }
809*f6dc9357SAndroid Build Coastguard Worker
810*f6dc9357SAndroid Build Coastguard Worker bool Unicode_IsThere_BmpEscape(const UString &src)
811*f6dc9357SAndroid Build Coastguard Worker {
812*f6dc9357SAndroid Build Coastguard Worker for (size_t i = 0; i < src.Len();)
813*f6dc9357SAndroid Build Coastguard Worker {
814*f6dc9357SAndroid Build Coastguard Worker const UInt32 c = (UInt32)src[i++];
815*f6dc9357SAndroid Build Coastguard Worker if (IS_ESCAPE_POINT(c, 0))
816*f6dc9357SAndroid Build Coastguard Worker return true;
817*f6dc9357SAndroid Build Coastguard Worker }
818*f6dc9357SAndroid Build Coastguard Worker return false;
819*f6dc9357SAndroid Build Coastguard Worker }
820*f6dc9357SAndroid Build Coastguard Worker
821*f6dc9357SAndroid Build Coastguard Worker
822*f6dc9357SAndroid Build Coastguard Worker #endif
823*f6dc9357SAndroid Build Coastguard Worker
824*f6dc9357SAndroid Build Coastguard Worker bool Unicode_IsThere_Utf16SurrogateError(const UString &src)
825*f6dc9357SAndroid Build Coastguard Worker {
826*f6dc9357SAndroid Build Coastguard Worker for (size_t i = 0; i < src.Len();)
827*f6dc9357SAndroid Build Coastguard Worker {
828*f6dc9357SAndroid Build Coastguard Worker const UInt32 val = (UInt32)src[i++];
829*f6dc9357SAndroid Build Coastguard Worker if (IS_SURROGATE_POINT(val))
830*f6dc9357SAndroid Build Coastguard Worker {
831*f6dc9357SAndroid Build Coastguard Worker // it's hack to UTF-8 encoding
832*f6dc9357SAndroid Build Coastguard Worker if (val >= 0xdc00 || i == src.Len())
833*f6dc9357SAndroid Build Coastguard Worker return true;
834*f6dc9357SAndroid Build Coastguard Worker const UInt32 c2 = (UInt32)*src;
835*f6dc9357SAndroid Build Coastguard Worker if (!IS_LOW_SURROGATE_POINT(c2))
836*f6dc9357SAndroid Build Coastguard Worker return true;
837*f6dc9357SAndroid Build Coastguard Worker }
838*f6dc9357SAndroid Build Coastguard Worker }
839*f6dc9357SAndroid Build Coastguard Worker return false;
840*f6dc9357SAndroid Build Coastguard Worker }
841*f6dc9357SAndroid Build Coastguard Worker */
842*f6dc9357SAndroid Build Coastguard Worker
843*f6dc9357SAndroid Build Coastguard Worker #ifndef Z7_WCHART_IS_16BIT
844*f6dc9357SAndroid Build Coastguard Worker
Convert_UnicodeEsc16_To_UnicodeEscHigh(UString &)845*f6dc9357SAndroid Build Coastguard Worker void Convert_UnicodeEsc16_To_UnicodeEscHigh
846*f6dc9357SAndroid Build Coastguard Worker #if UTF_ESCAPE_PLANE == 0
847*f6dc9357SAndroid Build Coastguard Worker (UString &) {}
848*f6dc9357SAndroid Build Coastguard Worker #else
849*f6dc9357SAndroid Build Coastguard Worker (UString &s)
850*f6dc9357SAndroid Build Coastguard Worker {
851*f6dc9357SAndroid Build Coastguard Worker const unsigned len = s.Len();
852*f6dc9357SAndroid Build Coastguard Worker for (unsigned i = 0; i < len; i++)
853*f6dc9357SAndroid Build Coastguard Worker {
854*f6dc9357SAndroid Build Coastguard Worker wchar_t c = s[i];
855*f6dc9357SAndroid Build Coastguard Worker if (IS_ESCAPE_POINT(c, 0))
856*f6dc9357SAndroid Build Coastguard Worker {
857*f6dc9357SAndroid Build Coastguard Worker c += UTF_ESCAPE_PLANE;
858*f6dc9357SAndroid Build Coastguard Worker s.ReplaceOneCharAtPos(i, c);
859*f6dc9357SAndroid Build Coastguard Worker }
860*f6dc9357SAndroid Build Coastguard Worker }
861*f6dc9357SAndroid Build Coastguard Worker }
862*f6dc9357SAndroid Build Coastguard Worker #endif
863*f6dc9357SAndroid Build Coastguard Worker #endif
864