1*f6dc9357SAndroid Build Coastguard Worker // Common/UTFConvert.h 2*f6dc9357SAndroid Build Coastguard Worker 3*f6dc9357SAndroid Build Coastguard Worker #ifndef ZIP7_INC_COMMON_UTF_CONVERT_H 4*f6dc9357SAndroid Build Coastguard Worker #define ZIP7_INC_COMMON_UTF_CONVERT_H 5*f6dc9357SAndroid Build Coastguard Worker 6*f6dc9357SAndroid Build Coastguard Worker #include "MyBuffer.h" 7*f6dc9357SAndroid Build Coastguard Worker #include "MyString.h" 8*f6dc9357SAndroid Build Coastguard Worker 9*f6dc9357SAndroid Build Coastguard Worker struct CUtf8Check 10*f6dc9357SAndroid Build Coastguard Worker { 11*f6dc9357SAndroid Build Coastguard Worker // Byte MaxByte; // in original src stream 12*f6dc9357SAndroid Build Coastguard Worker bool NonUtf; 13*f6dc9357SAndroid Build Coastguard Worker bool ZeroChar; 14*f6dc9357SAndroid Build Coastguard Worker bool SingleSurrogate; 15*f6dc9357SAndroid Build Coastguard Worker bool Escape; 16*f6dc9357SAndroid Build Coastguard Worker bool Truncated; 17*f6dc9357SAndroid Build Coastguard Worker UInt32 MaxHighPoint; // only for points >= 0x80 18*f6dc9357SAndroid Build Coastguard Worker CUtf8CheckCUtf8Check19*f6dc9357SAndroid Build Coastguard Worker CUtf8Check() { Clear(); } 20*f6dc9357SAndroid Build Coastguard Worker ClearCUtf8Check21*f6dc9357SAndroid Build Coastguard Worker void Clear() 22*f6dc9357SAndroid Build Coastguard Worker { 23*f6dc9357SAndroid Build Coastguard Worker // MaxByte = 0; 24*f6dc9357SAndroid Build Coastguard Worker NonUtf = false; 25*f6dc9357SAndroid Build Coastguard Worker ZeroChar = false; 26*f6dc9357SAndroid Build Coastguard Worker SingleSurrogate = false; 27*f6dc9357SAndroid Build Coastguard Worker Escape = false; 28*f6dc9357SAndroid Build Coastguard Worker Truncated = false; 29*f6dc9357SAndroid Build Coastguard Worker MaxHighPoint = 0; 30*f6dc9357SAndroid Build Coastguard Worker } 31*f6dc9357SAndroid Build Coastguard Worker UpdateCUtf8Check32*f6dc9357SAndroid Build Coastguard Worker void Update(const CUtf8Check &c) 33*f6dc9357SAndroid Build Coastguard Worker { 34*f6dc9357SAndroid Build Coastguard Worker if (c.NonUtf) NonUtf = true; 35*f6dc9357SAndroid Build Coastguard Worker if (c.ZeroChar) ZeroChar = true; 36*f6dc9357SAndroid Build Coastguard Worker if (c.SingleSurrogate) SingleSurrogate = true; 37*f6dc9357SAndroid Build Coastguard Worker if (c.Escape) Escape = true; 38*f6dc9357SAndroid Build Coastguard Worker if (c.Truncated) Truncated = true; 39*f6dc9357SAndroid Build Coastguard Worker if (MaxHighPoint < c.MaxHighPoint) MaxHighPoint = c.MaxHighPoint; 40*f6dc9357SAndroid Build Coastguard Worker } 41*f6dc9357SAndroid Build Coastguard Worker PrintStatusCUtf8Check42*f6dc9357SAndroid Build Coastguard Worker void PrintStatus(AString &s) const 43*f6dc9357SAndroid Build Coastguard Worker { 44*f6dc9357SAndroid Build Coastguard Worker s.Empty(); 45*f6dc9357SAndroid Build Coastguard Worker 46*f6dc9357SAndroid Build Coastguard Worker // s.Add_OptSpaced("MaxByte="); 47*f6dc9357SAndroid Build Coastguard Worker // s.Add_UInt32(MaxByte); 48*f6dc9357SAndroid Build Coastguard Worker 49*f6dc9357SAndroid Build Coastguard Worker if (NonUtf) s.Add_OptSpaced("non-UTF8"); 50*f6dc9357SAndroid Build Coastguard Worker if (ZeroChar) s.Add_OptSpaced("ZeroChar"); 51*f6dc9357SAndroid Build Coastguard Worker if (SingleSurrogate) s.Add_OptSpaced("SingleSurrogate"); 52*f6dc9357SAndroid Build Coastguard Worker if (Escape) s.Add_OptSpaced("Escape"); 53*f6dc9357SAndroid Build Coastguard Worker if (Truncated) s.Add_OptSpaced("Truncated"); 54*f6dc9357SAndroid Build Coastguard Worker 55*f6dc9357SAndroid Build Coastguard Worker if (MaxHighPoint != 0) 56*f6dc9357SAndroid Build Coastguard Worker { 57*f6dc9357SAndroid Build Coastguard Worker s.Add_OptSpaced("MaxUnicode="); 58*f6dc9357SAndroid Build Coastguard Worker s.Add_UInt32(MaxHighPoint); 59*f6dc9357SAndroid Build Coastguard Worker } 60*f6dc9357SAndroid Build Coastguard Worker } 61*f6dc9357SAndroid Build Coastguard Worker 62*f6dc9357SAndroid Build Coastguard Worker 63*f6dc9357SAndroid Build Coastguard Worker bool IsOK(bool allowReduced = false) const 64*f6dc9357SAndroid Build Coastguard Worker { 65*f6dc9357SAndroid Build Coastguard Worker if (NonUtf || SingleSurrogate || ZeroChar) 66*f6dc9357SAndroid Build Coastguard Worker return false; 67*f6dc9357SAndroid Build Coastguard Worker if (MaxHighPoint >= 0x110000) 68*f6dc9357SAndroid Build Coastguard Worker return false; 69*f6dc9357SAndroid Build Coastguard Worker if (Truncated && !allowReduced) 70*f6dc9357SAndroid Build Coastguard Worker return false; 71*f6dc9357SAndroid Build Coastguard Worker return true; 72*f6dc9357SAndroid Build Coastguard Worker } 73*f6dc9357SAndroid Build Coastguard Worker 74*f6dc9357SAndroid Build Coastguard Worker // it checks full buffer as specified in (size) and it doesn't stop on zero char 75*f6dc9357SAndroid Build Coastguard Worker void Check_Buf(const char *src, size_t size) throw(); 76*f6dc9357SAndroid Build Coastguard Worker Check_AStringCUtf8Check77*f6dc9357SAndroid Build Coastguard Worker void Check_AString(const AString &s) throw() 78*f6dc9357SAndroid Build Coastguard Worker { 79*f6dc9357SAndroid Build Coastguard Worker Check_Buf(s.Ptr(), s.Len()); 80*f6dc9357SAndroid Build Coastguard Worker } 81*f6dc9357SAndroid Build Coastguard Worker }; 82*f6dc9357SAndroid Build Coastguard Worker 83*f6dc9357SAndroid Build Coastguard Worker /* 84*f6dc9357SAndroid Build Coastguard Worker if (allowReduced == false) - all UTF-8 character sequences must be finished. 85*f6dc9357SAndroid Build Coastguard Worker if (allowReduced == true) - it allows truncated last character-Utf8-sequence 86*f6dc9357SAndroid Build Coastguard Worker */ 87*f6dc9357SAndroid Build Coastguard Worker 88*f6dc9357SAndroid Build Coastguard Worker bool Check_UTF8_Buf(const char *src, size_t size, bool allowReduced) throw(); 89*f6dc9357SAndroid Build Coastguard Worker bool CheckUTF8_AString(const AString &s) throw(); 90*f6dc9357SAndroid Build Coastguard Worker 91*f6dc9357SAndroid Build Coastguard Worker #define Z7_UTF_FLAG_FROM_UTF8_SURROGATE_ERROR (1 << 0) 92*f6dc9357SAndroid Build Coastguard Worker #define Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE (1 << 1) 93*f6dc9357SAndroid Build Coastguard Worker #define Z7_UTF_FLAG_FROM_UTF8_BMP_ESCAPE_CONVERT (1 << 2) 94*f6dc9357SAndroid Build Coastguard Worker 95*f6dc9357SAndroid Build Coastguard Worker /* 96*f6dc9357SAndroid Build Coastguard Worker Z7_UTF_FLAG_FROM_UTF8_SURROGATE_ERROR 97*f6dc9357SAndroid Build Coastguard Worker 98*f6dc9357SAndroid Build Coastguard Worker if (flag is NOT set) 99*f6dc9357SAndroid Build Coastguard Worker { 100*f6dc9357SAndroid Build Coastguard Worker it processes SINGLE-SURROGATE-8 as valid Unicode point. 101*f6dc9357SAndroid Build Coastguard Worker it converts SINGLE-SURROGATE-8 to SINGLE-SURROGATE-16 102*f6dc9357SAndroid Build Coastguard Worker Note: some sequencies of two SINGLE-SURROGATE-8 points 103*f6dc9357SAndroid Build Coastguard Worker will generate correct SURROGATE-16-PAIR, and 104*f6dc9357SAndroid Build Coastguard Worker that SURROGATE-16-PAIR later will be converted to correct 105*f6dc9357SAndroid Build Coastguard Worker UTF8-SURROGATE-21 point. So we don't restore original 106*f6dc9357SAndroid Build Coastguard Worker STR-8 sequence in that case. 107*f6dc9357SAndroid Build Coastguard Worker } 108*f6dc9357SAndroid Build Coastguard Worker 109*f6dc9357SAndroid Build Coastguard Worker if (flag is set) 110*f6dc9357SAndroid Build Coastguard Worker { 111*f6dc9357SAndroid Build Coastguard Worker if (Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE is defined) 112*f6dc9357SAndroid Build Coastguard Worker it generates ESCAPE for SINGLE-SURROGATE-8, 113*f6dc9357SAndroid Build Coastguard Worker if (Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE is not defined) 114*f6dc9357SAndroid Build Coastguard Worker it generates U+fffd for SINGLE-SURROGATE-8, 115*f6dc9357SAndroid Build Coastguard Worker } 116*f6dc9357SAndroid Build Coastguard Worker 117*f6dc9357SAndroid Build Coastguard Worker 118*f6dc9357SAndroid Build Coastguard Worker Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE 119*f6dc9357SAndroid Build Coastguard Worker 120*f6dc9357SAndroid Build Coastguard Worker if (flag is NOT set) 121*f6dc9357SAndroid Build Coastguard Worker it generates (U+fffd) code for non-UTF-8 (invalid) characters 122*f6dc9357SAndroid Build Coastguard Worker 123*f6dc9357SAndroid Build Coastguard Worker if (flag is set) 124*f6dc9357SAndroid Build Coastguard Worker { 125*f6dc9357SAndroid Build Coastguard Worker It generates (ESCAPE) codes for NON-UTF-8 (invalid) characters. 126*f6dc9357SAndroid Build Coastguard Worker And later we can restore original UTF-8-RAW characters from (ESCAPE-16-21) codes. 127*f6dc9357SAndroid Build Coastguard Worker } 128*f6dc9357SAndroid Build Coastguard Worker 129*f6dc9357SAndroid Build Coastguard Worker Z7_UTF_FLAG_FROM_UTF8_BMP_ESCAPE_CONVERT 130*f6dc9357SAndroid Build Coastguard Worker 131*f6dc9357SAndroid Build Coastguard Worker if (flag is NOT set) 132*f6dc9357SAndroid Build Coastguard Worker { 133*f6dc9357SAndroid Build Coastguard Worker it process ESCAPE-8 points as another Unicode points. 134*f6dc9357SAndroid Build Coastguard Worker In Linux: ESCAPE-16 will mean two different ESCAPE-8 seqences, 135*f6dc9357SAndroid Build Coastguard Worker so we need HIGH-ESCAPE-PLANE-21 to restore UTF-8-RAW -> UTF-16 -> UTF-8-RAW 136*f6dc9357SAndroid Build Coastguard Worker } 137*f6dc9357SAndroid Build Coastguard Worker 138*f6dc9357SAndroid Build Coastguard Worker if (flag is set) 139*f6dc9357SAndroid Build Coastguard Worker { 140*f6dc9357SAndroid Build Coastguard Worker it generates ESCAPE-16-21 for ESCAPE-8 points 141*f6dc9357SAndroid Build Coastguard Worker so we can restore UTF-8-RAW -> UTF-16 -> UTF-8-RAW without HIGH-ESCAPE-PLANE-21. 142*f6dc9357SAndroid Build Coastguard Worker } 143*f6dc9357SAndroid Build Coastguard Worker 144*f6dc9357SAndroid Build Coastguard Worker 145*f6dc9357SAndroid Build Coastguard Worker Main USE CASES with UTF-8 <-> UTF-16 conversions: 146*f6dc9357SAndroid Build Coastguard Worker 147*f6dc9357SAndroid Build Coastguard Worker WIN32: UTF-16-RAW -> UTF-8 (Archive) -> UTF-16-RAW 148*f6dc9357SAndroid Build Coastguard Worker { 149*f6dc9357SAndroid Build Coastguard Worker set Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE 150*f6dc9357SAndroid Build Coastguard Worker Do NOT set Z7_UTF_FLAG_FROM_UTF8_SURROGATE_ERROR 151*f6dc9357SAndroid Build Coastguard Worker Do NOT set Z7_UTF_FLAG_FROM_UTF8_BMP_ESCAPE_CONVERT 152*f6dc9357SAndroid Build Coastguard Worker 153*f6dc9357SAndroid Build Coastguard Worker So we restore original SINGLE-SURROGATE-16 from single SINGLE-SURROGATE-8. 154*f6dc9357SAndroid Build Coastguard Worker } 155*f6dc9357SAndroid Build Coastguard Worker 156*f6dc9357SAndroid Build Coastguard Worker Linux: UTF-8-RAW -> UTF-16 (Intermediate / Archive) -> UTF-8-RAW 157*f6dc9357SAndroid Build Coastguard Worker { 158*f6dc9357SAndroid Build Coastguard Worker we want restore original UTF-8-RAW sequence later from that ESCAPE-16. 159*f6dc9357SAndroid Build Coastguard Worker Set the flags: 160*f6dc9357SAndroid Build Coastguard Worker Z7_UTF_FLAG_FROM_UTF8_SURROGATE_ERROR 161*f6dc9357SAndroid Build Coastguard Worker Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE 162*f6dc9357SAndroid Build Coastguard Worker Z7_UTF_FLAG_FROM_UTF8_BMP_ESCAPE_CONVERT 163*f6dc9357SAndroid Build Coastguard Worker } 164*f6dc9357SAndroid Build Coastguard Worker 165*f6dc9357SAndroid Build Coastguard Worker MacOS: UTF-8-RAW -> UTF-16 (Intermediate / Archive) -> UTF-8-RAW 166*f6dc9357SAndroid Build Coastguard Worker { 167*f6dc9357SAndroid Build Coastguard Worker we want to restore correct UTF-8 without any BMP processing: 168*f6dc9357SAndroid Build Coastguard Worker Set the flags: 169*f6dc9357SAndroid Build Coastguard Worker Z7_UTF_FLAG_FROM_UTF8_SURROGATE_ERROR 170*f6dc9357SAndroid Build Coastguard Worker Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE 171*f6dc9357SAndroid Build Coastguard Worker } 172*f6dc9357SAndroid Build Coastguard Worker 173*f6dc9357SAndroid Build Coastguard Worker */ 174*f6dc9357SAndroid Build Coastguard Worker 175*f6dc9357SAndroid Build Coastguard Worker // zero char is not allowed in (src) buf 176*f6dc9357SAndroid Build Coastguard Worker bool Convert_UTF8_Buf_To_Unicode(const char *src, size_t srcSize, UString &dest, unsigned flags = 0); 177*f6dc9357SAndroid Build Coastguard Worker 178*f6dc9357SAndroid Build Coastguard Worker bool ConvertUTF8ToUnicode_Flags(const AString &src, UString &dest, unsigned flags = 0); 179*f6dc9357SAndroid Build Coastguard Worker bool ConvertUTF8ToUnicode(const AString &src, UString &dest); 180*f6dc9357SAndroid Build Coastguard Worker 181*f6dc9357SAndroid Build Coastguard Worker #define Z7_UTF_FLAG_TO_UTF8_SURROGATE_ERROR (1 << 8) 182*f6dc9357SAndroid Build Coastguard Worker #define Z7_UTF_FLAG_TO_UTF8_EXTRACT_BMP_ESCAPE (1 << 9) 183*f6dc9357SAndroid Build Coastguard Worker // #define Z7_UTF_FLAG_TO_UTF8_PARSE_HIGH_ESCAPE (1 << 10) 184*f6dc9357SAndroid Build Coastguard Worker 185*f6dc9357SAndroid Build Coastguard Worker /* 186*f6dc9357SAndroid Build Coastguard Worker Z7_UTF_FLAG_TO_UTF8_SURROGATE_ERROR 187*f6dc9357SAndroid Build Coastguard Worker 188*f6dc9357SAndroid Build Coastguard Worker if (flag is NOT set) 189*f6dc9357SAndroid Build Coastguard Worker { 190*f6dc9357SAndroid Build Coastguard Worker we extract SINGLE-SURROGATE as normal UTF-8 191*f6dc9357SAndroid Build Coastguard Worker 192*f6dc9357SAndroid Build Coastguard Worker In Windows : for UTF-16-RAW <-> UTF-8 (archive) <-> UTF-16-RAW in . 193*f6dc9357SAndroid Build Coastguard Worker 194*f6dc9357SAndroid Build Coastguard Worker In Linux : 195*f6dc9357SAndroid Build Coastguard Worker use-case-1: UTF-8 -> UTF-16 -> UTF-8 doesn't generate UTF-16 SINGLE-SURROGATE, 196*f6dc9357SAndroid Build Coastguard Worker if (Z7_UTF_FLAG_FROM_UTF8_SURROGATE_ERROR) is used. 197*f6dc9357SAndroid Build Coastguard Worker use-case 2: UTF-16-7z (with SINGLE-SURROGATE from Windows) -> UTF-8 (Linux) 198*f6dc9357SAndroid Build Coastguard Worker will generate SINGLE-SURROGATE-UTF-8 here. 199*f6dc9357SAndroid Build Coastguard Worker } 200*f6dc9357SAndroid Build Coastguard Worker 201*f6dc9357SAndroid Build Coastguard Worker if (flag is set) 202*f6dc9357SAndroid Build Coastguard Worker { 203*f6dc9357SAndroid Build Coastguard Worker we generate UTF_REPLACEMENT_CHAR (0xfffd) for SINGLE_SURROGATE 204*f6dc9357SAndroid Build Coastguard Worker it can be used for compatibility mode with WIN32 UTF function 205*f6dc9357SAndroid Build Coastguard Worker or if we want UTF-8 stream without any errors 206*f6dc9357SAndroid Build Coastguard Worker } 207*f6dc9357SAndroid Build Coastguard Worker 208*f6dc9357SAndroid Build Coastguard Worker 209*f6dc9357SAndroid Build Coastguard Worker Z7_UTF_FLAG_TO_UTF8_EXTRACT_BMP_ESCAPE 210*f6dc9357SAndroid Build Coastguard Worker 211*f6dc9357SAndroid Build Coastguard Worker if (flag is NOT set) it doesn't extract raw 8-bit symbol from Escape-Plane-16 212*f6dc9357SAndroid Build Coastguard Worker if (flag is set) it extracts raw 8-bit symbol from Escape-Plane-16 213*f6dc9357SAndroid Build Coastguard Worker 214*f6dc9357SAndroid Build Coastguard Worker in Linux we need some way to extract NON-UTF8 RAW 8-bits from BMP (UTF-16 7z archive): 215*f6dc9357SAndroid Build Coastguard Worker if (we use High-Escape-Plane), we can transfer BMP escapes to High-Escape-Plane. 216*f6dc9357SAndroid Build Coastguard Worker if (we don't use High-Escape-Plane), we must use Z7_UTF_FLAG_TO_UTF8_EXTRACT_BMP_ESCAPE. 217*f6dc9357SAndroid Build Coastguard Worker 218*f6dc9357SAndroid Build Coastguard Worker 219*f6dc9357SAndroid Build Coastguard Worker Z7_UTF_FLAG_TO_UTF8_PARSE_HIGH_ESCAPE 220*f6dc9357SAndroid Build Coastguard Worker // that flag affects the code only if (wchar_t is 32-bit) 221*f6dc9357SAndroid Build Coastguard Worker // that mode with high-escape can be disabled now in UTFConvert.cpp 222*f6dc9357SAndroid Build Coastguard Worker if (flag is NOT set) 223*f6dc9357SAndroid Build Coastguard Worker it doesn't extract raw 8-bit symbol from High-Escape-Plane 224*f6dc9357SAndroid Build Coastguard Worker if (flag is set) 225*f6dc9357SAndroid Build Coastguard Worker it extracts raw 8-bit symbol from High-Escape-Plane 226*f6dc9357SAndroid Build Coastguard Worker 227*f6dc9357SAndroid Build Coastguard Worker Main use cases: 228*f6dc9357SAndroid Build Coastguard Worker 229*f6dc9357SAndroid Build Coastguard Worker WIN32 : UTF-16-RAW -> UTF-8 (archive) -> UTF-16-RAW 230*f6dc9357SAndroid Build Coastguard Worker { 231*f6dc9357SAndroid Build Coastguard Worker Do NOT set Z7_UTF_FLAG_TO_UTF8_EXTRACT_BMP_ESCAPE. 232*f6dc9357SAndroid Build Coastguard Worker Do NOT set Z7_UTF_FLAG_TO_UTF8_SURROGATE_ERROR. 233*f6dc9357SAndroid Build Coastguard Worker So we restore original UTF-16-RAW. 234*f6dc9357SAndroid Build Coastguard Worker } 235*f6dc9357SAndroid Build Coastguard Worker 236*f6dc9357SAndroid Build Coastguard Worker Linix : UTF-8 with Escapes -> UTF-16 (7z archive) -> UTF-8 with Escapes 237*f6dc9357SAndroid Build Coastguard Worker set Z7_UTF_FLAG_TO_UTF8_EXTRACT_BMP_ESCAPE to extract non-UTF from 7z archive 238*f6dc9357SAndroid Build Coastguard Worker set Z7_UTF_FLAG_TO_UTF8_PARSE_HIGH_ESCAPE for intermediate UTF-16. 239*f6dc9357SAndroid Build Coastguard Worker Note: high esacape mode can be ignored now in UTFConvert.cpp 240*f6dc9357SAndroid Build Coastguard Worker 241*f6dc9357SAndroid Build Coastguard Worker macOS: 242*f6dc9357SAndroid Build Coastguard Worker the system doesn't support incorrect UTF-8 in file names. 243*f6dc9357SAndroid Build Coastguard Worker set Z7_UTF_FLAG_TO_UTF8_SURROGATE_ERROR 244*f6dc9357SAndroid Build Coastguard Worker */ 245*f6dc9357SAndroid Build Coastguard Worker 246*f6dc9357SAndroid Build Coastguard Worker extern unsigned g_Unicode_To_UTF8_Flags; 247*f6dc9357SAndroid Build Coastguard Worker 248*f6dc9357SAndroid Build Coastguard Worker void ConvertUnicodeToUTF8_Flags(const UString &src, AString &dest, unsigned flags = 0); 249*f6dc9357SAndroid Build Coastguard Worker void ConvertUnicodeToUTF8(const UString &src, AString &dest); 250*f6dc9357SAndroid Build Coastguard Worker 251*f6dc9357SAndroid Build Coastguard Worker void Convert_Unicode_To_UTF8_Buf(const UString &src, CByteBuffer &dest); 252*f6dc9357SAndroid Build Coastguard Worker 253*f6dc9357SAndroid Build Coastguard Worker /* 254*f6dc9357SAndroid Build Coastguard Worker #ifndef _WIN32 255*f6dc9357SAndroid Build Coastguard Worker void Convert_UTF16_To_UTF32(const UString &src, UString &dest); 256*f6dc9357SAndroid Build Coastguard Worker void Convert_UTF32_To_UTF16(const UString &src, UString &dest); 257*f6dc9357SAndroid Build Coastguard Worker bool UTF32_IsThere_BigPoint(const UString &src); 258*f6dc9357SAndroid Build Coastguard Worker bool Unicode_IsThere_BmpEscape(const UString &src); 259*f6dc9357SAndroid Build Coastguard Worker #endif 260*f6dc9357SAndroid Build Coastguard Worker 261*f6dc9357SAndroid Build Coastguard Worker bool Unicode_IsThere_Utf16SurrogateError(const UString &src); 262*f6dc9357SAndroid Build Coastguard Worker */ 263*f6dc9357SAndroid Build Coastguard Worker 264*f6dc9357SAndroid Build Coastguard Worker #ifdef Z7_WCHART_IS_16BIT 265*f6dc9357SAndroid Build Coastguard Worker #define Convert_UnicodeEsc16_To_UnicodeEscHigh(s) 266*f6dc9357SAndroid Build Coastguard Worker #else 267*f6dc9357SAndroid Build Coastguard Worker void Convert_UnicodeEsc16_To_UnicodeEscHigh(UString &s); 268*f6dc9357SAndroid Build Coastguard Worker #endif 269*f6dc9357SAndroid Build Coastguard Worker 270*f6dc9357SAndroid Build Coastguard Worker /* 271*f6dc9357SAndroid Build Coastguard Worker // #include "../../C/CpuArch.h" 272*f6dc9357SAndroid Build Coastguard Worker 273*f6dc9357SAndroid Build Coastguard Worker // ---------- Utf16 Little endian functions ---------- 274*f6dc9357SAndroid Build Coastguard Worker 275*f6dc9357SAndroid Build Coastguard Worker // We store 16-bit surrogates even in 32-bit WCHARs in Linux. 276*f6dc9357SAndroid Build Coastguard Worker // So now we don't use the following code: 277*f6dc9357SAndroid Build Coastguard Worker 278*f6dc9357SAndroid Build Coastguard Worker #if WCHAR_MAX > 0xffff 279*f6dc9357SAndroid Build Coastguard Worker 280*f6dc9357SAndroid Build Coastguard Worker // void *p : pointer to src bytes stream 281*f6dc9357SAndroid Build Coastguard Worker // size_t len : num Utf16 characters : it can include or not include NULL character 282*f6dc9357SAndroid Build Coastguard Worker 283*f6dc9357SAndroid Build Coastguard Worker inline size_t Utf16LE__Get_Num_WCHARs(const void *p, size_t len) 284*f6dc9357SAndroid Build Coastguard Worker { 285*f6dc9357SAndroid Build Coastguard Worker #if WCHAR_MAX > 0xffff 286*f6dc9357SAndroid Build Coastguard Worker size_t num_wchars = 0; 287*f6dc9357SAndroid Build Coastguard Worker for (size_t i = 0; i < len; i++) 288*f6dc9357SAndroid Build Coastguard Worker { 289*f6dc9357SAndroid Build Coastguard Worker wchar_t c = GetUi16(p); 290*f6dc9357SAndroid Build Coastguard Worker p = (const void *)((const Byte *)p + 2); 291*f6dc9357SAndroid Build Coastguard Worker if (c >= 0xd800 && c < 0xdc00 && i + 1 != len) 292*f6dc9357SAndroid Build Coastguard Worker { 293*f6dc9357SAndroid Build Coastguard Worker wchar_t c2 = GetUi16(p); 294*f6dc9357SAndroid Build Coastguard Worker if (c2 >= 0xdc00 && c2 < 0xe000) 295*f6dc9357SAndroid Build Coastguard Worker { 296*f6dc9357SAndroid Build Coastguard Worker c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff); 297*f6dc9357SAndroid Build Coastguard Worker p = (const void *)((const Byte *)p + 2); 298*f6dc9357SAndroid Build Coastguard Worker i++; 299*f6dc9357SAndroid Build Coastguard Worker } 300*f6dc9357SAndroid Build Coastguard Worker } 301*f6dc9357SAndroid Build Coastguard Worker num_wchars++; 302*f6dc9357SAndroid Build Coastguard Worker } 303*f6dc9357SAndroid Build Coastguard Worker return num_wchars; 304*f6dc9357SAndroid Build Coastguard Worker #else 305*f6dc9357SAndroid Build Coastguard Worker UNUSED_VAR(p) 306*f6dc9357SAndroid Build Coastguard Worker return len; 307*f6dc9357SAndroid Build Coastguard Worker #endif 308*f6dc9357SAndroid Build Coastguard Worker } 309*f6dc9357SAndroid Build Coastguard Worker 310*f6dc9357SAndroid Build Coastguard Worker // #include <stdio.h> 311*f6dc9357SAndroid Build Coastguard Worker 312*f6dc9357SAndroid Build Coastguard Worker inline wchar_t *Utf16LE__To_WCHARs_Sep(const void *p, size_t len, wchar_t *dest) 313*f6dc9357SAndroid Build Coastguard Worker { 314*f6dc9357SAndroid Build Coastguard Worker for (size_t i = 0; i < len; i++) 315*f6dc9357SAndroid Build Coastguard Worker { 316*f6dc9357SAndroid Build Coastguard Worker wchar_t c = GetUi16(p); 317*f6dc9357SAndroid Build Coastguard Worker p = (const void *)((const Byte *)p + 2); 318*f6dc9357SAndroid Build Coastguard Worker 319*f6dc9357SAndroid Build Coastguard Worker #if WCHAR_PATH_SEPARATOR != L'/' 320*f6dc9357SAndroid Build Coastguard Worker if (c == L'/') 321*f6dc9357SAndroid Build Coastguard Worker c = WCHAR_PATH_SEPARATOR; 322*f6dc9357SAndroid Build Coastguard Worker #endif 323*f6dc9357SAndroid Build Coastguard Worker 324*f6dc9357SAndroid Build Coastguard Worker #if WCHAR_MAX > 0xffff 325*f6dc9357SAndroid Build Coastguard Worker 326*f6dc9357SAndroid Build Coastguard Worker if (c >= 0xd800 && c < 0xdc00 && i + 1 != len) 327*f6dc9357SAndroid Build Coastguard Worker { 328*f6dc9357SAndroid Build Coastguard Worker wchar_t c2 = GetUi16(p); 329*f6dc9357SAndroid Build Coastguard Worker if (c2 >= 0xdc00 && c2 < 0xe000) 330*f6dc9357SAndroid Build Coastguard Worker { 331*f6dc9357SAndroid Build Coastguard Worker // printf("\nSurragate : %4x %4x -> ", (int)c, (int)c2); 332*f6dc9357SAndroid Build Coastguard Worker c = 0x10000 + ((c & 0x3ff) << 10) + (c2 & 0x3ff); 333*f6dc9357SAndroid Build Coastguard Worker p = (const void *)((const Byte *)p + 2); 334*f6dc9357SAndroid Build Coastguard Worker i++; 335*f6dc9357SAndroid Build Coastguard Worker // printf("%4x\n", (int)c); 336*f6dc9357SAndroid Build Coastguard Worker } 337*f6dc9357SAndroid Build Coastguard Worker } 338*f6dc9357SAndroid Build Coastguard Worker 339*f6dc9357SAndroid Build Coastguard Worker #endif 340*f6dc9357SAndroid Build Coastguard Worker 341*f6dc9357SAndroid Build Coastguard Worker *dest++ = c; 342*f6dc9357SAndroid Build Coastguard Worker } 343*f6dc9357SAndroid Build Coastguard Worker return dest; 344*f6dc9357SAndroid Build Coastguard Worker } 345*f6dc9357SAndroid Build Coastguard Worker 346*f6dc9357SAndroid Build Coastguard Worker 347*f6dc9357SAndroid Build Coastguard Worker inline size_t Get_Num_Utf16_chars_from_wchar_string(const wchar_t *p) 348*f6dc9357SAndroid Build Coastguard Worker { 349*f6dc9357SAndroid Build Coastguard Worker size_t num = 0; 350*f6dc9357SAndroid Build Coastguard Worker for (;;) 351*f6dc9357SAndroid Build Coastguard Worker { 352*f6dc9357SAndroid Build Coastguard Worker wchar_t c = *p++; 353*f6dc9357SAndroid Build Coastguard Worker if (c == 0) 354*f6dc9357SAndroid Build Coastguard Worker return num; 355*f6dc9357SAndroid Build Coastguard Worker num += ((c >= 0x10000 && c < 0x110000) ? 2 : 1); 356*f6dc9357SAndroid Build Coastguard Worker } 357*f6dc9357SAndroid Build Coastguard Worker return num; 358*f6dc9357SAndroid Build Coastguard Worker } 359*f6dc9357SAndroid Build Coastguard Worker 360*f6dc9357SAndroid Build Coastguard Worker inline Byte *wchars_to_Utf16LE(const wchar_t *p, Byte *dest) 361*f6dc9357SAndroid Build Coastguard Worker { 362*f6dc9357SAndroid Build Coastguard Worker for (;;) 363*f6dc9357SAndroid Build Coastguard Worker { 364*f6dc9357SAndroid Build Coastguard Worker wchar_t c = *p++; 365*f6dc9357SAndroid Build Coastguard Worker if (c == 0) 366*f6dc9357SAndroid Build Coastguard Worker return dest; 367*f6dc9357SAndroid Build Coastguard Worker if (c >= 0x10000 && c < 0x110000) 368*f6dc9357SAndroid Build Coastguard Worker { 369*f6dc9357SAndroid Build Coastguard Worker SetUi16(dest , (UInt16)(0xd800 + ((c >> 10) & 0x3FF))); 370*f6dc9357SAndroid Build Coastguard Worker SetUi16(dest + 2, (UInt16)(0xdc00 + ( c & 0x3FF))); 371*f6dc9357SAndroid Build Coastguard Worker dest += 4; 372*f6dc9357SAndroid Build Coastguard Worker } 373*f6dc9357SAndroid Build Coastguard Worker else 374*f6dc9357SAndroid Build Coastguard Worker { 375*f6dc9357SAndroid Build Coastguard Worker SetUi16(dest, c); 376*f6dc9357SAndroid Build Coastguard Worker dest += 2; 377*f6dc9357SAndroid Build Coastguard Worker } 378*f6dc9357SAndroid Build Coastguard Worker } 379*f6dc9357SAndroid Build Coastguard Worker } 380*f6dc9357SAndroid Build Coastguard Worker 381*f6dc9357SAndroid Build Coastguard Worker #endif 382*f6dc9357SAndroid Build Coastguard Worker */ 383*f6dc9357SAndroid Build Coastguard Worker 384*f6dc9357SAndroid Build Coastguard Worker #endif 385