1*0e209d39SAndroid Build Coastguard Worker // © 2016 and later: Unicode, Inc. and others.
2*0e209d39SAndroid Build Coastguard Worker // License & terms of use: http://www.unicode.org/copyright.html
3*0e209d39SAndroid Build Coastguard Worker /*
4*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
5*0e209d39SAndroid Build Coastguard Worker *
6*0e209d39SAndroid Build Coastguard Worker * Copyright (C) 1999-2014, International Business Machines
7*0e209d39SAndroid Build Coastguard Worker * Corporation and others. All Rights Reserved.
8*0e209d39SAndroid Build Coastguard Worker *
9*0e209d39SAndroid Build Coastguard Worker *******************************************************************************
10*0e209d39SAndroid Build Coastguard Worker * file name: unistr_cnv.cpp
11*0e209d39SAndroid Build Coastguard Worker * encoding: UTF-8
12*0e209d39SAndroid Build Coastguard Worker * tab size: 8 (not used)
13*0e209d39SAndroid Build Coastguard Worker * indentation:2
14*0e209d39SAndroid Build Coastguard Worker *
15*0e209d39SAndroid Build Coastguard Worker * created on: 2004aug19
16*0e209d39SAndroid Build Coastguard Worker * created by: Markus W. Scherer
17*0e209d39SAndroid Build Coastguard Worker *
18*0e209d39SAndroid Build Coastguard Worker * Character conversion functions moved here from unistr.cpp
19*0e209d39SAndroid Build Coastguard Worker */
20*0e209d39SAndroid Build Coastguard Worker
21*0e209d39SAndroid Build Coastguard Worker #include "unicode/utypes.h"
22*0e209d39SAndroid Build Coastguard Worker
23*0e209d39SAndroid Build Coastguard Worker #if !UCONFIG_NO_CONVERSION
24*0e209d39SAndroid Build Coastguard Worker
25*0e209d39SAndroid Build Coastguard Worker #include "unicode/putil.h"
26*0e209d39SAndroid Build Coastguard Worker #include "cstring.h"
27*0e209d39SAndroid Build Coastguard Worker #include "cmemory.h"
28*0e209d39SAndroid Build Coastguard Worker #include "unicode/ustring.h"
29*0e209d39SAndroid Build Coastguard Worker #include "unicode/unistr.h"
30*0e209d39SAndroid Build Coastguard Worker #include "unicode/ucnv.h"
31*0e209d39SAndroid Build Coastguard Worker #include "ucnv_imp.h"
32*0e209d39SAndroid Build Coastguard Worker #include "putilimp.h"
33*0e209d39SAndroid Build Coastguard Worker #include "ustr_cnv.h"
34*0e209d39SAndroid Build Coastguard Worker #include "ustr_imp.h"
35*0e209d39SAndroid Build Coastguard Worker
36*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_BEGIN
37*0e209d39SAndroid Build Coastguard Worker
38*0e209d39SAndroid Build Coastguard Worker //========================================
39*0e209d39SAndroid Build Coastguard Worker // Constructors
40*0e209d39SAndroid Build Coastguard Worker //========================================
41*0e209d39SAndroid Build Coastguard Worker
42*0e209d39SAndroid Build Coastguard Worker #if !U_CHARSET_IS_UTF8
43*0e209d39SAndroid Build Coastguard Worker
UnicodeString(const char * codepageData)44*0e209d39SAndroid Build Coastguard Worker UnicodeString::UnicodeString(const char *codepageData) {
45*0e209d39SAndroid Build Coastguard Worker fUnion.fFields.fLengthAndFlags = kShortString;
46*0e209d39SAndroid Build Coastguard Worker if(codepageData != 0) {
47*0e209d39SAndroid Build Coastguard Worker doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), 0);
48*0e209d39SAndroid Build Coastguard Worker }
49*0e209d39SAndroid Build Coastguard Worker }
50*0e209d39SAndroid Build Coastguard Worker
UnicodeString(const char * codepageData,int32_t dataLength)51*0e209d39SAndroid Build Coastguard Worker UnicodeString::UnicodeString(const char *codepageData,
52*0e209d39SAndroid Build Coastguard Worker int32_t dataLength) {
53*0e209d39SAndroid Build Coastguard Worker fUnion.fFields.fLengthAndFlags = kShortString;
54*0e209d39SAndroid Build Coastguard Worker if(codepageData != 0) {
55*0e209d39SAndroid Build Coastguard Worker doCodepageCreate(codepageData, dataLength, 0);
56*0e209d39SAndroid Build Coastguard Worker }
57*0e209d39SAndroid Build Coastguard Worker }
58*0e209d39SAndroid Build Coastguard Worker
59*0e209d39SAndroid Build Coastguard Worker // else see unistr.cpp
60*0e209d39SAndroid Build Coastguard Worker #endif
61*0e209d39SAndroid Build Coastguard Worker
UnicodeString(const char * codepageData,const char * codepage)62*0e209d39SAndroid Build Coastguard Worker UnicodeString::UnicodeString(const char *codepageData,
63*0e209d39SAndroid Build Coastguard Worker const char *codepage) {
64*0e209d39SAndroid Build Coastguard Worker fUnion.fFields.fLengthAndFlags = kShortString;
65*0e209d39SAndroid Build Coastguard Worker if (codepageData != nullptr) {
66*0e209d39SAndroid Build Coastguard Worker doCodepageCreate(codepageData, (int32_t)uprv_strlen(codepageData), codepage);
67*0e209d39SAndroid Build Coastguard Worker }
68*0e209d39SAndroid Build Coastguard Worker }
69*0e209d39SAndroid Build Coastguard Worker
UnicodeString(const char * codepageData,int32_t dataLength,const char * codepage)70*0e209d39SAndroid Build Coastguard Worker UnicodeString::UnicodeString(const char *codepageData,
71*0e209d39SAndroid Build Coastguard Worker int32_t dataLength,
72*0e209d39SAndroid Build Coastguard Worker const char *codepage) {
73*0e209d39SAndroid Build Coastguard Worker fUnion.fFields.fLengthAndFlags = kShortString;
74*0e209d39SAndroid Build Coastguard Worker if (codepageData != nullptr) {
75*0e209d39SAndroid Build Coastguard Worker doCodepageCreate(codepageData, dataLength, codepage);
76*0e209d39SAndroid Build Coastguard Worker }
77*0e209d39SAndroid Build Coastguard Worker }
78*0e209d39SAndroid Build Coastguard Worker
UnicodeString(const char * src,int32_t srcLength,UConverter * cnv,UErrorCode & errorCode)79*0e209d39SAndroid Build Coastguard Worker UnicodeString::UnicodeString(const char *src, int32_t srcLength,
80*0e209d39SAndroid Build Coastguard Worker UConverter *cnv,
81*0e209d39SAndroid Build Coastguard Worker UErrorCode &errorCode) {
82*0e209d39SAndroid Build Coastguard Worker fUnion.fFields.fLengthAndFlags = kShortString;
83*0e209d39SAndroid Build Coastguard Worker if(U_SUCCESS(errorCode)) {
84*0e209d39SAndroid Build Coastguard Worker // check arguments
85*0e209d39SAndroid Build Coastguard Worker if(src==nullptr) {
86*0e209d39SAndroid Build Coastguard Worker // treat as an empty string, do nothing more
87*0e209d39SAndroid Build Coastguard Worker } else if(srcLength<-1) {
88*0e209d39SAndroid Build Coastguard Worker errorCode=U_ILLEGAL_ARGUMENT_ERROR;
89*0e209d39SAndroid Build Coastguard Worker } else {
90*0e209d39SAndroid Build Coastguard Worker // get input length
91*0e209d39SAndroid Build Coastguard Worker if(srcLength==-1) {
92*0e209d39SAndroid Build Coastguard Worker srcLength=(int32_t)uprv_strlen(src);
93*0e209d39SAndroid Build Coastguard Worker }
94*0e209d39SAndroid Build Coastguard Worker if(srcLength>0) {
95*0e209d39SAndroid Build Coastguard Worker if (cnv != nullptr) {
96*0e209d39SAndroid Build Coastguard Worker // use the provided converter
97*0e209d39SAndroid Build Coastguard Worker ucnv_resetToUnicode(cnv);
98*0e209d39SAndroid Build Coastguard Worker doCodepageCreate(src, srcLength, cnv, errorCode);
99*0e209d39SAndroid Build Coastguard Worker } else {
100*0e209d39SAndroid Build Coastguard Worker // use the default converter
101*0e209d39SAndroid Build Coastguard Worker cnv=u_getDefaultConverter(&errorCode);
102*0e209d39SAndroid Build Coastguard Worker doCodepageCreate(src, srcLength, cnv, errorCode);
103*0e209d39SAndroid Build Coastguard Worker u_releaseDefaultConverter(cnv);
104*0e209d39SAndroid Build Coastguard Worker }
105*0e209d39SAndroid Build Coastguard Worker }
106*0e209d39SAndroid Build Coastguard Worker }
107*0e209d39SAndroid Build Coastguard Worker
108*0e209d39SAndroid Build Coastguard Worker if(U_FAILURE(errorCode)) {
109*0e209d39SAndroid Build Coastguard Worker setToBogus();
110*0e209d39SAndroid Build Coastguard Worker }
111*0e209d39SAndroid Build Coastguard Worker }
112*0e209d39SAndroid Build Coastguard Worker }
113*0e209d39SAndroid Build Coastguard Worker
114*0e209d39SAndroid Build Coastguard Worker //========================================
115*0e209d39SAndroid Build Coastguard Worker // Codeset conversion
116*0e209d39SAndroid Build Coastguard Worker //========================================
117*0e209d39SAndroid Build Coastguard Worker
118*0e209d39SAndroid Build Coastguard Worker #if !U_CHARSET_IS_UTF8
119*0e209d39SAndroid Build Coastguard Worker
120*0e209d39SAndroid Build Coastguard Worker int32_t
extract(int32_t start,int32_t length,char * target,uint32_t dstSize) const121*0e209d39SAndroid Build Coastguard Worker UnicodeString::extract(int32_t start,
122*0e209d39SAndroid Build Coastguard Worker int32_t length,
123*0e209d39SAndroid Build Coastguard Worker char *target,
124*0e209d39SAndroid Build Coastguard Worker uint32_t dstSize) const {
125*0e209d39SAndroid Build Coastguard Worker return extract(start, length, target, dstSize, 0);
126*0e209d39SAndroid Build Coastguard Worker }
127*0e209d39SAndroid Build Coastguard Worker
128*0e209d39SAndroid Build Coastguard Worker // else see unistr.cpp
129*0e209d39SAndroid Build Coastguard Worker #endif
130*0e209d39SAndroid Build Coastguard Worker
131*0e209d39SAndroid Build Coastguard Worker int32_t
extract(int32_t start,int32_t length,char * target,uint32_t dstSize,const char * codepage) const132*0e209d39SAndroid Build Coastguard Worker UnicodeString::extract(int32_t start,
133*0e209d39SAndroid Build Coastguard Worker int32_t length,
134*0e209d39SAndroid Build Coastguard Worker char *target,
135*0e209d39SAndroid Build Coastguard Worker uint32_t dstSize,
136*0e209d39SAndroid Build Coastguard Worker const char *codepage) const
137*0e209d39SAndroid Build Coastguard Worker {
138*0e209d39SAndroid Build Coastguard Worker // if the arguments are illegal, then do nothing
139*0e209d39SAndroid Build Coastguard Worker if (/*dstSize < 0 || */(dstSize > 0 && target == nullptr)) {
140*0e209d39SAndroid Build Coastguard Worker return 0;
141*0e209d39SAndroid Build Coastguard Worker }
142*0e209d39SAndroid Build Coastguard Worker
143*0e209d39SAndroid Build Coastguard Worker // pin the indices to legal values
144*0e209d39SAndroid Build Coastguard Worker pinIndices(start, length);
145*0e209d39SAndroid Build Coastguard Worker
146*0e209d39SAndroid Build Coastguard Worker // We need to cast dstSize to int32_t for all subsequent code.
147*0e209d39SAndroid Build Coastguard Worker // I don't know why the API was defined with uint32_t but we are stuck with it.
148*0e209d39SAndroid Build Coastguard Worker // Also, dstSize==0xffffffff means "unlimited" but if we use target+dstSize
149*0e209d39SAndroid Build Coastguard Worker // as a limit in some functions, it may wrap around and yield a pointer
150*0e209d39SAndroid Build Coastguard Worker // that compares less-than target.
151*0e209d39SAndroid Build Coastguard Worker int32_t capacity;
152*0e209d39SAndroid Build Coastguard Worker if(dstSize < 0x7fffffff) {
153*0e209d39SAndroid Build Coastguard Worker // Assume that the capacity is real and a limit pointer won't wrap around.
154*0e209d39SAndroid Build Coastguard Worker capacity = (int32_t)dstSize;
155*0e209d39SAndroid Build Coastguard Worker } else {
156*0e209d39SAndroid Build Coastguard Worker // Pin the capacity so that a limit pointer does not wrap around.
157*0e209d39SAndroid Build Coastguard Worker char *targetLimit = (char *)U_MAX_PTR(target);
158*0e209d39SAndroid Build Coastguard Worker // U_MAX_PTR(target) returns a targetLimit that is at most 0x7fffffff
159*0e209d39SAndroid Build Coastguard Worker // greater than target and does not wrap around the top of the address space.
160*0e209d39SAndroid Build Coastguard Worker capacity = (int32_t)(targetLimit - target);
161*0e209d39SAndroid Build Coastguard Worker }
162*0e209d39SAndroid Build Coastguard Worker
163*0e209d39SAndroid Build Coastguard Worker // create the converter
164*0e209d39SAndroid Build Coastguard Worker UConverter *converter;
165*0e209d39SAndroid Build Coastguard Worker UErrorCode status = U_ZERO_ERROR;
166*0e209d39SAndroid Build Coastguard Worker
167*0e209d39SAndroid Build Coastguard Worker // just write the NUL if the string length is 0
168*0e209d39SAndroid Build Coastguard Worker if(length == 0) {
169*0e209d39SAndroid Build Coastguard Worker return u_terminateChars(target, capacity, 0, &status);
170*0e209d39SAndroid Build Coastguard Worker }
171*0e209d39SAndroid Build Coastguard Worker
172*0e209d39SAndroid Build Coastguard Worker // if the codepage is the default, use our cache
173*0e209d39SAndroid Build Coastguard Worker // if it is an empty string, then use the "invariant character" conversion
174*0e209d39SAndroid Build Coastguard Worker if (codepage == nullptr) {
175*0e209d39SAndroid Build Coastguard Worker const char *defaultName = ucnv_getDefaultName();
176*0e209d39SAndroid Build Coastguard Worker if(UCNV_FAST_IS_UTF8(defaultName)) {
177*0e209d39SAndroid Build Coastguard Worker return toUTF8(start, length, target, capacity);
178*0e209d39SAndroid Build Coastguard Worker }
179*0e209d39SAndroid Build Coastguard Worker converter = u_getDefaultConverter(&status);
180*0e209d39SAndroid Build Coastguard Worker } else if (*codepage == 0) {
181*0e209d39SAndroid Build Coastguard Worker // use the "invariant characters" conversion
182*0e209d39SAndroid Build Coastguard Worker int32_t destLength;
183*0e209d39SAndroid Build Coastguard Worker if(length <= capacity) {
184*0e209d39SAndroid Build Coastguard Worker destLength = length;
185*0e209d39SAndroid Build Coastguard Worker } else {
186*0e209d39SAndroid Build Coastguard Worker destLength = capacity;
187*0e209d39SAndroid Build Coastguard Worker }
188*0e209d39SAndroid Build Coastguard Worker u_UCharsToChars(getArrayStart() + start, target, destLength);
189*0e209d39SAndroid Build Coastguard Worker return u_terminateChars(target, capacity, length, &status);
190*0e209d39SAndroid Build Coastguard Worker } else {
191*0e209d39SAndroid Build Coastguard Worker converter = ucnv_open(codepage, &status);
192*0e209d39SAndroid Build Coastguard Worker }
193*0e209d39SAndroid Build Coastguard Worker
194*0e209d39SAndroid Build Coastguard Worker length = doExtract(start, length, target, capacity, converter, status);
195*0e209d39SAndroid Build Coastguard Worker
196*0e209d39SAndroid Build Coastguard Worker // close the converter
197*0e209d39SAndroid Build Coastguard Worker if (codepage == nullptr) {
198*0e209d39SAndroid Build Coastguard Worker u_releaseDefaultConverter(converter);
199*0e209d39SAndroid Build Coastguard Worker } else {
200*0e209d39SAndroid Build Coastguard Worker ucnv_close(converter);
201*0e209d39SAndroid Build Coastguard Worker }
202*0e209d39SAndroid Build Coastguard Worker
203*0e209d39SAndroid Build Coastguard Worker return length;
204*0e209d39SAndroid Build Coastguard Worker }
205*0e209d39SAndroid Build Coastguard Worker
206*0e209d39SAndroid Build Coastguard Worker int32_t
extract(char * dest,int32_t destCapacity,UConverter * cnv,UErrorCode & errorCode) const207*0e209d39SAndroid Build Coastguard Worker UnicodeString::extract(char *dest, int32_t destCapacity,
208*0e209d39SAndroid Build Coastguard Worker UConverter *cnv,
209*0e209d39SAndroid Build Coastguard Worker UErrorCode &errorCode) const
210*0e209d39SAndroid Build Coastguard Worker {
211*0e209d39SAndroid Build Coastguard Worker if(U_FAILURE(errorCode)) {
212*0e209d39SAndroid Build Coastguard Worker return 0;
213*0e209d39SAndroid Build Coastguard Worker }
214*0e209d39SAndroid Build Coastguard Worker
215*0e209d39SAndroid Build Coastguard Worker if (isBogus() || destCapacity < 0 || (destCapacity > 0 && dest == nullptr)) {
216*0e209d39SAndroid Build Coastguard Worker errorCode=U_ILLEGAL_ARGUMENT_ERROR;
217*0e209d39SAndroid Build Coastguard Worker return 0;
218*0e209d39SAndroid Build Coastguard Worker }
219*0e209d39SAndroid Build Coastguard Worker
220*0e209d39SAndroid Build Coastguard Worker // nothing to do?
221*0e209d39SAndroid Build Coastguard Worker if(isEmpty()) {
222*0e209d39SAndroid Build Coastguard Worker return u_terminateChars(dest, destCapacity, 0, &errorCode);
223*0e209d39SAndroid Build Coastguard Worker }
224*0e209d39SAndroid Build Coastguard Worker
225*0e209d39SAndroid Build Coastguard Worker // get the converter
226*0e209d39SAndroid Build Coastguard Worker UBool isDefaultConverter;
227*0e209d39SAndroid Build Coastguard Worker if (cnv == nullptr) {
228*0e209d39SAndroid Build Coastguard Worker isDefaultConverter=true;
229*0e209d39SAndroid Build Coastguard Worker cnv=u_getDefaultConverter(&errorCode);
230*0e209d39SAndroid Build Coastguard Worker if(U_FAILURE(errorCode)) {
231*0e209d39SAndroid Build Coastguard Worker return 0;
232*0e209d39SAndroid Build Coastguard Worker }
233*0e209d39SAndroid Build Coastguard Worker } else {
234*0e209d39SAndroid Build Coastguard Worker isDefaultConverter=false;
235*0e209d39SAndroid Build Coastguard Worker ucnv_resetFromUnicode(cnv);
236*0e209d39SAndroid Build Coastguard Worker }
237*0e209d39SAndroid Build Coastguard Worker
238*0e209d39SAndroid Build Coastguard Worker // convert
239*0e209d39SAndroid Build Coastguard Worker int32_t len=doExtract(0, length(), dest, destCapacity, cnv, errorCode);
240*0e209d39SAndroid Build Coastguard Worker
241*0e209d39SAndroid Build Coastguard Worker // release the converter
242*0e209d39SAndroid Build Coastguard Worker if(isDefaultConverter) {
243*0e209d39SAndroid Build Coastguard Worker u_releaseDefaultConverter(cnv);
244*0e209d39SAndroid Build Coastguard Worker }
245*0e209d39SAndroid Build Coastguard Worker
246*0e209d39SAndroid Build Coastguard Worker return len;
247*0e209d39SAndroid Build Coastguard Worker }
248*0e209d39SAndroid Build Coastguard Worker
249*0e209d39SAndroid Build Coastguard Worker int32_t
doExtract(int32_t start,int32_t length,char * dest,int32_t destCapacity,UConverter * cnv,UErrorCode & errorCode) const250*0e209d39SAndroid Build Coastguard Worker UnicodeString::doExtract(int32_t start, int32_t length,
251*0e209d39SAndroid Build Coastguard Worker char *dest, int32_t destCapacity,
252*0e209d39SAndroid Build Coastguard Worker UConverter *cnv,
253*0e209d39SAndroid Build Coastguard Worker UErrorCode &errorCode) const
254*0e209d39SAndroid Build Coastguard Worker {
255*0e209d39SAndroid Build Coastguard Worker if(U_FAILURE(errorCode)) {
256*0e209d39SAndroid Build Coastguard Worker if(destCapacity!=0) {
257*0e209d39SAndroid Build Coastguard Worker *dest=0;
258*0e209d39SAndroid Build Coastguard Worker }
259*0e209d39SAndroid Build Coastguard Worker return 0;
260*0e209d39SAndroid Build Coastguard Worker }
261*0e209d39SAndroid Build Coastguard Worker
262*0e209d39SAndroid Build Coastguard Worker const char16_t *src=getArrayStart()+start, *srcLimit=src+length;
263*0e209d39SAndroid Build Coastguard Worker char *originalDest=dest;
264*0e209d39SAndroid Build Coastguard Worker const char *destLimit;
265*0e209d39SAndroid Build Coastguard Worker
266*0e209d39SAndroid Build Coastguard Worker if(destCapacity==0) {
267*0e209d39SAndroid Build Coastguard Worker destLimit=dest=nullptr;
268*0e209d39SAndroid Build Coastguard Worker } else if(destCapacity==-1) {
269*0e209d39SAndroid Build Coastguard Worker // Pin the limit to U_MAX_PTR if the "magic" destCapacity is used.
270*0e209d39SAndroid Build Coastguard Worker destLimit=(char*)U_MAX_PTR(dest);
271*0e209d39SAndroid Build Coastguard Worker // for NUL-termination, translate into highest int32_t
272*0e209d39SAndroid Build Coastguard Worker destCapacity=0x7fffffff;
273*0e209d39SAndroid Build Coastguard Worker } else {
274*0e209d39SAndroid Build Coastguard Worker destLimit=dest+destCapacity;
275*0e209d39SAndroid Build Coastguard Worker }
276*0e209d39SAndroid Build Coastguard Worker
277*0e209d39SAndroid Build Coastguard Worker // perform the conversion
278*0e209d39SAndroid Build Coastguard Worker ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &errorCode);
279*0e209d39SAndroid Build Coastguard Worker length=(int32_t)(dest-originalDest);
280*0e209d39SAndroid Build Coastguard Worker
281*0e209d39SAndroid Build Coastguard Worker // if an overflow occurs, then get the preflighting length
282*0e209d39SAndroid Build Coastguard Worker if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
283*0e209d39SAndroid Build Coastguard Worker char buffer[1024];
284*0e209d39SAndroid Build Coastguard Worker
285*0e209d39SAndroid Build Coastguard Worker destLimit=buffer+sizeof(buffer);
286*0e209d39SAndroid Build Coastguard Worker do {
287*0e209d39SAndroid Build Coastguard Worker dest=buffer;
288*0e209d39SAndroid Build Coastguard Worker errorCode=U_ZERO_ERROR;
289*0e209d39SAndroid Build Coastguard Worker ucnv_fromUnicode(cnv, &dest, destLimit, &src, srcLimit, nullptr, true, &errorCode);
290*0e209d39SAndroid Build Coastguard Worker length+=(int32_t)(dest-buffer);
291*0e209d39SAndroid Build Coastguard Worker } while(errorCode==U_BUFFER_OVERFLOW_ERROR);
292*0e209d39SAndroid Build Coastguard Worker }
293*0e209d39SAndroid Build Coastguard Worker
294*0e209d39SAndroid Build Coastguard Worker return u_terminateChars(originalDest, destCapacity, length, &errorCode);
295*0e209d39SAndroid Build Coastguard Worker }
296*0e209d39SAndroid Build Coastguard Worker
297*0e209d39SAndroid Build Coastguard Worker void
doCodepageCreate(const char * codepageData,int32_t dataLength,const char * codepage)298*0e209d39SAndroid Build Coastguard Worker UnicodeString::doCodepageCreate(const char *codepageData,
299*0e209d39SAndroid Build Coastguard Worker int32_t dataLength,
300*0e209d39SAndroid Build Coastguard Worker const char *codepage)
301*0e209d39SAndroid Build Coastguard Worker {
302*0e209d39SAndroid Build Coastguard Worker // if there's nothing to convert, do nothing
303*0e209d39SAndroid Build Coastguard Worker if (codepageData == nullptr || dataLength == 0 || dataLength < -1) {
304*0e209d39SAndroid Build Coastguard Worker return;
305*0e209d39SAndroid Build Coastguard Worker }
306*0e209d39SAndroid Build Coastguard Worker if(dataLength == -1) {
307*0e209d39SAndroid Build Coastguard Worker dataLength = (int32_t)uprv_strlen(codepageData);
308*0e209d39SAndroid Build Coastguard Worker }
309*0e209d39SAndroid Build Coastguard Worker
310*0e209d39SAndroid Build Coastguard Worker UErrorCode status = U_ZERO_ERROR;
311*0e209d39SAndroid Build Coastguard Worker
312*0e209d39SAndroid Build Coastguard Worker // create the converter
313*0e209d39SAndroid Build Coastguard Worker // if the codepage is the default, use our cache
314*0e209d39SAndroid Build Coastguard Worker // if it is an empty string, then use the "invariant character" conversion
315*0e209d39SAndroid Build Coastguard Worker UConverter *converter;
316*0e209d39SAndroid Build Coastguard Worker if (codepage == nullptr) {
317*0e209d39SAndroid Build Coastguard Worker const char *defaultName = ucnv_getDefaultName();
318*0e209d39SAndroid Build Coastguard Worker if(UCNV_FAST_IS_UTF8(defaultName)) {
319*0e209d39SAndroid Build Coastguard Worker setToUTF8(StringPiece(codepageData, dataLength));
320*0e209d39SAndroid Build Coastguard Worker return;
321*0e209d39SAndroid Build Coastguard Worker }
322*0e209d39SAndroid Build Coastguard Worker converter = u_getDefaultConverter(&status);
323*0e209d39SAndroid Build Coastguard Worker } else if (*codepage == 0) {
324*0e209d39SAndroid Build Coastguard Worker // use the "invariant characters" conversion
325*0e209d39SAndroid Build Coastguard Worker if(cloneArrayIfNeeded(dataLength, dataLength, false)) {
326*0e209d39SAndroid Build Coastguard Worker u_charsToUChars(codepageData, getArrayStart(), dataLength);
327*0e209d39SAndroid Build Coastguard Worker setLength(dataLength);
328*0e209d39SAndroid Build Coastguard Worker } else {
329*0e209d39SAndroid Build Coastguard Worker setToBogus();
330*0e209d39SAndroid Build Coastguard Worker }
331*0e209d39SAndroid Build Coastguard Worker return;
332*0e209d39SAndroid Build Coastguard Worker } else {
333*0e209d39SAndroid Build Coastguard Worker converter = ucnv_open(codepage, &status);
334*0e209d39SAndroid Build Coastguard Worker }
335*0e209d39SAndroid Build Coastguard Worker
336*0e209d39SAndroid Build Coastguard Worker // if we failed, set the appropriate flags and return
337*0e209d39SAndroid Build Coastguard Worker if(U_FAILURE(status)) {
338*0e209d39SAndroid Build Coastguard Worker setToBogus();
339*0e209d39SAndroid Build Coastguard Worker return;
340*0e209d39SAndroid Build Coastguard Worker }
341*0e209d39SAndroid Build Coastguard Worker
342*0e209d39SAndroid Build Coastguard Worker // perform the conversion
343*0e209d39SAndroid Build Coastguard Worker doCodepageCreate(codepageData, dataLength, converter, status);
344*0e209d39SAndroid Build Coastguard Worker if(U_FAILURE(status)) {
345*0e209d39SAndroid Build Coastguard Worker setToBogus();
346*0e209d39SAndroid Build Coastguard Worker }
347*0e209d39SAndroid Build Coastguard Worker
348*0e209d39SAndroid Build Coastguard Worker // close the converter
349*0e209d39SAndroid Build Coastguard Worker if (codepage == nullptr) {
350*0e209d39SAndroid Build Coastguard Worker u_releaseDefaultConverter(converter);
351*0e209d39SAndroid Build Coastguard Worker } else {
352*0e209d39SAndroid Build Coastguard Worker ucnv_close(converter);
353*0e209d39SAndroid Build Coastguard Worker }
354*0e209d39SAndroid Build Coastguard Worker }
355*0e209d39SAndroid Build Coastguard Worker
356*0e209d39SAndroid Build Coastguard Worker void
doCodepageCreate(const char * codepageData,int32_t dataLength,UConverter * converter,UErrorCode & status)357*0e209d39SAndroid Build Coastguard Worker UnicodeString::doCodepageCreate(const char *codepageData,
358*0e209d39SAndroid Build Coastguard Worker int32_t dataLength,
359*0e209d39SAndroid Build Coastguard Worker UConverter *converter,
360*0e209d39SAndroid Build Coastguard Worker UErrorCode &status)
361*0e209d39SAndroid Build Coastguard Worker {
362*0e209d39SAndroid Build Coastguard Worker if(U_FAILURE(status)) {
363*0e209d39SAndroid Build Coastguard Worker return;
364*0e209d39SAndroid Build Coastguard Worker }
365*0e209d39SAndroid Build Coastguard Worker
366*0e209d39SAndroid Build Coastguard Worker // set up the conversion parameters
367*0e209d39SAndroid Build Coastguard Worker const char *mySource = codepageData;
368*0e209d39SAndroid Build Coastguard Worker const char *mySourceEnd = mySource + dataLength;
369*0e209d39SAndroid Build Coastguard Worker char16_t *array, *myTarget;
370*0e209d39SAndroid Build Coastguard Worker
371*0e209d39SAndroid Build Coastguard Worker // estimate the size needed:
372*0e209d39SAndroid Build Coastguard Worker int32_t arraySize;
373*0e209d39SAndroid Build Coastguard Worker if(dataLength <= US_STACKBUF_SIZE) {
374*0e209d39SAndroid Build Coastguard Worker // try to use the stack buffer
375*0e209d39SAndroid Build Coastguard Worker arraySize = US_STACKBUF_SIZE;
376*0e209d39SAndroid Build Coastguard Worker } else {
377*0e209d39SAndroid Build Coastguard Worker // 1.25 char16_t's per source byte should cover most cases
378*0e209d39SAndroid Build Coastguard Worker arraySize = dataLength + (dataLength >> 2);
379*0e209d39SAndroid Build Coastguard Worker }
380*0e209d39SAndroid Build Coastguard Worker
381*0e209d39SAndroid Build Coastguard Worker // we do not care about the current contents
382*0e209d39SAndroid Build Coastguard Worker UBool doCopyArray = false;
383*0e209d39SAndroid Build Coastguard Worker for(;;) {
384*0e209d39SAndroid Build Coastguard Worker if(!cloneArrayIfNeeded(arraySize, arraySize, doCopyArray)) {
385*0e209d39SAndroid Build Coastguard Worker setToBogus();
386*0e209d39SAndroid Build Coastguard Worker break;
387*0e209d39SAndroid Build Coastguard Worker }
388*0e209d39SAndroid Build Coastguard Worker
389*0e209d39SAndroid Build Coastguard Worker // perform the conversion
390*0e209d39SAndroid Build Coastguard Worker array = getArrayStart();
391*0e209d39SAndroid Build Coastguard Worker myTarget = array + length();
392*0e209d39SAndroid Build Coastguard Worker ucnv_toUnicode(converter, &myTarget, array + getCapacity(),
393*0e209d39SAndroid Build Coastguard Worker &mySource, mySourceEnd, nullptr, true, &status);
394*0e209d39SAndroid Build Coastguard Worker
395*0e209d39SAndroid Build Coastguard Worker // update the conversion parameters
396*0e209d39SAndroid Build Coastguard Worker setLength((int32_t)(myTarget - array));
397*0e209d39SAndroid Build Coastguard Worker
398*0e209d39SAndroid Build Coastguard Worker // allocate more space and copy data, if needed
399*0e209d39SAndroid Build Coastguard Worker if(status == U_BUFFER_OVERFLOW_ERROR) {
400*0e209d39SAndroid Build Coastguard Worker // reset the error code
401*0e209d39SAndroid Build Coastguard Worker status = U_ZERO_ERROR;
402*0e209d39SAndroid Build Coastguard Worker
403*0e209d39SAndroid Build Coastguard Worker // keep the previous conversion results
404*0e209d39SAndroid Build Coastguard Worker doCopyArray = true;
405*0e209d39SAndroid Build Coastguard Worker
406*0e209d39SAndroid Build Coastguard Worker // estimate the new size needed, larger than before
407*0e209d39SAndroid Build Coastguard Worker // try 2 char16_t's per remaining source byte
408*0e209d39SAndroid Build Coastguard Worker arraySize = (int32_t)(length() + 2 * (mySourceEnd - mySource));
409*0e209d39SAndroid Build Coastguard Worker } else {
410*0e209d39SAndroid Build Coastguard Worker break;
411*0e209d39SAndroid Build Coastguard Worker }
412*0e209d39SAndroid Build Coastguard Worker }
413*0e209d39SAndroid Build Coastguard Worker }
414*0e209d39SAndroid Build Coastguard Worker
415*0e209d39SAndroid Build Coastguard Worker U_NAMESPACE_END
416*0e209d39SAndroid Build Coastguard Worker
417*0e209d39SAndroid Build Coastguard Worker #endif
418