xref: /aosp_15_r20/external/google-breakpad/src/common/string_conversion.cc (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1*9712c20fSFrederick Mayle // Copyright 2006 Google LLC
2*9712c20fSFrederick Mayle //
3*9712c20fSFrederick Mayle // Redistribution and use in source and binary forms, with or without
4*9712c20fSFrederick Mayle // modification, are permitted provided that the following conditions are
5*9712c20fSFrederick Mayle // met:
6*9712c20fSFrederick Mayle //
7*9712c20fSFrederick Mayle //     * Redistributions of source code must retain the above copyright
8*9712c20fSFrederick Mayle // notice, this list of conditions and the following disclaimer.
9*9712c20fSFrederick Mayle //     * Redistributions in binary form must reproduce the above
10*9712c20fSFrederick Mayle // copyright notice, this list of conditions and the following disclaimer
11*9712c20fSFrederick Mayle // in the documentation and/or other materials provided with the
12*9712c20fSFrederick Mayle // distribution.
13*9712c20fSFrederick Mayle //     * Neither the name of Google LLC nor the names of its
14*9712c20fSFrederick Mayle // contributors may be used to endorse or promote products derived from
15*9712c20fSFrederick Mayle // this software without specific prior written permission.
16*9712c20fSFrederick Mayle //
17*9712c20fSFrederick Mayle // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18*9712c20fSFrederick Mayle // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19*9712c20fSFrederick Mayle // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20*9712c20fSFrederick Mayle // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21*9712c20fSFrederick Mayle // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22*9712c20fSFrederick Mayle // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23*9712c20fSFrederick Mayle // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24*9712c20fSFrederick Mayle // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25*9712c20fSFrederick Mayle // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26*9712c20fSFrederick Mayle // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27*9712c20fSFrederick Mayle // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28*9712c20fSFrederick Mayle 
29*9712c20fSFrederick Mayle #ifdef HAVE_CONFIG_H
30*9712c20fSFrederick Mayle #include <config.h>  // Must come first
31*9712c20fSFrederick Mayle #endif
32*9712c20fSFrederick Mayle 
33*9712c20fSFrederick Mayle #include <string.h>
34*9712c20fSFrederick Mayle 
35*9712c20fSFrederick Mayle #include "common/convert_UTF.h"
36*9712c20fSFrederick Mayle #include "common/scoped_ptr.h"
37*9712c20fSFrederick Mayle #include "common/string_conversion.h"
38*9712c20fSFrederick Mayle #include "common/using_std_string.h"
39*9712c20fSFrederick Mayle 
40*9712c20fSFrederick Mayle namespace google_breakpad {
41*9712c20fSFrederick Mayle 
42*9712c20fSFrederick Mayle using std::vector;
43*9712c20fSFrederick Mayle 
UTF8ToUTF16(const char * in,vector<uint16_t> * out)44*9712c20fSFrederick Mayle void UTF8ToUTF16(const char* in, vector<uint16_t>* out) {
45*9712c20fSFrederick Mayle   size_t source_length = strlen(in);
46*9712c20fSFrederick Mayle   const UTF8* source_ptr = reinterpret_cast<const UTF8*>(in);
47*9712c20fSFrederick Mayle   const UTF8* source_end_ptr = source_ptr + source_length;
48*9712c20fSFrederick Mayle   // Erase the contents and zero fill to the expected size
49*9712c20fSFrederick Mayle   out->clear();
50*9712c20fSFrederick Mayle   out->insert(out->begin(), source_length, 0);
51*9712c20fSFrederick Mayle   uint16_t* target_ptr = &(*out)[0];
52*9712c20fSFrederick Mayle   uint16_t* target_end_ptr = target_ptr + out->capacity();
53*9712c20fSFrederick Mayle   ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
54*9712c20fSFrederick Mayle                                                &target_ptr, target_end_ptr,
55*9712c20fSFrederick Mayle                                                strictConversion);
56*9712c20fSFrederick Mayle 
57*9712c20fSFrederick Mayle   // Resize to be the size of the # of converted characters + NULL
58*9712c20fSFrederick Mayle   out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0);
59*9712c20fSFrederick Mayle }
60*9712c20fSFrederick Mayle 
UTF8ToUTF16Char(const char * in,int in_length,uint16_t out[2])61*9712c20fSFrederick Mayle int UTF8ToUTF16Char(const char* in, int in_length, uint16_t out[2]) {
62*9712c20fSFrederick Mayle   const UTF8* source_ptr = reinterpret_cast<const UTF8*>(in);
63*9712c20fSFrederick Mayle   const UTF8* source_end_ptr = source_ptr + 1;
64*9712c20fSFrederick Mayle   uint16_t* target_ptr = out;
65*9712c20fSFrederick Mayle   uint16_t* target_end_ptr = target_ptr + 2;
66*9712c20fSFrederick Mayle   out[0] = out[1] = 0;
67*9712c20fSFrederick Mayle 
68*9712c20fSFrederick Mayle   // Process one character at a time
69*9712c20fSFrederick Mayle   while (1) {
70*9712c20fSFrederick Mayle     ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
71*9712c20fSFrederick Mayle                                                  &target_ptr, target_end_ptr,
72*9712c20fSFrederick Mayle                                                  strictConversion);
73*9712c20fSFrederick Mayle 
74*9712c20fSFrederick Mayle     if (result == conversionOK)
75*9712c20fSFrederick Mayle       return static_cast<int>(source_ptr - reinterpret_cast<const UTF8*>(in));
76*9712c20fSFrederick Mayle 
77*9712c20fSFrederick Mayle     // Add another character to the input stream and try again
78*9712c20fSFrederick Mayle     source_ptr = reinterpret_cast<const UTF8*>(in);
79*9712c20fSFrederick Mayle     ++source_end_ptr;
80*9712c20fSFrederick Mayle 
81*9712c20fSFrederick Mayle     if (source_end_ptr > reinterpret_cast<const UTF8*>(in) + in_length)
82*9712c20fSFrederick Mayle       break;
83*9712c20fSFrederick Mayle   }
84*9712c20fSFrederick Mayle 
85*9712c20fSFrederick Mayle   return 0;
86*9712c20fSFrederick Mayle }
87*9712c20fSFrederick Mayle 
UTF32ToUTF16(const wchar_t * in,vector<uint16_t> * out)88*9712c20fSFrederick Mayle void UTF32ToUTF16(const wchar_t* in, vector<uint16_t>* out) {
89*9712c20fSFrederick Mayle   size_t source_length = wcslen(in);
90*9712c20fSFrederick Mayle   const UTF32* source_ptr = reinterpret_cast<const UTF32*>(in);
91*9712c20fSFrederick Mayle   const UTF32* source_end_ptr = source_ptr + source_length;
92*9712c20fSFrederick Mayle   // Erase the contents and zero fill to the expected size
93*9712c20fSFrederick Mayle   out->clear();
94*9712c20fSFrederick Mayle   out->insert(out->begin(), source_length, 0);
95*9712c20fSFrederick Mayle   uint16_t* target_ptr = &(*out)[0];
96*9712c20fSFrederick Mayle   uint16_t* target_end_ptr = target_ptr + out->capacity();
97*9712c20fSFrederick Mayle   ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
98*9712c20fSFrederick Mayle                                                 &target_ptr, target_end_ptr,
99*9712c20fSFrederick Mayle                                                 strictConversion);
100*9712c20fSFrederick Mayle 
101*9712c20fSFrederick Mayle   // Resize to be the size of the # of converted characters + NULL
102*9712c20fSFrederick Mayle   out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0);
103*9712c20fSFrederick Mayle }
104*9712c20fSFrederick Mayle 
UTF32ToUTF16Char(wchar_t in,uint16_t out[2])105*9712c20fSFrederick Mayle void UTF32ToUTF16Char(wchar_t in, uint16_t out[2]) {
106*9712c20fSFrederick Mayle   const UTF32* source_ptr = reinterpret_cast<const UTF32*>(&in);
107*9712c20fSFrederick Mayle   const UTF32* source_end_ptr = source_ptr + 1;
108*9712c20fSFrederick Mayle   uint16_t* target_ptr = out;
109*9712c20fSFrederick Mayle   uint16_t* target_end_ptr = target_ptr + 2;
110*9712c20fSFrederick Mayle   out[0] = out[1] = 0;
111*9712c20fSFrederick Mayle   ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
112*9712c20fSFrederick Mayle                                                 &target_ptr, target_end_ptr,
113*9712c20fSFrederick Mayle                                                 strictConversion);
114*9712c20fSFrederick Mayle 
115*9712c20fSFrederick Mayle   if (result != conversionOK) {
116*9712c20fSFrederick Mayle     out[0] = out[1] = 0;
117*9712c20fSFrederick Mayle   }
118*9712c20fSFrederick Mayle }
119*9712c20fSFrederick Mayle 
// Returns |value| with its two bytes exchanged.
static inline uint16_t Swap(uint16_t value) {
  // The high byte moves down, the low byte moves up.
  const uint16_t former_high = static_cast<uint16_t>(value >> 8);
  const uint16_t former_low = static_cast<uint16_t>((value & 0xFF) << 8);
  return static_cast<uint16_t>(former_low | former_high);
}
123*9712c20fSFrederick Mayle 
UTF16ToUTF8(const vector<uint16_t> & in,bool swap)124*9712c20fSFrederick Mayle string UTF16ToUTF8(const vector<uint16_t>& in, bool swap) {
125*9712c20fSFrederick Mayle   const UTF16* source_ptr = &in[0];
126*9712c20fSFrederick Mayle   scoped_array<uint16_t> source_buffer;
127*9712c20fSFrederick Mayle 
128*9712c20fSFrederick Mayle   // If we're to swap, we need to make a local copy and swap each byte pair
129*9712c20fSFrederick Mayle   if (swap) {
130*9712c20fSFrederick Mayle     int idx = 0;
131*9712c20fSFrederick Mayle     source_buffer.reset(new uint16_t[in.size()]);
132*9712c20fSFrederick Mayle     UTF16* source_buffer_ptr = source_buffer.get();
133*9712c20fSFrederick Mayle     for (vector<uint16_t>::const_iterator it = in.begin();
134*9712c20fSFrederick Mayle          it != in.end(); ++it, ++idx)
135*9712c20fSFrederick Mayle       source_buffer_ptr[idx] = Swap(*it);
136*9712c20fSFrederick Mayle 
137*9712c20fSFrederick Mayle     source_ptr = source_buffer.get();
138*9712c20fSFrederick Mayle   }
139*9712c20fSFrederick Mayle 
140*9712c20fSFrederick Mayle   // The maximum expansion would be 4x the size of the input string.
141*9712c20fSFrederick Mayle   const UTF16* source_end_ptr = source_ptr + in.size();
142*9712c20fSFrederick Mayle   size_t target_capacity = in.size() * 4;
143*9712c20fSFrederick Mayle   scoped_array<UTF8> target_buffer(new UTF8[target_capacity]);
144*9712c20fSFrederick Mayle   UTF8* target_ptr = target_buffer.get();
145*9712c20fSFrederick Mayle   UTF8* target_end_ptr = target_ptr + target_capacity;
146*9712c20fSFrederick Mayle   ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr,
147*9712c20fSFrederick Mayle                                                &target_ptr, target_end_ptr,
148*9712c20fSFrederick Mayle                                                strictConversion);
149*9712c20fSFrederick Mayle 
150*9712c20fSFrederick Mayle   if (result == conversionOK) {
151*9712c20fSFrederick Mayle     const char* targetPtr = reinterpret_cast<const char*>(target_buffer.get());
152*9712c20fSFrederick Mayle     return targetPtr;
153*9712c20fSFrederick Mayle   }
154*9712c20fSFrederick Mayle 
155*9712c20fSFrederick Mayle   return "";
156*9712c20fSFrederick Mayle }
157*9712c20fSFrederick Mayle 
158*9712c20fSFrederick Mayle }  // namespace google_breakpad
159