xref: /aosp_15_r20/external/google-breakpad/src/common/string_conversion.cc (revision 9712c20fc9bbfbac4935993a2ca0b3958c5adad2)
1 // Copyright 2006 Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 #ifdef HAVE_CONFIG_H
30 #include <config.h>  // Must come first
31 #endif
32 
33 #include <string.h>
34 
35 #include "common/convert_UTF.h"
36 #include "common/scoped_ptr.h"
37 #include "common/string_conversion.h"
38 #include "common/using_std_string.h"
39 
40 namespace google_breakpad {
41 
42 using std::vector;
43 
UTF8ToUTF16(const char * in,vector<uint16_t> * out)44 void UTF8ToUTF16(const char* in, vector<uint16_t>* out) {
45   size_t source_length = strlen(in);
46   const UTF8* source_ptr = reinterpret_cast<const UTF8*>(in);
47   const UTF8* source_end_ptr = source_ptr + source_length;
48   // Erase the contents and zero fill to the expected size
49   out->clear();
50   out->insert(out->begin(), source_length, 0);
51   uint16_t* target_ptr = &(*out)[0];
52   uint16_t* target_end_ptr = target_ptr + out->capacity();
53   ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
54                                                &target_ptr, target_end_ptr,
55                                                strictConversion);
56 
57   // Resize to be the size of the # of converted characters + NULL
58   out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0);
59 }
60 
UTF8ToUTF16Char(const char * in,int in_length,uint16_t out[2])61 int UTF8ToUTF16Char(const char* in, int in_length, uint16_t out[2]) {
62   const UTF8* source_ptr = reinterpret_cast<const UTF8*>(in);
63   const UTF8* source_end_ptr = source_ptr + 1;
64   uint16_t* target_ptr = out;
65   uint16_t* target_end_ptr = target_ptr + 2;
66   out[0] = out[1] = 0;
67 
68   // Process one character at a time
69   while (1) {
70     ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
71                                                  &target_ptr, target_end_ptr,
72                                                  strictConversion);
73 
74     if (result == conversionOK)
75       return static_cast<int>(source_ptr - reinterpret_cast<const UTF8*>(in));
76 
77     // Add another character to the input stream and try again
78     source_ptr = reinterpret_cast<const UTF8*>(in);
79     ++source_end_ptr;
80 
81     if (source_end_ptr > reinterpret_cast<const UTF8*>(in) + in_length)
82       break;
83   }
84 
85   return 0;
86 }
87 
UTF32ToUTF16(const wchar_t * in,vector<uint16_t> * out)88 void UTF32ToUTF16(const wchar_t* in, vector<uint16_t>* out) {
89   size_t source_length = wcslen(in);
90   const UTF32* source_ptr = reinterpret_cast<const UTF32*>(in);
91   const UTF32* source_end_ptr = source_ptr + source_length;
92   // Erase the contents and zero fill to the expected size
93   out->clear();
94   out->insert(out->begin(), source_length, 0);
95   uint16_t* target_ptr = &(*out)[0];
96   uint16_t* target_end_ptr = target_ptr + out->capacity();
97   ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
98                                                 &target_ptr, target_end_ptr,
99                                                 strictConversion);
100 
101   // Resize to be the size of the # of converted characters + NULL
102   out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0);
103 }
104 
UTF32ToUTF16Char(wchar_t in,uint16_t out[2])105 void UTF32ToUTF16Char(wchar_t in, uint16_t out[2]) {
106   const UTF32* source_ptr = reinterpret_cast<const UTF32*>(&in);
107   const UTF32* source_end_ptr = source_ptr + 1;
108   uint16_t* target_ptr = out;
109   uint16_t* target_end_ptr = target_ptr + 2;
110   out[0] = out[1] = 0;
111   ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
112                                                 &target_ptr, target_end_ptr,
113                                                 strictConversion);
114 
115   if (result != conversionOK) {
116     out[0] = out[1] = 0;
117   }
118 }
119 
// Returns |value| with its two bytes exchanged (0xAABB becomes 0xBBAA).
static inline uint16_t Swap(uint16_t value) {
  const uint16_t former_high_byte = static_cast<uint16_t>(value >> 8);
  const uint16_t former_low_byte = static_cast<uint16_t>(value & 0x00FF);
  return static_cast<uint16_t>((former_low_byte << 8) | former_high_byte);
}
123 
UTF16ToUTF8(const vector<uint16_t> & in,bool swap)124 string UTF16ToUTF8(const vector<uint16_t>& in, bool swap) {
125   const UTF16* source_ptr = &in[0];
126   scoped_array<uint16_t> source_buffer;
127 
128   // If we're to swap, we need to make a local copy and swap each byte pair
129   if (swap) {
130     int idx = 0;
131     source_buffer.reset(new uint16_t[in.size()]);
132     UTF16* source_buffer_ptr = source_buffer.get();
133     for (vector<uint16_t>::const_iterator it = in.begin();
134          it != in.end(); ++it, ++idx)
135       source_buffer_ptr[idx] = Swap(*it);
136 
137     source_ptr = source_buffer.get();
138   }
139 
140   // The maximum expansion would be 4x the size of the input string.
141   const UTF16* source_end_ptr = source_ptr + in.size();
142   size_t target_capacity = in.size() * 4;
143   scoped_array<UTF8> target_buffer(new UTF8[target_capacity]);
144   UTF8* target_ptr = target_buffer.get();
145   UTF8* target_end_ptr = target_ptr + target_capacity;
146   ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr,
147                                                &target_ptr, target_end_ptr,
148                                                strictConversion);
149 
150   if (result == conversionOK) {
151     const char* targetPtr = reinterpret_cast<const char*>(target_buffer.get());
152     return targetPtr;
153   }
154 
155   return "";
156 }
157 
158 }  // namespace google_breakpad
159