1 // Copyright 2006 Google LLC
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 // * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 // * Neither the name of Google LLC nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29 #ifdef HAVE_CONFIG_H
30 #include <config.h> // Must come first
31 #endif
32
33 #include <string.h>
34
35 #include "common/convert_UTF.h"
36 #include "common/scoped_ptr.h"
37 #include "common/string_conversion.h"
38 #include "common/using_std_string.h"
39
40 namespace google_breakpad {
41
42 using std::vector;
43
UTF8ToUTF16(const char * in,vector<uint16_t> * out)44 void UTF8ToUTF16(const char* in, vector<uint16_t>* out) {
45 size_t source_length = strlen(in);
46 const UTF8* source_ptr = reinterpret_cast<const UTF8*>(in);
47 const UTF8* source_end_ptr = source_ptr + source_length;
48 // Erase the contents and zero fill to the expected size
49 out->clear();
50 out->insert(out->begin(), source_length, 0);
51 uint16_t* target_ptr = &(*out)[0];
52 uint16_t* target_end_ptr = target_ptr + out->capacity();
53 ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
54 &target_ptr, target_end_ptr,
55 strictConversion);
56
57 // Resize to be the size of the # of converted characters + NULL
58 out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0);
59 }
60
UTF8ToUTF16Char(const char * in,int in_length,uint16_t out[2])61 int UTF8ToUTF16Char(const char* in, int in_length, uint16_t out[2]) {
62 const UTF8* source_ptr = reinterpret_cast<const UTF8*>(in);
63 const UTF8* source_end_ptr = source_ptr + 1;
64 uint16_t* target_ptr = out;
65 uint16_t* target_end_ptr = target_ptr + 2;
66 out[0] = out[1] = 0;
67
68 // Process one character at a time
69 while (1) {
70 ConversionResult result = ConvertUTF8toUTF16(&source_ptr, source_end_ptr,
71 &target_ptr, target_end_ptr,
72 strictConversion);
73
74 if (result == conversionOK)
75 return static_cast<int>(source_ptr - reinterpret_cast<const UTF8*>(in));
76
77 // Add another character to the input stream and try again
78 source_ptr = reinterpret_cast<const UTF8*>(in);
79 ++source_end_ptr;
80
81 if (source_end_ptr > reinterpret_cast<const UTF8*>(in) + in_length)
82 break;
83 }
84
85 return 0;
86 }
87
UTF32ToUTF16(const wchar_t * in,vector<uint16_t> * out)88 void UTF32ToUTF16(const wchar_t* in, vector<uint16_t>* out) {
89 size_t source_length = wcslen(in);
90 const UTF32* source_ptr = reinterpret_cast<const UTF32*>(in);
91 const UTF32* source_end_ptr = source_ptr + source_length;
92 // Erase the contents and zero fill to the expected size
93 out->clear();
94 out->insert(out->begin(), source_length, 0);
95 uint16_t* target_ptr = &(*out)[0];
96 uint16_t* target_end_ptr = target_ptr + out->capacity();
97 ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
98 &target_ptr, target_end_ptr,
99 strictConversion);
100
101 // Resize to be the size of the # of converted characters + NULL
102 out->resize(result == conversionOK ? target_ptr - &(*out)[0] + 1: 0);
103 }
104
UTF32ToUTF16Char(wchar_t in,uint16_t out[2])105 void UTF32ToUTF16Char(wchar_t in, uint16_t out[2]) {
106 const UTF32* source_ptr = reinterpret_cast<const UTF32*>(&in);
107 const UTF32* source_end_ptr = source_ptr + 1;
108 uint16_t* target_ptr = out;
109 uint16_t* target_end_ptr = target_ptr + 2;
110 out[0] = out[1] = 0;
111 ConversionResult result = ConvertUTF32toUTF16(&source_ptr, source_end_ptr,
112 &target_ptr, target_end_ptr,
113 strictConversion);
114
115 if (result != conversionOK) {
116 out[0] = out[1] = 0;
117 }
118 }
119
// Returns |value| with its high and low bytes exchanged.
static inline uint16_t Swap(uint16_t value) {
  const uint16_t former_high_byte = static_cast<uint16_t>(value >> 8);
  const uint16_t former_low_byte = static_cast<uint16_t>(value << 8);
  return static_cast<uint16_t>(former_low_byte | former_high_byte);
}
123
UTF16ToUTF8(const vector<uint16_t> & in,bool swap)124 string UTF16ToUTF8(const vector<uint16_t>& in, bool swap) {
125 const UTF16* source_ptr = &in[0];
126 scoped_array<uint16_t> source_buffer;
127
128 // If we're to swap, we need to make a local copy and swap each byte pair
129 if (swap) {
130 int idx = 0;
131 source_buffer.reset(new uint16_t[in.size()]);
132 UTF16* source_buffer_ptr = source_buffer.get();
133 for (vector<uint16_t>::const_iterator it = in.begin();
134 it != in.end(); ++it, ++idx)
135 source_buffer_ptr[idx] = Swap(*it);
136
137 source_ptr = source_buffer.get();
138 }
139
140 // The maximum expansion would be 4x the size of the input string.
141 const UTF16* source_end_ptr = source_ptr + in.size();
142 size_t target_capacity = in.size() * 4;
143 scoped_array<UTF8> target_buffer(new UTF8[target_capacity]);
144 UTF8* target_ptr = target_buffer.get();
145 UTF8* target_end_ptr = target_ptr + target_capacity;
146 ConversionResult result = ConvertUTF16toUTF8(&source_ptr, source_end_ptr,
147 &target_ptr, target_end_ptr,
148 strictConversion);
149
150 if (result == conversionOK) {
151 const char* targetPtr = reinterpret_cast<const char*>(target_buffer.get());
152 return targetPtr;
153 }
154
155 return "";
156 }
157
158 } // namespace google_breakpad
159