xref: /aosp_15_r20/external/abseil-cpp/absl/strings/escaping.cc (revision 9356374a3709195abf420251b3e825997ff56c0f)
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14*9356374aSAndroid Build Coastguard Worker 
15*9356374aSAndroid Build Coastguard Worker #include "absl/strings/escaping.h"
16*9356374aSAndroid Build Coastguard Worker 
17*9356374aSAndroid Build Coastguard Worker #include <algorithm>
18*9356374aSAndroid Build Coastguard Worker #include <cassert>
19*9356374aSAndroid Build Coastguard Worker #include <cstddef>
20*9356374aSAndroid Build Coastguard Worker #include <cstdint>
21*9356374aSAndroid Build Coastguard Worker #include <cstring>
22*9356374aSAndroid Build Coastguard Worker #include <limits>
23*9356374aSAndroid Build Coastguard Worker #include <string>
24*9356374aSAndroid Build Coastguard Worker #include <utility>
25*9356374aSAndroid Build Coastguard Worker 
26*9356374aSAndroid Build Coastguard Worker #include "absl/base/config.h"
27*9356374aSAndroid Build Coastguard Worker #include "absl/base/internal/raw_logging.h"
28*9356374aSAndroid Build Coastguard Worker #include "absl/base/internal/unaligned_access.h"
29*9356374aSAndroid Build Coastguard Worker #include "absl/base/nullability.h"
30*9356374aSAndroid Build Coastguard Worker #include "absl/strings/ascii.h"
31*9356374aSAndroid Build Coastguard Worker #include "absl/strings/charset.h"
32*9356374aSAndroid Build Coastguard Worker #include "absl/strings/internal/escaping.h"
33*9356374aSAndroid Build Coastguard Worker #include "absl/strings/internal/resize_uninitialized.h"
34*9356374aSAndroid Build Coastguard Worker #include "absl/strings/internal/utf8.h"
35*9356374aSAndroid Build Coastguard Worker #include "absl/strings/numbers.h"
36*9356374aSAndroid Build Coastguard Worker #include "absl/strings/str_cat.h"
37*9356374aSAndroid Build Coastguard Worker #include "absl/strings/string_view.h"
38*9356374aSAndroid Build Coastguard Worker 
39*9356374aSAndroid Build Coastguard Worker namespace absl {
40*9356374aSAndroid Build Coastguard Worker ABSL_NAMESPACE_BEGIN
41*9356374aSAndroid Build Coastguard Worker namespace {
42*9356374aSAndroid Build Coastguard Worker 
// Value for the leave_nulls_escaped argument of CUnescapeInternal() that
// requests escaped NUL sequences be decoded rather than left escaped.
constexpr bool kUnescapeNulls{false};
45*9356374aSAndroid Build Coastguard Worker 
// Returns true iff `c` is one of the ASCII octal digits '0' through '7'.
inline bool is_octal_digit(char c) {
  if (c < '0') {
    return false;
  }
  return c <= '7';
}
47*9356374aSAndroid Build Coastguard Worker 
hex_digit_to_int(char c)48*9356374aSAndroid Build Coastguard Worker inline unsigned int hex_digit_to_int(char c) {
49*9356374aSAndroid Build Coastguard Worker   static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61,
50*9356374aSAndroid Build Coastguard Worker                 "Character set must be ASCII.");
51*9356374aSAndroid Build Coastguard Worker   assert(absl::ascii_isxdigit(static_cast<unsigned char>(c)));
52*9356374aSAndroid Build Coastguard Worker   unsigned int x = static_cast<unsigned char>(c);
53*9356374aSAndroid Build Coastguard Worker   if (x > '9') {
54*9356374aSAndroid Build Coastguard Worker     x += 9;
55*9356374aSAndroid Build Coastguard Worker   }
56*9356374aSAndroid Build Coastguard Worker   return x & 0xf;
57*9356374aSAndroid Build Coastguard Worker }
58*9356374aSAndroid Build Coastguard Worker 
IsSurrogate(char32_t c,absl::string_view src,absl::Nullable<std::string * > error)59*9356374aSAndroid Build Coastguard Worker inline bool IsSurrogate(char32_t c, absl::string_view src,
60*9356374aSAndroid Build Coastguard Worker                         absl::Nullable<std::string*> error) {
61*9356374aSAndroid Build Coastguard Worker   if (c >= 0xD800 && c <= 0xDFFF) {
62*9356374aSAndroid Build Coastguard Worker     if (error) {
63*9356374aSAndroid Build Coastguard Worker       *error = absl::StrCat("invalid surrogate character (0xD800-DFFF): \\",
64*9356374aSAndroid Build Coastguard Worker                             src);
65*9356374aSAndroid Build Coastguard Worker     }
66*9356374aSAndroid Build Coastguard Worker     return true;
67*9356374aSAndroid Build Coastguard Worker   }
68*9356374aSAndroid Build Coastguard Worker   return false;
69*9356374aSAndroid Build Coastguard Worker }
70*9356374aSAndroid Build Coastguard Worker 
71*9356374aSAndroid Build Coastguard Worker // ----------------------------------------------------------------------
72*9356374aSAndroid Build Coastguard Worker // CUnescapeInternal()
73*9356374aSAndroid Build Coastguard Worker //    Implements both CUnescape() and CUnescapeForNullTerminatedString().
74*9356374aSAndroid Build Coastguard Worker //
75*9356374aSAndroid Build Coastguard Worker //    Unescapes C escape sequences and is the reverse of CEscape().
76*9356374aSAndroid Build Coastguard Worker //
77*9356374aSAndroid Build Coastguard Worker //    If 'source' is valid, stores the unescaped string and its size in
78*9356374aSAndroid Build Coastguard Worker //    'dest' and 'dest_len' respectively, and returns true. Otherwise
79*9356374aSAndroid Build Coastguard Worker //    returns false and optionally stores the error description in
80*9356374aSAndroid Build Coastguard Worker //    'error'. Set 'error' to nullptr to disable error reporting.
81*9356374aSAndroid Build Coastguard Worker //
82*9356374aSAndroid Build Coastguard Worker //    'dest' should point to a buffer that is at least as big as 'source'.
83*9356374aSAndroid Build Coastguard Worker //    'source' and 'dest' may be the same.
84*9356374aSAndroid Build Coastguard Worker //
85*9356374aSAndroid Build Coastguard Worker //     NOTE: any changes to this function must also be reflected in the older
86*9356374aSAndroid Build Coastguard Worker //     UnescapeCEscapeSequences().
87*9356374aSAndroid Build Coastguard Worker // ----------------------------------------------------------------------
CUnescapeInternal(absl::string_view source,bool leave_nulls_escaped,absl::Nonnull<char * > dest,absl::Nonnull<ptrdiff_t * > dest_len,absl::Nullable<std::string * > error)88*9356374aSAndroid Build Coastguard Worker bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
89*9356374aSAndroid Build Coastguard Worker                        absl::Nonnull<char*> dest,
90*9356374aSAndroid Build Coastguard Worker                        absl::Nonnull<ptrdiff_t*> dest_len,
91*9356374aSAndroid Build Coastguard Worker                        absl::Nullable<std::string*> error) {
92*9356374aSAndroid Build Coastguard Worker   char* d = dest;
93*9356374aSAndroid Build Coastguard Worker   const char* p = source.data();
94*9356374aSAndroid Build Coastguard Worker   const char* end = p + source.size();
95*9356374aSAndroid Build Coastguard Worker   const char* last_byte = end - 1;
96*9356374aSAndroid Build Coastguard Worker 
97*9356374aSAndroid Build Coastguard Worker   // Small optimization for case where source = dest and there's no escaping
98*9356374aSAndroid Build Coastguard Worker   while (p == d && p < end && *p != '\\') p++, d++;
99*9356374aSAndroid Build Coastguard Worker 
100*9356374aSAndroid Build Coastguard Worker   while (p < end) {
101*9356374aSAndroid Build Coastguard Worker     if (*p != '\\') {
102*9356374aSAndroid Build Coastguard Worker       *d++ = *p++;
103*9356374aSAndroid Build Coastguard Worker     } else {
104*9356374aSAndroid Build Coastguard Worker       if (++p > last_byte) {  // skip past the '\\'
105*9356374aSAndroid Build Coastguard Worker         if (error) *error = "String cannot end with \\";
106*9356374aSAndroid Build Coastguard Worker         return false;
107*9356374aSAndroid Build Coastguard Worker       }
108*9356374aSAndroid Build Coastguard Worker       switch (*p) {
109*9356374aSAndroid Build Coastguard Worker         case 'a':  *d++ = '\a';  break;
110*9356374aSAndroid Build Coastguard Worker         case 'b':  *d++ = '\b';  break;
111*9356374aSAndroid Build Coastguard Worker         case 'f':  *d++ = '\f';  break;
112*9356374aSAndroid Build Coastguard Worker         case 'n':  *d++ = '\n';  break;
113*9356374aSAndroid Build Coastguard Worker         case 'r':  *d++ = '\r';  break;
114*9356374aSAndroid Build Coastguard Worker         case 't':  *d++ = '\t';  break;
115*9356374aSAndroid Build Coastguard Worker         case 'v':  *d++ = '\v';  break;
116*9356374aSAndroid Build Coastguard Worker         case '\\': *d++ = '\\';  break;
117*9356374aSAndroid Build Coastguard Worker         case '?':  *d++ = '\?';  break;    // \?  Who knew?
118*9356374aSAndroid Build Coastguard Worker         case '\'': *d++ = '\'';  break;
119*9356374aSAndroid Build Coastguard Worker         case '"':  *d++ = '\"';  break;
120*9356374aSAndroid Build Coastguard Worker         case '0':
121*9356374aSAndroid Build Coastguard Worker         case '1':
122*9356374aSAndroid Build Coastguard Worker         case '2':
123*9356374aSAndroid Build Coastguard Worker         case '3':
124*9356374aSAndroid Build Coastguard Worker         case '4':
125*9356374aSAndroid Build Coastguard Worker         case '5':
126*9356374aSAndroid Build Coastguard Worker         case '6':
127*9356374aSAndroid Build Coastguard Worker         case '7': {
128*9356374aSAndroid Build Coastguard Worker           // octal digit: 1 to 3 digits
129*9356374aSAndroid Build Coastguard Worker           const char* octal_start = p;
130*9356374aSAndroid Build Coastguard Worker           unsigned int ch = static_cast<unsigned int>(*p - '0');  // digit 1
131*9356374aSAndroid Build Coastguard Worker           if (p < last_byte && is_octal_digit(p[1]))
132*9356374aSAndroid Build Coastguard Worker             ch = ch * 8 + static_cast<unsigned int>(*++p - '0');  // digit 2
133*9356374aSAndroid Build Coastguard Worker           if (p < last_byte && is_octal_digit(p[1]))
134*9356374aSAndroid Build Coastguard Worker             ch = ch * 8 + static_cast<unsigned int>(*++p - '0');  // digit 3
135*9356374aSAndroid Build Coastguard Worker           if (ch > 0xff) {
136*9356374aSAndroid Build Coastguard Worker             if (error) {
137*9356374aSAndroid Build Coastguard Worker               *error = "Value of \\" +
138*9356374aSAndroid Build Coastguard Worker                        std::string(octal_start,
139*9356374aSAndroid Build Coastguard Worker                                    static_cast<size_t>(p + 1 - octal_start)) +
140*9356374aSAndroid Build Coastguard Worker                        " exceeds 0xff";
141*9356374aSAndroid Build Coastguard Worker             }
142*9356374aSAndroid Build Coastguard Worker             return false;
143*9356374aSAndroid Build Coastguard Worker           }
144*9356374aSAndroid Build Coastguard Worker           if ((ch == 0) && leave_nulls_escaped) {
145*9356374aSAndroid Build Coastguard Worker             // Copy the escape sequence for the null character
146*9356374aSAndroid Build Coastguard Worker             const size_t octal_size = static_cast<size_t>(p + 1 - octal_start);
147*9356374aSAndroid Build Coastguard Worker             *d++ = '\\';
148*9356374aSAndroid Build Coastguard Worker             memmove(d, octal_start, octal_size);
149*9356374aSAndroid Build Coastguard Worker             d += octal_size;
150*9356374aSAndroid Build Coastguard Worker             break;
151*9356374aSAndroid Build Coastguard Worker           }
152*9356374aSAndroid Build Coastguard Worker           *d++ = static_cast<char>(ch);
153*9356374aSAndroid Build Coastguard Worker           break;
154*9356374aSAndroid Build Coastguard Worker         }
155*9356374aSAndroid Build Coastguard Worker         case 'x':
156*9356374aSAndroid Build Coastguard Worker         case 'X': {
157*9356374aSAndroid Build Coastguard Worker           if (p >= last_byte) {
158*9356374aSAndroid Build Coastguard Worker             if (error) *error = "String cannot end with \\x";
159*9356374aSAndroid Build Coastguard Worker             return false;
160*9356374aSAndroid Build Coastguard Worker           } else if (!absl::ascii_isxdigit(static_cast<unsigned char>(p[1]))) {
161*9356374aSAndroid Build Coastguard Worker             if (error) *error = "\\x cannot be followed by a non-hex digit";
162*9356374aSAndroid Build Coastguard Worker             return false;
163*9356374aSAndroid Build Coastguard Worker           }
164*9356374aSAndroid Build Coastguard Worker           unsigned int ch = 0;
165*9356374aSAndroid Build Coastguard Worker           const char* hex_start = p;
166*9356374aSAndroid Build Coastguard Worker           while (p < last_byte &&
167*9356374aSAndroid Build Coastguard Worker                  absl::ascii_isxdigit(static_cast<unsigned char>(p[1])))
168*9356374aSAndroid Build Coastguard Worker             // Arbitrarily many hex digits
169*9356374aSAndroid Build Coastguard Worker             ch = (ch << 4) + hex_digit_to_int(*++p);
170*9356374aSAndroid Build Coastguard Worker           if (ch > 0xFF) {
171*9356374aSAndroid Build Coastguard Worker             if (error) {
172*9356374aSAndroid Build Coastguard Worker               *error = "Value of \\" +
173*9356374aSAndroid Build Coastguard Worker                        std::string(hex_start,
174*9356374aSAndroid Build Coastguard Worker                                    static_cast<size_t>(p + 1 - hex_start)) +
175*9356374aSAndroid Build Coastguard Worker                        " exceeds 0xff";
176*9356374aSAndroid Build Coastguard Worker             }
177*9356374aSAndroid Build Coastguard Worker             return false;
178*9356374aSAndroid Build Coastguard Worker           }
179*9356374aSAndroid Build Coastguard Worker           if ((ch == 0) && leave_nulls_escaped) {
180*9356374aSAndroid Build Coastguard Worker             // Copy the escape sequence for the null character
181*9356374aSAndroid Build Coastguard Worker             const size_t hex_size = static_cast<size_t>(p + 1 - hex_start);
182*9356374aSAndroid Build Coastguard Worker             *d++ = '\\';
183*9356374aSAndroid Build Coastguard Worker             memmove(d, hex_start, hex_size);
184*9356374aSAndroid Build Coastguard Worker             d += hex_size;
185*9356374aSAndroid Build Coastguard Worker             break;
186*9356374aSAndroid Build Coastguard Worker           }
187*9356374aSAndroid Build Coastguard Worker           *d++ = static_cast<char>(ch);
188*9356374aSAndroid Build Coastguard Worker           break;
189*9356374aSAndroid Build Coastguard Worker         }
190*9356374aSAndroid Build Coastguard Worker         case 'u': {
191*9356374aSAndroid Build Coastguard Worker           // \uhhhh => convert 4 hex digits to UTF-8
192*9356374aSAndroid Build Coastguard Worker           char32_t rune = 0;
193*9356374aSAndroid Build Coastguard Worker           const char* hex_start = p;
194*9356374aSAndroid Build Coastguard Worker           if (p + 4 >= end) {
195*9356374aSAndroid Build Coastguard Worker             if (error) {
196*9356374aSAndroid Build Coastguard Worker               *error = "\\u must be followed by 4 hex digits: \\" +
197*9356374aSAndroid Build Coastguard Worker                        std::string(hex_start,
198*9356374aSAndroid Build Coastguard Worker                                    static_cast<size_t>(p + 1 - hex_start));
199*9356374aSAndroid Build Coastguard Worker             }
200*9356374aSAndroid Build Coastguard Worker             return false;
201*9356374aSAndroid Build Coastguard Worker           }
202*9356374aSAndroid Build Coastguard Worker           for (int i = 0; i < 4; ++i) {
203*9356374aSAndroid Build Coastguard Worker             // Look one char ahead.
204*9356374aSAndroid Build Coastguard Worker             if (absl::ascii_isxdigit(static_cast<unsigned char>(p[1]))) {
205*9356374aSAndroid Build Coastguard Worker               rune = (rune << 4) + hex_digit_to_int(*++p);  // Advance p.
206*9356374aSAndroid Build Coastguard Worker             } else {
207*9356374aSAndroid Build Coastguard Worker               if (error) {
208*9356374aSAndroid Build Coastguard Worker                 *error = "\\u must be followed by 4 hex digits: \\" +
209*9356374aSAndroid Build Coastguard Worker                          std::string(hex_start,
210*9356374aSAndroid Build Coastguard Worker                                      static_cast<size_t>(p + 1 - hex_start));
211*9356374aSAndroid Build Coastguard Worker               }
212*9356374aSAndroid Build Coastguard Worker               return false;
213*9356374aSAndroid Build Coastguard Worker             }
214*9356374aSAndroid Build Coastguard Worker           }
215*9356374aSAndroid Build Coastguard Worker           if ((rune == 0) && leave_nulls_escaped) {
216*9356374aSAndroid Build Coastguard Worker             // Copy the escape sequence for the null character
217*9356374aSAndroid Build Coastguard Worker             *d++ = '\\';
218*9356374aSAndroid Build Coastguard Worker             memmove(d, hex_start, 5);  // u0000
219*9356374aSAndroid Build Coastguard Worker             d += 5;
220*9356374aSAndroid Build Coastguard Worker             break;
221*9356374aSAndroid Build Coastguard Worker           }
222*9356374aSAndroid Build Coastguard Worker           if (IsSurrogate(rune, absl::string_view(hex_start, 5), error)) {
223*9356374aSAndroid Build Coastguard Worker             return false;
224*9356374aSAndroid Build Coastguard Worker           }
225*9356374aSAndroid Build Coastguard Worker           d += strings_internal::EncodeUTF8Char(d, rune);
226*9356374aSAndroid Build Coastguard Worker           break;
227*9356374aSAndroid Build Coastguard Worker         }
228*9356374aSAndroid Build Coastguard Worker         case 'U': {
229*9356374aSAndroid Build Coastguard Worker           // \Uhhhhhhhh => convert 8 hex digits to UTF-8
230*9356374aSAndroid Build Coastguard Worker           char32_t rune = 0;
231*9356374aSAndroid Build Coastguard Worker           const char* hex_start = p;
232*9356374aSAndroid Build Coastguard Worker           if (p + 8 >= end) {
233*9356374aSAndroid Build Coastguard Worker             if (error) {
234*9356374aSAndroid Build Coastguard Worker               *error = "\\U must be followed by 8 hex digits: \\" +
235*9356374aSAndroid Build Coastguard Worker                        std::string(hex_start,
236*9356374aSAndroid Build Coastguard Worker                                    static_cast<size_t>(p + 1 - hex_start));
237*9356374aSAndroid Build Coastguard Worker             }
238*9356374aSAndroid Build Coastguard Worker             return false;
239*9356374aSAndroid Build Coastguard Worker           }
240*9356374aSAndroid Build Coastguard Worker           for (int i = 0; i < 8; ++i) {
241*9356374aSAndroid Build Coastguard Worker             // Look one char ahead.
242*9356374aSAndroid Build Coastguard Worker             if (absl::ascii_isxdigit(static_cast<unsigned char>(p[1]))) {
243*9356374aSAndroid Build Coastguard Worker               // Don't change rune until we're sure this
244*9356374aSAndroid Build Coastguard Worker               // is within the Unicode limit, but do advance p.
245*9356374aSAndroid Build Coastguard Worker               uint32_t newrune = (rune << 4) + hex_digit_to_int(*++p);
246*9356374aSAndroid Build Coastguard Worker               if (newrune > 0x10FFFF) {
247*9356374aSAndroid Build Coastguard Worker                 if (error) {
248*9356374aSAndroid Build Coastguard Worker                   *error = "Value of \\" +
249*9356374aSAndroid Build Coastguard Worker                            std::string(hex_start,
250*9356374aSAndroid Build Coastguard Worker                                        static_cast<size_t>(p + 1 - hex_start)) +
251*9356374aSAndroid Build Coastguard Worker                            " exceeds Unicode limit (0x10FFFF)";
252*9356374aSAndroid Build Coastguard Worker                 }
253*9356374aSAndroid Build Coastguard Worker                 return false;
254*9356374aSAndroid Build Coastguard Worker               } else {
255*9356374aSAndroid Build Coastguard Worker                 rune = newrune;
256*9356374aSAndroid Build Coastguard Worker               }
257*9356374aSAndroid Build Coastguard Worker             } else {
258*9356374aSAndroid Build Coastguard Worker               if (error) {
259*9356374aSAndroid Build Coastguard Worker                 *error = "\\U must be followed by 8 hex digits: \\" +
260*9356374aSAndroid Build Coastguard Worker                          std::string(hex_start,
261*9356374aSAndroid Build Coastguard Worker                                      static_cast<size_t>(p + 1 - hex_start));
262*9356374aSAndroid Build Coastguard Worker               }
263*9356374aSAndroid Build Coastguard Worker               return false;
264*9356374aSAndroid Build Coastguard Worker             }
265*9356374aSAndroid Build Coastguard Worker           }
266*9356374aSAndroid Build Coastguard Worker           if ((rune == 0) && leave_nulls_escaped) {
267*9356374aSAndroid Build Coastguard Worker             // Copy the escape sequence for the null character
268*9356374aSAndroid Build Coastguard Worker             *d++ = '\\';
269*9356374aSAndroid Build Coastguard Worker             memmove(d, hex_start, 9);  // U00000000
270*9356374aSAndroid Build Coastguard Worker             d += 9;
271*9356374aSAndroid Build Coastguard Worker             break;
272*9356374aSAndroid Build Coastguard Worker           }
273*9356374aSAndroid Build Coastguard Worker           if (IsSurrogate(rune, absl::string_view(hex_start, 9), error)) {
274*9356374aSAndroid Build Coastguard Worker             return false;
275*9356374aSAndroid Build Coastguard Worker           }
276*9356374aSAndroid Build Coastguard Worker           d += strings_internal::EncodeUTF8Char(d, rune);
277*9356374aSAndroid Build Coastguard Worker           break;
278*9356374aSAndroid Build Coastguard Worker         }
279*9356374aSAndroid Build Coastguard Worker         default: {
280*9356374aSAndroid Build Coastguard Worker           if (error) *error = std::string("Unknown escape sequence: \\") + *p;
281*9356374aSAndroid Build Coastguard Worker           return false;
282*9356374aSAndroid Build Coastguard Worker         }
283*9356374aSAndroid Build Coastguard Worker       }
284*9356374aSAndroid Build Coastguard Worker       p++;                                 // read past letter we escaped
285*9356374aSAndroid Build Coastguard Worker     }
286*9356374aSAndroid Build Coastguard Worker   }
287*9356374aSAndroid Build Coastguard Worker   *dest_len = d - dest;
288*9356374aSAndroid Build Coastguard Worker   return true;
289*9356374aSAndroid Build Coastguard Worker }
290*9356374aSAndroid Build Coastguard Worker 
291*9356374aSAndroid Build Coastguard Worker // ----------------------------------------------------------------------
292*9356374aSAndroid Build Coastguard Worker // CUnescapeInternal()
293*9356374aSAndroid Build Coastguard Worker //
294*9356374aSAndroid Build Coastguard Worker //    Same as above but uses a std::string for output. 'source' and 'dest'
295*9356374aSAndroid Build Coastguard Worker //    may be the same.
296*9356374aSAndroid Build Coastguard Worker // ----------------------------------------------------------------------
CUnescapeInternal(absl::string_view source,bool leave_nulls_escaped,absl::Nonnull<std::string * > dest,absl::Nullable<std::string * > error)297*9356374aSAndroid Build Coastguard Worker bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
298*9356374aSAndroid Build Coastguard Worker                        absl::Nonnull<std::string*> dest,
299*9356374aSAndroid Build Coastguard Worker                        absl::Nullable<std::string*> error) {
300*9356374aSAndroid Build Coastguard Worker   strings_internal::STLStringResizeUninitialized(dest, source.size());
301*9356374aSAndroid Build Coastguard Worker 
302*9356374aSAndroid Build Coastguard Worker   ptrdiff_t dest_size;
303*9356374aSAndroid Build Coastguard Worker   if (!CUnescapeInternal(source,
304*9356374aSAndroid Build Coastguard Worker                          leave_nulls_escaped,
305*9356374aSAndroid Build Coastguard Worker                          &(*dest)[0],
306*9356374aSAndroid Build Coastguard Worker                          &dest_size,
307*9356374aSAndroid Build Coastguard Worker                          error)) {
308*9356374aSAndroid Build Coastguard Worker     return false;
309*9356374aSAndroid Build Coastguard Worker   }
310*9356374aSAndroid Build Coastguard Worker   dest->erase(static_cast<size_t>(dest_size));
311*9356374aSAndroid Build Coastguard Worker   return true;
312*9356374aSAndroid Build Coastguard Worker }
313*9356374aSAndroid Build Coastguard Worker 
314*9356374aSAndroid Build Coastguard Worker // ----------------------------------------------------------------------
315*9356374aSAndroid Build Coastguard Worker // CEscape()
316*9356374aSAndroid Build Coastguard Worker // CHexEscape()
317*9356374aSAndroid Build Coastguard Worker // Utf8SafeCEscape()
318*9356374aSAndroid Build Coastguard Worker // Utf8SafeCHexEscape()
319*9356374aSAndroid Build Coastguard Worker //    Escapes 'src' using C-style escape sequences.  This is useful for
320*9356374aSAndroid Build Coastguard Worker //    preparing query flags.  The 'Hex' version uses hexadecimal rather than
321*9356374aSAndroid Build Coastguard Worker //    octal sequences.  The 'Utf8Safe' version does not touch UTF-8 bytes.
322*9356374aSAndroid Build Coastguard Worker //
323*9356374aSAndroid Build Coastguard Worker //    Escaped chars: \n, \r, \t, ", ', \, and !absl::ascii_isprint().
324*9356374aSAndroid Build Coastguard Worker // ----------------------------------------------------------------------
CEscapeInternal(absl::string_view src,bool use_hex,bool utf8_safe)325*9356374aSAndroid Build Coastguard Worker std::string CEscapeInternal(absl::string_view src, bool use_hex,
326*9356374aSAndroid Build Coastguard Worker                             bool utf8_safe) {
327*9356374aSAndroid Build Coastguard Worker   std::string dest;
328*9356374aSAndroid Build Coastguard Worker   bool last_hex_escape = false;  // true if last output char was \xNN.
329*9356374aSAndroid Build Coastguard Worker 
330*9356374aSAndroid Build Coastguard Worker   for (char c : src) {
331*9356374aSAndroid Build Coastguard Worker     bool is_hex_escape = false;
332*9356374aSAndroid Build Coastguard Worker     switch (c) {
333*9356374aSAndroid Build Coastguard Worker       case '\n': dest.append("\\" "n"); break;
334*9356374aSAndroid Build Coastguard Worker       case '\r': dest.append("\\" "r"); break;
335*9356374aSAndroid Build Coastguard Worker       case '\t': dest.append("\\" "t"); break;
336*9356374aSAndroid Build Coastguard Worker       case '\"': dest.append("\\" "\""); break;
337*9356374aSAndroid Build Coastguard Worker       case '\'': dest.append("\\" "'"); break;
338*9356374aSAndroid Build Coastguard Worker       case '\\': dest.append("\\" "\\"); break;
339*9356374aSAndroid Build Coastguard Worker       default: {
340*9356374aSAndroid Build Coastguard Worker         // Note that if we emit \xNN and the src character after that is a hex
341*9356374aSAndroid Build Coastguard Worker         // digit then that digit must be escaped too to prevent it being
342*9356374aSAndroid Build Coastguard Worker         // interpreted as part of the character code by C.
343*9356374aSAndroid Build Coastguard Worker         const unsigned char uc = static_cast<unsigned char>(c);
344*9356374aSAndroid Build Coastguard Worker         if ((!utf8_safe || uc < 0x80) &&
345*9356374aSAndroid Build Coastguard Worker             (!absl::ascii_isprint(uc) ||
346*9356374aSAndroid Build Coastguard Worker              (last_hex_escape && absl::ascii_isxdigit(uc)))) {
347*9356374aSAndroid Build Coastguard Worker           if (use_hex) {
348*9356374aSAndroid Build Coastguard Worker             dest.append("\\" "x");
349*9356374aSAndroid Build Coastguard Worker             dest.push_back(numbers_internal::kHexChar[uc / 16]);
350*9356374aSAndroid Build Coastguard Worker             dest.push_back(numbers_internal::kHexChar[uc % 16]);
351*9356374aSAndroid Build Coastguard Worker             is_hex_escape = true;
352*9356374aSAndroid Build Coastguard Worker           } else {
353*9356374aSAndroid Build Coastguard Worker             dest.append("\\");
354*9356374aSAndroid Build Coastguard Worker             dest.push_back(numbers_internal::kHexChar[uc / 64]);
355*9356374aSAndroid Build Coastguard Worker             dest.push_back(numbers_internal::kHexChar[(uc % 64) / 8]);
356*9356374aSAndroid Build Coastguard Worker             dest.push_back(numbers_internal::kHexChar[uc % 8]);
357*9356374aSAndroid Build Coastguard Worker           }
358*9356374aSAndroid Build Coastguard Worker         } else {
359*9356374aSAndroid Build Coastguard Worker           dest.push_back(c);
360*9356374aSAndroid Build Coastguard Worker           break;
361*9356374aSAndroid Build Coastguard Worker         }
362*9356374aSAndroid Build Coastguard Worker       }
363*9356374aSAndroid Build Coastguard Worker     }
364*9356374aSAndroid Build Coastguard Worker     last_hex_escape = is_hex_escape;
365*9356374aSAndroid Build Coastguard Worker   }
366*9356374aSAndroid Build Coastguard Worker 
367*9356374aSAndroid Build Coastguard Worker   return dest;
368*9356374aSAndroid Build Coastguard Worker }
369*9356374aSAndroid Build Coastguard Worker 
/* clang-format off */
// Escaped width, in bytes, of every possible input byte value:
//   1 - the byte is emitted unchanged,
//   2 - the byte becomes a backslash followed by one character,
//   4 - the byte becomes a backslash followed by three octal digits.
// Each row covers 16 consecutive byte values; the leading annotation is the
// value of the row's first entry.
constexpr unsigned char kCEscapedLen[256] = {
    /* 0x00 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4,  // \t \n \r
    /* 0x10 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    /* 0x20 */ 1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,  // " and '
    /* 0x30 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // digits
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // @, A-O
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,  // P-Z, backslash
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // `, a-o
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,  // p-z, DEL
    /* 0x80 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    /* 0x90 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    /* 0xA0 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    /* 0xB0 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    /* 0xC0 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    /* 0xD0 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    /* 0xE0 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    /* 0xF0 */ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
};
/* clang-format on */
390*9356374aSAndroid Build Coastguard Worker 
391*9356374aSAndroid Build Coastguard Worker // Calculates the length of the C-style escaped version of 'src'.
392*9356374aSAndroid Build Coastguard Worker // Assumes that non-printable characters are escaped using octal sequences, and
393*9356374aSAndroid Build Coastguard Worker // that UTF-8 bytes are not handled specially.
CEscapedLength(absl::string_view src)394*9356374aSAndroid Build Coastguard Worker inline size_t CEscapedLength(absl::string_view src) {
395*9356374aSAndroid Build Coastguard Worker   size_t escaped_len = 0;
396*9356374aSAndroid Build Coastguard Worker   // The maximum value of kCEscapedLen[x] is 4, so we can escape any string of
397*9356374aSAndroid Build Coastguard Worker   // length size_t_max/4 without checking for overflow.
398*9356374aSAndroid Build Coastguard Worker   size_t unchecked_limit =
399*9356374aSAndroid Build Coastguard Worker       std::min<size_t>(src.size(), std::numeric_limits<size_t>::max() / 4);
400*9356374aSAndroid Build Coastguard Worker   size_t i = 0;
401*9356374aSAndroid Build Coastguard Worker   while (i < unchecked_limit) {
402*9356374aSAndroid Build Coastguard Worker     // Common case: No need to check for overflow.
403*9356374aSAndroid Build Coastguard Worker     escaped_len += kCEscapedLen[static_cast<unsigned char>(src[i++])];
404*9356374aSAndroid Build Coastguard Worker   }
405*9356374aSAndroid Build Coastguard Worker   while (i < src.size()) {
406*9356374aSAndroid Build Coastguard Worker     // Beyond unchecked_limit we need to check for overflow before adding.
407*9356374aSAndroid Build Coastguard Worker     size_t char_len = kCEscapedLen[static_cast<unsigned char>(src[i++])];
408*9356374aSAndroid Build Coastguard Worker     ABSL_INTERNAL_CHECK(
409*9356374aSAndroid Build Coastguard Worker         escaped_len <= std::numeric_limits<size_t>::max() - char_len,
410*9356374aSAndroid Build Coastguard Worker         "escaped_len overflow");
411*9356374aSAndroid Build Coastguard Worker     escaped_len += char_len;
412*9356374aSAndroid Build Coastguard Worker   }
413*9356374aSAndroid Build Coastguard Worker   return escaped_len;
414*9356374aSAndroid Build Coastguard Worker }
415*9356374aSAndroid Build Coastguard Worker 
CEscapeAndAppendInternal(absl::string_view src,absl::Nonnull<std::string * > dest)416*9356374aSAndroid Build Coastguard Worker void CEscapeAndAppendInternal(absl::string_view src,
417*9356374aSAndroid Build Coastguard Worker                               absl::Nonnull<std::string*> dest) {
418*9356374aSAndroid Build Coastguard Worker   size_t escaped_len = CEscapedLength(src);
419*9356374aSAndroid Build Coastguard Worker   if (escaped_len == src.size()) {
420*9356374aSAndroid Build Coastguard Worker     dest->append(src.data(), src.size());
421*9356374aSAndroid Build Coastguard Worker     return;
422*9356374aSAndroid Build Coastguard Worker   }
423*9356374aSAndroid Build Coastguard Worker 
424*9356374aSAndroid Build Coastguard Worker   size_t cur_dest_len = dest->size();
425*9356374aSAndroid Build Coastguard Worker   ABSL_INTERNAL_CHECK(
426*9356374aSAndroid Build Coastguard Worker       cur_dest_len <= std::numeric_limits<size_t>::max() - escaped_len,
427*9356374aSAndroid Build Coastguard Worker       "std::string size overflow");
428*9356374aSAndroid Build Coastguard Worker   strings_internal::STLStringResizeUninitialized(dest,
429*9356374aSAndroid Build Coastguard Worker                                                  cur_dest_len + escaped_len);
430*9356374aSAndroid Build Coastguard Worker   char* append_ptr = &(*dest)[cur_dest_len];
431*9356374aSAndroid Build Coastguard Worker 
432*9356374aSAndroid Build Coastguard Worker   for (char c : src) {
433*9356374aSAndroid Build Coastguard Worker     size_t char_len = kCEscapedLen[static_cast<unsigned char>(c)];
434*9356374aSAndroid Build Coastguard Worker     if (char_len == 1) {
435*9356374aSAndroid Build Coastguard Worker       *append_ptr++ = c;
436*9356374aSAndroid Build Coastguard Worker     } else if (char_len == 2) {
437*9356374aSAndroid Build Coastguard Worker       switch (c) {
438*9356374aSAndroid Build Coastguard Worker         case '\n':
439*9356374aSAndroid Build Coastguard Worker           *append_ptr++ = '\\';
440*9356374aSAndroid Build Coastguard Worker           *append_ptr++ = 'n';
441*9356374aSAndroid Build Coastguard Worker           break;
442*9356374aSAndroid Build Coastguard Worker         case '\r':
443*9356374aSAndroid Build Coastguard Worker           *append_ptr++ = '\\';
444*9356374aSAndroid Build Coastguard Worker           *append_ptr++ = 'r';
445*9356374aSAndroid Build Coastguard Worker           break;
446*9356374aSAndroid Build Coastguard Worker         case '\t':
447*9356374aSAndroid Build Coastguard Worker           *append_ptr++ = '\\';
448*9356374aSAndroid Build Coastguard Worker           *append_ptr++ = 't';
449*9356374aSAndroid Build Coastguard Worker           break;
450*9356374aSAndroid Build Coastguard Worker         case '\"':
451*9356374aSAndroid Build Coastguard Worker           *append_ptr++ = '\\';
452*9356374aSAndroid Build Coastguard Worker           *append_ptr++ = '\"';
453*9356374aSAndroid Build Coastguard Worker           break;
454*9356374aSAndroid Build Coastguard Worker         case '\'':
455*9356374aSAndroid Build Coastguard Worker           *append_ptr++ = '\\';
456*9356374aSAndroid Build Coastguard Worker           *append_ptr++ = '\'';
457*9356374aSAndroid Build Coastguard Worker           break;
458*9356374aSAndroid Build Coastguard Worker         case '\\':
459*9356374aSAndroid Build Coastguard Worker           *append_ptr++ = '\\';
460*9356374aSAndroid Build Coastguard Worker           *append_ptr++ = '\\';
461*9356374aSAndroid Build Coastguard Worker           break;
462*9356374aSAndroid Build Coastguard Worker       }
463*9356374aSAndroid Build Coastguard Worker     } else {
464*9356374aSAndroid Build Coastguard Worker       *append_ptr++ = '\\';
465*9356374aSAndroid Build Coastguard Worker       *append_ptr++ = '0' + static_cast<unsigned char>(c) / 64;
466*9356374aSAndroid Build Coastguard Worker       *append_ptr++ = '0' + (static_cast<unsigned char>(c) % 64) / 8;
467*9356374aSAndroid Build Coastguard Worker       *append_ptr++ = '0' + static_cast<unsigned char>(c) % 8;
468*9356374aSAndroid Build Coastguard Worker     }
469*9356374aSAndroid Build Coastguard Worker   }
470*9356374aSAndroid Build Coastguard Worker }
471*9356374aSAndroid Build Coastguard Worker 
472*9356374aSAndroid Build Coastguard Worker // Reverses the mapping in Base64EscapeInternal; see that method's
473*9356374aSAndroid Build Coastguard Worker // documentation for details of the mapping.
Base64UnescapeInternal(absl::Nullable<const char * > src_param,size_t szsrc,absl::Nullable<char * > dest,size_t szdest,absl::Nonnull<const signed char * > unbase64,absl::Nonnull<size_t * > len)474*9356374aSAndroid Build Coastguard Worker bool Base64UnescapeInternal(absl::Nullable<const char*> src_param, size_t szsrc,
475*9356374aSAndroid Build Coastguard Worker                             absl::Nullable<char*> dest, size_t szdest,
476*9356374aSAndroid Build Coastguard Worker                             absl::Nonnull<const signed char*> unbase64,
477*9356374aSAndroid Build Coastguard Worker                             absl::Nonnull<size_t*> len) {
478*9356374aSAndroid Build Coastguard Worker   static const char kPad64Equals = '=';
479*9356374aSAndroid Build Coastguard Worker   static const char kPad64Dot = '.';
480*9356374aSAndroid Build Coastguard Worker 
481*9356374aSAndroid Build Coastguard Worker   size_t destidx = 0;
482*9356374aSAndroid Build Coastguard Worker   int decode = 0;
483*9356374aSAndroid Build Coastguard Worker   int state = 0;
484*9356374aSAndroid Build Coastguard Worker   unsigned char ch = 0;
485*9356374aSAndroid Build Coastguard Worker   unsigned int temp = 0;
486*9356374aSAndroid Build Coastguard Worker 
487*9356374aSAndroid Build Coastguard Worker   // If "char" is signed by default, using *src as an array index results in
488*9356374aSAndroid Build Coastguard Worker   // accessing negative array elements. Treat the input as a pointer to
489*9356374aSAndroid Build Coastguard Worker   // unsigned char to avoid this.
490*9356374aSAndroid Build Coastguard Worker   const unsigned char* src = reinterpret_cast<const unsigned char*>(src_param);
491*9356374aSAndroid Build Coastguard Worker 
492*9356374aSAndroid Build Coastguard Worker   // The GET_INPUT macro gets the next input character, skipping
493*9356374aSAndroid Build Coastguard Worker   // over any whitespace, and stopping when we reach the end of the
494*9356374aSAndroid Build Coastguard Worker   // string or when we read any non-data character.  The arguments are
495*9356374aSAndroid Build Coastguard Worker   // an arbitrary identifier (used as a label for goto) and the number
496*9356374aSAndroid Build Coastguard Worker   // of data bytes that must remain in the input to avoid aborting the
497*9356374aSAndroid Build Coastguard Worker   // loop.
498*9356374aSAndroid Build Coastguard Worker #define GET_INPUT(label, remain)                                \
499*9356374aSAndroid Build Coastguard Worker   label:                                                        \
500*9356374aSAndroid Build Coastguard Worker   --szsrc;                                                      \
501*9356374aSAndroid Build Coastguard Worker   ch = *src++;                                                  \
502*9356374aSAndroid Build Coastguard Worker   decode = unbase64[ch];                                        \
503*9356374aSAndroid Build Coastguard Worker   if (decode < 0) {                                             \
504*9356374aSAndroid Build Coastguard Worker     if (absl::ascii_isspace(ch) && szsrc >= remain) goto label; \
505*9356374aSAndroid Build Coastguard Worker     state = 4 - remain;                                         \
506*9356374aSAndroid Build Coastguard Worker     break;                                                      \
507*9356374aSAndroid Build Coastguard Worker   }
508*9356374aSAndroid Build Coastguard Worker 
509*9356374aSAndroid Build Coastguard Worker   // if dest is null, we're just checking to see if it's legal input
510*9356374aSAndroid Build Coastguard Worker   // rather than producing output.  (I suspect this could just be done
511*9356374aSAndroid Build Coastguard Worker   // with a regexp...).  We duplicate the loop so this test can be
512*9356374aSAndroid Build Coastguard Worker   // outside it instead of in every iteration.
513*9356374aSAndroid Build Coastguard Worker 
514*9356374aSAndroid Build Coastguard Worker   if (dest) {
515*9356374aSAndroid Build Coastguard Worker     // This loop consumes 4 input bytes and produces 3 output bytes
516*9356374aSAndroid Build Coastguard Worker     // per iteration.  We can't know at the start that there is enough
517*9356374aSAndroid Build Coastguard Worker     // data left in the string for a full iteration, so the loop may
518*9356374aSAndroid Build Coastguard Worker     // break out in the middle; if so 'state' will be set to the
519*9356374aSAndroid Build Coastguard Worker     // number of input bytes read.
520*9356374aSAndroid Build Coastguard Worker 
521*9356374aSAndroid Build Coastguard Worker     while (szsrc >= 4) {
522*9356374aSAndroid Build Coastguard Worker       // We'll start by optimistically assuming that the next four
523*9356374aSAndroid Build Coastguard Worker       // bytes of the string (src[0..3]) are four good data bytes
524*9356374aSAndroid Build Coastguard Worker       // (that is, no nulls, whitespace, padding chars, or illegal
525*9356374aSAndroid Build Coastguard Worker       // chars).  We need to test src[0..2] for nulls individually
526*9356374aSAndroid Build Coastguard Worker       // before constructing temp to preserve the property that we
527*9356374aSAndroid Build Coastguard Worker       // never read past a null in the string (no matter how long
528*9356374aSAndroid Build Coastguard Worker       // szsrc claims the string is).
529*9356374aSAndroid Build Coastguard Worker 
530*9356374aSAndroid Build Coastguard Worker       if (!src[0] || !src[1] || !src[2] ||
531*9356374aSAndroid Build Coastguard Worker           ((temp = ((unsigned(unbase64[src[0]]) << 18) |
532*9356374aSAndroid Build Coastguard Worker                     (unsigned(unbase64[src[1]]) << 12) |
533*9356374aSAndroid Build Coastguard Worker                     (unsigned(unbase64[src[2]]) << 6) |
534*9356374aSAndroid Build Coastguard Worker                     (unsigned(unbase64[src[3]])))) &
535*9356374aSAndroid Build Coastguard Worker            0x80000000)) {
536*9356374aSAndroid Build Coastguard Worker         // Iff any of those four characters was bad (null, illegal,
537*9356374aSAndroid Build Coastguard Worker         // whitespace, padding), then temp's high bit will be set
538*9356374aSAndroid Build Coastguard Worker         // (because unbase64[] is -1 for all bad characters).
539*9356374aSAndroid Build Coastguard Worker         //
540*9356374aSAndroid Build Coastguard Worker         // We'll back up and resort to the slower decoder, which knows
541*9356374aSAndroid Build Coastguard Worker         // how to handle those cases.
542*9356374aSAndroid Build Coastguard Worker 
543*9356374aSAndroid Build Coastguard Worker         GET_INPUT(first, 4);
544*9356374aSAndroid Build Coastguard Worker         temp = static_cast<unsigned char>(decode);
545*9356374aSAndroid Build Coastguard Worker         GET_INPUT(second, 3);
546*9356374aSAndroid Build Coastguard Worker         temp = (temp << 6) | static_cast<unsigned char>(decode);
547*9356374aSAndroid Build Coastguard Worker         GET_INPUT(third, 2);
548*9356374aSAndroid Build Coastguard Worker         temp = (temp << 6) | static_cast<unsigned char>(decode);
549*9356374aSAndroid Build Coastguard Worker         GET_INPUT(fourth, 1);
550*9356374aSAndroid Build Coastguard Worker         temp = (temp << 6) | static_cast<unsigned char>(decode);
551*9356374aSAndroid Build Coastguard Worker       } else {
552*9356374aSAndroid Build Coastguard Worker         // We really did have four good data bytes, so advance four
553*9356374aSAndroid Build Coastguard Worker         // characters in the string.
554*9356374aSAndroid Build Coastguard Worker 
555*9356374aSAndroid Build Coastguard Worker         szsrc -= 4;
556*9356374aSAndroid Build Coastguard Worker         src += 4;
557*9356374aSAndroid Build Coastguard Worker       }
558*9356374aSAndroid Build Coastguard Worker 
559*9356374aSAndroid Build Coastguard Worker       // temp has 24 bits of input, so write that out as three bytes.
560*9356374aSAndroid Build Coastguard Worker 
561*9356374aSAndroid Build Coastguard Worker       if (destidx + 3 > szdest) return false;
562*9356374aSAndroid Build Coastguard Worker       dest[destidx + 2] = static_cast<char>(temp);
563*9356374aSAndroid Build Coastguard Worker       temp >>= 8;
564*9356374aSAndroid Build Coastguard Worker       dest[destidx + 1] = static_cast<char>(temp);
565*9356374aSAndroid Build Coastguard Worker       temp >>= 8;
566*9356374aSAndroid Build Coastguard Worker       dest[destidx] = static_cast<char>(temp);
567*9356374aSAndroid Build Coastguard Worker       destidx += 3;
568*9356374aSAndroid Build Coastguard Worker     }
569*9356374aSAndroid Build Coastguard Worker   } else {
570*9356374aSAndroid Build Coastguard Worker     while (szsrc >= 4) {
571*9356374aSAndroid Build Coastguard Worker       if (!src[0] || !src[1] || !src[2] ||
572*9356374aSAndroid Build Coastguard Worker           ((temp = ((unsigned(unbase64[src[0]]) << 18) |
573*9356374aSAndroid Build Coastguard Worker                     (unsigned(unbase64[src[1]]) << 12) |
574*9356374aSAndroid Build Coastguard Worker                     (unsigned(unbase64[src[2]]) << 6) |
575*9356374aSAndroid Build Coastguard Worker                     (unsigned(unbase64[src[3]])))) &
576*9356374aSAndroid Build Coastguard Worker            0x80000000)) {
577*9356374aSAndroid Build Coastguard Worker         GET_INPUT(first_no_dest, 4);
578*9356374aSAndroid Build Coastguard Worker         GET_INPUT(second_no_dest, 3);
579*9356374aSAndroid Build Coastguard Worker         GET_INPUT(third_no_dest, 2);
580*9356374aSAndroid Build Coastguard Worker         GET_INPUT(fourth_no_dest, 1);
581*9356374aSAndroid Build Coastguard Worker       } else {
582*9356374aSAndroid Build Coastguard Worker         szsrc -= 4;
583*9356374aSAndroid Build Coastguard Worker         src += 4;
584*9356374aSAndroid Build Coastguard Worker       }
585*9356374aSAndroid Build Coastguard Worker       destidx += 3;
586*9356374aSAndroid Build Coastguard Worker     }
587*9356374aSAndroid Build Coastguard Worker   }
588*9356374aSAndroid Build Coastguard Worker 
589*9356374aSAndroid Build Coastguard Worker #undef GET_INPUT
590*9356374aSAndroid Build Coastguard Worker 
591*9356374aSAndroid Build Coastguard Worker   // if the loop terminated because we read a bad character, return
592*9356374aSAndroid Build Coastguard Worker   // now.
593*9356374aSAndroid Build Coastguard Worker   if (decode < 0 && ch != kPad64Equals && ch != kPad64Dot &&
594*9356374aSAndroid Build Coastguard Worker       !absl::ascii_isspace(ch))
595*9356374aSAndroid Build Coastguard Worker     return false;
596*9356374aSAndroid Build Coastguard Worker 
597*9356374aSAndroid Build Coastguard Worker   if (ch == kPad64Equals || ch == kPad64Dot) {
598*9356374aSAndroid Build Coastguard Worker     // if we stopped by hitting an '=' or '.', un-read that character -- we'll
599*9356374aSAndroid Build Coastguard Worker     // look at it again when we count to check for the proper number of
600*9356374aSAndroid Build Coastguard Worker     // equals signs at the end.
601*9356374aSAndroid Build Coastguard Worker     ++szsrc;
602*9356374aSAndroid Build Coastguard Worker     --src;
603*9356374aSAndroid Build Coastguard Worker   } else {
604*9356374aSAndroid Build Coastguard Worker     // This loop consumes 1 input byte per iteration.  It's used to
605*9356374aSAndroid Build Coastguard Worker     // clean up the 0-3 input bytes remaining when the first, faster
606*9356374aSAndroid Build Coastguard Worker     // loop finishes.  'temp' contains the data from 'state' input
607*9356374aSAndroid Build Coastguard Worker     // characters read by the first loop.
608*9356374aSAndroid Build Coastguard Worker     while (szsrc > 0) {
609*9356374aSAndroid Build Coastguard Worker       --szsrc;
610*9356374aSAndroid Build Coastguard Worker       ch = *src++;
611*9356374aSAndroid Build Coastguard Worker       decode = unbase64[ch];
612*9356374aSAndroid Build Coastguard Worker       if (decode < 0) {
613*9356374aSAndroid Build Coastguard Worker         if (absl::ascii_isspace(ch)) {
614*9356374aSAndroid Build Coastguard Worker           continue;
615*9356374aSAndroid Build Coastguard Worker         } else if (ch == kPad64Equals || ch == kPad64Dot) {
616*9356374aSAndroid Build Coastguard Worker           // back up one character; we'll read it again when we check
617*9356374aSAndroid Build Coastguard Worker           // for the correct number of pad characters at the end.
618*9356374aSAndroid Build Coastguard Worker           ++szsrc;
619*9356374aSAndroid Build Coastguard Worker           --src;
620*9356374aSAndroid Build Coastguard Worker           break;
621*9356374aSAndroid Build Coastguard Worker         } else {
622*9356374aSAndroid Build Coastguard Worker           return false;
623*9356374aSAndroid Build Coastguard Worker         }
624*9356374aSAndroid Build Coastguard Worker       }
625*9356374aSAndroid Build Coastguard Worker 
626*9356374aSAndroid Build Coastguard Worker       // Each input character gives us six bits of output.
627*9356374aSAndroid Build Coastguard Worker       temp = (temp << 6) | static_cast<unsigned char>(decode);
628*9356374aSAndroid Build Coastguard Worker       ++state;
629*9356374aSAndroid Build Coastguard Worker       if (state == 4) {
630*9356374aSAndroid Build Coastguard Worker         // If we've accumulated 24 bits of output, write that out as
631*9356374aSAndroid Build Coastguard Worker         // three bytes.
632*9356374aSAndroid Build Coastguard Worker         if (dest) {
633*9356374aSAndroid Build Coastguard Worker           if (destidx + 3 > szdest) return false;
634*9356374aSAndroid Build Coastguard Worker           dest[destidx + 2] = static_cast<char>(temp);
635*9356374aSAndroid Build Coastguard Worker           temp >>= 8;
636*9356374aSAndroid Build Coastguard Worker           dest[destidx + 1] = static_cast<char>(temp);
637*9356374aSAndroid Build Coastguard Worker           temp >>= 8;
638*9356374aSAndroid Build Coastguard Worker           dest[destidx] = static_cast<char>(temp);
639*9356374aSAndroid Build Coastguard Worker         }
640*9356374aSAndroid Build Coastguard Worker         destidx += 3;
641*9356374aSAndroid Build Coastguard Worker         state = 0;
642*9356374aSAndroid Build Coastguard Worker         temp = 0;
643*9356374aSAndroid Build Coastguard Worker       }
644*9356374aSAndroid Build Coastguard Worker     }
645*9356374aSAndroid Build Coastguard Worker   }
646*9356374aSAndroid Build Coastguard Worker 
647*9356374aSAndroid Build Coastguard Worker   // Process the leftover data contained in 'temp' at the end of the input.
648*9356374aSAndroid Build Coastguard Worker   int expected_equals = 0;
649*9356374aSAndroid Build Coastguard Worker   switch (state) {
650*9356374aSAndroid Build Coastguard Worker     case 0:
651*9356374aSAndroid Build Coastguard Worker       // Nothing left over; output is a multiple of 3 bytes.
652*9356374aSAndroid Build Coastguard Worker       break;
653*9356374aSAndroid Build Coastguard Worker 
654*9356374aSAndroid Build Coastguard Worker     case 1:
655*9356374aSAndroid Build Coastguard Worker       // Bad input; we have 6 bits left over.
656*9356374aSAndroid Build Coastguard Worker       return false;
657*9356374aSAndroid Build Coastguard Worker 
658*9356374aSAndroid Build Coastguard Worker     case 2:
659*9356374aSAndroid Build Coastguard Worker       // Produce one more output byte from the 12 input bits we have left.
660*9356374aSAndroid Build Coastguard Worker       if (dest) {
661*9356374aSAndroid Build Coastguard Worker         if (destidx + 1 > szdest) return false;
662*9356374aSAndroid Build Coastguard Worker         temp >>= 4;
663*9356374aSAndroid Build Coastguard Worker         dest[destidx] = static_cast<char>(temp);
664*9356374aSAndroid Build Coastguard Worker       }
665*9356374aSAndroid Build Coastguard Worker       ++destidx;
666*9356374aSAndroid Build Coastguard Worker       expected_equals = 2;
667*9356374aSAndroid Build Coastguard Worker       break;
668*9356374aSAndroid Build Coastguard Worker 
669*9356374aSAndroid Build Coastguard Worker     case 3:
670*9356374aSAndroid Build Coastguard Worker       // Produce two more output bytes from the 18 input bits we have left.
671*9356374aSAndroid Build Coastguard Worker       if (dest) {
672*9356374aSAndroid Build Coastguard Worker         if (destidx + 2 > szdest) return false;
673*9356374aSAndroid Build Coastguard Worker         temp >>= 2;
674*9356374aSAndroid Build Coastguard Worker         dest[destidx + 1] = static_cast<char>(temp);
675*9356374aSAndroid Build Coastguard Worker         temp >>= 8;
676*9356374aSAndroid Build Coastguard Worker         dest[destidx] = static_cast<char>(temp);
677*9356374aSAndroid Build Coastguard Worker       }
678*9356374aSAndroid Build Coastguard Worker       destidx += 2;
679*9356374aSAndroid Build Coastguard Worker       expected_equals = 1;
680*9356374aSAndroid Build Coastguard Worker       break;
681*9356374aSAndroid Build Coastguard Worker 
682*9356374aSAndroid Build Coastguard Worker     default:
683*9356374aSAndroid Build Coastguard Worker       // state should have no other values at this point.
684*9356374aSAndroid Build Coastguard Worker       ABSL_RAW_LOG(FATAL, "This can't happen; base64 decoder state = %d",
685*9356374aSAndroid Build Coastguard Worker                    state);
686*9356374aSAndroid Build Coastguard Worker   }
687*9356374aSAndroid Build Coastguard Worker 
688*9356374aSAndroid Build Coastguard Worker   // The remainder of the string should be all whitespace, mixed with
689*9356374aSAndroid Build Coastguard Worker   // exactly 0 equals signs, or exactly 'expected_equals' equals
690*9356374aSAndroid Build Coastguard Worker   // signs.  (Always accepting 0 equals signs is an Abseil extension
691*9356374aSAndroid Build Coastguard Worker   // not covered in the RFC, as is accepting dot as the pad character.)
692*9356374aSAndroid Build Coastguard Worker 
693*9356374aSAndroid Build Coastguard Worker   int equals = 0;
694*9356374aSAndroid Build Coastguard Worker   while (szsrc > 0) {
695*9356374aSAndroid Build Coastguard Worker     if (*src == kPad64Equals || *src == kPad64Dot)
696*9356374aSAndroid Build Coastguard Worker       ++equals;
697*9356374aSAndroid Build Coastguard Worker     else if (!absl::ascii_isspace(*src))
698*9356374aSAndroid Build Coastguard Worker       return false;
699*9356374aSAndroid Build Coastguard Worker     --szsrc;
700*9356374aSAndroid Build Coastguard Worker     ++src;
701*9356374aSAndroid Build Coastguard Worker   }
702*9356374aSAndroid Build Coastguard Worker 
703*9356374aSAndroid Build Coastguard Worker   const bool ok = (equals == 0 || equals == expected_equals);
704*9356374aSAndroid Build Coastguard Worker   if (ok) *len = destidx;
705*9356374aSAndroid Build Coastguard Worker   return ok;
706*9356374aSAndroid Build Coastguard Worker }
707*9356374aSAndroid Build Coastguard Worker 
708*9356374aSAndroid Build Coastguard Worker // The arrays below map base64-escaped characters back to their original values.
709*9356374aSAndroid Build Coastguard Worker // For the inverse case, see k(WebSafe)Base64Chars in the internal
710*9356374aSAndroid Build Coastguard Worker // escaping.cc.
711*9356374aSAndroid Build Coastguard Worker // These arrays were generated by the following inversion code:
712*9356374aSAndroid Build Coastguard Worker // #include <sys/time.h>
713*9356374aSAndroid Build Coastguard Worker // #include <stdlib.h>
714*9356374aSAndroid Build Coastguard Worker // #include <string.h>
715*9356374aSAndroid Build Coastguard Worker // main()
716*9356374aSAndroid Build Coastguard Worker // {
717*9356374aSAndroid Build Coastguard Worker //   static const char Base64[] =
718*9356374aSAndroid Build Coastguard Worker //     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
719*9356374aSAndroid Build Coastguard Worker //   char* pos;
720*9356374aSAndroid Build Coastguard Worker //   int idx, i, j;
721*9356374aSAndroid Build Coastguard Worker //   printf("    ");
722*9356374aSAndroid Build Coastguard Worker //   for (i = 0; i < 255; i += 8) {
723*9356374aSAndroid Build Coastguard Worker //     for (j = i; j < i + 8; j++) {
724*9356374aSAndroid Build Coastguard Worker //       pos = strchr(Base64, j);
725*9356374aSAndroid Build Coastguard Worker //       if ((pos == nullptr) || (j == 0))
726*9356374aSAndroid Build Coastguard Worker //         idx = -1;
727*9356374aSAndroid Build Coastguard Worker //       else
728*9356374aSAndroid Build Coastguard Worker //         idx = pos - Base64;
729*9356374aSAndroid Build Coastguard Worker //       if (idx == -1)
730*9356374aSAndroid Build Coastguard Worker //         printf(" %2d,     ", idx);
731*9356374aSAndroid Build Coastguard Worker //       else
732*9356374aSAndroid Build Coastguard Worker //         printf(" %2d/*%c*/,", idx, j);
733*9356374aSAndroid Build Coastguard Worker //     }
734*9356374aSAndroid Build Coastguard Worker //     printf("\n    ");
735*9356374aSAndroid Build Coastguard Worker //   }
736*9356374aSAndroid Build Coastguard Worker // }
737*9356374aSAndroid Build Coastguard Worker //
738*9356374aSAndroid Build Coastguard Worker // where the value of "Base64[]" was replaced by one of k(WebSafe)Base64Chars
739*9356374aSAndroid Build Coastguard Worker // in the internal escaping.cc.
740*9356374aSAndroid Build Coastguard Worker /* clang-format off */
// Decode table for the standard base64 alphabet: entry [c] is the 6-bit value
// encoded by character c, or -1 when c is not part of the alphabet. Each row
// covers 16 consecutive character codes; the leading annotation is the code
// of the row's first entry.
constexpr signed char kUnBase64[] = {
    /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,  // '+' and '/'
    /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,  // '0'..'9'
    /* 0x40 */ -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,  // 'A'..'O'
    /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,  // 'P'..'Z'
    /* 0x60 */ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,  // 'a'..'o'
    /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,  // 'p'..'z'
    /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xA0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xB0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xC0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xD0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xE0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xF0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
775*9356374aSAndroid Build Coastguard Worker 
// Reverse lookup table for the web-safe base64 alphabet.  It has one entry
// per possible byte value (256 in total).  An entry is the 6-bit value of the
// corresponding character:
//   'A'..'Z' -> 0..25, 'a'..'z' -> 26..51, '0'..'9' -> 52..61,
//   '-' -> 62, '_' -> 63,
// and -1 for every byte that is not part of the alphabet.
constexpr signed char kUnWebSafeBase64[] = {
    // 0x00 - 0x0F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x10 - 0x1F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20 - 0x2F: only '-' (0x2D) belongs to the alphabet
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1,
    // 0x30 - 0x3F: '0'..'9'
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
    // 0x40 - 0x4F: '@' (invalid), then 'A'..'O'
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    // 0x50 - 0x5F: 'P'..'Z', four invalid bytes, then '_' (0x5F)
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
    // 0x60 - 0x6F: '`' (invalid), then 'a'..'o'
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    // 0x70 - 0x7F: 'p'..'z', then five invalid bytes
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
    // 0x80 - 0x8F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x90 - 0x9F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xA0 - 0xAF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xB0 - 0xBF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xC0 - 0xCF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xD0 - 0xDF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xE0 - 0xEF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0xF0 - 0xFF
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
810*9356374aSAndroid Build Coastguard Worker /* clang-format on */
811*9356374aSAndroid Build Coastguard Worker 
812*9356374aSAndroid Build Coastguard Worker template <typename String>
Base64UnescapeInternal(absl::Nullable<const char * > src,size_t slen,absl::Nonnull<String * > dest,absl::Nonnull<const signed char * > unbase64)813*9356374aSAndroid Build Coastguard Worker bool Base64UnescapeInternal(absl::Nullable<const char*> src, size_t slen,
814*9356374aSAndroid Build Coastguard Worker                             absl::Nonnull<String*> dest,
815*9356374aSAndroid Build Coastguard Worker                             absl::Nonnull<const signed char*> unbase64) {
816*9356374aSAndroid Build Coastguard Worker   // Determine the size of the output string.  Base64 encodes every 3 bytes into
817*9356374aSAndroid Build Coastguard Worker   // 4 characters.  Any leftover chars are added directly for good measure.
818*9356374aSAndroid Build Coastguard Worker   const size_t dest_len = 3 * (slen / 4) + (slen % 4);
819*9356374aSAndroid Build Coastguard Worker 
820*9356374aSAndroid Build Coastguard Worker   strings_internal::STLStringResizeUninitialized(dest, dest_len);
821*9356374aSAndroid Build Coastguard Worker 
822*9356374aSAndroid Build Coastguard Worker   // We are getting the destination buffer by getting the beginning of the
823*9356374aSAndroid Build Coastguard Worker   // string and converting it into a char *.
824*9356374aSAndroid Build Coastguard Worker   size_t len;
825*9356374aSAndroid Build Coastguard Worker   const bool ok =
826*9356374aSAndroid Build Coastguard Worker       Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len);
827*9356374aSAndroid Build Coastguard Worker   if (!ok) {
828*9356374aSAndroid Build Coastguard Worker     dest->clear();
829*9356374aSAndroid Build Coastguard Worker     return false;
830*9356374aSAndroid Build Coastguard Worker   }
831*9356374aSAndroid Build Coastguard Worker 
832*9356374aSAndroid Build Coastguard Worker   // could be shorter if there was padding
833*9356374aSAndroid Build Coastguard Worker   assert(len <= dest_len);
834*9356374aSAndroid Build Coastguard Worker   dest->erase(len);
835*9356374aSAndroid Build Coastguard Worker 
836*9356374aSAndroid Build Coastguard Worker   return true;
837*9356374aSAndroid Build Coastguard Worker }
838*9356374aSAndroid Build Coastguard Worker 
839*9356374aSAndroid Build Coastguard Worker /* clang-format off */
// Lenient hex-digit lookup table with one entry per byte value: '0'..'9' map
// to 0..9, 'A'..'F' and 'a'..'f' map to 10..15, and every other byte maps
// to 0.
constexpr char kHexValueLenient[256] = {
    /* 0x00 */ 0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 */ 0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 */ 0, 1,  2,  3,  4,  5,  6,  7, 8, 9, 0, 0, 0, 0, 0, 0,  // 0-9
    /* 0x40 */ 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // A-F
    /* 0x50 */ 0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x60 */ 0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // a-f
    /* 0x70 */ 0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x80 */ 0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD0 */ 0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE0 */ 0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xF0 */ 0, 0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0,
};
858*9356374aSAndroid Build Coastguard Worker 
// Strict hex-digit lookup table with one entry per byte value: '0'..'9' map
// to 0..9, 'A'..'F' and 'a'..'f' map to 10..15, and every other byte maps
// to -1 so that callers can detect invalid input.
constexpr signed char kHexValueStrict[256] = {
    /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0-9  */  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1,
    /* A-F  */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x50 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* a-f  */ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x70 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xA0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xB0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xC0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xD0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xE0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    /* 0xF0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
};
877*9356374aSAndroid Build Coastguard Worker /* clang-format on */
878*9356374aSAndroid Build Coastguard Worker 
879*9356374aSAndroid Build Coastguard Worker // This is a templated function so that T can be either a char*
880*9356374aSAndroid Build Coastguard Worker // or a string.  This works because we use the [] operator to access
881*9356374aSAndroid Build Coastguard Worker // individual characters at a time.
882*9356374aSAndroid Build Coastguard Worker template <typename T>
HexStringToBytesInternal(absl::Nullable<const char * > from,T to,size_t num)883*9356374aSAndroid Build Coastguard Worker void HexStringToBytesInternal(absl::Nullable<const char*> from, T to,
884*9356374aSAndroid Build Coastguard Worker                               size_t num) {
885*9356374aSAndroid Build Coastguard Worker   for (size_t i = 0; i < num; i++) {
886*9356374aSAndroid Build Coastguard Worker     to[i] = static_cast<char>(kHexValueLenient[from[i * 2] & 0xFF] << 4) +
887*9356374aSAndroid Build Coastguard Worker             (kHexValueLenient[from[i * 2 + 1] & 0xFF]);
888*9356374aSAndroid Build Coastguard Worker   }
889*9356374aSAndroid Build Coastguard Worker }
890*9356374aSAndroid Build Coastguard Worker 
891*9356374aSAndroid Build Coastguard Worker // This is a templated function so that T can be either a char* or a
892*9356374aSAndroid Build Coastguard Worker // std::string.
893*9356374aSAndroid Build Coastguard Worker template <typename T>
BytesToHexStringInternal(absl::Nullable<const unsigned char * > src,T dest,size_t num)894*9356374aSAndroid Build Coastguard Worker void BytesToHexStringInternal(absl::Nullable<const unsigned char*> src, T dest,
895*9356374aSAndroid Build Coastguard Worker                               size_t num) {
896*9356374aSAndroid Build Coastguard Worker   auto dest_ptr = &dest[0];
897*9356374aSAndroid Build Coastguard Worker   for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) {
898*9356374aSAndroid Build Coastguard Worker     const char* hex_p = &numbers_internal::kHexTable[*src_ptr * 2];
899*9356374aSAndroid Build Coastguard Worker     std::copy(hex_p, hex_p + 2, dest_ptr);
900*9356374aSAndroid Build Coastguard Worker   }
901*9356374aSAndroid Build Coastguard Worker }
902*9356374aSAndroid Build Coastguard Worker 
903*9356374aSAndroid Build Coastguard Worker }  // namespace
904*9356374aSAndroid Build Coastguard Worker 
905*9356374aSAndroid Build Coastguard Worker // ----------------------------------------------------------------------
906*9356374aSAndroid Build Coastguard Worker // CUnescape()
907*9356374aSAndroid Build Coastguard Worker //
908*9356374aSAndroid Build Coastguard Worker // See CUnescapeInternal() for implementation details.
909*9356374aSAndroid Build Coastguard Worker // ----------------------------------------------------------------------
CUnescape(absl::string_view source,absl::Nonnull<std::string * > dest,absl::Nullable<std::string * > error)910*9356374aSAndroid Build Coastguard Worker bool CUnescape(absl::string_view source, absl::Nonnull<std::string*> dest,
911*9356374aSAndroid Build Coastguard Worker                absl::Nullable<std::string*> error) {
912*9356374aSAndroid Build Coastguard Worker   return CUnescapeInternal(source, kUnescapeNulls, dest, error);
913*9356374aSAndroid Build Coastguard Worker }
914*9356374aSAndroid Build Coastguard Worker 
CEscape(absl::string_view src)915*9356374aSAndroid Build Coastguard Worker std::string CEscape(absl::string_view src) {
916*9356374aSAndroid Build Coastguard Worker   std::string dest;
917*9356374aSAndroid Build Coastguard Worker   CEscapeAndAppendInternal(src, &dest);
918*9356374aSAndroid Build Coastguard Worker   return dest;
919*9356374aSAndroid Build Coastguard Worker }
920*9356374aSAndroid Build Coastguard Worker 
CHexEscape(absl::string_view src)921*9356374aSAndroid Build Coastguard Worker std::string CHexEscape(absl::string_view src) {
922*9356374aSAndroid Build Coastguard Worker   return CEscapeInternal(src, true, false);
923*9356374aSAndroid Build Coastguard Worker }
924*9356374aSAndroid Build Coastguard Worker 
Utf8SafeCEscape(absl::string_view src)925*9356374aSAndroid Build Coastguard Worker std::string Utf8SafeCEscape(absl::string_view src) {
926*9356374aSAndroid Build Coastguard Worker   return CEscapeInternal(src, false, true);
927*9356374aSAndroid Build Coastguard Worker }
928*9356374aSAndroid Build Coastguard Worker 
Utf8SafeCHexEscape(absl::string_view src)929*9356374aSAndroid Build Coastguard Worker std::string Utf8SafeCHexEscape(absl::string_view src) {
930*9356374aSAndroid Build Coastguard Worker   return CEscapeInternal(src, true, true);
931*9356374aSAndroid Build Coastguard Worker }
932*9356374aSAndroid Build Coastguard Worker 
Base64Unescape(absl::string_view src,absl::Nonnull<std::string * > dest)933*9356374aSAndroid Build Coastguard Worker bool Base64Unescape(absl::string_view src, absl::Nonnull<std::string*> dest) {
934*9356374aSAndroid Build Coastguard Worker   return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);
935*9356374aSAndroid Build Coastguard Worker }
936*9356374aSAndroid Build Coastguard Worker 
WebSafeBase64Unescape(absl::string_view src,absl::Nonnull<std::string * > dest)937*9356374aSAndroid Build Coastguard Worker bool WebSafeBase64Unescape(absl::string_view src,
938*9356374aSAndroid Build Coastguard Worker                            absl::Nonnull<std::string*> dest) {
939*9356374aSAndroid Build Coastguard Worker   return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64);
940*9356374aSAndroid Build Coastguard Worker }
941*9356374aSAndroid Build Coastguard Worker 
Base64Escape(absl::string_view src,absl::Nonnull<std::string * > dest)942*9356374aSAndroid Build Coastguard Worker void Base64Escape(absl::string_view src, absl::Nonnull<std::string*> dest) {
943*9356374aSAndroid Build Coastguard Worker   strings_internal::Base64EscapeInternal(
944*9356374aSAndroid Build Coastguard Worker       reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest,
945*9356374aSAndroid Build Coastguard Worker       true, strings_internal::kBase64Chars);
946*9356374aSAndroid Build Coastguard Worker }
947*9356374aSAndroid Build Coastguard Worker 
WebSafeBase64Escape(absl::string_view src,absl::Nonnull<std::string * > dest)948*9356374aSAndroid Build Coastguard Worker void WebSafeBase64Escape(absl::string_view src,
949*9356374aSAndroid Build Coastguard Worker                          absl::Nonnull<std::string*> dest) {
950*9356374aSAndroid Build Coastguard Worker   strings_internal::Base64EscapeInternal(
951*9356374aSAndroid Build Coastguard Worker       reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest,
952*9356374aSAndroid Build Coastguard Worker       false, strings_internal::kWebSafeBase64Chars);
953*9356374aSAndroid Build Coastguard Worker }
954*9356374aSAndroid Build Coastguard Worker 
Base64Escape(absl::string_view src)955*9356374aSAndroid Build Coastguard Worker std::string Base64Escape(absl::string_view src) {
956*9356374aSAndroid Build Coastguard Worker   std::string dest;
957*9356374aSAndroid Build Coastguard Worker   strings_internal::Base64EscapeInternal(
958*9356374aSAndroid Build Coastguard Worker       reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest,
959*9356374aSAndroid Build Coastguard Worker       true, strings_internal::kBase64Chars);
960*9356374aSAndroid Build Coastguard Worker   return dest;
961*9356374aSAndroid Build Coastguard Worker }
962*9356374aSAndroid Build Coastguard Worker 
WebSafeBase64Escape(absl::string_view src)963*9356374aSAndroid Build Coastguard Worker std::string WebSafeBase64Escape(absl::string_view src) {
964*9356374aSAndroid Build Coastguard Worker   std::string dest;
965*9356374aSAndroid Build Coastguard Worker   strings_internal::Base64EscapeInternal(
966*9356374aSAndroid Build Coastguard Worker       reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest,
967*9356374aSAndroid Build Coastguard Worker       false, strings_internal::kWebSafeBase64Chars);
968*9356374aSAndroid Build Coastguard Worker   return dest;
969*9356374aSAndroid Build Coastguard Worker }
970*9356374aSAndroid Build Coastguard Worker 
HexStringToBytes(absl::string_view hex,absl::Nonnull<std::string * > bytes)971*9356374aSAndroid Build Coastguard Worker bool HexStringToBytes(absl::string_view hex,
972*9356374aSAndroid Build Coastguard Worker                       absl::Nonnull<std::string*> bytes) {
973*9356374aSAndroid Build Coastguard Worker   std::string output;
974*9356374aSAndroid Build Coastguard Worker 
975*9356374aSAndroid Build Coastguard Worker   size_t num_bytes = hex.size() / 2;
976*9356374aSAndroid Build Coastguard Worker   if (hex.size() != num_bytes * 2) {
977*9356374aSAndroid Build Coastguard Worker     return false;
978*9356374aSAndroid Build Coastguard Worker   }
979*9356374aSAndroid Build Coastguard Worker 
980*9356374aSAndroid Build Coastguard Worker   absl::strings_internal::STLStringResizeUninitialized(&output, num_bytes);
981*9356374aSAndroid Build Coastguard Worker   auto hex_p = hex.cbegin();
982*9356374aSAndroid Build Coastguard Worker   for (std::string::iterator bin_p = output.begin(); bin_p != output.end();
983*9356374aSAndroid Build Coastguard Worker        ++bin_p) {
984*9356374aSAndroid Build Coastguard Worker     int h1 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)];
985*9356374aSAndroid Build Coastguard Worker     int h2 = absl::kHexValueStrict[static_cast<size_t>(*hex_p++)];
986*9356374aSAndroid Build Coastguard Worker     if (h1 == -1 || h2 == -1) {
987*9356374aSAndroid Build Coastguard Worker       output.resize(static_cast<size_t>(bin_p - output.begin()));
988*9356374aSAndroid Build Coastguard Worker       return false;
989*9356374aSAndroid Build Coastguard Worker     }
990*9356374aSAndroid Build Coastguard Worker     *bin_p = static_cast<char>((h1 << 4) + h2);
991*9356374aSAndroid Build Coastguard Worker   }
992*9356374aSAndroid Build Coastguard Worker 
993*9356374aSAndroid Build Coastguard Worker   *bytes = std::move(output);
994*9356374aSAndroid Build Coastguard Worker   return true;
995*9356374aSAndroid Build Coastguard Worker }
996*9356374aSAndroid Build Coastguard Worker 
HexStringToBytes(absl::string_view from)997*9356374aSAndroid Build Coastguard Worker std::string HexStringToBytes(absl::string_view from) {
998*9356374aSAndroid Build Coastguard Worker   std::string result;
999*9356374aSAndroid Build Coastguard Worker   const auto num = from.size() / 2;
1000*9356374aSAndroid Build Coastguard Worker   strings_internal::STLStringResizeUninitialized(&result, num);
1001*9356374aSAndroid Build Coastguard Worker   absl::HexStringToBytesInternal<std::string&>(from.data(), result, num);
1002*9356374aSAndroid Build Coastguard Worker   return result;
1003*9356374aSAndroid Build Coastguard Worker }
1004*9356374aSAndroid Build Coastguard Worker 
BytesToHexString(absl::string_view from)1005*9356374aSAndroid Build Coastguard Worker std::string BytesToHexString(absl::string_view from) {
1006*9356374aSAndroid Build Coastguard Worker   std::string result;
1007*9356374aSAndroid Build Coastguard Worker   strings_internal::STLStringResizeUninitialized(&result, 2 * from.size());
1008*9356374aSAndroid Build Coastguard Worker   absl::BytesToHexStringInternal<std::string&>(
1009*9356374aSAndroid Build Coastguard Worker       reinterpret_cast<const unsigned char*>(from.data()), result, from.size());
1010*9356374aSAndroid Build Coastguard Worker   return result;
1011*9356374aSAndroid Build Coastguard Worker }
1012*9356374aSAndroid Build Coastguard Worker 
1013*9356374aSAndroid Build Coastguard Worker ABSL_NAMESPACE_END
1014*9356374aSAndroid Build Coastguard Worker }  // namespace absl
1015