#include "strings/escaping.h"

#include <algorithm>
#include <cassert>
#include <cstddef>

#include "android-base/logging.h"
#include "strings/ascii_ctype.h"
7*a62be085SSadaf Ebrahimi
8*a62be085SSadaf Ebrahimi namespace dynamic_depth {
9*a62be085SSadaf Ebrahimi
// ----------------------------------------------------------------------
// Base64Unescape()        - base64 decoder
// Base64Escape()          - base64 encoder
// WebSafeBase64Unescape() - Google's variation of base64 decoder
// WebSafeBase64Escape()   - Google's variation of base64 encoder
//
// Check out
// http://tools.ietf.org/html/rfc2045 for formal description, but what we
// care about is that...
//   Take the encoded stuff in groups of 4 characters and turn each
//   character into a code 0 to 63 thus:
//           A-Z map to 0 to 25
//           a-z map to 26 to 51
//           0-9 map to 52 to 61
//           +(- for WebSafe) maps to 62
//           /(_ for WebSafe) maps to 63
//   There will be four numbers, all less than 64 which can be represented
//   by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
//   Arrange the 6 digit binary numbers into three bytes as such:
//   aaaaaabb bbbbcccc ccdddddd
//   Equals signs (one or two) are used at the end of the encoded block to
//   indicate that the text was not an integer multiple of three bytes long.
// ----------------------------------------------------------------------
33*a62be085SSadaf Ebrahimi
Base64UnescapeInternal(const char * src_param,size_t szsrc,char * dest,size_t szdest,const signed char * unbase64,size_t * len)34*a62be085SSadaf Ebrahimi bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
35*a62be085SSadaf Ebrahimi size_t szdest, const signed char* unbase64,
36*a62be085SSadaf Ebrahimi size_t* len) {
37*a62be085SSadaf Ebrahimi static const char kPad64Equals = '=';
38*a62be085SSadaf Ebrahimi static const char kPad64Dot = '.';
39*a62be085SSadaf Ebrahimi
40*a62be085SSadaf Ebrahimi size_t destidx = 0;
41*a62be085SSadaf Ebrahimi int decode = 0;
42*a62be085SSadaf Ebrahimi int state = 0;
43*a62be085SSadaf Ebrahimi unsigned int ch = 0;
44*a62be085SSadaf Ebrahimi unsigned int temp = 0;
45*a62be085SSadaf Ebrahimi
46*a62be085SSadaf Ebrahimi // If "char" is signed by default, using *src as an array index results in
47*a62be085SSadaf Ebrahimi // accessing negative array elements. Treat the input as a pointer to
48*a62be085SSadaf Ebrahimi // unsigned char to avoid this.
49*a62be085SSadaf Ebrahimi const unsigned char* src = reinterpret_cast<const unsigned char*>(src_param);
50*a62be085SSadaf Ebrahimi
51*a62be085SSadaf Ebrahimi // The GET_INPUT macro gets the next input character, skipping
52*a62be085SSadaf Ebrahimi // over any whitespace, and stopping when we reach the end of the
53*a62be085SSadaf Ebrahimi // string or when we read any non-data character. The arguments are
54*a62be085SSadaf Ebrahimi // an arbitrary identifier (used as a label for goto) and the number
55*a62be085SSadaf Ebrahimi // of data bytes that must remain in the input to avoid aborting the
56*a62be085SSadaf Ebrahimi // loop.
57*a62be085SSadaf Ebrahimi #define GET_INPUT(label, remain) \
58*a62be085SSadaf Ebrahimi label: \
59*a62be085SSadaf Ebrahimi --szsrc; \
60*a62be085SSadaf Ebrahimi ch = *src++; \
61*a62be085SSadaf Ebrahimi decode = unbase64[ch]; \
62*a62be085SSadaf Ebrahimi if (decode < 0) { \
63*a62be085SSadaf Ebrahimi if (ascii_isspace(ch) && szsrc >= remain) goto label; \
64*a62be085SSadaf Ebrahimi state = 4 - remain; \
65*a62be085SSadaf Ebrahimi break; \
66*a62be085SSadaf Ebrahimi }
67*a62be085SSadaf Ebrahimi
68*a62be085SSadaf Ebrahimi // if dest is null, we're just checking to see if it's legal input
69*a62be085SSadaf Ebrahimi // rather than producing output. (I suspect this could just be done
70*a62be085SSadaf Ebrahimi // with a regexp...). We duplicate the loop so this test can be
71*a62be085SSadaf Ebrahimi // outside it instead of in every iteration.
72*a62be085SSadaf Ebrahimi
73*a62be085SSadaf Ebrahimi if (dest) {
74*a62be085SSadaf Ebrahimi // This loop consumes 4 input bytes and produces 3 output bytes
75*a62be085SSadaf Ebrahimi // per iteration. We can't know at the start that there is enough
76*a62be085SSadaf Ebrahimi // data left in the string for a full iteration, so the loop may
77*a62be085SSadaf Ebrahimi // break out in the middle; if so 'state' will be set to the
78*a62be085SSadaf Ebrahimi // number of input bytes read.
79*a62be085SSadaf Ebrahimi
80*a62be085SSadaf Ebrahimi while (szsrc >= 4) {
81*a62be085SSadaf Ebrahimi // We'll start by optimistically assuming that the next four
82*a62be085SSadaf Ebrahimi // bytes of the string (src[0..3]) are four good data bytes
83*a62be085SSadaf Ebrahimi // (that is, no nulls, whitespace, padding chars, or illegal
84*a62be085SSadaf Ebrahimi // chars). We need to test src[0..2] for nulls individually
85*a62be085SSadaf Ebrahimi // before constructing temp to preserve the property that we
86*a62be085SSadaf Ebrahimi // never read past a null in the string (no matter how long
87*a62be085SSadaf Ebrahimi // szsrc claims the string is).
88*a62be085SSadaf Ebrahimi
89*a62be085SSadaf Ebrahimi if (!src[0] || !src[1] || !src[2] ||
90*a62be085SSadaf Ebrahimi ((temp = ((unsigned(unbase64[src[0]]) << 18) |
91*a62be085SSadaf Ebrahimi (unsigned(unbase64[src[1]]) << 12) |
92*a62be085SSadaf Ebrahimi (unsigned(unbase64[src[2]]) << 6) |
93*a62be085SSadaf Ebrahimi (unsigned(unbase64[src[3]])))) &
94*a62be085SSadaf Ebrahimi 0x80000000)) {
95*a62be085SSadaf Ebrahimi // Iff any of those four characters was bad (null, illegal,
96*a62be085SSadaf Ebrahimi // whitespace, padding), then temp's high bit will be set
97*a62be085SSadaf Ebrahimi // (because unbase64[] is -1 for all bad characters).
98*a62be085SSadaf Ebrahimi //
99*a62be085SSadaf Ebrahimi // We'll back up and resort to the slower decoder, which knows
100*a62be085SSadaf Ebrahimi // how to handle those cases.
101*a62be085SSadaf Ebrahimi
102*a62be085SSadaf Ebrahimi GET_INPUT(first, 4);
103*a62be085SSadaf Ebrahimi temp = decode;
104*a62be085SSadaf Ebrahimi GET_INPUT(second, 3);
105*a62be085SSadaf Ebrahimi temp = (temp << 6) | decode;
106*a62be085SSadaf Ebrahimi GET_INPUT(third, 2);
107*a62be085SSadaf Ebrahimi temp = (temp << 6) | decode;
108*a62be085SSadaf Ebrahimi GET_INPUT(fourth, 1);
109*a62be085SSadaf Ebrahimi temp = (temp << 6) | decode;
110*a62be085SSadaf Ebrahimi } else {
111*a62be085SSadaf Ebrahimi // We really did have four good data bytes, so advance four
112*a62be085SSadaf Ebrahimi // characters in the string.
113*a62be085SSadaf Ebrahimi
114*a62be085SSadaf Ebrahimi szsrc -= 4;
115*a62be085SSadaf Ebrahimi src += 4;
116*a62be085SSadaf Ebrahimi decode = -1;
117*a62be085SSadaf Ebrahimi ch = '\0';
118*a62be085SSadaf Ebrahimi }
119*a62be085SSadaf Ebrahimi
120*a62be085SSadaf Ebrahimi // temp has 24 bits of input, so write that out as three bytes.
121*a62be085SSadaf Ebrahimi
122*a62be085SSadaf Ebrahimi if (destidx + 3 > szdest) return false;
123*a62be085SSadaf Ebrahimi dest[destidx + 2] = temp;
124*a62be085SSadaf Ebrahimi temp >>= 8;
125*a62be085SSadaf Ebrahimi dest[destidx + 1] = temp;
126*a62be085SSadaf Ebrahimi temp >>= 8;
127*a62be085SSadaf Ebrahimi dest[destidx] = temp;
128*a62be085SSadaf Ebrahimi destidx += 3;
129*a62be085SSadaf Ebrahimi }
130*a62be085SSadaf Ebrahimi } else {
131*a62be085SSadaf Ebrahimi while (szsrc >= 4) {
132*a62be085SSadaf Ebrahimi if (!src[0] || !src[1] || !src[2] ||
133*a62be085SSadaf Ebrahimi ((temp = ((unsigned(unbase64[src[0]]) << 18) |
134*a62be085SSadaf Ebrahimi (unsigned(unbase64[src[1]]) << 12) |
135*a62be085SSadaf Ebrahimi (unsigned(unbase64[src[2]]) << 6) |
136*a62be085SSadaf Ebrahimi (unsigned(unbase64[src[3]])))) &
137*a62be085SSadaf Ebrahimi 0x80000000)) {
138*a62be085SSadaf Ebrahimi GET_INPUT(first_no_dest, 4);
139*a62be085SSadaf Ebrahimi GET_INPUT(second_no_dest, 3);
140*a62be085SSadaf Ebrahimi GET_INPUT(third_no_dest, 2);
141*a62be085SSadaf Ebrahimi GET_INPUT(fourth_no_dest, 1);
142*a62be085SSadaf Ebrahimi } else {
143*a62be085SSadaf Ebrahimi szsrc -= 4;
144*a62be085SSadaf Ebrahimi src += 4;
145*a62be085SSadaf Ebrahimi decode = -1;
146*a62be085SSadaf Ebrahimi ch = '\0';
147*a62be085SSadaf Ebrahimi }
148*a62be085SSadaf Ebrahimi destidx += 3;
149*a62be085SSadaf Ebrahimi }
150*a62be085SSadaf Ebrahimi }
151*a62be085SSadaf Ebrahimi
152*a62be085SSadaf Ebrahimi #undef GET_INPUT
153*a62be085SSadaf Ebrahimi
154*a62be085SSadaf Ebrahimi // if the loop terminated because we read a bad character, return
155*a62be085SSadaf Ebrahimi // now.
156*a62be085SSadaf Ebrahimi if (decode < 0 && ch != '\0' && ch != kPad64Equals && ch != kPad64Dot &&
157*a62be085SSadaf Ebrahimi !ascii_isspace(ch))
158*a62be085SSadaf Ebrahimi return false;
159*a62be085SSadaf Ebrahimi
160*a62be085SSadaf Ebrahimi if (ch == kPad64Equals || ch == kPad64Dot) {
161*a62be085SSadaf Ebrahimi // if we stopped by hitting an '=' or '.', un-read that character -- we'll
162*a62be085SSadaf Ebrahimi // look at it again when we count to check for the proper number of
163*a62be085SSadaf Ebrahimi // equals signs at the end.
164*a62be085SSadaf Ebrahimi ++szsrc;
165*a62be085SSadaf Ebrahimi --src;
166*a62be085SSadaf Ebrahimi } else {
167*a62be085SSadaf Ebrahimi // This loop consumes 1 input byte per iteration. It's used to
168*a62be085SSadaf Ebrahimi // clean up the 0-3 input bytes remaining when the first, faster
169*a62be085SSadaf Ebrahimi // loop finishes. 'temp' contains the data from 'state' input
170*a62be085SSadaf Ebrahimi // characters read by the first loop.
171*a62be085SSadaf Ebrahimi while (szsrc > 0) {
172*a62be085SSadaf Ebrahimi --szsrc;
173*a62be085SSadaf Ebrahimi ch = *src++;
174*a62be085SSadaf Ebrahimi decode = unbase64[ch];
175*a62be085SSadaf Ebrahimi if (decode < 0) {
176*a62be085SSadaf Ebrahimi if (ascii_isspace(ch)) {
177*a62be085SSadaf Ebrahimi continue;
178*a62be085SSadaf Ebrahimi } else if (ch == '\0') {
179*a62be085SSadaf Ebrahimi break;
180*a62be085SSadaf Ebrahimi } else if (ch == kPad64Equals || ch == kPad64Dot) {
181*a62be085SSadaf Ebrahimi // back up one character; we'll read it again when we check
182*a62be085SSadaf Ebrahimi // for the correct number of pad characters at the end.
183*a62be085SSadaf Ebrahimi ++szsrc;
184*a62be085SSadaf Ebrahimi --src;
185*a62be085SSadaf Ebrahimi break;
186*a62be085SSadaf Ebrahimi } else {
187*a62be085SSadaf Ebrahimi return false;
188*a62be085SSadaf Ebrahimi }
189*a62be085SSadaf Ebrahimi }
190*a62be085SSadaf Ebrahimi
191*a62be085SSadaf Ebrahimi // Each input character gives us six bits of output.
192*a62be085SSadaf Ebrahimi temp = (temp << 6) | decode;
193*a62be085SSadaf Ebrahimi ++state;
194*a62be085SSadaf Ebrahimi if (state == 4) {
195*a62be085SSadaf Ebrahimi // If we've accumulated 24 bits of output, write that out as
196*a62be085SSadaf Ebrahimi // three bytes.
197*a62be085SSadaf Ebrahimi if (dest) {
198*a62be085SSadaf Ebrahimi if (destidx + 3 > szdest) return false;
199*a62be085SSadaf Ebrahimi dest[destidx + 2] = temp;
200*a62be085SSadaf Ebrahimi temp >>= 8;
201*a62be085SSadaf Ebrahimi dest[destidx + 1] = temp;
202*a62be085SSadaf Ebrahimi temp >>= 8;
203*a62be085SSadaf Ebrahimi dest[destidx] = temp;
204*a62be085SSadaf Ebrahimi }
205*a62be085SSadaf Ebrahimi destidx += 3;
206*a62be085SSadaf Ebrahimi state = 0;
207*a62be085SSadaf Ebrahimi temp = 0;
208*a62be085SSadaf Ebrahimi }
209*a62be085SSadaf Ebrahimi }
210*a62be085SSadaf Ebrahimi }
211*a62be085SSadaf Ebrahimi
212*a62be085SSadaf Ebrahimi // Process the leftover data contained in 'temp' at the end of the input.
213*a62be085SSadaf Ebrahimi int expected_equals = 0;
214*a62be085SSadaf Ebrahimi switch (state) {
215*a62be085SSadaf Ebrahimi case 0:
216*a62be085SSadaf Ebrahimi // Nothing left over; output is a multiple of 3 bytes.
217*a62be085SSadaf Ebrahimi break;
218*a62be085SSadaf Ebrahimi
219*a62be085SSadaf Ebrahimi case 1:
220*a62be085SSadaf Ebrahimi // Bad input; we have 6 bits left over.
221*a62be085SSadaf Ebrahimi return false;
222*a62be085SSadaf Ebrahimi
223*a62be085SSadaf Ebrahimi case 2:
224*a62be085SSadaf Ebrahimi // Produce one more output byte from the 12 input bits we have left.
225*a62be085SSadaf Ebrahimi if (dest) {
226*a62be085SSadaf Ebrahimi if (destidx + 1 > szdest) return false;
227*a62be085SSadaf Ebrahimi temp >>= 4;
228*a62be085SSadaf Ebrahimi dest[destidx] = temp;
229*a62be085SSadaf Ebrahimi }
230*a62be085SSadaf Ebrahimi ++destidx;
231*a62be085SSadaf Ebrahimi expected_equals = 2;
232*a62be085SSadaf Ebrahimi break;
233*a62be085SSadaf Ebrahimi
234*a62be085SSadaf Ebrahimi case 3:
235*a62be085SSadaf Ebrahimi // Produce two more output bytes from the 18 input bits we have left.
236*a62be085SSadaf Ebrahimi if (dest) {
237*a62be085SSadaf Ebrahimi if (destidx + 2 > szdest) return false;
238*a62be085SSadaf Ebrahimi temp >>= 2;
239*a62be085SSadaf Ebrahimi dest[destidx + 1] = temp;
240*a62be085SSadaf Ebrahimi temp >>= 8;
241*a62be085SSadaf Ebrahimi dest[destidx] = temp;
242*a62be085SSadaf Ebrahimi }
243*a62be085SSadaf Ebrahimi destidx += 2;
244*a62be085SSadaf Ebrahimi expected_equals = 1;
245*a62be085SSadaf Ebrahimi break;
246*a62be085SSadaf Ebrahimi
247*a62be085SSadaf Ebrahimi default:
248*a62be085SSadaf Ebrahimi // state should have no other values at this point.
249*a62be085SSadaf Ebrahimi LOG(FATAL) << "This can't happen; base64 decoder state = " << state;
250*a62be085SSadaf Ebrahimi }
251*a62be085SSadaf Ebrahimi
252*a62be085SSadaf Ebrahimi // The remainder of the string should be all whitespace, mixed with
253*a62be085SSadaf Ebrahimi // exactly 0 equals signs, or exactly 'expected_equals' equals
254*a62be085SSadaf Ebrahimi // signs. (Always accepting 0 equals signs is a google extension
255*a62be085SSadaf Ebrahimi // not covered in the RFC, as is accepting dot as the pad character.)
256*a62be085SSadaf Ebrahimi
257*a62be085SSadaf Ebrahimi int equals = 0;
258*a62be085SSadaf Ebrahimi while (szsrc > 0 && *src) {
259*a62be085SSadaf Ebrahimi if (*src == kPad64Equals || *src == kPad64Dot)
260*a62be085SSadaf Ebrahimi ++equals;
261*a62be085SSadaf Ebrahimi else if (!ascii_isspace(*src))
262*a62be085SSadaf Ebrahimi return false;
263*a62be085SSadaf Ebrahimi --szsrc;
264*a62be085SSadaf Ebrahimi ++src;
265*a62be085SSadaf Ebrahimi }
266*a62be085SSadaf Ebrahimi
267*a62be085SSadaf Ebrahimi const bool ok = (equals == 0 || equals == expected_equals);
268*a62be085SSadaf Ebrahimi if (ok) *len = destidx;
269*a62be085SSadaf Ebrahimi return ok;
270*a62be085SSadaf Ebrahimi }
271*a62be085SSadaf Ebrahimi
// The arrays below were generated by the following code
// #include <sys/time.h>
// #include <stdlib.h>
// #include <string.h>
// main()
// {
//   static const char Base64[] =
//     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
//   char* pos;
//   int idx, i, j;
//   printf(" ");
//   for (i = 0; i < 255; i += 8) {
//     for (j = i; j < i + 8; j++) {
//       pos = strchr(Base64, j);
//       if ((pos == NULL) || (j == 0))
//         idx = -1;
//       else
//         idx = pos - Base64;
//       if (idx == -1)
//         printf(" %2d, ", idx);
//       else
//         printf(" %2d/*%c*/,", idx, j);
//     }
//     printf("\n ");
//   }
// }
//
// where the value of "Base64[]" was replaced by one of the base-64 conversion
// tables from the functions below.
// Decoding table for the standard base64 alphabet: maps each byte value to
// its 6-bit code (0..63), or -1 if the byte is not a data character.
// Eight entries per row, so row r covers byte values 8*r .. 8*r+7.
static const signed char kUnBase64[] = {
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       62 /*+*/, -1,       -1,       -1,       63 /*/ */,
    52 /*0*/, 53 /*1*/, 54 /*2*/, 55 /*3*/, 56 /*4*/, 57 /*5*/, 58 /*6*/, 59 /*7*/,
    60 /*8*/, 61 /*9*/, -1,       -1,       -1,       -1,       -1,       -1,
    -1,       0 /*A*/,  1 /*B*/,  2 /*C*/,  3 /*D*/,  4 /*E*/,  5 /*F*/,  6 /*G*/,
    7 /*H*/,  8 /*I*/,  9 /*J*/,  10 /*K*/, 11 /*L*/, 12 /*M*/, 13 /*N*/, 14 /*O*/,
    15 /*P*/, 16 /*Q*/, 17 /*R*/, 18 /*S*/, 19 /*T*/, 20 /*U*/, 21 /*V*/, 22 /*W*/,
    23 /*X*/, 24 /*Y*/, 25 /*Z*/, -1,       -1,       -1,       -1,       -1,
    -1,       26 /*a*/, 27 /*b*/, 28 /*c*/, 29 /*d*/, 30 /*e*/, 31 /*f*/, 32 /*g*/,
    33 /*h*/, 34 /*i*/, 35 /*j*/, 36 /*k*/, 37 /*l*/, 38 /*m*/, 39 /*n*/, 40 /*o*/,
    41 /*p*/, 42 /*q*/, 43 /*r*/, 44 /*s*/, 45 /*t*/, 46 /*u*/, 47 /*v*/, 48 /*w*/,
    49 /*x*/, 50 /*y*/, 51 /*z*/, -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1};
// The decoder indexes this table with arbitrary unsigned char values, so it
// must cover all 256 of them.
static_assert(sizeof(kUnBase64) == 256,
              "kUnBase64 must have one entry per byte value");
// Decoding table for the web-safe base64 alphabet ('-' is 62 and '_' is 63
// in place of '+' and '/'): maps each byte value to its 6-bit code (0..63),
// or -1 if the byte is not a data character.
// Eight entries per row, so row r covers byte values 8*r .. 8*r+7.
static const signed char kUnWebSafeBase64[] = {
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       62 /*-*/, -1,       -1,
    52 /*0*/, 53 /*1*/, 54 /*2*/, 55 /*3*/, 56 /*4*/, 57 /*5*/, 58 /*6*/, 59 /*7*/,
    60 /*8*/, 61 /*9*/, -1,       -1,       -1,       -1,       -1,       -1,
    -1,       0 /*A*/,  1 /*B*/,  2 /*C*/,  3 /*D*/,  4 /*E*/,  5 /*F*/,  6 /*G*/,
    7 /*H*/,  8 /*I*/,  9 /*J*/,  10 /*K*/, 11 /*L*/, 12 /*M*/, 13 /*N*/, 14 /*O*/,
    15 /*P*/, 16 /*Q*/, 17 /*R*/, 18 /*S*/, 19 /*T*/, 20 /*U*/, 21 /*V*/, 22 /*W*/,
    23 /*X*/, 24 /*Y*/, 25 /*Z*/, -1,       -1,       -1,       -1,       63 /*_*/,
    -1,       26 /*a*/, 27 /*b*/, 28 /*c*/, 29 /*d*/, 30 /*e*/, 31 /*f*/, 32 /*g*/,
    33 /*h*/, 34 /*i*/, 35 /*j*/, 36 /*k*/, 37 /*l*/, 38 /*m*/, 39 /*n*/, 40 /*o*/,
    41 /*p*/, 42 /*q*/, 43 /*r*/, 44 /*s*/, 45 /*t*/, 46 /*u*/, 47 /*v*/, 48 /*w*/,
    49 /*x*/, 50 /*y*/, 51 /*z*/, -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1,
    -1,       -1,       -1,       -1,       -1,       -1,       -1,       -1};
// The decoder indexes this table with arbitrary unsigned char values, so it
// must cover all 256 of them.
static_assert(sizeof(kUnWebSafeBase64) == 256,
              "kUnWebSafeBase64 must have one entry per byte value");
377*a62be085SSadaf Ebrahimi
Base64UnescapeInternal(const char * src,size_t slen,string * dest,const signed char * unbase64)378*a62be085SSadaf Ebrahimi static bool Base64UnescapeInternal(const char* src, size_t slen, string* dest,
379*a62be085SSadaf Ebrahimi const signed char* unbase64) {
380*a62be085SSadaf Ebrahimi // Determine the size of the output string. Base64 encodes every 3 bytes into
381*a62be085SSadaf Ebrahimi // 4 characters. any leftover chars are added directly for good measure.
382*a62be085SSadaf Ebrahimi // This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548
383*a62be085SSadaf Ebrahimi const size_t dest_len = 3 * (slen / 4) + (slen % 4);
384*a62be085SSadaf Ebrahimi
385*a62be085SSadaf Ebrahimi dest->resize(dest_len);
386*a62be085SSadaf Ebrahimi
387*a62be085SSadaf Ebrahimi // We are getting the destination buffer by getting the beginning of the
388*a62be085SSadaf Ebrahimi // string and converting it into a char *.
389*a62be085SSadaf Ebrahimi size_t len;
390*a62be085SSadaf Ebrahimi const bool ok =
391*a62be085SSadaf Ebrahimi Base64UnescapeInternal(src, slen, dest->empty() ? NULL : &*dest->begin(),
392*a62be085SSadaf Ebrahimi dest_len, unbase64, &len);
393*a62be085SSadaf Ebrahimi if (!ok) {
394*a62be085SSadaf Ebrahimi dest->clear();
395*a62be085SSadaf Ebrahimi return false;
396*a62be085SSadaf Ebrahimi }
397*a62be085SSadaf Ebrahimi
398*a62be085SSadaf Ebrahimi // could be shorter if there was padding
399*a62be085SSadaf Ebrahimi DCHECK_LE(len, dest_len);
400*a62be085SSadaf Ebrahimi dest->erase(len);
401*a62be085SSadaf Ebrahimi
402*a62be085SSadaf Ebrahimi return true;
403*a62be085SSadaf Ebrahimi }
404*a62be085SSadaf Ebrahimi
Base64Unescape(const string & src,string * dest)405*a62be085SSadaf Ebrahimi bool Base64Unescape(const string& src, string* dest) {
406*a62be085SSadaf Ebrahimi return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);
407*a62be085SSadaf Ebrahimi }
408*a62be085SSadaf Ebrahimi
WebSafeBase64Unescape(const string & src,string * dest)409*a62be085SSadaf Ebrahimi bool WebSafeBase64Unescape(const string& src, string* dest) {
410*a62be085SSadaf Ebrahimi return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64);
411*a62be085SSadaf Ebrahimi }
412*a62be085SSadaf Ebrahimi
413*a62be085SSadaf Ebrahimi // Base64Escape
414*a62be085SSadaf Ebrahimi //
415*a62be085SSadaf Ebrahimi // NOTE: We have to use an unsigned type for src because code built
416*a62be085SSadaf Ebrahimi // in the the /google tree treats characters as signed unless
417*a62be085SSadaf Ebrahimi // otherwised specified.
Base64EscapeInternal(const unsigned char * src,int szsrc,char * dest,int szdest,const char * base64,bool do_padding)418*a62be085SSadaf Ebrahimi int Base64EscapeInternal(const unsigned char* src, int szsrc, char* dest,
419*a62be085SSadaf Ebrahimi int szdest, const char* base64, bool do_padding) {
420*a62be085SSadaf Ebrahimi static const char kPad64 = '=';
421*a62be085SSadaf Ebrahimi
422*a62be085SSadaf Ebrahimi if (szsrc <= 0) return 0;
423*a62be085SSadaf Ebrahimi
424*a62be085SSadaf Ebrahimi char* cur_dest = dest;
425*a62be085SSadaf Ebrahimi const unsigned char* cur_src = src;
426*a62be085SSadaf Ebrahimi
427*a62be085SSadaf Ebrahimi // Three bytes of data encodes to four characters of cyphertext.
428*a62be085SSadaf Ebrahimi // So we can pump through three-byte chunks atomically.
429*a62be085SSadaf Ebrahimi while (szsrc > 2) { /* keep going until we have less than 24 bits */
430*a62be085SSadaf Ebrahimi if ((szdest -= 4) < 0) return 0;
431*a62be085SSadaf Ebrahimi cur_dest[0] = base64[cur_src[0] >> 2];
432*a62be085SSadaf Ebrahimi cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)];
433*a62be085SSadaf Ebrahimi cur_dest[2] = base64[((cur_src[1] & 0x0f) << 2) + (cur_src[2] >> 6)];
434*a62be085SSadaf Ebrahimi cur_dest[3] = base64[cur_src[2] & 0x3f];
435*a62be085SSadaf Ebrahimi
436*a62be085SSadaf Ebrahimi cur_dest += 4;
437*a62be085SSadaf Ebrahimi cur_src += 3;
438*a62be085SSadaf Ebrahimi szsrc -= 3;
439*a62be085SSadaf Ebrahimi }
440*a62be085SSadaf Ebrahimi
441*a62be085SSadaf Ebrahimi /* now deal with the tail (<=2 bytes) */
442*a62be085SSadaf Ebrahimi switch (szsrc) {
443*a62be085SSadaf Ebrahimi case 0:
444*a62be085SSadaf Ebrahimi // Nothing left; nothing more to do.
445*a62be085SSadaf Ebrahimi break;
446*a62be085SSadaf Ebrahimi case 1:
447*a62be085SSadaf Ebrahimi // One byte left: this encodes to two characters, and (optionally)
448*a62be085SSadaf Ebrahimi // two pad characters to round out the four-character cypherblock.
449*a62be085SSadaf Ebrahimi if ((szdest -= 2) < 0) return 0;
450*a62be085SSadaf Ebrahimi cur_dest[0] = base64[cur_src[0] >> 2];
451*a62be085SSadaf Ebrahimi cur_dest[1] = base64[(cur_src[0] & 0x03) << 4];
452*a62be085SSadaf Ebrahimi cur_dest += 2;
453*a62be085SSadaf Ebrahimi if (do_padding) {
454*a62be085SSadaf Ebrahimi if ((szdest -= 2) < 0) return 0;
455*a62be085SSadaf Ebrahimi cur_dest[0] = kPad64;
456*a62be085SSadaf Ebrahimi cur_dest[1] = kPad64;
457*a62be085SSadaf Ebrahimi cur_dest += 2;
458*a62be085SSadaf Ebrahimi }
459*a62be085SSadaf Ebrahimi break;
460*a62be085SSadaf Ebrahimi case 2:
461*a62be085SSadaf Ebrahimi // Two bytes left: this encodes to three characters, and (optionally)
462*a62be085SSadaf Ebrahimi // one pad character to round out the four-character cypherblock.
463*a62be085SSadaf Ebrahimi if ((szdest -= 3) < 0) return 0;
464*a62be085SSadaf Ebrahimi cur_dest[0] = base64[cur_src[0] >> 2];
465*a62be085SSadaf Ebrahimi cur_dest[1] = base64[((cur_src[0] & 0x03) << 4) + (cur_src[1] >> 4)];
466*a62be085SSadaf Ebrahimi cur_dest[2] = base64[(cur_src[1] & 0x0f) << 2];
467*a62be085SSadaf Ebrahimi cur_dest += 3;
468*a62be085SSadaf Ebrahimi if (do_padding) {
469*a62be085SSadaf Ebrahimi if ((szdest -= 1) < 0) return 0;
470*a62be085SSadaf Ebrahimi cur_dest[0] = kPad64;
471*a62be085SSadaf Ebrahimi cur_dest += 1;
472*a62be085SSadaf Ebrahimi }
473*a62be085SSadaf Ebrahimi break;
474*a62be085SSadaf Ebrahimi default:
475*a62be085SSadaf Ebrahimi // Should not be reached: blocks of 3 bytes are handled
476*a62be085SSadaf Ebrahimi // in the while loop before this switch statement.
477*a62be085SSadaf Ebrahimi CHECK(false) << "Logic problem? szsrc = " << szsrc;
478*a62be085SSadaf Ebrahimi break;
479*a62be085SSadaf Ebrahimi }
480*a62be085SSadaf Ebrahimi return (cur_dest - dest);
481*a62be085SSadaf Ebrahimi }
482*a62be085SSadaf Ebrahimi
// Standard base64 alphabet: the character at index i encodes the 6-bit
// value i.
static const char kBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
static_assert(sizeof(kBase64Chars) == 64 + 1,
              "kBase64Chars must hold exactly 64 characters plus the NUL");
485*a62be085SSadaf Ebrahimi
// Digit conversion.
// Two lowercase hex digits per byte value: the text for byte b is
// kHexTable[2 * b] followed by kHexTable[2 * b + 1]. 256 * 2 characters plus
// the terminating NUL gives the declared size of 513.
static const char kHexTable[513] =
    "000102030405060708090a0b0c0d0e0f"
    "101112131415161718191a1b1c1d1e1f"
    "202122232425262728292a2b2c2d2e2f"
    "303132333435363738393a3b3c3d3e3f"
    "404142434445464748494a4b4c4d4e4f"
    "505152535455565758595a5b5c5d5e5f"
    "606162636465666768696a6b6c6d6e6f"
    "707172737475767778797a7b7c7d7e7f"
    "808182838485868788898a8b8c8d8e8f"
    "909192939495969798999a9b9c9d9e9f"
    "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
    "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
    "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
    "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
    "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
    "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";
504*a62be085SSadaf Ebrahimi
// Returns the number of characters produced by base64-encoding `input_len`
// bytes, with or without trailing '=' padding.
//
// Base64 encodes three bytes of input at a time. If the input is not
// divisible by three, the tail is handled as described in
// http://tools.ietf.org/html/rfc3548:
//   Special processing is performed if fewer than 24 bits are available
//   at the end of the data being encoded. A full encoding quantum is
//   always completed at the end of a quantity. When fewer than 24 input
//   bits are available in an input group, zero bits are added (on the
//   right) to form an integral number of 6-bit groups. Padding at the
//   end of the data is performed using the '=' character.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
  // Base64 encodes each three bytes of input into four bytes of output.
  size_t len = (input_len / 3) * 4;

  switch (input_len % 3) {
    case 0:
      // (1) The final quantum is an integral multiple of 24 bits: the output
      //     is a multiple of 4 characters with no "=" padding.
      break;
    case 1:
      // (2) The final quantum is exactly 8 bits: two characters, followed by
      //     two "=" padding characters when padding is requested.
      len += do_padding ? 4 : 2;
      break;
    default:  // input_len % 3 == 2
      // (3) The final quantum is exactly 16 bits: three characters, followed
      //     by one "=" padding character when padding is requested.
      len += do_padding ? 4 : 3;
      break;
  }

  assert(len >= input_len);  // make sure we didn't overflow
  return len;
}
550*a62be085SSadaf Ebrahimi
Base64EscapeInternal(const unsigned char * src,size_t szsrc,string * dest,bool do_padding,const char * base64_chars)551*a62be085SSadaf Ebrahimi void Base64EscapeInternal(const unsigned char* src, size_t szsrc, string* dest,
552*a62be085SSadaf Ebrahimi bool do_padding, const char* base64_chars) {
553*a62be085SSadaf Ebrahimi const size_t calc_escaped_size =
554*a62be085SSadaf Ebrahimi CalculateBase64EscapedLenInternal(szsrc, do_padding);
555*a62be085SSadaf Ebrahimi dest->resize(calc_escaped_size);
556*a62be085SSadaf Ebrahimi const int escaped_len = Base64EscapeInternal(
557*a62be085SSadaf Ebrahimi src, static_cast<int>(szsrc), dest->empty() ? NULL : &*dest->begin(),
558*a62be085SSadaf Ebrahimi static_cast<int>(dest->size()), base64_chars, do_padding);
559*a62be085SSadaf Ebrahimi DCHECK_EQ(calc_escaped_size, escaped_len);
560*a62be085SSadaf Ebrahimi dest->erase(escaped_len);
561*a62be085SSadaf Ebrahimi }
562*a62be085SSadaf Ebrahimi
Base64Escape(const unsigned char * src,ptrdiff_t szsrc,string * dest,bool do_padding)563*a62be085SSadaf Ebrahimi void Base64Escape(const unsigned char* src, ptrdiff_t szsrc, string* dest,
564*a62be085SSadaf Ebrahimi bool do_padding) {
565*a62be085SSadaf Ebrahimi if (szsrc < 0) return;
566*a62be085SSadaf Ebrahimi Base64EscapeInternal(src, szsrc, dest, do_padding, kBase64Chars);
567*a62be085SSadaf Ebrahimi }
568*a62be085SSadaf Ebrahimi
569*a62be085SSadaf Ebrahimi // This is a templated function so that T can be either a char* or a string.
570*a62be085SSadaf Ebrahimi template <typename T>
b2a_hex_t(const unsigned char * src,T dest,ptrdiff_t num)571*a62be085SSadaf Ebrahimi static void b2a_hex_t(const unsigned char* src, T dest, ptrdiff_t num) {
572*a62be085SSadaf Ebrahimi auto dest_ptr = &dest[0];
573*a62be085SSadaf Ebrahimi for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) {
574*a62be085SSadaf Ebrahimi const char* hex_p = &kHexTable[*src_ptr * 2];
575*a62be085SSadaf Ebrahimi std::copy(hex_p, hex_p + 2, dest_ptr);
576*a62be085SSadaf Ebrahimi }
577*a62be085SSadaf Ebrahimi }
578*a62be085SSadaf Ebrahimi
b2a_hex(const char * b,ptrdiff_t len)579*a62be085SSadaf Ebrahimi string b2a_hex(const char* b, ptrdiff_t len) {
580*a62be085SSadaf Ebrahimi string result;
581*a62be085SSadaf Ebrahimi result.resize(len << 1);
582*a62be085SSadaf Ebrahimi b2a_hex_t<string&>(reinterpret_cast<const unsigned char*>(b), result, len);
583*a62be085SSadaf Ebrahimi return result;
584*a62be085SSadaf Ebrahimi }
585*a62be085SSadaf Ebrahimi
586*a62be085SSadaf Ebrahimi } // namespace dynamic_depth
587