1*9356374aSAndroid Build Coastguard Worker // Copyright 2020 The Abseil Authors.
2*9356374aSAndroid Build Coastguard Worker //
3*9356374aSAndroid Build Coastguard Worker // Licensed under the Apache License, Version 2.0 (the "License");
4*9356374aSAndroid Build Coastguard Worker // you may not use this file except in compliance with the License.
5*9356374aSAndroid Build Coastguard Worker // You may obtain a copy of the License at
6*9356374aSAndroid Build Coastguard Worker //
7*9356374aSAndroid Build Coastguard Worker // https://www.apache.org/licenses/LICENSE-2.0
8*9356374aSAndroid Build Coastguard Worker //
9*9356374aSAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software
10*9356374aSAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS,
11*9356374aSAndroid Build Coastguard Worker // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*9356374aSAndroid Build Coastguard Worker // See the License for the specific language governing permissions and
13*9356374aSAndroid Build Coastguard Worker // limitations under the License.
14*9356374aSAndroid Build Coastguard Worker
15*9356374aSAndroid Build Coastguard Worker #include "absl/strings/internal/escaping.h"
16*9356374aSAndroid Build Coastguard Worker
17*9356374aSAndroid Build Coastguard Worker #include <limits>
18*9356374aSAndroid Build Coastguard Worker
19*9356374aSAndroid Build Coastguard Worker #include "absl/base/internal/endian.h"
20*9356374aSAndroid Build Coastguard Worker #include "absl/base/internal/raw_logging.h"
21*9356374aSAndroid Build Coastguard Worker
22*9356374aSAndroid Build Coastguard Worker namespace absl {
23*9356374aSAndroid Build Coastguard Worker ABSL_NAMESPACE_BEGIN
24*9356374aSAndroid Build Coastguard Worker namespace strings_internal {
25*9356374aSAndroid Build Coastguard Worker
26*9356374aSAndroid Build Coastguard Worker // The two strings below provide maps from normal 6-bit characters to their
27*9356374aSAndroid Build Coastguard Worker // base64-escaped equivalent.
28*9356374aSAndroid Build Coastguard Worker // For the inverse case, see kUn(WebSafe)Base64 in the external
29*9356374aSAndroid Build Coastguard Worker // escaping.cc.
30*9356374aSAndroid Build Coastguard Worker ABSL_CONST_INIT const char kBase64Chars[] =
31*9356374aSAndroid Build Coastguard Worker "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
32*9356374aSAndroid Build Coastguard Worker
33*9356374aSAndroid Build Coastguard Worker ABSL_CONST_INIT const char kWebSafeBase64Chars[] =
34*9356374aSAndroid Build Coastguard Worker "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
35*9356374aSAndroid Build Coastguard Worker
CalculateBase64EscapedLenInternal(size_t input_len,bool do_padding)36*9356374aSAndroid Build Coastguard Worker size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
37*9356374aSAndroid Build Coastguard Worker // Base64 encodes three bytes of input at a time. If the input is not
38*9356374aSAndroid Build Coastguard Worker // divisible by three, we pad as appropriate.
39*9356374aSAndroid Build Coastguard Worker //
40*9356374aSAndroid Build Coastguard Worker // Base64 encodes each three bytes of input into four bytes of output.
41*9356374aSAndroid Build Coastguard Worker constexpr size_t kMaxSize = (std::numeric_limits<size_t>::max() - 1) / 4 * 3;
42*9356374aSAndroid Build Coastguard Worker ABSL_INTERNAL_CHECK(input_len <= kMaxSize,
43*9356374aSAndroid Build Coastguard Worker "CalculateBase64EscapedLenInternal() overflow");
44*9356374aSAndroid Build Coastguard Worker size_t len = (input_len / 3) * 4;
45*9356374aSAndroid Build Coastguard Worker
46*9356374aSAndroid Build Coastguard Worker // Since all base 64 input is an integral number of octets, only the following
47*9356374aSAndroid Build Coastguard Worker // cases can arise:
48*9356374aSAndroid Build Coastguard Worker if (input_len % 3 == 0) {
49*9356374aSAndroid Build Coastguard Worker // (from https://tools.ietf.org/html/rfc3548)
50*9356374aSAndroid Build Coastguard Worker // (1) the final quantum of encoding input is an integral multiple of 24
51*9356374aSAndroid Build Coastguard Worker // bits; here, the final unit of encoded output will be an integral
52*9356374aSAndroid Build Coastguard Worker // multiple of 4 characters with no "=" padding,
53*9356374aSAndroid Build Coastguard Worker } else if (input_len % 3 == 1) {
54*9356374aSAndroid Build Coastguard Worker // (from https://tools.ietf.org/html/rfc3548)
55*9356374aSAndroid Build Coastguard Worker // (2) the final quantum of encoding input is exactly 8 bits; here, the
56*9356374aSAndroid Build Coastguard Worker // final unit of encoded output will be two characters followed by two
57*9356374aSAndroid Build Coastguard Worker // "=" padding characters, or
58*9356374aSAndroid Build Coastguard Worker len += 2;
59*9356374aSAndroid Build Coastguard Worker if (do_padding) {
60*9356374aSAndroid Build Coastguard Worker len += 2;
61*9356374aSAndroid Build Coastguard Worker }
62*9356374aSAndroid Build Coastguard Worker } else { // (input_len % 3 == 2)
63*9356374aSAndroid Build Coastguard Worker // (from https://tools.ietf.org/html/rfc3548)
64*9356374aSAndroid Build Coastguard Worker // (3) the final quantum of encoding input is exactly 16 bits; here, the
65*9356374aSAndroid Build Coastguard Worker // final unit of encoded output will be three characters followed by one
66*9356374aSAndroid Build Coastguard Worker // "=" padding character.
67*9356374aSAndroid Build Coastguard Worker len += 3;
68*9356374aSAndroid Build Coastguard Worker if (do_padding) {
69*9356374aSAndroid Build Coastguard Worker len += 1;
70*9356374aSAndroid Build Coastguard Worker }
71*9356374aSAndroid Build Coastguard Worker }
72*9356374aSAndroid Build Coastguard Worker
73*9356374aSAndroid Build Coastguard Worker return len;
74*9356374aSAndroid Build Coastguard Worker }
75*9356374aSAndroid Build Coastguard Worker
76*9356374aSAndroid Build Coastguard Worker // ----------------------------------------------------------------------
77*9356374aSAndroid Build Coastguard Worker // Take the input in groups of 4 characters and turn each
78*9356374aSAndroid Build Coastguard Worker // character into a code 0 to 63 thus:
79*9356374aSAndroid Build Coastguard Worker // A-Z map to 0 to 25
80*9356374aSAndroid Build Coastguard Worker // a-z map to 26 to 51
81*9356374aSAndroid Build Coastguard Worker // 0-9 map to 52 to 61
82*9356374aSAndroid Build Coastguard Worker // +(- for WebSafe) maps to 62
83*9356374aSAndroid Build Coastguard Worker // /(_ for WebSafe) maps to 63
84*9356374aSAndroid Build Coastguard Worker // There will be four numbers, all less than 64 which can be represented
85*9356374aSAndroid Build Coastguard Worker // by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
86*9356374aSAndroid Build Coastguard Worker // Arrange the 6 digit binary numbers into three bytes as such:
87*9356374aSAndroid Build Coastguard Worker // aaaaaabb bbbbcccc ccdddddd
88*9356374aSAndroid Build Coastguard Worker // Equals signs (one or two) are used at the end of the encoded block to
89*9356374aSAndroid Build Coastguard Worker // indicate that the text was not an integer multiple of three bytes long.
90*9356374aSAndroid Build Coastguard Worker // ----------------------------------------------------------------------
Base64EscapeInternal(const unsigned char * src,size_t szsrc,char * dest,size_t szdest,const char * base64,bool do_padding)91*9356374aSAndroid Build Coastguard Worker size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
92*9356374aSAndroid Build Coastguard Worker size_t szdest, const char* base64,
93*9356374aSAndroid Build Coastguard Worker bool do_padding) {
94*9356374aSAndroid Build Coastguard Worker static const char kPad64 = '=';
95*9356374aSAndroid Build Coastguard Worker
96*9356374aSAndroid Build Coastguard Worker if (szsrc * 4 > szdest * 3) return 0;
97*9356374aSAndroid Build Coastguard Worker
98*9356374aSAndroid Build Coastguard Worker char* cur_dest = dest;
99*9356374aSAndroid Build Coastguard Worker const unsigned char* cur_src = src;
100*9356374aSAndroid Build Coastguard Worker
101*9356374aSAndroid Build Coastguard Worker char* const limit_dest = dest + szdest;
102*9356374aSAndroid Build Coastguard Worker const unsigned char* const limit_src = src + szsrc;
103*9356374aSAndroid Build Coastguard Worker
104*9356374aSAndroid Build Coastguard Worker // (from https://tools.ietf.org/html/rfc3548)
105*9356374aSAndroid Build Coastguard Worker // Special processing is performed if fewer than 24 bits are available
106*9356374aSAndroid Build Coastguard Worker // at the end of the data being encoded. A full encoding quantum is
107*9356374aSAndroid Build Coastguard Worker // always completed at the end of a quantity. When fewer than 24 input
108*9356374aSAndroid Build Coastguard Worker // bits are available in an input group, zero bits are added (on the
109*9356374aSAndroid Build Coastguard Worker // right) to form an integral number of 6-bit groups.
110*9356374aSAndroid Build Coastguard Worker //
111*9356374aSAndroid Build Coastguard Worker // If do_padding is true, padding at the end of the data is performed. This
112*9356374aSAndroid Build Coastguard Worker // output padding uses the '=' character.
113*9356374aSAndroid Build Coastguard Worker
114*9356374aSAndroid Build Coastguard Worker // Three bytes of data encodes to four characters of cyphertext.
115*9356374aSAndroid Build Coastguard Worker // So we can pump through three-byte chunks atomically.
116*9356374aSAndroid Build Coastguard Worker if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3.
117*9356374aSAndroid Build Coastguard Worker while (cur_src < limit_src - 3) { // While we have >= 32 bits.
118*9356374aSAndroid Build Coastguard Worker uint32_t in = absl::big_endian::Load32(cur_src) >> 8;
119*9356374aSAndroid Build Coastguard Worker
120*9356374aSAndroid Build Coastguard Worker cur_dest[0] = base64[in >> 18];
121*9356374aSAndroid Build Coastguard Worker in &= 0x3FFFF;
122*9356374aSAndroid Build Coastguard Worker cur_dest[1] = base64[in >> 12];
123*9356374aSAndroid Build Coastguard Worker in &= 0xFFF;
124*9356374aSAndroid Build Coastguard Worker cur_dest[2] = base64[in >> 6];
125*9356374aSAndroid Build Coastguard Worker in &= 0x3F;
126*9356374aSAndroid Build Coastguard Worker cur_dest[3] = base64[in];
127*9356374aSAndroid Build Coastguard Worker
128*9356374aSAndroid Build Coastguard Worker cur_dest += 4;
129*9356374aSAndroid Build Coastguard Worker cur_src += 3;
130*9356374aSAndroid Build Coastguard Worker }
131*9356374aSAndroid Build Coastguard Worker }
132*9356374aSAndroid Build Coastguard Worker // To save time, we didn't update szdest or szsrc in the loop. So do it now.
133*9356374aSAndroid Build Coastguard Worker szdest = static_cast<size_t>(limit_dest - cur_dest);
134*9356374aSAndroid Build Coastguard Worker szsrc = static_cast<size_t>(limit_src - cur_src);
135*9356374aSAndroid Build Coastguard Worker
136*9356374aSAndroid Build Coastguard Worker /* now deal with the tail (<=3 bytes) */
137*9356374aSAndroid Build Coastguard Worker switch (szsrc) {
138*9356374aSAndroid Build Coastguard Worker case 0:
139*9356374aSAndroid Build Coastguard Worker // Nothing left; nothing more to do.
140*9356374aSAndroid Build Coastguard Worker break;
141*9356374aSAndroid Build Coastguard Worker case 1: {
142*9356374aSAndroid Build Coastguard Worker // One byte left: this encodes to two characters, and (optionally)
143*9356374aSAndroid Build Coastguard Worker // two pad characters to round out the four-character cypherblock.
144*9356374aSAndroid Build Coastguard Worker if (szdest < 2) return 0;
145*9356374aSAndroid Build Coastguard Worker uint32_t in = cur_src[0];
146*9356374aSAndroid Build Coastguard Worker cur_dest[0] = base64[in >> 2];
147*9356374aSAndroid Build Coastguard Worker in &= 0x3;
148*9356374aSAndroid Build Coastguard Worker cur_dest[1] = base64[in << 4];
149*9356374aSAndroid Build Coastguard Worker cur_dest += 2;
150*9356374aSAndroid Build Coastguard Worker szdest -= 2;
151*9356374aSAndroid Build Coastguard Worker if (do_padding) {
152*9356374aSAndroid Build Coastguard Worker if (szdest < 2) return 0;
153*9356374aSAndroid Build Coastguard Worker cur_dest[0] = kPad64;
154*9356374aSAndroid Build Coastguard Worker cur_dest[1] = kPad64;
155*9356374aSAndroid Build Coastguard Worker cur_dest += 2;
156*9356374aSAndroid Build Coastguard Worker szdest -= 2;
157*9356374aSAndroid Build Coastguard Worker }
158*9356374aSAndroid Build Coastguard Worker break;
159*9356374aSAndroid Build Coastguard Worker }
160*9356374aSAndroid Build Coastguard Worker case 2: {
161*9356374aSAndroid Build Coastguard Worker // Two bytes left: this encodes to three characters, and (optionally)
162*9356374aSAndroid Build Coastguard Worker // one pad character to round out the four-character cypherblock.
163*9356374aSAndroid Build Coastguard Worker if (szdest < 3) return 0;
164*9356374aSAndroid Build Coastguard Worker uint32_t in = absl::big_endian::Load16(cur_src);
165*9356374aSAndroid Build Coastguard Worker cur_dest[0] = base64[in >> 10];
166*9356374aSAndroid Build Coastguard Worker in &= 0x3FF;
167*9356374aSAndroid Build Coastguard Worker cur_dest[1] = base64[in >> 4];
168*9356374aSAndroid Build Coastguard Worker in &= 0x00F;
169*9356374aSAndroid Build Coastguard Worker cur_dest[2] = base64[in << 2];
170*9356374aSAndroid Build Coastguard Worker cur_dest += 3;
171*9356374aSAndroid Build Coastguard Worker szdest -= 3;
172*9356374aSAndroid Build Coastguard Worker if (do_padding) {
173*9356374aSAndroid Build Coastguard Worker if (szdest < 1) return 0;
174*9356374aSAndroid Build Coastguard Worker cur_dest[0] = kPad64;
175*9356374aSAndroid Build Coastguard Worker cur_dest += 1;
176*9356374aSAndroid Build Coastguard Worker szdest -= 1;
177*9356374aSAndroid Build Coastguard Worker }
178*9356374aSAndroid Build Coastguard Worker break;
179*9356374aSAndroid Build Coastguard Worker }
180*9356374aSAndroid Build Coastguard Worker case 3: {
181*9356374aSAndroid Build Coastguard Worker // Three bytes left: same as in the big loop above. We can't do this in
182*9356374aSAndroid Build Coastguard Worker // the loop because the loop above always reads 4 bytes, and the fourth
183*9356374aSAndroid Build Coastguard Worker // byte is past the end of the input.
184*9356374aSAndroid Build Coastguard Worker if (szdest < 4) return 0;
185*9356374aSAndroid Build Coastguard Worker uint32_t in =
186*9356374aSAndroid Build Coastguard Worker (uint32_t{cur_src[0]} << 16) + absl::big_endian::Load16(cur_src + 1);
187*9356374aSAndroid Build Coastguard Worker cur_dest[0] = base64[in >> 18];
188*9356374aSAndroid Build Coastguard Worker in &= 0x3FFFF;
189*9356374aSAndroid Build Coastguard Worker cur_dest[1] = base64[in >> 12];
190*9356374aSAndroid Build Coastguard Worker in &= 0xFFF;
191*9356374aSAndroid Build Coastguard Worker cur_dest[2] = base64[in >> 6];
192*9356374aSAndroid Build Coastguard Worker in &= 0x3F;
193*9356374aSAndroid Build Coastguard Worker cur_dest[3] = base64[in];
194*9356374aSAndroid Build Coastguard Worker cur_dest += 4;
195*9356374aSAndroid Build Coastguard Worker szdest -= 4;
196*9356374aSAndroid Build Coastguard Worker break;
197*9356374aSAndroid Build Coastguard Worker }
198*9356374aSAndroid Build Coastguard Worker default:
199*9356374aSAndroid Build Coastguard Worker // Should not be reached: blocks of 4 bytes are handled
200*9356374aSAndroid Build Coastguard Worker // in the while loop before this switch statement.
201*9356374aSAndroid Build Coastguard Worker ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
202*9356374aSAndroid Build Coastguard Worker break;
203*9356374aSAndroid Build Coastguard Worker }
204*9356374aSAndroid Build Coastguard Worker return static_cast<size_t>(cur_dest - dest);
205*9356374aSAndroid Build Coastguard Worker }
206*9356374aSAndroid Build Coastguard Worker
207*9356374aSAndroid Build Coastguard Worker } // namespace strings_internal
208*9356374aSAndroid Build Coastguard Worker ABSL_NAMESPACE_END
209*9356374aSAndroid Build Coastguard Worker } // namespace absl
210