1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/strings/internal/escaping.h"
16
17 #include "absl/base/internal/endian.h"
18 #include "absl/base/internal/raw_logging.h"
19
20 namespace absl {
21 ABSL_NAMESPACE_BEGIN
22 namespace strings_internal {
23
24 ABSL_CONST_INIT const char kBase64Chars[] =
25 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
26
// Returns the number of characters needed to Base64-encode `input_len` bytes.
//
// Every complete group of three input bytes becomes four output characters.
// A trailing partial group is encoded as described in
// https://tools.ietf.org/html/rfc3548:
//   * 0 bytes left over: nothing more is emitted and no "=" padding is used;
//   * 1 byte  left over: two characters, followed by two "=" when padding;
//   * 2 bytes left over: three characters, followed by one "=" when padding.
//
// If `do_padding` is true the returned length includes the "=" characters.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
  size_t len = (input_len / 3) * 4;

  switch (input_len % 3) {
    case 0:
      // The input is an integral number of 24-bit quanta; no tail to encode.
      break;
    case 1:
      // The final quantum is exactly 8 bits: 2 characters (+ 2 pad).
      len += do_padding ? 4 : 2;
      break;
    default:
      // input_len % 3 == 2: the final quantum is exactly 16 bits:
      // 3 characters (+ 1 pad).
      len += do_padding ? 4 : 3;
      break;
  }

  // Debug-only sanity check; it is compiled out when NDEBUG is defined.
  assert(len >= input_len);  // make sure we didn't overflow
  return len;
}
64
Base64EscapeInternal(const unsigned char * src,size_t szsrc,char * dest,size_t szdest,const char * base64,bool do_padding)65 size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
66 size_t szdest, const char* base64,
67 bool do_padding) {
68 static const char kPad64 = '=';
69
70 if (szsrc * 4 > szdest * 3) return 0;
71
72 char* cur_dest = dest;
73 const unsigned char* cur_src = src;
74
75 char* const limit_dest = dest + szdest;
76 const unsigned char* const limit_src = src + szsrc;
77
78 // (from https://tools.ietf.org/html/rfc3548)
79 // Special processing is performed if fewer than 24 bits are available
80 // at the end of the data being encoded. A full encoding quantum is
81 // always completed at the end of a quantity. When fewer than 24 input
82 // bits are available in an input group, zero bits are added (on the
83 // right) to form an integral number of 6-bit groups.
84 //
85 // If do_padding is true, padding at the end of the data is performed. This
86 // output padding uses the '=' character.
87
88 // Three bytes of data encodes to four characters of cyphertext.
89 // So we can pump through three-byte chunks atomically.
90 if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3.
91 while (cur_src < limit_src - 3) { // While we have >= 32 bits.
92 uint32_t in = absl::big_endian::Load32(cur_src) >> 8;
93
94 cur_dest[0] = base64[in >> 18];
95 in &= 0x3FFFF;
96 cur_dest[1] = base64[in >> 12];
97 in &= 0xFFF;
98 cur_dest[2] = base64[in >> 6];
99 in &= 0x3F;
100 cur_dest[3] = base64[in];
101
102 cur_dest += 4;
103 cur_src += 3;
104 }
105 }
106 // To save time, we didn't update szdest or szsrc in the loop. So do it now.
107 szdest = static_cast<size_t>(limit_dest - cur_dest);
108 szsrc = static_cast<size_t>(limit_src - cur_src);
109
110 /* now deal with the tail (<=3 bytes) */
111 switch (szsrc) {
112 case 0:
113 // Nothing left; nothing more to do.
114 break;
115 case 1: {
116 // One byte left: this encodes to two characters, and (optionally)
117 // two pad characters to round out the four-character cypherblock.
118 if (szdest < 2) return 0;
119 uint32_t in = cur_src[0];
120 cur_dest[0] = base64[in >> 2];
121 in &= 0x3;
122 cur_dest[1] = base64[in << 4];
123 cur_dest += 2;
124 szdest -= 2;
125 if (do_padding) {
126 if (szdest < 2) return 0;
127 cur_dest[0] = kPad64;
128 cur_dest[1] = kPad64;
129 cur_dest += 2;
130 szdest -= 2;
131 }
132 break;
133 }
134 case 2: {
135 // Two bytes left: this encodes to three characters, and (optionally)
136 // one pad character to round out the four-character cypherblock.
137 if (szdest < 3) return 0;
138 uint32_t in = absl::big_endian::Load16(cur_src);
139 cur_dest[0] = base64[in >> 10];
140 in &= 0x3FF;
141 cur_dest[1] = base64[in >> 4];
142 in &= 0x00F;
143 cur_dest[2] = base64[in << 2];
144 cur_dest += 3;
145 szdest -= 3;
146 if (do_padding) {
147 if (szdest < 1) return 0;
148 cur_dest[0] = kPad64;
149 cur_dest += 1;
150 szdest -= 1;
151 }
152 break;
153 }
154 case 3: {
155 // Three bytes left: same as in the big loop above. We can't do this in
156 // the loop because the loop above always reads 4 bytes, and the fourth
157 // byte is past the end of the input.
158 if (szdest < 4) return 0;
159 uint32_t in =
160 (uint32_t{cur_src[0]} << 16) + absl::big_endian::Load16(cur_src + 1);
161 cur_dest[0] = base64[in >> 18];
162 in &= 0x3FFFF;
163 cur_dest[1] = base64[in >> 12];
164 in &= 0xFFF;
165 cur_dest[2] = base64[in >> 6];
166 in &= 0x3F;
167 cur_dest[3] = base64[in];
168 cur_dest += 4;
169 szdest -= 4;
170 break;
171 }
172 default:
173 // Should not be reached: blocks of 4 bytes are handled
174 // in the while loop before this switch statement.
175 ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
176 break;
177 }
178 return static_cast<size_t>(cur_dest - dest);
179 }
180
181 } // namespace strings_internal
182 ABSL_NAMESPACE_END
183 } // namespace absl
184