/*
** Routines to represent binary data in ASCII and vice-versa
**
** This module currently supports the following encodings:
** uuencode:
**      each line encodes 45 bytes (except possibly the last)
**      First char encodes (binary) length, rest data
**      each char encodes 6 bits, as follows:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
**      short binary data is zero-extended (so the bits are always in the
**      right place), this does *not* reflect in the length.
** base64:
**      Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in similar order as uuencode/hqx. Encoding
**      is done via a table.
**      Short binary data is filled (in ASCII) with '='.
** hqx:
**      File starts with introductory text, real data starts and ends
**      with colons.
**      Data consists of three similar parts: info, datafork, resourcefork.
**      Each part is protected (at the end) with a 16-bit crc
**      The binary data is run-length encoded, and then ascii-fied:
**      binary: 01234567 abcdefgh ijklmnop
**      ascii:  012345 67abcd efghij klmnop
**      ASCII encoding is table-driven, see the code.
**      Short binary data results in the runt ascii-byte being output with
**      the bits in the right place.
**
** While I was reading dozens of programs that encode or decode the formats
** here (documentation? hihi:-) I have formulated Jansen's Observation:
**
**      Programs that encode binary data in ASCII are written in
**      such a style that they are as unreadable as possible. Devices used
**      include unnecessary global variables, burying important tables
**      in unrelated sourcefiles, putting functions in include files,
**      using seemingly-descriptive variable names for different purposes,
**      calls to empty subroutines and a host of others.
**
** I have attempted to break with this tradition, but I guess that that
** does make the performance sub-optimal. Oh well, too bad...
**
** Jack Jansen, CWI, July 1995.
**
** Added support for quoted-printable encoding, based on rfc 1521 et al
** quoted-printable encoding specifies that non printable characters (anything
** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
** of the character.  It also specifies some other behavior to enable 8bit data
** in a mail message with little difficulty (maximum line sizes, protecting
** some cases of whitespace, etc).
**
** Brandon Long, September 2001.
*/
55
56 #ifndef Py_BUILD_CORE_BUILTIN
57 # define Py_BUILD_CORE_MODULE 1
58 #endif
59
60 #define PY_SSIZE_T_CLEAN
61
62 #include "Python.h"
63 #include "pycore_long.h" // _PyLong_DigitValue
64 #include "pycore_strhex.h" // _Py_strhex_bytes_with_sep()
65 #ifdef USE_ZLIB_CRC32
66 # include "zlib.h"
67 #endif
68
69 typedef struct binascii_state {
70 PyObject *Error;
71 PyObject *Incomplete;
72 } binascii_state;
73
74 static inline binascii_state *
get_binascii_state(PyObject * module)75 get_binascii_state(PyObject *module)
76 {
77 return (binascii_state *)PyModule_GetState(module);
78 }
79
80
/* base64 decoding table, indexed by the input byte.
** Alphabet characters map to their 6-bit value.  Every other byte is
** written as -1, which is stored as 255 in an unsigned char; the decoder
** treats any entry >= 64 as "not a base64 character".
*/
static const unsigned char table_a2b_base64[] = {
    /* 0x00 - 0x0f */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x10 - 0x1f */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    /* 0x20 - 0x2f: '+' and '/' */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    /* 0x30 - 0x3f: digits; note PAD ('=') -> 0 */
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1,
    /* 0x40 - 0x4f: 'A' .. 'O' */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    /* 0x50 - 0x5f: 'P' .. 'Z' */
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    /* 0x60 - 0x6f: 'a' .. 'o' */
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    /* 0x70 - 0x7f: 'p' .. 'z' */
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,

    /* 0x80 - 0xff: never valid */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
};
100
/* Padding character: emitted by the base64 encoder for a short final
   group and recognised by the decoder. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory.
   The encoder allocates at most 2*len + 3 bytes, so this bound keeps that
   size computation within Py_ssize_t. */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)
105
/* base64 encoding alphabet: index is the 6-bit value, element is the
   ASCII character that represents it. */
static const unsigned char table_b2a_base64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";
108
109
/* Lookup table for the 16-bit CRC computed by binascii.crc_hqx().
   It is indexed with (high byte of the running CRC) XOR (next data byte). */
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
144
145 /*[clinic input]
146 module binascii
147 [clinic start generated code]*/
148 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
149
150 /*[python input]
151
152 class ascii_buffer_converter(CConverter):
153 type = 'Py_buffer'
154 converter = 'ascii_buffer_converter'
155 impl_by_reference = True
156 c_default = "{NULL, NULL}"
157
158 def cleanup(self):
159 name = self.name
160 return "".join(["if (", name, ".obj)\n PyBuffer_Release(&", name, ");\n"])
161
162 [python start generated code]*/
163 /*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
164
165 static int
ascii_buffer_converter(PyObject * arg,Py_buffer * buf)166 ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
167 {
168 if (arg == NULL) {
169 PyBuffer_Release(buf);
170 return 1;
171 }
172 if (PyUnicode_Check(arg)) {
173 if (PyUnicode_READY(arg) < 0)
174 return 0;
175 if (!PyUnicode_IS_ASCII(arg)) {
176 PyErr_SetString(PyExc_ValueError,
177 "string argument should contain only ASCII characters");
178 return 0;
179 }
180 assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
181 buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
182 buf->len = PyUnicode_GET_LENGTH(arg);
183 buf->obj = NULL;
184 return 1;
185 }
186 if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
187 PyErr_Format(PyExc_TypeError,
188 "argument should be bytes, buffer or ASCII string, "
189 "not '%.100s'", Py_TYPE(arg)->tp_name);
190 return 0;
191 }
192 if (!PyBuffer_IsContiguous(buf, 'C')) {
193 PyErr_Format(PyExc_TypeError,
194 "argument should be a contiguous buffer, "
195 "not '%.100s'", Py_TYPE(arg)->tp_name);
196 PyBuffer_Release(buf);
197 return 0;
198 }
199 return Py_CLEANUP_SUPPORTED;
200 }
201
202 #include "clinic/binascii.c.h"
203
204 /*[clinic input]
205 binascii.a2b_uu
206
207 data: ascii_buffer
208 /
209
210 Decode a line of uuencoded data.
211 [clinic start generated code]*/
212
213 static PyObject *
binascii_a2b_uu_impl(PyObject * module,Py_buffer * data)214 binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
215 /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
216 {
217 const unsigned char *ascii_data;
218 unsigned char *bin_data;
219 int leftbits = 0;
220 unsigned char this_ch;
221 unsigned int leftchar = 0;
222 PyObject *rv;
223 Py_ssize_t ascii_len, bin_len;
224 binascii_state *state;
225
226 ascii_data = data->buf;
227 ascii_len = data->len;
228
229 assert(ascii_len >= 0);
230
231 /* First byte: binary data length (in bytes) */
232 bin_len = (*ascii_data++ - ' ') & 077;
233 ascii_len--;
234
235 /* Allocate the buffer */
236 if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
237 return NULL;
238 bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
239
240 for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
241 /* XXX is it really best to add NULs if there's no more data */
242 this_ch = (ascii_len > 0) ? *ascii_data : 0;
243 if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
244 /*
245 ** Whitespace. Assume some spaces got eaten at
246 ** end-of-line. (We check this later)
247 */
248 this_ch = 0;
249 } else {
250 /* Check the character for legality
251 ** The 64 in stead of the expected 63 is because
252 ** there are a few uuencodes out there that use
253 ** '`' as zero instead of space.
254 */
255 if ( this_ch < ' ' || this_ch > (' ' + 64)) {
256 state = get_binascii_state(module);
257 if (state == NULL) {
258 return NULL;
259 }
260 PyErr_SetString(state->Error, "Illegal char");
261 Py_DECREF(rv);
262 return NULL;
263 }
264 this_ch = (this_ch - ' ') & 077;
265 }
266 /*
267 ** Shift it in on the low end, and see if there's
268 ** a byte ready for output.
269 */
270 leftchar = (leftchar << 6) | (this_ch);
271 leftbits += 6;
272 if ( leftbits >= 8 ) {
273 leftbits -= 8;
274 *bin_data++ = (leftchar >> leftbits) & 0xff;
275 leftchar &= ((1 << leftbits) - 1);
276 bin_len--;
277 }
278 }
279 /*
280 ** Finally, check that if there's anything left on the line
281 ** that it's whitespace only.
282 */
283 while( ascii_len-- > 0 ) {
284 this_ch = *ascii_data++;
285 /* Extra '`' may be written as padding in some cases */
286 if ( this_ch != ' ' && this_ch != ' '+64 &&
287 this_ch != '\n' && this_ch != '\r' ) {
288 state = get_binascii_state(module);
289 if (state == NULL) {
290 return NULL;
291 }
292 PyErr_SetString(state->Error, "Trailing garbage");
293 Py_DECREF(rv);
294 return NULL;
295 }
296 }
297 return rv;
298 }
299
300 /*[clinic input]
301 binascii.b2a_uu
302
303 data: Py_buffer
304 /
305 *
306 backtick: bool(accept={int}) = False
307
308 Uuencode line of data.
309 [clinic start generated code]*/
310
311 static PyObject *
binascii_b2a_uu_impl(PyObject * module,Py_buffer * data,int backtick)312 binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
313 /*[clinic end generated code: output=b1b99de62d9bbeb8 input=b26bc8d32b6ed2f6]*/
314 {
315 unsigned char *ascii_data;
316 const unsigned char *bin_data;
317 int leftbits = 0;
318 unsigned char this_ch;
319 unsigned int leftchar = 0;
320 binascii_state *state;
321 Py_ssize_t bin_len, out_len;
322 _PyBytesWriter writer;
323
324 _PyBytesWriter_Init(&writer);
325 bin_data = data->buf;
326 bin_len = data->len;
327 if ( bin_len > 45 ) {
328 /* The 45 is a limit that appears in all uuencode's */
329 state = get_binascii_state(module);
330 if (state == NULL) {
331 return NULL;
332 }
333 PyErr_SetString(state->Error, "At most 45 bytes at once");
334 return NULL;
335 }
336
337 /* We're lazy and allocate to much (fixed up later) */
338 out_len = 2 + (bin_len + 2) / 3 * 4;
339 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
340 if (ascii_data == NULL)
341 return NULL;
342
343 /* Store the length */
344 if (backtick && !bin_len)
345 *ascii_data++ = '`';
346 else
347 *ascii_data++ = ' ' + (unsigned char)bin_len;
348
349 for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
350 /* Shift the data (or padding) into our buffer */
351 if ( bin_len > 0 ) /* Data */
352 leftchar = (leftchar << 8) | *bin_data;
353 else /* Padding */
354 leftchar <<= 8;
355 leftbits += 8;
356
357 /* See if there are 6-bit groups ready */
358 while ( leftbits >= 6 ) {
359 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
360 leftbits -= 6;
361 if (backtick && !this_ch)
362 *ascii_data++ = '`';
363 else
364 *ascii_data++ = this_ch + ' ';
365 }
366 }
367 *ascii_data++ = '\n'; /* Append a courtesy newline */
368
369 return _PyBytesWriter_Finish(&writer, ascii_data);
370 }
371
372 /*[clinic input]
373 binascii.a2b_base64
374
375 data: ascii_buffer
376 /
377 *
378 strict_mode: bool(accept={int}) = False
379
380 Decode a line of base64 data.
381
382 strict_mode
383 When set to True, bytes that are not part of the base64 standard are not allowed.
384 The same applies to excess data after padding (= / ==).
385 [clinic start generated code]*/
386
387 static PyObject *
binascii_a2b_base64_impl(PyObject * module,Py_buffer * data,int strict_mode)388 binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
389 /*[clinic end generated code: output=5409557788d4f975 input=3a30c4e3528317c6]*/
390 {
391 assert(data->len >= 0);
392
393 const unsigned char *ascii_data = data->buf;
394 size_t ascii_len = data->len;
395 binascii_state *state = NULL;
396 char padding_started = 0;
397
398 /* Allocate the buffer */
399 Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
400 _PyBytesWriter writer;
401 _PyBytesWriter_Init(&writer);
402 unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
403 if (bin_data == NULL)
404 return NULL;
405 unsigned char *bin_data_start = bin_data;
406
407 if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') {
408 state = get_binascii_state(module);
409 if (state) {
410 PyErr_SetString(state->Error, "Leading padding not allowed");
411 }
412 goto error_end;
413 }
414
415 int quad_pos = 0;
416 unsigned char leftchar = 0;
417 int pads = 0;
418 for (size_t i = 0; i < ascii_len; i++) {
419 unsigned char this_ch = ascii_data[i];
420
421 /* Check for pad sequences and ignore
422 ** the invalid ones.
423 */
424 if (this_ch == BASE64_PAD) {
425 padding_started = 1;
426
427 if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
428 /* A pad sequence means we should not parse more input.
429 ** We've already interpreted the data from the quad at this point.
430 ** in strict mode, an error should raise if there's excess data after the padding.
431 */
432 if (strict_mode && i + 1 < ascii_len) {
433 state = get_binascii_state(module);
434 if (state) {
435 PyErr_SetString(state->Error, "Excess data after padding");
436 }
437 goto error_end;
438 }
439
440 goto done;
441 }
442 continue;
443 }
444
445 this_ch = table_a2b_base64[this_ch];
446 if (this_ch >= 64) {
447 if (strict_mode) {
448 state = get_binascii_state(module);
449 if (state) {
450 PyErr_SetString(state->Error, "Only base64 data is allowed");
451 }
452 goto error_end;
453 }
454 continue;
455 }
456
457 // Characters that are not '=', in the middle of the padding, are not allowed
458 if (strict_mode && padding_started) {
459 state = get_binascii_state(module);
460 if (state) {
461 PyErr_SetString(state->Error, "Discontinuous padding not allowed");
462 }
463 goto error_end;
464 }
465 pads = 0;
466
467 switch (quad_pos) {
468 case 0:
469 quad_pos = 1;
470 leftchar = this_ch;
471 break;
472 case 1:
473 quad_pos = 2;
474 *bin_data++ = (leftchar << 2) | (this_ch >> 4);
475 leftchar = this_ch & 0x0f;
476 break;
477 case 2:
478 quad_pos = 3;
479 *bin_data++ = (leftchar << 4) | (this_ch >> 2);
480 leftchar = this_ch & 0x03;
481 break;
482 case 3:
483 quad_pos = 0;
484 *bin_data++ = (leftchar << 6) | (this_ch);
485 leftchar = 0;
486 break;
487 }
488 }
489
490 if (quad_pos != 0) {
491 state = get_binascii_state(module);
492 if (state == NULL) {
493 /* error already set, from get_binascii_state */
494 } else if (quad_pos == 1) {
495 /*
496 ** There is exactly one extra valid, non-padding, base64 character.
497 ** This is an invalid length, as there is no possible input that
498 ** could encoded into such a base64 string.
499 */
500 PyErr_Format(state->Error,
501 "Invalid base64-encoded string: "
502 "number of data characters (%zd) cannot be 1 more "
503 "than a multiple of 4",
504 (bin_data - bin_data_start) / 3 * 4 + 1);
505 } else {
506 PyErr_SetString(state->Error, "Incorrect padding");
507 }
508 error_end:
509 _PyBytesWriter_Dealloc(&writer);
510 return NULL;
511 }
512
513 done:
514 return _PyBytesWriter_Finish(&writer, bin_data);
515 }
516
517
518 /*[clinic input]
519 binascii.b2a_base64
520
521 data: Py_buffer
522 /
523 *
524 newline: bool(accept={int}) = True
525
526 Base64-code line of data.
527 [clinic start generated code]*/
528
529 static PyObject *
binascii_b2a_base64_impl(PyObject * module,Py_buffer * data,int newline)530 binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
531 /*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/
532 {
533 unsigned char *ascii_data;
534 const unsigned char *bin_data;
535 int leftbits = 0;
536 unsigned char this_ch;
537 unsigned int leftchar = 0;
538 Py_ssize_t bin_len, out_len;
539 _PyBytesWriter writer;
540 binascii_state *state;
541
542 bin_data = data->buf;
543 bin_len = data->len;
544 _PyBytesWriter_Init(&writer);
545
546 assert(bin_len >= 0);
547
548 if ( bin_len > BASE64_MAXBIN ) {
549 state = get_binascii_state(module);
550 if (state == NULL) {
551 return NULL;
552 }
553 PyErr_SetString(state->Error, "Too much data for base64 line");
554 return NULL;
555 }
556
557 /* We're lazy and allocate too much (fixed up later).
558 "+2" leaves room for up to two pad characters.
559 Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
560 out_len = bin_len*2 + 2;
561 if (newline)
562 out_len++;
563 ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
564 if (ascii_data == NULL)
565 return NULL;
566
567 for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
568 /* Shift the data into our buffer */
569 leftchar = (leftchar << 8) | *bin_data;
570 leftbits += 8;
571
572 /* See if there are 6-bit groups ready */
573 while ( leftbits >= 6 ) {
574 this_ch = (leftchar >> (leftbits-6)) & 0x3f;
575 leftbits -= 6;
576 *ascii_data++ = table_b2a_base64[this_ch];
577 }
578 }
579 if ( leftbits == 2 ) {
580 *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
581 *ascii_data++ = BASE64_PAD;
582 *ascii_data++ = BASE64_PAD;
583 } else if ( leftbits == 4 ) {
584 *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
585 *ascii_data++ = BASE64_PAD;
586 }
587 if (newline)
588 *ascii_data++ = '\n'; /* Append a courtesy newline */
589
590 return _PyBytesWriter_Finish(&writer, ascii_data);
591 }
592
593
594 /*[clinic input]
595 binascii.crc_hqx
596
597 data: Py_buffer
598 crc: unsigned_int(bitwise=True)
599 /
600
601 Compute CRC-CCITT incrementally.
602 [clinic start generated code]*/
603
604 static PyObject *
binascii_crc_hqx_impl(PyObject * module,Py_buffer * data,unsigned int crc)605 binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
606 /*[clinic end generated code: output=2fde213d0f547a98 input=56237755370a951c]*/
607 {
608 const unsigned char *bin_data;
609 Py_ssize_t len;
610
611 crc &= 0xffff;
612 bin_data = data->buf;
613 len = data->len;
614
615 while(len-- > 0) {
616 crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
617 }
618
619 return PyLong_FromUnsignedLong(crc);
620 }
621
622 #ifndef USE_ZLIB_CRC32
623 /* Crc - 32 BIT ANSI X3.66 CRC checksum files
624 Also known as: ISO 3307
625 **********************************************************************|
626 * *|
627 * Demonstration program to compute the 32-bit CRC used as the frame *|
628 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *|
629 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *|
630 * protocol). The 32-bit FCS was added via the Federal Register, *|
631 * 1 June 1982, p.23798. I presume but don't know for certain that *|
632 * this polynomial is or will be included in CCITT V.41, which *|
633 * defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *|
634 * PUB 78 says that the 32-bit FCS reduces otherwise undetected *|
635 * errors by a factor of 10^-5 over 16-bit FCS. *|
636 * *|
637 **********************************************************************|
638
639 Copyright (C) 1986 Gary S. Brown. You may use this program, or
640 code or tables extracted from it, as desired without restriction.
641
642 First, the polynomial itself and its table of feedback terms. The
643 polynomial is
644 X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
645 Note that we take it "backwards" and put the highest-order term in
646 the lowest-order bit. The X^32 term is "implied"; the LSB is the
647 X^31 term, etc. The X^0 term (usually shown as "+1") results in
648 the MSB being 1.
649
650 Note that the usual hardware shift register implementation, which
651 is what we're using (we're merely optimizing it by doing eight-bit
652 chunks at a time) shifts bits into the lowest-order term. In our
653 implementation, that means shifting towards the right. Why do we
654 do it this way? Because the calculated CRC must be transmitted in
655 order from highest-order term to lowest-order term. UARTs transmit
656 characters in order from LSB to MSB. By storing the CRC this way,
657 we hand it to the UART in the order low-byte to high-byte; the UART
    sends each low-bit to high-bit; and the result is transmission bit
659 by bit from highest- to lowest-order term without requiring any bit
660 shuffling on our part. Reception works similarly.
661
662 The feedback terms table consists of 256, 32-bit entries. Notes:
663
664 1. The table can be generated at runtime if desired; code to do so
665 is shown later. It might not be obvious, but the feedback
666 terms simply represent the results of eight shift/xor opera-
667 tions for all combinations of data and CRC register values.
668
669 2. The CRC accumulation logic is the same for all CRC polynomials,
670 be they sixteen or thirty-two bits wide. You simply choose the
671 appropriate table. Alternatively, because the table can be
672 generated at runtime, you can start by generating the table for
673 the polynomial in question and use exactly the same "updcrc",
674 if your application needn't simultaneously handle two CRC
675 polynomials. (Note, however, that XMODEM is strange.)
676
677 3. For 16-bit CRCs, the table entries need be only 16 bits wide;
678 of course, 32-bit entries work OK if the high 16 bits are zero.
679
680 4. The values must be right-shifted by eight bits by the "updcrc"
681 logic; the shift must be unsigned (bring in zeroes). On some
682 hardware you could probably optimize the shift in assembler by
683 using byte-swap instructions.
684 ********************************************************************/
685
/* Feedback-term table for the byte-at-a-time CRC-32 in internal_crc32().
   It is indexed with (low byte of the running CRC) XOR (next data byte). */
static const unsigned int crc_32_tab[256] = {
    0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
    0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
    0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
    0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
    0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
    0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
    0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
    0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
    0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
    0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
    0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
    0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
    0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
    0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
    0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
    0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
    0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
    0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
    0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
    0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
    0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
    0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
    0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
    0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
    0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
    0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
    0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
    0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
    0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
    0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
    0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
    0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
    0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
    0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
    0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
    0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
    0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
    0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
    0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
    0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
    0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
    0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
    0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
    0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
    0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
    0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
    0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
    0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
    0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
    0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
    0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
    0x2d02ef8dU
};
740
741 static unsigned int
internal_crc32(const unsigned char * bin_data,Py_ssize_t len,unsigned int crc)742 internal_crc32(const unsigned char *bin_data, Py_ssize_t len, unsigned int crc)
743 { /* By Jim Ahlstrom; All rights transferred to CNRI */
744 unsigned int result;
745
746 crc = ~ crc;
747 while (len-- > 0) {
748 crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
749 /* Note: (crc >> 8) MUST zero fill on left */
750 }
751
752 result = (crc ^ 0xFFFFFFFF);
753 return result & 0xffffffff;
754 }
755 #endif /* USE_ZLIB_CRC32 */
756
757 /*[clinic input]
758 binascii.crc32 -> unsigned_int
759
760 data: Py_buffer
761 crc: unsigned_int(bitwise=True) = 0
762 /
763
764 Compute CRC-32 incrementally.
765 [clinic start generated code]*/
766
767 static unsigned int
binascii_crc32_impl(PyObject * module,Py_buffer * data,unsigned int crc)768 binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
769 /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
770
771 #ifdef USE_ZLIB_CRC32
772 /* This is the same as zlibmodule.c zlib_crc32_impl. It exists in two
773 * modules for historical reasons. */
774 {
775 /* Releasing the GIL for very small buffers is inefficient
776 and may lower performance */
777 if (data->len > 1024*5) {
778 unsigned char *buf = data->buf;
779 Py_ssize_t len = data->len;
780
781 Py_BEGIN_ALLOW_THREADS
782 /* Avoid truncation of length for very large buffers. crc32() takes
783 length as an unsigned int, which may be narrower than Py_ssize_t. */
784 while ((size_t)len > UINT_MAX) {
785 crc = crc32(crc, buf, UINT_MAX);
786 buf += (size_t) UINT_MAX;
787 len -= (size_t) UINT_MAX;
788 }
789 crc = crc32(crc, buf, (unsigned int)len);
790 Py_END_ALLOW_THREADS
791 } else {
792 crc = crc32(crc, data->buf, (unsigned int)data->len);
793 }
794 return crc & 0xffffffff;
795 }
796 #else /* USE_ZLIB_CRC32 */
797 {
798 const unsigned char *bin_data = data->buf;
799 Py_ssize_t len = data->len;
800
801 /* Releasing the GIL for very small buffers is inefficient
802 and may lower performance */
803 if (len > 1024*5) {
804 unsigned int result;
805 Py_BEGIN_ALLOW_THREADS
806 result = internal_crc32(bin_data, len, crc);
807 Py_END_ALLOW_THREADS
808 return result;
809 } else {
810 return internal_crc32(bin_data, len, crc);
811 }
812 }
813 #endif /* USE_ZLIB_CRC32 */
814
815 /*[clinic input]
816 binascii.b2a_hex
817
818 data: Py_buffer
819 sep: object = NULL
820 An optional single character or byte to separate hex bytes.
821 bytes_per_sep: int = 1
822 How many bytes between separators. Positive values count from the
823 right, negative values count from the left.
824
825 Hexadecimal representation of binary data.
826
827 The return value is a bytes object. This function is also
828 available as "hexlify()".
829
830 Example:
831 >>> binascii.b2a_hex(b'\xb9\x01\xef')
832 b'b901ef'
833 >>> binascii.hexlify(b'\xb9\x01\xef', ':')
834 b'b9:01:ef'
835 >>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
836 b'b9_01ef'
837 [clinic start generated code]*/
838
839 static PyObject *
binascii_b2a_hex_impl(PyObject * module,Py_buffer * data,PyObject * sep,int bytes_per_sep)840 binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
841 int bytes_per_sep)
842 /*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
843 {
844 return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
845 sep, bytes_per_sep);
846 }
847
848 /*[clinic input]
849 binascii.hexlify = binascii.b2a_hex
850
851 Hexadecimal representation of binary data.
852
853 The return value is a bytes object. This function is also
854 available as "b2a_hex()".
855 [clinic start generated code]*/
856
857 static PyObject *
binascii_hexlify_impl(PyObject * module,Py_buffer * data,PyObject * sep,int bytes_per_sep)858 binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
859 int bytes_per_sep)
860 /*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
861 {
862 return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
863 sep, bytes_per_sep);
864 }
865
866 /*[clinic input]
867 binascii.a2b_hex
868
869 hexstr: ascii_buffer
870 /
871
872 Binary data of hexadecimal representation.
873
874 hexstr must contain an even number of hex digits (upper or lower case).
875 This function is also available as "unhexlify()".
876 [clinic start generated code]*/
877
878 static PyObject *
binascii_a2b_hex_impl(PyObject * module,Py_buffer * hexstr)879 binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
880 /*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
881 {
882 const char* argbuf;
883 Py_ssize_t arglen;
884 PyObject *retval;
885 char* retbuf;
886 Py_ssize_t i, j;
887 binascii_state *state;
888
889 argbuf = hexstr->buf;
890 arglen = hexstr->len;
891
892 assert(arglen >= 0);
893
894 /* XXX What should we do about strings with an odd length? Should
895 * we add an implicit leading zero, or a trailing zero? For now,
896 * raise an exception.
897 */
898 if (arglen % 2) {
899 state = get_binascii_state(module);
900 if (state == NULL) {
901 return NULL;
902 }
903 PyErr_SetString(state->Error, "Odd-length string");
904 return NULL;
905 }
906
907 retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
908 if (!retval)
909 return NULL;
910 retbuf = PyBytes_AS_STRING(retval);
911
912 for (i=j=0; i < arglen; i += 2) {
913 unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])];
914 unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])];
915 if (top >= 16 || bot >= 16) {
916 state = get_binascii_state(module);
917 if (state == NULL) {
918 return NULL;
919 }
920 PyErr_SetString(state->Error,
921 "Non-hexadecimal digit found");
922 goto finally;
923 }
924 retbuf[j++] = (top << 4) + bot;
925 }
926 return retval;
927
928 finally:
929 Py_DECREF(retval);
930 return NULL;
931 }
932
933 /*[clinic input]
934 binascii.unhexlify = binascii.a2b_hex
935
936 Binary data of hexadecimal representation.
937
938 hexstr must contain an even number of hex digits (upper or lower case).
939 [clinic start generated code]*/
940
941 static PyObject *
binascii_unhexlify_impl(PyObject * module,Py_buffer * hexstr)942 binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
943 /*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
944 {
945 return binascii_a2b_hex_impl(module, hexstr);
946 }
947
/* Line-length limit used by b2a_qp() below: the encoder starts a new output
   line (soft line break) once the current line would reach this many
   characters. */
#define MAXLINESIZE 76
949
950
951 /*[clinic input]
952 binascii.a2b_qp
953
954 data: ascii_buffer
955 header: bool(accept={int}) = False
956
957 Decode a string of qp-encoded data.
958 [clinic start generated code]*/
959
960 static PyObject *
binascii_a2b_qp_impl(PyObject * module,Py_buffer * data,int header)961 binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
962 /*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/
963 {
964 Py_ssize_t in, out;
965 char ch;
966 const unsigned char *ascii_data;
967 unsigned char *odata;
968 Py_ssize_t datalen = 0;
969 PyObject *rv;
970
971 ascii_data = data->buf;
972 datalen = data->len;
973
974 /* We allocate the output same size as input, this is overkill.
975 */
976 odata = (unsigned char *) PyMem_Calloc(1, datalen);
977 if (odata == NULL) {
978 PyErr_NoMemory();
979 return NULL;
980 }
981
982 in = out = 0;
983 while (in < datalen) {
984 if (ascii_data[in] == '=') {
985 in++;
986 if (in >= datalen) break;
987 /* Soft line breaks */
988 if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
989 if (ascii_data[in] != '\n') {
990 while (in < datalen && ascii_data[in] != '\n') in++;
991 }
992 if (in < datalen) in++;
993 }
994 else if (ascii_data[in] == '=') {
995 /* broken case from broken python qp */
996 odata[out++] = '=';
997 in++;
998 }
999 else if ((in + 1 < datalen) &&
1000 ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
1001 (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
1002 (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
1003 ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
1004 (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
1005 (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
1006 /* hexval */
1007 ch = _PyLong_DigitValue[ascii_data[in]] << 4;
1008 in++;
1009 ch |= _PyLong_DigitValue[ascii_data[in]];
1010 in++;
1011 odata[out++] = ch;
1012 }
1013 else {
1014 odata[out++] = '=';
1015 }
1016 }
1017 else if (header && ascii_data[in] == '_') {
1018 odata[out++] = ' ';
1019 in++;
1020 }
1021 else {
1022 odata[out] = ascii_data[in];
1023 in++;
1024 out++;
1025 }
1026 }
1027 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1028 PyMem_Free(odata);
1029 return NULL;
1030 }
1031 PyMem_Free(odata);
1032 return rv;
1033 }
1034
/* Write the two-character uppercase hexadecimal form of ch into s[0]
   (high nibble) and s[1] (low nibble).  No terminator is written.
   Always returns 0. */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char hexdigits[] = "0123456789ABCDEF";

    s[0] = hexdigits[(ch >> 4) & 0x0F];
    s[1] = hexdigits[ch & 0x0F];
    return 0;
}
1045
1046 /* XXX: This is ridiculously complicated to be backward compatible
1047 * (mostly) with the quopri module. It doesn't re-create the quopri
1048 * module bug where text ending in CRLF has the CR encoded */
1049
1050 /*[clinic input]
1051 binascii.b2a_qp
1052
1053 data: Py_buffer
1054 quotetabs: bool(accept={int}) = False
1055 istext: bool(accept={int}) = True
1056 header: bool(accept={int}) = False
1057
1058 Encode a string using quoted-printable encoding.
1059
1060 On encoding, when istext is set, newlines are not encoded, and white
1061 space at end of lines is. When istext is not set, \r and \n (CR/LF)
1062 are both encoded. When quotetabs is set, space and tabs are encoded.
1063 [clinic start generated code]*/
1064
1065 static PyObject *
binascii_b2a_qp_impl(PyObject * module,Py_buffer * data,int quotetabs,int istext,int header)1066 binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
1067 int istext, int header)
1068 /*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/
1069 {
1070 Py_ssize_t in, out;
1071 const unsigned char *databuf;
1072 unsigned char *odata;
1073 Py_ssize_t datalen = 0, odatalen = 0;
1074 PyObject *rv;
1075 unsigned int linelen = 0;
1076 unsigned char ch;
1077 int crlf = 0;
1078 const unsigned char *p;
1079
1080 databuf = data->buf;
1081 datalen = data->len;
1082
1083 /* See if this string is using CRLF line ends */
1084 /* XXX: this function has the side effect of converting all of
1085 * the end of lines to be the same depending on this detection
1086 * here */
1087 p = (const unsigned char *) memchr(databuf, '\n', datalen);
1088 if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
1089 crlf = 1;
1090
1091 /* First, scan to see how many characters need to be encoded */
1092 in = 0;
1093 while (in < datalen) {
1094 Py_ssize_t delta = 0;
1095 if ((databuf[in] > 126) ||
1096 (databuf[in] == '=') ||
1097 (header && databuf[in] == '_') ||
1098 ((databuf[in] == '.') && (linelen == 0) &&
1099 (in + 1 == datalen || databuf[in+1] == '\n' ||
1100 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
1101 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1102 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1103 ((databuf[in] < 33) &&
1104 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1105 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
1106 {
1107 if ((linelen + 3) >= MAXLINESIZE) {
1108 linelen = 0;
1109 if (crlf)
1110 delta += 3;
1111 else
1112 delta += 2;
1113 }
1114 linelen += 3;
1115 delta += 3;
1116 in++;
1117 }
1118 else {
1119 if (istext &&
1120 ((databuf[in] == '\n') ||
1121 ((in+1 < datalen) && (databuf[in] == '\r') &&
1122 (databuf[in+1] == '\n'))))
1123 {
1124 linelen = 0;
1125 /* Protect against whitespace on end of line */
1126 if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
1127 delta += 2;
1128 if (crlf)
1129 delta += 2;
1130 else
1131 delta += 1;
1132 if (databuf[in] == '\r')
1133 in += 2;
1134 else
1135 in++;
1136 }
1137 else {
1138 if ((in + 1 != datalen) &&
1139 (databuf[in+1] != '\n') &&
1140 (linelen + 1) >= MAXLINESIZE) {
1141 linelen = 0;
1142 if (crlf)
1143 delta += 3;
1144 else
1145 delta += 2;
1146 }
1147 linelen++;
1148 delta++;
1149 in++;
1150 }
1151 }
1152 if (PY_SSIZE_T_MAX - delta < odatalen) {
1153 PyErr_NoMemory();
1154 return NULL;
1155 }
1156 odatalen += delta;
1157 }
1158
1159 /* We allocate the output same size as input, this is overkill.
1160 */
1161 odata = (unsigned char *) PyMem_Calloc(1, odatalen);
1162 if (odata == NULL) {
1163 PyErr_NoMemory();
1164 return NULL;
1165 }
1166
1167 in = out = linelen = 0;
1168 while (in < datalen) {
1169 if ((databuf[in] > 126) ||
1170 (databuf[in] == '=') ||
1171 (header && databuf[in] == '_') ||
1172 ((databuf[in] == '.') && (linelen == 0) &&
1173 (in + 1 == datalen || databuf[in+1] == '\n' ||
1174 databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
1175 (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
1176 ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
1177 ((databuf[in] < 33) &&
1178 (databuf[in] != '\r') && (databuf[in] != '\n') &&
1179 (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
1180 {
1181 if ((linelen + 3 )>= MAXLINESIZE) {
1182 odata[out++] = '=';
1183 if (crlf) odata[out++] = '\r';
1184 odata[out++] = '\n';
1185 linelen = 0;
1186 }
1187 odata[out++] = '=';
1188 to_hex(databuf[in], &odata[out]);
1189 out += 2;
1190 in++;
1191 linelen += 3;
1192 }
1193 else {
1194 if (istext &&
1195 ((databuf[in] == '\n') ||
1196 ((in+1 < datalen) && (databuf[in] == '\r') &&
1197 (databuf[in+1] == '\n'))))
1198 {
1199 linelen = 0;
1200 /* Protect against whitespace on end of line */
1201 if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
1202 ch = odata[out-1];
1203 odata[out-1] = '=';
1204 to_hex(ch, &odata[out]);
1205 out += 2;
1206 }
1207
1208 if (crlf) odata[out++] = '\r';
1209 odata[out++] = '\n';
1210 if (databuf[in] == '\r')
1211 in += 2;
1212 else
1213 in++;
1214 }
1215 else {
1216 if ((in + 1 != datalen) &&
1217 (databuf[in+1] != '\n') &&
1218 (linelen + 1) >= MAXLINESIZE) {
1219 odata[out++] = '=';
1220 if (crlf) odata[out++] = '\r';
1221 odata[out++] = '\n';
1222 linelen = 0;
1223 }
1224 linelen++;
1225 if (header && databuf[in] == ' ') {
1226 odata[out++] = '_';
1227 in++;
1228 }
1229 else {
1230 odata[out++] = databuf[in++];
1231 }
1232 }
1233 }
1234 }
1235 if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1236 PyMem_Free(odata);
1237 return NULL;
1238 }
1239 PyMem_Free(odata);
1240 return rv;
1241 }
1242
1243 /* List of functions defined in the module */
1244
static struct PyMethodDef binascii_module_methods[] = {
    /* One *_METHODDEF macro per module-level function.  The macros are
       defined outside this section (presumably in the Argument Clinic
       generated header -- confirm); each one is written without a trailing
       comma here. */
    BINASCII_A2B_UU_METHODDEF
    BINASCII_B2A_UU_METHODDEF
    BINASCII_A2B_BASE64_METHODDEF
    BINASCII_B2A_BASE64_METHODDEF
    BINASCII_A2B_HEX_METHODDEF
    BINASCII_B2A_HEX_METHODDEF
    BINASCII_HEXLIFY_METHODDEF
    BINASCII_UNHEXLIFY_METHODDEF
    BINASCII_CRC_HQX_METHODDEF
    BINASCII_CRC32_METHODDEF
    BINASCII_A2B_QP_METHODDEF
    BINASCII_B2A_QP_METHODDEF
    {NULL, NULL}                           /* sentinel */
};
1260
1261
/* Module docstring, referenced from the module definition below.  The
   initialization function itself (*must* be called PyInit_binascii) is
   defined further down. */
PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1264
1265 static int
binascii_exec(PyObject * module)1266 binascii_exec(PyObject *module) {
1267 int result;
1268 binascii_state *state = PyModule_GetState(module);
1269 if (state == NULL) {
1270 return -1;
1271 }
1272
1273 state->Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1274 if (state->Error == NULL) {
1275 return -1;
1276 }
1277 Py_INCREF(state->Error);
1278 result = PyModule_AddObject(module, "Error", state->Error);
1279 if (result == -1) {
1280 Py_DECREF(state->Error);
1281 return -1;
1282 }
1283
1284 state->Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1285 if (state->Incomplete == NULL) {
1286 return -1;
1287 }
1288 Py_INCREF(state->Incomplete);
1289 result = PyModule_AddObject(module, "Incomplete", state->Incomplete);
1290 if (result == -1) {
1291 Py_DECREF(state->Incomplete);
1292 return -1;
1293 }
1294
1295 return 0;
1296 }
1297
1298 static PyModuleDef_Slot binascii_slots[] = {
1299 {Py_mod_exec, binascii_exec},
1300 {0, NULL}
1301 };
1302
1303 static int
binascii_traverse(PyObject * module,visitproc visit,void * arg)1304 binascii_traverse(PyObject *module, visitproc visit, void *arg)
1305 {
1306 binascii_state *state = get_binascii_state(module);
1307 Py_VISIT(state->Error);
1308 Py_VISIT(state->Incomplete);
1309 return 0;
1310 }
1311
1312 static int
binascii_clear(PyObject * module)1313 binascii_clear(PyObject *module)
1314 {
1315 binascii_state *state = get_binascii_state(module);
1316 Py_CLEAR(state->Error);
1317 Py_CLEAR(state->Incomplete);
1318 return 0;
1319 }
1320
1321 static void
binascii_free(void * module)1322 binascii_free(void *module)
1323 {
1324 binascii_clear((PyObject *)module);
1325 }
1326
1327 static struct PyModuleDef binasciimodule = {
1328 PyModuleDef_HEAD_INIT,
1329 "binascii",
1330 doc_binascii,
1331 sizeof(binascii_state),
1332 binascii_module_methods,
1333 binascii_slots,
1334 binascii_traverse,
1335 binascii_clear,
1336 binascii_free
1337 };
1338
1339 PyMODINIT_FUNC
PyInit_binascii(void)1340 PyInit_binascii(void)
1341 {
1342 return PyModuleDef_Init(&binasciimodule);
1343 }
1344