1 /*
2 ** Routines to represent binary data in ASCII and vice-versa
3 **
4 ** This module currently supports the following encodings:
5 ** uuencode:
6 **      each line encodes 45 bytes (except possibly the last)
7 **      First char encodes (binary) length, rest data
8 **      each char encodes 6 bits, as follows:
9 **      binary: 01234567 abcdefgh ijklmnop
10 **      ascii:  012345 67abcd efghij klmnop
11 **      ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12 **      short binary data is zero-extended (so the bits are always in the
13 **      right place), this does *not* reflect in the length.
14 ** base64:
15 **      Line breaks are insignificant, but lines are at most 76 chars
**      each char encodes 6 bits, in the same order as uuencode/hqx. Encoding
17 **      is done via a table.
18 **      Short binary data is filled (in ASCII) with '='.
19 ** hqx:
20 **      File starts with introductory text, real data starts and ends
21 **      with colons.
22 **      Data consists of three similar parts: info, datafork, resourcefork.
23 **      Each part is protected (at the end) with a 16-bit crc
24 **      The binary data is run-length encoded, and then ascii-fied:
25 **      binary: 01234567 abcdefgh ijklmnop
26 **      ascii:  012345 67abcd efghij klmnop
27 **      ASCII encoding is table-driven, see the code.
28 **      Short binary data results in the runt ascii-byte being output with
29 **      the bits in the right place.
30 **
31 ** While I was reading dozens of programs that encode or decode the formats
32 ** here (documentation? hihi:-) I have formulated Jansen's Observation:
33 **
34 **      Programs that encode binary data in ASCII are written in
35 **      such a style that they are as unreadable as possible. Devices used
36 **      include unnecessary global variables, burying important tables
37 **      in unrelated sourcefiles, putting functions in include files,
38 **      using seemingly-descriptive variable names for different purposes,
39 **      calls to empty subroutines and a host of others.
40 **
41 ** I have attempted to break with this tradition, but I guess that that
42 ** does make the performance sub-optimal. Oh well, too bad...
43 **
44 ** Jack Jansen, CWI, July 1995.
45 **
46 ** Added support for quoted-printable encoding, based on rfc 1521 et al
47 ** quoted-printable encoding specifies that non printable characters (anything
48 ** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49 ** of the character.  It also specifies some other behavior to enable 8bit data
50 ** in a mail message with little difficulty (maximum line sizes, protecting
51 ** some cases of whitespace, etc).
52 **
53 ** Brandon Long, September 2001.
54 */
55 
56 #ifndef Py_BUILD_CORE_BUILTIN
57 #  define Py_BUILD_CORE_MODULE 1
58 #endif
59 
60 #define PY_SSIZE_T_CLEAN
61 
62 #include "Python.h"
63 #include "pycore_long.h"          // _PyLong_DigitValue
64 #include "pycore_strhex.h"        // _Py_strhex_bytes_with_sep()
65 #ifdef USE_ZLIB_CRC32
66 #  include "zlib.h"
67 #endif
68 
/* Module state, as returned by get_binascii_state(): the exception types
   owned by this module. */
typedef struct binascii_state {
    /* Exception type handed to PyErr_SetString()/PyErr_Format() when an
       encode or decode routine rejects its input. */
    PyObject *Error;
    /* NOTE(review): presumably the exception type for incomplete data; it is
       not referenced in the visible part of this file -- confirm. */
    PyObject *Incomplete;
} binascii_state;
73 
74 static inline binascii_state *
get_binascii_state(PyObject * module)75 get_binascii_state(PyObject *module)
76 {
77     return (binascii_state *)PyModule_GetState(module);
78 }
79 
80 
/* Decoding table for a2b_base64, indexed by the input byte.
   Characters of the base64 alphabet map to their 6-bit value (0..63).
   Every other byte is written as -1, which is stored as 255 in an
   unsigned char and is therefore rejected by the decoder's ">= 64" test.
   The pad character '=' maps to 0, but the decoder handles '=' before it
   consults this table. */
static const unsigned char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,

    /* Bytes 0x80..0xff are never valid base64. */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
};
100 
/* Pad character: written by b2a_base64 to fill the last quad, and
   recognised by a2b_base64 before any table lookup. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory.
   b2a_base64 sizes its output as twice the input length plus at most 3,
   so this bound keeps that size within PY_SSIZE_T_MAX. */
#define BASE64_MAXBIN ((PY_SSIZE_T_MAX - 3) / 2)

/* Encoding table for b2a_base64: 6-bit value -> alphabet character. */
static const unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
108 
109 
/* Lookup table used by binascii.crc_hqx: one 16-bit entry per possible
   value of (high byte of the running crc) XOR (next input byte). */
static const unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
144 
145 /*[clinic input]
146 module binascii
147 [clinic start generated code]*/
148 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=de89fb46bcaf3fec]*/
149 
150 /*[python input]
151 
152 class ascii_buffer_converter(CConverter):
153     type = 'Py_buffer'
154     converter = 'ascii_buffer_converter'
155     impl_by_reference = True
156     c_default = "{NULL, NULL}"
157 
158     def cleanup(self):
159         name = self.name
160         return "".join(["if (", name, ".obj)\n   PyBuffer_Release(&", name, ");\n"])
161 
162 [python start generated code]*/
163 /*[python end generated code: output=da39a3ee5e6b4b0d input=3eb7b63610da92cd]*/
164 
165 static int
ascii_buffer_converter(PyObject * arg,Py_buffer * buf)166 ascii_buffer_converter(PyObject *arg, Py_buffer *buf)
167 {
168     if (arg == NULL) {
169         PyBuffer_Release(buf);
170         return 1;
171     }
172     if (PyUnicode_Check(arg)) {
173         if (PyUnicode_READY(arg) < 0)
174             return 0;
175         if (!PyUnicode_IS_ASCII(arg)) {
176             PyErr_SetString(PyExc_ValueError,
177                             "string argument should contain only ASCII characters");
178             return 0;
179         }
180         assert(PyUnicode_KIND(arg) == PyUnicode_1BYTE_KIND);
181         buf->buf = (void *) PyUnicode_1BYTE_DATA(arg);
182         buf->len = PyUnicode_GET_LENGTH(arg);
183         buf->obj = NULL;
184         return 1;
185     }
186     if (PyObject_GetBuffer(arg, buf, PyBUF_SIMPLE) != 0) {
187         PyErr_Format(PyExc_TypeError,
188                      "argument should be bytes, buffer or ASCII string, "
189                      "not '%.100s'", Py_TYPE(arg)->tp_name);
190         return 0;
191     }
192     if (!PyBuffer_IsContiguous(buf, 'C')) {
193         PyErr_Format(PyExc_TypeError,
194                      "argument should be a contiguous buffer, "
195                      "not '%.100s'", Py_TYPE(arg)->tp_name);
196         PyBuffer_Release(buf);
197         return 0;
198     }
199     return Py_CLEANUP_SUPPORTED;
200 }
201 
202 #include "clinic/binascii.c.h"
203 
204 /*[clinic input]
205 binascii.a2b_uu
206 
207     data: ascii_buffer
208     /
209 
210 Decode a line of uuencoded data.
211 [clinic start generated code]*/
212 
213 static PyObject *
binascii_a2b_uu_impl(PyObject * module,Py_buffer * data)214 binascii_a2b_uu_impl(PyObject *module, Py_buffer *data)
215 /*[clinic end generated code: output=e027f8e0b0598742 input=7cafeaf73df63d1c]*/
216 {
217     const unsigned char *ascii_data;
218     unsigned char *bin_data;
219     int leftbits = 0;
220     unsigned char this_ch;
221     unsigned int leftchar = 0;
222     PyObject *rv;
223     Py_ssize_t ascii_len, bin_len;
224     binascii_state *state;
225 
226     ascii_data = data->buf;
227     ascii_len = data->len;
228 
229     assert(ascii_len >= 0);
230 
231     /* First byte: binary data length (in bytes) */
232     bin_len = (*ascii_data++ - ' ') & 077;
233     ascii_len--;
234 
235     /* Allocate the buffer */
236     if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
237         return NULL;
238     bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
239 
240     for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
241         /* XXX is it really best to add NULs if there's no more data */
242         this_ch = (ascii_len > 0) ? *ascii_data : 0;
243         if ( this_ch == '\n' || this_ch == '\r' || ascii_len <= 0) {
244             /*
245             ** Whitespace. Assume some spaces got eaten at
246             ** end-of-line. (We check this later)
247             */
248             this_ch = 0;
249         } else {
250             /* Check the character for legality
251             ** The 64 in stead of the expected 63 is because
252             ** there are a few uuencodes out there that use
253             ** '`' as zero instead of space.
254             */
255             if ( this_ch < ' ' || this_ch > (' ' + 64)) {
256                 state = get_binascii_state(module);
257                 if (state == NULL) {
258                     return NULL;
259                 }
260                 PyErr_SetString(state->Error, "Illegal char");
261                 Py_DECREF(rv);
262                 return NULL;
263             }
264             this_ch = (this_ch - ' ') & 077;
265         }
266         /*
267         ** Shift it in on the low end, and see if there's
268         ** a byte ready for output.
269         */
270         leftchar = (leftchar << 6) | (this_ch);
271         leftbits += 6;
272         if ( leftbits >= 8 ) {
273             leftbits -= 8;
274             *bin_data++ = (leftchar >> leftbits) & 0xff;
275             leftchar &= ((1 << leftbits) - 1);
276             bin_len--;
277         }
278     }
279     /*
280     ** Finally, check that if there's anything left on the line
281     ** that it's whitespace only.
282     */
283     while( ascii_len-- > 0 ) {
284         this_ch = *ascii_data++;
285         /* Extra '`' may be written as padding in some cases */
286         if ( this_ch != ' ' && this_ch != ' '+64 &&
287              this_ch != '\n' && this_ch != '\r' ) {
288             state = get_binascii_state(module);
289             if (state == NULL) {
290                 return NULL;
291             }
292             PyErr_SetString(state->Error, "Trailing garbage");
293             Py_DECREF(rv);
294             return NULL;
295         }
296     }
297     return rv;
298 }
299 
300 /*[clinic input]
301 binascii.b2a_uu
302 
303     data: Py_buffer
304     /
305     *
306     backtick: bool(accept={int}) = False
307 
308 Uuencode line of data.
309 [clinic start generated code]*/
310 
311 static PyObject *
binascii_b2a_uu_impl(PyObject * module,Py_buffer * data,int backtick)312 binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
313 /*[clinic end generated code: output=b1b99de62d9bbeb8 input=b26bc8d32b6ed2f6]*/
314 {
315     unsigned char *ascii_data;
316     const unsigned char *bin_data;
317     int leftbits = 0;
318     unsigned char this_ch;
319     unsigned int leftchar = 0;
320     binascii_state *state;
321     Py_ssize_t bin_len, out_len;
322     _PyBytesWriter writer;
323 
324     _PyBytesWriter_Init(&writer);
325     bin_data = data->buf;
326     bin_len = data->len;
327     if ( bin_len > 45 ) {
328         /* The 45 is a limit that appears in all uuencode's */
329         state = get_binascii_state(module);
330         if (state == NULL) {
331             return NULL;
332         }
333         PyErr_SetString(state->Error, "At most 45 bytes at once");
334         return NULL;
335     }
336 
337     /* We're lazy and allocate to much (fixed up later) */
338     out_len = 2 + (bin_len + 2) / 3 * 4;
339     ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
340     if (ascii_data == NULL)
341         return NULL;
342 
343     /* Store the length */
344     if (backtick && !bin_len)
345         *ascii_data++ = '`';
346     else
347         *ascii_data++ = ' ' + (unsigned char)bin_len;
348 
349     for( ; bin_len > 0 || leftbits != 0 ; bin_len--, bin_data++ ) {
350         /* Shift the data (or padding) into our buffer */
351         if ( bin_len > 0 )              /* Data */
352             leftchar = (leftchar << 8) | *bin_data;
353         else                            /* Padding */
354             leftchar <<= 8;
355         leftbits += 8;
356 
357         /* See if there are 6-bit groups ready */
358         while ( leftbits >= 6 ) {
359             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
360             leftbits -= 6;
361             if (backtick && !this_ch)
362                 *ascii_data++ = '`';
363             else
364                 *ascii_data++ = this_ch + ' ';
365         }
366     }
367     *ascii_data++ = '\n';       /* Append a courtesy newline */
368 
369     return _PyBytesWriter_Finish(&writer, ascii_data);
370 }
371 
372 /*[clinic input]
373 binascii.a2b_base64
374 
375     data: ascii_buffer
376     /
377     *
378     strict_mode: bool(accept={int}) = False
379 
380 Decode a line of base64 data.
381 
382   strict_mode
383     When set to True, bytes that are not part of the base64 standard are not allowed.
384     The same applies to excess data after padding (= / ==).
385 [clinic start generated code]*/
386 
387 static PyObject *
binascii_a2b_base64_impl(PyObject * module,Py_buffer * data,int strict_mode)388 binascii_a2b_base64_impl(PyObject *module, Py_buffer *data, int strict_mode)
389 /*[clinic end generated code: output=5409557788d4f975 input=3a30c4e3528317c6]*/
390 {
391     assert(data->len >= 0);
392 
393     const unsigned char *ascii_data = data->buf;
394     size_t ascii_len = data->len;
395     binascii_state *state = NULL;
396     char padding_started = 0;
397 
398     /* Allocate the buffer */
399     Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
400     _PyBytesWriter writer;
401     _PyBytesWriter_Init(&writer);
402     unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
403     if (bin_data == NULL)
404         return NULL;
405     unsigned char *bin_data_start = bin_data;
406 
407     if (strict_mode && ascii_len > 0 && ascii_data[0] == '=') {
408         state = get_binascii_state(module);
409         if (state) {
410             PyErr_SetString(state->Error, "Leading padding not allowed");
411         }
412         goto error_end;
413     }
414 
415     int quad_pos = 0;
416     unsigned char leftchar = 0;
417     int pads = 0;
418     for (size_t i = 0; i < ascii_len; i++) {
419         unsigned char this_ch = ascii_data[i];
420 
421         /* Check for pad sequences and ignore
422         ** the invalid ones.
423         */
424         if (this_ch == BASE64_PAD) {
425             padding_started = 1;
426 
427             if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
428                 /* A pad sequence means we should not parse more input.
429                 ** We've already interpreted the data from the quad at this point.
430                 ** in strict mode, an error should raise if there's excess data after the padding.
431                 */
432                 if (strict_mode && i + 1 < ascii_len) {
433                     state = get_binascii_state(module);
434                     if (state) {
435                         PyErr_SetString(state->Error, "Excess data after padding");
436                     }
437                     goto error_end;
438                 }
439 
440                 goto done;
441             }
442             continue;
443         }
444 
445         this_ch = table_a2b_base64[this_ch];
446         if (this_ch >= 64) {
447             if (strict_mode) {
448                 state = get_binascii_state(module);
449                 if (state) {
450                     PyErr_SetString(state->Error, "Only base64 data is allowed");
451                 }
452                 goto error_end;
453             }
454             continue;
455         }
456 
457         // Characters that are not '=', in the middle of the padding, are not allowed
458         if (strict_mode && padding_started) {
459             state = get_binascii_state(module);
460             if (state) {
461                 PyErr_SetString(state->Error, "Discontinuous padding not allowed");
462             }
463             goto error_end;
464         }
465         pads = 0;
466 
467         switch (quad_pos) {
468             case 0:
469                 quad_pos = 1;
470                 leftchar = this_ch;
471                 break;
472             case 1:
473                 quad_pos = 2;
474                 *bin_data++ = (leftchar << 2) | (this_ch >> 4);
475                 leftchar = this_ch & 0x0f;
476                 break;
477             case 2:
478                 quad_pos = 3;
479                 *bin_data++ = (leftchar << 4) | (this_ch >> 2);
480                 leftchar = this_ch & 0x03;
481                 break;
482             case 3:
483                 quad_pos = 0;
484                 *bin_data++ = (leftchar << 6) | (this_ch);
485                 leftchar = 0;
486                 break;
487         }
488     }
489 
490     if (quad_pos != 0) {
491         state = get_binascii_state(module);
492         if (state == NULL) {
493             /* error already set, from get_binascii_state */
494         } else if (quad_pos == 1) {
495             /*
496             ** There is exactly one extra valid, non-padding, base64 character.
497             ** This is an invalid length, as there is no possible input that
498             ** could encoded into such a base64 string.
499             */
500             PyErr_Format(state->Error,
501                          "Invalid base64-encoded string: "
502                          "number of data characters (%zd) cannot be 1 more "
503                          "than a multiple of 4",
504                          (bin_data - bin_data_start) / 3 * 4 + 1);
505         } else {
506             PyErr_SetString(state->Error, "Incorrect padding");
507         }
508         error_end:
509         _PyBytesWriter_Dealloc(&writer);
510         return NULL;
511     }
512 
513 done:
514     return _PyBytesWriter_Finish(&writer, bin_data);
515 }
516 
517 
518 /*[clinic input]
519 binascii.b2a_base64
520 
521     data: Py_buffer
522     /
523     *
524     newline: bool(accept={int}) = True
525 
526 Base64-code line of data.
527 [clinic start generated code]*/
528 
529 static PyObject *
binascii_b2a_base64_impl(PyObject * module,Py_buffer * data,int newline)530 binascii_b2a_base64_impl(PyObject *module, Py_buffer *data, int newline)
531 /*[clinic end generated code: output=4ad62c8e8485d3b3 input=6083dac5777fa45d]*/
532 {
533     unsigned char *ascii_data;
534     const unsigned char *bin_data;
535     int leftbits = 0;
536     unsigned char this_ch;
537     unsigned int leftchar = 0;
538     Py_ssize_t bin_len, out_len;
539     _PyBytesWriter writer;
540     binascii_state *state;
541 
542     bin_data = data->buf;
543     bin_len = data->len;
544     _PyBytesWriter_Init(&writer);
545 
546     assert(bin_len >= 0);
547 
548     if ( bin_len > BASE64_MAXBIN ) {
549         state = get_binascii_state(module);
550         if (state == NULL) {
551             return NULL;
552         }
553         PyErr_SetString(state->Error, "Too much data for base64 line");
554         return NULL;
555     }
556 
557     /* We're lazy and allocate too much (fixed up later).
558        "+2" leaves room for up to two pad characters.
559        Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
560     out_len = bin_len*2 + 2;
561     if (newline)
562         out_len++;
563     ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
564     if (ascii_data == NULL)
565         return NULL;
566 
567     for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
568         /* Shift the data into our buffer */
569         leftchar = (leftchar << 8) | *bin_data;
570         leftbits += 8;
571 
572         /* See if there are 6-bit groups ready */
573         while ( leftbits >= 6 ) {
574             this_ch = (leftchar >> (leftbits-6)) & 0x3f;
575             leftbits -= 6;
576             *ascii_data++ = table_b2a_base64[this_ch];
577         }
578     }
579     if ( leftbits == 2 ) {
580         *ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
581         *ascii_data++ = BASE64_PAD;
582         *ascii_data++ = BASE64_PAD;
583     } else if ( leftbits == 4 ) {
584         *ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
585         *ascii_data++ = BASE64_PAD;
586     }
587     if (newline)
588         *ascii_data++ = '\n';       /* Append a courtesy newline */
589 
590     return _PyBytesWriter_Finish(&writer, ascii_data);
591 }
592 
593 
594 /*[clinic input]
595 binascii.crc_hqx
596 
597     data: Py_buffer
598     crc: unsigned_int(bitwise=True)
599     /
600 
601 Compute CRC-CCITT incrementally.
602 [clinic start generated code]*/
603 
604 static PyObject *
binascii_crc_hqx_impl(PyObject * module,Py_buffer * data,unsigned int crc)605 binascii_crc_hqx_impl(PyObject *module, Py_buffer *data, unsigned int crc)
606 /*[clinic end generated code: output=2fde213d0f547a98 input=56237755370a951c]*/
607 {
608     const unsigned char *bin_data;
609     Py_ssize_t len;
610 
611     crc &= 0xffff;
612     bin_data = data->buf;
613     len = data->len;
614 
615     while(len-- > 0) {
616         crc = ((crc<<8)&0xff00) ^ crctab_hqx[(crc>>8)^*bin_data++];
617     }
618 
619     return PyLong_FromUnsignedLong(crc);
620 }
621 
622 #ifndef USE_ZLIB_CRC32
623 /*  Crc - 32 BIT ANSI X3.66 CRC checksum files
624     Also known as: ISO 3307
625 **********************************************************************|
626 *                                                                    *|
627 * Demonstration program to compute the 32-bit CRC used as the frame  *|
628 * check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71     *|
629 * and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level     *|
630 * protocol).  The 32-bit FCS was added via the Federal Register,     *|
631 * 1 June 1982, p.23798.  I presume but don't know for certain that   *|
632 * this polynomial is or will be included in CCITT V.41, which        *|
633 * defines the 16-bit CRC (often called CRC-CCITT) polynomial.  FIPS  *|
634 * PUB 78 says that the 32-bit FCS reduces otherwise undetected       *|
635 * errors by a factor of 10^-5 over 16-bit FCS.                       *|
636 *                                                                    *|
637 **********************************************************************|
638 
639  Copyright (C) 1986 Gary S. Brown.  You may use this program, or
640  code or tables extracted from it, as desired without restriction.
641 
642  First, the polynomial itself and its table of feedback terms.  The
643  polynomial is
644  X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
645  Note that we take it "backwards" and put the highest-order term in
646  the lowest-order bit.  The X^32 term is "implied"; the LSB is the
647  X^31 term, etc.  The X^0 term (usually shown as "+1") results in
648  the MSB being 1.
649 
650  Note that the usual hardware shift register implementation, which
651  is what we're using (we're merely optimizing it by doing eight-bit
652  chunks at a time) shifts bits into the lowest-order term.  In our
653  implementation, that means shifting towards the right.  Why do we
654  do it this way?  Because the calculated CRC must be transmitted in
655  order from highest-order term to lowest-order term.  UARTs transmit
656  characters in order from LSB to MSB.  By storing the CRC this way,
657  we hand it to the UART in the order low-byte to high-byte; the UART
 sends each low-bit to high-bit; and the result is transmission bit
659  by bit from highest- to lowest-order term without requiring any bit
660  shuffling on our part.  Reception works similarly.
661 
662  The feedback terms table consists of 256, 32-bit entries.  Notes:
663 
664   1. The table can be generated at runtime if desired; code to do so
665      is shown later.  It might not be obvious, but the feedback
666      terms simply represent the results of eight shift/xor opera-
667      tions for all combinations of data and CRC register values.
668 
669   2. The CRC accumulation logic is the same for all CRC polynomials,
670      be they sixteen or thirty-two bits wide.  You simply choose the
671      appropriate table.  Alternatively, because the table can be
672      generated at runtime, you can start by generating the table for
673      the polynomial in question and use exactly the same "updcrc",
674      if your application needn't simultaneously handle two CRC
675      polynomials.  (Note, however, that XMODEM is strange.)
676 
677   3. For 16-bit CRCs, the table entries need be only 16 bits wide;
678      of course, 32-bit entries work OK if the high 16 bits are zero.
679 
680   4. The values must be right-shifted by eight bits by the "updcrc"
681      logic; the shift must be unsigned (bring in zeroes).  On some
682      hardware you could probably optimize the shift in assembler by
683      using byte-swap instructions.
684 ********************************************************************/
685 
/* The feedback-terms table described above.  internal_crc32() indexes it
   with the low byte of (running crc XOR next input byte). */
static const unsigned int crc_32_tab[256] = {
0x00000000U, 0x77073096U, 0xee0e612cU, 0x990951baU, 0x076dc419U,
0x706af48fU, 0xe963a535U, 0x9e6495a3U, 0x0edb8832U, 0x79dcb8a4U,
0xe0d5e91eU, 0x97d2d988U, 0x09b64c2bU, 0x7eb17cbdU, 0xe7b82d07U,
0x90bf1d91U, 0x1db71064U, 0x6ab020f2U, 0xf3b97148U, 0x84be41deU,
0x1adad47dU, 0x6ddde4ebU, 0xf4d4b551U, 0x83d385c7U, 0x136c9856U,
0x646ba8c0U, 0xfd62f97aU, 0x8a65c9ecU, 0x14015c4fU, 0x63066cd9U,
0xfa0f3d63U, 0x8d080df5U, 0x3b6e20c8U, 0x4c69105eU, 0xd56041e4U,
0xa2677172U, 0x3c03e4d1U, 0x4b04d447U, 0xd20d85fdU, 0xa50ab56bU,
0x35b5a8faU, 0x42b2986cU, 0xdbbbc9d6U, 0xacbcf940U, 0x32d86ce3U,
0x45df5c75U, 0xdcd60dcfU, 0xabd13d59U, 0x26d930acU, 0x51de003aU,
0xc8d75180U, 0xbfd06116U, 0x21b4f4b5U, 0x56b3c423U, 0xcfba9599U,
0xb8bda50fU, 0x2802b89eU, 0x5f058808U, 0xc60cd9b2U, 0xb10be924U,
0x2f6f7c87U, 0x58684c11U, 0xc1611dabU, 0xb6662d3dU, 0x76dc4190U,
0x01db7106U, 0x98d220bcU, 0xefd5102aU, 0x71b18589U, 0x06b6b51fU,
0x9fbfe4a5U, 0xe8b8d433U, 0x7807c9a2U, 0x0f00f934U, 0x9609a88eU,
0xe10e9818U, 0x7f6a0dbbU, 0x086d3d2dU, 0x91646c97U, 0xe6635c01U,
0x6b6b51f4U, 0x1c6c6162U, 0x856530d8U, 0xf262004eU, 0x6c0695edU,
0x1b01a57bU, 0x8208f4c1U, 0xf50fc457U, 0x65b0d9c6U, 0x12b7e950U,
0x8bbeb8eaU, 0xfcb9887cU, 0x62dd1ddfU, 0x15da2d49U, 0x8cd37cf3U,
0xfbd44c65U, 0x4db26158U, 0x3ab551ceU, 0xa3bc0074U, 0xd4bb30e2U,
0x4adfa541U, 0x3dd895d7U, 0xa4d1c46dU, 0xd3d6f4fbU, 0x4369e96aU,
0x346ed9fcU, 0xad678846U, 0xda60b8d0U, 0x44042d73U, 0x33031de5U,
0xaa0a4c5fU, 0xdd0d7cc9U, 0x5005713cU, 0x270241aaU, 0xbe0b1010U,
0xc90c2086U, 0x5768b525U, 0x206f85b3U, 0xb966d409U, 0xce61e49fU,
0x5edef90eU, 0x29d9c998U, 0xb0d09822U, 0xc7d7a8b4U, 0x59b33d17U,
0x2eb40d81U, 0xb7bd5c3bU, 0xc0ba6cadU, 0xedb88320U, 0x9abfb3b6U,
0x03b6e20cU, 0x74b1d29aU, 0xead54739U, 0x9dd277afU, 0x04db2615U,
0x73dc1683U, 0xe3630b12U, 0x94643b84U, 0x0d6d6a3eU, 0x7a6a5aa8U,
0xe40ecf0bU, 0x9309ff9dU, 0x0a00ae27U, 0x7d079eb1U, 0xf00f9344U,
0x8708a3d2U, 0x1e01f268U, 0x6906c2feU, 0xf762575dU, 0x806567cbU,
0x196c3671U, 0x6e6b06e7U, 0xfed41b76U, 0x89d32be0U, 0x10da7a5aU,
0x67dd4accU, 0xf9b9df6fU, 0x8ebeeff9U, 0x17b7be43U, 0x60b08ed5U,
0xd6d6a3e8U, 0xa1d1937eU, 0x38d8c2c4U, 0x4fdff252U, 0xd1bb67f1U,
0xa6bc5767U, 0x3fb506ddU, 0x48b2364bU, 0xd80d2bdaU, 0xaf0a1b4cU,
0x36034af6U, 0x41047a60U, 0xdf60efc3U, 0xa867df55U, 0x316e8eefU,
0x4669be79U, 0xcb61b38cU, 0xbc66831aU, 0x256fd2a0U, 0x5268e236U,
0xcc0c7795U, 0xbb0b4703U, 0x220216b9U, 0x5505262fU, 0xc5ba3bbeU,
0xb2bd0b28U, 0x2bb45a92U, 0x5cb36a04U, 0xc2d7ffa7U, 0xb5d0cf31U,
0x2cd99e8bU, 0x5bdeae1dU, 0x9b64c2b0U, 0xec63f226U, 0x756aa39cU,
0x026d930aU, 0x9c0906a9U, 0xeb0e363fU, 0x72076785U, 0x05005713U,
0x95bf4a82U, 0xe2b87a14U, 0x7bb12baeU, 0x0cb61b38U, 0x92d28e9bU,
0xe5d5be0dU, 0x7cdcefb7U, 0x0bdbdf21U, 0x86d3d2d4U, 0xf1d4e242U,
0x68ddb3f8U, 0x1fda836eU, 0x81be16cdU, 0xf6b9265bU, 0x6fb077e1U,
0x18b74777U, 0x88085ae6U, 0xff0f6a70U, 0x66063bcaU, 0x11010b5cU,
0x8f659effU, 0xf862ae69U, 0x616bffd3U, 0x166ccf45U, 0xa00ae278U,
0xd70dd2eeU, 0x4e048354U, 0x3903b3c2U, 0xa7672661U, 0xd06016f7U,
0x4969474dU, 0x3e6e77dbU, 0xaed16a4aU, 0xd9d65adcU, 0x40df0b66U,
0x37d83bf0U, 0xa9bcae53U, 0xdebb9ec5U, 0x47b2cf7fU, 0x30b5ffe9U,
0xbdbdf21cU, 0xcabac28aU, 0x53b39330U, 0x24b4a3a6U, 0xbad03605U,
0xcdd70693U, 0x54de5729U, 0x23d967bfU, 0xb3667a2eU, 0xc4614ab8U,
0x5d681b02U, 0x2a6f2b94U, 0xb40bbe37U, 0xc30c8ea1U, 0x5a05df1bU,
0x2d02ef8dU
};
740 
741 static unsigned int
internal_crc32(const unsigned char * bin_data,Py_ssize_t len,unsigned int crc)742 internal_crc32(const unsigned char *bin_data, Py_ssize_t len, unsigned int crc)
743 { /* By Jim Ahlstrom; All rights transferred to CNRI */
744     unsigned int result;
745 
746     crc = ~ crc;
747     while (len-- > 0) {
748         crc = crc_32_tab[(crc ^ *bin_data++) & 0xff] ^ (crc >> 8);
749         /* Note:  (crc >> 8) MUST zero fill on left */
750     }
751 
752     result = (crc ^ 0xFFFFFFFF);
753     return result & 0xffffffff;
754 }
755 #endif  /* USE_ZLIB_CRC32 */
756 
757 /*[clinic input]
758 binascii.crc32 -> unsigned_int
759 
760     data: Py_buffer
761     crc: unsigned_int(bitwise=True) = 0
762     /
763 
764 Compute CRC-32 incrementally.
765 [clinic start generated code]*/
766 
767 static unsigned int
binascii_crc32_impl(PyObject * module,Py_buffer * data,unsigned int crc)768 binascii_crc32_impl(PyObject *module, Py_buffer *data, unsigned int crc)
769 /*[clinic end generated code: output=52cf59056a78593b input=bbe340bc99d25aa8]*/
770 
771 #ifdef USE_ZLIB_CRC32
772 /* This is the same as zlibmodule.c zlib_crc32_impl. It exists in two
773  * modules for historical reasons. */
774 {
775     /* Releasing the GIL for very small buffers is inefficient
776        and may lower performance */
777     if (data->len > 1024*5) {
778         unsigned char *buf = data->buf;
779         Py_ssize_t len = data->len;
780 
781         Py_BEGIN_ALLOW_THREADS
782         /* Avoid truncation of length for very large buffers. crc32() takes
783            length as an unsigned int, which may be narrower than Py_ssize_t. */
784         while ((size_t)len > UINT_MAX) {
785             crc = crc32(crc, buf, UINT_MAX);
786             buf += (size_t) UINT_MAX;
787             len -= (size_t) UINT_MAX;
788         }
789         crc = crc32(crc, buf, (unsigned int)len);
790         Py_END_ALLOW_THREADS
791     } else {
792         crc = crc32(crc, data->buf, (unsigned int)data->len);
793     }
794     return crc & 0xffffffff;
795 }
796 #else  /* USE_ZLIB_CRC32 */
797 {
798     const unsigned char *bin_data = data->buf;
799     Py_ssize_t len = data->len;
800 
801     /* Releasing the GIL for very small buffers is inefficient
802        and may lower performance */
803     if (len > 1024*5) {
804         unsigned int result;
805         Py_BEGIN_ALLOW_THREADS
806         result = internal_crc32(bin_data, len, crc);
807         Py_END_ALLOW_THREADS
808         return result;
809     } else {
810         return internal_crc32(bin_data, len, crc);
811     }
812 }
813 #endif  /* USE_ZLIB_CRC32 */
814 
815 /*[clinic input]
816 binascii.b2a_hex
817 
818     data: Py_buffer
819     sep: object = NULL
820         An optional single character or byte to separate hex bytes.
821     bytes_per_sep: int = 1
822         How many bytes between separators.  Positive values count from the
823         right, negative values count from the left.
824 
825 Hexadecimal representation of binary data.
826 
827 The return value is a bytes object.  This function is also
828 available as "hexlify()".
829 
830 Example:
831 >>> binascii.b2a_hex(b'\xb9\x01\xef')
832 b'b901ef'
833 >>> binascii.hexlify(b'\xb9\x01\xef', ':')
834 b'b9:01:ef'
835 >>> binascii.b2a_hex(b'\xb9\x01\xef', b'_', 2)
836 b'b9_01ef'
837 [clinic start generated code]*/
838 
839 static PyObject *
binascii_b2a_hex_impl(PyObject * module,Py_buffer * data,PyObject * sep,int bytes_per_sep)840 binascii_b2a_hex_impl(PyObject *module, Py_buffer *data, PyObject *sep,
841                       int bytes_per_sep)
842 /*[clinic end generated code: output=a26937946a81d2c7 input=ec0ade6ba2e43543]*/
843 {
844     return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
845                                      sep, bytes_per_sep);
846 }
847 
848 /*[clinic input]
849 binascii.hexlify = binascii.b2a_hex
850 
851 Hexadecimal representation of binary data.
852 
853 The return value is a bytes object.  This function is also
854 available as "b2a_hex()".
855 [clinic start generated code]*/
856 
857 static PyObject *
binascii_hexlify_impl(PyObject * module,Py_buffer * data,PyObject * sep,int bytes_per_sep)858 binascii_hexlify_impl(PyObject *module, Py_buffer *data, PyObject *sep,
859                       int bytes_per_sep)
860 /*[clinic end generated code: output=d12aa1b001b15199 input=bc317bd4e241f76b]*/
861 {
862     return _Py_strhex_bytes_with_sep((const char *)data->buf, data->len,
863                                      sep, bytes_per_sep);
864 }
865 
866 /*[clinic input]
867 binascii.a2b_hex
868 
869     hexstr: ascii_buffer
870     /
871 
872 Binary data of hexadecimal representation.
873 
874 hexstr must contain an even number of hex digits (upper or lower case).
875 This function is also available as "unhexlify()".
876 [clinic start generated code]*/
877 
878 static PyObject *
binascii_a2b_hex_impl(PyObject * module,Py_buffer * hexstr)879 binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
880 /*[clinic end generated code: output=0cc1a139af0eeecb input=9e1e7f2f94db24fd]*/
881 {
882     const char* argbuf;
883     Py_ssize_t arglen;
884     PyObject *retval;
885     char* retbuf;
886     Py_ssize_t i, j;
887     binascii_state *state;
888 
889     argbuf = hexstr->buf;
890     arglen = hexstr->len;
891 
892     assert(arglen >= 0);
893 
894     /* XXX What should we do about strings with an odd length?  Should
895      * we add an implicit leading zero, or a trailing zero?  For now,
896      * raise an exception.
897      */
898     if (arglen % 2) {
899         state = get_binascii_state(module);
900         if (state == NULL) {
901             return NULL;
902         }
903         PyErr_SetString(state->Error, "Odd-length string");
904         return NULL;
905     }
906 
907     retval = PyBytes_FromStringAndSize(NULL, (arglen/2));
908     if (!retval)
909         return NULL;
910     retbuf = PyBytes_AS_STRING(retval);
911 
912     for (i=j=0; i < arglen; i += 2) {
913         unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])];
914         unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])];
915         if (top >= 16 || bot >= 16) {
916             state = get_binascii_state(module);
917             if (state == NULL) {
918                 return NULL;
919             }
920             PyErr_SetString(state->Error,
921                             "Non-hexadecimal digit found");
922             goto finally;
923         }
924         retbuf[j++] = (top << 4) + bot;
925     }
926     return retval;
927 
928   finally:
929     Py_DECREF(retval);
930     return NULL;
931 }
932 
933 /*[clinic input]
934 binascii.unhexlify = binascii.a2b_hex
935 
936 Binary data of hexadecimal representation.
937 
938 hexstr must contain an even number of hex digits (upper or lower case).
939 [clinic start generated code]*/
940 
941 static PyObject *
binascii_unhexlify_impl(PyObject * module,Py_buffer * hexstr)942 binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
943 /*[clinic end generated code: output=51a64c06c79629e3 input=dd8c012725f462da]*/
944 {
945     return binascii_a2b_hex_impl(module, hexstr);
946 }
947 
/* Line-length limit for quoted-printable output.  b2a_qp compares the
   length of the current output line against this value and emits a soft
   line break ("=" followed by the line ending) before adding characters
   that would make the line reach it. */
#define MAXLINESIZE 76
949 
950 
951 /*[clinic input]
952 binascii.a2b_qp
953 
954     data: ascii_buffer
955     header: bool(accept={int}) = False
956 
957 Decode a string of qp-encoded data.
958 [clinic start generated code]*/
959 
960 static PyObject *
binascii_a2b_qp_impl(PyObject * module,Py_buffer * data,int header)961 binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
962 /*[clinic end generated code: output=e99f7846cfb9bc53 input=bf6766fea76cce8f]*/
963 {
964     Py_ssize_t in, out;
965     char ch;
966     const unsigned char *ascii_data;
967     unsigned char *odata;
968     Py_ssize_t datalen = 0;
969     PyObject *rv;
970 
971     ascii_data = data->buf;
972     datalen = data->len;
973 
974     /* We allocate the output same size as input, this is overkill.
975      */
976     odata = (unsigned char *) PyMem_Calloc(1, datalen);
977     if (odata == NULL) {
978         PyErr_NoMemory();
979         return NULL;
980     }
981 
982     in = out = 0;
983     while (in < datalen) {
984         if (ascii_data[in] == '=') {
985             in++;
986             if (in >= datalen) break;
987             /* Soft line breaks */
988             if ((ascii_data[in] == '\n') || (ascii_data[in] == '\r')) {
989                 if (ascii_data[in] != '\n') {
990                     while (in < datalen && ascii_data[in] != '\n') in++;
991                 }
992                 if (in < datalen) in++;
993             }
994             else if (ascii_data[in] == '=') {
995                 /* broken case from broken python qp */
996                 odata[out++] = '=';
997                 in++;
998             }
999             else if ((in + 1 < datalen) &&
1000                      ((ascii_data[in] >= 'A' && ascii_data[in] <= 'F') ||
1001                       (ascii_data[in] >= 'a' && ascii_data[in] <= 'f') ||
1002                       (ascii_data[in] >= '0' && ascii_data[in] <= '9')) &&
1003                      ((ascii_data[in+1] >= 'A' && ascii_data[in+1] <= 'F') ||
1004                       (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
1005                       (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
1006                 /* hexval */
1007                 ch = _PyLong_DigitValue[ascii_data[in]] << 4;
1008                 in++;
1009                 ch |= _PyLong_DigitValue[ascii_data[in]];
1010                 in++;
1011                 odata[out++] = ch;
1012             }
1013             else {
1014               odata[out++] = '=';
1015             }
1016         }
1017         else if (header && ascii_data[in] == '_') {
1018             odata[out++] = ' ';
1019             in++;
1020         }
1021         else {
1022             odata[out] = ascii_data[in];
1023             in++;
1024             out++;
1025         }
1026     }
1027     if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
1028         PyMem_Free(odata);
1029         return NULL;
1030     }
1031     PyMem_Free(odata);
1032     return rv;
1033 }
1034 
/* Write the two uppercase hexadecimal digits of ch into s: the high
 * nibble goes to s[0], the low nibble to s[1].  Nothing else is written
 * (in particular no terminator).  Always returns 0.
 */
static int
to_hex(unsigned char ch, unsigned char *s)
{
    static const char upper_digits[] = "0123456789ABCDEF";

    s[0] = upper_digits[(ch >> 4) & 0x0F];
    s[1] = upper_digits[ch & 0x0F];
    return 0;
}
1045 
1046 /* XXX: This is ridiculously complicated to be backward compatible
1047  * (mostly) with the quopri module.  It doesn't re-create the quopri
1048  * module bug where text ending in CRLF has the CR encoded */
1049 
1050 /*[clinic input]
1051 binascii.b2a_qp
1052 
1053     data: Py_buffer
1054     quotetabs: bool(accept={int}) = False
1055     istext: bool(accept={int}) = True
1056     header: bool(accept={int}) = False
1057 
1058 Encode a string using quoted-printable encoding.
1059 
1060 On encoding, when istext is set, newlines are not encoded, and white
1061 space at end of lines is.  When istext is not set, \r and \n (CR/LF)
1062 are both encoded.  When quotetabs is set, space and tabs are encoded.
1063 [clinic start generated code]*/
1064 
static PyObject *
binascii_b2a_qp_impl(PyObject *module, Py_buffer *data, int quotetabs,
                     int istext, int header)
/*[clinic end generated code: output=e9884472ebb1a94c input=21fb7eea4a184ba6]*/
/* Quoted-printable encoder.
 *
 * Works in two passes over the input.  The first pass only adds up how
 * much output space is needed (odatalen); the second pass writes the
 * encoded bytes into a buffer of that size.  Both passes walk the input
 * with the same byte classification and the same linelen bookkeeping, so
 * any change to one pass must be mirrored in the other.
 *
 * Returns a new bytes object, or NULL with an exception set when the
 * size computation would overflow or memory cannot be allocated.
 */
{
    Py_ssize_t in, out;
    const unsigned char *databuf;
    unsigned char *odata;
    Py_ssize_t datalen = 0, odatalen = 0;
    PyObject *rv;
    unsigned int linelen = 0;  /* characters on the current output line */
    unsigned char ch;
    int crlf = 0;              /* emit "\r\n" rather than "\n" line ends */
    const unsigned char *p;

    databuf = data->buf;
    datalen = data->len;

    /* See if this string is using CRLF line ends */
    /* XXX: this function has the side effect of converting all of
     * the end of lines to be the same depending on this detection
     * here */
    /* Only the first '\n' in the data is examined: CRLF mode is chosen
       when that '\n' is directly preceded by '\r'. */
    p = (const unsigned char *) memchr(databuf, '\n', datalen);
    if ((p != NULL) && (p > databuf) && (*(p-1) == '\r'))
        crlf = 1;

    /* First, scan to see how many characters need to be encoded */
    in = 0;
    while (in < datalen) {
        Py_ssize_t delta = 0;  /* output bytes needed for this step */
        /* A byte is written as "=XX" when it is:
         *  - above 126, or '=';
         *  - '_' in header mode;
         *  - a '.' at the start of an output line that is followed by the
         *    end of the data, '\n', '\r' or a NUL byte;
         *  - '\r' or '\n' when istext is false;
         *  - a tab or space that is the very last input byte;
         *  - below 33 and not '\r' or '\n'; tabs and spaces only qualify
         *    when quotetabs is set.
         */
        if ((databuf[in] > 126) ||
            (databuf[in] == '=') ||
            (header && databuf[in] == '_') ||
            ((databuf[in] == '.') && (linelen == 0) &&
             (in + 1 == datalen || databuf[in+1] == '\n' ||
              databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
            (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
            ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
            ((databuf[in] < 33) &&
             (databuf[in] != '\r') && (databuf[in] != '\n') &&
             (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
        {
            /* Room for a soft line break first: "=\r\n" or "=\n". */
            if ((linelen + 3) >= MAXLINESIZE) {
                linelen = 0;
                if (crlf)
                    delta += 3;
                else
                    delta += 2;
            }
            linelen += 3;
            delta += 3;
            in++;
        }
        else {
            /* In text mode a '\n', or a "\r\n" pair, ends the line. */
            if (istext &&
                ((databuf[in] == '\n') ||
                 ((in+1 < datalen) && (databuf[in] == '\r') &&
                 (databuf[in+1] == '\n'))))
            {
                linelen = 0;
                /* Protect against whitespace on end of line */
                /* The preceding space/tab will be rewritten as "=XX",
                   which takes two bytes more than the one already
                   counted for it. */
                if (in && ((databuf[in-1] == ' ') || (databuf[in-1] == '\t')))
                    delta += 2;
                if (crlf)
                    delta += 2;
                else
                    delta += 1;
                if (databuf[in] == '\r')
                    in += 2;
                else
                    in++;
            }
            else {
                /* Literal byte.  A soft line break goes in front of it
                   when the line is full, unless it is the last input
                   byte or is directly followed by '\n'. */
                if ((in + 1 != datalen) &&
                    (databuf[in+1] != '\n') &&
                    (linelen + 1) >= MAXLINESIZE) {
                    linelen = 0;
                    if (crlf)
                        delta += 3;
                    else
                        delta += 2;
                }
                linelen++;
                delta++;
                in++;
            }
        }
        /* Refuse to let the running total overflow Py_ssize_t. */
        if (PY_SSIZE_T_MAX - delta < odatalen) {
            PyErr_NoMemory();
            return NULL;
        }
        odatalen += delta;
    }

    /* Allocate the output buffer using the size computed by the first
     * pass above.
     */
    odata = (unsigned char *) PyMem_Calloc(1, odatalen);
    if (odata == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    /* Second pass: same walk as above, this time writing the output. */
    in = out = linelen = 0;
    while (in < datalen) {
        /* Same "needs =XX" test as in the first pass. */
        if ((databuf[in] > 126) ||
            (databuf[in] == '=') ||
            (header && databuf[in] == '_') ||
            ((databuf[in] == '.') && (linelen == 0) &&
             (in + 1 == datalen || databuf[in+1] == '\n' ||
              databuf[in+1] == '\r' || databuf[in+1] == 0)) ||
            (!istext && ((databuf[in] == '\r') || (databuf[in] == '\n'))) ||
            ((databuf[in] == '\t' || databuf[in] == ' ') && (in + 1 == datalen)) ||
            ((databuf[in] < 33) &&
             (databuf[in] != '\r') && (databuf[in] != '\n') &&
             (quotetabs || ((databuf[in] != '\t') && (databuf[in] != ' ')))))
        {
            /* Soft line break before the escape if the line is full. */
            if ((linelen + 3 )>= MAXLINESIZE) {
                odata[out++] = '=';
                if (crlf) odata[out++] = '\r';
                odata[out++] = '\n';
                linelen = 0;
            }
            odata[out++] = '=';
            to_hex(databuf[in], &odata[out]);
            out += 2;
            in++;
            linelen += 3;
        }
        else {
            if (istext &&
                ((databuf[in] == '\n') ||
                 ((in+1 < datalen) && (databuf[in] == '\r') &&
                 (databuf[in+1] == '\n'))))
            {
                linelen = 0;
                /* Protect against whitespace on end of line */
                /* A space/tab already written as the last output byte is
                   replaced in place by its "=XX" form. */
                if (out && ((odata[out-1] == ' ') || (odata[out-1] == '\t'))) {
                    ch = odata[out-1];
                    odata[out-1] = '=';
                    to_hex(ch, &odata[out]);
                    out += 2;
                }

                /* The line ending written depends on crlf, not on what
                   the input used at this position. */
                if (crlf) odata[out++] = '\r';
                odata[out++] = '\n';
                if (databuf[in] == '\r')
                    in += 2;
                else
                    in++;
            }
            else {
                if ((in + 1 != datalen) &&
                    (databuf[in+1] != '\n') &&
                    (linelen + 1) >= MAXLINESIZE) {
                    odata[out++] = '=';
                    if (crlf) odata[out++] = '\r';
                    odata[out++] = '\n';
                    linelen = 0;
                }
                linelen++;
                /* In header mode a space is written as '_'. */
                if (header && databuf[in] == ' ') {
                    odata[out++] = '_';
                    in++;
                }
                else {
                    odata[out++] = databuf[in++];
                }
            }
        }
    }
    /* Copy the bytes actually written (out) into the result; the scratch
       buffer is freed on both the success and the failure path. */
    if ((rv = PyBytes_FromStringAndSize((char *)odata, out)) == NULL) {
        PyMem_Free(odata);
        return NULL;
    }
    PyMem_Free(odata);
    return rv;
}
1242 
/* List of functions defined in the module.
 *
 * Each BINASCII_*_METHODDEF macro contributes one table entry, trailing
 * comma included, which is why no commas appear between them.  The
 * macros themselves are not defined in this part of the file (presumably
 * they come from the Argument Clinic generated header -- confirm).  The
 * final {NULL, NULL} entry terminates the table.
 */

static struct PyMethodDef binascii_module_methods[] = {
    BINASCII_A2B_UU_METHODDEF
    BINASCII_B2A_UU_METHODDEF
    BINASCII_A2B_BASE64_METHODDEF
    BINASCII_B2A_BASE64_METHODDEF
    BINASCII_A2B_HEX_METHODDEF
    BINASCII_B2A_HEX_METHODDEF
    BINASCII_HEXLIFY_METHODDEF
    BINASCII_UNHEXLIFY_METHODDEF
    BINASCII_CRC_HQX_METHODDEF
    BINASCII_CRC32_METHODDEF
    BINASCII_A2B_QP_METHODDEF
    BINASCII_B2A_QP_METHODDEF
    {NULL, NULL}                             /* sentinel */
};
1260 
1261 
/* Module docstring, referenced from the module definition below.  The
   initialization function for the module, defined further down, *must*
   be called PyInit_binascii. */
PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1264 
1265 static int
binascii_exec(PyObject * module)1266 binascii_exec(PyObject *module) {
1267     int result;
1268     binascii_state *state = PyModule_GetState(module);
1269     if (state == NULL) {
1270         return -1;
1271     }
1272 
1273     state->Error = PyErr_NewException("binascii.Error", PyExc_ValueError, NULL);
1274     if (state->Error == NULL) {
1275         return -1;
1276     }
1277     Py_INCREF(state->Error);
1278     result = PyModule_AddObject(module, "Error", state->Error);
1279     if (result == -1) {
1280         Py_DECREF(state->Error);
1281         return -1;
1282     }
1283 
1284     state->Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1285     if (state->Incomplete == NULL) {
1286         return -1;
1287     }
1288     Py_INCREF(state->Incomplete);
1289     result = PyModule_AddObject(module, "Incomplete", state->Incomplete);
1290     if (result == -1) {
1291         Py_DECREF(state->Incomplete);
1292         return -1;
1293     }
1294 
1295     return 0;
1296 }
1297 
1298 static PyModuleDef_Slot binascii_slots[] = {
1299     {Py_mod_exec, binascii_exec},
1300     {0, NULL}
1301 };
1302 
1303 static int
binascii_traverse(PyObject * module,visitproc visit,void * arg)1304 binascii_traverse(PyObject *module, visitproc visit, void *arg)
1305 {
1306     binascii_state *state = get_binascii_state(module);
1307     Py_VISIT(state->Error);
1308     Py_VISIT(state->Incomplete);
1309     return 0;
1310 }
1311 
1312 static int
binascii_clear(PyObject * module)1313 binascii_clear(PyObject *module)
1314 {
1315     binascii_state *state = get_binascii_state(module);
1316     Py_CLEAR(state->Error);
1317     Py_CLEAR(state->Incomplete);
1318     return 0;
1319 }
1320 
1321 static void
binascii_free(void * module)1322 binascii_free(void *module)
1323 {
1324     binascii_clear((PyObject *)module);
1325 }
1326 
1327 static struct PyModuleDef binasciimodule = {
1328     PyModuleDef_HEAD_INIT,
1329     "binascii",
1330     doc_binascii,
1331     sizeof(binascii_state),
1332     binascii_module_methods,
1333     binascii_slots,
1334     binascii_traverse,
1335     binascii_clear,
1336     binascii_free
1337 };
1338 
1339 PyMODINIT_FUNC
PyInit_binascii(void)1340 PyInit_binascii(void)
1341 {
1342     return PyModuleDef_Init(&binasciimodule);
1343 }
1344