1#! /usr/bin/env python3
2
3"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""
4
5# Modified 04-Oct-1995 by Jack Jansen to use binascii module
6# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
7# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere
8
9import re
10import struct
11import binascii
12
13
# Public API of the module, grouped by encoding family.
__all__ = [
    # Legacy interface exports traditional RFC 2045 Base64 encodings
    'encode', 'decode', 'encodebytes', 'decodebytes',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode',
    # Base85 and Ascii85 encodings
    'b85encode', 'b85decode', 'a85encode', 'a85decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]
30
31
bytes_types = (bytes, bytearray)  # Types acceptable as binary data


def _bytes_from_decode_data(s):
    """Normalize the argument of a decoding function to binary data.

    bytes and bytearray objects are returned unchanged.  A str is encoded
    as ASCII; any other object is copied into bytes through memoryview().

    Raises ValueError if a str contains non-ASCII characters, and TypeError
    if the object is neither a str nor usable as a buffer.
    """
    if isinstance(s, bytes_types):
        return s
    if not isinstance(s, str):
        try:
            return memoryview(s).tobytes()
        except TypeError:
            raise TypeError("argument should be a bytes-like object or ASCII "
                            "string, not %r" % s.__class__.__name__) from None
    try:
        return s.encode('ascii')
    except UnicodeEncodeError:
        raise ValueError('string argument should contain only ASCII characters')
47
48
49# Base64 encoding/decoding uses binascii
50
def b64encode(s, altchars=None):
    """Return the Base64 encoding of the bytes-like object s as bytes.

    altchars, when given, must be a byte string of length 2.  Its two bytes
    replace '+' and '/' in the output, which lets an application produce
    e.g. URL- or filesystem-safe Base64 strings.
    """
    encoded = binascii.b2a_base64(s, newline=False)
    if altchars is None:
        return encoded
    assert len(altchars) == 2, repr(altchars)
    substitution = bytes.maketrans(b'+/', altchars)
    return encoded.translate(substitution)
63
64
def b64decode(s, altchars=None, validate=False):
    """Decode the Base64 encoded bytes-like object or ASCII string s.

    altchars, when given, must be a bytes-like object or ASCII string of
    length 2 naming the characters that were used in place of '+' and '/'.

    The decoded data is returned as a bytes object.  binascii.Error is
    raised if s is incorrectly padded.

    With validate false (the default), characters that are neither in the
    normal base-64 alphabet nor the alternative alphabet are discarded prior
    to the padding check.  With validate true, such characters cause a
    binascii.Error.  For details of the strict base64 check, see:

    https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64
    """
    data = _bytes_from_decode_data(s)
    if altchars is not None:
        alternates = _bytes_from_decode_data(altchars)
        assert len(alternates) == 2, repr(alternates)
        # Map the alternative characters back onto the standard alphabet.
        to_standard = bytes.maketrans(alternates, b'+/')
        data = data.translate(to_standard)
    return binascii.a2b_base64(data, strict_mode=validate)
89
90
def standard_b64encode(s):
    """Encode bytes-like object s using the standard Base64 alphabet.

    This is b64encode() called without an alternative alphabet, so '+' and
    '/' appear unchanged in the output.

    The result is returned as a bytes object.
    """
    return b64encode(s)
97
def standard_b64decode(s):
    """Decode bytes encoded with the standard Base64 alphabet.

    Argument s is a bytes-like object or ASCII string to decode.  The result
    is returned as a bytes object.  A binascii.Error is raised if the input
    is incorrectly padded.  Characters that are not in the standard alphabet
    are discarded prior to the padding check.

    This is b64decode() called with its default arguments (no alternative
    alphabet, validate=False).
    """
    return b64decode(s)
107
108
# Byte translation tables that swap the standard Base64 characters '+' and '/'
# for the URL- and filesystem-safe characters '-' and '_', and back again.
# They are built once at import time and used by the urlsafe_* functions.
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')
111
def urlsafe_b64encode(s):
    """Encode bytes using the URL- and filesystem-safe Base64 alphabet.

    s is the bytes-like object to encode; a bytes object is returned.  In
    this alphabet '-' takes the place of '+' and '_' takes the place of '/'.
    """
    standard = b64encode(s)
    return standard.translate(_urlsafe_encode_translation)
120
def urlsafe_b64decode(s):
    """Decode bytes using the URL- and filesystem-safe Base64 alphabet.

    s is a bytes-like object or ASCII string; a bytes object is returned.
    A binascii.Error is raised if the input is incorrectly padded.
    Characters that are not in the URL-safe base-64 alphabet, and are not a
    plus '+' or slash '/', are discarded prior to the padding check.

    In this alphabet '-' takes the place of '+' and '_' takes the place of
    '/'.
    """
    data = _bytes_from_decode_data(s)
    # Rewrite '-' and '_' as '+' and '/' so the standard decoder can be used.
    standard = data.translate(_urlsafe_decode_translation)
    return b64decode(standard)
135
136
137
138# Base32 encoding/decoding must be done in Python
# Docstring templates for the public Base32 / Base32hex functions.  Each
# template is completed with str.format() and assigned to the matching
# function's __doc__ right after that function is defined.
_B32_ENCODE_DOCSTRING = '''
Encode the bytes-like object s using {encoding} and return a bytes object.
'''
_B32_DECODE_DOCSTRING = '''
Decode the {encoding} encoded bytes-like object or ASCII string s.

Optional casefold is a flag specifying whether a lowercase alphabet is
acceptable as input.  For security purposes, the default is False.
{extra_args}
The result is returned as a bytes object.  A binascii.Error is raised if
the input is incorrectly padded or if there are non-alphabet
characters present in the input.
'''
# Extra paragraph inserted as {extra_args} for b32decode(), the only decoder
# that accepts map01.
_B32_DECODE_MAP01_DOCSTRING = '''
RFC 3548 allows for optional mapping of the digit 0 (zero) to the
letter O (oh), and for optional mapping of the digit 1 (one) to
either the letter I (eye) or letter L (el).  The optional argument
map01 when not None, specifies which letter the digit 1 should be
mapped to (when map01 is not None, the digit 0 is always mapped to
the letter O).  For security purposes the default is None, so that
0 and 1 are not allowed in the input.
'''
# The two 32-character alphabets; a digit's value is its index in the string.
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
_b32hexalphabet = b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
# Per-alphabet caches, filled on first use and keyed by the alphabet bytes:
# _b32tab2 holds the two-digit encode tables, _b32rev the byte -> value maps.
_b32tab2 = {}
_b32rev = {}
165
def _b32encode(alphabet, s):
    """Encode the bytes-like object s with the given 32-character alphabet.

    alphabet is a bytes object of 32 digits.  The input is processed in
    5-byte quanta, each producing 8 output digits; a final short quantum is
    zero-padded and its unused digits are replaced by '=' characters.
    Returns a bytes object.
    """
    # The table of all two-digit combinations (1024 entries) is built on the
    # first call for each alphabet so that no memory is spent if the function
    # is never used.
    pair_table = _b32tab2.get(alphabet)
    if pair_table is None:
        digits = [bytes((code,)) for code in alphabet]
        pair_table = [hi + lo for hi in digits for lo in digits]
        _b32tab2[alphabet] = pair_table

    if not isinstance(s, bytes_types):
        s = memoryview(s).tobytes()
    leftover = len(s) % 5
    if leftover:
        # Zero-fill the last quantum.  Concatenate rather than extend in
        # place so a caller's bytearray is never modified.
        s = s + b'\0' * (5 - leftover)

    encoded = bytearray()
    for start in range(0, len(s), 5):
        quantum = int.from_bytes(s[start:start + 5], 'big')
        # Four 10-bit slices of the 40-bit quantum, most significant first.
        encoded += (pair_table[quantum >> 30] +
                    pair_table[(quantum >> 20) & 0x3ff] +
                    pair_table[(quantum >> 10) & 0x3ff] +
                    pair_table[quantum & 0x3ff])

    # Overwrite the digits that came only from the zero fill with padding.
    pad = {1: b'======', 2: b'====', 3: b'===', 4: b'='}.get(leftover)
    if pad is not None:
        encoded[-len(pad):] = pad
    return bytes(encoded)
201
def _b32decode(alphabet, s, casefold=False, map01=None):
    """Decode s, encoded with the given 32-character alphabet, to bytes.

    alphabet is the bytes object of 32 digits used for encoding.  s is a
    bytes-like object or ASCII string whose length must be a multiple of 8.
    If casefold is true the input is upper-cased before decoding.  If map01
    is not None it must have length 1; the digit 0 is then read as the
    letter O and the digit 1 as map01.

    Raises binascii.Error for a wrong length, an invalid number of trailing
    '=' characters, or a character that is not in the alphabet.
    """
    global _b32rev
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    if alphabet not in _b32rev:
        # Reverse map: byte value of a digit -> its 5-bit value.
        _b32rev[alphabet] = {v: k for k, v in enumerate(alphabet)}
    s = _bytes_from_decode_data(s)
    if len(s) % 8:
        raise binascii.Error('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  The argument map01 is either
    # None, or the character to map the digit 1 (one) to.  It should be
    # either L (el) or I (eye).
    if map01 is not None:
        map01 = _bytes_from_decode_data(map01)
        assert len(map01) == 1, repr(map01)
        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()
    # Strip off pad characters from the right.  We need to count the pad
    # characters because this will tell us how many null bytes to remove from
    # the end of the decoded string.
    l = len(s)
    s = s.rstrip(b'=')
    padchars = l - len(s)
    # Now decode the full quanta
    decoded = bytearray()
    b32rev = _b32rev[alphabet]
    for i in range(0, len(s), 8):
        quanta = s[i: i + 8]
        acc = 0
        try:
            # Accumulate 5 bits per digit.  A '=' left in the middle of the
            # input is not in the table and is reported as a bad digit.
            for c in quanta:
                acc = (acc << 5) + b32rev[c]
        except KeyError:
            raise binascii.Error('Non-base32 digit found') from None
        # A short final group is appended here as well; those 5 bytes are
        # replaced below once the padding has been validated.
        decoded += acc.to_bytes(5)  # big endian
    # Process the last, partial quanta
    if l % 8 or padchars not in {0, 1, 3, 4, 6}:
        raise binascii.Error('Incorrect padding')
    if padchars and decoded:
        # acc still holds the value of the final (short) group from the loop
        # above.  Shift it as if the pad characters were zero digits, then
        # keep only the bytes that carry real data.
        acc <<= 5 * padchars
        last = acc.to_bytes(5)  # big endian
        leftover = (43 - 5 * padchars) // 8  # 1: 4, 3: 3, 4: 2, 6: 1
        decoded[-5:] = last[:leftover]
    return bytes(decoded)
247
248
def b32encode(s):
    # Base32 with the standard alphabet (_b32alphabet).
    return _b32encode(_b32alphabet, s)
# The docstring is generated from the shared encode template.
b32encode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32')
252
def b32decode(s, casefold=False, map01=None):
    # Base32 with the standard alphabet (_b32alphabet); casefold and map01
    # are passed straight through to the shared decoder.
    return _b32decode(_b32alphabet, s, casefold, map01)
# The docstring is generated from the shared decode template, with the map01
# paragraph inserted as the extra argument description.
b32decode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32',
                                        extra_args=_B32_DECODE_MAP01_DOCSTRING)
257
def b32hexencode(s):
    # Base32 with the "extended hex" alphabet (_b32hexalphabet).
    return _b32encode(_b32hexalphabet, s)
# The docstring is generated from the shared encode template.
b32hexencode.__doc__ = _B32_ENCODE_DOCSTRING.format(encoding='base32hex')
261
def b32hexdecode(s, casefold=False):
    # Base32 with the "extended hex" alphabet (_b32hexalphabet).
    # base32hex does not have the 01 mapping
    return _b32decode(_b32hexalphabet, s, casefold)
# The docstring is generated from the shared decode template; there is no
# extra argument paragraph because map01 is not accepted here.
b32hexdecode.__doc__ = _B32_DECODE_DOCSTRING.format(encoding='base32hex',
                                                    extra_args='')
267
268
269# RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
270# lowercase.  The RFC also recommends against accepting input case
271# insensitively.
def b16encode(s):
    """Return the Base16 encoding of the bytes-like object s as bytes.

    The output uses the uppercase hexadecimal digits.
    """
    lowercase_hex = binascii.hexlify(s)
    return lowercase_hex.upper()
276
277
def b16decode(s, casefold=False):
    """Decode the Base16 encoded bytes-like object or ASCII string s.

    casefold is an optional flag; when true, lowercase hexadecimal digits
    are accepted as well.  For security purposes, the default is False.

    The decoded data is returned as a bytes object.  A binascii.Error is
    raised if s is incorrectly padded or if it contains characters outside
    the Base16 alphabet.
    """
    data = _bytes_from_decode_data(s)
    if casefold:
        data = data.upper()
    # Only the uppercase alphabet is valid at this point.
    if re.search(b'[^0-9A-F]', data) is not None:
        raise binascii.Error('Non-base16 digit found')
    return binascii.unhexlify(data)
294
295#
296# Ascii85 encoding/decoding
297#
298
# Ascii85 digit tables, built by a85encode() on its first call:
# _a85chars holds the 85 single digits (byte values 33 through 117) and
# _a85chars2 holds every ordered pair of digits.
_a85chars = None
_a85chars2 = None
# Markers that frame the data in the Adobe variant of the encoding.
_A85START = b"<~"
_A85END = b"~>"
303
def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Shared implementation of a85encode() and b85encode().

    b is the bytes-like object to encode.  chars is the list of the 85
    single digits and chars2 the list of all two-digit combinations, both
    as bytes objects.  The input is zero-padded to a multiple of 4 bytes and
    encoded as big-endian 32-bit words of 5 digits each.

    If pad is false, the digits produced by the padding bytes are removed
    from the end of the result.  If foldnuls is true an all-zero word is
    written as b'z'; if foldspaces is true a word of four spaces is written
    as b'y'.  Returns a bytes object.
    """
    if not isinstance(b, bytes_types):
        b = memoryview(b).tobytes()

    padding = (-len(b)) % 4
    if padding:
        b = b + b'\0' * padding
    words = struct.unpack('!%dI' % (len(b) // 4), b)

    chunks = []
    for word in words:
        if foldnuls and not word:
            chunks.append(b'z')
        elif foldspaces and word == 0x20202020:
            chunks.append(b'y')
        else:
            # 614125 == 85**3 and 7225 == 85**2: the first two digits, the
            # middle two digits and the last digit of the base-85 number.
            chunks.append(chars2[word // 614125] +
                          chars2[word // 85 % 7225] +
                          chars[word % 85])

    if padding and not pad:
        # A folded final word has to be expanded to its five-digit form
        # before the digits that stem from the padding can be cut off.
        if chunks[-1] == b'z':
            chunks[-1] = chars[0] * 5
        chunks[-1] = chunks[-1][:-padding]

    return b''.join(chunks)
327
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    """Encode bytes-like object b using Ascii85 and return a bytes object.

    foldspaces is an optional flag that makes the encoder emit the short
    sequence 'y' for 4 consecutive spaces (ASCII 0x20), as supported by
    'btoa'.  The "standard" Adobe encoding does not support this feature.

    wrapcol controls line wrapping of the output.  If it is non-zero,
    newline (b'\\n') characters are inserted so that each output line is at
    most this many characters long.

    pad controls whether the input is padded to a multiple of 4 before
    encoding.  Note that the btoa implementation always pads.

    adobe controls whether the encoded byte sequence is framed with <~ and
    ~>, as done by the Adobe implementation.
    """
    global _a85chars, _a85chars2
    # The digit tables are created on first use so that no memory is spent
    # on them if this function is never called.
    if _a85chars2 is None:
        _a85chars = [bytes((code,)) for code in range(33, 118)]
        _a85chars2 = [first + second
                      for first in _a85chars for second in _a85chars]

    result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)
    if adobe:
        result = _A85START + result

    if wrapcol:
        # A line must at least be able to hold one frame marker (Adobe) or
        # one character (otherwise).
        narrowest = 2 if adobe else 1
        wrapcol = max(narrowest, wrapcol)
        lines = [result[start:start + wrapcol]
                 for start in range(0, len(result), wrapcol)]
        # Put the end marker on a line of its own when it would not fit on
        # the last line.
        if adobe and len(lines[-1]) + 2 > wrapcol:
            lines.append(b'')
        result = b'\n'.join(lines)

    if adobe:
        result += _A85END
    return result
368
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    """Decode the Ascii85 encoded bytes-like object or ASCII string b.

    foldspaces is a flag that specifies whether the 'y' short sequence is
    accepted as shorthand for 4 consecutive spaces (ASCII 0x20).  The
    "standard" Adobe encoding does not support this feature.

    adobe controls whether the input is in Adobe Ascii85 format (i.e. is
    framed with <~ and ~>).  In that format the trailing ~> is required; a
    leading <~ is removed when present.

    ignorechars should be a byte string containing the characters to skip
    in the input.  It should only contain whitespace characters, and by
    default contains all whitespace characters in ASCII.

    The result is returned as a bytes object.  ValueError is raised for a
    missing Adobe end marker, a 'z' or 'y' inside a group of five digits, a
    group whose value does not fit in 32 bits, or any other character that
    is neither a digit nor listed in ignorechars.
    """
    data = _bytes_from_decode_data(b)
    if adobe:
        if not data.endswith(_A85END):
            raise ValueError(
                "Ascii85 encoded byte sequences must end "
                "with {!r}".format(_A85END)
                )
        # Drop the end marker and, if there is one, the start marker.
        data = data[2:-2] if data.startswith(_A85START) else data[:-2]

    first_digit, last_digit = b'!'[0], b'u'[0]
    zero_mark, space_mark = b'z'[0], b'y'[0]
    pack_word = struct.Struct('!I').pack
    pieces = []
    group = []
    # The input is walked byte by byte so that ignorable characters can be
    # skipped and the short sequences handled.  Four extra 'u' digits are
    # appended to complete a trailing partial group.
    for byte in data + b'u' * 4:
        if first_digit <= byte <= last_digit:
            group.append(byte)
            if len(group) < 5:
                continue
            value = 0
            for digit in group:
                value = 85 * value + (digit - 33)
            try:
                pieces.append(pack_word(value))
            except struct.error:
                raise ValueError('Ascii85 overflow') from None
            group.clear()
        elif byte == zero_mark:
            if group:
                raise ValueError('z inside Ascii85 5-tuple')
            pieces.append(b'\0\0\0\0')
        elif foldspaces and byte == space_mark:
            if group:
                raise ValueError('y inside Ascii85 5-tuple')
            pieces.append(b'\x20\x20\x20\x20')
        elif byte in ignorechars:
            # Skip whitespace
            continue
        else:
            raise ValueError('Non-Ascii85 digit found: %c' % byte)

    result = b''.join(pieces)
    # Whatever remains in the group are the appended digits that were not
    # needed; the bytes produced from the ones that were used are removed.
    surplus = 4 - len(group)
    if surplus:
        result = result[:-surplus]
    return result
438
439# The following code is originally taken (with permission) from Mercurial
440
# The 85 digits of this base85 variant; a digit's value is its index here.
_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
# Tables built on first use: _b85chars (single digits) and _b85chars2 (all
# digit pairs) by b85encode(), _b85dec (byte value -> digit value, None for
# bytes outside the alphabet) by b85decode().
_b85chars = None
_b85chars2 = None
_b85dec = None
446
def b85encode(b, pad=False):
    """Encode bytes-like object b in base85 format and return a bytes object.

    If pad is true, b'\\0' bytes are appended to the input so that its
    length is a multiple of 4 bytes before encoding.
    """
    global _b85chars, _b85chars2
    # The digit tables are created on first use so that no memory is spent
    # on them if this function is never called.
    if _b85chars2 is None:
        singles = [bytes((code,)) for code in _b85alphabet]
        _b85chars = singles
        _b85chars2 = [first + second for first in singles for second in singles]
    return _85encode(b, _b85chars, _b85chars2, pad)
460
def b85decode(b):
    """Decode the base85-encoded bytes-like object or ASCII string b.

    The input is decoded in groups of 5 digits, each giving 4 bytes.  A
    final short group is completed with b'~' (the highest digit) and the
    same number of bytes is removed from the end of the result.

    The result is returned as a bytes object.  ValueError is raised if the
    input contains a character outside the base85 alphabet or if a group
    encodes a value that does not fit in 32 bits.
    """
    global _b85dec
    # Delay the initialization of tables to not waste memory
    # if the function is never called
    if _b85dec is None:
        # Fill a local list and bind the global only once it is complete.
        # Binding the empty list first would let a concurrent caller see a
        # partly filled table and report valid digits as bad characters.
        table = [None] * 256
        for value, code in enumerate(_b85alphabet):
            table[code] = value
        _b85dec = table
    dec = _b85dec

    b = _bytes_from_decode_data(b)
    padding = (-len(b)) % 5
    b = b + b'~' * padding
    out = []
    packI = struct.Struct('!I').pack
    for i in range(0, len(b), 5):
        chunk = b[i:i + 5]
        acc = 0
        try:
            for c in chunk:
                acc = acc * 85 + dec[c]
        except TypeError:
            # Adding None failed: find the offending byte so that its
            # position can be reported.
            for j, c in enumerate(chunk):
                if dec[c] is None:
                    raise ValueError('bad base85 character at position %d'
                                    % (i + j)) from None
            raise
        try:
            out.append(packI(acc))
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % i) from None

    result = b''.join(out)
    if padding:
        # Drop the bytes that came from the b'~' fill digits.
        result = result[:-padding]
    return result
501
502# Legacy interface.  This code could be cleaned up since I don't believe
503# binascii has any line length limitations.  It just doesn't seem worth it
504# though.  The files should be opened in binary mode.
505
MAXLINESIZE = 76 # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE//4)*3

def encode(input, output):
    """Encode a file; input and output are binary files.

    The input is consumed MAXBINSIZE bytes at a time and each piece is
    written to output as one newline-terminated line of Base64 text.
    """
    while chunk := input.read(MAXBINSIZE):
        # Top up a short read, so that only the read that hits the end of
        # the input can produce a line shorter than MAXLINESIZE.
        while len(chunk) < MAXBINSIZE:
            extra = input.read(MAXBINSIZE - len(chunk))
            if not extra:
                break
            chunk += extra
        output.write(binascii.b2a_base64(chunk))
522
523
def decode(input, output):
    """Decode a file; input and output are binary files.

    Each line read from input is decoded on its own and the resulting bytes
    are written to output.
    """
    while line := input.readline():
        output.write(binascii.a2b_base64(line))
532
def _input_type_check(s):
    """Check that s is one-dimensional binary data with single-byte items.

    Returns None if s is acceptable.  Raises TypeError if s does not support
    the buffer protocol, if its item format is not one of 'c', 'b' or 'B',
    or if it has more than one dimension.
    """
    type_name = s.__class__.__name__
    try:
        view = memoryview(s)
    except TypeError as err:
        raise TypeError(
            "expected bytes-like object, not %s" % type_name) from err
    if view.format not in ('c', 'b', 'B'):
        raise TypeError("expected single byte elements, not %r from %s" %
                        (view.format, type_name))
    if view.ndim != 1:
        raise TypeError("expected 1-D data, not %d-D data from %s" %
                        (view.ndim, type_name))
547
548
def encodebytes(s):
    """Encode a bytestring into a bytes object containing multiple lines
    of base-64 data.

    The input is split into pieces of MAXBINSIZE bytes and every piece
    becomes one newline-terminated line of output.  TypeError is raised if
    s is not one-dimensional binary data with single-byte items.
    """
    _input_type_check(s)
    return b"".join(
        binascii.b2a_base64(s[start : start + MAXBINSIZE])
        for start in range(0, len(s), MAXBINSIZE)
    )
558
559
def decodebytes(s):
    """Decode a bytestring of base-64 data into a bytes object.

    s must be a bytes-like object holding one-dimensional data with
    single-byte items; otherwise TypeError is raised by the input check.
    The whole input is decoded with a single binascii.a2b_base64() call.
    """
    _input_type_check(s)
    return binascii.a2b_base64(s)
564
565
566# Usable as a script...
def main():
    """Command-line entry point: Base64-encode or -decode a file or stdin.

    Options are parsed from sys.argv with getopt:
        -e      encode (the default)
        -d, -u  decode
        -t      run the built-in round-trip test and return
        -h      print the usage text and return

    The optional positional argument names the input file, which is opened
    in binary mode; without an argument, or with '-', standard input is
    read.  The result is written to standard output in binary mode.

    An option that getopt rejects causes the error and the usage text to be
    printed on standard error, followed by exit status 2.
    """
    import getopt
    import sys

    usage = """usage: %s [-h|-d|-e|-u|-t] [file|-]
        -h: print this help message and exit
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'"""%sys.argv[0]
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hdeut')
    except getopt.error as msg:
        # Write to stderr explicitly.  Rebinding sys.stdout to sys.stderr
        # would leave the interpreter's stdout redirected for any caller
        # that catches the SystemExit raised below.
        print(msg, file=sys.stderr)
        print(usage, file=sys.stderr)
        sys.exit(2)
    func = encode
    for o, _ in opts:
        if o == '-e':
            func = encode
        elif o in ('-d', '-u'):
            func = decode
        elif o == '-t':
            test()
            return
        elif o == '-h':
            print(usage)
            return
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, sys.stdout.buffer)
    else:
        func(sys.stdin.buffer, sys.stdout.buffer)
594
595
def test():
    """Round-trip a sample string through encodebytes() and decodebytes().

    The sample, its encoding and the decoded result are printed; the decoded
    result must equal the sample.
    """
    sample = b"Aladdin:open sesame"
    print(repr(sample))
    encoded = encodebytes(sample)
    print(repr(encoded))
    decoded = decodebytes(encoded)
    print(repr(decoded))
    assert sample == decoded
604
605
# Run the command-line interface when the file is executed as a script.
if __name__ == '__main__':
    main()
608