xref: /aosp_15_r20/prebuilts/build-tools/common/py3-stdlib/codecs.py (revision cda5da8d549138a6648c5ee6d7a49cf8f4a657be)
""" codecs -- Python Codec Registry, API and helpers.


Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""
9*cda5da8dSAndroid Build Coastguard Worker
10*cda5da8dSAndroid Build Coastguard Workerimport builtins
11*cda5da8dSAndroid Build Coastguard Workerimport sys
12*cda5da8dSAndroid Build Coastguard Worker
### Registry and builtin stateless codec functions

# Everything public in the _codecs module is re-exported from this module
# through the star import below.  If _codecs cannot be imported the
# ImportError is turned into a SystemError whose message includes the
# original error text.
try:
    from _codecs import *
except ImportError as why:
    raise SystemError('Failed to load the builtin codecs: %s' % why)
19*cda5da8dSAndroid Build Coastguard Worker
# Names bound by ``from codecs import *``.
# NOTE(review): several entries (e.g. "register", "lookup", "encode",
# "decode") are not defined in this part of the file; presumably they are
# provided by the ``from _codecs import *`` above or defined further down
# in the module -- confirm before editing this list.
__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE",
           "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE",
           "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE",
           "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE",
           "CodecInfo", "Codec", "IncrementalEncoder", "IncrementalDecoder",
           "StreamReader", "StreamWriter",
           "StreamReaderWriter", "StreamRecoder",
           "getencoder", "getdecoder", "getincrementalencoder",
           "getincrementaldecoder", "getreader", "getwriter",
           "encode", "decode", "iterencode", "iterdecode",
           "strict_errors", "ignore_errors", "replace_errors",
           "xmlcharrefreplace_errors",
           "backslashreplace_errors", "namereplace_errors",
           "register_error", "lookup_error"]
34*cda5da8dSAndroid Build Coastguard Worker
### Constants

#
# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF) and the
# byte strings that represent it in UTF-8/UTF-16/UTF-32 output on
# little and big endian machines
#

# UTF-8
BOM_UTF8 = b'\xef\xbb\xbf'

# UTF-16, little endian and big endian
BOM_UTF16_LE = b'\xff\xfe'
BOM_UTF16_BE = b'\xfe\xff'

# Short aliases for the two UTF-16 marks
BOM_LE = BOM_UTF16_LE
BOM_BE = BOM_UTF16_BE

# UTF-32, little endian and big endian
BOM_UTF32_LE = b'\xff\xfe\x00\x00'
BOM_UTF32_BE = b'\x00\x00\xfe\xff'

# UTF-16, native endianness (selected from sys.byteorder)
BOM_UTF16 = BOM_UTF16_LE if sys.byteorder == 'little' else BOM_UTF16_BE
BOM = BOM_UTF16

# UTF-32, native endianness (selected from sys.byteorder)
BOM_UTF32 = BOM_UTF32_LE if sys.byteorder == 'little' else BOM_UTF32_BE

# Old broken names (don't use in new code): the "32" names hold the
# UTF-16 marks and the "64" names hold the UTF-32 marks.
BOM32_LE = BOM_UTF16_LE
BOM32_BE = BOM_UTF16_BE
BOM64_LE = BOM_UTF32_LE
BOM64_BE = BOM_UTF32_BE
79*cda5da8dSAndroid Build Coastguard Worker
80*cda5da8dSAndroid Build Coastguard Worker
81*cda5da8dSAndroid Build Coastguard Worker### Codec base classes (defining the API)
82*cda5da8dSAndroid Build Coastguard Worker
class CodecInfo(tuple):
    """Codec details when looking up the codec registry.

    An instance is the 4-tuple (encode, decode, streamreader,
    streamwriter).  The same four objects, the two incremental codec
    factories and the codec name are also stored as attributes.
    """

    # Private API to allow Python 3.4 to denylist the known non-Unicode
    # codecs in the standard library. A more general mechanism to
    # reliably distinguish text encodings from other codecs will hopefully
    # be defined for Python 3.5
    #
    # See http://bugs.python.org/issue19619
    #
    # Class-level default: codecs are assumed to be text encodings.
    _is_text_encoding = True

    def __new__(cls, encode, decode, streamreader=None, streamwriter=None,
                incrementalencoder=None, incrementaldecoder=None, name=None,
                *, _is_text_encoding=None):
        """Build the 4-tuple and attach the codec parts as attributes.

        _is_text_encoding is keyword-only; when it is left as None the
        class-level default stays in effect for the new instance.
        """
        info = tuple.__new__(
            cls, (encode, decode, streamreader, streamwriter))
        info.name = name
        info.encode = encode
        info.decode = decode
        info.incrementalencoder = incrementalencoder
        info.incrementaldecoder = incrementaldecoder
        info.streamwriter = streamwriter
        info.streamreader = streamreader
        if _is_text_encoding is not None:
            # Shadow the class attribute only when explicitly requested.
            info._is_text_encoding = _is_text_encoding
        return info

    def __repr__(self):
        """Return a short description with class, codec name and id."""
        klass = self.__class__
        details = (klass.__module__, klass.__qualname__, self.name, id(self))
        return "<%s.%s object for encoding %s at %#x>" % details
113*cda5da8dSAndroid Build Coastguard Worker
class Codec:

    """ Interface definition for stateless encoders/decoders.

        How .encode()/.decode() deal with errors is selected through
        the errors argument. The following string values are
        predefined:

         'strict' - raise a ValueError error (or a subclass)
         'ignore' - ignore the character and continue with the next
         'replace' - replace with a suitable replacement character;
                    Python will use the official U+FFFD REPLACEMENT
                    CHARACTER for the builtin Unicode codecs on
                    decoding and '?' on encoding.
         'surrogateescape' - replace with private code points U+DCnn.
         'xmlcharrefreplace' - Replace with the appropriate XML
                               character reference (only for encoding).
         'backslashreplace'  - Replace with backslashed escape sequences.
         'namereplace'       - Replace with \\N{...} escape sequences
                               (only for encoding).

        Additional values can be made available via register_error.

    """
    def encode(self, input, errors='strict'):

        """ Encode the object input and return a tuple (output
            object, length consumed).

            errors selects the error handling scheme; the default is
            'strict'.

            Implementations may not keep state in the Codec instance.
            Codecs that need state to encode efficiently should use
            StreamWriter instead.

            Zero length input must be supported; the result is then an
            empty object of the output object type.

            This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError

    def decode(self, input, errors='strict'):

        """ Decode the object input and return a tuple (output
            object, length consumed).

            input must be a bytes object or another object providing
            the read-only buffer interface, for example buffer objects
            and memory mapped files.

            errors selects the error handling scheme; the default is
            'strict'.

            Implementations may not keep state in the Codec instance.
            Codecs that need state to decode efficiently should use
            StreamReader instead.

            Zero length input must be supported; the result is then an
            empty object of the output object type.

            This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError
179*cda5da8dSAndroid Build Coastguard Worker
class IncrementalEncoder:
    """
    Base class for encoders that work on an input in several steps. The
    pieces of the input are handed to encode() one after the other, and
    the encoder keeps track of the encoding state between those calls.
    """
    def __init__(self, errors='strict'):
        """
        Create an IncrementalEncoder instance.

        errors selects the error handling scheme and is stored on the
        instance; the Codec class docstring lists the predefined values.
        An empty string buffer is set up as well.
        """
        self.errors = errors
        self.buffer = ""

    def encode(self, input, final=False):
        """
        Encode input and return the resulting object.

        Must be provided by subclasses; this base version always raises
        NotImplementedError.
        """
        raise NotImplementedError

    def reset(self):
        """
        Put the encoder back into its initial state.

        The base implementation does nothing.
        """

    def getstate(self):
        """
        Return the current state of the encoder.

        The base implementation always reports 0.
        """
        return 0

    def setstate(self, state):
        """
        Restore a state previously obtained from getstate().

        The base implementation ignores state.
        """
219*cda5da8dSAndroid Build Coastguard Worker
class BufferedIncrementalEncoder(IncrementalEncoder):
    """
    IncrementalEncoder variant for encoders that cannot always consume
    all of their input at once: whatever _buffer_encode() leaves
    unconsumed is kept in self.buffer and prepended to the input of the
    next encode() call.
    """
    def __init__(self, errors='strict'):
        IncrementalEncoder.__init__(self, errors)
        # input not yet encoded, carried over to the next encode() call
        self.buffer = ""

    def _buffer_encode(self, input, errors, final):
        # Hook for subclasses: encode input and return an
        # (output, length consumed) tuple
        raise NotImplementedError

    def encode(self, input, final=False):
        # prepend what was left over from the previous call
        pending = self.buffer + input
        encoded, used = self._buffer_encode(pending, self.errors, final)
        # whatever was not consumed waits for the next call
        self.buffer = pending[used:]
        return encoded

    def reset(self):
        IncrementalEncoder.reset(self)
        self.buffer = ""

    def getstate(self):
        # an empty buffer is reported as 0
        leftover = self.buffer
        return leftover if leftover else 0

    def setstate(self, state):
        # a false state (such as 0) stands for an empty buffer
        self.buffer = state if state else ""
253*cda5da8dSAndroid Build Coastguard Worker
class IncrementalDecoder:
    """
    Base class for decoders that work on an input in several steps. The
    pieces of the input are handed to decode() one after the other, and
    the decoder keeps track of the decoding state between those calls.
    """
    def __init__(self, errors='strict'):
        """
        Create an IncrementalDecoder instance.

        errors selects the error handling scheme and is stored on the
        instance; the Codec class docstring lists the predefined values.
        """
        self.errors = errors

    def decode(self, input, final=False):
        """
        Decode input and return the resulting object.

        Must be provided by subclasses; this base version always raises
        NotImplementedError.
        """
        raise NotImplementedError

    def reset(self):
        """
        Put the decoder back into its initial state.

        The base implementation does nothing.
        """

    def getstate(self):
        """
        Return the current state of the decoder.

        The result must be a (buffered_input, additional_state_info)
        tuple.  buffered_input must be a bytes object holding the bytes
        that were passed to decode() but have not been converted yet.
        additional_state_info must be a non-negative integer describing
        the decoder state WITHOUT the contents of buffered_input having
        been processed.  In the initial state and after reset(),
        getstate() must return (b"", 0), which is what this base
        implementation always returns.
        """
        return (b"", 0)

    def setstate(self, state):
        """
        Restore a state previously obtained from getstate().

        setstate((b"", 0)) must have the same effect as reset().  The
        base implementation ignores state.
        """
302*cda5da8dSAndroid Build Coastguard Worker
class BufferedIncrementalDecoder(IncrementalDecoder):
    """
    IncrementalDecoder variant for decoders that have to cope with
    incomplete byte sequences: whatever _buffer_decode() leaves
    unconsumed is kept in self.buffer and prepended to the input of the
    next decode() call.
    """
    def __init__(self, errors='strict'):
        IncrementalDecoder.__init__(self, errors)
        # input not yet decoded, carried over to the next decode() call
        self.buffer = b""

    def _buffer_decode(self, input, errors, final):
        # Hook for subclasses: decode input and return an
        # (output, length consumed) tuple
        raise NotImplementedError

    def decode(self, input, final=False):
        # prepend what was left over from the previous call
        pending = self.buffer + input
        decoded, used = self._buffer_decode(pending, self.errors, final)
        # whatever was not consumed waits for the next call
        self.buffer = pending[used:]
        return decoded

    def reset(self):
        IncrementalDecoder.reset(self)
        self.buffer = b""

    def getstate(self):
        # the additional state info slot is always 0
        return (self.buffer, 0)

    def setstate(self, state):
        # only the buffered input is restored; additional state info
        # is ignored
        self.buffer = state[0]
338*cda5da8dSAndroid Build Coastguard Worker
#
# The StreamWriter and StreamReader classes provide generic working
# interfaces which can be used to implement new encoding submodules
# very easily. See encodings/utf_8.py for an example of how this is
# done.
#
345*cda5da8dSAndroid Build Coastguard Worker
class StreamWriter(Codec):

    """ Codec that encodes objects and writes the result to a stream.

        write() passes objects through self.encode() and hands the
        encoded data to the wrapped stream. Attributes that are not
        found on the writer itself are looked up on the stream, and
        the writer can be used as a context manager which closes the
        stream on exit.
    """

    def __init__(self, stream, errors='strict'):

        """ Creates a StreamWriter instance.

            stream must be a file-like object open for writing.

            The StreamWriter may use different error handling
            schemes by providing the errors keyword argument. These
            parameters are predefined:

             'strict' - raise a ValueError (or a subclass)
             'ignore' - ignore the character and continue with the next
             'replace'- replace with a suitable replacement character
             'xmlcharrefreplace' - Replace with the appropriate XML
                                   character reference.
             'backslashreplace'  - Replace with backslashed escape
                                   sequences.
             'namereplace'       - Replace with \\N{...} escape sequences.

            The set of allowed parameter values can be extended via
            register_error.
        """
        self.stream = stream
        self.errors = errors

    def write(self, object):

        """ Writes the object's contents encoded to self.stream.

            The object is encoded with self.encode() using self.errors;
            the "length consumed" part of the result is not used.
        """
        data, _ = self.encode(object, self.errors)
        self.stream.write(data)

    def writelines(self, list):

        """ Writes the concatenated list of strings to the stream
            using .write().
        """
        self.write(''.join(list))

    def reset(self):

        """ Resets the codec buffers used for keeping internal state.

            Calling this method should ensure that the data on the
            output is put into a clean state, that allows appending
            of new fresh data without having to rescan the whole
            stream to recover state.

            The base implementation does nothing.

        """
        pass

    def seek(self, offset, whence=0):

        """ Repositions the underlying stream.

            The call is forwarded to self.stream.seek(). Seeking to
            the very start of the stream (offset 0 with whence 0)
            additionally resets the writer via reset().
        """
        self.stream.seek(offset, whence)
        if whence == 0 and offset == 0:
            self.reset()

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.

            Raises AttributeError when the stream attribute itself is
            missing, i.e. on an instance whose __init__() has not run.
        """
        # __getattr__ only runs after normal lookup has failed, so a
        # request for "stream" means the instance has no stream yet
        # (e.g. it was created with __new__ by copy or pickle).
        # Reading self.stream below would then re-enter this method
        # forever and end in RecursionError instead of AttributeError.
        if name == "stream":
            raise AttributeError("%r object has no attribute %r"
                                 % (type(self).__name__, name))
        return getattr(self.stream, name)

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        # Closes the wrapped stream; returning None lets any exception
        # raised inside the with block propagate.
        self.stream.close()
416*cda5da8dSAndroid Build Coastguard Worker
417*cda5da8dSAndroid Build Coastguard Worker###
418*cda5da8dSAndroid Build Coastguard Worker
class StreamReader(Codec):

    """ Decoding reader layered on top of a file-like object.

        The class itself does not decode anything: decode() raises
        NotImplementedError here and has to be supplied by the concrete
        codec.  What this class adds is the buffering needed to turn
        chunked stream reads into read(), readline(), readlines() and
        line-by-line iteration.

    """

    charbuffertype = str

    def __init__(self, stream, errors='strict'):

        """ Creates a StreamReader instance.

            stream must be a file-like object open for reading.

            errors selects the error handling scheme that is passed
            to decode(). These values are predefined:

             'strict' - raise a ValueError (or a subclass)
             'ignore' - ignore the character and continue with the next
             'replace'- replace with a suitable replacement character
             'backslashreplace' - Replace with backslashed escape sequences;

            The set of allowed parameter values can be extended via
            register_error.
        """
        self.stream = stream
        self.errors = errors
        # input read from the stream that decode() has not consumed yet
        self.bytebuffer = b""
        # decoded output that has not been handed to the caller yet
        self._empty_charbuffer = self.charbuffertype()
        self.charbuffer = self._empty_charbuffer
        # list of lines cached by readline(), or None when unused
        self.linebuffer = None

    def decode(self, input, errors='strict'):
        """ Decode input; must be overridden by the concrete codec.

            read() expects a (decoded data, number of input items
            consumed) pair back from this method.
        """
        raise NotImplementedError

    def read(self, size=-1, chars=-1, firstline=False):

        """ Decodes data from the stream self.stream and returns the
            resulting object.

            chars is the number of decoded items to return. read()
            never returns more than that, but it may return less when
            the stream runs dry.  A negative chars falls back to size,
            so that read(n) behaves like other single-argument read()
            methods.

            size is handed to self.stream.read() and therefore limits
            how much undecoded input is fetched per step.  A negative
            size reads everything the stream has to offer.  If both
            size and chars end up negative, everything that can be
            decoded is returned.

            If firstline is true and decoding fails with a
            UnicodeDecodeError, the input in front of the failing
            position is decoded on its own; when that part contains
            more than one line it is kept and the error is not raised
            by that step.

            Input that decode() did not consume is kept in
            self.bytebuffer for the next call.
        """
        # Lines cached by readline() are folded back into the character
        # buffer before anything else happens.
        if self.linebuffer:
            self.charbuffer = self._empty_charbuffer.join(self.linebuffer)
            self.linebuffer = None

        # chars not given: behave like a one-argument read(size)
        if chars < 0:
            chars = size

        # Keep fetching until the character buffer can satisfy the
        # request; with a negative chars only the breaks below end the loop.
        while not (chars >= 0 and len(self.charbuffer) >= chars):
            chunk = self.stream.read() if size < 0 else self.stream.read(size)
            # leftovers from the previous round go in front of the new input
            pending = self.bytebuffer + chunk
            if not pending:
                break
            try:
                decoded, consumed = self.decode(pending, self.errors)
            except UnicodeDecodeError as exc:
                if not firstline:
                    raise
                # Retry with only the input in front of the failure; give
                # up unless that yields more than one line.
                decoded, consumed = \
                    self.decode(pending[:exc.start], self.errors)
                if len(decoded.splitlines(keepends=True)) <= 1:
                    raise
            # whatever was not consumed waits for the next round
            self.bytebuffer = pending[consumed:]
            self.charbuffer += decoded
            # the stream had nothing new to offer
            if not chunk:
                break

        if chars < 0:
            # hand out the complete buffer
            result = self.charbuffer
            self.charbuffer = self._empty_charbuffer
        else:
            # hand out the requested prefix, keep the remainder
            result = self.charbuffer[:chars]
            self.charbuffer = self.charbuffer[chars:]
        return result

    def readline(self, size=None, keepends=True):

        """ Read one line from the input stream and return the
            decoded data.

            size, if given, is passed as size argument to the
            read() method, and read() is then tried only once.
            Without it reading starts at 72 and the amount is doubled
            after every unsuccessful attempt while it is below 8000.

            If keepends is false the line ending is stripped from
            the returned line.

        """
        # Lines cached by an earlier call are served first, no matter
        # what size says.
        if self.linebuffer:
            line = self.linebuffer.pop(0)
            if len(self.linebuffer) == 1:
                # Only one entry is left: go back to charbuffer mode,
                # more data might be needed next time.
                self.charbuffer = self.linebuffer[0]
                self.linebuffer = None
            if not keepends:
                line = line.splitlines(keepends=False)[0]
            return line

        readsize = size or 72
        line = self._empty_charbuffer
        while True:
            data = self.read(readsize, firstline=True)
            if data:
                # A chunk ending in "\r" may be the first half of a
                # "\r\n" pair, so one more character is fetched.  If the
                # stream is only temporarily exhausted the wrong line
                # ending is returned.
                if isinstance(data, str):
                    ends_with_cr = data.endswith("\r")
                elif isinstance(data, bytes):
                    ends_with_cr = data.endswith(b"\r")
                else:
                    ends_with_cr = False
                if ends_with_cr:
                    data += self.read(size=1, chars=1)

            line += data
            pieces = line.splitlines(keepends=True)
            if len(pieces) > 1:
                # Several lines came back; the first one is complete and
                # is the one to return.
                line, *rest = pieces
                if len(rest) > 1:
                    # cache the remaining lines for later calls
                    rest[-1] += self.charbuffer
                    self.linebuffer = rest
                    self.charbuffer = None
                else:
                    # a single remainder goes back into the charbuffer
                    self.charbuffer = rest[0] + self.charbuffer
                if not keepends:
                    line = line.splitlines(keepends=False)[0]
                break
            if pieces:
                with_end = pieces[0]
                without_end = with_end.splitlines(keepends=False)[0]
                if with_end != without_end:
                    # The single piece really carries a line end; keep
                    # the rest until the next call.
                    self.charbuffer = self._empty_charbuffer.join(pieces[1:]) + \
                                      self.charbuffer
                    line = with_end if keepends else without_end
                    break
            # nothing arrived, or size allowed just this one attempt
            if not data or size is not None:
                if line and not keepends:
                    line = line.splitlines(keepends=False)[0]
                break
            if readsize < 8000:
                readsize *= 2
        return line

    def readlines(self, sizehint=None, keepends=True):

        """ Read all lines available on the input stream
            and return them as a list.

            Everything read() delivers is split with splitlines();
            line endings stay in the list entries unless keepends
            is false.

            sizehint is accepted but ignored.

        """
        return self.read().splitlines(keepends)

    def reset(self):

        """ Resets the codec buffers used for keeping internal state.

            The byte, character and line buffers are emptied; the
            stream itself is not touched, so no repositioning takes
            place.

        """
        self.bytebuffer = b""
        self.charbuffer = self._empty_charbuffer
        self.linebuffer = None

    def seek(self, offset, whence=0):
        """ Set the input stream's current position.

            offset and whence go to self.stream.seek() unchanged;
            afterwards the buffers are cleared with reset().
        """
        self.stream.seek(offset, whence)
        self.reset()

    def __next__(self):

        """ Return the next decoded line from the input stream.

            An empty result from readline() ends the iteration.
        """
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def __iter__(self):
        """ The reader is its own line iterator."""
        return self

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    def __enter__(self):
        """ Enter a with block; the reader itself is the context value."""
        return self

    def __exit__(self, type, value, tb):
        """ Leave a with block by closing the underlying stream."""
        self.stream.close()
665*cda5da8dSAndroid Build Coastguard Worker
666*cda5da8dSAndroid Build Coastguard Worker###
667*cda5da8dSAndroid Build Coastguard Worker
class StreamReaderWriter:

    """ StreamReaderWriter instances allow wrapping streams which
        work in both read and write modes.

        The design is such that one can use the factory functions
        returned by the codec.lookup() function to construct the
        instance.

        Reading is delegated to a reader object and writing to a
        writer object, both created on the same stream. Attributes not
        defined here are looked up on the stream itself.

    """
    # Optional attributes set by the file wrappers below
    encoding = 'unknown'

    def __init__(self, stream, Reader, Writer, errors='strict'):

        """ Creates a StreamReaderWriter instance.

            stream must be a Stream-like object.

            Reader, Writer must be factory functions or classes
            providing the StreamReader, StreamWriter interface resp.
            Each is called as factory(stream, errors).

            Error handling is done in the same way as defined for the
            StreamWriter/Readers.

        """
        self.stream = stream
        self.reader = Reader(stream, errors)
        self.writer = Writer(stream, errors)
        self.errors = errors

    def read(self, size=-1):

        """ Return the result of the reader's read(size)."""
        return self.reader.read(size)

    def readline(self, size=None, keepends=True):

        """ Return the next line from the reader.

            size and keepends are forwarded to the reader's
            readline(); keepends=False asks it to strip the line
            ending.

        """
        return self.reader.readline(size, keepends)

    def readlines(self, sizehint=None, keepends=True):

        """ Return the remaining lines from the reader as a list.

            sizehint and keepends are forwarded to the reader's
            readlines().

        """
        return self.reader.readlines(sizehint, keepends)

    def __next__(self):

        """ Return the next decoded line from the input stream."""
        return next(self.reader)

    def __iter__(self):
        """ The wrapper is its own line iterator."""
        return self

    def write(self, data):

        """ Hand data to the writer's write() and return its result."""
        return self.writer.write(data)

    def writelines(self, list):

        """ Hand list to the writer's writelines() and return its result."""
        return self.writer.writelines(list)

    def reset(self):

        """ Reset the reader first, then the writer."""
        self.reader.reset()
        self.writer.reset()

    def seek(self, offset, whence=0):
        """ Reposition the shared stream.

            The stream is moved once. The reader is always reset
            afterwards; the writer is reset only when seeking to
            the absolute start (whence == 0 and offset == 0).

        """
        self.stream.seek(offset, whence)
        self.reader.reset()
        if whence == 0 and offset == 0:
            self.writer.reset()

    def __getattr__(self, name,
                    getattr=getattr):

        """ Inherit all other methods from the underlying stream.
        """
        return getattr(self.stream, name)

    # these are needed to make "with StreamReaderWriter(...)" work properly

    def __enter__(self):
        return self

    def __exit__(self, type, value, tb):
        self.stream.close()
752*cda5da8dSAndroid Build Coastguard Worker
753*cda5da8dSAndroid Build Coastguard Worker###
754*cda5da8dSAndroid Build Coastguard Worker
755*cda5da8dSAndroid Build Coastguard Workerclass StreamRecoder:
756*cda5da8dSAndroid Build Coastguard Worker
757*cda5da8dSAndroid Build Coastguard Worker    """ StreamRecoder instances translate data from one encoding to another.
758*cda5da8dSAndroid Build Coastguard Worker
759*cda5da8dSAndroid Build Coastguard Worker        They use the complete set of APIs returned by the
760*cda5da8dSAndroid Build Coastguard Worker        codecs.lookup() function to implement their task.
761*cda5da8dSAndroid Build Coastguard Worker
762*cda5da8dSAndroid Build Coastguard Worker        Data written to the StreamRecoder is first decoded into an
763*cda5da8dSAndroid Build Coastguard Worker        intermediate format (depending on the "decode" codec) and then
764*cda5da8dSAndroid Build Coastguard Worker        written to the underlying stream using an instance of the provided
765*cda5da8dSAndroid Build Coastguard Worker        Writer class.
766*cda5da8dSAndroid Build Coastguard Worker
767*cda5da8dSAndroid Build Coastguard Worker        In the other direction, data is read from the underlying stream using
768*cda5da8dSAndroid Build Coastguard Worker        a Reader instance and then encoded and returned to the caller.
769*cda5da8dSAndroid Build Coastguard Worker
770*cda5da8dSAndroid Build Coastguard Worker    """
771*cda5da8dSAndroid Build Coastguard Worker    # Optional attributes set by the file wrappers below
772*cda5da8dSAndroid Build Coastguard Worker    data_encoding = 'unknown'
773*cda5da8dSAndroid Build Coastguard Worker    file_encoding = 'unknown'
774*cda5da8dSAndroid Build Coastguard Worker
    def __init__(self, stream, encode, decode, Reader, Writer,
                 errors='strict'):

        """ Creates a StreamRecoder instance which implements a two-way
            conversion: encode and decode work on the frontend (the
            data visible to .read() and .write()) while Reader and Writer
            work on the backend (the data in stream).

            You can use these objects to do transparent
            transcodings from e.g. latin-1 to utf-8 and back.

            stream must be a file-like object.

            encode and decode must adhere to the Codec interface; Reader and
            Writer must be factory functions or classes providing the
            StreamReader and StreamWriter interfaces resp.

            Error handling is done in the same way as defined for the
            StreamWriter/Readers.

        """
        self.stream = stream
        # frontend codec functions, called as func(data, errors)
        self.encode = encode
        self.decode = decode
        # backend reader and writer, both created on the same stream object
        self.reader = Reader(stream, errors)
        self.writer = Writer(stream, errors)
        self.errors = errors
802*cda5da8dSAndroid Build Coastguard Worker
803*cda5da8dSAndroid Build Coastguard Worker    def read(self, size=-1):
804*cda5da8dSAndroid Build Coastguard Worker
805*cda5da8dSAndroid Build Coastguard Worker        data = self.reader.read(size)
806*cda5da8dSAndroid Build Coastguard Worker        data, bytesencoded = self.encode(data, self.errors)
807*cda5da8dSAndroid Build Coastguard Worker        return data
808*cda5da8dSAndroid Build Coastguard Worker
809*cda5da8dSAndroid Build Coastguard Worker    def readline(self, size=None):
810*cda5da8dSAndroid Build Coastguard Worker
811*cda5da8dSAndroid Build Coastguard Worker        if size is None:
812*cda5da8dSAndroid Build Coastguard Worker            data = self.reader.readline()
813*cda5da8dSAndroid Build Coastguard Worker        else:
814*cda5da8dSAndroid Build Coastguard Worker            data = self.reader.readline(size)
815*cda5da8dSAndroid Build Coastguard Worker        data, bytesencoded = self.encode(data, self.errors)
816*cda5da8dSAndroid Build Coastguard Worker        return data
817*cda5da8dSAndroid Build Coastguard Worker
818*cda5da8dSAndroid Build Coastguard Worker    def readlines(self, sizehint=None):
819*cda5da8dSAndroid Build Coastguard Worker
820*cda5da8dSAndroid Build Coastguard Worker        data = self.reader.read()
821*cda5da8dSAndroid Build Coastguard Worker        data, bytesencoded = self.encode(data, self.errors)
822*cda5da8dSAndroid Build Coastguard Worker        return data.splitlines(keepends=True)
823*cda5da8dSAndroid Build Coastguard Worker
824*cda5da8dSAndroid Build Coastguard Worker    def __next__(self):
825*cda5da8dSAndroid Build Coastguard Worker
826*cda5da8dSAndroid Build Coastguard Worker        """ Return the next decoded line from the input stream."""
827*cda5da8dSAndroid Build Coastguard Worker        data = next(self.reader)
828*cda5da8dSAndroid Build Coastguard Worker        data, bytesencoded = self.encode(data, self.errors)
829*cda5da8dSAndroid Build Coastguard Worker        return data
830*cda5da8dSAndroid Build Coastguard Worker
831*cda5da8dSAndroid Build Coastguard Worker    def __iter__(self):
832*cda5da8dSAndroid Build Coastguard Worker        return self
833*cda5da8dSAndroid Build Coastguard Worker
834*cda5da8dSAndroid Build Coastguard Worker    def write(self, data):
835*cda5da8dSAndroid Build Coastguard Worker
836*cda5da8dSAndroid Build Coastguard Worker        data, bytesdecoded = self.decode(data, self.errors)
837*cda5da8dSAndroid Build Coastguard Worker        return self.writer.write(data)
838*cda5da8dSAndroid Build Coastguard Worker
839*cda5da8dSAndroid Build Coastguard Worker    def writelines(self, list):
840*cda5da8dSAndroid Build Coastguard Worker
841*cda5da8dSAndroid Build Coastguard Worker        data = b''.join(list)
842*cda5da8dSAndroid Build Coastguard Worker        data, bytesdecoded = self.decode(data, self.errors)
843*cda5da8dSAndroid Build Coastguard Worker        return self.writer.write(data)
844*cda5da8dSAndroid Build Coastguard Worker
845*cda5da8dSAndroid Build Coastguard Worker    def reset(self):
846*cda5da8dSAndroid Build Coastguard Worker
847*cda5da8dSAndroid Build Coastguard Worker        self.reader.reset()
848*cda5da8dSAndroid Build Coastguard Worker        self.writer.reset()
849*cda5da8dSAndroid Build Coastguard Worker
850*cda5da8dSAndroid Build Coastguard Worker    def seek(self, offset, whence=0):
851*cda5da8dSAndroid Build Coastguard Worker        # Seeks must be propagated to both the readers and writers
852*cda5da8dSAndroid Build Coastguard Worker        # as they might need to reset their internal buffers.
853*cda5da8dSAndroid Build Coastguard Worker        self.reader.seek(offset, whence)
854*cda5da8dSAndroid Build Coastguard Worker        self.writer.seek(offset, whence)
855*cda5da8dSAndroid Build Coastguard Worker
856*cda5da8dSAndroid Build Coastguard Worker    def __getattr__(self, name,
857*cda5da8dSAndroid Build Coastguard Worker                    getattr=getattr):
858*cda5da8dSAndroid Build Coastguard Worker
859*cda5da8dSAndroid Build Coastguard Worker        """ Inherit all other methods from the underlying stream.
860*cda5da8dSAndroid Build Coastguard Worker        """
861*cda5da8dSAndroid Build Coastguard Worker        return getattr(self.stream, name)
862*cda5da8dSAndroid Build Coastguard Worker
863*cda5da8dSAndroid Build Coastguard Worker    def __enter__(self):
864*cda5da8dSAndroid Build Coastguard Worker        return self
865*cda5da8dSAndroid Build Coastguard Worker
866*cda5da8dSAndroid Build Coastguard Worker    def __exit__(self, type, value, tb):
867*cda5da8dSAndroid Build Coastguard Worker        self.stream.close()
868*cda5da8dSAndroid Build Coastguard Worker
869*cda5da8dSAndroid Build Coastguard Worker### Shortcuts
870*cda5da8dSAndroid Build Coastguard Worker
def open(filename, mode='r', encoding=None, errors='strict', buffering=-1):

    """ Open a file and, if an encoding is given, return it wrapped in
        a StreamReaderWriter providing transparent encoding/decoding.

        filename, mode and buffering are passed on to the builtin
        open().  mode defaults to 'r' and buffering to -1.

        If encoding is None, the object returned by the builtin
        open() is handed back unchanged.

        If encoding is not None, 'b' is appended to mode unless it is
        already present, so the underlying file is always opened in
        binary mode.  The codec is then looked up and its stream
        reader and stream writer are used for the wrapper, with errors
        (default 'strict') as the error handling scheme.

        The wrapper gets an extra attribute .encoding holding the
        encoding that was passed in.  This attribute is only set when
        an encoding was specified.

        If looking up the codec or creating the wrapper fails, the
        already opened file is closed and the exception is re-raised.

    """
    if encoding is None:
        # No codec involved: the plain file object is the result.
        return builtins.open(filename, mode, buffering)

    # Force opening of the file in binary mode
    binary_mode = mode if 'b' in mode else mode + 'b'
    file = builtins.open(filename, binary_mode, buffering)
    try:
        codec_info = lookup(encoding)
        wrapper = StreamReaderWriter(file, codec_info.streamreader,
                                     codec_info.streamwriter, errors)
        # Add attributes to simplify introspection
        wrapper.encoding = encoding
        return wrapper
    except BaseException:
        # Do not leak the open file, whatever went wrong.
        file.close()
        raise
919*cda5da8dSAndroid Build Coastguard Worker
def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'):

    """ Return file wrapped in a StreamRecoder that translates
        between two encodings.

        Data written to the wrapper is decoded with data_encoding and
        then written to file through the stream writer of
        file_encoding.  Data read from file goes through the stream
        reader of file_encoding and is handed to the caller encoded
        with data_encoding.

        file_encoding defaults to data_encoding when it is None.

        errors (default 'strict') is passed to the StreamRecoder as
        the error handling scheme.

        The wrapper gets two extra attributes, .data_encoding and
        .file_encoding, holding the encodings that were used; they
        can be used for introspection by Python programs.

    """
    file_encoding = data_encoding if file_encoding is None else file_encoding
    frontend = lookup(data_encoding)
    backend = lookup(file_encoding)
    recoder = StreamRecoder(file, frontend.encode, frontend.decode,
                            backend.streamreader, backend.streamwriter,
                            errors)
    # Add attributes to simplify introspection
    recoder.data_encoding = data_encoding
    recoder.file_encoding = file_encoding
    return recoder
955*cda5da8dSAndroid Build Coastguard Worker
956*cda5da8dSAndroid Build Coastguard Worker### Helpers for codec lookup
957*cda5da8dSAndroid Build Coastguard Worker
def getencoder(encoding):

    """ Look up the codec for the given encoding and return
        its encoder function.

        Raises a LookupError in case the encoding cannot be found.

    """
    codec_info = lookup(encoding)
    return codec_info.encode
967*cda5da8dSAndroid Build Coastguard Worker
def getdecoder(encoding):

    """ Look up the codec for the given encoding and return
        its decoder function.

        Raises a LookupError in case the encoding cannot be found.

    """
    codec_info = lookup(encoding)
    return codec_info.decode
977*cda5da8dSAndroid Build Coastguard Worker
def getincrementalencoder(encoding):

    """ Look up the codec for the given encoding and return
        its IncrementalEncoder class or factory function.

        Raises a LookupError in case the encoding cannot be found
        or the codec doesn't provide an incremental encoder.

    """
    factory = lookup(encoding).incrementalencoder
    if factory is not None:
        return factory
    # The codec exists but offers no incremental encoder.
    raise LookupError(encoding)
991*cda5da8dSAndroid Build Coastguard Worker
def getincrementaldecoder(encoding):

    """ Look up the codec for the given encoding and return
        its IncrementalDecoder class or factory function.

        Raises a LookupError in case the encoding cannot be found
        or the codec doesn't provide an incremental decoder.

    """
    factory = lookup(encoding).incrementaldecoder
    if factory is not None:
        return factory
    # The codec exists but offers no incremental decoder.
    raise LookupError(encoding)
1005*cda5da8dSAndroid Build Coastguard Worker
def getreader(encoding):

    """ Look up the codec for the given encoding and return
        its StreamReader class or factory function.

        Raises a LookupError in case the encoding cannot be found.

    """
    codec_info = lookup(encoding)
    return codec_info.streamreader
1015*cda5da8dSAndroid Build Coastguard Worker
def getwriter(encoding):

    """ Look up the codec for the given encoding and return
        its StreamWriter class or factory function.

        Raises a LookupError in case the encoding cannot be found.

    """
    codec_info = lookup(encoding)
    return codec_info.streamwriter
1025*cda5da8dSAndroid Build Coastguard Worker
def iterencode(iterator, encoding, errors='strict', **kwargs):
    """
    Encoding iterator.

    Feeds every item of iterator to an IncrementalEncoder for the given
    encoding and yields each non-empty result.  After the input is
    exhausted the encoder is called once more with an empty string and
    final=True, and a non-empty result of that call is yielded too.

    errors and kwargs are passed through to the IncrementalEncoder
    constructor.
    """
    encoder = getincrementalencoder(encoding)(errors, **kwargs)
    for text in iterator:
        chunk = encoder.encode(text)
        if not chunk:
            # Nothing produced for this item; emit no empty chunks.
            continue
        yield chunk
    tail = encoder.encode("", True)
    if tail:
        yield tail
1043*cda5da8dSAndroid Build Coastguard Worker
def iterdecode(iterator, encoding, errors='strict', **kwargs):
    """
    Decoding iterator.

    Feeds every item of iterator to an IncrementalDecoder for the given
    encoding and yields each non-empty result.  After the input is
    exhausted the decoder is called once more with empty bytes and
    final=True, and a non-empty result of that call is yielded too.

    errors and kwargs are passed through to the IncrementalDecoder
    constructor.
    """
    decoder = getincrementaldecoder(encoding)(errors, **kwargs)
    for raw in iterator:
        chunk = decoder.decode(raw)
        if not chunk:
            # Nothing decodable yet (or empty input); emit nothing.
            continue
        yield chunk
    tail = decoder.decode(b"", True)
    if tail:
        yield tail
1061*cda5da8dSAndroid Build Coastguard Worker
1062*cda5da8dSAndroid Build Coastguard Worker### Helpers for charmap-based codecs
1063*cda5da8dSAndroid Build Coastguard Worker
def make_identity_dict(rng):

    """ make_identity_dict(rng) -> dict

        Return a dictionary in which every element produced by
        iterating over rng is mapped to itself.

    """
    identity = {}
    for element in rng:
        identity[element] = element
    return identity
1073*cda5da8dSAndroid Build Coastguard Worker
def make_encoding_map(decoding_map):

    """ Creates an encoding map from a decoding map.

        Every value of decoding_map becomes a key of the result,
        mapped to the key it was stored under.

        If a target mapping in the decoding map occurs multiple
        times, then that target is mapped to None (undefined mapping),
        causing an exception when encountered by the charmap codec
        during translation.

        One example where this happens is cp875.py which decodes
        multiple characters to U+001A.

    """
    encoding_map = {}
    for source, target in decoding_map.items():
        # A target seen before is ambiguous and gets marked undefined.
        encoding_map[target] = None if target in encoding_map else source
    return encoding_map
1094*cda5da8dSAndroid Build Coastguard Worker
1095*cda5da8dSAndroid Build Coastguard Worker### error handlers
1096*cda5da8dSAndroid Build Coastguard Worker
# Publish the standard error handlers as module-level names.
try:
    strict_errors = lookup_error("strict")
    ignore_errors = lookup_error("ignore")
    replace_errors = lookup_error("replace")
    xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace")
    backslashreplace_errors = lookup_error("backslashreplace")
    namereplace_errors = lookup_error("namereplace")
except LookupError:
    # In --disable-unicode builds, these error handlers are missing.
    # If any lookup fails, all six names are set to None.
    strict_errors = ignore_errors = replace_errors = None
    xmlcharrefreplace_errors = backslashreplace_errors = None
    namereplace_errors = None
1112*cda5da8dSAndroid Build Coastguard Worker
# Tell modulefinder that using codecs probably needs the encodings
# package.  The import below is guarded by a false value, so it is
# never executed at runtime.
# NOTE(review): presumably the guard is a module-level name rather
# than a literal so that the import statement stays visible to tools
# scanning this module -- confirm before simplifying it.
_false = 0
if _false:
    import encodings
1118*cda5da8dSAndroid Build Coastguard Worker
1119*cda5da8dSAndroid Build Coastguard Worker### Tests
1120*cda5da8dSAndroid Build Coastguard Worker
if __name__ == '__main__':

    # Only runs when this module is executed as a script: both standard
    # streams are replaced by EncodedFile wrappers.
    # NOTE(review): the wrapper's write path decodes what it is given,
    # so this presumably expects byte-oriented streams -- confirm that
    # sys.stdout / sys.stdin are suitable here.

    # Make stdout translate Latin-1 output into UTF-8 output
    sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8')

    # Have stdin translate Latin-1 input into UTF-8 input
    sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1')
1128