1 /*
2     An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3 
4     Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5 
6     Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8 
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "pycore_interp.h"        // PyInterpreterState.fs_codec
12 #include "pycore_long.h"          // _PyLong_GetZero()
13 #include "pycore_fileutils.h"     // _Py_GetLocaleEncoding()
14 #include "pycore_object.h"
15 #include "pycore_pystate.h"       // _PyInterpreterState_GET()
16 #include "structmember.h"         // PyMemberDef
17 #include "_iomodule.h"
18 
19 /*[clinic input]
20 module _io
21 class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
22 class _io.TextIOWrapper "textio *" "&TextIOWrapper_Type"
23 [clinic start generated code]*/
24 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=ed072384f8aada2c]*/
25 
26 /* TextIOBase */
27 
/* Docstring for the _io._TextIOBase type; installed as tp_doc of
   PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable.\n"
    );
35 
36 static PyObject *
_unsupported(const char * message)37 _unsupported(const char *message)
38 {
39     _PyIO_State *state = IO_STATE();
40     if (state != NULL)
41         PyErr_SetString(state->unsupported_operation, message);
42     return NULL;
43 }
44 
45 PyDoc_STRVAR(textiobase_detach_doc,
46     "Separate the underlying buffer from the TextIOBase and return it.\n"
47     "\n"
48     "After the underlying buffer has been detached, the TextIO is in an\n"
49     "unusable state.\n"
50     );
51 
52 static PyObject *
textiobase_detach(PyObject * self,PyObject * Py_UNUSED (ignored))53 textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
54 {
55     return _unsupported("detach");
56 }
57 
58 PyDoc_STRVAR(textiobase_read_doc,
59     "Read at most n characters from stream.\n"
60     "\n"
61     "Read from underlying buffer until we have n characters or we hit EOF.\n"
62     "If n is negative or omitted, read until EOF.\n"
63     );
64 
65 static PyObject *
textiobase_read(PyObject * self,PyObject * args)66 textiobase_read(PyObject *self, PyObject *args)
67 {
68     return _unsupported("read");
69 }
70 
71 PyDoc_STRVAR(textiobase_readline_doc,
72     "Read until newline or EOF.\n"
73     "\n"
74     "Returns an empty string if EOF is hit immediately.\n"
75     );
76 
77 static PyObject *
textiobase_readline(PyObject * self,PyObject * args)78 textiobase_readline(PyObject *self, PyObject *args)
79 {
80     return _unsupported("readline");
81 }
82 
83 PyDoc_STRVAR(textiobase_write_doc,
84     "Write string to stream.\n"
85     "Returns the number of characters written (which is always equal to\n"
86     "the length of the string).\n"
87     );
88 
89 static PyObject *
textiobase_write(PyObject * self,PyObject * args)90 textiobase_write(PyObject *self, PyObject *args)
91 {
92     return _unsupported("write");
93 }
94 
95 PyDoc_STRVAR(textiobase_encoding_doc,
96     "Encoding of the text stream.\n"
97     "\n"
98     "Subclasses should override.\n"
99     );
100 
101 static PyObject *
textiobase_encoding_get(PyObject * self,void * context)102 textiobase_encoding_get(PyObject *self, void *context)
103 {
104     Py_RETURN_NONE;
105 }
106 
107 PyDoc_STRVAR(textiobase_newlines_doc,
108     "Line endings translated so far.\n"
109     "\n"
110     "Only line endings translated during reading are considered.\n"
111     "\n"
112     "Subclasses should override.\n"
113     );
114 
115 static PyObject *
textiobase_newlines_get(PyObject * self,void * context)116 textiobase_newlines_get(PyObject *self, void *context)
117 {
118     Py_RETURN_NONE;
119 }
120 
121 PyDoc_STRVAR(textiobase_errors_doc,
122     "The error setting of the decoder or encoder.\n"
123     "\n"
124     "Subclasses should override.\n"
125     );
126 
127 static PyObject *
textiobase_errors_get(PyObject * self,void * context)128 textiobase_errors_get(PyObject *self, void *context)
129 {
130     Py_RETURN_NONE;
131 }
132 
133 
134 static PyMethodDef textiobase_methods[] = {
135     {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
136     {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
137     {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
138     {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
139     {NULL, NULL}
140 };
141 
142 static PyGetSetDef textiobase_getset[] = {
143     {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
144     {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
145     {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
146     {NULL}
147 };
148 
149 PyTypeObject PyTextIOBase_Type = {
150     PyVarObject_HEAD_INIT(NULL, 0)
151     "_io._TextIOBase",          /*tp_name*/
152     0,                          /*tp_basicsize*/
153     0,                          /*tp_itemsize*/
154     0,                          /*tp_dealloc*/
155     0,                          /*tp_vectorcall_offset*/
156     0,                          /*tp_getattr*/
157     0,                          /*tp_setattr*/
158     0,                          /*tp_as_async*/
159     0,                          /*tp_repr*/
160     0,                          /*tp_as_number*/
161     0,                          /*tp_as_sequence*/
162     0,                          /*tp_as_mapping*/
163     0,                          /*tp_hash */
164     0,                          /*tp_call*/
165     0,                          /*tp_str*/
166     0,                          /*tp_getattro*/
167     0,                          /*tp_setattro*/
168     0,                          /*tp_as_buffer*/
169     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
170     textiobase_doc,             /* tp_doc */
171     0,                          /* tp_traverse */
172     0,                          /* tp_clear */
173     0,                          /* tp_richcompare */
174     0,                          /* tp_weaklistoffset */
175     0,                          /* tp_iter */
176     0,                          /* tp_iternext */
177     textiobase_methods,         /* tp_methods */
178     0,                          /* tp_members */
179     textiobase_getset,          /* tp_getset */
180     &PyIOBase_Type,             /* tp_base */
181     0,                          /* tp_dict */
182     0,                          /* tp_descr_get */
183     0,                          /* tp_descr_set */
184     0,                          /* tp_dictoffset */
185     0,                          /* tp_init */
186     0,                          /* tp_alloc */
187     0,                          /* tp_new */
188     0,                          /* tp_free */
189     0,                          /* tp_is_gc */
190     0,                          /* tp_bases */
191     0,                          /* tp_mro */
192     0,                          /* tp_cache */
193     0,                          /* tp_subclasses */
194     0,                          /* tp_weaklist */
195     0,                          /* tp_del */
196     0,                          /* tp_version_tag */
197     0,                          /* tp_finalize */
198 };
199 
200 
201 /* IncrementalNewlineDecoder */
202 
/* Instance layout of IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    /* Wrapped decoder; when it is Py_None, decode() uses its input directly
       as the decoded text. */
    PyObject *decoder;
    /* Error handler given to __init__ ("strict" when omitted).  NULL means
       __init__ has not run; see CHECK_INITIALIZED_DECODER. */
    PyObject *errors;
    /* Set when a trailing '\r' was held back from a non-final decode(). */
    unsigned int pendingcr: 1;
    /* Non-zero when decode() rewrites \r\n and \r to \n. */
    unsigned int translate: 1;
    /* Bitmask of SEEN_CR / SEEN_LF / SEEN_CRLF recorded by decode(). */
    unsigned int seennl: 3;
} nldecoder_object;
211 
212 /*[clinic input]
213 _io.IncrementalNewlineDecoder.__init__
214     decoder: object
215     translate: int
216     errors: object(c_default="NULL") = "strict"
217 
218 Codec used when reading a file in universal newlines mode.
219 
220 It wraps another incremental decoder, translating \r\n and \r into \n.
221 It also records the types of newlines encountered.  When used with
222 translate=False, it ensures that the newline sequence is returned in
223 one piece. When used with decoder=None, it expects unicode strings as
224 decode input and translates newlines without first invoking an external
225 decoder.
226 [clinic start generated code]*/
227 
228 static int
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object * self,PyObject * decoder,int translate,PyObject * errors)229 _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
230                                             PyObject *decoder, int translate,
231                                             PyObject *errors)
232 /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
233 {
234 
235     if (errors == NULL) {
236         errors = Py_NewRef(&_Py_ID(strict));
237     }
238     else {
239         errors = Py_NewRef(errors);
240     }
241 
242     Py_XSETREF(self->errors, errors);
243     Py_XSETREF(self->decoder, Py_NewRef(decoder));
244     self->translate = translate ? 1 : 0;
245     self->seennl = 0;
246     self->pendingcr = 0;
247 
248     return 0;
249 }
250 
251 static void
incrementalnewlinedecoder_dealloc(nldecoder_object * self)252 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
253 {
254     Py_CLEAR(self->decoder);
255     Py_CLEAR(self->errors);
256     Py_TYPE(self)->tp_free((PyObject *)self);
257 }
258 
259 static int
check_decoded(PyObject * decoded)260 check_decoded(PyObject *decoded)
261 {
262     if (decoded == NULL)
263         return -1;
264     if (!PyUnicode_Check(decoded)) {
265         PyErr_Format(PyExc_TypeError,
266                      "decoder should return a string result, not '%.200s'",
267                      Py_TYPE(decoded)->tp_name);
268         Py_DECREF(decoded);
269         return -1;
270     }
271     if (PyUnicode_READY(decoded) < 0) {
272         Py_DECREF(decoded);
273         return -1;
274     }
275     return 0;
276 }
277 
/* Guard for IncrementalNewlineDecoder methods: if __init__ has not run
   (self->errors is still NULL), set ValueError and return NULL from the
   *calling* function.

   Wrapped in do { } while (0) so that `CHECK_INITIALIZED_DECODER(x);`
   behaves as a single statement (safe inside an unbraced if/else), and the
   argument is parenthesized so that any pointer expression may be passed. */
#define CHECK_INITIALIZED_DECODER(self) \
    do { \
        if ((self)->errors == NULL) { \
            PyErr_SetString(PyExc_ValueError, \
                            "IncrementalNewlineDecoder.__init__() not called"); \
            return NULL; \
        } \
    } while (0)
284 
/* Bit flags stored in nldecoder_object.seennl, one per newline style. */
enum {
    SEEN_CR   = 1,
    SEEN_LF   = 2,
    SEEN_CRLF = 4,
    SEEN_ALL  = SEEN_CR | SEEN_LF | SEEN_CRLF
};
289 
290 PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject * myself,PyObject * input,int final)291 _PyIncrementalNewlineDecoder_decode(PyObject *myself,
292                                     PyObject *input, int final)
293 {
294     PyObject *output;
295     Py_ssize_t output_len;
296     nldecoder_object *self = (nldecoder_object *) myself;
297 
298     CHECK_INITIALIZED_DECODER(self);
299 
300     /* decode input (with the eventual \r from a previous pass) */
301     if (self->decoder != Py_None) {
302         output = PyObject_CallMethodObjArgs(self->decoder,
303             &_Py_ID(decode), input, final ? Py_True : Py_False, NULL);
304     }
305     else {
306         output = input;
307         Py_INCREF(output);
308     }
309 
310     if (check_decoded(output) < 0)
311         return NULL;
312 
313     output_len = PyUnicode_GET_LENGTH(output);
314     if (self->pendingcr && (final || output_len > 0)) {
315         /* Prefix output with CR */
316         int kind;
317         PyObject *modified;
318         char *out;
319 
320         modified = PyUnicode_New(output_len + 1,
321                                  PyUnicode_MAX_CHAR_VALUE(output));
322         if (modified == NULL)
323             goto error;
324         kind = PyUnicode_KIND(modified);
325         out = PyUnicode_DATA(modified);
326         PyUnicode_WRITE(kind, out, 0, '\r');
327         memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
328         Py_DECREF(output);
329         output = modified; /* output remains ready */
330         self->pendingcr = 0;
331         output_len++;
332     }
333 
334     /* retain last \r even when not translating data:
335      * then readline() is sure to get \r\n in one pass
336      */
337     if (!final) {
338         if (output_len > 0
339             && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
340         {
341             PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
342             if (modified == NULL)
343                 goto error;
344             Py_DECREF(output);
345             output = modified;
346             self->pendingcr = 1;
347         }
348     }
349 
350     /* Record which newlines are read and do newline translation if desired,
351        all in one pass. */
352     {
353         const void *in_str;
354         Py_ssize_t len;
355         int seennl = self->seennl;
356         int only_lf = 0;
357         int kind;
358 
359         in_str = PyUnicode_DATA(output);
360         len = PyUnicode_GET_LENGTH(output);
361         kind = PyUnicode_KIND(output);
362 
363         if (len == 0)
364             return output;
365 
366         /* If, up to now, newlines are consistently \n, do a quick check
367            for the \r *byte* with the libc's optimized memchr.
368            */
369         if (seennl == SEEN_LF || seennl == 0) {
370             only_lf = (memchr(in_str, '\r', kind * len) == NULL);
371         }
372 
373         if (only_lf) {
374             /* If not already seen, quick scan for a possible "\n" character.
375                (there's nothing else to be done, even when in translation mode)
376             */
377             if (seennl == 0 &&
378                 memchr(in_str, '\n', kind * len) != NULL) {
379                 if (kind == PyUnicode_1BYTE_KIND)
380                     seennl |= SEEN_LF;
381                 else {
382                     Py_ssize_t i = 0;
383                     for (;;) {
384                         Py_UCS4 c;
385                         /* Fast loop for non-control characters */
386                         while (PyUnicode_READ(kind, in_str, i) > '\n')
387                             i++;
388                         c = PyUnicode_READ(kind, in_str, i++);
389                         if (c == '\n') {
390                             seennl |= SEEN_LF;
391                             break;
392                         }
393                         if (i >= len)
394                             break;
395                     }
396                 }
397             }
398             /* Finished: we have scanned for newlines, and none of them
399                need translating */
400         }
401         else if (!self->translate) {
402             Py_ssize_t i = 0;
403             /* We have already seen all newline types, no need to scan again */
404             if (seennl == SEEN_ALL)
405                 goto endscan;
406             for (;;) {
407                 Py_UCS4 c;
408                 /* Fast loop for non-control characters */
409                 while (PyUnicode_READ(kind, in_str, i) > '\r')
410                     i++;
411                 c = PyUnicode_READ(kind, in_str, i++);
412                 if (c == '\n')
413                     seennl |= SEEN_LF;
414                 else if (c == '\r') {
415                     if (PyUnicode_READ(kind, in_str, i) == '\n') {
416                         seennl |= SEEN_CRLF;
417                         i++;
418                     }
419                     else
420                         seennl |= SEEN_CR;
421                 }
422                 if (i >= len)
423                     break;
424                 if (seennl == SEEN_ALL)
425                     break;
426             }
427         endscan:
428             ;
429         }
430         else {
431             void *translated;
432             int kind = PyUnicode_KIND(output);
433             const void *in_str = PyUnicode_DATA(output);
434             Py_ssize_t in, out;
435             /* XXX: Previous in-place translation here is disabled as
436                resizing is not possible anymore */
437             /* We could try to optimize this so that we only do a copy
438                when there is something to translate. On the other hand,
439                we already know there is a \r byte, so chances are high
440                that something needs to be done. */
441             translated = PyMem_Malloc(kind * len);
442             if (translated == NULL) {
443                 PyErr_NoMemory();
444                 goto error;
445             }
446             in = out = 0;
447             for (;;) {
448                 Py_UCS4 c;
449                 /* Fast loop for non-control characters */
450                 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
451                     PyUnicode_WRITE(kind, translated, out++, c);
452                 if (c == '\n') {
453                     PyUnicode_WRITE(kind, translated, out++, c);
454                     seennl |= SEEN_LF;
455                     continue;
456                 }
457                 if (c == '\r') {
458                     if (PyUnicode_READ(kind, in_str, in) == '\n') {
459                         in++;
460                         seennl |= SEEN_CRLF;
461                     }
462                     else
463                         seennl |= SEEN_CR;
464                     PyUnicode_WRITE(kind, translated, out++, '\n');
465                     continue;
466                 }
467                 if (in > len)
468                     break;
469                 PyUnicode_WRITE(kind, translated, out++, c);
470             }
471             Py_DECREF(output);
472             output = PyUnicode_FromKindAndData(kind, translated, out);
473             PyMem_Free(translated);
474             if (!output)
475                 return NULL;
476         }
477         self->seennl |= seennl;
478     }
479 
480     return output;
481 
482   error:
483     Py_DECREF(output);
484     return NULL;
485 }
486 
487 /*[clinic input]
488 _io.IncrementalNewlineDecoder.decode
489     input: object
490     final: bool(accept={int}) = False
491 [clinic start generated code]*/
492 
493 static PyObject *
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object * self,PyObject * input,int final)494 _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
495                                           PyObject *input, int final)
496 /*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
497 {
498     return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
499 }
500 
501 /*[clinic input]
502 _io.IncrementalNewlineDecoder.getstate
503 [clinic start generated code]*/
504 
505 static PyObject *
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object * self)506 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
507 /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
508 {
509     PyObject *buffer;
510     unsigned long long flag;
511 
512     CHECK_INITIALIZED_DECODER(self);
513 
514     if (self->decoder != Py_None) {
515         PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
516            &_Py_ID(getstate));
517         if (state == NULL)
518             return NULL;
519         if (!PyTuple_Check(state)) {
520             PyErr_SetString(PyExc_TypeError,
521                             "illegal decoder state");
522             Py_DECREF(state);
523             return NULL;
524         }
525         if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
526                               &buffer, &flag))
527         {
528             Py_DECREF(state);
529             return NULL;
530         }
531         Py_INCREF(buffer);
532         Py_DECREF(state);
533     }
534     else {
535         buffer = PyBytes_FromString("");
536         flag = 0;
537     }
538     flag <<= 1;
539     if (self->pendingcr)
540         flag |= 1;
541     return Py_BuildValue("NK", buffer, flag);
542 }
543 
544 /*[clinic input]
545 _io.IncrementalNewlineDecoder.setstate
546     state: object
547     /
548 [clinic start generated code]*/
549 
550 static PyObject *
_io_IncrementalNewlineDecoder_setstate(nldecoder_object * self,PyObject * state)551 _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
552                                        PyObject *state)
553 /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
554 {
555     PyObject *buffer;
556     unsigned long long flag;
557 
558     CHECK_INITIALIZED_DECODER(self);
559 
560     if (!PyTuple_Check(state)) {
561         PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
562         return NULL;
563     }
564     if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
565                           &buffer, &flag))
566     {
567         return NULL;
568     }
569 
570     self->pendingcr = (int) (flag & 1);
571     flag >>= 1;
572 
573     if (self->decoder != Py_None) {
574         return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
575                                     "((OK))", buffer, flag);
576     }
577     else {
578         Py_RETURN_NONE;
579     }
580 }
581 
582 /*[clinic input]
583 _io.IncrementalNewlineDecoder.reset
584 [clinic start generated code]*/
585 
586 static PyObject *
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object * self)587 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
588 /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
589 {
590     CHECK_INITIALIZED_DECODER(self);
591 
592     self->seennl = 0;
593     self->pendingcr = 0;
594     if (self->decoder != Py_None)
595         return PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
596     else
597         Py_RETURN_NONE;
598 }
599 
600 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)601 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
602 {
603     CHECK_INITIALIZED_DECODER(self);
604 
605     switch (self->seennl) {
606     case SEEN_CR:
607         return PyUnicode_FromString("\r");
608     case SEEN_LF:
609         return PyUnicode_FromString("\n");
610     case SEEN_CRLF:
611         return PyUnicode_FromString("\r\n");
612     case SEEN_CR | SEEN_LF:
613         return Py_BuildValue("ss", "\r", "\n");
614     case SEEN_CR | SEEN_CRLF:
615         return Py_BuildValue("ss", "\r", "\r\n");
616     case SEEN_LF | SEEN_CRLF:
617         return Py_BuildValue("ss", "\n", "\r\n");
618     case SEEN_CR | SEEN_LF | SEEN_CRLF:
619         return Py_BuildValue("sss", "\r", "\n", "\r\n");
620     default:
621         Py_RETURN_NONE;
622    }
623 
624 }
625 
626 /* TextIOWrapper */
627 
/* Pointer type for the specialized encode helpers defined below
   (ascii_encode, utf8_encode, ...).  Those helpers take a `textio *` as
   their first parameter and are cast to this type when stored. */
typedef PyObject *
        (*encodefunc_t)(PyObject *, PyObject *);
630 
/* Instance layout of TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    /* Underlying buffer object; its readable()/writable() methods are
       queried when the decoder/encoder is set up. */
    PyObject *buffer;
    PyObject *encoding;
    /* Incremental encoder obtained from the codec info. */
    PyObject *encoder;
    /* Incremental decoder obtained from the codec info; wrapped in an
       IncrementalNewlineDecoder when readuniversal is set. */
    PyObject *decoder;
    /* str built from the `newline` argument of set_newline(), or NULL when
       that argument is NULL. */
    PyObject *readnl;
    /* Error handler name; converted with PyUnicode_AsUTF8() by the
       specialized encode helpers. */
    PyObject *errors;
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;   /* newline is NULL or "" (see set_newline) */
    char readtranslate;   /* newline is NULL (see set_newline) */
    char writetranslate;  /* newline is NULL or non-empty (see set_newline) */
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
689 
/* Forward declaration, so that code placed before the definition can
   call it. */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
692 
693 /* A couple of specialized cases in order to bypass the slow incremental
694    encoding methods for the most popular encodings. */
695 
696 static PyObject *
ascii_encode(textio * self,PyObject * text)697 ascii_encode(textio *self, PyObject *text)
698 {
699     return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
700 }
701 
702 static PyObject *
utf16be_encode(textio * self,PyObject * text)703 utf16be_encode(textio *self, PyObject *text)
704 {
705     return _PyUnicode_EncodeUTF16(text,
706                                   PyUnicode_AsUTF8(self->errors), 1);
707 }
708 
709 static PyObject *
utf16le_encode(textio * self,PyObject * text)710 utf16le_encode(textio *self, PyObject *text)
711 {
712     return _PyUnicode_EncodeUTF16(text,
713                                   PyUnicode_AsUTF8(self->errors), -1);
714 }
715 
716 static PyObject *
utf16_encode(textio * self,PyObject * text)717 utf16_encode(textio *self, PyObject *text)
718 {
719     if (!self->encoding_start_of_stream) {
720         /* Skip the BOM and use native byte ordering */
721 #if PY_BIG_ENDIAN
722         return utf16be_encode(self, text);
723 #else
724         return utf16le_encode(self, text);
725 #endif
726     }
727     return _PyUnicode_EncodeUTF16(text,
728                                   PyUnicode_AsUTF8(self->errors), 0);
729 }
730 
731 static PyObject *
utf32be_encode(textio * self,PyObject * text)732 utf32be_encode(textio *self, PyObject *text)
733 {
734     return _PyUnicode_EncodeUTF32(text,
735                                   PyUnicode_AsUTF8(self->errors), 1);
736 }
737 
738 static PyObject *
utf32le_encode(textio * self,PyObject * text)739 utf32le_encode(textio *self, PyObject *text)
740 {
741     return _PyUnicode_EncodeUTF32(text,
742                                   PyUnicode_AsUTF8(self->errors), -1);
743 }
744 
745 static PyObject *
utf32_encode(textio * self,PyObject * text)746 utf32_encode(textio *self, PyObject *text)
747 {
748     if (!self->encoding_start_of_stream) {
749         /* Skip the BOM and use native byte ordering */
750 #if PY_BIG_ENDIAN
751         return utf32be_encode(self, text);
752 #else
753         return utf32le_encode(self, text);
754 #endif
755     }
756     return _PyUnicode_EncodeUTF32(text,
757                                   PyUnicode_AsUTF8(self->errors), 0);
758 }
759 
760 static PyObject *
utf8_encode(textio * self,PyObject * text)761 utf8_encode(textio *self, PyObject *text)
762 {
763     return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
764 }
765 
766 static PyObject *
latin1_encode(textio * self,PyObject * text)767 latin1_encode(textio *self, PyObject *text)
768 {
769     return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
770 }
771 
772 // Return true when encoding can be skipped when text is ascii.
773 static inline int
is_asciicompat_encoding(encodefunc_t f)774 is_asciicompat_encoding(encodefunc_t f)
775 {
776     return f == (encodefunc_t) ascii_encode
777         || f == (encodefunc_t) latin1_encode
778         || f == (encodefunc_t) utf8_encode;
779 }
780 
781 /* Map normalized encoding names onto the specialized encoding funcs */
782 
783 typedef struct {
784     const char *name;
785     encodefunc_t encodefunc;
786 } encodefuncentry;
787 
788 static const encodefuncentry encodefuncs[] = {
789     {"ascii",       (encodefunc_t) ascii_encode},
790     {"iso8859-1",   (encodefunc_t) latin1_encode},
791     {"utf-8",       (encodefunc_t) utf8_encode},
792     {"utf-16-be",   (encodefunc_t) utf16be_encode},
793     {"utf-16-le",   (encodefunc_t) utf16le_encode},
794     {"utf-16",      (encodefunc_t) utf16_encode},
795     {"utf-32-be",   (encodefunc_t) utf32be_encode},
796     {"utf-32-le",   (encodefunc_t) utf32le_encode},
797     {"utf-32",      (encodefunc_t) utf32_encode},
798     {NULL, NULL}
799 };
800 
801 static int
validate_newline(const char * newline)802 validate_newline(const char *newline)
803 {
804     if (newline && newline[0] != '\0'
805         && !(newline[0] == '\n' && newline[1] == '\0')
806         && !(newline[0] == '\r' && newline[1] == '\0')
807         && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
808         PyErr_Format(PyExc_ValueError,
809                      "illegal newline value: %s", newline);
810         return -1;
811     }
812     return 0;
813 }
814 
815 static int
set_newline(textio * self,const char * newline)816 set_newline(textio *self, const char *newline)
817 {
818     PyObject *old = self->readnl;
819     if (newline == NULL) {
820         self->readnl = NULL;
821     }
822     else {
823         self->readnl = PyUnicode_FromString(newline);
824         if (self->readnl == NULL) {
825             self->readnl = old;
826             return -1;
827         }
828     }
829     self->readuniversal = (newline == NULL || newline[0] == '\0');
830     self->readtranslate = (newline == NULL);
831     self->writetranslate = (newline == NULL || newline[0] != '\0');
832     if (!self->readuniversal && self->readnl != NULL) {
833         // validate_newline() accepts only ASCII newlines.
834         assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
835         self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
836         if (strcmp(self->writenl, "\n") == 0) {
837             self->writenl = NULL;
838         }
839     }
840     else {
841 #ifdef MS_WINDOWS
842         self->writenl = "\r\n";
843 #else
844         self->writenl = NULL;
845 #endif
846     }
847     Py_XDECREF(old);
848     return 0;
849 }
850 
851 static int
_textiowrapper_set_decoder(textio * self,PyObject * codec_info,const char * errors)852 _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
853                            const char *errors)
854 {
855     PyObject *res;
856     int r;
857 
858     res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
859     if (res == NULL)
860         return -1;
861 
862     r = PyObject_IsTrue(res);
863     Py_DECREF(res);
864     if (r == -1)
865         return -1;
866 
867     if (r != 1)
868         return 0;
869 
870     Py_CLEAR(self->decoder);
871     self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
872     if (self->decoder == NULL)
873         return -1;
874 
875     if (self->readuniversal) {
876         PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
877             (PyObject *)&PyIncrementalNewlineDecoder_Type,
878             self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
879         if (incrementalDecoder == NULL)
880             return -1;
881         Py_CLEAR(self->decoder);
882         self->decoder = incrementalDecoder;
883     }
884 
885     return 0;
886 }
887 
888 static PyObject*
_textiowrapper_decode(PyObject * decoder,PyObject * bytes,int eof)889 _textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
890 {
891     PyObject *chars;
892 
893     if (Py_IS_TYPE(decoder, &PyIncrementalNewlineDecoder_Type))
894         chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
895     else
896         chars = PyObject_CallMethodObjArgs(decoder, &_Py_ID(decode), bytes,
897                                            eof ? Py_True : Py_False, NULL);
898 
899     if (check_decoded(chars) < 0)
900         // check_decoded already decreases refcount
901         return NULL;
902 
903     return chars;
904 }
905 
906 static int
_textiowrapper_set_encoder(textio * self,PyObject * codec_info,const char * errors)907 _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
908                            const char *errors)
909 {
910     PyObject *res;
911     int r;
912 
913     res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
914     if (res == NULL)
915         return -1;
916 
917     r = PyObject_IsTrue(res);
918     Py_DECREF(res);
919     if (r == -1)
920         return -1;
921 
922     if (r != 1)
923         return 0;
924 
925     Py_CLEAR(self->encoder);
926     self->encodefunc = NULL;
927     self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
928     if (self->encoder == NULL)
929         return -1;
930 
931     /* Get the normalized named of the codec */
932     if (_PyObject_LookupAttr(codec_info, &_Py_ID(name), &res) < 0) {
933         return -1;
934     }
935     if (res != NULL && PyUnicode_Check(res)) {
936         const encodefuncentry *e = encodefuncs;
937         while (e->name != NULL) {
938             if (_PyUnicode_EqualToASCIIString(res, e->name)) {
939                 self->encodefunc = e->encodefunc;
940                 break;
941             }
942             e++;
943         }
944     }
945     Py_XDECREF(res);
946 
947     return 0;
948 }
949 
950 static int
_textiowrapper_fix_encoder_state(textio * self)951 _textiowrapper_fix_encoder_state(textio *self)
952 {
953     if (!self->seekable || !self->encoder) {
954         return 0;
955     }
956 
957     self->encoding_start_of_stream = 1;
958 
959     PyObject *cookieObj = PyObject_CallMethodNoArgs(
960         self->buffer, &_Py_ID(tell));
961     if (cookieObj == NULL) {
962         return -1;
963     }
964 
965     int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
966     Py_DECREF(cookieObj);
967     if (cmp < 0) {
968         return -1;
969     }
970 
971     if (cmp == 0) {
972         self->encoding_start_of_stream = 0;
973         PyObject *res = PyObject_CallMethodOneArg(
974             self->encoder, &_Py_ID(setstate), _PyLong_GetZero());
975         if (res == NULL) {
976             return -1;
977         }
978         Py_DECREF(res);
979     }
980 
981     return 0;
982 }
983 
984 static int
io_check_errors(PyObject * errors)985 io_check_errors(PyObject *errors)
986 {
987     assert(errors != NULL && errors != Py_None);
988 
989     PyInterpreterState *interp = _PyInterpreterState_GET();
990 #ifndef Py_DEBUG
991     /* In release mode, only check in development mode (-X dev) */
992     if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
993         return 0;
994     }
995 #else
996     /* Always check in debug mode */
997 #endif
998 
999     /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1000        before_PyUnicode_InitEncodings() is called. */
1001     if (!interp->unicode.fs_codec.encoding) {
1002         return 0;
1003     }
1004 
1005     Py_ssize_t name_length;
1006     const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
1007     if (name == NULL) {
1008         return -1;
1009     }
1010     if (strlen(name) != (size_t)name_length) {
1011         PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
1012         return -1;
1013     }
1014     PyObject *handler = PyCodec_LookupError(name);
1015     if (handler != NULL) {
1016         Py_DECREF(handler);
1017         return 0;
1018     }
1019     return -1;
1020 }
1021 
1022 
1023 
1024 /*[clinic input]
1025 _io.TextIOWrapper.__init__
1026     buffer: object
1027     encoding: str(accept={str, NoneType}) = None
1028     errors: object = None
1029     newline: str(accept={str, NoneType}) = None
1030     line_buffering: bool(accept={int}) = False
1031     write_through: bool(accept={int}) = False
1032 
1033 Character and line based layer over a BufferedIOBase object, buffer.
1034 
1035 encoding gives the name of the encoding that the stream will be
1036 decoded or encoded with. It defaults to locale.getencoding().
1037 
1038 errors determines the strictness of encoding and decoding (see
1039 help(codecs.Codec) or the documentation for codecs.register) and
1040 defaults to "strict".
1041 
1042 newline controls how line endings are handled. It can be None, '',
1043 '\n', '\r', and '\r\n'.  It works as follows:
1044 
1045 * On input, if newline is None, universal newlines mode is
1046   enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1047   these are translated into '\n' before being returned to the
1048   caller. If it is '', universal newline mode is enabled, but line
1049   endings are returned to the caller untranslated. If it has any of
1050   the other legal values, input lines are only terminated by the given
1051   string, and the line ending is returned to the caller untranslated.
1052 
1053 * On output, if newline is None, any '\n' characters written are
1054   translated to the system default line separator, os.linesep. If
1055   newline is '' or '\n', no translation takes place. If newline is any
1056   of the other legal values, any '\n' characters written are translated
1057   to the given string.
1058 
1059 If line_buffering is True, a call to flush is implied when a call to
1060 write contains a newline character.
1061 [clinic start generated code]*/
1062 
1063 static int
_io_TextIOWrapper___init___impl(textio * self,PyObject * buffer,const char * encoding,PyObject * errors,const char * newline,int line_buffering,int write_through)1064 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1065                                 const char *encoding, PyObject *errors,
1066                                 const char *newline, int line_buffering,
1067                                 int write_through)
1068 /*[clinic end generated code: output=72267c0c01032ed2 input=72590963698f289b]*/
1069 {
1070     PyObject *raw, *codec_info = NULL;
1071     PyObject *res;
1072     int r;
1073 
1074     self->ok = 0;
1075     self->detached = 0;
1076 
1077     if (encoding == NULL) {
1078         PyInterpreterState *interp = _PyInterpreterState_GET();
1079         if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
1080             if (PyErr_WarnEx(PyExc_EncodingWarning,
1081                              "'encoding' argument not specified", 1)) {
1082                 return -1;
1083             }
1084         }
1085     }
1086 
1087     if (errors == Py_None) {
1088         errors = &_Py_ID(strict);
1089     }
1090     else if (!PyUnicode_Check(errors)) {
1091         // Check 'errors' argument here because Argument Clinic doesn't support
1092         // 'str(accept={str, NoneType})' converter.
1093         PyErr_Format(
1094             PyExc_TypeError,
1095             "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1096             Py_TYPE(errors)->tp_name);
1097         return -1;
1098     }
1099     else if (io_check_errors(errors)) {
1100         return -1;
1101     }
1102 
1103     if (validate_newline(newline) < 0) {
1104         return -1;
1105     }
1106 
1107     Py_CLEAR(self->buffer);
1108     Py_CLEAR(self->encoding);
1109     Py_CLEAR(self->encoder);
1110     Py_CLEAR(self->decoder);
1111     Py_CLEAR(self->readnl);
1112     Py_CLEAR(self->decoded_chars);
1113     Py_CLEAR(self->pending_bytes);
1114     Py_CLEAR(self->snapshot);
1115     Py_CLEAR(self->errors);
1116     Py_CLEAR(self->raw);
1117     self->decoded_chars_used = 0;
1118     self->pending_bytes_count = 0;
1119     self->encodefunc = NULL;
1120     self->b2cratio = 0.0;
1121 
1122     if (encoding == NULL && _PyRuntime.preconfig.utf8_mode) {
1123         _Py_DECLARE_STR(utf_8, "utf-8");
1124         self->encoding = Py_NewRef(&_Py_STR(utf_8));
1125     }
1126     else if (encoding == NULL || (strcmp(encoding, "locale") == 0)) {
1127         self->encoding = _Py_GetLocaleEncodingObject();
1128         if (self->encoding == NULL) {
1129             goto error;
1130         }
1131         assert(PyUnicode_Check(self->encoding));
1132     }
1133 
1134     if (self->encoding != NULL) {
1135         encoding = PyUnicode_AsUTF8(self->encoding);
1136         if (encoding == NULL)
1137             goto error;
1138     }
1139     else if (encoding != NULL) {
1140         self->encoding = PyUnicode_FromString(encoding);
1141         if (self->encoding == NULL)
1142             goto error;
1143     }
1144     else {
1145         PyErr_SetString(PyExc_OSError,
1146                         "could not determine default encoding");
1147         goto error;
1148     }
1149 
1150     /* Check we have been asked for a real text encoding */
1151     codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1152     if (codec_info == NULL) {
1153         Py_CLEAR(self->encoding);
1154         goto error;
1155     }
1156 
1157     /* XXX: Failures beyond this point have the potential to leak elements
1158      * of the partially constructed object (like self->encoding)
1159      */
1160 
1161     Py_INCREF(errors);
1162     self->errors = errors;
1163     self->chunk_size = 8192;
1164     self->line_buffering = line_buffering;
1165     self->write_through = write_through;
1166     if (set_newline(self, newline) < 0) {
1167         goto error;
1168     }
1169 
1170     self->buffer = buffer;
1171     Py_INCREF(buffer);
1172 
1173     /* Build the decoder object */
1174     if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1175         goto error;
1176 
1177     /* Build the encoder object */
1178     if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1179         goto error;
1180 
1181     /* Finished sorting out the codec details */
1182     Py_CLEAR(codec_info);
1183 
1184     if (Py_IS_TYPE(buffer, &PyBufferedReader_Type) ||
1185         Py_IS_TYPE(buffer, &PyBufferedWriter_Type) ||
1186         Py_IS_TYPE(buffer, &PyBufferedRandom_Type))
1187     {
1188         if (_PyObject_LookupAttr(buffer, &_Py_ID(raw), &raw) < 0)
1189             goto error;
1190         /* Cache the raw FileIO object to speed up 'closed' checks */
1191         if (raw != NULL) {
1192             if (Py_IS_TYPE(raw, &PyFileIO_Type))
1193                 self->raw = raw;
1194             else
1195                 Py_DECREF(raw);
1196         }
1197     }
1198 
1199     res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
1200     if (res == NULL)
1201         goto error;
1202     r = PyObject_IsTrue(res);
1203     Py_DECREF(res);
1204     if (r < 0)
1205         goto error;
1206     self->seekable = self->telling = r;
1207 
1208     r = _PyObject_LookupAttr(buffer, &_Py_ID(read1), &res);
1209     if (r < 0) {
1210         goto error;
1211     }
1212     Py_XDECREF(res);
1213     self->has_read1 = r;
1214 
1215     self->encoding_start_of_stream = 0;
1216     if (_textiowrapper_fix_encoder_state(self) < 0) {
1217         goto error;
1218     }
1219 
1220     self->ok = 1;
1221     return 0;
1222 
1223   error:
1224     Py_XDECREF(codec_info);
1225     return -1;
1226 }
1227 
1228 /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1229  * -1 on error.
1230  */
1231 static int
convert_optional_bool(PyObject * obj,int default_value)1232 convert_optional_bool(PyObject *obj, int default_value)
1233 {
1234     long v;
1235     if (obj == Py_None) {
1236         v = default_value;
1237     }
1238     else {
1239         v = PyLong_AsLong(obj);
1240         if (v == -1 && PyErr_Occurred())
1241             return -1;
1242     }
1243     return v != 0;
1244 }
1245 
1246 static int
textiowrapper_change_encoding(textio * self,PyObject * encoding,PyObject * errors,int newline_changed)1247 textiowrapper_change_encoding(textio *self, PyObject *encoding,
1248                               PyObject *errors, int newline_changed)
1249 {
1250     /* Use existing settings where new settings are not specified */
1251     if (encoding == Py_None && errors == Py_None && !newline_changed) {
1252         return 0;  // no change
1253     }
1254 
1255     if (encoding == Py_None) {
1256         encoding = self->encoding;
1257         if (errors == Py_None) {
1258             errors = self->errors;
1259         }
1260         Py_INCREF(encoding);
1261     }
1262     else {
1263         if (_PyUnicode_EqualToASCIIString(encoding, "locale")) {
1264             encoding = _Py_GetLocaleEncodingObject();
1265             if (encoding == NULL) {
1266                 return -1;
1267             }
1268         } else {
1269             Py_INCREF(encoding);
1270         }
1271         if (errors == Py_None) {
1272             errors = &_Py_ID(strict);
1273         }
1274     }
1275 
1276     const char *c_errors = PyUnicode_AsUTF8(errors);
1277     if (c_errors == NULL) {
1278         Py_DECREF(encoding);
1279         return -1;
1280     }
1281 
1282     // Create new encoder & decoder
1283     PyObject *codec_info = _PyCodec_LookupTextEncoding(
1284         PyUnicode_AsUTF8(encoding), "codecs.open()");
1285     if (codec_info == NULL) {
1286         Py_DECREF(encoding);
1287         return -1;
1288     }
1289     if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1290             _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1291         Py_DECREF(codec_info);
1292         Py_DECREF(encoding);
1293         return -1;
1294     }
1295     Py_DECREF(codec_info);
1296 
1297     Py_INCREF(errors);
1298     Py_SETREF(self->encoding, encoding);
1299     Py_SETREF(self->errors, errors);
1300 
1301     return _textiowrapper_fix_encoder_state(self);
1302 }
1303 
1304 /*[clinic input]
1305 _io.TextIOWrapper.reconfigure
1306     *
1307     encoding: object = None
1308     errors: object = None
1309     newline as newline_obj: object(c_default="NULL") = None
1310     line_buffering as line_buffering_obj: object = None
1311     write_through as write_through_obj: object = None
1312 
1313 Reconfigure the text stream with new parameters.
1314 
1315 This also does an implicit stream flush.
1316 
1317 [clinic start generated code]*/
1318 
1319 static PyObject *
_io_TextIOWrapper_reconfigure_impl(textio * self,PyObject * encoding,PyObject * errors,PyObject * newline_obj,PyObject * line_buffering_obj,PyObject * write_through_obj)1320 _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1321                                    PyObject *errors, PyObject *newline_obj,
1322                                    PyObject *line_buffering_obj,
1323                                    PyObject *write_through_obj)
1324 /*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
1325 {
1326     int line_buffering;
1327     int write_through;
1328     const char *newline = NULL;
1329 
1330     /* Check if something is in the read buffer */
1331     if (self->decoded_chars != NULL) {
1332         if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1333             _unsupported("It is not possible to set the encoding or newline "
1334                          "of stream after the first read");
1335             return NULL;
1336         }
1337     }
1338 
1339     if (newline_obj != NULL && newline_obj != Py_None) {
1340         newline = PyUnicode_AsUTF8(newline_obj);
1341         if (newline == NULL || validate_newline(newline) < 0) {
1342             return NULL;
1343         }
1344     }
1345 
1346     line_buffering = convert_optional_bool(line_buffering_obj,
1347                                            self->line_buffering);
1348     write_through = convert_optional_bool(write_through_obj,
1349                                           self->write_through);
1350     if (line_buffering < 0 || write_through < 0) {
1351         return NULL;
1352     }
1353 
1354     PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
1355     if (res == NULL) {
1356         return NULL;
1357     }
1358     Py_DECREF(res);
1359     self->b2cratio = 0;
1360 
1361     if (newline_obj != NULL && set_newline(self, newline) < 0) {
1362         return NULL;
1363     }
1364 
1365     if (textiowrapper_change_encoding(
1366             self, encoding, errors, newline_obj != NULL) < 0) {
1367         return NULL;
1368     }
1369 
1370     self->line_buffering = line_buffering;
1371     self->write_through = write_through;
1372     Py_RETURN_NONE;
1373 }
1374 
1375 static int
textiowrapper_clear(textio * self)1376 textiowrapper_clear(textio *self)
1377 {
1378     self->ok = 0;
1379     Py_CLEAR(self->buffer);
1380     Py_CLEAR(self->encoding);
1381     Py_CLEAR(self->encoder);
1382     Py_CLEAR(self->decoder);
1383     Py_CLEAR(self->readnl);
1384     Py_CLEAR(self->decoded_chars);
1385     Py_CLEAR(self->pending_bytes);
1386     Py_CLEAR(self->snapshot);
1387     Py_CLEAR(self->errors);
1388     Py_CLEAR(self->raw);
1389 
1390     Py_CLEAR(self->dict);
1391     return 0;
1392 }
1393 
1394 static void
textiowrapper_dealloc(textio * self)1395 textiowrapper_dealloc(textio *self)
1396 {
1397     self->finalizing = 1;
1398     if (_PyIOBase_finalize((PyObject *) self) < 0)
1399         return;
1400     self->ok = 0;
1401     _PyObject_GC_UNTRACK(self);
1402     if (self->weakreflist != NULL)
1403         PyObject_ClearWeakRefs((PyObject *)self);
1404     textiowrapper_clear(self);
1405     Py_TYPE(self)->tp_free((PyObject *)self);
1406 }
1407 
1408 static int
textiowrapper_traverse(textio * self,visitproc visit,void * arg)1409 textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1410 {
1411     Py_VISIT(self->buffer);
1412     Py_VISIT(self->encoding);
1413     Py_VISIT(self->encoder);
1414     Py_VISIT(self->decoder);
1415     Py_VISIT(self->readnl);
1416     Py_VISIT(self->decoded_chars);
1417     Py_VISIT(self->pending_bytes);
1418     Py_VISIT(self->snapshot);
1419     Py_VISIT(self->errors);
1420     Py_VISIT(self->raw);
1421 
1422     Py_VISIT(self->dict);
1423     return 0;
1424 }
1425 
1426 static PyObject *
1427 textiowrapper_closed_get(textio *self, void *context);
1428 
/* Make the enclosing function return NULL (with an exception set) when the
 * stream is closed.
 *
 * Shortcuts for the common case: for an exact TextIOWrapper the closed state
 * is read straight from the cached raw FileIO object when there is one, and
 * from the "closed" getter otherwise, raising ValueError if the stream is
 * closed.  Any other type goes through _PyIOBase_check_closed().
 */
#define CHECK_CLOSED(self) \
    do { \
        int _closed; \
        PyObject *_closed_obj; \
        if (!Py_IS_TYPE(self, &PyTextIOWrapper_Type)) { \
            if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
                return NULL; \
        } \
        else { \
            if (self->raw != NULL) { \
                _closed = _PyFileIO_closed(self->raw); \
            } \
            else { \
                _closed_obj = textiowrapper_closed_get(self, NULL); \
                if (_closed_obj == NULL) \
                    return NULL; \
                _closed = PyObject_IsTrue(_closed_obj); \
                Py_DECREF(_closed_obj); \
                if (_closed < 0) \
                    return NULL; \
            } \
            if (_closed > 0) { \
                PyErr_SetString(PyExc_ValueError, \
                                "I/O operation on closed file."); \
                return NULL; \
            } \
        } \
    } while (0)
1455 
/* Guard macros for TextIOWrapper entry points.
 *
 * Each one makes the enclosing function return early with ValueError set
 * when the object cannot be used.  They are wrapped in do { } while (0) so
 * that an invocation followed by ';' is exactly one statement, including
 * inside an unbraced if/else.
 */

/* Return NULL if the object was never successfully initialized. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)

/* Return NULL if the object is uninitialized or its buffer was detached. */
#define CHECK_ATTACHED(self) \
    do { \
        CHECK_INITIALIZED(self); \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return NULL; \
        } \
    } while (0)

/* Same checks as CHECK_ATTACHED, for functions that report errors as -1. */
#define CHECK_ATTACHED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                "I/O operation on uninitialized object"); \
            return -1; \
        } \
        else if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                 "underlying buffer has been detached"); \
            return -1; \
        } \
    } while (0)
1481 
1482 
1483 /*[clinic input]
1484 _io.TextIOWrapper.detach
1485 [clinic start generated code]*/
1486 
1487 static PyObject *
_io_TextIOWrapper_detach_impl(textio * self)1488 _io_TextIOWrapper_detach_impl(textio *self)
1489 /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
1490 {
1491     PyObject *buffer, *res;
1492     CHECK_ATTACHED(self);
1493     res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
1494     if (res == NULL)
1495         return NULL;
1496     Py_DECREF(res);
1497     buffer = self->buffer;
1498     self->buffer = NULL;
1499     self->detached = 1;
1500     return buffer;
1501 }
1502 
1503 /* Flush the internal write buffer. This doesn't explicitly flush the
1504    underlying buffered object, though. */
1505 static int
_textiowrapper_writeflush(textio * self)1506 _textiowrapper_writeflush(textio *self)
1507 {
1508     if (self->pending_bytes == NULL)
1509         return 0;
1510 
1511     PyObject *pending = self->pending_bytes;
1512     PyObject *b;
1513 
1514     if (PyBytes_Check(pending)) {
1515         b = pending;
1516         Py_INCREF(b);
1517     }
1518     else if (PyUnicode_Check(pending)) {
1519         assert(PyUnicode_IS_ASCII(pending));
1520         assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1521         b = PyBytes_FromStringAndSize(
1522                 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1523         if (b == NULL) {
1524             return -1;
1525         }
1526     }
1527     else {
1528         assert(PyList_Check(pending));
1529         b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1530         if (b == NULL) {
1531             return -1;
1532         }
1533 
1534         char *buf = PyBytes_AsString(b);
1535         Py_ssize_t pos = 0;
1536 
1537         for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1538             PyObject *obj = PyList_GET_ITEM(pending, i);
1539             char *src;
1540             Py_ssize_t len;
1541             if (PyUnicode_Check(obj)) {
1542                 assert(PyUnicode_IS_ASCII(obj));
1543                 src = PyUnicode_DATA(obj);
1544                 len = PyUnicode_GET_LENGTH(obj);
1545             }
1546             else {
1547                 assert(PyBytes_Check(obj));
1548                 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1549                     Py_DECREF(b);
1550                     return -1;
1551                 }
1552             }
1553             memcpy(buf + pos, src, len);
1554             pos += len;
1555         }
1556         assert(pos == self->pending_bytes_count);
1557     }
1558 
1559     self->pending_bytes_count = 0;
1560     self->pending_bytes = NULL;
1561     Py_DECREF(pending);
1562 
1563     PyObject *ret;
1564     do {
1565         ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
1566     } while (ret == NULL && _PyIO_trap_eintr());
1567     Py_DECREF(b);
1568     // NOTE: We cleared buffer but we don't know how many bytes are actually written
1569     // when an error occurred.
1570     if (ret == NULL)
1571         return -1;
1572     Py_DECREF(ret);
1573     return 0;
1574 }
1575 
1576 /*[clinic input]
1577 _io.TextIOWrapper.write
1578     text: unicode
1579     /
1580 [clinic start generated code]*/
1581 
1582 static PyObject *
_io_TextIOWrapper_write_impl(textio * self,PyObject * text)1583 _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1584 /*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
1585 {
1586     PyObject *ret;
1587     PyObject *b;
1588     Py_ssize_t textlen;
1589     int haslf = 0;
1590     int needflush = 0, text_needflush = 0;
1591 
1592     if (PyUnicode_READY(text) == -1)
1593         return NULL;
1594 
1595     CHECK_ATTACHED(self);
1596     CHECK_CLOSED(self);
1597 
1598     if (self->encoder == NULL)
1599         return _unsupported("not writable");
1600 
1601     Py_INCREF(text);
1602 
1603     textlen = PyUnicode_GET_LENGTH(text);
1604 
1605     if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1606         if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1607             haslf = 1;
1608 
1609     if (haslf && self->writetranslate && self->writenl != NULL) {
1610         PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace),
1611                                                  "ss", "\n", self->writenl);
1612         Py_DECREF(text);
1613         if (newtext == NULL)
1614             return NULL;
1615         text = newtext;
1616     }
1617 
1618     if (self->write_through)
1619         text_needflush = 1;
1620     if (self->line_buffering &&
1621         (haslf ||
1622          PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1623         needflush = 1;
1624 
1625     /* XXX What if we were just reading? */
1626     if (self->encodefunc != NULL) {
1627         if (PyUnicode_IS_ASCII(text) &&
1628                 // See bpo-43260
1629                 PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
1630                 is_asciicompat_encoding(self->encodefunc)) {
1631             b = text;
1632             Py_INCREF(b);
1633         }
1634         else {
1635             b = (*self->encodefunc)((PyObject *) self, text);
1636         }
1637         self->encoding_start_of_stream = 0;
1638     }
1639     else {
1640         b = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(encode), text);
1641     }
1642 
1643     Py_DECREF(text);
1644     if (b == NULL)
1645         return NULL;
1646     if (b != text && !PyBytes_Check(b)) {
1647         PyErr_Format(PyExc_TypeError,
1648                      "encoder should return a bytes object, not '%.200s'",
1649                      Py_TYPE(b)->tp_name);
1650         Py_DECREF(b);
1651         return NULL;
1652     }
1653 
1654     Py_ssize_t bytes_len;
1655     if (b == text) {
1656         bytes_len = PyUnicode_GET_LENGTH(b);
1657     }
1658     else {
1659         bytes_len = PyBytes_GET_SIZE(b);
1660     }
1661 
1662     if (self->pending_bytes == NULL) {
1663         self->pending_bytes_count = 0;
1664         self->pending_bytes = b;
1665     }
1666     else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
1667         // Prevent to concatenate more than chunk_size data.
1668         if (_textiowrapper_writeflush(self) < 0) {
1669             Py_DECREF(b);
1670             return NULL;
1671         }
1672         self->pending_bytes = b;
1673     }
1674     else if (!PyList_CheckExact(self->pending_bytes)) {
1675         PyObject *list = PyList_New(2);
1676         if (list == NULL) {
1677             Py_DECREF(b);
1678             return NULL;
1679         }
1680         PyList_SET_ITEM(list, 0, self->pending_bytes);
1681         PyList_SET_ITEM(list, 1, b);
1682         self->pending_bytes = list;
1683     }
1684     else {
1685         if (PyList_Append(self->pending_bytes, b) < 0) {
1686             Py_DECREF(b);
1687             return NULL;
1688         }
1689         Py_DECREF(b);
1690     }
1691 
1692     self->pending_bytes_count += bytes_len;
1693     if (self->pending_bytes_count >= self->chunk_size || needflush ||
1694         text_needflush) {
1695         if (_textiowrapper_writeflush(self) < 0)
1696             return NULL;
1697     }
1698 
1699     if (needflush) {
1700         ret = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
1701         if (ret == NULL)
1702             return NULL;
1703         Py_DECREF(ret);
1704     }
1705 
1706     textiowrapper_set_decoded_chars(self, NULL);
1707     Py_CLEAR(self->snapshot);
1708 
1709     if (self->decoder) {
1710         ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
1711         if (ret == NULL)
1712             return NULL;
1713         Py_DECREF(ret);
1714     }
1715 
1716     return PyLong_FromSsize_t(textlen);
1717 }
1718 
1719 /* Steal a reference to chars and store it in the decoded_char buffer;
1720  */
1721 static void
textiowrapper_set_decoded_chars(textio * self,PyObject * chars)1722 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1723 {
1724     Py_XSETREF(self->decoded_chars, chars);
1725     self->decoded_chars_used = 0;
1726 }
1727 
1728 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1729 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1730 {
1731     PyObject *chars;
1732     Py_ssize_t avail;
1733 
1734     if (self->decoded_chars == NULL)
1735         return PyUnicode_FromStringAndSize(NULL, 0);
1736 
1737     /* decoded_chars is guaranteed to be "ready". */
1738     avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1739              - self->decoded_chars_used);
1740 
1741     assert(avail >= 0);
1742 
1743     if (n < 0 || n > avail)
1744         n = avail;
1745 
1746     if (self->decoded_chars_used > 0 || n < avail) {
1747         chars = PyUnicode_Substring(self->decoded_chars,
1748                                     self->decoded_chars_used,
1749                                     self->decoded_chars_used + n);
1750         if (chars == NULL)
1751             return NULL;
1752     }
1753     else {
1754         chars = self->decoded_chars;
1755         Py_INCREF(chars);
1756     }
1757 
1758     self->decoded_chars_used += n;
1759     return chars;
1760 }
1761 
1762 /* Read and decode the next chunk of data from the BufferedReader.
1763  */
1764 static int
textiowrapper_read_chunk(textio * self,Py_ssize_t size_hint)1765 textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1766 {
1767     PyObject *dec_buffer = NULL;
1768     PyObject *dec_flags = NULL;
1769     PyObject *input_chunk = NULL;
1770     Py_buffer input_chunk_buf;
1771     PyObject *decoded_chars, *chunk_size;
1772     Py_ssize_t nbytes, nchars;
1773     int eof;
1774 
1775     /* The return value is True unless EOF was reached.  The decoded string is
1776      * placed in self._decoded_chars (replacing its previous value).  The
1777      * entire input chunk is sent to the decoder, though some of it may remain
1778      * buffered in the decoder, yet to be converted.
1779      */
1780 
1781     if (self->decoder == NULL) {
1782         _unsupported("not readable");
1783         return -1;
1784     }
1785 
1786     if (self->telling) {
1787         /* To prepare for tell(), we need to snapshot a point in the file
1788          * where the decoder's input buffer is empty.
1789          */
1790         PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
1791                                                      &_Py_ID(getstate));
1792         if (state == NULL)
1793             return -1;
1794         /* Given this, we know there was a valid snapshot point
1795          * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1796          */
1797         if (!PyTuple_Check(state)) {
1798             PyErr_SetString(PyExc_TypeError,
1799                             "illegal decoder state");
1800             Py_DECREF(state);
1801             return -1;
1802         }
1803         if (!PyArg_ParseTuple(state,
1804                               "OO;illegal decoder state", &dec_buffer, &dec_flags))
1805         {
1806             Py_DECREF(state);
1807             return -1;
1808         }
1809 
1810         if (!PyBytes_Check(dec_buffer)) {
1811             PyErr_Format(PyExc_TypeError,
1812                          "illegal decoder state: the first item should be a "
1813                          "bytes object, not '%.200s'",
1814                          Py_TYPE(dec_buffer)->tp_name);
1815             Py_DECREF(state);
1816             return -1;
1817         }
1818         Py_INCREF(dec_buffer);
1819         Py_INCREF(dec_flags);
1820         Py_DECREF(state);
1821     }
1822 
1823     /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1824     if (size_hint > 0) {
1825         size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1826     }
1827     chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1828     if (chunk_size == NULL)
1829         goto fail;
1830 
1831     input_chunk = PyObject_CallMethodOneArg(self->buffer,
1832         (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
1833         chunk_size);
1834     Py_DECREF(chunk_size);
1835     if (input_chunk == NULL)
1836         goto fail;
1837 
1838     if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1839         PyErr_Format(PyExc_TypeError,
1840                      "underlying %s() should have returned a bytes-like object, "
1841                      "not '%.200s'", (self->has_read1 ? "read1": "read"),
1842                      Py_TYPE(input_chunk)->tp_name);
1843         goto fail;
1844     }
1845 
1846     nbytes = input_chunk_buf.len;
1847     eof = (nbytes == 0);
1848 
1849     decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1850     PyBuffer_Release(&input_chunk_buf);
1851     if (decoded_chars == NULL)
1852         goto fail;
1853 
1854     textiowrapper_set_decoded_chars(self, decoded_chars);
1855     nchars = PyUnicode_GET_LENGTH(decoded_chars);
1856     if (nchars > 0)
1857         self->b2cratio = (double) nbytes / nchars;
1858     else
1859         self->b2cratio = 0.0;
1860     if (nchars > 0)
1861         eof = 0;
1862 
1863     if (self->telling) {
1864         /* At the snapshot point, len(dec_buffer) bytes before the read, the
1865          * next input to be decoded is dec_buffer + input_chunk.
1866          */
1867         PyObject *next_input = dec_buffer;
1868         PyBytes_Concat(&next_input, input_chunk);
1869         dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
1870         if (next_input == NULL) {
1871             goto fail;
1872         }
1873         PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1874         if (snapshot == NULL) {
1875             dec_flags = NULL;
1876             goto fail;
1877         }
1878         Py_XSETREF(self->snapshot, snapshot);
1879     }
1880     Py_DECREF(input_chunk);
1881 
1882     return (eof == 0);
1883 
1884   fail:
1885     Py_XDECREF(dec_buffer);
1886     Py_XDECREF(dec_flags);
1887     Py_XDECREF(input_chunk);
1888     return -1;
1889 }
1890 
1891 /*[clinic input]
1892 _io.TextIOWrapper.read
1893     size as n: Py_ssize_t(accept={int, NoneType}) = -1
1894     /
1895 [clinic start generated code]*/
1896 
1897 static PyObject *
_io_TextIOWrapper_read_impl(textio * self,Py_ssize_t n)1898 _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1899 /*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
1900 {
1901     PyObject *result = NULL, *chunks = NULL;
1902 
1903     CHECK_ATTACHED(self);
1904     CHECK_CLOSED(self);
1905 
1906     if (self->decoder == NULL)
1907         return _unsupported("not readable");
1908 
1909     if (_textiowrapper_writeflush(self) < 0)
1910         return NULL;
1911 
1912     if (n < 0) {
1913         /* Read everything */
1914         PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
1915         PyObject *decoded;
1916         if (bytes == NULL)
1917             goto fail;
1918 
1919         if (Py_IS_TYPE(self->decoder, &PyIncrementalNewlineDecoder_Type))
1920             decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1921                                                           bytes, 1);
1922         else
1923             decoded = PyObject_CallMethodObjArgs(
1924                 self->decoder, &_Py_ID(decode), bytes, Py_True, NULL);
1925         Py_DECREF(bytes);
1926         if (check_decoded(decoded) < 0)
1927             goto fail;
1928 
1929         result = textiowrapper_get_decoded_chars(self, -1);
1930 
1931         if (result == NULL) {
1932             Py_DECREF(decoded);
1933             return NULL;
1934         }
1935 
1936         PyUnicode_AppendAndDel(&result, decoded);
1937         if (result == NULL)
1938             goto fail;
1939 
1940         textiowrapper_set_decoded_chars(self, NULL);
1941         Py_CLEAR(self->snapshot);
1942         return result;
1943     }
1944     else {
1945         int res = 1;
1946         Py_ssize_t remaining = n;
1947 
1948         result = textiowrapper_get_decoded_chars(self, n);
1949         if (result == NULL)
1950             goto fail;
1951         if (PyUnicode_READY(result) == -1)
1952             goto fail;
1953         remaining -= PyUnicode_GET_LENGTH(result);
1954 
1955         /* Keep reading chunks until we have n characters to return */
1956         while (remaining > 0) {
1957             res = textiowrapper_read_chunk(self, remaining);
1958             if (res < 0) {
1959                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1960                    when EINTR occurs so we needn't do it ourselves. */
1961                 if (_PyIO_trap_eintr()) {
1962                     continue;
1963                 }
1964                 goto fail;
1965             }
1966             if (res == 0)  /* EOF */
1967                 break;
1968             if (chunks == NULL) {
1969                 chunks = PyList_New(0);
1970                 if (chunks == NULL)
1971                     goto fail;
1972             }
1973             if (PyUnicode_GET_LENGTH(result) > 0 &&
1974                 PyList_Append(chunks, result) < 0)
1975                 goto fail;
1976             Py_DECREF(result);
1977             result = textiowrapper_get_decoded_chars(self, remaining);
1978             if (result == NULL)
1979                 goto fail;
1980             remaining -= PyUnicode_GET_LENGTH(result);
1981         }
1982         if (chunks != NULL) {
1983             if (result != NULL && PyList_Append(chunks, result) < 0)
1984                 goto fail;
1985             _Py_DECLARE_STR(empty, "");
1986             Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks));
1987             if (result == NULL)
1988                 goto fail;
1989             Py_CLEAR(chunks);
1990         }
1991         return result;
1992     }
1993   fail:
1994     Py_XDECREF(result);
1995     Py_XDECREF(chunks);
1996     return NULL;
1997 }
1998 
1999 
2000 /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
2001    that is to the NUL character. Otherwise the function will produce
2002    incorrect results. */
2003 static const char *
find_control_char(int kind,const char * s,const char * end,Py_UCS4 ch)2004 find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
2005 {
2006     if (kind == PyUnicode_1BYTE_KIND) {
2007         assert(ch < 256);
2008         return (char *) memchr((const void *) s, (char) ch, end - s);
2009     }
2010     for (;;) {
2011         while (PyUnicode_READ(kind, s, 0) > ch)
2012             s += kind;
2013         if (PyUnicode_READ(kind, s, 0) == ch)
2014             return s;
2015         if (s == end)
2016             return NULL;
2017         s += kind;
2018     }
2019 }
2020 
2021 Py_ssize_t
_PyIO_find_line_ending(int translated,int universal,PyObject * readnl,int kind,const char * start,const char * end,Py_ssize_t * consumed)2022 _PyIO_find_line_ending(
2023     int translated, int universal, PyObject *readnl,
2024     int kind, const char *start, const char *end, Py_ssize_t *consumed)
2025 {
2026     Py_ssize_t len = (end - start)/kind;
2027 
2028     if (translated) {
2029         /* Newlines are already translated, only search for \n */
2030         const char *pos = find_control_char(kind, start, end, '\n');
2031         if (pos != NULL)
2032             return (pos - start)/kind + 1;
2033         else {
2034             *consumed = len;
2035             return -1;
2036         }
2037     }
2038     else if (universal) {
2039         /* Universal newline search. Find any of \r, \r\n, \n
2040          * The decoder ensures that \r\n are not split in two pieces
2041          */
2042         const char *s = start;
2043         for (;;) {
2044             Py_UCS4 ch;
2045             /* Fast path for non-control chars. The loop always ends
2046                since the Unicode string is NUL-terminated. */
2047             while (PyUnicode_READ(kind, s, 0) > '\r')
2048                 s += kind;
2049             if (s >= end) {
2050                 *consumed = len;
2051                 return -1;
2052             }
2053             ch = PyUnicode_READ(kind, s, 0);
2054             s += kind;
2055             if (ch == '\n')
2056                 return (s - start)/kind;
2057             if (ch == '\r') {
2058                 if (PyUnicode_READ(kind, s, 0) == '\n')
2059                     return (s - start)/kind + 1;
2060                 else
2061                     return (s - start)/kind;
2062             }
2063         }
2064     }
2065     else {
2066         /* Non-universal mode. */
2067         Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2068         const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2069         /* Assume that readnl is an ASCII character. */
2070         assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2071         if (readnl_len == 1) {
2072             const char *pos = find_control_char(kind, start, end, nl[0]);
2073             if (pos != NULL)
2074                 return (pos - start)/kind + 1;
2075             *consumed = len;
2076             return -1;
2077         }
2078         else {
2079             const char *s = start;
2080             const char *e = end - (readnl_len - 1)*kind;
2081             const char *pos;
2082             if (e < s)
2083                 e = s;
2084             while (s < e) {
2085                 Py_ssize_t i;
2086                 const char *pos = find_control_char(kind, s, end, nl[0]);
2087                 if (pos == NULL || pos >= e)
2088                     break;
2089                 for (i = 1; i < readnl_len; i++) {
2090                     if (PyUnicode_READ(kind, pos, i) != nl[i])
2091                         break;
2092                 }
2093                 if (i == readnl_len)
2094                     return (pos - start)/kind + readnl_len;
2095                 s = pos + kind;
2096             }
2097             pos = find_control_char(kind, e, end, nl[0]);
2098             if (pos == NULL)
2099                 *consumed = len;
2100             else
2101                 *consumed = (pos - start)/kind;
2102             return -1;
2103         }
2104     }
2105 }
2106 
2107 static PyObject *
_textiowrapper_readline(textio * self,Py_ssize_t limit)2108 _textiowrapper_readline(textio *self, Py_ssize_t limit)
2109 {
2110     PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2111     Py_ssize_t start, endpos, chunked, offset_to_buffer;
2112     int res;
2113 
2114     CHECK_CLOSED(self);
2115 
2116     if (_textiowrapper_writeflush(self) < 0)
2117         return NULL;
2118 
2119     chunked = 0;
2120 
2121     while (1) {
2122         const char *ptr;
2123         Py_ssize_t line_len;
2124         int kind;
2125         Py_ssize_t consumed = 0;
2126 
2127         /* First, get some data if necessary */
2128         res = 1;
2129         while (!self->decoded_chars ||
2130                !PyUnicode_GET_LENGTH(self->decoded_chars)) {
2131             res = textiowrapper_read_chunk(self, 0);
2132             if (res < 0) {
2133                 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2134                    when EINTR occurs so we needn't do it ourselves. */
2135                 if (_PyIO_trap_eintr()) {
2136                     continue;
2137                 }
2138                 goto error;
2139             }
2140             if (res == 0)
2141                 break;
2142         }
2143         if (res == 0) {
2144             /* end of file */
2145             textiowrapper_set_decoded_chars(self, NULL);
2146             Py_CLEAR(self->snapshot);
2147             start = endpos = offset_to_buffer = 0;
2148             break;
2149         }
2150 
2151         if (remaining == NULL) {
2152             line = self->decoded_chars;
2153             start = self->decoded_chars_used;
2154             offset_to_buffer = 0;
2155             Py_INCREF(line);
2156         }
2157         else {
2158             assert(self->decoded_chars_used == 0);
2159             line = PyUnicode_Concat(remaining, self->decoded_chars);
2160             start = 0;
2161             offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
2162             Py_CLEAR(remaining);
2163             if (line == NULL)
2164                 goto error;
2165             if (PyUnicode_READY(line) == -1)
2166                 goto error;
2167         }
2168 
2169         ptr = PyUnicode_DATA(line);
2170         line_len = PyUnicode_GET_LENGTH(line);
2171         kind = PyUnicode_KIND(line);
2172 
2173         endpos = _PyIO_find_line_ending(
2174             self->readtranslate, self->readuniversal, self->readnl,
2175             kind,
2176             ptr + kind * start,
2177             ptr + kind * line_len,
2178             &consumed);
2179         if (endpos >= 0) {
2180             endpos += start;
2181             if (limit >= 0 && (endpos - start) + chunked >= limit)
2182                 endpos = start + limit - chunked;
2183             break;
2184         }
2185 
2186         /* We can put aside up to `endpos` */
2187         endpos = consumed + start;
2188         if (limit >= 0 && (endpos - start) + chunked >= limit) {
2189             /* Didn't find line ending, but reached length limit */
2190             endpos = start + limit - chunked;
2191             break;
2192         }
2193 
2194         if (endpos > start) {
2195             /* No line ending seen yet - put aside current data */
2196             PyObject *s;
2197             if (chunks == NULL) {
2198                 chunks = PyList_New(0);
2199                 if (chunks == NULL)
2200                     goto error;
2201             }
2202             s = PyUnicode_Substring(line, start, endpos);
2203             if (s == NULL)
2204                 goto error;
2205             if (PyList_Append(chunks, s) < 0) {
2206                 Py_DECREF(s);
2207                 goto error;
2208             }
2209             chunked += PyUnicode_GET_LENGTH(s);
2210             Py_DECREF(s);
2211         }
2212         /* There may be some remaining bytes we'll have to prepend to the
2213            next chunk of data */
2214         if (endpos < line_len) {
2215             remaining = PyUnicode_Substring(line, endpos, line_len);
2216             if (remaining == NULL)
2217                 goto error;
2218         }
2219         Py_CLEAR(line);
2220         /* We have consumed the buffer */
2221         textiowrapper_set_decoded_chars(self, NULL);
2222     }
2223 
2224     if (line != NULL) {
2225         /* Our line ends in the current buffer */
2226         self->decoded_chars_used = endpos - offset_to_buffer;
2227         if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2228             PyObject *s = PyUnicode_Substring(line, start, endpos);
2229             Py_CLEAR(line);
2230             if (s == NULL)
2231                 goto error;
2232             line = s;
2233         }
2234     }
2235     if (remaining != NULL) {
2236         if (chunks == NULL) {
2237             chunks = PyList_New(0);
2238             if (chunks == NULL)
2239                 goto error;
2240         }
2241         if (PyList_Append(chunks, remaining) < 0)
2242             goto error;
2243         Py_CLEAR(remaining);
2244     }
2245     if (chunks != NULL) {
2246         if (line != NULL) {
2247             if (PyList_Append(chunks, line) < 0)
2248                 goto error;
2249             Py_DECREF(line);
2250         }
2251         line = PyUnicode_Join(&_Py_STR(empty), chunks);
2252         if (line == NULL)
2253             goto error;
2254         Py_CLEAR(chunks);
2255     }
2256     if (line == NULL) {
2257         line = Py_NewRef(&_Py_STR(empty));
2258     }
2259 
2260     return line;
2261 
2262   error:
2263     Py_XDECREF(chunks);
2264     Py_XDECREF(remaining);
2265     Py_XDECREF(line);
2266     return NULL;
2267 }
2268 
2269 /*[clinic input]
2270 _io.TextIOWrapper.readline
2271     size: Py_ssize_t = -1
2272     /
2273 [clinic start generated code]*/
2274 
2275 static PyObject *
_io_TextIOWrapper_readline_impl(textio * self,Py_ssize_t size)2276 _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2277 /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2278 {
2279     CHECK_ATTACHED(self);
2280     return _textiowrapper_readline(self, size);
2281 }
2282 
2283 /* Seek and Tell */
2284 
2285 typedef struct {
2286     Py_off_t start_pos;
2287     int dec_flags;
2288     int bytes_to_feed;
2289     int chars_to_skip;
2290     char need_eof;
2291 } cookie_type;
2292 
/*
   To speed up cookie packing/unpacking, the fields are stored in a temporary
   byte string which is converted with _PyLong_FromByteArray() or
   _PyLong_AsByteArray() respectively.
   The macros below give the offset of each cookie_type field inside that
   intermediary byte string; every offset is expressed relative to the field
   stored immediately before it.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order: need_eof comes first and start_pos
   last. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie, so the fields
   are laid out in declaration order. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
2324 
2325 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)2326 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2327 {
2328     unsigned char buffer[COOKIE_BUF_LEN];
2329     PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2330     if (cookieLong == NULL)
2331         return -1;
2332 
2333     if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2334                             PY_LITTLE_ENDIAN, 0) < 0) {
2335         Py_DECREF(cookieLong);
2336         return -1;
2337     }
2338     Py_DECREF(cookieLong);
2339 
2340     memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2341     memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2342     memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2343     memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2344     memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2345 
2346     return 0;
2347 }
2348 
2349 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)2350 textiowrapper_build_cookie(cookie_type *cookie)
2351 {
2352     unsigned char buffer[COOKIE_BUF_LEN];
2353 
2354     memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2355     memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2356     memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2357     memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2358     memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2359 
2360     return _PyLong_FromByteArray(buffer, sizeof(buffer),
2361                                  PY_LITTLE_ENDIAN, 0);
2362 }
2363 
2364 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)2365 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2366 {
2367     PyObject *res;
2368     /* When seeking to the start of the stream, we call decoder.reset()
2369        rather than decoder.getstate().
2370        This is for a few decoders such as utf-16 for which the state value
2371        at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2372        utf-16, that we are expecting a BOM).
2373     */
2374     if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2375         res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2376     }
2377     else {
2378         res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
2379                                    "((yi))", "", cookie->dec_flags);
2380     }
2381     if (res == NULL) {
2382         return -1;
2383     }
2384     Py_DECREF(res);
2385     return 0;
2386 }
2387 
2388 static int
_textiowrapper_encoder_reset(textio * self,int start_of_stream)2389 _textiowrapper_encoder_reset(textio *self, int start_of_stream)
2390 {
2391     PyObject *res;
2392     if (start_of_stream) {
2393         res = PyObject_CallMethodNoArgs(self->encoder, &_Py_ID(reset));
2394         self->encoding_start_of_stream = 1;
2395     }
2396     else {
2397         res = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(setstate),
2398                                         _PyLong_GetZero());
2399         self->encoding_start_of_stream = 0;
2400     }
2401     if (res == NULL)
2402         return -1;
2403     Py_DECREF(res);
2404     return 0;
2405 }
2406 
2407 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)2408 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2409 {
2410     /* Same as _textiowrapper_decoder_setstate() above. */
2411     return _textiowrapper_encoder_reset(
2412         self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2413 }
2414 
2415 /*[clinic input]
2416 _io.TextIOWrapper.seek
2417     cookie as cookieObj: object
2418     whence: int = 0
2419     /
2420 [clinic start generated code]*/
2421 
2422 static PyObject *
_io_TextIOWrapper_seek_impl(textio * self,PyObject * cookieObj,int whence)2423 _io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2424 /*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
2425 {
2426     PyObject *posobj;
2427     cookie_type cookie;
2428     PyObject *res;
2429     int cmp;
2430     PyObject *snapshot;
2431 
2432     CHECK_ATTACHED(self);
2433     CHECK_CLOSED(self);
2434 
2435     Py_INCREF(cookieObj);
2436 
2437     if (!self->seekable) {
2438         _unsupported("underlying stream is not seekable");
2439         goto fail;
2440     }
2441 
2442     PyObject *zero = _PyLong_GetZero();  // borrowed reference
2443 
2444     switch (whence) {
2445     case SEEK_CUR:
2446         /* seek relative to current position */
2447         cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2448         if (cmp < 0)
2449             goto fail;
2450 
2451         if (cmp == 0) {
2452             _unsupported("can't do nonzero cur-relative seeks");
2453             goto fail;
2454         }
2455 
2456         /* Seeking to the current position should attempt to
2457          * sync the underlying buffer with the current position.
2458          */
2459         Py_DECREF(cookieObj);
2460         cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell));
2461         if (cookieObj == NULL)
2462             goto fail;
2463         break;
2464 
2465     case SEEK_END:
2466         /* seek relative to end of file */
2467         cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2468         if (cmp < 0)
2469             goto fail;
2470 
2471         if (cmp == 0) {
2472             _unsupported("can't do nonzero end-relative seeks");
2473             goto fail;
2474         }
2475 
2476         res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
2477         if (res == NULL)
2478             goto fail;
2479         Py_DECREF(res);
2480 
2481         textiowrapper_set_decoded_chars(self, NULL);
2482         Py_CLEAR(self->snapshot);
2483         if (self->decoder) {
2484             res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2485             if (res == NULL)
2486                 goto fail;
2487             Py_DECREF(res);
2488         }
2489 
2490         res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
2491         Py_CLEAR(cookieObj);
2492         if (res == NULL)
2493             goto fail;
2494         if (self->encoder) {
2495             /* If seek() == 0, we are at the start of stream, otherwise not */
2496             cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
2497             if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2498                 Py_DECREF(res);
2499                 goto fail;
2500             }
2501         }
2502         return res;
2503 
2504     case SEEK_SET:
2505         break;
2506 
2507     default:
2508         PyErr_Format(PyExc_ValueError,
2509                      "invalid whence (%d, should be %d, %d or %d)", whence,
2510                      SEEK_SET, SEEK_CUR, SEEK_END);
2511         goto fail;
2512     }
2513 
2514     cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
2515     if (cmp < 0)
2516         goto fail;
2517 
2518     if (cmp == 1) {
2519         PyErr_Format(PyExc_ValueError,
2520                      "negative seek position %R", cookieObj);
2521         goto fail;
2522     }
2523 
2524     res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
2525     if (res == NULL)
2526         goto fail;
2527     Py_DECREF(res);
2528 
2529     /* The strategy of seek() is to go back to the safe start point
2530      * and replay the effect of read(chars_to_skip) from there.
2531      */
2532     if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2533         goto fail;
2534 
2535     /* Seek back to the safe start point. */
2536     posobj = PyLong_FromOff_t(cookie.start_pos);
2537     if (posobj == NULL)
2538         goto fail;
2539     res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj);
2540     Py_DECREF(posobj);
2541     if (res == NULL)
2542         goto fail;
2543     Py_DECREF(res);
2544 
2545     textiowrapper_set_decoded_chars(self, NULL);
2546     Py_CLEAR(self->snapshot);
2547 
2548     /* Restore the decoder to its state from the safe start point. */
2549     if (self->decoder) {
2550         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2551             goto fail;
2552     }
2553 
2554     if (cookie.chars_to_skip) {
2555         /* Just like _read_chunk, feed the decoder and save a snapshot. */
2556         PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
2557                                                      "i", cookie.bytes_to_feed);
2558         PyObject *decoded;
2559 
2560         if (input_chunk == NULL)
2561             goto fail;
2562 
2563         if (!PyBytes_Check(input_chunk)) {
2564             PyErr_Format(PyExc_TypeError,
2565                          "underlying read() should have returned a bytes "
2566                          "object, not '%.200s'",
2567                          Py_TYPE(input_chunk)->tp_name);
2568             Py_DECREF(input_chunk);
2569             goto fail;
2570         }
2571 
2572         snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2573         if (snapshot == NULL) {
2574             goto fail;
2575         }
2576         Py_XSETREF(self->snapshot, snapshot);
2577 
2578         decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode),
2579             input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
2580 
2581         if (check_decoded(decoded) < 0)
2582             goto fail;
2583 
2584         textiowrapper_set_decoded_chars(self, decoded);
2585 
2586         /* Skip chars_to_skip of the decoded characters. */
2587         if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2588             PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2589             goto fail;
2590         }
2591         self->decoded_chars_used = cookie.chars_to_skip;
2592     }
2593     else {
2594         snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2595         if (snapshot == NULL)
2596             goto fail;
2597         Py_XSETREF(self->snapshot, snapshot);
2598     }
2599 
2600     /* Finally, reset the encoder (merely useful for proper BOM handling) */
2601     if (self->encoder) {
2602         if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2603             goto fail;
2604     }
2605     return cookieObj;
2606   fail:
2607     Py_XDECREF(cookieObj);
2608     return NULL;
2609 
2610 }
2611 
2612 /*[clinic input]
2613 _io.TextIOWrapper.tell
2614 [clinic start generated code]*/
2615 
2616 static PyObject *
_io_TextIOWrapper_tell_impl(textio * self)2617 _io_TextIOWrapper_tell_impl(textio *self)
2618 /*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
2619 {
2620     PyObject *res;
2621     PyObject *posobj = NULL;
2622     cookie_type cookie = {0,0,0,0,0};
2623     PyObject *next_input;
2624     Py_ssize_t chars_to_skip, chars_decoded;
2625     Py_ssize_t skip_bytes, skip_back;
2626     PyObject *saved_state = NULL;
2627     const char *input, *input_end;
2628     Py_ssize_t dec_buffer_len;
2629     int dec_flags;
2630 
2631     CHECK_ATTACHED(self);
2632     CHECK_CLOSED(self);
2633 
2634     if (!self->seekable) {
2635         _unsupported("underlying stream is not seekable");
2636         goto fail;
2637     }
2638     if (!self->telling) {
2639         PyErr_SetString(PyExc_OSError,
2640                         "telling position disabled by next() call");
2641         goto fail;
2642     }
2643 
2644     if (_textiowrapper_writeflush(self) < 0)
2645         return NULL;
2646     res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
2647     if (res == NULL)
2648         goto fail;
2649     Py_DECREF(res);
2650 
2651     posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
2652     if (posobj == NULL)
2653         goto fail;
2654 
2655     if (self->decoder == NULL || self->snapshot == NULL) {
2656         assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2657         return posobj;
2658     }
2659 
2660 #if defined(HAVE_LARGEFILE_SUPPORT)
2661     cookie.start_pos = PyLong_AsLongLong(posobj);
2662 #else
2663     cookie.start_pos = PyLong_AsLong(posobj);
2664 #endif
2665     Py_DECREF(posobj);
2666     if (PyErr_Occurred())
2667         goto fail;
2668 
2669     /* Skip backward to the snapshot point (see _read_chunk). */
2670     assert(PyTuple_Check(self->snapshot));
2671     if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2672         goto fail;
2673 
2674     assert (PyBytes_Check(next_input));
2675 
2676     cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2677 
2678     /* How many decoded characters have been used up since the snapshot? */
2679     if (self->decoded_chars_used == 0)  {
2680         /* We haven't moved from the snapshot point. */
2681         return textiowrapper_build_cookie(&cookie);
2682     }
2683 
2684     chars_to_skip = self->decoded_chars_used;
2685 
2686     /* Decoder state will be restored at the end */
2687     saved_state = PyObject_CallMethodNoArgs(self->decoder,
2688                                              &_Py_ID(getstate));
2689     if (saved_state == NULL)
2690         goto fail;
2691 
2692 #define DECODER_GETSTATE() do { \
2693         PyObject *dec_buffer; \
2694         PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
2695             &_Py_ID(getstate)); \
2696         if (_state == NULL) \
2697             goto fail; \
2698         if (!PyTuple_Check(_state)) { \
2699             PyErr_SetString(PyExc_TypeError, \
2700                             "illegal decoder state"); \
2701             Py_DECREF(_state); \
2702             goto fail; \
2703         } \
2704         if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2705                               &dec_buffer, &dec_flags)) \
2706         { \
2707             Py_DECREF(_state); \
2708             goto fail; \
2709         } \
2710         if (!PyBytes_Check(dec_buffer)) { \
2711             PyErr_Format(PyExc_TypeError, \
2712                          "illegal decoder state: the first item should be a " \
2713                          "bytes object, not '%.200s'", \
2714                          Py_TYPE(dec_buffer)->tp_name); \
2715             Py_DECREF(_state); \
2716             goto fail; \
2717         } \
2718         dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2719         Py_DECREF(_state); \
2720     } while (0)
2721 
2722 #define DECODER_DECODE(start, len, res) do { \
2723         PyObject *_decoded = _PyObject_CallMethod( \
2724             self->decoder, &_Py_ID(decode), "y#", start, len); \
2725         if (check_decoded(_decoded) < 0) \
2726             goto fail; \
2727         res = PyUnicode_GET_LENGTH(_decoded); \
2728         Py_DECREF(_decoded); \
2729     } while (0)
2730 
2731     /* Fast search for an acceptable start point, close to our
2732        current pos */
2733     skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2734     skip_back = 1;
2735     assert(skip_back <= PyBytes_GET_SIZE(next_input));
2736     input = PyBytes_AS_STRING(next_input);
2737     while (skip_bytes > 0) {
2738         /* Decode up to temptative start point */
2739         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2740             goto fail;
2741         DECODER_DECODE(input, skip_bytes, chars_decoded);
2742         if (chars_decoded <= chars_to_skip) {
2743             DECODER_GETSTATE();
2744             if (dec_buffer_len == 0) {
2745                 /* Before pos and no bytes buffered in decoder => OK */
2746                 cookie.dec_flags = dec_flags;
2747                 chars_to_skip -= chars_decoded;
2748                 break;
2749             }
2750             /* Skip back by buffered amount and reset heuristic */
2751             skip_bytes -= dec_buffer_len;
2752             skip_back = 1;
2753         }
2754         else {
2755             /* We're too far ahead, skip back a bit */
2756             skip_bytes -= skip_back;
2757             skip_back *= 2;
2758         }
2759     }
2760     if (skip_bytes <= 0) {
2761         skip_bytes = 0;
2762         if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2763             goto fail;
2764     }
2765 
2766     /* Note our initial start point. */
2767     cookie.start_pos += skip_bytes;
2768     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2769     if (chars_to_skip == 0)
2770         goto finally;
2771 
2772     /* We should be close to the desired position.  Now feed the decoder one
2773      * byte at a time until we reach the `chars_to_skip` target.
2774      * As we go, note the nearest "safe start point" before the current
2775      * location (a point where the decoder has nothing buffered, so seek()
2776      * can safely start from there and advance to this location).
2777      */
2778     chars_decoded = 0;
2779     input = PyBytes_AS_STRING(next_input);
2780     input_end = input + PyBytes_GET_SIZE(next_input);
2781     input += skip_bytes;
2782     while (input < input_end) {
2783         Py_ssize_t n;
2784 
2785         DECODER_DECODE(input, (Py_ssize_t)1, n);
2786         /* We got n chars for 1 byte */
2787         chars_decoded += n;
2788         cookie.bytes_to_feed += 1;
2789         DECODER_GETSTATE();
2790 
2791         if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2792             /* Decoder buffer is empty, so this is a safe start point. */
2793             cookie.start_pos += cookie.bytes_to_feed;
2794             chars_to_skip -= chars_decoded;
2795             cookie.dec_flags = dec_flags;
2796             cookie.bytes_to_feed = 0;
2797             chars_decoded = 0;
2798         }
2799         if (chars_decoded >= chars_to_skip)
2800             break;
2801         input++;
2802     }
2803     if (input == input_end) {
2804         /* We didn't get enough decoded data; signal EOF to get more. */
2805         PyObject *decoded = _PyObject_CallMethod(
2806             self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True);
2807         if (check_decoded(decoded) < 0)
2808             goto fail;
2809         chars_decoded += PyUnicode_GET_LENGTH(decoded);
2810         Py_DECREF(decoded);
2811         cookie.need_eof = 1;
2812 
2813         if (chars_decoded < chars_to_skip) {
2814             PyErr_SetString(PyExc_OSError,
2815                             "can't reconstruct logical file position");
2816             goto fail;
2817         }
2818     }
2819 
2820 finally:
2821     res = PyObject_CallMethodOneArg(
2822             self->decoder, &_Py_ID(setstate), saved_state);
2823     Py_DECREF(saved_state);
2824     if (res == NULL)
2825         return NULL;
2826     Py_DECREF(res);
2827 
2828     /* The returned cookie corresponds to the last safe start point. */
2829     cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2830     return textiowrapper_build_cookie(&cookie);
2831 
2832 fail:
2833     if (saved_state) {
2834         PyObject *type, *value, *traceback;
2835         PyErr_Fetch(&type, &value, &traceback);
2836         res = PyObject_CallMethodOneArg(
2837                 self->decoder, &_Py_ID(setstate), saved_state);
2838         _PyErr_ChainExceptions(type, value, traceback);
2839         Py_DECREF(saved_state);
2840         Py_XDECREF(res);
2841     }
2842     return NULL;
2843 }
2844 
2845 /*[clinic input]
2846 _io.TextIOWrapper.truncate
2847     pos: object = None
2848     /
2849 [clinic start generated code]*/
2850 
2851 static PyObject *
_io_TextIOWrapper_truncate_impl(textio * self,PyObject * pos)2852 _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2853 /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
2854 {
2855     PyObject *res;
2856 
2857     CHECK_ATTACHED(self)
2858 
2859     res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
2860     if (res == NULL)
2861         return NULL;
2862     Py_DECREF(res);
2863 
2864     return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
2865 }
2866 
2867 static PyObject *
textiowrapper_repr(textio * self)2868 textiowrapper_repr(textio *self)
2869 {
2870     PyObject *nameobj, *modeobj, *res, *s;
2871     int status;
2872 
2873     CHECK_INITIALIZED(self);
2874 
2875     res = PyUnicode_FromString("<_io.TextIOWrapper");
2876     if (res == NULL)
2877         return NULL;
2878 
2879     status = Py_ReprEnter((PyObject *)self);
2880     if (status != 0) {
2881         if (status > 0) {
2882             PyErr_Format(PyExc_RuntimeError,
2883                          "reentrant call inside %s.__repr__",
2884                          Py_TYPE(self)->tp_name);
2885         }
2886         goto error;
2887     }
2888     if (_PyObject_LookupAttr((PyObject *) self, &_Py_ID(name), &nameobj) < 0) {
2889         if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
2890             goto error;
2891         }
2892         /* Ignore ValueError raised if the underlying stream was detached */
2893         PyErr_Clear();
2894     }
2895     if (nameobj != NULL) {
2896         s = PyUnicode_FromFormat(" name=%R", nameobj);
2897         Py_DECREF(nameobj);
2898         if (s == NULL)
2899             goto error;
2900         PyUnicode_AppendAndDel(&res, s);
2901         if (res == NULL)
2902             goto error;
2903     }
2904     if (_PyObject_LookupAttr((PyObject *) self, &_Py_ID(mode), &modeobj) < 0) {
2905         goto error;
2906     }
2907     if (modeobj != NULL) {
2908         s = PyUnicode_FromFormat(" mode=%R", modeobj);
2909         Py_DECREF(modeobj);
2910         if (s == NULL)
2911             goto error;
2912         PyUnicode_AppendAndDel(&res, s);
2913         if (res == NULL)
2914             goto error;
2915     }
2916     s = PyUnicode_FromFormat("%U encoding=%R>",
2917                              res, self->encoding);
2918     Py_DECREF(res);
2919     if (status == 0) {
2920         Py_ReprLeave((PyObject *)self);
2921     }
2922     return s;
2923 
2924   error:
2925     Py_XDECREF(res);
2926     if (status == 0) {
2927         Py_ReprLeave((PyObject *)self);
2928     }
2929     return NULL;
2930 }
2931 
2932 
2933 /* Inquiries */
2934 
2935 /*[clinic input]
2936 _io.TextIOWrapper.fileno
2937 [clinic start generated code]*/
2938 
2939 static PyObject *
_io_TextIOWrapper_fileno_impl(textio * self)2940 _io_TextIOWrapper_fileno_impl(textio *self)
2941 /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
2942 {
2943     CHECK_ATTACHED(self);
2944     return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
2945 }
2946 
2947 /*[clinic input]
2948 _io.TextIOWrapper.seekable
2949 [clinic start generated code]*/
2950 
2951 static PyObject *
_io_TextIOWrapper_seekable_impl(textio * self)2952 _io_TextIOWrapper_seekable_impl(textio *self)
2953 /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
2954 {
2955     CHECK_ATTACHED(self);
2956     return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
2957 }
2958 
2959 /*[clinic input]
2960 _io.TextIOWrapper.readable
2961 [clinic start generated code]*/
2962 
2963 static PyObject *
_io_TextIOWrapper_readable_impl(textio * self)2964 _io_TextIOWrapper_readable_impl(textio *self)
2965 /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
2966 {
2967     CHECK_ATTACHED(self);
2968     return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
2969 }
2970 
2971 /*[clinic input]
2972 _io.TextIOWrapper.writable
2973 [clinic start generated code]*/
2974 
2975 static PyObject *
_io_TextIOWrapper_writable_impl(textio * self)2976 _io_TextIOWrapper_writable_impl(textio *self)
2977 /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
2978 {
2979     CHECK_ATTACHED(self);
2980     return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
2981 }
2982 
2983 /*[clinic input]
2984 _io.TextIOWrapper.isatty
2985 [clinic start generated code]*/
2986 
2987 static PyObject *
_io_TextIOWrapper_isatty_impl(textio * self)2988 _io_TextIOWrapper_isatty_impl(textio *self)
2989 /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
2990 {
2991     CHECK_ATTACHED(self);
2992     return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
2993 }
2994 
2995 /*[clinic input]
2996 _io.TextIOWrapper.flush
2997 [clinic start generated code]*/
2998 
2999 static PyObject *
_io_TextIOWrapper_flush_impl(textio * self)3000 _io_TextIOWrapper_flush_impl(textio *self)
3001 /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
3002 {
3003     CHECK_ATTACHED(self);
3004     CHECK_CLOSED(self);
3005     self->telling = self->seekable;
3006     if (_textiowrapper_writeflush(self) < 0)
3007         return NULL;
3008     return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
3009 }
3010 
3011 /*[clinic input]
3012 _io.TextIOWrapper.close
3013 [clinic start generated code]*/
3014 
3015 static PyObject *
_io_TextIOWrapper_close_impl(textio * self)3016 _io_TextIOWrapper_close_impl(textio *self)
3017 /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
3018 {
3019     PyObject *res;
3020     int r;
3021     CHECK_ATTACHED(self);
3022 
3023     res = textiowrapper_closed_get(self, NULL);
3024     if (res == NULL)
3025         return NULL;
3026     r = PyObject_IsTrue(res);
3027     Py_DECREF(res);
3028     if (r < 0)
3029         return NULL;
3030 
3031     if (r > 0) {
3032         Py_RETURN_NONE; /* stream already closed */
3033     }
3034     else {
3035         PyObject *exc = NULL, *val, *tb;
3036         if (self->finalizing) {
3037             res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
3038                                             (PyObject *)self);
3039             if (res)
3040                 Py_DECREF(res);
3041             else
3042                 PyErr_Clear();
3043         }
3044         res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
3045         if (res == NULL)
3046             PyErr_Fetch(&exc, &val, &tb);
3047         else
3048             Py_DECREF(res);
3049 
3050         res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
3051         if (exc != NULL) {
3052             _PyErr_ChainExceptions(exc, val, tb);
3053             Py_CLEAR(res);
3054         }
3055         return res;
3056     }
3057 }
3058 
3059 static PyObject *
textiowrapper_iternext(textio * self)3060 textiowrapper_iternext(textio *self)
3061 {
3062     PyObject *line;
3063 
3064     CHECK_ATTACHED(self);
3065 
3066     self->telling = 0;
3067     if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) {
3068         /* Skip method call overhead for speed */
3069         line = _textiowrapper_readline(self, -1);
3070     }
3071     else {
3072         line = PyObject_CallMethodNoArgs((PyObject *)self,
3073                                           &_Py_ID(readline));
3074         if (line && !PyUnicode_Check(line)) {
3075             PyErr_Format(PyExc_OSError,
3076                          "readline() should have returned a str object, "
3077                          "not '%.200s'", Py_TYPE(line)->tp_name);
3078             Py_DECREF(line);
3079             return NULL;
3080         }
3081     }
3082 
3083     if (line == NULL || PyUnicode_READY(line) == -1)
3084         return NULL;
3085 
3086     if (PyUnicode_GET_LENGTH(line) == 0) {
3087         /* Reached EOF or would have blocked */
3088         Py_DECREF(line);
3089         Py_CLEAR(self->snapshot);
3090         self->telling = self->seekable;
3091         return NULL;
3092     }
3093 
3094     return line;
3095 }
3096 
3097 static PyObject *
textiowrapper_name_get(textio * self,void * context)3098 textiowrapper_name_get(textio *self, void *context)
3099 {
3100     CHECK_ATTACHED(self);
3101     return PyObject_GetAttr(self->buffer, &_Py_ID(name));
3102 }
3103 
3104 static PyObject *
textiowrapper_closed_get(textio * self,void * context)3105 textiowrapper_closed_get(textio *self, void *context)
3106 {
3107     CHECK_ATTACHED(self);
3108     return PyObject_GetAttr(self->buffer, &_Py_ID(closed));
3109 }
3110 
3111 static PyObject *
textiowrapper_newlines_get(textio * self,void * context)3112 textiowrapper_newlines_get(textio *self, void *context)
3113 {
3114     PyObject *res;
3115     CHECK_ATTACHED(self);
3116     if (self->decoder == NULL ||
3117         _PyObject_LookupAttr(self->decoder, &_Py_ID(newlines), &res) == 0)
3118     {
3119         Py_RETURN_NONE;
3120     }
3121     return res;
3122 }
3123 
3124 static PyObject *
textiowrapper_errors_get(textio * self,void * context)3125 textiowrapper_errors_get(textio *self, void *context)
3126 {
3127     CHECK_INITIALIZED(self);
3128     Py_INCREF(self->errors);
3129     return self->errors;
3130 }
3131 
3132 static PyObject *
textiowrapper_chunk_size_get(textio * self,void * context)3133 textiowrapper_chunk_size_get(textio *self, void *context)
3134 {
3135     CHECK_ATTACHED(self);
3136     return PyLong_FromSsize_t(self->chunk_size);
3137 }
3138 
3139 static int
textiowrapper_chunk_size_set(textio * self,PyObject * arg,void * context)3140 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
3141 {
3142     Py_ssize_t n;
3143     CHECK_ATTACHED_INT(self);
3144     if (arg == NULL) {
3145         PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3146         return -1;
3147     }
3148     n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
3149     if (n == -1 && PyErr_Occurred())
3150         return -1;
3151     if (n <= 0) {
3152         PyErr_SetString(PyExc_ValueError,
3153                         "a strictly positive integer is required");
3154         return -1;
3155     }
3156     self->chunk_size = n;
3157     return 0;
3158 }
3159 
3160 #include "clinic/textio.c.h"
3161 
/* Method table for _io.IncrementalNewlineDecoder (used as tp_methods of
   PyIncrementalNewlineDecoder_Type).  Each entry is a *_METHODDEF macro,
   presumably supplied by the "clinic/textio.c.h" include above; the
   {NULL} entry terminates the table. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}
};
3169 
3170 static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3171     {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3172     {NULL}
3173 };
3174 
3175 PyTypeObject PyIncrementalNewlineDecoder_Type = {
3176     PyVarObject_HEAD_INIT(NULL, 0)
3177     "_io.IncrementalNewlineDecoder", /*tp_name*/
3178     sizeof(nldecoder_object), /*tp_basicsize*/
3179     0,                          /*tp_itemsize*/
3180     (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
3181     0,                          /*tp_vectorcall_offset*/
3182     0,                          /*tp_getattr*/
3183     0,                          /*tp_setattr*/
3184     0,                          /*tp_as_async*/
3185     0,                          /*tp_repr*/
3186     0,                          /*tp_as_number*/
3187     0,                          /*tp_as_sequence*/
3188     0,                          /*tp_as_mapping*/
3189     0,                          /*tp_hash */
3190     0,                          /*tp_call*/
3191     0,                          /*tp_str*/
3192     0,                          /*tp_getattro*/
3193     0,                          /*tp_setattro*/
3194     0,                          /*tp_as_buffer*/
3195     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
3196     _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
3197     0,                          /* tp_traverse */
3198     0,                          /* tp_clear */
3199     0,                          /* tp_richcompare */
3200     0,                          /*tp_weaklistoffset*/
3201     0,                          /* tp_iter */
3202     0,                          /* tp_iternext */
3203     incrementalnewlinedecoder_methods, /* tp_methods */
3204     0,                          /* tp_members */
3205     incrementalnewlinedecoder_getset, /* tp_getset */
3206     0,                          /* tp_base */
3207     0,                          /* tp_dict */
3208     0,                          /* tp_descr_get */
3209     0,                          /* tp_descr_set */
3210     0,                          /* tp_dictoffset */
3211     _io_IncrementalNewlineDecoder___init__, /* tp_init */
3212     0,                          /* tp_alloc */
3213     PyType_GenericNew,          /* tp_new */
3214 };
3215 
3216 
/* Method table for _io.TextIOWrapper (used as tp_methods of
   PyTextIOWrapper_Type).  Each entry is a *_METHODDEF macro, presumably
   supplied by the "clinic/textio.c.h" include above; the {NULL, NULL}
   entry terminates the table. */
static PyMethodDef textiowrapper_methods[] = {
    /* Core stream operations */
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    /* Inquiries, each forwarded to the underlying buffer */
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF

    /* Positioning */
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}
};
3237 
3238 static PyMemberDef textiowrapper_members[] = {
3239     {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3240     {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3241     {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
3242     {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
3243     {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
3244     {NULL}
3245 };
3246 
3247 static PyGetSetDef textiowrapper_getset[] = {
3248     {"name", (getter)textiowrapper_name_get, NULL, NULL},
3249     {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
3250 /*    {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3251 */
3252     {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3253     {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3254     {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3255                     (setter)textiowrapper_chunk_size_set, NULL},
3256     {NULL}
3257 };
3258 
3259 PyTypeObject PyTextIOWrapper_Type = {
3260     PyVarObject_HEAD_INIT(NULL, 0)
3261     "_io.TextIOWrapper",        /*tp_name*/
3262     sizeof(textio), /*tp_basicsize*/
3263     0,                          /*tp_itemsize*/
3264     (destructor)textiowrapper_dealloc, /*tp_dealloc*/
3265     0,                          /*tp_vectorcall_offset*/
3266     0,                          /*tp_getattr*/
3267     0,                          /*tps_etattr*/
3268     0,                          /*tp_as_async*/
3269     (reprfunc)textiowrapper_repr,/*tp_repr*/
3270     0,                          /*tp_as_number*/
3271     0,                          /*tp_as_sequence*/
3272     0,                          /*tp_as_mapping*/
3273     0,                          /*tp_hash */
3274     0,                          /*tp_call*/
3275     0,                          /*tp_str*/
3276     0,                          /*tp_getattro*/
3277     0,                          /*tp_setattro*/
3278     0,                          /*tp_as_buffer*/
3279     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
3280         | Py_TPFLAGS_HAVE_GC,   /*tp_flags*/
3281     _io_TextIOWrapper___init____doc__, /* tp_doc */
3282     (traverseproc)textiowrapper_traverse, /* tp_traverse */
3283     (inquiry)textiowrapper_clear, /* tp_clear */
3284     0,                          /* tp_richcompare */
3285     offsetof(textio, weakreflist), /*tp_weaklistoffset*/
3286     0,                          /* tp_iter */
3287     (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3288     textiowrapper_methods,      /* tp_methods */
3289     textiowrapper_members,      /* tp_members */
3290     textiowrapper_getset,       /* tp_getset */
3291     0,                          /* tp_base */
3292     0,                          /* tp_dict */
3293     0,                          /* tp_descr_get */
3294     0,                          /* tp_descr_set */
3295     offsetof(textio, dict), /*tp_dictoffset*/
3296     _io_TextIOWrapper___init__, /* tp_init */
3297     0,                          /* tp_alloc */
3298     PyType_GenericNew,          /* tp_new */
3299     0,                          /* tp_free */
3300     0,                          /* tp_is_gc */
3301     0,                          /* tp_bases */
3302     0,                          /* tp_mro */
3303     0,                          /* tp_cache */
3304     0,                          /* tp_subclasses */
3305     0,                          /* tp_weaklist */
3306     0,                          /* tp_del */
3307     0,                          /* tp_version_tag */
3308     0,                          /* tp_finalize */
3309 };
3310