1 /*
2 An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4 Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6 Written by Amaury Forgeot d'Arc and Antoine Pitrou
7 */
8
9 #define PY_SSIZE_T_CLEAN
10 #include "Python.h"
11 #include "pycore_interp.h" // PyInterpreterState.fs_codec
12 #include "pycore_long.h" // _PyLong_GetZero()
13 #include "pycore_fileutils.h" // _Py_GetLocaleEncoding()
14 #include "pycore_object.h"
15 #include "pycore_pystate.h" // _PyInterpreterState_GET()
16 #include "structmember.h" // PyMemberDef
17 #include "_iomodule.h"
18
19 /*[clinic input]
20 module _io
21 class _io.IncrementalNewlineDecoder "nldecoder_object *" "&PyIncrementalNewlineDecoder_Type"
22 class _io.TextIOWrapper "textio *" "&TextIOWrapper_Type"
23 [clinic start generated code]*/
24 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=ed072384f8aada2c]*/
25
26 /* TextIOBase */
27
/* Class docstring for _io._TextIOBase; installed as tp_doc of
   PyTextIOBase_Type. */
PyDoc_STRVAR(textiobase_doc,
    "Base class for text I/O.\n"
    "\n"
    "This class provides a character and line based interface to stream\n"
    "I/O. There is no readinto method because Python's character strings\n"
    "are immutable.\n"
    );
35
36 static PyObject *
_unsupported(const char * message)37 _unsupported(const char *message)
38 {
39 _PyIO_State *state = IO_STATE();
40 if (state != NULL)
41 PyErr_SetString(state->unsupported_operation, message);
42 return NULL;
43 }
44
45 PyDoc_STRVAR(textiobase_detach_doc,
46 "Separate the underlying buffer from the TextIOBase and return it.\n"
47 "\n"
48 "After the underlying buffer has been detached, the TextIO is in an\n"
49 "unusable state.\n"
50 );
51
52 static PyObject *
textiobase_detach(PyObject * self,PyObject * Py_UNUSED (ignored))53 textiobase_detach(PyObject *self, PyObject *Py_UNUSED(ignored))
54 {
55 return _unsupported("detach");
56 }
57
58 PyDoc_STRVAR(textiobase_read_doc,
59 "Read at most n characters from stream.\n"
60 "\n"
61 "Read from underlying buffer until we have n characters or we hit EOF.\n"
62 "If n is negative or omitted, read until EOF.\n"
63 );
64
65 static PyObject *
textiobase_read(PyObject * self,PyObject * args)66 textiobase_read(PyObject *self, PyObject *args)
67 {
68 return _unsupported("read");
69 }
70
71 PyDoc_STRVAR(textiobase_readline_doc,
72 "Read until newline or EOF.\n"
73 "\n"
74 "Returns an empty string if EOF is hit immediately.\n"
75 );
76
77 static PyObject *
textiobase_readline(PyObject * self,PyObject * args)78 textiobase_readline(PyObject *self, PyObject *args)
79 {
80 return _unsupported("readline");
81 }
82
83 PyDoc_STRVAR(textiobase_write_doc,
84 "Write string to stream.\n"
85 "Returns the number of characters written (which is always equal to\n"
86 "the length of the string).\n"
87 );
88
89 static PyObject *
textiobase_write(PyObject * self,PyObject * args)90 textiobase_write(PyObject *self, PyObject *args)
91 {
92 return _unsupported("write");
93 }
94
95 PyDoc_STRVAR(textiobase_encoding_doc,
96 "Encoding of the text stream.\n"
97 "\n"
98 "Subclasses should override.\n"
99 );
100
101 static PyObject *
textiobase_encoding_get(PyObject * self,void * context)102 textiobase_encoding_get(PyObject *self, void *context)
103 {
104 Py_RETURN_NONE;
105 }
106
107 PyDoc_STRVAR(textiobase_newlines_doc,
108 "Line endings translated so far.\n"
109 "\n"
110 "Only line endings translated during reading are considered.\n"
111 "\n"
112 "Subclasses should override.\n"
113 );
114
115 static PyObject *
textiobase_newlines_get(PyObject * self,void * context)116 textiobase_newlines_get(PyObject *self, void *context)
117 {
118 Py_RETURN_NONE;
119 }
120
121 PyDoc_STRVAR(textiobase_errors_doc,
122 "The error setting of the decoder or encoder.\n"
123 "\n"
124 "Subclasses should override.\n"
125 );
126
127 static PyObject *
textiobase_errors_get(PyObject * self,void * context)128 textiobase_errors_get(PyObject *self, void *context)
129 {
130 Py_RETURN_NONE;
131 }
132
133
134 static PyMethodDef textiobase_methods[] = {
135 {"detach", textiobase_detach, METH_NOARGS, textiobase_detach_doc},
136 {"read", textiobase_read, METH_VARARGS, textiobase_read_doc},
137 {"readline", textiobase_readline, METH_VARARGS, textiobase_readline_doc},
138 {"write", textiobase_write, METH_VARARGS, textiobase_write_doc},
139 {NULL, NULL}
140 };
141
142 static PyGetSetDef textiobase_getset[] = {
143 {"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
144 {"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
145 {"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
146 {NULL}
147 };
148
149 PyTypeObject PyTextIOBase_Type = {
150 PyVarObject_HEAD_INIT(NULL, 0)
151 "_io._TextIOBase", /*tp_name*/
152 0, /*tp_basicsize*/
153 0, /*tp_itemsize*/
154 0, /*tp_dealloc*/
155 0, /*tp_vectorcall_offset*/
156 0, /*tp_getattr*/
157 0, /*tp_setattr*/
158 0, /*tp_as_async*/
159 0, /*tp_repr*/
160 0, /*tp_as_number*/
161 0, /*tp_as_sequence*/
162 0, /*tp_as_mapping*/
163 0, /*tp_hash */
164 0, /*tp_call*/
165 0, /*tp_str*/
166 0, /*tp_getattro*/
167 0, /*tp_setattro*/
168 0, /*tp_as_buffer*/
169 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
170 textiobase_doc, /* tp_doc */
171 0, /* tp_traverse */
172 0, /* tp_clear */
173 0, /* tp_richcompare */
174 0, /* tp_weaklistoffset */
175 0, /* tp_iter */
176 0, /* tp_iternext */
177 textiobase_methods, /* tp_methods */
178 0, /* tp_members */
179 textiobase_getset, /* tp_getset */
180 &PyIOBase_Type, /* tp_base */
181 0, /* tp_dict */
182 0, /* tp_descr_get */
183 0, /* tp_descr_set */
184 0, /* tp_dictoffset */
185 0, /* tp_init */
186 0, /* tp_alloc */
187 0, /* tp_new */
188 0, /* tp_free */
189 0, /* tp_is_gc */
190 0, /* tp_bases */
191 0, /* tp_mro */
192 0, /* tp_cache */
193 0, /* tp_subclasses */
194 0, /* tp_weaklist */
195 0, /* tp_del */
196 0, /* tp_version_tag */
197 0, /* tp_finalize */
198 };
199
200
201 /* IncrementalNewlineDecoder */
202
/* Instance layout of _io.IncrementalNewlineDecoder. */
typedef struct {
    PyObject_HEAD
    PyObject *decoder;          /* wrapped incremental decoder; the methods
                                   compare it against Py_None to decide
                                   whether to call into it */
    PyObject *errors;           /* set by __init__; NULL means __init__ has
                                   not been called (see
                                   CHECK_INITIALIZED_DECODER) */
    unsigned int pendingcr: 1;  /* a trailing '\r' was held back by the
                                   previous non-final decode() */
    unsigned int translate: 1;  /* translate "\r\n" and "\r" into "\n" */
    unsigned int seennl: 3;     /* bitmask of SEEN_CR/SEEN_LF/SEEN_CRLF */
} nldecoder_object;
211
212 /*[clinic input]
213 _io.IncrementalNewlineDecoder.__init__
214 decoder: object
215 translate: int
216 errors: object(c_default="NULL") = "strict"
217
218 Codec used when reading a file in universal newlines mode.
219
220 It wraps another incremental decoder, translating \r\n and \r into \n.
221 It also records the types of newlines encountered. When used with
222 translate=False, it ensures that the newline sequence is returned in
223 one piece. When used with decoder=None, it expects unicode strings as
224 decode input and translates newlines without first invoking an external
225 decoder.
226 [clinic start generated code]*/
227
228 static int
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object * self,PyObject * decoder,int translate,PyObject * errors)229 _io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
230 PyObject *decoder, int translate,
231 PyObject *errors)
232 /*[clinic end generated code: output=fbd04d443e764ec2 input=89db6b19c6b126bf]*/
233 {
234
235 if (errors == NULL) {
236 errors = Py_NewRef(&_Py_ID(strict));
237 }
238 else {
239 errors = Py_NewRef(errors);
240 }
241
242 Py_XSETREF(self->errors, errors);
243 Py_XSETREF(self->decoder, Py_NewRef(decoder));
244 self->translate = translate ? 1 : 0;
245 self->seennl = 0;
246 self->pendingcr = 0;
247
248 return 0;
249 }
250
251 static void
incrementalnewlinedecoder_dealloc(nldecoder_object * self)252 incrementalnewlinedecoder_dealloc(nldecoder_object *self)
253 {
254 Py_CLEAR(self->decoder);
255 Py_CLEAR(self->errors);
256 Py_TYPE(self)->tp_free((PyObject *)self);
257 }
258
259 static int
check_decoded(PyObject * decoded)260 check_decoded(PyObject *decoded)
261 {
262 if (decoded == NULL)
263 return -1;
264 if (!PyUnicode_Check(decoded)) {
265 PyErr_Format(PyExc_TypeError,
266 "decoder should return a string result, not '%.200s'",
267 Py_TYPE(decoded)->tp_name);
268 Py_DECREF(decoded);
269 return -1;
270 }
271 if (PyUnicode_READY(decoded) < 0) {
272 Py_DECREF(decoded);
273 return -1;
274 }
275 return 0;
276 }
277
/* Make the calling function return NULL (with ValueError set) when the
   decoder's __init__() has not run yet, i.e. self->errors is still NULL.

   The body is wrapped in do { } while (0) so the macro expands to exactly
   one statement and cannot capture a following `else`; the argument is
   parenthesized so any pointer expression may be passed.  Use it as
   `CHECK_INITIALIZED_DECODER(self);` inside a function returning a
   pointer. */
#define CHECK_INITIALIZED_DECODER(self) \
    do { \
        if ((self)->errors == NULL) { \
            PyErr_SetString(PyExc_ValueError, \
                            "IncrementalNewlineDecoder.__init__() not called"); \
            return NULL; \
        } \
    } while (0)
284
/* Bit flags stored in nldecoder_object.seennl, recording which kinds of
   line ending have been encountered while decoding. */
#define SEEN_CR   1   /* a lone "\r" */
#define SEEN_LF   2   /* a lone "\n" */
#define SEEN_CRLF 4   /* a "\r\n" pair */
/* All three kinds seen: scanning for further kinds can stop. */
#define SEEN_ALL (SEEN_CR | SEEN_LF | SEEN_CRLF)
289
290 PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject * myself,PyObject * input,int final)291 _PyIncrementalNewlineDecoder_decode(PyObject *myself,
292 PyObject *input, int final)
293 {
294 PyObject *output;
295 Py_ssize_t output_len;
296 nldecoder_object *self = (nldecoder_object *) myself;
297
298 CHECK_INITIALIZED_DECODER(self);
299
300 /* decode input (with the eventual \r from a previous pass) */
301 if (self->decoder != Py_None) {
302 output = PyObject_CallMethodObjArgs(self->decoder,
303 &_Py_ID(decode), input, final ? Py_True : Py_False, NULL);
304 }
305 else {
306 output = input;
307 Py_INCREF(output);
308 }
309
310 if (check_decoded(output) < 0)
311 return NULL;
312
313 output_len = PyUnicode_GET_LENGTH(output);
314 if (self->pendingcr && (final || output_len > 0)) {
315 /* Prefix output with CR */
316 int kind;
317 PyObject *modified;
318 char *out;
319
320 modified = PyUnicode_New(output_len + 1,
321 PyUnicode_MAX_CHAR_VALUE(output));
322 if (modified == NULL)
323 goto error;
324 kind = PyUnicode_KIND(modified);
325 out = PyUnicode_DATA(modified);
326 PyUnicode_WRITE(kind, out, 0, '\r');
327 memcpy(out + kind, PyUnicode_DATA(output), kind * output_len);
328 Py_DECREF(output);
329 output = modified; /* output remains ready */
330 self->pendingcr = 0;
331 output_len++;
332 }
333
334 /* retain last \r even when not translating data:
335 * then readline() is sure to get \r\n in one pass
336 */
337 if (!final) {
338 if (output_len > 0
339 && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
340 {
341 PyObject *modified = PyUnicode_Substring(output, 0, output_len -1);
342 if (modified == NULL)
343 goto error;
344 Py_DECREF(output);
345 output = modified;
346 self->pendingcr = 1;
347 }
348 }
349
350 /* Record which newlines are read and do newline translation if desired,
351 all in one pass. */
352 {
353 const void *in_str;
354 Py_ssize_t len;
355 int seennl = self->seennl;
356 int only_lf = 0;
357 int kind;
358
359 in_str = PyUnicode_DATA(output);
360 len = PyUnicode_GET_LENGTH(output);
361 kind = PyUnicode_KIND(output);
362
363 if (len == 0)
364 return output;
365
366 /* If, up to now, newlines are consistently \n, do a quick check
367 for the \r *byte* with the libc's optimized memchr.
368 */
369 if (seennl == SEEN_LF || seennl == 0) {
370 only_lf = (memchr(in_str, '\r', kind * len) == NULL);
371 }
372
373 if (only_lf) {
374 /* If not already seen, quick scan for a possible "\n" character.
375 (there's nothing else to be done, even when in translation mode)
376 */
377 if (seennl == 0 &&
378 memchr(in_str, '\n', kind * len) != NULL) {
379 if (kind == PyUnicode_1BYTE_KIND)
380 seennl |= SEEN_LF;
381 else {
382 Py_ssize_t i = 0;
383 for (;;) {
384 Py_UCS4 c;
385 /* Fast loop for non-control characters */
386 while (PyUnicode_READ(kind, in_str, i) > '\n')
387 i++;
388 c = PyUnicode_READ(kind, in_str, i++);
389 if (c == '\n') {
390 seennl |= SEEN_LF;
391 break;
392 }
393 if (i >= len)
394 break;
395 }
396 }
397 }
398 /* Finished: we have scanned for newlines, and none of them
399 need translating */
400 }
401 else if (!self->translate) {
402 Py_ssize_t i = 0;
403 /* We have already seen all newline types, no need to scan again */
404 if (seennl == SEEN_ALL)
405 goto endscan;
406 for (;;) {
407 Py_UCS4 c;
408 /* Fast loop for non-control characters */
409 while (PyUnicode_READ(kind, in_str, i) > '\r')
410 i++;
411 c = PyUnicode_READ(kind, in_str, i++);
412 if (c == '\n')
413 seennl |= SEEN_LF;
414 else if (c == '\r') {
415 if (PyUnicode_READ(kind, in_str, i) == '\n') {
416 seennl |= SEEN_CRLF;
417 i++;
418 }
419 else
420 seennl |= SEEN_CR;
421 }
422 if (i >= len)
423 break;
424 if (seennl == SEEN_ALL)
425 break;
426 }
427 endscan:
428 ;
429 }
430 else {
431 void *translated;
432 int kind = PyUnicode_KIND(output);
433 const void *in_str = PyUnicode_DATA(output);
434 Py_ssize_t in, out;
435 /* XXX: Previous in-place translation here is disabled as
436 resizing is not possible anymore */
437 /* We could try to optimize this so that we only do a copy
438 when there is something to translate. On the other hand,
439 we already know there is a \r byte, so chances are high
440 that something needs to be done. */
441 translated = PyMem_Malloc(kind * len);
442 if (translated == NULL) {
443 PyErr_NoMemory();
444 goto error;
445 }
446 in = out = 0;
447 for (;;) {
448 Py_UCS4 c;
449 /* Fast loop for non-control characters */
450 while ((c = PyUnicode_READ(kind, in_str, in++)) > '\r')
451 PyUnicode_WRITE(kind, translated, out++, c);
452 if (c == '\n') {
453 PyUnicode_WRITE(kind, translated, out++, c);
454 seennl |= SEEN_LF;
455 continue;
456 }
457 if (c == '\r') {
458 if (PyUnicode_READ(kind, in_str, in) == '\n') {
459 in++;
460 seennl |= SEEN_CRLF;
461 }
462 else
463 seennl |= SEEN_CR;
464 PyUnicode_WRITE(kind, translated, out++, '\n');
465 continue;
466 }
467 if (in > len)
468 break;
469 PyUnicode_WRITE(kind, translated, out++, c);
470 }
471 Py_DECREF(output);
472 output = PyUnicode_FromKindAndData(kind, translated, out);
473 PyMem_Free(translated);
474 if (!output)
475 return NULL;
476 }
477 self->seennl |= seennl;
478 }
479
480 return output;
481
482 error:
483 Py_DECREF(output);
484 return NULL;
485 }
486
487 /*[clinic input]
488 _io.IncrementalNewlineDecoder.decode
489 input: object
490 final: bool(accept={int}) = False
491 [clinic start generated code]*/
492
493 static PyObject *
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object * self,PyObject * input,int final)494 _io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
495 PyObject *input, int final)
496 /*[clinic end generated code: output=0d486755bb37a66e input=a4ea97f26372d866]*/
497 {
498 return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
499 }
500
501 /*[clinic input]
502 _io.IncrementalNewlineDecoder.getstate
503 [clinic start generated code]*/
504
505 static PyObject *
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object * self)506 _io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
507 /*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
508 {
509 PyObject *buffer;
510 unsigned long long flag;
511
512 CHECK_INITIALIZED_DECODER(self);
513
514 if (self->decoder != Py_None) {
515 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
516 &_Py_ID(getstate));
517 if (state == NULL)
518 return NULL;
519 if (!PyTuple_Check(state)) {
520 PyErr_SetString(PyExc_TypeError,
521 "illegal decoder state");
522 Py_DECREF(state);
523 return NULL;
524 }
525 if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
526 &buffer, &flag))
527 {
528 Py_DECREF(state);
529 return NULL;
530 }
531 Py_INCREF(buffer);
532 Py_DECREF(state);
533 }
534 else {
535 buffer = PyBytes_FromString("");
536 flag = 0;
537 }
538 flag <<= 1;
539 if (self->pendingcr)
540 flag |= 1;
541 return Py_BuildValue("NK", buffer, flag);
542 }
543
544 /*[clinic input]
545 _io.IncrementalNewlineDecoder.setstate
546 state: object
547 /
548 [clinic start generated code]*/
549
550 static PyObject *
_io_IncrementalNewlineDecoder_setstate(nldecoder_object * self,PyObject * state)551 _io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
552 PyObject *state)
553 /*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
554 {
555 PyObject *buffer;
556 unsigned long long flag;
557
558 CHECK_INITIALIZED_DECODER(self);
559
560 if (!PyTuple_Check(state)) {
561 PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
562 return NULL;
563 }
564 if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
565 &buffer, &flag))
566 {
567 return NULL;
568 }
569
570 self->pendingcr = (int) (flag & 1);
571 flag >>= 1;
572
573 if (self->decoder != Py_None) {
574 return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
575 "((OK))", buffer, flag);
576 }
577 else {
578 Py_RETURN_NONE;
579 }
580 }
581
582 /*[clinic input]
583 _io.IncrementalNewlineDecoder.reset
584 [clinic start generated code]*/
585
586 static PyObject *
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object * self)587 _io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
588 /*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
589 {
590 CHECK_INITIALIZED_DECODER(self);
591
592 self->seennl = 0;
593 self->pendingcr = 0;
594 if (self->decoder != Py_None)
595 return PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
596 else
597 Py_RETURN_NONE;
598 }
599
600 static PyObject *
incrementalnewlinedecoder_newlines_get(nldecoder_object * self,void * context)601 incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
602 {
603 CHECK_INITIALIZED_DECODER(self);
604
605 switch (self->seennl) {
606 case SEEN_CR:
607 return PyUnicode_FromString("\r");
608 case SEEN_LF:
609 return PyUnicode_FromString("\n");
610 case SEEN_CRLF:
611 return PyUnicode_FromString("\r\n");
612 case SEEN_CR | SEEN_LF:
613 return Py_BuildValue("ss", "\r", "\n");
614 case SEEN_CR | SEEN_CRLF:
615 return Py_BuildValue("ss", "\r", "\r\n");
616 case SEEN_LF | SEEN_CRLF:
617 return Py_BuildValue("ss", "\n", "\r\n");
618 case SEEN_CR | SEEN_LF | SEEN_CRLF:
619 return Py_BuildValue("sss", "\r", "\n", "\r\n");
620 default:
621 Py_RETURN_NONE;
622 }
623
624 }
625
626 /* TextIOWrapper */
627
/* Pointer type for the specialized encoders stored in textio.encodefunc
   and in the encodefuncs[] table.  The encoders in this file are declared
   as (textio *self, PyObject *text) and cast to this type. */
typedef PyObject *
    (*encodefunc_t)(PyObject *, PyObject *);
630
/* Instance layout of _io.TextIOWrapper. */
typedef struct
{
    PyObject_HEAD
    int ok; /* initialized? */
    int detached;
    Py_ssize_t chunk_size;
    PyObject *buffer;   /* underlying stream; queried through its
                           readable(), writable() and tell() methods */
    PyObject *encoding; /* encoding name (str) */
    PyObject *encoder;  /* incremental encoder; only created when the
                           buffer is writable */
    PyObject *decoder;  /* incremental decoder; only created when the
                           buffer is readable, and wrapped in an
                           IncrementalNewlineDecoder in universal mode */
    PyObject *readnl;   /* str built from the newline argument, or NULL */
    PyObject *errors;   /* error handler name (str) */
    const char *writenl; /* ASCII-encoded; NULL stands for \n */
    char line_buffering;
    char write_through;
    char readuniversal;  /* newline argument was NULL or "" */
    char readtranslate;  /* newline argument was NULL */
    char writetranslate; /* newline argument was NULL or non-empty */
    char seekable;
    char has_read1;
    char telling;
    char finalizing;
    /* Specialized encoding func (see below) */
    encodefunc_t encodefunc;
    /* Whether or not it's the start of the stream */
    char encoding_start_of_stream;

    /* Reads and writes are internally buffered in order to speed things up.
       However, any read will first flush the write buffer if it isn't empty.

       Please also note that text to be written is first encoded before being
       buffered. This is necessary so that encoding errors are immediately
       reported to the caller, but it unfortunately means that the
       IncrementalEncoder (whose encode() method is always written in Python)
       becomes a bottleneck for small writes.
    */
    PyObject *decoded_chars;       /* buffer for text returned from decoder */
    Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
    PyObject *pending_bytes;       // data waiting to be written.
                                   // ascii unicode, bytes, or list of them.
    Py_ssize_t pending_bytes_count;

    /* snapshot is either NULL, or a tuple (dec_flags, next_input) where
     * dec_flags is the second (integer) item of the decoder state and
     * next_input is the chunk of input bytes that comes next after the
     * snapshot point.  We use this to reconstruct decoder states in tell().
     */
    PyObject *snapshot;
    /* Bytes-to-characters ratio for the current chunk. Serves as input for
       the heuristic in tell(). */
    double b2cratio;

    /* Cache raw object if it's a FileIO object */
    PyObject *raw;

    PyObject *weakreflist;
    PyObject *dict;
} textio;
689
/* Forward declaration.  The function is static, so its definition lives
   elsewhere in this translation unit. */
static void
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
692
693 /* A couple of specialized cases in order to bypass the slow incremental
694 encoding methods for the most popular encodings. */
695
696 static PyObject *
ascii_encode(textio * self,PyObject * text)697 ascii_encode(textio *self, PyObject *text)
698 {
699 return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
700 }
701
702 static PyObject *
utf16be_encode(textio * self,PyObject * text)703 utf16be_encode(textio *self, PyObject *text)
704 {
705 return _PyUnicode_EncodeUTF16(text,
706 PyUnicode_AsUTF8(self->errors), 1);
707 }
708
709 static PyObject *
utf16le_encode(textio * self,PyObject * text)710 utf16le_encode(textio *self, PyObject *text)
711 {
712 return _PyUnicode_EncodeUTF16(text,
713 PyUnicode_AsUTF8(self->errors), -1);
714 }
715
716 static PyObject *
utf16_encode(textio * self,PyObject * text)717 utf16_encode(textio *self, PyObject *text)
718 {
719 if (!self->encoding_start_of_stream) {
720 /* Skip the BOM and use native byte ordering */
721 #if PY_BIG_ENDIAN
722 return utf16be_encode(self, text);
723 #else
724 return utf16le_encode(self, text);
725 #endif
726 }
727 return _PyUnicode_EncodeUTF16(text,
728 PyUnicode_AsUTF8(self->errors), 0);
729 }
730
731 static PyObject *
utf32be_encode(textio * self,PyObject * text)732 utf32be_encode(textio *self, PyObject *text)
733 {
734 return _PyUnicode_EncodeUTF32(text,
735 PyUnicode_AsUTF8(self->errors), 1);
736 }
737
738 static PyObject *
utf32le_encode(textio * self,PyObject * text)739 utf32le_encode(textio *self, PyObject *text)
740 {
741 return _PyUnicode_EncodeUTF32(text,
742 PyUnicode_AsUTF8(self->errors), -1);
743 }
744
745 static PyObject *
utf32_encode(textio * self,PyObject * text)746 utf32_encode(textio *self, PyObject *text)
747 {
748 if (!self->encoding_start_of_stream) {
749 /* Skip the BOM and use native byte ordering */
750 #if PY_BIG_ENDIAN
751 return utf32be_encode(self, text);
752 #else
753 return utf32le_encode(self, text);
754 #endif
755 }
756 return _PyUnicode_EncodeUTF32(text,
757 PyUnicode_AsUTF8(self->errors), 0);
758 }
759
760 static PyObject *
utf8_encode(textio * self,PyObject * text)761 utf8_encode(textio *self, PyObject *text)
762 {
763 return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
764 }
765
766 static PyObject *
latin1_encode(textio * self,PyObject * text)767 latin1_encode(textio *self, PyObject *text)
768 {
769 return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
770 }
771
772 // Return true when encoding can be skipped when text is ascii.
773 static inline int
is_asciicompat_encoding(encodefunc_t f)774 is_asciicompat_encoding(encodefunc_t f)
775 {
776 return f == (encodefunc_t) ascii_encode
777 || f == (encodefunc_t) latin1_encode
778 || f == (encodefunc_t) utf8_encode;
779 }
780
/* Map normalized encoding names onto the specialized encoding funcs */

/* One row of that mapping. */
typedef struct {
    const char *name;          /* normalized codec name, compared against
                                  the codec's `name` attribute */
    encodefunc_t encodefunc;   /* specialized encoder for that codec */
} encodefuncentry;
787
788 static const encodefuncentry encodefuncs[] = {
789 {"ascii", (encodefunc_t) ascii_encode},
790 {"iso8859-1", (encodefunc_t) latin1_encode},
791 {"utf-8", (encodefunc_t) utf8_encode},
792 {"utf-16-be", (encodefunc_t) utf16be_encode},
793 {"utf-16-le", (encodefunc_t) utf16le_encode},
794 {"utf-16", (encodefunc_t) utf16_encode},
795 {"utf-32-be", (encodefunc_t) utf32be_encode},
796 {"utf-32-le", (encodefunc_t) utf32le_encode},
797 {"utf-32", (encodefunc_t) utf32_encode},
798 {NULL, NULL}
799 };
800
801 static int
validate_newline(const char * newline)802 validate_newline(const char *newline)
803 {
804 if (newline && newline[0] != '\0'
805 && !(newline[0] == '\n' && newline[1] == '\0')
806 && !(newline[0] == '\r' && newline[1] == '\0')
807 && !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
808 PyErr_Format(PyExc_ValueError,
809 "illegal newline value: %s", newline);
810 return -1;
811 }
812 return 0;
813 }
814
815 static int
set_newline(textio * self,const char * newline)816 set_newline(textio *self, const char *newline)
817 {
818 PyObject *old = self->readnl;
819 if (newline == NULL) {
820 self->readnl = NULL;
821 }
822 else {
823 self->readnl = PyUnicode_FromString(newline);
824 if (self->readnl == NULL) {
825 self->readnl = old;
826 return -1;
827 }
828 }
829 self->readuniversal = (newline == NULL || newline[0] == '\0');
830 self->readtranslate = (newline == NULL);
831 self->writetranslate = (newline == NULL || newline[0] != '\0');
832 if (!self->readuniversal && self->readnl != NULL) {
833 // validate_newline() accepts only ASCII newlines.
834 assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
835 self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
836 if (strcmp(self->writenl, "\n") == 0) {
837 self->writenl = NULL;
838 }
839 }
840 else {
841 #ifdef MS_WINDOWS
842 self->writenl = "\r\n";
843 #else
844 self->writenl = NULL;
845 #endif
846 }
847 Py_XDECREF(old);
848 return 0;
849 }
850
851 static int
_textiowrapper_set_decoder(textio * self,PyObject * codec_info,const char * errors)852 _textiowrapper_set_decoder(textio *self, PyObject *codec_info,
853 const char *errors)
854 {
855 PyObject *res;
856 int r;
857
858 res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
859 if (res == NULL)
860 return -1;
861
862 r = PyObject_IsTrue(res);
863 Py_DECREF(res);
864 if (r == -1)
865 return -1;
866
867 if (r != 1)
868 return 0;
869
870 Py_CLEAR(self->decoder);
871 self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
872 if (self->decoder == NULL)
873 return -1;
874
875 if (self->readuniversal) {
876 PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
877 (PyObject *)&PyIncrementalNewlineDecoder_Type,
878 self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
879 if (incrementalDecoder == NULL)
880 return -1;
881 Py_CLEAR(self->decoder);
882 self->decoder = incrementalDecoder;
883 }
884
885 return 0;
886 }
887
888 static PyObject*
_textiowrapper_decode(PyObject * decoder,PyObject * bytes,int eof)889 _textiowrapper_decode(PyObject *decoder, PyObject *bytes, int eof)
890 {
891 PyObject *chars;
892
893 if (Py_IS_TYPE(decoder, &PyIncrementalNewlineDecoder_Type))
894 chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
895 else
896 chars = PyObject_CallMethodObjArgs(decoder, &_Py_ID(decode), bytes,
897 eof ? Py_True : Py_False, NULL);
898
899 if (check_decoded(chars) < 0)
900 // check_decoded already decreases refcount
901 return NULL;
902
903 return chars;
904 }
905
906 static int
_textiowrapper_set_encoder(textio * self,PyObject * codec_info,const char * errors)907 _textiowrapper_set_encoder(textio *self, PyObject *codec_info,
908 const char *errors)
909 {
910 PyObject *res;
911 int r;
912
913 res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
914 if (res == NULL)
915 return -1;
916
917 r = PyObject_IsTrue(res);
918 Py_DECREF(res);
919 if (r == -1)
920 return -1;
921
922 if (r != 1)
923 return 0;
924
925 Py_CLEAR(self->encoder);
926 self->encodefunc = NULL;
927 self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
928 if (self->encoder == NULL)
929 return -1;
930
931 /* Get the normalized named of the codec */
932 if (_PyObject_LookupAttr(codec_info, &_Py_ID(name), &res) < 0) {
933 return -1;
934 }
935 if (res != NULL && PyUnicode_Check(res)) {
936 const encodefuncentry *e = encodefuncs;
937 while (e->name != NULL) {
938 if (_PyUnicode_EqualToASCIIString(res, e->name)) {
939 self->encodefunc = e->encodefunc;
940 break;
941 }
942 e++;
943 }
944 }
945 Py_XDECREF(res);
946
947 return 0;
948 }
949
950 static int
_textiowrapper_fix_encoder_state(textio * self)951 _textiowrapper_fix_encoder_state(textio *self)
952 {
953 if (!self->seekable || !self->encoder) {
954 return 0;
955 }
956
957 self->encoding_start_of_stream = 1;
958
959 PyObject *cookieObj = PyObject_CallMethodNoArgs(
960 self->buffer, &_Py_ID(tell));
961 if (cookieObj == NULL) {
962 return -1;
963 }
964
965 int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
966 Py_DECREF(cookieObj);
967 if (cmp < 0) {
968 return -1;
969 }
970
971 if (cmp == 0) {
972 self->encoding_start_of_stream = 0;
973 PyObject *res = PyObject_CallMethodOneArg(
974 self->encoder, &_Py_ID(setstate), _PyLong_GetZero());
975 if (res == NULL) {
976 return -1;
977 }
978 Py_DECREF(res);
979 }
980
981 return 0;
982 }
983
984 static int
io_check_errors(PyObject * errors)985 io_check_errors(PyObject *errors)
986 {
987 assert(errors != NULL && errors != Py_None);
988
989 PyInterpreterState *interp = _PyInterpreterState_GET();
990 #ifndef Py_DEBUG
991 /* In release mode, only check in development mode (-X dev) */
992 if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
993 return 0;
994 }
995 #else
996 /* Always check in debug mode */
997 #endif
998
999 /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1000 before_PyUnicode_InitEncodings() is called. */
1001 if (!interp->unicode.fs_codec.encoding) {
1002 return 0;
1003 }
1004
1005 Py_ssize_t name_length;
1006 const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
1007 if (name == NULL) {
1008 return -1;
1009 }
1010 if (strlen(name) != (size_t)name_length) {
1011 PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
1012 return -1;
1013 }
1014 PyObject *handler = PyCodec_LookupError(name);
1015 if (handler != NULL) {
1016 Py_DECREF(handler);
1017 return 0;
1018 }
1019 return -1;
1020 }
1021
1022
1023
1024 /*[clinic input]
1025 _io.TextIOWrapper.__init__
1026 buffer: object
1027 encoding: str(accept={str, NoneType}) = None
1028 errors: object = None
1029 newline: str(accept={str, NoneType}) = None
1030 line_buffering: bool(accept={int}) = False
1031 write_through: bool(accept={int}) = False
1032
1033 Character and line based layer over a BufferedIOBase object, buffer.
1034
1035 encoding gives the name of the encoding that the stream will be
1036 decoded or encoded with. It defaults to locale.getencoding().
1037
1038 errors determines the strictness of encoding and decoding (see
1039 help(codecs.Codec) or the documentation for codecs.register) and
1040 defaults to "strict".
1041
1042 newline controls how line endings are handled. It can be None, '',
1043 '\n', '\r', and '\r\n'. It works as follows:
1044
1045 * On input, if newline is None, universal newlines mode is
1046 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1047 these are translated into '\n' before being returned to the
1048 caller. If it is '', universal newline mode is enabled, but line
1049 endings are returned to the caller untranslated. If it has any of
1050 the other legal values, input lines are only terminated by the given
1051 string, and the line ending is returned to the caller untranslated.
1052
1053 * On output, if newline is None, any '\n' characters written are
1054 translated to the system default line separator, os.linesep. If
1055 newline is '' or '\n', no translation takes place. If newline is any
1056 of the other legal values, any '\n' characters written are translated
1057 to the given string.
1058
1059 If line_buffering is True, a call to flush is implied when a call to
1060 write contains a newline character.
1061 [clinic start generated code]*/
1062
1063 static int
_io_TextIOWrapper___init___impl(textio * self,PyObject * buffer,const char * encoding,PyObject * errors,const char * newline,int line_buffering,int write_through)1064 _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1065 const char *encoding, PyObject *errors,
1066 const char *newline, int line_buffering,
1067 int write_through)
1068 /*[clinic end generated code: output=72267c0c01032ed2 input=72590963698f289b]*/
1069 {
1070 PyObject *raw, *codec_info = NULL;
1071 PyObject *res;
1072 int r;
1073
1074 self->ok = 0;
1075 self->detached = 0;
1076
1077 if (encoding == NULL) {
1078 PyInterpreterState *interp = _PyInterpreterState_GET();
1079 if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
1080 if (PyErr_WarnEx(PyExc_EncodingWarning,
1081 "'encoding' argument not specified", 1)) {
1082 return -1;
1083 }
1084 }
1085 }
1086
1087 if (errors == Py_None) {
1088 errors = &_Py_ID(strict);
1089 }
1090 else if (!PyUnicode_Check(errors)) {
1091 // Check 'errors' argument here because Argument Clinic doesn't support
1092 // 'str(accept={str, NoneType})' converter.
1093 PyErr_Format(
1094 PyExc_TypeError,
1095 "TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1096 Py_TYPE(errors)->tp_name);
1097 return -1;
1098 }
1099 else if (io_check_errors(errors)) {
1100 return -1;
1101 }
1102
1103 if (validate_newline(newline) < 0) {
1104 return -1;
1105 }
1106
1107 Py_CLEAR(self->buffer);
1108 Py_CLEAR(self->encoding);
1109 Py_CLEAR(self->encoder);
1110 Py_CLEAR(self->decoder);
1111 Py_CLEAR(self->readnl);
1112 Py_CLEAR(self->decoded_chars);
1113 Py_CLEAR(self->pending_bytes);
1114 Py_CLEAR(self->snapshot);
1115 Py_CLEAR(self->errors);
1116 Py_CLEAR(self->raw);
1117 self->decoded_chars_used = 0;
1118 self->pending_bytes_count = 0;
1119 self->encodefunc = NULL;
1120 self->b2cratio = 0.0;
1121
1122 if (encoding == NULL && _PyRuntime.preconfig.utf8_mode) {
1123 _Py_DECLARE_STR(utf_8, "utf-8");
1124 self->encoding = Py_NewRef(&_Py_STR(utf_8));
1125 }
1126 else if (encoding == NULL || (strcmp(encoding, "locale") == 0)) {
1127 self->encoding = _Py_GetLocaleEncodingObject();
1128 if (self->encoding == NULL) {
1129 goto error;
1130 }
1131 assert(PyUnicode_Check(self->encoding));
1132 }
1133
1134 if (self->encoding != NULL) {
1135 encoding = PyUnicode_AsUTF8(self->encoding);
1136 if (encoding == NULL)
1137 goto error;
1138 }
1139 else if (encoding != NULL) {
1140 self->encoding = PyUnicode_FromString(encoding);
1141 if (self->encoding == NULL)
1142 goto error;
1143 }
1144 else {
1145 PyErr_SetString(PyExc_OSError,
1146 "could not determine default encoding");
1147 goto error;
1148 }
1149
1150 /* Check we have been asked for a real text encoding */
1151 codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1152 if (codec_info == NULL) {
1153 Py_CLEAR(self->encoding);
1154 goto error;
1155 }
1156
1157 /* XXX: Failures beyond this point have the potential to leak elements
1158 * of the partially constructed object (like self->encoding)
1159 */
1160
1161 Py_INCREF(errors);
1162 self->errors = errors;
1163 self->chunk_size = 8192;
1164 self->line_buffering = line_buffering;
1165 self->write_through = write_through;
1166 if (set_newline(self, newline) < 0) {
1167 goto error;
1168 }
1169
1170 self->buffer = buffer;
1171 Py_INCREF(buffer);
1172
1173 /* Build the decoder object */
1174 if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1175 goto error;
1176
1177 /* Build the encoder object */
1178 if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1179 goto error;
1180
1181 /* Finished sorting out the codec details */
1182 Py_CLEAR(codec_info);
1183
1184 if (Py_IS_TYPE(buffer, &PyBufferedReader_Type) ||
1185 Py_IS_TYPE(buffer, &PyBufferedWriter_Type) ||
1186 Py_IS_TYPE(buffer, &PyBufferedRandom_Type))
1187 {
1188 if (_PyObject_LookupAttr(buffer, &_Py_ID(raw), &raw) < 0)
1189 goto error;
1190 /* Cache the raw FileIO object to speed up 'closed' checks */
1191 if (raw != NULL) {
1192 if (Py_IS_TYPE(raw, &PyFileIO_Type))
1193 self->raw = raw;
1194 else
1195 Py_DECREF(raw);
1196 }
1197 }
1198
1199 res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
1200 if (res == NULL)
1201 goto error;
1202 r = PyObject_IsTrue(res);
1203 Py_DECREF(res);
1204 if (r < 0)
1205 goto error;
1206 self->seekable = self->telling = r;
1207
1208 r = _PyObject_LookupAttr(buffer, &_Py_ID(read1), &res);
1209 if (r < 0) {
1210 goto error;
1211 }
1212 Py_XDECREF(res);
1213 self->has_read1 = r;
1214
1215 self->encoding_start_of_stream = 0;
1216 if (_textiowrapper_fix_encoder_state(self) < 0) {
1217 goto error;
1218 }
1219
1220 self->ok = 1;
1221 return 0;
1222
1223 error:
1224 Py_XDECREF(codec_info);
1225 return -1;
1226 }
1227
1228 /* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1229 * -1 on error.
1230 */
1231 static int
convert_optional_bool(PyObject * obj,int default_value)1232 convert_optional_bool(PyObject *obj, int default_value)
1233 {
1234 long v;
1235 if (obj == Py_None) {
1236 v = default_value;
1237 }
1238 else {
1239 v = PyLong_AsLong(obj);
1240 if (v == -1 && PyErr_Occurred())
1241 return -1;
1242 }
1243 return v != 0;
1244 }
1245
1246 static int
textiowrapper_change_encoding(textio * self,PyObject * encoding,PyObject * errors,int newline_changed)1247 textiowrapper_change_encoding(textio *self, PyObject *encoding,
1248 PyObject *errors, int newline_changed)
1249 {
1250 /* Use existing settings where new settings are not specified */
1251 if (encoding == Py_None && errors == Py_None && !newline_changed) {
1252 return 0; // no change
1253 }
1254
1255 if (encoding == Py_None) {
1256 encoding = self->encoding;
1257 if (errors == Py_None) {
1258 errors = self->errors;
1259 }
1260 Py_INCREF(encoding);
1261 }
1262 else {
1263 if (_PyUnicode_EqualToASCIIString(encoding, "locale")) {
1264 encoding = _Py_GetLocaleEncodingObject();
1265 if (encoding == NULL) {
1266 return -1;
1267 }
1268 } else {
1269 Py_INCREF(encoding);
1270 }
1271 if (errors == Py_None) {
1272 errors = &_Py_ID(strict);
1273 }
1274 }
1275
1276 const char *c_errors = PyUnicode_AsUTF8(errors);
1277 if (c_errors == NULL) {
1278 Py_DECREF(encoding);
1279 return -1;
1280 }
1281
1282 // Create new encoder & decoder
1283 PyObject *codec_info = _PyCodec_LookupTextEncoding(
1284 PyUnicode_AsUTF8(encoding), "codecs.open()");
1285 if (codec_info == NULL) {
1286 Py_DECREF(encoding);
1287 return -1;
1288 }
1289 if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1290 _textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1291 Py_DECREF(codec_info);
1292 Py_DECREF(encoding);
1293 return -1;
1294 }
1295 Py_DECREF(codec_info);
1296
1297 Py_INCREF(errors);
1298 Py_SETREF(self->encoding, encoding);
1299 Py_SETREF(self->errors, errors);
1300
1301 return _textiowrapper_fix_encoder_state(self);
1302 }
1303
1304 /*[clinic input]
1305 _io.TextIOWrapper.reconfigure
1306 *
1307 encoding: object = None
1308 errors: object = None
1309 newline as newline_obj: object(c_default="NULL") = None
1310 line_buffering as line_buffering_obj: object = None
1311 write_through as write_through_obj: object = None
1312
1313 Reconfigure the text stream with new parameters.
1314
1315 This also does an implicit stream flush.
1316
1317 [clinic start generated code]*/
1318
1319 static PyObject *
_io_TextIOWrapper_reconfigure_impl(textio * self,PyObject * encoding,PyObject * errors,PyObject * newline_obj,PyObject * line_buffering_obj,PyObject * write_through_obj)1320 _io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1321 PyObject *errors, PyObject *newline_obj,
1322 PyObject *line_buffering_obj,
1323 PyObject *write_through_obj)
1324 /*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
1325 {
1326 int line_buffering;
1327 int write_through;
1328 const char *newline = NULL;
1329
1330 /* Check if something is in the read buffer */
1331 if (self->decoded_chars != NULL) {
1332 if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1333 _unsupported("It is not possible to set the encoding or newline "
1334 "of stream after the first read");
1335 return NULL;
1336 }
1337 }
1338
1339 if (newline_obj != NULL && newline_obj != Py_None) {
1340 newline = PyUnicode_AsUTF8(newline_obj);
1341 if (newline == NULL || validate_newline(newline) < 0) {
1342 return NULL;
1343 }
1344 }
1345
1346 line_buffering = convert_optional_bool(line_buffering_obj,
1347 self->line_buffering);
1348 write_through = convert_optional_bool(write_through_obj,
1349 self->write_through);
1350 if (line_buffering < 0 || write_through < 0) {
1351 return NULL;
1352 }
1353
1354 PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
1355 if (res == NULL) {
1356 return NULL;
1357 }
1358 Py_DECREF(res);
1359 self->b2cratio = 0;
1360
1361 if (newline_obj != NULL && set_newline(self, newline) < 0) {
1362 return NULL;
1363 }
1364
1365 if (textiowrapper_change_encoding(
1366 self, encoding, errors, newline_obj != NULL) < 0) {
1367 return NULL;
1368 }
1369
1370 self->line_buffering = line_buffering;
1371 self->write_through = write_through;
1372 Py_RETURN_NONE;
1373 }
1374
1375 static int
textiowrapper_clear(textio * self)1376 textiowrapper_clear(textio *self)
1377 {
1378 self->ok = 0;
1379 Py_CLEAR(self->buffer);
1380 Py_CLEAR(self->encoding);
1381 Py_CLEAR(self->encoder);
1382 Py_CLEAR(self->decoder);
1383 Py_CLEAR(self->readnl);
1384 Py_CLEAR(self->decoded_chars);
1385 Py_CLEAR(self->pending_bytes);
1386 Py_CLEAR(self->snapshot);
1387 Py_CLEAR(self->errors);
1388 Py_CLEAR(self->raw);
1389
1390 Py_CLEAR(self->dict);
1391 return 0;
1392 }
1393
1394 static void
textiowrapper_dealloc(textio * self)1395 textiowrapper_dealloc(textio *self)
1396 {
1397 self->finalizing = 1;
1398 if (_PyIOBase_finalize((PyObject *) self) < 0)
1399 return;
1400 self->ok = 0;
1401 _PyObject_GC_UNTRACK(self);
1402 if (self->weakreflist != NULL)
1403 PyObject_ClearWeakRefs((PyObject *)self);
1404 textiowrapper_clear(self);
1405 Py_TYPE(self)->tp_free((PyObject *)self);
1406 }
1407
1408 static int
textiowrapper_traverse(textio * self,visitproc visit,void * arg)1409 textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1410 {
1411 Py_VISIT(self->buffer);
1412 Py_VISIT(self->encoding);
1413 Py_VISIT(self->encoder);
1414 Py_VISIT(self->decoder);
1415 Py_VISIT(self->readnl);
1416 Py_VISIT(self->decoded_chars);
1417 Py_VISIT(self->pending_bytes);
1418 Py_VISIT(self->snapshot);
1419 Py_VISIT(self->errors);
1420 Py_VISIT(self->raw);
1421
1422 Py_VISIT(self->dict);
1423 return 0;
1424 }
1425
1426 static PyObject *
1427 textiowrapper_closed_get(textio *self, void *context);
1428
1429 /* This macro takes some shortcuts to make the common case faster. */
1430 #define CHECK_CLOSED(self) \
1431 do { \
1432 int r; \
1433 PyObject *_res; \
1434 if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) { \
1435 if (self->raw != NULL) \
1436 r = _PyFileIO_closed(self->raw); \
1437 else { \
1438 _res = textiowrapper_closed_get(self, NULL); \
1439 if (_res == NULL) \
1440 return NULL; \
1441 r = PyObject_IsTrue(_res); \
1442 Py_DECREF(_res); \
1443 if (r < 0) \
1444 return NULL; \
1445 } \
1446 if (r > 0) { \
1447 PyErr_SetString(PyExc_ValueError, \
1448 "I/O operation on closed file."); \
1449 return NULL; \
1450 } \
1451 } \
1452 else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1453 return NULL; \
1454 } while (0)
1455
/* Guard macros for entry points.  Each one makes the enclosing function
   return early with ValueError set when the object is not usable.

   They are wrapped in do { } while (0) so that each expands to exactly one
   statement: it can be used as the body of an unbraced `if`, and cannot
   capture a following `else`.  Every use must end with a semicolon. */

/* Return NULL unless the object has been successfully initialized. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)

/* Return NULL unless the object is initialized and still has its buffer. */
#define CHECK_ATTACHED(self) \
    do { \
        CHECK_INITIALIZED(self); \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                            "underlying buffer has been detached"); \
            return NULL; \
        } \
    } while (0)

/* Same checks as CHECK_ATTACHED, for functions that return int: the
   enclosing function returns -1 instead of NULL. */
#define CHECK_ATTACHED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return -1; \
        } \
        else if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                            "underlying buffer has been detached"); \
            return -1; \
        } \
    } while (0)
1481
1482
1483 /*[clinic input]
1484 _io.TextIOWrapper.detach
1485 [clinic start generated code]*/
1486
1487 static PyObject *
_io_TextIOWrapper_detach_impl(textio * self)1488 _io_TextIOWrapper_detach_impl(textio *self)
1489 /*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
1490 {
1491 PyObject *buffer, *res;
1492 CHECK_ATTACHED(self);
1493 res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
1494 if (res == NULL)
1495 return NULL;
1496 Py_DECREF(res);
1497 buffer = self->buffer;
1498 self->buffer = NULL;
1499 self->detached = 1;
1500 return buffer;
1501 }
1502
1503 /* Flush the internal write buffer. This doesn't explicitly flush the
1504 underlying buffered object, though. */
1505 static int
_textiowrapper_writeflush(textio * self)1506 _textiowrapper_writeflush(textio *self)
1507 {
1508 if (self->pending_bytes == NULL)
1509 return 0;
1510
1511 PyObject *pending = self->pending_bytes;
1512 PyObject *b;
1513
1514 if (PyBytes_Check(pending)) {
1515 b = pending;
1516 Py_INCREF(b);
1517 }
1518 else if (PyUnicode_Check(pending)) {
1519 assert(PyUnicode_IS_ASCII(pending));
1520 assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1521 b = PyBytes_FromStringAndSize(
1522 PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1523 if (b == NULL) {
1524 return -1;
1525 }
1526 }
1527 else {
1528 assert(PyList_Check(pending));
1529 b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1530 if (b == NULL) {
1531 return -1;
1532 }
1533
1534 char *buf = PyBytes_AsString(b);
1535 Py_ssize_t pos = 0;
1536
1537 for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1538 PyObject *obj = PyList_GET_ITEM(pending, i);
1539 char *src;
1540 Py_ssize_t len;
1541 if (PyUnicode_Check(obj)) {
1542 assert(PyUnicode_IS_ASCII(obj));
1543 src = PyUnicode_DATA(obj);
1544 len = PyUnicode_GET_LENGTH(obj);
1545 }
1546 else {
1547 assert(PyBytes_Check(obj));
1548 if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1549 Py_DECREF(b);
1550 return -1;
1551 }
1552 }
1553 memcpy(buf + pos, src, len);
1554 pos += len;
1555 }
1556 assert(pos == self->pending_bytes_count);
1557 }
1558
1559 self->pending_bytes_count = 0;
1560 self->pending_bytes = NULL;
1561 Py_DECREF(pending);
1562
1563 PyObject *ret;
1564 do {
1565 ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
1566 } while (ret == NULL && _PyIO_trap_eintr());
1567 Py_DECREF(b);
1568 // NOTE: We cleared buffer but we don't know how many bytes are actually written
1569 // when an error occurred.
1570 if (ret == NULL)
1571 return -1;
1572 Py_DECREF(ret);
1573 return 0;
1574 }
1575
1576 /*[clinic input]
1577 _io.TextIOWrapper.write
1578 text: unicode
1579 /
1580 [clinic start generated code]*/
1581
1582 static PyObject *
_io_TextIOWrapper_write_impl(textio * self,PyObject * text)1583 _io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1584 /*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
1585 {
1586 PyObject *ret;
1587 PyObject *b;
1588 Py_ssize_t textlen;
1589 int haslf = 0;
1590 int needflush = 0, text_needflush = 0;
1591
1592 if (PyUnicode_READY(text) == -1)
1593 return NULL;
1594
1595 CHECK_ATTACHED(self);
1596 CHECK_CLOSED(self);
1597
1598 if (self->encoder == NULL)
1599 return _unsupported("not writable");
1600
1601 Py_INCREF(text);
1602
1603 textlen = PyUnicode_GET_LENGTH(text);
1604
1605 if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1606 if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1607 haslf = 1;
1608
1609 if (haslf && self->writetranslate && self->writenl != NULL) {
1610 PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace),
1611 "ss", "\n", self->writenl);
1612 Py_DECREF(text);
1613 if (newtext == NULL)
1614 return NULL;
1615 text = newtext;
1616 }
1617
1618 if (self->write_through)
1619 text_needflush = 1;
1620 if (self->line_buffering &&
1621 (haslf ||
1622 PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1623 needflush = 1;
1624
1625 /* XXX What if we were just reading? */
1626 if (self->encodefunc != NULL) {
1627 if (PyUnicode_IS_ASCII(text) &&
1628 // See bpo-43260
1629 PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
1630 is_asciicompat_encoding(self->encodefunc)) {
1631 b = text;
1632 Py_INCREF(b);
1633 }
1634 else {
1635 b = (*self->encodefunc)((PyObject *) self, text);
1636 }
1637 self->encoding_start_of_stream = 0;
1638 }
1639 else {
1640 b = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(encode), text);
1641 }
1642
1643 Py_DECREF(text);
1644 if (b == NULL)
1645 return NULL;
1646 if (b != text && !PyBytes_Check(b)) {
1647 PyErr_Format(PyExc_TypeError,
1648 "encoder should return a bytes object, not '%.200s'",
1649 Py_TYPE(b)->tp_name);
1650 Py_DECREF(b);
1651 return NULL;
1652 }
1653
1654 Py_ssize_t bytes_len;
1655 if (b == text) {
1656 bytes_len = PyUnicode_GET_LENGTH(b);
1657 }
1658 else {
1659 bytes_len = PyBytes_GET_SIZE(b);
1660 }
1661
1662 if (self->pending_bytes == NULL) {
1663 self->pending_bytes_count = 0;
1664 self->pending_bytes = b;
1665 }
1666 else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
1667 // Prevent to concatenate more than chunk_size data.
1668 if (_textiowrapper_writeflush(self) < 0) {
1669 Py_DECREF(b);
1670 return NULL;
1671 }
1672 self->pending_bytes = b;
1673 }
1674 else if (!PyList_CheckExact(self->pending_bytes)) {
1675 PyObject *list = PyList_New(2);
1676 if (list == NULL) {
1677 Py_DECREF(b);
1678 return NULL;
1679 }
1680 PyList_SET_ITEM(list, 0, self->pending_bytes);
1681 PyList_SET_ITEM(list, 1, b);
1682 self->pending_bytes = list;
1683 }
1684 else {
1685 if (PyList_Append(self->pending_bytes, b) < 0) {
1686 Py_DECREF(b);
1687 return NULL;
1688 }
1689 Py_DECREF(b);
1690 }
1691
1692 self->pending_bytes_count += bytes_len;
1693 if (self->pending_bytes_count >= self->chunk_size || needflush ||
1694 text_needflush) {
1695 if (_textiowrapper_writeflush(self) < 0)
1696 return NULL;
1697 }
1698
1699 if (needflush) {
1700 ret = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
1701 if (ret == NULL)
1702 return NULL;
1703 Py_DECREF(ret);
1704 }
1705
1706 textiowrapper_set_decoded_chars(self, NULL);
1707 Py_CLEAR(self->snapshot);
1708
1709 if (self->decoder) {
1710 ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
1711 if (ret == NULL)
1712 return NULL;
1713 Py_DECREF(ret);
1714 }
1715
1716 return PyLong_FromSsize_t(textlen);
1717 }
1718
1719 /* Steal a reference to chars and store it in the decoded_char buffer;
1720 */
1721 static void
textiowrapper_set_decoded_chars(textio * self,PyObject * chars)1722 textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1723 {
1724 Py_XSETREF(self->decoded_chars, chars);
1725 self->decoded_chars_used = 0;
1726 }
1727
1728 static PyObject *
textiowrapper_get_decoded_chars(textio * self,Py_ssize_t n)1729 textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1730 {
1731 PyObject *chars;
1732 Py_ssize_t avail;
1733
1734 if (self->decoded_chars == NULL)
1735 return PyUnicode_FromStringAndSize(NULL, 0);
1736
1737 /* decoded_chars is guaranteed to be "ready". */
1738 avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1739 - self->decoded_chars_used);
1740
1741 assert(avail >= 0);
1742
1743 if (n < 0 || n > avail)
1744 n = avail;
1745
1746 if (self->decoded_chars_used > 0 || n < avail) {
1747 chars = PyUnicode_Substring(self->decoded_chars,
1748 self->decoded_chars_used,
1749 self->decoded_chars_used + n);
1750 if (chars == NULL)
1751 return NULL;
1752 }
1753 else {
1754 chars = self->decoded_chars;
1755 Py_INCREF(chars);
1756 }
1757
1758 self->decoded_chars_used += n;
1759 return chars;
1760 }
1761
1762 /* Read and decode the next chunk of data from the BufferedReader.
1763 */
1764 static int
textiowrapper_read_chunk(textio * self,Py_ssize_t size_hint)1765 textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1766 {
1767 PyObject *dec_buffer = NULL;
1768 PyObject *dec_flags = NULL;
1769 PyObject *input_chunk = NULL;
1770 Py_buffer input_chunk_buf;
1771 PyObject *decoded_chars, *chunk_size;
1772 Py_ssize_t nbytes, nchars;
1773 int eof;
1774
1775 /* The return value is True unless EOF was reached. The decoded string is
1776 * placed in self._decoded_chars (replacing its previous value). The
1777 * entire input chunk is sent to the decoder, though some of it may remain
1778 * buffered in the decoder, yet to be converted.
1779 */
1780
1781 if (self->decoder == NULL) {
1782 _unsupported("not readable");
1783 return -1;
1784 }
1785
1786 if (self->telling) {
1787 /* To prepare for tell(), we need to snapshot a point in the file
1788 * where the decoder's input buffer is empty.
1789 */
1790 PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
1791 &_Py_ID(getstate));
1792 if (state == NULL)
1793 return -1;
1794 /* Given this, we know there was a valid snapshot point
1795 * len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1796 */
1797 if (!PyTuple_Check(state)) {
1798 PyErr_SetString(PyExc_TypeError,
1799 "illegal decoder state");
1800 Py_DECREF(state);
1801 return -1;
1802 }
1803 if (!PyArg_ParseTuple(state,
1804 "OO;illegal decoder state", &dec_buffer, &dec_flags))
1805 {
1806 Py_DECREF(state);
1807 return -1;
1808 }
1809
1810 if (!PyBytes_Check(dec_buffer)) {
1811 PyErr_Format(PyExc_TypeError,
1812 "illegal decoder state: the first item should be a "
1813 "bytes object, not '%.200s'",
1814 Py_TYPE(dec_buffer)->tp_name);
1815 Py_DECREF(state);
1816 return -1;
1817 }
1818 Py_INCREF(dec_buffer);
1819 Py_INCREF(dec_flags);
1820 Py_DECREF(state);
1821 }
1822
1823 /* Read a chunk, decode it, and put the result in self._decoded_chars. */
1824 if (size_hint > 0) {
1825 size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1826 }
1827 chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1828 if (chunk_size == NULL)
1829 goto fail;
1830
1831 input_chunk = PyObject_CallMethodOneArg(self->buffer,
1832 (self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
1833 chunk_size);
1834 Py_DECREF(chunk_size);
1835 if (input_chunk == NULL)
1836 goto fail;
1837
1838 if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1839 PyErr_Format(PyExc_TypeError,
1840 "underlying %s() should have returned a bytes-like object, "
1841 "not '%.200s'", (self->has_read1 ? "read1": "read"),
1842 Py_TYPE(input_chunk)->tp_name);
1843 goto fail;
1844 }
1845
1846 nbytes = input_chunk_buf.len;
1847 eof = (nbytes == 0);
1848
1849 decoded_chars = _textiowrapper_decode(self->decoder, input_chunk, eof);
1850 PyBuffer_Release(&input_chunk_buf);
1851 if (decoded_chars == NULL)
1852 goto fail;
1853
1854 textiowrapper_set_decoded_chars(self, decoded_chars);
1855 nchars = PyUnicode_GET_LENGTH(decoded_chars);
1856 if (nchars > 0)
1857 self->b2cratio = (double) nbytes / nchars;
1858 else
1859 self->b2cratio = 0.0;
1860 if (nchars > 0)
1861 eof = 0;
1862
1863 if (self->telling) {
1864 /* At the snapshot point, len(dec_buffer) bytes before the read, the
1865 * next input to be decoded is dec_buffer + input_chunk.
1866 */
1867 PyObject *next_input = dec_buffer;
1868 PyBytes_Concat(&next_input, input_chunk);
1869 dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
1870 if (next_input == NULL) {
1871 goto fail;
1872 }
1873 PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1874 if (snapshot == NULL) {
1875 dec_flags = NULL;
1876 goto fail;
1877 }
1878 Py_XSETREF(self->snapshot, snapshot);
1879 }
1880 Py_DECREF(input_chunk);
1881
1882 return (eof == 0);
1883
1884 fail:
1885 Py_XDECREF(dec_buffer);
1886 Py_XDECREF(dec_flags);
1887 Py_XDECREF(input_chunk);
1888 return -1;
1889 }
1890
1891 /*[clinic input]
1892 _io.TextIOWrapper.read
1893 size as n: Py_ssize_t(accept={int, NoneType}) = -1
1894 /
1895 [clinic start generated code]*/
1896
1897 static PyObject *
_io_TextIOWrapper_read_impl(textio * self,Py_ssize_t n)1898 _io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1899 /*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
1900 {
1901 PyObject *result = NULL, *chunks = NULL;
1902
1903 CHECK_ATTACHED(self);
1904 CHECK_CLOSED(self);
1905
1906 if (self->decoder == NULL)
1907 return _unsupported("not readable");
1908
1909 if (_textiowrapper_writeflush(self) < 0)
1910 return NULL;
1911
1912 if (n < 0) {
1913 /* Read everything */
1914 PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
1915 PyObject *decoded;
1916 if (bytes == NULL)
1917 goto fail;
1918
1919 if (Py_IS_TYPE(self->decoder, &PyIncrementalNewlineDecoder_Type))
1920 decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1921 bytes, 1);
1922 else
1923 decoded = PyObject_CallMethodObjArgs(
1924 self->decoder, &_Py_ID(decode), bytes, Py_True, NULL);
1925 Py_DECREF(bytes);
1926 if (check_decoded(decoded) < 0)
1927 goto fail;
1928
1929 result = textiowrapper_get_decoded_chars(self, -1);
1930
1931 if (result == NULL) {
1932 Py_DECREF(decoded);
1933 return NULL;
1934 }
1935
1936 PyUnicode_AppendAndDel(&result, decoded);
1937 if (result == NULL)
1938 goto fail;
1939
1940 textiowrapper_set_decoded_chars(self, NULL);
1941 Py_CLEAR(self->snapshot);
1942 return result;
1943 }
1944 else {
1945 int res = 1;
1946 Py_ssize_t remaining = n;
1947
1948 result = textiowrapper_get_decoded_chars(self, n);
1949 if (result == NULL)
1950 goto fail;
1951 if (PyUnicode_READY(result) == -1)
1952 goto fail;
1953 remaining -= PyUnicode_GET_LENGTH(result);
1954
1955 /* Keep reading chunks until we have n characters to return */
1956 while (remaining > 0) {
1957 res = textiowrapper_read_chunk(self, remaining);
1958 if (res < 0) {
1959 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1960 when EINTR occurs so we needn't do it ourselves. */
1961 if (_PyIO_trap_eintr()) {
1962 continue;
1963 }
1964 goto fail;
1965 }
1966 if (res == 0) /* EOF */
1967 break;
1968 if (chunks == NULL) {
1969 chunks = PyList_New(0);
1970 if (chunks == NULL)
1971 goto fail;
1972 }
1973 if (PyUnicode_GET_LENGTH(result) > 0 &&
1974 PyList_Append(chunks, result) < 0)
1975 goto fail;
1976 Py_DECREF(result);
1977 result = textiowrapper_get_decoded_chars(self, remaining);
1978 if (result == NULL)
1979 goto fail;
1980 remaining -= PyUnicode_GET_LENGTH(result);
1981 }
1982 if (chunks != NULL) {
1983 if (result != NULL && PyList_Append(chunks, result) < 0)
1984 goto fail;
1985 _Py_DECLARE_STR(empty, "");
1986 Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks));
1987 if (result == NULL)
1988 goto fail;
1989 Py_CLEAR(chunks);
1990 }
1991 return result;
1992 }
1993 fail:
1994 Py_XDECREF(result);
1995 Py_XDECREF(chunks);
1996 return NULL;
1997 }
1998
1999
2000 /* NOTE: `end` must point to the real end of the Py_UCS4 storage,
2001 that is to the NUL character. Otherwise the function will produce
2002 incorrect results. */
2003 static const char *
find_control_char(int kind,const char * s,const char * end,Py_UCS4 ch)2004 find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
2005 {
2006 if (kind == PyUnicode_1BYTE_KIND) {
2007 assert(ch < 256);
2008 return (char *) memchr((const void *) s, (char) ch, end - s);
2009 }
2010 for (;;) {
2011 while (PyUnicode_READ(kind, s, 0) > ch)
2012 s += kind;
2013 if (PyUnicode_READ(kind, s, 0) == ch)
2014 return s;
2015 if (s == end)
2016 return NULL;
2017 s += kind;
2018 }
2019 }
2020
2021 Py_ssize_t
_PyIO_find_line_ending(int translated,int universal,PyObject * readnl,int kind,const char * start,const char * end,Py_ssize_t * consumed)2022 _PyIO_find_line_ending(
2023 int translated, int universal, PyObject *readnl,
2024 int kind, const char *start, const char *end, Py_ssize_t *consumed)
2025 {
2026 Py_ssize_t len = (end - start)/kind;
2027
2028 if (translated) {
2029 /* Newlines are already translated, only search for \n */
2030 const char *pos = find_control_char(kind, start, end, '\n');
2031 if (pos != NULL)
2032 return (pos - start)/kind + 1;
2033 else {
2034 *consumed = len;
2035 return -1;
2036 }
2037 }
2038 else if (universal) {
2039 /* Universal newline search. Find any of \r, \r\n, \n
2040 * The decoder ensures that \r\n are not split in two pieces
2041 */
2042 const char *s = start;
2043 for (;;) {
2044 Py_UCS4 ch;
2045 /* Fast path for non-control chars. The loop always ends
2046 since the Unicode string is NUL-terminated. */
2047 while (PyUnicode_READ(kind, s, 0) > '\r')
2048 s += kind;
2049 if (s >= end) {
2050 *consumed = len;
2051 return -1;
2052 }
2053 ch = PyUnicode_READ(kind, s, 0);
2054 s += kind;
2055 if (ch == '\n')
2056 return (s - start)/kind;
2057 if (ch == '\r') {
2058 if (PyUnicode_READ(kind, s, 0) == '\n')
2059 return (s - start)/kind + 1;
2060 else
2061 return (s - start)/kind;
2062 }
2063 }
2064 }
2065 else {
2066 /* Non-universal mode. */
2067 Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2068 const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2069 /* Assume that readnl is an ASCII character. */
2070 assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2071 if (readnl_len == 1) {
2072 const char *pos = find_control_char(kind, start, end, nl[0]);
2073 if (pos != NULL)
2074 return (pos - start)/kind + 1;
2075 *consumed = len;
2076 return -1;
2077 }
2078 else {
2079 const char *s = start;
2080 const char *e = end - (readnl_len - 1)*kind;
2081 const char *pos;
2082 if (e < s)
2083 e = s;
2084 while (s < e) {
2085 Py_ssize_t i;
2086 const char *pos = find_control_char(kind, s, end, nl[0]);
2087 if (pos == NULL || pos >= e)
2088 break;
2089 for (i = 1; i < readnl_len; i++) {
2090 if (PyUnicode_READ(kind, pos, i) != nl[i])
2091 break;
2092 }
2093 if (i == readnl_len)
2094 return (pos - start)/kind + readnl_len;
2095 s = pos + kind;
2096 }
2097 pos = find_control_char(kind, e, end, nl[0]);
2098 if (pos == NULL)
2099 *consumed = len;
2100 else
2101 *consumed = (pos - start)/kind;
2102 return -1;
2103 }
2104 }
2105 }
2106
2107 static PyObject *
_textiowrapper_readline(textio * self,Py_ssize_t limit)2108 _textiowrapper_readline(textio *self, Py_ssize_t limit)
2109 {
2110 PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2111 Py_ssize_t start, endpos, chunked, offset_to_buffer;
2112 int res;
2113
2114 CHECK_CLOSED(self);
2115
2116 if (_textiowrapper_writeflush(self) < 0)
2117 return NULL;
2118
2119 chunked = 0;
2120
2121 while (1) {
2122 const char *ptr;
2123 Py_ssize_t line_len;
2124 int kind;
2125 Py_ssize_t consumed = 0;
2126
2127 /* First, get some data if necessary */
2128 res = 1;
2129 while (!self->decoded_chars ||
2130 !PyUnicode_GET_LENGTH(self->decoded_chars)) {
2131 res = textiowrapper_read_chunk(self, 0);
2132 if (res < 0) {
2133 /* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2134 when EINTR occurs so we needn't do it ourselves. */
2135 if (_PyIO_trap_eintr()) {
2136 continue;
2137 }
2138 goto error;
2139 }
2140 if (res == 0)
2141 break;
2142 }
2143 if (res == 0) {
2144 /* end of file */
2145 textiowrapper_set_decoded_chars(self, NULL);
2146 Py_CLEAR(self->snapshot);
2147 start = endpos = offset_to_buffer = 0;
2148 break;
2149 }
2150
2151 if (remaining == NULL) {
2152 line = self->decoded_chars;
2153 start = self->decoded_chars_used;
2154 offset_to_buffer = 0;
2155 Py_INCREF(line);
2156 }
2157 else {
2158 assert(self->decoded_chars_used == 0);
2159 line = PyUnicode_Concat(remaining, self->decoded_chars);
2160 start = 0;
2161 offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
2162 Py_CLEAR(remaining);
2163 if (line == NULL)
2164 goto error;
2165 if (PyUnicode_READY(line) == -1)
2166 goto error;
2167 }
2168
2169 ptr = PyUnicode_DATA(line);
2170 line_len = PyUnicode_GET_LENGTH(line);
2171 kind = PyUnicode_KIND(line);
2172
2173 endpos = _PyIO_find_line_ending(
2174 self->readtranslate, self->readuniversal, self->readnl,
2175 kind,
2176 ptr + kind * start,
2177 ptr + kind * line_len,
2178 &consumed);
2179 if (endpos >= 0) {
2180 endpos += start;
2181 if (limit >= 0 && (endpos - start) + chunked >= limit)
2182 endpos = start + limit - chunked;
2183 break;
2184 }
2185
2186 /* We can put aside up to `endpos` */
2187 endpos = consumed + start;
2188 if (limit >= 0 && (endpos - start) + chunked >= limit) {
2189 /* Didn't find line ending, but reached length limit */
2190 endpos = start + limit - chunked;
2191 break;
2192 }
2193
2194 if (endpos > start) {
2195 /* No line ending seen yet - put aside current data */
2196 PyObject *s;
2197 if (chunks == NULL) {
2198 chunks = PyList_New(0);
2199 if (chunks == NULL)
2200 goto error;
2201 }
2202 s = PyUnicode_Substring(line, start, endpos);
2203 if (s == NULL)
2204 goto error;
2205 if (PyList_Append(chunks, s) < 0) {
2206 Py_DECREF(s);
2207 goto error;
2208 }
2209 chunked += PyUnicode_GET_LENGTH(s);
2210 Py_DECREF(s);
2211 }
2212 /* There may be some remaining bytes we'll have to prepend to the
2213 next chunk of data */
2214 if (endpos < line_len) {
2215 remaining = PyUnicode_Substring(line, endpos, line_len);
2216 if (remaining == NULL)
2217 goto error;
2218 }
2219 Py_CLEAR(line);
2220 /* We have consumed the buffer */
2221 textiowrapper_set_decoded_chars(self, NULL);
2222 }
2223
2224 if (line != NULL) {
2225 /* Our line ends in the current buffer */
2226 self->decoded_chars_used = endpos - offset_to_buffer;
2227 if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2228 PyObject *s = PyUnicode_Substring(line, start, endpos);
2229 Py_CLEAR(line);
2230 if (s == NULL)
2231 goto error;
2232 line = s;
2233 }
2234 }
2235 if (remaining != NULL) {
2236 if (chunks == NULL) {
2237 chunks = PyList_New(0);
2238 if (chunks == NULL)
2239 goto error;
2240 }
2241 if (PyList_Append(chunks, remaining) < 0)
2242 goto error;
2243 Py_CLEAR(remaining);
2244 }
2245 if (chunks != NULL) {
2246 if (line != NULL) {
2247 if (PyList_Append(chunks, line) < 0)
2248 goto error;
2249 Py_DECREF(line);
2250 }
2251 line = PyUnicode_Join(&_Py_STR(empty), chunks);
2252 if (line == NULL)
2253 goto error;
2254 Py_CLEAR(chunks);
2255 }
2256 if (line == NULL) {
2257 line = Py_NewRef(&_Py_STR(empty));
2258 }
2259
2260 return line;
2261
2262 error:
2263 Py_XDECREF(chunks);
2264 Py_XDECREF(remaining);
2265 Py_XDECREF(line);
2266 return NULL;
2267 }
2268
2269 /*[clinic input]
2270 _io.TextIOWrapper.readline
2271 size: Py_ssize_t = -1
2272 /
2273 [clinic start generated code]*/
2274
2275 static PyObject *
_io_TextIOWrapper_readline_impl(textio * self,Py_ssize_t size)2276 _io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2277 /*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2278 {
2279 CHECK_ATTACHED(self);
2280 return _textiowrapper_readline(self, size);
2281 }
2282
2283 /* Seek and Tell */
2284
/* Unpacked form of the integer "cookie" returned by tell() and accepted by
   seek().  textiowrapper_build_cookie() / textiowrapper_parse_cookie()
   convert between this struct and a Python int. */
typedef struct {
    Py_off_t start_pos;     /* position handed to buffer.seek() by seek() */
    int dec_flags;          /* flags part of the decoder state to restore */
    int bytes_to_feed;      /* size passed to buffer.read() when replaying */
    int chars_to_skip;      /* decoded characters to mark as already used */
    char need_eof;          /* nonzero: replay calls decode() with final=True */
} cookie_type;
2292
/*
   To speed up cookie packing/unpacking, we store the fields in a temporary
   byte string and call _PyLong_FromByteArray() or _PyLong_AsByteArray()
   (respectively).
   The following macros define at which offsets in the intermediary byte
   string the various cookie_type fields will be stored.
*/

/* Size of the packed representation: the five cookie_type fields laid out
   back to back, without any struct padding. */
#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* We want the least significant byte of start_pos to also be the least
   significant byte of the cookie, which means that in big-endian mode we
   must copy the fields in reverse order. */

# define OFF_START_POS      (sizeof(char) + 3 * sizeof(int))
# define OFF_DEC_FLAGS      (sizeof(char) + 2 * sizeof(int))
# define OFF_BYTES_TO_FEED  (sizeof(char) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(char))
# define OFF_NEED_EOF       0

#else
/* Little-endian mode: the least significant byte of start_pos will
   naturally end up the least significant byte of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (sizeof(Py_off_t) + sizeof(int))
# define OFF_CHARS_TO_SKIP  (sizeof(Py_off_t) + 2 * sizeof(int))
# define OFF_NEED_EOF       (sizeof(Py_off_t) + 3 * sizeof(int))

#endif
2324
2325 static int
textiowrapper_parse_cookie(cookie_type * cookie,PyObject * cookieObj)2326 textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2327 {
2328 unsigned char buffer[COOKIE_BUF_LEN];
2329 PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2330 if (cookieLong == NULL)
2331 return -1;
2332
2333 if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2334 PY_LITTLE_ENDIAN, 0) < 0) {
2335 Py_DECREF(cookieLong);
2336 return -1;
2337 }
2338 Py_DECREF(cookieLong);
2339
2340 memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2341 memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2342 memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2343 memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2344 memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2345
2346 return 0;
2347 }
2348
2349 static PyObject *
textiowrapper_build_cookie(cookie_type * cookie)2350 textiowrapper_build_cookie(cookie_type *cookie)
2351 {
2352 unsigned char buffer[COOKIE_BUF_LEN];
2353
2354 memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2355 memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2356 memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2357 memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2358 memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2359
2360 return _PyLong_FromByteArray(buffer, sizeof(buffer),
2361 PY_LITTLE_ENDIAN, 0);
2362 }
2363
2364 static int
_textiowrapper_decoder_setstate(textio * self,cookie_type * cookie)2365 _textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2366 {
2367 PyObject *res;
2368 /* When seeking to the start of the stream, we call decoder.reset()
2369 rather than decoder.getstate().
2370 This is for a few decoders such as utf-16 for which the state value
2371 at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2372 utf-16, that we are expecting a BOM).
2373 */
2374 if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2375 res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2376 }
2377 else {
2378 res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
2379 "((yi))", "", cookie->dec_flags);
2380 }
2381 if (res == NULL) {
2382 return -1;
2383 }
2384 Py_DECREF(res);
2385 return 0;
2386 }
2387
2388 static int
_textiowrapper_encoder_reset(textio * self,int start_of_stream)2389 _textiowrapper_encoder_reset(textio *self, int start_of_stream)
2390 {
2391 PyObject *res;
2392 if (start_of_stream) {
2393 res = PyObject_CallMethodNoArgs(self->encoder, &_Py_ID(reset));
2394 self->encoding_start_of_stream = 1;
2395 }
2396 else {
2397 res = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(setstate),
2398 _PyLong_GetZero());
2399 self->encoding_start_of_stream = 0;
2400 }
2401 if (res == NULL)
2402 return -1;
2403 Py_DECREF(res);
2404 return 0;
2405 }
2406
2407 static int
_textiowrapper_encoder_setstate(textio * self,cookie_type * cookie)2408 _textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2409 {
2410 /* Same as _textiowrapper_decoder_setstate() above. */
2411 return _textiowrapper_encoder_reset(
2412 self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2413 }
2414
2415 /*[clinic input]
2416 _io.TextIOWrapper.seek
2417 cookie as cookieObj: object
2418 whence: int = 0
2419 /
2420 [clinic start generated code]*/
2421
static PyObject *
_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
{
    /* Reposition the text stream.

       whence == SEEK_CUR: only a zero cookie is accepted; it is replaced by
           the result of self.tell() and then handled like SEEK_SET.
       whence == SEEK_END: only a zero cookie is accepted; the buffer is
           moved with buffer.seek(0, 2) and that call's result is returned.
       whence == SEEK_SET: `cookieObj` is unpacked with
           textiowrapper_parse_cookie(), the buffer is moved to the recorded
           start point and the decoder/encoder state is restored.
       Any other whence raises ValueError.

       Returns a new reference on success, NULL with an exception set on
       failure. */
    PyObject *posobj;
    cookie_type cookie;
    PyObject *res;
    int cmp;
    PyObject *snapshot;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    /* Hold our own reference: the success path returns cookieObj and the
       `fail` label releases it. */
    Py_INCREF(cookieObj);

    if (!self->seekable) {
        _unsupported("underlying stream is not seekable");
        goto fail;
    }

    PyObject *zero = _PyLong_GetZero();  // borrowed reference

    switch (whence) {
    case SEEK_CUR:
        /* seek relative to current position */
        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
        if (cmp < 0)
            goto fail;

        if (cmp == 0) {
            _unsupported("can't do nonzero cur-relative seeks");
            goto fail;
        }

        /* Seeking to the current position should attempt to
         * sync the underlying buffer with the current position.
         */
        Py_DECREF(cookieObj);
        cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell));
        if (cookieObj == NULL)
            goto fail;
        break;

    case SEEK_END:
        /* seek relative to end of file */
        cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
        if (cmp < 0)
            goto fail;

        if (cmp == 0) {
            _unsupported("can't do nonzero end-relative seeks");
            goto fail;
        }

        res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
        if (res == NULL)
            goto fail;
        Py_DECREF(res);

        /* Drop all buffered decoded text and the snapshot; they describe
           the old position. */
        textiowrapper_set_decoded_chars(self, NULL);
        Py_CLEAR(self->snapshot);
        if (self->decoder) {
            res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
            if (res == NULL)
                goto fail;
            Py_DECREF(res);
        }

        res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
        /* This branch returns `res`, not the cookie; clearing it here makes
           the Py_XDECREF at `fail` a no-op for the paths below. */
        Py_CLEAR(cookieObj);
        if (res == NULL)
            goto fail;
        if (self->encoder) {
            /* If seek() == 0, we are at the start of stream, otherwise not */
            cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
            if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
                Py_DECREF(res);
                goto fail;
            }
        }
        return res;

    case SEEK_SET:
        break;

    default:
        PyErr_Format(PyExc_ValueError,
                     "invalid whence (%d, should be %d, %d or %d)", whence,
                     SEEK_SET, SEEK_CUR, SEEK_END);
        goto fail;
    }

    /* From here on: absolute seek to the position described by cookieObj
       (reached for SEEK_SET, and for SEEK_CUR with the tell() result). */
    cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
    if (cmp < 0)
        goto fail;

    if (cmp == 1) {
        PyErr_Format(PyExc_ValueError,
                     "negative seek position %R", cookieObj);
        goto fail;
    }

    res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    /* The strategy of seek() is to go back to the safe start point
     * and replay the effect of read(chars_to_skip) from there.
     */
    if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
        goto fail;

    /* Seek back to the safe start point. */
    posobj = PyLong_FromOff_t(cookie.start_pos);
    if (posobj == NULL)
        goto fail;
    res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj);
    Py_DECREF(posobj);
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    textiowrapper_set_decoded_chars(self, NULL);
    Py_CLEAR(self->snapshot);

    /* Restore the decoder to its state from the safe start point. */
    if (self->decoder) {
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
    }

    if (cookie.chars_to_skip) {
        /* Just like _read_chunk, feed the decoder and save a snapshot. */
        PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
                                                     "i", cookie.bytes_to_feed);
        PyObject *decoded;

        if (input_chunk == NULL)
            goto fail;

        if (!PyBytes_Check(input_chunk)) {
            PyErr_Format(PyExc_TypeError,
                         "underlying read() should have returned a bytes "
                         "object, not '%.200s'",
                         Py_TYPE(input_chunk)->tp_name);
            Py_DECREF(input_chunk);
            goto fail;
        }

        /* "N" transfers our reference to input_chunk into the tuple, which
           is why there is no Py_DECREF(input_chunk) below; the chunk is then
           kept alive through self->snapshot. */
        snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
        if (snapshot == NULL) {
            goto fail;
        }
        Py_XSETREF(self->snapshot, snapshot);

        decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode),
            input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);

        if (check_decoded(decoded) < 0)
            goto fail;

        textiowrapper_set_decoded_chars(self, decoded);

        /* Skip chars_to_skip of the decoded characters. */
        if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
            PyErr_SetString(PyExc_OSError, "can't restore logical file position");
            goto fail;
        }
        self->decoded_chars_used = cookie.chars_to_skip;
    }
    else {
        /* Nothing to replay: record a snapshot with an empty input chunk. */
        snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
        if (snapshot == NULL)
            goto fail;
        Py_XSETREF(self->snapshot, snapshot);
    }

    /* Finally, reset the encoder (merely useful for proper BOM handling) */
    if (self->encoder) {
        if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
            goto fail;
    }
    return cookieObj;
  fail:
    Py_XDECREF(cookieObj);
    return NULL;

}
2611
2612 /*[clinic input]
2613 _io.TextIOWrapper.tell
2614 [clinic start generated code]*/
2615
static PyObject *
_io_TextIOWrapper_tell_impl(textio *self)
/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
{
    /* Compute a cookie describing the current logical position.

       When there is no decoder or no snapshot, the result of buffer.tell()
       is returned as is.  Otherwise the function starts from the snapshot
       (decoder flags + the bytes that were fed to the decoder), re-decodes
       part of those bytes to find the nearest point where the decoder holds
       no buffered bytes, and packs that point plus the remaining
       bytes_to_feed / chars_to_skip / need_eof into a cookie.

       The decoder state is saved before the search and restored on both the
       success (`finally`) and error (`fail`) paths.

       Returns a new reference, or NULL with an exception set. */
    PyObject *res;
    PyObject *posobj = NULL;
    cookie_type cookie = {0,0,0,0,0};
    PyObject *next_input;
    Py_ssize_t chars_to_skip, chars_decoded;
    Py_ssize_t skip_bytes, skip_back;
    PyObject *saved_state = NULL;
    const char *input, *input_end;
    Py_ssize_t dec_buffer_len;
    int dec_flags;

    CHECK_ATTACHED(self);
    CHECK_CLOSED(self);

    if (!self->seekable) {
        _unsupported("underlying stream is not seekable");
        goto fail;
    }
    if (!self->telling) {
        PyErr_SetString(PyExc_OSError,
                        "telling position disabled by next() call");
        goto fail;
    }

    if (_textiowrapper_writeflush(self) < 0)
        return NULL;
    res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
    if (res == NULL)
        goto fail;
    Py_DECREF(res);

    posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
    if (posobj == NULL)
        goto fail;

    if (self->decoder == NULL || self->snapshot == NULL) {
        /* No decoding state to account for: the buffer position is the
           answer. */
        assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
        return posobj;
    }

#if defined(HAVE_LARGEFILE_SUPPORT)
    cookie.start_pos = PyLong_AsLongLong(posobj);
#else
    cookie.start_pos = PyLong_AsLong(posobj);
#endif
    Py_DECREF(posobj);
    /* Conversion errors are detected via the error indicator. */
    if (PyErr_Occurred())
        goto fail;

    /* Skip backward to the snapshot point (see _read_chunk). */
    assert(PyTuple_Check(self->snapshot));
    /* next_input is a borrowed reference owned by the snapshot tuple. */
    if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
        goto fail;

    assert (PyBytes_Check(next_input));

    cookie.start_pos -= PyBytes_GET_SIZE(next_input);

    /* How many decoded characters have been used up since the snapshot? */
    if (self->decoded_chars_used == 0)  {
        /* We haven't moved from the snapshot point. */
        return textiowrapper_build_cookie(&cookie);
    }

    chars_to_skip = self->decoded_chars_used;

    /* Decoder state will be restored at the end */
    saved_state = PyObject_CallMethodNoArgs(self->decoder,
                                             &_Py_ID(getstate));
    if (saved_state == NULL)
        goto fail;

/* Query decoder.getstate(), validate that it is a (bytes, int) tuple and
   store the length of the bytes part in dec_buffer_len and the int part in
   dec_flags.  Jumps to `fail` on any error. */
#define DECODER_GETSTATE() do { \
        PyObject *dec_buffer; \
        PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
            &_Py_ID(getstate)); \
        if (_state == NULL) \
            goto fail; \
        if (!PyTuple_Check(_state)) { \
            PyErr_SetString(PyExc_TypeError, \
                            "illegal decoder state"); \
            Py_DECREF(_state); \
            goto fail; \
        } \
        if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
                              &dec_buffer, &dec_flags)) \
        { \
            Py_DECREF(_state); \
            goto fail; \
        } \
        if (!PyBytes_Check(dec_buffer)) { \
            PyErr_Format(PyExc_TypeError, \
                         "illegal decoder state: the first item should be a " \
                         "bytes object, not '%.200s'", \
                         Py_TYPE(dec_buffer)->tp_name); \
            Py_DECREF(_state); \
            goto fail; \
        } \
        dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
        Py_DECREF(_state); \
    } while (0)

/* Feed `len` bytes starting at `start` to decoder.decode() and store the
   number of characters produced in `res`.  Jumps to `fail` when
   check_decoded() rejects the result. */
#define DECODER_DECODE(start, len, res) do { \
        PyObject *_decoded = _PyObject_CallMethod( \
            self->decoder, &_Py_ID(decode), "y#", start, len); \
        if (check_decoded(_decoded) < 0) \
            goto fail; \
        res = PyUnicode_GET_LENGTH(_decoded); \
        Py_DECREF(_decoded); \
    } while (0)

    /* Fast search for an acceptable start point, close to our
       current pos */
    skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
    skip_back = 1;
    assert(skip_back <= PyBytes_GET_SIZE(next_input));
    input = PyBytes_AS_STRING(next_input);
    while (skip_bytes > 0) {
        /* Decode up to tentative start point */
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
        DECODER_DECODE(input, skip_bytes, chars_decoded);
        if (chars_decoded <= chars_to_skip) {
            DECODER_GETSTATE();
            if (dec_buffer_len == 0) {
                /* Before pos and no bytes buffered in decoder => OK */
                cookie.dec_flags = dec_flags;
                chars_to_skip -= chars_decoded;
                break;
            }
            /* Skip back by buffered amount and reset heuristic */
            skip_bytes -= dec_buffer_len;
            skip_back = 1;
        }
        else {
            /* We're too far ahead, skip back a bit */
            skip_bytes -= skip_back;
            skip_back *= 2;
        }
    }
    if (skip_bytes <= 0) {
        /* The fast search found nothing: start over from the snapshot
           point itself. */
        skip_bytes = 0;
        if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
            goto fail;
    }

    /* Note our initial start point. */
    cookie.start_pos += skip_bytes;
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    if (chars_to_skip == 0)
        goto finally;

    /* We should be close to the desired position.  Now feed the decoder one
     * byte at a time until we reach the `chars_to_skip` target.
     * As we go, note the nearest "safe start point" before the current
     * location (a point where the decoder has nothing buffered, so seek()
     * can safely start from there and advance to this location).
     */
    chars_decoded = 0;
    input = PyBytes_AS_STRING(next_input);
    input_end = input + PyBytes_GET_SIZE(next_input);
    input += skip_bytes;
    while (input < input_end) {
        Py_ssize_t n;

        DECODER_DECODE(input, (Py_ssize_t)1, n);
        /* We got n chars for 1 byte */
        chars_decoded += n;
        cookie.bytes_to_feed += 1;
        DECODER_GETSTATE();

        if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
            /* Decoder buffer is empty, so this is a safe start point. */
            cookie.start_pos += cookie.bytes_to_feed;
            chars_to_skip -= chars_decoded;
            cookie.dec_flags = dec_flags;
            cookie.bytes_to_feed = 0;
            chars_decoded = 0;
        }
        if (chars_decoded >= chars_to_skip)
            break;
        input++;
    }
    if (input == input_end) {
        /* We didn't get enough decoded data; signal EOF to get more. */
        PyObject *decoded = _PyObject_CallMethod(
            self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True);
        if (check_decoded(decoded) < 0)
            goto fail;
        chars_decoded += PyUnicode_GET_LENGTH(decoded);
        Py_DECREF(decoded);
        cookie.need_eof = 1;

        if (chars_decoded < chars_to_skip) {
            PyErr_SetString(PyExc_OSError,
                            "can't reconstruct logical file position");
            goto fail;
        }
    }

finally:
    /* Success path: put the decoder back into the state saved above. */
    res = PyObject_CallMethodOneArg(
            self->decoder, &_Py_ID(setstate), saved_state);
    Py_DECREF(saved_state);
    if (res == NULL)
        return NULL;
    Py_DECREF(res);

    /* The returned cookie corresponds to the last safe start point. */
    cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
    return textiowrapper_build_cookie(&cookie);

fail:
    if (saved_state) {
        /* Error path: still try to restore the decoder.  The pending
           exception is set aside around the setstate() call and handed to
           _PyErr_ChainExceptions() afterwards. */
        PyObject *type, *value, *traceback;
        PyErr_Fetch(&type, &value, &traceback);
        res = PyObject_CallMethodOneArg(
                self->decoder, &_Py_ID(setstate), saved_state);
        _PyErr_ChainExceptions(type, value, traceback);
        Py_DECREF(saved_state);
        Py_XDECREF(res);
    }
    return NULL;
}
2844
2845 /*[clinic input]
2846 _io.TextIOWrapper.truncate
2847 pos: object = None
2848 /
2849 [clinic start generated code]*/
2850
2851 static PyObject *
_io_TextIOWrapper_truncate_impl(textio * self,PyObject * pos)2852 _io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2853 /*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
2854 {
2855 PyObject *res;
2856
2857 CHECK_ATTACHED(self)
2858
2859 res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
2860 if (res == NULL)
2861 return NULL;
2862 Py_DECREF(res);
2863
2864 return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
2865 }
2866
2867 static PyObject *
textiowrapper_repr(textio * self)2868 textiowrapper_repr(textio *self)
2869 {
2870 PyObject *nameobj, *modeobj, *res, *s;
2871 int status;
2872
2873 CHECK_INITIALIZED(self);
2874
2875 res = PyUnicode_FromString("<_io.TextIOWrapper");
2876 if (res == NULL)
2877 return NULL;
2878
2879 status = Py_ReprEnter((PyObject *)self);
2880 if (status != 0) {
2881 if (status > 0) {
2882 PyErr_Format(PyExc_RuntimeError,
2883 "reentrant call inside %s.__repr__",
2884 Py_TYPE(self)->tp_name);
2885 }
2886 goto error;
2887 }
2888 if (_PyObject_LookupAttr((PyObject *) self, &_Py_ID(name), &nameobj) < 0) {
2889 if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
2890 goto error;
2891 }
2892 /* Ignore ValueError raised if the underlying stream was detached */
2893 PyErr_Clear();
2894 }
2895 if (nameobj != NULL) {
2896 s = PyUnicode_FromFormat(" name=%R", nameobj);
2897 Py_DECREF(nameobj);
2898 if (s == NULL)
2899 goto error;
2900 PyUnicode_AppendAndDel(&res, s);
2901 if (res == NULL)
2902 goto error;
2903 }
2904 if (_PyObject_LookupAttr((PyObject *) self, &_Py_ID(mode), &modeobj) < 0) {
2905 goto error;
2906 }
2907 if (modeobj != NULL) {
2908 s = PyUnicode_FromFormat(" mode=%R", modeobj);
2909 Py_DECREF(modeobj);
2910 if (s == NULL)
2911 goto error;
2912 PyUnicode_AppendAndDel(&res, s);
2913 if (res == NULL)
2914 goto error;
2915 }
2916 s = PyUnicode_FromFormat("%U encoding=%R>",
2917 res, self->encoding);
2918 Py_DECREF(res);
2919 if (status == 0) {
2920 Py_ReprLeave((PyObject *)self);
2921 }
2922 return s;
2923
2924 error:
2925 Py_XDECREF(res);
2926 if (status == 0) {
2927 Py_ReprLeave((PyObject *)self);
2928 }
2929 return NULL;
2930 }
2931
2932
2933 /* Inquiries */
2934
2935 /*[clinic input]
2936 _io.TextIOWrapper.fileno
2937 [clinic start generated code]*/
2938
2939 static PyObject *
_io_TextIOWrapper_fileno_impl(textio * self)2940 _io_TextIOWrapper_fileno_impl(textio *self)
2941 /*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
2942 {
2943 CHECK_ATTACHED(self);
2944 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
2945 }
2946
2947 /*[clinic input]
2948 _io.TextIOWrapper.seekable
2949 [clinic start generated code]*/
2950
2951 static PyObject *
_io_TextIOWrapper_seekable_impl(textio * self)2952 _io_TextIOWrapper_seekable_impl(textio *self)
2953 /*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
2954 {
2955 CHECK_ATTACHED(self);
2956 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
2957 }
2958
2959 /*[clinic input]
2960 _io.TextIOWrapper.readable
2961 [clinic start generated code]*/
2962
2963 static PyObject *
_io_TextIOWrapper_readable_impl(textio * self)2964 _io_TextIOWrapper_readable_impl(textio *self)
2965 /*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
2966 {
2967 CHECK_ATTACHED(self);
2968 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
2969 }
2970
2971 /*[clinic input]
2972 _io.TextIOWrapper.writable
2973 [clinic start generated code]*/
2974
2975 static PyObject *
_io_TextIOWrapper_writable_impl(textio * self)2976 _io_TextIOWrapper_writable_impl(textio *self)
2977 /*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
2978 {
2979 CHECK_ATTACHED(self);
2980 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
2981 }
2982
2983 /*[clinic input]
2984 _io.TextIOWrapper.isatty
2985 [clinic start generated code]*/
2986
2987 static PyObject *
_io_TextIOWrapper_isatty_impl(textio * self)2988 _io_TextIOWrapper_isatty_impl(textio *self)
2989 /*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
2990 {
2991 CHECK_ATTACHED(self);
2992 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
2993 }
2994
2995 /*[clinic input]
2996 _io.TextIOWrapper.flush
2997 [clinic start generated code]*/
2998
2999 static PyObject *
_io_TextIOWrapper_flush_impl(textio * self)3000 _io_TextIOWrapper_flush_impl(textio *self)
3001 /*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
3002 {
3003 CHECK_ATTACHED(self);
3004 CHECK_CLOSED(self);
3005 self->telling = self->seekable;
3006 if (_textiowrapper_writeflush(self) < 0)
3007 return NULL;
3008 return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
3009 }
3010
3011 /*[clinic input]
3012 _io.TextIOWrapper.close
3013 [clinic start generated code]*/
3014
3015 static PyObject *
_io_TextIOWrapper_close_impl(textio * self)3016 _io_TextIOWrapper_close_impl(textio *self)
3017 /*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
3018 {
3019 PyObject *res;
3020 int r;
3021 CHECK_ATTACHED(self);
3022
3023 res = textiowrapper_closed_get(self, NULL);
3024 if (res == NULL)
3025 return NULL;
3026 r = PyObject_IsTrue(res);
3027 Py_DECREF(res);
3028 if (r < 0)
3029 return NULL;
3030
3031 if (r > 0) {
3032 Py_RETURN_NONE; /* stream already closed */
3033 }
3034 else {
3035 PyObject *exc = NULL, *val, *tb;
3036 if (self->finalizing) {
3037 res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
3038 (PyObject *)self);
3039 if (res)
3040 Py_DECREF(res);
3041 else
3042 PyErr_Clear();
3043 }
3044 res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
3045 if (res == NULL)
3046 PyErr_Fetch(&exc, &val, &tb);
3047 else
3048 Py_DECREF(res);
3049
3050 res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
3051 if (exc != NULL) {
3052 _PyErr_ChainExceptions(exc, val, tb);
3053 Py_CLEAR(res);
3054 }
3055 return res;
3056 }
3057 }
3058
3059 static PyObject *
textiowrapper_iternext(textio * self)3060 textiowrapper_iternext(textio *self)
3061 {
3062 PyObject *line;
3063
3064 CHECK_ATTACHED(self);
3065
3066 self->telling = 0;
3067 if (Py_IS_TYPE(self, &PyTextIOWrapper_Type)) {
3068 /* Skip method call overhead for speed */
3069 line = _textiowrapper_readline(self, -1);
3070 }
3071 else {
3072 line = PyObject_CallMethodNoArgs((PyObject *)self,
3073 &_Py_ID(readline));
3074 if (line && !PyUnicode_Check(line)) {
3075 PyErr_Format(PyExc_OSError,
3076 "readline() should have returned a str object, "
3077 "not '%.200s'", Py_TYPE(line)->tp_name);
3078 Py_DECREF(line);
3079 return NULL;
3080 }
3081 }
3082
3083 if (line == NULL || PyUnicode_READY(line) == -1)
3084 return NULL;
3085
3086 if (PyUnicode_GET_LENGTH(line) == 0) {
3087 /* Reached EOF or would have blocked */
3088 Py_DECREF(line);
3089 Py_CLEAR(self->snapshot);
3090 self->telling = self->seekable;
3091 return NULL;
3092 }
3093
3094 return line;
3095 }
3096
3097 static PyObject *
textiowrapper_name_get(textio * self,void * context)3098 textiowrapper_name_get(textio *self, void *context)
3099 {
3100 CHECK_ATTACHED(self);
3101 return PyObject_GetAttr(self->buffer, &_Py_ID(name));
3102 }
3103
3104 static PyObject *
textiowrapper_closed_get(textio * self,void * context)3105 textiowrapper_closed_get(textio *self, void *context)
3106 {
3107 CHECK_ATTACHED(self);
3108 return PyObject_GetAttr(self->buffer, &_Py_ID(closed));
3109 }
3110
3111 static PyObject *
textiowrapper_newlines_get(textio * self,void * context)3112 textiowrapper_newlines_get(textio *self, void *context)
3113 {
3114 PyObject *res;
3115 CHECK_ATTACHED(self);
3116 if (self->decoder == NULL ||
3117 _PyObject_LookupAttr(self->decoder, &_Py_ID(newlines), &res) == 0)
3118 {
3119 Py_RETURN_NONE;
3120 }
3121 return res;
3122 }
3123
3124 static PyObject *
textiowrapper_errors_get(textio * self,void * context)3125 textiowrapper_errors_get(textio *self, void *context)
3126 {
3127 CHECK_INITIALIZED(self);
3128 Py_INCREF(self->errors);
3129 return self->errors;
3130 }
3131
3132 static PyObject *
textiowrapper_chunk_size_get(textio * self,void * context)3133 textiowrapper_chunk_size_get(textio *self, void *context)
3134 {
3135 CHECK_ATTACHED(self);
3136 return PyLong_FromSsize_t(self->chunk_size);
3137 }
3138
3139 static int
textiowrapper_chunk_size_set(textio * self,PyObject * arg,void * context)3140 textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
3141 {
3142 Py_ssize_t n;
3143 CHECK_ATTACHED_INT(self);
3144 if (arg == NULL) {
3145 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3146 return -1;
3147 }
3148 n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
3149 if (n == -1 && PyErr_Occurred())
3150 return -1;
3151 if (n <= 0) {
3152 PyErr_SetString(PyExc_ValueError,
3153 "a strictly positive integer is required");
3154 return -1;
3155 }
3156 self->chunk_size = n;
3157 return 0;
3158 }
3159
3160 #include "clinic/textio.c.h"
3161
/* Method table of IncrementalNewlineDecoder (installed as tp_methods of
   PyIncrementalNewlineDecoder_Type).  The entries are *_METHODDEF macros,
   presumably provided by the clinic header included above. */
static PyMethodDef incrementalnewlinedecoder_methods[] = {
    _IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
    _IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
    {NULL}  /* sentinel */
};
3169
/* Computed attributes of IncrementalNewlineDecoder; `newlines` has a getter
   only (no setter is registered). */
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
    {"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
    {NULL}  /* sentinel */
};
3174
/* Static type object for _io.IncrementalNewlineDecoder.

   The type is subclassable (Py_TPFLAGS_BASETYPE) and does not set
   Py_TPFLAGS_HAVE_GC; tp_traverse and tp_clear are left empty.  Instances
   are created with PyType_GenericNew and initialized by the
   clinic-generated __init__. */
PyTypeObject PyIncrementalNewlineDecoder_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_io.IncrementalNewlineDecoder", /*tp_name*/
    sizeof(nldecoder_object), /*tp_basicsize*/
    0,                          /*tp_itemsize*/
    (destructor)incrementalnewlinedecoder_dealloc, /*tp_dealloc*/
    0,                          /*tp_vectorcall_offset*/
    0,                          /*tp_getattr*/
    0,                          /*tp_setattr*/
    0,                          /*tp_as_async*/
    0,                          /*tp_repr*/
    0,                          /*tp_as_number*/
    0,                          /*tp_as_sequence*/
    0,                          /*tp_as_mapping*/
    0,                          /*tp_hash */
    0,                          /*tp_call*/
    0,                          /*tp_str*/
    0,                          /*tp_getattro*/
    0,                          /*tp_setattro*/
    0,                          /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
    _io_IncrementalNewlineDecoder___init____doc__, /* tp_doc */
    0,                          /* tp_traverse */
    0,                          /* tp_clear */
    0,                          /* tp_richcompare */
    0,                          /*tp_weaklistoffset*/
    0,                          /* tp_iter */
    0,                          /* tp_iternext */
    incrementalnewlinedecoder_methods, /* tp_methods */
    0,                          /* tp_members */
    incrementalnewlinedecoder_getset, /* tp_getset */
    0,                          /* tp_base */
    0,                          /* tp_dict */
    0,                          /* tp_descr_get */
    0,                          /* tp_descr_set */
    0,                          /* tp_dictoffset */
    _io_IncrementalNewlineDecoder___init__, /* tp_init */
    0,                          /* tp_alloc */
    PyType_GenericNew,          /* tp_new */
};
3215
3216
/* Method table of TextIOWrapper (installed as tp_methods of
   PyTextIOWrapper_Type).  The entries are *_METHODDEF macros, presumably
   provided by the clinic header included above; they are grouped as
   lifecycle/data methods, inquiries, and positioning. */
static PyMethodDef textiowrapper_methods[] = {
    _IO_TEXTIOWRAPPER_DETACH_METHODDEF
    _IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITE_METHODDEF
    _IO_TEXTIOWRAPPER_READ_METHODDEF
    _IO_TEXTIOWRAPPER_READLINE_METHODDEF
    _IO_TEXTIOWRAPPER_FLUSH_METHODDEF
    _IO_TEXTIOWRAPPER_CLOSE_METHODDEF

    /* Inquiries */
    _IO_TEXTIOWRAPPER_FILENO_METHODDEF
    _IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
    _IO_TEXTIOWRAPPER_READABLE_METHODDEF
    _IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
    _IO_TEXTIOWRAPPER_ISATTY_METHODDEF

    /* Seek and Tell */
    _IO_TEXTIOWRAPPER_SEEK_METHODDEF
    _IO_TEXTIOWRAPPER_TELL_METHODDEF
    _IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
    {NULL, NULL}  /* sentinel */
};
3237
/* Struct fields of `textio` exposed directly as attributes.  All are
   declared READONLY except `_finalizing`, whose flags are 0. */
static PyMemberDef textiowrapper_members[] = {
    {"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
    {"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
    {"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
    {"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
    {"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
    {NULL}  /* sentinel */
};
3246
3247 static PyGetSetDef textiowrapper_getset[] = {
3248 {"name", (getter)textiowrapper_name_get, NULL, NULL},
3249 {"closed", (getter)textiowrapper_closed_get, NULL, NULL},
3250 /* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3251 */
3252 {"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3253 {"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3254 {"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3255 (setter)textiowrapper_chunk_size_set, NULL},
3256 {NULL}
3257 };
3258
3259 PyTypeObject PyTextIOWrapper_Type = {
3260 PyVarObject_HEAD_INIT(NULL, 0)
3261 "_io.TextIOWrapper", /*tp_name*/
3262 sizeof(textio), /*tp_basicsize*/
3263 0, /*tp_itemsize*/
3264 (destructor)textiowrapper_dealloc, /*tp_dealloc*/
3265 0, /*tp_vectorcall_offset*/
3266 0, /*tp_getattr*/
3267 0, /*tps_etattr*/
3268 0, /*tp_as_async*/
3269 (reprfunc)textiowrapper_repr,/*tp_repr*/
3270 0, /*tp_as_number*/
3271 0, /*tp_as_sequence*/
3272 0, /*tp_as_mapping*/
3273 0, /*tp_hash */
3274 0, /*tp_call*/
3275 0, /*tp_str*/
3276 0, /*tp_getattro*/
3277 0, /*tp_setattro*/
3278 0, /*tp_as_buffer*/
3279 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE
3280 | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
3281 _io_TextIOWrapper___init____doc__, /* tp_doc */
3282 (traverseproc)textiowrapper_traverse, /* tp_traverse */
3283 (inquiry)textiowrapper_clear, /* tp_clear */
3284 0, /* tp_richcompare */
3285 offsetof(textio, weakreflist), /*tp_weaklistoffset*/
3286 0, /* tp_iter */
3287 (iternextfunc)textiowrapper_iternext, /* tp_iternext */
3288 textiowrapper_methods, /* tp_methods */
3289 textiowrapper_members, /* tp_members */
3290 textiowrapper_getset, /* tp_getset */
3291 0, /* tp_base */
3292 0, /* tp_dict */
3293 0, /* tp_descr_get */
3294 0, /* tp_descr_set */
3295 offsetof(textio, dict), /*tp_dictoffset*/
3296 _io_TextIOWrapper___init__, /* tp_init */
3297 0, /* tp_alloc */
3298 PyType_GenericNew, /* tp_new */
3299 0, /* tp_free */
3300 0, /* tp_is_gc */
3301 0, /* tp_bases */
3302 0, /* tp_mro */
3303 0, /* tp_cache */
3304 0, /* tp_subclasses */
3305 0, /* tp_weaklist */
3306 0, /* tp_del */
3307 0, /* tp_version_tag */
3308 0, /* tp_finalize */
3309 };
3310