1 /*
2  * multibytecodec.c: Common Multibyte Codec Implementation
3  *
4  * Written by Hye-Shik Chang <[email protected]>
5  */
6 
7 #define PY_SSIZE_T_CLEAN
8 #include "Python.h"
9 #include "structmember.h"         // PyMemberDef
10 #include "multibytecodec.h"
11 #include "clinic/multibytecodec.c.h"
12 
13 #define MODULE_NAME "_multibytecodec"
14 
/* Per-module state.  The *_type members are the type objects that the
   Argument Clinic class declarations in this file reach through
   clinic_get_state(). */
typedef struct {
    PyTypeObject *encoder_type;         /* MultibyteIncrementalEncoder */
    PyTypeObject *decoder_type;         /* MultibyteIncrementalDecoder */
    PyTypeObject *reader_type;          /* MultibyteStreamReader */
    PyTypeObject *writer_type;          /* MultibyteStreamWriter */
    PyTypeObject *multibytecodec_type;  /* MultibyteCodec */
    PyObject *str_write;    /* presumably a cached "write" string -- TODO confirm */
} _multibytecodec_state;
23 
24 static _multibytecodec_state *
_multibytecodec_get_state(PyObject * module)25 _multibytecodec_get_state(PyObject *module)
26 {
27     _multibytecodec_state *state = PyModule_GetState(module);
28     assert(state != NULL);
29     return state;
30 }
31 
32 static struct PyModuleDef _multibytecodecmodule;
33 static _multibytecodec_state *
_multibyte_codec_find_state_by_type(PyTypeObject * type)34 _multibyte_codec_find_state_by_type(PyTypeObject *type)
35 {
36     PyObject *module = PyType_GetModuleByDef(type, &_multibytecodecmodule);
37     assert(module != NULL);
38     return _multibytecodec_get_state(module);
39 }
40 
41 #define clinic_get_state() _multibyte_codec_find_state_by_type(type)
42 /*[clinic input]
43 module _multibytecodec
44 class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type"
45 class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "clinic_get_state()->encoder_type"
46 class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "clinic_get_state()->decoder_type"
47 class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "clinic_get_state()->reader_type"
48 class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "clinic_get_state()->writer_type"
49 [clinic start generated code]*/
50 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=305a76dfdd24b99c]*/
51 #undef clinic_get_state
52 
53 typedef struct {
54     PyObject            *inobj;
55     Py_ssize_t          inpos, inlen;
56     unsigned char       *outbuf, *outbuf_end;
57     PyObject            *excobj, *outobj;
58 } MultibyteEncodeBuffer;
59 
60 typedef struct {
61     const unsigned char *inbuf, *inbuf_top, *inbuf_end;
62     PyObject            *excobj;
63     _PyUnicodeWriter    writer;
64 } MultibyteDecodeBuffer;
65 
/* NULL-terminated keyword-name lists.  Their users are not visible in this
   part of the file; presumably they are handed to keyword-argument parsing
   in the incremental codec and stream constructors -- TODO confirm. */
static char *incnewkwarglist[] = {"errors", NULL};
static char *streamkwarglist[] = {"stream", "errors", NULL};
68 
69 static PyObject *multibytecodec_encode(MultibyteCodec *,
70                 MultibyteCodec_State *, PyObject *, Py_ssize_t *,
71                 PyObject *, int);
72 
/* Flag bit: reset the codec after an encoding session.  The expansion is
   parenthesized so the shift stays a single operand next to operators that
   bind tighter than `<<` (for example `/`, `*`, `+`). */
#define MBENC_RESET     (MBENC_MAX << 1)
74 
75 static PyObject *
make_tuple(PyObject * object,Py_ssize_t len)76 make_tuple(PyObject *object, Py_ssize_t len)
77 {
78     PyObject *v, *w;
79 
80     if (object == NULL)
81         return NULL;
82 
83     v = PyTuple_New(2);
84     if (v == NULL) {
85         Py_DECREF(object);
86         return NULL;
87     }
88     PyTuple_SET_ITEM(v, 0, object);
89 
90     w = PyLong_FromSsize_t(len);
91     if (w == NULL) {
92         Py_DECREF(v);
93         return NULL;
94     }
95     PyTuple_SET_ITEM(v, 1, w);
96 
97     return v;
98 }
99 
100 static PyObject *
internal_error_callback(const char * errors)101 internal_error_callback(const char *errors)
102 {
103     if (errors == NULL || strcmp(errors, "strict") == 0)
104         return ERROR_STRICT;
105     else if (strcmp(errors, "ignore") == 0)
106         return ERROR_IGNORE;
107     else if (strcmp(errors, "replace") == 0)
108         return ERROR_REPLACE;
109     else
110         return PyUnicode_FromString(errors);
111 }
112 
113 static PyObject *
call_error_callback(PyObject * errors,PyObject * exc)114 call_error_callback(PyObject *errors, PyObject *exc)
115 {
116     PyObject *cb, *r;
117     const char *str;
118 
119     assert(PyUnicode_Check(errors));
120     str = PyUnicode_AsUTF8(errors);
121     if (str == NULL)
122         return NULL;
123     cb = PyCodec_LookupError(str);
124     if (cb == NULL)
125         return NULL;
126 
127     r = PyObject_CallOneArg(cb, exc);
128     Py_DECREF(cb);
129     return r;
130 }
131 
132 static PyObject *
codecctx_errors_get(MultibyteStatefulCodecContext * self,void * Py_UNUSED (ignored))133 codecctx_errors_get(MultibyteStatefulCodecContext *self, void *Py_UNUSED(ignored))
134 {
135     const char *errors;
136 
137     if (self->errors == ERROR_STRICT)
138         errors = "strict";
139     else if (self->errors == ERROR_IGNORE)
140         errors = "ignore";
141     else if (self->errors == ERROR_REPLACE)
142         errors = "replace";
143     else {
144         Py_INCREF(self->errors);
145         return self->errors;
146     }
147 
148     return PyUnicode_FromString(errors);
149 }
150 
151 static int
codecctx_errors_set(MultibyteStatefulCodecContext * self,PyObject * value,void * closure)152 codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
153                     void *closure)
154 {
155     PyObject *cb;
156     const char *str;
157 
158     if (value == NULL) {
159         PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
160         return -1;
161     }
162     if (!PyUnicode_Check(value)) {
163         PyErr_SetString(PyExc_TypeError, "errors must be a string");
164         return -1;
165     }
166 
167     str = PyUnicode_AsUTF8(value);
168     if (str == NULL)
169         return -1;
170 
171     cb = internal_error_callback(str);
172     if (cb == NULL)
173         return -1;
174 
175     ERROR_DECREF(self->errors);
176     self->errors = cb;
177     return 0;
178 }
179 
180 /* This getset handlers list is used by all the stateful codec objects */
181 static PyGetSetDef codecctx_getsets[] = {
182     {"errors",          (getter)codecctx_errors_get,
183                     (setter)codecctx_errors_set,
184                     PyDoc_STR("how to treat errors")},
185     {NULL,}
186 };
187 
188 static int
expand_encodebuffer(MultibyteEncodeBuffer * buf,Py_ssize_t esize)189 expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
190 {
191     Py_ssize_t orgpos, orgsize, incsize;
192 
193     orgpos = (Py_ssize_t)((char *)buf->outbuf -
194                             PyBytes_AS_STRING(buf->outobj));
195     orgsize = PyBytes_GET_SIZE(buf->outobj);
196     incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
197 
198     if (orgsize > PY_SSIZE_T_MAX - incsize) {
199         PyErr_NoMemory();
200         return -1;
201     }
202 
203     if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
204         return -1;
205 
206     buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
207     buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
208         + PyBytes_GET_SIZE(buf->outobj);
209 
210     return 0;
211 }
/* Make sure `buf` has room for `s` more output bytes, growing it when it
   does not; a negative `s` always grows the buffer.  On failure control
   jumps to the caller's `errorexit` label. */
#define REQUIRE_ENCODEBUFFER(buf, s) do {                               \
    if ((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) {           \
        if (expand_encodebuffer(buf, s) == -1) {                        \
            goto errorexit;                                             \
        }                                                               \
    }                                                                   \
} while(0)
217 
218 
219 /**
220  * MultibyteCodec object
221  */
222 
223 static int
multibytecodec_encerror(MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteEncodeBuffer * buf,PyObject * errors,Py_ssize_t e)224 multibytecodec_encerror(MultibyteCodec *codec,
225                         MultibyteCodec_State *state,
226                         MultibyteEncodeBuffer *buf,
227                         PyObject *errors, Py_ssize_t e)
228 {
229     PyObject *retobj = NULL, *retstr = NULL, *tobj;
230     Py_ssize_t retstrsize, newpos;
231     Py_ssize_t esize, start, end;
232     const char *reason;
233 
234     if (e > 0) {
235         reason = "illegal multibyte sequence";
236         esize = e;
237     }
238     else {
239         switch (e) {
240         case MBERR_TOOSMALL:
241             REQUIRE_ENCODEBUFFER(buf, -1);
242             return 0; /* retry it */
243         case MBERR_TOOFEW:
244             reason = "incomplete multibyte sequence";
245             esize = (Py_ssize_t)buf->inpos;
246             break;
247         case MBERR_INTERNAL:
248             PyErr_SetString(PyExc_RuntimeError,
249                             "internal codec error");
250             return -1;
251         default:
252             PyErr_SetString(PyExc_RuntimeError,
253                             "unknown runtime error");
254             return -1;
255         }
256     }
257 
258     if (errors == ERROR_REPLACE) {
259         PyObject *replchar;
260         Py_ssize_t r;
261         Py_ssize_t inpos;
262         int kind;
263         const void *data;
264 
265         replchar = PyUnicode_FromOrdinal('?');
266         if (replchar == NULL)
267             goto errorexit;
268         kind = PyUnicode_KIND(replchar);
269         data = PyUnicode_DATA(replchar);
270 
271         inpos = 0;
272         for (;;) {
273             Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
274 
275             r = codec->encode(state, codec->config,
276                               kind, data, &inpos, 1,
277                               &buf->outbuf, outleft, 0);
278             if (r == MBERR_TOOSMALL) {
279                 REQUIRE_ENCODEBUFFER(buf, -1);
280                 continue;
281             }
282             else
283                 break;
284         }
285 
286         Py_DECREF(replchar);
287 
288         if (r != 0) {
289             REQUIRE_ENCODEBUFFER(buf, 1);
290             *buf->outbuf++ = '?';
291         }
292     }
293     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
294         buf->inpos += esize;
295         return 0;
296     }
297 
298     start = (Py_ssize_t)buf->inpos;
299     end = start + esize;
300 
301     /* use cached exception object if available */
302     if (buf->excobj == NULL) {
303         buf->excobj =  PyObject_CallFunction(PyExc_UnicodeEncodeError,
304                                              "sOnns",
305                                              codec->encoding, buf->inobj,
306                                              start, end, reason);
307         if (buf->excobj == NULL)
308             goto errorexit;
309     }
310     else
311         if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
312             PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
313             PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
314             goto errorexit;
315 
316     if (errors == ERROR_STRICT) {
317         PyCodec_StrictErrors(buf->excobj);
318         goto errorexit;
319     }
320 
321     retobj = call_error_callback(errors, buf->excobj);
322     if (retobj == NULL)
323         goto errorexit;
324 
325     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
326         (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
327         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
328         PyErr_SetString(PyExc_TypeError,
329                         "encoding error handler must return "
330                         "(str, int) tuple");
331         goto errorexit;
332     }
333 
334     if (PyUnicode_Check(tobj)) {
335         Py_ssize_t inpos;
336 
337         retstr = multibytecodec_encode(codec, state, tobj,
338                         &inpos, ERROR_STRICT,
339                         MBENC_FLUSH);
340         if (retstr == NULL)
341             goto errorexit;
342     }
343     else {
344         Py_INCREF(tobj);
345         retstr = tobj;
346     }
347 
348     assert(PyBytes_Check(retstr));
349     retstrsize = PyBytes_GET_SIZE(retstr);
350     if (retstrsize > 0) {
351         REQUIRE_ENCODEBUFFER(buf, retstrsize);
352         memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
353         buf->outbuf += retstrsize;
354     }
355 
356     newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
357     if (newpos < 0 && !PyErr_Occurred())
358         newpos += (Py_ssize_t)buf->inlen;
359     if (newpos < 0 || newpos > buf->inlen) {
360         PyErr_Clear();
361         PyErr_Format(PyExc_IndexError,
362                      "position %zd from error handler out of bounds",
363                      newpos);
364         goto errorexit;
365     }
366     buf->inpos = newpos;
367 
368     Py_DECREF(retobj);
369     Py_DECREF(retstr);
370     return 0;
371 
372 errorexit:
373     Py_XDECREF(retobj);
374     Py_XDECREF(retstr);
375     return -1;
376 }
377 
378 static int
multibytecodec_decerror(MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteDecodeBuffer * buf,PyObject * errors,Py_ssize_t e)379 multibytecodec_decerror(MultibyteCodec *codec,
380                         MultibyteCodec_State *state,
381                         MultibyteDecodeBuffer *buf,
382                         PyObject *errors, Py_ssize_t e)
383 {
384     PyObject *retobj = NULL, *retuni = NULL;
385     Py_ssize_t newpos;
386     const char *reason;
387     Py_ssize_t esize, start, end;
388 
389     if (e > 0) {
390         reason = "illegal multibyte sequence";
391         esize = e;
392     }
393     else {
394         switch (e) {
395         case MBERR_TOOSMALL:
396             return 0; /* retry it */
397         case MBERR_TOOFEW:
398             reason = "incomplete multibyte sequence";
399             esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
400             break;
401         case MBERR_INTERNAL:
402             PyErr_SetString(PyExc_RuntimeError,
403                             "internal codec error");
404             return -1;
405         case MBERR_EXCEPTION:
406             return -1;
407         default:
408             PyErr_SetString(PyExc_RuntimeError,
409                             "unknown runtime error");
410             return -1;
411         }
412     }
413 
414     if (errors == ERROR_REPLACE) {
415         if (_PyUnicodeWriter_WriteChar(&buf->writer,
416                                        Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
417             goto errorexit;
418     }
419     if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
420         buf->inbuf += esize;
421         return 0;
422     }
423 
424     start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
425     end = start + esize;
426 
427     /* use cached exception object if available */
428     if (buf->excobj == NULL) {
429         buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
430                         (const char *)buf->inbuf_top,
431                         (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
432                         start, end, reason);
433         if (buf->excobj == NULL)
434             goto errorexit;
435     }
436     else
437         if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
438             PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
439             PyUnicodeDecodeError_SetReason(buf->excobj, reason))
440             goto errorexit;
441 
442     if (errors == ERROR_STRICT) {
443         PyCodec_StrictErrors(buf->excobj);
444         goto errorexit;
445     }
446 
447     retobj = call_error_callback(errors, buf->excobj);
448     if (retobj == NULL)
449         goto errorexit;
450 
451     if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
452         !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
453         !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
454         PyErr_SetString(PyExc_TypeError,
455                         "decoding error handler must return "
456                         "(str, int) tuple");
457         goto errorexit;
458     }
459 
460     if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
461         goto errorexit;
462 
463     newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
464     if (newpos < 0 && !PyErr_Occurred())
465         newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
466     if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
467         PyErr_Clear();
468         PyErr_Format(PyExc_IndexError,
469                      "position %zd from error handler out of bounds",
470                      newpos);
471         goto errorexit;
472     }
473     buf->inbuf = buf->inbuf_top + newpos;
474     Py_DECREF(retobj);
475     return 0;
476 
477 errorexit:
478     Py_XDECREF(retobj);
479     return -1;
480 }
481 
482 static PyObject *
multibytecodec_encode(MultibyteCodec * codec,MultibyteCodec_State * state,PyObject * text,Py_ssize_t * inpos_t,PyObject * errors,int flags)483 multibytecodec_encode(MultibyteCodec *codec,
484                       MultibyteCodec_State *state,
485                       PyObject *text, Py_ssize_t *inpos_t,
486                       PyObject *errors, int flags)
487 {
488     MultibyteEncodeBuffer buf;
489     Py_ssize_t finalsize, r = 0;
490     Py_ssize_t datalen;
491     int kind;
492     const void *data;
493 
494     if (PyUnicode_READY(text) < 0)
495         return NULL;
496     datalen = PyUnicode_GET_LENGTH(text);
497 
498     if (datalen == 0 && !(flags & MBENC_RESET))
499         return PyBytes_FromStringAndSize(NULL, 0);
500 
501     buf.excobj = NULL;
502     buf.outobj = NULL;
503     buf.inobj = text;   /* borrowed reference */
504     buf.inpos = 0;
505     buf.inlen = datalen;
506     kind = PyUnicode_KIND(buf.inobj);
507     data = PyUnicode_DATA(buf.inobj);
508 
509     if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
510         PyErr_NoMemory();
511         goto errorexit;
512     }
513 
514     buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
515     if (buf.outobj == NULL)
516         goto errorexit;
517     buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
518     buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
519 
520     while (buf.inpos < buf.inlen) {
521         /* we don't reuse inleft and outleft here.
522          * error callbacks can relocate the cursor anywhere on buffer*/
523         Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
524 
525         r = codec->encode(state, codec->config,
526                           kind, data,
527                           &buf.inpos, buf.inlen,
528                           &buf.outbuf, outleft, flags);
529         if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
530             break;
531         else if (multibytecodec_encerror(codec, state, &buf, errors,r))
532             goto errorexit;
533         else if (r == MBERR_TOOFEW)
534             break;
535     }
536 
537     if (codec->encreset != NULL && (flags & MBENC_RESET))
538         for (;;) {
539             Py_ssize_t outleft;
540 
541             outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
542             r = codec->encreset(state, codec->config, &buf.outbuf,
543                                 outleft);
544             if (r == 0)
545                 break;
546             else if (multibytecodec_encerror(codec, state,
547                                              &buf, errors, r))
548                 goto errorexit;
549         }
550 
551     finalsize = (Py_ssize_t)((char *)buf.outbuf -
552                              PyBytes_AS_STRING(buf.outobj));
553 
554     if (finalsize != PyBytes_GET_SIZE(buf.outobj))
555         if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
556             goto errorexit;
557 
558     if (inpos_t)
559         *inpos_t = buf.inpos;
560     Py_XDECREF(buf.excobj);
561     return buf.outobj;
562 
563 errorexit:
564     Py_XDECREF(buf.excobj);
565     Py_XDECREF(buf.outobj);
566     return NULL;
567 }
568 
569 /*[clinic input]
570 _multibytecodec.MultibyteCodec.encode
571 
572   input: object
573   errors: str(accept={str, NoneType}) = None
574 
575 Return an encoded string version of `input'.
576 
577 'errors' may be given to set a different error handling scheme. Default is
578 'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible
579 values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name
580 registered with codecs.register_error that can handle UnicodeEncodeErrors.
581 [clinic start generated code]*/
582 
583 static PyObject *
_multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject * self,PyObject * input,const char * errors)584 _multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
585                                            PyObject *input,
586                                            const char *errors)
587 /*[clinic end generated code: output=7b26652045ba56a9 input=606d0e128a577bae]*/
588 {
589     MultibyteCodec_State state;
590     PyObject *errorcb, *r, *ucvt;
591     Py_ssize_t datalen;
592 
593     if (PyUnicode_Check(input))
594         ucvt = NULL;
595     else {
596         input = ucvt = PyObject_Str(input);
597         if (input == NULL)
598             return NULL;
599         else if (!PyUnicode_Check(input)) {
600             PyErr_SetString(PyExc_TypeError,
601                 "couldn't convert the object to unicode.");
602             Py_DECREF(ucvt);
603             return NULL;
604         }
605     }
606 
607     if (PyUnicode_READY(input) < 0) {
608         Py_XDECREF(ucvt);
609         return NULL;
610     }
611     datalen = PyUnicode_GET_LENGTH(input);
612 
613     errorcb = internal_error_callback(errors);
614     if (errorcb == NULL) {
615         Py_XDECREF(ucvt);
616         return NULL;
617     }
618 
619     if (self->codec->encinit != NULL &&
620         self->codec->encinit(&state, self->codec->config) != 0)
621         goto errorexit;
622     r = multibytecodec_encode(self->codec, &state,
623                     input, NULL, errorcb,
624                     MBENC_FLUSH | MBENC_RESET);
625     if (r == NULL)
626         goto errorexit;
627 
628     ERROR_DECREF(errorcb);
629     Py_XDECREF(ucvt);
630     return make_tuple(r, datalen);
631 
632 errorexit:
633     ERROR_DECREF(errorcb);
634     Py_XDECREF(ucvt);
635     return NULL;
636 }
637 
638 /*[clinic input]
639 _multibytecodec.MultibyteCodec.decode
640 
641   input: Py_buffer
642   errors: str(accept={str, NoneType}) = None
643 
644 Decodes 'input'.
645 
646 'errors' may be given to set a different error handling scheme. Default is
647 'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible
648 values are 'ignore' and 'replace' as well as any other name registered with
649 codecs.register_error that is able to handle UnicodeDecodeErrors."
650 [clinic start generated code]*/
651 
652 static PyObject *
_multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject * self,Py_buffer * input,const char * errors)653 _multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
654                                            Py_buffer *input,
655                                            const char *errors)
656 /*[clinic end generated code: output=ff419f65bad6cc77 input=e0c78fc7ab190def]*/
657 {
658     MultibyteCodec_State state;
659     MultibyteDecodeBuffer buf;
660     PyObject *errorcb, *res;
661     const char *data;
662     Py_ssize_t datalen;
663 
664     data = input->buf;
665     datalen = input->len;
666 
667     errorcb = internal_error_callback(errors);
668     if (errorcb == NULL) {
669         return NULL;
670     }
671 
672     if (datalen == 0) {
673         ERROR_DECREF(errorcb);
674         return make_tuple(PyUnicode_New(0, 0), 0);
675     }
676 
677     _PyUnicodeWriter_Init(&buf.writer);
678     buf.writer.min_length = datalen;
679     buf.excobj = NULL;
680     buf.inbuf = buf.inbuf_top = (unsigned char *)data;
681     buf.inbuf_end = buf.inbuf_top + datalen;
682 
683     if (self->codec->decinit != NULL &&
684         self->codec->decinit(&state, self->codec->config) != 0)
685         goto errorexit;
686 
687     while (buf.inbuf < buf.inbuf_end) {
688         Py_ssize_t inleft, r;
689 
690         inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
691 
692         r = self->codec->decode(&state, self->codec->config,
693                         &buf.inbuf, inleft, &buf.writer);
694         if (r == 0)
695             break;
696         else if (multibytecodec_decerror(self->codec, &state,
697                                          &buf, errorcb, r))
698             goto errorexit;
699     }
700 
701     res = _PyUnicodeWriter_Finish(&buf.writer);
702     if (res == NULL)
703         goto errorexit;
704 
705     Py_XDECREF(buf.excobj);
706     ERROR_DECREF(errorcb);
707     return make_tuple(res, datalen);
708 
709 errorexit:
710     ERROR_DECREF(errorcb);
711     Py_XDECREF(buf.excobj);
712     _PyUnicodeWriter_Dealloc(&buf.writer);
713 
714     return NULL;
715 }
716 
/* Method table of the MultibyteCodec type.  Each entry is a METHODDEF macro
   that comes from the Argument Clinic header included at the top of the
   file; the list ends with a NULL sentinel. */
static struct PyMethodDef multibytecodec_methods[] = {
    _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF
    {NULL, NULL},
};
722 
723 static int
multibytecodec_traverse(PyObject * self,visitproc visit,void * arg)724 multibytecodec_traverse(PyObject *self, visitproc visit, void *arg)
725 {
726     Py_VISIT(Py_TYPE(self));
727     return 0;
728 }
729 
730 static void
multibytecodec_dealloc(MultibyteCodecObject * self)731 multibytecodec_dealloc(MultibyteCodecObject *self)
732 {
733     PyObject_GC_UnTrack(self);
734     PyTypeObject *tp = Py_TYPE(self);
735     tp->tp_free(self);
736     Py_DECREF(tp);
737 }
738 
/* Slot table for the MultibyteCodec type: destructor, generic attribute
   lookup, the method table and the GC traverse function, followed by the
   {0, NULL} terminator. */
static PyType_Slot multibytecodec_slots[] = {
    {Py_tp_dealloc, multibytecodec_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_methods, multibytecodec_methods},
    {Py_tp_traverse, multibytecodec_traverse},
    {0, NULL},
};
746 
/* Type specification for MultibyteCodec.  The flags mark the type as
   GC-enabled, not instantiable from Python code, and immutable. */
static PyType_Spec multibytecodec_spec = {
    .name = MODULE_NAME ".MultibyteCodec",
    .basicsize = sizeof(MultibyteCodecObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = multibytecodec_slots,
};
754 
755 
756 /**
757  * Utility functions for stateful codec mechanism
758  */
759 
/* Cast an object pointer to the stateful decoder / encoder context type.
   These are plain casts; no type check is performed. */
#define STATEFUL_DCTX(o)        ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o)        ((MultibyteStatefulEncoderContext *)(o))
762 
763 static PyObject *
encoder_encode_stateful(MultibyteStatefulEncoderContext * ctx,PyObject * unistr,int final)764 encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
765                         PyObject *unistr, int final)
766 {
767     PyObject *ucvt, *r = NULL;
768     PyObject *inbuf = NULL;
769     Py_ssize_t inpos, datalen;
770     PyObject *origpending = NULL;
771 
772     if (PyUnicode_Check(unistr))
773         ucvt = NULL;
774     else {
775         unistr = ucvt = PyObject_Str(unistr);
776         if (unistr == NULL)
777             return NULL;
778         else if (!PyUnicode_Check(unistr)) {
779             PyErr_SetString(PyExc_TypeError,
780                 "couldn't convert the object to str.");
781             Py_DECREF(ucvt);
782             return NULL;
783         }
784     }
785 
786     if (ctx->pending) {
787         PyObject *inbuf_tmp;
788 
789         Py_INCREF(ctx->pending);
790         origpending = ctx->pending;
791 
792         Py_INCREF(ctx->pending);
793         inbuf_tmp = ctx->pending;
794         PyUnicode_Append(&inbuf_tmp, unistr);
795         if (inbuf_tmp == NULL)
796             goto errorexit;
797         Py_CLEAR(ctx->pending);
798         inbuf = inbuf_tmp;
799     }
800     else {
801         origpending = NULL;
802 
803         Py_INCREF(unistr);
804         inbuf = unistr;
805     }
806     if (PyUnicode_READY(inbuf) < 0)
807         goto errorexit;
808     inpos = 0;
809     datalen = PyUnicode_GET_LENGTH(inbuf);
810 
811     r = multibytecodec_encode(ctx->codec, &ctx->state,
812                               inbuf, &inpos,
813                               ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
814     if (r == NULL) {
815         /* recover the original pending buffer */
816         Py_XSETREF(ctx->pending, origpending);
817         origpending = NULL;
818         goto errorexit;
819     }
820     Py_XDECREF(origpending);
821 
822     if (inpos < datalen) {
823         if (datalen - inpos > MAXENCPENDING) {
824             /* normal codecs can't reach here */
825             PyErr_SetString(PyExc_UnicodeError,
826                             "pending buffer overflow");
827             goto errorexit;
828         }
829         ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
830         if (ctx->pending == NULL) {
831             /* normal codecs can't reach here */
832             goto errorexit;
833         }
834     }
835 
836     Py_DECREF(inbuf);
837     Py_XDECREF(ucvt);
838     return r;
839 
840 errorexit:
841     Py_XDECREF(r);
842     Py_XDECREF(ucvt);
843     Py_XDECREF(origpending);
844     Py_XDECREF(inbuf);
845     return NULL;
846 }
847 
848 static int
decoder_append_pending(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)849 decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
850                        MultibyteDecodeBuffer *buf)
851 {
852     Py_ssize_t npendings;
853 
854     npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
855     if (npendings + ctx->pendingsize > MAXDECPENDING ||
856         npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
857             PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
858             return -1;
859     }
860     memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
861     ctx->pendingsize += npendings;
862     return 0;
863 }
864 
865 static int
decoder_prepare_buffer(MultibyteDecodeBuffer * buf,const char * data,Py_ssize_t size)866 decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
867                        Py_ssize_t size)
868 {
869     buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
870     buf->inbuf_end = buf->inbuf_top + size;
871     buf->writer.min_length += size;
872     return 0;
873 }
874 
875 static int
decoder_feed_buffer(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)876 decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
877                     MultibyteDecodeBuffer *buf)
878 {
879     while (buf->inbuf < buf->inbuf_end) {
880         Py_ssize_t inleft;
881         Py_ssize_t r;
882 
883         inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
884 
885         r = ctx->codec->decode(&ctx->state, ctx->codec->config,
886             &buf->inbuf, inleft, &buf->writer);
887         if (r == 0 || r == MBERR_TOOFEW)
888             break;
889         else if (multibytecodec_decerror(ctx->codec, &ctx->state,
890                                          buf, ctx->errors, r))
891             return -1;
892     }
893     return 0;
894 }
895 
896 
897 /*[clinic input]
898 _multibytecodec.MultibyteIncrementalEncoder.encode
899 
900     input: object
901     final: bool(accept={int}) = False
902 [clinic start generated code]*/
903 
904 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject * self,PyObject * input,int final)905 _multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
906                                                         PyObject *input,
907                                                         int final)
908 /*[clinic end generated code: output=123361b6c505e2c1 input=093a1ddbb2fc6721]*/
909 {
910     return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
911 }
912 
913 /*[clinic input]
914 _multibytecodec.MultibyteIncrementalEncoder.getstate
915 [clinic start generated code]*/
916 
917 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject * self)918 _multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
919 /*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
920 {
921     /* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes
922        for UTF-8 encoded buffer (each character can use up to 4
923        bytes), and required bytes for MultibyteCodec_State.c. A byte
924        array is used to avoid different compilers generating different
925        values for the same state, e.g. as a result of struct padding.
926     */
927     unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
928     Py_ssize_t statesize;
929     const char *pendingbuffer = NULL;
930     Py_ssize_t pendingsize;
931 
932     if (self->pending != NULL) {
933         pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize);
934         if (pendingbuffer == NULL) {
935             return NULL;
936         }
937         if (pendingsize > MAXENCPENDING*4) {
938             PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
939             return NULL;
940         }
941         statebytes[0] = (unsigned char)pendingsize;
942         memcpy(statebytes + 1, pendingbuffer, pendingsize);
943         statesize = 1 + pendingsize;
944     } else {
945         statebytes[0] = 0;
946         statesize = 1;
947     }
948     memcpy(statebytes+statesize, self->state.c,
949            sizeof(self->state.c));
950     statesize += sizeof(self->state.c);
951 
952     return (PyObject *)_PyLong_FromByteArray(statebytes, statesize,
953                                              1 /* little-endian */ ,
954                                              0 /* unsigned */ );
955 }
956 
957 /*[clinic input]
958 _multibytecodec.MultibyteIncrementalEncoder.setstate
959     state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
960     /
961 [clinic start generated code]*/
962 
963 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject * self,PyLongObject * statelong)964 _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
965                                                           PyLongObject *statelong)
966 /*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
967 {
968     PyObject *pending = NULL;
969     unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
970 
971     if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
972                             1 /* little-endian */ ,
973                             0 /* unsigned */ ) < 0) {
974         goto errorexit;
975     }
976 
977     if (statebytes[0] > MAXENCPENDING*4) {
978         PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
979         return NULL;
980     }
981 
982     pending = PyUnicode_DecodeUTF8((const char *)statebytes+1,
983                                    statebytes[0], "strict");
984     if (pending == NULL) {
985         goto errorexit;
986     }
987 
988     Py_CLEAR(self->pending);
989     self->pending = pending;
990     memcpy(self->state.c, statebytes+1+statebytes[0],
991            sizeof(self->state.c));
992 
993     Py_RETURN_NONE;
994 
995 errorexit:
996     Py_XDECREF(pending);
997     return NULL;
998 }
999 
1000 /*[clinic input]
1001 _multibytecodec.MultibyteIncrementalEncoder.reset
1002 [clinic start generated code]*/
1003 
1004 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject * self)1005 _multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self)
1006 /*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/
1007 {
1008     /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */
1009     unsigned char buffer[4], *outbuf;
1010     Py_ssize_t r;
1011     if (self->codec->encreset != NULL) {
1012         outbuf = buffer;
1013         r = self->codec->encreset(&self->state, self->codec->config,
1014                                   &outbuf, sizeof(buffer));
1015         if (r != 0)
1016             return NULL;
1017     }
1018     Py_CLEAR(self->pending);
1019     Py_RETURN_NONE;
1020 }
1021 
1022 static struct PyMethodDef mbiencoder_methods[] = {
1023     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
1024     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
1025     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
1026     _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
1027     {NULL, NULL},
1028 };
1029 
1030 static PyObject *
mbiencoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1031 mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1032 {
1033     MultibyteIncrementalEncoderObject *self;
1034     PyObject *codec = NULL;
1035     char *errors = NULL;
1036 
1037     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
1038                                      incnewkwarglist, &errors))
1039         return NULL;
1040 
1041     self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
1042     if (self == NULL)
1043         return NULL;
1044 
1045     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1046     if (codec == NULL)
1047         goto errorexit;
1048 
1049     _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1050     if (!MultibyteCodec_Check(state, codec)) {
1051         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1052         goto errorexit;
1053     }
1054 
1055     self->codec = ((MultibyteCodecObject *)codec)->codec;
1056     self->pending = NULL;
1057     self->errors = internal_error_callback(errors);
1058     if (self->errors == NULL)
1059         goto errorexit;
1060     if (self->codec->encinit != NULL &&
1061         self->codec->encinit(&self->state, self->codec->config) != 0)
1062         goto errorexit;
1063 
1064     Py_DECREF(codec);
1065     return (PyObject *)self;
1066 
1067 errorexit:
1068     Py_XDECREF(self);
1069     Py_XDECREF(codec);
1070     return NULL;
1071 }
1072 
1073 static int
mbiencoder_init(PyObject * self,PyObject * args,PyObject * kwds)1074 mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1075 {
1076     return 0;
1077 }
1078 
1079 static int
mbiencoder_traverse(MultibyteIncrementalEncoderObject * self,visitproc visit,void * arg)1080 mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
1081                     visitproc visit, void *arg)
1082 {
1083     if (ERROR_ISCUSTOM(self->errors))
1084         Py_VISIT(self->errors);
1085     return 0;
1086 }
1087 
1088 static void
mbiencoder_dealloc(MultibyteIncrementalEncoderObject * self)1089 mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
1090 {
1091     PyTypeObject *tp = Py_TYPE(self);
1092     PyObject_GC_UnTrack(self);
1093     ERROR_DECREF(self->errors);
1094     Py_CLEAR(self->pending);
1095     tp->tp_free(self);
1096     Py_DECREF(tp);
1097 }
1098 
1099 static PyType_Slot encoder_slots[] = {
1100     {Py_tp_dealloc, mbiencoder_dealloc},
1101     {Py_tp_getattro, PyObject_GenericGetAttr},
1102     {Py_tp_traverse, mbiencoder_traverse},
1103     {Py_tp_methods, mbiencoder_methods},
1104     {Py_tp_getset, codecctx_getsets},
1105     {Py_tp_init, mbiencoder_init},
1106     {Py_tp_new, mbiencoder_new},
1107     {0, NULL},
1108 };
1109 
1110 static PyType_Spec encoder_spec = {
1111     .name = MODULE_NAME ".MultibyteIncrementalEncoder",
1112     .basicsize = sizeof(MultibyteIncrementalEncoderObject),
1113     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1114               Py_TPFLAGS_IMMUTABLETYPE),
1115     .slots = encoder_slots,
1116 };
1117 
1118 
1119 /*[clinic input]
1120 _multibytecodec.MultibyteIncrementalDecoder.decode
1121 
1122     input: Py_buffer
1123     final: bool(accept={int}) = False
1124 [clinic start generated code]*/
1125 
1126 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject * self,Py_buffer * input,int final)1127 _multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
1128                                                         Py_buffer *input,
1129                                                         int final)
1130 /*[clinic end generated code: output=b9b9090e8a9ce2ba input=c9132b24d503eb1d]*/
1131 {
1132     MultibyteDecodeBuffer buf;
1133     char *data, *wdata = NULL;
1134     Py_ssize_t wsize, size, origpending;
1135     PyObject *res;
1136 
1137     data = input->buf;
1138     size = input->len;
1139 
1140     _PyUnicodeWriter_Init(&buf.writer);
1141     buf.excobj = NULL;
1142     origpending = self->pendingsize;
1143 
1144     if (self->pendingsize == 0) {
1145         wsize = size;
1146         wdata = data;
1147     }
1148     else {
1149         if (size > PY_SSIZE_T_MAX - self->pendingsize) {
1150             PyErr_NoMemory();
1151             goto errorexit;
1152         }
1153         wsize = size + self->pendingsize;
1154         wdata = PyMem_Malloc(wsize);
1155         if (wdata == NULL) {
1156             PyErr_NoMemory();
1157             goto errorexit;
1158         }
1159         memcpy(wdata, self->pending, self->pendingsize);
1160         memcpy(wdata + self->pendingsize, data, size);
1161         self->pendingsize = 0;
1162     }
1163 
1164     if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
1165         goto errorexit;
1166 
1167     if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
1168         goto errorexit;
1169 
1170     if (final && buf.inbuf < buf.inbuf_end) {
1171         if (multibytecodec_decerror(self->codec, &self->state,
1172                         &buf, self->errors, MBERR_TOOFEW)) {
1173             /* recover the original pending buffer */
1174             memcpy(self->pending, wdata, origpending);
1175             self->pendingsize = origpending;
1176             goto errorexit;
1177         }
1178     }
1179 
1180     if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
1181         if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
1182             goto errorexit;
1183     }
1184 
1185     res = _PyUnicodeWriter_Finish(&buf.writer);
1186     if (res == NULL)
1187         goto errorexit;
1188 
1189     if (wdata != data)
1190         PyMem_Free(wdata);
1191     Py_XDECREF(buf.excobj);
1192     return res;
1193 
1194 errorexit:
1195     if (wdata != NULL && wdata != data)
1196         PyMem_Free(wdata);
1197     Py_XDECREF(buf.excobj);
1198     _PyUnicodeWriter_Dealloc(&buf.writer);
1199     return NULL;
1200 }
1201 
1202 /*[clinic input]
1203 _multibytecodec.MultibyteIncrementalDecoder.getstate
1204 [clinic start generated code]*/
1205 
1206 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject * self)1207 _multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
1208 /*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
1209 {
1210     PyObject *buffer;
1211     PyObject *statelong;
1212 
1213     buffer = PyBytes_FromStringAndSize((const char *)self->pending,
1214                                        self->pendingsize);
1215     if (buffer == NULL) {
1216         return NULL;
1217     }
1218 
1219     statelong = (PyObject *)_PyLong_FromByteArray(self->state.c,
1220                                                   sizeof(self->state.c),
1221                                                   1 /* little-endian */ ,
1222                                                   0 /* unsigned */ );
1223     if (statelong == NULL) {
1224         Py_DECREF(buffer);
1225         return NULL;
1226     }
1227 
1228     return Py_BuildValue("NN", buffer, statelong);
1229 }
1230 
1231 /*[clinic input]
1232 _multibytecodec.MultibyteIncrementalDecoder.setstate
1233     state: object(subclass_of='&PyTuple_Type')
1234     /
1235 [clinic start generated code]*/
1236 
1237 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject * self,PyObject * state)1238 _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
1239                                                           PyObject *state)
1240 /*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
1241 {
1242     PyObject *buffer;
1243     PyLongObject *statelong;
1244     Py_ssize_t buffersize;
1245     const char *bufferstr;
1246     unsigned char statebytes[8];
1247 
1248     if (!PyArg_ParseTuple(state, "SO!;setstate(): illegal state argument",
1249                           &buffer, &PyLong_Type, &statelong))
1250     {
1251         return NULL;
1252     }
1253 
1254     if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
1255                             1 /* little-endian */ ,
1256                             0 /* unsigned */ ) < 0) {
1257         return NULL;
1258     }
1259 
1260     buffersize = PyBytes_Size(buffer);
1261     if (buffersize == -1) {
1262         return NULL;
1263     }
1264 
1265     if (buffersize > MAXDECPENDING) {
1266         PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
1267         return NULL;
1268     }
1269 
1270     bufferstr = PyBytes_AsString(buffer);
1271     if (bufferstr == NULL) {
1272         return NULL;
1273     }
1274     self->pendingsize = buffersize;
1275     memcpy(self->pending, bufferstr, self->pendingsize);
1276     memcpy(self->state.c, statebytes, sizeof(statebytes));
1277 
1278     Py_RETURN_NONE;
1279 }
1280 
1281 /*[clinic input]
1282 _multibytecodec.MultibyteIncrementalDecoder.reset
1283 [clinic start generated code]*/
1284 
1285 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject * self)1286 _multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self)
1287 /*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/
1288 {
1289     if (self->codec->decreset != NULL &&
1290         self->codec->decreset(&self->state, self->codec->config) != 0)
1291         return NULL;
1292     self->pendingsize = 0;
1293 
1294     Py_RETURN_NONE;
1295 }
1296 
1297 static struct PyMethodDef mbidecoder_methods[] = {
1298     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
1299     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
1300     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
1301     _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
1302     {NULL, NULL},
1303 };
1304 
1305 static PyObject *
mbidecoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1306 mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1307 {
1308     MultibyteIncrementalDecoderObject *self;
1309     PyObject *codec = NULL;
1310     char *errors = NULL;
1311 
1312     if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
1313                                      incnewkwarglist, &errors))
1314         return NULL;
1315 
1316     self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
1317     if (self == NULL)
1318         return NULL;
1319 
1320     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1321     if (codec == NULL)
1322         goto errorexit;
1323 
1324     _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1325     if (!MultibyteCodec_Check(state, codec)) {
1326         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1327         goto errorexit;
1328     }
1329 
1330     self->codec = ((MultibyteCodecObject *)codec)->codec;
1331     self->pendingsize = 0;
1332     self->errors = internal_error_callback(errors);
1333     if (self->errors == NULL)
1334         goto errorexit;
1335     if (self->codec->decinit != NULL &&
1336         self->codec->decinit(&self->state, self->codec->config) != 0)
1337         goto errorexit;
1338 
1339     Py_DECREF(codec);
1340     return (PyObject *)self;
1341 
1342 errorexit:
1343     Py_XDECREF(self);
1344     Py_XDECREF(codec);
1345     return NULL;
1346 }
1347 
1348 static int
mbidecoder_init(PyObject * self,PyObject * args,PyObject * kwds)1349 mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1350 {
1351     return 0;
1352 }
1353 
1354 static int
mbidecoder_traverse(MultibyteIncrementalDecoderObject * self,visitproc visit,void * arg)1355 mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
1356                     visitproc visit, void *arg)
1357 {
1358     if (ERROR_ISCUSTOM(self->errors))
1359         Py_VISIT(self->errors);
1360     return 0;
1361 }
1362 
1363 static void
mbidecoder_dealloc(MultibyteIncrementalDecoderObject * self)1364 mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
1365 {
1366     PyTypeObject *tp = Py_TYPE(self);
1367     PyObject_GC_UnTrack(self);
1368     ERROR_DECREF(self->errors);
1369     tp->tp_free(self);
1370     Py_DECREF(tp);
1371 }
1372 
1373 static PyType_Slot decoder_slots[] = {
1374     {Py_tp_dealloc, mbidecoder_dealloc},
1375     {Py_tp_getattro, PyObject_GenericGetAttr},
1376     {Py_tp_traverse, mbidecoder_traverse},
1377     {Py_tp_methods, mbidecoder_methods},
1378     {Py_tp_getset, codecctx_getsets},
1379     {Py_tp_init, mbidecoder_init},
1380     {Py_tp_new, mbidecoder_new},
1381     {0, NULL},
1382 };
1383 
1384 static PyType_Spec decoder_spec = {
1385     .name = MODULE_NAME ".MultibyteIncrementalDecoder",
1386     .basicsize = sizeof(MultibyteIncrementalDecoderObject),
1387     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1388               Py_TPFLAGS_IMMUTABLETYPE),
1389     .slots = decoder_slots,
1390 };
1391 
1392 static PyObject *
mbstreamreader_iread(MultibyteStreamReaderObject * self,const char * method,Py_ssize_t sizehint)1393 mbstreamreader_iread(MultibyteStreamReaderObject *self,
1394                      const char *method, Py_ssize_t sizehint)
1395 {
1396     MultibyteDecodeBuffer buf;
1397     PyObject *cres, *res;
1398     Py_ssize_t rsize;
1399 
1400     if (sizehint == 0)
1401         return PyUnicode_New(0, 0);
1402 
1403     _PyUnicodeWriter_Init(&buf.writer);
1404     buf.excobj = NULL;
1405     cres = NULL;
1406 
1407     for (;;) {
1408         int endoffile;
1409 
1410         if (sizehint < 0)
1411             cres = PyObject_CallMethod(self->stream,
1412                             method, NULL);
1413         else
1414             cres = PyObject_CallMethod(self->stream,
1415                             method, "i", sizehint);
1416         if (cres == NULL)
1417             goto errorexit;
1418 
1419         if (!PyBytes_Check(cres)) {
1420             PyErr_Format(PyExc_TypeError,
1421                          "stream function returned a "
1422                          "non-bytes object (%.100s)",
1423                          Py_TYPE(cres)->tp_name);
1424             goto errorexit;
1425         }
1426 
1427         endoffile = (PyBytes_GET_SIZE(cres) == 0);
1428 
1429         if (self->pendingsize > 0) {
1430             PyObject *ctr;
1431             char *ctrdata;
1432 
1433             if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
1434                 PyErr_NoMemory();
1435                 goto errorexit;
1436             }
1437             rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
1438             ctr = PyBytes_FromStringAndSize(NULL, rsize);
1439             if (ctr == NULL)
1440                 goto errorexit;
1441             ctrdata = PyBytes_AS_STRING(ctr);
1442             memcpy(ctrdata, self->pending, self->pendingsize);
1443             memcpy(ctrdata + self->pendingsize,
1444                     PyBytes_AS_STRING(cres),
1445                     PyBytes_GET_SIZE(cres));
1446             Py_DECREF(cres);
1447             cres = ctr;
1448             self->pendingsize = 0;
1449         }
1450 
1451         rsize = PyBytes_GET_SIZE(cres);
1452         if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
1453                                    rsize) != 0)
1454             goto errorexit;
1455 
1456         if (rsize > 0 && decoder_feed_buffer(
1457                         (MultibyteStatefulDecoderContext *)self, &buf))
1458             goto errorexit;
1459 
1460         if (endoffile || sizehint < 0) {
1461             if (buf.inbuf < buf.inbuf_end &&
1462                 multibytecodec_decerror(self->codec, &self->state,
1463                             &buf, self->errors, MBERR_TOOFEW))
1464                 goto errorexit;
1465         }
1466 
1467         if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
1468             if (decoder_append_pending(STATEFUL_DCTX(self),
1469                                        &buf) != 0)
1470                 goto errorexit;
1471         }
1472 
1473         Py_DECREF(cres);
1474         cres = NULL;
1475 
1476         if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
1477             break;
1478 
1479         sizehint = 1; /* read 1 more byte and retry */
1480     }
1481 
1482     res = _PyUnicodeWriter_Finish(&buf.writer);
1483     if (res == NULL)
1484         goto errorexit;
1485 
1486     Py_XDECREF(cres);
1487     Py_XDECREF(buf.excobj);
1488     return res;
1489 
1490 errorexit:
1491     Py_XDECREF(cres);
1492     Py_XDECREF(buf.excobj);
1493     _PyUnicodeWriter_Dealloc(&buf.writer);
1494     return NULL;
1495 }
1496 
1497 /*[clinic input]
1498  _multibytecodec.MultibyteStreamReader.read
1499 
1500     sizeobj: object = None
1501     /
1502 [clinic start generated code]*/
1503 
1504 static PyObject *
_multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1505 _multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
1506                                                 PyObject *sizeobj)
1507 /*[clinic end generated code: output=35621eb75355d5b8 input=015b0d3ff2fca485]*/
1508 {
1509     Py_ssize_t size;
1510 
1511     if (sizeobj == Py_None)
1512         size = -1;
1513     else if (PyLong_Check(sizeobj))
1514         size = PyLong_AsSsize_t(sizeobj);
1515     else {
1516         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1517         return NULL;
1518     }
1519 
1520     if (size == -1 && PyErr_Occurred())
1521         return NULL;
1522 
1523     return mbstreamreader_iread(self, "read", size);
1524 }
1525 
1526 /*[clinic input]
1527  _multibytecodec.MultibyteStreamReader.readline
1528 
1529     sizeobj: object = None
1530     /
1531 [clinic start generated code]*/
1532 
1533 static PyObject *
_multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1534 _multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
1535                                                     PyObject *sizeobj)
1536 /*[clinic end generated code: output=4fbfaae1ed457a11 input=41ccc64f9bb0cec3]*/
1537 {
1538     Py_ssize_t size;
1539 
1540     if (sizeobj == Py_None)
1541         size = -1;
1542     else if (PyLong_Check(sizeobj))
1543         size = PyLong_AsSsize_t(sizeobj);
1544     else {
1545         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1546         return NULL;
1547     }
1548 
1549     if (size == -1 && PyErr_Occurred())
1550         return NULL;
1551 
1552     return mbstreamreader_iread(self, "readline", size);
1553 }
1554 
1555 /*[clinic input]
1556  _multibytecodec.MultibyteStreamReader.readlines
1557 
1558     sizehintobj: object = None
1559     /
1560 [clinic start generated code]*/
1561 
1562 static PyObject *
_multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject * self,PyObject * sizehintobj)1563 _multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
1564                                                      PyObject *sizehintobj)
1565 /*[clinic end generated code: output=e7c4310768ed2ad4 input=54932f5d4d88e880]*/
1566 {
1567     PyObject *r, *sr;
1568     Py_ssize_t sizehint;
1569 
1570     if (sizehintobj == Py_None)
1571         sizehint = -1;
1572     else if (PyLong_Check(sizehintobj))
1573         sizehint = PyLong_AsSsize_t(sizehintobj);
1574     else {
1575         PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1576         return NULL;
1577     }
1578 
1579     if (sizehint == -1 && PyErr_Occurred())
1580         return NULL;
1581 
1582     r = mbstreamreader_iread(self, "read", sizehint);
1583     if (r == NULL)
1584         return NULL;
1585 
1586     sr = PyUnicode_Splitlines(r, 1);
1587     Py_DECREF(r);
1588     return sr;
1589 }
1590 
1591 /*[clinic input]
1592  _multibytecodec.MultibyteStreamReader.reset
1593 [clinic start generated code]*/
1594 
1595 static PyObject *
_multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject * self)1596 _multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self)
1597 /*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/
1598 {
1599     if (self->codec->decreset != NULL &&
1600         self->codec->decreset(&self->state, self->codec->config) != 0)
1601         return NULL;
1602     self->pendingsize = 0;
1603 
1604     Py_RETURN_NONE;
1605 }
1606 
1607 static struct PyMethodDef mbstreamreader_methods[] = {
1608     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF
1609     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF
1610     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF
1611     _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF
1612     {NULL,              NULL},
1613 };
1614 
1615 static PyMemberDef mbstreamreader_members[] = {
1616     {"stream",          T_OBJECT,
1617                     offsetof(MultibyteStreamReaderObject, stream),
1618                     READONLY, NULL},
1619     {NULL,}
1620 };
1621 
1622 static PyObject *
mbstreamreader_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1623 mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1624 {
1625     MultibyteStreamReaderObject *self;
1626     PyObject *stream, *codec = NULL;
1627     char *errors = NULL;
1628 
1629     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
1630                             streamkwarglist, &stream, &errors))
1631         return NULL;
1632 
1633     self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
1634     if (self == NULL)
1635         return NULL;
1636 
1637     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1638     if (codec == NULL)
1639         goto errorexit;
1640 
1641     _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1642     if (!MultibyteCodec_Check(state, codec)) {
1643         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1644         goto errorexit;
1645     }
1646 
1647     self->codec = ((MultibyteCodecObject *)codec)->codec;
1648     self->stream = stream;
1649     Py_INCREF(stream);
1650     self->pendingsize = 0;
1651     self->errors = internal_error_callback(errors);
1652     if (self->errors == NULL)
1653         goto errorexit;
1654     if (self->codec->decinit != NULL &&
1655         self->codec->decinit(&self->state, self->codec->config) != 0)
1656         goto errorexit;
1657 
1658     Py_DECREF(codec);
1659     return (PyObject *)self;
1660 
1661 errorexit:
1662     Py_XDECREF(self);
1663     Py_XDECREF(codec);
1664     return NULL;
1665 }
1666 
1667 static int
mbstreamreader_init(PyObject * self,PyObject * args,PyObject * kwds)1668 mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
1669 {
1670     return 0;
1671 }
1672 
1673 static int
mbstreamreader_traverse(MultibyteStreamReaderObject * self,visitproc visit,void * arg)1674 mbstreamreader_traverse(MultibyteStreamReaderObject *self,
1675                         visitproc visit, void *arg)
1676 {
1677     if (ERROR_ISCUSTOM(self->errors))
1678         Py_VISIT(self->errors);
1679     Py_VISIT(self->stream);
1680     return 0;
1681 }
1682 
1683 static void
mbstreamreader_dealloc(MultibyteStreamReaderObject * self)1684 mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
1685 {
1686     PyTypeObject *tp = Py_TYPE(self);
1687     PyObject_GC_UnTrack(self);
1688     ERROR_DECREF(self->errors);
1689     Py_XDECREF(self->stream);
1690     tp->tp_free(self);
1691     Py_DECREF(tp);
1692 }
1693 
1694 static PyType_Slot reader_slots[] = {
1695     {Py_tp_dealloc, mbstreamreader_dealloc},
1696     {Py_tp_getattro, PyObject_GenericGetAttr},
1697     {Py_tp_traverse, mbstreamreader_traverse},
1698     {Py_tp_methods, mbstreamreader_methods},
1699     {Py_tp_members, mbstreamreader_members},
1700     {Py_tp_getset, codecctx_getsets},
1701     {Py_tp_init, mbstreamreader_init},
1702     {Py_tp_new, mbstreamreader_new},
1703     {0, NULL},
1704 };
1705 
1706 static PyType_Spec reader_spec = {
1707     .name = MODULE_NAME ".MultibyteStreamReader",
1708     .basicsize = sizeof(MultibyteStreamReaderObject),
1709     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1710               Py_TPFLAGS_IMMUTABLETYPE),
1711     .slots = reader_slots,
1712 };
1713 
1714 static int
mbstreamwriter_iwrite(MultibyteStreamWriterObject * self,PyObject * unistr,PyObject * str_write)1715 mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
1716                       PyObject *unistr, PyObject *str_write)
1717 {
1718     PyObject *str, *wr;
1719 
1720     str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
1721     if (str == NULL)
1722         return -1;
1723 
1724     wr = _PyObject_CallMethodOneArg(self->stream, str_write, str);
1725     Py_DECREF(str);
1726     if (wr == NULL)
1727         return -1;
1728 
1729     Py_DECREF(wr);
1730     return 0;
1731 }
1732 
1733 /*[clinic input]
1734  _multibytecodec.MultibyteStreamWriter.write
1735 
1736     cls: defining_class
1737     strobj: object
1738     /
1739 [clinic start generated code]*/
1740 
1741 static PyObject *
_multibytecodec_MultibyteStreamWriter_write_impl(MultibyteStreamWriterObject * self,PyTypeObject * cls,PyObject * strobj)1742 _multibytecodec_MultibyteStreamWriter_write_impl(MultibyteStreamWriterObject *self,
1743                                                  PyTypeObject *cls,
1744                                                  PyObject *strobj)
1745 /*[clinic end generated code: output=68ade3aea26410ac input=199f26f68bd8425a]*/
1746 {
1747     _multibytecodec_state *state = PyType_GetModuleState(cls);
1748     assert(state != NULL);
1749     if (mbstreamwriter_iwrite(self, strobj, state->str_write)) {
1750         return NULL;
1751     }
1752     Py_RETURN_NONE;
1753 }
1754 
1755 /*[clinic input]
1756  _multibytecodec.MultibyteStreamWriter.writelines
1757 
1758     cls: defining_class
1759     lines: object
1760     /
1761 [clinic start generated code]*/
1762 
1763 static PyObject *
_multibytecodec_MultibyteStreamWriter_writelines_impl(MultibyteStreamWriterObject * self,PyTypeObject * cls,PyObject * lines)1764 _multibytecodec_MultibyteStreamWriter_writelines_impl(MultibyteStreamWriterObject *self,
1765                                                       PyTypeObject *cls,
1766                                                       PyObject *lines)
1767 /*[clinic end generated code: output=b4c99d2cf23ffb88 input=a6d5fe7c74972a34]*/
1768 {
1769     PyObject *strobj;
1770     int i, r;
1771 
1772     if (!PySequence_Check(lines)) {
1773         PyErr_SetString(PyExc_TypeError,
1774                         "arg must be a sequence object");
1775         return NULL;
1776     }
1777 
1778     _multibytecodec_state *state = PyType_GetModuleState(cls);
1779     assert(state != NULL);
1780     for (i = 0; i < PySequence_Length(lines); i++) {
1781         /* length can be changed even within this loop */
1782         strobj = PySequence_GetItem(lines, i);
1783         if (strobj == NULL)
1784             return NULL;
1785 
1786         r = mbstreamwriter_iwrite(self, strobj, state->str_write);
1787         Py_DECREF(strobj);
1788         if (r == -1)
1789             return NULL;
1790     }
1791     /* PySequence_Length() can fail */
1792     if (PyErr_Occurred())
1793         return NULL;
1794 
1795     Py_RETURN_NONE;
1796 }
1797 
1798 /*[clinic input]
1799  _multibytecodec.MultibyteStreamWriter.reset
1800 
1801     cls: defining_class
1802     /
1803 
1804 [clinic start generated code]*/
1805 
1806 static PyObject *
_multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject * self,PyTypeObject * cls)1807 _multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self,
1808                                                  PyTypeObject *cls)
1809 /*[clinic end generated code: output=32ef224c2a38aa3d input=28af6a9cd38d1979]*/
1810 {
1811     PyObject *pwrt;
1812 
1813     if (!self->pending)
1814         Py_RETURN_NONE;
1815 
1816     pwrt = multibytecodec_encode(self->codec, &self->state,
1817                     self->pending, NULL, self->errors,
1818                     MBENC_FLUSH | MBENC_RESET);
1819     /* some pending buffer can be truncated when UnicodeEncodeError is
1820      * raised on 'strict' mode. but, 'reset' method is designed to
1821      * reset the pending buffer or states so failed string sequence
1822      * ought to be missed */
1823     Py_CLEAR(self->pending);
1824     if (pwrt == NULL)
1825         return NULL;
1826 
1827     assert(PyBytes_Check(pwrt));
1828 
1829     _multibytecodec_state *state = PyType_GetModuleState(cls);
1830     assert(state != NULL);
1831 
1832     if (PyBytes_Size(pwrt) > 0) {
1833         PyObject *wr;
1834 
1835         wr = _PyObject_CallMethodOneArg(self->stream, state->str_write, pwrt);
1836         if (wr == NULL) {
1837             Py_DECREF(pwrt);
1838             return NULL;
1839         }
1840     }
1841     Py_DECREF(pwrt);
1842 
1843     Py_RETURN_NONE;
1844 }
1845 
1846 static PyObject *
mbstreamwriter_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1847 mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1848 {
1849     MultibyteStreamWriterObject *self;
1850     PyObject *stream, *codec = NULL;
1851     char *errors = NULL;
1852 
1853     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
1854                             streamkwarglist, &stream, &errors))
1855         return NULL;
1856 
1857     self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
1858     if (self == NULL)
1859         return NULL;
1860 
1861     codec = PyObject_GetAttrString((PyObject *)type, "codec");
1862     if (codec == NULL)
1863         goto errorexit;
1864 
1865     _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1866     if (!MultibyteCodec_Check(state, codec)) {
1867         PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1868         goto errorexit;
1869     }
1870 
1871     self->codec = ((MultibyteCodecObject *)codec)->codec;
1872     self->stream = stream;
1873     Py_INCREF(stream);
1874     self->pending = NULL;
1875     self->errors = internal_error_callback(errors);
1876     if (self->errors == NULL)
1877         goto errorexit;
1878     if (self->codec->encinit != NULL &&
1879         self->codec->encinit(&self->state, self->codec->config) != 0)
1880         goto errorexit;
1881 
1882     Py_DECREF(codec);
1883     return (PyObject *)self;
1884 
1885 errorexit:
1886     Py_XDECREF(self);
1887     Py_XDECREF(codec);
1888     return NULL;
1889 }
1890 
1891 static int
mbstreamwriter_init(PyObject * self,PyObject * args,PyObject * kwds)1892 mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
1893 {
1894     return 0;
1895 }
1896 
1897 static int
mbstreamwriter_traverse(MultibyteStreamWriterObject * self,visitproc visit,void * arg)1898 mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
1899                         visitproc visit, void *arg)
1900 {
1901     if (ERROR_ISCUSTOM(self->errors))
1902         Py_VISIT(self->errors);
1903     Py_VISIT(self->stream);
1904     return 0;
1905 }
1906 
1907 static void
mbstreamwriter_dealloc(MultibyteStreamWriterObject * self)1908 mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1909 {
1910     PyTypeObject *tp = Py_TYPE(self);
1911     PyObject_GC_UnTrack(self);
1912     ERROR_DECREF(self->errors);
1913     Py_XDECREF(self->stream);
1914     tp->tp_free(self);
1915     Py_DECREF(tp);
1916 }
1917 
1918 static struct PyMethodDef mbstreamwriter_methods[] = {
1919     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF
1920     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF
1921     _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF
1922     {NULL, NULL},
1923 };
1924 
1925 static PyMemberDef mbstreamwriter_members[] = {
1926     {"stream",          T_OBJECT,
1927                     offsetof(MultibyteStreamWriterObject, stream),
1928                     READONLY, NULL},
1929     {NULL,}
1930 };
1931 
1932 static PyType_Slot writer_slots[] = {
1933     {Py_tp_dealloc, mbstreamwriter_dealloc},
1934     {Py_tp_getattro, PyObject_GenericGetAttr},
1935     {Py_tp_traverse, mbstreamwriter_traverse},
1936     {Py_tp_methods, mbstreamwriter_methods},
1937     {Py_tp_members, mbstreamwriter_members},
1938     {Py_tp_getset, codecctx_getsets},
1939     {Py_tp_init, mbstreamwriter_init},
1940     {Py_tp_new, mbstreamwriter_new},
1941     {0, NULL},
1942 };
1943 
1944 static PyType_Spec writer_spec = {
1945     .name = MODULE_NAME ".MultibyteStreamWriter",
1946     .basicsize = sizeof(MultibyteStreamWriterObject),
1947     .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
1948               Py_TPFLAGS_IMMUTABLETYPE),
1949     .slots = writer_slots,
1950 };
1951 
1952 
1953 /*[clinic input]
1954 _multibytecodec.__create_codec
1955 
1956     arg: object
1957     /
1958 [clinic start generated code]*/
1959 
1960 static PyObject *
_multibytecodec___create_codec(PyObject * module,PyObject * arg)1961 _multibytecodec___create_codec(PyObject *module, PyObject *arg)
1962 /*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/
1963 {
1964     MultibyteCodecObject *self;
1965     MultibyteCodec *codec;
1966 
1967     if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {
1968         PyErr_SetString(PyExc_ValueError, "argument type invalid");
1969         return NULL;
1970     }
1971 
1972     codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);
1973     if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
1974         return NULL;
1975 
1976     _multibytecodec_state *state = _multibytecodec_get_state(module);
1977     self = PyObject_GC_New(MultibyteCodecObject, state->multibytecodec_type);
1978     if (self == NULL)
1979         return NULL;
1980     self->codec = codec;
1981 
1982     PyObject_GC_Track(self);
1983     return (PyObject *)self;
1984 }
1985 
1986 static int
_multibytecodec_traverse(PyObject * mod,visitproc visit,void * arg)1987 _multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
1988 {
1989     _multibytecodec_state *state = _multibytecodec_get_state(mod);
1990     Py_VISIT(state->multibytecodec_type);
1991     Py_VISIT(state->encoder_type);
1992     Py_VISIT(state->decoder_type);
1993     Py_VISIT(state->reader_type);
1994     Py_VISIT(state->writer_type);
1995     return 0;
1996 }
1997 
1998 static int
_multibytecodec_clear(PyObject * mod)1999 _multibytecodec_clear(PyObject *mod)
2000 {
2001     _multibytecodec_state *state = _multibytecodec_get_state(mod);
2002     Py_CLEAR(state->multibytecodec_type);
2003     Py_CLEAR(state->encoder_type);
2004     Py_CLEAR(state->decoder_type);
2005     Py_CLEAR(state->reader_type);
2006     Py_CLEAR(state->writer_type);
2007     Py_CLEAR(state->str_write);
2008     return 0;
2009 }
2010 
2011 static void
_multibytecodec_free(void * mod)2012 _multibytecodec_free(void *mod)
2013 {
2014     _multibytecodec_clear((PyObject *)mod);
2015 }
2016 
2017 #define CREATE_TYPE(module, type, spec)                                      \
2018     do {                                                                     \
2019         type = (PyTypeObject *)PyType_FromModuleAndSpec(module, spec, NULL); \
2020         if (!type) {                                                         \
2021             return -1;                                                       \
2022         }                                                                    \
2023     } while (0)
2024 
2025 #define ADD_TYPE(module, type)                    \
2026     do {                                          \
2027         if (PyModule_AddType(module, type) < 0) { \
2028             return -1;                            \
2029         }                                         \
2030     } while (0)
2031 
2032 static int
_multibytecodec_exec(PyObject * mod)2033 _multibytecodec_exec(PyObject *mod)
2034 {
2035     _multibytecodec_state *state = _multibytecodec_get_state(mod);
2036     state->str_write = PyUnicode_InternFromString("write");
2037     if (state->str_write == NULL) {
2038         return -1;
2039     }
2040     CREATE_TYPE(mod, state->multibytecodec_type, &multibytecodec_spec);
2041     CREATE_TYPE(mod, state->encoder_type, &encoder_spec);
2042     CREATE_TYPE(mod, state->decoder_type, &decoder_spec);
2043     CREATE_TYPE(mod, state->reader_type, &reader_spec);
2044     CREATE_TYPE(mod, state->writer_type, &writer_spec);
2045 
2046     ADD_TYPE(mod, state->encoder_type);
2047     ADD_TYPE(mod, state->decoder_type);
2048     ADD_TYPE(mod, state->reader_type);
2049     ADD_TYPE(mod, state->writer_type);
2050     return 0;
2051 }
2052 
2053 #undef CREATE_TYPE
2054 #undef ADD_TYPE
2055 
2056 static struct PyMethodDef _multibytecodec_methods[] = {
2057     _MULTIBYTECODEC___CREATE_CODEC_METHODDEF
2058     {NULL, NULL},
2059 };
2060 
2061 static PyModuleDef_Slot _multibytecodec_slots[] = {
2062     {Py_mod_exec, _multibytecodec_exec},
2063     {0, NULL}
2064 };
2065 
2066 static struct PyModuleDef _multibytecodecmodule = {
2067     .m_base = PyModuleDef_HEAD_INIT,
2068     .m_name = "_multibytecodec",
2069     .m_size = sizeof(_multibytecodec_state),
2070     .m_methods = _multibytecodec_methods,
2071     .m_slots = _multibytecodec_slots,
2072     .m_traverse = _multibytecodec_traverse,
2073     .m_clear = _multibytecodec_clear,
2074     .m_free = _multibytecodec_free,
2075 };
2076 
2077 PyMODINIT_FUNC
PyInit__multibytecodec(void)2078 PyInit__multibytecodec(void)
2079 {
2080     return PyModuleDef_Init(&_multibytecodecmodule);
2081 }
2082