1 /*
2 * multibytecodec.c: Common Multibyte Codec Implementation
3 *
4 * Written by Hye-Shik Chang <[email protected]>
5 */
6
7 #define PY_SSIZE_T_CLEAN
8 #include "Python.h"
9 #include "structmember.h" // PyMemberDef
10 #include "multibytecodec.h"
11 #include "clinic/multibytecodec.c.h"
12
13 #define MODULE_NAME "_multibytecodec"
14
15 typedef struct {
16 PyTypeObject *encoder_type;
17 PyTypeObject *decoder_type;
18 PyTypeObject *reader_type;
19 PyTypeObject *writer_type;
20 PyTypeObject *multibytecodec_type;
21 PyObject *str_write;
22 } _multibytecodec_state;
23
24 static _multibytecodec_state *
_multibytecodec_get_state(PyObject * module)25 _multibytecodec_get_state(PyObject *module)
26 {
27 _multibytecodec_state *state = PyModule_GetState(module);
28 assert(state != NULL);
29 return state;
30 }
31
32 static struct PyModuleDef _multibytecodecmodule;
33 static _multibytecodec_state *
_multibyte_codec_find_state_by_type(PyTypeObject * type)34 _multibyte_codec_find_state_by_type(PyTypeObject *type)
35 {
36 PyObject *module = PyType_GetModuleByDef(type, &_multibytecodecmodule);
37 assert(module != NULL);
38 return _multibytecodec_get_state(module);
39 }
40
41 #define clinic_get_state() _multibyte_codec_find_state_by_type(type)
42 /*[clinic input]
43 module _multibytecodec
44 class _multibytecodec.MultibyteCodec "MultibyteCodecObject *" "clinic_get_state()->multibytecodec_type"
45 class _multibytecodec.MultibyteIncrementalEncoder "MultibyteIncrementalEncoderObject *" "clinic_get_state()->encoder_type"
46 class _multibytecodec.MultibyteIncrementalDecoder "MultibyteIncrementalDecoderObject *" "clinic_get_state()->decoder_type"
47 class _multibytecodec.MultibyteStreamReader "MultibyteStreamReaderObject *" "clinic_get_state()->reader_type"
48 class _multibytecodec.MultibyteStreamWriter "MultibyteStreamWriterObject *" "clinic_get_state()->writer_type"
49 [clinic start generated code]*/
50 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=305a76dfdd24b99c]*/
51 #undef clinic_get_state
52
53 typedef struct {
54 PyObject *inobj;
55 Py_ssize_t inpos, inlen;
56 unsigned char *outbuf, *outbuf_end;
57 PyObject *excobj, *outobj;
58 } MultibyteEncodeBuffer;
59
60 typedef struct {
61 const unsigned char *inbuf, *inbuf_top, *inbuf_end;
62 PyObject *excobj;
63 _PyUnicodeWriter writer;
64 } MultibyteDecodeBuffer;
65
66 static char *incnewkwarglist[] = {"errors", NULL};
67 static char *streamkwarglist[] = {"stream", "errors", NULL};
68
69 static PyObject *multibytecodec_encode(MultibyteCodec *,
70 MultibyteCodec_State *, PyObject *, Py_ssize_t *,
71 PyObject *, int);
72
/* Parenthesized so the shift cannot re-associate with neighbouring operators
 * at the point of use (e.g. `MBENC_RESET + 1`). */
#define MBENC_RESET (MBENC_MAX << 1) /* reset after an encoding session */
74
75 static PyObject *
make_tuple(PyObject * object,Py_ssize_t len)76 make_tuple(PyObject *object, Py_ssize_t len)
77 {
78 PyObject *v, *w;
79
80 if (object == NULL)
81 return NULL;
82
83 v = PyTuple_New(2);
84 if (v == NULL) {
85 Py_DECREF(object);
86 return NULL;
87 }
88 PyTuple_SET_ITEM(v, 0, object);
89
90 w = PyLong_FromSsize_t(len);
91 if (w == NULL) {
92 Py_DECREF(v);
93 return NULL;
94 }
95 PyTuple_SET_ITEM(v, 1, w);
96
97 return v;
98 }
99
100 static PyObject *
internal_error_callback(const char * errors)101 internal_error_callback(const char *errors)
102 {
103 if (errors == NULL || strcmp(errors, "strict") == 0)
104 return ERROR_STRICT;
105 else if (strcmp(errors, "ignore") == 0)
106 return ERROR_IGNORE;
107 else if (strcmp(errors, "replace") == 0)
108 return ERROR_REPLACE;
109 else
110 return PyUnicode_FromString(errors);
111 }
112
113 static PyObject *
call_error_callback(PyObject * errors,PyObject * exc)114 call_error_callback(PyObject *errors, PyObject *exc)
115 {
116 PyObject *cb, *r;
117 const char *str;
118
119 assert(PyUnicode_Check(errors));
120 str = PyUnicode_AsUTF8(errors);
121 if (str == NULL)
122 return NULL;
123 cb = PyCodec_LookupError(str);
124 if (cb == NULL)
125 return NULL;
126
127 r = PyObject_CallOneArg(cb, exc);
128 Py_DECREF(cb);
129 return r;
130 }
131
132 static PyObject *
codecctx_errors_get(MultibyteStatefulCodecContext * self,void * Py_UNUSED (ignored))133 codecctx_errors_get(MultibyteStatefulCodecContext *self, void *Py_UNUSED(ignored))
134 {
135 const char *errors;
136
137 if (self->errors == ERROR_STRICT)
138 errors = "strict";
139 else if (self->errors == ERROR_IGNORE)
140 errors = "ignore";
141 else if (self->errors == ERROR_REPLACE)
142 errors = "replace";
143 else {
144 Py_INCREF(self->errors);
145 return self->errors;
146 }
147
148 return PyUnicode_FromString(errors);
149 }
150
151 static int
codecctx_errors_set(MultibyteStatefulCodecContext * self,PyObject * value,void * closure)152 codecctx_errors_set(MultibyteStatefulCodecContext *self, PyObject *value,
153 void *closure)
154 {
155 PyObject *cb;
156 const char *str;
157
158 if (value == NULL) {
159 PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
160 return -1;
161 }
162 if (!PyUnicode_Check(value)) {
163 PyErr_SetString(PyExc_TypeError, "errors must be a string");
164 return -1;
165 }
166
167 str = PyUnicode_AsUTF8(value);
168 if (str == NULL)
169 return -1;
170
171 cb = internal_error_callback(str);
172 if (cb == NULL)
173 return -1;
174
175 ERROR_DECREF(self->errors);
176 self->errors = cb;
177 return 0;
178 }
179
180 /* This getset handlers list is used by all the stateful codec objects */
181 static PyGetSetDef codecctx_getsets[] = {
182 {"errors", (getter)codecctx_errors_get,
183 (setter)codecctx_errors_set,
184 PyDoc_STR("how to treat errors")},
185 {NULL,}
186 };
187
188 static int
expand_encodebuffer(MultibyteEncodeBuffer * buf,Py_ssize_t esize)189 expand_encodebuffer(MultibyteEncodeBuffer *buf, Py_ssize_t esize)
190 {
191 Py_ssize_t orgpos, orgsize, incsize;
192
193 orgpos = (Py_ssize_t)((char *)buf->outbuf -
194 PyBytes_AS_STRING(buf->outobj));
195 orgsize = PyBytes_GET_SIZE(buf->outobj);
196 incsize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);
197
198 if (orgsize > PY_SSIZE_T_MAX - incsize) {
199 PyErr_NoMemory();
200 return -1;
201 }
202
203 if (_PyBytes_Resize(&buf->outobj, orgsize + incsize) == -1)
204 return -1;
205
206 buf->outbuf = (unsigned char *)PyBytes_AS_STRING(buf->outobj) +orgpos;
207 buf->outbuf_end = (unsigned char *)PyBytes_AS_STRING(buf->outobj)
208 + PyBytes_GET_SIZE(buf->outobj);
209
210 return 0;
211 }
/* Ensure at least `s` bytes of output space (a negative `s` always grows the
 * buffer).  On failure control jumps to a label named `errorexit`, which the
 * enclosing function must provide. */
#define REQUIRE_ENCODEBUFFER(buf, s) do {                               \
    if (((s) < 0 || (s) > (buf)->outbuf_end - (buf)->outbuf) &&         \
            expand_encodebuffer(buf, s) == -1) {                        \
        goto errorexit;                                                 \
    }                                                                   \
} while(0)
217
218
219 /**
220 * MultibyteCodec object
221 */
222
223 static int
multibytecodec_encerror(MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteEncodeBuffer * buf,PyObject * errors,Py_ssize_t e)224 multibytecodec_encerror(MultibyteCodec *codec,
225 MultibyteCodec_State *state,
226 MultibyteEncodeBuffer *buf,
227 PyObject *errors, Py_ssize_t e)
228 {
229 PyObject *retobj = NULL, *retstr = NULL, *tobj;
230 Py_ssize_t retstrsize, newpos;
231 Py_ssize_t esize, start, end;
232 const char *reason;
233
234 if (e > 0) {
235 reason = "illegal multibyte sequence";
236 esize = e;
237 }
238 else {
239 switch (e) {
240 case MBERR_TOOSMALL:
241 REQUIRE_ENCODEBUFFER(buf, -1);
242 return 0; /* retry it */
243 case MBERR_TOOFEW:
244 reason = "incomplete multibyte sequence";
245 esize = (Py_ssize_t)buf->inpos;
246 break;
247 case MBERR_INTERNAL:
248 PyErr_SetString(PyExc_RuntimeError,
249 "internal codec error");
250 return -1;
251 default:
252 PyErr_SetString(PyExc_RuntimeError,
253 "unknown runtime error");
254 return -1;
255 }
256 }
257
258 if (errors == ERROR_REPLACE) {
259 PyObject *replchar;
260 Py_ssize_t r;
261 Py_ssize_t inpos;
262 int kind;
263 const void *data;
264
265 replchar = PyUnicode_FromOrdinal('?');
266 if (replchar == NULL)
267 goto errorexit;
268 kind = PyUnicode_KIND(replchar);
269 data = PyUnicode_DATA(replchar);
270
271 inpos = 0;
272 for (;;) {
273 Py_ssize_t outleft = (Py_ssize_t)(buf->outbuf_end - buf->outbuf);
274
275 r = codec->encode(state, codec->config,
276 kind, data, &inpos, 1,
277 &buf->outbuf, outleft, 0);
278 if (r == MBERR_TOOSMALL) {
279 REQUIRE_ENCODEBUFFER(buf, -1);
280 continue;
281 }
282 else
283 break;
284 }
285
286 Py_DECREF(replchar);
287
288 if (r != 0) {
289 REQUIRE_ENCODEBUFFER(buf, 1);
290 *buf->outbuf++ = '?';
291 }
292 }
293 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
294 buf->inpos += esize;
295 return 0;
296 }
297
298 start = (Py_ssize_t)buf->inpos;
299 end = start + esize;
300
301 /* use cached exception object if available */
302 if (buf->excobj == NULL) {
303 buf->excobj = PyObject_CallFunction(PyExc_UnicodeEncodeError,
304 "sOnns",
305 codec->encoding, buf->inobj,
306 start, end, reason);
307 if (buf->excobj == NULL)
308 goto errorexit;
309 }
310 else
311 if (PyUnicodeEncodeError_SetStart(buf->excobj, start) != 0 ||
312 PyUnicodeEncodeError_SetEnd(buf->excobj, end) != 0 ||
313 PyUnicodeEncodeError_SetReason(buf->excobj, reason) != 0)
314 goto errorexit;
315
316 if (errors == ERROR_STRICT) {
317 PyCodec_StrictErrors(buf->excobj);
318 goto errorexit;
319 }
320
321 retobj = call_error_callback(errors, buf->excobj);
322 if (retobj == NULL)
323 goto errorexit;
324
325 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
326 (!PyUnicode_Check((tobj = PyTuple_GET_ITEM(retobj, 0))) && !PyBytes_Check(tobj)) ||
327 !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
328 PyErr_SetString(PyExc_TypeError,
329 "encoding error handler must return "
330 "(str, int) tuple");
331 goto errorexit;
332 }
333
334 if (PyUnicode_Check(tobj)) {
335 Py_ssize_t inpos;
336
337 retstr = multibytecodec_encode(codec, state, tobj,
338 &inpos, ERROR_STRICT,
339 MBENC_FLUSH);
340 if (retstr == NULL)
341 goto errorexit;
342 }
343 else {
344 Py_INCREF(tobj);
345 retstr = tobj;
346 }
347
348 assert(PyBytes_Check(retstr));
349 retstrsize = PyBytes_GET_SIZE(retstr);
350 if (retstrsize > 0) {
351 REQUIRE_ENCODEBUFFER(buf, retstrsize);
352 memcpy(buf->outbuf, PyBytes_AS_STRING(retstr), retstrsize);
353 buf->outbuf += retstrsize;
354 }
355
356 newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
357 if (newpos < 0 && !PyErr_Occurred())
358 newpos += (Py_ssize_t)buf->inlen;
359 if (newpos < 0 || newpos > buf->inlen) {
360 PyErr_Clear();
361 PyErr_Format(PyExc_IndexError,
362 "position %zd from error handler out of bounds",
363 newpos);
364 goto errorexit;
365 }
366 buf->inpos = newpos;
367
368 Py_DECREF(retobj);
369 Py_DECREF(retstr);
370 return 0;
371
372 errorexit:
373 Py_XDECREF(retobj);
374 Py_XDECREF(retstr);
375 return -1;
376 }
377
378 static int
multibytecodec_decerror(MultibyteCodec * codec,MultibyteCodec_State * state,MultibyteDecodeBuffer * buf,PyObject * errors,Py_ssize_t e)379 multibytecodec_decerror(MultibyteCodec *codec,
380 MultibyteCodec_State *state,
381 MultibyteDecodeBuffer *buf,
382 PyObject *errors, Py_ssize_t e)
383 {
384 PyObject *retobj = NULL, *retuni = NULL;
385 Py_ssize_t newpos;
386 const char *reason;
387 Py_ssize_t esize, start, end;
388
389 if (e > 0) {
390 reason = "illegal multibyte sequence";
391 esize = e;
392 }
393 else {
394 switch (e) {
395 case MBERR_TOOSMALL:
396 return 0; /* retry it */
397 case MBERR_TOOFEW:
398 reason = "incomplete multibyte sequence";
399 esize = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
400 break;
401 case MBERR_INTERNAL:
402 PyErr_SetString(PyExc_RuntimeError,
403 "internal codec error");
404 return -1;
405 case MBERR_EXCEPTION:
406 return -1;
407 default:
408 PyErr_SetString(PyExc_RuntimeError,
409 "unknown runtime error");
410 return -1;
411 }
412 }
413
414 if (errors == ERROR_REPLACE) {
415 if (_PyUnicodeWriter_WriteChar(&buf->writer,
416 Py_UNICODE_REPLACEMENT_CHARACTER) < 0)
417 goto errorexit;
418 }
419 if (errors == ERROR_IGNORE || errors == ERROR_REPLACE) {
420 buf->inbuf += esize;
421 return 0;
422 }
423
424 start = (Py_ssize_t)(buf->inbuf - buf->inbuf_top);
425 end = start + esize;
426
427 /* use cached exception object if available */
428 if (buf->excobj == NULL) {
429 buf->excobj = PyUnicodeDecodeError_Create(codec->encoding,
430 (const char *)buf->inbuf_top,
431 (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top),
432 start, end, reason);
433 if (buf->excobj == NULL)
434 goto errorexit;
435 }
436 else
437 if (PyUnicodeDecodeError_SetStart(buf->excobj, start) ||
438 PyUnicodeDecodeError_SetEnd(buf->excobj, end) ||
439 PyUnicodeDecodeError_SetReason(buf->excobj, reason))
440 goto errorexit;
441
442 if (errors == ERROR_STRICT) {
443 PyCodec_StrictErrors(buf->excobj);
444 goto errorexit;
445 }
446
447 retobj = call_error_callback(errors, buf->excobj);
448 if (retobj == NULL)
449 goto errorexit;
450
451 if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
452 !PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
453 !PyLong_Check(PyTuple_GET_ITEM(retobj, 1))) {
454 PyErr_SetString(PyExc_TypeError,
455 "decoding error handler must return "
456 "(str, int) tuple");
457 goto errorexit;
458 }
459
460 if (_PyUnicodeWriter_WriteStr(&buf->writer, retuni) < 0)
461 goto errorexit;
462
463 newpos = PyLong_AsSsize_t(PyTuple_GET_ITEM(retobj, 1));
464 if (newpos < 0 && !PyErr_Occurred())
465 newpos += (Py_ssize_t)(buf->inbuf_end - buf->inbuf_top);
466 if (newpos < 0 || buf->inbuf_top + newpos > buf->inbuf_end) {
467 PyErr_Clear();
468 PyErr_Format(PyExc_IndexError,
469 "position %zd from error handler out of bounds",
470 newpos);
471 goto errorexit;
472 }
473 buf->inbuf = buf->inbuf_top + newpos;
474 Py_DECREF(retobj);
475 return 0;
476
477 errorexit:
478 Py_XDECREF(retobj);
479 return -1;
480 }
481
482 static PyObject *
multibytecodec_encode(MultibyteCodec * codec,MultibyteCodec_State * state,PyObject * text,Py_ssize_t * inpos_t,PyObject * errors,int flags)483 multibytecodec_encode(MultibyteCodec *codec,
484 MultibyteCodec_State *state,
485 PyObject *text, Py_ssize_t *inpos_t,
486 PyObject *errors, int flags)
487 {
488 MultibyteEncodeBuffer buf;
489 Py_ssize_t finalsize, r = 0;
490 Py_ssize_t datalen;
491 int kind;
492 const void *data;
493
494 if (PyUnicode_READY(text) < 0)
495 return NULL;
496 datalen = PyUnicode_GET_LENGTH(text);
497
498 if (datalen == 0 && !(flags & MBENC_RESET))
499 return PyBytes_FromStringAndSize(NULL, 0);
500
501 buf.excobj = NULL;
502 buf.outobj = NULL;
503 buf.inobj = text; /* borrowed reference */
504 buf.inpos = 0;
505 buf.inlen = datalen;
506 kind = PyUnicode_KIND(buf.inobj);
507 data = PyUnicode_DATA(buf.inobj);
508
509 if (datalen > (PY_SSIZE_T_MAX - 16) / 2) {
510 PyErr_NoMemory();
511 goto errorexit;
512 }
513
514 buf.outobj = PyBytes_FromStringAndSize(NULL, datalen * 2 + 16);
515 if (buf.outobj == NULL)
516 goto errorexit;
517 buf.outbuf = (unsigned char *)PyBytes_AS_STRING(buf.outobj);
518 buf.outbuf_end = buf.outbuf + PyBytes_GET_SIZE(buf.outobj);
519
520 while (buf.inpos < buf.inlen) {
521 /* we don't reuse inleft and outleft here.
522 * error callbacks can relocate the cursor anywhere on buffer*/
523 Py_ssize_t outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
524
525 r = codec->encode(state, codec->config,
526 kind, data,
527 &buf.inpos, buf.inlen,
528 &buf.outbuf, outleft, flags);
529 if ((r == 0) || (r == MBERR_TOOFEW && !(flags & MBENC_FLUSH)))
530 break;
531 else if (multibytecodec_encerror(codec, state, &buf, errors,r))
532 goto errorexit;
533 else if (r == MBERR_TOOFEW)
534 break;
535 }
536
537 if (codec->encreset != NULL && (flags & MBENC_RESET))
538 for (;;) {
539 Py_ssize_t outleft;
540
541 outleft = (Py_ssize_t)(buf.outbuf_end - buf.outbuf);
542 r = codec->encreset(state, codec->config, &buf.outbuf,
543 outleft);
544 if (r == 0)
545 break;
546 else if (multibytecodec_encerror(codec, state,
547 &buf, errors, r))
548 goto errorexit;
549 }
550
551 finalsize = (Py_ssize_t)((char *)buf.outbuf -
552 PyBytes_AS_STRING(buf.outobj));
553
554 if (finalsize != PyBytes_GET_SIZE(buf.outobj))
555 if (_PyBytes_Resize(&buf.outobj, finalsize) == -1)
556 goto errorexit;
557
558 if (inpos_t)
559 *inpos_t = buf.inpos;
560 Py_XDECREF(buf.excobj);
561 return buf.outobj;
562
563 errorexit:
564 Py_XDECREF(buf.excobj);
565 Py_XDECREF(buf.outobj);
566 return NULL;
567 }
568
569 /*[clinic input]
570 _multibytecodec.MultibyteCodec.encode
571
572 input: object
573 errors: str(accept={str, NoneType}) = None
574
575 Return an encoded string version of `input'.
576
577 'errors' may be given to set a different error handling scheme. Default is
578 'strict' meaning that encoding errors raise a UnicodeEncodeError. Other possible
579 values are 'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name
580 registered with codecs.register_error that can handle UnicodeEncodeErrors.
581 [clinic start generated code]*/
582
583 static PyObject *
_multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject * self,PyObject * input,const char * errors)584 _multibytecodec_MultibyteCodec_encode_impl(MultibyteCodecObject *self,
585 PyObject *input,
586 const char *errors)
587 /*[clinic end generated code: output=7b26652045ba56a9 input=606d0e128a577bae]*/
588 {
589 MultibyteCodec_State state;
590 PyObject *errorcb, *r, *ucvt;
591 Py_ssize_t datalen;
592
593 if (PyUnicode_Check(input))
594 ucvt = NULL;
595 else {
596 input = ucvt = PyObject_Str(input);
597 if (input == NULL)
598 return NULL;
599 else if (!PyUnicode_Check(input)) {
600 PyErr_SetString(PyExc_TypeError,
601 "couldn't convert the object to unicode.");
602 Py_DECREF(ucvt);
603 return NULL;
604 }
605 }
606
607 if (PyUnicode_READY(input) < 0) {
608 Py_XDECREF(ucvt);
609 return NULL;
610 }
611 datalen = PyUnicode_GET_LENGTH(input);
612
613 errorcb = internal_error_callback(errors);
614 if (errorcb == NULL) {
615 Py_XDECREF(ucvt);
616 return NULL;
617 }
618
619 if (self->codec->encinit != NULL &&
620 self->codec->encinit(&state, self->codec->config) != 0)
621 goto errorexit;
622 r = multibytecodec_encode(self->codec, &state,
623 input, NULL, errorcb,
624 MBENC_FLUSH | MBENC_RESET);
625 if (r == NULL)
626 goto errorexit;
627
628 ERROR_DECREF(errorcb);
629 Py_XDECREF(ucvt);
630 return make_tuple(r, datalen);
631
632 errorexit:
633 ERROR_DECREF(errorcb);
634 Py_XDECREF(ucvt);
635 return NULL;
636 }
637
638 /*[clinic input]
639 _multibytecodec.MultibyteCodec.decode
640
641 input: Py_buffer
642 errors: str(accept={str, NoneType}) = None
643
644 Decodes 'input'.
645
646 'errors' may be given to set a different error handling scheme. Default is
647 'strict' meaning that encoding errors raise a UnicodeDecodeError. Other possible
648 values are 'ignore' and 'replace' as well as any other name registered with
649 codecs.register_error that is able to handle UnicodeDecodeErrors."
650 [clinic start generated code]*/
651
652 static PyObject *
_multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject * self,Py_buffer * input,const char * errors)653 _multibytecodec_MultibyteCodec_decode_impl(MultibyteCodecObject *self,
654 Py_buffer *input,
655 const char *errors)
656 /*[clinic end generated code: output=ff419f65bad6cc77 input=e0c78fc7ab190def]*/
657 {
658 MultibyteCodec_State state;
659 MultibyteDecodeBuffer buf;
660 PyObject *errorcb, *res;
661 const char *data;
662 Py_ssize_t datalen;
663
664 data = input->buf;
665 datalen = input->len;
666
667 errorcb = internal_error_callback(errors);
668 if (errorcb == NULL) {
669 return NULL;
670 }
671
672 if (datalen == 0) {
673 ERROR_DECREF(errorcb);
674 return make_tuple(PyUnicode_New(0, 0), 0);
675 }
676
677 _PyUnicodeWriter_Init(&buf.writer);
678 buf.writer.min_length = datalen;
679 buf.excobj = NULL;
680 buf.inbuf = buf.inbuf_top = (unsigned char *)data;
681 buf.inbuf_end = buf.inbuf_top + datalen;
682
683 if (self->codec->decinit != NULL &&
684 self->codec->decinit(&state, self->codec->config) != 0)
685 goto errorexit;
686
687 while (buf.inbuf < buf.inbuf_end) {
688 Py_ssize_t inleft, r;
689
690 inleft = (Py_ssize_t)(buf.inbuf_end - buf.inbuf);
691
692 r = self->codec->decode(&state, self->codec->config,
693 &buf.inbuf, inleft, &buf.writer);
694 if (r == 0)
695 break;
696 else if (multibytecodec_decerror(self->codec, &state,
697 &buf, errorcb, r))
698 goto errorexit;
699 }
700
701 res = _PyUnicodeWriter_Finish(&buf.writer);
702 if (res == NULL)
703 goto errorexit;
704
705 Py_XDECREF(buf.excobj);
706 ERROR_DECREF(errorcb);
707 return make_tuple(res, datalen);
708
709 errorexit:
710 ERROR_DECREF(errorcb);
711 Py_XDECREF(buf.excobj);
712 _PyUnicodeWriter_Dealloc(&buf.writer);
713
714 return NULL;
715 }
716
717 static struct PyMethodDef multibytecodec_methods[] = {
718 _MULTIBYTECODEC_MULTIBYTECODEC_ENCODE_METHODDEF
719 _MULTIBYTECODEC_MULTIBYTECODEC_DECODE_METHODDEF
720 {NULL, NULL},
721 };
722
723 static int
multibytecodec_traverse(PyObject * self,visitproc visit,void * arg)724 multibytecodec_traverse(PyObject *self, visitproc visit, void *arg)
725 {
726 Py_VISIT(Py_TYPE(self));
727 return 0;
728 }
729
730 static void
multibytecodec_dealloc(MultibyteCodecObject * self)731 multibytecodec_dealloc(MultibyteCodecObject *self)
732 {
733 PyObject_GC_UnTrack(self);
734 PyTypeObject *tp = Py_TYPE(self);
735 tp->tp_free(self);
736 Py_DECREF(tp);
737 }
738
739 static PyType_Slot multibytecodec_slots[] = {
740 {Py_tp_dealloc, multibytecodec_dealloc},
741 {Py_tp_getattro, PyObject_GenericGetAttr},
742 {Py_tp_methods, multibytecodec_methods},
743 {Py_tp_traverse, multibytecodec_traverse},
744 {0, NULL},
745 };
746
747 static PyType_Spec multibytecodec_spec = {
748 .name = MODULE_NAME ".MultibyteCodec",
749 .basicsize = sizeof(MultibyteCodecObject),
750 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
751 Py_TPFLAGS_DISALLOW_INSTANTIATION | Py_TPFLAGS_IMMUTABLETYPE),
752 .slots = multibytecodec_slots,
753 };
754
755
756 /**
757 * Utility functions for stateful codec mechanism
758 */
759
/* Cast an object pointer to the stateful decoder / encoder context view. */
#define STATEFUL_DCTX(o)    ((MultibyteStatefulDecoderContext *)(o))
#define STATEFUL_ECTX(o)    ((MultibyteStatefulEncoderContext *)(o))
762
763 static PyObject *
encoder_encode_stateful(MultibyteStatefulEncoderContext * ctx,PyObject * unistr,int final)764 encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
765 PyObject *unistr, int final)
766 {
767 PyObject *ucvt, *r = NULL;
768 PyObject *inbuf = NULL;
769 Py_ssize_t inpos, datalen;
770 PyObject *origpending = NULL;
771
772 if (PyUnicode_Check(unistr))
773 ucvt = NULL;
774 else {
775 unistr = ucvt = PyObject_Str(unistr);
776 if (unistr == NULL)
777 return NULL;
778 else if (!PyUnicode_Check(unistr)) {
779 PyErr_SetString(PyExc_TypeError,
780 "couldn't convert the object to str.");
781 Py_DECREF(ucvt);
782 return NULL;
783 }
784 }
785
786 if (ctx->pending) {
787 PyObject *inbuf_tmp;
788
789 Py_INCREF(ctx->pending);
790 origpending = ctx->pending;
791
792 Py_INCREF(ctx->pending);
793 inbuf_tmp = ctx->pending;
794 PyUnicode_Append(&inbuf_tmp, unistr);
795 if (inbuf_tmp == NULL)
796 goto errorexit;
797 Py_CLEAR(ctx->pending);
798 inbuf = inbuf_tmp;
799 }
800 else {
801 origpending = NULL;
802
803 Py_INCREF(unistr);
804 inbuf = unistr;
805 }
806 if (PyUnicode_READY(inbuf) < 0)
807 goto errorexit;
808 inpos = 0;
809 datalen = PyUnicode_GET_LENGTH(inbuf);
810
811 r = multibytecodec_encode(ctx->codec, &ctx->state,
812 inbuf, &inpos,
813 ctx->errors, final ? MBENC_FLUSH | MBENC_RESET : 0);
814 if (r == NULL) {
815 /* recover the original pending buffer */
816 Py_XSETREF(ctx->pending, origpending);
817 origpending = NULL;
818 goto errorexit;
819 }
820 Py_XDECREF(origpending);
821
822 if (inpos < datalen) {
823 if (datalen - inpos > MAXENCPENDING) {
824 /* normal codecs can't reach here */
825 PyErr_SetString(PyExc_UnicodeError,
826 "pending buffer overflow");
827 goto errorexit;
828 }
829 ctx->pending = PyUnicode_Substring(inbuf, inpos, datalen);
830 if (ctx->pending == NULL) {
831 /* normal codecs can't reach here */
832 goto errorexit;
833 }
834 }
835
836 Py_DECREF(inbuf);
837 Py_XDECREF(ucvt);
838 return r;
839
840 errorexit:
841 Py_XDECREF(r);
842 Py_XDECREF(ucvt);
843 Py_XDECREF(origpending);
844 Py_XDECREF(inbuf);
845 return NULL;
846 }
847
848 static int
decoder_append_pending(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)849 decoder_append_pending(MultibyteStatefulDecoderContext *ctx,
850 MultibyteDecodeBuffer *buf)
851 {
852 Py_ssize_t npendings;
853
854 npendings = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
855 if (npendings + ctx->pendingsize > MAXDECPENDING ||
856 npendings > PY_SSIZE_T_MAX - ctx->pendingsize) {
857 PyErr_SetString(PyExc_UnicodeError, "pending buffer overflow");
858 return -1;
859 }
860 memcpy(ctx->pending + ctx->pendingsize, buf->inbuf, npendings);
861 ctx->pendingsize += npendings;
862 return 0;
863 }
864
865 static int
decoder_prepare_buffer(MultibyteDecodeBuffer * buf,const char * data,Py_ssize_t size)866 decoder_prepare_buffer(MultibyteDecodeBuffer *buf, const char *data,
867 Py_ssize_t size)
868 {
869 buf->inbuf = buf->inbuf_top = (const unsigned char *)data;
870 buf->inbuf_end = buf->inbuf_top + size;
871 buf->writer.min_length += size;
872 return 0;
873 }
874
875 static int
decoder_feed_buffer(MultibyteStatefulDecoderContext * ctx,MultibyteDecodeBuffer * buf)876 decoder_feed_buffer(MultibyteStatefulDecoderContext *ctx,
877 MultibyteDecodeBuffer *buf)
878 {
879 while (buf->inbuf < buf->inbuf_end) {
880 Py_ssize_t inleft;
881 Py_ssize_t r;
882
883 inleft = (Py_ssize_t)(buf->inbuf_end - buf->inbuf);
884
885 r = ctx->codec->decode(&ctx->state, ctx->codec->config,
886 &buf->inbuf, inleft, &buf->writer);
887 if (r == 0 || r == MBERR_TOOFEW)
888 break;
889 else if (multibytecodec_decerror(ctx->codec, &ctx->state,
890 buf, ctx->errors, r))
891 return -1;
892 }
893 return 0;
894 }
895
896
897 /*[clinic input]
898 _multibytecodec.MultibyteIncrementalEncoder.encode
899
900 input: object
901 final: bool(accept={int}) = False
902 [clinic start generated code]*/
903
904 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject * self,PyObject * input,int final)905 _multibytecodec_MultibyteIncrementalEncoder_encode_impl(MultibyteIncrementalEncoderObject *self,
906 PyObject *input,
907 int final)
908 /*[clinic end generated code: output=123361b6c505e2c1 input=093a1ddbb2fc6721]*/
909 {
910 return encoder_encode_stateful(STATEFUL_ECTX(self), input, final);
911 }
912
913 /*[clinic input]
914 _multibytecodec.MultibyteIncrementalEncoder.getstate
915 [clinic start generated code]*/
916
917 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject * self)918 _multibytecodec_MultibyteIncrementalEncoder_getstate_impl(MultibyteIncrementalEncoderObject *self)
919 /*[clinic end generated code: output=9794a5ace70d7048 input=4a2a82874ffa40bb]*/
920 {
921 /* state made up of 1 byte for buffer size, up to MAXENCPENDING*4 bytes
922 for UTF-8 encoded buffer (each character can use up to 4
923 bytes), and required bytes for MultibyteCodec_State.c. A byte
924 array is used to avoid different compilers generating different
925 values for the same state, e.g. as a result of struct padding.
926 */
927 unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
928 Py_ssize_t statesize;
929 const char *pendingbuffer = NULL;
930 Py_ssize_t pendingsize;
931
932 if (self->pending != NULL) {
933 pendingbuffer = PyUnicode_AsUTF8AndSize(self->pending, &pendingsize);
934 if (pendingbuffer == NULL) {
935 return NULL;
936 }
937 if (pendingsize > MAXENCPENDING*4) {
938 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
939 return NULL;
940 }
941 statebytes[0] = (unsigned char)pendingsize;
942 memcpy(statebytes + 1, pendingbuffer, pendingsize);
943 statesize = 1 + pendingsize;
944 } else {
945 statebytes[0] = 0;
946 statesize = 1;
947 }
948 memcpy(statebytes+statesize, self->state.c,
949 sizeof(self->state.c));
950 statesize += sizeof(self->state.c);
951
952 return (PyObject *)_PyLong_FromByteArray(statebytes, statesize,
953 1 /* little-endian */ ,
954 0 /* unsigned */ );
955 }
956
957 /*[clinic input]
958 _multibytecodec.MultibyteIncrementalEncoder.setstate
959 state as statelong: object(type='PyLongObject *', subclass_of='&PyLong_Type')
960 /
961 [clinic start generated code]*/
962
963 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject * self,PyLongObject * statelong)964 _multibytecodec_MultibyteIncrementalEncoder_setstate_impl(MultibyteIncrementalEncoderObject *self,
965 PyLongObject *statelong)
966 /*[clinic end generated code: output=4e5e98ac1f4039ca input=c80fb5830d4d2f76]*/
967 {
968 PyObject *pending = NULL;
969 unsigned char statebytes[1 + MAXENCPENDING*4 + sizeof(self->state.c)];
970
971 if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
972 1 /* little-endian */ ,
973 0 /* unsigned */ ) < 0) {
974 goto errorexit;
975 }
976
977 if (statebytes[0] > MAXENCPENDING*4) {
978 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
979 return NULL;
980 }
981
982 pending = PyUnicode_DecodeUTF8((const char *)statebytes+1,
983 statebytes[0], "strict");
984 if (pending == NULL) {
985 goto errorexit;
986 }
987
988 Py_CLEAR(self->pending);
989 self->pending = pending;
990 memcpy(self->state.c, statebytes+1+statebytes[0],
991 sizeof(self->state.c));
992
993 Py_RETURN_NONE;
994
995 errorexit:
996 Py_XDECREF(pending);
997 return NULL;
998 }
999
1000 /*[clinic input]
1001 _multibytecodec.MultibyteIncrementalEncoder.reset
1002 [clinic start generated code]*/
1003
1004 static PyObject *
_multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject * self)1005 _multibytecodec_MultibyteIncrementalEncoder_reset_impl(MultibyteIncrementalEncoderObject *self)
1006 /*[clinic end generated code: output=b4125d8f537a253f input=930f06760707b6ea]*/
1007 {
1008 /* Longest output: 4 bytes (b'\x0F\x1F(B') with ISO 2022 */
1009 unsigned char buffer[4], *outbuf;
1010 Py_ssize_t r;
1011 if (self->codec->encreset != NULL) {
1012 outbuf = buffer;
1013 r = self->codec->encreset(&self->state, self->codec->config,
1014 &outbuf, sizeof(buffer));
1015 if (r != 0)
1016 return NULL;
1017 }
1018 Py_CLEAR(self->pending);
1019 Py_RETURN_NONE;
1020 }
1021
1022 static struct PyMethodDef mbiencoder_methods[] = {
1023 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_ENCODE_METHODDEF
1024 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_GETSTATE_METHODDEF
1025 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_SETSTATE_METHODDEF
1026 _MULTIBYTECODEC_MULTIBYTEINCREMENTALENCODER_RESET_METHODDEF
1027 {NULL, NULL},
1028 };
1029
1030 static PyObject *
mbiencoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1031 mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1032 {
1033 MultibyteIncrementalEncoderObject *self;
1034 PyObject *codec = NULL;
1035 char *errors = NULL;
1036
1037 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalEncoder",
1038 incnewkwarglist, &errors))
1039 return NULL;
1040
1041 self = (MultibyteIncrementalEncoderObject *)type->tp_alloc(type, 0);
1042 if (self == NULL)
1043 return NULL;
1044
1045 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1046 if (codec == NULL)
1047 goto errorexit;
1048
1049 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1050 if (!MultibyteCodec_Check(state, codec)) {
1051 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1052 goto errorexit;
1053 }
1054
1055 self->codec = ((MultibyteCodecObject *)codec)->codec;
1056 self->pending = NULL;
1057 self->errors = internal_error_callback(errors);
1058 if (self->errors == NULL)
1059 goto errorexit;
1060 if (self->codec->encinit != NULL &&
1061 self->codec->encinit(&self->state, self->codec->config) != 0)
1062 goto errorexit;
1063
1064 Py_DECREF(codec);
1065 return (PyObject *)self;
1066
1067 errorexit:
1068 Py_XDECREF(self);
1069 Py_XDECREF(codec);
1070 return NULL;
1071 }
1072
1073 static int
mbiencoder_init(PyObject * self,PyObject * args,PyObject * kwds)1074 mbiencoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1075 {
1076 return 0;
1077 }
1078
1079 static int
mbiencoder_traverse(MultibyteIncrementalEncoderObject * self,visitproc visit,void * arg)1080 mbiencoder_traverse(MultibyteIncrementalEncoderObject *self,
1081 visitproc visit, void *arg)
1082 {
1083 if (ERROR_ISCUSTOM(self->errors))
1084 Py_VISIT(self->errors);
1085 return 0;
1086 }
1087
1088 static void
mbiencoder_dealloc(MultibyteIncrementalEncoderObject * self)1089 mbiencoder_dealloc(MultibyteIncrementalEncoderObject *self)
1090 {
1091 PyTypeObject *tp = Py_TYPE(self);
1092 PyObject_GC_UnTrack(self);
1093 ERROR_DECREF(self->errors);
1094 Py_CLEAR(self->pending);
1095 tp->tp_free(self);
1096 Py_DECREF(tp);
1097 }
1098
/* Slot table for the MultibyteIncrementalEncoder heap type; referenced by
 * encoder_spec. */
static PyType_Slot encoder_slots[] = {
    {Py_tp_dealloc, mbiencoder_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_traverse, mbiencoder_traverse},
    {Py_tp_methods, mbiencoder_methods},
    {Py_tp_getset, codecctx_getsets},
    {Py_tp_init, mbiencoder_init},
    {Py_tp_new, mbiencoder_new},
    {0, NULL},  /* sentinel */
};
1109
/* Type spec for _multibytecodec.MultibyteIncrementalEncoder: GC-enabled,
 * subclassable (BASETYPE) and immutable, built from encoder_slots. */
static PyType_Spec encoder_spec = {
    .name = MODULE_NAME ".MultibyteIncrementalEncoder",
    .basicsize = sizeof(MultibyteIncrementalEncoderObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = encoder_slots,
};
1117
1118
1119 /*[clinic input]
1120 _multibytecodec.MultibyteIncrementalDecoder.decode
1121
1122 input: Py_buffer
1123 final: bool(accept={int}) = False
1124 [clinic start generated code]*/
1125
1126 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject * self,Py_buffer * input,int final)1127 _multibytecodec_MultibyteIncrementalDecoder_decode_impl(MultibyteIncrementalDecoderObject *self,
1128 Py_buffer *input,
1129 int final)
1130 /*[clinic end generated code: output=b9b9090e8a9ce2ba input=c9132b24d503eb1d]*/
1131 {
1132 MultibyteDecodeBuffer buf;
1133 char *data, *wdata = NULL;
1134 Py_ssize_t wsize, size, origpending;
1135 PyObject *res;
1136
1137 data = input->buf;
1138 size = input->len;
1139
1140 _PyUnicodeWriter_Init(&buf.writer);
1141 buf.excobj = NULL;
1142 origpending = self->pendingsize;
1143
1144 if (self->pendingsize == 0) {
1145 wsize = size;
1146 wdata = data;
1147 }
1148 else {
1149 if (size > PY_SSIZE_T_MAX - self->pendingsize) {
1150 PyErr_NoMemory();
1151 goto errorexit;
1152 }
1153 wsize = size + self->pendingsize;
1154 wdata = PyMem_Malloc(wsize);
1155 if (wdata == NULL) {
1156 PyErr_NoMemory();
1157 goto errorexit;
1158 }
1159 memcpy(wdata, self->pending, self->pendingsize);
1160 memcpy(wdata + self->pendingsize, data, size);
1161 self->pendingsize = 0;
1162 }
1163
1164 if (decoder_prepare_buffer(&buf, wdata, wsize) != 0)
1165 goto errorexit;
1166
1167 if (decoder_feed_buffer(STATEFUL_DCTX(self), &buf))
1168 goto errorexit;
1169
1170 if (final && buf.inbuf < buf.inbuf_end) {
1171 if (multibytecodec_decerror(self->codec, &self->state,
1172 &buf, self->errors, MBERR_TOOFEW)) {
1173 /* recover the original pending buffer */
1174 memcpy(self->pending, wdata, origpending);
1175 self->pendingsize = origpending;
1176 goto errorexit;
1177 }
1178 }
1179
1180 if (buf.inbuf < buf.inbuf_end) { /* pending sequence still exists */
1181 if (decoder_append_pending(STATEFUL_DCTX(self), &buf) != 0)
1182 goto errorexit;
1183 }
1184
1185 res = _PyUnicodeWriter_Finish(&buf.writer);
1186 if (res == NULL)
1187 goto errorexit;
1188
1189 if (wdata != data)
1190 PyMem_Free(wdata);
1191 Py_XDECREF(buf.excobj);
1192 return res;
1193
1194 errorexit:
1195 if (wdata != NULL && wdata != data)
1196 PyMem_Free(wdata);
1197 Py_XDECREF(buf.excobj);
1198 _PyUnicodeWriter_Dealloc(&buf.writer);
1199 return NULL;
1200 }
1201
1202 /*[clinic input]
1203 _multibytecodec.MultibyteIncrementalDecoder.getstate
1204 [clinic start generated code]*/
1205
1206 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject * self)1207 _multibytecodec_MultibyteIncrementalDecoder_getstate_impl(MultibyteIncrementalDecoderObject *self)
1208 /*[clinic end generated code: output=255009c4713b7f82 input=4006aa49bddbaa75]*/
1209 {
1210 PyObject *buffer;
1211 PyObject *statelong;
1212
1213 buffer = PyBytes_FromStringAndSize((const char *)self->pending,
1214 self->pendingsize);
1215 if (buffer == NULL) {
1216 return NULL;
1217 }
1218
1219 statelong = (PyObject *)_PyLong_FromByteArray(self->state.c,
1220 sizeof(self->state.c),
1221 1 /* little-endian */ ,
1222 0 /* unsigned */ );
1223 if (statelong == NULL) {
1224 Py_DECREF(buffer);
1225 return NULL;
1226 }
1227
1228 return Py_BuildValue("NN", buffer, statelong);
1229 }
1230
1231 /*[clinic input]
1232 _multibytecodec.MultibyteIncrementalDecoder.setstate
1233 state: object(subclass_of='&PyTuple_Type')
1234 /
1235 [clinic start generated code]*/
1236
1237 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject * self,PyObject * state)1238 _multibytecodec_MultibyteIncrementalDecoder_setstate_impl(MultibyteIncrementalDecoderObject *self,
1239 PyObject *state)
1240 /*[clinic end generated code: output=106b2fbca3e2dcc2 input=e5d794e8baba1a47]*/
1241 {
1242 PyObject *buffer;
1243 PyLongObject *statelong;
1244 Py_ssize_t buffersize;
1245 const char *bufferstr;
1246 unsigned char statebytes[8];
1247
1248 if (!PyArg_ParseTuple(state, "SO!;setstate(): illegal state argument",
1249 &buffer, &PyLong_Type, &statelong))
1250 {
1251 return NULL;
1252 }
1253
1254 if (_PyLong_AsByteArray(statelong, statebytes, sizeof(statebytes),
1255 1 /* little-endian */ ,
1256 0 /* unsigned */ ) < 0) {
1257 return NULL;
1258 }
1259
1260 buffersize = PyBytes_Size(buffer);
1261 if (buffersize == -1) {
1262 return NULL;
1263 }
1264
1265 if (buffersize > MAXDECPENDING) {
1266 PyErr_SetString(PyExc_UnicodeError, "pending buffer too large");
1267 return NULL;
1268 }
1269
1270 bufferstr = PyBytes_AsString(buffer);
1271 if (bufferstr == NULL) {
1272 return NULL;
1273 }
1274 self->pendingsize = buffersize;
1275 memcpy(self->pending, bufferstr, self->pendingsize);
1276 memcpy(self->state.c, statebytes, sizeof(statebytes));
1277
1278 Py_RETURN_NONE;
1279 }
1280
1281 /*[clinic input]
1282 _multibytecodec.MultibyteIncrementalDecoder.reset
1283 [clinic start generated code]*/
1284
1285 static PyObject *
_multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject * self)1286 _multibytecodec_MultibyteIncrementalDecoder_reset_impl(MultibyteIncrementalDecoderObject *self)
1287 /*[clinic end generated code: output=da423b1782c23ed1 input=3b63b3be85b2fb45]*/
1288 {
1289 if (self->codec->decreset != NULL &&
1290 self->codec->decreset(&self->state, self->codec->config) != 0)
1291 return NULL;
1292 self->pendingsize = 0;
1293
1294 Py_RETURN_NONE;
1295 }
1296
/* Method table for MultibyteIncrementalDecoder (installed through
 * Py_tp_methods in decoder_slots).  The *_METHODDEF macros are defined
 * outside this part of the file. */
static struct PyMethodDef mbidecoder_methods[] = {
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_DECODE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_GETSTATE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_SETSTATE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTEINCREMENTALDECODER_RESET_METHODDEF
    {NULL, NULL},  /* sentinel */
};
1304
1305 static PyObject *
mbidecoder_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1306 mbidecoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1307 {
1308 MultibyteIncrementalDecoderObject *self;
1309 PyObject *codec = NULL;
1310 char *errors = NULL;
1311
1312 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|s:IncrementalDecoder",
1313 incnewkwarglist, &errors))
1314 return NULL;
1315
1316 self = (MultibyteIncrementalDecoderObject *)type->tp_alloc(type, 0);
1317 if (self == NULL)
1318 return NULL;
1319
1320 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1321 if (codec == NULL)
1322 goto errorexit;
1323
1324 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1325 if (!MultibyteCodec_Check(state, codec)) {
1326 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1327 goto errorexit;
1328 }
1329
1330 self->codec = ((MultibyteCodecObject *)codec)->codec;
1331 self->pendingsize = 0;
1332 self->errors = internal_error_callback(errors);
1333 if (self->errors == NULL)
1334 goto errorexit;
1335 if (self->codec->decinit != NULL &&
1336 self->codec->decinit(&self->state, self->codec->config) != 0)
1337 goto errorexit;
1338
1339 Py_DECREF(codec);
1340 return (PyObject *)self;
1341
1342 errorexit:
1343 Py_XDECREF(self);
1344 Py_XDECREF(codec);
1345 return NULL;
1346 }
1347
1348 static int
mbidecoder_init(PyObject * self,PyObject * args,PyObject * kwds)1349 mbidecoder_init(PyObject *self, PyObject *args, PyObject *kwds)
1350 {
1351 return 0;
1352 }
1353
1354 static int
mbidecoder_traverse(MultibyteIncrementalDecoderObject * self,visitproc visit,void * arg)1355 mbidecoder_traverse(MultibyteIncrementalDecoderObject *self,
1356 visitproc visit, void *arg)
1357 {
1358 if (ERROR_ISCUSTOM(self->errors))
1359 Py_VISIT(self->errors);
1360 return 0;
1361 }
1362
1363 static void
mbidecoder_dealloc(MultibyteIncrementalDecoderObject * self)1364 mbidecoder_dealloc(MultibyteIncrementalDecoderObject *self)
1365 {
1366 PyTypeObject *tp = Py_TYPE(self);
1367 PyObject_GC_UnTrack(self);
1368 ERROR_DECREF(self->errors);
1369 tp->tp_free(self);
1370 Py_DECREF(tp);
1371 }
1372
/* Slot table for the MultibyteIncrementalDecoder heap type; referenced by
 * decoder_spec. */
static PyType_Slot decoder_slots[] = {
    {Py_tp_dealloc, mbidecoder_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_traverse, mbidecoder_traverse},
    {Py_tp_methods, mbidecoder_methods},
    {Py_tp_getset, codecctx_getsets},
    {Py_tp_init, mbidecoder_init},
    {Py_tp_new, mbidecoder_new},
    {0, NULL},  /* sentinel */
};
1383
/* Type spec for _multibytecodec.MultibyteIncrementalDecoder: GC-enabled,
 * subclassable (BASETYPE) and immutable, built from decoder_slots. */
static PyType_Spec decoder_spec = {
    .name = MODULE_NAME ".MultibyteIncrementalDecoder",
    .basicsize = sizeof(MultibyteIncrementalDecoderObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = decoder_slots,
};
1391
1392 static PyObject *
mbstreamreader_iread(MultibyteStreamReaderObject * self,const char * method,Py_ssize_t sizehint)1393 mbstreamreader_iread(MultibyteStreamReaderObject *self,
1394 const char *method, Py_ssize_t sizehint)
1395 {
1396 MultibyteDecodeBuffer buf;
1397 PyObject *cres, *res;
1398 Py_ssize_t rsize;
1399
1400 if (sizehint == 0)
1401 return PyUnicode_New(0, 0);
1402
1403 _PyUnicodeWriter_Init(&buf.writer);
1404 buf.excobj = NULL;
1405 cres = NULL;
1406
1407 for (;;) {
1408 int endoffile;
1409
1410 if (sizehint < 0)
1411 cres = PyObject_CallMethod(self->stream,
1412 method, NULL);
1413 else
1414 cres = PyObject_CallMethod(self->stream,
1415 method, "i", sizehint);
1416 if (cres == NULL)
1417 goto errorexit;
1418
1419 if (!PyBytes_Check(cres)) {
1420 PyErr_Format(PyExc_TypeError,
1421 "stream function returned a "
1422 "non-bytes object (%.100s)",
1423 Py_TYPE(cres)->tp_name);
1424 goto errorexit;
1425 }
1426
1427 endoffile = (PyBytes_GET_SIZE(cres) == 0);
1428
1429 if (self->pendingsize > 0) {
1430 PyObject *ctr;
1431 char *ctrdata;
1432
1433 if (PyBytes_GET_SIZE(cres) > PY_SSIZE_T_MAX - self->pendingsize) {
1434 PyErr_NoMemory();
1435 goto errorexit;
1436 }
1437 rsize = PyBytes_GET_SIZE(cres) + self->pendingsize;
1438 ctr = PyBytes_FromStringAndSize(NULL, rsize);
1439 if (ctr == NULL)
1440 goto errorexit;
1441 ctrdata = PyBytes_AS_STRING(ctr);
1442 memcpy(ctrdata, self->pending, self->pendingsize);
1443 memcpy(ctrdata + self->pendingsize,
1444 PyBytes_AS_STRING(cres),
1445 PyBytes_GET_SIZE(cres));
1446 Py_DECREF(cres);
1447 cres = ctr;
1448 self->pendingsize = 0;
1449 }
1450
1451 rsize = PyBytes_GET_SIZE(cres);
1452 if (decoder_prepare_buffer(&buf, PyBytes_AS_STRING(cres),
1453 rsize) != 0)
1454 goto errorexit;
1455
1456 if (rsize > 0 && decoder_feed_buffer(
1457 (MultibyteStatefulDecoderContext *)self, &buf))
1458 goto errorexit;
1459
1460 if (endoffile || sizehint < 0) {
1461 if (buf.inbuf < buf.inbuf_end &&
1462 multibytecodec_decerror(self->codec, &self->state,
1463 &buf, self->errors, MBERR_TOOFEW))
1464 goto errorexit;
1465 }
1466
1467 if (buf.inbuf < buf.inbuf_end) { /* pending sequence exists */
1468 if (decoder_append_pending(STATEFUL_DCTX(self),
1469 &buf) != 0)
1470 goto errorexit;
1471 }
1472
1473 Py_DECREF(cres);
1474 cres = NULL;
1475
1476 if (sizehint < 0 || buf.writer.pos != 0 || rsize == 0)
1477 break;
1478
1479 sizehint = 1; /* read 1 more byte and retry */
1480 }
1481
1482 res = _PyUnicodeWriter_Finish(&buf.writer);
1483 if (res == NULL)
1484 goto errorexit;
1485
1486 Py_XDECREF(cres);
1487 Py_XDECREF(buf.excobj);
1488 return res;
1489
1490 errorexit:
1491 Py_XDECREF(cres);
1492 Py_XDECREF(buf.excobj);
1493 _PyUnicodeWriter_Dealloc(&buf.writer);
1494 return NULL;
1495 }
1496
1497 /*[clinic input]
1498 _multibytecodec.MultibyteStreamReader.read
1499
1500 sizeobj: object = None
1501 /
1502 [clinic start generated code]*/
1503
1504 static PyObject *
_multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1505 _multibytecodec_MultibyteStreamReader_read_impl(MultibyteStreamReaderObject *self,
1506 PyObject *sizeobj)
1507 /*[clinic end generated code: output=35621eb75355d5b8 input=015b0d3ff2fca485]*/
1508 {
1509 Py_ssize_t size;
1510
1511 if (sizeobj == Py_None)
1512 size = -1;
1513 else if (PyLong_Check(sizeobj))
1514 size = PyLong_AsSsize_t(sizeobj);
1515 else {
1516 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1517 return NULL;
1518 }
1519
1520 if (size == -1 && PyErr_Occurred())
1521 return NULL;
1522
1523 return mbstreamreader_iread(self, "read", size);
1524 }
1525
1526 /*[clinic input]
1527 _multibytecodec.MultibyteStreamReader.readline
1528
1529 sizeobj: object = None
1530 /
1531 [clinic start generated code]*/
1532
1533 static PyObject *
_multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject * self,PyObject * sizeobj)1534 _multibytecodec_MultibyteStreamReader_readline_impl(MultibyteStreamReaderObject *self,
1535 PyObject *sizeobj)
1536 /*[clinic end generated code: output=4fbfaae1ed457a11 input=41ccc64f9bb0cec3]*/
1537 {
1538 Py_ssize_t size;
1539
1540 if (sizeobj == Py_None)
1541 size = -1;
1542 else if (PyLong_Check(sizeobj))
1543 size = PyLong_AsSsize_t(sizeobj);
1544 else {
1545 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1546 return NULL;
1547 }
1548
1549 if (size == -1 && PyErr_Occurred())
1550 return NULL;
1551
1552 return mbstreamreader_iread(self, "readline", size);
1553 }
1554
1555 /*[clinic input]
1556 _multibytecodec.MultibyteStreamReader.readlines
1557
1558 sizehintobj: object = None
1559 /
1560 [clinic start generated code]*/
1561
1562 static PyObject *
_multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject * self,PyObject * sizehintobj)1563 _multibytecodec_MultibyteStreamReader_readlines_impl(MultibyteStreamReaderObject *self,
1564 PyObject *sizehintobj)
1565 /*[clinic end generated code: output=e7c4310768ed2ad4 input=54932f5d4d88e880]*/
1566 {
1567 PyObject *r, *sr;
1568 Py_ssize_t sizehint;
1569
1570 if (sizehintobj == Py_None)
1571 sizehint = -1;
1572 else if (PyLong_Check(sizehintobj))
1573 sizehint = PyLong_AsSsize_t(sizehintobj);
1574 else {
1575 PyErr_SetString(PyExc_TypeError, "arg 1 must be an integer");
1576 return NULL;
1577 }
1578
1579 if (sizehint == -1 && PyErr_Occurred())
1580 return NULL;
1581
1582 r = mbstreamreader_iread(self, "read", sizehint);
1583 if (r == NULL)
1584 return NULL;
1585
1586 sr = PyUnicode_Splitlines(r, 1);
1587 Py_DECREF(r);
1588 return sr;
1589 }
1590
1591 /*[clinic input]
1592 _multibytecodec.MultibyteStreamReader.reset
1593 [clinic start generated code]*/
1594
1595 static PyObject *
_multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject * self)1596 _multibytecodec_MultibyteStreamReader_reset_impl(MultibyteStreamReaderObject *self)
1597 /*[clinic end generated code: output=138490370a680abc input=5d4140db84b5e1e2]*/
1598 {
1599 if (self->codec->decreset != NULL &&
1600 self->codec->decreset(&self->state, self->codec->config) != 0)
1601 return NULL;
1602 self->pendingsize = 0;
1603
1604 Py_RETURN_NONE;
1605 }
1606
/* Method table for MultibyteStreamReader (installed through Py_tp_methods
 * in reader_slots).  The *_METHODDEF macros are defined outside this part of
 * the file. */
static struct PyMethodDef mbstreamreader_methods[] = {
    _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READ_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMREADER_READLINES_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMREADER_RESET_METHODDEF
    {NULL, NULL},  /* sentinel */
};
1614
/* Member table for MultibyteStreamReader: exposes the wrapped stream object
 * as the read-only attribute `stream`. */
static PyMemberDef mbstreamreader_members[] = {
    {"stream",          T_OBJECT,
                    offsetof(MultibyteStreamReaderObject, stream),
                    READONLY,           NULL},
    {NULL,}  /* sentinel */
};
1621
1622 static PyObject *
mbstreamreader_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1623 mbstreamreader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1624 {
1625 MultibyteStreamReaderObject *self;
1626 PyObject *stream, *codec = NULL;
1627 char *errors = NULL;
1628
1629 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamReader",
1630 streamkwarglist, &stream, &errors))
1631 return NULL;
1632
1633 self = (MultibyteStreamReaderObject *)type->tp_alloc(type, 0);
1634 if (self == NULL)
1635 return NULL;
1636
1637 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1638 if (codec == NULL)
1639 goto errorexit;
1640
1641 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1642 if (!MultibyteCodec_Check(state, codec)) {
1643 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1644 goto errorexit;
1645 }
1646
1647 self->codec = ((MultibyteCodecObject *)codec)->codec;
1648 self->stream = stream;
1649 Py_INCREF(stream);
1650 self->pendingsize = 0;
1651 self->errors = internal_error_callback(errors);
1652 if (self->errors == NULL)
1653 goto errorexit;
1654 if (self->codec->decinit != NULL &&
1655 self->codec->decinit(&self->state, self->codec->config) != 0)
1656 goto errorexit;
1657
1658 Py_DECREF(codec);
1659 return (PyObject *)self;
1660
1661 errorexit:
1662 Py_XDECREF(self);
1663 Py_XDECREF(codec);
1664 return NULL;
1665 }
1666
1667 static int
mbstreamreader_init(PyObject * self,PyObject * args,PyObject * kwds)1668 mbstreamreader_init(PyObject *self, PyObject *args, PyObject *kwds)
1669 {
1670 return 0;
1671 }
1672
1673 static int
mbstreamreader_traverse(MultibyteStreamReaderObject * self,visitproc visit,void * arg)1674 mbstreamreader_traverse(MultibyteStreamReaderObject *self,
1675 visitproc visit, void *arg)
1676 {
1677 if (ERROR_ISCUSTOM(self->errors))
1678 Py_VISIT(self->errors);
1679 Py_VISIT(self->stream);
1680 return 0;
1681 }
1682
1683 static void
mbstreamreader_dealloc(MultibyteStreamReaderObject * self)1684 mbstreamreader_dealloc(MultibyteStreamReaderObject *self)
1685 {
1686 PyTypeObject *tp = Py_TYPE(self);
1687 PyObject_GC_UnTrack(self);
1688 ERROR_DECREF(self->errors);
1689 Py_XDECREF(self->stream);
1690 tp->tp_free(self);
1691 Py_DECREF(tp);
1692 }
1693
/* Slot table for the MultibyteStreamReader heap type; referenced by
 * reader_spec. */
static PyType_Slot reader_slots[] = {
    {Py_tp_dealloc, mbstreamreader_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_traverse, mbstreamreader_traverse},
    {Py_tp_methods, mbstreamreader_methods},
    {Py_tp_members, mbstreamreader_members},
    {Py_tp_getset, codecctx_getsets},
    {Py_tp_init, mbstreamreader_init},
    {Py_tp_new, mbstreamreader_new},
    {0, NULL},  /* sentinel */
};
1705
/* Type spec for _multibytecodec.MultibyteStreamReader: GC-enabled,
 * subclassable (BASETYPE) and immutable, built from reader_slots. */
static PyType_Spec reader_spec = {
    .name = MODULE_NAME ".MultibyteStreamReader",
    .basicsize = sizeof(MultibyteStreamReaderObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = reader_slots,
};
1713
1714 static int
mbstreamwriter_iwrite(MultibyteStreamWriterObject * self,PyObject * unistr,PyObject * str_write)1715 mbstreamwriter_iwrite(MultibyteStreamWriterObject *self,
1716 PyObject *unistr, PyObject *str_write)
1717 {
1718 PyObject *str, *wr;
1719
1720 str = encoder_encode_stateful(STATEFUL_ECTX(self), unistr, 0);
1721 if (str == NULL)
1722 return -1;
1723
1724 wr = _PyObject_CallMethodOneArg(self->stream, str_write, str);
1725 Py_DECREF(str);
1726 if (wr == NULL)
1727 return -1;
1728
1729 Py_DECREF(wr);
1730 return 0;
1731 }
1732
1733 /*[clinic input]
1734 _multibytecodec.MultibyteStreamWriter.write
1735
1736 cls: defining_class
1737 strobj: object
1738 /
1739 [clinic start generated code]*/
1740
1741 static PyObject *
_multibytecodec_MultibyteStreamWriter_write_impl(MultibyteStreamWriterObject * self,PyTypeObject * cls,PyObject * strobj)1742 _multibytecodec_MultibyteStreamWriter_write_impl(MultibyteStreamWriterObject *self,
1743 PyTypeObject *cls,
1744 PyObject *strobj)
1745 /*[clinic end generated code: output=68ade3aea26410ac input=199f26f68bd8425a]*/
1746 {
1747 _multibytecodec_state *state = PyType_GetModuleState(cls);
1748 assert(state != NULL);
1749 if (mbstreamwriter_iwrite(self, strobj, state->str_write)) {
1750 return NULL;
1751 }
1752 Py_RETURN_NONE;
1753 }
1754
1755 /*[clinic input]
1756 _multibytecodec.MultibyteStreamWriter.writelines
1757
1758 cls: defining_class
1759 lines: object
1760 /
1761 [clinic start generated code]*/
1762
1763 static PyObject *
_multibytecodec_MultibyteStreamWriter_writelines_impl(MultibyteStreamWriterObject * self,PyTypeObject * cls,PyObject * lines)1764 _multibytecodec_MultibyteStreamWriter_writelines_impl(MultibyteStreamWriterObject *self,
1765 PyTypeObject *cls,
1766 PyObject *lines)
1767 /*[clinic end generated code: output=b4c99d2cf23ffb88 input=a6d5fe7c74972a34]*/
1768 {
1769 PyObject *strobj;
1770 int i, r;
1771
1772 if (!PySequence_Check(lines)) {
1773 PyErr_SetString(PyExc_TypeError,
1774 "arg must be a sequence object");
1775 return NULL;
1776 }
1777
1778 _multibytecodec_state *state = PyType_GetModuleState(cls);
1779 assert(state != NULL);
1780 for (i = 0; i < PySequence_Length(lines); i++) {
1781 /* length can be changed even within this loop */
1782 strobj = PySequence_GetItem(lines, i);
1783 if (strobj == NULL)
1784 return NULL;
1785
1786 r = mbstreamwriter_iwrite(self, strobj, state->str_write);
1787 Py_DECREF(strobj);
1788 if (r == -1)
1789 return NULL;
1790 }
1791 /* PySequence_Length() can fail */
1792 if (PyErr_Occurred())
1793 return NULL;
1794
1795 Py_RETURN_NONE;
1796 }
1797
1798 /*[clinic input]
1799 _multibytecodec.MultibyteStreamWriter.reset
1800
1801 cls: defining_class
1802 /
1803
1804 [clinic start generated code]*/
1805
1806 static PyObject *
_multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject * self,PyTypeObject * cls)1807 _multibytecodec_MultibyteStreamWriter_reset_impl(MultibyteStreamWriterObject *self,
1808 PyTypeObject *cls)
1809 /*[clinic end generated code: output=32ef224c2a38aa3d input=28af6a9cd38d1979]*/
1810 {
1811 PyObject *pwrt;
1812
1813 if (!self->pending)
1814 Py_RETURN_NONE;
1815
1816 pwrt = multibytecodec_encode(self->codec, &self->state,
1817 self->pending, NULL, self->errors,
1818 MBENC_FLUSH | MBENC_RESET);
1819 /* some pending buffer can be truncated when UnicodeEncodeError is
1820 * raised on 'strict' mode. but, 'reset' method is designed to
1821 * reset the pending buffer or states so failed string sequence
1822 * ought to be missed */
1823 Py_CLEAR(self->pending);
1824 if (pwrt == NULL)
1825 return NULL;
1826
1827 assert(PyBytes_Check(pwrt));
1828
1829 _multibytecodec_state *state = PyType_GetModuleState(cls);
1830 assert(state != NULL);
1831
1832 if (PyBytes_Size(pwrt) > 0) {
1833 PyObject *wr;
1834
1835 wr = _PyObject_CallMethodOneArg(self->stream, state->str_write, pwrt);
1836 if (wr == NULL) {
1837 Py_DECREF(pwrt);
1838 return NULL;
1839 }
1840 }
1841 Py_DECREF(pwrt);
1842
1843 Py_RETURN_NONE;
1844 }
1845
1846 static PyObject *
mbstreamwriter_new(PyTypeObject * type,PyObject * args,PyObject * kwds)1847 mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1848 {
1849 MultibyteStreamWriterObject *self;
1850 PyObject *stream, *codec = NULL;
1851 char *errors = NULL;
1852
1853 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|s:StreamWriter",
1854 streamkwarglist, &stream, &errors))
1855 return NULL;
1856
1857 self = (MultibyteStreamWriterObject *)type->tp_alloc(type, 0);
1858 if (self == NULL)
1859 return NULL;
1860
1861 codec = PyObject_GetAttrString((PyObject *)type, "codec");
1862 if (codec == NULL)
1863 goto errorexit;
1864
1865 _multibytecodec_state *state = _multibyte_codec_find_state_by_type(type);
1866 if (!MultibyteCodec_Check(state, codec)) {
1867 PyErr_SetString(PyExc_TypeError, "codec is unexpected type");
1868 goto errorexit;
1869 }
1870
1871 self->codec = ((MultibyteCodecObject *)codec)->codec;
1872 self->stream = stream;
1873 Py_INCREF(stream);
1874 self->pending = NULL;
1875 self->errors = internal_error_callback(errors);
1876 if (self->errors == NULL)
1877 goto errorexit;
1878 if (self->codec->encinit != NULL &&
1879 self->codec->encinit(&self->state, self->codec->config) != 0)
1880 goto errorexit;
1881
1882 Py_DECREF(codec);
1883 return (PyObject *)self;
1884
1885 errorexit:
1886 Py_XDECREF(self);
1887 Py_XDECREF(codec);
1888 return NULL;
1889 }
1890
1891 static int
mbstreamwriter_init(PyObject * self,PyObject * args,PyObject * kwds)1892 mbstreamwriter_init(PyObject *self, PyObject *args, PyObject *kwds)
1893 {
1894 return 0;
1895 }
1896
1897 static int
mbstreamwriter_traverse(MultibyteStreamWriterObject * self,visitproc visit,void * arg)1898 mbstreamwriter_traverse(MultibyteStreamWriterObject *self,
1899 visitproc visit, void *arg)
1900 {
1901 if (ERROR_ISCUSTOM(self->errors))
1902 Py_VISIT(self->errors);
1903 Py_VISIT(self->stream);
1904 return 0;
1905 }
1906
1907 static void
mbstreamwriter_dealloc(MultibyteStreamWriterObject * self)1908 mbstreamwriter_dealloc(MultibyteStreamWriterObject *self)
1909 {
1910 PyTypeObject *tp = Py_TYPE(self);
1911 PyObject_GC_UnTrack(self);
1912 ERROR_DECREF(self->errors);
1913 Py_XDECREF(self->stream);
1914 tp->tp_free(self);
1915 Py_DECREF(tp);
1916 }
1917
/* Method table for MultibyteStreamWriter (installed through Py_tp_methods
 * in writer_slots).  The *_METHODDEF macros are defined outside this part of
 * the file. */
static struct PyMethodDef mbstreamwriter_methods[] = {
    _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITE_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_WRITELINES_METHODDEF
    _MULTIBYTECODEC_MULTIBYTESTREAMWRITER_RESET_METHODDEF
    {NULL, NULL},  /* sentinel */
};
1924
/* Member table for MultibyteStreamWriter: exposes the wrapped stream object
 * as the read-only attribute `stream`. */
static PyMemberDef mbstreamwriter_members[] = {
    {"stream",          T_OBJECT,
                    offsetof(MultibyteStreamWriterObject, stream),
                    READONLY,           NULL},
    {NULL,}  /* sentinel */
};
1931
/* Slot table for the MultibyteStreamWriter heap type; referenced by
 * writer_spec. */
static PyType_Slot writer_slots[] = {
    {Py_tp_dealloc, mbstreamwriter_dealloc},
    {Py_tp_getattro, PyObject_GenericGetAttr},
    {Py_tp_traverse, mbstreamwriter_traverse},
    {Py_tp_methods, mbstreamwriter_methods},
    {Py_tp_members, mbstreamwriter_members},
    {Py_tp_getset, codecctx_getsets},
    {Py_tp_init, mbstreamwriter_init},
    {Py_tp_new, mbstreamwriter_new},
    {0, NULL},  /* sentinel */
};
1943
/* Type spec for _multibytecodec.MultibyteStreamWriter: GC-enabled,
 * subclassable (BASETYPE) and immutable, built from writer_slots. */
static PyType_Spec writer_spec = {
    .name = MODULE_NAME ".MultibyteStreamWriter",
    .basicsize = sizeof(MultibyteStreamWriterObject),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | Py_TPFLAGS_BASETYPE |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = writer_slots,
};
1951
1952
1953 /*[clinic input]
1954 _multibytecodec.__create_codec
1955
1956 arg: object
1957 /
1958 [clinic start generated code]*/
1959
1960 static PyObject *
_multibytecodec___create_codec(PyObject * module,PyObject * arg)1961 _multibytecodec___create_codec(PyObject *module, PyObject *arg)
1962 /*[clinic end generated code: output=cfa3dce8260e809d input=6840b2a6b183fcfa]*/
1963 {
1964 MultibyteCodecObject *self;
1965 MultibyteCodec *codec;
1966
1967 if (!PyCapsule_IsValid(arg, PyMultibyteCodec_CAPSULE_NAME)) {
1968 PyErr_SetString(PyExc_ValueError, "argument type invalid");
1969 return NULL;
1970 }
1971
1972 codec = PyCapsule_GetPointer(arg, PyMultibyteCodec_CAPSULE_NAME);
1973 if (codec->codecinit != NULL && codec->codecinit(codec->config) != 0)
1974 return NULL;
1975
1976 _multibytecodec_state *state = _multibytecodec_get_state(module);
1977 self = PyObject_GC_New(MultibyteCodecObject, state->multibytecodec_type);
1978 if (self == NULL)
1979 return NULL;
1980 self->codec = codec;
1981
1982 PyObject_GC_Track(self);
1983 return (PyObject *)self;
1984 }
1985
1986 static int
_multibytecodec_traverse(PyObject * mod,visitproc visit,void * arg)1987 _multibytecodec_traverse(PyObject *mod, visitproc visit, void *arg)
1988 {
1989 _multibytecodec_state *state = _multibytecodec_get_state(mod);
1990 Py_VISIT(state->multibytecodec_type);
1991 Py_VISIT(state->encoder_type);
1992 Py_VISIT(state->decoder_type);
1993 Py_VISIT(state->reader_type);
1994 Py_VISIT(state->writer_type);
1995 return 0;
1996 }
1997
1998 static int
_multibytecodec_clear(PyObject * mod)1999 _multibytecodec_clear(PyObject *mod)
2000 {
2001 _multibytecodec_state *state = _multibytecodec_get_state(mod);
2002 Py_CLEAR(state->multibytecodec_type);
2003 Py_CLEAR(state->encoder_type);
2004 Py_CLEAR(state->decoder_type);
2005 Py_CLEAR(state->reader_type);
2006 Py_CLEAR(state->writer_type);
2007 Py_CLEAR(state->str_write);
2008 return 0;
2009 }
2010
2011 static void
_multibytecodec_free(void * mod)2012 _multibytecodec_free(void *mod)
2013 {
2014 _multibytecodec_clear((PyObject *)mod);
2015 }
2016
/* Create a heap type from `spec`, associated with `module`, and store it in
 * `type`.  On failure this makes the ENCLOSING function return -1, so it may
 * only be used inside functions returning int. */
#define CREATE_TYPE(module, type, spec)                                      \
    do {                                                                     \
        type = (PyTypeObject *)PyType_FromModuleAndSpec(module, spec, NULL); \
        if (!type) {                                                         \
            return -1;                                                       \
        }                                                                    \
    } while (0)

/* Add `type` to `module`; on failure makes the ENCLOSING function
 * return -1. */
#define ADD_TYPE(module, type)                    \
    do {                                          \
        if (PyModule_AddType(module, type) < 0) { \
            return -1;                            \
        }                                         \
    } while (0)
2031
2032 static int
_multibytecodec_exec(PyObject * mod)2033 _multibytecodec_exec(PyObject *mod)
2034 {
2035 _multibytecodec_state *state = _multibytecodec_get_state(mod);
2036 state->str_write = PyUnicode_InternFromString("write");
2037 if (state->str_write == NULL) {
2038 return -1;
2039 }
2040 CREATE_TYPE(mod, state->multibytecodec_type, &multibytecodec_spec);
2041 CREATE_TYPE(mod, state->encoder_type, &encoder_spec);
2042 CREATE_TYPE(mod, state->decoder_type, &decoder_spec);
2043 CREATE_TYPE(mod, state->reader_type, &reader_spec);
2044 CREATE_TYPE(mod, state->writer_type, &writer_spec);
2045
2046 ADD_TYPE(mod, state->encoder_type);
2047 ADD_TYPE(mod, state->decoder_type);
2048 ADD_TYPE(mod, state->reader_type);
2049 ADD_TYPE(mod, state->writer_type);
2050 return 0;
2051 }
2052
2053 #undef CREATE_TYPE
2054 #undef ADD_TYPE
2055
/* Module-level function table: only __create_codec is exposed. */
static struct PyMethodDef _multibytecodec_methods[] = {
    _MULTIBYTECODEC___CREATE_CODEC_METHODDEF
    {NULL, NULL},  /* sentinel */
};
2060
/* Module slots: the only slot runs _multibytecodec_exec at module
 * execution time. */
static PyModuleDef_Slot _multibytecodec_slots[] = {
    {Py_mod_exec, _multibytecodec_exec},
    {0, NULL}  /* sentinel */
};
2065
/* Module definition.  m_size reserves a per-module _multibytecodec_state;
 * the traverse/clear/free hooks manage the references stored in it. */
static struct PyModuleDef _multibytecodecmodule = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "_multibytecodec",
    .m_size = sizeof(_multibytecodec_state),
    .m_methods = _multibytecodec_methods,
    .m_slots = _multibytecodec_slots,
    .m_traverse = _multibytecodec_traverse,
    .m_clear = _multibytecodec_clear,
    .m_free = _multibytecodec_free,
};
2076
/* Module entry point: hands the module definition to PyModuleDef_Init();
 * the module itself is populated by the Py_mod_exec slot
 * (_multibytecodec_exec). */
PyMODINIT_FUNC
PyInit__multibytecodec(void)
{
    return PyModuleDef_Init(&_multibytecodecmodule);
}
2082