1 /* _lzma - Low-level Python interface to liblzma.
2 
3    Initial implementation by Per Øyvind Karlsen.
4    Rewritten by Nadeem Vawda.
5 
6 */
7 
8 #define PY_SSIZE_T_CLEAN
9 
10 #include "Python.h"
11 #include "structmember.h"         // PyMemberDef
12 
13 #include <stdlib.h>               // free()
14 #include <string.h>
15 
16 #include <lzma.h>
17 
18 // Blocks output buffer wrappers
19 #include "pycore_blocks_output_buffer.h"
20 
21 #if OUTPUT_BUFFER_MAX_BLOCK_SIZE > SIZE_MAX
22     #error "The maximum block size accepted by liblzma is SIZE_MAX."
23 #endif
24 
25 /* On success, return value >= 0
26    On failure, return -1 */
27 static inline Py_ssize_t
OutputBuffer_InitAndGrow(_BlocksOutputBuffer * buffer,Py_ssize_t max_length,uint8_t ** next_out,size_t * avail_out)28 OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
29                          uint8_t **next_out, size_t *avail_out)
30 {
31     Py_ssize_t allocated;
32 
33     allocated = _BlocksOutputBuffer_InitAndGrow(
34                     buffer, max_length, (void**) next_out);
35     *avail_out = (size_t) allocated;
36     return allocated;
37 }
38 
39 /* On success, return value >= 0
40    On failure, return -1 */
41 static inline Py_ssize_t
OutputBuffer_Grow(_BlocksOutputBuffer * buffer,uint8_t ** next_out,size_t * avail_out)42 OutputBuffer_Grow(_BlocksOutputBuffer *buffer,
43                   uint8_t **next_out, size_t *avail_out)
44 {
45     Py_ssize_t allocated;
46 
47     allocated = _BlocksOutputBuffer_Grow(
48                     buffer, (void**) next_out, (Py_ssize_t) *avail_out);
49     *avail_out = (size_t) allocated;
50     return allocated;
51 }
52 
53 static inline Py_ssize_t
OutputBuffer_GetDataSize(_BlocksOutputBuffer * buffer,size_t avail_out)54 OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, size_t avail_out)
55 {
56     return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out);
57 }
58 
59 static inline PyObject *
OutputBuffer_Finish(_BlocksOutputBuffer * buffer,size_t avail_out)60 OutputBuffer_Finish(_BlocksOutputBuffer *buffer, size_t avail_out)
61 {
62     return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out);
63 }
64 
65 static inline void
OutputBuffer_OnError(_BlocksOutputBuffer * buffer)66 OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
67 {
68     _BlocksOutputBuffer_OnError(buffer);
69 }
70 
71 
/* Take (obj)->lock.  The first PyThread_acquire_lock() call passes 0 as the
   wait flag; only when that attempt fails is the call repeated with wait
   flag 1, this time between Py_BEGIN_ALLOW_THREADS and
   Py_END_ALLOW_THREADS.  `obj` appears twice in the expansion, so it must
   be free of side effects. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)
/* Release (obj)->lock. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
79 
/* Per-module state, reached through get_lzma_state() or
   PyType_GetModuleState(). */
typedef struct {
    /* NOTE(review): presumably the LZMACompressor type object; it is
       assigned outside this part of the file -- confirm. */
    PyTypeObject *lzma_compressor_type;
    /* NOTE(review): presumably the LZMADecompressor type object -- confirm. */
    PyTypeObject *lzma_decompressor_type;
    /* Exception class passed to PyErr_SetString()/PyErr_Format() for
       liblzma failures. */
    PyObject *error;
    /* Passed as the positional-args tuple when a filter-spec mapping is
       parsed with PyArg_ParseTupleAndKeywords(). */
    PyObject *empty_tuple;
} _lzma_state;
86 
87 static inline _lzma_state*
get_lzma_state(PyObject * module)88 get_lzma_state(PyObject *module)
89 {
90     void *state = PyModule_GetState(module);
91     assert(state != NULL);
92     return (_lzma_state *)state;
93 }
94 
/* Container formats.  The enumerators take the values 0..3 in the order
   listed. */
enum {
    FORMAT_AUTO,    /* has no case in Compressor_init()'s format switch */
    FORMAT_XZ,      /* initial value of `format` in Compressor_init() */
    FORMAT_ALONE,   /* handled by Compressor_init_alone() */
    FORMAT_RAW,     /* handled by Compressor_init_raw() */
};
102 
/* One past LZMA_CHECK_ID_MAX.  Its uses lie outside this part of the file. */
#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
104 
105 
/* Instance layout of _lzma.LZMACompressor. */
typedef struct {
    PyObject_HEAD
    /* Callbacks (PyLzma_Malloc/PyLzma_Free) that Compressor_init() installs
       and points lzs.allocator at. */
    lzma_allocator alloc;
    /* liblzma stream driven by compress(). */
    lzma_stream lzs;
    /* Set to 1 by flush(); compress() and flush() raise ValueError once it
       is set. */
    int flushed;
    /* Taken with ACQUIRE_LOCK() around the compress()/flush() methods. */
    PyThread_type_lock lock;
} Compressor;
113 
/* Instance layout of the decompressor object.  Most code that uses these
   fields lies outside this part of the file; comments marked NOTE(review)
   are inferred from the field names only and need confirming. */
typedef struct {
    PyObject_HEAD
    /* NOTE(review): presumably installed the same way as Compressor.alloc. */
    lzma_allocator alloc;
    /* liblzma stream driven by decompress_buf(). */
    lzma_stream lzs;
    /* Refreshed from lzma_get_check() whenever lzma_code() returns
       LZMA_GET_CHECK or LZMA_NO_CHECK. */
    int check;
    /* NOTE(review): presumably non-zero once the end of the stream is seen. */
    char eof;
    /* NOTE(review): presumably input left over after the end of the stream. */
    PyObject *unused_data;
    /* NOTE(review): presumably tells the caller more input is required. */
    char needs_input;
    /* NOTE(review): presumably an internal copy of unconsumed input and its
       allocated size. */
    uint8_t *input_buffer;
    size_t input_buffer_size;
    /* NOTE(review): presumably used with ACQUIRE_LOCK()/RELEASE_LOCK(). */
    PyThread_type_lock lock;
} Decompressor;
126 
127 /* Helper functions. */
128 
129 static int
catch_lzma_error(_lzma_state * state,lzma_ret lzret)130 catch_lzma_error(_lzma_state *state, lzma_ret lzret)
131 {
132     switch (lzret) {
133         case LZMA_OK:
134         case LZMA_GET_CHECK:
135         case LZMA_NO_CHECK:
136         case LZMA_STREAM_END:
137             return 0;
138         case LZMA_UNSUPPORTED_CHECK:
139             PyErr_SetString(state->error, "Unsupported integrity check");
140             return 1;
141         case LZMA_MEM_ERROR:
142             PyErr_NoMemory();
143             return 1;
144         case LZMA_MEMLIMIT_ERROR:
145             PyErr_SetString(state->error, "Memory usage limit exceeded");
146             return 1;
147         case LZMA_FORMAT_ERROR:
148             PyErr_SetString(state->error, "Input format not supported by decoder");
149             return 1;
150         case LZMA_OPTIONS_ERROR:
151             PyErr_SetString(state->error, "Invalid or unsupported options");
152             return 1;
153         case LZMA_DATA_ERROR:
154             PyErr_SetString(state->error, "Corrupt input data");
155             return 1;
156         case LZMA_BUF_ERROR:
157             PyErr_SetString(state->error, "Insufficient buffer space");
158             return 1;
159         case LZMA_PROG_ERROR:
160             PyErr_SetString(state->error, "Internal error");
161             return 1;
162         default:
163             PyErr_Format(state->error, "Unrecognized error from liblzma: %d", lzret);
164             return 1;
165     }
166 }
167 
168 static void*
PyLzma_Malloc(void * opaque,size_t items,size_t size)169 PyLzma_Malloc(void *opaque, size_t items, size_t size)
170 {
171     if (size != 0 && items > (size_t)PY_SSIZE_T_MAX / size) {
172         return NULL;
173     }
174     /* PyMem_Malloc() cannot be used:
175        the GIL is not held when lzma_code() is called */
176     return PyMem_RawMalloc(items * size);
177 }
178 
/* Deallocation callback stored in lzma_allocator.free; the counterpart of
   PyLzma_Malloc().  `opaque` is not used. */
static void
PyLzma_Free(void *opaque, void *ptr)
{
    void *const block = ptr;

    PyMem_RawFree(block);
}
184 
185 
186 /* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
187    since the predefined conversion specifiers do not suit our needs:
188 
189       uint32_t - the "I" (unsigned int) specifier is the right size, but
190       silently ignores overflows on conversion.
191 
192       lzma_vli - the "K" (unsigned long long) specifier is the right
193       size, but like "I" it silently ignores overflows on conversion.
194 
195       lzma_mode and lzma_match_finder - these are enumeration types, and
196       so the size of each is implementation-defined. Worse, different
197       enum types can be of different sizes within the same program, so
198       to be strictly correct, we need to define two separate converters.
199  */
200 
/* Define FUNCNAME(obj, ptr), a converter that stores its result in
   *(TYPE *)ptr.  The object is read with PyLong_AsUnsignedLongLong(); a
   value that changes when cast to TYPE and back is rejected with
   OverflowError.  The function returns 1 on success and 0 with an exception
   set on failure. */
#define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
    static int \
    FUNCNAME(PyObject *obj, void *ptr) \
    { \
        unsigned long long val; \
        \
        val = PyLong_AsUnsignedLongLong(obj); \
        if (PyErr_Occurred()) \
            return 0; \
        if ((unsigned long long)(TYPE)val != val) { \
            PyErr_SetString(PyExc_OverflowError, \
                            "Value too large for " #TYPE " type"); \
            return 0; \
        } \
        *(TYPE *)ptr = (TYPE)val; \
        return 1; \
    }

/* One converter per destination type; the two enum types get separate
   converters because their sizes may differ. */
INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)

#undef INT_TYPE_CONVERTER_FUNC
225 
226 
227 /* Filter specifier parsing.
228 
229    This code handles converting filter specifiers (Python dicts) into
230    the C lzma_filter structs expected by liblzma. */
231 
232 static void *
233 parse_filter_spec_lzma(_lzma_state *state, PyObject *spec)
234 {
235     static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
236                                "pb", "mode", "nice_len", "mf", "depth", NULL};
237     PyObject *id;
238     PyObject *preset_obj;
239     uint32_t preset = LZMA_PRESET_DEFAULT;
240     lzma_options_lzma *options;
241 
242     /* First, fill in default values for all the options using a preset.
243        Then, override the defaults with any values given by the caller. */
244 
245     preset_obj = PyMapping_GetItemString(spec, "preset");
246     if (preset_obj == NULL) {
247         if (PyErr_ExceptionMatches(PyExc_KeyError)) {
248             PyErr_Clear();
249         }
250         else {
251             return NULL;
252         }
253     } else {
254         int ok = uint32_converter(preset_obj, &preset);
255         Py_DECREF(preset_obj);
256         if (!ok) {
257             return NULL;
258         }
259     }
260 
261     options = (lzma_options_lzma *)PyMem_Calloc(1, sizeof *options);
262     if (options == NULL) {
263         return PyErr_NoMemory();
264     }
265 
266     if (lzma_lzma_preset(options, preset)) {
267         PyMem_Free(options);
268         PyErr_Format(state->error, "Invalid compression preset: %u", preset);
269         return NULL;
270     }
271 
272     if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec,
273                                      "|OOO&O&O&O&O&O&O&O&", optnames,
274                                      &id, &preset_obj,
275                                      uint32_converter, &options->dict_size,
276                                      uint32_converter, &options->lc,
277                                      uint32_converter, &options->lp,
278                                      uint32_converter, &options->pb,
279                                      lzma_mode_converter, &options->mode,
280                                      uint32_converter, &options->nice_len,
281                                      lzma_mf_converter, &options->mf,
282                                      uint32_converter, &options->depth)) {
283         PyErr_SetString(PyExc_ValueError,
284                         "Invalid filter specifier for LZMA filter");
285         PyMem_Free(options);
286         return NULL;
287     }
288 
289     return options;
290 }
291 
292 static void *
parse_filter_spec_delta(_lzma_state * state,PyObject * spec)293 parse_filter_spec_delta(_lzma_state *state, PyObject *spec)
294 {
295     static char *optnames[] = {"id", "dist", NULL};
296     PyObject *id;
297     uint32_t dist = 1;
298     lzma_options_delta *options;
299 
300     if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
301                                      &id, uint32_converter, &dist)) {
302         PyErr_SetString(PyExc_ValueError,
303                         "Invalid filter specifier for delta filter");
304         return NULL;
305     }
306 
307     options = (lzma_options_delta *)PyMem_Calloc(1, sizeof *options);
308     if (options == NULL) {
309         return PyErr_NoMemory();
310     }
311     options->type = LZMA_DELTA_TYPE_BYTE;
312     options->dist = dist;
313     return options;
314 }
315 
316 static void *
parse_filter_spec_bcj(_lzma_state * state,PyObject * spec)317 parse_filter_spec_bcj(_lzma_state *state, PyObject *spec)
318 {
319     static char *optnames[] = {"id", "start_offset", NULL};
320     PyObject *id;
321     uint32_t start_offset = 0;
322     lzma_options_bcj *options;
323 
324     if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
325                                      &id, uint32_converter, &start_offset)) {
326         PyErr_SetString(PyExc_ValueError,
327                         "Invalid filter specifier for BCJ filter");
328         return NULL;
329     }
330 
331     options = (lzma_options_bcj *)PyMem_Calloc(1, sizeof *options);
332     if (options == NULL) {
333         return PyErr_NoMemory();
334     }
335     options->start_offset = start_offset;
336     return options;
337 }
338 
339 static int
lzma_filter_converter(_lzma_state * state,PyObject * spec,void * ptr)340 lzma_filter_converter(_lzma_state *state, PyObject *spec, void *ptr)
341 {
342     lzma_filter *f = (lzma_filter *)ptr;
343     PyObject *id_obj;
344 
345     if (!PyMapping_Check(spec)) {
346         PyErr_SetString(PyExc_TypeError,
347                         "Filter specifier must be a dict or dict-like object");
348         return 0;
349     }
350     id_obj = PyMapping_GetItemString(spec, "id");
351     if (id_obj == NULL) {
352         if (PyErr_ExceptionMatches(PyExc_KeyError))
353             PyErr_SetString(PyExc_ValueError,
354                             "Filter specifier must have an \"id\" entry");
355         return 0;
356     }
357     f->id = PyLong_AsUnsignedLongLong(id_obj);
358     Py_DECREF(id_obj);
359     if (PyErr_Occurred()) {
360         return 0;
361     }
362 
363     switch (f->id) {
364         case LZMA_FILTER_LZMA1:
365         case LZMA_FILTER_LZMA2:
366             f->options = parse_filter_spec_lzma(state, spec);
367             return f->options != NULL;
368         case LZMA_FILTER_DELTA:
369             f->options = parse_filter_spec_delta(state, spec);
370             return f->options != NULL;
371         case LZMA_FILTER_X86:
372         case LZMA_FILTER_POWERPC:
373         case LZMA_FILTER_IA64:
374         case LZMA_FILTER_ARM:
375         case LZMA_FILTER_ARMTHUMB:
376         case LZMA_FILTER_SPARC:
377             f->options = parse_filter_spec_bcj(state, spec);
378             return f->options != NULL;
379         default:
380             PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
381             return 0;
382     }
383 }
384 
385 static void
free_filter_chain(lzma_filter filters[])386 free_filter_chain(lzma_filter filters[])
387 {
388     for (int i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++) {
389         PyMem_Free(filters[i].options);
390     }
391 }
392 
393 static int
parse_filter_chain_spec(_lzma_state * state,lzma_filter filters[],PyObject * filterspecs)394 parse_filter_chain_spec(_lzma_state *state, lzma_filter filters[], PyObject *filterspecs)
395 {
396     Py_ssize_t i, num_filters;
397 
398     num_filters = PySequence_Length(filterspecs);
399     if (num_filters == -1) {
400         return -1;
401     }
402     if (num_filters > LZMA_FILTERS_MAX) {
403         PyErr_Format(PyExc_ValueError,
404                      "Too many filters - liblzma supports a maximum of %d",
405                      LZMA_FILTERS_MAX);
406         return -1;
407     }
408 
409     for (i = 0; i < num_filters; i++) {
410         int ok = 1;
411         PyObject *spec = PySequence_GetItem(filterspecs, i);
412         if (spec == NULL || !lzma_filter_converter(state, spec, &filters[i])) {
413             ok = 0;
414         }
415         Py_XDECREF(spec);
416         if (!ok) {
417             filters[i].id = LZMA_VLI_UNKNOWN;
418             free_filter_chain(filters);
419             return -1;
420         }
421     }
422     filters[num_filters].id = LZMA_VLI_UNKNOWN;
423     return 0;
424 }
425 
426 
427 /* Filter specifier construction.
428 
429    This code handles converting C lzma_filter structs into
430    Python-level filter specifiers (represented as dicts). */
431 
432 static int
spec_add_field(PyObject * spec,const char * key,unsigned long long value)433 spec_add_field(PyObject *spec, const char *key, unsigned long long value)
434 {
435     PyObject *value_object = PyLong_FromUnsignedLongLong(value);
436     if (value_object == NULL) {
437         return -1;
438     }
439     PyObject *key_object = PyUnicode_InternFromString(key);
440     if (key_object == NULL) {
441         Py_DECREF(value_object);
442         return -1;
443     }
444     int status = PyDict_SetItem(spec, key_object, value_object);
445     Py_DECREF(key_object);
446     Py_DECREF(value_object);
447     return status;
448 }
449 
450 static PyObject *
build_filter_spec(const lzma_filter * f)451 build_filter_spec(const lzma_filter *f)
452 {
453     PyObject *spec;
454 
455     spec = PyDict_New();
456     if (spec == NULL) {
457         return NULL;
458     }
459 
460 #define ADD_FIELD(SOURCE, FIELD) \
461     do { \
462         if (spec_add_field(spec, #FIELD, SOURCE->FIELD) == -1) \
463             goto error;\
464     } while (0)
465 
466     ADD_FIELD(f, id);
467 
468     switch (f->id) {
469         /* For LZMA1 filters, lzma_properties_{encode,decode}() only look at the
470            lc, lp, pb, and dict_size fields. For LZMA2 filters, only the
471            dict_size field is used. */
472         case LZMA_FILTER_LZMA1: {
473             lzma_options_lzma *options = f->options;
474             ADD_FIELD(options, lc);
475             ADD_FIELD(options, lp);
476             ADD_FIELD(options, pb);
477             ADD_FIELD(options, dict_size);
478             break;
479         }
480         case LZMA_FILTER_LZMA2: {
481             lzma_options_lzma *options = f->options;
482             ADD_FIELD(options, dict_size);
483             break;
484         }
485         case LZMA_FILTER_DELTA: {
486             lzma_options_delta *options = f->options;
487             ADD_FIELD(options, dist);
488             break;
489         }
490         case LZMA_FILTER_X86:
491         case LZMA_FILTER_POWERPC:
492         case LZMA_FILTER_IA64:
493         case LZMA_FILTER_ARM:
494         case LZMA_FILTER_ARMTHUMB:
495         case LZMA_FILTER_SPARC: {
496             lzma_options_bcj *options = f->options;
497             ADD_FIELD(options, start_offset);
498             break;
499         }
500         default:
501             PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
502             goto error;
503     }
504 
505 #undef ADD_FIELD
506 
507     return spec;
508 
509 error:
510     Py_DECREF(spec);
511     return NULL;
512 }
513 
514 
515 /*[clinic input]
516 module _lzma
517 class _lzma.LZMACompressor "Compressor *" "&Compressor_type"
518 class _lzma.LZMADecompressor "Decompressor *" "&Decompressor_type"
519 [clinic start generated code]*/
520 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2c14bbe05ff0c147]*/
521 
522 #include "clinic/_lzmamodule.c.h"
523 
524 /*[python input]
525 
526 class lzma_vli_converter(CConverter):
527     type = 'lzma_vli'
528     converter = 'lzma_vli_converter'
529 
530 class lzma_filter_converter(CConverter):
531     type = 'lzma_filter'
532     converter = 'lzma_filter_converter'
533     c_default = c_ignored_default = "{LZMA_VLI_UNKNOWN, NULL}"
534 
535     def cleanup(self):
536         name = ensure_legal_c_identifier(self.name)
537         return ('if (%(name)s.id != LZMA_VLI_UNKNOWN)\n'
538                 '   PyMem_Free(%(name)s.options);\n') % {'name': name}
539 
540 [python start generated code]*/
541 /*[python end generated code: output=da39a3ee5e6b4b0d input=74fe7631ce377a94]*/
542 
543 
544 /* LZMACompressor class. */
545 
546 static PyObject *
compress(Compressor * c,uint8_t * data,size_t len,lzma_action action)547 compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
548 {
549     PyObject *result;
550     _BlocksOutputBuffer buffer = {.list = NULL};
551     _lzma_state *state = PyType_GetModuleState(Py_TYPE(c));
552     assert(state != NULL);
553 
554     if (OutputBuffer_InitAndGrow(&buffer, -1, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
555         goto error;
556     }
557     c->lzs.next_in = data;
558     c->lzs.avail_in = len;
559 
560     for (;;) {
561         lzma_ret lzret;
562 
563         Py_BEGIN_ALLOW_THREADS
564         lzret = lzma_code(&c->lzs, action);
565         Py_END_ALLOW_THREADS
566 
567         if (lzret == LZMA_BUF_ERROR && len == 0 && c->lzs.avail_out > 0) {
568             lzret = LZMA_OK; /* That wasn't a real error */
569         }
570         if (catch_lzma_error(state, lzret)) {
571             goto error;
572         }
573         if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
574             (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
575             break;
576         } else if (c->lzs.avail_out == 0) {
577             if (OutputBuffer_Grow(&buffer, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
578                 goto error;
579             }
580         }
581     }
582 
583     result = OutputBuffer_Finish(&buffer, c->lzs.avail_out);
584     if (result != NULL) {
585         return result;
586     }
587 
588 error:
589     OutputBuffer_OnError(&buffer);
590     return NULL;
591 }
592 
593 /*[clinic input]
594 _lzma.LZMACompressor.compress
595 
596     data: Py_buffer
597     /
598 
599 Provide data to the compressor object.
600 
601 Returns a chunk of compressed data if possible, or b'' otherwise.
602 
603 When you have finished providing data to the compressor, call the
604 flush() method to finish the compression process.
605 [clinic start generated code]*/
606 
607 static PyObject *
_lzma_LZMACompressor_compress_impl(Compressor * self,Py_buffer * data)608 _lzma_LZMACompressor_compress_impl(Compressor *self, Py_buffer *data)
609 /*[clinic end generated code: output=31f615136963e00f input=64019eac7f2cc8d0]*/
610 {
611     PyObject *result = NULL;
612 
613     ACQUIRE_LOCK(self);
614     if (self->flushed) {
615         PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
616     }
617     else {
618         result = compress(self, data->buf, data->len, LZMA_RUN);
619     }
620     RELEASE_LOCK(self);
621     return result;
622 }
623 
624 /*[clinic input]
625 _lzma.LZMACompressor.flush
626 
627 Finish the compression process.
628 
629 Returns the compressed data left in internal buffers.
630 
631 The compressor object may not be used after this method is called.
632 [clinic start generated code]*/
633 
634 static PyObject *
_lzma_LZMACompressor_flush_impl(Compressor * self)635 _lzma_LZMACompressor_flush_impl(Compressor *self)
636 /*[clinic end generated code: output=fec21f3e22504f50 input=6b369303f67ad0a8]*/
637 {
638     PyObject *result = NULL;
639 
640     ACQUIRE_LOCK(self);
641     if (self->flushed) {
642         PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
643     } else {
644         self->flushed = 1;
645         result = compress(self, NULL, 0, LZMA_FINISH);
646     }
647     RELEASE_LOCK(self);
648     return result;
649 }
650 
651 static int
Compressor_init_xz(_lzma_state * state,lzma_stream * lzs,int check,uint32_t preset,PyObject * filterspecs)652 Compressor_init_xz(_lzma_state *state, lzma_stream *lzs,
653                    int check, uint32_t preset, PyObject *filterspecs)
654 {
655     lzma_ret lzret;
656 
657     if (filterspecs == Py_None) {
658         lzret = lzma_easy_encoder(lzs, preset, check);
659     } else {
660         lzma_filter filters[LZMA_FILTERS_MAX + 1];
661 
662         if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
663             return -1;
664         lzret = lzma_stream_encoder(lzs, filters, check);
665         free_filter_chain(filters);
666     }
667     if (catch_lzma_error(state, lzret)) {
668         return -1;
669     }
670     else {
671         return 0;
672     }
673 }
674 
675 static int
Compressor_init_alone(_lzma_state * state,lzma_stream * lzs,uint32_t preset,PyObject * filterspecs)676 Compressor_init_alone(_lzma_state *state, lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
677 {
678     lzma_ret lzret;
679 
680     if (filterspecs == Py_None) {
681         lzma_options_lzma options;
682 
683         if (lzma_lzma_preset(&options, preset)) {
684             PyErr_Format(state->error, "Invalid compression preset: %u", preset);
685             return -1;
686         }
687         lzret = lzma_alone_encoder(lzs, &options);
688     } else {
689         lzma_filter filters[LZMA_FILTERS_MAX + 1];
690 
691         if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
692             return -1;
693         if (filters[0].id == LZMA_FILTER_LZMA1 &&
694             filters[1].id == LZMA_VLI_UNKNOWN) {
695             lzret = lzma_alone_encoder(lzs, filters[0].options);
696         } else {
697             PyErr_SetString(PyExc_ValueError,
698                             "Invalid filter chain for FORMAT_ALONE - "
699                             "must be a single LZMA1 filter");
700             lzret = LZMA_PROG_ERROR;
701         }
702         free_filter_chain(filters);
703     }
704     if (PyErr_Occurred() || catch_lzma_error(state, lzret)) {
705         return -1;
706     }
707     else {
708         return 0;
709     }
710 }
711 
712 static int
Compressor_init_raw(_lzma_state * state,lzma_stream * lzs,PyObject * filterspecs)713 Compressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
714 {
715     lzma_filter filters[LZMA_FILTERS_MAX + 1];
716     lzma_ret lzret;
717 
718     if (filterspecs == Py_None) {
719         PyErr_SetString(PyExc_ValueError,
720                         "Must specify filters for FORMAT_RAW");
721         return -1;
722     }
723     if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
724         return -1;
725     }
726     lzret = lzma_raw_encoder(lzs, filters);
727     free_filter_chain(filters);
728     if (catch_lzma_error(state, lzret)) {
729         return -1;
730     }
731     else {
732         return 0;
733     }
734 }
735 
736 /*[-clinic input]
737 _lzma.LZMACompressor.__init__
738 
739     format: int(c_default="FORMAT_XZ") = FORMAT_XZ
740         The container format to use for the output.  This can
741         be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.
742 
743     check: int(c_default="-1") = unspecified
744         The integrity check to use.  For FORMAT_XZ, the default
745         is CHECK_CRC64.  FORMAT_ALONE and FORMAT_RAW do not support integrity
746         checks; for these formats, check must be omitted, or be CHECK_NONE.
747 
748     preset: object = None
749         If provided should be an integer in the range 0-9, optionally
750         OR-ed with the constant PRESET_EXTREME.
751 
752     filters: object = None
753         If provided should be a sequence of dicts.  Each dict should
754         have an entry for "id" indicating the ID of the filter, plus
755         additional entries for options to the filter.
756 
757 Create a compressor object for compressing data incrementally.
758 
759 The settings used by the compressor can be specified either as a
760 preset compression level (with the 'preset' argument), or in detail
761 as a custom filter chain (with the 'filters' argument).  For FORMAT_XZ
762 and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
763 level.  For FORMAT_RAW, the caller must always specify a filter chain;
764 the raw compressor does not support preset compression levels.
765 
766 For one-shot compression, use the compress() function instead.
767 [-clinic start generated code]*/
768 static int
Compressor_init(Compressor * self,PyObject * args,PyObject * kwargs)769 Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)
770 {
771     static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
772     int format = FORMAT_XZ;
773     int check = -1;
774     uint32_t preset = LZMA_PRESET_DEFAULT;
775     PyObject *preset_obj = Py_None;
776     PyObject *filterspecs = Py_None;
777     _lzma_state *state = PyType_GetModuleState(Py_TYPE(self));
778     assert(state != NULL);
779     if (!PyArg_ParseTupleAndKeywords(args, kwargs,
780                                      "|iiOO:LZMACompressor", arg_names,
781                                      &format, &check, &preset_obj,
782                                      &filterspecs)) {
783         return -1;
784     }
785 
786     if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
787         PyErr_SetString(PyExc_ValueError,
788                         "Integrity checks are only supported by FORMAT_XZ");
789         return -1;
790     }
791 
792     if (preset_obj != Py_None && filterspecs != Py_None) {
793         PyErr_SetString(PyExc_ValueError,
794                         "Cannot specify both preset and filter chain");
795         return -1;
796     }
797 
798     if (preset_obj != Py_None) {
799         if (!uint32_converter(preset_obj, &preset)) {
800             return -1;
801         }
802     }
803 
804     self->alloc.opaque = NULL;
805     self->alloc.alloc = PyLzma_Malloc;
806     self->alloc.free = PyLzma_Free;
807     self->lzs.allocator = &self->alloc;
808 
809     self->lock = PyThread_allocate_lock();
810     if (self->lock == NULL) {
811         PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
812         return -1;
813     }
814 
815     self->flushed = 0;
816     switch (format) {
817         case FORMAT_XZ:
818             if (check == -1) {
819                 check = LZMA_CHECK_CRC64;
820             }
821             if (Compressor_init_xz(state, &self->lzs, check, preset, filterspecs) != 0) {
822                 break;
823             }
824             return 0;
825 
826         case FORMAT_ALONE:
827             if (Compressor_init_alone(state, &self->lzs, preset, filterspecs) != 0) {
828                 break;
829             }
830             return 0;
831 
832         case FORMAT_RAW:
833             if (Compressor_init_raw(state, &self->lzs, filterspecs) != 0) {
834                 break;
835             }
836             return 0;
837 
838         default:
839             PyErr_Format(PyExc_ValueError,
840                          "Invalid container format: %d", format);
841             break;
842     }
843 
844     PyThread_free_lock(self->lock);
845     self->lock = NULL;
846     return -1;
847 }
848 
849 static void
Compressor_dealloc(Compressor * self)850 Compressor_dealloc(Compressor *self)
851 {
852     lzma_end(&self->lzs);
853     if (self->lock != NULL) {
854         PyThread_free_lock(self->lock);
855     }
856     PyTypeObject *tp = Py_TYPE(self);
857     tp->tp_free((PyObject *)self);
858     Py_DECREF(tp);
859 }
860 
/* Method table of LZMACompressor (installed through Py_tp_methods).  The
   two *_METHODDEF macros are presumably provided by the Argument Clinic
   header included above. */
static PyMethodDef Compressor_methods[] = {
    _LZMA_LZMACOMPRESSOR_COMPRESS_METHODDEF
    _LZMA_LZMACOMPRESSOR_FLUSH_METHODDEF
    {NULL}
};
866 
/* tp_traverse slot of LZMACompressor: the only object visited is the
   instance's type.  Always returns 0 unless Py_VISIT returns early. */
static int
Compressor_traverse(Compressor *self, visitproc visit, void *arg)
{
    Py_VISIT(Py_TYPE(self));
    return 0;
}
873 
/* Class docstring of LZMACompressor, installed through the Py_tp_doc slot. */
PyDoc_STRVAR(Compressor_doc,
"LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
"\n"
"Create a compressor object for compressing data incrementally.\n"
"\n"
"format specifies the container format to use for the output. This can\n"
"be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
"\n"
"check specifies the integrity check to use. For FORMAT_XZ, the default\n"
"is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity\n"
"checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
"\n"
"The settings used by the compressor can be specified either as a\n"
"preset compression level (with the 'preset' argument), or in detail\n"
"as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
"and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
"level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
"the raw compressor does not support preset compression levels.\n"
"\n"
"preset (if provided) should be an integer in the range 0-9, optionally\n"
"OR-ed with the constant PRESET_EXTREME.\n"
"\n"
"filters (if provided) should be a sequence of dicts. Each dict should\n"
"have an entry for \"id\" indicating the ID of the filter, plus\n"
"additional entries for options to the filter.\n"
"\n"
"For one-shot compression, use the compress() function instead.\n");
901 
/* Slot table referenced by lzma_compressor_type_spec; terminated by the
   {0, 0} entry. */
static PyType_Slot lzma_compressor_type_slots[] = {
    {Py_tp_dealloc, Compressor_dealloc},
    {Py_tp_methods, Compressor_methods},
    {Py_tp_init, Compressor_init},
    /* Instances are created by the generic allocator; all set-up happens
       in Compressor_init(). */
    {Py_tp_new, PyType_GenericNew},
    {Py_tp_doc, (char *)Compressor_doc},
    {Py_tp_traverse, Compressor_traverse},
    {0, 0}
};
911 
/* Type specification for _lzma.LZMACompressor; lzma_exec() passes it to
   PyType_FromModuleAndSpec() to create the type. */
static PyType_Spec lzma_compressor_type_spec = {
    .name = "_lzma.LZMACompressor",
    .basicsize = sizeof(Compressor),
    // Calling PyType_GetModuleState() on a subclass is not safe.
    // lzma_compressor_type_spec does not set the Py_TPFLAGS_BASETYPE flag,
    // so the type cannot be subclassed.
    // Calling PyType_GetModuleState() in this file is therefore always safe.
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = lzma_compressor_type_slots,
};
922 
923 /* LZMADecompressor class. */
924 
/* Decompress data of length d->lzs.avail_in in d->lzs.next_in.  The output
   buffer is allocated dynamically and returned.  At most max_length bytes are
   returned, so some of the input may not be consumed. d->lzs.next_in and
   d->lzs.avail_in are updated to reflect the consumed input.

   Returns the object produced by OutputBuffer_Finish() on success, or NULL
   on failure.  As side effects, d->eof is set when liblzma reports
   LZMA_STREAM_END, and d->check is updated when it reports LZMA_GET_CHECK
   or LZMA_NO_CHECK. */
static PyObject*
decompress_buf(Decompressor *d, Py_ssize_t max_length)
{
    PyObject *result;
    lzma_stream *lzs = &d->lzs;
    _BlocksOutputBuffer buffer = {.list = NULL};
    _lzma_state *state = PyType_GetModuleState(Py_TYPE(d));
    assert(state != NULL);

    /* Allocate the first output block and point lzs->next_out at it. */
    if (OutputBuffer_InitAndGrow(&buffer, max_length, &lzs->next_out, &lzs->avail_out) < 0) {
        goto error;
    }

    for (;;) {
        lzma_ret lzret;

        /* Run the decoder with the GIL released. */
        Py_BEGIN_ALLOW_THREADS
        lzret = lzma_code(lzs, LZMA_RUN);
        Py_END_ALLOW_THREADS

        /* LZMA_BUF_ERROR with no input left but output space remaining is
           treated as "nothing more to do for now", not as a failure. */
        if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0) {
            lzret = LZMA_OK; /* That wasn't a real error */
        }
        if (catch_lzma_error(state, lzret)) {
            goto error;
        }
        if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) {
            d->check = lzma_get_check(&d->lzs);
        }
        if (lzret == LZMA_STREAM_END) {
            d->eof = 1;
            break;
        } else if (lzs->avail_out == 0) {
            /* lzs->avail_out must be checked before lzs->avail_in: the
               decoder's internal state may still hold a few bytes that can
               be output even though all input has been consumed.  Stop if
               exactly max_length bytes have been produced; otherwise grow
               the output buffer and continue. */
            if (OutputBuffer_GetDataSize(&buffer, lzs->avail_out) == max_length) {
                break;
            }
            if (OutputBuffer_Grow(&buffer, &lzs->next_out, &lzs->avail_out) < 0) {
                goto error;
            }
        } else if (lzs->avail_in == 0) {
            /* All input consumed and output space is still available. */
            break;
        }
    }

    result = OutputBuffer_Finish(&buffer, lzs->avail_out);
    if (result != NULL) {
        return result;
    }

    /* OutputBuffer_Finish() failed: fall through to the error cleanup. */
error:
    OutputBuffer_OnError(&buffer);
    return NULL;
}
986 
987 static PyObject *
decompress(Decompressor * d,uint8_t * data,size_t len,Py_ssize_t max_length)988 decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
989 {
990     char input_buffer_in_use;
991     PyObject *result;
992     lzma_stream *lzs = &d->lzs;
993 
994     /* Prepend unconsumed input if necessary */
995     if (lzs->next_in != NULL) {
996         size_t avail_now, avail_total;
997 
998         /* Number of bytes we can append to input buffer */
999         avail_now = (d->input_buffer + d->input_buffer_size)
1000             - (lzs->next_in + lzs->avail_in);
1001 
1002         /* Number of bytes we can append if we move existing
1003            contents to beginning of buffer (overwriting
1004            consumed input) */
1005         avail_total = d->input_buffer_size - lzs->avail_in;
1006 
1007         if (avail_total < len) {
1008             size_t offset = lzs->next_in - d->input_buffer;
1009             uint8_t *tmp;
1010             size_t new_size = d->input_buffer_size + len - avail_now;
1011 
1012             /* Assign to temporary variable first, so we don't
1013                lose address of allocated buffer if realloc fails */
1014             tmp = PyMem_Realloc(d->input_buffer, new_size);
1015             if (tmp == NULL) {
1016                 PyErr_SetNone(PyExc_MemoryError);
1017                 return NULL;
1018             }
1019             d->input_buffer = tmp;
1020             d->input_buffer_size = new_size;
1021 
1022             lzs->next_in = d->input_buffer + offset;
1023         }
1024         else if (avail_now < len) {
1025             memmove(d->input_buffer, lzs->next_in,
1026                     lzs->avail_in);
1027             lzs->next_in = d->input_buffer;
1028         }
1029         memcpy((void*)(lzs->next_in + lzs->avail_in), data, len);
1030         lzs->avail_in += len;
1031         input_buffer_in_use = 1;
1032     }
1033     else {
1034         lzs->next_in = data;
1035         lzs->avail_in = len;
1036         input_buffer_in_use = 0;
1037     }
1038 
1039     result = decompress_buf(d, max_length);
1040     if (result == NULL) {
1041         lzs->next_in = NULL;
1042         return NULL;
1043     }
1044 
1045     if (d->eof) {
1046         d->needs_input = 0;
1047         if (lzs->avail_in > 0) {
1048             Py_XSETREF(d->unused_data,
1049                       PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
1050             if (d->unused_data == NULL) {
1051                 goto error;
1052             }
1053         }
1054     }
1055     else if (lzs->avail_in == 0) {
1056         lzs->next_in = NULL;
1057 
1058         if (lzs->avail_out == 0) {
1059             /* (avail_in==0 && avail_out==0)
1060                Maybe lzs's internal state still have a few bytes can
1061                be output, try to output them next time. */
1062             d->needs_input = 0;
1063 
1064             /* If max_length < 0, lzs->avail_out always > 0 */
1065             assert(max_length >= 0);
1066         } else {
1067             /* Input buffer exhausted, output buffer has space. */
1068             d->needs_input = 1;
1069         }
1070     }
1071     else {
1072         d->needs_input = 0;
1073 
1074         /* If we did not use the input buffer, we now have
1075            to copy the tail from the caller's buffer into the
1076            input buffer */
1077         if (!input_buffer_in_use) {
1078 
1079             /* Discard buffer if it's too small
1080                (resizing it may needlessly copy the current contents) */
1081             if (d->input_buffer != NULL &&
1082                 d->input_buffer_size < lzs->avail_in) {
1083                 PyMem_Free(d->input_buffer);
1084                 d->input_buffer = NULL;
1085             }
1086 
1087             /* Allocate if necessary */
1088             if (d->input_buffer == NULL) {
1089                 d->input_buffer = PyMem_Malloc(lzs->avail_in);
1090                 if (d->input_buffer == NULL) {
1091                     PyErr_SetNone(PyExc_MemoryError);
1092                     goto error;
1093                 }
1094                 d->input_buffer_size = lzs->avail_in;
1095             }
1096 
1097             /* Copy tail */
1098             memcpy(d->input_buffer, lzs->next_in, lzs->avail_in);
1099             lzs->next_in = d->input_buffer;
1100         }
1101     }
1102 
1103     return result;
1104 
1105 error:
1106     Py_XDECREF(result);
1107     return NULL;
1108 }
1109 
1110 /*[clinic input]
1111 _lzma.LZMADecompressor.decompress
1112 
1113     data: Py_buffer
1114     max_length: Py_ssize_t=-1
1115 
1116 Decompress *data*, returning uncompressed data as bytes.
1117 
1118 If *max_length* is nonnegative, returns at most *max_length* bytes of
1119 decompressed data. If this limit is reached and further output can be
1120 produced, *self.needs_input* will be set to ``False``. In this case, the next
1121 call to *decompress()* may provide *data* as b'' to obtain more of the output.
1122 
1123 If all of the input data was decompressed and returned (either because this
1124 was less than *max_length* bytes, or because *max_length* was negative),
1125 *self.needs_input* will be set to True.
1126 
1127 Attempting to decompress data after the end of stream is reached raises an
1128 EOFError.  Any data found after the end of the stream is ignored and saved in
1129 the unused_data attribute.
1130 [clinic start generated code]*/
1131 
1132 static PyObject *
_lzma_LZMADecompressor_decompress_impl(Decompressor * self,Py_buffer * data,Py_ssize_t max_length)1133 _lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data,
1134                                        Py_ssize_t max_length)
1135 /*[clinic end generated code: output=ef4e20ec7122241d input=60c1f135820e309d]*/
1136 {
1137     PyObject *result = NULL;
1138 
1139     ACQUIRE_LOCK(self);
1140     if (self->eof)
1141         PyErr_SetString(PyExc_EOFError, "Already at end of stream");
1142     else
1143         result = decompress(self, data->buf, data->len, max_length);
1144     RELEASE_LOCK(self);
1145     return result;
1146 }
1147 
1148 static int
Decompressor_init_raw(_lzma_state * state,lzma_stream * lzs,PyObject * filterspecs)1149 Decompressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
1150 {
1151     lzma_filter filters[LZMA_FILTERS_MAX + 1];
1152     lzma_ret lzret;
1153 
1154     if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
1155         return -1;
1156     }
1157     lzret = lzma_raw_decoder(lzs, filters);
1158     free_filter_chain(filters);
1159     if (catch_lzma_error(state, lzret)) {
1160         return -1;
1161     }
1162     else {
1163         return 0;
1164     }
1165 }
1166 
1167 /*[clinic input]
1168 _lzma.LZMADecompressor.__init__
1169 
1170     format: int(c_default="FORMAT_AUTO") = FORMAT_AUTO
1171         Specifies the container format of the input stream.  If this is
1172         FORMAT_AUTO (the default), the decompressor will automatically detect
1173         whether the input is FORMAT_XZ or FORMAT_ALONE.  Streams created with
1174         FORMAT_RAW cannot be autodetected.
1175 
1176     memlimit: object = None
1177         Limit the amount of memory used by the decompressor.  This will cause
1178         decompression to fail if the input cannot be decompressed within the
1179         given limit.
1180 
1181     filters: object = None
1182         A custom filter chain.  This argument is required for FORMAT_RAW, and
1183         not accepted with any other format.  When provided, this should be a
1184         sequence of dicts, each indicating the ID and options for a single
1185         filter.
1186 
1187 Create a decompressor object for decompressing data incrementally.
1188 
1189 For one-shot decompression, use the decompress() function instead.
1190 [clinic start generated code]*/
1191 
1192 static int
_lzma_LZMADecompressor___init___impl(Decompressor * self,int format,PyObject * memlimit,PyObject * filters)1193 _lzma_LZMADecompressor___init___impl(Decompressor *self, int format,
1194                                      PyObject *memlimit, PyObject *filters)
1195 /*[clinic end generated code: output=3e1821f8aa36564c input=81fe684a6c2f8a27]*/
1196 {
1197     const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
1198     uint64_t memlimit_ = UINT64_MAX;
1199     lzma_ret lzret;
1200     _lzma_state *state = PyType_GetModuleState(Py_TYPE(self));
1201     assert(state != NULL);
1202 
1203     if (memlimit != Py_None) {
1204         if (format == FORMAT_RAW) {
1205             PyErr_SetString(PyExc_ValueError,
1206                             "Cannot specify memory limit with FORMAT_RAW");
1207             return -1;
1208         }
1209         memlimit_ = PyLong_AsUnsignedLongLong(memlimit);
1210         if (PyErr_Occurred()) {
1211             return -1;
1212         }
1213     }
1214 
1215     if (format == FORMAT_RAW && filters == Py_None) {
1216         PyErr_SetString(PyExc_ValueError,
1217                         "Must specify filters for FORMAT_RAW");
1218         return -1;
1219     } else if (format != FORMAT_RAW && filters != Py_None) {
1220         PyErr_SetString(PyExc_ValueError,
1221                         "Cannot specify filters except with FORMAT_RAW");
1222         return -1;
1223     }
1224 
1225     self->alloc.opaque = NULL;
1226     self->alloc.alloc = PyLzma_Malloc;
1227     self->alloc.free = PyLzma_Free;
1228     self->lzs.allocator = &self->alloc;
1229     self->lzs.next_in = NULL;
1230 
1231     PyThread_type_lock lock = PyThread_allocate_lock();
1232     if (lock == NULL) {
1233         PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
1234         return -1;
1235     }
1236     if (self->lock != NULL) {
1237         PyThread_free_lock(self->lock);
1238     }
1239     self->lock = lock;
1240 
1241     self->check = LZMA_CHECK_UNKNOWN;
1242     self->needs_input = 1;
1243     self->input_buffer = NULL;
1244     self->input_buffer_size = 0;
1245     Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
1246     if (self->unused_data == NULL) {
1247         goto error;
1248     }
1249 
1250     switch (format) {
1251         case FORMAT_AUTO:
1252             lzret = lzma_auto_decoder(&self->lzs, memlimit_, decoder_flags);
1253             if (catch_lzma_error(state, lzret)) {
1254                 break;
1255             }
1256             return 0;
1257 
1258         case FORMAT_XZ:
1259             lzret = lzma_stream_decoder(&self->lzs, memlimit_, decoder_flags);
1260             if (catch_lzma_error(state, lzret)) {
1261                 break;
1262             }
1263             return 0;
1264 
1265         case FORMAT_ALONE:
1266             self->check = LZMA_CHECK_NONE;
1267             lzret = lzma_alone_decoder(&self->lzs, memlimit_);
1268             if (catch_lzma_error(state, lzret)) {
1269                 break;
1270             }
1271             return 0;
1272 
1273         case FORMAT_RAW:
1274             self->check = LZMA_CHECK_NONE;
1275             if (Decompressor_init_raw(state, &self->lzs, filters) == -1) {
1276                 break;
1277             }
1278             return 0;
1279 
1280         default:
1281             PyErr_Format(PyExc_ValueError,
1282                          "Invalid container format: %d", format);
1283             break;
1284     }
1285 
1286 error:
1287     Py_CLEAR(self->unused_data);
1288     PyThread_free_lock(self->lock);
1289     self->lock = NULL;
1290     return -1;
1291 }
1292 
1293 static void
Decompressor_dealloc(Decompressor * self)1294 Decompressor_dealloc(Decompressor *self)
1295 {
1296     if(self->input_buffer != NULL)
1297         PyMem_Free(self->input_buffer);
1298 
1299     lzma_end(&self->lzs);
1300     Py_CLEAR(self->unused_data);
1301     if (self->lock != NULL) {
1302         PyThread_free_lock(self->lock);
1303     }
1304     PyTypeObject *tp = Py_TYPE(self);
1305     tp->tp_free((PyObject *)self);
1306     Py_DECREF(tp);
1307 }
1308 
/* tp_traverse function for LZMADecompressor (installed via Py_tp_traverse
   in lzma_decompressor_type_slots).  The only object reported to the
   visitor is the instance's own type object. */
static int
Decompressor_traverse(Decompressor *self, visitproc visit, void *arg)
{
    /* Py_VISIT uses the `visit` and `arg` parameters implicitly and returns
       early from this function if the visitor reports a non-zero result. */
    Py_VISIT(Py_TYPE(self));
    return 0;
}
1315 
/* Method table of LZMADecompressor.  The single entry comes from a
   METHODDEF macro defined outside this section (presumably generated by
   Argument Clinic for decompress()); {NULL} terminates the table. */
static PyMethodDef Decompressor_methods[] = {
    _LZMA_LZMADECOMPRESSOR_DECOMPRESS_METHODDEF
    {NULL}
};
1320 
/* Docstrings for the read-only LZMADecompressor attributes listed in
   Decompressor_members. */
PyDoc_STRVAR(Decompressor_check_doc,
"ID of the integrity check used by the input stream.");

PyDoc_STRVAR(Decompressor_eof_doc,
"True if the end-of-stream marker has been reached.");

PyDoc_STRVAR(Decompressor_needs_input_doc,
"True if more input is needed before more decompressed data can be produced.");

PyDoc_STRVAR(Decompressor_unused_data_doc,
"Data found after the end of the compressed stream.");
1332 
/* Attributes exposed on LZMADecompressor instances.  Each entry maps a
   Python attribute name directly onto a field of the Decompressor struct
   (via offsetof) and is marked READONLY; {NULL} terminates the table. */
static PyMemberDef Decompressor_members[] = {
    {"check", T_INT, offsetof(Decompressor, check), READONLY,
     Decompressor_check_doc},
    {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
     Decompressor_eof_doc},
    {"needs_input", T_BOOL, offsetof(Decompressor, needs_input), READONLY,
     Decompressor_needs_input_doc},
    {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
     Decompressor_unused_data_doc},
    {NULL}
};
1344 
/* Slot table for the LZMADecompressor type: deallocator, method table,
   __init__ wrapper, generic __new__, docstring, traversal function and
   member table.  The table is terminated by the {0, 0} sentinel. */
static PyType_Slot lzma_decompressor_type_slots[] = {
    {Py_tp_dealloc, Decompressor_dealloc},
    {Py_tp_methods, Decompressor_methods},
    {Py_tp_init, _lzma_LZMADecompressor___init__},
    {Py_tp_new, PyType_GenericNew},
    {Py_tp_doc, (char *)_lzma_LZMADecompressor___init____doc__},
    {Py_tp_traverse, Decompressor_traverse},
    {Py_tp_members, Decompressor_members},
    {0, 0}
};
1355 
/* Type specification for _lzma.LZMADecompressor; lzma_exec() passes it to
   PyType_FromModuleAndSpec() to create the type. */
static PyType_Spec lzma_decompressor_type_spec = {
    .name = "_lzma.LZMADecompressor",
    .basicsize = sizeof(Decompressor),
    // Calling PyType_GetModuleState() on a subclass is not safe.
    // lzma_decompressor_type_spec does not set the Py_TPFLAGS_BASETYPE flag,
    // so the type cannot be subclassed.
    // Calling PyType_GetModuleState() in this file is therefore always safe.
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = lzma_decompressor_type_slots,
};
1366 
1367 
1368 /* Module-level functions. */
1369 
1370 /*[clinic input]
1371 _lzma.is_check_supported
1372     check_id: int
1373     /
1374 
1375 Test whether the given integrity check is supported.
1376 
1377 Always returns True for CHECK_NONE and CHECK_CRC32.
1378 [clinic start generated code]*/
1379 
1380 static PyObject *
_lzma_is_check_supported_impl(PyObject * module,int check_id)1381 _lzma_is_check_supported_impl(PyObject *module, int check_id)
1382 /*[clinic end generated code: output=e4f14ba3ce2ad0a5 input=5518297b97b2318f]*/
1383 {
1384     return PyBool_FromLong(lzma_check_is_supported(check_id));
1385 }
1386 
/* Boilerplate for the module-level _encode_filter_properties() function:
   its docstring, its method-table entry (registered with METH_O, i.e. the
   function receives a single object argument), and a forward declaration
   of the implementation function called by the wrapper. */
PyDoc_STRVAR(_lzma__encode_filter_properties__doc__,
"_encode_filter_properties($module, filter, /)\n"
"--\n"
"\n"
"Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).\n"
"\n"
"The result does not include the filter ID itself, only the options.");

#define _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF    \
    {"_encode_filter_properties", (PyCFunction)_lzma__encode_filter_properties, METH_O, _lzma__encode_filter_properties__doc__},

static PyObject *
_lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter);
1400 
1401 static PyObject *
_lzma__encode_filter_properties(PyObject * module,PyObject * arg)1402 _lzma__encode_filter_properties(PyObject *module, PyObject *arg)
1403 {
1404     PyObject *return_value = NULL;
1405     lzma_filter filter = {LZMA_VLI_UNKNOWN, NULL};
1406     _lzma_state *state = get_lzma_state(module);
1407     assert(state != NULL);
1408     if (!lzma_filter_converter(state, arg, &filter)) {
1409         goto exit;
1410     }
1411     return_value = _lzma__encode_filter_properties_impl(module, filter);
1412 
1413 exit:
1414     /* Cleanup for filter */
1415     if (filter.id != LZMA_VLI_UNKNOWN) {
1416        PyMem_Free(filter.options);
1417     }
1418 
1419     return return_value;
1420 }
1421 
1422 static PyObject *
_lzma__encode_filter_properties_impl(PyObject * module,lzma_filter filter)1423 _lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter)
1424 {
1425     lzma_ret lzret;
1426     uint32_t encoded_size;
1427     PyObject *result = NULL;
1428     _lzma_state *state = get_lzma_state(module);
1429     assert(state != NULL);
1430 
1431     lzret = lzma_properties_size(&encoded_size, &filter);
1432     if (catch_lzma_error(state, lzret))
1433         goto error;
1434 
1435     result = PyBytes_FromStringAndSize(NULL, encoded_size);
1436     if (result == NULL)
1437         goto error;
1438 
1439     lzret = lzma_properties_encode(
1440             &filter, (uint8_t *)PyBytes_AS_STRING(result));
1441     if (catch_lzma_error(state, lzret)) {
1442         goto error;
1443     }
1444 
1445     return result;
1446 
1447 error:
1448     Py_XDECREF(result);
1449     return NULL;
1450 }
1451 
1452 
1453 /*[clinic input]
1454 _lzma._decode_filter_properties
1455     filter_id: lzma_vli
1456     encoded_props: Py_buffer
1457     /
1458 
1459 Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).
1460 
1461 The result does not include the filter ID itself, only the options.
1462 [clinic start generated code]*/
1463 
1464 static PyObject *
_lzma__decode_filter_properties_impl(PyObject * module,lzma_vli filter_id,Py_buffer * encoded_props)1465 _lzma__decode_filter_properties_impl(PyObject *module, lzma_vli filter_id,
1466                                      Py_buffer *encoded_props)
1467 /*[clinic end generated code: output=714fd2ef565d5c60 input=246410800782160c]*/
1468 {
1469     lzma_filter filter;
1470     lzma_ret lzret;
1471     PyObject *result = NULL;
1472     filter.id = filter_id;
1473     _lzma_state *state = get_lzma_state(module);
1474     assert(state != NULL);
1475 
1476     lzret = lzma_properties_decode(
1477             &filter, NULL, encoded_props->buf, encoded_props->len);
1478     if (catch_lzma_error(state, lzret)) {
1479         return NULL;
1480     }
1481 
1482     result = build_filter_spec(&filter);
1483 
1484     /* We use vanilla free() here instead of PyMem_Free() - filter.options was
1485        allocated by lzma_properties_decode() using the default allocator. */
1486     free(filter.options);
1487     return result;
1488 }
1489 
1490 /* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
1491    would not work correctly on platforms with 32-bit longs. */
1492 static int
module_add_int_constant(PyObject * m,const char * name,long long value)1493 module_add_int_constant(PyObject *m, const char *name, long long value)
1494 {
1495     PyObject *o = PyLong_FromLongLong(value);
1496     if (o == NULL) {
1497         return -1;
1498     }
1499     if (PyModule_AddObject(m, name, o) == 0) {
1500         return 0;
1501     }
1502     Py_DECREF(o);
1503     return -1;
1504 }
1505 
/* Module execution function (the Py_mod_exec slot in lzma_slots).  Fills in
   the per-module state, adds the integer constants, the LZMAError exception
   and the LZMACompressor / LZMADecompressor types to the module.
   Returns 0 on success and -1 on failure. */
static int
lzma_exec(PyObject *module)
{
/* Add the liblzma constant LZMA_<macro> to the module under the name
   <macro> (without the LZMA_ prefix); makes lzma_exec() return -1 on
   failure. */
#define ADD_INT_PREFIX_MACRO(module, macro)                                 \
    do {                                                                    \
        if (module_add_int_constant(module, #macro, LZMA_ ## macro) < 0) {  \
            return -1;                                                      \
        }                                                                   \
    } while(0)

/* Add <macro> to the module under its own name via PyModule_AddIntMacro;
   makes lzma_exec() return -1 on failure. */
#define ADD_INT_MACRO(module, macro)                                        \
    do {                                                                    \
        if (PyModule_AddIntMacro(module, macro) < 0) {                      \
            return -1;                                                      \
        }                                                                   \
    } while (0)


    _lzma_state *state = get_lzma_state(module);

    /* Empty tuple kept in the module state for use elsewhere in the file. */
    state->empty_tuple = PyTuple_New(0);
    if (state->empty_tuple == NULL) {
        return -1;
    }

    /* Container formats (names defined by this module, no LZMA_ prefix). */
    ADD_INT_MACRO(module, FORMAT_AUTO);
    ADD_INT_MACRO(module, FORMAT_XZ);
    ADD_INT_MACRO(module, FORMAT_ALONE);
    ADD_INT_MACRO(module, FORMAT_RAW);
    /* Integrity checks, filter IDs, match finders, modes and presets taken
       from liblzma's LZMA_* constants. */
    ADD_INT_PREFIX_MACRO(module, CHECK_NONE);
    ADD_INT_PREFIX_MACRO(module, CHECK_CRC32);
    ADD_INT_PREFIX_MACRO(module, CHECK_CRC64);
    ADD_INT_PREFIX_MACRO(module, CHECK_SHA256);
    ADD_INT_PREFIX_MACRO(module, CHECK_ID_MAX);
    ADD_INT_PREFIX_MACRO(module, CHECK_UNKNOWN);
    ADD_INT_PREFIX_MACRO(module, FILTER_LZMA1);
    ADD_INT_PREFIX_MACRO(module, FILTER_LZMA2);
    ADD_INT_PREFIX_MACRO(module, FILTER_DELTA);
    ADD_INT_PREFIX_MACRO(module, FILTER_X86);
    ADD_INT_PREFIX_MACRO(module, FILTER_IA64);
    ADD_INT_PREFIX_MACRO(module, FILTER_ARM);
    ADD_INT_PREFIX_MACRO(module, FILTER_ARMTHUMB);
    ADD_INT_PREFIX_MACRO(module, FILTER_SPARC);
    ADD_INT_PREFIX_MACRO(module, FILTER_POWERPC);
    ADD_INT_PREFIX_MACRO(module, MF_HC3);
    ADD_INT_PREFIX_MACRO(module, MF_HC4);
    ADD_INT_PREFIX_MACRO(module, MF_BT2);
    ADD_INT_PREFIX_MACRO(module, MF_BT3);
    ADD_INT_PREFIX_MACRO(module, MF_BT4);
    ADD_INT_PREFIX_MACRO(module, MODE_FAST);
    ADD_INT_PREFIX_MACRO(module, MODE_NORMAL);
    ADD_INT_PREFIX_MACRO(module, PRESET_DEFAULT);
    ADD_INT_PREFIX_MACRO(module, PRESET_EXTREME);

    /* Exception type raised for liblzma failures; stored in the module
       state and also added to the module. */
    state->error = PyErr_NewExceptionWithDoc("_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
    if (state->error == NULL) {
        return -1;
    }

    if (PyModule_AddType(module, (PyTypeObject *)state->error) < 0) {
        return -1;
    }


    /* Create the compressor type from its spec and add it to the module. */
    state->lzma_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
                                                            &lzma_compressor_type_spec, NULL);
    if (state->lzma_compressor_type == NULL) {
        return -1;
    }

    if (PyModule_AddType(module, state->lzma_compressor_type) < 0) {
        return -1;
    }

    /* Same for the decompressor type. */
    state->lzma_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
                                                         &lzma_decompressor_type_spec, NULL);
    if (state->lzma_decompressor_type == NULL) {
        return -1;
    }

    if (PyModule_AddType(module, state->lzma_decompressor_type) < 0) {
        return -1;
    }

    return 0;
}
1592 
/* Module-level function table.  Each entry is expanded from a *_METHODDEF
   macro; the one for _encode_filter_properties is defined in this file, the
   other two are defined outside this section.  {NULL} terminates the
   table. */
static PyMethodDef lzma_methods[] = {
    _LZMA_IS_CHECK_SUPPORTED_METHODDEF
    _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
    _LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
    {NULL}
};
1599 
/* Module slots: lzma_exec() is registered as the Py_mod_exec step.
   The table is terminated by the {0, NULL} sentinel. */
static PyModuleDef_Slot lzma_slots[] = {
    {Py_mod_exec, lzma_exec},
    {0, NULL}
};
1604 
1605 static int
lzma_traverse(PyObject * module,visitproc visit,void * arg)1606 lzma_traverse(PyObject *module, visitproc visit, void *arg)
1607 {
1608     _lzma_state *state = get_lzma_state(module);
1609     Py_VISIT(state->lzma_compressor_type);
1610     Py_VISIT(state->lzma_decompressor_type);
1611     Py_VISIT(state->error);
1612     Py_VISIT(state->empty_tuple);
1613     return 0;
1614 }
1615 
1616 static int
lzma_clear(PyObject * module)1617 lzma_clear(PyObject *module)
1618 {
1619     _lzma_state *state = get_lzma_state(module);
1620     Py_CLEAR(state->lzma_compressor_type);
1621     Py_CLEAR(state->lzma_decompressor_type);
1622     Py_CLEAR(state->error);
1623     Py_CLEAR(state->empty_tuple);
1624     return 0;
1625 }
1626 
1627 static void
lzma_free(void * module)1628 lzma_free(void *module)
1629 {
1630     lzma_clear((PyObject *)module);
1631 }
1632 
/* Module definition for _lzma.  m_size reserves room for one _lzma_state
   per module object; the traverse/clear/free hooks manage the object
   references stored in that state. */
static PyModuleDef _lzmamodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "_lzma",
    .m_size = sizeof(_lzma_state),
    .m_methods = lzma_methods,
    .m_slots = lzma_slots,
    .m_traverse = lzma_traverse,
    .m_clear = lzma_clear,
    .m_free = lzma_free,
};
1643 
/* Module initialisation entry point.  It only hands the module definition
   to PyModuleDef_Init(); the actual setup is done by lzma_exec(), which is
   registered in lzma_slots. */
PyMODINIT_FUNC
PyInit__lzma(void)
{
    return PyModuleDef_Init(&_lzmamodule);
}
1649