1 /* _lzma - Low-level Python interface to liblzma.
2
3 Initial implementation by Per Øyvind Karlsen.
4 Rewritten by Nadeem Vawda.
5
6 */
7
8 #define PY_SSIZE_T_CLEAN
9
10 #include "Python.h"
11 #include "structmember.h" // PyMemberDef
12
13 #include <stdlib.h> // free()
14 #include <string.h>
15
16 #include <lzma.h>
17
18 // Blocks output buffer wrappers
19 #include "pycore_blocks_output_buffer.h"
20
21 #if OUTPUT_BUFFER_MAX_BLOCK_SIZE > SIZE_MAX
22 #error "The maximum block size accepted by liblzma is SIZE_MAX."
23 #endif
24
25 /* On success, return value >= 0
26 On failure, return -1 */
27 static inline Py_ssize_t
OutputBuffer_InitAndGrow(_BlocksOutputBuffer * buffer,Py_ssize_t max_length,uint8_t ** next_out,size_t * avail_out)28 OutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
29 uint8_t **next_out, size_t *avail_out)
30 {
31 Py_ssize_t allocated;
32
33 allocated = _BlocksOutputBuffer_InitAndGrow(
34 buffer, max_length, (void**) next_out);
35 *avail_out = (size_t) allocated;
36 return allocated;
37 }
38
39 /* On success, return value >= 0
40 On failure, return -1 */
41 static inline Py_ssize_t
OutputBuffer_Grow(_BlocksOutputBuffer * buffer,uint8_t ** next_out,size_t * avail_out)42 OutputBuffer_Grow(_BlocksOutputBuffer *buffer,
43 uint8_t **next_out, size_t *avail_out)
44 {
45 Py_ssize_t allocated;
46
47 allocated = _BlocksOutputBuffer_Grow(
48 buffer, (void**) next_out, (Py_ssize_t) *avail_out);
49 *avail_out = (size_t) allocated;
50 return allocated;
51 }
52
53 static inline Py_ssize_t
OutputBuffer_GetDataSize(_BlocksOutputBuffer * buffer,size_t avail_out)54 OutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, size_t avail_out)
55 {
56 return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out);
57 }
58
59 static inline PyObject *
OutputBuffer_Finish(_BlocksOutputBuffer * buffer,size_t avail_out)60 OutputBuffer_Finish(_BlocksOutputBuffer *buffer, size_t avail_out)
61 {
62 return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out);
63 }
64
65 static inline void
OutputBuffer_OnError(_BlocksOutputBuffer * buffer)66 OutputBuffer_OnError(_BlocksOutputBuffer *buffer)
67 {
68 _BlocksOutputBuffer_OnError(buffer);
69 }
70
71
/* Take (obj)->lock.  A non-blocking attempt (waitflag 0) is made first; only
   when that fails does the macro wrap a blocking acquire (waitflag 1) in
   Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS so other Python threads can
   run while this one waits. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)

/* Release (obj)->lock previously taken with ACQUIRE_LOCK(). */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
79
80 typedef struct {
81 PyTypeObject *lzma_compressor_type;
82 PyTypeObject *lzma_decompressor_type;
83 PyObject *error;
84 PyObject *empty_tuple;
85 } _lzma_state;
86
87 static inline _lzma_state*
get_lzma_state(PyObject * module)88 get_lzma_state(PyObject *module)
89 {
90 void *state = PyModule_GetState(module);
91 assert(state != NULL);
92 return (_lzma_state *)state;
93 }
94
95 /* Container formats: */
enum {
    FORMAT_AUTO  = 0,
    FORMAT_XZ    = 1,
    FORMAT_ALONE = 2,
    FORMAT_RAW   = 3,
};
102
/* One more than liblzma's LZMA_CHECK_ID_MAX, i.e. a value that is not a check
   ID liblzma itself defines.  NOTE(review): its users are outside this part
   of the file; presumably it marks "check type not known yet" -- confirm. */
#define LZMA_CHECK_UNKNOWN (LZMA_CHECK_ID_MAX + 1)
104
105
106 typedef struct {
107 PyObject_HEAD
108 lzma_allocator alloc;
109 lzma_stream lzs;
110 int flushed;
111 PyThread_type_lock lock;
112 } Compressor;
113
114 typedef struct {
115 PyObject_HEAD
116 lzma_allocator alloc;
117 lzma_stream lzs;
118 int check;
119 char eof;
120 PyObject *unused_data;
121 char needs_input;
122 uint8_t *input_buffer;
123 size_t input_buffer_size;
124 PyThread_type_lock lock;
125 } Decompressor;
126
127 /* Helper functions. */
128
129 static int
catch_lzma_error(_lzma_state * state,lzma_ret lzret)130 catch_lzma_error(_lzma_state *state, lzma_ret lzret)
131 {
132 switch (lzret) {
133 case LZMA_OK:
134 case LZMA_GET_CHECK:
135 case LZMA_NO_CHECK:
136 case LZMA_STREAM_END:
137 return 0;
138 case LZMA_UNSUPPORTED_CHECK:
139 PyErr_SetString(state->error, "Unsupported integrity check");
140 return 1;
141 case LZMA_MEM_ERROR:
142 PyErr_NoMemory();
143 return 1;
144 case LZMA_MEMLIMIT_ERROR:
145 PyErr_SetString(state->error, "Memory usage limit exceeded");
146 return 1;
147 case LZMA_FORMAT_ERROR:
148 PyErr_SetString(state->error, "Input format not supported by decoder");
149 return 1;
150 case LZMA_OPTIONS_ERROR:
151 PyErr_SetString(state->error, "Invalid or unsupported options");
152 return 1;
153 case LZMA_DATA_ERROR:
154 PyErr_SetString(state->error, "Corrupt input data");
155 return 1;
156 case LZMA_BUF_ERROR:
157 PyErr_SetString(state->error, "Insufficient buffer space");
158 return 1;
159 case LZMA_PROG_ERROR:
160 PyErr_SetString(state->error, "Internal error");
161 return 1;
162 default:
163 PyErr_Format(state->error, "Unrecognized error from liblzma: %d", lzret);
164 return 1;
165 }
166 }
167
168 static void*
PyLzma_Malloc(void * opaque,size_t items,size_t size)169 PyLzma_Malloc(void *opaque, size_t items, size_t size)
170 {
171 if (size != 0 && items > (size_t)PY_SSIZE_T_MAX / size) {
172 return NULL;
173 }
174 /* PyMem_Malloc() cannot be used:
175 the GIL is not held when lzma_code() is called */
176 return PyMem_RawMalloc(items * size);
177 }
178
/* lzma_allocator.free callback: release memory obtained from
   PyLzma_Malloc().  `opaque` is unused. */
static void
PyLzma_Free(void *opaque, void *ptr)
{
    PyMem_RawFree(ptr);
}
184
185
186 /* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
187 since the predefined conversion specifiers do not suit our needs:
188
189 uint32_t - the "I" (unsigned int) specifier is the right size, but
190 silently ignores overflows on conversion.
191
192 lzma_vli - the "K" (unsigned long long) specifier is the right
193 size, but like "I" it silently ignores overflows on conversion.
194
195 lzma_mode and lzma_match_finder - these are enumeration types, and
196 so the size of each is implementation-defined. Worse, different
197 enum types can be of different sizes within the same program, so
198 to be strictly correct, we need to define two separate converters.
199 */
200
201 #define INT_TYPE_CONVERTER_FUNC(TYPE, FUNCNAME) \
202 static int \
203 FUNCNAME(PyObject *obj, void *ptr) \
204 { \
205 unsigned long long val; \
206 \
207 val = PyLong_AsUnsignedLongLong(obj); \
208 if (PyErr_Occurred()) \
209 return 0; \
210 if ((unsigned long long)(TYPE)val != val) { \
211 PyErr_SetString(PyExc_OverflowError, \
212 "Value too large for " #TYPE " type"); \
213 return 0; \
214 } \
215 *(TYPE *)ptr = (TYPE)val; \
216 return 1; \
217 }
218
INT_TYPE_CONVERTER_FUNC(uint32_t,uint32_converter)219 INT_TYPE_CONVERTER_FUNC(uint32_t, uint32_converter)
220 INT_TYPE_CONVERTER_FUNC(lzma_vli, lzma_vli_converter)
221 INT_TYPE_CONVERTER_FUNC(lzma_mode, lzma_mode_converter)
222 INT_TYPE_CONVERTER_FUNC(lzma_match_finder, lzma_mf_converter)
223
224 #undef INT_TYPE_CONVERTER_FUNC
225
226
227 /* Filter specifier parsing.
228
229 This code handles converting filter specifiers (Python dicts) into
230 the C lzma_filter structs expected by liblzma. */
231
232 static void *
233 parse_filter_spec_lzma(_lzma_state *state, PyObject *spec)
234 {
235 static char *optnames[] = {"id", "preset", "dict_size", "lc", "lp",
236 "pb", "mode", "nice_len", "mf", "depth", NULL};
237 PyObject *id;
238 PyObject *preset_obj;
239 uint32_t preset = LZMA_PRESET_DEFAULT;
240 lzma_options_lzma *options;
241
242 /* First, fill in default values for all the options using a preset.
243 Then, override the defaults with any values given by the caller. */
244
245 preset_obj = PyMapping_GetItemString(spec, "preset");
246 if (preset_obj == NULL) {
247 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
248 PyErr_Clear();
249 }
250 else {
251 return NULL;
252 }
253 } else {
254 int ok = uint32_converter(preset_obj, &preset);
255 Py_DECREF(preset_obj);
256 if (!ok) {
257 return NULL;
258 }
259 }
260
261 options = (lzma_options_lzma *)PyMem_Calloc(1, sizeof *options);
262 if (options == NULL) {
263 return PyErr_NoMemory();
264 }
265
266 if (lzma_lzma_preset(options, preset)) {
267 PyMem_Free(options);
268 PyErr_Format(state->error, "Invalid compression preset: %u", preset);
269 return NULL;
270 }
271
272 if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec,
273 "|OOO&O&O&O&O&O&O&O&", optnames,
274 &id, &preset_obj,
275 uint32_converter, &options->dict_size,
276 uint32_converter, &options->lc,
277 uint32_converter, &options->lp,
278 uint32_converter, &options->pb,
279 lzma_mode_converter, &options->mode,
280 uint32_converter, &options->nice_len,
281 lzma_mf_converter, &options->mf,
282 uint32_converter, &options->depth)) {
283 PyErr_SetString(PyExc_ValueError,
284 "Invalid filter specifier for LZMA filter");
285 PyMem_Free(options);
286 return NULL;
287 }
288
289 return options;
290 }
291
292 static void *
parse_filter_spec_delta(_lzma_state * state,PyObject * spec)293 parse_filter_spec_delta(_lzma_state *state, PyObject *spec)
294 {
295 static char *optnames[] = {"id", "dist", NULL};
296 PyObject *id;
297 uint32_t dist = 1;
298 lzma_options_delta *options;
299
300 if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
301 &id, uint32_converter, &dist)) {
302 PyErr_SetString(PyExc_ValueError,
303 "Invalid filter specifier for delta filter");
304 return NULL;
305 }
306
307 options = (lzma_options_delta *)PyMem_Calloc(1, sizeof *options);
308 if (options == NULL) {
309 return PyErr_NoMemory();
310 }
311 options->type = LZMA_DELTA_TYPE_BYTE;
312 options->dist = dist;
313 return options;
314 }
315
316 static void *
parse_filter_spec_bcj(_lzma_state * state,PyObject * spec)317 parse_filter_spec_bcj(_lzma_state *state, PyObject *spec)
318 {
319 static char *optnames[] = {"id", "start_offset", NULL};
320 PyObject *id;
321 uint32_t start_offset = 0;
322 lzma_options_bcj *options;
323
324 if (!PyArg_ParseTupleAndKeywords(state->empty_tuple, spec, "|OO&", optnames,
325 &id, uint32_converter, &start_offset)) {
326 PyErr_SetString(PyExc_ValueError,
327 "Invalid filter specifier for BCJ filter");
328 return NULL;
329 }
330
331 options = (lzma_options_bcj *)PyMem_Calloc(1, sizeof *options);
332 if (options == NULL) {
333 return PyErr_NoMemory();
334 }
335 options->start_offset = start_offset;
336 return options;
337 }
338
339 static int
lzma_filter_converter(_lzma_state * state,PyObject * spec,void * ptr)340 lzma_filter_converter(_lzma_state *state, PyObject *spec, void *ptr)
341 {
342 lzma_filter *f = (lzma_filter *)ptr;
343 PyObject *id_obj;
344
345 if (!PyMapping_Check(spec)) {
346 PyErr_SetString(PyExc_TypeError,
347 "Filter specifier must be a dict or dict-like object");
348 return 0;
349 }
350 id_obj = PyMapping_GetItemString(spec, "id");
351 if (id_obj == NULL) {
352 if (PyErr_ExceptionMatches(PyExc_KeyError))
353 PyErr_SetString(PyExc_ValueError,
354 "Filter specifier must have an \"id\" entry");
355 return 0;
356 }
357 f->id = PyLong_AsUnsignedLongLong(id_obj);
358 Py_DECREF(id_obj);
359 if (PyErr_Occurred()) {
360 return 0;
361 }
362
363 switch (f->id) {
364 case LZMA_FILTER_LZMA1:
365 case LZMA_FILTER_LZMA2:
366 f->options = parse_filter_spec_lzma(state, spec);
367 return f->options != NULL;
368 case LZMA_FILTER_DELTA:
369 f->options = parse_filter_spec_delta(state, spec);
370 return f->options != NULL;
371 case LZMA_FILTER_X86:
372 case LZMA_FILTER_POWERPC:
373 case LZMA_FILTER_IA64:
374 case LZMA_FILTER_ARM:
375 case LZMA_FILTER_ARMTHUMB:
376 case LZMA_FILTER_SPARC:
377 f->options = parse_filter_spec_bcj(state, spec);
378 return f->options != NULL;
379 default:
380 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
381 return 0;
382 }
383 }
384
385 static void
free_filter_chain(lzma_filter filters[])386 free_filter_chain(lzma_filter filters[])
387 {
388 for (int i = 0; filters[i].id != LZMA_VLI_UNKNOWN; i++) {
389 PyMem_Free(filters[i].options);
390 }
391 }
392
393 static int
parse_filter_chain_spec(_lzma_state * state,lzma_filter filters[],PyObject * filterspecs)394 parse_filter_chain_spec(_lzma_state *state, lzma_filter filters[], PyObject *filterspecs)
395 {
396 Py_ssize_t i, num_filters;
397
398 num_filters = PySequence_Length(filterspecs);
399 if (num_filters == -1) {
400 return -1;
401 }
402 if (num_filters > LZMA_FILTERS_MAX) {
403 PyErr_Format(PyExc_ValueError,
404 "Too many filters - liblzma supports a maximum of %d",
405 LZMA_FILTERS_MAX);
406 return -1;
407 }
408
409 for (i = 0; i < num_filters; i++) {
410 int ok = 1;
411 PyObject *spec = PySequence_GetItem(filterspecs, i);
412 if (spec == NULL || !lzma_filter_converter(state, spec, &filters[i])) {
413 ok = 0;
414 }
415 Py_XDECREF(spec);
416 if (!ok) {
417 filters[i].id = LZMA_VLI_UNKNOWN;
418 free_filter_chain(filters);
419 return -1;
420 }
421 }
422 filters[num_filters].id = LZMA_VLI_UNKNOWN;
423 return 0;
424 }
425
426
427 /* Filter specifier construction.
428
429 This code handles converting C lzma_filter structs into
430 Python-level filter specifiers (represented as dicts). */
431
432 static int
spec_add_field(PyObject * spec,const char * key,unsigned long long value)433 spec_add_field(PyObject *spec, const char *key, unsigned long long value)
434 {
435 PyObject *value_object = PyLong_FromUnsignedLongLong(value);
436 if (value_object == NULL) {
437 return -1;
438 }
439 PyObject *key_object = PyUnicode_InternFromString(key);
440 if (key_object == NULL) {
441 Py_DECREF(value_object);
442 return -1;
443 }
444 int status = PyDict_SetItem(spec, key_object, value_object);
445 Py_DECREF(key_object);
446 Py_DECREF(value_object);
447 return status;
448 }
449
450 static PyObject *
build_filter_spec(const lzma_filter * f)451 build_filter_spec(const lzma_filter *f)
452 {
453 PyObject *spec;
454
455 spec = PyDict_New();
456 if (spec == NULL) {
457 return NULL;
458 }
459
460 #define ADD_FIELD(SOURCE, FIELD) \
461 do { \
462 if (spec_add_field(spec, #FIELD, SOURCE->FIELD) == -1) \
463 goto error;\
464 } while (0)
465
466 ADD_FIELD(f, id);
467
468 switch (f->id) {
469 /* For LZMA1 filters, lzma_properties_{encode,decode}() only look at the
470 lc, lp, pb, and dict_size fields. For LZMA2 filters, only the
471 dict_size field is used. */
472 case LZMA_FILTER_LZMA1: {
473 lzma_options_lzma *options = f->options;
474 ADD_FIELD(options, lc);
475 ADD_FIELD(options, lp);
476 ADD_FIELD(options, pb);
477 ADD_FIELD(options, dict_size);
478 break;
479 }
480 case LZMA_FILTER_LZMA2: {
481 lzma_options_lzma *options = f->options;
482 ADD_FIELD(options, dict_size);
483 break;
484 }
485 case LZMA_FILTER_DELTA: {
486 lzma_options_delta *options = f->options;
487 ADD_FIELD(options, dist);
488 break;
489 }
490 case LZMA_FILTER_X86:
491 case LZMA_FILTER_POWERPC:
492 case LZMA_FILTER_IA64:
493 case LZMA_FILTER_ARM:
494 case LZMA_FILTER_ARMTHUMB:
495 case LZMA_FILTER_SPARC: {
496 lzma_options_bcj *options = f->options;
497 ADD_FIELD(options, start_offset);
498 break;
499 }
500 default:
501 PyErr_Format(PyExc_ValueError, "Invalid filter ID: %llu", f->id);
502 goto error;
503 }
504
505 #undef ADD_FIELD
506
507 return spec;
508
509 error:
510 Py_DECREF(spec);
511 return NULL;
512 }
513
514
515 /*[clinic input]
516 module _lzma
517 class _lzma.LZMACompressor "Compressor *" "&Compressor_type"
518 class _lzma.LZMADecompressor "Decompressor *" "&Decompressor_type"
519 [clinic start generated code]*/
520 /*[clinic end generated code: output=da39a3ee5e6b4b0d input=2c14bbe05ff0c147]*/
521
522 #include "clinic/_lzmamodule.c.h"
523
524 /*[python input]
525
526 class lzma_vli_converter(CConverter):
527 type = 'lzma_vli'
528 converter = 'lzma_vli_converter'
529
530 class lzma_filter_converter(CConverter):
531 type = 'lzma_filter'
532 converter = 'lzma_filter_converter'
533 c_default = c_ignored_default = "{LZMA_VLI_UNKNOWN, NULL}"
534
535 def cleanup(self):
536 name = ensure_legal_c_identifier(self.name)
537 return ('if (%(name)s.id != LZMA_VLI_UNKNOWN)\n'
538 ' PyMem_Free(%(name)s.options);\n') % {'name': name}
539
540 [python start generated code]*/
541 /*[python end generated code: output=da39a3ee5e6b4b0d input=74fe7631ce377a94]*/
542
543
544 /* LZMACompressor class. */
545
546 static PyObject *
compress(Compressor * c,uint8_t * data,size_t len,lzma_action action)547 compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
548 {
549 PyObject *result;
550 _BlocksOutputBuffer buffer = {.list = NULL};
551 _lzma_state *state = PyType_GetModuleState(Py_TYPE(c));
552 assert(state != NULL);
553
554 if (OutputBuffer_InitAndGrow(&buffer, -1, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
555 goto error;
556 }
557 c->lzs.next_in = data;
558 c->lzs.avail_in = len;
559
560 for (;;) {
561 lzma_ret lzret;
562
563 Py_BEGIN_ALLOW_THREADS
564 lzret = lzma_code(&c->lzs, action);
565 Py_END_ALLOW_THREADS
566
567 if (lzret == LZMA_BUF_ERROR && len == 0 && c->lzs.avail_out > 0) {
568 lzret = LZMA_OK; /* That wasn't a real error */
569 }
570 if (catch_lzma_error(state, lzret)) {
571 goto error;
572 }
573 if ((action == LZMA_RUN && c->lzs.avail_in == 0) ||
574 (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
575 break;
576 } else if (c->lzs.avail_out == 0) {
577 if (OutputBuffer_Grow(&buffer, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
578 goto error;
579 }
580 }
581 }
582
583 result = OutputBuffer_Finish(&buffer, c->lzs.avail_out);
584 if (result != NULL) {
585 return result;
586 }
587
588 error:
589 OutputBuffer_OnError(&buffer);
590 return NULL;
591 }
592
593 /*[clinic input]
594 _lzma.LZMACompressor.compress
595
596 data: Py_buffer
597 /
598
599 Provide data to the compressor object.
600
601 Returns a chunk of compressed data if possible, or b'' otherwise.
602
603 When you have finished providing data to the compressor, call the
604 flush() method to finish the compression process.
605 [clinic start generated code]*/
606
607 static PyObject *
_lzma_LZMACompressor_compress_impl(Compressor * self,Py_buffer * data)608 _lzma_LZMACompressor_compress_impl(Compressor *self, Py_buffer *data)
609 /*[clinic end generated code: output=31f615136963e00f input=64019eac7f2cc8d0]*/
610 {
611 PyObject *result = NULL;
612
613 ACQUIRE_LOCK(self);
614 if (self->flushed) {
615 PyErr_SetString(PyExc_ValueError, "Compressor has been flushed");
616 }
617 else {
618 result = compress(self, data->buf, data->len, LZMA_RUN);
619 }
620 RELEASE_LOCK(self);
621 return result;
622 }
623
624 /*[clinic input]
625 _lzma.LZMACompressor.flush
626
627 Finish the compression process.
628
629 Returns the compressed data left in internal buffers.
630
631 The compressor object may not be used after this method is called.
632 [clinic start generated code]*/
633
634 static PyObject *
_lzma_LZMACompressor_flush_impl(Compressor * self)635 _lzma_LZMACompressor_flush_impl(Compressor *self)
636 /*[clinic end generated code: output=fec21f3e22504f50 input=6b369303f67ad0a8]*/
637 {
638 PyObject *result = NULL;
639
640 ACQUIRE_LOCK(self);
641 if (self->flushed) {
642 PyErr_SetString(PyExc_ValueError, "Repeated call to flush()");
643 } else {
644 self->flushed = 1;
645 result = compress(self, NULL, 0, LZMA_FINISH);
646 }
647 RELEASE_LOCK(self);
648 return result;
649 }
650
651 static int
Compressor_init_xz(_lzma_state * state,lzma_stream * lzs,int check,uint32_t preset,PyObject * filterspecs)652 Compressor_init_xz(_lzma_state *state, lzma_stream *lzs,
653 int check, uint32_t preset, PyObject *filterspecs)
654 {
655 lzma_ret lzret;
656
657 if (filterspecs == Py_None) {
658 lzret = lzma_easy_encoder(lzs, preset, check);
659 } else {
660 lzma_filter filters[LZMA_FILTERS_MAX + 1];
661
662 if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
663 return -1;
664 lzret = lzma_stream_encoder(lzs, filters, check);
665 free_filter_chain(filters);
666 }
667 if (catch_lzma_error(state, lzret)) {
668 return -1;
669 }
670 else {
671 return 0;
672 }
673 }
674
675 static int
Compressor_init_alone(_lzma_state * state,lzma_stream * lzs,uint32_t preset,PyObject * filterspecs)676 Compressor_init_alone(_lzma_state *state, lzma_stream *lzs, uint32_t preset, PyObject *filterspecs)
677 {
678 lzma_ret lzret;
679
680 if (filterspecs == Py_None) {
681 lzma_options_lzma options;
682
683 if (lzma_lzma_preset(&options, preset)) {
684 PyErr_Format(state->error, "Invalid compression preset: %u", preset);
685 return -1;
686 }
687 lzret = lzma_alone_encoder(lzs, &options);
688 } else {
689 lzma_filter filters[LZMA_FILTERS_MAX + 1];
690
691 if (parse_filter_chain_spec(state, filters, filterspecs) == -1)
692 return -1;
693 if (filters[0].id == LZMA_FILTER_LZMA1 &&
694 filters[1].id == LZMA_VLI_UNKNOWN) {
695 lzret = lzma_alone_encoder(lzs, filters[0].options);
696 } else {
697 PyErr_SetString(PyExc_ValueError,
698 "Invalid filter chain for FORMAT_ALONE - "
699 "must be a single LZMA1 filter");
700 lzret = LZMA_PROG_ERROR;
701 }
702 free_filter_chain(filters);
703 }
704 if (PyErr_Occurred() || catch_lzma_error(state, lzret)) {
705 return -1;
706 }
707 else {
708 return 0;
709 }
710 }
711
712 static int
Compressor_init_raw(_lzma_state * state,lzma_stream * lzs,PyObject * filterspecs)713 Compressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
714 {
715 lzma_filter filters[LZMA_FILTERS_MAX + 1];
716 lzma_ret lzret;
717
718 if (filterspecs == Py_None) {
719 PyErr_SetString(PyExc_ValueError,
720 "Must specify filters for FORMAT_RAW");
721 return -1;
722 }
723 if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
724 return -1;
725 }
726 lzret = lzma_raw_encoder(lzs, filters);
727 free_filter_chain(filters);
728 if (catch_lzma_error(state, lzret)) {
729 return -1;
730 }
731 else {
732 return 0;
733 }
734 }
735
736 /*[-clinic input]
737 _lzma.LZMACompressor.__init__
738
739 format: int(c_default="FORMAT_XZ") = FORMAT_XZ
740 The container format to use for the output. This can
741 be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.
742
743 check: int(c_default="-1") = unspecified
744 The integrity check to use. For FORMAT_XZ, the default
745 is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity
746 checks; for these formats, check must be omitted, or be CHECK_NONE.
747
748 preset: object = None
749 If provided should be an integer in the range 0-9, optionally
750 OR-ed with the constant PRESET_EXTREME.
751
752 filters: object = None
753 If provided should be a sequence of dicts. Each dict should
754 have an entry for "id" indicating the ID of the filter, plus
755 additional entries for options to the filter.
756
757 Create a compressor object for compressing data incrementally.
758
759 The settings used by the compressor can be specified either as a
760 preset compression level (with the 'preset' argument), or in detail
761 as a custom filter chain (with the 'filters' argument). For FORMAT_XZ
762 and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset
763 level. For FORMAT_RAW, the caller must always specify a filter chain;
764 the raw compressor does not support preset compression levels.
765
766 For one-shot compression, use the compress() function instead.
767 [-clinic start generated code]*/
768 static int
Compressor_init(Compressor * self,PyObject * args,PyObject * kwargs)769 Compressor_init(Compressor *self, PyObject *args, PyObject *kwargs)
770 {
771 static char *arg_names[] = {"format", "check", "preset", "filters", NULL};
772 int format = FORMAT_XZ;
773 int check = -1;
774 uint32_t preset = LZMA_PRESET_DEFAULT;
775 PyObject *preset_obj = Py_None;
776 PyObject *filterspecs = Py_None;
777 _lzma_state *state = PyType_GetModuleState(Py_TYPE(self));
778 assert(state != NULL);
779 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
780 "|iiOO:LZMACompressor", arg_names,
781 &format, &check, &preset_obj,
782 &filterspecs)) {
783 return -1;
784 }
785
786 if (format != FORMAT_XZ && check != -1 && check != LZMA_CHECK_NONE) {
787 PyErr_SetString(PyExc_ValueError,
788 "Integrity checks are only supported by FORMAT_XZ");
789 return -1;
790 }
791
792 if (preset_obj != Py_None && filterspecs != Py_None) {
793 PyErr_SetString(PyExc_ValueError,
794 "Cannot specify both preset and filter chain");
795 return -1;
796 }
797
798 if (preset_obj != Py_None) {
799 if (!uint32_converter(preset_obj, &preset)) {
800 return -1;
801 }
802 }
803
804 self->alloc.opaque = NULL;
805 self->alloc.alloc = PyLzma_Malloc;
806 self->alloc.free = PyLzma_Free;
807 self->lzs.allocator = &self->alloc;
808
809 self->lock = PyThread_allocate_lock();
810 if (self->lock == NULL) {
811 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
812 return -1;
813 }
814
815 self->flushed = 0;
816 switch (format) {
817 case FORMAT_XZ:
818 if (check == -1) {
819 check = LZMA_CHECK_CRC64;
820 }
821 if (Compressor_init_xz(state, &self->lzs, check, preset, filterspecs) != 0) {
822 break;
823 }
824 return 0;
825
826 case FORMAT_ALONE:
827 if (Compressor_init_alone(state, &self->lzs, preset, filterspecs) != 0) {
828 break;
829 }
830 return 0;
831
832 case FORMAT_RAW:
833 if (Compressor_init_raw(state, &self->lzs, filterspecs) != 0) {
834 break;
835 }
836 return 0;
837
838 default:
839 PyErr_Format(PyExc_ValueError,
840 "Invalid container format: %d", format);
841 break;
842 }
843
844 PyThread_free_lock(self->lock);
845 self->lock = NULL;
846 return -1;
847 }
848
849 static void
Compressor_dealloc(Compressor * self)850 Compressor_dealloc(Compressor *self)
851 {
852 lzma_end(&self->lzs);
853 if (self->lock != NULL) {
854 PyThread_free_lock(self->lock);
855 }
856 PyTypeObject *tp = Py_TYPE(self);
857 tp->tp_free((PyObject *)self);
858 Py_DECREF(tp);
859 }
860
861 static PyMethodDef Compressor_methods[] = {
862 _LZMA_LZMACOMPRESSOR_COMPRESS_METHODDEF
863 _LZMA_LZMACOMPRESSOR_FLUSH_METHODDEF
864 {NULL}
865 };
866
867 static int
Compressor_traverse(Compressor * self,visitproc visit,void * arg)868 Compressor_traverse(Compressor *self, visitproc visit, void *arg)
869 {
870 Py_VISIT(Py_TYPE(self));
871 return 0;
872 }
873
874 PyDoc_STRVAR(Compressor_doc,
875 "LZMACompressor(format=FORMAT_XZ, check=-1, preset=None, filters=None)\n"
876 "\n"
877 "Create a compressor object for compressing data incrementally.\n"
878 "\n"
879 "format specifies the container format to use for the output. This can\n"
880 "be FORMAT_XZ (default), FORMAT_ALONE, or FORMAT_RAW.\n"
881 "\n"
882 "check specifies the integrity check to use. For FORMAT_XZ, the default\n"
883 "is CHECK_CRC64. FORMAT_ALONE and FORMAT_RAW do not support integrity\n"
884 "checks; for these formats, check must be omitted, or be CHECK_NONE.\n"
885 "\n"
886 "The settings used by the compressor can be specified either as a\n"
887 "preset compression level (with the 'preset' argument), or in detail\n"
888 "as a custom filter chain (with the 'filters' argument). For FORMAT_XZ\n"
889 "and FORMAT_ALONE, the default is to use the PRESET_DEFAULT preset\n"
890 "level. For FORMAT_RAW, the caller must always specify a filter chain;\n"
891 "the raw compressor does not support preset compression levels.\n"
892 "\n"
893 "preset (if provided) should be an integer in the range 0-9, optionally\n"
894 "OR-ed with the constant PRESET_EXTREME.\n"
895 "\n"
896 "filters (if provided) should be a sequence of dicts. Each dict should\n"
897 "have an entry for \"id\" indicating the ID of the filter, plus\n"
898 "additional entries for options to the filter.\n"
899 "\n"
900 "For one-shot compression, use the compress() function instead.\n");
901
902 static PyType_Slot lzma_compressor_type_slots[] = {
903 {Py_tp_dealloc, Compressor_dealloc},
904 {Py_tp_methods, Compressor_methods},
905 {Py_tp_init, Compressor_init},
906 {Py_tp_new, PyType_GenericNew},
907 {Py_tp_doc, (char *)Compressor_doc},
908 {Py_tp_traverse, Compressor_traverse},
909 {0, 0}
910 };
911
912 static PyType_Spec lzma_compressor_type_spec = {
913 .name = "_lzma.LZMACompressor",
914 .basicsize = sizeof(Compressor),
915 // Calling PyType_GetModuleState() on a subclass is not safe.
916 // lzma_compressor_type_spec does not have Py_TPFLAGS_BASETYPE flag
917 // which prevents to create a subclass.
918 // So calling PyType_GetModuleState() in this file is always safe.
919 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
920 .slots = lzma_compressor_type_slots,
921 };
922
923 /* LZMADecompressor class. */
924
925 /* Decompress data of length d->lzs.avail_in in d->lzs.next_in. The output
926 buffer is allocated dynamically and returned. At most max_length bytes are
927 returned, so some of the input may not be consumed. d->lzs.next_in and
928 d->lzs.avail_in are updated to reflect the consumed input. */
929 static PyObject*
decompress_buf(Decompressor * d,Py_ssize_t max_length)930 decompress_buf(Decompressor *d, Py_ssize_t max_length)
931 {
932 PyObject *result;
933 lzma_stream *lzs = &d->lzs;
934 _BlocksOutputBuffer buffer = {.list = NULL};
935 _lzma_state *state = PyType_GetModuleState(Py_TYPE(d));
936 assert(state != NULL);
937
938 if (OutputBuffer_InitAndGrow(&buffer, max_length, &lzs->next_out, &lzs->avail_out) < 0) {
939 goto error;
940 }
941
942 for (;;) {
943 lzma_ret lzret;
944
945 Py_BEGIN_ALLOW_THREADS
946 lzret = lzma_code(lzs, LZMA_RUN);
947 Py_END_ALLOW_THREADS
948
949 if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0) {
950 lzret = LZMA_OK; /* That wasn't a real error */
951 }
952 if (catch_lzma_error(state, lzret)) {
953 goto error;
954 }
955 if (lzret == LZMA_GET_CHECK || lzret == LZMA_NO_CHECK) {
956 d->check = lzma_get_check(&d->lzs);
957 }
958 if (lzret == LZMA_STREAM_END) {
959 d->eof = 1;
960 break;
961 } else if (lzs->avail_out == 0) {
962 /* Need to check lzs->avail_out before lzs->avail_in.
963 Maybe lzs's internal state still have a few bytes
964 can be output, grow the output buffer and continue
965 if max_lengh < 0. */
966 if (OutputBuffer_GetDataSize(&buffer, lzs->avail_out) == max_length) {
967 break;
968 }
969 if (OutputBuffer_Grow(&buffer, &lzs->next_out, &lzs->avail_out) < 0) {
970 goto error;
971 }
972 } else if (lzs->avail_in == 0) {
973 break;
974 }
975 }
976
977 result = OutputBuffer_Finish(&buffer, lzs->avail_out);
978 if (result != NULL) {
979 return result;
980 }
981
982 error:
983 OutputBuffer_OnError(&buffer);
984 return NULL;
985 }
986
987 static PyObject *
decompress(Decompressor * d,uint8_t * data,size_t len,Py_ssize_t max_length)988 decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
989 {
990 char input_buffer_in_use;
991 PyObject *result;
992 lzma_stream *lzs = &d->lzs;
993
994 /* Prepend unconsumed input if necessary */
995 if (lzs->next_in != NULL) {
996 size_t avail_now, avail_total;
997
998 /* Number of bytes we can append to input buffer */
999 avail_now = (d->input_buffer + d->input_buffer_size)
1000 - (lzs->next_in + lzs->avail_in);
1001
1002 /* Number of bytes we can append if we move existing
1003 contents to beginning of buffer (overwriting
1004 consumed input) */
1005 avail_total = d->input_buffer_size - lzs->avail_in;
1006
1007 if (avail_total < len) {
1008 size_t offset = lzs->next_in - d->input_buffer;
1009 uint8_t *tmp;
1010 size_t new_size = d->input_buffer_size + len - avail_now;
1011
1012 /* Assign to temporary variable first, so we don't
1013 lose address of allocated buffer if realloc fails */
1014 tmp = PyMem_Realloc(d->input_buffer, new_size);
1015 if (tmp == NULL) {
1016 PyErr_SetNone(PyExc_MemoryError);
1017 return NULL;
1018 }
1019 d->input_buffer = tmp;
1020 d->input_buffer_size = new_size;
1021
1022 lzs->next_in = d->input_buffer + offset;
1023 }
1024 else if (avail_now < len) {
1025 memmove(d->input_buffer, lzs->next_in,
1026 lzs->avail_in);
1027 lzs->next_in = d->input_buffer;
1028 }
1029 memcpy((void*)(lzs->next_in + lzs->avail_in), data, len);
1030 lzs->avail_in += len;
1031 input_buffer_in_use = 1;
1032 }
1033 else {
1034 lzs->next_in = data;
1035 lzs->avail_in = len;
1036 input_buffer_in_use = 0;
1037 }
1038
1039 result = decompress_buf(d, max_length);
1040 if (result == NULL) {
1041 lzs->next_in = NULL;
1042 return NULL;
1043 }
1044
1045 if (d->eof) {
1046 d->needs_input = 0;
1047 if (lzs->avail_in > 0) {
1048 Py_XSETREF(d->unused_data,
1049 PyBytes_FromStringAndSize((char *)lzs->next_in, lzs->avail_in));
1050 if (d->unused_data == NULL) {
1051 goto error;
1052 }
1053 }
1054 }
1055 else if (lzs->avail_in == 0) {
1056 lzs->next_in = NULL;
1057
1058 if (lzs->avail_out == 0) {
1059 /* (avail_in==0 && avail_out==0)
1060 Maybe lzs's internal state still have a few bytes can
1061 be output, try to output them next time. */
1062 d->needs_input = 0;
1063
1064 /* If max_length < 0, lzs->avail_out always > 0 */
1065 assert(max_length >= 0);
1066 } else {
1067 /* Input buffer exhausted, output buffer has space. */
1068 d->needs_input = 1;
1069 }
1070 }
1071 else {
1072 d->needs_input = 0;
1073
1074 /* If we did not use the input buffer, we now have
1075 to copy the tail from the caller's buffer into the
1076 input buffer */
1077 if (!input_buffer_in_use) {
1078
1079 /* Discard buffer if it's too small
1080 (resizing it may needlessly copy the current contents) */
1081 if (d->input_buffer != NULL &&
1082 d->input_buffer_size < lzs->avail_in) {
1083 PyMem_Free(d->input_buffer);
1084 d->input_buffer = NULL;
1085 }
1086
1087 /* Allocate if necessary */
1088 if (d->input_buffer == NULL) {
1089 d->input_buffer = PyMem_Malloc(lzs->avail_in);
1090 if (d->input_buffer == NULL) {
1091 PyErr_SetNone(PyExc_MemoryError);
1092 goto error;
1093 }
1094 d->input_buffer_size = lzs->avail_in;
1095 }
1096
1097 /* Copy tail */
1098 memcpy(d->input_buffer, lzs->next_in, lzs->avail_in);
1099 lzs->next_in = d->input_buffer;
1100 }
1101 }
1102
1103 return result;
1104
1105 error:
1106 Py_XDECREF(result);
1107 return NULL;
1108 }
1109
1110 /*[clinic input]
1111 _lzma.LZMADecompressor.decompress
1112
1113 data: Py_buffer
1114 max_length: Py_ssize_t=-1
1115
1116 Decompress *data*, returning uncompressed data as bytes.
1117
1118 If *max_length* is nonnegative, returns at most *max_length* bytes of
1119 decompressed data. If this limit is reached and further output can be
1120 produced, *self.needs_input* will be set to ``False``. In this case, the next
1121 call to *decompress()* may provide *data* as b'' to obtain more of the output.
1122
1123 If all of the input data was decompressed and returned (either because this
1124 was less than *max_length* bytes, or because *max_length* was negative),
1125 *self.needs_input* will be set to True.
1126
1127 Attempting to decompress data after the end of stream is reached raises an
1128 EOFError. Any data found after the end of the stream is ignored and saved in
1129 the unused_data attribute.
1130 [clinic start generated code]*/
1131
1132 static PyObject *
_lzma_LZMADecompressor_decompress_impl(Decompressor * self,Py_buffer * data,Py_ssize_t max_length)1133 _lzma_LZMADecompressor_decompress_impl(Decompressor *self, Py_buffer *data,
1134 Py_ssize_t max_length)
1135 /*[clinic end generated code: output=ef4e20ec7122241d input=60c1f135820e309d]*/
1136 {
1137 PyObject *result = NULL;
1138
1139 ACQUIRE_LOCK(self);
1140 if (self->eof)
1141 PyErr_SetString(PyExc_EOFError, "Already at end of stream");
1142 else
1143 result = decompress(self, data->buf, data->len, max_length);
1144 RELEASE_LOCK(self);
1145 return result;
1146 }
1147
1148 static int
Decompressor_init_raw(_lzma_state * state,lzma_stream * lzs,PyObject * filterspecs)1149 Decompressor_init_raw(_lzma_state *state, lzma_stream *lzs, PyObject *filterspecs)
1150 {
1151 lzma_filter filters[LZMA_FILTERS_MAX + 1];
1152 lzma_ret lzret;
1153
1154 if (parse_filter_chain_spec(state, filters, filterspecs) == -1) {
1155 return -1;
1156 }
1157 lzret = lzma_raw_decoder(lzs, filters);
1158 free_filter_chain(filters);
1159 if (catch_lzma_error(state, lzret)) {
1160 return -1;
1161 }
1162 else {
1163 return 0;
1164 }
1165 }
1166
1167 /*[clinic input]
1168 _lzma.LZMADecompressor.__init__
1169
1170 format: int(c_default="FORMAT_AUTO") = FORMAT_AUTO
1171 Specifies the container format of the input stream. If this is
1172 FORMAT_AUTO (the default), the decompressor will automatically detect
1173 whether the input is FORMAT_XZ or FORMAT_ALONE. Streams created with
1174 FORMAT_RAW cannot be autodetected.
1175
1176 memlimit: object = None
1177 Limit the amount of memory used by the decompressor. This will cause
1178 decompression to fail if the input cannot be decompressed within the
1179 given limit.
1180
1181 filters: object = None
1182 A custom filter chain. This argument is required for FORMAT_RAW, and
1183 not accepted with any other format. When provided, this should be a
1184 sequence of dicts, each indicating the ID and options for a single
1185 filter.
1186
1187 Create a decompressor object for decompressing data incrementally.
1188
1189 For one-shot decompression, use the decompress() function instead.
1190 [clinic start generated code]*/
1191
1192 static int
_lzma_LZMADecompressor___init___impl(Decompressor * self,int format,PyObject * memlimit,PyObject * filters)1193 _lzma_LZMADecompressor___init___impl(Decompressor *self, int format,
1194 PyObject *memlimit, PyObject *filters)
1195 /*[clinic end generated code: output=3e1821f8aa36564c input=81fe684a6c2f8a27]*/
1196 {
1197 const uint32_t decoder_flags = LZMA_TELL_ANY_CHECK | LZMA_TELL_NO_CHECK;
1198 uint64_t memlimit_ = UINT64_MAX;
1199 lzma_ret lzret;
1200 _lzma_state *state = PyType_GetModuleState(Py_TYPE(self));
1201 assert(state != NULL);
1202
1203 if (memlimit != Py_None) {
1204 if (format == FORMAT_RAW) {
1205 PyErr_SetString(PyExc_ValueError,
1206 "Cannot specify memory limit with FORMAT_RAW");
1207 return -1;
1208 }
1209 memlimit_ = PyLong_AsUnsignedLongLong(memlimit);
1210 if (PyErr_Occurred()) {
1211 return -1;
1212 }
1213 }
1214
1215 if (format == FORMAT_RAW && filters == Py_None) {
1216 PyErr_SetString(PyExc_ValueError,
1217 "Must specify filters for FORMAT_RAW");
1218 return -1;
1219 } else if (format != FORMAT_RAW && filters != Py_None) {
1220 PyErr_SetString(PyExc_ValueError,
1221 "Cannot specify filters except with FORMAT_RAW");
1222 return -1;
1223 }
1224
1225 self->alloc.opaque = NULL;
1226 self->alloc.alloc = PyLzma_Malloc;
1227 self->alloc.free = PyLzma_Free;
1228 self->lzs.allocator = &self->alloc;
1229 self->lzs.next_in = NULL;
1230
1231 PyThread_type_lock lock = PyThread_allocate_lock();
1232 if (lock == NULL) {
1233 PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock");
1234 return -1;
1235 }
1236 if (self->lock != NULL) {
1237 PyThread_free_lock(self->lock);
1238 }
1239 self->lock = lock;
1240
1241 self->check = LZMA_CHECK_UNKNOWN;
1242 self->needs_input = 1;
1243 self->input_buffer = NULL;
1244 self->input_buffer_size = 0;
1245 Py_XSETREF(self->unused_data, PyBytes_FromStringAndSize(NULL, 0));
1246 if (self->unused_data == NULL) {
1247 goto error;
1248 }
1249
1250 switch (format) {
1251 case FORMAT_AUTO:
1252 lzret = lzma_auto_decoder(&self->lzs, memlimit_, decoder_flags);
1253 if (catch_lzma_error(state, lzret)) {
1254 break;
1255 }
1256 return 0;
1257
1258 case FORMAT_XZ:
1259 lzret = lzma_stream_decoder(&self->lzs, memlimit_, decoder_flags);
1260 if (catch_lzma_error(state, lzret)) {
1261 break;
1262 }
1263 return 0;
1264
1265 case FORMAT_ALONE:
1266 self->check = LZMA_CHECK_NONE;
1267 lzret = lzma_alone_decoder(&self->lzs, memlimit_);
1268 if (catch_lzma_error(state, lzret)) {
1269 break;
1270 }
1271 return 0;
1272
1273 case FORMAT_RAW:
1274 self->check = LZMA_CHECK_NONE;
1275 if (Decompressor_init_raw(state, &self->lzs, filters) == -1) {
1276 break;
1277 }
1278 return 0;
1279
1280 default:
1281 PyErr_Format(PyExc_ValueError,
1282 "Invalid container format: %d", format);
1283 break;
1284 }
1285
1286 error:
1287 Py_CLEAR(self->unused_data);
1288 PyThread_free_lock(self->lock);
1289 self->lock = NULL;
1290 return -1;
1291 }
1292
1293 static void
Decompressor_dealloc(Decompressor * self)1294 Decompressor_dealloc(Decompressor *self)
1295 {
1296 if(self->input_buffer != NULL)
1297 PyMem_Free(self->input_buffer);
1298
1299 lzma_end(&self->lzs);
1300 Py_CLEAR(self->unused_data);
1301 if (self->lock != NULL) {
1302 PyThread_free_lock(self->lock);
1303 }
1304 PyTypeObject *tp = Py_TYPE(self);
1305 tp->tp_free((PyObject *)self);
1306 Py_DECREF(tp);
1307 }
1308
1309 static int
Decompressor_traverse(Decompressor * self,visitproc visit,void * arg)1310 Decompressor_traverse(Decompressor *self, visitproc visit, void *arg)
1311 {
1312 Py_VISIT(Py_TYPE(self));
1313 return 0;
1314 }
1315
/* Method table for LZMADecompressor; terminated by the {NULL} sentinel. */
static PyMethodDef Decompressor_methods[] = {
    _LZMA_LZMADECOMPRESSOR_DECOMPRESS_METHODDEF
    {NULL}
};
1320
/* Docstrings for the read-only attributes listed in Decompressor_members. */
PyDoc_STRVAR(Decompressor_check_doc,
"ID of the integrity check used by the input stream.");

PyDoc_STRVAR(Decompressor_eof_doc,
"True if the end-of-stream marker has been reached.");

PyDoc_STRVAR(Decompressor_needs_input_doc,
"True if more input is needed before more decompressed data can be produced.");

PyDoc_STRVAR(Decompressor_unused_data_doc,
"Data found after the end of the compressed stream.");

/* Read-only attributes that map directly onto fields of the Decompressor
   struct; terminated by the {NULL} sentinel. */
static PyMemberDef Decompressor_members[] = {
    {"check", T_INT, offsetof(Decompressor, check), READONLY,
     Decompressor_check_doc},
    {"eof", T_BOOL, offsetof(Decompressor, eof), READONLY,
     Decompressor_eof_doc},
    {"needs_input", T_BOOL, offsetof(Decompressor, needs_input), READONLY,
     Decompressor_needs_input_doc},
    {"unused_data", T_OBJECT_EX, offsetof(Decompressor, unused_data), READONLY,
     Decompressor_unused_data_doc},
    {NULL}
};
1344
/* Slot table used by lzma_decompressor_type_spec to build the
   LZMADecompressor heap type; terminated by the {0, 0} sentinel. */
static PyType_Slot lzma_decompressor_type_slots[] = {
    {Py_tp_dealloc, Decompressor_dealloc},
    {Py_tp_methods, Decompressor_methods},
    {Py_tp_init, _lzma_LZMADecompressor___init__},
    {Py_tp_new, PyType_GenericNew},
    {Py_tp_doc, (char *)_lzma_LZMADecompressor___init____doc__},
    {Py_tp_traverse, Decompressor_traverse},
    {Py_tp_members, Decompressor_members},
    {0, 0}
};
1355
/* Type specification for _lzma.LZMADecompressor. */
static PyType_Spec lzma_decompressor_type_spec = {
    .name = "_lzma.LZMADecompressor",
    .basicsize = sizeof(Decompressor),
    // Calling PyType_GetModuleState() on a subclass is not safe.
    // lzma_decompressor_type_spec does not set the Py_TPFLAGS_BASETYPE flag,
    // which prevents subclasses from being created.
    // So calling PyType_GetModuleState() in this file is always safe.
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE),
    .slots = lzma_decompressor_type_slots,
};
1366
1367
1368 /* Module-level functions. */
1369
1370 /*[clinic input]
1371 _lzma.is_check_supported
1372 check_id: int
1373 /
1374
1375 Test whether the given integrity check is supported.
1376
1377 Always returns True for CHECK_NONE and CHECK_CRC32.
1378 [clinic start generated code]*/
1379
1380 static PyObject *
_lzma_is_check_supported_impl(PyObject * module,int check_id)1381 _lzma_is_check_supported_impl(PyObject *module, int check_id)
1382 /*[clinic end generated code: output=e4f14ba3ce2ad0a5 input=5518297b97b2318f]*/
1383 {
1384 return PyBool_FromLong(lzma_check_is_supported(check_id));
1385 }
1386
/* Docstring for the module-level _encode_filter_properties() function. */
PyDoc_STRVAR(_lzma__encode_filter_properties__doc__,
"_encode_filter_properties($module, filter, /)\n"
"--\n"
"\n"
"Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).\n"
"\n"
"The result does not include the filter ID itself, only the options.");

/* Method-table entry: registers the wrapper as a METH_O function. */
#define _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF    \
    {"_encode_filter_properties", (PyCFunction)_lzma__encode_filter_properties, METH_O, _lzma__encode_filter_properties__doc__},

/* Forward declaration: the wrapper below calls the implementation before
   it is defined. */
static PyObject *
_lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter);
1400
1401 static PyObject *
_lzma__encode_filter_properties(PyObject * module,PyObject * arg)1402 _lzma__encode_filter_properties(PyObject *module, PyObject *arg)
1403 {
1404 PyObject *return_value = NULL;
1405 lzma_filter filter = {LZMA_VLI_UNKNOWN, NULL};
1406 _lzma_state *state = get_lzma_state(module);
1407 assert(state != NULL);
1408 if (!lzma_filter_converter(state, arg, &filter)) {
1409 goto exit;
1410 }
1411 return_value = _lzma__encode_filter_properties_impl(module, filter);
1412
1413 exit:
1414 /* Cleanup for filter */
1415 if (filter.id != LZMA_VLI_UNKNOWN) {
1416 PyMem_Free(filter.options);
1417 }
1418
1419 return return_value;
1420 }
1421
1422 static PyObject *
_lzma__encode_filter_properties_impl(PyObject * module,lzma_filter filter)1423 _lzma__encode_filter_properties_impl(PyObject *module, lzma_filter filter)
1424 {
1425 lzma_ret lzret;
1426 uint32_t encoded_size;
1427 PyObject *result = NULL;
1428 _lzma_state *state = get_lzma_state(module);
1429 assert(state != NULL);
1430
1431 lzret = lzma_properties_size(&encoded_size, &filter);
1432 if (catch_lzma_error(state, lzret))
1433 goto error;
1434
1435 result = PyBytes_FromStringAndSize(NULL, encoded_size);
1436 if (result == NULL)
1437 goto error;
1438
1439 lzret = lzma_properties_encode(
1440 &filter, (uint8_t *)PyBytes_AS_STRING(result));
1441 if (catch_lzma_error(state, lzret)) {
1442 goto error;
1443 }
1444
1445 return result;
1446
1447 error:
1448 Py_XDECREF(result);
1449 return NULL;
1450 }
1451
1452
1453 /*[clinic input]
1454 _lzma._decode_filter_properties
1455 filter_id: lzma_vli
1456 encoded_props: Py_buffer
1457 /
1458
1459 Return a bytes object encoding the options (properties) of the filter specified by *filter* (a dict).
1460
1461 The result does not include the filter ID itself, only the options.
1462 [clinic start generated code]*/
1463
1464 static PyObject *
_lzma__decode_filter_properties_impl(PyObject * module,lzma_vli filter_id,Py_buffer * encoded_props)1465 _lzma__decode_filter_properties_impl(PyObject *module, lzma_vli filter_id,
1466 Py_buffer *encoded_props)
1467 /*[clinic end generated code: output=714fd2ef565d5c60 input=246410800782160c]*/
1468 {
1469 lzma_filter filter;
1470 lzma_ret lzret;
1471 PyObject *result = NULL;
1472 filter.id = filter_id;
1473 _lzma_state *state = get_lzma_state(module);
1474 assert(state != NULL);
1475
1476 lzret = lzma_properties_decode(
1477 &filter, NULL, encoded_props->buf, encoded_props->len);
1478 if (catch_lzma_error(state, lzret)) {
1479 return NULL;
1480 }
1481
1482 result = build_filter_spec(&filter);
1483
1484 /* We use vanilla free() here instead of PyMem_Free() - filter.options was
1485 allocated by lzma_properties_decode() using the default allocator. */
1486 free(filter.options);
1487 return result;
1488 }
1489
1490 /* Some of our constants are more than 32 bits wide, so PyModule_AddIntConstant
1491 would not work correctly on platforms with 32-bit longs. */
1492 static int
module_add_int_constant(PyObject * m,const char * name,long long value)1493 module_add_int_constant(PyObject *m, const char *name, long long value)
1494 {
1495 PyObject *o = PyLong_FromLongLong(value);
1496 if (o == NULL) {
1497 return -1;
1498 }
1499 if (PyModule_AddObject(m, name, o) == 0) {
1500 return 0;
1501 }
1502 Py_DECREF(o);
1503 return -1;
1504 }
1505
1506 static int
lzma_exec(PyObject * module)1507 lzma_exec(PyObject *module)
1508 {
1509 #define ADD_INT_PREFIX_MACRO(module, macro) \
1510 do { \
1511 if (module_add_int_constant(module, #macro, LZMA_ ## macro) < 0) { \
1512 return -1; \
1513 } \
1514 } while(0)
1515
1516 #define ADD_INT_MACRO(module, macro) \
1517 do { \
1518 if (PyModule_AddIntMacro(module, macro) < 0) { \
1519 return -1; \
1520 } \
1521 } while (0)
1522
1523
1524 _lzma_state *state = get_lzma_state(module);
1525
1526 state->empty_tuple = PyTuple_New(0);
1527 if (state->empty_tuple == NULL) {
1528 return -1;
1529 }
1530
1531 ADD_INT_MACRO(module, FORMAT_AUTO);
1532 ADD_INT_MACRO(module, FORMAT_XZ);
1533 ADD_INT_MACRO(module, FORMAT_ALONE);
1534 ADD_INT_MACRO(module, FORMAT_RAW);
1535 ADD_INT_PREFIX_MACRO(module, CHECK_NONE);
1536 ADD_INT_PREFIX_MACRO(module, CHECK_CRC32);
1537 ADD_INT_PREFIX_MACRO(module, CHECK_CRC64);
1538 ADD_INT_PREFIX_MACRO(module, CHECK_SHA256);
1539 ADD_INT_PREFIX_MACRO(module, CHECK_ID_MAX);
1540 ADD_INT_PREFIX_MACRO(module, CHECK_UNKNOWN);
1541 ADD_INT_PREFIX_MACRO(module, FILTER_LZMA1);
1542 ADD_INT_PREFIX_MACRO(module, FILTER_LZMA2);
1543 ADD_INT_PREFIX_MACRO(module, FILTER_DELTA);
1544 ADD_INT_PREFIX_MACRO(module, FILTER_X86);
1545 ADD_INT_PREFIX_MACRO(module, FILTER_IA64);
1546 ADD_INT_PREFIX_MACRO(module, FILTER_ARM);
1547 ADD_INT_PREFIX_MACRO(module, FILTER_ARMTHUMB);
1548 ADD_INT_PREFIX_MACRO(module, FILTER_SPARC);
1549 ADD_INT_PREFIX_MACRO(module, FILTER_POWERPC);
1550 ADD_INT_PREFIX_MACRO(module, MF_HC3);
1551 ADD_INT_PREFIX_MACRO(module, MF_HC4);
1552 ADD_INT_PREFIX_MACRO(module, MF_BT2);
1553 ADD_INT_PREFIX_MACRO(module, MF_BT3);
1554 ADD_INT_PREFIX_MACRO(module, MF_BT4);
1555 ADD_INT_PREFIX_MACRO(module, MODE_FAST);
1556 ADD_INT_PREFIX_MACRO(module, MODE_NORMAL);
1557 ADD_INT_PREFIX_MACRO(module, PRESET_DEFAULT);
1558 ADD_INT_PREFIX_MACRO(module, PRESET_EXTREME);
1559
1560 state->error = PyErr_NewExceptionWithDoc("_lzma.LZMAError", "Call to liblzma failed.", NULL, NULL);
1561 if (state->error == NULL) {
1562 return -1;
1563 }
1564
1565 if (PyModule_AddType(module, (PyTypeObject *)state->error) < 0) {
1566 return -1;
1567 }
1568
1569
1570 state->lzma_compressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
1571 &lzma_compressor_type_spec, NULL);
1572 if (state->lzma_compressor_type == NULL) {
1573 return -1;
1574 }
1575
1576 if (PyModule_AddType(module, state->lzma_compressor_type) < 0) {
1577 return -1;
1578 }
1579
1580 state->lzma_decompressor_type = (PyTypeObject *)PyType_FromModuleAndSpec(module,
1581 &lzma_decompressor_type_spec, NULL);
1582 if (state->lzma_decompressor_type == NULL) {
1583 return -1;
1584 }
1585
1586 if (PyModule_AddType(module, state->lzma_decompressor_type) < 0) {
1587 return -1;
1588 }
1589
1590 return 0;
1591 }
1592
/* Module-level function table; terminated by the {NULL} sentinel. */
static PyMethodDef lzma_methods[] = {
    _LZMA_IS_CHECK_SUPPORTED_METHODDEF
    _LZMA__ENCODE_FILTER_PROPERTIES_METHODDEF
    _LZMA__DECODE_FILTER_PROPERTIES_METHODDEF
    {NULL}
};
1599
/* Module slots: lzma_exec runs as the Py_mod_exec step; terminated by the
   {0, NULL} sentinel. */
static PyModuleDef_Slot lzma_slots[] = {
    {Py_mod_exec, lzma_exec},
    {0, NULL}
};
1604
1605 static int
lzma_traverse(PyObject * module,visitproc visit,void * arg)1606 lzma_traverse(PyObject *module, visitproc visit, void *arg)
1607 {
1608 _lzma_state *state = get_lzma_state(module);
1609 Py_VISIT(state->lzma_compressor_type);
1610 Py_VISIT(state->lzma_decompressor_type);
1611 Py_VISIT(state->error);
1612 Py_VISIT(state->empty_tuple);
1613 return 0;
1614 }
1615
1616 static int
lzma_clear(PyObject * module)1617 lzma_clear(PyObject *module)
1618 {
1619 _lzma_state *state = get_lzma_state(module);
1620 Py_CLEAR(state->lzma_compressor_type);
1621 Py_CLEAR(state->lzma_decompressor_type);
1622 Py_CLEAR(state->error);
1623 Py_CLEAR(state->empty_tuple);
1624 return 0;
1625 }
1626
1627 static void
lzma_free(void * module)1628 lzma_free(void *module)
1629 {
1630 lzma_clear((PyObject *)module);
1631 }
1632
/* Module definition for _lzma.  The per-module state is an _lzma_state
   (m_size); the module is set up through lzma_slots and its state is
   managed by the traverse/clear/free hooks below. */
static PyModuleDef _lzmamodule = {
    PyModuleDef_HEAD_INIT,
    .m_name = "_lzma",
    .m_size = sizeof(_lzma_state),
    .m_methods = lzma_methods,
    .m_slots = lzma_slots,
    .m_traverse = lzma_traverse,
    .m_clear = lzma_clear,
    .m_free = lzma_free,
};
1643
1644 PyMODINIT_FUNC
PyInit__lzma(void)1645 PyInit__lzma(void)
1646 {
1647 return PyModuleDef_Init(&_lzmamodule);
1648 }
1649