1 use std::cmp;
2 use std::io;
3 use std::io::prelude::*;
4 use std::mem;
5 
6 use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser};
7 use crate::crc::CrcReader;
8 use crate::deflate;
9 use crate::Compression;
10 
/// Copies as many bytes as fit from `from[*pos..]` into the front of `into`.
///
/// `pos` is the read cursor into `from`. It is advanced by the number of bytes
/// copied, and that same count is returned. Nothing is copied (and `0` is
/// returned) when `into` is empty or when the cursor is already at or past the
/// end of `from`.
fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
    // `get` yields `None` for a cursor beyond the end of `from`; treat that as
    // "nothing left" instead of panicking on the subtraction or the slice.
    let remaining = from.get(*pos..).unwrap_or(&[]);
    let min = cmp::min(into.len(), remaining.len());
    into[..min].copy_from_slice(&remaining[..min]);
    *pos += min;
    min
}
19 
20 /// A gzip streaming encoder
21 ///
22 /// This structure implements a [`Read`] interface. When read from, it reads
23 /// uncompressed data from the underlying [`BufRead`] and provides the compressed data.
24 ///
25 /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
26 /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
27 ///
28 /// # Examples
29 ///
30 /// ```
31 /// use std::io::prelude::*;
32 /// use std::io;
33 /// use flate2::Compression;
34 /// use flate2::bufread::GzEncoder;
35 /// use std::fs::File;
36 /// use std::io::BufReader;
37 ///
38 /// // Opens sample file, compresses the contents and returns a Vector or error
39 /// // File wrapped in a BufReader implements BufRead
40 ///
41 /// fn open_hello_world() -> io::Result<Vec<u8>> {
42 ///     let f = File::open("examples/hello_world.txt")?;
43 ///     let b = BufReader::new(f);
44 ///     let mut gz = GzEncoder::new(b, Compression::fast());
45 ///     let mut buffer = Vec::new();
46 ///     gz.read_to_end(&mut buffer)?;
47 ///     Ok(buffer)
48 /// }
49 /// ```
#[derive(Debug)]
pub struct GzEncoder<R> {
    // Deflate encoder reading from the source through a `CrcReader`, so the
    // checksum state needed for the trailer is available once compression ends.
    inner: deflate::bufread::DeflateEncoder<CrcReader<R>>,
    // Header bytes handed out by `read` before any compressed data.
    header: Vec<u8>,
    // Output cursor: indexes into `header` while the header is being emitted,
    // then is reset to 0 and reused as the cursor into the 8-byte trailer.
    pos: usize,
    // Set once the inner encoder has reported end of stream; from then on
    // `read` only serves trailer bytes.
    eof: bool,
}
57 
gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R>58 pub fn gz_encoder<R: BufRead>(header: Vec<u8>, r: R, lvl: Compression) -> GzEncoder<R> {
59     let crc = CrcReader::new(r);
60     GzEncoder {
61         inner: deflate::bufread::DeflateEncoder::new(crc, lvl),
62         header,
63         pos: 0,
64         eof: false,
65     }
66 }
67 
68 impl<R: BufRead> GzEncoder<R> {
69     /// Creates a new encoder which will use the given compression level.
70     ///
71     /// The encoder is not configured specially for the emitted header. For
72     /// header configuration, see the `GzBuilder` type.
73     ///
74     /// The data read from the stream `r` will be compressed and available
75     /// through the returned reader.
new(r: R, level: Compression) -> GzEncoder<R>76     pub fn new(r: R, level: Compression) -> GzEncoder<R> {
77         GzBuilder::new().buf_read(r, level)
78     }
79 
read_footer(&mut self, into: &mut [u8]) -> io::Result<usize>80     fn read_footer(&mut self, into: &mut [u8]) -> io::Result<usize> {
81         if self.pos == 8 {
82             return Ok(0);
83         }
84         let crc = self.inner.get_ref().crc();
85         let ref arr = [
86             (crc.sum() >> 0) as u8,
87             (crc.sum() >> 8) as u8,
88             (crc.sum() >> 16) as u8,
89             (crc.sum() >> 24) as u8,
90             (crc.amount() >> 0) as u8,
91             (crc.amount() >> 8) as u8,
92             (crc.amount() >> 16) as u8,
93             (crc.amount() >> 24) as u8,
94         ];
95         Ok(copy(into, arr, &mut self.pos))
96     }
97 }
98 
99 impl<R> GzEncoder<R> {
100     /// Acquires a reference to the underlying reader.
get_ref(&self) -> &R101     pub fn get_ref(&self) -> &R {
102         self.inner.get_ref().get_ref()
103     }
104 
105     /// Acquires a mutable reference to the underlying reader.
106     ///
107     /// Note that mutation of the reader may result in surprising results if
108     /// this encoder is continued to be used.
get_mut(&mut self) -> &mut R109     pub fn get_mut(&mut self) -> &mut R {
110         self.inner.get_mut().get_mut()
111     }
112 
113     /// Returns the underlying stream, consuming this encoder
into_inner(self) -> R114     pub fn into_inner(self) -> R {
115         self.inner.into_inner().into_inner()
116     }
117 }
118 
/// Splits an 8-byte trailer into its two little-endian `u32` fields.
///
/// Bytes `0..4` form the first value (compared against the running checksum
/// by the decoder) and bytes `4..8` form the second (compared against the
/// running byte count). Returns them as `(crc, amt)`.
#[inline]
fn finish(buf: &[u8; 8]) -> (u32, u32) {
    let crc = u32::from_le_bytes([buf[0], buf[1], buf[2], buf[3]]);
    let amt = u32::from_le_bytes([buf[4], buf[5], buf[6], buf[7]]);
    (crc, amt)
}
131 
132 impl<R: BufRead> Read for GzEncoder<R> {
read(&mut self, mut into: &mut [u8]) -> io::Result<usize>133     fn read(&mut self, mut into: &mut [u8]) -> io::Result<usize> {
134         let mut amt = 0;
135         if self.eof {
136             return self.read_footer(into);
137         } else if self.pos < self.header.len() {
138             amt += copy(into, &self.header, &mut self.pos);
139             if amt == into.len() {
140                 return Ok(amt);
141             }
142             let tmp = into;
143             into = &mut tmp[amt..];
144         }
145         match self.inner.read(into)? {
146             0 => {
147                 self.eof = true;
148                 self.pos = 0;
149                 self.read_footer(into)
150             }
151             n => Ok(amt + n),
152         }
153     }
154 }
155 
156 impl<R: BufRead + Write> Write for GzEncoder<R> {
write(&mut self, buf: &[u8]) -> io::Result<usize>157     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
158         self.get_mut().write(buf)
159     }
160 
flush(&mut self) -> io::Result<()>161     fn flush(&mut self) -> io::Result<()> {
162         self.get_mut().flush()
163     }
164 }
165 
166 /// A decoder for a single member of a [gzip file].
167 ///
168 /// This structure implements a [`Read`] interface. When read from, it reads
169 /// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
170 ///
171 /// After reading a single member of the gzip data this reader will return
172 /// Ok(0) even if there are more bytes available in the underlying reader.
173 /// If you need the following bytes, call `into_inner()` after Ok(0) to
174 /// recover the underlying reader.
175 ///
176 /// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
177 /// or read more
178 /// [in the introduction](../index.html#about-multi-member-gzip-files).
179 ///
180 /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
181 /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
182 /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
183 ///
184 /// # Examples
185 ///
186 /// ```
187 /// use std::io::prelude::*;
188 /// use std::io;
189 /// # use flate2::Compression;
190 /// # use flate2::write::GzEncoder;
191 /// use flate2::bufread::GzDecoder;
192 ///
193 /// # fn main() {
194 /// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
195 /// #   e.write_all(b"Hello World").unwrap();
196 /// #   let bytes = e.finish().unwrap();
197 /// #   println!("{}", decode_reader(bytes).unwrap());
198 /// # }
199 /// #
200 /// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
201 /// // Here &[u8] implements BufRead
202 ///
203 /// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
204 ///    let mut gz = GzDecoder::new(&bytes[..]);
205 ///    let mut s = String::new();
206 ///    gz.read_to_string(&mut s)?;
207 ///    Ok(s)
208 /// }
209 /// ```
#[derive(Debug)]
pub struct GzDecoder<R> {
    // Current stage of decoding (header, body, trailer, error, or done).
    state: GzState,
    // Deflate decoder over the source, wrapped in a `CrcReader` whose running
    // checksum and byte count are compared against the trailer.
    reader: CrcReader<deflate::bufread::DeflateDecoder<R>>,
    // When true, a validated member followed by more input restarts header
    // parsing instead of ending the stream.
    multi: bool,
}
216 
// Stages the decoder moves through while reading a gzip member.
#[derive(Debug)]
enum GzState {
    // The header is not fully parsed yet; the parser keeps its progress so
    // parsing can be resumed on the next read.
    Header(GzHeaderParser),
    // The header is parsed and the compressed body is being decoded.
    Body(GzHeader),
    // The body is exhausted and the 8-byte trailer is being collected: the
    // `usize` counts trailer bytes gathered so far, the array holds them.
    Finished(GzHeader, usize, [u8; 8]),
    // Header parsing failed in the constructor; the error is handed to the
    // caller on the next read.
    Err(io::Error),
    // Decoding is over and reads return `Ok(0)`. The header is `None` when
    // this state was reached from `Err`.
    End(Option<GzHeader>),
}
225 
226 impl<R: BufRead> GzDecoder<R> {
227     /// Creates a new decoder from the given reader, immediately parsing the
228     /// gzip header.
new(mut r: R) -> GzDecoder<R>229     pub fn new(mut r: R) -> GzDecoder<R> {
230         let mut header_parser = GzHeaderParser::new();
231 
232         let state = match header_parser.parse(&mut r) {
233             Ok(_) => GzState::Body(GzHeader::from(header_parser)),
234             Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => {
235                 GzState::Header(header_parser)
236             }
237             Err(err) => GzState::Err(err),
238         };
239 
240         GzDecoder {
241             state,
242             reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)),
243             multi: false,
244         }
245     }
246 
multi(mut self, flag: bool) -> GzDecoder<R>247     fn multi(mut self, flag: bool) -> GzDecoder<R> {
248         self.multi = flag;
249         self
250     }
251 }
252 
253 impl<R> GzDecoder<R> {
254     /// Returns the header associated with this stream, if it was valid
header(&self) -> Option<&GzHeader>255     pub fn header(&self) -> Option<&GzHeader> {
256         match &self.state {
257             GzState::Body(header) | GzState::Finished(header, _, _) => Some(header),
258             GzState::End(header) => header.as_ref(),
259             _ => None,
260         }
261     }
262 
263     /// Acquires a reference to the underlying reader.
get_ref(&self) -> &R264     pub fn get_ref(&self) -> &R {
265         self.reader.get_ref().get_ref()
266     }
267 
268     /// Acquires a mutable reference to the underlying stream.
269     ///
270     /// Note that mutation of the stream may result in surprising results if
271     /// this decoder is continued to be used.
get_mut(&mut self) -> &mut R272     pub fn get_mut(&mut self) -> &mut R {
273         self.reader.get_mut().get_mut()
274     }
275 
276     /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> R277     pub fn into_inner(self) -> R {
278         self.reader.into_inner().into_inner()
279     }
280 }
281 
impl<R: BufRead> Read for GzDecoder<R> {
    /// Drives the decoder state machine until it can return decompressed
    /// bytes, an error, or end of stream.
    ///
    /// Returns the number of decompressed bytes written to `into`. `Ok(0)` is
    /// returned for an empty `into` while in the body and once the decoder has
    /// reached its end state. A trailer that does not match the running
    /// checksum or byte count yields the error built by `corrupt()`.
    fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
        loop {
            match &mut self.state {
                GzState::Header(parser) => {
                    // Resume header parsing on the raw reader. On error the `?`
                    // returns before the state is replaced, so the parser and
                    // its progress stay in place for the next call.
                    parser.parse(self.reader.get_mut().get_mut())?;
                    self.state = GzState::Body(GzHeader::from(mem::take(parser)));
                }
                GzState::Body(header) => {
                    // With an empty buffer a 0 from the reader below could not
                    // be told apart from end of body, so answer it directly.
                    if into.is_empty() {
                        return Ok(0);
                    }
                    match self.reader.read(into)? {
                        0 => {
                            // Body exhausted: start collecting the trailer with
                            // an empty 8-byte buffer, then loop again.
                            self.state = GzState::Finished(mem::take(header), 0, [0; 8]);
                        }
                        n => {
                            return Ok(n);
                        }
                    }
                }
                GzState::Finished(header, pos, buf) => {
                    if *pos < buf.len() {
                        // Trailer bytes come from the raw reader, bypassing both
                        // the deflate decoder and the checksum wrapper.
                        *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?;
                    } else {
                        // All 8 trailer bytes are in: decode the two values.
                        let (crc, amt) = finish(&buf);

                        if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() {
                            // Move to the end state before reporting, so later
                            // reads return Ok(0) and the header stays available.
                            self.state = GzState::End(Some(mem::take(header)));
                            return Err(corrupt());
                        } else if self.multi {
                            // Peek at the raw reader to see whether any input
                            // follows this member.
                            let is_eof = self
                                .reader
                                .get_mut()
                                .get_mut()
                                .fill_buf()
                                .map(|buf| buf.is_empty())?;

                            if is_eof {
                                self.state = GzState::End(Some(mem::take(header)));
                            } else {
                                // More input follows: reset the checksum wrapper
                                // and the decoder under it (presumably clearing
                                // their per-member state; confirm against those
                                // types), then parse the next header.
                                self.reader.reset();
                                self.reader.get_mut().reset_data();
                                self.state = GzState::Header(GzHeaderParser::new())
                            }
                        } else {
                            self.state = GzState::End(Some(mem::take(header)));
                        }
                    }
                }
                GzState::Err(err) => {
                    // Hand the stored error out exactly once. A placeholder is
                    // swapped in only so the real error can be moved out; the
                    // state is replaced immediately afterwards.
                    let result = Err(mem::replace(err, io::ErrorKind::Other.into()));
                    self.state = GzState::End(None);
                    return result;
                }
                GzState::End(_) => return Ok(0),
            }
        }
    }
}
342 
343 impl<R: BufRead + Write> Write for GzDecoder<R> {
write(&mut self, buf: &[u8]) -> io::Result<usize>344     fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
345         self.get_mut().write(buf)
346     }
347 
flush(&mut self) -> io::Result<()>348     fn flush(&mut self) -> io::Result<()> {
349         self.get_mut().flush()
350     }
351 }
352 
353 /// A gzip streaming decoder that decodes a [gzip file] that may have multiple members.
354 ///
355 /// This structure implements a [`Read`] interface. When read from, it reads
356 /// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
357 ///
358 /// A gzip file consists of a series of *members* concatenated one after another.
359 /// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the
360 /// underlying reader does. For a file, this reads to the end of the file.
361 ///
/// To handle members separately, see [GzDecoder] or read more
363 /// [in the introduction](../index.html#about-multi-member-gzip-files).
364 ///
365 /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
366 /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
367 /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
368 ///
369 /// # Examples
370 ///
371 /// ```
372 /// use std::io::prelude::*;
373 /// use std::io;
374 /// # use flate2::Compression;
375 /// # use flate2::write::GzEncoder;
376 /// use flate2::bufread::MultiGzDecoder;
377 ///
378 /// # fn main() {
379 /// #   let mut e = GzEncoder::new(Vec::new(), Compression::default());
380 /// #   e.write_all(b"Hello World").unwrap();
381 /// #   let bytes = e.finish().unwrap();
382 /// #   println!("{}", decode_reader(bytes).unwrap());
383 /// # }
384 /// #
385 /// // Uncompresses a Gz Encoded vector of bytes and returns a string or error
386 /// // Here &[u8] implements BufRead
387 ///
388 /// fn decode_reader(bytes: Vec<u8>) -> io::Result<String> {
389 ///    let mut gz = MultiGzDecoder::new(&bytes[..]);
390 ///    let mut s = String::new();
391 ///    gz.read_to_string(&mut s)?;
392 ///    Ok(s)
393 /// }
394 /// ```
#[derive(Debug)]
// Newtype over a `GzDecoder` that is constructed with its multi-member flag
// enabled; every method delegates to the wrapped decoder.
pub struct MultiGzDecoder<R>(GzDecoder<R>);
397 
398 impl<R: BufRead> MultiGzDecoder<R> {
399     /// Creates a new decoder from the given reader, immediately parsing the
400     /// (first) gzip header. If the gzip stream contains multiple members all will
401     /// be decoded.
new(r: R) -> MultiGzDecoder<R>402     pub fn new(r: R) -> MultiGzDecoder<R> {
403         MultiGzDecoder(GzDecoder::new(r).multi(true))
404     }
405 }
406 
407 impl<R> MultiGzDecoder<R> {
408     /// Returns the current header associated with this stream, if it's valid
header(&self) -> Option<&GzHeader>409     pub fn header(&self) -> Option<&GzHeader> {
410         self.0.header()
411     }
412 
413     /// Acquires a reference to the underlying reader.
get_ref(&self) -> &R414     pub fn get_ref(&self) -> &R {
415         self.0.get_ref()
416     }
417 
418     /// Acquires a mutable reference to the underlying stream.
419     ///
420     /// Note that mutation of the stream may result in surprising results if
421     /// this decoder is continued to be used.
get_mut(&mut self) -> &mut R422     pub fn get_mut(&mut self) -> &mut R {
423         self.0.get_mut()
424     }
425 
426     /// Consumes this decoder, returning the underlying reader.
into_inner(self) -> R427     pub fn into_inner(self) -> R {
428         self.0.into_inner()
429     }
430 }
431 
432 impl<R: BufRead> Read for MultiGzDecoder<R> {
read(&mut self, into: &mut [u8]) -> io::Result<usize>433     fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
434         self.0.read(into)
435     }
436 }
437 
#[cfg(test)]
mod test {
    use crate::bufread::GzDecoder;
    use crate::gz::write;
    use crate::Compression;
    use std::io::{Read, Write};

    // GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any
    // additional data to be consumed by the caller.
    #[test]
    fn decode_extra_data() {
        let expected = "Hello World";

        let compressed = {
            let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
            // `write` is allowed to accept only part of the input; `write_all` makes sure the
            // whole payload is compressed.
            e.write_all(expected.as_ref()).unwrap();
            let mut b = e.finish().unwrap();
            // One trailing byte that is not part of the gzip member.
            b.push(b'x');
            b
        };

        let mut output = Vec::new();
        let mut decoder = GzDecoder::new(compressed.as_slice());
        let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
        assert_eq!(decoded_bytes, output.len());
        let actual = std::str::from_utf8(&output).expect("String parsing error");
        assert_eq!(
            actual, expected,
            "after decompression we obtain the original input"
        );

        // Probe with a non-empty buffer: a zero-length one would yield 0 regardless of the
        // decoder's state and so would not check anything.
        let mut scratch = [0u8; 4];
        assert_eq!(
            decoder.read(&mut scratch).unwrap(),
            0,
            "subsequent read of decoder returns 0, but inner reader can return additional data"
        );

        output.clear();
        let mut reader = decoder.into_inner();
        assert_eq!(
            reader.read_to_end(&mut output).unwrap(),
            1,
            "extra data is accessible in underlying buf-read"
        );
        assert_eq!(output, b"x");
    }
}
484