1 use crate::{engine::Engine, DecodeError, DecodeSliceError, PAD_BYTE};
2 use std::{cmp, fmt, io};
3 
// Size of the internal buffer of encoded (base64) bytes read from the delegate reader.
// This should be large, but it has to fit on the stack.
pub(crate) const BUF_SIZE: usize = 1024;

// 4 bytes of base64 data encode 3 bytes of raw data (modulo padding).
// Number of encoded bytes in one complete base64 chunk.
const BASE64_CHUNK_SIZE: usize = 4;
// Number of decoded bytes produced by one complete base64 chunk.
const DECODED_CHUNK_SIZE: usize = 3;
10 
/// A `Read` implementation that decodes base64 data read from an underlying reader.
///
/// # Examples
///
/// ```
/// use std::io::Read;
/// use std::io::Cursor;
/// use base64::engine::general_purpose;
///
/// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc.
/// let mut wrapped_reader = Cursor::new(b"YXNkZg==");
/// let mut decoder = base64::read::DecoderReader::new(
///     &mut wrapped_reader,
///     &general_purpose::STANDARD);
///
/// // handle errors as you normally would
/// let mut result = Vec::new();
/// decoder.read_to_end(&mut result).unwrap();
///
/// assert_eq!(b"asdf", &result[..]);
///
/// ```
pub struct DecoderReader<'e, E: Engine, R: io::Read> {
    /// The engine used to decode the buffered base64 data.
    engine: &'e E,
    /// Where b64 data is read from
    inner: R,

    /// Holds b64 data read from the delegate reader.
    b64_buffer: [u8; BUF_SIZE],
    /// The start of the pending buffered data in `b64_buffer`.
    b64_offset: usize,
    /// The amount of buffered b64 data after `b64_offset` in `b64_buffer`.
    b64_len: usize,
    /// Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a
    /// decoded chunk in to, we have to be able to hang on to a few decoded bytes.
    /// Technically we only need to hold 2 bytes, but then we'd need a separate temporary buffer to
    /// decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest
    /// into here, which seems like a lot of complexity for 1 extra byte of storage.
    decoded_chunk_buffer: [u8; DECODED_CHUNK_SIZE],
    /// Index of start of decoded data in `decoded_chunk_buffer`
    decoded_offset: usize,
    /// Length of decoded data after `decoded_offset` in `decoded_chunk_buffer`
    decoded_len: usize,
    /// Input length consumed so far.
    /// Used to provide accurate offsets in errors
    input_consumed_len: usize,
    /// offset of previously seen padding, if any
    padding_offset: Option<usize>,
}
60 
61 // exclude b64_buffer as it's uselessly large
62 impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result63     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
64         f.debug_struct("DecoderReader")
65             .field("b64_offset", &self.b64_offset)
66             .field("b64_len", &self.b64_len)
67             .field("decoded_chunk_buffer", &self.decoded_chunk_buffer)
68             .field("decoded_offset", &self.decoded_offset)
69             .field("decoded_len", &self.decoded_len)
70             .field("input_consumed_len", &self.input_consumed_len)
71             .field("padding_offset", &self.padding_offset)
72             .finish()
73     }
74 }
75 
impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> {
    /// Create a new decoder that will read from the provided reader `r`.
    pub fn new(reader: R, engine: &'e E) -> Self {
        DecoderReader {
            engine,
            inner: reader,
            b64_buffer: [0; BUF_SIZE],
            b64_offset: 0,
            b64_len: 0,
            decoded_chunk_buffer: [0; DECODED_CHUNK_SIZE],
            decoded_offset: 0,
            decoded_len: 0,
            input_consumed_len: 0,
            padding_offset: None,
        }
    }

    /// Write as much as possible of the decoded buffer into the target buffer.
    /// Must only be called when there is something to write and space to write into.
    /// Returns a Result with the number of (decoded) bytes copied.
    fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.decoded_len > 0);
        debug_assert!(!buf.is_empty());

        // copy either everything we have buffered, or as much as `buf` can hold
        let copy_len = cmp::min(self.decoded_len, buf.len());
        debug_assert!(copy_len > 0);
        debug_assert!(copy_len <= self.decoded_len);

        buf[..copy_len].copy_from_slice(
            &self.decoded_chunk_buffer[self.decoded_offset..self.decoded_offset + copy_len],
        );

        // advance past the bytes we just handed to the caller
        self.decoded_offset += copy_len;
        self.decoded_len -= copy_len;

        // holds true because copy_len is always at least 1 here
        debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);

        Ok(copy_len)
    }

    /// Read into the remaining space in the buffer after the current contents.
    /// Must only be called when there is space to read into in the buffer.
    /// Returns the number of bytes read.
    fn read_from_delegate(&mut self) -> io::Result<usize> {
        debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE);

        // append after whatever buffered b64 data we already have
        let read = self
            .inner
            .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?;
        self.b64_len += read;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(read)
    }

    /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the
    /// caller's responsibility to choose the number of b64 bytes to decode correctly.
    ///
    /// Returns a Result with the number of decoded bytes written to `buf`.
    ///
    /// # Panics
    ///
    /// panics if `buf` is too small
    fn decode_to_buf(&mut self, b64_len_to_decode: usize, buf: &mut [u8]) -> io::Result<usize> {
        debug_assert!(self.b64_len >= b64_len_to_decode);
        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
        debug_assert!(!buf.is_empty());

        let b64_to_decode = &self.b64_buffer[self.b64_offset..self.b64_offset + b64_len_to_decode];
        let decode_metadata = self
            .engine
            .internal_decode(
                b64_to_decode,
                buf,
                self.engine.internal_decoded_len_estimate(b64_len_to_decode),
            )
            .map_err(|dse| match dse {
                DecodeSliceError::DecodeError(de) => {
                    // Offsets reported by the engine are relative to this decode call's slice;
                    // shift them by `input_consumed_len` so errors refer to the overall stream.
                    match de {
                        DecodeError::InvalidByte(offset, byte) => {
                            match (byte, self.padding_offset) {
                                // if there was padding in a previous block of decoding that happened to
                                // be correct, and we now find more padding that happens to be incorrect,
                                // to be consistent with non-reader decodes, record the error at the first
                                // padding
                                (PAD_BYTE, Some(first_pad_offset)) => {
                                    DecodeError::InvalidByte(first_pad_offset, PAD_BYTE)
                                }
                                _ => {
                                    DecodeError::InvalidByte(self.input_consumed_len + offset, byte)
                                }
                            }
                        }
                        DecodeError::InvalidLength(len) => {
                            DecodeError::InvalidLength(self.input_consumed_len + len)
                        }
                        DecodeError::InvalidLastSymbol(offset, byte) => {
                            DecodeError::InvalidLastSymbol(self.input_consumed_len + offset, byte)
                        }
                        // carries no offset, so nothing to adjust
                        DecodeError::InvalidPadding => DecodeError::InvalidPadding,
                    }
                }
                DecodeSliceError::OutputSliceTooSmall => {
                    unreachable!("buf is sized correctly in calling code")
                }
            })
            .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?;

        if let Some(offset) = self.padding_offset {
            // we've already seen padding
            if decode_metadata.decoded_len > 0 {
                // we read more after already finding padding; report error at first padding byte
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    DecodeError::InvalidByte(offset, PAD_BYTE),
                ));
            }
        }

        // remember the first padding we ever see (stream-relative offset) so later decode calls
        // can report trailing-data errors at that position
        self.padding_offset = self.padding_offset.or(decode_metadata
            .padding_offset
            .map(|offset| self.input_consumed_len + offset));
        self.input_consumed_len += b64_len_to_decode;
        // consume the decoded b64 bytes from the buffer
        self.b64_offset += b64_len_to_decode;
        self.b64_len -= b64_len_to_decode;

        debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);

        Ok(decode_metadata.decoded_len)
    }

    /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded
    /// input from.
    ///
    /// Because `DecoderReader` performs internal buffering, the state of the inner reader is
    /// unspecified. This function is mainly provided because the inner reader type may provide
    /// additional functionality beyond the `Read` implementation which may still be useful.
    pub fn into_inner(self) -> R {
        self.inner
    }
}
218 
219 impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> {
220     /// Decode input from the wrapped reader.
221     ///
222     /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
223     /// written in `buf`.
224     ///
225     /// Where possible, this function buffers base64 to minimize the number of read() calls to the
226     /// delegate reader.
227     ///
228     /// # Errors
229     ///
230     /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid
231     /// base64 are also possible, and will have `io::ErrorKind::InvalidData`.
read(&mut self, buf: &mut [u8]) -> io::Result<usize>232     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
233         if buf.is_empty() {
234             return Ok(0);
235         }
236 
237         // offset == BUF_SIZE when we copied it all last time
238         debug_assert!(self.b64_offset <= BUF_SIZE);
239         debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE);
240         debug_assert!(if self.b64_offset == BUF_SIZE {
241             self.b64_len == 0
242         } else {
243             self.b64_len <= BUF_SIZE
244         });
245 
246         debug_assert!(if self.decoded_len == 0 {
247             // can be = when we were able to copy the complete chunk
248             self.decoded_offset <= DECODED_CHUNK_SIZE
249         } else {
250             self.decoded_offset < DECODED_CHUNK_SIZE
251         });
252 
253         // We shouldn't ever decode into decoded_buffer when we can't immediately write at least one
254         // byte into the provided buf, so the effective length should only be 3 momentarily between
255         // when we decode and when we copy into the target buffer.
256         debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE);
257         debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE);
258 
259         if self.decoded_len > 0 {
260             // we have a few leftover decoded bytes; flush that rather than pull in more b64
261             self.flush_decoded_buf(buf)
262         } else {
263             let mut at_eof = false;
264             while self.b64_len < BASE64_CHUNK_SIZE {
265                 // Copy any bytes we have to the start of the buffer.
266                 self.b64_buffer
267                     .copy_within(self.b64_offset..self.b64_offset + self.b64_len, 0);
268                 self.b64_offset = 0;
269 
270                 // then fill in more data
271                 let read = self.read_from_delegate()?;
272                 if read == 0 {
273                     // we never read into an empty buf, so 0 => we've hit EOF
274                     at_eof = true;
275                     break;
276                 }
277             }
278 
279             if self.b64_len == 0 {
280                 debug_assert!(at_eof);
281                 // we must be at EOF, and we have no data left to decode
282                 return Ok(0);
283             };
284 
285             debug_assert!(if at_eof {
286                 // if we are at eof, we may not have a complete chunk
287                 self.b64_len > 0
288             } else {
289                 // otherwise, we must have at least one chunk
290                 self.b64_len >= BASE64_CHUNK_SIZE
291             });
292 
293             debug_assert_eq!(0, self.decoded_len);
294 
295             if buf.len() < DECODED_CHUNK_SIZE {
296                 // caller requested an annoyingly short read
297                 // have to write to a tmp buf first to avoid double mutable borrow
298                 let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE];
299                 // if we are at eof, could have less than BASE64_CHUNK_SIZE, in which case we have
300                 // to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64
301                 // tokens, not 1, since 1 token can't decode to 1 byte).
302                 let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE);
303 
304                 let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?;
305                 self.decoded_chunk_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]);
306 
307                 self.decoded_offset = 0;
308                 self.decoded_len = decoded;
309 
310                 // can be less than 3 on last block due to padding
311                 debug_assert!(decoded <= 3);
312 
313                 self.flush_decoded_buf(buf)
314             } else {
315                 let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE)
316                     .checked_mul(BASE64_CHUNK_SIZE)
317                     .expect("too many chunks");
318                 debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE);
319 
320                 let b64_bytes_available_to_decode = if at_eof {
321                     self.b64_len
322                 } else {
323                     // only use complete chunks
324                     self.b64_len - self.b64_len % 4
325                 };
326 
327                 let actual_decode_len = cmp::min(
328                     b64_bytes_that_can_decode_into_buf,
329                     b64_bytes_available_to_decode,
330                 );
331                 self.decode_to_buf(actual_decode_len, buf)
332             }
333         }
334     }
335 }
336