1 use crate::{engine::Engine, DecodeError, DecodeSliceError, PAD_BYTE}; 2 use std::{cmp, fmt, io}; 3 4 // This should be large, but it has to fit on the stack. 5 pub(crate) const BUF_SIZE: usize = 1024; 6 7 // 4 bytes of base64 data encode 3 bytes of raw data (modulo padding). 8 const BASE64_CHUNK_SIZE: usize = 4; 9 const DECODED_CHUNK_SIZE: usize = 3; 10 11 /// A `Read` implementation that decodes base64 data read from an underlying reader. 12 /// 13 /// # Examples 14 /// 15 /// ``` 16 /// use std::io::Read; 17 /// use std::io::Cursor; 18 /// use base64::engine::general_purpose; 19 /// 20 /// // use a cursor as the simplest possible `Read` -- in real code this is probably a file, etc. 21 /// let mut wrapped_reader = Cursor::new(b"YXNkZg=="); 22 /// let mut decoder = base64::read::DecoderReader::new( 23 /// &mut wrapped_reader, 24 /// &general_purpose::STANDARD); 25 /// 26 /// // handle errors as you normally would 27 /// let mut result = Vec::new(); 28 /// decoder.read_to_end(&mut result).unwrap(); 29 /// 30 /// assert_eq!(b"asdf", &result[..]); 31 /// 32 /// ``` 33 pub struct DecoderReader<'e, E: Engine, R: io::Read> { 34 engine: &'e E, 35 /// Where b64 data is read from 36 inner: R, 37 38 /// Holds b64 data read from the delegate reader. 39 b64_buffer: [u8; BUF_SIZE], 40 /// The start of the pending buffered data in `b64_buffer`. 41 b64_offset: usize, 42 /// The amount of buffered b64 data after `b64_offset` in `b64_len`. 43 b64_len: usize, 44 /// Since the caller may provide us with a buffer of size 1 or 2 that's too small to copy a 45 /// decoded chunk in to, we have to be able to hang on to a few decoded bytes. 46 /// Technically we only need to hold 2 bytes, but then we'd need a separate temporary buffer to 47 /// decode 3 bytes into and then juggle copying one byte into the provided read buf and the rest 48 /// into here, which seems like a lot of complexity for 1 extra byte of storage. 49 decoded_chunk_buffer: [u8; DECODED_CHUNK_SIZE], 50 /// Index of start of decoded data in `decoded_chunk_buffer` 51 decoded_offset: usize, 52 /// Length of decoded data after `decoded_offset` in `decoded_chunk_buffer` 53 decoded_len: usize, 54 /// Input length consumed so far. 55 /// Used to provide accurate offsets in errors 56 input_consumed_len: usize, 57 /// offset of previously seen padding, if any 58 padding_offset: Option<usize>, 59 } 60 61 // exclude b64_buffer as it's uselessly large 62 impl<'e, E: Engine, R: io::Read> fmt::Debug for DecoderReader<'e, E, R> { fmt(&self, f: &mut fmt::Formatter) -> fmt::Result63 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 64 f.debug_struct("DecoderReader") 65 .field("b64_offset", &self.b64_offset) 66 .field("b64_len", &self.b64_len) 67 .field("decoded_chunk_buffer", &self.decoded_chunk_buffer) 68 .field("decoded_offset", &self.decoded_offset) 69 .field("decoded_len", &self.decoded_len) 70 .field("input_consumed_len", &self.input_consumed_len) 71 .field("padding_offset", &self.padding_offset) 72 .finish() 73 } 74 } 75 76 impl<'e, E: Engine, R: io::Read> DecoderReader<'e, E, R> { 77 /// Create a new decoder that will read from the provided reader `r`. new(reader: R, engine: &'e E) -> Self78 pub fn new(reader: R, engine: &'e E) -> Self { 79 DecoderReader { 80 engine, 81 inner: reader, 82 b64_buffer: [0; BUF_SIZE], 83 b64_offset: 0, 84 b64_len: 0, 85 decoded_chunk_buffer: [0; DECODED_CHUNK_SIZE], 86 decoded_offset: 0, 87 decoded_len: 0, 88 input_consumed_len: 0, 89 padding_offset: None, 90 } 91 } 92 93 /// Write as much as possible of the decoded buffer into the target buffer. 94 /// Must only be called when there is something to write and space to write into. 95 /// Returns a Result with the number of (decoded) bytes copied. flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize>96 fn flush_decoded_buf(&mut self, buf: &mut [u8]) -> io::Result<usize> { 97 debug_assert!(self.decoded_len > 0); 98 debug_assert!(!buf.is_empty()); 99 100 let copy_len = cmp::min(self.decoded_len, buf.len()); 101 debug_assert!(copy_len > 0); 102 debug_assert!(copy_len <= self.decoded_len); 103 104 buf[..copy_len].copy_from_slice( 105 &self.decoded_chunk_buffer[self.decoded_offset..self.decoded_offset + copy_len], 106 ); 107 108 self.decoded_offset += copy_len; 109 self.decoded_len -= copy_len; 110 111 debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE); 112 113 Ok(copy_len) 114 } 115 116 /// Read into the remaining space in the buffer after the current contents. 117 /// Must only be called when there is space to read into in the buffer. 118 /// Returns the number of bytes read. read_from_delegate(&mut self) -> io::Result<usize>119 fn read_from_delegate(&mut self) -> io::Result<usize> { 120 debug_assert!(self.b64_offset + self.b64_len < BUF_SIZE); 121 122 let read = self 123 .inner 124 .read(&mut self.b64_buffer[self.b64_offset + self.b64_len..])?; 125 self.b64_len += read; 126 127 debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); 128 129 Ok(read) 130 } 131 132 /// Decode the requested number of bytes from the b64 buffer into the provided buffer. It's the 133 /// caller's responsibility to choose the number of b64 bytes to decode correctly. 134 /// 135 /// Returns a Result with the number of decoded bytes written to `buf`. 136 /// 137 /// # Panics 138 /// 139 /// panics if `buf` is too small decode_to_buf(&mut self, b64_len_to_decode: usize, buf: &mut [u8]) -> io::Result<usize>140 fn decode_to_buf(&mut self, b64_len_to_decode: usize, buf: &mut [u8]) -> io::Result<usize> { 141 debug_assert!(self.b64_len >= b64_len_to_decode); 142 debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); 143 debug_assert!(!buf.is_empty()); 144 145 let b64_to_decode = &self.b64_buffer[self.b64_offset..self.b64_offset + b64_len_to_decode]; 146 let decode_metadata = self 147 .engine 148 .internal_decode( 149 b64_to_decode, 150 buf, 151 self.engine.internal_decoded_len_estimate(b64_len_to_decode), 152 ) 153 .map_err(|dse| match dse { 154 DecodeSliceError::DecodeError(de) => { 155 match de { 156 DecodeError::InvalidByte(offset, byte) => { 157 match (byte, self.padding_offset) { 158 // if there was padding in a previous block of decoding that happened to 159 // be correct, and we now find more padding that happens to be incorrect, 160 // to be consistent with non-reader decodes, record the error at the first 161 // padding 162 (PAD_BYTE, Some(first_pad_offset)) => { 163 DecodeError::InvalidByte(first_pad_offset, PAD_BYTE) 164 } 165 _ => { 166 DecodeError::InvalidByte(self.input_consumed_len + offset, byte) 167 } 168 } 169 } 170 DecodeError::InvalidLength(len) => { 171 DecodeError::InvalidLength(self.input_consumed_len + len) 172 } 173 DecodeError::InvalidLastSymbol(offset, byte) => { 174 DecodeError::InvalidLastSymbol(self.input_consumed_len + offset, byte) 175 } 176 DecodeError::InvalidPadding => DecodeError::InvalidPadding, 177 } 178 } 179 DecodeSliceError::OutputSliceTooSmall => { 180 unreachable!("buf is sized correctly in calling code") 181 } 182 }) 183 .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; 184 185 if let Some(offset) = self.padding_offset { 186 // we've already seen padding 187 if decode_metadata.decoded_len > 0 { 188 // we read more after already finding padding; report error at first padding byte 189 return Err(io::Error::new( 190 io::ErrorKind::InvalidData, 191 DecodeError::InvalidByte(offset, PAD_BYTE), 192 )); 193 } 194 } 195 196 self.padding_offset = self.padding_offset.or(decode_metadata 197 .padding_offset 198 .map(|offset| self.input_consumed_len + offset)); 199 self.input_consumed_len += b64_len_to_decode; 200 self.b64_offset += b64_len_to_decode; 201 self.b64_len -= b64_len_to_decode; 202 203 debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); 204 205 Ok(decode_metadata.decoded_len) 206 } 207 208 /// Unwraps this `DecoderReader`, returning the base reader which it reads base64 encoded 209 /// input from. 210 /// 211 /// Because `DecoderReader` performs internal buffering, the state of the inner reader is 212 /// unspecified. This function is mainly provided because the inner reader type may provide 213 /// additional functionality beyond the `Read` implementation which may still be useful. into_inner(self) -> R214 pub fn into_inner(self) -> R { 215 self.inner 216 } 217 } 218 219 impl<'e, E: Engine, R: io::Read> io::Read for DecoderReader<'e, E, R> { 220 /// Decode input from the wrapped reader. 221 /// 222 /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes 223 /// written in `buf`. 224 /// 225 /// Where possible, this function buffers base64 to minimize the number of read() calls to the 226 /// delegate reader. 227 /// 228 /// # Errors 229 /// 230 /// Any errors emitted by the delegate reader are returned. Decoding errors due to invalid 231 /// base64 are also possible, and will have `io::ErrorKind::InvalidData`. read(&mut self, buf: &mut [u8]) -> io::Result<usize>232 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { 233 if buf.is_empty() { 234 return Ok(0); 235 } 236 237 // offset == BUF_SIZE when we copied it all last time 238 debug_assert!(self.b64_offset <= BUF_SIZE); 239 debug_assert!(self.b64_offset + self.b64_len <= BUF_SIZE); 240 debug_assert!(if self.b64_offset == BUF_SIZE { 241 self.b64_len == 0 242 } else { 243 self.b64_len <= BUF_SIZE 244 }); 245 246 debug_assert!(if self.decoded_len == 0 { 247 // can be = when we were able to copy the complete chunk 248 self.decoded_offset <= DECODED_CHUNK_SIZE 249 } else { 250 self.decoded_offset < DECODED_CHUNK_SIZE 251 }); 252 253 // We shouldn't ever decode into decoded_buffer when we can't immediately write at least one 254 // byte into the provided buf, so the effective length should only be 3 momentarily between 255 // when we decode and when we copy into the target buffer. 256 debug_assert!(self.decoded_len < DECODED_CHUNK_SIZE); 257 debug_assert!(self.decoded_len + self.decoded_offset <= DECODED_CHUNK_SIZE); 258 259 if self.decoded_len > 0 { 260 // we have a few leftover decoded bytes; flush that rather than pull in more b64 261 self.flush_decoded_buf(buf) 262 } else { 263 let mut at_eof = false; 264 while self.b64_len < BASE64_CHUNK_SIZE { 265 // Copy any bytes we have to the start of the buffer. 266 self.b64_buffer 267 .copy_within(self.b64_offset..self.b64_offset + self.b64_len, 0); 268 self.b64_offset = 0; 269 270 // then fill in more data 271 let read = self.read_from_delegate()?; 272 if read == 0 { 273 // we never read into an empty buf, so 0 => we've hit EOF 274 at_eof = true; 275 break; 276 } 277 } 278 279 if self.b64_len == 0 { 280 debug_assert!(at_eof); 281 // we must be at EOF, and we have no data left to decode 282 return Ok(0); 283 }; 284 285 debug_assert!(if at_eof { 286 // if we are at eof, we may not have a complete chunk 287 self.b64_len > 0 288 } else { 289 // otherwise, we must have at least one chunk 290 self.b64_len >= BASE64_CHUNK_SIZE 291 }); 292 293 debug_assert_eq!(0, self.decoded_len); 294 295 if buf.len() < DECODED_CHUNK_SIZE { 296 // caller requested an annoyingly short read 297 // have to write to a tmp buf first to avoid double mutable borrow 298 let mut decoded_chunk = [0_u8; DECODED_CHUNK_SIZE]; 299 // if we are at eof, could have less than BASE64_CHUNK_SIZE, in which case we have 300 // to assume that these last few tokens are, in fact, valid (i.e. must be 2-4 b64 301 // tokens, not 1, since 1 token can't decode to 1 byte). 302 let to_decode = cmp::min(self.b64_len, BASE64_CHUNK_SIZE); 303 304 let decoded = self.decode_to_buf(to_decode, &mut decoded_chunk[..])?; 305 self.decoded_chunk_buffer[..decoded].copy_from_slice(&decoded_chunk[..decoded]); 306 307 self.decoded_offset = 0; 308 self.decoded_len = decoded; 309 310 // can be less than 3 on last block due to padding 311 debug_assert!(decoded <= 3); 312 313 self.flush_decoded_buf(buf) 314 } else { 315 let b64_bytes_that_can_decode_into_buf = (buf.len() / DECODED_CHUNK_SIZE) 316 .checked_mul(BASE64_CHUNK_SIZE) 317 .expect("too many chunks"); 318 debug_assert!(b64_bytes_that_can_decode_into_buf >= BASE64_CHUNK_SIZE); 319 320 let b64_bytes_available_to_decode = if at_eof { 321 self.b64_len 322 } else { 323 // only use complete chunks 324 self.b64_len - self.b64_len % 4 325 }; 326 327 let actual_decode_len = cmp::min( 328 b64_bytes_that_can_decode_into_buf, 329 b64_bytes_available_to_decode, 330 ); 331 self.decode_to_buf(actual_decode_len, buf) 332 } 333 } 334 } 335 } 336