1 use crate::engine::Engine; 2 use std::{ 3 cmp, fmt, io, 4 io::{ErrorKind, Result}, 5 }; 6 7 pub(crate) const BUF_SIZE: usize = 1024; 8 /// The most bytes whose encoding will fit in `BUF_SIZE` 9 const MAX_INPUT_LEN: usize = BUF_SIZE / 4 * 3; 10 // 3 bytes of input = 4 bytes of base64, always (because we don't allow line wrapping) 11 const MIN_ENCODE_CHUNK_SIZE: usize = 3; 12 13 /// A `Write` implementation that base64 encodes data before delegating to the wrapped writer. 14 /// 15 /// Because base64 has special handling for the end of the input data (padding, etc), there's a 16 /// `finish()` method on this type that encodes any leftover input bytes and adds padding if 17 /// appropriate. It's called automatically when deallocated (see the `Drop` implementation), but 18 /// any error that occurs when invoking the underlying writer will be suppressed. If you want to 19 /// handle such errors, call `finish()` yourself. 20 /// 21 /// # Examples 22 /// 23 /// ``` 24 /// use std::io::Write; 25 /// use base64::engine::general_purpose; 26 /// 27 /// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc. 28 /// let mut enc = base64::write::EncoderWriter::new(Vec::new(), &general_purpose::STANDARD); 29 /// 30 /// // handle errors as you normally would 31 /// enc.write_all(b"asdf").unwrap(); 32 /// 33 /// // could leave this out to be called by Drop, if you don't care 34 /// // about handling errors or getting the delegate writer back 35 /// let delegate = enc.finish().unwrap(); 36 /// 37 /// // base64 was written to the writer 38 /// assert_eq!(b"YXNkZg==", &delegate[..]); 39 /// 40 /// ``` 41 /// 42 /// # Panics 43 /// 44 /// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without 45 /// error is invalid and will panic. 46 /// 47 /// # Errors 48 /// 49 /// Base64 encoding itself does not generate errors, but errors from the wrapped writer will be 50 /// returned as per the contract of `Write`. 51 /// 52 /// # Performance 53 /// 54 /// It has some minor performance loss compared to encoding slices (a couple percent). 55 /// It does not do any heap allocation. 56 /// 57 /// # Limitations 58 /// 59 /// Owing to the specification of the `write` and `flush` methods on the `Write` trait and their 60 /// implications for a buffering implementation, these methods may not behave as expected. In 61 /// particular, calling `write_all` on this interface may fail with `io::ErrorKind::WriteZero`. 62 /// See the documentation of the `Write` trait implementation for further details. 63 pub struct EncoderWriter<'e, E: Engine, W: io::Write> { 64 engine: &'e E, 65 /// Where encoded data is written to. It's an Option as it's None immediately before Drop is 66 /// called so that finish() can return the underlying writer. None implies that finish() has 67 /// been called successfully. 68 delegate: Option<W>, 69 /// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk 70 /// with the next `write()`, encode it, then proceed with the rest of the input normally. 71 extra_input: [u8; MIN_ENCODE_CHUNK_SIZE], 72 /// How much of `extra` is occupied, in `[0, MIN_ENCODE_CHUNK_SIZE]`. 73 extra_input_occupied_len: usize, 74 /// Buffer to encode into. May hold leftover encoded bytes from a previous write call that the underlying writer 75 /// did not write last time. 76 output: [u8; BUF_SIZE], 77 /// How much of `output` is occupied with encoded data that couldn't be written last time 78 output_occupied_len: usize, 79 /// panic safety: don't write again in destructor if writer panicked while we were writing to it 80 panicked: bool, 81 } 82 83 impl<'e, E: Engine, W: io::Write> fmt::Debug for EncoderWriter<'e, E, W> { fmt(&self, f: &mut fmt::Formatter) -> fmt::Result84 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 85 write!( 86 f, 87 "extra_input: {:?} extra_input_occupied_len:{:?} output[..5]: {:?} output_occupied_len: {:?}", 88 self.extra_input, 89 self.extra_input_occupied_len, 90 &self.output[0..5], 91 self.output_occupied_len 92 ) 93 } 94 } 95 96 impl<'e, E: Engine, W: io::Write> EncoderWriter<'e, E, W> { 97 /// Create a new encoder that will write to the provided delegate writer. new(delegate: W, engine: &'e E) -> EncoderWriter<'e, E, W>98 pub fn new(delegate: W, engine: &'e E) -> EncoderWriter<'e, E, W> { 99 EncoderWriter { 100 engine, 101 delegate: Some(delegate), 102 extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE], 103 extra_input_occupied_len: 0, 104 output: [0u8; BUF_SIZE], 105 output_occupied_len: 0, 106 panicked: false, 107 } 108 } 109 110 /// Encode all remaining buffered data and write it, including any trailing incomplete input 111 /// triples and associated padding. 112 /// 113 /// Once this succeeds, no further writes or calls to this method are allowed. 114 /// 115 /// This may write to the delegate writer multiple times if the delegate writer does not accept 116 /// all input provided to its `write` each invocation. 117 /// 118 /// If you don't care about error handling, it is not necessary to call this function, as the 119 /// equivalent finalization is done by the Drop impl. 120 /// 121 /// Returns the writer that this was constructed around. 122 /// 123 /// # Errors 124 /// 125 /// The first error that is not of `ErrorKind::Interrupted` will be returned. finish(&mut self) -> Result<W>126 pub fn finish(&mut self) -> Result<W> { 127 // If we could consume self in finish(), we wouldn't have to worry about this case, but 128 // finish() is retryable in the face of I/O errors, so we can't consume here. 129 if self.delegate.is_none() { 130 panic!("Encoder has already had finish() called"); 131 }; 132 133 self.write_final_leftovers()?; 134 135 let writer = self.delegate.take().expect("Writer must be present"); 136 137 Ok(writer) 138 } 139 140 /// Write any remaining buffered data to the delegate writer. write_final_leftovers(&mut self) -> Result<()>141 fn write_final_leftovers(&mut self) -> Result<()> { 142 if self.delegate.is_none() { 143 // finish() has already successfully called this, and we are now in drop() with a None 144 // writer, so just no-op 145 return Ok(()); 146 } 147 148 self.write_all_encoded_output()?; 149 150 if self.extra_input_occupied_len > 0 { 151 let encoded_len = self 152 .engine 153 .encode_slice( 154 &self.extra_input[..self.extra_input_occupied_len], 155 &mut self.output[..], 156 ) 157 .expect("buffer is large enough"); 158 159 self.output_occupied_len = encoded_len; 160 161 self.write_all_encoded_output()?; 162 163 // write succeeded, do not write the encoding of extra again if finish() is retried 164 self.extra_input_occupied_len = 0; 165 } 166 167 Ok(()) 168 } 169 170 /// Write as much of the encoded output to the delegate writer as it will accept, and store the 171 /// leftovers to be attempted at the next write() call. Updates `self.output_occupied_len`. 172 /// 173 /// # Errors 174 /// 175 /// Errors from the delegate writer are returned. In the case of an error, 176 /// `self.output_occupied_len` will not be updated, as errors from `write` are specified to mean 177 /// that no write took place. write_to_delegate(&mut self, current_output_len: usize) -> Result<()>178 fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> { 179 self.panicked = true; 180 let res = self 181 .delegate 182 .as_mut() 183 .expect("Writer must be present") 184 .write(&self.output[..current_output_len]); 185 self.panicked = false; 186 187 res.map(|consumed| { 188 debug_assert!(consumed <= current_output_len); 189 190 if consumed < current_output_len { 191 self.output_occupied_len = current_output_len.checked_sub(consumed).unwrap(); 192 // If we're blocking on I/O, the minor inefficiency of copying bytes to the 193 // start of the buffer is the least of our concerns... 194 // TODO Rotate moves more than we need to; copy_within now stable. 195 self.output.rotate_left(consumed); 196 } else { 197 self.output_occupied_len = 0; 198 } 199 }) 200 } 201 202 /// Write all buffered encoded output. If this returns `Ok`, `self.output_occupied_len` is `0`. 203 /// 204 /// This is basically write_all for the remaining buffered data but without the undesirable 205 /// abort-on-`Ok(0)` behavior. 206 /// 207 /// # Errors 208 /// 209 /// Any error emitted by the delegate writer abort the write loop and is returned, unless it's 210 /// `Interrupted`, in which case the error is ignored and writes will continue. write_all_encoded_output(&mut self) -> Result<()>211 fn write_all_encoded_output(&mut self) -> Result<()> { 212 while self.output_occupied_len > 0 { 213 let remaining_len = self.output_occupied_len; 214 match self.write_to_delegate(remaining_len) { 215 // try again on interrupts ala write_all 216 Err(ref e) if e.kind() == ErrorKind::Interrupted => {} 217 // other errors return 218 Err(e) => return Err(e), 219 // success no-ops because remaining length is already updated 220 Ok(_) => {} 221 }; 222 } 223 224 debug_assert_eq!(0, self.output_occupied_len); 225 Ok(()) 226 } 227 228 /// Unwraps this `EncoderWriter`, returning the base writer it writes base64 encoded output 229 /// to. 230 /// 231 /// Normally this method should not be needed, since `finish()` returns the inner writer if 232 /// it completes successfully. That will also ensure all data has been flushed, which the 233 /// `into_inner()` function does *not* do. 234 /// 235 /// Calling this method after `finish()` has completed successfully will panic, since the 236 /// writer has already been returned. 237 /// 238 /// This method may be useful if the writer implements additional APIs beyond the `Write` 239 /// trait. Note that the inner writer might be in an error state or have an incomplete 240 /// base64 string written to it. into_inner(mut self) -> W241 pub fn into_inner(mut self) -> W { 242 self.delegate 243 .take() 244 .expect("Encoder has already had finish() called") 245 } 246 } 247 248 impl<'e, E: Engine, W: io::Write> io::Write for EncoderWriter<'e, E, W> { 249 /// Encode input and then write to the delegate writer. 250 /// 251 /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes 252 /// of `input` consumed. The value may be `0`, which interacts poorly with `write_all`, which 253 /// interprets `Ok(0)` as an error, despite it being allowed by the contract of `write`. See 254 /// <https://github.com/rust-lang/rust/issues/56889> for more on that. 255 /// 256 /// If the previous call to `write` provided more (encoded) data than the delegate writer could 257 /// accept in a single call to its `write`, the remaining data is buffered. As long as buffered 258 /// data is present, subsequent calls to `write` will try to write the remaining buffered data 259 /// to the delegate and return either `Ok(0)` -- and therefore not consume any of `input` -- or 260 /// an error. 261 /// 262 /// # Errors 263 /// 264 /// Any errors emitted by the delegate writer are returned. write(&mut self, input: &[u8]) -> Result<usize>265 fn write(&mut self, input: &[u8]) -> Result<usize> { 266 if self.delegate.is_none() { 267 panic!("Cannot write more after calling finish()"); 268 } 269 270 if input.is_empty() { 271 return Ok(0); 272 } 273 274 // The contract of `Write::write` places some constraints on this implementation: 275 // - a call to `write()` represents at most one call to a wrapped `Write`, so we can't 276 // iterate over the input and encode multiple chunks. 277 // - Errors mean that "no bytes were written to this writer", so we need to reset the 278 // internal state to what it was before the error occurred 279 280 // before reading any input, write any leftover encoded output from last time 281 if self.output_occupied_len > 0 { 282 let current_len = self.output_occupied_len; 283 return self 284 .write_to_delegate(current_len) 285 // did not read any input 286 .map(|_| 0); 287 } 288 289 debug_assert_eq!(0, self.output_occupied_len); 290 291 // how many bytes, if any, were read into `extra` to create a triple to encode 292 let mut extra_input_read_len = 0; 293 let mut input = input; 294 295 let orig_extra_len = self.extra_input_occupied_len; 296 297 let mut encoded_size = 0; 298 // always a multiple of MIN_ENCODE_CHUNK_SIZE 299 let mut max_input_len = MAX_INPUT_LEN; 300 301 // process leftover un-encoded input from last write 302 if self.extra_input_occupied_len > 0 { 303 debug_assert!(self.extra_input_occupied_len < 3); 304 if input.len() + self.extra_input_occupied_len >= MIN_ENCODE_CHUNK_SIZE { 305 // Fill up `extra`, encode that into `output`, and consume as much of the rest of 306 // `input` as possible. 307 // We could write just the encoding of `extra` by itself but then we'd have to 308 // return after writing only 4 bytes, which is inefficient if the underlying writer 309 // would make a syscall. 310 extra_input_read_len = MIN_ENCODE_CHUNK_SIZE - self.extra_input_occupied_len; 311 debug_assert!(extra_input_read_len > 0); 312 // overwrite only bytes that weren't already used. If we need to rollback extra_len 313 // (when the subsequent write errors), the old leading bytes will still be there. 314 self.extra_input[self.extra_input_occupied_len..MIN_ENCODE_CHUNK_SIZE] 315 .copy_from_slice(&input[0..extra_input_read_len]); 316 317 let len = self.engine.internal_encode( 318 &self.extra_input[0..MIN_ENCODE_CHUNK_SIZE], 319 &mut self.output[..], 320 ); 321 debug_assert_eq!(4, len); 322 323 input = &input[extra_input_read_len..]; 324 325 // consider extra to be used up, since we encoded it 326 self.extra_input_occupied_len = 0; 327 // don't clobber where we just encoded to 328 encoded_size = 4; 329 // and don't read more than can be encoded 330 max_input_len = MAX_INPUT_LEN - MIN_ENCODE_CHUNK_SIZE; 331 332 // fall through to normal encoding 333 } else { 334 // `extra` and `input` are non empty, but `|extra| + |input| < 3`, so there must be 335 // 1 byte in each. 336 debug_assert_eq!(1, input.len()); 337 debug_assert_eq!(1, self.extra_input_occupied_len); 338 339 self.extra_input[self.extra_input_occupied_len] = input[0]; 340 self.extra_input_occupied_len += 1; 341 return Ok(1); 342 }; 343 } else if input.len() < MIN_ENCODE_CHUNK_SIZE { 344 // `extra` is empty, and `input` fits inside it 345 self.extra_input[0..input.len()].copy_from_slice(input); 346 self.extra_input_occupied_len = input.len(); 347 return Ok(input.len()); 348 }; 349 350 // either 0 or 1 complete chunks encoded from extra 351 debug_assert!(encoded_size == 0 || encoded_size == 4); 352 debug_assert!( 353 // didn't encode extra input 354 MAX_INPUT_LEN == max_input_len 355 // encoded one triple 356 || MAX_INPUT_LEN == max_input_len + MIN_ENCODE_CHUNK_SIZE 357 ); 358 359 // encode complete triples only 360 let input_complete_chunks_len = input.len() - (input.len() % MIN_ENCODE_CHUNK_SIZE); 361 let input_chunks_to_encode_len = cmp::min(input_complete_chunks_len, max_input_len); 362 debug_assert_eq!(0, max_input_len % MIN_ENCODE_CHUNK_SIZE); 363 debug_assert_eq!(0, input_chunks_to_encode_len % MIN_ENCODE_CHUNK_SIZE); 364 365 encoded_size += self.engine.internal_encode( 366 &input[..(input_chunks_to_encode_len)], 367 &mut self.output[encoded_size..], 368 ); 369 370 // not updating `self.output_occupied_len` here because if the below write fails, it should 371 // "never take place" -- the buffer contents we encoded are ignored and perhaps retried 372 // later, if the consumer chooses. 373 374 self.write_to_delegate(encoded_size) 375 // no matter whether we wrote the full encoded buffer or not, we consumed the same 376 // input 377 .map(|_| extra_input_read_len + input_chunks_to_encode_len) 378 .map_err(|e| { 379 // in case we filled and encoded `extra`, reset extra_len 380 self.extra_input_occupied_len = orig_extra_len; 381 382 e 383 }) 384 } 385 386 /// Because this is usually treated as OK to call multiple times, it will *not* flush any 387 /// incomplete chunks of input or write padding. 388 /// # Errors 389 /// 390 /// The first error that is not of [`ErrorKind::Interrupted`] will be returned. flush(&mut self) -> Result<()>391 fn flush(&mut self) -> Result<()> { 392 self.write_all_encoded_output()?; 393 self.delegate 394 .as_mut() 395 .expect("Writer must be present") 396 .flush() 397 } 398 } 399 400 impl<'e, E: Engine, W: io::Write> Drop for EncoderWriter<'e, E, W> { drop(&mut self)401 fn drop(&mut self) { 402 if !self.panicked { 403 // like `BufWriter`, ignore errors during drop 404 let _ = self.write_final_leftovers(); 405 } 406 } 407 } 408