1 use crate::engine::Engine;
2 use std::{
3     cmp, fmt, io,
4     io::{ErrorKind, Result},
5 };
6 
/// Raw input bytes per complete base64 group.
// 3 bytes of input = 4 bytes of base64, always (because we don't allow line wrapping)
const MIN_ENCODE_CHUNK_SIZE: usize = 3;
/// Capacity, in bytes, of the buffer that encoded output is staged in.
pub(crate) const BUF_SIZE: usize = 1024;
/// The most input bytes whose encoding will fit in `BUF_SIZE`: every 4 bytes of buffer hold the
/// encoding of one complete input chunk.
const MAX_INPUT_LEN: usize = (BUF_SIZE / 4) * MIN_ENCODE_CHUNK_SIZE;
12 
/// A `Write` implementation that base64 encodes data before delegating to the wrapped writer.
///
/// Because base64 has special handling for the end of the input data (padding, etc), there's a
/// `finish()` method on this type that encodes any leftover input bytes and adds padding if
/// appropriate. It's called automatically when deallocated (see the `Drop` implementation), but
/// any error that occurs when invoking the underlying writer will be suppressed. If you want to
/// handle such errors, call `finish()` yourself.
///
/// # Examples
///
/// ```
/// use std::io::Write;
/// use base64::engine::general_purpose;
///
/// // use a vec as the simplest possible `Write` -- in real code this is probably a file, etc.
/// let mut enc = base64::write::EncoderWriter::new(Vec::new(), &general_purpose::STANDARD);
///
/// // handle errors as you normally would
/// enc.write_all(b"asdf").unwrap();
///
/// // could leave this out to be called by Drop, if you don't care
/// // about handling errors or getting the delegate writer back
/// let delegate = enc.finish().unwrap();
///
/// // base64 was written to the writer
/// assert_eq!(b"YXNkZg==", &delegate[..]);
///
/// ```
///
/// # Panics
///
/// Calling `write()` (or related methods) or `finish()` after `finish()` has completed without
/// error is invalid and will panic.
///
/// # Errors
///
/// Base64 encoding itself does not generate errors, but errors from the wrapped writer will be
/// returned as per the contract of `Write`.
///
/// # Performance
///
/// It has some minor performance loss compared to encoding slices (a couple percent).
/// It does not do any heap allocation.
///
/// # Limitations
///
/// Owing to the specification of the `write` and `flush` methods on the `Write` trait and their
/// implications for a buffering implementation, these methods may not behave as expected. In
/// particular, calling `write_all` on this interface may fail with `io::ErrorKind::WriteZero`.
/// See the documentation of the `Write` trait implementation for further details.
pub struct EncoderWriter<'e, E: Engine, W: io::Write> {
    /// The engine used to turn input bytes into base64.
    engine: &'e E,
    /// Where encoded data is written to. It's an `Option` so that the writer can be moved out and
    /// handed back to the caller: it becomes `None` once `finish()` has completed successfully
    /// or `into_inner()` has taken it, which happens before `Drop` runs.
    delegate: Option<W>,
    /// Holds a partial chunk, if any, after the last `write()`, so that we may then fill the chunk
    /// with the next `write()`, encode it, then proceed with the rest of the input normally.
    extra_input: [u8; MIN_ENCODE_CHUNK_SIZE],
    /// How many leading bytes of `extra_input` are occupied. A complete chunk is encoded as soon
    /// as it is assembled, so between calls this stays below `MIN_ENCODE_CHUNK_SIZE`.
    extra_input_occupied_len: usize,
    /// Buffer to encode into. May hold leftover encoded bytes from a previous write call that the
    /// underlying writer did not write last time.
    output: [u8; BUF_SIZE],
    /// How many leading bytes of `output` hold encoded data that couldn't be written last time
    output_occupied_len: usize,
    /// panic safety: don't write again in destructor if writer panicked while we were writing to it
    panicked: bool,
}
82 
83 impl<'e, E: Engine, W: io::Write> fmt::Debug for EncoderWriter<'e, E, W> {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result84     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
85         write!(
86             f,
87             "extra_input: {:?} extra_input_occupied_len:{:?} output[..5]: {:?} output_occupied_len: {:?}",
88             self.extra_input,
89             self.extra_input_occupied_len,
90             &self.output[0..5],
91             self.output_occupied_len
92         )
93     }
94 }
95 
96 impl<'e, E: Engine, W: io::Write> EncoderWriter<'e, E, W> {
97     /// Create a new encoder that will write to the provided delegate writer.
new(delegate: W, engine: &'e E) -> EncoderWriter<'e, E, W>98     pub fn new(delegate: W, engine: &'e E) -> EncoderWriter<'e, E, W> {
99         EncoderWriter {
100             engine,
101             delegate: Some(delegate),
102             extra_input: [0u8; MIN_ENCODE_CHUNK_SIZE],
103             extra_input_occupied_len: 0,
104             output: [0u8; BUF_SIZE],
105             output_occupied_len: 0,
106             panicked: false,
107         }
108     }
109 
110     /// Encode all remaining buffered data and write it, including any trailing incomplete input
111     /// triples and associated padding.
112     ///
113     /// Once this succeeds, no further writes or calls to this method are allowed.
114     ///
115     /// This may write to the delegate writer multiple times if the delegate writer does not accept
116     /// all input provided to its `write` each invocation.
117     ///
118     /// If you don't care about error handling, it is not necessary to call this function, as the
119     /// equivalent finalization is done by the Drop impl.
120     ///
121     /// Returns the writer that this was constructed around.
122     ///
123     /// # Errors
124     ///
125     /// The first error that is not of `ErrorKind::Interrupted` will be returned.
finish(&mut self) -> Result<W>126     pub fn finish(&mut self) -> Result<W> {
127         // If we could consume self in finish(), we wouldn't have to worry about this case, but
128         // finish() is retryable in the face of I/O errors, so we can't consume here.
129         if self.delegate.is_none() {
130             panic!("Encoder has already had finish() called");
131         };
132 
133         self.write_final_leftovers()?;
134 
135         let writer = self.delegate.take().expect("Writer must be present");
136 
137         Ok(writer)
138     }
139 
140     /// Write any remaining buffered data to the delegate writer.
write_final_leftovers(&mut self) -> Result<()>141     fn write_final_leftovers(&mut self) -> Result<()> {
142         if self.delegate.is_none() {
143             // finish() has already successfully called this, and we are now in drop() with a None
144             // writer, so just no-op
145             return Ok(());
146         }
147 
148         self.write_all_encoded_output()?;
149 
150         if self.extra_input_occupied_len > 0 {
151             let encoded_len = self
152                 .engine
153                 .encode_slice(
154                     &self.extra_input[..self.extra_input_occupied_len],
155                     &mut self.output[..],
156                 )
157                 .expect("buffer is large enough");
158 
159             self.output_occupied_len = encoded_len;
160 
161             self.write_all_encoded_output()?;
162 
163             // write succeeded, do not write the encoding of extra again if finish() is retried
164             self.extra_input_occupied_len = 0;
165         }
166 
167         Ok(())
168     }
169 
170     /// Write as much of the encoded output to the delegate writer as it will accept, and store the
171     /// leftovers to be attempted at the next write() call. Updates `self.output_occupied_len`.
172     ///
173     /// # Errors
174     ///
175     /// Errors from the delegate writer are returned. In the case of an error,
176     /// `self.output_occupied_len` will not be updated, as errors from `write` are specified to mean
177     /// that no write took place.
write_to_delegate(&mut self, current_output_len: usize) -> Result<()>178     fn write_to_delegate(&mut self, current_output_len: usize) -> Result<()> {
179         self.panicked = true;
180         let res = self
181             .delegate
182             .as_mut()
183             .expect("Writer must be present")
184             .write(&self.output[..current_output_len]);
185         self.panicked = false;
186 
187         res.map(|consumed| {
188             debug_assert!(consumed <= current_output_len);
189 
190             if consumed < current_output_len {
191                 self.output_occupied_len = current_output_len.checked_sub(consumed).unwrap();
192                 // If we're blocking on I/O, the minor inefficiency of copying bytes to the
193                 // start of the buffer is the least of our concerns...
194                 // TODO Rotate moves more than we need to; copy_within now stable.
195                 self.output.rotate_left(consumed);
196             } else {
197                 self.output_occupied_len = 0;
198             }
199         })
200     }
201 
202     /// Write all buffered encoded output. If this returns `Ok`, `self.output_occupied_len` is `0`.
203     ///
204     /// This is basically write_all for the remaining buffered data but without the undesirable
205     /// abort-on-`Ok(0)` behavior.
206     ///
207     /// # Errors
208     ///
209     /// Any error emitted by the delegate writer abort the write loop and is returned, unless it's
210     /// `Interrupted`, in which case the error is ignored and writes will continue.
write_all_encoded_output(&mut self) -> Result<()>211     fn write_all_encoded_output(&mut self) -> Result<()> {
212         while self.output_occupied_len > 0 {
213             let remaining_len = self.output_occupied_len;
214             match self.write_to_delegate(remaining_len) {
215                 // try again on interrupts ala write_all
216                 Err(ref e) if e.kind() == ErrorKind::Interrupted => {}
217                 // other errors return
218                 Err(e) => return Err(e),
219                 // success no-ops because remaining length is already updated
220                 Ok(_) => {}
221             };
222         }
223 
224         debug_assert_eq!(0, self.output_occupied_len);
225         Ok(())
226     }
227 
228     /// Unwraps this `EncoderWriter`, returning the base writer it writes base64 encoded output
229     /// to.
230     ///
231     /// Normally this method should not be needed, since `finish()` returns the inner writer if
232     /// it completes successfully. That will also ensure all data has been flushed, which the
233     /// `into_inner()` function does *not* do.
234     ///
235     /// Calling this method after `finish()` has completed successfully will panic, since the
236     /// writer has already been returned.
237     ///
238     /// This method may be useful if the writer implements additional APIs beyond the `Write`
239     /// trait. Note that the inner writer might be in an error state or have an incomplete
240     /// base64 string written to it.
into_inner(mut self) -> W241     pub fn into_inner(mut self) -> W {
242         self.delegate
243             .take()
244             .expect("Encoder has already had finish() called")
245     }
246 }
247 
248 impl<'e, E: Engine, W: io::Write> io::Write for EncoderWriter<'e, E, W> {
    /// Encode input and then write to the delegate writer.
    ///
    /// Under non-error circumstances, this returns `Ok` with the value being the number of bytes
    /// of `input` consumed. The value may be `0`, which interacts poorly with `write_all`, which
    /// interprets `Ok(0)` as an error, despite it being allowed by the contract of `write`. See
    /// <https://github.com/rust-lang/rust/issues/56889> for more on that.
    ///
    /// If the previous call to `write` provided more (encoded) data than the delegate writer could
    /// accept in a single call to its `write`, the remaining data is buffered. As long as buffered
    /// data is present, subsequent calls to `write` will try to write the remaining buffered data
    /// to the delegate and return either `Ok(0)` -- and therefore not consume any of `input` -- or
    /// an error.
    ///
    /// Input that does not yet make up a complete 3-byte chunk is held back in `extra_input`
    /// and reported as consumed without anything being written to the delegate. At most
    /// `MAX_INPUT_LEN` bytes of `input` are consumed per call.
    ///
    /// # Errors
    ///
    /// Any errors emitted by the delegate writer are returned.
    ///
    /// # Panics
    ///
    /// Panics if the delegate writer is no longer present, i.e. after `finish()` has completed
    /// successfully.
    fn write(&mut self, input: &[u8]) -> Result<usize> {
        if self.delegate.is_none() {
            panic!("Cannot write more after calling finish()");
        }

        if input.is_empty() {
            return Ok(0);
        }

        // The contract of `Write::write` places some constraints on this implementation:
        // - a call to `write()` represents at most one call to a wrapped `Write`, so we can't
        // iterate over the input and encode multiple chunks.
        // - Errors mean that "no bytes were written to this writer", so we need to reset the
        // internal state to what it was before the error occurred

        // before reading any input, write any leftover encoded output from last time
        if self.output_occupied_len > 0 {
            let current_len = self.output_occupied_len;
            return self
                .write_to_delegate(current_len)
                // did not read any input
                .map(|_| 0);
        }

        debug_assert_eq!(0, self.output_occupied_len);

        // how many bytes, if any, were read into `extra_input` to create a triple to encode
        let mut extra_input_read_len = 0;
        // shadowed so the slice can be advanced past bytes moved into `extra_input`
        let mut input = input;

        // remembered so the pending-input length can be restored if the delegate write fails
        let orig_extra_len = self.extra_input_occupied_len;

        // number of encoded bytes placed at the start of `output` so far during this call
        let mut encoded_size = 0;
        // always a multiple of MIN_ENCODE_CHUNK_SIZE
        let mut max_input_len = MAX_INPUT_LEN;

        // process leftover un-encoded input from last write
        if self.extra_input_occupied_len > 0 {
            debug_assert!(self.extra_input_occupied_len < 3);
            if input.len() + self.extra_input_occupied_len >= MIN_ENCODE_CHUNK_SIZE {
                // Fill up `extra_input`, encode that into `output`, and consume as much of the
                // rest of `input` as possible.
                // We could write just the encoding of `extra_input` by itself but then we'd have
                // to return after writing only 4 bytes, which is inefficient if the underlying
                // writer would make a syscall.
                extra_input_read_len = MIN_ENCODE_CHUNK_SIZE - self.extra_input_occupied_len;
                debug_assert!(extra_input_read_len > 0);
                // overwrite only bytes that weren't already used. If we need to rollback the
                // occupied length (when the subsequent write errors), the old leading bytes will
                // still be there.
                self.extra_input[self.extra_input_occupied_len..MIN_ENCODE_CHUNK_SIZE]
                    .copy_from_slice(&input[0..extra_input_read_len]);

                // one full triple encodes to exactly 4 output bytes
                let len = self.engine.internal_encode(
                    &self.extra_input[0..MIN_ENCODE_CHUNK_SIZE],
                    &mut self.output[..],
                );
                debug_assert_eq!(4, len);

                // skip the bytes that went into `extra_input`
                input = &input[extra_input_read_len..];

                // consider `extra_input` to be used up, since we encoded it
                self.extra_input_occupied_len = 0;
                // don't clobber where we just encoded to
                encoded_size = 4;
                // and don't read more than can be encoded
                max_input_len = MAX_INPUT_LEN - MIN_ENCODE_CHUNK_SIZE;

            // fall through to normal encoding
            } else {
                // `extra_input` and `input` are non empty, but their combined length is below 3,
                // so there must be 1 byte in each.
                debug_assert_eq!(1, input.len());
                debug_assert_eq!(1, self.extra_input_occupied_len);

                // still no complete triple: stash the byte and report it as consumed
                self.extra_input[self.extra_input_occupied_len] = input[0];
                self.extra_input_occupied_len += 1;
                return Ok(1);
            };
        } else if input.len() < MIN_ENCODE_CHUNK_SIZE {
            // `extra_input` is empty, and `input` fits inside it
            self.extra_input[0..input.len()].copy_from_slice(input);
            self.extra_input_occupied_len = input.len();
            return Ok(input.len());
        };

        // either 0 or 1 complete chunks encoded from `extra_input`
        debug_assert!(encoded_size == 0 || encoded_size == 4);
        debug_assert!(
            // didn't encode extra input
            MAX_INPUT_LEN == max_input_len
                // encoded one triple
                || MAX_INPUT_LEN == max_input_len + MIN_ENCODE_CHUNK_SIZE
        );

        // encode complete triples only; a trailing partial triple is left unconsumed for the
        // caller to offer again
        let input_complete_chunks_len = input.len() - (input.len() % MIN_ENCODE_CHUNK_SIZE);
        let input_chunks_to_encode_len = cmp::min(input_complete_chunks_len, max_input_len);
        debug_assert_eq!(0, max_input_len % MIN_ENCODE_CHUNK_SIZE);
        debug_assert_eq!(0, input_chunks_to_encode_len % MIN_ENCODE_CHUNK_SIZE);

        // append after the 4 bytes encoded from `extra_input`, if there were any
        encoded_size += self.engine.internal_encode(
            &input[..(input_chunks_to_encode_len)],
            &mut self.output[encoded_size..],
        );

        // not updating `self.output_occupied_len` here because if the below write fails, it should
        // "never take place" -- the buffer contents we encoded are ignored and perhaps retried
        // later, if the consumer chooses.

        self.write_to_delegate(encoded_size)
            // no matter whether we wrote the full encoded buffer or not, we consumed the same
            // input
            .map(|_| extra_input_read_len + input_chunks_to_encode_len)
            .map_err(|e| {
                // in case we filled and encoded `extra_input`, restore its occupied length
                self.extra_input_occupied_len = orig_extra_len;

                e
            })
    }
385 
386     /// Because this is usually treated as OK to call multiple times, it will *not* flush any
387     /// incomplete chunks of input or write padding.
388     /// # Errors
389     ///
390     /// The first error that is not of [`ErrorKind::Interrupted`] will be returned.
flush(&mut self) -> Result<()>391     fn flush(&mut self) -> Result<()> {
392         self.write_all_encoded_output()?;
393         self.delegate
394             .as_mut()
395             .expect("Writer must be present")
396             .flush()
397     }
398 }
399 
400 impl<'e, E: Engine, W: io::Write> Drop for EncoderWriter<'e, E, W> {
drop(&mut self)401     fn drop(&mut self) {
402         if !self.panicked {
403             // like `BufWriter`, ignore errors during drop
404             let _ = self.write_final_leftovers();
405         }
406     }
407 }
408