1 //! Provides the [Engine] abstraction and out of the box implementations.
2 #[cfg(any(feature = "alloc", test))]
3 use crate::chunked_encoder;
4 use crate::{
5     encode::{encode_with_padding, EncodeSliceError},
6     encoded_len, DecodeError, DecodeSliceError,
7 };
8 #[cfg(any(feature = "alloc", test))]
9 use alloc::vec::Vec;
10 
11 #[cfg(any(feature = "alloc", test))]
12 use alloc::{string::String, vec};
13 
14 pub mod general_purpose;
15 
16 #[cfg(test)]
17 mod naive;
18 
19 #[cfg(test)]
20 mod tests;
21 
22 pub use general_purpose::{GeneralPurpose, GeneralPurposeConfig};
23 
24 /// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use. Users of the library will generally not need to implement this.
25 ///
26 /// Different implementations offer different characteristics. The library currently ships with
27 /// [GeneralPurpose] that offers good speed and works on any CPU, with more choices
28 /// coming later, like a constant-time one when side channel resistance is called for, and vendor-specific vectorized ones for more speed.
29 ///
30 /// See [general_purpose::STANDARD_NO_PAD] if you just want standard base64. Otherwise, when possible, it's
31 /// recommended to store the engine in a `const` so that references to it won't pose any lifetime
32 /// issues, and to avoid repeating the cost of engine setup.
33 ///
34 /// Since almost nobody will need to implement `Engine`, docs for internal methods are hidden.
// When adding an implementation of Engine, include it in the engine test suite:
36 // - add an implementation of [engine::tests::EngineWrapper]
37 // - add the implementation to the `all_engines` macro
38 // All tests run on all engines listed in the macro.
39 pub trait Engine: Send + Sync {
40     /// The config type used by this engine
41     type Config: Config;
42     /// The decode estimate used by this engine
43     type DecodeEstimate: DecodeEstimate;
44 
    /// This is not meant to be called directly; it is only for `Engine` implementors.
    /// See the other `encode*` functions on this trait.
    ///
    /// Encode the `input` bytes into the `output` buffer using the engine's symbol mapping.
    // NOTE(review): earlier wording referred to an `encode_table`; no such parameter exists in
    // this signature, so the mapping is presumably held by the implementing engine — confirm.
    ///
    /// `output` will be long enough to hold the encoded data.
    ///
    /// Returns the number of bytes written.
    ///
    /// No padding should be written; that is handled separately.
    ///
    /// Must not write any bytes into the output slice other than the encoded data.
    #[doc(hidden)]
    fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize;
59 
    /// This is not meant to be called directly; it is only for `Engine` implementors.
    ///
    /// As an optimization to prevent the decoded length from being calculated twice, it is
    /// sometimes helpful to have a conservative estimate of the decoded size before doing the
    /// decoding, so this calculation is done separately and the result is handed to
    /// [Engine::internal_decode()] by the public `decode*` methods.
    ///
    /// `input_len` is the length in bytes of the encoded input that is about to be decoded.
    #[doc(hidden)]
    fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate;
67 
    /// This is not meant to be called directly; it is only for `Engine` implementors.
    /// See the other `decode*` functions on this trait.
    ///
    /// Decode `input` base64 bytes into the `output` buffer.
    ///
    /// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid
    /// calculating it again (expensive on short inputs).
    ///
    /// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this
    /// function must also handle the final possibly partial chunk.
    /// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4,
    /// the trailing 2 or 3 bytes must decode to 1 or 2 bytes, respectively, as per the
    /// [RFC](https://tools.ietf.org/html/rfc4648#section-3.5).
    ///
    /// Decoding must not write any bytes into the output slice other than the decoded data.
    ///
    /// Non-canonical trailing bits in the final tokens or non-canonical padding must be reported as
    /// errors unless the engine is configured otherwise.
    ///
    /// On success, returns a [DecodeMetadata]; the public `decode*` methods read its
    /// `decoded_len` as the number of bytes written to `output`.
    ///
    /// On failure, returns a [DecodeSliceError]: either a wrapped [DecodeError] or
    /// `OutputSliceTooSmall`.
    #[doc(hidden)]
    fn internal_decode(
        &self,
        input: &[u8],
        output: &mut [u8],
        decode_estimate: Self::DecodeEstimate,
    ) -> Result<DecodeMetadata, DecodeSliceError>;
93 
    /// Returns the config for this engine.
    ///
    /// The provided `encode*` methods consult it (via [Config::encode_padding]) to decide
    /// whether the encoded output is padded.
    fn config(&self) -> &Self::Config;
96 
97     /// Encode arbitrary octets as base64 using the provided `Engine`.
98     /// Returns a `String`.
99     ///
100     /// # Example
101     ///
102     /// ```rust
103     /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
104     ///
105     /// let b64 = general_purpose::STANDARD.encode(b"hello world~");
106     /// println!("{}", b64);
107     ///
108     /// const CUSTOM_ENGINE: engine::GeneralPurpose =
109     ///     engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
110     ///
111     /// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~");
112     /// ```
113     #[cfg(any(feature = "alloc", test))]
114     #[inline]
encode<T: AsRef<[u8]>>(&self, input: T) -> String115     fn encode<T: AsRef<[u8]>>(&self, input: T) -> String {
116         fn inner<E>(engine: &E, input_bytes: &[u8]) -> String
117         where
118             E: Engine + ?Sized,
119         {
120             let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding())
121                 .expect("integer overflow when calculating buffer size");
122 
123             let mut buf = vec![0; encoded_size];
124 
125             encode_with_padding(input_bytes, &mut buf[..], engine, encoded_size);
126 
127             String::from_utf8(buf).expect("Invalid UTF8")
128         }
129 
130         inner(self, input.as_ref())
131     }
132 
133     /// Encode arbitrary octets as base64 into a supplied `String`.
134     /// Writes into the supplied `String`, which may allocate if its internal buffer isn't big enough.
135     ///
136     /// # Example
137     ///
138     /// ```rust
139     /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet};
140     /// const CUSTOM_ENGINE: engine::GeneralPurpose =
141     ///     engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD);
142     ///
143     /// fn main() {
144     ///     let mut buf = String::new();
145     ///     general_purpose::STANDARD.encode_string(b"hello world~", &mut buf);
146     ///     println!("{}", buf);
147     ///
148     ///     buf.clear();
149     ///     CUSTOM_ENGINE.encode_string(b"hello internet~", &mut buf);
150     ///     println!("{}", buf);
151     /// }
152     /// ```
153     #[cfg(any(feature = "alloc", test))]
154     #[inline]
encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String)155     fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) {
156         fn inner<E>(engine: &E, input_bytes: &[u8], output_buf: &mut String)
157         where
158             E: Engine + ?Sized,
159         {
160             let mut sink = chunked_encoder::StringSink::new(output_buf);
161 
162             chunked_encoder::ChunkedEncoder::new(engine)
163                 .encode(input_bytes, &mut sink)
164                 .expect("Writing to a String shouldn't fail");
165         }
166 
167         inner(self, input.as_ref(), output_buf)
168     }
169 
170     /// Encode arbitrary octets as base64 into a supplied slice.
171     /// Writes into the supplied output buffer.
172     ///
173     /// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident
174     /// or statically-allocated buffer).
175     ///
176     /// # Example
177     ///
178     #[cfg_attr(feature = "alloc", doc = "```")]
179     #[cfg_attr(not(feature = "alloc"), doc = "```ignore")]
180     /// use base64::{Engine as _, engine::general_purpose};
181     /// let s = b"hello internet!";
182     /// let mut buf = Vec::new();
183     /// // make sure we'll have a slice big enough for base64 + padding
184     /// buf.resize(s.len() * 4 / 3 + 4, 0);
185     ///
186     /// let bytes_written = general_purpose::STANDARD.encode_slice(s, &mut buf).unwrap();
187     ///
188     /// // shorten our vec down to just what was written
189     /// buf.truncate(bytes_written);
190     ///
191     /// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice());
192     /// ```
193     #[inline]
encode_slice<T: AsRef<[u8]>>( &self, input: T, output_buf: &mut [u8], ) -> Result<usize, EncodeSliceError>194     fn encode_slice<T: AsRef<[u8]>>(
195         &self,
196         input: T,
197         output_buf: &mut [u8],
198     ) -> Result<usize, EncodeSliceError> {
199         fn inner<E>(
200             engine: &E,
201             input_bytes: &[u8],
202             output_buf: &mut [u8],
203         ) -> Result<usize, EncodeSliceError>
204         where
205             E: Engine + ?Sized,
206         {
207             let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding())
208                 .expect("usize overflow when calculating buffer size");
209 
210             if output_buf.len() < encoded_size {
211                 return Err(EncodeSliceError::OutputSliceTooSmall);
212             }
213 
214             let b64_output = &mut output_buf[0..encoded_size];
215 
216             encode_with_padding(input_bytes, b64_output, engine, encoded_size);
217 
218             Ok(encoded_size)
219         }
220 
221         inner(self, input.as_ref(), output_buf)
222     }
223 
224     /// Decode the input into a new `Vec`.
225     ///
226     /// # Example
227     ///
228     /// ```rust
229     /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
230     ///
231     /// let bytes = general_purpose::STANDARD
232     ///     .decode("aGVsbG8gd29ybGR+Cg==").unwrap();
233     /// println!("{:?}", bytes);
234     ///
235     /// // custom engine setup
236     /// let bytes_url = engine::GeneralPurpose::new(
237     ///              &alphabet::URL_SAFE,
238     ///              general_purpose::NO_PAD)
239     ///     .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap();
240     /// println!("{:?}", bytes_url);
241     /// ```
242     #[cfg(any(feature = "alloc", test))]
243     #[inline]
decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError>244     fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> {
245         fn inner<E>(engine: &E, input_bytes: &[u8]) -> Result<Vec<u8>, DecodeError>
246         where
247             E: Engine + ?Sized,
248         {
249             let estimate = engine.internal_decoded_len_estimate(input_bytes.len());
250             let mut buffer = vec![0; estimate.decoded_len_estimate()];
251 
252             let bytes_written = engine
253                 .internal_decode(input_bytes, &mut buffer, estimate)
254                 .map_err(|e| match e {
255                     DecodeSliceError::DecodeError(e) => e,
256                     DecodeSliceError::OutputSliceTooSmall => {
257                         unreachable!("Vec is sized conservatively")
258                     }
259                 })?
260                 .decoded_len;
261 
262             buffer.truncate(bytes_written);
263 
264             Ok(buffer)
265         }
266 
267         inner(self, input.as_ref())
268     }
269 
270     /// Decode the `input` into the supplied `buffer`.
271     ///
272     /// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough.
273     /// Returns a `Result` containing an empty tuple, aka `()`.
274     ///
275     /// # Example
276     ///
277     /// ```rust
278     /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}};
279     /// const CUSTOM_ENGINE: engine::GeneralPurpose =
280     ///     engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::PAD);
281     ///
282     /// fn main() {
283     ///     use base64::Engine;
284     ///     let mut buffer = Vec::<u8>::new();
285     ///     // with the default engine
286     ///     general_purpose::STANDARD
287     ///         .decode_vec("aGVsbG8gd29ybGR+Cg==", &mut buffer,).unwrap();
288     ///     println!("{:?}", buffer);
289     ///
290     ///     buffer.clear();
291     ///
292     ///     // with a custom engine
293     ///     CUSTOM_ENGINE.decode_vec(
294     ///         "aGVsbG8gaW50ZXJuZXR-Cg==",
295     ///         &mut buffer,
296     ///     ).unwrap();
297     ///     println!("{:?}", buffer);
298     /// }
299     /// ```
300     #[cfg(any(feature = "alloc", test))]
301     #[inline]
decode_vec<T: AsRef<[u8]>>( &self, input: T, buffer: &mut Vec<u8>, ) -> Result<(), DecodeError>302     fn decode_vec<T: AsRef<[u8]>>(
303         &self,
304         input: T,
305         buffer: &mut Vec<u8>,
306     ) -> Result<(), DecodeError> {
307         fn inner<E>(engine: &E, input_bytes: &[u8], buffer: &mut Vec<u8>) -> Result<(), DecodeError>
308         where
309             E: Engine + ?Sized,
310         {
311             let starting_output_len = buffer.len();
312             let estimate = engine.internal_decoded_len_estimate(input_bytes.len());
313 
314             let total_len_estimate = estimate
315                 .decoded_len_estimate()
316                 .checked_add(starting_output_len)
317                 .expect("Overflow when calculating output buffer length");
318 
319             buffer.resize(total_len_estimate, 0);
320 
321             let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..];
322 
323             let bytes_written = engine
324                 .internal_decode(input_bytes, buffer_slice, estimate)
325                 .map_err(|e| match e {
326                     DecodeSliceError::DecodeError(e) => e,
327                     DecodeSliceError::OutputSliceTooSmall => {
328                         unreachable!("Vec is sized conservatively")
329                     }
330                 })?
331                 .decoded_len;
332 
333             buffer.truncate(starting_output_len + bytes_written);
334 
335             Ok(())
336         }
337 
338         inner(self, input.as_ref(), buffer)
339     }
340 
341     /// Decode the input into the provided output slice.
342     ///
343     /// Returns the number of bytes written to the slice, or an error if `output` is smaller than
344     /// the estimated decoded length.
345     ///
346     /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
347     ///
348     /// See [crate::decoded_len_estimate] for calculating buffer sizes.
349     ///
350     /// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error
351     /// if the output buffer is too small.
352     #[inline]
decode_slice<T: AsRef<[u8]>>( &self, input: T, output: &mut [u8], ) -> Result<usize, DecodeSliceError>353     fn decode_slice<T: AsRef<[u8]>>(
354         &self,
355         input: T,
356         output: &mut [u8],
357     ) -> Result<usize, DecodeSliceError> {
358         fn inner<E>(
359             engine: &E,
360             input_bytes: &[u8],
361             output: &mut [u8],
362         ) -> Result<usize, DecodeSliceError>
363         where
364             E: Engine + ?Sized,
365         {
366             engine
367                 .internal_decode(
368                     input_bytes,
369                     output,
370                     engine.internal_decoded_len_estimate(input_bytes.len()),
371                 )
372                 .map(|dm| dm.decoded_len)
373         }
374 
375         inner(self, input.as_ref(), output)
376     }
377 
378     /// Decode the input into the provided output slice.
379     ///
380     /// Returns the number of bytes written to the slice.
381     ///
382     /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end).
383     ///
384     /// See [crate::decoded_len_estimate] for calculating buffer sizes.
385     ///
386     /// See [Engine::decode_slice] for a version that returns an error instead of panicking if the output
387     /// buffer is too small.
388     ///
389     /// # Panics
390     ///
391     /// Panics if the provided output buffer is too small for the decoded data.
392     #[inline]
decode_slice_unchecked<T: AsRef<[u8]>>( &self, input: T, output: &mut [u8], ) -> Result<usize, DecodeError>393     fn decode_slice_unchecked<T: AsRef<[u8]>>(
394         &self,
395         input: T,
396         output: &mut [u8],
397     ) -> Result<usize, DecodeError> {
398         fn inner<E>(engine: &E, input_bytes: &[u8], output: &mut [u8]) -> Result<usize, DecodeError>
399         where
400             E: Engine + ?Sized,
401         {
402             engine
403                 .internal_decode(
404                     input_bytes,
405                     output,
406                     engine.internal_decoded_len_estimate(input_bytes.len()),
407                 )
408                 .map(|dm| dm.decoded_len)
409                 .map_err(|e| match e {
410                     DecodeSliceError::DecodeError(e) => e,
411                     DecodeSliceError::OutputSliceTooSmall => {
412                         panic!("Output slice is too small")
413                     }
414                 })
415         }
416 
417         inner(self, input.as_ref(), output)
418     }
419 }
420 
/// The minimal level of configuration that engines must support.
///
/// An engine exposes its config through `Engine::config()`.
pub trait Config {
    /// Returns `true` if padding should be added after the encoded output.
    ///
    /// Padding is added outside the engine's encode() since the engine may be used
    /// to encode only a chunk of the overall output, so it can't always know when
    /// the output is "done" and would therefore need padding (if configured).
    // It could be provided as a separate parameter when encoding, but that feels like
    // leaking an implementation detail to the user, and it's hopefully more convenient
    // to have to only pass one thing (the engine) to any part of the API.
    fn encode_padding(&self) -> bool;
}
433 
/// The decode estimate used by an engine implementation. Users do not need to interact with this;
/// it is only for engine implementors.
///
/// Implementors may store relevant data here when constructing this to avoid having to calculate
/// it again during actual decoding.
pub trait DecodeEstimate {
    /// Returns a conservative (err on the side of too big) estimate of the decoded length to use
    /// for pre-allocating buffers, etc.
    ///
    /// `Engine::decode` and `Engine::decode_vec` size their output `Vec` from this value before
    /// decoding, then truncate to the actual decoded length.
    ///
    /// The estimate must be no larger than the next largest complete triple of decoded bytes.
    /// That is, the final quad of tokens to decode may be assumed to be complete with no padding.
    fn decoded_len_estimate(&self) -> usize;
}
447 
/// Controls how pad bytes are handled when decoding.
///
/// Each [Engine] must support at least the behavior indicated by
/// [DecodePaddingMode::RequireCanonical], and may support other modes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DecodePaddingMode {
    /// Canonical padding is accepted, and so is input with fewer padding bytes than canonical.
    Indifferent,
    /// Padding must be canonical (0, 1, or 2 `=` as needed to produce a 4 byte suffix).
    RequireCanonical,
    /// Padding must be absent -- for when you want predictable padding, without any wasted bytes.
    RequireNone,
}
461 
/// Details describing the outcome of a decode operation.
#[derive(Debug, PartialEq, Eq)]
pub struct DecodeMetadata {
    /// How many decoded bytes were output
    pub(crate) decoded_len: usize,
    /// Where in the input the first padding byte sits, if there is one
    pub(crate) padding_offset: Option<usize>,
}

impl DecodeMetadata {
    /// Builds the metadata from the decoded byte count and the offset of the first padding
    /// byte (`None` when no padding offset is recorded).
    pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self {
        let decoded_len = decoded_bytes;
        let padding_offset = padding_index;

        DecodeMetadata {
            decoded_len,
            padding_offset,
        }
    }
}
479