1 //! Provides the [Engine] abstraction and out of the box implementations. 2 #[cfg(any(feature = "alloc", test))] 3 use crate::chunked_encoder; 4 use crate::{ 5 encode::{encode_with_padding, EncodeSliceError}, 6 encoded_len, DecodeError, DecodeSliceError, 7 }; 8 #[cfg(any(feature = "alloc", test))] 9 use alloc::vec::Vec; 10 11 #[cfg(any(feature = "alloc", test))] 12 use alloc::{string::String, vec}; 13 14 pub mod general_purpose; 15 16 #[cfg(test)] 17 mod naive; 18 19 #[cfg(test)] 20 mod tests; 21 22 pub use general_purpose::{GeneralPurpose, GeneralPurposeConfig}; 23 24 /// An `Engine` provides low-level encoding and decoding operations that all other higher-level parts of the API use. Users of the library will generally not need to implement this. 25 /// 26 /// Different implementations offer different characteristics. The library currently ships with 27 /// [GeneralPurpose] that offers good speed and works on any CPU, with more choices 28 /// coming later, like a constant-time one when side channel resistance is called for, and vendor-specific vectorized ones for more speed. 29 /// 30 /// See [general_purpose::STANDARD_NO_PAD] if you just want standard base64. Otherwise, when possible, it's 31 /// recommended to store the engine in a `const` so that references to it won't pose any lifetime 32 /// issues, and to avoid repeating the cost of engine setup. 33 /// 34 /// Since almost nobody will need to implement `Engine`, docs for internal methods are hidden. 35 // When adding an implementation of Engine, include them in the engine test suite: 36 // - add an implementation of [engine::tests::EngineWrapper] 37 // - add the implementation to the `all_engines` macro 38 // All tests run on all engines listed in the macro. 
39 pub trait Engine: Send + Sync { 40 /// The config type used by this engine 41 type Config: Config; 42 /// The decode estimate used by this engine 43 type DecodeEstimate: DecodeEstimate; 44 45 /// This is not meant to be called directly; it is only for `Engine` implementors. 46 /// See the other `encode*` functions on this trait. 47 /// 48 /// Encode the `input` bytes into the `output` buffer based on the mapping in `encode_table`. 49 /// 50 /// `output` will be long enough to hold the encoded data. 51 /// 52 /// Returns the number of bytes written. 53 /// 54 /// No padding should be written; that is handled separately. 55 /// 56 /// Must not write any bytes into the output slice other than the encoded data. 57 #[doc(hidden)] internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize58 fn internal_encode(&self, input: &[u8], output: &mut [u8]) -> usize; 59 60 /// This is not meant to be called directly; it is only for `Engine` implementors. 61 /// 62 /// As an optimization to prevent the decoded length from being calculated twice, it is 63 /// sometimes helpful to have a conservative estimate of the decoded size before doing the 64 /// decoding, so this calculation is done separately and passed to [Engine::decode()] as needed. 65 #[doc(hidden)] internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate66 fn internal_decoded_len_estimate(&self, input_len: usize) -> Self::DecodeEstimate; 67 68 /// This is not meant to be called directly; it is only for `Engine` implementors. 69 /// See the other `decode*` functions on this trait. 70 /// 71 /// Decode `input` base64 bytes into the `output` buffer. 72 /// 73 /// `decode_estimate` is the result of [Engine::internal_decoded_len_estimate()], which is passed in to avoid 74 /// calculating it again (expensive on short inputs).` 75 /// 76 /// Each complete 4-byte chunk of encoded data decodes to 3 bytes of decoded data, but this 77 /// function must also handle the final possibly partial chunk. 
78 /// If the input length is not a multiple of 4, or uses padding bytes to reach a multiple of 4, 79 /// the trailing 2 or 3 bytes must decode to 1 or 2 bytes, respectively, as per the 80 /// [RFC](https://tools.ietf.org/html/rfc4648#section-3.5). 81 /// 82 /// Decoding must not write any bytes into the output slice other than the decoded data. 83 /// 84 /// Non-canonical trailing bits in the final tokens or non-canonical padding must be reported as 85 /// errors unless the engine is configured otherwise. 86 #[doc(hidden)] internal_decode( &self, input: &[u8], output: &mut [u8], decode_estimate: Self::DecodeEstimate, ) -> Result<DecodeMetadata, DecodeSliceError>87 fn internal_decode( 88 &self, 89 input: &[u8], 90 output: &mut [u8], 91 decode_estimate: Self::DecodeEstimate, 92 ) -> Result<DecodeMetadata, DecodeSliceError>; 93 94 /// Returns the config for this engine. config(&self) -> &Self::Config95 fn config(&self) -> &Self::Config; 96 97 /// Encode arbitrary octets as base64 using the provided `Engine`. 98 /// Returns a `String`. 
99 /// 100 /// # Example 101 /// 102 /// ```rust 103 /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet}; 104 /// 105 /// let b64 = general_purpose::STANDARD.encode(b"hello world~"); 106 /// println!("{}", b64); 107 /// 108 /// const CUSTOM_ENGINE: engine::GeneralPurpose = 109 /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD); 110 /// 111 /// let b64_url = CUSTOM_ENGINE.encode(b"hello internet~"); 112 /// ``` 113 #[cfg(any(feature = "alloc", test))] 114 #[inline] encode<T: AsRef<[u8]>>(&self, input: T) -> String115 fn encode<T: AsRef<[u8]>>(&self, input: T) -> String { 116 fn inner<E>(engine: &E, input_bytes: &[u8]) -> String 117 where 118 E: Engine + ?Sized, 119 { 120 let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding()) 121 .expect("integer overflow when calculating buffer size"); 122 123 let mut buf = vec![0; encoded_size]; 124 125 encode_with_padding(input_bytes, &mut buf[..], engine, encoded_size); 126 127 String::from_utf8(buf).expect("Invalid UTF8") 128 } 129 130 inner(self, input.as_ref()) 131 } 132 133 /// Encode arbitrary octets as base64 into a supplied `String`. 134 /// Writes into the supplied `String`, which may allocate if its internal buffer isn't big enough. 
135 /// 136 /// # Example 137 /// 138 /// ```rust 139 /// use base64::{Engine as _, engine::{self, general_purpose}, alphabet}; 140 /// const CUSTOM_ENGINE: engine::GeneralPurpose = 141 /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::NO_PAD); 142 /// 143 /// fn main() { 144 /// let mut buf = String::new(); 145 /// general_purpose::STANDARD.encode_string(b"hello world~", &mut buf); 146 /// println!("{}", buf); 147 /// 148 /// buf.clear(); 149 /// CUSTOM_ENGINE.encode_string(b"hello internet~", &mut buf); 150 /// println!("{}", buf); 151 /// } 152 /// ``` 153 #[cfg(any(feature = "alloc", test))] 154 #[inline] encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String)155 fn encode_string<T: AsRef<[u8]>>(&self, input: T, output_buf: &mut String) { 156 fn inner<E>(engine: &E, input_bytes: &[u8], output_buf: &mut String) 157 where 158 E: Engine + ?Sized, 159 { 160 let mut sink = chunked_encoder::StringSink::new(output_buf); 161 162 chunked_encoder::ChunkedEncoder::new(engine) 163 .encode(input_bytes, &mut sink) 164 .expect("Writing to a String shouldn't fail"); 165 } 166 167 inner(self, input.as_ref(), output_buf) 168 } 169 170 /// Encode arbitrary octets as base64 into a supplied slice. 171 /// Writes into the supplied output buffer. 172 /// 173 /// This is useful if you wish to avoid allocation entirely (e.g. encoding into a stack-resident 174 /// or statically-allocated buffer). 
175 /// 176 /// # Example 177 /// 178 #[cfg_attr(feature = "alloc", doc = "```")] 179 #[cfg_attr(not(feature = "alloc"), doc = "```ignore")] 180 /// use base64::{Engine as _, engine::general_purpose}; 181 /// let s = b"hello internet!"; 182 /// let mut buf = Vec::new(); 183 /// // make sure we'll have a slice big enough for base64 + padding 184 /// buf.resize(s.len() * 4 / 3 + 4, 0); 185 /// 186 /// let bytes_written = general_purpose::STANDARD.encode_slice(s, &mut buf).unwrap(); 187 /// 188 /// // shorten our vec down to just what was written 189 /// buf.truncate(bytes_written); 190 /// 191 /// assert_eq!(s, general_purpose::STANDARD.decode(&buf).unwrap().as_slice()); 192 /// ``` 193 #[inline] encode_slice<T: AsRef<[u8]>>( &self, input: T, output_buf: &mut [u8], ) -> Result<usize, EncodeSliceError>194 fn encode_slice<T: AsRef<[u8]>>( 195 &self, 196 input: T, 197 output_buf: &mut [u8], 198 ) -> Result<usize, EncodeSliceError> { 199 fn inner<E>( 200 engine: &E, 201 input_bytes: &[u8], 202 output_buf: &mut [u8], 203 ) -> Result<usize, EncodeSliceError> 204 where 205 E: Engine + ?Sized, 206 { 207 let encoded_size = encoded_len(input_bytes.len(), engine.config().encode_padding()) 208 .expect("usize overflow when calculating buffer size"); 209 210 if output_buf.len() < encoded_size { 211 return Err(EncodeSliceError::OutputSliceTooSmall); 212 } 213 214 let b64_output = &mut output_buf[0..encoded_size]; 215 216 encode_with_padding(input_bytes, b64_output, engine, encoded_size); 217 218 Ok(encoded_size) 219 } 220 221 inner(self, input.as_ref(), output_buf) 222 } 223 224 /// Decode the input into a new `Vec`. 
225 /// 226 /// # Example 227 /// 228 /// ```rust 229 /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}}; 230 /// 231 /// let bytes = general_purpose::STANDARD 232 /// .decode("aGVsbG8gd29ybGR+Cg==").unwrap(); 233 /// println!("{:?}", bytes); 234 /// 235 /// // custom engine setup 236 /// let bytes_url = engine::GeneralPurpose::new( 237 /// &alphabet::URL_SAFE, 238 /// general_purpose::NO_PAD) 239 /// .decode("aGVsbG8gaW50ZXJuZXR-Cg").unwrap(); 240 /// println!("{:?}", bytes_url); 241 /// ``` 242 #[cfg(any(feature = "alloc", test))] 243 #[inline] decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError>244 fn decode<T: AsRef<[u8]>>(&self, input: T) -> Result<Vec<u8>, DecodeError> { 245 fn inner<E>(engine: &E, input_bytes: &[u8]) -> Result<Vec<u8>, DecodeError> 246 where 247 E: Engine + ?Sized, 248 { 249 let estimate = engine.internal_decoded_len_estimate(input_bytes.len()); 250 let mut buffer = vec![0; estimate.decoded_len_estimate()]; 251 252 let bytes_written = engine 253 .internal_decode(input_bytes, &mut buffer, estimate) 254 .map_err(|e| match e { 255 DecodeSliceError::DecodeError(e) => e, 256 DecodeSliceError::OutputSliceTooSmall => { 257 unreachable!("Vec is sized conservatively") 258 } 259 })? 260 .decoded_len; 261 262 buffer.truncate(bytes_written); 263 264 Ok(buffer) 265 } 266 267 inner(self, input.as_ref()) 268 } 269 270 /// Decode the `input` into the supplied `buffer`. 271 /// 272 /// Writes into the supplied `Vec`, which may allocate if its internal buffer isn't big enough. 273 /// Returns a `Result` containing an empty tuple, aka `()`. 
274 /// 275 /// # Example 276 /// 277 /// ```rust 278 /// use base64::{Engine as _, alphabet, engine::{self, general_purpose}}; 279 /// const CUSTOM_ENGINE: engine::GeneralPurpose = 280 /// engine::GeneralPurpose::new(&alphabet::URL_SAFE, general_purpose::PAD); 281 /// 282 /// fn main() { 283 /// use base64::Engine; 284 /// let mut buffer = Vec::<u8>::new(); 285 /// // with the default engine 286 /// general_purpose::STANDARD 287 /// .decode_vec("aGVsbG8gd29ybGR+Cg==", &mut buffer,).unwrap(); 288 /// println!("{:?}", buffer); 289 /// 290 /// buffer.clear(); 291 /// 292 /// // with a custom engine 293 /// CUSTOM_ENGINE.decode_vec( 294 /// "aGVsbG8gaW50ZXJuZXR-Cg==", 295 /// &mut buffer, 296 /// ).unwrap(); 297 /// println!("{:?}", buffer); 298 /// } 299 /// ``` 300 #[cfg(any(feature = "alloc", test))] 301 #[inline] decode_vec<T: AsRef<[u8]>>( &self, input: T, buffer: &mut Vec<u8>, ) -> Result<(), DecodeError>302 fn decode_vec<T: AsRef<[u8]>>( 303 &self, 304 input: T, 305 buffer: &mut Vec<u8>, 306 ) -> Result<(), DecodeError> { 307 fn inner<E>(engine: &E, input_bytes: &[u8], buffer: &mut Vec<u8>) -> Result<(), DecodeError> 308 where 309 E: Engine + ?Sized, 310 { 311 let starting_output_len = buffer.len(); 312 let estimate = engine.internal_decoded_len_estimate(input_bytes.len()); 313 314 let total_len_estimate = estimate 315 .decoded_len_estimate() 316 .checked_add(starting_output_len) 317 .expect("Overflow when calculating output buffer length"); 318 319 buffer.resize(total_len_estimate, 0); 320 321 let buffer_slice = &mut buffer.as_mut_slice()[starting_output_len..]; 322 323 let bytes_written = engine 324 .internal_decode(input_bytes, buffer_slice, estimate) 325 .map_err(|e| match e { 326 DecodeSliceError::DecodeError(e) => e, 327 DecodeSliceError::OutputSliceTooSmall => { 328 unreachable!("Vec is sized conservatively") 329 } 330 })? 
331 .decoded_len; 332 333 buffer.truncate(starting_output_len + bytes_written); 334 335 Ok(()) 336 } 337 338 inner(self, input.as_ref(), buffer) 339 } 340 341 /// Decode the input into the provided output slice. 342 /// 343 /// Returns the number of bytes written to the slice, or an error if `output` is smaller than 344 /// the estimated decoded length. 345 /// 346 /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end). 347 /// 348 /// See [crate::decoded_len_estimate] for calculating buffer sizes. 349 /// 350 /// See [Engine::decode_slice_unchecked] for a version that panics instead of returning an error 351 /// if the output buffer is too small. 352 #[inline] decode_slice<T: AsRef<[u8]>>( &self, input: T, output: &mut [u8], ) -> Result<usize, DecodeSliceError>353 fn decode_slice<T: AsRef<[u8]>>( 354 &self, 355 input: T, 356 output: &mut [u8], 357 ) -> Result<usize, DecodeSliceError> { 358 fn inner<E>( 359 engine: &E, 360 input_bytes: &[u8], 361 output: &mut [u8], 362 ) -> Result<usize, DecodeSliceError> 363 where 364 E: Engine + ?Sized, 365 { 366 engine 367 .internal_decode( 368 input_bytes, 369 output, 370 engine.internal_decoded_len_estimate(input_bytes.len()), 371 ) 372 .map(|dm| dm.decoded_len) 373 } 374 375 inner(self, input.as_ref(), output) 376 } 377 378 /// Decode the input into the provided output slice. 379 /// 380 /// Returns the number of bytes written to the slice. 381 /// 382 /// This will not write any bytes past exactly what is decoded (no stray garbage bytes at the end). 383 /// 384 /// See [crate::decoded_len_estimate] for calculating buffer sizes. 385 /// 386 /// See [Engine::decode_slice] for a version that returns an error instead of panicking if the output 387 /// buffer is too small. 388 /// 389 /// # Panics 390 /// 391 /// Panics if the provided output buffer is too small for the decoded data. 
392 #[inline] decode_slice_unchecked<T: AsRef<[u8]>>( &self, input: T, output: &mut [u8], ) -> Result<usize, DecodeError>393 fn decode_slice_unchecked<T: AsRef<[u8]>>( 394 &self, 395 input: T, 396 output: &mut [u8], 397 ) -> Result<usize, DecodeError> { 398 fn inner<E>(engine: &E, input_bytes: &[u8], output: &mut [u8]) -> Result<usize, DecodeError> 399 where 400 E: Engine + ?Sized, 401 { 402 engine 403 .internal_decode( 404 input_bytes, 405 output, 406 engine.internal_decoded_len_estimate(input_bytes.len()), 407 ) 408 .map(|dm| dm.decoded_len) 409 .map_err(|e| match e { 410 DecodeSliceError::DecodeError(e) => e, 411 DecodeSliceError::OutputSliceTooSmall => { 412 panic!("Output slice is too small") 413 } 414 }) 415 } 416 417 inner(self, input.as_ref(), output) 418 } 419 } 420 421 /// The minimal level of configuration that engines must support. 422 pub trait Config { 423 /// Returns `true` if padding should be added after the encoded output. 424 /// 425 /// Padding is added outside the engine's encode() since the engine may be used 426 /// to encode only a chunk of the overall output, so it can't always know when 427 /// the output is "done" and would therefore need padding (if configured). 428 // It could be provided as a separate parameter when encoding, but that feels like 429 // leaking an implementation detail to the user, and it's hopefully more convenient 430 // to have to only pass one thing (the engine) to any part of the API. encode_padding(&self) -> bool431 fn encode_padding(&self) -> bool; 432 } 433 434 /// The decode estimate used by an engine implementation. Users do not need to interact with this; 435 /// it is only for engine implementors. 436 /// 437 /// Implementors may store relevant data here when constructing this to avoid having to calculate 438 /// them again during actual decoding. 
pub trait DecodeEstimate {
    /// Returns a conservative (err on the side of too big) estimate of the decoded length to use
    /// for pre-allocating buffers, etc.
    ///
    /// The estimate must be no larger than the next largest complete triple of decoded bytes.
    /// That is, the final quad of tokens to decode may be assumed to be complete with no padding.
    fn decoded_len_estimate(&self) -> usize;
}

/// Controls how pad bytes are handled when decoding.
///
/// Each [Engine] must support at least the behavior indicated by
/// [DecodePaddingMode::RequireCanonical], and may support other modes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum DecodePaddingMode {
    /// Canonical padding is allowed, but any fewer padding bytes than that is also allowed.
    Indifferent,
    /// Padding must be canonical (0, 1, or 2 `=` as needed to produce a 4 byte suffix).
    RequireCanonical,
    /// Padding must be absent -- for when you want predictable padding, without any wasted bytes.
    RequireNone,
}

/// Metadata about the result of a decode operation
#[derive(PartialEq, Eq, Debug)]
pub struct DecodeMetadata {
    /// Number of decoded bytes output
    pub(crate) decoded_len: usize,
    /// Offset of the first padding byte in the input, if any
    pub(crate) padding_offset: Option<usize>,
}

impl DecodeMetadata {
    /// Builds the metadata for a finished decode.
    ///
    /// `decoded_bytes` is stored as `decoded_len` and `padding_index` as `padding_offset`.
    pub(crate) fn new(decoded_bytes: usize, padding_index: Option<usize>) -> Self {
        Self {
            decoded_len: decoded_bytes,
            padding_offset: padding_index,
        }
    }
}