// Copyright 2016 Brian Smith.
// Portions Copyright (c) 2016, Google Inc.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use super::{quic::Sample, Nonce};
use crate::{cpu, polyfill::ChunksFixed};

#[cfg(any(
    test,
    not(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86",
        target_arch = "x86_64"
    ))
))]
mod fallback;

use core::ops::RangeFrom;

#[derive(Clone)]
pub struct Key {
    words: [u32; KEY_LEN / 4],
    cpu_features: cpu::Features,
}

impl Key {
    pub(super) fn new(value: [u8; KEY_LEN], cpu_features: cpu::Features) -> Self {
        let value: &[[u8; 4]; KEY_LEN / 4] = value.chunks_fixed();
        Self {
            words: value.map(u32::from_le_bytes),
            cpu_features,
        }
    }

    pub(super) fn cpu_features(&self) -> cpu::Features {
        self.cpu_features
    }
}

impl Key {
    #[inline]
    pub fn encrypt_in_place(&self, counter: Counter, in_out: &mut [u8]) {
        self.encrypt_less_safe(counter, in_out, 0..);
    }

    #[inline]
    pub fn encrypt_iv_xor_in_place(&self, iv: Iv, in_out: &mut [u8; 32]) {
        // It is safe to use `into_counter_for_single_block_less_safe()`
        // because `in_out` is at most one block long.
        debug_assert!(in_out.len() <= BLOCK_LEN);
        self.encrypt_less_safe(iv.into_counter_for_single_block_less_safe(), in_out, 0..);
    }

    #[inline]
    pub fn new_mask(&self, sample: Sample) -> [u8; 5] {
        let mut out: [u8; 5] = [0; 5];
        let iv = Iv::assume_unique_for_key(sample);

        debug_assert!(out.len() <= BLOCK_LEN);
        self.encrypt_less_safe(iv.into_counter_for_single_block_less_safe(), &mut out, 0..);

        out
    }

    /// Encrypts `in_out[src]`, writing the result to the beginning of
    /// `in_out`. Analogous to `slice::copy_within()`.
    pub fn encrypt_within(&self, counter: Counter, in_out: &mut [u8], src: RangeFrom<usize>) {
        // XXX: The x86 and at least one branch of the ARM assembly language
        // code doesn't allow overlapping input and output unless they are
        // exactly overlapping. TODO: Figure out which branch of the ARM code
        // has this limitation and come up with a better solution.
        //
        // https://rt.openssl.org/Ticket/Display.html?id=4362
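        //
        // The workaround below makes the input and output exactly
        // overlapping: the input is first shifted to the front of the
        // buffer and then encrypted in place, at the cost of an extra copy.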
        if cfg!(any(target_arch = "arm", target_arch = "x86")) && src.start != 0 {
            let len = in_out.len() - src.start;
            in_out.copy_within(src, 0);
            self.encrypt_in_place(counter, &mut in_out[..len]);
        } else {
            self.encrypt_less_safe(counter, in_out, src);
        }
    }

    /// This is "less safe" because it skips the important check that
    /// `encrypt_within` does. Only call this with `src` equal to `0..` or
    /// from `encrypt_within`.
    #[inline]
    fn encrypt_less_safe(&self, counter: Counter, in_out: &mut [u8], src: RangeFrom<usize>) {
        #[cfg(any(
            target_arch = "aarch64",
            target_arch = "arm",
            target_arch = "x86",
            target_arch = "x86_64"
        ))]
        #[inline(always)]
        pub(super) fn ChaCha20_ctr32(
            key: &Key,
            counter: Counter,
            in_out: &mut [u8],
            src: RangeFrom<usize>,
        ) {
            let in_out_len = in_out.len().checked_sub(src.start).unwrap();

            // There's no need to worry if `counter` is incremented because
            // it is owned here and we drop it immediately after the call.
            prefixed_extern! {
                fn ChaCha20_ctr32(
                    out: *mut u8,
                    in_: *const u8,
                    in_len: crate::c::size_t,
                    key: &[u32; KEY_LEN / 4],
                    counter: &Counter,
                );
            }
            unsafe {
                ChaCha20_ctr32(
                    in_out.as_mut_ptr(),
                    in_out[src].as_ptr(),
                    in_out_len,
                    key.words_less_safe(),
                    &counter,
                )
            }
        }

        #[cfg(not(any(
            target_arch = "aarch64",
            target_arch = "arm",
            target_arch = "x86",
            target_arch = "x86_64"
        )))]
        use fallback::ChaCha20_ctr32;

        ChaCha20_ctr32(self, counter, in_out, src);
    }

    #[inline]
    pub(super) fn words_less_safe(&self) -> &[u32; KEY_LEN / 4] {
        &self.words
    }
}

/// Counter || Nonce, all native endian.
#[repr(transparent)]
pub struct Counter([u32; 4]);

impl Counter {
    pub fn zero(nonce: Nonce) -> Self {
        Self::from_nonce_and_ctr(nonce, 0)
    }

    fn from_nonce_and_ctr(nonce: Nonce, ctr: u32) -> Self {
        let nonce = nonce.as_ref().chunks_fixed();
        Self([
            ctr,
            u32::from_le_bytes(nonce[0]),
            u32::from_le_bytes(nonce[1]),
            u32::from_le_bytes(nonce[2]),
        ])
    }

    pub fn increment(&mut self) -> Iv {
        let iv = Iv(self.0);
        self.0[0] += 1;
        iv
    }

    /// This is "less safe" because it hands off management of the counter to
    /// the caller.
    #[cfg(any(
        test,
        not(any(
            target_arch = "aarch64",
            target_arch = "arm",
            target_arch = "x86",
            target_arch = "x86_64"
        ))
    ))]
    fn into_words_less_safe(self) -> [u32; 4] {
        self.0
    }
}

/// The IV for a single block encryption.
///
/// Intentionally not `Clone` to ensure each is used only once.
pub struct Iv([u32; 4]);

impl Iv {
    fn assume_unique_for_key(value: [u8; 16]) -> Self {
        let value: &[[u8; 4]; 4] = value.chunks_fixed();
        Self(value.map(u32::from_le_bytes))
    }

    fn into_counter_for_single_block_less_safe(self) -> Counter {
        Counter(self.0)
    }
}

pub const KEY_LEN: usize = 32;
const BLOCK_LEN: usize = 64;

#[cfg(test)]
mod tests {
    extern crate alloc;

    use super::*;
    use crate::test;
    use alloc::vec;

    const MAX_ALIGNMENT_AND_OFFSET: (usize, usize) = (15, 259);
    const MAX_ALIGNMENT_AND_OFFSET_SUBSET: (usize, usize) =
        if cfg!(any(not(debug_assertions), feature = "slow_tests")) {
            MAX_ALIGNMENT_AND_OFFSET
        } else {
            (0, 0)
        };

    #[test]
    fn chacha20_test_default() {
        // Always use `MAX_ALIGNMENT_AND_OFFSET` if we have assembly code.
        let max_alignment_and_offset = if cfg!(any(
            target_arch = "aarch64",
            target_arch = "arm",
            target_arch = "x86",
            target_arch = "x86_64"
        )) {
            MAX_ALIGNMENT_AND_OFFSET
        } else {
            MAX_ALIGNMENT_AND_OFFSET_SUBSET
        };
        chacha20_test(max_alignment_and_offset, Key::encrypt_within);
    }

    // Smoketest the fallback implementation.
    #[test]
    fn chacha20_test_fallback() {
        chacha20_test(MAX_ALIGNMENT_AND_OFFSET_SUBSET, fallback::ChaCha20_ctr32);
    }
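    // Illustrative test of the `Counter` layout; this is an editorial
    // addition, not part of the upstream test suite. The 32-bit block
    // counter occupies the first word, followed by the three little-endian
    // nonce words, and `increment()` returns the current position as a
    // single-block IV before advancing the counter word.
    #[test]
    fn counter_layout_and_increment() {
        let nonce = Nonce::try_assume_unique_for_key(&[0u8; 12]).unwrap();
        let mut ctr = Counter::from_nonce_and_ctr(nonce, 7);

        let iv = ctr.increment();
        assert_eq!(
            iv.into_counter_for_single_block_less_safe()
                .into_words_less_safe(),
            [7, 0, 0, 0]
        );

        // The counter word advances; the nonce words are untouched.
        let iv = ctr.increment();
        assert_eq!(
            iv.into_counter_for_single_block_less_safe()
                .into_words_less_safe(),
            [8, 0, 0, 0]
        );
    }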
    // Verifies that encryption succeeds when done on overlapping buffers.
    //
    // On some branches of the 32-bit x86 and ARM assembly code the in-place
    // operation fails in some situations where the input/output buffers are
    // not exactly overlapping. Such failures depend not only on the degree
    // of overlap but also on the length of the data. `encrypt_within` works
    // around that.
    fn chacha20_test(
        max_alignment_and_offset: (usize, usize),
        f: impl for<'k, 'i> Fn(&'k Key, Counter, &'i mut [u8], RangeFrom<usize>),
    ) {
        // Reuse a buffer to avoid slowing down the tests with allocations.
        let mut buf = vec![0u8; 1300];

        test::run(test_file!("chacha_tests.txt"), move |section, test_case| {
            assert_eq!(section, "");

            let key = test_case.consume_bytes("Key");
            let key: &[u8; KEY_LEN] = key.as_slice().try_into()?;
            let key = Key::new(*key, cpu::features());

            let ctr = test_case.consume_usize("Ctr");
            let nonce = test_case.consume_bytes("Nonce");
            let input = test_case.consume_bytes("Input");
            let output = test_case.consume_bytes("Output");

            // Run the test case over all prefixes of the input because the
            // behavior of the ChaCha20 implementation changes depending on
            // the length of the input.
            for len in 0..=input.len() {
                chacha20_test_case_inner(
                    &key,
                    &nonce,
                    ctr as u32,
                    &input[..len],
                    &output[..len],
                    &mut buf,
                    max_alignment_and_offset,
                    &f,
                );
            }

            Ok(())
        });
    }

    fn chacha20_test_case_inner(
        key: &Key,
        nonce: &[u8],
        ctr: u32,
        input: &[u8],
        expected: &[u8],
        buf: &mut [u8],
        (max_alignment, max_offset): (usize, usize),
        f: &impl for<'k, 'i> Fn(&'k Key, Counter, &'i mut [u8], RangeFrom<usize>),
    ) {
        const ARBITRARY: u8 = 123;

        for alignment in 0..=max_alignment {
            buf[..alignment].fill(ARBITRARY);
            let buf = &mut buf[alignment..];
            for offset in 0..=max_offset {
                let buf = &mut buf[..(offset + input.len())];
                buf[..offset].fill(ARBITRARY);
                let src = offset..;
                buf[src.clone()].copy_from_slice(input);

                let ctr = Counter::from_nonce_and_ctr(
                    Nonce::try_assume_unique_for_key(nonce).unwrap(),
                    ctr,
                );
                f(key, ctr, buf, src);
                assert_eq!(&buf[..input.len()], expected);
            }
        }
    }
}
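// Illustrative sketch, an editorial addition rather than part of the
// upstream test suite: `Key::new` packs the 32 key bytes into eight
// little-endian 32-bit words, the layout both the assembly and fallback
// implementations consume.
#[cfg(test)]
mod key_layout_tests {
    use super::*;

    #[test]
    fn key_words_are_little_endian() {
        let mut bytes = [0u8; KEY_LEN];
        bytes[..4].copy_from_slice(&[0x01, 0x02, 0x03, 0x04]);
        let key = Key::new(bytes, cpu::features());
        // The first four bytes become the first word, least significant
        // byte first.
        assert_eq!(key.words_less_safe()[0], 0x0403_0201);
        // The remaining (all-zero) bytes become zero words.
        assert!(key.words_less_safe()[1..].iter().all(|&w| w == 0));
    }
}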