1 // Copyright 2016 Brian Smith.
2 // Portions Copyright (c) 2016, Google Inc.
3 //
4 // Permission to use, copy, modify, and/or distribute this software for any
5 // purpose with or without fee is hereby granted, provided that the above
6 // copyright notice and this permission notice appear in all copies.
7 //
8 // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
9 // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
11 // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
13 // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
14 // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 
16 use super::{quic::Sample, Nonce};
17 use crate::{cpu, polyfill::ChunksFixed};
18 
19 #[cfg(any(
20     test,
21     not(any(
22         target_arch = "aarch64",
23         target_arch = "arm",
24         target_arch = "x86",
25         target_arch = "x86_64"
26     ))
27 ))]
28 mod fallback;
29 
30 use core::ops::RangeFrom;
31 
/// A ChaCha20 key: the 32-byte key material decoded into eight little-endian
/// 32-bit words, plus the CPU features detected when the key was built.
#[derive(Clone)]
pub struct Key {
    /// Key material as native-endian words, decoded little-endian from bytes
    /// in `Key::new`.
    words: [u32; KEY_LEN / 4],
    /// Captured at construction so feature detection is done once per key.
    cpu_features: cpu::Features,
}
37 
38 impl Key {
new(value: [u8; KEY_LEN], cpu_features: cpu::Features) -> Self39     pub(super) fn new(value: [u8; KEY_LEN], cpu_features: cpu::Features) -> Self {
40         let value: &[[u8; 4]; KEY_LEN / 4] = value.chunks_fixed();
41         Self {
42             words: value.map(u32::from_le_bytes),
43             cpu_features,
44         }
45     }
46 
cpu_features(&self) -> cpu::Features47     pub(super) fn cpu_features(&self) -> cpu::Features {
48         self.cpu_features
49     }
50 }
51 
impl Key {
    /// Encrypts (XORs with the keystream) `in_out` in place, with the
    /// keystream starting at `counter`.
    #[inline]
    pub fn encrypt_in_place(&self, counter: Counter, in_out: &mut [u8]) {
        // `src == 0..` means the input and output are exactly overlapping,
        // which every implementation supports.
        self.encrypt_less_safe(counter, in_out, 0..);
    }

    /// XORs the 32-byte `in_out` with the keystream derived from `iv`.
    #[inline]
    pub fn encrypt_iv_xor_in_place(&self, iv: Iv, in_out: &mut [u8; 32]) {
        // It is safe to use `into_counter_for_single_block_less_safe()`
        // because `in_out` (32 bytes) fits within one block, so the counter
        // value is consumed for at most a single block of keystream.
        debug_assert!(in_out.len() <= BLOCK_LEN);
        self.encrypt_less_safe(iv.into_counter_for_single_block_less_safe(), in_out, 0..);
    }

    /// Derives the 5-byte mask for the given sample by encrypting five zero
    /// bytes with the sample used as the IV (QUIC header protection).
    #[inline]
    pub fn new_mask(&self, sample: Sample) -> [u8; 5] {
        let mut out: [u8; 5] = [0; 5];
        let iv = Iv::assume_unique_for_key(sample);

        // Single-block counter use is safe: `out` (5 bytes) fits within one
        // block.
        debug_assert!(out.len() <= BLOCK_LEN);
        self.encrypt_less_safe(iv.into_counter_for_single_block_less_safe(), &mut out, 0..);

        out
    }

    /// Analogous to `slice::copy_within()`: encrypts `in_out[src..]` and
    /// writes the ciphertext to the front of `in_out`.
    pub fn encrypt_within(&self, counter: Counter, in_out: &mut [u8], src: RangeFrom<usize>) {
        // XXX: The x86 and at least one branch of the ARM assembly language
        // code doesn't allow overlapping input and output unless they are
        // exactly overlapping. TODO: Figure out which branch of the ARM code
        // has this limitation and come up with a better solution.
        //
        // https://rt.openssl.org/Ticket/Display.html?id=4362
        if cfg!(any(target_arch = "arm", target_arch = "x86")) && src.start != 0 {
            // Make the buffers exactly overlapping: move the input to the
            // front first, then encrypt in place.
            let len = in_out.len() - src.start;
            in_out.copy_within(src, 0);
            self.encrypt_in_place(counter, &mut in_out[..len]);
        } else {
            self.encrypt_less_safe(counter, in_out, src);
        }
    }

    /// This is "less safe" because it skips the important check that `encrypt_within` does.
    /// Only call this with `src` equal to `0..` or from `encrypt_within`.
    #[inline]
    fn encrypt_less_safe(&self, counter: Counter, in_out: &mut [u8], src: RangeFrom<usize>) {
        // On architectures with assembly implementations, define a shim with
        // the same signature as the fallback that forwards to the external
        // `ChaCha20_ctr32` routine.
        #[cfg(any(
            target_arch = "aarch64",
            target_arch = "arm",
            target_arch = "x86",
            target_arch = "x86_64"
        ))]
        #[inline(always)]
        pub(super) fn ChaCha20_ctr32(
            key: &Key,
            counter: Counter,
            in_out: &mut [u8],
            src: RangeFrom<usize>,
        ) {
            // Panics if `src.start > in_out.len()`, before any unsafe code
            // runs.
            let in_out_len = in_out.len().checked_sub(src.start).unwrap();

            // There's no need to worry if `counter` is incremented because it is
            // owned here and we drop immediately after the call.
            prefixed_extern! {
                fn ChaCha20_ctr32(
                    out: *mut u8,
                    in_: *const u8,
                    in_len: crate::c::size_t,
                    key: &[u32; KEY_LEN / 4],
                    counter: &Counter,
                );
            }
            // SAFETY: `out` and `in_` point into the same live allocation and
            // each covers at least `in_out_len` bytes (checked above); the
            // assembly's overlap restrictions are enforced by the
            // `encrypt_within` workaround — see its comment.
            unsafe {
                ChaCha20_ctr32(
                    in_out.as_mut_ptr(),
                    in_out[src].as_ptr(),
                    in_out_len,
                    key.words_less_safe(),
                    &counter,
                )
            }
        }

        // Without assembly support, use the pure-Rust implementation instead.
        #[cfg(not(any(
            target_arch = "aarch64",
            target_arch = "arm",
            target_arch = "x86",
            target_arch = "x86_64"
        )))]
        use fallback::ChaCha20_ctr32;

        ChaCha20_ctr32(self, counter, in_out, src);
    }

    /// Exposes the raw key words. "Less safe" because the caller takes
    /// responsibility for not leaking or misusing the key material.
    #[inline]
    pub(super) fn words_less_safe(&self) -> &[u32; KEY_LEN / 4] {
        &self.words
    }
}
151 
/// Counter || Nonce, all native endian: word 0 is the 32-bit block counter,
/// words 1..4 hold the 96-bit nonce as little-endian-decoded words.
///
/// `repr(transparent)` so `&Counter` can be passed directly to the external
/// `ChaCha20_ctr32` routine as a pointer to four words.
#[repr(transparent)]
pub struct Counter([u32; 4]);
155 
156 impl Counter {
zero(nonce: Nonce) -> Self157     pub fn zero(nonce: Nonce) -> Self {
158         Self::from_nonce_and_ctr(nonce, 0)
159     }
160 
from_nonce_and_ctr(nonce: Nonce, ctr: u32) -> Self161     fn from_nonce_and_ctr(nonce: Nonce, ctr: u32) -> Self {
162         let nonce = nonce.as_ref().chunks_fixed();
163         Self([
164             ctr,
165             u32::from_le_bytes(nonce[0]),
166             u32::from_le_bytes(nonce[1]),
167             u32::from_le_bytes(nonce[2]),
168         ])
169     }
170 
increment(&mut self) -> Iv171     pub fn increment(&mut self) -> Iv {
172         let iv = Iv(self.0);
173         self.0[0] += 1;
174         iv
175     }
176 
177     /// This is "less safe" because it hands off management of the counter to
178     /// the caller.
179     #[cfg(any(
180         test,
181         not(any(
182             target_arch = "aarch64",
183             target_arch = "arm",
184             target_arch = "x86",
185             target_arch = "x86_64"
186         ))
187     ))]
into_words_less_safe(self) -> [u32; 4]188     fn into_words_less_safe(self) -> [u32; 4] {
189         self.0
190     }
191 }
192 
/// The IV for a single block encryption: four native-endian words, the same
/// layout as `Counter`.
///
/// Intentionally not `Clone` to ensure each is used only once.
pub struct Iv([u32; 4]);
197 
198 impl Iv {
assume_unique_for_key(value: [u8; 16]) -> Self199     fn assume_unique_for_key(value: [u8; 16]) -> Self {
200         let value: &[[u8; 4]; 4] = value.chunks_fixed();
201         Self(value.map(u32::from_le_bytes))
202     }
203 
into_counter_for_single_block_less_safe(self) -> Counter204     fn into_counter_for_single_block_less_safe(self) -> Counter {
205         Counter(self.0)
206     }
207 }
208 
/// The ChaCha20 key length, in bytes (256 bits).
pub const KEY_LEN: usize = 32;

/// The ChaCha20 block length, in bytes.
const BLOCK_LEN: usize = 64;
212 
#[cfg(test)]
mod tests {
    extern crate alloc;

    use super::*;
    use crate::test;
    use alloc::vec;

    // The widest (alignment, offset) search space to exercise.
    const MAX_ALIGNMENT_AND_OFFSET: (usize, usize) = (15, 259);

    // Searching the full range is slow, so only do it when optimizations are
    // enabled or when slow tests were explicitly requested.
    //
    // Fix: the original condition was `debug_assertions = "false"`, a
    // name/value cfg that the compiler never sets (`debug_assertions` is a
    // flag, not a key/value pair), so it always evaluated false and the full
    // range was never selected even in release builds. `not(debug_assertions)`
    // expresses the intent.
    const MAX_ALIGNMENT_AND_OFFSET_SUBSET: (usize, usize) =
        if cfg!(any(not(debug_assertions), feature = "slow_tests")) {
            MAX_ALIGNMENT_AND_OFFSET
        } else {
            (0, 0)
        };

    #[test]
    fn chacha20_test_default() {
        // Always use `MAX_OFFSET` if we have assembly code.
        let max_offset = if cfg!(any(
            target_arch = "aarch64",
            target_arch = "arm",
            target_arch = "x86",
            target_arch = "x86_64"
        )) {
            MAX_ALIGNMENT_AND_OFFSET
        } else {
            MAX_ALIGNMENT_AND_OFFSET_SUBSET
        };
        chacha20_test(max_offset, Key::encrypt_within);
    }

    // Smoketest the fallback implementation.
    #[test]
    fn chacha20_test_fallback() {
        chacha20_test(MAX_ALIGNMENT_AND_OFFSET_SUBSET, fallback::ChaCha20_ctr32);
    }

    // Verifies the encryption is successful when done on overlapping buffers.
    //
    // On some branches of the 32-bit x86 and ARM assembly code the in-place
    // operation fails in some situations where the input/output buffers are
    // not exactly overlapping. Such failures are dependent not only on the
    // degree of overlapping but also the length of the data. `encrypt_within`
    // works around that.
    fn chacha20_test(
        max_alignment_and_offset: (usize, usize),
        f: impl for<'k, 'i> Fn(&'k Key, Counter, &'i mut [u8], RangeFrom<usize>),
    ) {
        // Reuse a buffer to avoid slowing down the tests with allocations.
        let mut buf = vec![0u8; 1300];

        test::run(test_file!("chacha_tests.txt"), move |section, test_case| {
            assert_eq!(section, "");

            let key = test_case.consume_bytes("Key");
            let key: &[u8; KEY_LEN] = key.as_slice().try_into()?;
            let key = Key::new(*key, cpu::features());

            let ctr = test_case.consume_usize("Ctr");
            let nonce = test_case.consume_bytes("Nonce");
            let input = test_case.consume_bytes("Input");
            let output = test_case.consume_bytes("Output");

            // Run the test case over all prefixes of the input because the
            // behavior of ChaCha20 implementation changes dependent on the
            // length of the input.
            for len in 0..=input.len() {
                chacha20_test_case_inner(
                    &key,
                    &nonce,
                    ctr as u32,
                    &input[..len],
                    &output[..len],
                    &mut buf,
                    max_alignment_and_offset,
                    &f,
                );
            }

            Ok(())
        });
    }

    // Runs one test case at every (alignment, offset) combination up to the
    // given maximums: the input is placed at `buf[alignment..][offset..]`,
    // encrypted via `f`, and the ciphertext is expected at the front of the
    // (alignment-adjusted) buffer.
    fn chacha20_test_case_inner(
        key: &Key,
        nonce: &[u8],
        ctr: u32,
        input: &[u8],
        expected: &[u8],
        buf: &mut [u8],
        (max_alignment, max_offset): (usize, usize),
        f: &impl for<'k, 'i> Fn(&'k Key, Counter, &'i mut [u8], RangeFrom<usize>),
    ) {
        // Filler byte used to detect stray writes outside the expected region.
        const ARBITRARY: u8 = 123;

        for alignment in 0..=max_alignment {
            buf[..alignment].fill(ARBITRARY);
            let buf = &mut buf[alignment..];
            for offset in 0..=max_offset {
                let buf = &mut buf[..(offset + input.len())];
                buf[..offset].fill(ARBITRARY);
                let src = offset..;
                buf[src.clone()].copy_from_slice(input);

                let ctr = Counter::from_nonce_and_ctr(
                    Nonce::try_assume_unique_for_key(nonce).unwrap(),
                    ctr,
                );
                f(key, ctr, buf, src);
                assert_eq!(&buf[..input.len()], expected)
            }
        }
    }
}
328