// Copyright 2018 Brian Smith.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use super::{
    block::{Block, BLOCK_LEN},
    Aad,
};
use crate::cpu;
use core::ops::BitXorAssign;

#[cfg(not(target_arch = "aarch64"))]
mod gcm_nohw;

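/// Precomputed GHASH key material: the hash subkey `H` expanded into the
/// `HTable` form expected by the implementation selected at runtime.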
#[derive(Clone)]
pub struct Key {
    h_table: HTable,
    cpu_features: cpu::Features,
}

impl Key {
    pub(super) fn new(h_be: Block, cpu_features: cpu::Features) -> Self {
        let h: [u64; 2] = h_be.into();

        let mut key = Self {
            h_table: HTable {
                Htable: [u128 { hi: 0, lo: 0 }; HTABLE_LEN],
            },
            cpu_features,
        };
        let h_table = &mut key.h_table;

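        // Each implementation expands `h` into its own `HTable` representation.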
        match detect_implementation(cpu_features) {
            #[cfg(target_arch = "x86_64")]
            Implementation::CLMUL if has_avx_movbe(cpu_features) => {
                prefixed_extern! {
                    fn gcm_init_avx(HTable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_avx(h_table, &h);
                }
            }

            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_init_clmul(Htable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_clmul(h_table, &h);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_init_neon(Htable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_neon(h_table, &h);
                }
            }

            #[cfg(not(target_arch = "aarch64"))]
            Implementation::Fallback => {
                h_table.Htable[0] = gcm_nohw::init(h);
            }
        }

        key
    }
}

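/// Per-message GHASH state: the accumulator `Xi` plus a copy of the key's
/// `HTable`, seeded by absorbing the additional authenticated data (AAD).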
pub struct Context {
    inner: ContextInner,
    cpu_features: cpu::Features,
}

impl Context {
    pub(crate) fn new(key: &Key, aad: Aad<&[u8]>) -> Self {
        let mut ctx = Self {
            inner: ContextInner {
                Xi: Xi(Block::zero()),
                Htable: key.h_table.clone(),
            },
            cpu_features: key.cpu_features,
        };

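        // Absorb the AAD one block at a time; a trailing partial chunk is
        // zero-padded to a full block, as GHASH requires.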
        for ad in aad.0.chunks(BLOCK_LEN) {
            let mut block = Block::zero();
            block.overwrite_part_at(0, ad);
            ctx.update_block(block);
        }

        ctx
    }

    /// Access to `inner` for the integrated AES-GCM implementations only.
    #[cfg(target_arch = "x86_64")]
    #[inline]
    pub(super) fn inner(&mut self) -> (&HTable, &mut Xi) {
        (&self.inner.Htable, &mut self.inner.Xi)
    }

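    /// Feeds one or more complete 16-byte blocks into the GHASH computation.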
    pub fn update_blocks(&mut self, input: &[u8]) {
        // The assembly functions take the input length in bytes, not blocks.
        let input_bytes = input.len();

        debug_assert_eq!(input_bytes % BLOCK_LEN, 0);
        debug_assert!(input_bytes > 0);

        let input = input.as_ptr() as *const [u8; BLOCK_LEN];
        // SAFETY:
        // - `[[u8; BLOCK_LEN]]` has the same bit validity as `[u8]`.
        // - `[[u8; BLOCK_LEN]]` has the same alignment requirement as `[u8]`.
        // - `input_bytes / BLOCK_LEN` ensures that the total length in bytes of
        //   the new `[[u8; BLOCK_LEN]]` will not be longer than the original
        //   `[u8]`.
        let input = unsafe { core::slice::from_raw_parts(input, input_bytes / BLOCK_LEN) };

        let xi = &mut self.inner.Xi;
        let h_table = &self.inner.Htable;

        match detect_implementation(self.cpu_features) {
            #[cfg(target_arch = "x86_64")]
            Implementation::CLMUL if has_avx_movbe(self.cpu_features) => {
                prefixed_extern! {
                    fn gcm_ghash_avx(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_avx(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_ghash_clmul(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_clmul(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_ghash_neon(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_neon(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(not(target_arch = "aarch64"))]
            Implementation::Fallback => {
                gcm_nohw::ghash(xi, h_table.Htable[0], input);
            }
        }
    }

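    /// XORs `a` into `Xi`, then multiplies by `H`: one full GHASH step.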
    pub fn update_block(&mut self, a: Block) {
        self.inner.Xi.bitxor_assign(a);

        // Although these functions take `Xi` and `h_table` as separate
        // parameters, one or more of them might assume that they are part of
        // the same `ContextInner` structure.
        let xi = &mut self.inner.Xi;
        let h_table = &self.inner.Htable;

        match detect_implementation(self.cpu_features) {
            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_gmult_clmul(xi: &mut Xi, Htable: &HTable);
                }
                unsafe {
                    gcm_gmult_clmul(xi, h_table);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_gmult_neon(xi: &mut Xi, Htable: &HTable);
                }
                unsafe {
                    gcm_gmult_neon(xi, h_table);
                }
            }

            #[cfg(not(target_arch = "aarch64"))]
            Implementation::Fallback => {
                gcm_nohw::gmult(xi, h_table.Htable[0]);
            }
        }
    }

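    /// Consumes the context and passes the final `Xi` value to `f`, which is
    /// expected to turn it into the authentication tag.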
    pub(super) fn pre_finish<F>(self, f: F) -> super::Tag
    where
        F: FnOnce(Block) -> super::Tag,
    {
        f(self.inner.Xi.0)
    }

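    /// Whether the AVX (MOVBE+AVX) GHASH implementation was selected.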
    #[cfg(target_arch = "x86_64")]
    pub(super) fn is_avx2(&self) -> bool {
        match detect_implementation(self.cpu_features) {
            Implementation::CLMUL => has_avx_movbe(self.cpu_features),
            _ => false,
        }
    }
}

// The alignment is required by non-Rust code that uses `GCM128_CONTEXT`.
#[derive(Clone)]
#[repr(C, align(16))]
pub(super) struct HTable {
    Htable: [u128; HTABLE_LEN],
}

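// A 128-bit value split into 64-bit halves, matching the layout the
// C/assembly GHASH code expects for `Htable` entries.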
#[derive(Clone, Copy)]
#[repr(C)]
struct u128 {
    hi: u64,
    lo: u64,
}

const HTABLE_LEN: usize = 16;

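/// The running GHASH accumulator.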
#[repr(transparent)]
pub struct Xi(Block);

impl BitXorAssign<Block> for Xi {
    #[inline]
    fn bitxor_assign(&mut self, a: Block) {
        self.0 ^= a;
    }
}

impl From<Xi> for Block {
    #[inline]
    fn from(Xi(block): Xi) -> Self {
        block
    }
}

// This corresponds roughly to the `GCM128_CONTEXT` structure in BoringSSL.
// Some assembly language code, in particular the MOVBE+AVX2 x86-64
// implementation, requires this exact layout.
#[repr(C, align(16))]
struct ContextInner {
    Xi: Xi,
    Htable: HTable,
}

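// The GHASH backends, in decreasing order of preference.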
enum Implementation {
    #[cfg(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    ))]
    CLMUL,

    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
    NEON,

    #[cfg(not(target_arch = "aarch64"))]
    Fallback,
}

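// Prefer carry-less multiplication (PCLMULQDQ on x86/x86-64, PMULL on Arm)
// when available, then NEON on Arm (always present on aarch64), and finally
// the portable fallback.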
#[inline]
fn detect_implementation(cpu_features: cpu::Features) -> Implementation {
    // `cpu_features` is only used for specific platforms.
    #[cfg(not(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    )))]
    let _cpu_features = cpu_features;

    #[cfg(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    ))]
    {
        if (cpu::intel::FXSR.available(cpu_features)
            && cpu::intel::PCLMULQDQ.available(cpu_features))
            || cpu::arm::PMULL.available(cpu_features)
        {
            return Implementation::CLMUL;
        }
    }

    #[cfg(target_arch = "arm")]
    {
        if cpu::arm::NEON.available(cpu_features) {
            return Implementation::NEON;
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        return Implementation::NEON;
    }

    #[cfg(not(target_arch = "aarch64"))]
    Implementation::Fallback
}

#[cfg(target_arch = "x86_64")]
fn has_avx_movbe(cpu_features: cpu::Features) -> bool {
    cpu::intel::AVX.available(cpu_features) && cpu::intel::MOVBE.available(cpu_features)
}