// Copyright 2018 Brian Smith.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

use super::{
    block::{Block, BLOCK_LEN},
    Aad,
};
use crate::cpu;
use core::ops::BitXorAssign;

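// A rough sketch of how the AES-GCM code is expected to drive this module
// (illustrative only, not actual caller code; the variable names are made up):
//
//     let key = Key::new(h_be, cpu_features);   // `h_be` is the big-endian hash key H
//     let mut ctx = Context::new(&key, aad);     // absorbs the zero-padded AAD
//     ctx.update_blocks(whole_blocks);           // length must be a multiple of BLOCK_LEN
//     ctx.update_block(final_block);             // e.g. the lengths block
//     let tag = ctx.pre_finish(|xi| /* turn the final Xi into a Tag */);
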
#[cfg(not(target_arch = "aarch64"))]
mod gcm_nohw;

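/// A GHASH key: the hash key `H` (passed in big-endian form) expanded into a
/// table of values precomputed by the selected platform implementation.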
#[derive(Clone)]
pub struct Key {
    h_table: HTable,
    cpu_features: cpu::Features,
}

impl Key {
    pub(super) fn new(h_be: Block, cpu_features: cpu::Features) -> Self {
        let h: [u64; 2] = h_be.into();

        let mut key = Self {
            h_table: HTable {
                Htable: [u128 { hi: 0, lo: 0 }; HTABLE_LEN],
            },
            cpu_features,
        };
        let h_table = &mut key.h_table;

        match detect_implementation(cpu_features) {
            #[cfg(target_arch = "x86_64")]
            Implementation::CLMUL if has_avx_movbe(cpu_features) => {
                prefixed_extern! {
                    fn gcm_init_avx(HTable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_avx(h_table, &h);
                }
            }

            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_init_clmul(Htable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_clmul(h_table, &h);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_init_neon(Htable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_neon(h_table, &h);
                }
            }

            #[cfg(not(target_arch = "aarch64"))]
            Implementation::Fallback => {
                h_table.Htable[0] = gcm_nohw::init(h);
            }
        }

        key
    }
}

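/// An in-progress GHASH computation: the running `Xi` accumulator plus a copy
/// of the key's table, kept together in the `GCM128_CONTEXT`-compatible
/// `ContextInner` layout that some of the assembly code requires.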
pub struct Context {
    inner: ContextInner,
    cpu_features: cpu::Features,
}

impl Context {
    pub(crate) fn new(key: &Key, aad: Aad<&[u8]>) -> Self {
        let mut ctx = Self {
            inner: ContextInner {
                Xi: Xi(Block::zero()),
                Htable: key.h_table.clone(),
            },
            cpu_features: key.cpu_features,
        };

        for ad in aad.0.chunks(BLOCK_LEN) {
            let mut block = Block::zero();
            block.overwrite_part_at(0, ad);
            ctx.update_block(block);
        }

        ctx
    }

    /// Access to `inner` for the integrated AES-GCM implementations only.
    #[cfg(target_arch = "x86_64")]
    #[inline]
    pub(super) fn inner(&mut self) -> (&HTable, &mut Xi) {
        (&self.inner.Htable, &mut self.inner.Xi)
    }

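    /// Absorbs `input` into the GHASH state. `input` must be non-empty and a
    /// whole number of `BLOCK_LEN`-byte blocks (only debug-asserted here).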
    pub fn update_blocks(&mut self, input: &[u8]) {
        // The assembly functions take the input length in bytes, not blocks.
        let input_bytes = input.len();

        debug_assert_eq!(input_bytes % BLOCK_LEN, 0);
        debug_assert!(input_bytes > 0);

        let input = input.as_ptr() as *const [u8; BLOCK_LEN];
        // SAFETY:
        // - `[[u8; BLOCK_LEN]]` has the same bit validity as `[u8]`.
        // - `[[u8; BLOCK_LEN]]` has the same alignment requirement as `[u8]`.
        // - `input_bytes / BLOCK_LEN` ensures that the total length in bytes of
        //   the new `[[u8; BLOCK_LEN]]` will not be longer than the original
        //   `[u8]`.
        let input = unsafe { core::slice::from_raw_parts(input, input_bytes / BLOCK_LEN) };

        let xi = &mut self.inner.Xi;
        let h_table = &self.inner.Htable;

        match detect_implementation(self.cpu_features) {
            #[cfg(target_arch = "x86_64")]
            Implementation::CLMUL if has_avx_movbe(self.cpu_features) => {
                prefixed_extern! {
                    fn gcm_ghash_avx(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_avx(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_ghash_clmul(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_clmul(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_ghash_neon(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_neon(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(not(target_arch = "aarch64"))]
            Implementation::Fallback => {
                gcm_nohw::ghash(xi, h_table.Htable[0], input);
            }
        }
    }

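    /// Absorbs a single block: XORs `a` into `Xi` and then multiplies by `H`.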
    pub fn update_block(&mut self, a: Block) {
        self.inner.Xi.bitxor_assign(a);

        // Although these functions take `Xi` and `h_table` as separate
        // parameters, one or more of them might assume that they are part of
        // the same `ContextInner` structure.
        let xi = &mut self.inner.Xi;
        let h_table = &self.inner.Htable;

        match detect_implementation(self.cpu_features) {
            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_gmult_clmul(xi: &mut Xi, Htable: &HTable);
                }
                unsafe {
                    gcm_gmult_clmul(xi, h_table);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_gmult_neon(xi: &mut Xi, Htable: &HTable);
                }
                unsafe {
                    gcm_gmult_neon(xi, h_table);
                }
            }

            #[cfg(not(target_arch = "aarch64"))]
            Implementation::Fallback => {
                gcm_nohw::gmult(xi, h_table.Htable[0]);
            }
        }
    }

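    /// Consumes the context and hands the final `Xi` value to `f`, which is
    /// expected to produce the authentication tag from it; any masking with
    /// the cipher's keystream is the caller's responsibility.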
    pub(super) fn pre_finish<F>(self, f: F) -> super::Tag
    where
        F: FnOnce(Block) -> super::Tag,
    {
        f(self.inner.Xi.0)
    }

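    /// Whether the AVX+MOVBE GHASH implementation was selected, so the
    /// integrated AES-GCM code can take the matching assembly path. Note:
    /// despite the name, the check is for AVX and MOVBE (see `has_avx_movbe`).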
    #[cfg(target_arch = "x86_64")]
    pub(super) fn is_avx2(&self) -> bool {
        match detect_implementation(self.cpu_features) {
            Implementation::CLMUL => has_avx_movbe(self.cpu_features),
            _ => false,
        }
    }
}

// The alignment is required by non-Rust code that uses `GCM128_CONTEXT`.
#[derive(Clone)]
#[repr(C, align(16))]
pub(super) struct HTable {
    Htable: [u128; HTABLE_LEN],
}

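// The layout of one table entry; this mirrors the `u128` struct used by the
// BoringSSL-derived GHASH code and is not Rust's built-in `u128` type.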
#[derive(Clone, Copy)]
#[repr(C)]
struct u128 {
    hi: u64,
    lo: u64,
}

const HTABLE_LEN: usize = 16;

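/// The running GHASH accumulator (`Xi` in the GCM specification).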
#[repr(transparent)]
pub struct Xi(Block);

impl BitXorAssign<Block> for Xi {
    #[inline]
    fn bitxor_assign(&mut self, a: Block) {
        self.0 ^= a;
    }
}

impl From<Xi> for Block {
    #[inline]
    fn from(Xi(block): Xi) -> Self {
        block
    }
}

// This corresponds roughly to the `GCM128_CONTEXT` structure in BoringSSL.
// Some assembly language code, in particular the MOVBE+AVX2 X86-64
// implementation, requires this exact layout.
#[repr(C, align(16))]
struct ContextInner {
    Xi: Xi,
    Htable: HTable,
}

enum Implementation {
    #[cfg(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    ))]
    CLMUL,

    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
    NEON,

    #[cfg(not(target_arch = "aarch64"))]
    Fallback,
}

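// Selection order: prefer carry-less multiplication (PCLMULQDQ on x86/x86_64,
// PMULL on ARM/AArch64), then NEON on 32-bit ARM (AArch64 always has NEON),
// and otherwise the portable `gcm_nohw` fallback.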
#[inline]
fn detect_implementation(cpu_features: cpu::Features) -> Implementation {
    // `cpu_features` is only used for specific platforms.
    #[cfg(not(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    )))]
    let _cpu_features = cpu_features;

    #[cfg(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    ))]
    {
        if (cpu::intel::FXSR.available(cpu_features)
            && cpu::intel::PCLMULQDQ.available(cpu_features))
            || cpu::arm::PMULL.available(cpu_features)
        {
            return Implementation::CLMUL;
        }
    }

    #[cfg(target_arch = "arm")]
    {
        if cpu::arm::NEON.available(cpu_features) {
            return Implementation::NEON;
        }
    }

    #[cfg(target_arch = "aarch64")]
    {
        return Implementation::NEON;
    }

    #[cfg(not(target_arch = "aarch64"))]
    Implementation::Fallback
}

#[cfg(target_arch = "x86_64")]
fn has_avx_movbe(cpu_features: cpu::Features) -> bool {
    cpu::intel::AVX.available(cpu_features) && cpu::intel::MOVBE.available(cpu_features)
}