1 use crate::convert::*;
2 #[allow(unused)]
3 use zerocopy::transmute;
4 
5 ///This constant comes from Kunth's prng (Empirically it works better than those from splitmix32).
6 pub(crate) const MULTIPLE: u64 = 6364136223846793005;
7 
8 /// This is a constant with a lot of special properties found by automated search.
9 /// See the unit tests below. (Below are alternative values)
10 #[cfg(all(target_feature = "ssse3", not(miri)))]
11 const SHUFFLE_MASK: u128 = 0x020a0700_0c01030e_050f0d08_06090b04_u128;
12 //const SHUFFLE_MASK: u128 = 0x000d0702_0a040301_05080f0c_0e0b0609_u128;
13 //const SHUFFLE_MASK: u128 = 0x040A0700_030E0106_0D050F08_020B0C09_u128;
14 
15 #[inline(always)]
16 #[cfg(feature = "folded_multiply")]
folded_multiply(s: u64, by: u64) -> u6417 pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
18     let result = (s as u128).wrapping_mul(by as u128);
19     ((result & 0xffff_ffff_ffff_ffff) as u64) ^ ((result >> 64) as u64)
20 }
21 
22 #[inline(always)]
23 #[cfg(not(feature = "folded_multiply"))]
folded_multiply(s: u64, by: u64) -> u6424 pub(crate) const fn folded_multiply(s: u64, by: u64) -> u64 {
25     let b1 = s.wrapping_mul(by.swap_bytes());
26     let b2 = s.swap_bytes().wrapping_mul(!by);
27     b1 ^ b2.swap_bytes()
28 }
29 
30 /// Given a small (less than 8 byte slice) returns the same data stored in two u32s.
31 /// (order of and non-duplication of bytes is NOT guaranteed)
32 #[inline(always)]
read_small(data: &[u8]) -> [u64; 2]33 pub(crate) fn read_small(data: &[u8]) -> [u64; 2] {
34     debug_assert!(data.len() <= 8);
35     if data.len() >= 2 {
36         if data.len() >= 4 {
37             //len 4-8
38             [data.read_u32().0 as u64, data.read_last_u32() as u64]
39         } else {
40             //len 2-3
41             [data.read_u16().0 as u64, data[data.len() - 1] as u64]
42         }
43     } else {
44         if data.len() > 0 {
45             [data[0] as u64, data[0] as u64]
46         } else {
47             [0, 0]
48         }
49     }
50 }
51 
52 #[inline(always)]
shuffle(a: u128) -> u12853 pub(crate) fn shuffle(a: u128) -> u128 {
54     #[cfg(all(target_feature = "ssse3", not(miri)))]
55     {
56         #[cfg(target_arch = "x86")]
57         use core::arch::x86::*;
58         #[cfg(target_arch = "x86_64")]
59         use core::arch::x86_64::*;
60         unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(SHUFFLE_MASK))) }
61     }
62     #[cfg(not(all(target_feature = "ssse3", not(miri))))]
63     {
64         a.swap_bytes()
65     }
66 }
67 
68 #[allow(unused)] //not used by fallback
69 #[inline(always)]
add_and_shuffle(a: u128, b: u128) -> u12870 pub(crate) fn add_and_shuffle(a: u128, b: u128) -> u128 {
71     let sum = add_by_64s(a.convert(), b.convert());
72     shuffle(sum.convert())
73 }
74 
75 #[allow(unused)] //not used by fallback
76 #[inline(always)]
shuffle_and_add(base: u128, to_add: u128) -> u12877 pub(crate) fn shuffle_and_add(base: u128, to_add: u128) -> u128 {
78     let shuffled: [u64; 2] = shuffle(base).convert();
79     add_by_64s(shuffled, to_add.convert()).convert()
80 }
81 
82 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri)))]
83 #[inline(always)]
add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2]84 pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
85     unsafe {
86         #[cfg(target_arch = "x86")]
87         use core::arch::x86::*;
88         #[cfg(target_arch = "x86_64")]
89         use core::arch::x86_64::*;
90         transmute!(_mm_add_epi64(transmute!(a), transmute!(b)))
91     }
92 }
93 
94 #[cfg(not(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2", not(miri))))]
95 #[inline(always)]
add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2]96 pub(crate) fn add_by_64s(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
97     [a[0].wrapping_add(b[0]), a[1].wrapping_add(b[1])]
98 }
99 
100 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
101 #[allow(unused)]
102 #[inline(always)]
aesenc(value: u128, xor: u128) -> u128103 pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
104     #[cfg(target_arch = "x86")]
105     use core::arch::x86::*;
106     #[cfg(target_arch = "x86_64")]
107     use core::arch::x86_64::*;
108     unsafe {
109         let value = transmute!(value);
110         transmute!(_mm_aesenc_si128(value, transmute!(xor)))
111     }
112 }
113 
114 #[cfg(any(
115     all(feature = "nightly-arm-aes", target_arch = "aarch64", target_feature = "aes", not(miri)),
116     all(feature = "nightly-arm-aes", target_arch = "arm", target_feature = "aes", not(miri)),
117 ))]
118 #[allow(unused)]
119 #[inline(always)]
aesenc(value: u128, xor: u128) -> u128120 pub(crate) fn aesenc(value: u128, xor: u128) -> u128 {
121     #[cfg(target_arch = "aarch64")]
122     use core::arch::aarch64::*;
123     #[cfg(target_arch = "arm")]
124     use core::arch::arm::*;
125     let res = unsafe { vaesmcq_u8(vaeseq_u8(transmute!(value), transmute!(0u128))) };
126     let value: u128 = transmute!(res);
127     xor ^ value
128 }
129 
130 #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "aes", not(miri)))]
131 #[allow(unused)]
132 #[inline(always)]
aesdec(value: u128, xor: u128) -> u128133 pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
134     #[cfg(target_arch = "x86")]
135     use core::arch::x86::*;
136     #[cfg(target_arch = "x86_64")]
137     use core::arch::x86_64::*;
138     unsafe {
139         let value = transmute!(value);
140         transmute!(_mm_aesdec_si128(value, transmute!(xor)))
141     }
142 }
143 
144 #[cfg(any(
145     all(feature = "nightly-arm-aes", target_arch = "aarch64", target_feature = "aes", not(miri)),
146     all(feature = "nightly-arm-aes", target_arch = "arm", target_feature = "aes", not(miri)),
147 ))]
148 #[allow(unused)]
149 #[inline(always)]
aesdec(value: u128, xor: u128) -> u128150 pub(crate) fn aesdec(value: u128, xor: u128) -> u128 {
151     #[cfg(target_arch = "aarch64")]
152     use core::arch::aarch64::*;
153     #[cfg(target_arch = "arm")]
154     use core::arch::arm::*;
155     let res = unsafe { vaesimcq_u8(vaesdq_u8(transmute!(value), transmute!(0u128))) };
156     let value: u128 = transmute!(res);
157     xor ^ value
158 }
159 
160 #[allow(unused)]
161 #[inline(always)]
add_in_length(enc: &mut u128, len: u64)162 pub(crate) fn add_in_length(enc: &mut u128, len: u64) {
163     #[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(miri)))]
164     {
165         #[cfg(target_arch = "x86_64")]
166         use core::arch::x86_64::*;
167 
168         unsafe {
169             let enc = enc as *mut u128;
170             let len = _mm_cvtsi64_si128(len as i64);
171             let data = _mm_loadu_si128(enc.cast());
172             let sum = _mm_add_epi64(data, len);
173             _mm_storeu_si128(enc.cast(), sum);
174         }
175     }
176     #[cfg(not(all(target_arch = "x86_64", target_feature = "sse2", not(miri))))]
177     {
178         let mut t: [u64; 2] = enc.convert();
179         t[0] = t[0].wrapping_add(len);
180         *enc = t.convert();
181     }
182 }
183 
184 #[cfg(test)]
185 mod test {
186     use super::*;
187 
188     // This is code to search for the shuffle constant
189     //
190     //thread_local! { static MASK: Cell<u128> = Cell::new(0); }
191     //
192     // fn shuffle(a: u128) -> u128 {
193     //     use std::intrinsics::transmute;
194     //     #[cfg(target_arch = "x86")]
195     //     use core::arch::x86::*;
196     //     #[cfg(target_arch = "x86_64")]
197     //     use core::arch::x86_64::*;
198     //     MASK.with(|mask| {
199     //         unsafe { transmute!(_mm_shuffle_epi8(transmute!(a), transmute!(mask.get()))) }
200     //     })
201     // }
202     //
203     // #[test]
204     // fn find_shuffle() {
205     //     use rand::prelude::*;
206     //     use SliceRandom;
207     //     use std::panic;
208     //     use std::io::Write;
209     //
210     //     let mut value: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 ,13, 14, 15];
211     //     let mut rand = thread_rng();
212     //     let mut successful_list = HashMap::new();
213     //     for _attempt in 0..10000000 {
214     //         rand.shuffle(&mut value);
215     //         let test_val = value.convert();
216     //         MASK.with(|mask| {
217     //             mask.set(test_val);
218     //         });
219     //         if let Ok(successful) = panic::catch_unwind(|| {
220     //             test_shuffle_does_not_collide_with_aes();
221     //             test_shuffle_moves_high_bits();
222     //             test_shuffle_moves_every_value();
223     //             //test_shuffle_does_not_loop();
224     //             value
225     //         }) {
226     //             let successful: u128 = successful.convert();
227     //             successful_list.insert(successful, iters_before_loop());
228     //         }
229     //     }
230     //     let write_file = File::create("/tmp/output").unwrap();
231     //     let mut writer = BufWriter::new(&write_file);
232     //
233     //     for success in successful_list {
234     //         writeln!(writer, "Found successful: {:x?} - {:?}", success.0, success.1);
235     //     }
236     // }
237     //
238     // fn iters_before_loop() -> u32 {
239     //     let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF;
240     //     let mut shuffled = shuffle(numbered);
241     //     let mut count = 0;
242     //     loop {
243     //         // println!("{:>16x}", shuffled);
244     //         if numbered == shuffled {
245     //             break;
246     //         }
247     //         count += 1;
248     //         shuffled = shuffle(shuffled);
249     //     }
250     //     count
251     // }
252 
253     #[cfg(all(
254         any(target_arch = "x86", target_arch = "x86_64"),
255         target_feature = "ssse3",
256         target_feature = "aes",
257         not(miri)
258     ))]
259     #[test]
test_shuffle_does_not_collide_with_aes()260     fn test_shuffle_does_not_collide_with_aes() {
261         let mut value: [u8; 16] = [0; 16];
262         let zero_mask_enc = aesenc(0, 0);
263         let zero_mask_dec = aesdec(0, 0);
264         for index in 0..16 {
265             value[index] = 1;
266             let excluded_positions_enc: [u8; 16] = aesenc(value.convert(), zero_mask_enc).convert();
267             let excluded_positions_dec: [u8; 16] = aesdec(value.convert(), zero_mask_dec).convert();
268             let actual_location: [u8; 16] = shuffle(value.convert()).convert();
269             for pos in 0..16 {
270                 if actual_location[pos] != 0 {
271                     assert_eq!(
272                         0, excluded_positions_enc[pos],
273                         "Forward Overlap between {:?} and {:?} at {}",
274                         excluded_positions_enc, actual_location, index
275                     );
276                     assert_eq!(
277                         0, excluded_positions_dec[pos],
278                         "Reverse Overlap between {:?} and {:?} at {}",
279                         excluded_positions_dec, actual_location, index
280                     );
281                 }
282             }
283             value[index] = 0;
284         }
285     }
286 
287     #[test]
test_shuffle_contains_each_value()288     fn test_shuffle_contains_each_value() {
289         let value: [u8; 16] = 0x00010203_04050607_08090A0B_0C0D0E0F_u128.convert();
290         let shuffled: [u8; 16] = shuffle(value.convert()).convert();
291         for index in 0..16_u8 {
292             assert!(shuffled.contains(&index), "Value is missing {}", index);
293         }
294     }
295 
296     #[test]
test_shuffle_moves_every_value()297     fn test_shuffle_moves_every_value() {
298         let mut value: [u8; 16] = [0; 16];
299         for index in 0..16 {
300             value[index] = 1;
301             let shuffled: [u8; 16] = shuffle(value.convert()).convert();
302             assert_eq!(0, shuffled[index], "Value is not moved {}", index);
303             value[index] = 0;
304         }
305     }
306 
307     #[test]
test_shuffle_moves_high_bits()308     fn test_shuffle_moves_high_bits() {
309         assert!(
310             shuffle(1) > (1_u128 << 80),
311             "Low bits must be moved to other half {:?} -> {:?}",
312             0,
313             shuffle(1)
314         );
315 
316         assert!(
317             shuffle(1_u128 << 58) >= (1_u128 << 64),
318             "High bits must be moved to other half {:?} -> {:?}",
319             7,
320             shuffle(1_u128 << 58)
321         );
322         assert!(
323             shuffle(1_u128 << 58) < (1_u128 << 112),
324             "High bits must not remain high {:?} -> {:?}",
325             7,
326             shuffle(1_u128 << 58)
327         );
328         assert!(
329             shuffle(1_u128 << 64) < (1_u128 << 64),
330             "Low bits must be moved to other half {:?} -> {:?}",
331             8,
332             shuffle(1_u128 << 64)
333         );
334         assert!(
335             shuffle(1_u128 << 64) >= (1_u128 << 16),
336             "Low bits must not remain low {:?} -> {:?}",
337             8,
338             shuffle(1_u128 << 64)
339         );
340 
341         assert!(
342             shuffle(1_u128 << 120) < (1_u128 << 50),
343             "High bits must be moved to low half {:?} -> {:?}",
344             15,
345             shuffle(1_u128 << 120)
346         );
347     }
348 
349     #[cfg(all(
350         any(target_arch = "x86", target_arch = "x86_64"),
351         target_feature = "ssse3",
352         not(miri)
353     ))]
354     #[test]
test_shuffle_does_not_loop()355     fn test_shuffle_does_not_loop() {
356         let numbered = 0x00112233_44556677_8899AABB_CCDDEEFF;
357         let mut shuffled = shuffle(numbered);
358         for count in 0..100 {
359             // println!("{:>16x}", shuffled);
360             assert_ne!(numbered, shuffled, "Equal after {} vs {:x}", count, shuffled);
361             shuffled = shuffle(shuffled);
362         }
363     }
364 
365     #[test]
test_add_length()366     fn test_add_length() {
367         let mut enc = (u64::MAX as u128) << 64 | 50;
368         add_in_length(&mut enc, u64::MAX);
369         assert_eq!(enc >> 64, u64::MAX as u128);
370         assert_eq!(enc as u64, 49);
371     }
372 }
373