1*c8dee2aaSAndroid Build Coastguard Worker /*
2*c8dee2aaSAndroid Build Coastguard Worker * Copyright 2021 Google LLC
3*c8dee2aaSAndroid Build Coastguard Worker *
4*c8dee2aaSAndroid Build Coastguard Worker * Use of this source code is governed by a BSD-style license that can be
5*c8dee2aaSAndroid Build Coastguard Worker * found in the LICENSE file.
6*c8dee2aaSAndroid Build Coastguard Worker */
7*c8dee2aaSAndroid Build Coastguard Worker
8*c8dee2aaSAndroid Build Coastguard Worker #ifndef QMath_DEFINED
9*c8dee2aaSAndroid Build Coastguard Worker #define QMath_DEFINED
10*c8dee2aaSAndroid Build Coastguard Worker
// V<N, T> names a Clang extended vector (ext_vector_type attribute) with N lanes of
// element type T. The attribute is Clang-specific, which is why other compilers are
// rejected by the static_assert that follows this declaration.
11*c8dee2aaSAndroid Build Coastguard Worker template <int N, typename T> using V = T __attribute__((ext_vector_type(N)));
12*c8dee2aaSAndroid Build Coastguard Worker
13*c8dee2aaSAndroid Build Coastguard Worker #if !defined(__clang__)
14*c8dee2aaSAndroid Build Coastguard Worker static_assert(false, "This only works on clang.");
15*c8dee2aaSAndroid Build Coastguard Worker #endif
16*c8dee2aaSAndroid Build Coastguard Worker
17*c8dee2aaSAndroid Build Coastguard Worker #if defined(__SSSE3__)
18*c8dee2aaSAndroid Build Coastguard Worker #include <immintrin.h>
19*c8dee2aaSAndroid Build Coastguard Worker #endif
20*c8dee2aaSAndroid Build Coastguard Worker
21*c8dee2aaSAndroid Build Coastguard Worker #if defined(__ARM_NEON)
22*c8dee2aaSAndroid Build Coastguard Worker // From section 5.5.5 of the ARM C Language Extensions (ACLE)
23*c8dee2aaSAndroid Build Coastguard Worker #include <arm_neon.h>
24*c8dee2aaSAndroid Build Coastguard Worker #endif
25*c8dee2aaSAndroid Build Coastguard Worker
26*c8dee2aaSAndroid Build Coastguard Worker #include <cassert>
27*c8dee2aaSAndroid Build Coastguard Worker #include <cstdint>
28*c8dee2aaSAndroid Build Coastguard Worker
// Eight-lane vectors of 16-bit integers used by the helpers in this header.
// Q15 and U16 are both V<8, uint16_t>, so they name the same type; I16 is the
// signed counterpart.
// NOTE(review): the name Q15 presumably marks lanes that hold Q15 fixed-point
// values (stored in unsigned lanes) — confirm against callers.
29*c8dee2aaSAndroid Build Coastguard Worker using Q15 = V<8, uint16_t>;
30*c8dee2aaSAndroid Build Coastguard Worker using I16 = V<8, int16_t>;
31*c8dee2aaSAndroid Build Coastguard Worker using U16 = V<8, uint16_t>;
32*c8dee2aaSAndroid Build Coastguard Worker
33*c8dee2aaSAndroid Build Coastguard Worker
constrained_add(I16 a,U16 b)34*c8dee2aaSAndroid Build Coastguard Worker static inline U16 constrained_add(I16 a, U16 b) {
35*c8dee2aaSAndroid Build Coastguard Worker for (size_t i = 0; i < 8; i++) {
36*c8dee2aaSAndroid Build Coastguard Worker // Ensure that a + b is on the interval [0, UINT16_MAX]
37*c8dee2aaSAndroid Build Coastguard Worker assert(-b[i] <= a[i] && a[i] <= UINT16_MAX - b[i]);
38*c8dee2aaSAndroid Build Coastguard Worker }
39*c8dee2aaSAndroid Build Coastguard Worker U16 answer = b + a;
40*c8dee2aaSAndroid Build Coastguard Worker return answer;
41*c8dee2aaSAndroid Build Coastguard Worker }
42*c8dee2aaSAndroid Build Coastguard Worker
43*c8dee2aaSAndroid Build Coastguard Worker // A pure C version of the ssse3 intrinsic mm_mulhrs_epi16;
simulate_ssse3_mm_mulhrs_epi16(I16 a,I16 b)44*c8dee2aaSAndroid Build Coastguard Worker static inline I16 simulate_ssse3_mm_mulhrs_epi16(I16 a, I16 b) {
45*c8dee2aaSAndroid Build Coastguard Worker I16 result;
46*c8dee2aaSAndroid Build Coastguard Worker auto m = [](int16_t r, int16_t s) {
47*c8dee2aaSAndroid Build Coastguard Worker const int32_t rounding = 1 << 14;
48*c8dee2aaSAndroid Build Coastguard Worker int32_t temp = (int32_t)r * (int32_t)s + rounding;
49*c8dee2aaSAndroid Build Coastguard Worker return (int16_t)(temp >> 15);
50*c8dee2aaSAndroid Build Coastguard Worker };
51*c8dee2aaSAndroid Build Coastguard Worker for (int i = 0; i < 8; i++) {
52*c8dee2aaSAndroid Build Coastguard Worker result[i] = m(a[i], b[i]);
53*c8dee2aaSAndroid Build Coastguard Worker }
54*c8dee2aaSAndroid Build Coastguard Worker return result;
55*c8dee2aaSAndroid Build Coastguard Worker }
56*c8dee2aaSAndroid Build Coastguard Worker
57*c8dee2aaSAndroid Build Coastguard Worker // A pure C version of the neon intrinsic vqrdmulhq_s16;
simulate_neon_vqrdmulhq_s16(Q15 a,Q15 b)58*c8dee2aaSAndroid Build Coastguard Worker static inline Q15 simulate_neon_vqrdmulhq_s16(Q15 a, Q15 b) {
59*c8dee2aaSAndroid Build Coastguard Worker Q15 result;
60*c8dee2aaSAndroid Build Coastguard Worker const int esize = 16;
61*c8dee2aaSAndroid Build Coastguard Worker auto m = [](int16_t r, int16_t s) {
62*c8dee2aaSAndroid Build Coastguard Worker const int64_t rounding = 1 << (esize - 1);
63*c8dee2aaSAndroid Build Coastguard Worker int64_t product = 2LL * (int64_t)r * (int64_t)s + rounding;
64*c8dee2aaSAndroid Build Coastguard Worker int64_t result = product >> esize;
65*c8dee2aaSAndroid Build Coastguard Worker
66*c8dee2aaSAndroid Build Coastguard Worker // Saturate the result
67*c8dee2aaSAndroid Build Coastguard Worker if (int64_t limit = (1LL << (esize - 1)) - 1; result > limit) { result = limit; }
68*c8dee2aaSAndroid Build Coastguard Worker if (int64_t limit = -(1LL << (esize - 1)) ; result < limit) { result = limit; }
69*c8dee2aaSAndroid Build Coastguard Worker return result;
70*c8dee2aaSAndroid Build Coastguard Worker };
71*c8dee2aaSAndroid Build Coastguard Worker for (int i = 0; i < 8; i++) {
72*c8dee2aaSAndroid Build Coastguard Worker result[i] = m(a[i], b[i]);
73*c8dee2aaSAndroid Build Coastguard Worker }
74*c8dee2aaSAndroid Build Coastguard Worker return result;
75*c8dee2aaSAndroid Build Coastguard Worker }
76*c8dee2aaSAndroid Build Coastguard Worker
77*c8dee2aaSAndroid Build Coastguard Worker #endif // QMath_DEFINED
78