xref: /aosp_15_r20/external/libopenapv/src/avx/oapv_tq_avx.h (revision abb65b4b03b69e1d508d4d9a44dcf199df16e7c3)
/*
 * Copyright (c) 2022 Samsung Electronics Co., Ltd.
 * All Rights Reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * - Redistributions of source code must retain the above copyright notice,
 *   this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * - Neither the name of the copyright owner, nor the names of its contributors
 *   may be used to endorse or promote products derived from this software
 *   without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
31*abb65b4bSAndroid Build Coastguard Worker 
32*abb65b4bSAndroid Build Coastguard Worker #ifndef _OAPV_TQ_AVX_H_
33*abb65b4bSAndroid Build Coastguard Worker #define _OAPV_TQ_AVX_H_
34*abb65b4bSAndroid Build Coastguard Worker 
35*abb65b4bSAndroid Build Coastguard Worker 
36*abb65b4bSAndroid Build Coastguard Worker ///////////////////////////////////////////////////////////////////////////////
37*abb65b4bSAndroid Build Coastguard Worker // start of encoder code
38*abb65b4bSAndroid Build Coastguard Worker #if ENABLE_ENCODER
39*abb65b4bSAndroid Build Coastguard Worker ///////////////////////////////////////////////////////////////////////////////
40*abb65b4bSAndroid Build Coastguard Worker 
41*abb65b4bSAndroid Build Coastguard Worker #if X86_SSE
42*abb65b4bSAndroid Build Coastguard Worker 
/*
 * CALCU_2x8(c0, c1, d0, d1)
 *
 * Produces two result vectors from four source vectors:
 *   - each of s0..s3 is multiply-added (_mm256_madd_epi16) with c0, and
 *     again with c1;
 *   - the four products per coefficient are reduced by two rounds of
 *     _mm256_hadd_epi32, pairing (s0,s1) and (s2,s3) first;
 *   - the c0 reduction is stored in d0, the c1 reduction in d1.
 *
 * Parameters:
 *   c0, c1  coefficient vectors (read only).
 *   d0, d1  lvalues that receive the results.
 *
 * This macro is not hygienic: it reads s0..s3 and overwrites v0..v7, which
 * must all be declared at the expansion site. It expands to several
 * statements with no trailing semicolon and is not wrapped in do/while(0),
 * so it must not be used as the body of an unbraced if/else/loop.
 */
#define CALCU_2x8(c0, c1, d0, d1)       \
    v0 = _mm256_madd_epi16(s0, c0);     \
    v1 = _mm256_madd_epi16(s1, c0);     \
    v2 = _mm256_madd_epi16(s2, c0);     \
    v3 = _mm256_madd_epi16(s3, c0);     \
    v4 = _mm256_madd_epi16(s0, c1);     \
    v5 = _mm256_madd_epi16(s1, c1);     \
    v6 = _mm256_madd_epi16(s2, c1);     \
    v7 = _mm256_madd_epi16(s3, c1);     \
    v0 = _mm256_hadd_epi32(v0, v1);     \
    v2 = _mm256_hadd_epi32(v2, v3);     \
    v4 = _mm256_hadd_epi32(v4, v5);     \
    v6 = _mm256_hadd_epi32(v6, v7);     \
    d0 = _mm256_hadd_epi32(v0, v2);     \
    d1 = _mm256_hadd_epi32(v4, v6)
58*abb65b4bSAndroid Build Coastguard Worker 
/*
 * CALCU_2x8_ADD_SHIFT(d0, d1, d2, d3, add, shift)
 *
 * Applies, in place, to each of the four vectors d0..d3:
 *     d = _mm256_srai_epi32(_mm256_add_epi32(d, add), shift)
 * i.e. add an offset vector, then arithmetic-shift every 32-bit element
 * right by `shift`.
 *
 * Parameters:
 *   d0..d3  lvalues, updated in place.
 *   add     offset vector added before the shift (presumably the rounding
 *           term for `shift` - confirm against the callers).
 *   shift   right-shift count passed to _mm256_srai_epi32.
 *
 * NOTE(review): unlike the sibling macros in this header, the expansion
 * already ends with a ';'. That is kept on purpose so that call sites
 * written without their own semicolon keep compiling; writing
 * `CALCU_2x8_ADD_SHIFT(...);` therefore yields an extra empty statement.
 * Do not use this macro as the body of an unbraced if/else/loop.
 */
#define CALCU_2x8_ADD_SHIFT(d0, d1, d2, d3, add, shift) \
    d0 = _mm256_add_epi32(d0, add);                     \
    d1 = _mm256_add_epi32(d1, add);                     \
    d2 = _mm256_add_epi32(d2, add);                     \
    d3 = _mm256_add_epi32(d3, add);                     \
    d0 = _mm256_srai_epi32(d0, shift);                  \
    d1 = _mm256_srai_epi32(d1, shift);                  \
    d2 = _mm256_srai_epi32(d2, shift);                  \
    d3 = _mm256_srai_epi32(d3, shift);
68*abb65b4bSAndroid Build Coastguard Worker 
/*
 * CALCU_2x4(c0, c1, c2, c3, d0, d1)
 *
 * Produces two result vectors from two source vectors and four coefficient
 * vectors:
 *   - s0 and s1 are each multiply-added (_mm256_madd_epi16) with c0..c3;
 *   - per coefficient, the (s0, s1) products are combined with
 *     _mm256_hadd_epi32;
 *   - the c0/c1 sums are hadd-ed into d0, the c2/c3 sums into d1;
 *   - both results are then reordered with _mm256_permute4x64_epi64(.., 0xd8),
 *     which selects the 64-bit quarters in the order 0, 2, 1, 3 (swapping
 *     the two middle quarters).
 *
 * Parameters:
 *   c0..c3  coefficient vectors (read only).
 *   d0, d1  lvalues that receive the results.
 *
 * This macro is not hygienic: it reads s0 and s1 and overwrites v0..v7,
 * which must all be declared at the expansion site. It expands to several
 * statements with no trailing semicolon and is not wrapped in do/while(0),
 * so it must not be used as the body of an unbraced if/else/loop.
 */
#define CALCU_2x4(c0, c1, c2, c3, d0, d1)     \
    v0 = _mm256_madd_epi16(s0, c0);           \
    v1 = _mm256_madd_epi16(s1, c0);           \
    v2 = _mm256_madd_epi16(s0, c1);           \
    v3 = _mm256_madd_epi16(s1, c1);           \
    v4 = _mm256_madd_epi16(s0, c2);           \
    v5 = _mm256_madd_epi16(s1, c2);           \
    v6 = _mm256_madd_epi16(s0, c3);           \
    v7 = _mm256_madd_epi16(s1, c3);           \
    v0 = _mm256_hadd_epi32(v0, v1);           \
    v2 = _mm256_hadd_epi32(v2, v3);           \
    v4 = _mm256_hadd_epi32(v4, v5);           \
    v6 = _mm256_hadd_epi32(v6, v7);           \
    d0 = _mm256_hadd_epi32(v0, v2);           \
    d1 = _mm256_hadd_epi32(v4, v6);           \
    d0 = _mm256_permute4x64_epi64(d0, 0xd8);  \
    d1 = _mm256_permute4x64_epi64(d1, 0xd8)
86*abb65b4bSAndroid Build Coastguard Worker 
/*
 * CALCU_LINE_1x8(coeff0, dst)
 *
 * Reduces eight source vectors against a single coefficient vector:
 *   - s00..s07 are each multiply-added (_mm256_madd_epi16) with coeff0;
 *   - the eight products are folded by two rounds of _mm256_hadd_epi32,
 *     leaving the s00..s03 reduction in v0 and the s04..s07 reduction in v4;
 *   - _mm256_permute2x128_si256 with 0x20 gathers the low 128-bit halves of
 *     (v0, v4) and with 0x31 the high halves; their element-wise sum is
 *     stored in dst.
 *
 * Parameters:
 *   coeff0  coefficient vector (read only).
 *   dst     lvalue that receives the result.
 *
 * This macro is not hygienic: it reads s00..s07 and overwrites v0..v7,
 * which must all be declared at the expansion site. It expands to several
 * statements with no trailing semicolon and is not wrapped in do/while(0),
 * so it must not be used as the body of an unbraced if/else/loop.
 */
#define CALCU_LINE_1x8(coeff0, dst)               \
    v0 = _mm256_madd_epi16(s00, coeff0);          \
    v1 = _mm256_madd_epi16(s01, coeff0);          \
    v2 = _mm256_madd_epi16(s02, coeff0);          \
    v3 = _mm256_madd_epi16(s03, coeff0);          \
    v4 = _mm256_madd_epi16(s04, coeff0);          \
    v5 = _mm256_madd_epi16(s05, coeff0);          \
    v6 = _mm256_madd_epi16(s06, coeff0);          \
    v7 = _mm256_madd_epi16(s07, coeff0);          \
    v0 = _mm256_hadd_epi32(v0, v1);               \
    v2 = _mm256_hadd_epi32(v2, v3);               \
    v4 = _mm256_hadd_epi32(v4, v5);               \
    v6 = _mm256_hadd_epi32(v6, v7);               \
    v0 = _mm256_hadd_epi32(v0, v2);               \
    v4 = _mm256_hadd_epi32(v4, v6);               \
    v1 = _mm256_permute2x128_si256(v0, v4, 0x20); \
    v2 = _mm256_permute2x128_si256(v0, v4, 0x31); \
    dst = _mm256_add_epi32(v1, v2)
105*abb65b4bSAndroid Build Coastguard Worker 
/*
 * CALCU_LINE_1x8_ADD_SHIFT(d0, d1, d2, d3, d4, d5, d6, d7, add, shift)
 *
 * Applies, in place, to each of the eight vectors d0..d7:
 *     d = _mm256_srai_epi32(_mm256_add_epi32(d, add), shift)
 * i.e. add an offset vector, then arithmetic-shift every 32-bit element
 * right by `shift`. All eight additions are issued before the shifts.
 *
 * Parameters:
 *   d0..d7  lvalues, updated in place.
 *   add     offset vector added before the shift (presumably the rounding
 *           term for `shift` - confirm against the callers).
 *   shift   right-shift count passed to _mm256_srai_epi32.
 *
 * Expands to several statements with no trailing semicolon and is not
 * wrapped in do/while(0), so it must not be used as the body of an unbraced
 * if/else/loop.
 */
#define CALCU_LINE_1x8_ADD_SHIFT(d0, d1, d2, d3, d4, d5, d6, d7, add, shift) \
    d0 = _mm256_add_epi32(d0, add);                                          \
    d1 = _mm256_add_epi32(d1, add);                                          \
    d2 = _mm256_add_epi32(d2, add);                                          \
    d3 = _mm256_add_epi32(d3, add);                                          \
    d4 = _mm256_add_epi32(d4, add);                                          \
    d5 = _mm256_add_epi32(d5, add);                                          \
    d6 = _mm256_add_epi32(d6, add);                                          \
    d7 = _mm256_add_epi32(d7, add);                                          \
    d0 = _mm256_srai_epi32(d0, shift);                                       \
    d1 = _mm256_srai_epi32(d1, shift);                                       \
    d2 = _mm256_srai_epi32(d2, shift);                                       \
    d3 = _mm256_srai_epi32(d3, shift);                                       \
    d4 = _mm256_srai_epi32(d4, shift);                                       \
    d5 = _mm256_srai_epi32(d5, shift);                                       \
    d6 = _mm256_srai_epi32(d6, shift);                                       \
    d7 = _mm256_srai_epi32(d7, shift)
123*abb65b4bSAndroid Build Coastguard Worker #endif /* X86_SSE */
124*abb65b4bSAndroid Build Coastguard Worker 
125*abb65b4bSAndroid Build Coastguard Worker ///////////////////////////////////////////////////////////////////////////////
126*abb65b4bSAndroid Build Coastguard Worker // end of encoder code
127*abb65b4bSAndroid Build Coastguard Worker #endif // ENABLE_ENCODER
128*abb65b4bSAndroid Build Coastguard Worker ///////////////////////////////////////////////////////////////////////////////
129*abb65b4bSAndroid Build Coastguard Worker 
130*abb65b4bSAndroid Build Coastguard Worker 
#if X86_SSE
/*
 * AVX function tables, two entries each. They are only declared here; the
 * definitions live in another translation unit (presumably the matching
 * oapv_tq_avx.c - confirm). The oapv_fn_*_t element types must already be
 * declared before this header is included.
 *
 * These declarations sit outside the ENABLE_ENCODER section, so they are
 * visible whenever X86_SSE is non-zero.
 */
extern const oapv_fn_tx_t       oapv_tbl_fn_txb_avx[2];
extern const oapv_fn_quant_t    oapv_tbl_fn_quant_avx[2];
extern const oapv_fn_itx_part_t oapv_tbl_fn_itx_part_avx[2];
extern const oapv_fn_itx_t      oapv_tbl_fn_itx_avx[2];
extern const oapv_fn_dquant_t   oapv_tbl_fn_dquant_avx[2];
extern const oapv_fn_itx_adj_t  oapv_tbl_fn_itx_adj_avx[2];
#endif /* X86_SSE */
139*abb65b4bSAndroid Build Coastguard Worker 
140*abb65b4bSAndroid Build Coastguard Worker 
141*abb65b4bSAndroid Build Coastguard Worker #endif /* _OAPV_TQ_AVX_H_  */
142