xref: /aosp_15_r20/external/libaom/av1/common/x86/cfl_simd.h (revision 77c1e3ccc04c968bd2bc212e87364f250e820521)
1*77c1e3ccSAndroid Build Coastguard Worker /*
2*77c1e3ccSAndroid Build Coastguard Worker  * Copyright (c) 2017, Alliance for Open Media. All rights reserved.
3*77c1e3ccSAndroid Build Coastguard Worker  *
4*77c1e3ccSAndroid Build Coastguard Worker  * This source code is subject to the terms of the BSD 2 Clause License and
5*77c1e3ccSAndroid Build Coastguard Worker  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6*77c1e3ccSAndroid Build Coastguard Worker  * was not distributed with this source code in the LICENSE file, you can
7*77c1e3ccSAndroid Build Coastguard Worker  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8*77c1e3ccSAndroid Build Coastguard Worker  * Media Patent License 1.0 was not distributed with this source code in the
9*77c1e3ccSAndroid Build Coastguard Worker  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10*77c1e3ccSAndroid Build Coastguard Worker  */
11*77c1e3ccSAndroid Build Coastguard Worker 
12*77c1e3ccSAndroid Build Coastguard Worker #ifndef AOM_AV1_COMMON_X86_CFL_SIMD_H_
13*77c1e3ccSAndroid Build Coastguard Worker #define AOM_AV1_COMMON_X86_CFL_SIMD_H_
14*77c1e3ccSAndroid Build Coastguard Worker 
15*77c1e3ccSAndroid Build Coastguard Worker #include "av1/common/blockd.h"
16*77c1e3ccSAndroid Build Coastguard Worker 
17*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for width == 4, we reuse them in AVX2
18*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_4x4_ssse3(const uint8_t *cfl_type, int input_stride,
19*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
20*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_4x8_ssse3(const uint8_t *cfl_type, int input_stride,
21*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
22*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_4x16_ssse3(const uint8_t *cfl_type, int input_stride,
23*77c1e3ccSAndroid Build Coastguard Worker                                       uint16_t *output_q3);
24*77c1e3ccSAndroid Build Coastguard Worker 
25*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for width == 8, we reuse it in AVX2
26*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_8x4_ssse3(const uint8_t *cfl_type, int input_stride,
27*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
28*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_8x8_ssse3(const uint8_t *cfl_type, int input_stride,
29*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
30*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_8x16_ssse3(const uint8_t *cfl_type, int input_stride,
31*77c1e3ccSAndroid Build Coastguard Worker                                       uint16_t *output_q3);
32*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_8x32_ssse3(const uint8_t *cfl_type, int input_stride,
33*77c1e3ccSAndroid Build Coastguard Worker                                       uint16_t *output_q3);
34*77c1e3ccSAndroid Build Coastguard Worker 
35*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for width == 16, we reuse it in AVX2
36*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_16x4_ssse3(const uint8_t *cfl_type, int input_stride,
37*77c1e3ccSAndroid Build Coastguard Worker                                       uint16_t *output_q3);
38*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_16x8_ssse3(const uint8_t *cfl_type, int input_stride,
39*77c1e3ccSAndroid Build Coastguard Worker                                       uint16_t *output_q3);
40*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_16x16_ssse3(const uint8_t *cfl_type,
41*77c1e3ccSAndroid Build Coastguard Worker                                        int input_stride, uint16_t *output_q3);
42*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_16x32_ssse3(const uint8_t *cfl_type,
43*77c1e3ccSAndroid Build Coastguard Worker                                        int input_stride, uint16_t *output_q3);
44*77c1e3ccSAndroid Build Coastguard Worker 
45*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for width == 4, we reuse them in AVX2
46*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_4x4_ssse3(const uint8_t *cfl_type, int input_stride,
47*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
48*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_4x8_ssse3(const uint8_t *cfl_type, int input_stride,
49*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
50*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_4x16_ssse3(const uint8_t *cfl_type, int input_stride,
51*77c1e3ccSAndroid Build Coastguard Worker                                       uint16_t *output_q3);
52*77c1e3ccSAndroid Build Coastguard Worker 
53*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for width == 8, we reuse it in AVX2
54*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_8x4_ssse3(const uint8_t *cfl_type, int input_stride,
55*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
56*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_8x8_ssse3(const uint8_t *cfl_type, int input_stride,
57*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
58*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_8x16_ssse3(const uint8_t *cfl_type, int input_stride,
59*77c1e3ccSAndroid Build Coastguard Worker                                       uint16_t *output_q3);
60*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_8x32_ssse3(const uint8_t *cfl_type, int input_stride,
61*77c1e3ccSAndroid Build Coastguard Worker                                       uint16_t *output_q3);
62*77c1e3ccSAndroid Build Coastguard Worker 
63*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for width == 16, we reuse it in AVX2
64*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_16x4_ssse3(const uint8_t *cfl_type, int input_stride,
65*77c1e3ccSAndroid Build Coastguard Worker                                       uint16_t *output_q3);
66*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_16x8_ssse3(const uint8_t *cfl_type, int input_stride,
67*77c1e3ccSAndroid Build Coastguard Worker                                       uint16_t *output_q3);
68*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_16x16_ssse3(const uint8_t *cfl_type,
69*77c1e3ccSAndroid Build Coastguard Worker                                        int input_stride, uint16_t *output_q3);
70*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_16x32_ssse3(const uint8_t *cfl_type,
71*77c1e3ccSAndroid Build Coastguard Worker                                        int input_stride, uint16_t *output_q3);
72*77c1e3ccSAndroid Build Coastguard Worker 
73*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for width == 4, we reuse them in AVX2
74*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_4x4_ssse3(const uint8_t *cfl_type, int input_stride,
75*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
76*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_4x8_ssse3(const uint8_t *cfl_type, int input_stride,
77*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
78*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_4x16_ssse3(const uint8_t *cfl_type, int input_stride,
79*77c1e3ccSAndroid Build Coastguard Worker                                       uint16_t *output_q3);
80*77c1e3ccSAndroid Build Coastguard Worker 
81*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for width == 8, we reuse it in AVX2
82*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_8x4_ssse3(const uint8_t *cfl_type, int input_stride,
83*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
84*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_8x8_ssse3(const uint8_t *cfl_type, int input_stride,
85*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
86*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_8x16_ssse3(const uint8_t *cfl_type, int input_stride,
87*77c1e3ccSAndroid Build Coastguard Worker                                       uint16_t *output_q3);
88*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_8x32_ssse3(const uint8_t *cfl_type, int input_stride,
89*77c1e3ccSAndroid Build Coastguard Worker                                       uint16_t *output_q3);
90*77c1e3ccSAndroid Build Coastguard Worker 
91*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for width == 16, we reuse it in AVX2
92*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_16x4_ssse3(const uint8_t *cfl_type, int input_stride,
93*77c1e3ccSAndroid Build Coastguard Worker                                       uint16_t *output_q3);
94*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_16x8_ssse3(const uint8_t *cfl_type, int input_stride,
95*77c1e3ccSAndroid Build Coastguard Worker                                       uint16_t *output_q3);
96*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_16x16_ssse3(const uint8_t *cfl_type,
97*77c1e3ccSAndroid Build Coastguard Worker                                        int input_stride, uint16_t *output_q3);
98*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_16x32_ssse3(const uint8_t *cfl_type,
99*77c1e3ccSAndroid Build Coastguard Worker                                        int input_stride, uint16_t *output_q3);
100*77c1e3ccSAndroid Build Coastguard Worker 
101*77c1e3ccSAndroid Build Coastguard Worker #if CONFIG_AV1_HIGHBITDEPTH
102*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_4x4_ssse3(const uint16_t *cfl_type, int input_stride,
103*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
104*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_4x8_ssse3(const uint16_t *cfl_type, int input_stride,
105*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
106*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_4x16_ssse3(const uint16_t *cfl_type,
107*77c1e3ccSAndroid Build Coastguard Worker                                       int input_stride, uint16_t *output_q3);
108*77c1e3ccSAndroid Build Coastguard Worker 
109*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for width == 8, we reuse it in AVX2
110*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_8x4_ssse3(const uint16_t *cfl_type, int input_stride,
111*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
112*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_8x8_ssse3(const uint16_t *cfl_type, int input_stride,
113*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
114*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_8x16_ssse3(const uint16_t *cfl_type,
115*77c1e3ccSAndroid Build Coastguard Worker                                       int input_stride, uint16_t *output_q3);
116*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_8x32_ssse3(const uint16_t *cfl_type,
117*77c1e3ccSAndroid Build Coastguard Worker                                       int input_stride, uint16_t *output_q3);
118*77c1e3ccSAndroid Build Coastguard Worker 
119*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is faster for width == 16, we reuse it in AVX2
120*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_16x4_ssse3(const uint16_t *cfl_type,
121*77c1e3ccSAndroid Build Coastguard Worker                                       int input_stride, uint16_t *output_q3);
122*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_16x8_ssse3(const uint16_t *cfl_type,
123*77c1e3ccSAndroid Build Coastguard Worker                                       int input_stride, uint16_t *output_q3);
124*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_16x16_ssse3(const uint16_t *cfl_type,
125*77c1e3ccSAndroid Build Coastguard Worker                                        int input_stride, uint16_t *output_q3);
126*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_16x32_ssse3(const uint16_t *cfl_type,
127*77c1e3ccSAndroid Build Coastguard Worker                                        int input_stride, uint16_t *output_q3);
128*77c1e3ccSAndroid Build Coastguard Worker 
129*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_4x4_ssse3(const uint16_t *cfl_type, int input_stride,
130*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
131*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_4x8_ssse3(const uint16_t *cfl_type, int input_stride,
132*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
133*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_4x16_ssse3(const uint16_t *cfl_type,
134*77c1e3ccSAndroid Build Coastguard Worker                                       int input_stride, uint16_t *output_q3);
135*77c1e3ccSAndroid Build Coastguard Worker 
136*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for width == 8, we reuse it in AVX2
137*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_8x4_ssse3(const uint16_t *cfl_type, int input_stride,
138*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
139*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_8x8_ssse3(const uint16_t *cfl_type, int input_stride,
140*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
141*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_8x16_ssse3(const uint16_t *cfl_type,
142*77c1e3ccSAndroid Build Coastguard Worker                                       int input_stride, uint16_t *output_q3);
143*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_8x32_ssse3(const uint16_t *cfl_type,
144*77c1e3ccSAndroid Build Coastguard Worker                                       int input_stride, uint16_t *output_q3);
145*77c1e3ccSAndroid Build Coastguard Worker 
146*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is faster for width == 16, we reuse it in AVX2
147*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_16x4_ssse3(const uint16_t *cfl_type,
148*77c1e3ccSAndroid Build Coastguard Worker                                       int input_stride, uint16_t *output_q3);
149*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_16x8_ssse3(const uint16_t *cfl_type,
150*77c1e3ccSAndroid Build Coastguard Worker                                       int input_stride, uint16_t *output_q3);
151*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_16x16_ssse3(const uint16_t *cfl_type,
152*77c1e3ccSAndroid Build Coastguard Worker                                        int input_stride, uint16_t *output_q3);
153*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_16x32_ssse3(const uint16_t *cfl_type,
154*77c1e3ccSAndroid Build Coastguard Worker                                        int input_stride, uint16_t *output_q3);
155*77c1e3ccSAndroid Build Coastguard Worker 
156*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_4x4_ssse3(const uint16_t *cfl_type, int input_stride,
157*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
158*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_4x8_ssse3(const uint16_t *cfl_type, int input_stride,
159*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
160*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_4x16_ssse3(const uint16_t *cfl_type,
161*77c1e3ccSAndroid Build Coastguard Worker                                       int input_stride, uint16_t *output_q3);
162*77c1e3ccSAndroid Build Coastguard Worker 
163*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for width == 8, we reuse it in AVX2
164*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_8x4_ssse3(const uint16_t *cfl_type, int input_stride,
165*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
166*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_8x8_ssse3(const uint16_t *cfl_type, int input_stride,
167*77c1e3ccSAndroid Build Coastguard Worker                                      uint16_t *output_q3);
168*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_8x16_ssse3(const uint16_t *cfl_type,
169*77c1e3ccSAndroid Build Coastguard Worker                                       int input_stride, uint16_t *output_q3);
170*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_8x32_ssse3(const uint16_t *cfl_type,
171*77c1e3ccSAndroid Build Coastguard Worker                                       int input_stride, uint16_t *output_q3);
172*77c1e3ccSAndroid Build Coastguard Worker 
173*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is faster for width == 16, we reuse it in AVX2
174*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_16x4_ssse3(const uint16_t *cfl_type,
175*77c1e3ccSAndroid Build Coastguard Worker                                       int input_stride, uint16_t *output_q3);
176*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_16x8_ssse3(const uint16_t *cfl_type,
177*77c1e3ccSAndroid Build Coastguard Worker                                       int input_stride, uint16_t *output_q3);
178*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_16x16_ssse3(const uint16_t *cfl_type,
179*77c1e3ccSAndroid Build Coastguard Worker                                        int input_stride, uint16_t *output_q3);
180*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_16x32_ssse3(const uint16_t *cfl_type,
181*77c1e3ccSAndroid Build Coastguard Worker                                        int input_stride, uint16_t *output_q3);
182*77c1e3ccSAndroid Build Coastguard Worker #endif  // CONFIG_AV1_HIGHBITDEPTH
183*77c1e3ccSAndroid Build Coastguard Worker 
184*77c1e3ccSAndroid Build Coastguard Worker // SSE2 version is optimal for width == 4, we reuse them in AVX2
185*77c1e3ccSAndroid Build Coastguard Worker void cfl_subtract_average_4x4_sse2(const uint16_t *src, int16_t *dst);
186*77c1e3ccSAndroid Build Coastguard Worker void cfl_subtract_average_4x8_sse2(const uint16_t *src, int16_t *dst);
187*77c1e3ccSAndroid Build Coastguard Worker void cfl_subtract_average_4x16_sse2(const uint16_t *src, int16_t *dst);
188*77c1e3ccSAndroid Build Coastguard Worker 
189*77c1e3ccSAndroid Build Coastguard Worker // SSE2 version is optimal for width == 8, we reuse them in AVX2
190*77c1e3ccSAndroid Build Coastguard Worker void cfl_subtract_average_8x4_sse2(const uint16_t *src, int16_t *dst);
191*77c1e3ccSAndroid Build Coastguard Worker void cfl_subtract_average_8x8_sse2(const uint16_t *src, int16_t *dst);
192*77c1e3ccSAndroid Build Coastguard Worker void cfl_subtract_average_8x16_sse2(const uint16_t *src, int16_t *dst);
193*77c1e3ccSAndroid Build Coastguard Worker void cfl_subtract_average_8x32_sse2(const uint16_t *src, int16_t *dst);
194*77c1e3ccSAndroid Build Coastguard Worker 
195*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_4x4_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
196*77c1e3ccSAndroid Build Coastguard Worker                                int dst_stride, int alpha_q3);
197*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_4x8_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
198*77c1e3ccSAndroid Build Coastguard Worker                                int dst_stride, int alpha_q3);
199*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_4x16_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
200*77c1e3ccSAndroid Build Coastguard Worker                                 int dst_stride, int alpha_q3);
201*77c1e3ccSAndroid Build Coastguard Worker 
202*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_8x4_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
203*77c1e3ccSAndroid Build Coastguard Worker                                int dst_stride, int alpha_q3);
204*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_8x8_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
205*77c1e3ccSAndroid Build Coastguard Worker                                int dst_stride, int alpha_q3);
206*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_8x16_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
207*77c1e3ccSAndroid Build Coastguard Worker                                 int dst_stride, int alpha_q3);
208*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_8x32_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
209*77c1e3ccSAndroid Build Coastguard Worker                                 int dst_stride, int alpha_q3);
210*77c1e3ccSAndroid Build Coastguard Worker 
211*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_16x4_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
212*77c1e3ccSAndroid Build Coastguard Worker                                 int dst_stride, int alpha_q3);
213*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_16x8_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
214*77c1e3ccSAndroid Build Coastguard Worker                                 int dst_stride, int alpha_q3);
215*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_16x16_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
216*77c1e3ccSAndroid Build Coastguard Worker                                  int dst_stride, int alpha_q3);
217*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_16x32_ssse3(const int16_t *pred_buf_q3, uint8_t *dst,
218*77c1e3ccSAndroid Build Coastguard Worker                                  int dst_stride, int alpha_q3);
219*77c1e3ccSAndroid Build Coastguard Worker 
220*77c1e3ccSAndroid Build Coastguard Worker #if CONFIG_AV1_HIGHBITDEPTH
221*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_4x4_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
222*77c1e3ccSAndroid Build Coastguard Worker                                int dst_stride, int alpha_q3, int bd);
223*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_4x8_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
224*77c1e3ccSAndroid Build Coastguard Worker                                int dst_stride, int alpha_q3, int bd);
225*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_4x16_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
226*77c1e3ccSAndroid Build Coastguard Worker                                 int dst_stride, int alpha_q3, int bd);
227*77c1e3ccSAndroid Build Coastguard Worker 
228*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_8x4_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
229*77c1e3ccSAndroid Build Coastguard Worker                                int dst_stride, int alpha_q3, int bd);
230*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_8x8_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
231*77c1e3ccSAndroid Build Coastguard Worker                                int dst_stride, int alpha_q3, int bd);
232*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_8x16_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
233*77c1e3ccSAndroid Build Coastguard Worker                                 int dst_stride, int alpha_q3, int bd);
234*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_8x32_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
235*77c1e3ccSAndroid Build Coastguard Worker                                 int dst_stride, int alpha_q3, int bd);
236*77c1e3ccSAndroid Build Coastguard Worker 
237*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_16x4_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
238*77c1e3ccSAndroid Build Coastguard Worker                                 int dst_stride, int alpha_q3, int bd);
239*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_16x8_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
240*77c1e3ccSAndroid Build Coastguard Worker                                 int dst_stride, int alpha_q3, int bd);
241*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_16x16_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
242*77c1e3ccSAndroid Build Coastguard Worker                                  int dst_stride, int alpha_q3, int bd);
243*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_16x32_ssse3(const int16_t *pred_buf_q3, uint16_t *dst,
244*77c1e3ccSAndroid Build Coastguard Worker                                  int dst_stride, int alpha_q3, int bd);
245*77c1e3ccSAndroid Build Coastguard Worker #endif  // CONFIG_AV1_HIGHBITDEPTH
246*77c1e3ccSAndroid Build Coastguard Worker #endif  // AOM_AV1_COMMON_X86_CFL_SIMD_H_
247