1*77c1e3ccSAndroid Build Coastguard Worker /* 2*77c1e3ccSAndroid Build Coastguard Worker * Copyright (c) 2017, Alliance for Open Media. All rights reserved. 3*77c1e3ccSAndroid Build Coastguard Worker * 4*77c1e3ccSAndroid Build Coastguard Worker * This source code is subject to the terms of the BSD 2 Clause License and 5*77c1e3ccSAndroid Build Coastguard Worker * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6*77c1e3ccSAndroid Build Coastguard Worker * was not distributed with this source code in the LICENSE file, you can 7*77c1e3ccSAndroid Build Coastguard Worker * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8*77c1e3ccSAndroid Build Coastguard Worker * Media Patent License 1.0 was not distributed with this source code in the 9*77c1e3ccSAndroid Build Coastguard Worker * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10*77c1e3ccSAndroid Build Coastguard Worker */ 11*77c1e3ccSAndroid Build Coastguard Worker 12*77c1e3ccSAndroid Build Coastguard Worker #ifndef AOM_AV1_COMMON_X86_CFL_SIMD_H_ 13*77c1e3ccSAndroid Build Coastguard Worker #define AOM_AV1_COMMON_X86_CFL_SIMD_H_ 14*77c1e3ccSAndroid Build Coastguard Worker 15*77c1e3ccSAndroid Build Coastguard Worker #include "av1/common/blockd.h" 16*77c1e3ccSAndroid Build Coastguard Worker 17*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for with == 4, we reuse them in AVX2 18*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_4x4_ssse3(const uint8_t *cfl_type, int input_stride, 19*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 20*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_4x8_ssse3(const uint8_t *cfl_type, int input_stride, 21*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 22*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_4x16_ssse3(const uint8_t *cfl_type, int input_stride, 23*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 24*77c1e3ccSAndroid Build Coastguard Worker 25*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for with == 8, we reuse it in AVX2 26*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_8x4_ssse3(const uint8_t *cfl_type, int input_stride, 27*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 28*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_8x8_ssse3(const uint8_t *cfl_type, int input_stride, 29*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 30*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_8x16_ssse3(const uint8_t *cfl_type, int input_stride, 31*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 32*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_8x32_ssse3(const uint8_t *cfl_type, int input_stride, 33*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 34*77c1e3ccSAndroid Build Coastguard Worker 35*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for with == 16, we reuse it in AVX2 36*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_16x4_ssse3(const uint8_t *cfl_type, int input_stride, 37*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 38*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_16x8_ssse3(const uint8_t *cfl_type, int input_stride, 39*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 40*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_16x16_ssse3(const uint8_t *cfl_type, 41*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 42*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_420_16x32_ssse3(const uint8_t *cfl_type, 43*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 44*77c1e3ccSAndroid Build Coastguard Worker 45*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for with == 4, we reuse them in AVX2 46*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_4x4_ssse3(const uint8_t *cfl_type, int input_stride, 47*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 48*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_4x8_ssse3(const uint8_t *cfl_type, int input_stride, 49*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 50*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_4x16_ssse3(const uint8_t *cfl_type, int input_stride, 51*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 52*77c1e3ccSAndroid Build Coastguard Worker 53*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for with == 8, we reuse it in AVX2 54*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_8x4_ssse3(const uint8_t *cfl_type, int input_stride, 55*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 56*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_8x8_ssse3(const uint8_t *cfl_type, int input_stride, 57*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 58*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_8x16_ssse3(const uint8_t *cfl_type, int input_stride, 59*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 60*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_8x32_ssse3(const uint8_t *cfl_type, int input_stride, 61*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 62*77c1e3ccSAndroid Build Coastguard Worker 63*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for with == 16, we reuse it in AVX2 64*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_16x4_ssse3(const uint8_t *cfl_type, int input_stride, 65*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 66*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_16x8_ssse3(const uint8_t *cfl_type, int input_stride, 67*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 68*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_16x16_ssse3(const uint8_t *cfl_type, 69*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 70*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_422_16x32_ssse3(const uint8_t *cfl_type, 71*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 72*77c1e3ccSAndroid Build Coastguard Worker 73*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for with == 4, we reuse them in AVX2 74*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_4x4_ssse3(const uint8_t *cfl_type, int input_stride, 75*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 76*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_4x8_ssse3(const uint8_t *cfl_type, int input_stride, 77*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 78*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_4x16_ssse3(const uint8_t *cfl_type, int input_stride, 79*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 80*77c1e3ccSAndroid Build Coastguard Worker 81*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for with == 8, we reuse it in AVX2 82*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_8x4_ssse3(const uint8_t *cfl_type, int input_stride, 83*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 84*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_8x8_ssse3(const uint8_t *cfl_type, int input_stride, 85*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 86*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_8x16_ssse3(const uint8_t *cfl_type, int input_stride, 87*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 88*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_8x32_ssse3(const uint8_t *cfl_type, int input_stride, 89*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 90*77c1e3ccSAndroid Build Coastguard Worker 91*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for with == 16, we reuse it in AVX2 92*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_16x4_ssse3(const uint8_t *cfl_type, int input_stride, 93*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 94*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_16x8_ssse3(const uint8_t *cfl_type, int input_stride, 95*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 96*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_16x16_ssse3(const uint8_t *cfl_type, 97*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 98*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_lbd_444_16x32_ssse3(const uint8_t *cfl_type, 99*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 100*77c1e3ccSAndroid Build Coastguard Worker 101*77c1e3ccSAndroid Build Coastguard Worker #if CONFIG_AV1_HIGHBITDEPTH 102*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_4x4_ssse3(const uint16_t *cfl_type, int input_stride, 103*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 104*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_4x8_ssse3(const uint16_t *cfl_type, int input_stride, 105*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 106*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_4x16_ssse3(const uint16_t *cfl_type, 107*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 108*77c1e3ccSAndroid Build Coastguard Worker 109*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for with == 8, we reuse it in AVX2 110*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_8x4_ssse3(const uint16_t *cfl_type, int input_stride, 111*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 112*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_8x8_ssse3(const uint16_t *cfl_type, int input_stride, 113*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 114*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_8x16_ssse3(const uint16_t *cfl_type, 115*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 116*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_8x32_ssse3(const uint16_t *cfl_type, 117*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 118*77c1e3ccSAndroid Build Coastguard Worker 119*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is faster for with == 16, we reuse it in AVX2 120*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_16x4_ssse3(const uint16_t *cfl_type, 121*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 122*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_16x8_ssse3(const uint16_t *cfl_type, 123*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 124*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_16x16_ssse3(const uint16_t *cfl_type, 125*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 126*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_420_16x32_ssse3(const uint16_t *cfl_type, 127*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 128*77c1e3ccSAndroid Build Coastguard Worker 129*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_4x4_ssse3(const uint16_t *cfl_type, int input_stride, 130*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 131*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_4x8_ssse3(const uint16_t *cfl_type, int input_stride, 132*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 133*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_4x16_ssse3(const uint16_t *cfl_type, 134*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 135*77c1e3ccSAndroid Build Coastguard Worker 136*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for with == 8, we reuse it in AVX2 137*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_8x4_ssse3(const uint16_t *cfl_type, int input_stride, 138*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 139*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_8x8_ssse3(const uint16_t *cfl_type, int input_stride, 140*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 141*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_8x16_ssse3(const uint16_t *cfl_type, 142*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 143*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_8x32_ssse3(const uint16_t *cfl_type, 144*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 145*77c1e3ccSAndroid Build Coastguard Worker 146*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is faster for with == 16, we reuse it in AVX2 147*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_16x4_ssse3(const uint16_t *cfl_type, 148*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 149*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_16x8_ssse3(const uint16_t *cfl_type, 150*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 151*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_16x16_ssse3(const uint16_t *cfl_type, 152*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 153*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_422_16x32_ssse3(const uint16_t *cfl_type, 154*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 155*77c1e3ccSAndroid Build Coastguard Worker 156*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_4x4_ssse3(const uint16_t *cfl_type, int input_stride, 157*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 158*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_4x8_ssse3(const uint16_t *cfl_type, int input_stride, 159*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 160*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_4x16_ssse3(const uint16_t *cfl_type, 161*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 162*77c1e3ccSAndroid Build Coastguard Worker 163*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is optimal for with == 8, we reuse it in AVX2 164*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_8x4_ssse3(const uint16_t *cfl_type, int input_stride, 165*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 166*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_8x8_ssse3(const uint16_t *cfl_type, int input_stride, 167*77c1e3ccSAndroid Build Coastguard Worker uint16_t *output_q3); 168*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_8x16_ssse3(const uint16_t *cfl_type, 169*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 170*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_8x32_ssse3(const uint16_t *cfl_type, 171*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 172*77c1e3ccSAndroid Build Coastguard Worker 173*77c1e3ccSAndroid Build Coastguard Worker // SSSE3 version is faster for with == 16, we reuse it in AVX2 174*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_16x4_ssse3(const uint16_t *cfl_type, 175*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 176*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_16x8_ssse3(const uint16_t *cfl_type, 177*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 178*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_16x16_ssse3(const uint16_t *cfl_type, 179*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 180*77c1e3ccSAndroid Build Coastguard Worker void cfl_subsample_hbd_444_16x32_ssse3(const uint16_t *cfl_type, 181*77c1e3ccSAndroid Build Coastguard Worker int input_stride, uint16_t *output_q3); 182*77c1e3ccSAndroid Build Coastguard Worker #endif // CONFIG_AV1_HIGHBITDEPTH 183*77c1e3ccSAndroid Build Coastguard Worker 184*77c1e3ccSAndroid Build Coastguard Worker // SSE2 version is optimal for with == 4, we reuse them in AVX2 185*77c1e3ccSAndroid Build Coastguard Worker void cfl_subtract_average_4x4_sse2(const uint16_t *src, int16_t *dst); 186*77c1e3ccSAndroid Build Coastguard Worker void cfl_subtract_average_4x8_sse2(const uint16_t *src, int16_t *dst); 187*77c1e3ccSAndroid Build Coastguard Worker void cfl_subtract_average_4x16_sse2(const uint16_t *src, int16_t *dst); 188*77c1e3ccSAndroid Build Coastguard Worker 189*77c1e3ccSAndroid Build Coastguard Worker // SSE2 version is optimal for with == 8, we reuse them in AVX2 190*77c1e3ccSAndroid Build Coastguard Worker void cfl_subtract_average_8x4_sse2(const uint16_t *src, int16_t *dst); 191*77c1e3ccSAndroid Build Coastguard Worker void cfl_subtract_average_8x8_sse2(const uint16_t *src, int16_t *dst); 192*77c1e3ccSAndroid Build Coastguard Worker void cfl_subtract_average_8x16_sse2(const uint16_t *src, int16_t *dst); 193*77c1e3ccSAndroid Build Coastguard Worker void cfl_subtract_average_8x32_sse2(const uint16_t *src, int16_t *dst); 194*77c1e3ccSAndroid Build Coastguard Worker 195*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_4x4_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, 196*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3); 197*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_4x8_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, 198*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3); 199*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_4x16_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, 200*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3); 201*77c1e3ccSAndroid Build Coastguard Worker 202*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_8x4_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, 203*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3); 204*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_8x8_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, 205*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3); 206*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_8x16_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, 207*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3); 208*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_8x32_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, 209*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3); 210*77c1e3ccSAndroid Build Coastguard Worker 211*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_16x4_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, 212*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3); 213*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_16x8_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, 214*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3); 215*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_16x16_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, 216*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3); 217*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_lbd_16x32_ssse3(const int16_t *pred_buf_q3, uint8_t *dst, 218*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3); 219*77c1e3ccSAndroid Build Coastguard Worker 220*77c1e3ccSAndroid Build Coastguard Worker #if CONFIG_AV1_HIGHBITDEPTH 221*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_4x4_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, 222*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3, int bd); 223*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_4x8_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, 224*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3, int bd); 225*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_4x16_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, 226*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3, int bd); 227*77c1e3ccSAndroid Build Coastguard Worker 228*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_8x4_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, 229*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3, int bd); 230*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_8x8_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, 231*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3, int bd); 232*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_8x16_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, 233*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3, int bd); 234*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_8x32_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, 235*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3, int bd); 236*77c1e3ccSAndroid Build Coastguard Worker 237*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_16x4_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, 238*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3, int bd); 239*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_16x8_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, 240*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3, int bd); 241*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_16x16_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, 242*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3, int bd); 243*77c1e3ccSAndroid Build Coastguard Worker void cfl_predict_hbd_16x32_ssse3(const int16_t *pred_buf_q3, uint16_t *dst, 244*77c1e3ccSAndroid Build Coastguard Worker int dst_stride, int alpha_q3, int bd); 245*77c1e3ccSAndroid Build Coastguard Worker #endif // CONFIG_AV1_HIGHBITDEPTH 246*77c1e3ccSAndroid Build Coastguard Worker #endif // AOM_AV1_COMMON_X86_CFL_SIMD_H_ 247