xref: /aosp_15_r20/external/libvpx/vpx_dsp/loongarch/sad_lsx.c (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
/*
 *  Copyright (c) 2022 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10*fb1b10abSAndroid Build Coastguard Worker 
11*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_config.h"
12*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_dsp_rtcd.h"
13*fb1b10abSAndroid Build Coastguard Worker #include "vpx_util/loongson_intrinsics.h"
14*fb1b10abSAndroid Build Coastguard Worker 
sad_ub2_uh(__m128i in0,__m128i in1,__m128i ref0,__m128i ref1)15*fb1b10abSAndroid Build Coastguard Worker static INLINE __m128i sad_ub2_uh(__m128i in0, __m128i in1, __m128i ref0,
16*fb1b10abSAndroid Build Coastguard Worker                                  __m128i ref1) {
17*fb1b10abSAndroid Build Coastguard Worker   __m128i diff0_m, diff1_m, sad_m0;
18*fb1b10abSAndroid Build Coastguard Worker   __m128i sad_m = __lsx_vldi(0);
19*fb1b10abSAndroid Build Coastguard Worker 
20*fb1b10abSAndroid Build Coastguard Worker   diff0_m = __lsx_vabsd_bu(in0, ref0);
21*fb1b10abSAndroid Build Coastguard Worker   diff1_m = __lsx_vabsd_bu(in1, ref1);
22*fb1b10abSAndroid Build Coastguard Worker 
23*fb1b10abSAndroid Build Coastguard Worker   sad_m0 = __lsx_vhaddw_hu_bu(diff0_m, diff0_m);
24*fb1b10abSAndroid Build Coastguard Worker   sad_m = __lsx_vadd_h(sad_m, sad_m0);
25*fb1b10abSAndroid Build Coastguard Worker   sad_m0 = __lsx_vhaddw_hu_bu(diff1_m, diff1_m);
26*fb1b10abSAndroid Build Coastguard Worker   sad_m = __lsx_vadd_h(sad_m, sad_m0);
27*fb1b10abSAndroid Build Coastguard Worker 
28*fb1b10abSAndroid Build Coastguard Worker   return sad_m;
29*fb1b10abSAndroid Build Coastguard Worker }
30*fb1b10abSAndroid Build Coastguard Worker 
hadd_uw_u32(__m128i in)31*fb1b10abSAndroid Build Coastguard Worker static INLINE uint32_t hadd_uw_u32(__m128i in) {
32*fb1b10abSAndroid Build Coastguard Worker   __m128i res0_m;
33*fb1b10abSAndroid Build Coastguard Worker   uint32_t sum_m;
34*fb1b10abSAndroid Build Coastguard Worker 
35*fb1b10abSAndroid Build Coastguard Worker   res0_m = __lsx_vhaddw_du_wu(in, in);
36*fb1b10abSAndroid Build Coastguard Worker   res0_m = __lsx_vhaddw_qu_du(res0_m, res0_m);
37*fb1b10abSAndroid Build Coastguard Worker   sum_m = __lsx_vpickve2gr_w(res0_m, 0);
38*fb1b10abSAndroid Build Coastguard Worker 
39*fb1b10abSAndroid Build Coastguard Worker   return sum_m;
40*fb1b10abSAndroid Build Coastguard Worker }
41*fb1b10abSAndroid Build Coastguard Worker 
hadd_uh_u32(__m128i in)42*fb1b10abSAndroid Build Coastguard Worker static INLINE uint32_t hadd_uh_u32(__m128i in) {
43*fb1b10abSAndroid Build Coastguard Worker   __m128i res_m;
44*fb1b10abSAndroid Build Coastguard Worker   uint32_t sum_m;
45*fb1b10abSAndroid Build Coastguard Worker 
46*fb1b10abSAndroid Build Coastguard Worker   res_m = __lsx_vhaddw_wu_hu(in, in);
47*fb1b10abSAndroid Build Coastguard Worker   sum_m = hadd_uw_u32(res_m);
48*fb1b10abSAndroid Build Coastguard Worker 
49*fb1b10abSAndroid Build Coastguard Worker   return sum_m;
50*fb1b10abSAndroid Build Coastguard Worker }
51*fb1b10abSAndroid Build Coastguard Worker 
hadd_sw_s32(__m128i in)52*fb1b10abSAndroid Build Coastguard Worker static INLINE int32_t hadd_sw_s32(__m128i in) {
53*fb1b10abSAndroid Build Coastguard Worker   __m128i res0_m;
54*fb1b10abSAndroid Build Coastguard Worker   int32_t sum_m;
55*fb1b10abSAndroid Build Coastguard Worker 
56*fb1b10abSAndroid Build Coastguard Worker   res0_m = __lsx_vhaddw_d_w(in, in);
57*fb1b10abSAndroid Build Coastguard Worker   res0_m = __lsx_vhaddw_q_d(res0_m, res0_m);
58*fb1b10abSAndroid Build Coastguard Worker   sum_m = __lsx_vpickve2gr_w(res0_m, 0);
59*fb1b10abSAndroid Build Coastguard Worker 
60*fb1b10abSAndroid Build Coastguard Worker   return sum_m;
61*fb1b10abSAndroid Build Coastguard Worker }
62*fb1b10abSAndroid Build Coastguard Worker 
sad_8width_lsx(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height)63*fb1b10abSAndroid Build Coastguard Worker static uint32_t sad_8width_lsx(const uint8_t *src, int32_t src_stride,
64*fb1b10abSAndroid Build Coastguard Worker                                const uint8_t *ref, int32_t ref_stride,
65*fb1b10abSAndroid Build Coastguard Worker                                int32_t height) {
66*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt;
67*fb1b10abSAndroid Build Coastguard Worker   uint32_t res;
68*fb1b10abSAndroid Build Coastguard Worker   __m128i src0, src1, src2, src3, ref0, ref1, ref2, ref3, sad_tmp;
69*fb1b10abSAndroid Build Coastguard Worker   __m128i sad = __lsx_vldi(0);
70*fb1b10abSAndroid Build Coastguard Worker 
71*fb1b10abSAndroid Build Coastguard Worker   for (ht_cnt = (height >> 2); ht_cnt--;) {
72*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, src, 0, ref, 0, src0, ref0);
73*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
74*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride;
75*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, src, 0, ref, 0, src1, ref1);
76*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
77*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride;
78*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, src, 0, ref, 0, src2, ref2);
79*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
80*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride;
81*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, src, 0, ref, 0, src3, ref3);
82*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
83*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride;
84*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vpickev_d, src1, src0, src3, src2, ref1, ref0, ref3, ref2,
85*fb1b10abSAndroid Build Coastguard Worker               src0, src1, ref0, ref1);
86*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
87*fb1b10abSAndroid Build Coastguard Worker     sad = __lsx_vadd_h(sad, sad_tmp);
88*fb1b10abSAndroid Build Coastguard Worker   }
89*fb1b10abSAndroid Build Coastguard Worker   res = hadd_uh_u32(sad);
90*fb1b10abSAndroid Build Coastguard Worker   return res;
91*fb1b10abSAndroid Build Coastguard Worker }
92*fb1b10abSAndroid Build Coastguard Worker 
sad_16width_lsx(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height)93*fb1b10abSAndroid Build Coastguard Worker static uint32_t sad_16width_lsx(const uint8_t *src, int32_t src_stride,
94*fb1b10abSAndroid Build Coastguard Worker                                 const uint8_t *ref, int32_t ref_stride,
95*fb1b10abSAndroid Build Coastguard Worker                                 int32_t height) {
96*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt = (height >> 2);
97*fb1b10abSAndroid Build Coastguard Worker   uint32_t res;
98*fb1b10abSAndroid Build Coastguard Worker   __m128i src0, src1, ref0, ref1, sad_tmp;
99*fb1b10abSAndroid Build Coastguard Worker   __m128i sad = __lsx_vldi(0);
100*fb1b10abSAndroid Build Coastguard Worker   int32_t src_stride2 = src_stride << 1;
101*fb1b10abSAndroid Build Coastguard Worker   int32_t ref_stride2 = ref_stride << 1;
102*fb1b10abSAndroid Build Coastguard Worker 
103*fb1b10abSAndroid Build Coastguard Worker   for (; ht_cnt--;) {
104*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, src, 0, ref, 0, src0, ref0);
105*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vldx, src, src_stride, ref, ref_stride, src1, ref1);
106*fb1b10abSAndroid Build Coastguard Worker     src += src_stride2;
107*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride2;
108*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
109*fb1b10abSAndroid Build Coastguard Worker     sad = __lsx_vadd_h(sad, sad_tmp);
110*fb1b10abSAndroid Build Coastguard Worker 
111*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, src, 0, ref, 0, src0, ref0);
112*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vldx, src, src_stride, ref, ref_stride, src1, ref1);
113*fb1b10abSAndroid Build Coastguard Worker     src += src_stride2;
114*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride2;
115*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
116*fb1b10abSAndroid Build Coastguard Worker     sad = __lsx_vadd_h(sad, sad_tmp);
117*fb1b10abSAndroid Build Coastguard Worker   }
118*fb1b10abSAndroid Build Coastguard Worker 
119*fb1b10abSAndroid Build Coastguard Worker   res = hadd_uh_u32(sad);
120*fb1b10abSAndroid Build Coastguard Worker   return res;
121*fb1b10abSAndroid Build Coastguard Worker }
122*fb1b10abSAndroid Build Coastguard Worker 
sad_32width_lsx(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height)123*fb1b10abSAndroid Build Coastguard Worker static uint32_t sad_32width_lsx(const uint8_t *src, int32_t src_stride,
124*fb1b10abSAndroid Build Coastguard Worker                                 const uint8_t *ref, int32_t ref_stride,
125*fb1b10abSAndroid Build Coastguard Worker                                 int32_t height) {
126*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt = (height >> 2);
127*fb1b10abSAndroid Build Coastguard Worker   uint32_t res;
128*fb1b10abSAndroid Build Coastguard Worker   __m128i src0, src1, ref0, ref1;
129*fb1b10abSAndroid Build Coastguard Worker   __m128i sad_tmp;
130*fb1b10abSAndroid Build Coastguard Worker   __m128i sad = __lsx_vldi(0);
131*fb1b10abSAndroid Build Coastguard Worker 
132*fb1b10abSAndroid Build Coastguard Worker   for (; ht_cnt--;) {
133*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1);
134*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
135*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, ref, 0, ref, 16, ref0, ref1);
136*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride;
137*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
138*fb1b10abSAndroid Build Coastguard Worker     sad = __lsx_vadd_h(sad, sad_tmp);
139*fb1b10abSAndroid Build Coastguard Worker 
140*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1);
141*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
142*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, ref, 0, ref, 16, ref0, ref1);
143*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride;
144*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
145*fb1b10abSAndroid Build Coastguard Worker     sad = __lsx_vadd_h(sad, sad_tmp);
146*fb1b10abSAndroid Build Coastguard Worker 
147*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1);
148*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
149*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, ref, 0, ref, 16, ref0, ref1);
150*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride;
151*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
152*fb1b10abSAndroid Build Coastguard Worker     sad = __lsx_vadd_h(sad, sad_tmp);
153*fb1b10abSAndroid Build Coastguard Worker 
154*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1);
155*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
156*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, ref, 0, ref, 16, ref0, ref1);
157*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride;
158*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
159*fb1b10abSAndroid Build Coastguard Worker     sad = __lsx_vadd_h(sad, sad_tmp);
160*fb1b10abSAndroid Build Coastguard Worker   }
161*fb1b10abSAndroid Build Coastguard Worker   res = hadd_uh_u32(sad);
162*fb1b10abSAndroid Build Coastguard Worker   return res;
163*fb1b10abSAndroid Build Coastguard Worker }
164*fb1b10abSAndroid Build Coastguard Worker 
sad_64width_lsx(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height)165*fb1b10abSAndroid Build Coastguard Worker static uint32_t sad_64width_lsx(const uint8_t *src, int32_t src_stride,
166*fb1b10abSAndroid Build Coastguard Worker                                 const uint8_t *ref, int32_t ref_stride,
167*fb1b10abSAndroid Build Coastguard Worker                                 int32_t height) {
168*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt = (height >> 1);
169*fb1b10abSAndroid Build Coastguard Worker   uint32_t sad = 0;
170*fb1b10abSAndroid Build Coastguard Worker   __m128i src0, src1, src2, src3;
171*fb1b10abSAndroid Build Coastguard Worker   __m128i ref0, ref1, ref2, ref3;
172*fb1b10abSAndroid Build Coastguard Worker   __m128i sad_tmp;
173*fb1b10abSAndroid Build Coastguard Worker   __m128i sad0 = __lsx_vldi(0);
174*fb1b10abSAndroid Build Coastguard Worker   __m128i sad1 = sad0;
175*fb1b10abSAndroid Build Coastguard Worker 
176*fb1b10abSAndroid Build Coastguard Worker   for (; ht_cnt--;) {
177*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, src, 0, src, 16, src, 32, src, 48, src0, src1, src2,
178*fb1b10abSAndroid Build Coastguard Worker               src3);
179*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
180*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, ref, 0, ref, 16, ref, 32, ref, 48, ref0, ref1, ref2,
181*fb1b10abSAndroid Build Coastguard Worker               ref3);
182*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride;
183*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
184*fb1b10abSAndroid Build Coastguard Worker     sad0 = __lsx_vadd_h(sad0, sad_tmp);
185*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src2, src3, ref2, ref3);
186*fb1b10abSAndroid Build Coastguard Worker     sad1 = __lsx_vadd_h(sad1, sad_tmp);
187*fb1b10abSAndroid Build Coastguard Worker 
188*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, src, 0, src, 16, src, 32, src, 48, src0, src1, src2,
189*fb1b10abSAndroid Build Coastguard Worker               src3);
190*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
191*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, ref, 0, ref, 16, ref, 32, ref, 48, ref0, ref1, ref2,
192*fb1b10abSAndroid Build Coastguard Worker               ref3);
193*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride;
194*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
195*fb1b10abSAndroid Build Coastguard Worker     sad0 = __lsx_vadd_h(sad0, sad_tmp);
196*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src2, src3, ref2, ref3);
197*fb1b10abSAndroid Build Coastguard Worker     sad1 = __lsx_vadd_h(sad1, sad_tmp);
198*fb1b10abSAndroid Build Coastguard Worker   }
199*fb1b10abSAndroid Build Coastguard Worker 
200*fb1b10abSAndroid Build Coastguard Worker   sad = hadd_uh_u32(sad0);
201*fb1b10abSAndroid Build Coastguard Worker   sad += hadd_uh_u32(sad1);
202*fb1b10abSAndroid Build Coastguard Worker 
203*fb1b10abSAndroid Build Coastguard Worker   return sad;
204*fb1b10abSAndroid Build Coastguard Worker }
205*fb1b10abSAndroid Build Coastguard Worker 
sad_8width_x4d_lsx(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * const aref_ptr[],int32_t ref_stride,int32_t height,uint32_t * sad_array)206*fb1b10abSAndroid Build Coastguard Worker static void sad_8width_x4d_lsx(const uint8_t *src_ptr, int32_t src_stride,
207*fb1b10abSAndroid Build Coastguard Worker                                const uint8_t *const aref_ptr[],
208*fb1b10abSAndroid Build Coastguard Worker                                int32_t ref_stride, int32_t height,
209*fb1b10abSAndroid Build Coastguard Worker                                uint32_t *sad_array) {
210*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt = (height >> 2);
211*fb1b10abSAndroid Build Coastguard Worker   const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
212*fb1b10abSAndroid Build Coastguard Worker   __m128i src0, src1, src2, src3, sad_tmp;
213*fb1b10abSAndroid Build Coastguard Worker   __m128i ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
214*fb1b10abSAndroid Build Coastguard Worker   __m128i ref8, ref9, ref10, ref11, ref12, ref13, ref14, ref15;
215*fb1b10abSAndroid Build Coastguard Worker   __m128i sad0 = __lsx_vldi(0);
216*fb1b10abSAndroid Build Coastguard Worker   __m128i sad1 = sad0;
217*fb1b10abSAndroid Build Coastguard Worker   __m128i sad2 = sad0;
218*fb1b10abSAndroid Build Coastguard Worker   __m128i sad3 = sad0;
219*fb1b10abSAndroid Build Coastguard Worker   int32_t src_stride2 = src_stride << 1;
220*fb1b10abSAndroid Build Coastguard Worker   int32_t src_stride3 = src_stride2 + src_stride;
221*fb1b10abSAndroid Build Coastguard Worker   int32_t src_stride4 = src_stride2 << 1;
222*fb1b10abSAndroid Build Coastguard Worker   int32_t ref_stride2 = ref_stride << 1;
223*fb1b10abSAndroid Build Coastguard Worker   int32_t ref_stride3 = ref_stride2 + ref_stride;
224*fb1b10abSAndroid Build Coastguard Worker   int32_t ref_stride4 = ref_stride2 << 1;
225*fb1b10abSAndroid Build Coastguard Worker 
226*fb1b10abSAndroid Build Coastguard Worker   ref0_ptr = aref_ptr[0];
227*fb1b10abSAndroid Build Coastguard Worker   ref1_ptr = aref_ptr[1];
228*fb1b10abSAndroid Build Coastguard Worker   ref2_ptr = aref_ptr[2];
229*fb1b10abSAndroid Build Coastguard Worker   ref3_ptr = aref_ptr[3];
230*fb1b10abSAndroid Build Coastguard Worker 
231*fb1b10abSAndroid Build Coastguard Worker   for (; ht_cnt--;) {
232*fb1b10abSAndroid Build Coastguard Worker     src0 = __lsx_vld(src_ptr, 0);
233*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vldx, src_ptr, src_stride, src_ptr, src_stride2, src1,
234*fb1b10abSAndroid Build Coastguard Worker               src2);
235*fb1b10abSAndroid Build Coastguard Worker     src3 = __lsx_vldx(src_ptr, src_stride3);
236*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride4;
237*fb1b10abSAndroid Build Coastguard Worker     ref0 = __lsx_vld(ref0_ptr, 0);
238*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vldx, ref0_ptr, ref_stride, ref0_ptr, ref_stride2, ref1,
239*fb1b10abSAndroid Build Coastguard Worker               ref2);
240*fb1b10abSAndroid Build Coastguard Worker     ref3 = __lsx_vldx(ref0_ptr, ref_stride3);
241*fb1b10abSAndroid Build Coastguard Worker     ref0_ptr += ref_stride4;
242*fb1b10abSAndroid Build Coastguard Worker     ref4 = __lsx_vld(ref1_ptr, 0);
243*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vldx, ref1_ptr, ref_stride, ref1_ptr, ref_stride2, ref5,
244*fb1b10abSAndroid Build Coastguard Worker               ref6);
245*fb1b10abSAndroid Build Coastguard Worker     ref7 = __lsx_vldx(ref1_ptr, ref_stride3);
246*fb1b10abSAndroid Build Coastguard Worker     ref1_ptr += ref_stride4;
247*fb1b10abSAndroid Build Coastguard Worker     ref8 = __lsx_vld(ref2_ptr, 0);
248*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vldx, ref2_ptr, ref_stride, ref2_ptr, ref_stride2, ref9,
249*fb1b10abSAndroid Build Coastguard Worker               ref10);
250*fb1b10abSAndroid Build Coastguard Worker     ref11 = __lsx_vldx(ref2_ptr, ref_stride3);
251*fb1b10abSAndroid Build Coastguard Worker     ref2_ptr += ref_stride4;
252*fb1b10abSAndroid Build Coastguard Worker     ref12 = __lsx_vld(ref3_ptr, 0);
253*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vldx, ref3_ptr, ref_stride, ref3_ptr, ref_stride2, ref13,
254*fb1b10abSAndroid Build Coastguard Worker               ref14);
255*fb1b10abSAndroid Build Coastguard Worker     ref15 = __lsx_vldx(ref3_ptr, ref_stride3);
256*fb1b10abSAndroid Build Coastguard Worker     ref3_ptr += ref_stride4;
257*fb1b10abSAndroid Build Coastguard Worker 
258*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vpickev_d, src1, src0, src3, src2, src0, src1);
259*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vpickev_d, ref1, ref0, ref3, ref2, ref0, ref1);
260*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
261*fb1b10abSAndroid Build Coastguard Worker     sad0 = __lsx_vadd_h(sad0, sad_tmp);
262*fb1b10abSAndroid Build Coastguard Worker 
263*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vpickev_d, ref5, ref4, ref7, ref6, ref0, ref1);
264*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
265*fb1b10abSAndroid Build Coastguard Worker     sad1 = __lsx_vadd_h(sad1, sad_tmp);
266*fb1b10abSAndroid Build Coastguard Worker 
267*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vpickev_d, ref9, ref8, ref11, ref10, ref0, ref1);
268*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
269*fb1b10abSAndroid Build Coastguard Worker     sad2 = __lsx_vadd_h(sad2, sad_tmp);
270*fb1b10abSAndroid Build Coastguard Worker 
271*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vpickev_d, ref13, ref12, ref15, ref14, ref0, ref1);
272*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
273*fb1b10abSAndroid Build Coastguard Worker     sad3 = __lsx_vadd_h(sad3, sad_tmp);
274*fb1b10abSAndroid Build Coastguard Worker   }
275*fb1b10abSAndroid Build Coastguard Worker   sad_array[0] = hadd_uh_u32(sad0);
276*fb1b10abSAndroid Build Coastguard Worker   sad_array[1] = hadd_uh_u32(sad1);
277*fb1b10abSAndroid Build Coastguard Worker   sad_array[2] = hadd_uh_u32(sad2);
278*fb1b10abSAndroid Build Coastguard Worker   sad_array[3] = hadd_uh_u32(sad3);
279*fb1b10abSAndroid Build Coastguard Worker }
280*fb1b10abSAndroid Build Coastguard Worker 
sad_16width_x4d_lsx(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * const aref_ptr[],int32_t ref_stride,int32_t height,uint32_t * sad_array)281*fb1b10abSAndroid Build Coastguard Worker static void sad_16width_x4d_lsx(const uint8_t *src_ptr, int32_t src_stride,
282*fb1b10abSAndroid Build Coastguard Worker                                 const uint8_t *const aref_ptr[],
283*fb1b10abSAndroid Build Coastguard Worker                                 int32_t ref_stride, int32_t height,
284*fb1b10abSAndroid Build Coastguard Worker                                 uint32_t *sad_array) {
285*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt = (height >> 1);
286*fb1b10abSAndroid Build Coastguard Worker   const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
287*fb1b10abSAndroid Build Coastguard Worker   __m128i src, ref0, ref1, ref2, ref3, diff, sad_tmp;
288*fb1b10abSAndroid Build Coastguard Worker   __m128i sad0 = __lsx_vldi(0);
289*fb1b10abSAndroid Build Coastguard Worker   __m128i sad1 = sad0;
290*fb1b10abSAndroid Build Coastguard Worker   __m128i sad2 = sad0;
291*fb1b10abSAndroid Build Coastguard Worker   __m128i sad3 = sad0;
292*fb1b10abSAndroid Build Coastguard Worker 
293*fb1b10abSAndroid Build Coastguard Worker   ref0_ptr = aref_ptr[0];
294*fb1b10abSAndroid Build Coastguard Worker   ref1_ptr = aref_ptr[1];
295*fb1b10abSAndroid Build Coastguard Worker   ref2_ptr = aref_ptr[2];
296*fb1b10abSAndroid Build Coastguard Worker   ref3_ptr = aref_ptr[3];
297*fb1b10abSAndroid Build Coastguard Worker 
298*fb1b10abSAndroid Build Coastguard Worker   for (; ht_cnt--;) {
299*fb1b10abSAndroid Build Coastguard Worker     src = __lsx_vld(src_ptr, 0);
300*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
301*fb1b10abSAndroid Build Coastguard Worker     ref0 = __lsx_vld(ref0_ptr, 0);
302*fb1b10abSAndroid Build Coastguard Worker     ref0_ptr += ref_stride;
303*fb1b10abSAndroid Build Coastguard Worker     ref1 = __lsx_vld(ref1_ptr, 0);
304*fb1b10abSAndroid Build Coastguard Worker     ref1_ptr += ref_stride;
305*fb1b10abSAndroid Build Coastguard Worker     ref2 = __lsx_vld(ref2_ptr, 0);
306*fb1b10abSAndroid Build Coastguard Worker     ref2_ptr += ref_stride;
307*fb1b10abSAndroid Build Coastguard Worker     ref3 = __lsx_vld(ref3_ptr, 0);
308*fb1b10abSAndroid Build Coastguard Worker     ref3_ptr += ref_stride;
309*fb1b10abSAndroid Build Coastguard Worker 
310*fb1b10abSAndroid Build Coastguard Worker     diff = __lsx_vabsd_bu(src, ref0);
311*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = __lsx_vhaddw_hu_bu(diff, diff);
312*fb1b10abSAndroid Build Coastguard Worker     sad0 = __lsx_vadd_h(sad0, sad_tmp);
313*fb1b10abSAndroid Build Coastguard Worker     diff = __lsx_vabsd_bu(src, ref1);
314*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = __lsx_vhaddw_hu_bu(diff, diff);
315*fb1b10abSAndroid Build Coastguard Worker     sad1 = __lsx_vadd_h(sad1, sad_tmp);
316*fb1b10abSAndroid Build Coastguard Worker     diff = __lsx_vabsd_bu(src, ref2);
317*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = __lsx_vhaddw_hu_bu(diff, diff);
318*fb1b10abSAndroid Build Coastguard Worker     sad2 = __lsx_vadd_h(sad2, sad_tmp);
319*fb1b10abSAndroid Build Coastguard Worker     diff = __lsx_vabsd_bu(src, ref3);
320*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = __lsx_vhaddw_hu_bu(diff, diff);
321*fb1b10abSAndroid Build Coastguard Worker     sad3 = __lsx_vadd_h(sad3, sad_tmp);
322*fb1b10abSAndroid Build Coastguard Worker 
323*fb1b10abSAndroid Build Coastguard Worker     src = __lsx_vld(src_ptr, 0);
324*fb1b10abSAndroid Build Coastguard Worker     src_ptr += src_stride;
325*fb1b10abSAndroid Build Coastguard Worker     ref0 = __lsx_vld(ref0_ptr, 0);
326*fb1b10abSAndroid Build Coastguard Worker     ref0_ptr += ref_stride;
327*fb1b10abSAndroid Build Coastguard Worker     ref1 = __lsx_vld(ref1_ptr, 0);
328*fb1b10abSAndroid Build Coastguard Worker     ref1_ptr += ref_stride;
329*fb1b10abSAndroid Build Coastguard Worker     ref2 = __lsx_vld(ref2_ptr, 0);
330*fb1b10abSAndroid Build Coastguard Worker     ref2_ptr += ref_stride;
331*fb1b10abSAndroid Build Coastguard Worker     ref3 = __lsx_vld(ref3_ptr, 0);
332*fb1b10abSAndroid Build Coastguard Worker     ref3_ptr += ref_stride;
333*fb1b10abSAndroid Build Coastguard Worker 
334*fb1b10abSAndroid Build Coastguard Worker     diff = __lsx_vabsd_bu(src, ref0);
335*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = __lsx_vhaddw_hu_bu(diff, diff);
336*fb1b10abSAndroid Build Coastguard Worker     sad0 = __lsx_vadd_h(sad0, sad_tmp);
337*fb1b10abSAndroid Build Coastguard Worker     diff = __lsx_vabsd_bu(src, ref1);
338*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = __lsx_vhaddw_hu_bu(diff, diff);
339*fb1b10abSAndroid Build Coastguard Worker     sad1 = __lsx_vadd_h(sad1, sad_tmp);
340*fb1b10abSAndroid Build Coastguard Worker     diff = __lsx_vabsd_bu(src, ref2);
341*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = __lsx_vhaddw_hu_bu(diff, diff);
342*fb1b10abSAndroid Build Coastguard Worker     sad2 = __lsx_vadd_h(sad2, sad_tmp);
343*fb1b10abSAndroid Build Coastguard Worker     diff = __lsx_vabsd_bu(src, ref3);
344*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = __lsx_vhaddw_hu_bu(diff, diff);
345*fb1b10abSAndroid Build Coastguard Worker     sad3 = __lsx_vadd_h(sad3, sad_tmp);
346*fb1b10abSAndroid Build Coastguard Worker   }
347*fb1b10abSAndroid Build Coastguard Worker   sad_array[0] = hadd_uh_u32(sad0);
348*fb1b10abSAndroid Build Coastguard Worker   sad_array[1] = hadd_uh_u32(sad1);
349*fb1b10abSAndroid Build Coastguard Worker   sad_array[2] = hadd_uh_u32(sad2);
350*fb1b10abSAndroid Build Coastguard Worker   sad_array[3] = hadd_uh_u32(sad3);
351*fb1b10abSAndroid Build Coastguard Worker }
352*fb1b10abSAndroid Build Coastguard Worker 
sad_32width_x4d_lsx(const uint8_t * src,int32_t src_stride,const uint8_t * const aref_ptr[],int32_t ref_stride,int32_t height,uint32_t * sad_array)353*fb1b10abSAndroid Build Coastguard Worker static void sad_32width_x4d_lsx(const uint8_t *src, int32_t src_stride,
354*fb1b10abSAndroid Build Coastguard Worker                                 const uint8_t *const aref_ptr[],
355*fb1b10abSAndroid Build Coastguard Worker                                 int32_t ref_stride, int32_t height,
356*fb1b10abSAndroid Build Coastguard Worker                                 uint32_t *sad_array) {
357*fb1b10abSAndroid Build Coastguard Worker   const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
358*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt = height;
359*fb1b10abSAndroid Build Coastguard Worker   __m128i src0, src1, ref0, ref1, sad_tmp;
360*fb1b10abSAndroid Build Coastguard Worker   __m128i sad0 = __lsx_vldi(0);
361*fb1b10abSAndroid Build Coastguard Worker   __m128i sad1 = sad0;
362*fb1b10abSAndroid Build Coastguard Worker   __m128i sad2 = sad0;
363*fb1b10abSAndroid Build Coastguard Worker   __m128i sad3 = sad0;
364*fb1b10abSAndroid Build Coastguard Worker 
365*fb1b10abSAndroid Build Coastguard Worker   ref0_ptr = aref_ptr[0];
366*fb1b10abSAndroid Build Coastguard Worker   ref1_ptr = aref_ptr[1];
367*fb1b10abSAndroid Build Coastguard Worker   ref2_ptr = aref_ptr[2];
368*fb1b10abSAndroid Build Coastguard Worker   ref3_ptr = aref_ptr[3];
369*fb1b10abSAndroid Build Coastguard Worker 
370*fb1b10abSAndroid Build Coastguard Worker   for (; ht_cnt--;) {
371*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1);
372*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
373*fb1b10abSAndroid Build Coastguard Worker 
374*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, ref0_ptr, 0, ref0_ptr, 16, ref0, ref1);
375*fb1b10abSAndroid Build Coastguard Worker     ref0_ptr += ref_stride;
376*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
377*fb1b10abSAndroid Build Coastguard Worker     sad0 = __lsx_vadd_h(sad0, sad_tmp);
378*fb1b10abSAndroid Build Coastguard Worker 
379*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, ref1_ptr, 0, ref1_ptr, 16, ref0, ref1);
380*fb1b10abSAndroid Build Coastguard Worker     ref1_ptr += ref_stride;
381*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
382*fb1b10abSAndroid Build Coastguard Worker     sad1 = __lsx_vadd_h(sad1, sad_tmp);
383*fb1b10abSAndroid Build Coastguard Worker 
384*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, ref2_ptr, 0, ref2_ptr, 16, ref0, ref1);
385*fb1b10abSAndroid Build Coastguard Worker     ref2_ptr += ref_stride;
386*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
387*fb1b10abSAndroid Build Coastguard Worker     sad2 = __lsx_vadd_h(sad2, sad_tmp);
388*fb1b10abSAndroid Build Coastguard Worker 
389*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vld, ref3_ptr, 0, ref3_ptr, 16, ref0, ref1);
390*fb1b10abSAndroid Build Coastguard Worker     ref3_ptr += ref_stride;
391*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
392*fb1b10abSAndroid Build Coastguard Worker     sad3 = __lsx_vadd_h(sad3, sad_tmp);
393*fb1b10abSAndroid Build Coastguard Worker   }
394*fb1b10abSAndroid Build Coastguard Worker   sad_array[0] = hadd_uh_u32(sad0);
395*fb1b10abSAndroid Build Coastguard Worker   sad_array[1] = hadd_uh_u32(sad1);
396*fb1b10abSAndroid Build Coastguard Worker   sad_array[2] = hadd_uh_u32(sad2);
397*fb1b10abSAndroid Build Coastguard Worker   sad_array[3] = hadd_uh_u32(sad3);
398*fb1b10abSAndroid Build Coastguard Worker }
399*fb1b10abSAndroid Build Coastguard Worker 
sad_64width_x4d_lsx(const uint8_t * src,int32_t src_stride,const uint8_t * const aref_ptr[],int32_t ref_stride,int32_t height,uint32_t * sad_array)400*fb1b10abSAndroid Build Coastguard Worker static void sad_64width_x4d_lsx(const uint8_t *src, int32_t src_stride,
401*fb1b10abSAndroid Build Coastguard Worker                                 const uint8_t *const aref_ptr[],
402*fb1b10abSAndroid Build Coastguard Worker                                 int32_t ref_stride, int32_t height,
403*fb1b10abSAndroid Build Coastguard Worker                                 uint32_t *sad_array) {
404*fb1b10abSAndroid Build Coastguard Worker   const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
405*fb1b10abSAndroid Build Coastguard Worker   int32_t ht_cnt = height;
406*fb1b10abSAndroid Build Coastguard Worker   __m128i src0, src1, src2, src3;
407*fb1b10abSAndroid Build Coastguard Worker   __m128i ref0, ref1, ref2, ref3;
408*fb1b10abSAndroid Build Coastguard Worker   __m128i sad, sad_tmp;
409*fb1b10abSAndroid Build Coastguard Worker 
410*fb1b10abSAndroid Build Coastguard Worker   __m128i sad0_0 = __lsx_vldi(0);
411*fb1b10abSAndroid Build Coastguard Worker   __m128i sad0_1 = sad0_0;
412*fb1b10abSAndroid Build Coastguard Worker   __m128i sad1_0 = sad0_0;
413*fb1b10abSAndroid Build Coastguard Worker   __m128i sad1_1 = sad0_0;
414*fb1b10abSAndroid Build Coastguard Worker   __m128i sad2_0 = sad0_0;
415*fb1b10abSAndroid Build Coastguard Worker   __m128i sad2_1 = sad0_0;
416*fb1b10abSAndroid Build Coastguard Worker   __m128i sad3_0 = sad0_0;
417*fb1b10abSAndroid Build Coastguard Worker   __m128i sad3_1 = sad0_0;
418*fb1b10abSAndroid Build Coastguard Worker 
419*fb1b10abSAndroid Build Coastguard Worker   ref0_ptr = aref_ptr[0];
420*fb1b10abSAndroid Build Coastguard Worker   ref1_ptr = aref_ptr[1];
421*fb1b10abSAndroid Build Coastguard Worker   ref2_ptr = aref_ptr[2];
422*fb1b10abSAndroid Build Coastguard Worker   ref3_ptr = aref_ptr[3];
423*fb1b10abSAndroid Build Coastguard Worker 
424*fb1b10abSAndroid Build Coastguard Worker   for (; ht_cnt--;) {
425*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, src, 0, src, 16, src, 32, src, 48, src0, src1, src2,
426*fb1b10abSAndroid Build Coastguard Worker               src3);
427*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
428*fb1b10abSAndroid Build Coastguard Worker 
429*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, ref0_ptr, 0, ref0_ptr, 16, ref0_ptr, 32, ref0_ptr, 48,
430*fb1b10abSAndroid Build Coastguard Worker               ref0, ref1, ref2, ref3);
431*fb1b10abSAndroid Build Coastguard Worker     ref0_ptr += ref_stride;
432*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
433*fb1b10abSAndroid Build Coastguard Worker     sad0_0 = __lsx_vadd_h(sad0_0, sad_tmp);
434*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src2, src3, ref2, ref3);
435*fb1b10abSAndroid Build Coastguard Worker     sad0_1 = __lsx_vadd_h(sad0_1, sad_tmp);
436*fb1b10abSAndroid Build Coastguard Worker 
437*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, ref1_ptr, 0, ref1_ptr, 16, ref1_ptr, 32, ref1_ptr, 48,
438*fb1b10abSAndroid Build Coastguard Worker               ref0, ref1, ref2, ref3);
439*fb1b10abSAndroid Build Coastguard Worker     ref1_ptr += ref_stride;
440*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
441*fb1b10abSAndroid Build Coastguard Worker     sad1_0 = __lsx_vadd_h(sad1_0, sad_tmp);
442*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src2, src3, ref2, ref3);
443*fb1b10abSAndroid Build Coastguard Worker     sad1_1 = __lsx_vadd_h(sad1_1, sad_tmp);
444*fb1b10abSAndroid Build Coastguard Worker 
445*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, ref2_ptr, 0, ref2_ptr, 16, ref2_ptr, 32, ref2_ptr, 48,
446*fb1b10abSAndroid Build Coastguard Worker               ref0, ref1, ref2, ref3);
447*fb1b10abSAndroid Build Coastguard Worker     ref2_ptr += ref_stride;
448*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
449*fb1b10abSAndroid Build Coastguard Worker     sad2_0 = __lsx_vadd_h(sad2_0, sad_tmp);
450*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src2, src3, ref2, ref3);
451*fb1b10abSAndroid Build Coastguard Worker     sad2_1 = __lsx_vadd_h(sad2_1, sad_tmp);
452*fb1b10abSAndroid Build Coastguard Worker 
453*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, ref3_ptr, 0, ref3_ptr, 16, ref3_ptr, 32, ref3_ptr, 48,
454*fb1b10abSAndroid Build Coastguard Worker               ref0, ref1, ref2, ref3);
455*fb1b10abSAndroid Build Coastguard Worker     ref3_ptr += ref_stride;
456*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
457*fb1b10abSAndroid Build Coastguard Worker     sad3_0 = __lsx_vadd_h(sad3_0, sad_tmp);
458*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src2, src3, ref2, ref3);
459*fb1b10abSAndroid Build Coastguard Worker     sad3_1 = __lsx_vadd_h(sad3_1, sad_tmp);
460*fb1b10abSAndroid Build Coastguard Worker   }
461*fb1b10abSAndroid Build Coastguard Worker   sad = __lsx_vhaddw_wu_hu(sad0_0, sad0_0);
462*fb1b10abSAndroid Build Coastguard Worker   sad_tmp = __lsx_vhaddw_wu_hu(sad0_1, sad0_1);
463*fb1b10abSAndroid Build Coastguard Worker   sad = __lsx_vadd_w(sad, sad_tmp);
464*fb1b10abSAndroid Build Coastguard Worker   sad_array[0] = hadd_uw_u32(sad);
465*fb1b10abSAndroid Build Coastguard Worker 
466*fb1b10abSAndroid Build Coastguard Worker   sad = __lsx_vhaddw_wu_hu(sad1_0, sad1_0);
467*fb1b10abSAndroid Build Coastguard Worker   sad_tmp = __lsx_vhaddw_wu_hu(sad1_1, sad1_1);
468*fb1b10abSAndroid Build Coastguard Worker   sad = __lsx_vadd_w(sad, sad_tmp);
469*fb1b10abSAndroid Build Coastguard Worker   sad_array[1] = hadd_uw_u32(sad);
470*fb1b10abSAndroid Build Coastguard Worker 
471*fb1b10abSAndroid Build Coastguard Worker   sad = __lsx_vhaddw_wu_hu(sad2_0, sad2_0);
472*fb1b10abSAndroid Build Coastguard Worker   sad_tmp = __lsx_vhaddw_wu_hu(sad2_1, sad2_1);
473*fb1b10abSAndroid Build Coastguard Worker   sad = __lsx_vadd_w(sad, sad_tmp);
474*fb1b10abSAndroid Build Coastguard Worker   sad_array[2] = hadd_uw_u32(sad);
475*fb1b10abSAndroid Build Coastguard Worker 
476*fb1b10abSAndroid Build Coastguard Worker   sad = __lsx_vhaddw_wu_hu(sad3_0, sad3_0);
477*fb1b10abSAndroid Build Coastguard Worker   sad_tmp = __lsx_vhaddw_wu_hu(sad3_1, sad3_1);
478*fb1b10abSAndroid Build Coastguard Worker   sad = __lsx_vadd_w(sad, sad_tmp);
479*fb1b10abSAndroid Build Coastguard Worker   sad_array[3] = hadd_uw_u32(sad);
480*fb1b10abSAndroid Build Coastguard Worker }
481*fb1b10abSAndroid Build Coastguard Worker 
avgsad_32width_lsx(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height,const uint8_t * sec_pred)482*fb1b10abSAndroid Build Coastguard Worker static uint32_t avgsad_32width_lsx(const uint8_t *src, int32_t src_stride,
483*fb1b10abSAndroid Build Coastguard Worker                                    const uint8_t *ref, int32_t ref_stride,
484*fb1b10abSAndroid Build Coastguard Worker                                    int32_t height, const uint8_t *sec_pred) {
485*fb1b10abSAndroid Build Coastguard Worker   int32_t res, ht_cnt = (height >> 2);
486*fb1b10abSAndroid Build Coastguard Worker   __m128i src0, src1, src2, src3, src4, src5, src6, src7;
487*fb1b10abSAndroid Build Coastguard Worker   __m128i ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
488*fb1b10abSAndroid Build Coastguard Worker   __m128i pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7;
489*fb1b10abSAndroid Build Coastguard Worker   __m128i comp0, comp1, sad_tmp;
490*fb1b10abSAndroid Build Coastguard Worker   __m128i sad = __lsx_vldi(0);
491*fb1b10abSAndroid Build Coastguard Worker   uint8_t *src_tmp, *ref_tmp;
492*fb1b10abSAndroid Build Coastguard Worker   int32_t src_stride2 = src_stride << 1;
493*fb1b10abSAndroid Build Coastguard Worker   int32_t src_stride3 = src_stride2 + src_stride;
494*fb1b10abSAndroid Build Coastguard Worker   int32_t src_stride4 = src_stride2 << 1;
495*fb1b10abSAndroid Build Coastguard Worker   int32_t ref_stride2 = ref_stride << 1;
496*fb1b10abSAndroid Build Coastguard Worker   int32_t ref_stride3 = ref_stride2 + ref_stride;
497*fb1b10abSAndroid Build Coastguard Worker   int32_t ref_stride4 = ref_stride2 << 1;
498*fb1b10abSAndroid Build Coastguard Worker 
499*fb1b10abSAndroid Build Coastguard Worker   for (; ht_cnt--;) {
500*fb1b10abSAndroid Build Coastguard Worker     src_tmp = (uint8_t *)src + 16;
501*fb1b10abSAndroid Build Coastguard Worker     src0 = __lsx_vld(src, 0);
502*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride2, src2, src4);
503*fb1b10abSAndroid Build Coastguard Worker     src6 = __lsx_vldx(src, src_stride3);
504*fb1b10abSAndroid Build Coastguard Worker     src1 = __lsx_vld(src_tmp, 0);
505*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride2, src3,
506*fb1b10abSAndroid Build Coastguard Worker               src5);
507*fb1b10abSAndroid Build Coastguard Worker     src7 = __lsx_vldx(src_tmp, src_stride3);
508*fb1b10abSAndroid Build Coastguard Worker     src += src_stride4;
509*fb1b10abSAndroid Build Coastguard Worker 
510*fb1b10abSAndroid Build Coastguard Worker     ref_tmp = (uint8_t *)ref + 16;
511*fb1b10abSAndroid Build Coastguard Worker     ref0 = __lsx_vld(ref, 0);
512*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vldx, ref, ref_stride, ref, ref_stride2, ref2, ref4);
513*fb1b10abSAndroid Build Coastguard Worker     ref6 = __lsx_vldx(ref, ref_stride3);
514*fb1b10abSAndroid Build Coastguard Worker     ref1 = __lsx_vld(ref_tmp, 0);
515*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vldx, ref_tmp, ref_stride, ref_tmp, ref_stride2, ref3,
516*fb1b10abSAndroid Build Coastguard Worker               ref5);
517*fb1b10abSAndroid Build Coastguard Worker     ref7 = __lsx_vldx(ref_tmp, ref_stride3);
518*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride4;
519*fb1b10abSAndroid Build Coastguard Worker 
520*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, sec_pred, 0, sec_pred, 32, sec_pred, 64, sec_pred, 96,
521*fb1b10abSAndroid Build Coastguard Worker               pred0, pred2, pred4, pred6);
522*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, sec_pred, 16, sec_pred, 48, sec_pred, 80, sec_pred,
523*fb1b10abSAndroid Build Coastguard Worker               112, pred1, pred3, pred5, pred7);
524*fb1b10abSAndroid Build Coastguard Worker     sec_pred += 128;
525*fb1b10abSAndroid Build Coastguard Worker 
526*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vavgr_bu, pred0, ref0, pred1, ref1, comp0, comp1);
527*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, comp0, comp1);
528*fb1b10abSAndroid Build Coastguard Worker     sad = __lsx_vadd_h(sad, sad_tmp);
529*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vavgr_bu, pred2, ref2, pred3, ref3, comp0, comp1);
530*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src2, src3, comp0, comp1);
531*fb1b10abSAndroid Build Coastguard Worker     sad = __lsx_vadd_h(sad, sad_tmp);
532*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vavgr_bu, pred4, ref4, pred5, ref5, comp0, comp1);
533*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src4, src5, comp0, comp1);
534*fb1b10abSAndroid Build Coastguard Worker     sad = __lsx_vadd_h(sad, sad_tmp);
535*fb1b10abSAndroid Build Coastguard Worker     DUP2_ARG2(__lsx_vavgr_bu, pred6, ref6, pred7, ref7, comp0, comp1);
536*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src6, src7, comp0, comp1);
537*fb1b10abSAndroid Build Coastguard Worker     sad = __lsx_vadd_h(sad, sad_tmp);
538*fb1b10abSAndroid Build Coastguard Worker   }
539*fb1b10abSAndroid Build Coastguard Worker   res = hadd_uh_u32(sad);
540*fb1b10abSAndroid Build Coastguard Worker   return res;
541*fb1b10abSAndroid Build Coastguard Worker }
542*fb1b10abSAndroid Build Coastguard Worker 
avgsad_64width_lsx(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height,const uint8_t * sec_pred)543*fb1b10abSAndroid Build Coastguard Worker static uint32_t avgsad_64width_lsx(const uint8_t *src, int32_t src_stride,
544*fb1b10abSAndroid Build Coastguard Worker                                    const uint8_t *ref, int32_t ref_stride,
545*fb1b10abSAndroid Build Coastguard Worker                                    int32_t height, const uint8_t *sec_pred) {
546*fb1b10abSAndroid Build Coastguard Worker   int32_t res, ht_cnt = (height >> 2);
547*fb1b10abSAndroid Build Coastguard Worker   __m128i src0, src1, src2, src3, ref0, ref1, ref2, ref3;
548*fb1b10abSAndroid Build Coastguard Worker   __m128i comp0, comp1, comp2, comp3, pred0, pred1, pred2, pred3;
549*fb1b10abSAndroid Build Coastguard Worker   __m128i sad, sad_tmp;
550*fb1b10abSAndroid Build Coastguard Worker   __m128i sad0 = __lsx_vldi(0);
551*fb1b10abSAndroid Build Coastguard Worker   __m128i sad1 = sad0;
552*fb1b10abSAndroid Build Coastguard Worker 
553*fb1b10abSAndroid Build Coastguard Worker   for (; ht_cnt--;) {
554*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, src, 0, src, 16, src, 32, src, 48, src0, src1, src2,
555*fb1b10abSAndroid Build Coastguard Worker               src3);
556*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
557*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, ref, 0, ref, 16, ref, 32, ref, 48, ref0, ref1, ref2,
558*fb1b10abSAndroid Build Coastguard Worker               ref3);
559*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride;
560*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, sec_pred, 0, sec_pred, 16, sec_pred, 32, sec_pred, 48,
561*fb1b10abSAndroid Build Coastguard Worker               pred0, pred1, pred2, pred3);
562*fb1b10abSAndroid Build Coastguard Worker     sec_pred += 64;
563*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vavgr_bu, pred0, ref0, pred1, ref1, pred2, ref2, pred3,
564*fb1b10abSAndroid Build Coastguard Worker               ref3, comp0, comp1, comp2, comp3);
565*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, comp0, comp1);
566*fb1b10abSAndroid Build Coastguard Worker     sad0 = __lsx_vadd_h(sad0, sad_tmp);
567*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src2, src3, comp2, comp3);
568*fb1b10abSAndroid Build Coastguard Worker     sad1 = __lsx_vadd_h(sad1, sad_tmp);
569*fb1b10abSAndroid Build Coastguard Worker 
570*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, src, 0, src, 16, src, 32, src, 48, src0, src1, src2,
571*fb1b10abSAndroid Build Coastguard Worker               src3);
572*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
573*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, ref, 0, ref, 16, ref, 32, ref, 48, ref0, ref1, ref2,
574*fb1b10abSAndroid Build Coastguard Worker               ref3);
575*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride;
576*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, sec_pred, 0, sec_pred, 16, sec_pred, 32, sec_pred, 48,
577*fb1b10abSAndroid Build Coastguard Worker               pred0, pred1, pred2, pred3);
578*fb1b10abSAndroid Build Coastguard Worker     sec_pred += 64;
579*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vavgr_bu, pred0, ref0, pred1, ref1, pred2, ref2, pred3,
580*fb1b10abSAndroid Build Coastguard Worker               ref3, comp0, comp1, comp2, comp3);
581*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, comp0, comp1);
582*fb1b10abSAndroid Build Coastguard Worker     sad0 = __lsx_vadd_h(sad0, sad_tmp);
583*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src2, src3, comp2, comp3);
584*fb1b10abSAndroid Build Coastguard Worker     sad1 = __lsx_vadd_h(sad1, sad_tmp);
585*fb1b10abSAndroid Build Coastguard Worker 
586*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, src, 0, src, 16, src, 32, src, 48, src0, src1, src2,
587*fb1b10abSAndroid Build Coastguard Worker               src3);
588*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
589*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, ref, 0, ref, 16, ref, 32, ref, 48, ref0, ref1, ref2,
590*fb1b10abSAndroid Build Coastguard Worker               ref3);
591*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride;
592*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, sec_pred, 0, sec_pred, 16, sec_pred, 32, sec_pred, 48,
593*fb1b10abSAndroid Build Coastguard Worker               pred0, pred1, pred2, pred3);
594*fb1b10abSAndroid Build Coastguard Worker     sec_pred += 64;
595*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vavgr_bu, pred0, ref0, pred1, ref1, pred2, ref2, pred3,
596*fb1b10abSAndroid Build Coastguard Worker               ref3, comp0, comp1, comp2, comp3);
597*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, comp0, comp1);
598*fb1b10abSAndroid Build Coastguard Worker     sad0 = __lsx_vadd_h(sad0, sad_tmp);
599*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src2, src3, comp2, comp3);
600*fb1b10abSAndroid Build Coastguard Worker     sad1 = __lsx_vadd_h(sad1, sad_tmp);
601*fb1b10abSAndroid Build Coastguard Worker 
602*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, src, 0, src, 16, src, 32, src, 48, src0, src1, src2,
603*fb1b10abSAndroid Build Coastguard Worker               src3);
604*fb1b10abSAndroid Build Coastguard Worker     src += src_stride;
605*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, ref, 0, ref, 16, ref, 32, ref, 48, ref0, ref1, ref2,
606*fb1b10abSAndroid Build Coastguard Worker               ref3);
607*fb1b10abSAndroid Build Coastguard Worker     ref += ref_stride;
608*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vld, sec_pred, 0, sec_pred, 16, sec_pred, 32, sec_pred, 48,
609*fb1b10abSAndroid Build Coastguard Worker               pred0, pred1, pred2, pred3);
610*fb1b10abSAndroid Build Coastguard Worker     sec_pred += 64;
611*fb1b10abSAndroid Build Coastguard Worker     DUP4_ARG2(__lsx_vavgr_bu, pred0, ref0, pred1, ref1, pred2, ref2, pred3,
612*fb1b10abSAndroid Build Coastguard Worker               ref3, comp0, comp1, comp2, comp3);
613*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src0, src1, comp0, comp1);
614*fb1b10abSAndroid Build Coastguard Worker     sad0 = __lsx_vadd_h(sad0, sad_tmp);
615*fb1b10abSAndroid Build Coastguard Worker     sad_tmp = sad_ub2_uh(src2, src3, comp2, comp3);
616*fb1b10abSAndroid Build Coastguard Worker     sad1 = __lsx_vadd_h(sad1, sad_tmp);
617*fb1b10abSAndroid Build Coastguard Worker   }
618*fb1b10abSAndroid Build Coastguard Worker   sad = __lsx_vhaddw_wu_hu(sad0, sad0);
619*fb1b10abSAndroid Build Coastguard Worker   sad_tmp = __lsx_vhaddw_wu_hu(sad1, sad1);
620*fb1b10abSAndroid Build Coastguard Worker   sad = __lsx_vadd_w(sad, sad_tmp);
621*fb1b10abSAndroid Build Coastguard Worker 
622*fb1b10abSAndroid Build Coastguard Worker   res = hadd_sw_s32(sad);
623*fb1b10abSAndroid Build Coastguard Worker   return res;
624*fb1b10abSAndroid Build Coastguard Worker }
625*fb1b10abSAndroid Build Coastguard Worker 
/* Defines vpx_sad8x<ht>_lsx(), which forwards to sad_8width_lsx() with the
 * block height fixed to `ht`. */
#define VPX_SAD_8xHT_LSX(ht)                                             \
  uint32_t vpx_sad8x##ht##_lsx(const uint8_t *src, int32_t src_stride,   \
                               const uint8_t *ref, int32_t ref_stride) { \
    return sad_8width_lsx(src, src_stride, ref, ref_stride, (ht));       \
  }
631*fb1b10abSAndroid Build Coastguard Worker 
/* Defines vpx_sad16x<ht>_lsx(), which forwards to sad_16width_lsx() with the
 * block height fixed to `ht`. */
#define VPX_SAD_16xHT_LSX(ht)                                             \
  uint32_t vpx_sad16x##ht##_lsx(const uint8_t *src, int32_t src_stride,   \
                                const uint8_t *ref, int32_t ref_stride) { \
    return sad_16width_lsx(src, src_stride, ref, ref_stride, (ht));       \
  }
637*fb1b10abSAndroid Build Coastguard Worker 
/* Defines vpx_sad32x<ht>_lsx(), which forwards to sad_32width_lsx() with the
 * block height fixed to `ht`. */
#define VPX_SAD_32xHT_LSX(ht)                                             \
  uint32_t vpx_sad32x##ht##_lsx(const uint8_t *src, int32_t src_stride,   \
                                const uint8_t *ref, int32_t ref_stride) { \
    return sad_32width_lsx(src, src_stride, ref, ref_stride, (ht));       \
  }
643*fb1b10abSAndroid Build Coastguard Worker 
/* Defines vpx_sad64x<ht>_lsx(), which forwards to sad_64width_lsx() with the
 * block height fixed to `ht`. */
#define VPX_SAD_64xHT_LSX(ht)                                             \
  uint32_t vpx_sad64x##ht##_lsx(const uint8_t *src, int32_t src_stride,   \
                                const uint8_t *ref, int32_t ref_stride) { \
    return sad_64width_lsx(src, src_stride, ref, ref_stride, (ht));       \
  }
649*fb1b10abSAndroid Build Coastguard Worker 
/* Defines vpx_sad8x<ht>x4d_lsx(), which forwards to sad_8width_x4d_lsx() with
 * the block height fixed to `ht`; the results are written to sads. */
#define VPX_SAD_8xHTx4D_LSX(ht)                                         \
  void vpx_sad8x##ht##x4d_lsx(const uint8_t *src, int32_t src_stride,   \
                              const uint8_t *const refs[4],             \
                              int32_t ref_stride, uint32_t sads[4]) {   \
    sad_8width_x4d_lsx(src, src_stride, refs, ref_stride, (ht), sads);  \
  }
656*fb1b10abSAndroid Build Coastguard Worker 
/* Defines vpx_sad16x<ht>x4d_lsx(), which forwards to sad_16width_x4d_lsx()
 * with the block height fixed to `ht`; the results are written to sads. */
#define VPX_SAD_16xHTx4D_LSX(ht)                                         \
  void vpx_sad16x##ht##x4d_lsx(const uint8_t *src, int32_t src_stride,   \
                               const uint8_t *const refs[],              \
                               int32_t ref_stride, uint32_t *sads) {     \
    sad_16width_x4d_lsx(src, src_stride, refs, ref_stride, (ht), sads);  \
  }
663*fb1b10abSAndroid Build Coastguard Worker 
/* Defines vpx_sad32x<ht>x4d_lsx(), which forwards to sad_32width_x4d_lsx()
 * with the block height fixed to `ht`; the results are written to sads. */
#define VPX_SAD_32xHTx4D_LSX(ht)                                         \
  void vpx_sad32x##ht##x4d_lsx(const uint8_t *src, int32_t src_stride,   \
                               const uint8_t *const refs[],              \
                               int32_t ref_stride, uint32_t *sads) {     \
    sad_32width_x4d_lsx(src, src_stride, refs, ref_stride, (ht), sads);  \
  }
670*fb1b10abSAndroid Build Coastguard Worker 
/* Defines vpx_sad64x<ht>x4d_lsx(), which forwards to sad_64width_x4d_lsx()
 * with the block height fixed to `ht`; the results are written to sads. */
#define VPX_SAD_64xHTx4D_LSX(ht)                                         \
  void vpx_sad64x##ht##x4d_lsx(const uint8_t *src, int32_t src_stride,   \
                               const uint8_t *const refs[],              \
                               int32_t ref_stride, uint32_t *sads) {     \
    sad_64width_x4d_lsx(src, src_stride, refs, ref_stride, (ht), sads);  \
  }
677*fb1b10abSAndroid Build Coastguard Worker 
/* Defines vpx_sad32x<ht>_avg_lsx(), which forwards to avgsad_32width_lsx()
 * with the block height fixed to `ht` and second_pred as the predictor that
 * is averaged with ref. */
#define VPX_AVGSAD_32xHT_LSX(ht)                                      \
  uint32_t vpx_sad32x##ht##_avg_lsx(                                  \
      const uint8_t *src, int32_t src_stride, const uint8_t *ref,     \
      int32_t ref_stride, const uint8_t *second_pred) {               \
    return avgsad_32width_lsx(src, src_stride, ref, ref_stride, (ht), \
                              second_pred);                           \
  }
685*fb1b10abSAndroid Build Coastguard Worker 
/* Defines vpx_sad64x<ht>_avg_lsx(), which forwards to avgsad_64width_lsx()
 * with the block height fixed to `ht` and second_pred as the predictor that
 * is averaged with ref. */
#define VPX_AVGSAD_64xHT_LSX(ht)                                      \
  uint32_t vpx_sad64x##ht##_avg_lsx(                                  \
      const uint8_t *src, int32_t src_stride, const uint8_t *ref,     \
      int32_t ref_stride, const uint8_t *second_pred) {               \
    return avgsad_64width_lsx(src, src_stride, ref, ref_stride, (ht), \
                              second_pred);                           \
  }
693*fb1b10abSAndroid Build Coastguard Worker 
694*fb1b10abSAndroid Build Coastguard Worker #define SAD64                                                             \
695*fb1b10abSAndroid Build Coastguard Worker   VPX_SAD_64xHT_LSX(64) VPX_SAD_64xHTx4D_LSX(64) VPX_SAD_64xHTx4D_LSX(32) \
696*fb1b10abSAndroid Build Coastguard Worker       VPX_AVGSAD_64xHT_LSX(64)
697*fb1b10abSAndroid Build Coastguard Worker 
698*fb1b10abSAndroid Build Coastguard Worker SAD64
699*fb1b10abSAndroid Build Coastguard Worker 
700*fb1b10abSAndroid Build Coastguard Worker #define SAD32                                                             \
701*fb1b10abSAndroid Build Coastguard Worker   VPX_SAD_32xHT_LSX(32) VPX_SAD_32xHTx4D_LSX(32) VPX_SAD_32xHTx4D_LSX(64) \
702*fb1b10abSAndroid Build Coastguard Worker       VPX_AVGSAD_32xHT_LSX(32)
703*fb1b10abSAndroid Build Coastguard Worker 
704*fb1b10abSAndroid Build Coastguard Worker SAD32
705*fb1b10abSAndroid Build Coastguard Worker 
706*fb1b10abSAndroid Build Coastguard Worker #define SAD16 VPX_SAD_16xHT_LSX(16) VPX_SAD_16xHTx4D_LSX(16)
707*fb1b10abSAndroid Build Coastguard Worker 
708*fb1b10abSAndroid Build Coastguard Worker SAD16
709*fb1b10abSAndroid Build Coastguard Worker 
710*fb1b10abSAndroid Build Coastguard Worker #define SAD8 VPX_SAD_8xHT_LSX(8) VPX_SAD_8xHTx4D_LSX(8)
711*fb1b10abSAndroid Build Coastguard Worker 
712*fb1b10abSAndroid Build Coastguard Worker SAD8
713*fb1b10abSAndroid Build Coastguard Worker 
714*fb1b10abSAndroid Build Coastguard Worker #undef SAD64
715*fb1b10abSAndroid Build Coastguard Worker #undef SAD32
716*fb1b10abSAndroid Build Coastguard Worker #undef SAD16
717*fb1b10abSAndroid Build Coastguard Worker #undef SAD8
718