/*
 *  Copyright (c) 2022 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10*fb1b10abSAndroid Build Coastguard Worker
11*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_config.h"
12*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_dsp_rtcd.h"
13*fb1b10abSAndroid Build Coastguard Worker #include "vpx_util/loongson_intrinsics.h"
14*fb1b10abSAndroid Build Coastguard Worker
sad_ub2_uh(__m128i in0,__m128i in1,__m128i ref0,__m128i ref1)15*fb1b10abSAndroid Build Coastguard Worker static INLINE __m128i sad_ub2_uh(__m128i in0, __m128i in1, __m128i ref0,
16*fb1b10abSAndroid Build Coastguard Worker __m128i ref1) {
17*fb1b10abSAndroid Build Coastguard Worker __m128i diff0_m, diff1_m, sad_m0;
18*fb1b10abSAndroid Build Coastguard Worker __m128i sad_m = __lsx_vldi(0);
19*fb1b10abSAndroid Build Coastguard Worker
20*fb1b10abSAndroid Build Coastguard Worker diff0_m = __lsx_vabsd_bu(in0, ref0);
21*fb1b10abSAndroid Build Coastguard Worker diff1_m = __lsx_vabsd_bu(in1, ref1);
22*fb1b10abSAndroid Build Coastguard Worker
23*fb1b10abSAndroid Build Coastguard Worker sad_m0 = __lsx_vhaddw_hu_bu(diff0_m, diff0_m);
24*fb1b10abSAndroid Build Coastguard Worker sad_m = __lsx_vadd_h(sad_m, sad_m0);
25*fb1b10abSAndroid Build Coastguard Worker sad_m0 = __lsx_vhaddw_hu_bu(diff1_m, diff1_m);
26*fb1b10abSAndroid Build Coastguard Worker sad_m = __lsx_vadd_h(sad_m, sad_m0);
27*fb1b10abSAndroid Build Coastguard Worker
28*fb1b10abSAndroid Build Coastguard Worker return sad_m;
29*fb1b10abSAndroid Build Coastguard Worker }
30*fb1b10abSAndroid Build Coastguard Worker
hadd_uw_u32(__m128i in)31*fb1b10abSAndroid Build Coastguard Worker static INLINE uint32_t hadd_uw_u32(__m128i in) {
32*fb1b10abSAndroid Build Coastguard Worker __m128i res0_m;
33*fb1b10abSAndroid Build Coastguard Worker uint32_t sum_m;
34*fb1b10abSAndroid Build Coastguard Worker
35*fb1b10abSAndroid Build Coastguard Worker res0_m = __lsx_vhaddw_du_wu(in, in);
36*fb1b10abSAndroid Build Coastguard Worker res0_m = __lsx_vhaddw_qu_du(res0_m, res0_m);
37*fb1b10abSAndroid Build Coastguard Worker sum_m = __lsx_vpickve2gr_w(res0_m, 0);
38*fb1b10abSAndroid Build Coastguard Worker
39*fb1b10abSAndroid Build Coastguard Worker return sum_m;
40*fb1b10abSAndroid Build Coastguard Worker }
41*fb1b10abSAndroid Build Coastguard Worker
hadd_uh_u32(__m128i in)42*fb1b10abSAndroid Build Coastguard Worker static INLINE uint32_t hadd_uh_u32(__m128i in) {
43*fb1b10abSAndroid Build Coastguard Worker __m128i res_m;
44*fb1b10abSAndroid Build Coastguard Worker uint32_t sum_m;
45*fb1b10abSAndroid Build Coastguard Worker
46*fb1b10abSAndroid Build Coastguard Worker res_m = __lsx_vhaddw_wu_hu(in, in);
47*fb1b10abSAndroid Build Coastguard Worker sum_m = hadd_uw_u32(res_m);
48*fb1b10abSAndroid Build Coastguard Worker
49*fb1b10abSAndroid Build Coastguard Worker return sum_m;
50*fb1b10abSAndroid Build Coastguard Worker }
51*fb1b10abSAndroid Build Coastguard Worker
hadd_sw_s32(__m128i in)52*fb1b10abSAndroid Build Coastguard Worker static INLINE int32_t hadd_sw_s32(__m128i in) {
53*fb1b10abSAndroid Build Coastguard Worker __m128i res0_m;
54*fb1b10abSAndroid Build Coastguard Worker int32_t sum_m;
55*fb1b10abSAndroid Build Coastguard Worker
56*fb1b10abSAndroid Build Coastguard Worker res0_m = __lsx_vhaddw_d_w(in, in);
57*fb1b10abSAndroid Build Coastguard Worker res0_m = __lsx_vhaddw_q_d(res0_m, res0_m);
58*fb1b10abSAndroid Build Coastguard Worker sum_m = __lsx_vpickve2gr_w(res0_m, 0);
59*fb1b10abSAndroid Build Coastguard Worker
60*fb1b10abSAndroid Build Coastguard Worker return sum_m;
61*fb1b10abSAndroid Build Coastguard Worker }
62*fb1b10abSAndroid Build Coastguard Worker
sad_8width_lsx(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height)63*fb1b10abSAndroid Build Coastguard Worker static uint32_t sad_8width_lsx(const uint8_t *src, int32_t src_stride,
64*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref, int32_t ref_stride,
65*fb1b10abSAndroid Build Coastguard Worker int32_t height) {
66*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt;
67*fb1b10abSAndroid Build Coastguard Worker uint32_t res;
68*fb1b10abSAndroid Build Coastguard Worker __m128i src0, src1, src2, src3, ref0, ref1, ref2, ref3, sad_tmp;
69*fb1b10abSAndroid Build Coastguard Worker __m128i sad = __lsx_vldi(0);
70*fb1b10abSAndroid Build Coastguard Worker
71*fb1b10abSAndroid Build Coastguard Worker for (ht_cnt = (height >> 2); ht_cnt--;) {
72*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, src, 0, ref, 0, src0, ref0);
73*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
74*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
75*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, src, 0, ref, 0, src1, ref1);
76*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
77*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
78*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, src, 0, ref, 0, src2, ref2);
79*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
80*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
81*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, src, 0, ref, 0, src3, ref3);
82*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
83*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
84*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vpickev_d, src1, src0, src3, src2, ref1, ref0, ref3, ref2,
85*fb1b10abSAndroid Build Coastguard Worker src0, src1, ref0, ref1);
86*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
87*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vadd_h(sad, sad_tmp);
88*fb1b10abSAndroid Build Coastguard Worker }
89*fb1b10abSAndroid Build Coastguard Worker res = hadd_uh_u32(sad);
90*fb1b10abSAndroid Build Coastguard Worker return res;
91*fb1b10abSAndroid Build Coastguard Worker }
92*fb1b10abSAndroid Build Coastguard Worker
sad_16width_lsx(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height)93*fb1b10abSAndroid Build Coastguard Worker static uint32_t sad_16width_lsx(const uint8_t *src, int32_t src_stride,
94*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref, int32_t ref_stride,
95*fb1b10abSAndroid Build Coastguard Worker int32_t height) {
96*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt = (height >> 2);
97*fb1b10abSAndroid Build Coastguard Worker uint32_t res;
98*fb1b10abSAndroid Build Coastguard Worker __m128i src0, src1, ref0, ref1, sad_tmp;
99*fb1b10abSAndroid Build Coastguard Worker __m128i sad = __lsx_vldi(0);
100*fb1b10abSAndroid Build Coastguard Worker int32_t src_stride2 = src_stride << 1;
101*fb1b10abSAndroid Build Coastguard Worker int32_t ref_stride2 = ref_stride << 1;
102*fb1b10abSAndroid Build Coastguard Worker
103*fb1b10abSAndroid Build Coastguard Worker for (; ht_cnt--;) {
104*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, src, 0, ref, 0, src0, ref0);
105*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vldx, src, src_stride, ref, ref_stride, src1, ref1);
106*fb1b10abSAndroid Build Coastguard Worker src += src_stride2;
107*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride2;
108*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
109*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vadd_h(sad, sad_tmp);
110*fb1b10abSAndroid Build Coastguard Worker
111*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, src, 0, ref, 0, src0, ref0);
112*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vldx, src, src_stride, ref, ref_stride, src1, ref1);
113*fb1b10abSAndroid Build Coastguard Worker src += src_stride2;
114*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride2;
115*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
116*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vadd_h(sad, sad_tmp);
117*fb1b10abSAndroid Build Coastguard Worker }
118*fb1b10abSAndroid Build Coastguard Worker
119*fb1b10abSAndroid Build Coastguard Worker res = hadd_uh_u32(sad);
120*fb1b10abSAndroid Build Coastguard Worker return res;
121*fb1b10abSAndroid Build Coastguard Worker }
122*fb1b10abSAndroid Build Coastguard Worker
sad_32width_lsx(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height)123*fb1b10abSAndroid Build Coastguard Worker static uint32_t sad_32width_lsx(const uint8_t *src, int32_t src_stride,
124*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref, int32_t ref_stride,
125*fb1b10abSAndroid Build Coastguard Worker int32_t height) {
126*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt = (height >> 2);
127*fb1b10abSAndroid Build Coastguard Worker uint32_t res;
128*fb1b10abSAndroid Build Coastguard Worker __m128i src0, src1, ref0, ref1;
129*fb1b10abSAndroid Build Coastguard Worker __m128i sad_tmp;
130*fb1b10abSAndroid Build Coastguard Worker __m128i sad = __lsx_vldi(0);
131*fb1b10abSAndroid Build Coastguard Worker
132*fb1b10abSAndroid Build Coastguard Worker for (; ht_cnt--;) {
133*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1);
134*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
135*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, ref, 0, ref, 16, ref0, ref1);
136*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
137*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
138*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vadd_h(sad, sad_tmp);
139*fb1b10abSAndroid Build Coastguard Worker
140*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1);
141*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
142*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, ref, 0, ref, 16, ref0, ref1);
143*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
144*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
145*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vadd_h(sad, sad_tmp);
146*fb1b10abSAndroid Build Coastguard Worker
147*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1);
148*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
149*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, ref, 0, ref, 16, ref0, ref1);
150*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
151*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
152*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vadd_h(sad, sad_tmp);
153*fb1b10abSAndroid Build Coastguard Worker
154*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1);
155*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
156*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, ref, 0, ref, 16, ref0, ref1);
157*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
158*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
159*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vadd_h(sad, sad_tmp);
160*fb1b10abSAndroid Build Coastguard Worker }
161*fb1b10abSAndroid Build Coastguard Worker res = hadd_uh_u32(sad);
162*fb1b10abSAndroid Build Coastguard Worker return res;
163*fb1b10abSAndroid Build Coastguard Worker }
164*fb1b10abSAndroid Build Coastguard Worker
sad_64width_lsx(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height)165*fb1b10abSAndroid Build Coastguard Worker static uint32_t sad_64width_lsx(const uint8_t *src, int32_t src_stride,
166*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref, int32_t ref_stride,
167*fb1b10abSAndroid Build Coastguard Worker int32_t height) {
168*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt = (height >> 1);
169*fb1b10abSAndroid Build Coastguard Worker uint32_t sad = 0;
170*fb1b10abSAndroid Build Coastguard Worker __m128i src0, src1, src2, src3;
171*fb1b10abSAndroid Build Coastguard Worker __m128i ref0, ref1, ref2, ref3;
172*fb1b10abSAndroid Build Coastguard Worker __m128i sad_tmp;
173*fb1b10abSAndroid Build Coastguard Worker __m128i sad0 = __lsx_vldi(0);
174*fb1b10abSAndroid Build Coastguard Worker __m128i sad1 = sad0;
175*fb1b10abSAndroid Build Coastguard Worker
176*fb1b10abSAndroid Build Coastguard Worker for (; ht_cnt--;) {
177*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, src, 0, src, 16, src, 32, src, 48, src0, src1, src2,
178*fb1b10abSAndroid Build Coastguard Worker src3);
179*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
180*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, ref, 0, ref, 16, ref, 32, ref, 48, ref0, ref1, ref2,
181*fb1b10abSAndroid Build Coastguard Worker ref3);
182*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
183*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
184*fb1b10abSAndroid Build Coastguard Worker sad0 = __lsx_vadd_h(sad0, sad_tmp);
185*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src2, src3, ref2, ref3);
186*fb1b10abSAndroid Build Coastguard Worker sad1 = __lsx_vadd_h(sad1, sad_tmp);
187*fb1b10abSAndroid Build Coastguard Worker
188*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, src, 0, src, 16, src, 32, src, 48, src0, src1, src2,
189*fb1b10abSAndroid Build Coastguard Worker src3);
190*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
191*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, ref, 0, ref, 16, ref, 32, ref, 48, ref0, ref1, ref2,
192*fb1b10abSAndroid Build Coastguard Worker ref3);
193*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
194*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
195*fb1b10abSAndroid Build Coastguard Worker sad0 = __lsx_vadd_h(sad0, sad_tmp);
196*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src2, src3, ref2, ref3);
197*fb1b10abSAndroid Build Coastguard Worker sad1 = __lsx_vadd_h(sad1, sad_tmp);
198*fb1b10abSAndroid Build Coastguard Worker }
199*fb1b10abSAndroid Build Coastguard Worker
200*fb1b10abSAndroid Build Coastguard Worker sad = hadd_uh_u32(sad0);
201*fb1b10abSAndroid Build Coastguard Worker sad += hadd_uh_u32(sad1);
202*fb1b10abSAndroid Build Coastguard Worker
203*fb1b10abSAndroid Build Coastguard Worker return sad;
204*fb1b10abSAndroid Build Coastguard Worker }
205*fb1b10abSAndroid Build Coastguard Worker
sad_8width_x4d_lsx(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * const aref_ptr[],int32_t ref_stride,int32_t height,uint32_t * sad_array)206*fb1b10abSAndroid Build Coastguard Worker static void sad_8width_x4d_lsx(const uint8_t *src_ptr, int32_t src_stride,
207*fb1b10abSAndroid Build Coastguard Worker const uint8_t *const aref_ptr[],
208*fb1b10abSAndroid Build Coastguard Worker int32_t ref_stride, int32_t height,
209*fb1b10abSAndroid Build Coastguard Worker uint32_t *sad_array) {
210*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt = (height >> 2);
211*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
212*fb1b10abSAndroid Build Coastguard Worker __m128i src0, src1, src2, src3, sad_tmp;
213*fb1b10abSAndroid Build Coastguard Worker __m128i ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
214*fb1b10abSAndroid Build Coastguard Worker __m128i ref8, ref9, ref10, ref11, ref12, ref13, ref14, ref15;
215*fb1b10abSAndroid Build Coastguard Worker __m128i sad0 = __lsx_vldi(0);
216*fb1b10abSAndroid Build Coastguard Worker __m128i sad1 = sad0;
217*fb1b10abSAndroid Build Coastguard Worker __m128i sad2 = sad0;
218*fb1b10abSAndroid Build Coastguard Worker __m128i sad3 = sad0;
219*fb1b10abSAndroid Build Coastguard Worker int32_t src_stride2 = src_stride << 1;
220*fb1b10abSAndroid Build Coastguard Worker int32_t src_stride3 = src_stride2 + src_stride;
221*fb1b10abSAndroid Build Coastguard Worker int32_t src_stride4 = src_stride2 << 1;
222*fb1b10abSAndroid Build Coastguard Worker int32_t ref_stride2 = ref_stride << 1;
223*fb1b10abSAndroid Build Coastguard Worker int32_t ref_stride3 = ref_stride2 + ref_stride;
224*fb1b10abSAndroid Build Coastguard Worker int32_t ref_stride4 = ref_stride2 << 1;
225*fb1b10abSAndroid Build Coastguard Worker
226*fb1b10abSAndroid Build Coastguard Worker ref0_ptr = aref_ptr[0];
227*fb1b10abSAndroid Build Coastguard Worker ref1_ptr = aref_ptr[1];
228*fb1b10abSAndroid Build Coastguard Worker ref2_ptr = aref_ptr[2];
229*fb1b10abSAndroid Build Coastguard Worker ref3_ptr = aref_ptr[3];
230*fb1b10abSAndroid Build Coastguard Worker
231*fb1b10abSAndroid Build Coastguard Worker for (; ht_cnt--;) {
232*fb1b10abSAndroid Build Coastguard Worker src0 = __lsx_vld(src_ptr, 0);
233*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vldx, src_ptr, src_stride, src_ptr, src_stride2, src1,
234*fb1b10abSAndroid Build Coastguard Worker src2);
235*fb1b10abSAndroid Build Coastguard Worker src3 = __lsx_vldx(src_ptr, src_stride3);
236*fb1b10abSAndroid Build Coastguard Worker src_ptr += src_stride4;
237*fb1b10abSAndroid Build Coastguard Worker ref0 = __lsx_vld(ref0_ptr, 0);
238*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vldx, ref0_ptr, ref_stride, ref0_ptr, ref_stride2, ref1,
239*fb1b10abSAndroid Build Coastguard Worker ref2);
240*fb1b10abSAndroid Build Coastguard Worker ref3 = __lsx_vldx(ref0_ptr, ref_stride3);
241*fb1b10abSAndroid Build Coastguard Worker ref0_ptr += ref_stride4;
242*fb1b10abSAndroid Build Coastguard Worker ref4 = __lsx_vld(ref1_ptr, 0);
243*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vldx, ref1_ptr, ref_stride, ref1_ptr, ref_stride2, ref5,
244*fb1b10abSAndroid Build Coastguard Worker ref6);
245*fb1b10abSAndroid Build Coastguard Worker ref7 = __lsx_vldx(ref1_ptr, ref_stride3);
246*fb1b10abSAndroid Build Coastguard Worker ref1_ptr += ref_stride4;
247*fb1b10abSAndroid Build Coastguard Worker ref8 = __lsx_vld(ref2_ptr, 0);
248*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vldx, ref2_ptr, ref_stride, ref2_ptr, ref_stride2, ref9,
249*fb1b10abSAndroid Build Coastguard Worker ref10);
250*fb1b10abSAndroid Build Coastguard Worker ref11 = __lsx_vldx(ref2_ptr, ref_stride3);
251*fb1b10abSAndroid Build Coastguard Worker ref2_ptr += ref_stride4;
252*fb1b10abSAndroid Build Coastguard Worker ref12 = __lsx_vld(ref3_ptr, 0);
253*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vldx, ref3_ptr, ref_stride, ref3_ptr, ref_stride2, ref13,
254*fb1b10abSAndroid Build Coastguard Worker ref14);
255*fb1b10abSAndroid Build Coastguard Worker ref15 = __lsx_vldx(ref3_ptr, ref_stride3);
256*fb1b10abSAndroid Build Coastguard Worker ref3_ptr += ref_stride4;
257*fb1b10abSAndroid Build Coastguard Worker
258*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vpickev_d, src1, src0, src3, src2, src0, src1);
259*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vpickev_d, ref1, ref0, ref3, ref2, ref0, ref1);
260*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
261*fb1b10abSAndroid Build Coastguard Worker sad0 = __lsx_vadd_h(sad0, sad_tmp);
262*fb1b10abSAndroid Build Coastguard Worker
263*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vpickev_d, ref5, ref4, ref7, ref6, ref0, ref1);
264*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
265*fb1b10abSAndroid Build Coastguard Worker sad1 = __lsx_vadd_h(sad1, sad_tmp);
266*fb1b10abSAndroid Build Coastguard Worker
267*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vpickev_d, ref9, ref8, ref11, ref10, ref0, ref1);
268*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
269*fb1b10abSAndroid Build Coastguard Worker sad2 = __lsx_vadd_h(sad2, sad_tmp);
270*fb1b10abSAndroid Build Coastguard Worker
271*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vpickev_d, ref13, ref12, ref15, ref14, ref0, ref1);
272*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
273*fb1b10abSAndroid Build Coastguard Worker sad3 = __lsx_vadd_h(sad3, sad_tmp);
274*fb1b10abSAndroid Build Coastguard Worker }
275*fb1b10abSAndroid Build Coastguard Worker sad_array[0] = hadd_uh_u32(sad0);
276*fb1b10abSAndroid Build Coastguard Worker sad_array[1] = hadd_uh_u32(sad1);
277*fb1b10abSAndroid Build Coastguard Worker sad_array[2] = hadd_uh_u32(sad2);
278*fb1b10abSAndroid Build Coastguard Worker sad_array[3] = hadd_uh_u32(sad3);
279*fb1b10abSAndroid Build Coastguard Worker }
280*fb1b10abSAndroid Build Coastguard Worker
sad_16width_x4d_lsx(const uint8_t * src_ptr,int32_t src_stride,const uint8_t * const aref_ptr[],int32_t ref_stride,int32_t height,uint32_t * sad_array)281*fb1b10abSAndroid Build Coastguard Worker static void sad_16width_x4d_lsx(const uint8_t *src_ptr, int32_t src_stride,
282*fb1b10abSAndroid Build Coastguard Worker const uint8_t *const aref_ptr[],
283*fb1b10abSAndroid Build Coastguard Worker int32_t ref_stride, int32_t height,
284*fb1b10abSAndroid Build Coastguard Worker uint32_t *sad_array) {
285*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt = (height >> 1);
286*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
287*fb1b10abSAndroid Build Coastguard Worker __m128i src, ref0, ref1, ref2, ref3, diff, sad_tmp;
288*fb1b10abSAndroid Build Coastguard Worker __m128i sad0 = __lsx_vldi(0);
289*fb1b10abSAndroid Build Coastguard Worker __m128i sad1 = sad0;
290*fb1b10abSAndroid Build Coastguard Worker __m128i sad2 = sad0;
291*fb1b10abSAndroid Build Coastguard Worker __m128i sad3 = sad0;
292*fb1b10abSAndroid Build Coastguard Worker
293*fb1b10abSAndroid Build Coastguard Worker ref0_ptr = aref_ptr[0];
294*fb1b10abSAndroid Build Coastguard Worker ref1_ptr = aref_ptr[1];
295*fb1b10abSAndroid Build Coastguard Worker ref2_ptr = aref_ptr[2];
296*fb1b10abSAndroid Build Coastguard Worker ref3_ptr = aref_ptr[3];
297*fb1b10abSAndroid Build Coastguard Worker
298*fb1b10abSAndroid Build Coastguard Worker for (; ht_cnt--;) {
299*fb1b10abSAndroid Build Coastguard Worker src = __lsx_vld(src_ptr, 0);
300*fb1b10abSAndroid Build Coastguard Worker src_ptr += src_stride;
301*fb1b10abSAndroid Build Coastguard Worker ref0 = __lsx_vld(ref0_ptr, 0);
302*fb1b10abSAndroid Build Coastguard Worker ref0_ptr += ref_stride;
303*fb1b10abSAndroid Build Coastguard Worker ref1 = __lsx_vld(ref1_ptr, 0);
304*fb1b10abSAndroid Build Coastguard Worker ref1_ptr += ref_stride;
305*fb1b10abSAndroid Build Coastguard Worker ref2 = __lsx_vld(ref2_ptr, 0);
306*fb1b10abSAndroid Build Coastguard Worker ref2_ptr += ref_stride;
307*fb1b10abSAndroid Build Coastguard Worker ref3 = __lsx_vld(ref3_ptr, 0);
308*fb1b10abSAndroid Build Coastguard Worker ref3_ptr += ref_stride;
309*fb1b10abSAndroid Build Coastguard Worker
310*fb1b10abSAndroid Build Coastguard Worker diff = __lsx_vabsd_bu(src, ref0);
311*fb1b10abSAndroid Build Coastguard Worker sad_tmp = __lsx_vhaddw_hu_bu(diff, diff);
312*fb1b10abSAndroid Build Coastguard Worker sad0 = __lsx_vadd_h(sad0, sad_tmp);
313*fb1b10abSAndroid Build Coastguard Worker diff = __lsx_vabsd_bu(src, ref1);
314*fb1b10abSAndroid Build Coastguard Worker sad_tmp = __lsx_vhaddw_hu_bu(diff, diff);
315*fb1b10abSAndroid Build Coastguard Worker sad1 = __lsx_vadd_h(sad1, sad_tmp);
316*fb1b10abSAndroid Build Coastguard Worker diff = __lsx_vabsd_bu(src, ref2);
317*fb1b10abSAndroid Build Coastguard Worker sad_tmp = __lsx_vhaddw_hu_bu(diff, diff);
318*fb1b10abSAndroid Build Coastguard Worker sad2 = __lsx_vadd_h(sad2, sad_tmp);
319*fb1b10abSAndroid Build Coastguard Worker diff = __lsx_vabsd_bu(src, ref3);
320*fb1b10abSAndroid Build Coastguard Worker sad_tmp = __lsx_vhaddw_hu_bu(diff, diff);
321*fb1b10abSAndroid Build Coastguard Worker sad3 = __lsx_vadd_h(sad3, sad_tmp);
322*fb1b10abSAndroid Build Coastguard Worker
323*fb1b10abSAndroid Build Coastguard Worker src = __lsx_vld(src_ptr, 0);
324*fb1b10abSAndroid Build Coastguard Worker src_ptr += src_stride;
325*fb1b10abSAndroid Build Coastguard Worker ref0 = __lsx_vld(ref0_ptr, 0);
326*fb1b10abSAndroid Build Coastguard Worker ref0_ptr += ref_stride;
327*fb1b10abSAndroid Build Coastguard Worker ref1 = __lsx_vld(ref1_ptr, 0);
328*fb1b10abSAndroid Build Coastguard Worker ref1_ptr += ref_stride;
329*fb1b10abSAndroid Build Coastguard Worker ref2 = __lsx_vld(ref2_ptr, 0);
330*fb1b10abSAndroid Build Coastguard Worker ref2_ptr += ref_stride;
331*fb1b10abSAndroid Build Coastguard Worker ref3 = __lsx_vld(ref3_ptr, 0);
332*fb1b10abSAndroid Build Coastguard Worker ref3_ptr += ref_stride;
333*fb1b10abSAndroid Build Coastguard Worker
334*fb1b10abSAndroid Build Coastguard Worker diff = __lsx_vabsd_bu(src, ref0);
335*fb1b10abSAndroid Build Coastguard Worker sad_tmp = __lsx_vhaddw_hu_bu(diff, diff);
336*fb1b10abSAndroid Build Coastguard Worker sad0 = __lsx_vadd_h(sad0, sad_tmp);
337*fb1b10abSAndroid Build Coastguard Worker diff = __lsx_vabsd_bu(src, ref1);
338*fb1b10abSAndroid Build Coastguard Worker sad_tmp = __lsx_vhaddw_hu_bu(diff, diff);
339*fb1b10abSAndroid Build Coastguard Worker sad1 = __lsx_vadd_h(sad1, sad_tmp);
340*fb1b10abSAndroid Build Coastguard Worker diff = __lsx_vabsd_bu(src, ref2);
341*fb1b10abSAndroid Build Coastguard Worker sad_tmp = __lsx_vhaddw_hu_bu(diff, diff);
342*fb1b10abSAndroid Build Coastguard Worker sad2 = __lsx_vadd_h(sad2, sad_tmp);
343*fb1b10abSAndroid Build Coastguard Worker diff = __lsx_vabsd_bu(src, ref3);
344*fb1b10abSAndroid Build Coastguard Worker sad_tmp = __lsx_vhaddw_hu_bu(diff, diff);
345*fb1b10abSAndroid Build Coastguard Worker sad3 = __lsx_vadd_h(sad3, sad_tmp);
346*fb1b10abSAndroid Build Coastguard Worker }
347*fb1b10abSAndroid Build Coastguard Worker sad_array[0] = hadd_uh_u32(sad0);
348*fb1b10abSAndroid Build Coastguard Worker sad_array[1] = hadd_uh_u32(sad1);
349*fb1b10abSAndroid Build Coastguard Worker sad_array[2] = hadd_uh_u32(sad2);
350*fb1b10abSAndroid Build Coastguard Worker sad_array[3] = hadd_uh_u32(sad3);
351*fb1b10abSAndroid Build Coastguard Worker }
352*fb1b10abSAndroid Build Coastguard Worker
sad_32width_x4d_lsx(const uint8_t * src,int32_t src_stride,const uint8_t * const aref_ptr[],int32_t ref_stride,int32_t height,uint32_t * sad_array)353*fb1b10abSAndroid Build Coastguard Worker static void sad_32width_x4d_lsx(const uint8_t *src, int32_t src_stride,
354*fb1b10abSAndroid Build Coastguard Worker const uint8_t *const aref_ptr[],
355*fb1b10abSAndroid Build Coastguard Worker int32_t ref_stride, int32_t height,
356*fb1b10abSAndroid Build Coastguard Worker uint32_t *sad_array) {
357*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
358*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt = height;
359*fb1b10abSAndroid Build Coastguard Worker __m128i src0, src1, ref0, ref1, sad_tmp;
360*fb1b10abSAndroid Build Coastguard Worker __m128i sad0 = __lsx_vldi(0);
361*fb1b10abSAndroid Build Coastguard Worker __m128i sad1 = sad0;
362*fb1b10abSAndroid Build Coastguard Worker __m128i sad2 = sad0;
363*fb1b10abSAndroid Build Coastguard Worker __m128i sad3 = sad0;
364*fb1b10abSAndroid Build Coastguard Worker
365*fb1b10abSAndroid Build Coastguard Worker ref0_ptr = aref_ptr[0];
366*fb1b10abSAndroid Build Coastguard Worker ref1_ptr = aref_ptr[1];
367*fb1b10abSAndroid Build Coastguard Worker ref2_ptr = aref_ptr[2];
368*fb1b10abSAndroid Build Coastguard Worker ref3_ptr = aref_ptr[3];
369*fb1b10abSAndroid Build Coastguard Worker
370*fb1b10abSAndroid Build Coastguard Worker for (; ht_cnt--;) {
371*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1);
372*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
373*fb1b10abSAndroid Build Coastguard Worker
374*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, ref0_ptr, 0, ref0_ptr, 16, ref0, ref1);
375*fb1b10abSAndroid Build Coastguard Worker ref0_ptr += ref_stride;
376*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
377*fb1b10abSAndroid Build Coastguard Worker sad0 = __lsx_vadd_h(sad0, sad_tmp);
378*fb1b10abSAndroid Build Coastguard Worker
379*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, ref1_ptr, 0, ref1_ptr, 16, ref0, ref1);
380*fb1b10abSAndroid Build Coastguard Worker ref1_ptr += ref_stride;
381*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
382*fb1b10abSAndroid Build Coastguard Worker sad1 = __lsx_vadd_h(sad1, sad_tmp);
383*fb1b10abSAndroid Build Coastguard Worker
384*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, ref2_ptr, 0, ref2_ptr, 16, ref0, ref1);
385*fb1b10abSAndroid Build Coastguard Worker ref2_ptr += ref_stride;
386*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
387*fb1b10abSAndroid Build Coastguard Worker sad2 = __lsx_vadd_h(sad2, sad_tmp);
388*fb1b10abSAndroid Build Coastguard Worker
389*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, ref3_ptr, 0, ref3_ptr, 16, ref0, ref1);
390*fb1b10abSAndroid Build Coastguard Worker ref3_ptr += ref_stride;
391*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
392*fb1b10abSAndroid Build Coastguard Worker sad3 = __lsx_vadd_h(sad3, sad_tmp);
393*fb1b10abSAndroid Build Coastguard Worker }
394*fb1b10abSAndroid Build Coastguard Worker sad_array[0] = hadd_uh_u32(sad0);
395*fb1b10abSAndroid Build Coastguard Worker sad_array[1] = hadd_uh_u32(sad1);
396*fb1b10abSAndroid Build Coastguard Worker sad_array[2] = hadd_uh_u32(sad2);
397*fb1b10abSAndroid Build Coastguard Worker sad_array[3] = hadd_uh_u32(sad3);
398*fb1b10abSAndroid Build Coastguard Worker }
399*fb1b10abSAndroid Build Coastguard Worker
sad_64width_x4d_lsx(const uint8_t * src,int32_t src_stride,const uint8_t * const aref_ptr[],int32_t ref_stride,int32_t height,uint32_t * sad_array)400*fb1b10abSAndroid Build Coastguard Worker static void sad_64width_x4d_lsx(const uint8_t *src, int32_t src_stride,
401*fb1b10abSAndroid Build Coastguard Worker const uint8_t *const aref_ptr[],
402*fb1b10abSAndroid Build Coastguard Worker int32_t ref_stride, int32_t height,
403*fb1b10abSAndroid Build Coastguard Worker uint32_t *sad_array) {
404*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr;
405*fb1b10abSAndroid Build Coastguard Worker int32_t ht_cnt = height;
406*fb1b10abSAndroid Build Coastguard Worker __m128i src0, src1, src2, src3;
407*fb1b10abSAndroid Build Coastguard Worker __m128i ref0, ref1, ref2, ref3;
408*fb1b10abSAndroid Build Coastguard Worker __m128i sad, sad_tmp;
409*fb1b10abSAndroid Build Coastguard Worker
410*fb1b10abSAndroid Build Coastguard Worker __m128i sad0_0 = __lsx_vldi(0);
411*fb1b10abSAndroid Build Coastguard Worker __m128i sad0_1 = sad0_0;
412*fb1b10abSAndroid Build Coastguard Worker __m128i sad1_0 = sad0_0;
413*fb1b10abSAndroid Build Coastguard Worker __m128i sad1_1 = sad0_0;
414*fb1b10abSAndroid Build Coastguard Worker __m128i sad2_0 = sad0_0;
415*fb1b10abSAndroid Build Coastguard Worker __m128i sad2_1 = sad0_0;
416*fb1b10abSAndroid Build Coastguard Worker __m128i sad3_0 = sad0_0;
417*fb1b10abSAndroid Build Coastguard Worker __m128i sad3_1 = sad0_0;
418*fb1b10abSAndroid Build Coastguard Worker
419*fb1b10abSAndroid Build Coastguard Worker ref0_ptr = aref_ptr[0];
420*fb1b10abSAndroid Build Coastguard Worker ref1_ptr = aref_ptr[1];
421*fb1b10abSAndroid Build Coastguard Worker ref2_ptr = aref_ptr[2];
422*fb1b10abSAndroid Build Coastguard Worker ref3_ptr = aref_ptr[3];
423*fb1b10abSAndroid Build Coastguard Worker
424*fb1b10abSAndroid Build Coastguard Worker for (; ht_cnt--;) {
425*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, src, 0, src, 16, src, 32, src, 48, src0, src1, src2,
426*fb1b10abSAndroid Build Coastguard Worker src3);
427*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
428*fb1b10abSAndroid Build Coastguard Worker
429*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, ref0_ptr, 0, ref0_ptr, 16, ref0_ptr, 32, ref0_ptr, 48,
430*fb1b10abSAndroid Build Coastguard Worker ref0, ref1, ref2, ref3);
431*fb1b10abSAndroid Build Coastguard Worker ref0_ptr += ref_stride;
432*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
433*fb1b10abSAndroid Build Coastguard Worker sad0_0 = __lsx_vadd_h(sad0_0, sad_tmp);
434*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src2, src3, ref2, ref3);
435*fb1b10abSAndroid Build Coastguard Worker sad0_1 = __lsx_vadd_h(sad0_1, sad_tmp);
436*fb1b10abSAndroid Build Coastguard Worker
437*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, ref1_ptr, 0, ref1_ptr, 16, ref1_ptr, 32, ref1_ptr, 48,
438*fb1b10abSAndroid Build Coastguard Worker ref0, ref1, ref2, ref3);
439*fb1b10abSAndroid Build Coastguard Worker ref1_ptr += ref_stride;
440*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
441*fb1b10abSAndroid Build Coastguard Worker sad1_0 = __lsx_vadd_h(sad1_0, sad_tmp);
442*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src2, src3, ref2, ref3);
443*fb1b10abSAndroid Build Coastguard Worker sad1_1 = __lsx_vadd_h(sad1_1, sad_tmp);
444*fb1b10abSAndroid Build Coastguard Worker
445*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, ref2_ptr, 0, ref2_ptr, 16, ref2_ptr, 32, ref2_ptr, 48,
446*fb1b10abSAndroid Build Coastguard Worker ref0, ref1, ref2, ref3);
447*fb1b10abSAndroid Build Coastguard Worker ref2_ptr += ref_stride;
448*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
449*fb1b10abSAndroid Build Coastguard Worker sad2_0 = __lsx_vadd_h(sad2_0, sad_tmp);
450*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src2, src3, ref2, ref3);
451*fb1b10abSAndroid Build Coastguard Worker sad2_1 = __lsx_vadd_h(sad2_1, sad_tmp);
452*fb1b10abSAndroid Build Coastguard Worker
453*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, ref3_ptr, 0, ref3_ptr, 16, ref3_ptr, 32, ref3_ptr, 48,
454*fb1b10abSAndroid Build Coastguard Worker ref0, ref1, ref2, ref3);
455*fb1b10abSAndroid Build Coastguard Worker ref3_ptr += ref_stride;
456*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, ref0, ref1);
457*fb1b10abSAndroid Build Coastguard Worker sad3_0 = __lsx_vadd_h(sad3_0, sad_tmp);
458*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src2, src3, ref2, ref3);
459*fb1b10abSAndroid Build Coastguard Worker sad3_1 = __lsx_vadd_h(sad3_1, sad_tmp);
460*fb1b10abSAndroid Build Coastguard Worker }
461*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vhaddw_wu_hu(sad0_0, sad0_0);
462*fb1b10abSAndroid Build Coastguard Worker sad_tmp = __lsx_vhaddw_wu_hu(sad0_1, sad0_1);
463*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vadd_w(sad, sad_tmp);
464*fb1b10abSAndroid Build Coastguard Worker sad_array[0] = hadd_uw_u32(sad);
465*fb1b10abSAndroid Build Coastguard Worker
466*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vhaddw_wu_hu(sad1_0, sad1_0);
467*fb1b10abSAndroid Build Coastguard Worker sad_tmp = __lsx_vhaddw_wu_hu(sad1_1, sad1_1);
468*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vadd_w(sad, sad_tmp);
469*fb1b10abSAndroid Build Coastguard Worker sad_array[1] = hadd_uw_u32(sad);
470*fb1b10abSAndroid Build Coastguard Worker
471*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vhaddw_wu_hu(sad2_0, sad2_0);
472*fb1b10abSAndroid Build Coastguard Worker sad_tmp = __lsx_vhaddw_wu_hu(sad2_1, sad2_1);
473*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vadd_w(sad, sad_tmp);
474*fb1b10abSAndroid Build Coastguard Worker sad_array[2] = hadd_uw_u32(sad);
475*fb1b10abSAndroid Build Coastguard Worker
476*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vhaddw_wu_hu(sad3_0, sad3_0);
477*fb1b10abSAndroid Build Coastguard Worker sad_tmp = __lsx_vhaddw_wu_hu(sad3_1, sad3_1);
478*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vadd_w(sad, sad_tmp);
479*fb1b10abSAndroid Build Coastguard Worker sad_array[3] = hadd_uw_u32(sad);
480*fb1b10abSAndroid Build Coastguard Worker }
481*fb1b10abSAndroid Build Coastguard Worker
avgsad_32width_lsx(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height,const uint8_t * sec_pred)482*fb1b10abSAndroid Build Coastguard Worker static uint32_t avgsad_32width_lsx(const uint8_t *src, int32_t src_stride,
483*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref, int32_t ref_stride,
484*fb1b10abSAndroid Build Coastguard Worker int32_t height, const uint8_t *sec_pred) {
485*fb1b10abSAndroid Build Coastguard Worker int32_t res, ht_cnt = (height >> 2);
486*fb1b10abSAndroid Build Coastguard Worker __m128i src0, src1, src2, src3, src4, src5, src6, src7;
487*fb1b10abSAndroid Build Coastguard Worker __m128i ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7;
488*fb1b10abSAndroid Build Coastguard Worker __m128i pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7;
489*fb1b10abSAndroid Build Coastguard Worker __m128i comp0, comp1, sad_tmp;
490*fb1b10abSAndroid Build Coastguard Worker __m128i sad = __lsx_vldi(0);
491*fb1b10abSAndroid Build Coastguard Worker uint8_t *src_tmp, *ref_tmp;
492*fb1b10abSAndroid Build Coastguard Worker int32_t src_stride2 = src_stride << 1;
493*fb1b10abSAndroid Build Coastguard Worker int32_t src_stride3 = src_stride2 + src_stride;
494*fb1b10abSAndroid Build Coastguard Worker int32_t src_stride4 = src_stride2 << 1;
495*fb1b10abSAndroid Build Coastguard Worker int32_t ref_stride2 = ref_stride << 1;
496*fb1b10abSAndroid Build Coastguard Worker int32_t ref_stride3 = ref_stride2 + ref_stride;
497*fb1b10abSAndroid Build Coastguard Worker int32_t ref_stride4 = ref_stride2 << 1;
498*fb1b10abSAndroid Build Coastguard Worker
499*fb1b10abSAndroid Build Coastguard Worker for (; ht_cnt--;) {
500*fb1b10abSAndroid Build Coastguard Worker src_tmp = (uint8_t *)src + 16;
501*fb1b10abSAndroid Build Coastguard Worker src0 = __lsx_vld(src, 0);
502*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vldx, src, src_stride, src, src_stride2, src2, src4);
503*fb1b10abSAndroid Build Coastguard Worker src6 = __lsx_vldx(src, src_stride3);
504*fb1b10abSAndroid Build Coastguard Worker src1 = __lsx_vld(src_tmp, 0);
505*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vldx, src_tmp, src_stride, src_tmp, src_stride2, src3,
506*fb1b10abSAndroid Build Coastguard Worker src5);
507*fb1b10abSAndroid Build Coastguard Worker src7 = __lsx_vldx(src_tmp, src_stride3);
508*fb1b10abSAndroid Build Coastguard Worker src += src_stride4;
509*fb1b10abSAndroid Build Coastguard Worker
510*fb1b10abSAndroid Build Coastguard Worker ref_tmp = (uint8_t *)ref + 16;
511*fb1b10abSAndroid Build Coastguard Worker ref0 = __lsx_vld(ref, 0);
512*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vldx, ref, ref_stride, ref, ref_stride2, ref2, ref4);
513*fb1b10abSAndroid Build Coastguard Worker ref6 = __lsx_vldx(ref, ref_stride3);
514*fb1b10abSAndroid Build Coastguard Worker ref1 = __lsx_vld(ref_tmp, 0);
515*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vldx, ref_tmp, ref_stride, ref_tmp, ref_stride2, ref3,
516*fb1b10abSAndroid Build Coastguard Worker ref5);
517*fb1b10abSAndroid Build Coastguard Worker ref7 = __lsx_vldx(ref_tmp, ref_stride3);
518*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride4;
519*fb1b10abSAndroid Build Coastguard Worker
520*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, sec_pred, 0, sec_pred, 32, sec_pred, 64, sec_pred, 96,
521*fb1b10abSAndroid Build Coastguard Worker pred0, pred2, pred4, pred6);
522*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, sec_pred, 16, sec_pred, 48, sec_pred, 80, sec_pred,
523*fb1b10abSAndroid Build Coastguard Worker 112, pred1, pred3, pred5, pred7);
524*fb1b10abSAndroid Build Coastguard Worker sec_pred += 128;
525*fb1b10abSAndroid Build Coastguard Worker
526*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vavgr_bu, pred0, ref0, pred1, ref1, comp0, comp1);
527*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, comp0, comp1);
528*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vadd_h(sad, sad_tmp);
529*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vavgr_bu, pred2, ref2, pred3, ref3, comp0, comp1);
530*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src2, src3, comp0, comp1);
531*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vadd_h(sad, sad_tmp);
532*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vavgr_bu, pred4, ref4, pred5, ref5, comp0, comp1);
533*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src4, src5, comp0, comp1);
534*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vadd_h(sad, sad_tmp);
535*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vavgr_bu, pred6, ref6, pred7, ref7, comp0, comp1);
536*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src6, src7, comp0, comp1);
537*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vadd_h(sad, sad_tmp);
538*fb1b10abSAndroid Build Coastguard Worker }
539*fb1b10abSAndroid Build Coastguard Worker res = hadd_uh_u32(sad);
540*fb1b10abSAndroid Build Coastguard Worker return res;
541*fb1b10abSAndroid Build Coastguard Worker }
542*fb1b10abSAndroid Build Coastguard Worker
avgsad_64width_lsx(const uint8_t * src,int32_t src_stride,const uint8_t * ref,int32_t ref_stride,int32_t height,const uint8_t * sec_pred)543*fb1b10abSAndroid Build Coastguard Worker static uint32_t avgsad_64width_lsx(const uint8_t *src, int32_t src_stride,
544*fb1b10abSAndroid Build Coastguard Worker const uint8_t *ref, int32_t ref_stride,
545*fb1b10abSAndroid Build Coastguard Worker int32_t height, const uint8_t *sec_pred) {
546*fb1b10abSAndroid Build Coastguard Worker int32_t res, ht_cnt = (height >> 2);
547*fb1b10abSAndroid Build Coastguard Worker __m128i src0, src1, src2, src3, ref0, ref1, ref2, ref3;
548*fb1b10abSAndroid Build Coastguard Worker __m128i comp0, comp1, comp2, comp3, pred0, pred1, pred2, pred3;
549*fb1b10abSAndroid Build Coastguard Worker __m128i sad, sad_tmp;
550*fb1b10abSAndroid Build Coastguard Worker __m128i sad0 = __lsx_vldi(0);
551*fb1b10abSAndroid Build Coastguard Worker __m128i sad1 = sad0;
552*fb1b10abSAndroid Build Coastguard Worker
553*fb1b10abSAndroid Build Coastguard Worker for (; ht_cnt--;) {
554*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, src, 0, src, 16, src, 32, src, 48, src0, src1, src2,
555*fb1b10abSAndroid Build Coastguard Worker src3);
556*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
557*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, ref, 0, ref, 16, ref, 32, ref, 48, ref0, ref1, ref2,
558*fb1b10abSAndroid Build Coastguard Worker ref3);
559*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
560*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, sec_pred, 0, sec_pred, 16, sec_pred, 32, sec_pred, 48,
561*fb1b10abSAndroid Build Coastguard Worker pred0, pred1, pred2, pred3);
562*fb1b10abSAndroid Build Coastguard Worker sec_pred += 64;
563*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vavgr_bu, pred0, ref0, pred1, ref1, pred2, ref2, pred3,
564*fb1b10abSAndroid Build Coastguard Worker ref3, comp0, comp1, comp2, comp3);
565*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, comp0, comp1);
566*fb1b10abSAndroid Build Coastguard Worker sad0 = __lsx_vadd_h(sad0, sad_tmp);
567*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src2, src3, comp2, comp3);
568*fb1b10abSAndroid Build Coastguard Worker sad1 = __lsx_vadd_h(sad1, sad_tmp);
569*fb1b10abSAndroid Build Coastguard Worker
570*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, src, 0, src, 16, src, 32, src, 48, src0, src1, src2,
571*fb1b10abSAndroid Build Coastguard Worker src3);
572*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
573*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, ref, 0, ref, 16, ref, 32, ref, 48, ref0, ref1, ref2,
574*fb1b10abSAndroid Build Coastguard Worker ref3);
575*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
576*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, sec_pred, 0, sec_pred, 16, sec_pred, 32, sec_pred, 48,
577*fb1b10abSAndroid Build Coastguard Worker pred0, pred1, pred2, pred3);
578*fb1b10abSAndroid Build Coastguard Worker sec_pred += 64;
579*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vavgr_bu, pred0, ref0, pred1, ref1, pred2, ref2, pred3,
580*fb1b10abSAndroid Build Coastguard Worker ref3, comp0, comp1, comp2, comp3);
581*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, comp0, comp1);
582*fb1b10abSAndroid Build Coastguard Worker sad0 = __lsx_vadd_h(sad0, sad_tmp);
583*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src2, src3, comp2, comp3);
584*fb1b10abSAndroid Build Coastguard Worker sad1 = __lsx_vadd_h(sad1, sad_tmp);
585*fb1b10abSAndroid Build Coastguard Worker
586*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, src, 0, src, 16, src, 32, src, 48, src0, src1, src2,
587*fb1b10abSAndroid Build Coastguard Worker src3);
588*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
589*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, ref, 0, ref, 16, ref, 32, ref, 48, ref0, ref1, ref2,
590*fb1b10abSAndroid Build Coastguard Worker ref3);
591*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
592*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, sec_pred, 0, sec_pred, 16, sec_pred, 32, sec_pred, 48,
593*fb1b10abSAndroid Build Coastguard Worker pred0, pred1, pred2, pred3);
594*fb1b10abSAndroid Build Coastguard Worker sec_pred += 64;
595*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vavgr_bu, pred0, ref0, pred1, ref1, pred2, ref2, pred3,
596*fb1b10abSAndroid Build Coastguard Worker ref3, comp0, comp1, comp2, comp3);
597*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, comp0, comp1);
598*fb1b10abSAndroid Build Coastguard Worker sad0 = __lsx_vadd_h(sad0, sad_tmp);
599*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src2, src3, comp2, comp3);
600*fb1b10abSAndroid Build Coastguard Worker sad1 = __lsx_vadd_h(sad1, sad_tmp);
601*fb1b10abSAndroid Build Coastguard Worker
602*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, src, 0, src, 16, src, 32, src, 48, src0, src1, src2,
603*fb1b10abSAndroid Build Coastguard Worker src3);
604*fb1b10abSAndroid Build Coastguard Worker src += src_stride;
605*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, ref, 0, ref, 16, ref, 32, ref, 48, ref0, ref1, ref2,
606*fb1b10abSAndroid Build Coastguard Worker ref3);
607*fb1b10abSAndroid Build Coastguard Worker ref += ref_stride;
608*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vld, sec_pred, 0, sec_pred, 16, sec_pred, 32, sec_pred, 48,
609*fb1b10abSAndroid Build Coastguard Worker pred0, pred1, pred2, pred3);
610*fb1b10abSAndroid Build Coastguard Worker sec_pred += 64;
611*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vavgr_bu, pred0, ref0, pred1, ref1, pred2, ref2, pred3,
612*fb1b10abSAndroid Build Coastguard Worker ref3, comp0, comp1, comp2, comp3);
613*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src0, src1, comp0, comp1);
614*fb1b10abSAndroid Build Coastguard Worker sad0 = __lsx_vadd_h(sad0, sad_tmp);
615*fb1b10abSAndroid Build Coastguard Worker sad_tmp = sad_ub2_uh(src2, src3, comp2, comp3);
616*fb1b10abSAndroid Build Coastguard Worker sad1 = __lsx_vadd_h(sad1, sad_tmp);
617*fb1b10abSAndroid Build Coastguard Worker }
618*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vhaddw_wu_hu(sad0, sad0);
619*fb1b10abSAndroid Build Coastguard Worker sad_tmp = __lsx_vhaddw_wu_hu(sad1, sad1);
620*fb1b10abSAndroid Build Coastguard Worker sad = __lsx_vadd_w(sad, sad_tmp);
621*fb1b10abSAndroid Build Coastguard Worker
622*fb1b10abSAndroid Build Coastguard Worker res = hadd_sw_s32(sad);
623*fb1b10abSAndroid Build Coastguard Worker return res;
624*fb1b10abSAndroid Build Coastguard Worker }
625*fb1b10abSAndroid Build Coastguard Worker
/* Generates vpx_sad8x<h>_lsx(), which forwards to sad_8width_lsx() with the
 * row count fixed to <h> at expansion time. */
#define VPX_SAD_8xHT_LSX(h)                                             \
  uint32_t vpx_sad8x##h##_lsx(const uint8_t *src, int32_t src_stride,   \
                              const uint8_t *ref, int32_t ref_stride) { \
    return sad_8width_lsx(src, src_stride, ref, ref_stride, h);         \
  }
631*fb1b10abSAndroid Build Coastguard Worker
/* Generates vpx_sad16x<h>_lsx(), which forwards to sad_16width_lsx() with the
 * row count fixed to <h> at expansion time. */
#define VPX_SAD_16xHT_LSX(h)                                             \
  uint32_t vpx_sad16x##h##_lsx(const uint8_t *src, int32_t src_stride,   \
                               const uint8_t *ref, int32_t ref_stride) { \
    return sad_16width_lsx(src, src_stride, ref, ref_stride, h);         \
  }
637*fb1b10abSAndroid Build Coastguard Worker
/* Generates vpx_sad32x<h>_lsx(), which forwards to sad_32width_lsx() with the
 * row count fixed to <h> at expansion time. */
#define VPX_SAD_32xHT_LSX(h)                                             \
  uint32_t vpx_sad32x##h##_lsx(const uint8_t *src, int32_t src_stride,   \
                               const uint8_t *ref, int32_t ref_stride) { \
    return sad_32width_lsx(src, src_stride, ref, ref_stride, h);         \
  }
643*fb1b10abSAndroid Build Coastguard Worker
/* Generates vpx_sad64x<h>_lsx(), which forwards to sad_64width_lsx() with the
 * row count fixed to <h> at expansion time. */
#define VPX_SAD_64xHT_LSX(h)                                             \
  uint32_t vpx_sad64x##h##_lsx(const uint8_t *src, int32_t src_stride,   \
                               const uint8_t *ref, int32_t ref_stride) { \
    return sad_64width_lsx(src, src_stride, ref, ref_stride, h);         \
  }
649*fb1b10abSAndroid Build Coastguard Worker
/* Generates vpx_sad8x<h>x4d_lsx(), which hands the source block, the four
 * reference pointers and the output array to sad_8width_x4d_lsx() with the
 * row count fixed to <h>. */
#define VPX_SAD_8xHTx4D_LSX(h)                                             \
  void vpx_sad8x##h##x4d_lsx(const uint8_t *src, int32_t src_stride,       \
                             const uint8_t *const refs[4],                 \
                             int32_t ref_stride, uint32_t sads[4]) {       \
    sad_8width_x4d_lsx(src, src_stride, refs, ref_stride, h, sads);        \
  }
656*fb1b10abSAndroid Build Coastguard Worker
/* Generates vpx_sad16x<h>x4d_lsx(), which hands the source block, the
 * reference pointer array and the output array to sad_16width_x4d_lsx() with
 * the row count fixed to <h>. */
#define VPX_SAD_16xHTx4D_LSX(h)                                            \
  void vpx_sad16x##h##x4d_lsx(const uint8_t *src, int32_t src_stride,      \
                              const uint8_t *const refs[],                 \
                              int32_t ref_stride, uint32_t *sads) {        \
    sad_16width_x4d_lsx(src, src_stride, refs, ref_stride, h, sads);       \
  }
663*fb1b10abSAndroid Build Coastguard Worker
/* Generates vpx_sad32x<h>x4d_lsx(), which hands the source block, the
 * reference pointer array and the output array to sad_32width_x4d_lsx() with
 * the row count fixed to <h>. */
#define VPX_SAD_32xHTx4D_LSX(h)                                            \
  void vpx_sad32x##h##x4d_lsx(const uint8_t *src, int32_t src_stride,      \
                              const uint8_t *const refs[],                 \
                              int32_t ref_stride, uint32_t *sads) {        \
    sad_32width_x4d_lsx(src, src_stride, refs, ref_stride, h, sads);       \
  }
670*fb1b10abSAndroid Build Coastguard Worker
/* Generates vpx_sad64x<h>x4d_lsx(), which hands the source block, the
 * reference pointer array and the output array to sad_64width_x4d_lsx() with
 * the row count fixed to <h>. */
#define VPX_SAD_64xHTx4D_LSX(h)                                            \
  void vpx_sad64x##h##x4d_lsx(const uint8_t *src, int32_t src_stride,      \
                              const uint8_t *const refs[],                 \
                              int32_t ref_stride, uint32_t *sads) {        \
    sad_64width_x4d_lsx(src, src_stride, refs, ref_stride, h, sads);       \
  }
677*fb1b10abSAndroid Build Coastguard Worker
/* Generates vpx_sad32x<h>_avg_lsx(), which forwards to avgsad_32width_lsx()
 * with the row count fixed to <h> and second_pred as the second predictor. */
#define VPX_AVGSAD_32xHT_LSX(h)                                         \
  uint32_t vpx_sad32x##h##_avg_lsx(                                     \
      const uint8_t *src, int32_t src_stride, const uint8_t *ref,       \
      int32_t ref_stride, const uint8_t *second_pred) {                 \
    return avgsad_32width_lsx(src, src_stride, ref, ref_stride, h,      \
                              second_pred);                             \
  }
685*fb1b10abSAndroid Build Coastguard Worker
/* Generates vpx_sad64x<h>_avg_lsx(), which forwards to avgsad_64width_lsx()
 * with the row count fixed to <h> and second_pred as the second predictor. */
#define VPX_AVGSAD_64xHT_LSX(h)                                         \
  uint32_t vpx_sad64x##h##_avg_lsx(                                     \
      const uint8_t *src, int32_t src_stride, const uint8_t *ref,       \
      int32_t ref_stride, const uint8_t *second_pred) {                 \
    return avgsad_64width_lsx(src, src_stride, ref, ref_stride, h,      \
                              second_pred);                             \
  }
693*fb1b10abSAndroid Build Coastguard Worker
694*fb1b10abSAndroid Build Coastguard Worker #define SAD64 \
695*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_64xHT_LSX(64) VPX_SAD_64xHTx4D_LSX(64) VPX_SAD_64xHTx4D_LSX(32) \
696*fb1b10abSAndroid Build Coastguard Worker VPX_AVGSAD_64xHT_LSX(64)
697*fb1b10abSAndroid Build Coastguard Worker
698*fb1b10abSAndroid Build Coastguard Worker SAD64
699*fb1b10abSAndroid Build Coastguard Worker
700*fb1b10abSAndroid Build Coastguard Worker #define SAD32 \
701*fb1b10abSAndroid Build Coastguard Worker VPX_SAD_32xHT_LSX(32) VPX_SAD_32xHTx4D_LSX(32) VPX_SAD_32xHTx4D_LSX(64) \
702*fb1b10abSAndroid Build Coastguard Worker VPX_AVGSAD_32xHT_LSX(32)
703*fb1b10abSAndroid Build Coastguard Worker
704*fb1b10abSAndroid Build Coastguard Worker SAD32
705*fb1b10abSAndroid Build Coastguard Worker
706*fb1b10abSAndroid Build Coastguard Worker #define SAD16 VPX_SAD_16xHT_LSX(16) VPX_SAD_16xHTx4D_LSX(16)
707*fb1b10abSAndroid Build Coastguard Worker
708*fb1b10abSAndroid Build Coastguard Worker SAD16
709*fb1b10abSAndroid Build Coastguard Worker
710*fb1b10abSAndroid Build Coastguard Worker #define SAD8 VPX_SAD_8xHT_LSX(8) VPX_SAD_8xHTx4D_LSX(8)
711*fb1b10abSAndroid Build Coastguard Worker
712*fb1b10abSAndroid Build Coastguard Worker SAD8
713*fb1b10abSAndroid Build Coastguard Worker
714*fb1b10abSAndroid Build Coastguard Worker #undef SAD64
715*fb1b10abSAndroid Build Coastguard Worker #undef SAD32
716*fb1b10abSAndroid Build Coastguard Worker #undef SAD16
717*fb1b10abSAndroid Build Coastguard Worker #undef SAD8
718