/*
 * Copyright (c) 2022 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <stdint.h>
#include "./vp8_rtcd.h"
#include "vpx_util/loongson_intrinsics.h"

/*
 * Transposes 4x4 tiles of 16-bit lanes held in four row vectors.
 *
 * The low 64 bits of _in0.._in3 are treated as one 4x4 tile and the high
 * 64 bits as a second one. Each tile is transposed on its own, so _outK
 * receives column K of the first tile in its low half and column K of the
 * second tile in its high half.
 *
 * Every input is consumed into a temporary before any output is assigned,
 * so the outputs may name the same variables as the inputs (the call sites
 * in this file do exactly that).
 *
 * Wrapped in do { } while (0) so the expansion behaves as a single
 * statement; invoke it with a trailing semicolon.
 */
#define LSX_TRANSPOSE4x4_H(_in0, _in1, _in2, _in3, _out0, _out1, _out2, _out3) \
  do {                                                                         \
    __m128i _s0, _s1, _s2, _s3, _t0, _t1, _t2, _t3;                            \
                                                                               \
    DUP2_ARG2(__lsx_vilvl_h, _in2, _in0, _in3, _in1, _s0, _s1);                \
    DUP2_ARG2(__lsx_vilvh_h, _in2, _in0, _in3, _in1, _s2, _s3);                \
    _t0 = __lsx_vilvl_h(_s1, _s0);                                             \
    _t1 = __lsx_vilvh_h(_s1, _s0);                                             \
    _t2 = __lsx_vilvl_h(_s3, _s2);                                             \
    _t3 = __lsx_vilvh_h(_s3, _s2);                                             \
    DUP2_ARG2(__lsx_vpickev_d, _t2, _t0, _t3, _t1, _out0, _out2);              \
    DUP2_ARG2(__lsx_vpickod_d, _t2, _t0, _t3, _t1, _out1, _out3);              \
  } while (0)

/*
 * Builds the two multiplier vectors used by the dot-product steps.
 *
 * The 16-bit lanes val0, val1 and val2 of coeff are each broadcast to a
 * full vector and then packed pairwise:
 *   const1: even lanes = coeff[val0], odd lanes = coeff[val1]
 *   const2: even lanes = coeff[val2], odd lanes = coeff[val0]
 *
 * const1 and const2 are written, so they must be assignable __m128i
 * variables. Wrapped in do { } while (0) so the expansion behaves as a
 * single statement; invoke it with a trailing semicolon.
 */
#define SET_DOTP_VALUES(coeff, val0, val1, val2, const1, const2)           \
  do {                                                                     \
    __m128i tmp0_m, tmp1_m, tmp2_m;                                        \
                                                                           \
    tmp0_m = __lsx_vreplvei_h(coeff, val0);                                \
    DUP2_ARG2(__lsx_vreplvei_h, coeff, val1, coeff, val2, tmp1_m, tmp2_m); \
    DUP2_ARG2(__lsx_vpackev_h, tmp1_m, tmp0_m, tmp0_m, tmp2_m, const1,     \
              const2);                                                     \
  } while (0)

/*
 * Evaluates to a vector holding 1 in every 16-bit lane where _in is
 * non-zero and 0 in every lane where _in is zero.
 *
 * The lanes are first compared against zero, the comparison mask is
 * inverted by XOR-ing with max_m, and the result is reduced to 0/1 by
 * AND-ing with one_m. This relies on __lsx_vldi(0xFF) producing an all-ones
 * vector and __lsx_vldi(0x401) producing 1 in each 16-bit lane.
 *
 * This is a GNU statement expression because the callers use its value.
 */
#define RET_1_IF_NZERO_H(_in)              \
  ({                                       \
    __m128i tmp_m;                         \
    __m128i one_m = __lsx_vldi(0x401);     \
    __m128i max_m = __lsx_vldi(0xFF);      \
                                           \
    tmp_m = __lsx_vseqi_h(_in, 0);         \
    tmp_m = __lsx_vxor_v(tmp_m, max_m);    \
    tmp_m = __lsx_vand_v(tmp_m, one_m);    \
                                           \
    tmp_m;                                 \
  })

vp8_short_fdct4x4_lsx(int16_t * input,int16_t * output,int32_t pitch)52*fb1b10abSAndroid Build Coastguard Worker void vp8_short_fdct4x4_lsx(int16_t *input, int16_t *output, int32_t pitch) {
53*fb1b10abSAndroid Build Coastguard Worker __m128i in0, in1, in2, in3;
54*fb1b10abSAndroid Build Coastguard Worker __m128i tmp0, tmp1, tmp2, tmp3, const0, const1;
55*fb1b10abSAndroid Build Coastguard Worker __m128i coeff = { 0x38a4eb1814e808a9, 0x659061a82ee01d4c };
56*fb1b10abSAndroid Build Coastguard Worker __m128i out0, out1, out2, out3;
57*fb1b10abSAndroid Build Coastguard Worker __m128i zero = __lsx_vldi(0);
58*fb1b10abSAndroid Build Coastguard Worker int32_t pitch2 = pitch << 1;
59*fb1b10abSAndroid Build Coastguard Worker int32_t pitch3 = pitch2 + pitch;
60*fb1b10abSAndroid Build Coastguard Worker
61*fb1b10abSAndroid Build Coastguard Worker in0 = __lsx_vld(input, 0);
62*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vldx, input, pitch, input, pitch2, in1, in2);
63*fb1b10abSAndroid Build Coastguard Worker in3 = __lsx_vldx(input, pitch3);
64*fb1b10abSAndroid Build Coastguard Worker
65*fb1b10abSAndroid Build Coastguard Worker LSX_TRANSPOSE4x4_H(in0, in1, in2, in3, in0, in1, in2, in3);
66*fb1b10abSAndroid Build Coastguard Worker LSX_BUTTERFLY_4_H(in0, in1, in2, in3, tmp0, tmp1, in1, in3);
67*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vslli_h, tmp0, 3, tmp1, 3, in1, 3, in3, 3, tmp0, tmp1, in1,
68*fb1b10abSAndroid Build Coastguard Worker in3);
69*fb1b10abSAndroid Build Coastguard Worker in0 = __lsx_vadd_h(tmp0, tmp1);
70*fb1b10abSAndroid Build Coastguard Worker in2 = __lsx_vsub_h(tmp0, tmp1);
71*fb1b10abSAndroid Build Coastguard Worker SET_DOTP_VALUES(coeff, 0, 1, 2, const0, const1);
72*fb1b10abSAndroid Build Coastguard Worker tmp0 = __lsx_vilvl_h(in3, in1);
73*fb1b10abSAndroid Build Coastguard Worker in1 = __lsx_vreplvei_h(coeff, 3);
74*fb1b10abSAndroid Build Coastguard Worker out0 = __lsx_vpackev_h(zero, in1);
75*fb1b10abSAndroid Build Coastguard Worker coeff = __lsx_vilvl_h(zero, coeff);
76*fb1b10abSAndroid Build Coastguard Worker out1 = __lsx_vreplvei_w(coeff, 0);
77*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG3(__lsx_vdp2add_w_h, out0, tmp0, const0, out1, tmp0, const1, out0,
78*fb1b10abSAndroid Build Coastguard Worker out1);
79*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG3(__lsx_vsrani_h_w, out0, out0, 12, out1, out1, 12, in1, in3);
80*fb1b10abSAndroid Build Coastguard Worker LSX_TRANSPOSE4x4_H(in0, in1, in2, in3, in0, in1, in2, in3);
81*fb1b10abSAndroid Build Coastguard Worker LSX_BUTTERFLY_4_H(in0, in1, in2, in3, tmp0, tmp1, in1, in3);
82*fb1b10abSAndroid Build Coastguard Worker tmp2 = __lsx_vadd_h(tmp0, tmp1);
83*fb1b10abSAndroid Build Coastguard Worker tmp3 = __lsx_vsub_h(tmp0, tmp1);
84*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vaddi_hu, tmp2, 7, tmp3, 7, in0, in2);
85*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vsrai_h, in0, 4, in2, 4, in0, in2);
86*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vilvl_h, zero, in0, zero, in2, out0, out2);
87*fb1b10abSAndroid Build Coastguard Worker tmp1 = RET_1_IF_NZERO_H(in3);
88*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vilvl_h, zero, tmp1, in3, in1, tmp1, tmp0);
89*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vreplvei_w, coeff, 2, coeff, 3, out3, out1);
90*fb1b10abSAndroid Build Coastguard Worker out3 = __lsx_vadd_w(out3, out1);
91*fb1b10abSAndroid Build Coastguard Worker out1 = __lsx_vreplvei_w(coeff, 1);
92*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG3(__lsx_vdp2add_w_h, out1, tmp0, const0, out3, tmp0, const1, out1,
93*fb1b10abSAndroid Build Coastguard Worker out3);
94*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vsrai_w, out1, 16, out3, 16, out1, out3);
95*fb1b10abSAndroid Build Coastguard Worker out1 = __lsx_vadd_w(out1, tmp1);
96*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vpickev_h, out1, out0, out3, out2, in0, in2);
97*fb1b10abSAndroid Build Coastguard Worker __lsx_vst(in0, output, 0);
98*fb1b10abSAndroid Build Coastguard Worker __lsx_vst(in2, output, 16);
99*fb1b10abSAndroid Build Coastguard Worker }
100*fb1b10abSAndroid Build Coastguard Worker
vp8_short_fdct8x4_lsx(int16_t * input,int16_t * output,int32_t pitch)101*fb1b10abSAndroid Build Coastguard Worker void vp8_short_fdct8x4_lsx(int16_t *input, int16_t *output, int32_t pitch) {
102*fb1b10abSAndroid Build Coastguard Worker __m128i in0, in1, in2, in3, temp0, temp1, tmp0, tmp1;
103*fb1b10abSAndroid Build Coastguard Worker __m128i const0, const1, const2, vec0_w, vec1_w, vec2_w, vec3_w;
104*fb1b10abSAndroid Build Coastguard Worker __m128i coeff = { 0x38a4eb1814e808a9, 0x659061a82ee01d4c };
105*fb1b10abSAndroid Build Coastguard Worker __m128i zero = __lsx_vldi(0);
106*fb1b10abSAndroid Build Coastguard Worker int32_t pitch2 = pitch << 1;
107*fb1b10abSAndroid Build Coastguard Worker int32_t pitch3 = pitch2 + pitch;
108*fb1b10abSAndroid Build Coastguard Worker
109*fb1b10abSAndroid Build Coastguard Worker in0 = __lsx_vld(input, 0);
110*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vldx, input, pitch, input, pitch2, in1, in2);
111*fb1b10abSAndroid Build Coastguard Worker in3 = __lsx_vldx(input, pitch3);
112*fb1b10abSAndroid Build Coastguard Worker LSX_TRANSPOSE4x4_H(in0, in1, in2, in3, in0, in1, in2, in3);
113*fb1b10abSAndroid Build Coastguard Worker
114*fb1b10abSAndroid Build Coastguard Worker LSX_BUTTERFLY_4_H(in0, in1, in2, in3, temp0, temp1, in1, in3);
115*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG2(__lsx_vslli_h, temp0, 3, temp1, 3, in1, 3, in3, 3, temp0, temp1,
116*fb1b10abSAndroid Build Coastguard Worker in1, in3);
117*fb1b10abSAndroid Build Coastguard Worker in0 = __lsx_vadd_h(temp0, temp1);
118*fb1b10abSAndroid Build Coastguard Worker in2 = __lsx_vsub_h(temp0, temp1);
119*fb1b10abSAndroid Build Coastguard Worker SET_DOTP_VALUES(coeff, 0, 1, 2, const1, const2);
120*fb1b10abSAndroid Build Coastguard Worker temp0 = __lsx_vreplvei_h(coeff, 3);
121*fb1b10abSAndroid Build Coastguard Worker vec1_w = __lsx_vpackev_h(zero, temp0);
122*fb1b10abSAndroid Build Coastguard Worker coeff = __lsx_vilvh_h(zero, coeff);
123*fb1b10abSAndroid Build Coastguard Worker vec3_w = __lsx_vreplvei_w(coeff, 0);
124*fb1b10abSAndroid Build Coastguard Worker tmp1 = __lsx_vilvl_h(in3, in1);
125*fb1b10abSAndroid Build Coastguard Worker tmp0 = __lsx_vilvh_h(in3, in1);
126*fb1b10abSAndroid Build Coastguard Worker vec0_w = vec1_w;
127*fb1b10abSAndroid Build Coastguard Worker vec2_w = vec3_w;
128*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG3(__lsx_vdp2add_w_h, vec0_w, tmp1, const1, vec1_w, tmp0, const1,
129*fb1b10abSAndroid Build Coastguard Worker vec2_w, tmp1, const2, vec3_w, tmp0, const2, vec0_w, vec1_w, vec2_w,
130*fb1b10abSAndroid Build Coastguard Worker vec3_w);
131*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG3(__lsx_vsrani_h_w, vec1_w, vec0_w, 12, vec3_w, vec2_w, 12, in1, in3);
132*fb1b10abSAndroid Build Coastguard Worker LSX_TRANSPOSE4x4_H(in0, in1, in2, in3, in0, in1, in2, in3);
133*fb1b10abSAndroid Build Coastguard Worker
134*fb1b10abSAndroid Build Coastguard Worker LSX_BUTTERFLY_4_H(in0, in1, in2, in3, temp0, temp1, in1, in3);
135*fb1b10abSAndroid Build Coastguard Worker in0 = __lsx_vadd_h(temp0, temp1);
136*fb1b10abSAndroid Build Coastguard Worker in0 = __lsx_vaddi_hu(in0, 7);
137*fb1b10abSAndroid Build Coastguard Worker in2 = __lsx_vsub_h(temp0, temp1);
138*fb1b10abSAndroid Build Coastguard Worker in2 = __lsx_vaddi_hu(in2, 7);
139*fb1b10abSAndroid Build Coastguard Worker in0 = __lsx_vsrai_h(in0, 4);
140*fb1b10abSAndroid Build Coastguard Worker in2 = __lsx_vsrai_h(in2, 4);
141*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vreplvei_w, coeff, 2, coeff, 3, vec3_w, vec1_w);
142*fb1b10abSAndroid Build Coastguard Worker vec3_w = __lsx_vadd_w(vec3_w, vec1_w);
143*fb1b10abSAndroid Build Coastguard Worker vec1_w = __lsx_vreplvei_w(coeff, 1);
144*fb1b10abSAndroid Build Coastguard Worker const0 = RET_1_IF_NZERO_H(in3);
145*fb1b10abSAndroid Build Coastguard Worker tmp1 = __lsx_vilvl_h(in3, in1);
146*fb1b10abSAndroid Build Coastguard Worker tmp0 = __lsx_vilvh_h(in3, in1);
147*fb1b10abSAndroid Build Coastguard Worker vec0_w = vec1_w;
148*fb1b10abSAndroid Build Coastguard Worker vec2_w = vec3_w;
149*fb1b10abSAndroid Build Coastguard Worker DUP4_ARG3(__lsx_vdp2add_w_h, vec0_w, tmp1, const1, vec1_w, tmp0, const1,
150*fb1b10abSAndroid Build Coastguard Worker vec2_w, tmp1, const2, vec3_w, tmp0, const2, vec0_w, vec1_w, vec2_w,
151*fb1b10abSAndroid Build Coastguard Worker vec3_w);
152*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG3(__lsx_vsrani_h_w, vec1_w, vec0_w, 16, vec3_w, vec2_w, 16, in1, in3);
153*fb1b10abSAndroid Build Coastguard Worker in1 = __lsx_vadd_h(in1, const0);
154*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vpickev_d, in1, in0, in3, in2, temp0, temp1);
155*fb1b10abSAndroid Build Coastguard Worker __lsx_vst(temp0, output, 0);
156*fb1b10abSAndroid Build Coastguard Worker __lsx_vst(temp1, output, 16);
157*fb1b10abSAndroid Build Coastguard Worker
158*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vpickod_d, in1, in0, in3, in2, in0, in2);
159*fb1b10abSAndroid Build Coastguard Worker __lsx_vst(in0, output, 32);
160*fb1b10abSAndroid Build Coastguard Worker __lsx_vst(in2, output, 48);
161*fb1b10abSAndroid Build Coastguard Worker }
162