/*
 *  Copyright (c) 2022 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10*fb1b10abSAndroid Build Coastguard Worker
11*fb1b10abSAndroid Build Coastguard Worker #include <stdlib.h>
12*fb1b10abSAndroid Build Coastguard Worker
13*fb1b10abSAndroid Build Coastguard Worker #include "./vpx_dsp_rtcd.h"
14*fb1b10abSAndroid Build Coastguard Worker #include "vpx_dsp/loongarch/bitdepth_conversion_lsx.h"
15*fb1b10abSAndroid Build Coastguard Worker
/* 8x8 Hadamard transform of an int16_t block using LSX vectors.
 *
 * src:        top-left sample of the 8x8 input block (read only).
 * src_stride: distance between consecutive input rows, in int16_t elements.
 * dst:        receives 64 coefficients, written as eight groups of eight at
 *             element offsets 0, 8, ..., 56.
 *
 * Shape of the computation: three butterfly stages across the eight row
 * vectors, an 8x8 transpose, then the same three butterfly stages again so
 * the other dimension is transformed as well. */
void vpx_hadamard_8x8_lsx(const int16_t *src, ptrdiff_t src_stride,
                          tran_low_t *dst) {
  __m128i row0, row1, row2, row3, row4, row5, row6, row7;
  __m128i mix0, mix1, mix2, mix3, mix4, mix5, mix6, mix7;

  /* __lsx_vldx is given a byte offset here, so with 2-byte samples rows 1, 2
   * and 3 relative to a base pointer sit at 2, 4 and 6 times the stride. */
  const ptrdiff_t off1 = src_stride * 2;
  const ptrdiff_t off2 = src_stride * 4;
  const ptrdiff_t off3 = src_stride * 6;

  /* The cast only drops const for the load intrinsics; nothing is written
   * through these pointers. `lower` is ordinary element arithmetic and
   * points at input row 4. */
  int16_t *upper = (int16_t *)src;
  int16_t *lower = upper + src_stride * 4;

  /* Rows 0..3. */
  row0 = __lsx_vld(upper, 0);
  DUP2_ARG2(__lsx_vldx, upper, off1, upper, off2, row1, row2);
  row3 = __lsx_vldx(upper, off3);

  /* Rows 4..7. */
  row4 = __lsx_vld(lower, 0);
  DUP2_ARG2(__lsx_vldx, lower, off1, lower, off2, row5, row6);
  row7 = __lsx_vldx(lower, off3);

  /* First pass: three butterfly stages. The argument order of each stage is
   * significant; it routes every intermediate to its partner in the next. */
  LSX_BUTTERFLY_8_H(row0, row2, row4, row6, row7, row5, row3, row1, mix0, mix2,
                    mix4, mix6, mix7, mix5, mix3, mix1);
  LSX_BUTTERFLY_8_H(mix0, mix1, mix4, mix5, mix7, mix6, mix3, mix2, row0, row1,
                    row4, row5, row7, row6, row3, row2);
  LSX_BUTTERFLY_8_H(row0, row1, row2, row3, row7, row6, row5, row4, mix0, mix7,
                    mix3, mix4, mix5, mix1, mix6, mix2);

  /* Swap rows and columns so the second pass runs along the other axis. */
  LSX_TRANSPOSE8x8_H(mix0, mix1, mix2, mix3, mix4, mix5, mix6, mix7, row0, row1,
                     row2, row3, row4, row5, row6, row7);

  /* Second pass: identical stage sequence on the transposed data. */
  LSX_BUTTERFLY_8_H(row0, row2, row4, row6, row7, row5, row3, row1, mix0, mix2,
                    mix4, mix6, mix7, mix5, mix3, mix1);
  LSX_BUTTERFLY_8_H(mix0, mix1, mix4, mix5, mix7, mix6, mix3, mix2, row0, row1,
                    row4, row5, row7, row6, row3, row2);
  LSX_BUTTERFLY_8_H(row0, row1, row2, row3, row7, row6, row5, row4, mix0, mix7,
                    mix3, mix4, mix5, mix1, mix6, mix2);

  /* Emit the eight result vectors back to back, eight coefficients apart. */
  store_tran_low(mix0, dst, 0);
  store_tran_low(mix1, dst, 8);
  store_tran_low(mix2, dst, 16);
  store_tran_low(mix3, dst, 24);
  store_tran_low(mix4, dst, 32);
  store_tran_low(mix5, dst, 40);
  store_tran_low(mix6, dst, 48);
  store_tran_low(mix7, dst, 56);
}
57*fb1b10abSAndroid Build Coastguard Worker
/* 16x16 Hadamard transform built from four 8x8 transforms.
 *
 * src:        top-left sample of the 16x16 input block (read only).
 * src_stride: distance between consecutive input rows, in int16_t elements.
 * dst:        receives 256 coefficients; the four 8x8 results are stored
 *             contiguously (64 coefficients each, no stride) and then
 *             combined in place. */
void vpx_hadamard_16x16_lsx(const int16_t *src, ptrdiff_t src_stride,
                            tran_low_t *dst) {
  int col;
  __m128i q0, q1, q2, q3, h0, h1, h2, h3;
  const int16_t *const bottom = src + 8 * src_stride;

  /* Transform each 8x8 quadrant into its own 64-coefficient slot of dst:
   * top-left, top-right, bottom-left, bottom-right. This lays the 16x16
   * block out as 8x32 with the stride removed. */
  vpx_hadamard_8x8_lsx(src, src_stride, dst);
  vpx_hadamard_8x8_lsx(src + 8, src_stride, dst + 64);
  vpx_hadamard_8x8_lsx(bottom, src_stride, dst + 128);
  vpx_hadamard_8x8_lsx(bottom + 8, src_stride, dst + 192);

  /* Combine coefficients that share an index across the four quadrants,
   * eight indices per iteration. */
  for (col = 0; col < 64; col += 8) {
    tran_low_t *const out = dst + col;

    q0 = load_tran_low(out);
    q1 = load_tran_low(out + 64);
    q2 = load_tran_low(out + 128);
    q3 = load_tran_low(out + 192);

    /* Butterfly, arithmetic shift right by one on every 16-bit lane, then
     * butterfly again. The shift sits between the two stages, not after. */
    LSX_BUTTERFLY_4_H(q0, q2, q3, q1, h0, h2, h3, h1);
    DUP4_ARG2(__lsx_vsrai_h, h0, 1, h1, 1, h2, 1, h3, 1, h0, h1, h2, h3);
    LSX_BUTTERFLY_4_H(h0, h1, h3, h2, q0, q1, q3, q2);

    store_tran_low(q0, out, 0);
    store_tran_low(q1, out, 64);
    store_tran_low(q2, out, 128);
    store_tran_low(q3, out, 192);
  }
}
91