1*fb1b10abSAndroid Build Coastguard Worker /*
2*fb1b10abSAndroid Build Coastguard Worker * Copyright (c) 2021 Loongson Technology Corporation Limited
3*fb1b10abSAndroid Build Coastguard Worker * Contributed by Lu Wang <[email protected]>
4*fb1b10abSAndroid Build Coastguard Worker *
5*fb1b10abSAndroid Build Coastguard Worker * Use of this source code is governed by a BSD-style license
6*fb1b10abSAndroid Build Coastguard Worker * that can be found in the LICENSE file in the root of the source
7*fb1b10abSAndroid Build Coastguard Worker * tree. An additional intellectual property rights grant can be found
8*fb1b10abSAndroid Build Coastguard Worker * in the file PATENTS. All contributing project authors may
9*fb1b10abSAndroid Build Coastguard Worker * be found in the AUTHORS file in the root of the source tree.
10*fb1b10abSAndroid Build Coastguard Worker */
11*fb1b10abSAndroid Build Coastguard Worker
#include <string.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx_util/loongson_intrinsics.h"
14*fb1b10abSAndroid Build Coastguard Worker
intra_predict_dc_8x8_lsx(const uint8_t * src_top,const uint8_t * src_left,uint8_t * dst,int32_t dst_stride)15*fb1b10abSAndroid Build Coastguard Worker static inline void intra_predict_dc_8x8_lsx(const uint8_t *src_top,
16*fb1b10abSAndroid Build Coastguard Worker const uint8_t *src_left,
17*fb1b10abSAndroid Build Coastguard Worker uint8_t *dst, int32_t dst_stride) {
18*fb1b10abSAndroid Build Coastguard Worker uint64_t val0, val1;
19*fb1b10abSAndroid Build Coastguard Worker int32_t dst_stride_x2 = dst_stride << 1;
20*fb1b10abSAndroid Build Coastguard Worker int32_t dst_stride_x3 = dst_stride_x2 + dst_stride;
21*fb1b10abSAndroid Build Coastguard Worker int32_t dst_stride_x4 = dst_stride << 2;
22*fb1b10abSAndroid Build Coastguard Worker __m128i store, sum_h, sum_w, sum_d;
23*fb1b10abSAndroid Build Coastguard Worker __m128i src = { 0 };
24*fb1b10abSAndroid Build Coastguard Worker
25*fb1b10abSAndroid Build Coastguard Worker val0 = *(const uint64_t *)src_top;
26*fb1b10abSAndroid Build Coastguard Worker val1 = *(const uint64_t *)src_left;
27*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG3(__lsx_vinsgr2vr_d, src, val0, 0, src, val1, 1, src, src);
28*fb1b10abSAndroid Build Coastguard Worker sum_h = __lsx_vhaddw_hu_bu(src, src);
29*fb1b10abSAndroid Build Coastguard Worker sum_w = __lsx_vhaddw_wu_hu(sum_h, sum_h);
30*fb1b10abSAndroid Build Coastguard Worker sum_d = __lsx_vhaddw_du_wu(sum_w, sum_w);
31*fb1b10abSAndroid Build Coastguard Worker sum_w = __lsx_vpickev_w(sum_d, sum_d);
32*fb1b10abSAndroid Build Coastguard Worker sum_d = __lsx_vhaddw_du_wu(sum_w, sum_w);
33*fb1b10abSAndroid Build Coastguard Worker sum_w = __lsx_vsrari_w(sum_d, 4);
34*fb1b10abSAndroid Build Coastguard Worker store = __lsx_vreplvei_b(sum_w, 0);
35*fb1b10abSAndroid Build Coastguard Worker
36*fb1b10abSAndroid Build Coastguard Worker __lsx_vstelm_d(store, dst, 0, 0);
37*fb1b10abSAndroid Build Coastguard Worker __lsx_vstelm_d(store, dst + dst_stride, 0, 0);
38*fb1b10abSAndroid Build Coastguard Worker __lsx_vstelm_d(store, dst + dst_stride_x2, 0, 0);
39*fb1b10abSAndroid Build Coastguard Worker __lsx_vstelm_d(store, dst + dst_stride_x3, 0, 0);
40*fb1b10abSAndroid Build Coastguard Worker dst += dst_stride_x4;
41*fb1b10abSAndroid Build Coastguard Worker __lsx_vstelm_d(store, dst, 0, 0);
42*fb1b10abSAndroid Build Coastguard Worker __lsx_vstelm_d(store, dst + dst_stride, 0, 0);
43*fb1b10abSAndroid Build Coastguard Worker __lsx_vstelm_d(store, dst + dst_stride_x2, 0, 0);
44*fb1b10abSAndroid Build Coastguard Worker __lsx_vstelm_d(store, dst + dst_stride_x3, 0, 0);
45*fb1b10abSAndroid Build Coastguard Worker }
46*fb1b10abSAndroid Build Coastguard Worker
intra_predict_dc_16x16_lsx(const uint8_t * src_top,const uint8_t * src_left,uint8_t * dst,int32_t dst_stride)47*fb1b10abSAndroid Build Coastguard Worker static inline void intra_predict_dc_16x16_lsx(const uint8_t *src_top,
48*fb1b10abSAndroid Build Coastguard Worker const uint8_t *src_left,
49*fb1b10abSAndroid Build Coastguard Worker uint8_t *dst,
50*fb1b10abSAndroid Build Coastguard Worker int32_t dst_stride) {
51*fb1b10abSAndroid Build Coastguard Worker int32_t dst_stride_x2 = dst_stride << 1;
52*fb1b10abSAndroid Build Coastguard Worker int32_t dst_stride_x3 = dst_stride_x2 + dst_stride;
53*fb1b10abSAndroid Build Coastguard Worker int32_t dst_stride_x4 = dst_stride << 2;
54*fb1b10abSAndroid Build Coastguard Worker __m128i top, left, out;
55*fb1b10abSAndroid Build Coastguard Worker __m128i sum_h, sum_top, sum_left;
56*fb1b10abSAndroid Build Coastguard Worker __m128i sum_w;
57*fb1b10abSAndroid Build Coastguard Worker __m128i sum_d;
58*fb1b10abSAndroid Build Coastguard Worker
59*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vld, src_top, 0, src_left, 0, top, left);
60*fb1b10abSAndroid Build Coastguard Worker DUP2_ARG2(__lsx_vhaddw_hu_bu, top, top, left, left, sum_top, sum_left);
61*fb1b10abSAndroid Build Coastguard Worker sum_h = __lsx_vadd_h(sum_top, sum_left);
62*fb1b10abSAndroid Build Coastguard Worker sum_w = __lsx_vhaddw_wu_hu(sum_h, sum_h);
63*fb1b10abSAndroid Build Coastguard Worker sum_d = __lsx_vhaddw_du_wu(sum_w, sum_w);
64*fb1b10abSAndroid Build Coastguard Worker sum_w = __lsx_vpickev_w(sum_d, sum_d);
65*fb1b10abSAndroid Build Coastguard Worker sum_d = __lsx_vhaddw_du_wu(sum_w, sum_w);
66*fb1b10abSAndroid Build Coastguard Worker sum_w = __lsx_vsrari_w(sum_d, 5);
67*fb1b10abSAndroid Build Coastguard Worker out = __lsx_vreplvei_b(sum_w, 0);
68*fb1b10abSAndroid Build Coastguard Worker
69*fb1b10abSAndroid Build Coastguard Worker __lsx_vstx(out, dst, 0);
70*fb1b10abSAndroid Build Coastguard Worker __lsx_vstx(out, dst, dst_stride);
71*fb1b10abSAndroid Build Coastguard Worker __lsx_vstx(out, dst, dst_stride_x2);
72*fb1b10abSAndroid Build Coastguard Worker __lsx_vstx(out, dst, dst_stride_x3);
73*fb1b10abSAndroid Build Coastguard Worker dst += dst_stride_x4;
74*fb1b10abSAndroid Build Coastguard Worker __lsx_vstx(out, dst, 0);
75*fb1b10abSAndroid Build Coastguard Worker __lsx_vstx(out, dst, dst_stride);
76*fb1b10abSAndroid Build Coastguard Worker __lsx_vstx(out, dst, dst_stride_x2);
77*fb1b10abSAndroid Build Coastguard Worker __lsx_vstx(out, dst, dst_stride_x3);
78*fb1b10abSAndroid Build Coastguard Worker dst += dst_stride_x4;
79*fb1b10abSAndroid Build Coastguard Worker __lsx_vstx(out, dst, 0);
80*fb1b10abSAndroid Build Coastguard Worker __lsx_vstx(out, dst, dst_stride);
81*fb1b10abSAndroid Build Coastguard Worker __lsx_vstx(out, dst, dst_stride_x2);
82*fb1b10abSAndroid Build Coastguard Worker __lsx_vstx(out, dst, dst_stride_x3);
83*fb1b10abSAndroid Build Coastguard Worker dst += dst_stride_x4;
84*fb1b10abSAndroid Build Coastguard Worker __lsx_vstx(out, dst, 0);
85*fb1b10abSAndroid Build Coastguard Worker __lsx_vstx(out, dst, dst_stride);
86*fb1b10abSAndroid Build Coastguard Worker __lsx_vstx(out, dst, dst_stride_x2);
87*fb1b10abSAndroid Build Coastguard Worker __lsx_vstx(out, dst, dst_stride_x3);
88*fb1b10abSAndroid Build Coastguard Worker }
89*fb1b10abSAndroid Build Coastguard Worker
/*
 * LSX entry point for the 8x8 DC intra predictor. Forwards to
 * intra_predict_dc_8x8_lsx(), passing `above` as the top edge and `left`
 * as the left edge.
 *
 * dst      - destination block.
 * y_stride - destination row stride; narrowed to the helper's int32_t.
 * above    - edge samples handed to the helper as src_top.
 * left     - edge samples handed to the helper as src_left.
 */
void vpx_dc_predictor_8x8_lsx(uint8_t *dst, ptrdiff_t y_stride,
                              const uint8_t *above, const uint8_t *left) {
  const int32_t stride = (int32_t)y_stride;

  intra_predict_dc_8x8_lsx(above, left, dst, stride);
}
94*fb1b10abSAndroid Build Coastguard Worker
/*
 * LSX entry point for the 16x16 DC intra predictor. Forwards to
 * intra_predict_dc_16x16_lsx(), passing `above` as the top edge and `left`
 * as the left edge.
 *
 * dst      - destination block.
 * y_stride - destination row stride; narrowed to the helper's int32_t.
 * above    - edge samples handed to the helper as src_top.
 * left     - edge samples handed to the helper as src_left.
 */
void vpx_dc_predictor_16x16_lsx(uint8_t *dst, ptrdiff_t y_stride,
                                const uint8_t *above, const uint8_t *left) {
  const int32_t stride = (int32_t)y_stride;

  intra_predict_dc_16x16_lsx(above, left, dst, stride);
}
99