1 /*
2 * Copyright (c) 2021 Loongson Technology Corporation Limited
3 * Contributed by Lu Wang <[email protected]>
4 *
5 * Use of this source code is governed by a BSD-style license
6 * that can be found in the LICENSE file in the root of the source
7 * tree. An additional intellectual property rights grant can be found
8 * in the file PATENTS. All contributing project authors may
9 * be found in the AUTHORS file in the root of the source tree.
10 */
11
#include <string.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx_util/loongson_intrinsics.h"
14
intra_predict_dc_8x8_lsx(const uint8_t * src_top,const uint8_t * src_left,uint8_t * dst,int32_t dst_stride)15 static inline void intra_predict_dc_8x8_lsx(const uint8_t *src_top,
16 const uint8_t *src_left,
17 uint8_t *dst, int32_t dst_stride) {
18 uint64_t val0, val1;
19 int32_t dst_stride_x2 = dst_stride << 1;
20 int32_t dst_stride_x3 = dst_stride_x2 + dst_stride;
21 int32_t dst_stride_x4 = dst_stride << 2;
22 __m128i store, sum_h, sum_w, sum_d;
23 __m128i src = { 0 };
24
25 val0 = *(const uint64_t *)src_top;
26 val1 = *(const uint64_t *)src_left;
27 DUP2_ARG3(__lsx_vinsgr2vr_d, src, val0, 0, src, val1, 1, src, src);
28 sum_h = __lsx_vhaddw_hu_bu(src, src);
29 sum_w = __lsx_vhaddw_wu_hu(sum_h, sum_h);
30 sum_d = __lsx_vhaddw_du_wu(sum_w, sum_w);
31 sum_w = __lsx_vpickev_w(sum_d, sum_d);
32 sum_d = __lsx_vhaddw_du_wu(sum_w, sum_w);
33 sum_w = __lsx_vsrari_w(sum_d, 4);
34 store = __lsx_vreplvei_b(sum_w, 0);
35
36 __lsx_vstelm_d(store, dst, 0, 0);
37 __lsx_vstelm_d(store, dst + dst_stride, 0, 0);
38 __lsx_vstelm_d(store, dst + dst_stride_x2, 0, 0);
39 __lsx_vstelm_d(store, dst + dst_stride_x3, 0, 0);
40 dst += dst_stride_x4;
41 __lsx_vstelm_d(store, dst, 0, 0);
42 __lsx_vstelm_d(store, dst + dst_stride, 0, 0);
43 __lsx_vstelm_d(store, dst + dst_stride_x2, 0, 0);
44 __lsx_vstelm_d(store, dst + dst_stride_x3, 0, 0);
45 }
46
intra_predict_dc_16x16_lsx(const uint8_t * src_top,const uint8_t * src_left,uint8_t * dst,int32_t dst_stride)47 static inline void intra_predict_dc_16x16_lsx(const uint8_t *src_top,
48 const uint8_t *src_left,
49 uint8_t *dst,
50 int32_t dst_stride) {
51 int32_t dst_stride_x2 = dst_stride << 1;
52 int32_t dst_stride_x3 = dst_stride_x2 + dst_stride;
53 int32_t dst_stride_x4 = dst_stride << 2;
54 __m128i top, left, out;
55 __m128i sum_h, sum_top, sum_left;
56 __m128i sum_w;
57 __m128i sum_d;
58
59 DUP2_ARG2(__lsx_vld, src_top, 0, src_left, 0, top, left);
60 DUP2_ARG2(__lsx_vhaddw_hu_bu, top, top, left, left, sum_top, sum_left);
61 sum_h = __lsx_vadd_h(sum_top, sum_left);
62 sum_w = __lsx_vhaddw_wu_hu(sum_h, sum_h);
63 sum_d = __lsx_vhaddw_du_wu(sum_w, sum_w);
64 sum_w = __lsx_vpickev_w(sum_d, sum_d);
65 sum_d = __lsx_vhaddw_du_wu(sum_w, sum_w);
66 sum_w = __lsx_vsrari_w(sum_d, 5);
67 out = __lsx_vreplvei_b(sum_w, 0);
68
69 __lsx_vstx(out, dst, 0);
70 __lsx_vstx(out, dst, dst_stride);
71 __lsx_vstx(out, dst, dst_stride_x2);
72 __lsx_vstx(out, dst, dst_stride_x3);
73 dst += dst_stride_x4;
74 __lsx_vstx(out, dst, 0);
75 __lsx_vstx(out, dst, dst_stride);
76 __lsx_vstx(out, dst, dst_stride_x2);
77 __lsx_vstx(out, dst, dst_stride_x3);
78 dst += dst_stride_x4;
79 __lsx_vstx(out, dst, 0);
80 __lsx_vstx(out, dst, dst_stride);
81 __lsx_vstx(out, dst, dst_stride_x2);
82 __lsx_vstx(out, dst, dst_stride_x3);
83 dst += dst_stride_x4;
84 __lsx_vstx(out, dst, 0);
85 __lsx_vstx(out, dst, dst_stride);
86 __lsx_vstx(out, dst, dst_stride_x2);
87 __lsx_vstx(out, dst, dst_stride_x3);
88 }
89
/*
 * 8x8 DC predictor entry point: forwards to intra_predict_dc_8x8_lsx(),
 * reordering the arguments to that helper's (top, left, dst, stride) order.
 */
void vpx_dc_predictor_8x8_lsx(uint8_t *dst, ptrdiff_t y_stride,
                              const uint8_t *above, const uint8_t *left) {
  /* The helper takes a 32-bit stride, so the narrowing is spelled out. */
  const int32_t stride = (int32_t)y_stride;

  intra_predict_dc_8x8_lsx(above, left, dst, stride);
}
94
/*
 * 16x16 DC predictor entry point: forwards to intra_predict_dc_16x16_lsx(),
 * reordering the arguments to that helper's (top, left, dst, stride) order.
 */
void vpx_dc_predictor_16x16_lsx(uint8_t *dst, ptrdiff_t y_stride,
                                const uint8_t *above, const uint8_t *left) {
  /* The helper takes a 32-bit stride, so the narrowing is spelled out. */
  const int32_t stride = (int32_t)y_stride;

  intra_predict_dc_16x16_lsx(above, left, dst, stride);
}
99