xref: /aosp_15_r20/external/libdav1d/src/loongarch/looprestoration_tmpl.c (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
/*
 * Copyright © 2023, VideoLAN and dav1d authors
 * Copyright © 2023, Loongson Technology Corporation Limited
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
27*c0909341SAndroid Build Coastguard Worker 
28*c0909341SAndroid Build Coastguard Worker #include "src/loongarch/looprestoration.h"
29*c0909341SAndroid Build Coastguard Worker 
30*c0909341SAndroid Build Coastguard Worker #if BITDEPTH == 8
31*c0909341SAndroid Build Coastguard Worker 
32*c0909341SAndroid Build Coastguard Worker #define REST_UNIT_STRIDE (400)
33*c0909341SAndroid Build Coastguard Worker 
34*c0909341SAndroid Build Coastguard Worker void BF(dav1d_wiener_filter_h, lsx)(int32_t *hor_ptr,
35*c0909341SAndroid Build Coastguard Worker                                     uint8_t *tmp_ptr,
36*c0909341SAndroid Build Coastguard Worker                                     const int16_t filterh[8],
37*c0909341SAndroid Build Coastguard Worker                                     const int w, const int h);
38*c0909341SAndroid Build Coastguard Worker 
39*c0909341SAndroid Build Coastguard Worker void BF(dav1d_wiener_filter_h, lasx)(int32_t *hor_ptr,
40*c0909341SAndroid Build Coastguard Worker                                      uint8_t *tmp_ptr,
41*c0909341SAndroid Build Coastguard Worker                                      const int16_t filterh[8],
42*c0909341SAndroid Build Coastguard Worker                                      const int w, const int h);
43*c0909341SAndroid Build Coastguard Worker 
44*c0909341SAndroid Build Coastguard Worker void BF(dav1d_wiener_filter_v, lsx)(uint8_t *p,
45*c0909341SAndroid Build Coastguard Worker                                     const ptrdiff_t p_stride,
46*c0909341SAndroid Build Coastguard Worker                                     const int32_t *hor,
47*c0909341SAndroid Build Coastguard Worker                                     const int16_t filterv[8],
48*c0909341SAndroid Build Coastguard Worker                                     const int w, const int h);
49*c0909341SAndroid Build Coastguard Worker 
50*c0909341SAndroid Build Coastguard Worker void BF(dav1d_wiener_filter_v, lasx)(uint8_t *p,
51*c0909341SAndroid Build Coastguard Worker                                      const ptrdiff_t p_stride,
52*c0909341SAndroid Build Coastguard Worker                                      const int32_t *hor,
53*c0909341SAndroid Build Coastguard Worker                                      const int16_t filterv[8],
54*c0909341SAndroid Build Coastguard Worker                                      const int w, const int h);
55*c0909341SAndroid Build Coastguard Worker 
56*c0909341SAndroid Build Coastguard Worker // This function refers to the function in the ppc/looprestoration_init_tmpl.c.
padding(uint8_t * dst,const uint8_t * p,const ptrdiff_t stride,const uint8_t (* left)[4],const uint8_t * lpf,int unit_w,const int stripe_h,const enum LrEdgeFlags edges)57*c0909341SAndroid Build Coastguard Worker static inline void padding(uint8_t *dst, const uint8_t *p,
58*c0909341SAndroid Build Coastguard Worker                            const ptrdiff_t stride, const uint8_t (*left)[4],
59*c0909341SAndroid Build Coastguard Worker                            const uint8_t *lpf, int unit_w, const int stripe_h,
60*c0909341SAndroid Build Coastguard Worker                            const enum LrEdgeFlags edges)
61*c0909341SAndroid Build Coastguard Worker {
62*c0909341SAndroid Build Coastguard Worker     const int have_left = !!(edges & LR_HAVE_LEFT);
63*c0909341SAndroid Build Coastguard Worker     const int have_right = !!(edges & LR_HAVE_RIGHT);
64*c0909341SAndroid Build Coastguard Worker 
65*c0909341SAndroid Build Coastguard Worker     // Copy more pixels if we don't have to pad them
66*c0909341SAndroid Build Coastguard Worker     unit_w += 3 * have_left + 3 * have_right;
67*c0909341SAndroid Build Coastguard Worker     uint8_t *dst_l = dst + 3 * !have_left;
68*c0909341SAndroid Build Coastguard Worker     p -= 3 * have_left;
69*c0909341SAndroid Build Coastguard Worker     lpf -= 3 * have_left;
70*c0909341SAndroid Build Coastguard Worker 
71*c0909341SAndroid Build Coastguard Worker     if (edges & LR_HAVE_TOP) {
72*c0909341SAndroid Build Coastguard Worker         // Copy previous loop filtered rows
73*c0909341SAndroid Build Coastguard Worker         const uint8_t *const above_1 = lpf;
74*c0909341SAndroid Build Coastguard Worker         const uint8_t *const above_2 = above_1 + PXSTRIDE(stride);
75*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_l, above_1, unit_w);
76*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_l + REST_UNIT_STRIDE, above_1, unit_w);
77*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_l + 2 * REST_UNIT_STRIDE, above_2, unit_w);
78*c0909341SAndroid Build Coastguard Worker     } else {
79*c0909341SAndroid Build Coastguard Worker         // Pad with first row
80*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_l, p, unit_w);
81*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_l + REST_UNIT_STRIDE, p, unit_w);
82*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_l + 2 * REST_UNIT_STRIDE, p, unit_w);
83*c0909341SAndroid Build Coastguard Worker         if (have_left) {
84*c0909341SAndroid Build Coastguard Worker             pixel_copy(dst_l, &left[0][1], 3);
85*c0909341SAndroid Build Coastguard Worker             pixel_copy(dst_l + REST_UNIT_STRIDE, &left[0][1], 3);
86*c0909341SAndroid Build Coastguard Worker             pixel_copy(dst_l + 2 * REST_UNIT_STRIDE, &left[0][1], 3);
87*c0909341SAndroid Build Coastguard Worker         }
88*c0909341SAndroid Build Coastguard Worker     }
89*c0909341SAndroid Build Coastguard Worker 
90*c0909341SAndroid Build Coastguard Worker     uint8_t *dst_tl = dst_l + 3 * REST_UNIT_STRIDE;
91*c0909341SAndroid Build Coastguard Worker     if (edges & LR_HAVE_BOTTOM) {
92*c0909341SAndroid Build Coastguard Worker         // Copy next loop filtered rows
93*c0909341SAndroid Build Coastguard Worker         const uint8_t *const below_1 = lpf + 6 * PXSTRIDE(stride);
94*c0909341SAndroid Build Coastguard Worker         const uint8_t *const below_2 = below_1 + PXSTRIDE(stride);
95*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_tl + stripe_h * REST_UNIT_STRIDE, below_1, unit_w);
96*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_tl + (stripe_h + 1) * REST_UNIT_STRIDE, below_2, unit_w);
97*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_tl + (stripe_h + 2) * REST_UNIT_STRIDE, below_2, unit_w);
98*c0909341SAndroid Build Coastguard Worker     } else {
99*c0909341SAndroid Build Coastguard Worker         // Pad with last row
100*c0909341SAndroid Build Coastguard Worker         const uint8_t *const src = p + (stripe_h - 1) * PXSTRIDE(stride);
101*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_tl + stripe_h * REST_UNIT_STRIDE, src, unit_w);
102*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_tl + (stripe_h + 1) * REST_UNIT_STRIDE, src, unit_w);
103*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_tl + (stripe_h + 2) * REST_UNIT_STRIDE, src, unit_w);
104*c0909341SAndroid Build Coastguard Worker         if (have_left) {
105*c0909341SAndroid Build Coastguard Worker             pixel_copy(dst_tl + stripe_h * REST_UNIT_STRIDE, &left[stripe_h - 1][1], 3);
106*c0909341SAndroid Build Coastguard Worker             pixel_copy(dst_tl + (stripe_h + 1) * REST_UNIT_STRIDE, &left[stripe_h - 1][1], 3);
107*c0909341SAndroid Build Coastguard Worker             pixel_copy(dst_tl + (stripe_h + 2) * REST_UNIT_STRIDE, &left[stripe_h - 1][1], 3);
108*c0909341SAndroid Build Coastguard Worker         }
109*c0909341SAndroid Build Coastguard Worker     }
110*c0909341SAndroid Build Coastguard Worker 
111*c0909341SAndroid Build Coastguard Worker     // Inner UNIT_WxSTRIPE_H
112*c0909341SAndroid Build Coastguard Worker     for (int j = 0; j < stripe_h; j++) {
113*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_tl + 3 * have_left, p + 3 * have_left, unit_w - 3 * have_left);
114*c0909341SAndroid Build Coastguard Worker         dst_tl += REST_UNIT_STRIDE;
115*c0909341SAndroid Build Coastguard Worker         p += PXSTRIDE(stride);
116*c0909341SAndroid Build Coastguard Worker     }
117*c0909341SAndroid Build Coastguard Worker 
118*c0909341SAndroid Build Coastguard Worker     if (!have_right) {
119*c0909341SAndroid Build Coastguard Worker         uint8_t *pad = dst_l + unit_w;
120*c0909341SAndroid Build Coastguard Worker         uint8_t *row_last = &dst_l[unit_w - 1];
121*c0909341SAndroid Build Coastguard Worker         // Pad 3x(STRIPE_H+6) with last column
122*c0909341SAndroid Build Coastguard Worker         for (int j = 0; j < stripe_h + 6; j++) {
123*c0909341SAndroid Build Coastguard Worker             pixel_set(pad, *row_last, 3);
124*c0909341SAndroid Build Coastguard Worker             pad += REST_UNIT_STRIDE;
125*c0909341SAndroid Build Coastguard Worker             row_last += REST_UNIT_STRIDE;
126*c0909341SAndroid Build Coastguard Worker         }
127*c0909341SAndroid Build Coastguard Worker     }
128*c0909341SAndroid Build Coastguard Worker 
129*c0909341SAndroid Build Coastguard Worker     if (!have_left) {
130*c0909341SAndroid Build Coastguard Worker         // Pad 3x(STRIPE_H+6) with first column
131*c0909341SAndroid Build Coastguard Worker         for (int j = 0; j < stripe_h + 6; j++) {
132*c0909341SAndroid Build Coastguard Worker             pixel_set(dst, *dst_l, 3);
133*c0909341SAndroid Build Coastguard Worker             dst += REST_UNIT_STRIDE;
134*c0909341SAndroid Build Coastguard Worker             dst_l += REST_UNIT_STRIDE;
135*c0909341SAndroid Build Coastguard Worker         }
136*c0909341SAndroid Build Coastguard Worker     } else {
137*c0909341SAndroid Build Coastguard Worker         dst += 3 * REST_UNIT_STRIDE;
138*c0909341SAndroid Build Coastguard Worker         for (int j = 0; j < stripe_h; j++) {
139*c0909341SAndroid Build Coastguard Worker             pixel_copy(dst, &left[j][1], 3);
140*c0909341SAndroid Build Coastguard Worker             dst += REST_UNIT_STRIDE;
141*c0909341SAndroid Build Coastguard Worker         }
142*c0909341SAndroid Build Coastguard Worker     }
143*c0909341SAndroid Build Coastguard Worker }
144*c0909341SAndroid Build Coastguard Worker 
145*c0909341SAndroid Build Coastguard Worker // This function refers to the function in the ppc/looprestoration_init_tmpl.c.
146*c0909341SAndroid Build Coastguard Worker 
147*c0909341SAndroid Build Coastguard Worker // FIXME Could split into luma and chroma specific functions,
148*c0909341SAndroid Build Coastguard Worker // (since first and last tops are always 0 for chroma)
149*c0909341SAndroid Build Coastguard Worker // FIXME Could implement a version that requires less temporary memory
150*c0909341SAndroid Build Coastguard Worker // (should be possible to implement with only 6 rows of temp storage)
dav1d_wiener_filter_lsx(uint8_t * p,const ptrdiff_t p_stride,const uint8_t (* const left)[4],const uint8_t * lpf,const int w,const int h,const LooprestorationParams * const params,const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)151*c0909341SAndroid Build Coastguard Worker void dav1d_wiener_filter_lsx(uint8_t *p, const ptrdiff_t p_stride,
152*c0909341SAndroid Build Coastguard Worker                               const uint8_t (*const left)[4],
153*c0909341SAndroid Build Coastguard Worker                               const uint8_t *lpf,
154*c0909341SAndroid Build Coastguard Worker                               const int w, const int h,
155*c0909341SAndroid Build Coastguard Worker                               const LooprestorationParams *const params,
156*c0909341SAndroid Build Coastguard Worker                               const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
157*c0909341SAndroid Build Coastguard Worker {
158*c0909341SAndroid Build Coastguard Worker     const int16_t (*const filter)[8] = params->filter;
159*c0909341SAndroid Build Coastguard Worker 
160*c0909341SAndroid Build Coastguard Worker     // Wiener filtering is applied to a maximum stripe height of 64 + 3 pixels
161*c0909341SAndroid Build Coastguard Worker     // of padding above and below
162*c0909341SAndroid Build Coastguard Worker     ALIGN_STK_16(uint8_t, tmp, 70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE,);
163*c0909341SAndroid Build Coastguard Worker     padding(tmp, p, p_stride, left, lpf, w, h, edges);
164*c0909341SAndroid Build Coastguard Worker     ALIGN_STK_16(int32_t, hor, 70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE + 64,);
165*c0909341SAndroid Build Coastguard Worker 
166*c0909341SAndroid Build Coastguard Worker     BF(dav1d_wiener_filter_h, lsx)(hor, tmp, filter[0], w, h + 6);
167*c0909341SAndroid Build Coastguard Worker     BF(dav1d_wiener_filter_v, lsx)(p, p_stride, hor, filter[1], w, h);
168*c0909341SAndroid Build Coastguard Worker }
169*c0909341SAndroid Build Coastguard Worker 
// LASX Wiener filter for one stripe: pad the source into a local buffer,
// run the horizontal pass over h + 6 padded rows into an int32_t scratch
// buffer, then run the vertical pass from that scratch buffer into p.
void dav1d_wiener_filter_lasx(uint8_t *p, const ptrdiff_t p_stride,
                              const uint8_t (*const left)[4],
                              const uint8_t *lpf,
                              const int w, const int h,
                              const LooprestorationParams *const params,
                              const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
{
    // Buffers are sized for a stripe of at most 64 rows plus 3 rows of
    // padding above and below.
    ALIGN_STK_16(uint8_t, padded, 70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE,);
    ALIGN_STK_16(int32_t, hor_out, 70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE + 64,);

    padding(padded, p, p_stride, left, lpf, w, h, edges);

    // filter[0] is handed to the horizontal pass, filter[1] to the vertical.
    BF(dav1d_wiener_filter_h, lasx)(hor_out, padded, params->filter[0], w, h + 6);
    BF(dav1d_wiener_filter_v, lasx)(p, p_stride, hor_out, params->filter[1], w, h);
}
188*c0909341SAndroid Build Coastguard Worker 
189*c0909341SAndroid Build Coastguard Worker void BF(dav1d_boxsum3_h, lsx)(int32_t *sumsq, int16_t *sum, pixel *src,
190*c0909341SAndroid Build Coastguard Worker                               const int w, const int h);
191*c0909341SAndroid Build Coastguard Worker void BF(dav1d_boxsum3_v, lsx)(int32_t *sumsq, int16_t *sum,
192*c0909341SAndroid Build Coastguard Worker                               const int w, const int h);
193*c0909341SAndroid Build Coastguard Worker 
194*c0909341SAndroid Build Coastguard Worker void BF(dav1d_boxsum3_sgf_h, lsx)(int32_t *sumsq, int16_t *sum,
195*c0909341SAndroid Build Coastguard Worker                                   const int w, const int h, const int w1);
196*c0909341SAndroid Build Coastguard Worker void BF(dav1d_boxsum3_sgf_v, lsx)(int16_t *dst, uint8_t *tmp,
197*c0909341SAndroid Build Coastguard Worker                                   int32_t *sumsq, int16_t *sum,
198*c0909341SAndroid Build Coastguard Worker                                   const int w, const int h);
199*c0909341SAndroid Build Coastguard Worker void BF(dav1d_sgr_3x3_finish, lsx)(pixel *p, const ptrdiff_t p_stride,
200*c0909341SAndroid Build Coastguard Worker                                    int16_t *dst, int w1,
201*c0909341SAndroid Build Coastguard Worker                                    const int w, const int h);
202*c0909341SAndroid Build Coastguard Worker 
203*c0909341SAndroid Build Coastguard Worker void BF(dav1d_boxsum3_h, lasx)(int32_t *sumsq, int16_t *sum, pixel *src,
204*c0909341SAndroid Build Coastguard Worker                                const int w, const int h);
205*c0909341SAndroid Build Coastguard Worker void BF(dav1d_boxsum3_sgf_h, lasx)(int32_t *sumsq, int16_t *sum,
206*c0909341SAndroid Build Coastguard Worker                                    const int w, const int h, const int w1);
207*c0909341SAndroid Build Coastguard Worker void BF(dav1d_boxsum3_sgf_v, lasx)(int16_t *dst, uint8_t *tmp,
208*c0909341SAndroid Build Coastguard Worker                                    int32_t *sumsq, int16_t *sum,
209*c0909341SAndroid Build Coastguard Worker                                    const int w, const int h);
210*c0909341SAndroid Build Coastguard Worker 
// 3x3 box sums (LSX): horizontal pass over src, then vertical pass in place
// on sumsq/sum. Both passes are given the area enlarged by 6 in each
// dimension, i.e. including the 3-pixel border on every side.
static inline void boxsum3_lsx(int32_t *sumsq, coef *sum, pixel *src,
                               const int w, const int h)
{
    const int padded_w = w + 6;
    const int padded_h = h + 6;

    BF(dav1d_boxsum3_h, lsx)(sumsq, sum, src, padded_w, padded_h);
    BF(dav1d_boxsum3_v, lsx)(sumsq, sum, padded_w, padded_h);
}
217*c0909341SAndroid Build Coastguard Worker 
// 3x3 box sums (LASX): horizontal pass over src, then vertical pass in place
// on sumsq/sum. Both passes are given the area enlarged by 6 in each
// dimension, i.e. including the 3-pixel border on every side.
static inline void boxsum3_lasx(int32_t *sumsq, coef *sum, pixel *src,
                                const int w, const int h)
{
    const int padded_w = w + 6;
    const int padded_h = h + 6;

    BF(dav1d_boxsum3_h, lasx)(sumsq, sum, src, padded_w, padded_h);
    // The vertical pass uses the LSX routine; no LASX prototype for it is
    // declared in this file.
    BF(dav1d_boxsum3_v, lsx)(sumsq, sum, padded_w, padded_h);
}
224*c0909341SAndroid Build Coastguard Worker 
// 3x3 self-guided filter for one stripe (LSX).
// Pads the source into a local buffer, builds the 3x3 box sums, runs the
// two sgf passes (strength params->sgr.s1) to produce the coefficient plane
// and finally lets the finish routine apply it to p using params->sgr.w1.
void dav1d_sgr_filter_3x3_lsx(pixel *p, const ptrdiff_t p_stride,
                              const pixel (*const left)[4],
                              const pixel *lpf,
                              const int w, const int h,
                              const LooprestorationParams *const params,
                              const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
{
    // Padded copy of the stripe: 64 rows plus 3 above and 3 below.
    ALIGN_STK_16(uint8_t, padded, 70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE,);
    // Box-sum scratch planes.
    ALIGN_STK_16(int32_t, sq_acc, 68 * REST_UNIT_STRIDE + 8, );
    ALIGN_STK_16(int16_t, px_acc, 68 * REST_UNIT_STRIDE + 16, );
    // Output of the sgf vertical pass, input of the finish routine.
    coef flt[64 * 384];

    padding(padded, p, p_stride, left, lpf, w, h, edges);

    boxsum3_lsx(sq_acc, px_acc, padded, w, h);
    BF(dav1d_boxsum3_sgf_h, lsx)(sq_acc, px_acc, w, h, params->sgr.s1);
    BF(dav1d_boxsum3_sgf_v, lsx)(flt, padded, sq_acc, px_acc, w, h);
    BF(dav1d_sgr_3x3_finish, lsx)(p, p_stride, flt, params->sgr.w1, w, h);
}
244*c0909341SAndroid Build Coastguard Worker 
// 3x3 self-guided filter for one stripe (LASX).
// Pads the source into a local buffer, builds the 3x3 box sums, runs the
// two sgf passes (strength params->sgr.s1) to produce the coefficient plane
// and finally lets the finish routine apply it to p using params->sgr.w1.
void dav1d_sgr_filter_3x3_lasx(pixel *p, const ptrdiff_t p_stride,
                               const pixel (*const left)[4],
                               const pixel *lpf,
                               const int w, const int h,
                               const LooprestorationParams *const params,
                               const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
{
    // Padded copy of the stripe: 64 rows plus 3 above and 3 below.
    ALIGN_STK_16(uint8_t, padded, 70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE,);
    // Box-sum scratch planes.
    ALIGN_STK_16(int32_t, sq_acc, 68 * REST_UNIT_STRIDE + 8, );
    ALIGN_STK_16(int16_t, px_acc, 68 * REST_UNIT_STRIDE + 16, );
    // Output of the sgf vertical pass, input of the finish routine.
    coef flt[64 * 384];

    padding(padded, p, p_stride, left, lpf, w, h, edges);

    boxsum3_lasx(sq_acc, px_acc, padded, w, h);
    BF(dav1d_boxsum3_sgf_h, lasx)(sq_acc, px_acc, w, h, params->sgr.s1);
    BF(dav1d_boxsum3_sgf_v, lasx)(flt, padded, sq_acc, px_acc, w, h);
    // The finish step uses the LSX routine; no LASX prototype for it is
    // declared in this file.
    BF(dav1d_sgr_3x3_finish, lsx)(p, p_stride, flt, params->sgr.w1, w, h);
}
264*c0909341SAndroid Build Coastguard Worker 
265*c0909341SAndroid Build Coastguard Worker void BF(dav1d_boxsum5_h, lsx)(int32_t *sumsq, int16_t *sum,
266*c0909341SAndroid Build Coastguard Worker                               const uint8_t *const src,
267*c0909341SAndroid Build Coastguard Worker                               const int w, const int h);
268*c0909341SAndroid Build Coastguard Worker 
269*c0909341SAndroid Build Coastguard Worker void BF(dav1d_boxsum5_v, lsx)(int32_t *sumsq, int16_t *sum,
270*c0909341SAndroid Build Coastguard Worker                               const int w, const int h);
271*c0909341SAndroid Build Coastguard Worker 
272*c0909341SAndroid Build Coastguard Worker void BF(dav1d_boxsum5_sgf_h, lsx)(int32_t *sumsq, int16_t *sum,
273*c0909341SAndroid Build Coastguard Worker                                   const int w, const int h,
274*c0909341SAndroid Build Coastguard Worker                                   const unsigned s);
275*c0909341SAndroid Build Coastguard Worker 
276*c0909341SAndroid Build Coastguard Worker void BF(dav1d_boxsum5_sgf_v, lsx)(int16_t *dst, uint8_t *src,
277*c0909341SAndroid Build Coastguard Worker                                   int32_t *sumsq, int16_t *sum,
278*c0909341SAndroid Build Coastguard Worker                                   const int w, const int h);
279*c0909341SAndroid Build Coastguard Worker 
280*c0909341SAndroid Build Coastguard Worker void BF(dav1d_sgr_mix_finish, lsx)(uint8_t *p, const ptrdiff_t stride,
281*c0909341SAndroid Build Coastguard Worker                                    const int16_t *dst0, const int16_t *dst1,
282*c0909341SAndroid Build Coastguard Worker                                    const int w0, const int w1,
283*c0909341SAndroid Build Coastguard Worker                                    const int w, const int h);
284*c0909341SAndroid Build Coastguard Worker 
// 5x5 box sums (LSX): horizontal pass over src, then vertical pass in place
// on sumsq/sum. Both passes are given the area enlarged by 6 in each
// dimension, i.e. including the 3-pixel border on every side.
static inline void boxsum5_lsx(int32_t *sumsq, coef *sum, pixel *src,
                               const int w, const int h)
{
    const int padded_w = w + 6;
    const int padded_h = h + 6;

    BF(dav1d_boxsum5_h, lsx)(sumsq, sum, src, padded_w, padded_h);
    BF(dav1d_boxsum5_v, lsx)(sumsq, sum, padded_w, padded_h);
}
291*c0909341SAndroid Build Coastguard Worker 
// 5x5 self-guided filter for one stripe (LSX).
// Pads the source into a local buffer, builds the 5x5 box sums, runs the
// two sgf passes (strength params->sgr.s0) to produce the coefficient plane
// and finally lets the finish routine apply it to p using params->sgr.w0.
void dav1d_sgr_filter_5x5_lsx(pixel *p, const ptrdiff_t p_stride,
                              const pixel (*const left)[4],
                              const pixel *lpf,
                              const int w, const int h,
                              const LooprestorationParams *const params,
                              const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
{
    // Padded copy of the stripe: 64 rows plus 3 above and 3 below.
    ALIGN_STK_16(uint8_t, padded, 70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE,);
    // Box-sum scratch planes.
    ALIGN_STK_16(int32_t, sq_acc, 68 * REST_UNIT_STRIDE + 8, );
    ALIGN_STK_16(int16_t, px_acc, 68 * REST_UNIT_STRIDE + 16, );
    // Output of the sgf vertical pass, input of the finish routine.
    coef flt[64 * 384];

    padding(padded, p, p_stride, left, lpf, w, h, edges);

    boxsum5_lsx(sq_acc, px_acc, padded, w, h);
    BF(dav1d_boxsum5_sgf_h, lsx)(sq_acc, px_acc, w, h, params->sgr.s0);
    BF(dav1d_boxsum5_sgf_v, lsx)(flt, padded, sq_acc, px_acc, w, h);
    // The finish routine declared for the 3x3 filter is used here as well,
    // with the 5x5 weight w0.
    BF(dav1d_sgr_3x3_finish, lsx)(p, p_stride, flt, params->sgr.w0, w, h);
}
311*c0909341SAndroid Build Coastguard Worker 
// Mixed (5x5 + 3x3) self-guided filter for one stripe (LSX).
// Pads the source once, produces one coefficient plane with the 5x5 chain
// (strength s0) and one with the 3x3 chain (strength s1), then hands both
// planes and the weights w0 / w1 to the mix finish routine, which gets p.
void dav1d_sgr_filter_mix_lsx(pixel *p, const ptrdiff_t p_stride,
                              const pixel (*const left)[4],
                              const pixel *lpf,
                              const int w, const int h,
                              const LooprestorationParams *const params,
                              const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
{
    // Padded copy of the stripe: 64 rows plus 3 above and 3 below.
    ALIGN_STK_16(uint8_t, padded, 70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE,);
    // Box-sum scratch planes; the same pair serves both chains in turn.
    ALIGN_STK_16(int32_t, sq_acc, 68 * REST_UNIT_STRIDE + 8, );
    ALIGN_STK_16(int16_t, px_acc, 68 * REST_UNIT_STRIDE + 16, );
    coef flt5[64 * 384];    // result of the 5x5 chain
    coef flt3[64 * 384];    // result of the 3x3 chain

    padding(padded, p, p_stride, left, lpf, w, h, edges);

    // 5x5 chain
    boxsum5_lsx(sq_acc, px_acc, padded, w, h);
    BF(dav1d_boxsum5_sgf_h, lsx)(sq_acc, px_acc, w, h, params->sgr.s0);
    BF(dav1d_boxsum5_sgf_v, lsx)(flt5, padded, sq_acc, px_acc, w, h);

    // 3x3 chain, overwriting the scratch planes used above
    boxsum3_lsx(sq_acc, px_acc, padded, w, h);
    BF(dav1d_boxsum3_sgf_h, lsx)(sq_acc, px_acc, w, h, params->sgr.s1);
    BF(dav1d_boxsum3_sgf_v, lsx)(flt3, padded, sq_acc, px_acc, w, h);

    BF(dav1d_sgr_mix_finish, lsx)(p, p_stride, flt5, flt3,
                                  params->sgr.w0, params->sgr.w1, w, h);
}
338*c0909341SAndroid Build Coastguard Worker #endif
339