xref: /aosp_15_r20/external/libdav1d/src/looprestoration_tmpl.c (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1*c0909341SAndroid Build Coastguard Worker /*
2*c0909341SAndroid Build Coastguard Worker  * Copyright © 2018, VideoLAN and dav1d authors
3*c0909341SAndroid Build Coastguard Worker  * Copyright © 2018, Two Orioles, LLC
4*c0909341SAndroid Build Coastguard Worker  * All rights reserved.
5*c0909341SAndroid Build Coastguard Worker  *
6*c0909341SAndroid Build Coastguard Worker  * Redistribution and use in source and binary forms, with or without
7*c0909341SAndroid Build Coastguard Worker  * modification, are permitted provided that the following conditions are met:
8*c0909341SAndroid Build Coastguard Worker  *
9*c0909341SAndroid Build Coastguard Worker  * 1. Redistributions of source code must retain the above copyright notice, this
10*c0909341SAndroid Build Coastguard Worker  *    list of conditions and the following disclaimer.
11*c0909341SAndroid Build Coastguard Worker  *
12*c0909341SAndroid Build Coastguard Worker  * 2. Redistributions in binary form must reproduce the above copyright notice,
13*c0909341SAndroid Build Coastguard Worker  *    this list of conditions and the following disclaimer in the documentation
14*c0909341SAndroid Build Coastguard Worker  *    and/or other materials provided with the distribution.
15*c0909341SAndroid Build Coastguard Worker  *
16*c0909341SAndroid Build Coastguard Worker  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17*c0909341SAndroid Build Coastguard Worker  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18*c0909341SAndroid Build Coastguard Worker  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19*c0909341SAndroid Build Coastguard Worker  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20*c0909341SAndroid Build Coastguard Worker  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21*c0909341SAndroid Build Coastguard Worker  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22*c0909341SAndroid Build Coastguard Worker  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23*c0909341SAndroid Build Coastguard Worker  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24*c0909341SAndroid Build Coastguard Worker  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*c0909341SAndroid Build Coastguard Worker  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*c0909341SAndroid Build Coastguard Worker  */
27*c0909341SAndroid Build Coastguard Worker 
28*c0909341SAndroid Build Coastguard Worker #include "config.h"
29*c0909341SAndroid Build Coastguard Worker 
30*c0909341SAndroid Build Coastguard Worker #include <stdlib.h>
31*c0909341SAndroid Build Coastguard Worker 
32*c0909341SAndroid Build Coastguard Worker #include "common/intops.h"
33*c0909341SAndroid Build Coastguard Worker 
34*c0909341SAndroid Build Coastguard Worker #include "src/looprestoration.h"
35*c0909341SAndroid Build Coastguard Worker #include "src/tables.h"
36*c0909341SAndroid Build Coastguard Worker 
37*c0909341SAndroid Build Coastguard Worker // 256 * 1.5 + 3 + 3 = 390
38*c0909341SAndroid Build Coastguard Worker #define REST_UNIT_STRIDE (390)
39*c0909341SAndroid Build Coastguard Worker 
40*c0909341SAndroid Build Coastguard Worker // TODO Reuse p when no padding is needed (add and remove lpf pixels in p)
41*c0909341SAndroid Build Coastguard Worker // TODO Chroma only requires 2 rows of padding.
42*c0909341SAndroid Build Coastguard Worker static NOINLINE void
padding(pixel * dst,const pixel * p,const ptrdiff_t stride,const pixel (* left)[4],const pixel * lpf,int unit_w,const int stripe_h,const enum LrEdgeFlags edges)43*c0909341SAndroid Build Coastguard Worker padding(pixel *dst, const pixel *p, const ptrdiff_t stride,
44*c0909341SAndroid Build Coastguard Worker         const pixel (*left)[4], const pixel *lpf, int unit_w,
45*c0909341SAndroid Build Coastguard Worker         const int stripe_h, const enum LrEdgeFlags edges)
46*c0909341SAndroid Build Coastguard Worker {
47*c0909341SAndroid Build Coastguard Worker     const int have_left = !!(edges & LR_HAVE_LEFT);
48*c0909341SAndroid Build Coastguard Worker     const int have_right = !!(edges & LR_HAVE_RIGHT);
49*c0909341SAndroid Build Coastguard Worker 
50*c0909341SAndroid Build Coastguard Worker     // Copy more pixels if we don't have to pad them
51*c0909341SAndroid Build Coastguard Worker     unit_w += 3 * have_left + 3 * have_right;
52*c0909341SAndroid Build Coastguard Worker     pixel *dst_l = dst + 3 * !have_left;
53*c0909341SAndroid Build Coastguard Worker     p -= 3 * have_left;
54*c0909341SAndroid Build Coastguard Worker     lpf -= 3 * have_left;
55*c0909341SAndroid Build Coastguard Worker 
56*c0909341SAndroid Build Coastguard Worker     if (edges & LR_HAVE_TOP) {
57*c0909341SAndroid Build Coastguard Worker         // Copy previous loop filtered rows
58*c0909341SAndroid Build Coastguard Worker         const pixel *const above_1 = lpf;
59*c0909341SAndroid Build Coastguard Worker         const pixel *const above_2 = above_1 + PXSTRIDE(stride);
60*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_l, above_1, unit_w);
61*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_l + REST_UNIT_STRIDE, above_1, unit_w);
62*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_l + 2 * REST_UNIT_STRIDE, above_2, unit_w);
63*c0909341SAndroid Build Coastguard Worker     } else {
64*c0909341SAndroid Build Coastguard Worker         // Pad with first row
65*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_l, p, unit_w);
66*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_l + REST_UNIT_STRIDE, p, unit_w);
67*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_l + 2 * REST_UNIT_STRIDE, p, unit_w);
68*c0909341SAndroid Build Coastguard Worker         if (have_left) {
69*c0909341SAndroid Build Coastguard Worker             pixel_copy(dst_l, &left[0][1], 3);
70*c0909341SAndroid Build Coastguard Worker             pixel_copy(dst_l + REST_UNIT_STRIDE, &left[0][1], 3);
71*c0909341SAndroid Build Coastguard Worker             pixel_copy(dst_l + 2 * REST_UNIT_STRIDE, &left[0][1], 3);
72*c0909341SAndroid Build Coastguard Worker         }
73*c0909341SAndroid Build Coastguard Worker     }
74*c0909341SAndroid Build Coastguard Worker 
75*c0909341SAndroid Build Coastguard Worker     pixel *dst_tl = dst_l + 3 * REST_UNIT_STRIDE;
76*c0909341SAndroid Build Coastguard Worker     if (edges & LR_HAVE_BOTTOM) {
77*c0909341SAndroid Build Coastguard Worker         // Copy next loop filtered rows
78*c0909341SAndroid Build Coastguard Worker         const pixel *const below_1 = lpf + 6 * PXSTRIDE(stride);
79*c0909341SAndroid Build Coastguard Worker         const pixel *const below_2 = below_1 + PXSTRIDE(stride);
80*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_tl + stripe_h * REST_UNIT_STRIDE, below_1, unit_w);
81*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_tl + (stripe_h + 1) * REST_UNIT_STRIDE, below_2, unit_w);
82*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_tl + (stripe_h + 2) * REST_UNIT_STRIDE, below_2, unit_w);
83*c0909341SAndroid Build Coastguard Worker     } else {
84*c0909341SAndroid Build Coastguard Worker         // Pad with last row
85*c0909341SAndroid Build Coastguard Worker         const pixel *const src = p + (stripe_h - 1) * PXSTRIDE(stride);
86*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_tl + stripe_h * REST_UNIT_STRIDE, src, unit_w);
87*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_tl + (stripe_h + 1) * REST_UNIT_STRIDE, src, unit_w);
88*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_tl + (stripe_h + 2) * REST_UNIT_STRIDE, src, unit_w);
89*c0909341SAndroid Build Coastguard Worker         if (have_left) {
90*c0909341SAndroid Build Coastguard Worker             pixel_copy(dst_tl + stripe_h * REST_UNIT_STRIDE, &left[stripe_h - 1][1], 3);
91*c0909341SAndroid Build Coastguard Worker             pixel_copy(dst_tl + (stripe_h + 1) * REST_UNIT_STRIDE, &left[stripe_h - 1][1], 3);
92*c0909341SAndroid Build Coastguard Worker             pixel_copy(dst_tl + (stripe_h + 2) * REST_UNIT_STRIDE, &left[stripe_h - 1][1], 3);
93*c0909341SAndroid Build Coastguard Worker         }
94*c0909341SAndroid Build Coastguard Worker     }
95*c0909341SAndroid Build Coastguard Worker 
96*c0909341SAndroid Build Coastguard Worker     // Inner UNIT_WxSTRIPE_H
97*c0909341SAndroid Build Coastguard Worker     for (int j = 0; j < stripe_h; j++) {
98*c0909341SAndroid Build Coastguard Worker         pixel_copy(dst_tl + 3 * have_left, p + 3 * have_left, unit_w - 3 * have_left);
99*c0909341SAndroid Build Coastguard Worker         dst_tl += REST_UNIT_STRIDE;
100*c0909341SAndroid Build Coastguard Worker         p += PXSTRIDE(stride);
101*c0909341SAndroid Build Coastguard Worker     }
102*c0909341SAndroid Build Coastguard Worker 
103*c0909341SAndroid Build Coastguard Worker     if (!have_right) {
104*c0909341SAndroid Build Coastguard Worker         pixel *pad = dst_l + unit_w;
105*c0909341SAndroid Build Coastguard Worker         pixel *row_last = &dst_l[unit_w - 1];
106*c0909341SAndroid Build Coastguard Worker         // Pad 3x(STRIPE_H+6) with last column
107*c0909341SAndroid Build Coastguard Worker         for (int j = 0; j < stripe_h + 6; j++) {
108*c0909341SAndroid Build Coastguard Worker             pixel_set(pad, *row_last, 3);
109*c0909341SAndroid Build Coastguard Worker             pad += REST_UNIT_STRIDE;
110*c0909341SAndroid Build Coastguard Worker             row_last += REST_UNIT_STRIDE;
111*c0909341SAndroid Build Coastguard Worker         }
112*c0909341SAndroid Build Coastguard Worker     }
113*c0909341SAndroid Build Coastguard Worker 
114*c0909341SAndroid Build Coastguard Worker     if (!have_left) {
115*c0909341SAndroid Build Coastguard Worker         // Pad 3x(STRIPE_H+6) with first column
116*c0909341SAndroid Build Coastguard Worker         for (int j = 0; j < stripe_h + 6; j++) {
117*c0909341SAndroid Build Coastguard Worker             pixel_set(dst, *dst_l, 3);
118*c0909341SAndroid Build Coastguard Worker             dst += REST_UNIT_STRIDE;
119*c0909341SAndroid Build Coastguard Worker             dst_l += REST_UNIT_STRIDE;
120*c0909341SAndroid Build Coastguard Worker         }
121*c0909341SAndroid Build Coastguard Worker     } else {
122*c0909341SAndroid Build Coastguard Worker         dst += 3 * REST_UNIT_STRIDE;
123*c0909341SAndroid Build Coastguard Worker         for (int j = 0; j < stripe_h; j++) {
124*c0909341SAndroid Build Coastguard Worker             pixel_copy(dst, &left[j][1], 3);
125*c0909341SAndroid Build Coastguard Worker             dst += REST_UNIT_STRIDE;
126*c0909341SAndroid Build Coastguard Worker         }
127*c0909341SAndroid Build Coastguard Worker     }
128*c0909341SAndroid Build Coastguard Worker }
129*c0909341SAndroid Build Coastguard Worker 
130*c0909341SAndroid Build Coastguard Worker // FIXME Could split into luma and chroma specific functions,
// (since first and last taps are always 0 for chroma)
132*c0909341SAndroid Build Coastguard Worker // FIXME Could implement a version that requires less temporary memory
133*c0909341SAndroid Build Coastguard Worker // (should be possible to implement with only 6 rows of temp storage)
/*
 * Separable 7-tap filter applied in place to a w x h block at p.
 *
 * The block is first copied, with 3 pixels of border on every side, into a
 * temporary buffer by padding(). A horizontal pass using params->filter[0]
 * writes clipped 16-bit intermediates for h + 6 rows; a vertical pass using
 * params->filter[1] then produces the output pixels, which are stored back
 * to p (rows PXSTRIDE(stride) apart).
 */
static void wiener_c(pixel *p, const ptrdiff_t stride,
                     const pixel (*const left)[4],
                     const pixel *lpf, const int w, const int h,
                     const LooprestorationParams *const params,
                     const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
{
    // Room for a stripe of up to 64 rows plus 3 rows of padding above and
    // below.
    pixel tmp[70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE];
    padding(tmp, p, stride, left, lpf, w, h, edges);

    // The horizontal results need more range than a uint8_t offers.
    uint16_t hor[70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE];

    const int16_t (*const filter)[8] = params->filter;
    const int bitdepth = bitdepth_from_max(bitdepth_max);

    // ---- horizontal pass over all h + 6 padded rows ----
    const int round_bits_h = 3 + (bitdepth == 12) * 2;
    const int rounding_off_h = 1 << (round_bits_h - 1);
    const int clip_limit = 1 << (bitdepth + 1 + 7 - round_bits_h);
    for (int y = 0; y < h + 6; y++) {
        const pixel *const in = tmp + y * REST_UNIT_STRIDE;
        uint16_t *const out = hor + y * REST_UNIT_STRIDE;

        for (int x = 0; x < w; x++) {
            int acc = (1 << (bitdepth + 6));
#if BITDEPTH == 8
            // 8 bpc builds add 128 times the centre pixel on top of the taps
            acc += in[x + 3] * 128;
#endif

            for (int k = 0; k < 7; k++) {
                acc += in[x + k] * filter[0][k];
            }

            out[x] =
                iclip((acc + rounding_off_h) >> round_bits_h, 0, clip_limit - 1);
        }
    }

    // ---- vertical pass, producing the h output rows ----
    const int round_bits_v = 11 - (bitdepth == 12) * 2;
    const int rounding_off_v = 1 << (round_bits_v - 1);
    const int round_offset = 1 << (bitdepth + (round_bits_v - 1));
    for (int y = 0; y < h; y++) {
        const uint16_t *const col = hor + y * REST_UNIT_STRIDE;
        pixel *const out = p + y * PXSTRIDE(stride);

        for (int x = 0; x < w; x++) {
            int acc = -round_offset;

            for (int k = 0; k < 7; k++) {
                acc += col[k * REST_UNIT_STRIDE + x] * filter[1][k];
            }

            out[x] = iclip_pixel((acc + rounding_off_v) >> round_bits_v);
        }
    }
}
191*c0909341SAndroid Build Coastguard Worker 
192*c0909341SAndroid Build Coastguard Worker // Sum over a 3x3 area
193*c0909341SAndroid Build Coastguard Worker // The dst and src pointers are positioned 3 pixels above and 3 pixels to the
194*c0909341SAndroid Build Coastguard Worker // left of the top left corner. However, the self guided filter only needs 1
195*c0909341SAndroid Build Coastguard Worker // pixel above and one pixel to the left. As for the pixels below and to the
196*c0909341SAndroid Build Coastguard Worker // right they must be computed in the sums, but don't need to be stored.
197*c0909341SAndroid Build Coastguard Worker //
198*c0909341SAndroid Build Coastguard Worker // Example for a 4x4 block:
199*c0909341SAndroid Build Coastguard Worker //      x x x x x x x x x x
200*c0909341SAndroid Build Coastguard Worker //      x c c c c c c c c x
201*c0909341SAndroid Build Coastguard Worker //      x i s s s s s s i x
202*c0909341SAndroid Build Coastguard Worker //      x i s s s s s s i x
203*c0909341SAndroid Build Coastguard Worker //      x i s s s s s s i x
204*c0909341SAndroid Build Coastguard Worker //      x i s s s s s s i x
205*c0909341SAndroid Build Coastguard Worker //      x i s s s s s s i x
206*c0909341SAndroid Build Coastguard Worker //      x i s s s s s s i x
207*c0909341SAndroid Build Coastguard Worker //      x c c c c c c c c x
208*c0909341SAndroid Build Coastguard Worker //      x x x x x x x x x x
209*c0909341SAndroid Build Coastguard Worker //
210*c0909341SAndroid Build Coastguard Worker // s: Pixel summed and stored
211*c0909341SAndroid Build Coastguard Worker // i: Pixel summed and stored (between loops)
212*c0909341SAndroid Build Coastguard Worker // c: Pixel summed not stored
213*c0909341SAndroid Build Coastguard Worker // x: Pixel not summed not stored
/*
 * 3x3 box sums of src (row pitch REST_UNIT_STRIDE).
 *
 * On return, for rows 2 .. h - 3 and columns 2 .. w - 3 of the padded
 * block, sum[] holds the sum of the 3x3 neighbourhood and sumsq[] the sum
 * of its squares. Both outputs use a row pitch of REST_UNIT_STRIDE and are
 * computed in two passes: a vertical 3-tap sum followed by an in-place
 * horizontal 3-tap sum.
 */
static void boxsum3(int32_t *sumsq, coef *sum, const pixel *src,
                    const int w, const int h)
{
    // Vertical pass, one column at a time. The outermost columns are never
    // needed; the output row for centre row y lands at row index y - 1.
    for (int x = 1; x < w - 1; x++) {
        int up = src[REST_UNIT_STRIDE + x], up_sq = up * up;
        int mid = src[2 * REST_UNIT_STRIDE + x], mid_sq = mid * mid;

        for (int y = 2; y < h - 2; y++) {
            const int down = src[(y + 1) * REST_UNIT_STRIDE + x];
            const int down_sq = down * down;
            const int out = (y - 1) * REST_UNIT_STRIDE + x;

            sum[out] = up + mid + down;
            sumsq[out] = up_sq + mid_sq + down_sq;

            // Slide the window one row down
            up = mid;
            up_sq = mid_sq;
            mid = down;
            mid_sq = down_sq;
        }
    }

    // Horizontal pass, performed in place on the rows written above. The
    // window values are carried in locals because each store overwrites an
    // input of the following columns.
    for (int y = 2; y < h - 2; y++) {
        coef *const s_row = sum + (y - 1) * REST_UNIT_STRIDE;
        int32_t *const q_row = sumsq + (y - 1) * REST_UNIT_STRIDE;
        int s_l = s_row[1], q_l = q_row[1];
        int s_c = s_row[2], q_c = q_row[2];

        // Column 1 and the last two columns are not stored
        for (int x = 2; x < w - 2; x++) {
            const int s_r = s_row[x + 1], q_r = q_row[x + 1];

            s_row[x] = s_l + s_c + s_r;
            q_row[x] = q_l + q_c + q_r;

            s_l = s_c;
            q_l = q_c;
            s_c = s_r;
            q_c = q_r;
        }
    }
}
268*c0909341SAndroid Build Coastguard Worker 
269*c0909341SAndroid Build Coastguard Worker // Sum over a 5x5 area
270*c0909341SAndroid Build Coastguard Worker // The dst and src pointers are positioned 3 pixels above and 3 pixels to the
271*c0909341SAndroid Build Coastguard Worker // left of the top left corner. However, the self guided filter only needs 1
272*c0909341SAndroid Build Coastguard Worker // pixel above and one pixel to the left. As for the pixels below and to the
273*c0909341SAndroid Build Coastguard Worker // right they must be computed in the sums, but don't need to be stored.
274*c0909341SAndroid Build Coastguard Worker //
275*c0909341SAndroid Build Coastguard Worker // Example for a 4x4 block:
276*c0909341SAndroid Build Coastguard Worker //      c c c c c c c c c c
277*c0909341SAndroid Build Coastguard Worker //      c c c c c c c c c c
278*c0909341SAndroid Build Coastguard Worker //      i i s s s s s s i i
279*c0909341SAndroid Build Coastguard Worker //      i i s s s s s s i i
280*c0909341SAndroid Build Coastguard Worker //      i i s s s s s s i i
281*c0909341SAndroid Build Coastguard Worker //      i i s s s s s s i i
282*c0909341SAndroid Build Coastguard Worker //      i i s s s s s s i i
283*c0909341SAndroid Build Coastguard Worker //      i i s s s s s s i i
284*c0909341SAndroid Build Coastguard Worker //      c c c c c c c c c c
285*c0909341SAndroid Build Coastguard Worker //      c c c c c c c c c c
286*c0909341SAndroid Build Coastguard Worker //
287*c0909341SAndroid Build Coastguard Worker // s: Pixel summed and stored
288*c0909341SAndroid Build Coastguard Worker // i: Pixel summed and stored (between loops)
289*c0909341SAndroid Build Coastguard Worker // c: Pixel summed not stored
290*c0909341SAndroid Build Coastguard Worker // x: Pixel not summed not stored
/*
 * 5x5 box sums of src (row pitch REST_UNIT_STRIDE).
 *
 * On return, for rows 2 .. h - 3 and columns 2 .. w - 3 of the padded
 * block, sum[] holds the sum of the 5x5 neighbourhood and sumsq[] the sum
 * of its squares. The result for centre row y is stored at row index
 * y - 1 of the outputs (row pitch REST_UNIT_STRIDE). A vertical 5-tap pass
 * over every column is followed by an in-place horizontal 5-tap pass.
 */
static void boxsum5(int32_t *sumsq, coef *sum, const pixel *const src,
                    const int w, const int h)
{
    // Vertical pass: all w columns are summed, since the horizontal pass
    // below reads two columns to either side of each stored position.
    for (int x = 0; x < w; x++) {
        int r0 = src[x], r0_sq = r0 * r0;
        int r1 = src[REST_UNIT_STRIDE + x], r1_sq = r1 * r1;
        int r2 = src[2 * REST_UNIT_STRIDE + x], r2_sq = r2 * r2;
        int r3 = src[3 * REST_UNIT_STRIDE + x], r3_sq = r3 * r3;

        for (int y = 2; y < h - 2; y++) {
            const int r4 = src[(y + 2) * REST_UNIT_STRIDE + x];
            const int r4_sq = r4 * r4;
            const int out = (y - 1) * REST_UNIT_STRIDE + x;

            sum[out] = r0 + r1 + r2 + r3 + r4;
            sumsq[out] = r0_sq + r1_sq + r2_sq + r3_sq + r4_sq;

            // Slide the window one row down
            r0 = r1;
            r1 = r2;
            r2 = r3;
            r3 = r4;
            r0_sq = r1_sq;
            r1_sq = r2_sq;
            r2_sq = r3_sq;
            r3_sq = r4_sq;
        }
    }

    // Horizontal pass, in place on the rows written above. The window is
    // held in locals because each store overwrites an input of later
    // columns.
    for (int y = 2; y < h - 2; y++) {
        coef *const s_row = sum + (y - 1) * REST_UNIT_STRIDE;
        int32_t *const q_row = sumsq + (y - 1) * REST_UNIT_STRIDE;
        int s0 = s_row[0], q0 = q_row[0];
        int s1 = s_row[1], q1 = q_row[1];
        int s2 = s_row[2], q2 = q_row[2];
        int s3 = s_row[3], q3 = q_row[3];

        for (int x = 2; x < w - 2; x++) {
            const int s4 = s_row[x + 2], q4 = q_row[x + 2];

            s_row[x] = s0 + s1 + s2 + s3 + s4;
            q_row[x] = q0 + q1 + q2 + q3 + q4;

            s0 = s1;
            s1 = s2;
            s2 = s3;
            s3 = s4;
            q0 = q1;
            q1 = q2;
            q2 = q3;
            q3 = q4;
        }
    }
}
349*c0909341SAndroid Build Coastguard Worker 
350*c0909341SAndroid Build Coastguard Worker static NOINLINE void
selfguided_filter(coef * dst,const pixel * src,const ptrdiff_t src_stride,const int w,const int h,const int n,const unsigned s HIGHBD_DECL_SUFFIX)351*c0909341SAndroid Build Coastguard Worker selfguided_filter(coef *dst, const pixel *src, const ptrdiff_t src_stride,
352*c0909341SAndroid Build Coastguard Worker                   const int w, const int h, const int n, const unsigned s
353*c0909341SAndroid Build Coastguard Worker                   HIGHBD_DECL_SUFFIX)
354*c0909341SAndroid Build Coastguard Worker {
355*c0909341SAndroid Build Coastguard Worker     const unsigned sgr_one_by_x = n == 25 ? 164 : 455;
356*c0909341SAndroid Build Coastguard Worker 
357*c0909341SAndroid Build Coastguard Worker     // Selfguided filter is applied to a maximum stripe height of 64 + 3 pixels
358*c0909341SAndroid Build Coastguard Worker     // of padding above and below
359*c0909341SAndroid Build Coastguard Worker     int32_t sumsq[68 /*(64 + 2 + 2)*/ * REST_UNIT_STRIDE];
360*c0909341SAndroid Build Coastguard Worker     int32_t *A = sumsq + 2 * REST_UNIT_STRIDE + 3;
361*c0909341SAndroid Build Coastguard Worker     // By inverting A and B after the boxsums, B can be of size coef instead
362*c0909341SAndroid Build Coastguard Worker     // of int32_t
363*c0909341SAndroid Build Coastguard Worker     coef sum[68 /*(64 + 2 + 2)*/ * REST_UNIT_STRIDE];
364*c0909341SAndroid Build Coastguard Worker     coef *B = sum + 2 * REST_UNIT_STRIDE + 3;
365*c0909341SAndroid Build Coastguard Worker 
366*c0909341SAndroid Build Coastguard Worker     const int step = (n == 25) + 1;
367*c0909341SAndroid Build Coastguard Worker     if (n == 25)
368*c0909341SAndroid Build Coastguard Worker         boxsum5(sumsq, sum, src, w + 6, h + 6);
369*c0909341SAndroid Build Coastguard Worker     else
370*c0909341SAndroid Build Coastguard Worker         boxsum3(sumsq, sum, src, w + 6, h + 6);
371*c0909341SAndroid Build Coastguard Worker     const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
372*c0909341SAndroid Build Coastguard Worker 
373*c0909341SAndroid Build Coastguard Worker     int32_t *AA = A - REST_UNIT_STRIDE;
374*c0909341SAndroid Build Coastguard Worker     coef *BB = B - REST_UNIT_STRIDE;
375*c0909341SAndroid Build Coastguard Worker     for (int j = -1; j < h + 1; j+= step) {
376*c0909341SAndroid Build Coastguard Worker         for (int i = -1; i < w + 1; i++) {
377*c0909341SAndroid Build Coastguard Worker             const int a =
378*c0909341SAndroid Build Coastguard Worker                 (AA[i] + ((1 << (2 * bitdepth_min_8)) >> 1)) >> (2 * bitdepth_min_8);
379*c0909341SAndroid Build Coastguard Worker             const int b =
380*c0909341SAndroid Build Coastguard Worker                 (BB[i] + ((1 << bitdepth_min_8) >> 1)) >> bitdepth_min_8;
381*c0909341SAndroid Build Coastguard Worker 
382*c0909341SAndroid Build Coastguard Worker             const unsigned p = imax(a * n - b * b, 0);
383*c0909341SAndroid Build Coastguard Worker             const unsigned z = (p * s + (1 << 19)) >> 20;
384*c0909341SAndroid Build Coastguard Worker             const unsigned x = dav1d_sgr_x_by_x[umin(z, 255)];
385*c0909341SAndroid Build Coastguard Worker 
386*c0909341SAndroid Build Coastguard Worker             // This is where we invert A and B, so that B is of size coef.
387*c0909341SAndroid Build Coastguard Worker             AA[i] = (x * BB[i] * sgr_one_by_x + (1 << 11)) >> 12;
388*c0909341SAndroid Build Coastguard Worker             BB[i] = x;
389*c0909341SAndroid Build Coastguard Worker         }
390*c0909341SAndroid Build Coastguard Worker         AA += step * REST_UNIT_STRIDE;
391*c0909341SAndroid Build Coastguard Worker         BB += step * REST_UNIT_STRIDE;
392*c0909341SAndroid Build Coastguard Worker     }
393*c0909341SAndroid Build Coastguard Worker 
394*c0909341SAndroid Build Coastguard Worker     src += 3 * REST_UNIT_STRIDE + 3;
395*c0909341SAndroid Build Coastguard Worker     if (n == 25) {
396*c0909341SAndroid Build Coastguard Worker         int j = 0;
397*c0909341SAndroid Build Coastguard Worker #define SIX_NEIGHBORS(P, i)\
398*c0909341SAndroid Build Coastguard Worker     ((P[i - REST_UNIT_STRIDE]     + P[i + REST_UNIT_STRIDE]) * 6 +   \
399*c0909341SAndroid Build Coastguard Worker      (P[i - 1 - REST_UNIT_STRIDE] + P[i - 1 + REST_UNIT_STRIDE] +    \
400*c0909341SAndroid Build Coastguard Worker       P[i + 1 - REST_UNIT_STRIDE] + P[i + 1 + REST_UNIT_STRIDE]) * 5)
401*c0909341SAndroid Build Coastguard Worker         for (; j < h - 1; j+=2) {
402*c0909341SAndroid Build Coastguard Worker             for (int i = 0; i < w; i++) {
403*c0909341SAndroid Build Coastguard Worker                 const int a = SIX_NEIGHBORS(B, i);
404*c0909341SAndroid Build Coastguard Worker                 const int b = SIX_NEIGHBORS(A, i);
405*c0909341SAndroid Build Coastguard Worker                 dst[i] = (b - a * src[i] + (1 << 8)) >> 9;
406*c0909341SAndroid Build Coastguard Worker             }
407*c0909341SAndroid Build Coastguard Worker             dst += 384 /* Maximum restoration width is 384 (256 * 1.5) */;
408*c0909341SAndroid Build Coastguard Worker             src += REST_UNIT_STRIDE;
409*c0909341SAndroid Build Coastguard Worker             B += REST_UNIT_STRIDE;
410*c0909341SAndroid Build Coastguard Worker             A += REST_UNIT_STRIDE;
411*c0909341SAndroid Build Coastguard Worker             for (int i = 0; i < w; i++) {
412*c0909341SAndroid Build Coastguard Worker                 const int a = B[i] * 6 + (B[i - 1] + B[i + 1]) * 5;
413*c0909341SAndroid Build Coastguard Worker                 const int b = A[i] * 6 + (A[i - 1] + A[i + 1]) * 5;
414*c0909341SAndroid Build Coastguard Worker                 dst[i] = (b - a * src[i] + (1 << 7)) >> 8;
415*c0909341SAndroid Build Coastguard Worker             }
416*c0909341SAndroid Build Coastguard Worker             dst += 384 /* Maximum restoration width is 384 (256 * 1.5) */;
417*c0909341SAndroid Build Coastguard Worker             src += REST_UNIT_STRIDE;
418*c0909341SAndroid Build Coastguard Worker             B += REST_UNIT_STRIDE;
419*c0909341SAndroid Build Coastguard Worker             A += REST_UNIT_STRIDE;
420*c0909341SAndroid Build Coastguard Worker         }
421*c0909341SAndroid Build Coastguard Worker         if (j + 1 == h) { // Last row, when number of rows is odd
422*c0909341SAndroid Build Coastguard Worker             for (int i = 0; i < w; i++) {
423*c0909341SAndroid Build Coastguard Worker                 const int a = SIX_NEIGHBORS(B, i);
424*c0909341SAndroid Build Coastguard Worker                 const int b = SIX_NEIGHBORS(A, i);
425*c0909341SAndroid Build Coastguard Worker                 dst[i] = (b - a * src[i] + (1 << 8)) >> 9;
426*c0909341SAndroid Build Coastguard Worker             }
427*c0909341SAndroid Build Coastguard Worker         }
428*c0909341SAndroid Build Coastguard Worker #undef SIX_NEIGHBORS
429*c0909341SAndroid Build Coastguard Worker     } else {
430*c0909341SAndroid Build Coastguard Worker #define EIGHT_NEIGHBORS(P, i)\
431*c0909341SAndroid Build Coastguard Worker     ((P[i] + P[i - 1] + P[i + 1] + P[i - REST_UNIT_STRIDE] + P[i + REST_UNIT_STRIDE]) * 4 + \
432*c0909341SAndroid Build Coastguard Worker      (P[i - 1 - REST_UNIT_STRIDE] + P[i - 1 + REST_UNIT_STRIDE] +                           \
433*c0909341SAndroid Build Coastguard Worker       P[i + 1 - REST_UNIT_STRIDE] + P[i + 1 + REST_UNIT_STRIDE]) * 3)
434*c0909341SAndroid Build Coastguard Worker         for (int j = 0; j < h; j++) {
435*c0909341SAndroid Build Coastguard Worker             for (int i = 0; i < w; i++) {
436*c0909341SAndroid Build Coastguard Worker                 const int a = EIGHT_NEIGHBORS(B, i);
437*c0909341SAndroid Build Coastguard Worker                 const int b = EIGHT_NEIGHBORS(A, i);
438*c0909341SAndroid Build Coastguard Worker                 dst[i] = (b - a * src[i] + (1 << 8)) >> 9;
439*c0909341SAndroid Build Coastguard Worker             }
440*c0909341SAndroid Build Coastguard Worker             dst += 384;
441*c0909341SAndroid Build Coastguard Worker             src += REST_UNIT_STRIDE;
442*c0909341SAndroid Build Coastguard Worker             B += REST_UNIT_STRIDE;
443*c0909341SAndroid Build Coastguard Worker             A += REST_UNIT_STRIDE;
444*c0909341SAndroid Build Coastguard Worker         }
445*c0909341SAndroid Build Coastguard Worker     }
446*c0909341SAndroid Build Coastguard Worker #undef EIGHT_NEIGHBORS
447*c0909341SAndroid Build Coastguard Worker }
448*c0909341SAndroid Build Coastguard Worker 
/*
 * Self-guided restoration using only the 5x5 (n = 25) filter pass.
 *
 * The block at p (w x h pixels, row pitch `stride`) is padded into a local
 * buffer, run through selfguided_filter() with strength params->sgr.s0, and
 * the filter output, scaled by params->sgr.w0, is added back onto p in place.
 */
static void sgr_5x5_c(pixel *p, const ptrdiff_t stride,
                      const pixel (*const left)[4], const pixel *lpf,
                      const int w, const int h,
                      const LooprestorationParams *const params,
                      const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
{
    // Working copy of the input: a stripe is at most 64 rows tall and gets
    // 3 extra rows of padding both above and below.
    pixel tmp[70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE];

    // Filter output: at most 64 rows, each stored with a fixed pitch of 384
    // entries (the maximum restoration width, 256 * 1.5).
    coef dst[64 * 384];

    padding(tmp, p, stride, left, lpf, w, h, edges);
    selfguided_filter(dst, tmp, REST_UNIT_STRIDE, w, h, 25,
                      params->sgr.s0 HIGHBD_TAIL_SUFFIX);

    const int w0 = params->sgr.w0;
    for (int y = 0; y < h; y++) {
        const coef *const row = &dst[y * 384];
        for (int x = 0; x < w; x++) {
            const int v = w0 * row[x];
            // Round the weighted correction down by 11 bits before applying it.
            const int delta = (v + (1 << 10)) >> 11;
            p[x] = iclip_pixel(p[x] + delta);
        }
        p += PXSTRIDE(stride);
    }
}
476*c0909341SAndroid Build Coastguard Worker 
/*
 * Self-guided restoration using only the 3x3 (n = 9) filter pass.
 *
 * Same flow as the 5x5 variant, but selfguided_filter() is invoked with
 * n = 9 and strength params->sgr.s1, and the result is weighted by
 * params->sgr.w1 before being added onto p in place.
 */
static void sgr_3x3_c(pixel *p, const ptrdiff_t stride,
                      const pixel (*const left)[4], const pixel *lpf,
                      const int w, const int h,
                      const LooprestorationParams *const params,
                      const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
{
    // Padded working copy of the input (up to 64 rows + 3 above + 3 below).
    pixel tmp[70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE];
    // Filter output, one row every 384 entries.
    coef dst[64 * 384];

    padding(tmp, p, stride, left, lpf, w, h, edges);
    selfguided_filter(dst, tmp, REST_UNIT_STRIDE, w, h, 9,
                      params->sgr.s1 HIGHBD_TAIL_SUFFIX);

    const int w1 = params->sgr.w1;
    for (int y = 0; y < h; y++) {
        const coef *const row = &dst[y * 384];
        for (int x = 0; x < w; x++) {
            const int v = w1 * row[x];
            // Round the weighted correction down by 11 bits before applying it.
            const int delta = (v + (1 << 10)) >> 11;
            p[x] = iclip_pixel(p[x] + delta);
        }
        p += PXSTRIDE(stride);
    }
}
499*c0909341SAndroid Build Coastguard Worker 
/*
 * Self-guided restoration combining both filter passes.
 *
 * The padded input is filtered twice by selfguided_filter(): once with
 * n = 25 / params->sgr.s0 and once with n = 9 / params->sgr.s1. The two
 * outputs are blended with the weights params->sgr.w0 and params->sgr.w1
 * and the rounded sum is added onto p in place.
 */
static void sgr_mix_c(pixel *p, const ptrdiff_t stride,
                      const pixel (*const left)[4], const pixel *lpf,
                      const int w, const int h,
                      const LooprestorationParams *const params,
                      const enum LrEdgeFlags edges HIGHBD_DECL_SUFFIX)
{
    // Padded working copy of the input (up to 64 rows + 3 above + 3 below).
    pixel tmp[70 /*(64 + 3 + 3)*/ * REST_UNIT_STRIDE];
    // One output plane per pass, each with a row pitch of 384 entries.
    coef dst0[64 * 384];
    coef dst1[64 * 384];

    padding(tmp, p, stride, left, lpf, w, h, edges);
    // 5x5 pass.
    selfguided_filter(dst0, tmp, REST_UNIT_STRIDE, w, h, 25,
                      params->sgr.s0 HIGHBD_TAIL_SUFFIX);
    // 3x3 pass on the same padded input.
    selfguided_filter(dst1, tmp, REST_UNIT_STRIDE, w, h,  9,
                      params->sgr.s1 HIGHBD_TAIL_SUFFIX);

    const int w0 = params->sgr.w0;
    const int w1 = params->sgr.w1;
    for (int y = 0; y < h; y++) {
        const coef *const row0 = &dst0[y * 384];
        const coef *const row1 = &dst1[y * 384];
        for (int x = 0; x < w; x++) {
            const int v = w0 * row0[x] + w1 * row1[x];
            // Round the blended correction down by 11 bits before applying it.
            const int delta = (v + (1 << 10)) >> 11;
            p[x] = iclip_pixel(p[x] + delta);
        }
        p += PXSTRIDE(stride);
    }
}
526*c0909341SAndroid Build Coastguard Worker 
527*c0909341SAndroid Build Coastguard Worker #if HAVE_ASM
528*c0909341SAndroid Build Coastguard Worker #if ARCH_AARCH64 || ARCH_ARM
529*c0909341SAndroid Build Coastguard Worker #include "src/arm/looprestoration.h"
530*c0909341SAndroid Build Coastguard Worker #elif ARCH_LOONGARCH64
531*c0909341SAndroid Build Coastguard Worker #include "src/loongarch/looprestoration.h"
532*c0909341SAndroid Build Coastguard Worker #elif ARCH_PPC64LE
533*c0909341SAndroid Build Coastguard Worker #include "src/ppc/looprestoration.h"
534*c0909341SAndroid Build Coastguard Worker #elif ARCH_X86
535*c0909341SAndroid Build Coastguard Worker #include "src/x86/looprestoration.h"
536*c0909341SAndroid Build Coastguard Worker #endif
537*c0909341SAndroid Build Coastguard Worker #endif
538*c0909341SAndroid Build Coastguard Worker 
bitfn(dav1d_loop_restoration_dsp_init)539*c0909341SAndroid Build Coastguard Worker COLD void bitfn(dav1d_loop_restoration_dsp_init)(Dav1dLoopRestorationDSPContext *const c,
540*c0909341SAndroid Build Coastguard Worker                                                  const int bpc)
541*c0909341SAndroid Build Coastguard Worker {
542*c0909341SAndroid Build Coastguard Worker     c->wiener[0] = c->wiener[1] = wiener_c;
543*c0909341SAndroid Build Coastguard Worker     c->sgr[0] = sgr_5x5_c;
544*c0909341SAndroid Build Coastguard Worker     c->sgr[1] = sgr_3x3_c;
545*c0909341SAndroid Build Coastguard Worker     c->sgr[2] = sgr_mix_c;
546*c0909341SAndroid Build Coastguard Worker 
547*c0909341SAndroid Build Coastguard Worker #if HAVE_ASM
548*c0909341SAndroid Build Coastguard Worker #if ARCH_AARCH64 || ARCH_ARM
549*c0909341SAndroid Build Coastguard Worker     loop_restoration_dsp_init_arm(c, bpc);
550*c0909341SAndroid Build Coastguard Worker #elif ARCH_LOONGARCH64
551*c0909341SAndroid Build Coastguard Worker     loop_restoration_dsp_init_loongarch(c, bpc);
552*c0909341SAndroid Build Coastguard Worker #elif ARCH_PPC64LE
553*c0909341SAndroid Build Coastguard Worker     loop_restoration_dsp_init_ppc(c, bpc);
554*c0909341SAndroid Build Coastguard Worker #elif ARCH_X86
555*c0909341SAndroid Build Coastguard Worker     loop_restoration_dsp_init_x86(c, bpc);
556*c0909341SAndroid Build Coastguard Worker #endif
557*c0909341SAndroid Build Coastguard Worker #endif
558*c0909341SAndroid Build Coastguard Worker }
559