xref: /aosp_15_r20/external/libdav1d/src/lf_apply_tmpl.c (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1 /*
2  * Copyright © 2018, VideoLAN and dav1d authors
3  * Copyright © 2018, Two Orioles, LLC
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this
10  *    list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  *    this list of conditions and the following disclaimer in the documentation
14  *    and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 #include "config.h"
29 
30 #include <string.h>
31 
32 #include "common/intops.h"
33 
34 #include "src/lf_apply.h"
35 #include "src/lr_apply.h"
36 
37 // The loop filter buffer stores 12 rows of pixels. A superblock will
38 // contain at most 2 stripes. Each stripe requires 4 rows of pixels (2 above
39 // and 2 below); the final 4 rows are used to swap the bottom of the last
40 // stripe with the top of the next superblock row.
backup_lpf(const Dav1dFrameContext * const f,pixel * dst,const ptrdiff_t dst_stride,const pixel * src,const ptrdiff_t src_stride,const int ss_ver,const int sb128,int row,const int row_h,const int src_w,const int h,const int ss_hor,const int lr_backup)41 static void backup_lpf(const Dav1dFrameContext *const f,
42                        pixel *dst, const ptrdiff_t dst_stride,
43                        const pixel *src, const ptrdiff_t src_stride,
44                        const int ss_ver, const int sb128,
45                        int row, const int row_h, const int src_w,
46                        const int h, const int ss_hor, const int lr_backup)
47 {
48     const int cdef_backup = !lr_backup;
49     const int dst_w = f->frame_hdr->super_res.enabled ?
50                       (f->frame_hdr->width[1] + ss_hor) >> ss_hor : src_w;
51 
52     // The first stripe of the frame is shorter by 8 luma pixel rows.
53     int stripe_h = ((64 << (cdef_backup & sb128)) - 8 * !row) >> ss_ver;
54     src += (stripe_h - 2) * PXSTRIDE(src_stride);
55 
56     if (f->c->n_tc == 1) {
57         if (row) {
58             const int top = 4 << sb128;
59             // Copy the top part of the stored loop filtered pixels from the
60             // previous sb row needed above the first stripe of this sb row.
61             pixel_copy(&dst[PXSTRIDE(dst_stride) *  0],
62                        &dst[PXSTRIDE(dst_stride) *  top],      dst_w);
63             pixel_copy(&dst[PXSTRIDE(dst_stride) *  1],
64                        &dst[PXSTRIDE(dst_stride) * (top + 1)], dst_w);
65             pixel_copy(&dst[PXSTRIDE(dst_stride) *  2],
66                        &dst[PXSTRIDE(dst_stride) * (top + 2)], dst_w);
67             pixel_copy(&dst[PXSTRIDE(dst_stride) *  3],
68                        &dst[PXSTRIDE(dst_stride) * (top + 3)], dst_w);
69         }
70         dst += 4 * PXSTRIDE(dst_stride);
71     }
72 
73     if (lr_backup && (f->frame_hdr->width[0] != f->frame_hdr->width[1])) {
74         while (row + stripe_h <= row_h) {
75             const int n_lines = 4 - (row + stripe_h + 1 == h);
76             f->dsp->mc.resize(dst, dst_stride, src, src_stride,
77                               dst_w, n_lines, src_w, f->resize_step[ss_hor],
78                               f->resize_start[ss_hor] HIGHBD_CALL_SUFFIX);
79             row += stripe_h; // unmodified stripe_h for the 1st stripe
80             stripe_h = 64 >> ss_ver;
81             src += stripe_h * PXSTRIDE(src_stride);
82             dst += n_lines * PXSTRIDE(dst_stride);
83             if (n_lines == 3) {
84                 pixel_copy(dst, &dst[-PXSTRIDE(dst_stride)], dst_w);
85                 dst += PXSTRIDE(dst_stride);
86             }
87         }
88     } else {
89         while (row + stripe_h <= row_h) {
90             const int n_lines = 4 - (row + stripe_h + 1 == h);
91             for (int i = 0; i < 4; i++) {
92                 pixel_copy(dst, i == n_lines ? &dst[-PXSTRIDE(dst_stride)] :
93                                                src, src_w);
94                 dst += PXSTRIDE(dst_stride);
95                 src += PXSTRIDE(src_stride);
96             }
97             row += stripe_h; // unmodified stripe_h for the 1st stripe
98             stripe_h = 64 >> ss_ver;
99             src += (stripe_h - 4) * PXSTRIDE(src_stride);
100         }
101     }
102 }
103 
bytefn(dav1d_copy_lpf)104 void bytefn(dav1d_copy_lpf)(Dav1dFrameContext *const f,
105                             /*const*/ pixel *const src[3], const int sby)
106 {
107     const int have_tt = f->c->n_tc > 1;
108     const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
109     const int offset = 8 * !!sby;
110     const ptrdiff_t *const src_stride = f->cur.stride;
111     const ptrdiff_t *const lr_stride = f->sr_cur.p.stride;
112     const int tt_off = have_tt * sby * (4 << f->seq_hdr->sb128);
113     pixel *const dst[3] = {
114         f->lf.lr_lpf_line[0] + tt_off * PXSTRIDE(lr_stride[0]),
115         f->lf.lr_lpf_line[1] + tt_off * PXSTRIDE(lr_stride[1]),
116         f->lf.lr_lpf_line[2] + tt_off * PXSTRIDE(lr_stride[1])
117     };
118 
119     // TODO Also check block level restore type to reduce copying.
120     const int restore_planes = f->lf.restore_planes;
121 
122     if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_Y) {
123         const int h = f->cur.p.h;
124         const int w = f->bw << 2;
125         const int row_h = imin((sby + 1) << (6 + f->seq_hdr->sb128), h - 1);
126         const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset;
127         if (restore_planes & LR_RESTORE_Y || !resize)
128             backup_lpf(f, dst[0], lr_stride[0],
129                        src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
130                        0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 1);
131         if (have_tt && resize) {
132             const ptrdiff_t cdef_off_y = sby * 4 * PXSTRIDE(src_stride[0]);
133             backup_lpf(f, f->lf.cdef_lpf_line[0] + cdef_off_y, src_stride[0],
134                        src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
135                        0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 0);
136         }
137     }
138     if ((f->seq_hdr->cdef || restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) &&
139         f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400)
140     {
141         const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
142         const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
143         const int h = (f->cur.p.h + ss_ver) >> ss_ver;
144         const int w = f->bw << (2 - ss_hor);
145         const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128), h - 1);
146         const int offset_uv = offset >> ss_ver;
147         const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
148         const ptrdiff_t cdef_off_uv = sby * 4 * PXSTRIDE(src_stride[1]);
149         if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_U) {
150             if (restore_planes & LR_RESTORE_U || !resize)
151                 backup_lpf(f, dst[1], lr_stride[1],
152                            src[1] - offset_uv * PXSTRIDE(src_stride[1]),
153                            src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
154                            row_h, w, h, ss_hor, 1);
155             if (have_tt && resize)
156                 backup_lpf(f, f->lf.cdef_lpf_line[1] + cdef_off_uv, src_stride[1],
157                            src[1] - offset_uv * PXSTRIDE(src_stride[1]),
158                            src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
159                            row_h, w, h, ss_hor, 0);
160         }
161         if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_V) {
162             if (restore_planes & LR_RESTORE_V || !resize)
163                 backup_lpf(f, dst[2], lr_stride[1],
164                            src[2] - offset_uv * PXSTRIDE(src_stride[1]),
165                            src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
166                            row_h, w, h, ss_hor, 1);
167             if (have_tt && resize)
168                 backup_lpf(f, f->lf.cdef_lpf_line[2] + cdef_off_uv, src_stride[1],
169                            src[2] - offset_uv * PXSTRIDE(src_stride[1]),
170                            src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
171                            row_h, w, h, ss_hor, 0);
172         }
173     }
174 }
175 
filter_plane_cols_y(const Dav1dFrameContext * const f,const int have_left,const uint8_t (* lvl)[4],const ptrdiff_t b4_stride,const uint16_t (* const mask)[3][2],pixel * dst,const ptrdiff_t ls,const int w,const int starty4,const int endy4)176 static inline void filter_plane_cols_y(const Dav1dFrameContext *const f,
177                                        const int have_left,
178                                        const uint8_t (*lvl)[4],
179                                        const ptrdiff_t b4_stride,
180                                        const uint16_t (*const mask)[3][2],
181                                        pixel *dst, const ptrdiff_t ls,
182                                        const int w,
183                                        const int starty4, const int endy4)
184 {
185     const Dav1dDSPContext *const dsp = f->dsp;
186 
187     // filter edges between columns (e.g. block1 | block2)
188     for (int x = 0; x < w; x++) {
189         if (!have_left && !x) continue;
190         uint32_t hmask[4];
191         if (!starty4) {
192             hmask[0] = mask[x][0][0];
193             hmask[1] = mask[x][1][0];
194             hmask[2] = mask[x][2][0];
195             if (endy4 > 16) {
196                 hmask[0] |= (unsigned) mask[x][0][1] << 16;
197                 hmask[1] |= (unsigned) mask[x][1][1] << 16;
198                 hmask[2] |= (unsigned) mask[x][2][1] << 16;
199             }
200         } else {
201             hmask[0] = mask[x][0][1];
202             hmask[1] = mask[x][1][1];
203             hmask[2] = mask[x][2][1];
204         }
205         hmask[3] = 0;
206         dsp->lf.loop_filter_sb[0][0](&dst[x * 4], ls, hmask,
207                                      (const uint8_t(*)[4]) &lvl[x][0], b4_stride,
208                                      &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
209     }
210 }
211 
filter_plane_rows_y(const Dav1dFrameContext * const f,const int have_top,const uint8_t (* lvl)[4],const ptrdiff_t b4_stride,const uint16_t (* const mask)[3][2],pixel * dst,const ptrdiff_t ls,const int w,const int starty4,const int endy4)212 static inline void filter_plane_rows_y(const Dav1dFrameContext *const f,
213                                        const int have_top,
214                                        const uint8_t (*lvl)[4],
215                                        const ptrdiff_t b4_stride,
216                                        const uint16_t (*const mask)[3][2],
217                                        pixel *dst, const ptrdiff_t ls,
218                                        const int w,
219                                        const int starty4, const int endy4)
220 {
221     const Dav1dDSPContext *const dsp = f->dsp;
222 
223     //                                 block1
224     // filter edges between rows (e.g. ------)
225     //                                 block2
226     for (int y = starty4; y < endy4;
227          y++, dst += 4 * PXSTRIDE(ls), lvl += b4_stride)
228     {
229         if (!have_top && !y) continue;
230         const uint32_t vmask[4] = {
231             mask[y][0][0] | ((unsigned) mask[y][0][1] << 16),
232             mask[y][1][0] | ((unsigned) mask[y][1][1] << 16),
233             mask[y][2][0] | ((unsigned) mask[y][2][1] << 16),
234             0,
235         };
236         dsp->lf.loop_filter_sb[0][1](dst, ls, vmask,
237                                      (const uint8_t(*)[4]) &lvl[0][1], b4_stride,
238                                      &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
239     }
240 }
241 
filter_plane_cols_uv(const Dav1dFrameContext * const f,const int have_left,const uint8_t (* lvl)[4],const ptrdiff_t b4_stride,const uint16_t (* const mask)[2][2],pixel * const u,pixel * const v,const ptrdiff_t ls,const int w,const int starty4,const int endy4,const int ss_ver)242 static inline void filter_plane_cols_uv(const Dav1dFrameContext *const f,
243                                         const int have_left,
244                                         const uint8_t (*lvl)[4],
245                                         const ptrdiff_t b4_stride,
246                                         const uint16_t (*const mask)[2][2],
247                                         pixel *const u, pixel *const v,
248                                         const ptrdiff_t ls, const int w,
249                                         const int starty4, const int endy4,
250                                         const int ss_ver)
251 {
252     const Dav1dDSPContext *const dsp = f->dsp;
253 
254     // filter edges between columns (e.g. block1 | block2)
255     for (int x = 0; x < w; x++) {
256         if (!have_left && !x) continue;
257         uint32_t hmask[3];
258         if (!starty4) {
259             hmask[0] = mask[x][0][0];
260             hmask[1] = mask[x][1][0];
261             if (endy4 > (16 >> ss_ver)) {
262                 hmask[0] |= (unsigned) mask[x][0][1] << (16 >> ss_ver);
263                 hmask[1] |= (unsigned) mask[x][1][1] << (16 >> ss_ver);
264             }
265         } else {
266             hmask[0] = mask[x][0][1];
267             hmask[1] = mask[x][1][1];
268         }
269         hmask[2] = 0;
270         dsp->lf.loop_filter_sb[1][0](&u[x * 4], ls, hmask,
271                                      (const uint8_t(*)[4]) &lvl[x][2], b4_stride,
272                                      &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
273         dsp->lf.loop_filter_sb[1][0](&v[x * 4], ls, hmask,
274                                      (const uint8_t(*)[4]) &lvl[x][3], b4_stride,
275                                      &f->lf.lim_lut, endy4 - starty4 HIGHBD_CALL_SUFFIX);
276     }
277 }
278 
filter_plane_rows_uv(const Dav1dFrameContext * const f,const int have_top,const uint8_t (* lvl)[4],const ptrdiff_t b4_stride,const uint16_t (* const mask)[2][2],pixel * const u,pixel * const v,const ptrdiff_t ls,const int w,const int starty4,const int endy4,const int ss_hor)279 static inline void filter_plane_rows_uv(const Dav1dFrameContext *const f,
280                                         const int have_top,
281                                         const uint8_t (*lvl)[4],
282                                         const ptrdiff_t b4_stride,
283                                         const uint16_t (*const mask)[2][2],
284                                         pixel *const u, pixel *const v,
285                                         const ptrdiff_t ls, const int w,
286                                         const int starty4, const int endy4,
287                                         const int ss_hor)
288 {
289     const Dav1dDSPContext *const dsp = f->dsp;
290     ptrdiff_t off_l = 0;
291 
292     //                                 block1
293     // filter edges between rows (e.g. ------)
294     //                                 block2
295     for (int y = starty4; y < endy4;
296          y++, off_l += 4 * PXSTRIDE(ls), lvl += b4_stride)
297     {
298         if (!have_top && !y) continue;
299         const uint32_t vmask[3] = {
300             mask[y][0][0] | ((unsigned) mask[y][0][1] << (16 >> ss_hor)),
301             mask[y][1][0] | ((unsigned) mask[y][1][1] << (16 >> ss_hor)),
302             0,
303         };
304         dsp->lf.loop_filter_sb[1][1](&u[off_l], ls, vmask,
305                                      (const uint8_t(*)[4]) &lvl[0][2], b4_stride,
306                                      &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
307         dsp->lf.loop_filter_sb[1][1](&v[off_l], ls, vmask,
308                                      (const uint8_t(*)[4]) &lvl[0][3], b4_stride,
309                                      &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
310     }
311 }
312 
bytefn(dav1d_loopfilter_sbrow_cols)313 void bytefn(dav1d_loopfilter_sbrow_cols)(const Dav1dFrameContext *const f,
314                                          pixel *const p[3], Av1Filter *const lflvl,
315                                          int sby, const int start_of_tile_row)
316 {
317     int x, have_left;
318     // Don't filter outside the frame
319     const int is_sb64 = !f->seq_hdr->sb128;
320     const int starty4 = (sby & is_sb64) << 4;
321     const int sbsz = 32 >> is_sb64;
322     const int sbl2 = 5 - is_sb64;
323     const int halign = (f->bh + 31) & ~31;
324     const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
325     const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
326     const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
327     const unsigned vmax = 1U << vmask, hmax = 1U << hmask;
328     const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
329     const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
330 
331     // fix lpf strength at tile col boundaries
332     const uint8_t *lpf_y = &f->lf.tx_lpf_right_edge[0][sby << sbl2];
333     const uint8_t *lpf_uv = &f->lf.tx_lpf_right_edge[1][sby << (sbl2 - ss_ver)];
334     for (int tile_col = 1;; tile_col++) {
335         x = f->frame_hdr->tiling.col_start_sb[tile_col];
336         if ((x << sbl2) >= f->bw) break;
337         const int bx4 = x & is_sb64 ? 16 : 0, cbx4 = bx4 >> ss_hor;
338         x >>= is_sb64;
339 
340         uint16_t (*const y_hmask)[2] = lflvl[x].filter_y[0][bx4];
341         for (unsigned y = starty4, mask = 1 << y; y < endy4; y++, mask <<= 1) {
342             const int sidx = mask >= 0x10000U;
343             const unsigned smask = mask >> (sidx << 4);
344             const int idx = 2 * !!(y_hmask[2][sidx] & smask) +
345                                 !!(y_hmask[1][sidx] & smask);
346             y_hmask[2][sidx] &= ~smask;
347             y_hmask[1][sidx] &= ~smask;
348             y_hmask[0][sidx] &= ~smask;
349             y_hmask[imin(idx, lpf_y[y - starty4])][sidx] |= smask;
350         }
351 
352         if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
353             uint16_t (*const uv_hmask)[2] = lflvl[x].filter_uv[0][cbx4];
354             for (unsigned y = starty4 >> ss_ver, uv_mask = 1 << y; y < uv_endy4;
355                  y++, uv_mask <<= 1)
356             {
357                 const int sidx = uv_mask >= vmax;
358                 const unsigned smask = uv_mask >> (sidx << (4 - ss_ver));
359                 const int idx = !!(uv_hmask[1][sidx] & smask);
360                 uv_hmask[1][sidx] &= ~smask;
361                 uv_hmask[0][sidx] &= ~smask;
362                 uv_hmask[imin(idx, lpf_uv[y - (starty4 >> ss_ver)])][sidx] |= smask;
363             }
364         }
365         lpf_y  += halign;
366         lpf_uv += halign >> ss_ver;
367     }
368 
369     // fix lpf strength at tile row boundaries
370     if (start_of_tile_row) {
371         const BlockContext *a;
372         for (x = 0, a = &f->a[f->sb128w * (start_of_tile_row - 1)];
373              x < f->sb128w; x++, a++)
374         {
375             uint16_t (*const y_vmask)[2] = lflvl[x].filter_y[1][starty4];
376             const unsigned w = imin(32, f->w4 - (x << 5));
377             for (unsigned mask = 1, i = 0; i < w; mask <<= 1, i++) {
378                 const int sidx = mask >= 0x10000U;
379                 const unsigned smask = mask >> (sidx << 4);
380                 const int idx = 2 * !!(y_vmask[2][sidx] & smask) +
381                                     !!(y_vmask[1][sidx] & smask);
382                 y_vmask[2][sidx] &= ~smask;
383                 y_vmask[1][sidx] &= ~smask;
384                 y_vmask[0][sidx] &= ~smask;
385                 y_vmask[imin(idx, a->tx_lpf_y[i])][sidx] |= smask;
386             }
387 
388             if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
389                 const unsigned cw = (w + ss_hor) >> ss_hor;
390                 uint16_t (*const uv_vmask)[2] = lflvl[x].filter_uv[1][starty4 >> ss_ver];
391                 for (unsigned uv_mask = 1, i = 0; i < cw; uv_mask <<= 1, i++) {
392                     const int sidx = uv_mask >= hmax;
393                     const unsigned smask = uv_mask >> (sidx << (4 - ss_hor));
394                     const int idx = !!(uv_vmask[1][sidx] & smask);
395                     uv_vmask[1][sidx] &= ~smask;
396                     uv_vmask[0][sidx] &= ~smask;
397                     uv_vmask[imin(idx, a->tx_lpf_uv[i])][sidx] |= smask;
398                 }
399             }
400         }
401     }
402 
403     pixel *ptr;
404     uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
405     for (ptr = p[0], have_left = 0, x = 0; x < f->sb128w;
406          x++, have_left = 1, ptr += 128, level_ptr += 32)
407     {
408         filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,
409                             lflvl[x].filter_y[0], ptr, f->cur.stride[0],
410                             imin(32, f->w4 - x * 32), starty4, endy4);
411     }
412 
413     if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
414         return;
415 
416     ptrdiff_t uv_off;
417     level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
418     for (uv_off = 0, have_left = 0, x = 0; x < f->sb128w;
419          x++, have_left = 1, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
420     {
421         filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,
422                              lflvl[x].filter_uv[0],
423                              &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
424                              (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
425                              starty4 >> ss_ver, uv_endy4, ss_ver);
426     }
427 }
428 
bytefn(dav1d_loopfilter_sbrow_rows)429 void bytefn(dav1d_loopfilter_sbrow_rows)(const Dav1dFrameContext *const f,
430                                          pixel *const p[3], Av1Filter *const lflvl,
431                                          int sby)
432 {
433     int x;
434     // Don't filter outside the frame
435     const int have_top = sby > 0;
436     const int is_sb64 = !f->seq_hdr->sb128;
437     const int starty4 = (sby & is_sb64) << 4;
438     const int sbsz = 32 >> is_sb64;
439     const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
440     const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
441     const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
442     const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
443 
444     pixel *ptr;
445     uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
446     for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {
447         filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,
448                             lflvl[x].filter_y[1], ptr, f->cur.stride[0],
449                             imin(32, f->w4 - x * 32), starty4, endy4);
450     }
451 
452     if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
453         return;
454 
455     ptrdiff_t uv_off;
456     level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
457     for (uv_off = 0, x = 0; x < f->sb128w;
458          x++, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
459     {
460         filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,
461                              lflvl[x].filter_uv[1],
462                              &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
463                              (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
464                              starty4 >> ss_ver, uv_endy4, ss_hor);
465     }
466 }
467