1*c0909341SAndroid Build Coastguard Worker /*
2*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, VideoLAN and dav1d authors
3*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, Two Orioles, LLC
4*c0909341SAndroid Build Coastguard Worker * All rights reserved.
5*c0909341SAndroid Build Coastguard Worker *
6*c0909341SAndroid Build Coastguard Worker * Redistribution and use in source and binary forms, with or without
7*c0909341SAndroid Build Coastguard Worker * modification, are permitted provided that the following conditions are met:
8*c0909341SAndroid Build Coastguard Worker *
9*c0909341SAndroid Build Coastguard Worker * 1. Redistributions of source code must retain the above copyright notice, this
10*c0909341SAndroid Build Coastguard Worker * list of conditions and the following disclaimer.
11*c0909341SAndroid Build Coastguard Worker *
12*c0909341SAndroid Build Coastguard Worker * 2. Redistributions in binary form must reproduce the above copyright notice,
13*c0909341SAndroid Build Coastguard Worker * this list of conditions and the following disclaimer in the documentation
14*c0909341SAndroid Build Coastguard Worker * and/or other materials provided with the distribution.
15*c0909341SAndroid Build Coastguard Worker *
16*c0909341SAndroid Build Coastguard Worker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17*c0909341SAndroid Build Coastguard Worker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18*c0909341SAndroid Build Coastguard Worker * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19*c0909341SAndroid Build Coastguard Worker * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20*c0909341SAndroid Build Coastguard Worker * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21*c0909341SAndroid Build Coastguard Worker * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22*c0909341SAndroid Build Coastguard Worker * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23*c0909341SAndroid Build Coastguard Worker * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24*c0909341SAndroid Build Coastguard Worker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*c0909341SAndroid Build Coastguard Worker * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*c0909341SAndroid Build Coastguard Worker */
27*c0909341SAndroid Build Coastguard Worker
28*c0909341SAndroid Build Coastguard Worker #include "config.h"
29*c0909341SAndroid Build Coastguard Worker
30*c0909341SAndroid Build Coastguard Worker #include <string.h>
31*c0909341SAndroid Build Coastguard Worker
32*c0909341SAndroid Build Coastguard Worker #include "common/intops.h"
33*c0909341SAndroid Build Coastguard Worker
34*c0909341SAndroid Build Coastguard Worker #include "src/lf_apply.h"
35*c0909341SAndroid Build Coastguard Worker #include "src/lr_apply.h"
36*c0909341SAndroid Build Coastguard Worker
// The loop filter buffer stores 12 rows of pixels. A superblock will contain
// at most 2 stripes. Each stripe requires 4 rows of pixels (2 above and 2
// below); the final 4 rows are used to swap the bottom of the last stripe
// with the top of the next superblock row.
/*
 * Stores the rows that straddle each stripe boundary of one plane into dst.
 *
 * src is first moved to 2 rows above the end of the first stripe. Each loop
 * iteration then writes 4 rows to dst and leaves src one stripe
 * (64 >> ss_ver rows) further down. Iteration runs while
 * row + stripe_h <= row_h.
 *
 * If lr_backup is set and f->frame_hdr->width[0] != width[1], the rows are
 * generated by f->dsp->mc.resize() at dst_w pixels per row; otherwise they
 * are copied as-is at src_w pixels per row.
 *
 * When row + stripe_h + 1 == h only 3 source rows are used and the 4th
 * stored row is a copy of the 3rd stored row.
 *
 * When f->c->n_tc == 1 the first 4 rows of dst are skipped before the loop;
 * for row != 0 they are first filled from dst rows top .. top + 3, where
 * top = 4 << sb128.
 */
static void backup_lpf(const Dav1dFrameContext *const f,
                       pixel *dst, const ptrdiff_t dst_stride,
                       const pixel *src, const ptrdiff_t src_stride,
                       const int ss_ver, const int sb128,
                       int row, const int row_h, const int src_w,
                       const int h, const int ss_hor, const int lr_backup)
{
    const int cdef_backup = !lr_backup;

    int dst_w = src_w;
    if (f->frame_hdr->super_res.enabled)
        dst_w = (f->frame_hdr->width[1] + ss_hor) >> ss_hor;

    // The first stripe of the frame is shorter by 8 luma pixel rows.
    int stripe_h = 64 << (cdef_backup & sb128);
    if (!row)
        stripe_h -= 8;
    stripe_h >>= ss_ver;
    src += (stripe_h - 2) * PXSTRIDE(src_stride);

    if (f->c->n_tc == 1) {
        if (row) {
            // Copy the top part of the stored loop filtered pixels from the
            // previous sb row needed above the first stripe of this sb row.
            const int top = 4 << sb128;
            for (int i = 0; i < 4; i++)
                pixel_copy(&dst[PXSTRIDE(dst_stride) * i],
                           &dst[PXSTRIDE(dst_stride) * (top + i)], dst_w);
        }
        dst += 4 * PXSTRIDE(dst_stride);
    }

    // Loop-invariant: decides between the resizing and plain-copy paths.
    const int do_resize = lr_backup &&
                          f->frame_hdr->width[0] != f->frame_hdr->width[1];

    while (row + stripe_h <= row_h) {
        // 3 when the frame's last row is the 3rd row to back up, 4 otherwise.
        const int n_lines = (row + stripe_h + 1 == h) ? 3 : 4;

        if (do_resize) {
            f->dsp->mc.resize(dst, dst_stride, src, src_stride,
                              dst_w, n_lines, src_w, f->resize_step[ss_hor],
                              f->resize_start[ss_hor] HIGHBD_CALL_SUFFIX);
            row += stripe_h; // still the (possibly shorter) 1st-stripe height
            stripe_h = 64 >> ss_ver;
            src += stripe_h * PXSTRIDE(src_stride);
            dst += n_lines * PXSTRIDE(dst_stride);
            if (n_lines == 3) {
                // Duplicate the last resized row to fill the 4th slot.
                pixel_copy(dst, dst - PXSTRIDE(dst_stride), dst_w);
                dst += PXSTRIDE(dst_stride);
            }
        } else {
            for (int i = 0; i < 4; i++) {
                // Row index n_lines (only reachable when n_lines == 3) is
                // filled from the previously stored row instead of src.
                const pixel *const from =
                    (i == n_lines) ? dst - PXSTRIDE(dst_stride) : src;
                pixel_copy(dst, from, src_w);
                dst += PXSTRIDE(dst_stride);
                src += PXSTRIDE(src_stride);
            }
            row += stripe_h; // still the (possibly shorter) 1st-stripe height
            stripe_h = 64 >> ss_ver;
            src += (stripe_h - 4) * PXSTRIDE(src_stride);
        }
    }
}
103*c0909341SAndroid Build Coastguard Worker
bytefn(dav1d_copy_lpf)104*c0909341SAndroid Build Coastguard Worker void bytefn(dav1d_copy_lpf)(Dav1dFrameContext *const f,
105*c0909341SAndroid Build Coastguard Worker /*const*/ pixel *const src[3], const int sby)
106*c0909341SAndroid Build Coastguard Worker {
107*c0909341SAndroid Build Coastguard Worker const int have_tt = f->c->n_tc > 1;
108*c0909341SAndroid Build Coastguard Worker const int resize = f->frame_hdr->width[0] != f->frame_hdr->width[1];
109*c0909341SAndroid Build Coastguard Worker const int offset = 8 * !!sby;
110*c0909341SAndroid Build Coastguard Worker const ptrdiff_t *const src_stride = f->cur.stride;
111*c0909341SAndroid Build Coastguard Worker const ptrdiff_t *const lr_stride = f->sr_cur.p.stride;
112*c0909341SAndroid Build Coastguard Worker const int tt_off = have_tt * sby * (4 << f->seq_hdr->sb128);
113*c0909341SAndroid Build Coastguard Worker pixel *const dst[3] = {
114*c0909341SAndroid Build Coastguard Worker f->lf.lr_lpf_line[0] + tt_off * PXSTRIDE(lr_stride[0]),
115*c0909341SAndroid Build Coastguard Worker f->lf.lr_lpf_line[1] + tt_off * PXSTRIDE(lr_stride[1]),
116*c0909341SAndroid Build Coastguard Worker f->lf.lr_lpf_line[2] + tt_off * PXSTRIDE(lr_stride[1])
117*c0909341SAndroid Build Coastguard Worker };
118*c0909341SAndroid Build Coastguard Worker
119*c0909341SAndroid Build Coastguard Worker // TODO Also check block level restore type to reduce copying.
120*c0909341SAndroid Build Coastguard Worker const int restore_planes = f->lf.restore_planes;
121*c0909341SAndroid Build Coastguard Worker
122*c0909341SAndroid Build Coastguard Worker if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_Y) {
123*c0909341SAndroid Build Coastguard Worker const int h = f->cur.p.h;
124*c0909341SAndroid Build Coastguard Worker const int w = f->bw << 2;
125*c0909341SAndroid Build Coastguard Worker const int row_h = imin((sby + 1) << (6 + f->seq_hdr->sb128), h - 1);
126*c0909341SAndroid Build Coastguard Worker const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset;
127*c0909341SAndroid Build Coastguard Worker if (restore_planes & LR_RESTORE_Y || !resize)
128*c0909341SAndroid Build Coastguard Worker backup_lpf(f, dst[0], lr_stride[0],
129*c0909341SAndroid Build Coastguard Worker src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
130*c0909341SAndroid Build Coastguard Worker 0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 1);
131*c0909341SAndroid Build Coastguard Worker if (have_tt && resize) {
132*c0909341SAndroid Build Coastguard Worker const ptrdiff_t cdef_off_y = sby * 4 * PXSTRIDE(src_stride[0]);
133*c0909341SAndroid Build Coastguard Worker backup_lpf(f, f->lf.cdef_lpf_line[0] + cdef_off_y, src_stride[0],
134*c0909341SAndroid Build Coastguard Worker src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0],
135*c0909341SAndroid Build Coastguard Worker 0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0, 0);
136*c0909341SAndroid Build Coastguard Worker }
137*c0909341SAndroid Build Coastguard Worker }
138*c0909341SAndroid Build Coastguard Worker if ((f->seq_hdr->cdef || restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) &&
139*c0909341SAndroid Build Coastguard Worker f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400)
140*c0909341SAndroid Build Coastguard Worker {
141*c0909341SAndroid Build Coastguard Worker const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420;
142*c0909341SAndroid Build Coastguard Worker const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444;
143*c0909341SAndroid Build Coastguard Worker const int h = (f->cur.p.h + ss_ver) >> ss_ver;
144*c0909341SAndroid Build Coastguard Worker const int w = f->bw << (2 - ss_hor);
145*c0909341SAndroid Build Coastguard Worker const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128), h - 1);
146*c0909341SAndroid Build Coastguard Worker const int offset_uv = offset >> ss_ver;
147*c0909341SAndroid Build Coastguard Worker const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv;
148*c0909341SAndroid Build Coastguard Worker const ptrdiff_t cdef_off_uv = sby * 4 * PXSTRIDE(src_stride[1]);
149*c0909341SAndroid Build Coastguard Worker if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_U) {
150*c0909341SAndroid Build Coastguard Worker if (restore_planes & LR_RESTORE_U || !resize)
151*c0909341SAndroid Build Coastguard Worker backup_lpf(f, dst[1], lr_stride[1],
152*c0909341SAndroid Build Coastguard Worker src[1] - offset_uv * PXSTRIDE(src_stride[1]),
153*c0909341SAndroid Build Coastguard Worker src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
154*c0909341SAndroid Build Coastguard Worker row_h, w, h, ss_hor, 1);
155*c0909341SAndroid Build Coastguard Worker if (have_tt && resize)
156*c0909341SAndroid Build Coastguard Worker backup_lpf(f, f->lf.cdef_lpf_line[1] + cdef_off_uv, src_stride[1],
157*c0909341SAndroid Build Coastguard Worker src[1] - offset_uv * PXSTRIDE(src_stride[1]),
158*c0909341SAndroid Build Coastguard Worker src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
159*c0909341SAndroid Build Coastguard Worker row_h, w, h, ss_hor, 0);
160*c0909341SAndroid Build Coastguard Worker }
161*c0909341SAndroid Build Coastguard Worker if (f->seq_hdr->cdef || restore_planes & LR_RESTORE_V) {
162*c0909341SAndroid Build Coastguard Worker if (restore_planes & LR_RESTORE_V || !resize)
163*c0909341SAndroid Build Coastguard Worker backup_lpf(f, dst[2], lr_stride[1],
164*c0909341SAndroid Build Coastguard Worker src[2] - offset_uv * PXSTRIDE(src_stride[1]),
165*c0909341SAndroid Build Coastguard Worker src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
166*c0909341SAndroid Build Coastguard Worker row_h, w, h, ss_hor, 1);
167*c0909341SAndroid Build Coastguard Worker if (have_tt && resize)
168*c0909341SAndroid Build Coastguard Worker backup_lpf(f, f->lf.cdef_lpf_line[2] + cdef_off_uv, src_stride[1],
169*c0909341SAndroid Build Coastguard Worker src[2] - offset_uv * PXSTRIDE(src_stride[1]),
170*c0909341SAndroid Build Coastguard Worker src_stride[1], ss_ver, f->seq_hdr->sb128, y_stripe,
171*c0909341SAndroid Build Coastguard Worker row_h, w, h, ss_hor, 0);
172*c0909341SAndroid Build Coastguard Worker }
173*c0909341SAndroid Build Coastguard Worker }
174*c0909341SAndroid Build Coastguard Worker }
175*c0909341SAndroid Build Coastguard Worker
/*
 * Runs dsp->lf.loop_filter_sb[0][0] on the luma plane once per 4-pixel
 * column x in [0, w), skipping x == 0 when have_left is zero.
 *
 * The 3 mask words passed to the filter come from mask[x][i][..]:
 *  - starty4 == 0: mask[x][i][0], with mask[x][i][1] OR-ed into bits 16 and up
 *    when endy4 > 16;
 *  - starty4 != 0: mask[x][i][1] alone.
 * The 4th word is always 0. The filter is given endy4 - starty4 as its length.
 */
static inline void filter_plane_cols_y(const Dav1dFrameContext *const f,
                                       const int have_left,
                                       const uint8_t (*lvl)[4],
                                       const ptrdiff_t b4_stride,
                                       const uint16_t (*const mask)[3][2],
                                       pixel *dst, const ptrdiff_t ls,
                                       const int w,
                                       const int starty4, const int endy4)
{
    const Dav1dDSPContext *const dsp = f->dsp;
    const int n_rows4 = endy4 - starty4;

    // filter edges between columns (e.g. block1 | block2)
    // Start at column 1 when there is no left neighbour to filter against.
    for (int x = !have_left; x < w; x++) {
        uint32_t hmask[4];

        for (int i = 0; i < 3; i++) {
            if (starty4) {
                hmask[i] = mask[x][i][1];
            } else {
                hmask[i] = mask[x][i][0];
                if (endy4 > 16)
                    hmask[i] |= (unsigned) mask[x][i][1] << 16;
            }
        }
        hmask[3] = 0;

        dsp->lf.loop_filter_sb[0][0](dst + x * 4, ls, hmask,
                                     (const uint8_t(*)[4]) &lvl[x][0], b4_stride,
                                     &f->lf.lim_lut, n_rows4 HIGHBD_CALL_SUFFIX);
    }
}
211*c0909341SAndroid Build Coastguard Worker
/*
 * Runs dsp->lf.loop_filter_sb[0][1] on the luma plane once per 4-pixel row
 * y in [starty4, endy4), skipping y == 0 when have_top is zero.
 *
 * For each y the 3 mask words are mask[y][i][0] with mask[y][i][1] in
 * bits 16 and up; the 4th word is 0. dst advances by 4 pixel rows and lvl by
 * b4_stride entries per y, including for a skipped row.
 */
static inline void filter_plane_rows_y(const Dav1dFrameContext *const f,
                                       const int have_top,
                                       const uint8_t (*lvl)[4],
                                       const ptrdiff_t b4_stride,
                                       const uint16_t (*const mask)[3][2],
                                       pixel *dst, const ptrdiff_t ls,
                                       const int w,
                                       const int starty4, const int endy4)
{
    const Dav1dDSPContext *const dsp = f->dsp;

    //                                 block1
    // filter edges between rows (e.g. ------)
    //                                 block2
    for (int y = starty4; y < endy4; y++) {
        if (have_top || y) {
            uint32_t vmask[4];
            for (int i = 0; i < 3; i++)
                vmask[i] = mask[y][i][0] | ((unsigned) mask[y][i][1] << 16);
            vmask[3] = 0;

            dsp->lf.loop_filter_sb[0][1](dst, ls, vmask,
                                         (const uint8_t(*)[4]) &lvl[0][1], b4_stride,
                                         &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
        }
        // Advance to the next row of 4x4 blocks whether or not we filtered.
        dst += 4 * PXSTRIDE(ls);
        lvl += b4_stride;
    }
}
241*c0909341SAndroid Build Coastguard Worker
/*
 * Runs dsp->lf.loop_filter_sb[1][0] on both chroma planes (u, then v) once
 * per 4-pixel column x in [0, w), skipping x == 0 when have_left is zero.
 *
 * The 2 mask words come from mask[x][i][..]:
 *  - starty4 == 0: mask[x][i][0], with mask[x][i][1] shifted left by
 *    16 >> ss_ver and OR-ed in when endy4 > (16 >> ss_ver);
 *  - starty4 != 0: mask[x][i][1] alone.
 * The 3rd word is always 0. The u call reads levels from &lvl[x][2], the v
 * call from &lvl[x][3]; both get endy4 - starty4 as their length.
 */
static inline void filter_plane_cols_uv(const Dav1dFrameContext *const f,
                                        const int have_left,
                                        const uint8_t (*lvl)[4],
                                        const ptrdiff_t b4_stride,
                                        const uint16_t (*const mask)[2][2],
                                        pixel *const u, pixel *const v,
                                        const ptrdiff_t ls, const int w,
                                        const int starty4, const int endy4,
                                        const int ss_ver)
{
    const Dav1dDSPContext *const dsp = f->dsp;
    const int hi_shift = 16 >> ss_ver;
    const int n_rows4 = endy4 - starty4;

    // filter edges between columns (e.g. block1 | block2)
    // Start at column 1 when there is no left neighbour to filter against.
    for (int x = !have_left; x < w; x++) {
        uint32_t hmask[3];

        for (int i = 0; i < 2; i++) {
            if (starty4) {
                hmask[i] = mask[x][i][1];
            } else {
                hmask[i] = mask[x][i][0];
                if (endy4 > hi_shift)
                    hmask[i] |= (unsigned) mask[x][i][1] << hi_shift;
            }
        }
        hmask[2] = 0;

        dsp->lf.loop_filter_sb[1][0](u + x * 4, ls, hmask,
                                     (const uint8_t(*)[4]) &lvl[x][2], b4_stride,
                                     &f->lf.lim_lut, n_rows4 HIGHBD_CALL_SUFFIX);
        dsp->lf.loop_filter_sb[1][0](v + x * 4, ls, hmask,
                                     (const uint8_t(*)[4]) &lvl[x][3], b4_stride,
                                     &f->lf.lim_lut, n_rows4 HIGHBD_CALL_SUFFIX);
    }
}
278*c0909341SAndroid Build Coastguard Worker
/*
 * Runs dsp->lf.loop_filter_sb[1][1] on both chroma planes (u, then v) once
 * per 4-pixel row y in [starty4, endy4), skipping y == 0 when have_top is
 * zero.
 *
 * For each y the 2 mask words are mask[y][i][0] OR-ed with mask[y][i][1]
 * shifted left by 16 >> ss_hor; the 3rd word is 0. The u call reads levels
 * from &lvl[0][2], the v call from &lvl[0][3]. The pixel offset advances by
 * 4 rows and lvl by b4_stride entries per y, including for a skipped row.
 */
static inline void filter_plane_rows_uv(const Dav1dFrameContext *const f,
                                        const int have_top,
                                        const uint8_t (*lvl)[4],
                                        const ptrdiff_t b4_stride,
                                        const uint16_t (*const mask)[2][2],
                                        pixel *const u, pixel *const v,
                                        const ptrdiff_t ls, const int w,
                                        const int starty4, const int endy4,
                                        const int ss_hor)
{
    const Dav1dDSPContext *const dsp = f->dsp;
    const int hi_shift = 16 >> ss_hor;
    ptrdiff_t off_l = 0;

    //                                 block1
    // filter edges between rows (e.g. ------)
    //                                 block2
    for (int y = starty4; y < endy4; y++) {
        if (have_top || y) {
            uint32_t vmask[3];
            for (int i = 0; i < 2; i++)
                vmask[i] = mask[y][i][0] |
                           ((unsigned) mask[y][i][1] << hi_shift);
            vmask[2] = 0;

            dsp->lf.loop_filter_sb[1][1](u + off_l, ls, vmask,
                                         (const uint8_t(*)[4]) &lvl[0][2], b4_stride,
                                         &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
            dsp->lf.loop_filter_sb[1][1](v + off_l, ls, vmask,
                                         (const uint8_t(*)[4]) &lvl[0][3], b4_stride,
                                         &f->lf.lim_lut, w HIGHBD_CALL_SUFFIX);
        }
        // Advance to the next row of 4x4 blocks whether or not we filtered.
        off_l += 4 * PXSTRIDE(ls);
        lvl += b4_stride;
    }
}
312*c0909341SAndroid Build Coastguard Worker
bytefn(dav1d_loopfilter_sbrow_cols)313*c0909341SAndroid Build Coastguard Worker void bytefn(dav1d_loopfilter_sbrow_cols)(const Dav1dFrameContext *const f,
314*c0909341SAndroid Build Coastguard Worker pixel *const p[3], Av1Filter *const lflvl,
315*c0909341SAndroid Build Coastguard Worker int sby, const int start_of_tile_row)
316*c0909341SAndroid Build Coastguard Worker {
317*c0909341SAndroid Build Coastguard Worker int x, have_left;
318*c0909341SAndroid Build Coastguard Worker // Don't filter outside the frame
319*c0909341SAndroid Build Coastguard Worker const int is_sb64 = !f->seq_hdr->sb128;
320*c0909341SAndroid Build Coastguard Worker const int starty4 = (sby & is_sb64) << 4;
321*c0909341SAndroid Build Coastguard Worker const int sbsz = 32 >> is_sb64;
322*c0909341SAndroid Build Coastguard Worker const int sbl2 = 5 - is_sb64;
323*c0909341SAndroid Build Coastguard Worker const int halign = (f->bh + 31) & ~31;
324*c0909341SAndroid Build Coastguard Worker const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
325*c0909341SAndroid Build Coastguard Worker const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
326*c0909341SAndroid Build Coastguard Worker const int vmask = 16 >> ss_ver, hmask = 16 >> ss_hor;
327*c0909341SAndroid Build Coastguard Worker const unsigned vmax = 1U << vmask, hmax = 1U << hmask;
328*c0909341SAndroid Build Coastguard Worker const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
329*c0909341SAndroid Build Coastguard Worker const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
330*c0909341SAndroid Build Coastguard Worker
331*c0909341SAndroid Build Coastguard Worker // fix lpf strength at tile col boundaries
332*c0909341SAndroid Build Coastguard Worker const uint8_t *lpf_y = &f->lf.tx_lpf_right_edge[0][sby << sbl2];
333*c0909341SAndroid Build Coastguard Worker const uint8_t *lpf_uv = &f->lf.tx_lpf_right_edge[1][sby << (sbl2 - ss_ver)];
334*c0909341SAndroid Build Coastguard Worker for (int tile_col = 1;; tile_col++) {
335*c0909341SAndroid Build Coastguard Worker x = f->frame_hdr->tiling.col_start_sb[tile_col];
336*c0909341SAndroid Build Coastguard Worker if ((x << sbl2) >= f->bw) break;
337*c0909341SAndroid Build Coastguard Worker const int bx4 = x & is_sb64 ? 16 : 0, cbx4 = bx4 >> ss_hor;
338*c0909341SAndroid Build Coastguard Worker x >>= is_sb64;
339*c0909341SAndroid Build Coastguard Worker
340*c0909341SAndroid Build Coastguard Worker uint16_t (*const y_hmask)[2] = lflvl[x].filter_y[0][bx4];
341*c0909341SAndroid Build Coastguard Worker for (unsigned y = starty4, mask = 1 << y; y < endy4; y++, mask <<= 1) {
342*c0909341SAndroid Build Coastguard Worker const int sidx = mask >= 0x10000U;
343*c0909341SAndroid Build Coastguard Worker const unsigned smask = mask >> (sidx << 4);
344*c0909341SAndroid Build Coastguard Worker const int idx = 2 * !!(y_hmask[2][sidx] & smask) +
345*c0909341SAndroid Build Coastguard Worker !!(y_hmask[1][sidx] & smask);
346*c0909341SAndroid Build Coastguard Worker y_hmask[2][sidx] &= ~smask;
347*c0909341SAndroid Build Coastguard Worker y_hmask[1][sidx] &= ~smask;
348*c0909341SAndroid Build Coastguard Worker y_hmask[0][sidx] &= ~smask;
349*c0909341SAndroid Build Coastguard Worker y_hmask[imin(idx, lpf_y[y - starty4])][sidx] |= smask;
350*c0909341SAndroid Build Coastguard Worker }
351*c0909341SAndroid Build Coastguard Worker
352*c0909341SAndroid Build Coastguard Worker if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
353*c0909341SAndroid Build Coastguard Worker uint16_t (*const uv_hmask)[2] = lflvl[x].filter_uv[0][cbx4];
354*c0909341SAndroid Build Coastguard Worker for (unsigned y = starty4 >> ss_ver, uv_mask = 1 << y; y < uv_endy4;
355*c0909341SAndroid Build Coastguard Worker y++, uv_mask <<= 1)
356*c0909341SAndroid Build Coastguard Worker {
357*c0909341SAndroid Build Coastguard Worker const int sidx = uv_mask >= vmax;
358*c0909341SAndroid Build Coastguard Worker const unsigned smask = uv_mask >> (sidx << (4 - ss_ver));
359*c0909341SAndroid Build Coastguard Worker const int idx = !!(uv_hmask[1][sidx] & smask);
360*c0909341SAndroid Build Coastguard Worker uv_hmask[1][sidx] &= ~smask;
361*c0909341SAndroid Build Coastguard Worker uv_hmask[0][sidx] &= ~smask;
362*c0909341SAndroid Build Coastguard Worker uv_hmask[imin(idx, lpf_uv[y - (starty4 >> ss_ver)])][sidx] |= smask;
363*c0909341SAndroid Build Coastguard Worker }
364*c0909341SAndroid Build Coastguard Worker }
365*c0909341SAndroid Build Coastguard Worker lpf_y += halign;
366*c0909341SAndroid Build Coastguard Worker lpf_uv += halign >> ss_ver;
367*c0909341SAndroid Build Coastguard Worker }
368*c0909341SAndroid Build Coastguard Worker
369*c0909341SAndroid Build Coastguard Worker // fix lpf strength at tile row boundaries
370*c0909341SAndroid Build Coastguard Worker if (start_of_tile_row) {
371*c0909341SAndroid Build Coastguard Worker const BlockContext *a;
372*c0909341SAndroid Build Coastguard Worker for (x = 0, a = &f->a[f->sb128w * (start_of_tile_row - 1)];
373*c0909341SAndroid Build Coastguard Worker x < f->sb128w; x++, a++)
374*c0909341SAndroid Build Coastguard Worker {
375*c0909341SAndroid Build Coastguard Worker uint16_t (*const y_vmask)[2] = lflvl[x].filter_y[1][starty4];
376*c0909341SAndroid Build Coastguard Worker const unsigned w = imin(32, f->w4 - (x << 5));
377*c0909341SAndroid Build Coastguard Worker for (unsigned mask = 1, i = 0; i < w; mask <<= 1, i++) {
378*c0909341SAndroid Build Coastguard Worker const int sidx = mask >= 0x10000U;
379*c0909341SAndroid Build Coastguard Worker const unsigned smask = mask >> (sidx << 4);
380*c0909341SAndroid Build Coastguard Worker const int idx = 2 * !!(y_vmask[2][sidx] & smask) +
381*c0909341SAndroid Build Coastguard Worker !!(y_vmask[1][sidx] & smask);
382*c0909341SAndroid Build Coastguard Worker y_vmask[2][sidx] &= ~smask;
383*c0909341SAndroid Build Coastguard Worker y_vmask[1][sidx] &= ~smask;
384*c0909341SAndroid Build Coastguard Worker y_vmask[0][sidx] &= ~smask;
385*c0909341SAndroid Build Coastguard Worker y_vmask[imin(idx, a->tx_lpf_y[i])][sidx] |= smask;
386*c0909341SAndroid Build Coastguard Worker }
387*c0909341SAndroid Build Coastguard Worker
388*c0909341SAndroid Build Coastguard Worker if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
389*c0909341SAndroid Build Coastguard Worker const unsigned cw = (w + ss_hor) >> ss_hor;
390*c0909341SAndroid Build Coastguard Worker uint16_t (*const uv_vmask)[2] = lflvl[x].filter_uv[1][starty4 >> ss_ver];
391*c0909341SAndroid Build Coastguard Worker for (unsigned uv_mask = 1, i = 0; i < cw; uv_mask <<= 1, i++) {
392*c0909341SAndroid Build Coastguard Worker const int sidx = uv_mask >= hmax;
393*c0909341SAndroid Build Coastguard Worker const unsigned smask = uv_mask >> (sidx << (4 - ss_hor));
394*c0909341SAndroid Build Coastguard Worker const int idx = !!(uv_vmask[1][sidx] & smask);
395*c0909341SAndroid Build Coastguard Worker uv_vmask[1][sidx] &= ~smask;
396*c0909341SAndroid Build Coastguard Worker uv_vmask[0][sidx] &= ~smask;
397*c0909341SAndroid Build Coastguard Worker uv_vmask[imin(idx, a->tx_lpf_uv[i])][sidx] |= smask;
398*c0909341SAndroid Build Coastguard Worker }
399*c0909341SAndroid Build Coastguard Worker }
400*c0909341SAndroid Build Coastguard Worker }
401*c0909341SAndroid Build Coastguard Worker }
402*c0909341SAndroid Build Coastguard Worker
403*c0909341SAndroid Build Coastguard Worker pixel *ptr;
404*c0909341SAndroid Build Coastguard Worker uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
405*c0909341SAndroid Build Coastguard Worker for (ptr = p[0], have_left = 0, x = 0; x < f->sb128w;
406*c0909341SAndroid Build Coastguard Worker x++, have_left = 1, ptr += 128, level_ptr += 32)
407*c0909341SAndroid Build Coastguard Worker {
408*c0909341SAndroid Build Coastguard Worker filter_plane_cols_y(f, have_left, level_ptr, f->b4_stride,
409*c0909341SAndroid Build Coastguard Worker lflvl[x].filter_y[0], ptr, f->cur.stride[0],
410*c0909341SAndroid Build Coastguard Worker imin(32, f->w4 - x * 32), starty4, endy4);
411*c0909341SAndroid Build Coastguard Worker }
412*c0909341SAndroid Build Coastguard Worker
413*c0909341SAndroid Build Coastguard Worker if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
414*c0909341SAndroid Build Coastguard Worker return;
415*c0909341SAndroid Build Coastguard Worker
416*c0909341SAndroid Build Coastguard Worker ptrdiff_t uv_off;
417*c0909341SAndroid Build Coastguard Worker level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
418*c0909341SAndroid Build Coastguard Worker for (uv_off = 0, have_left = 0, x = 0; x < f->sb128w;
419*c0909341SAndroid Build Coastguard Worker x++, have_left = 1, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
420*c0909341SAndroid Build Coastguard Worker {
421*c0909341SAndroid Build Coastguard Worker filter_plane_cols_uv(f, have_left, level_ptr, f->b4_stride,
422*c0909341SAndroid Build Coastguard Worker lflvl[x].filter_uv[0],
423*c0909341SAndroid Build Coastguard Worker &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
424*c0909341SAndroid Build Coastguard Worker (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
425*c0909341SAndroid Build Coastguard Worker starty4 >> ss_ver, uv_endy4, ss_ver);
426*c0909341SAndroid Build Coastguard Worker }
427*c0909341SAndroid Build Coastguard Worker }
428*c0909341SAndroid Build Coastguard Worker
bytefn(dav1d_loopfilter_sbrow_rows)429*c0909341SAndroid Build Coastguard Worker void bytefn(dav1d_loopfilter_sbrow_rows)(const Dav1dFrameContext *const f,
430*c0909341SAndroid Build Coastguard Worker pixel *const p[3], Av1Filter *const lflvl,
431*c0909341SAndroid Build Coastguard Worker int sby)
432*c0909341SAndroid Build Coastguard Worker {
433*c0909341SAndroid Build Coastguard Worker int x;
434*c0909341SAndroid Build Coastguard Worker // Don't filter outside the frame
435*c0909341SAndroid Build Coastguard Worker const int have_top = sby > 0;
436*c0909341SAndroid Build Coastguard Worker const int is_sb64 = !f->seq_hdr->sb128;
437*c0909341SAndroid Build Coastguard Worker const int starty4 = (sby & is_sb64) << 4;
438*c0909341SAndroid Build Coastguard Worker const int sbsz = 32 >> is_sb64;
439*c0909341SAndroid Build Coastguard Worker const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
440*c0909341SAndroid Build Coastguard Worker const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
441*c0909341SAndroid Build Coastguard Worker const unsigned endy4 = starty4 + imin(f->h4 - sby * sbsz, sbsz);
442*c0909341SAndroid Build Coastguard Worker const unsigned uv_endy4 = (endy4 + ss_ver) >> ss_ver;
443*c0909341SAndroid Build Coastguard Worker
444*c0909341SAndroid Build Coastguard Worker pixel *ptr;
445*c0909341SAndroid Build Coastguard Worker uint8_t (*level_ptr)[4] = f->lf.level + f->b4_stride * sby * sbsz;
446*c0909341SAndroid Build Coastguard Worker for (ptr = p[0], x = 0; x < f->sb128w; x++, ptr += 128, level_ptr += 32) {
447*c0909341SAndroid Build Coastguard Worker filter_plane_rows_y(f, have_top, level_ptr, f->b4_stride,
448*c0909341SAndroid Build Coastguard Worker lflvl[x].filter_y[1], ptr, f->cur.stride[0],
449*c0909341SAndroid Build Coastguard Worker imin(32, f->w4 - x * 32), starty4, endy4);
450*c0909341SAndroid Build Coastguard Worker }
451*c0909341SAndroid Build Coastguard Worker
452*c0909341SAndroid Build Coastguard Worker if (!f->frame_hdr->loopfilter.level_u && !f->frame_hdr->loopfilter.level_v)
453*c0909341SAndroid Build Coastguard Worker return;
454*c0909341SAndroid Build Coastguard Worker
455*c0909341SAndroid Build Coastguard Worker ptrdiff_t uv_off;
456*c0909341SAndroid Build Coastguard Worker level_ptr = f->lf.level + f->b4_stride * (sby * sbsz >> ss_ver);
457*c0909341SAndroid Build Coastguard Worker for (uv_off = 0, x = 0; x < f->sb128w;
458*c0909341SAndroid Build Coastguard Worker x++, uv_off += 128 >> ss_hor, level_ptr += 32 >> ss_hor)
459*c0909341SAndroid Build Coastguard Worker {
460*c0909341SAndroid Build Coastguard Worker filter_plane_rows_uv(f, have_top, level_ptr, f->b4_stride,
461*c0909341SAndroid Build Coastguard Worker lflvl[x].filter_uv[1],
462*c0909341SAndroid Build Coastguard Worker &p[1][uv_off], &p[2][uv_off], f->cur.stride[1],
463*c0909341SAndroid Build Coastguard Worker (imin(32, f->w4 - x * 32) + ss_hor) >> ss_hor,
464*c0909341SAndroid Build Coastguard Worker starty4 >> ss_ver, uv_endy4, ss_hor);
465*c0909341SAndroid Build Coastguard Worker }
466*c0909341SAndroid Build Coastguard Worker }
467