/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
27*c0909341SAndroid Build Coastguard Worker
28*c0909341SAndroid Build Coastguard Worker #include "config.h"
29*c0909341SAndroid Build Coastguard Worker
30*c0909341SAndroid Build Coastguard Worker #include <stdlib.h>
31*c0909341SAndroid Build Coastguard Worker #include <string.h>
32*c0909341SAndroid Build Coastguard Worker
33*c0909341SAndroid Build Coastguard Worker #include "common/attributes.h"
34*c0909341SAndroid Build Coastguard Worker #include "common/intops.h"
35*c0909341SAndroid Build Coastguard Worker
36*c0909341SAndroid Build Coastguard Worker #include "src/mc.h"
37*c0909341SAndroid Build Coastguard Worker #include "src/tables.h"
38*c0909341SAndroid Build Coastguard Worker
/*
 * Bit-depth dependent parameters of the 16-bit intermediate samples:
 *  - get_intermediate_bits() is the left shift applied to pixels when they
 *    are stored as intermediates;
 *  - PREP_BIAS is subtracted from every value the prep functions store.
 */
#if BITDEPTH != 8
// Shift is 4 for 10 bits/component and 2 for 12 bits/component
#define get_intermediate_bits(bitdepth_max) (14 - bitdepth_from_max(bitdepth_max))
// Unbiased output lies in [-20588, 36956] (10-bit) or [-20602, 36983]
// (12-bit); the bias is what makes it fit in int16_t
#define PREP_BIAS 8192
#else
#define get_intermediate_bits(bitdepth_max) 4
// Output lies in [-5132, 9212], which already fits in int16_t
#define PREP_BIAS 0
#endif
50*c0909341SAndroid Build Coastguard Worker
51*c0909341SAndroid Build Coastguard Worker static NOINLINE void
put_c(pixel * dst,const ptrdiff_t dst_stride,const pixel * src,const ptrdiff_t src_stride,const int w,int h)52*c0909341SAndroid Build Coastguard Worker put_c(pixel *dst, const ptrdiff_t dst_stride,
53*c0909341SAndroid Build Coastguard Worker const pixel *src, const ptrdiff_t src_stride, const int w, int h)
54*c0909341SAndroid Build Coastguard Worker {
55*c0909341SAndroid Build Coastguard Worker do {
56*c0909341SAndroid Build Coastguard Worker pixel_copy(dst, src, w);
57*c0909341SAndroid Build Coastguard Worker
58*c0909341SAndroid Build Coastguard Worker dst += dst_stride;
59*c0909341SAndroid Build Coastguard Worker src += src_stride;
60*c0909341SAndroid Build Coastguard Worker } while (--h);
61*c0909341SAndroid Build Coastguard Worker }
62*c0909341SAndroid Build Coastguard Worker
63*c0909341SAndroid Build Coastguard Worker static NOINLINE void
prep_c(int16_t * tmp,const pixel * src,const ptrdiff_t src_stride,const int w,int h HIGHBD_DECL_SUFFIX)64*c0909341SAndroid Build Coastguard Worker prep_c(int16_t *tmp, const pixel *src, const ptrdiff_t src_stride,
65*c0909341SAndroid Build Coastguard Worker const int w, int h HIGHBD_DECL_SUFFIX)
66*c0909341SAndroid Build Coastguard Worker {
67*c0909341SAndroid Build Coastguard Worker const int intermediate_bits = get_intermediate_bits(bitdepth_max);
68*c0909341SAndroid Build Coastguard Worker do {
69*c0909341SAndroid Build Coastguard Worker for (int x = 0; x < w; x++)
70*c0909341SAndroid Build Coastguard Worker tmp[x] = (src[x] << intermediate_bits) - PREP_BIAS;
71*c0909341SAndroid Build Coastguard Worker
72*c0909341SAndroid Build Coastguard Worker tmp += w;
73*c0909341SAndroid Build Coastguard Worker src += src_stride;
74*c0909341SAndroid Build Coastguard Worker } while (--h);
75*c0909341SAndroid Build Coastguard Worker }
76*c0909341SAndroid Build Coastguard Worker
/*
 * 8-tap FIR filter helpers.
 *
 * FILTER_8TAP() returns the raw weighted sum of the eight samples located at
 * src[x - 3 * stride] .. src[x + 4 * stride], weighted by F[0] .. F[7].
 * Use stride 1 to filter along a row and the row pitch to filter along a
 * column.
 *
 * Every parameter is parenthesized in the expansion so that expression
 * arguments (e.g. a stride of "a + b") are applied as a whole. Arguments are
 * still expanded several times, so they must be free of side effects.
 */
#define FILTER_8TAP(src, x, F, stride) \
    ((F)[0] * (src)[(x) - 3 * (stride)] + \
     (F)[1] * (src)[(x) - 2 * (stride)] + \
     (F)[2] * (src)[(x) - 1 * (stride)] + \
     (F)[3] * (src)[(x) + 0 * (stride)] + \
     (F)[4] * (src)[(x) + 1 * (stride)] + \
     (F)[5] * (src)[(x) + 2 * (stride)] + \
     (F)[6] * (src)[(x) + 3 * (stride)] + \
     (F)[7] * (src)[(x) + 4 * (stride)])

// Filter sum rounded to nearest and shifted right by sh bits
#define DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh) \
    ((FILTER_8TAP(src, x, F, stride) + ((1 << (sh)) >> 1)) >> (sh))

// Same as above, but with a caller-provided rounding offset
#define DAV1D_FILTER_8TAP_RND2(src, x, F, stride, rnd, sh) \
    ((FILTER_8TAP(src, x, F, stride) + (rnd)) >> (sh))

// Rounded filter result passed through iclip_pixel()
#define DAV1D_FILTER_8TAP_CLIP(src, x, F, stride, sh) \
    iclip_pixel(DAV1D_FILTER_8TAP_RND(src, x, F, stride, sh))

// Custom-rounded filter result passed through iclip_pixel()
#define DAV1D_FILTER_8TAP_CLIP2(src, x, F, stride, rnd, sh) \
    iclip_pixel(DAV1D_FILTER_8TAP_RND2(src, x, F, stride, rnd, sh))
98*c0909341SAndroid Build Coastguard Worker
/*
 * Filter lookup helpers. Each one declares a local coefficient pointer
 * ("fh" for horizontal, "fv" for vertical) and relies on "filter_type" plus
 * the block dimension ("w" resp. "h") being visible at the point of use.
 *
 * A sub-pixel position of 0 yields NULL (no filtering in that direction);
 * otherwise entry (position - 1) of the selected filter set is used.
 * The horizontal type lives in the low two bits of filter_type, the vertical
 * type in the bits above. When the dimension is 4 or less, set 3 or 4 is
 * used instead, chosen by the lowest bit of the respective type.
 */
#define GET_H_FILTER(mx) \
    const int8_t *const fh = (mx) == 0 ? NULL : \
        dav1d_mc_subpel_filters[w > 4 ? (filter_type & 3) : \
                                        3 + (filter_type & 1)][(mx) - 1]

#define GET_V_FILTER(my) \
    const int8_t *const fv = (my) == 0 ? NULL : \
        dav1d_mc_subpel_filters[h > 4 ? (filter_type >> 2) : \
                                        3 + ((filter_type >> 2) & 1)][(my) - 1]

// Declares both fh and fv from the "mx" and "my" variables in scope
#define GET_FILTERS() \
    GET_H_FILTER(mx); \
    GET_V_FILTER(my)
112*c0909341SAndroid Build Coastguard Worker
113*c0909341SAndroid Build Coastguard Worker static NOINLINE void
put_8tap_c(pixel * dst,ptrdiff_t dst_stride,const pixel * src,ptrdiff_t src_stride,const int w,int h,const int mx,const int my,const int filter_type HIGHBD_DECL_SUFFIX)114*c0909341SAndroid Build Coastguard Worker put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
115*c0909341SAndroid Build Coastguard Worker const pixel *src, ptrdiff_t src_stride,
116*c0909341SAndroid Build Coastguard Worker const int w, int h, const int mx, const int my,
117*c0909341SAndroid Build Coastguard Worker const int filter_type HIGHBD_DECL_SUFFIX)
118*c0909341SAndroid Build Coastguard Worker {
119*c0909341SAndroid Build Coastguard Worker const int intermediate_bits = get_intermediate_bits(bitdepth_max);
120*c0909341SAndroid Build Coastguard Worker const int intermediate_rnd = 32 + ((1 << (6 - intermediate_bits)) >> 1);
121*c0909341SAndroid Build Coastguard Worker
122*c0909341SAndroid Build Coastguard Worker GET_FILTERS();
123*c0909341SAndroid Build Coastguard Worker dst_stride = PXSTRIDE(dst_stride);
124*c0909341SAndroid Build Coastguard Worker src_stride = PXSTRIDE(src_stride);
125*c0909341SAndroid Build Coastguard Worker
126*c0909341SAndroid Build Coastguard Worker if (fh) {
127*c0909341SAndroid Build Coastguard Worker if (fv) {
128*c0909341SAndroid Build Coastguard Worker int tmp_h = h + 7;
129*c0909341SAndroid Build Coastguard Worker int16_t mid[128 * 135], *mid_ptr = mid;
130*c0909341SAndroid Build Coastguard Worker
131*c0909341SAndroid Build Coastguard Worker src -= src_stride * 3;
132*c0909341SAndroid Build Coastguard Worker do {
133*c0909341SAndroid Build Coastguard Worker for (int x = 0; x < w; x++)
134*c0909341SAndroid Build Coastguard Worker mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1,
135*c0909341SAndroid Build Coastguard Worker 6 - intermediate_bits);
136*c0909341SAndroid Build Coastguard Worker
137*c0909341SAndroid Build Coastguard Worker mid_ptr += 128;
138*c0909341SAndroid Build Coastguard Worker src += src_stride;
139*c0909341SAndroid Build Coastguard Worker } while (--tmp_h);
140*c0909341SAndroid Build Coastguard Worker
141*c0909341SAndroid Build Coastguard Worker mid_ptr = mid + 128 * 3;
142*c0909341SAndroid Build Coastguard Worker do {
143*c0909341SAndroid Build Coastguard Worker for (int x = 0; x < w; x++)
144*c0909341SAndroid Build Coastguard Worker dst[x] = DAV1D_FILTER_8TAP_CLIP(mid_ptr, x, fv, 128,
145*c0909341SAndroid Build Coastguard Worker 6 + intermediate_bits);
146*c0909341SAndroid Build Coastguard Worker
147*c0909341SAndroid Build Coastguard Worker mid_ptr += 128;
148*c0909341SAndroid Build Coastguard Worker dst += dst_stride;
149*c0909341SAndroid Build Coastguard Worker } while (--h);
150*c0909341SAndroid Build Coastguard Worker } else {
151*c0909341SAndroid Build Coastguard Worker do {
152*c0909341SAndroid Build Coastguard Worker for (int x = 0; x < w; x++) {
153*c0909341SAndroid Build Coastguard Worker dst[x] = DAV1D_FILTER_8TAP_CLIP2(src, x, fh, 1,
154*c0909341SAndroid Build Coastguard Worker intermediate_rnd, 6);
155*c0909341SAndroid Build Coastguard Worker }
156*c0909341SAndroid Build Coastguard Worker
157*c0909341SAndroid Build Coastguard Worker dst += dst_stride;
158*c0909341SAndroid Build Coastguard Worker src += src_stride;
159*c0909341SAndroid Build Coastguard Worker } while (--h);
160*c0909341SAndroid Build Coastguard Worker }
161*c0909341SAndroid Build Coastguard Worker } else if (fv) {
162*c0909341SAndroid Build Coastguard Worker do {
163*c0909341SAndroid Build Coastguard Worker for (int x = 0; x < w; x++)
164*c0909341SAndroid Build Coastguard Worker dst[x] = DAV1D_FILTER_8TAP_CLIP(src, x, fv, src_stride, 6);
165*c0909341SAndroid Build Coastguard Worker
166*c0909341SAndroid Build Coastguard Worker dst += dst_stride;
167*c0909341SAndroid Build Coastguard Worker src += src_stride;
168*c0909341SAndroid Build Coastguard Worker } while (--h);
169*c0909341SAndroid Build Coastguard Worker } else
170*c0909341SAndroid Build Coastguard Worker put_c(dst, dst_stride, src, src_stride, w, h);
171*c0909341SAndroid Build Coastguard Worker }
172*c0909341SAndroid Build Coastguard Worker
173*c0909341SAndroid Build Coastguard Worker static NOINLINE void
put_8tap_scaled_c(pixel * dst,const ptrdiff_t dst_stride,const pixel * src,ptrdiff_t src_stride,const int w,int h,const int mx,int my,const int dx,const int dy,const int filter_type HIGHBD_DECL_SUFFIX)174*c0909341SAndroid Build Coastguard Worker put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride,
175*c0909341SAndroid Build Coastguard Worker const pixel *src, ptrdiff_t src_stride,
176*c0909341SAndroid Build Coastguard Worker const int w, int h, const int mx, int my,
177*c0909341SAndroid Build Coastguard Worker const int dx, const int dy, const int filter_type
178*c0909341SAndroid Build Coastguard Worker HIGHBD_DECL_SUFFIX)
179*c0909341SAndroid Build Coastguard Worker {
180*c0909341SAndroid Build Coastguard Worker const int intermediate_bits = get_intermediate_bits(bitdepth_max);
181*c0909341SAndroid Build Coastguard Worker const int intermediate_rnd = (1 << intermediate_bits) >> 1;
182*c0909341SAndroid Build Coastguard Worker int tmp_h = (((h - 1) * dy + my) >> 10) + 8;
183*c0909341SAndroid Build Coastguard Worker int16_t mid[128 * (256 + 7)], *mid_ptr = mid;
184*c0909341SAndroid Build Coastguard Worker src_stride = PXSTRIDE(src_stride);
185*c0909341SAndroid Build Coastguard Worker
186*c0909341SAndroid Build Coastguard Worker src -= src_stride * 3;
187*c0909341SAndroid Build Coastguard Worker do {
188*c0909341SAndroid Build Coastguard Worker int x;
189*c0909341SAndroid Build Coastguard Worker int imx = mx, ioff = 0;
190*c0909341SAndroid Build Coastguard Worker
191*c0909341SAndroid Build Coastguard Worker for (x = 0; x < w; x++) {
192*c0909341SAndroid Build Coastguard Worker GET_H_FILTER(imx >> 6);
193*c0909341SAndroid Build Coastguard Worker mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1,
194*c0909341SAndroid Build Coastguard Worker 6 - intermediate_bits) :
195*c0909341SAndroid Build Coastguard Worker src[ioff] << intermediate_bits;
196*c0909341SAndroid Build Coastguard Worker imx += dx;
197*c0909341SAndroid Build Coastguard Worker ioff += imx >> 10;
198*c0909341SAndroid Build Coastguard Worker imx &= 0x3ff;
199*c0909341SAndroid Build Coastguard Worker }
200*c0909341SAndroid Build Coastguard Worker
201*c0909341SAndroid Build Coastguard Worker mid_ptr += 128;
202*c0909341SAndroid Build Coastguard Worker src += src_stride;
203*c0909341SAndroid Build Coastguard Worker } while (--tmp_h);
204*c0909341SAndroid Build Coastguard Worker
205*c0909341SAndroid Build Coastguard Worker mid_ptr = mid + 128 * 3;
206*c0909341SAndroid Build Coastguard Worker for (int y = 0; y < h; y++) {
207*c0909341SAndroid Build Coastguard Worker int x;
208*c0909341SAndroid Build Coastguard Worker GET_V_FILTER(my >> 6);
209*c0909341SAndroid Build Coastguard Worker
210*c0909341SAndroid Build Coastguard Worker for (x = 0; x < w; x++)
211*c0909341SAndroid Build Coastguard Worker dst[x] = fv ? DAV1D_FILTER_8TAP_CLIP(mid_ptr, x, fv, 128,
212*c0909341SAndroid Build Coastguard Worker 6 + intermediate_bits) :
213*c0909341SAndroid Build Coastguard Worker iclip_pixel((mid_ptr[x] + intermediate_rnd) >>
214*c0909341SAndroid Build Coastguard Worker intermediate_bits);
215*c0909341SAndroid Build Coastguard Worker
216*c0909341SAndroid Build Coastguard Worker my += dy;
217*c0909341SAndroid Build Coastguard Worker mid_ptr += (my >> 10) * 128;
218*c0909341SAndroid Build Coastguard Worker my &= 0x3ff;
219*c0909341SAndroid Build Coastguard Worker dst += PXSTRIDE(dst_stride);
220*c0909341SAndroid Build Coastguard Worker }
221*c0909341SAndroid Build Coastguard Worker }
222*c0909341SAndroid Build Coastguard Worker
223*c0909341SAndroid Build Coastguard Worker static NOINLINE void
prep_8tap_c(int16_t * tmp,const pixel * src,ptrdiff_t src_stride,const int w,int h,const int mx,const int my,const int filter_type HIGHBD_DECL_SUFFIX)224*c0909341SAndroid Build Coastguard Worker prep_8tap_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
225*c0909341SAndroid Build Coastguard Worker const int w, int h, const int mx, const int my,
226*c0909341SAndroid Build Coastguard Worker const int filter_type HIGHBD_DECL_SUFFIX)
227*c0909341SAndroid Build Coastguard Worker {
228*c0909341SAndroid Build Coastguard Worker const int intermediate_bits = get_intermediate_bits(bitdepth_max);
229*c0909341SAndroid Build Coastguard Worker GET_FILTERS();
230*c0909341SAndroid Build Coastguard Worker src_stride = PXSTRIDE(src_stride);
231*c0909341SAndroid Build Coastguard Worker
232*c0909341SAndroid Build Coastguard Worker if (fh) {
233*c0909341SAndroid Build Coastguard Worker if (fv) {
234*c0909341SAndroid Build Coastguard Worker int tmp_h = h + 7;
235*c0909341SAndroid Build Coastguard Worker int16_t mid[128 * 135], *mid_ptr = mid;
236*c0909341SAndroid Build Coastguard Worker
237*c0909341SAndroid Build Coastguard Worker src -= src_stride * 3;
238*c0909341SAndroid Build Coastguard Worker do {
239*c0909341SAndroid Build Coastguard Worker for (int x = 0; x < w; x++)
240*c0909341SAndroid Build Coastguard Worker mid_ptr[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1,
241*c0909341SAndroid Build Coastguard Worker 6 - intermediate_bits);
242*c0909341SAndroid Build Coastguard Worker
243*c0909341SAndroid Build Coastguard Worker mid_ptr += 128;
244*c0909341SAndroid Build Coastguard Worker src += src_stride;
245*c0909341SAndroid Build Coastguard Worker } while (--tmp_h);
246*c0909341SAndroid Build Coastguard Worker
247*c0909341SAndroid Build Coastguard Worker mid_ptr = mid + 128 * 3;
248*c0909341SAndroid Build Coastguard Worker do {
249*c0909341SAndroid Build Coastguard Worker for (int x = 0; x < w; x++) {
250*c0909341SAndroid Build Coastguard Worker int t = DAV1D_FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6) -
251*c0909341SAndroid Build Coastguard Worker PREP_BIAS;
252*c0909341SAndroid Build Coastguard Worker assert(t >= INT16_MIN && t <= INT16_MAX);
253*c0909341SAndroid Build Coastguard Worker tmp[x] = t;
254*c0909341SAndroid Build Coastguard Worker }
255*c0909341SAndroid Build Coastguard Worker
256*c0909341SAndroid Build Coastguard Worker mid_ptr += 128;
257*c0909341SAndroid Build Coastguard Worker tmp += w;
258*c0909341SAndroid Build Coastguard Worker } while (--h);
259*c0909341SAndroid Build Coastguard Worker } else {
260*c0909341SAndroid Build Coastguard Worker do {
261*c0909341SAndroid Build Coastguard Worker for (int x = 0; x < w; x++)
262*c0909341SAndroid Build Coastguard Worker tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fh, 1,
263*c0909341SAndroid Build Coastguard Worker 6 - intermediate_bits) -
264*c0909341SAndroid Build Coastguard Worker PREP_BIAS;
265*c0909341SAndroid Build Coastguard Worker
266*c0909341SAndroid Build Coastguard Worker tmp += w;
267*c0909341SAndroid Build Coastguard Worker src += src_stride;
268*c0909341SAndroid Build Coastguard Worker } while (--h);
269*c0909341SAndroid Build Coastguard Worker }
270*c0909341SAndroid Build Coastguard Worker } else if (fv) {
271*c0909341SAndroid Build Coastguard Worker do {
272*c0909341SAndroid Build Coastguard Worker for (int x = 0; x < w; x++)
273*c0909341SAndroid Build Coastguard Worker tmp[x] = DAV1D_FILTER_8TAP_RND(src, x, fv, src_stride,
274*c0909341SAndroid Build Coastguard Worker 6 - intermediate_bits) -
275*c0909341SAndroid Build Coastguard Worker PREP_BIAS;
276*c0909341SAndroid Build Coastguard Worker
277*c0909341SAndroid Build Coastguard Worker tmp += w;
278*c0909341SAndroid Build Coastguard Worker src += src_stride;
279*c0909341SAndroid Build Coastguard Worker } while (--h);
280*c0909341SAndroid Build Coastguard Worker } else
281*c0909341SAndroid Build Coastguard Worker prep_c(tmp, src, src_stride, w, h HIGHBD_TAIL_SUFFIX);
282*c0909341SAndroid Build Coastguard Worker }
283*c0909341SAndroid Build Coastguard Worker
284*c0909341SAndroid Build Coastguard Worker static NOINLINE void
prep_8tap_scaled_c(int16_t * tmp,const pixel * src,ptrdiff_t src_stride,const int w,int h,const int mx,int my,const int dx,const int dy,const int filter_type HIGHBD_DECL_SUFFIX)285*c0909341SAndroid Build Coastguard Worker prep_8tap_scaled_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride,
286*c0909341SAndroid Build Coastguard Worker const int w, int h, const int mx, int my,
287*c0909341SAndroid Build Coastguard Worker const int dx, const int dy, const int filter_type
288*c0909341SAndroid Build Coastguard Worker HIGHBD_DECL_SUFFIX)
289*c0909341SAndroid Build Coastguard Worker {
290*c0909341SAndroid Build Coastguard Worker const int intermediate_bits = get_intermediate_bits(bitdepth_max);
291*c0909341SAndroid Build Coastguard Worker int tmp_h = (((h - 1) * dy + my) >> 10) + 8;
292*c0909341SAndroid Build Coastguard Worker int16_t mid[128 * (256 + 7)], *mid_ptr = mid;
293*c0909341SAndroid Build Coastguard Worker src_stride = PXSTRIDE(src_stride);
294*c0909341SAndroid Build Coastguard Worker
295*c0909341SAndroid Build Coastguard Worker src -= src_stride * 3;
296*c0909341SAndroid Build Coastguard Worker do {
297*c0909341SAndroid Build Coastguard Worker int x;
298*c0909341SAndroid Build Coastguard Worker int imx = mx, ioff = 0;
299*c0909341SAndroid Build Coastguard Worker
300*c0909341SAndroid Build Coastguard Worker for (x = 0; x < w; x++) {
301*c0909341SAndroid Build Coastguard Worker GET_H_FILTER(imx >> 6);
302*c0909341SAndroid Build Coastguard Worker mid_ptr[x] = fh ? DAV1D_FILTER_8TAP_RND(src, ioff, fh, 1,
303*c0909341SAndroid Build Coastguard Worker 6 - intermediate_bits) :
304*c0909341SAndroid Build Coastguard Worker src[ioff] << intermediate_bits;
305*c0909341SAndroid Build Coastguard Worker imx += dx;
306*c0909341SAndroid Build Coastguard Worker ioff += imx >> 10;
307*c0909341SAndroid Build Coastguard Worker imx &= 0x3ff;
308*c0909341SAndroid Build Coastguard Worker }
309*c0909341SAndroid Build Coastguard Worker
310*c0909341SAndroid Build Coastguard Worker mid_ptr += 128;
311*c0909341SAndroid Build Coastguard Worker src += src_stride;
312*c0909341SAndroid Build Coastguard Worker } while (--tmp_h);
313*c0909341SAndroid Build Coastguard Worker
314*c0909341SAndroid Build Coastguard Worker mid_ptr = mid + 128 * 3;
315*c0909341SAndroid Build Coastguard Worker for (int y = 0; y < h; y++) {
316*c0909341SAndroid Build Coastguard Worker int x;
317*c0909341SAndroid Build Coastguard Worker GET_V_FILTER(my >> 6);
318*c0909341SAndroid Build Coastguard Worker
319*c0909341SAndroid Build Coastguard Worker for (x = 0; x < w; x++)
320*c0909341SAndroid Build Coastguard Worker tmp[x] = (fv ? DAV1D_FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6)
321*c0909341SAndroid Build Coastguard Worker : mid_ptr[x]) - PREP_BIAS;
322*c0909341SAndroid Build Coastguard Worker
323*c0909341SAndroid Build Coastguard Worker my += dy;
324*c0909341SAndroid Build Coastguard Worker mid_ptr += (my >> 10) * 128;
325*c0909341SAndroid Build Coastguard Worker my &= 0x3ff;
326*c0909341SAndroid Build Coastguard Worker tmp += w;
327*c0909341SAndroid Build Coastguard Worker }
328*c0909341SAndroid Build Coastguard Worker }
329*c0909341SAndroid Build Coastguard Worker
330*c0909341SAndroid Build Coastguard Worker #define filter_fns(type, type_h, type_v) \
331*c0909341SAndroid Build Coastguard Worker static void put_8tap_##type##_c(pixel *const dst, \
332*c0909341SAndroid Build Coastguard Worker const ptrdiff_t dst_stride, \
333*c0909341SAndroid Build Coastguard Worker const pixel *const src, \
334*c0909341SAndroid Build Coastguard Worker const ptrdiff_t src_stride, \
335*c0909341SAndroid Build Coastguard Worker const int w, const int h, \
336*c0909341SAndroid Build Coastguard Worker const int mx, const int my \
337*c0909341SAndroid Build Coastguard Worker HIGHBD_DECL_SUFFIX) \
338*c0909341SAndroid Build Coastguard Worker { \
339*c0909341SAndroid Build Coastguard Worker put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \
340*c0909341SAndroid Build Coastguard Worker type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \
341*c0909341SAndroid Build Coastguard Worker } \
342*c0909341SAndroid Build Coastguard Worker static void put_8tap_##type##_scaled_c(pixel *const dst, \
343*c0909341SAndroid Build Coastguard Worker const ptrdiff_t dst_stride, \
344*c0909341SAndroid Build Coastguard Worker const pixel *const src, \
345*c0909341SAndroid Build Coastguard Worker const ptrdiff_t src_stride, \
346*c0909341SAndroid Build Coastguard Worker const int w, const int h, \
347*c0909341SAndroid Build Coastguard Worker const int mx, const int my, \
348*c0909341SAndroid Build Coastguard Worker const int dx, const int dy \
349*c0909341SAndroid Build Coastguard Worker HIGHBD_DECL_SUFFIX) \
350*c0909341SAndroid Build Coastguard Worker { \
351*c0909341SAndroid Build Coastguard Worker put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \
352*c0909341SAndroid Build Coastguard Worker type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \
353*c0909341SAndroid Build Coastguard Worker } \
354*c0909341SAndroid Build Coastguard Worker static void prep_8tap_##type##_c(int16_t *const tmp, \
355*c0909341SAndroid Build Coastguard Worker const pixel *const src, \
356*c0909341SAndroid Build Coastguard Worker const ptrdiff_t src_stride, \
357*c0909341SAndroid Build Coastguard Worker const int w, const int h, \
358*c0909341SAndroid Build Coastguard Worker const int mx, const int my \
359*c0909341SAndroid Build Coastguard Worker HIGHBD_DECL_SUFFIX) \
360*c0909341SAndroid Build Coastguard Worker { \
361*c0909341SAndroid Build Coastguard Worker prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \
362*c0909341SAndroid Build Coastguard Worker type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \
363*c0909341SAndroid Build Coastguard Worker } \
364*c0909341SAndroid Build Coastguard Worker static void prep_8tap_##type##_scaled_c(int16_t *const tmp, \
365*c0909341SAndroid Build Coastguard Worker const pixel *const src, \
366*c0909341SAndroid Build Coastguard Worker const ptrdiff_t src_stride, \
367*c0909341SAndroid Build Coastguard Worker const int w, const int h, \
368*c0909341SAndroid Build Coastguard Worker const int mx, const int my, \
369*c0909341SAndroid Build Coastguard Worker const int dx, const int dy \
370*c0909341SAndroid Build Coastguard Worker HIGHBD_DECL_SUFFIX) \
371*c0909341SAndroid Build Coastguard Worker { \
372*c0909341SAndroid Build Coastguard Worker prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \
373*c0909341SAndroid Build Coastguard Worker type_h | (type_v << 2) HIGHBD_TAIL_SUFFIX); \
374*c0909341SAndroid Build Coastguard Worker }
375*c0909341SAndroid Build Coastguard Worker
filter_fns(regular,DAV1D_FILTER_8TAP_REGULAR,DAV1D_FILTER_8TAP_REGULAR)376*c0909341SAndroid Build Coastguard Worker filter_fns(regular, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_REGULAR)
377*c0909341SAndroid Build Coastguard Worker filter_fns(regular_sharp, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SHARP)
378*c0909341SAndroid Build Coastguard Worker filter_fns(regular_smooth, DAV1D_FILTER_8TAP_REGULAR, DAV1D_FILTER_8TAP_SMOOTH)
379*c0909341SAndroid Build Coastguard Worker filter_fns(smooth, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SMOOTH)
380*c0909341SAndroid Build Coastguard Worker filter_fns(smooth_regular, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_REGULAR)
381*c0909341SAndroid Build Coastguard Worker filter_fns(smooth_sharp, DAV1D_FILTER_8TAP_SMOOTH, DAV1D_FILTER_8TAP_SHARP)
382*c0909341SAndroid Build Coastguard Worker filter_fns(sharp, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SHARP)
383*c0909341SAndroid Build Coastguard Worker filter_fns(sharp_regular, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_REGULAR)
384*c0909341SAndroid Build Coastguard Worker filter_fns(sharp_smooth, DAV1D_FILTER_8TAP_SHARP, DAV1D_FILTER_8TAP_SMOOTH)
385*c0909341SAndroid Build Coastguard Worker
/*
 * Bilinear interpolation helpers.
 *
 * FILTER_BILIN() blends src[x] and src[x + stride] with weight mxy:
 * the result is 16 * src[x] + mxy * (src[x + stride] - src[x]), so with
 * mxy = 0 it is src[x] scaled by 16.
 *
 * Every parameter is parenthesized in the expansion so that expression
 * arguments are applied as a whole. Arguments are still expanded several
 * times, so they must be free of side effects.
 */
#define FILTER_BILIN(src, x, mxy, stride) \
    (16 * (src)[(x)] + ((mxy) * ((src)[(x) + (stride)] - (src)[(x)])))

// Blend rounded to nearest and shifted right by sh bits
#define FILTER_BILIN_RND(src, x, mxy, stride, sh) \
    ((FILTER_BILIN(src, x, mxy, stride) + ((1 << (sh)) >> 1)) >> (sh))

// Rounded blend passed through iclip_pixel()
#define FILTER_BILIN_CLIP(src, x, mxy, stride, sh) \
    iclip_pixel(FILTER_BILIN_RND(src, x, mxy, stride, sh))
394*c0909341SAndroid Build Coastguard Worker
395*c0909341SAndroid Build Coastguard Worker static void put_bilin_c(pixel *dst, ptrdiff_t dst_stride,
396*c0909341SAndroid Build Coastguard Worker const pixel *src, ptrdiff_t src_stride,
397*c0909341SAndroid Build Coastguard Worker const int w, int h, const int mx, const int my
398*c0909341SAndroid Build Coastguard Worker HIGHBD_DECL_SUFFIX)
399*c0909341SAndroid Build Coastguard Worker {
400*c0909341SAndroid Build Coastguard Worker const int intermediate_bits = get_intermediate_bits(bitdepth_max);
401*c0909341SAndroid Build Coastguard Worker const int intermediate_rnd = (1 << intermediate_bits) >> 1;
402*c0909341SAndroid Build Coastguard Worker dst_stride = PXSTRIDE(dst_stride);
403*c0909341SAndroid Build Coastguard Worker src_stride = PXSTRIDE(src_stride);
404*c0909341SAndroid Build Coastguard Worker
405*c0909341SAndroid Build Coastguard Worker if (mx) {
406*c0909341SAndroid Build Coastguard Worker if (my) {
407*c0909341SAndroid Build Coastguard Worker int16_t mid[128 * 129], *mid_ptr = mid;
408*c0909341SAndroid Build Coastguard Worker int tmp_h = h + 1;
409*c0909341SAndroid Build Coastguard Worker
410*c0909341SAndroid Build Coastguard Worker do {
411*c0909341SAndroid Build Coastguard Worker for (int x = 0; x < w; x++)
412*c0909341SAndroid Build Coastguard Worker mid_ptr[x] = FILTER_BILIN_RND(src, x, mx, 1,
413*c0909341SAndroid Build Coastguard Worker 4 - intermediate_bits);
414*c0909341SAndroid Build Coastguard Worker
415*c0909341SAndroid Build Coastguard Worker mid_ptr += 128;
416*c0909341SAndroid Build Coastguard Worker src += src_stride;
417*c0909341SAndroid Build Coastguard Worker } while (--tmp_h);
418*c0909341SAndroid Build Coastguard Worker
419*c0909341SAndroid Build Coastguard Worker mid_ptr = mid;
420*c0909341SAndroid Build Coastguard Worker do {
421*c0909341SAndroid Build Coastguard Worker for (int x = 0; x < w; x++)
422*c0909341SAndroid Build Coastguard Worker dst[x] = FILTER_BILIN_CLIP(mid_ptr, x, my, 128,
423*c0909341SAndroid Build Coastguard Worker 4 + intermediate_bits);
424*c0909341SAndroid Build Coastguard Worker
425*c0909341SAndroid Build Coastguard Worker mid_ptr += 128;
426*c0909341SAndroid Build Coastguard Worker dst += dst_stride;
427*c0909341SAndroid Build Coastguard Worker } while (--h);
428*c0909341SAndroid Build Coastguard Worker } else {
429*c0909341SAndroid Build Coastguard Worker do {
430*c0909341SAndroid Build Coastguard Worker for (int x = 0; x < w; x++) {
431*c0909341SAndroid Build Coastguard Worker const int px = FILTER_BILIN_RND(src, x, mx, 1,
432*c0909341SAndroid Build Coastguard Worker 4 - intermediate_bits);
433*c0909341SAndroid Build Coastguard Worker dst[x] = iclip_pixel((px + intermediate_rnd) >> intermediate_bits);
434*c0909341SAndroid Build Coastguard Worker }
435*c0909341SAndroid Build Coastguard Worker
436*c0909341SAndroid Build Coastguard Worker dst += dst_stride;
437*c0909341SAndroid Build Coastguard Worker src += src_stride;
438*c0909341SAndroid Build Coastguard Worker } while (--h);
439*c0909341SAndroid Build Coastguard Worker }
440*c0909341SAndroid Build Coastguard Worker } else if (my) {
441*c0909341SAndroid Build Coastguard Worker do {
442*c0909341SAndroid Build Coastguard Worker for (int x = 0; x < w; x++)
443*c0909341SAndroid Build Coastguard Worker dst[x] = FILTER_BILIN_CLIP(src, x, my, src_stride, 4);
444*c0909341SAndroid Build Coastguard Worker
445*c0909341SAndroid Build Coastguard Worker dst += dst_stride;
446*c0909341SAndroid Build Coastguard Worker src += src_stride;
447*c0909341SAndroid Build Coastguard Worker } while (--h);
448*c0909341SAndroid Build Coastguard Worker } else
449*c0909341SAndroid Build Coastguard Worker put_c(dst, dst_stride, src, src_stride, w, h);
450*c0909341SAndroid Build Coastguard Worker }
451*c0909341SAndroid Build Coastguard Worker
// Scaled bilinear "put": resamples src into dst with independent horizontal
// and vertical step sizes and stores clipped pixels.
//
// mx/my are the starting sub-sample positions and dx/dy the per-output-sample
// steps. Positions carry a 10-bit fraction: the integer advance is taken with
// ">> 10", the fraction is kept with "& 0x3ff", and only its top four bits
// (">> 6") are used as the bilinear weight.
//
// Pass 1 filters horizontally into a 16-bit scratch buffer (row pitch 128),
// pass 2 filters vertically between scratch rows and clips to pixel range.
static void put_bilin_scaled_c(pixel *dst, ptrdiff_t dst_stride,
                               const pixel *src, ptrdiff_t src_stride,
                               const int w, int h, const int mx, int my,
                               const int dx, const int dy
                               HIGHBD_DECL_SUFFIX)
{
    const int intermediate_bits = get_intermediate_bits(bitdepth_max);
    int16_t mid[128 * (256 + 1)];
    int16_t *row = mid;
    // Integer source row reached by the last output row, plus that row's
    // lower neighbour, plus one to turn the index into a count.
    int rows_left = (((h - 1) * dy + my) >> 10) + 2;

    // Pass 1: horizontal filter, one scratch row per source row.
    do {
        int frac = mx; // position within the current source sample
        int pos = 0;   // integer source column

        for (int col = 0; col < w; col++) {
            row[col] = FILTER_BILIN_RND(src, pos, frac >> 6, 1,
                                        4 - intermediate_bits);
            frac += dx;
            pos += frac >> 10;
            frac &= 0x3ff;
        }

        src += PXSTRIDE(src_stride);
        row += 128;
    } while (--rows_left);

    // Pass 2: vertical filter between scratch rows, stepping by dy.
    row = mid;
    do {
        for (int col = 0; col < w; col++)
            dst[col] = FILTER_BILIN_CLIP(row, col, my >> 6, 128,
                                         4 + intermediate_bits);

        dst += PXSTRIDE(dst_stride);
        my += dy;
        row += (my >> 10) * 128;
        my &= 0x3ff;
    } while (--h);
}
492*c0909341SAndroid Build Coastguard Worker
// Bilinear "prep": filters a w x h block from src into the 16-bit
// intermediate buffer tmp (rows packed with pitch w), subtracting PREP_BIAS
// from every stored value.
//
// mx / my are the horizontal / vertical filter weights; a weight of zero
// means that direction is not filtered. With both zero the work is delegated
// to prep_c().
static void prep_bilin_c(int16_t *tmp,
                         const pixel *src, ptrdiff_t src_stride,
                         const int w, int h, const int mx, const int my
                         HIGHBD_DECL_SUFFIX)
{
    const int intermediate_bits = get_intermediate_bits(bitdepth_max);
    const ptrdiff_t stride = PXSTRIDE(src_stride);

    // No fractional offset at all.
    if (!mx && !my) {
        prep_c(tmp, src, stride, w, h HIGHBD_TAIL_SUFFIX);
        return;
    }

    // Vertical-only filtering.
    if (!mx) {
        do {
            for (int col = 0; col < w; col++)
                tmp[col] = FILTER_BILIN_RND(src, col, my, stride,
                                            4 - intermediate_bits) - PREP_BIAS;

            src += stride;
            tmp += w;
        } while (--h);
        return;
    }

    // Horizontal-only filtering.
    if (!my) {
        do {
            for (int col = 0; col < w; col++)
                tmp[col] = FILTER_BILIN_RND(src, col, mx, 1,
                                            4 - intermediate_bits) - PREP_BIAS;

            src += stride;
            tmp += w;
        } while (--h);
        return;
    }

    // Both directions: horizontal pass into a scratch buffer (row pitch 128,
    // h + 1 rows because the vertical filter reads the row below), followed
    // by the vertical pass.
    int16_t mid[128 * 129];
    int16_t *row = mid;
    int rows_left = h + 1;

    do {
        for (int col = 0; col < w; col++)
            row[col] = FILTER_BILIN_RND(src, col, mx, 1,
                                        4 - intermediate_bits);

        src += stride;
        row += 128;
    } while (--rows_left);

    row = mid;
    do {
        for (int col = 0; col < w; col++)
            tmp[col] = FILTER_BILIN_RND(row, col, my, 128, 4) - PREP_BIAS;

        row += 128;
        tmp += w;
    } while (--h);
}
547*c0909341SAndroid Build Coastguard Worker
// Scaled bilinear "prep": same two-pass resampling as the scaled put variant,
// but the result stays at intermediate precision and is written to the packed
// 16-bit buffer tmp (row pitch w) with PREP_BIAS subtracted.
//
// mx/my are starting positions and dx/dy per-sample steps with a 10-bit
// fraction (">> 10" integer part, "& 0x3ff" fraction, ">> 6" filter weight).
static void prep_bilin_scaled_c(int16_t *tmp,
                                const pixel *src, ptrdiff_t src_stride,
                                const int w, int h, const int mx, int my,
                                const int dx, const int dy HIGHBD_DECL_SUFFIX)
{
    const int intermediate_bits = get_intermediate_bits(bitdepth_max);
    int16_t mid[128 * (256 + 1)];
    int16_t *row = mid;
    // Number of source rows the vertical pass can reach (last integer row
    // index, its lower neighbour, and +1 to make it a count).
    int rows_left = (((h - 1) * dy + my) >> 10) + 2;

    // Pass 1: horizontal filter into the scratch buffer (row pitch 128).
    do {
        int frac = mx; // position within the current source sample
        int pos = 0;   // integer source column

        for (int col = 0; col < w; col++) {
            row[col] = FILTER_BILIN_RND(src, pos, frac >> 6, 1,
                                        4 - intermediate_bits);
            frac += dx;
            pos += frac >> 10;
            frac &= 0x3ff;
        }

        src += PXSTRIDE(src_stride);
        row += 128;
    } while (--rows_left);

    // Pass 2: vertical filter between scratch rows, stepping by dy.
    row = mid;
    do {
        for (int col = 0; col < w; col++)
            tmp[col] = FILTER_BILIN_RND(row, col, my >> 6, 128, 4) - PREP_BIAS;

        tmp += w;
        my += dy;
        row += (my >> 10) * 128;
        my &= 0x3ff;
    } while (--h);
}
586*c0909341SAndroid Build Coastguard Worker
// Averages two packed intermediate blocks (row pitch w) into dst.
// Each output is (tmp1 + tmp2 + bias) >> shift, clipped to pixel range; the
// bias holds the rounding term plus 2 * PREP_BIAS, one per input.
static void avg_c(pixel *dst, const ptrdiff_t dst_stride,
                  const int16_t *tmp1, const int16_t *tmp2, const int w, int h
                  HIGHBD_DECL_SUFFIX)
{
    const int intermediate_bits = get_intermediate_bits(bitdepth_max);
    const int shift = intermediate_bits + 1;
    const int bias = (1 << intermediate_bits) + PREP_BIAS * 2;

    do {
        for (int col = 0; col < w; col++) {
            const int sum = tmp1[col] + tmp2[col] + bias;
            dst[col] = iclip_pixel(sum >> shift);
        }

        dst += PXSTRIDE(dst_stride);
        tmp1 += w;
        tmp2 += w;
    } while (--h);
}
603*c0909341SAndroid Build Coastguard Worker
// Weighted average of two packed intermediate blocks (row pitch w) into dst.
// tmp1 is scaled by `weight` and tmp2 by `16 - weight`; the bias holds the
// rounding term plus 16 * PREP_BIAS (the two weights always sum to 16).
static void w_avg_c(pixel *dst, const ptrdiff_t dst_stride,
                    const int16_t *tmp1, const int16_t *tmp2, const int w, int h,
                    const int weight HIGHBD_DECL_SUFFIX)
{
    const int intermediate_bits = get_intermediate_bits(bitdepth_max);
    const int shift = intermediate_bits + 4;
    const int bias = (8 << intermediate_bits) + PREP_BIAS * 16;

    do {
        for (int col = 0; col < w; col++) {
            const int sum = tmp1[col] * weight +
                            tmp2[col] * (16 - weight) + bias;
            dst[col] = iclip_pixel(sum >> shift);
        }

        dst += PXSTRIDE(dst_stride);
        tmp1 += w;
        tmp2 += w;
    } while (--h);
}
621*c0909341SAndroid Build Coastguard Worker
// Per-sample masked blend of two packed intermediate blocks into dst.
// mask[] (row pitch w) gives the weight of tmp1 out of 64; tmp2 receives the
// complement. The bias holds the rounding term plus 64 * PREP_BIAS.
static void mask_c(pixel *dst, const ptrdiff_t dst_stride,
                   const int16_t *tmp1, const int16_t *tmp2, const int w, int h,
                   const uint8_t *mask HIGHBD_DECL_SUFFIX)
{
    const int intermediate_bits = get_intermediate_bits(bitdepth_max);
    const int shift = intermediate_bits + 6;
    const int bias = (32 << intermediate_bits) + PREP_BIAS * 64;

    do {
        for (int col = 0; col < w; col++) {
            const int sum = tmp1[col] * mask[col] +
                            tmp2[col] * (64 - mask[col]) + bias;
            dst[col] = iclip_pixel(sum >> shift);
        }

        dst += PXSTRIDE(dst_stride);
        tmp1 += w;
        tmp2 += w;
        mask += w;
    } while (--h);
}
640*c0909341SAndroid Build Coastguard Worker
// Blend two samples with a 6-bit weight: returns (a * (64 - m) + b * m),
// rounded and divided by 64. Every argument is parenthesized so that
// expression arguments (e.g. "p + q") expand with the intended precedence.
#define blend_px(a, b, m) ((((a) * (64 - (m)) + (b) * (m)) + 32) >> 6)
// Blends tmp into dst in place using a per-sample mask: mask[] is the weight
// of tmp out of 64 (see blend_px). tmp and mask are packed with row pitch w.
static void blend_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp,
                    const int w, int h, const uint8_t *mask)
{
    do {
        for (int col = 0; col < w; col++)
            dst[col] = blend_px(dst[col], tmp[col], mask[col]);

        mask += w;
        tmp += w;
        dst += PXSTRIDE(dst_stride);
    } while (--h);
}
654*c0909341SAndroid Build Coastguard Worker
// Blends tmp into dst in place with one weight per column, taken from
// dav1d_obmc_masks starting at index w. Only the first 3/4 of the columns are
// written; the remaining columns of dst are left as they are.
static void blend_v_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp,
                      const int w, int h)
{
    const uint8_t *const weights = dav1d_obmc_masks + w;
    const int blend_w = (w * 3) >> 2;

    do {
        for (int col = 0; col < blend_w; col++)
            dst[col] = blend_px(dst[col], tmp[col], weights[col]);

        tmp += w;
        dst += PXSTRIDE(dst_stride);
    } while (--h);
}
667*c0909341SAndroid Build Coastguard Worker
// Blends tmp into dst in place with one weight per row, taken from
// dav1d_obmc_masks starting at index h. Only the first 3/4 of the rows are
// processed; later rows of dst are left as they are.
static void blend_h_c(pixel *dst, const ptrdiff_t dst_stride, const pixel *tmp,
                      const int w, int h)
{
    const uint8_t *weights = dav1d_obmc_masks + h;

    h = (h * 3) >> 2;
    do {
        const int row_weight = *weights++;

        for (int col = 0; col < w; col++)
            dst[col] = blend_px(dst[col], tmp[col], row_weight);

        tmp += w;
        dst += PXSTRIDE(dst_stride);
    } while (--h);
}
682*c0909341SAndroid Build Coastguard Worker
// Blends two packed intermediate blocks into dst with a weight derived from
// their absolute difference, and writes the weights used to mask[].
//
// Per sample the weight of tmp1 is min(38 + scaled |tmp1 - tmp2|, 64) out of
// 64. ss_hor / ss_ver select the resolution at which mask[] is stored:
//   - neither:        one mask byte per sample;
//   - ss_hor only:    rounded mean of each horizontal pair;
//   - ss_hor + ss_ver: mean of each 2x2 group. The 2x1 sum is stored on the
//     first row of a pair and read back to finish the value on the second.
// `sign` is subtracted from the rounding term of the subsampled masks.
static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride,
                     const int16_t *tmp1, const int16_t *tmp2, const int w, int h,
                     uint8_t *mask, const int sign,
                     const int ss_hor, const int ss_ver HIGHBD_DECL_SUFFIX)
{
    const int intermediate_bits = get_intermediate_bits(bitdepth_max);
    const int bitdepth = bitdepth_from_max(bitdepth_max);
    const int blend_shift = intermediate_bits + 6;
    const int blend_round = (32 << intermediate_bits) + PREP_BIAS * 64;
    const int diff_shift = bitdepth + intermediate_bits - 4;
    const int diff_round = 1 << (diff_shift - 5);

    do {
        for (int x = 0; x < w; x++) {
            const int diff0 = abs(tmp1[x] - tmp2[x]);
            const int m0 = imin(38 + ((diff0 + diff_round) >> diff_shift), 64);
            dst[x] = iclip_pixel((tmp1[x] * m0 +
                                  tmp2[x] * (64 - m0) + blend_round) >> blend_shift);

            if (!ss_hor) {
                // Full-resolution mask.
                mask[x] = m0;
                continue;
            }

            // Horizontally subsampled: process the right-hand neighbour in
            // the same iteration.
            x++;

            const int diff1 = abs(tmp1[x] - tmp2[x]);
            const int m1 = imin(38 + ((diff1 + diff_round) >> diff_shift), 64);
            dst[x] = iclip_pixel((tmp1[x] * m1 +
                                  tmp2[x] * (64 - m1) + blend_round) >> blend_shift);

            uint8_t *const out = &mask[x >> 1];
            const int pair = m0 + m1;
            if (h & ss_ver) {
                // Second row of a vertical pair: combine with the stored sum.
                *out = (pair + *out + 2 - sign) >> 2;
            } else if (ss_ver) {
                // First row of a vertical pair: park the 2x1 sum.
                *out = pair;
            } else {
                *out = (pair + 1 - sign) >> 1;
            }
        }

        tmp1 += w;
        tmp2 += w;
        dst += PXSTRIDE(dst_stride);
        // With vertical subsampling the mask row only advances after the
        // second row of each pair (odd remaining-row count).
        if (!ss_ver || (h & 1))
            mask += w >> ss_hor;
    } while (--h);
}
727*c0909341SAndroid Build Coastguard Worker
728*c0909341SAndroid Build Coastguard Worker #define w_mask_fns(ssn, ss_hor, ss_ver) \
729*c0909341SAndroid Build Coastguard Worker static void w_mask_##ssn##_c(pixel *const dst, const ptrdiff_t dst_stride, \
730*c0909341SAndroid Build Coastguard Worker const int16_t *const tmp1, const int16_t *const tmp2, \
731*c0909341SAndroid Build Coastguard Worker const int w, const int h, uint8_t *mask, \
732*c0909341SAndroid Build Coastguard Worker const int sign HIGHBD_DECL_SUFFIX) \
733*c0909341SAndroid Build Coastguard Worker { \
734*c0909341SAndroid Build Coastguard Worker w_mask_c(dst, dst_stride, tmp1, tmp2, w, h, mask, sign, ss_hor, ss_ver \
735*c0909341SAndroid Build Coastguard Worker HIGHBD_TAIL_SUFFIX); \
736*c0909341SAndroid Build Coastguard Worker }
737*c0909341SAndroid Build Coastguard Worker
738*c0909341SAndroid Build Coastguard Worker w_mask_fns(444, 0, 0);
739*c0909341SAndroid Build Coastguard Worker w_mask_fns(422, 1, 0);
740*c0909341SAndroid Build Coastguard Worker w_mask_fns(420, 1, 1);
741*c0909341SAndroid Build Coastguard Worker
742*c0909341SAndroid Build Coastguard Worker #undef w_mask_fns
743*c0909341SAndroid Build Coastguard Worker
744*c0909341SAndroid Build Coastguard Worker #define FILTER_WARP_RND(src, x, F, stride, sh) \
745*c0909341SAndroid Build Coastguard Worker ((F[0] * src[x - 3 * stride] + \
746*c0909341SAndroid Build Coastguard Worker F[1] * src[x - 2 * stride] + \
747*c0909341SAndroid Build Coastguard Worker F[2] * src[x - 1 * stride] + \
748*c0909341SAndroid Build Coastguard Worker F[3] * src[x + 0 * stride] + \
749*c0909341SAndroid Build Coastguard Worker F[4] * src[x + 1 * stride] + \
750*c0909341SAndroid Build Coastguard Worker F[5] * src[x + 2 * stride] + \
751*c0909341SAndroid Build Coastguard Worker F[6] * src[x + 3 * stride] + \
752*c0909341SAndroid Build Coastguard Worker F[7] * src[x + 4 * stride] + \
753*c0909341SAndroid Build Coastguard Worker ((1 << (sh)) >> 1)) >> (sh))
754*c0909341SAndroid Build Coastguard Worker
755*c0909341SAndroid Build Coastguard Worker #define FILTER_WARP_CLIP(src, x, F, stride, sh) \
756*c0909341SAndroid Build Coastguard Worker iclip_pixel(FILTER_WARP_RND(src, x, F, stride, sh))
757*c0909341SAndroid Build Coastguard Worker
// Affine warp of one 8x8 block, writing clipped pixels to dst.
//
// mx / my are the starting filter positions. abcd[0] / abcd[1] are added to
// the horizontal position per column / per row, abcd[2] / abcd[3] to the
// vertical position per column / per row. The filter for each sample is
// looked up in dav1d_mc_warp_filter from the rounded position (>> 10).
//
// Pass 1 filters 15 source rows horizontally (3 above and 4 below the 8
// output rows, as needed by the 8-tap vertical filter) into a scratch buffer;
// pass 2 filters that buffer vertically.
static void warp_affine_8x8_c(pixel *dst, const ptrdiff_t dst_stride,
                              const pixel *src, const ptrdiff_t src_stride,
                              const int16_t *const abcd, int mx, int my
                              HIGHBD_DECL_SUFFIX)
{
    const int intermediate_bits = get_intermediate_bits(bitdepth_max);
    int16_t mid[15 * 8];
    int16_t *row = mid;

    // Horizontal pass, starting three rows above the block.
    src -= 3 * PXSTRIDE(src_stride);
    for (int r = 0; r < 15; r++) {
        int pos = mx;

        for (int col = 0; col < 8; col++) {
            const int8_t *const taps =
                dav1d_mc_warp_filter[64 + ((pos + 512) >> 10)];

            row[col] = FILTER_WARP_RND(src, col, taps, 1,
                                       7 - intermediate_bits);
            pos += abcd[0];
        }

        mx += abcd[1];
        src += PXSTRIDE(src_stride);
        row += 8;
    }

    // Vertical pass; the filter reaches 3 rows up, so start at scratch row 3.
    row = mid + 3 * 8;
    for (int r = 0; r < 8; r++) {
        int pos = my;

        for (int col = 0; col < 8; col++) {
            const int8_t *const taps =
                dav1d_mc_warp_filter[64 + ((pos + 512) >> 10)];

            dst[col] = FILTER_WARP_CLIP(row, col, taps, 8,
                                        7 + intermediate_bits);
            pos += abcd[2];
        }

        my += abcd[3];
        row += 8;
        dst += PXSTRIDE(dst_stride);
    }
}
792*c0909341SAndroid Build Coastguard Worker
// Affine warp of one 8x8 block into the 16-bit intermediate buffer tmp.
// Same two-pass filtering as the pixel-output variant, but the vertical pass
// keeps intermediate precision (shift of 7), subtracts PREP_BIAS and does not
// clip. tmp_stride is the row pitch of tmp in int16_t elements.
static void warp_affine_8x8t_c(int16_t *tmp, const ptrdiff_t tmp_stride,
                               const pixel *src, const ptrdiff_t src_stride,
                               const int16_t *const abcd, int mx, int my
                               HIGHBD_DECL_SUFFIX)
{
    const int intermediate_bits = get_intermediate_bits(bitdepth_max);
    int16_t mid[15 * 8];
    int16_t *row = mid;

    // Horizontal pass over 15 source rows, starting three rows above.
    src -= 3 * PXSTRIDE(src_stride);
    for (int r = 0; r < 15; r++) {
        int pos = mx;

        for (int col = 0; col < 8; col++) {
            const int8_t *const taps =
                dav1d_mc_warp_filter[64 + ((pos + 512) >> 10)];

            row[col] = FILTER_WARP_RND(src, col, taps, 1,
                                       7 - intermediate_bits);
            pos += abcd[0];
        }

        mx += abcd[1];
        src += PXSTRIDE(src_stride);
        row += 8;
    }

    // Vertical pass; the filter reaches 3 rows up, so start at scratch row 3.
    row = mid + 3 * 8;
    for (int r = 0; r < 8; r++) {
        int pos = my;

        for (int col = 0; col < 8; col++) {
            const int8_t *const taps =
                dav1d_mc_warp_filter[64 + ((pos + 512) >> 10)];

            tmp[col] = FILTER_WARP_RND(row, col, taps, 8, 7) - PREP_BIAS;
            pos += abcd[2];
        }

        my += abcd[3];
        row += 8;
        tmp += tmp_stride;
    }
}
826*c0909341SAndroid Build Coastguard Worker
// Builds a bw x bh block in dst for a reference block at position (x, y) in
// an iw x ih picture when the block sticks out past the picture: the part
// inside the picture is copied from ref and the rest is filled by replicating
// the nearest edge samples.
static void emu_edge_c(const intptr_t bw, const intptr_t bh,
                       const intptr_t iw, const intptr_t ih,
                       const intptr_t x, const intptr_t y,
                       pixel *dst, const ptrdiff_t dst_stride,
                       const pixel *ref, const ptrdiff_t ref_stride)
{
    const ptrdiff_t dst_px = PXSTRIDE(dst_stride);
    const ptrdiff_t ref_px = PXSTRIDE(ref_stride);

    // First sample of the block that lies inside the picture.
    const int ref_row = iclip((int) y, 0, (int) ih - 1);
    const int ref_col = iclip((int) x, 0, (int) iw - 1);
    ref += ref_row * ref_px + ref_col;

    // How many columns / rows have to be synthesized on each side.
    const int left_ext = iclip((int) -x, 0, (int) bw - 1);
    const int right_ext = iclip((int) (x + bw - iw), 0, (int) bw - 1);
    assert(left_ext + right_ext < bw);
    const int top_ext = iclip((int) -y, 0, (int) bh - 1);
    const int bottom_ext = iclip((int) (y + bh - ih), 0, (int) bh - 1);
    assert(top_ext + bottom_ext < bh);

    const int center_w = (int) (bw - left_ext - right_ext);
    const int center_h = (int) (bh - top_ext - bottom_ext);

    // Rows that overlap the picture: copy the visible span, then replicate
    // its first / last sample into the left / right padding.
    pixel *const first_row = dst + top_ext * dst_px;
    pixel *line = first_row;
    for (int row = 0; row < center_h; row++) {
        pixel *const body = line + left_ext;

        pixel_copy(body, ref, center_w);
        if (left_ext)
            pixel_set(line, body[0], left_ext);
        if (right_ext)
            pixel_set(body + center_w, body[center_w - 1], right_ext);

        ref += ref_px;
        line += dst_px;
    }

    // Top padding: repeat the first completed row.
    for (int row = 0; row < top_ext; row++) {
        pixel_copy(dst, first_row, bw);
        dst += dst_px;
    }

    // Bottom padding: each row repeats the one directly above it.
    dst += center_h * dst_px;
    for (int row = 0; row < bottom_ext; row++) {
        pixel_copy(dst, dst - dst_px, bw);
        dst += dst_px;
    }
}
876*c0909341SAndroid Build Coastguard Worker
/*
 * Horizontal 8-tap resampler: produces h rows of dst_w pixels each from src.
 *
 * The horizontal position is tracked as an integer source column plus a
 * 14-bit fraction. The fraction starts at mx0 on every row and advances by
 * dx per output pixel; its carry (bits 14 and up) moves the source column.
 * The top six bits of the fraction (frac >> 8) select a row of
 * dav1d_resize_filter. Source columns are clamped to [0, src_w - 1], so
 * taps that fall outside the row reuse the nearest edge pixel.
 *
 * The weighted sum is negated before rounding (+64, >> 7) and clipping to
 * the pixel range -- presumably the table stores negated coefficients;
 * confirm against its definition.
 *
 * h must be at least 1: the row loop tests its counter only after a row
 * has been processed.
 */
static void resize_c(pixel *dst, const ptrdiff_t dst_stride,
                     const pixel *src, const ptrdiff_t src_stride,
                     const int dst_w, int h, const int src_w,
                     const int dx, const int mx0 HIGHBD_DECL_SUFFIX)
{
    do {
        int frac = mx0; // 14-bit fractional position within the row
        int pos = -1;   // integer source column the taps are placed around

        for (int col = 0; col < dst_w; col++) {
            const int8_t *const taps = dav1d_resize_filter[frac >> 8];

            // Taps cover source columns pos - 3 .. pos + 4, edge-clamped.
            int acc = 0;
            for (int k = 0; k < 8; k++)
                acc += taps[k] * src[iclip(pos + (k - 3), 0, src_w - 1)];

            dst[col] = iclip_pixel((-acc + 64) >> 7);

            // Step the fixed-point position; the carry moves the column.
            frac += dx;
            pos += frac >> 14;
            frac &= 0x3fff;
        }

        src += PXSTRIDE(src_stride);
        dst += PXSTRIDE(dst_stride);
    } while (--h);
}
904*c0909341SAndroid Build Coastguard Worker
905*c0909341SAndroid Build Coastguard Worker #if HAVE_ASM
906*c0909341SAndroid Build Coastguard Worker #if ARCH_AARCH64 || ARCH_ARM
907*c0909341SAndroid Build Coastguard Worker #include "src/arm/mc.h"
908*c0909341SAndroid Build Coastguard Worker #elif ARCH_LOONGARCH64
909*c0909341SAndroid Build Coastguard Worker #include "src/loongarch/mc.h"
910*c0909341SAndroid Build Coastguard Worker #elif ARCH_RISCV
911*c0909341SAndroid Build Coastguard Worker #include "src/riscv/mc.h"
912*c0909341SAndroid Build Coastguard Worker #elif ARCH_X86
913*c0909341SAndroid Build Coastguard Worker #include "src/x86/mc.h"
914*c0909341SAndroid Build Coastguard Worker #endif
915*c0909341SAndroid Build Coastguard Worker #endif
916*c0909341SAndroid Build Coastguard Worker
bitfn(dav1d_mc_dsp_init)917*c0909341SAndroid Build Coastguard Worker COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
918*c0909341SAndroid Build Coastguard Worker #define init_mc_fns(type, name) do { \
919*c0909341SAndroid Build Coastguard Worker c->mc [type] = put_##name##_c; \
920*c0909341SAndroid Build Coastguard Worker c->mc_scaled [type] = put_##name##_scaled_c; \
921*c0909341SAndroid Build Coastguard Worker c->mct [type] = prep_##name##_c; \
922*c0909341SAndroid Build Coastguard Worker c->mct_scaled[type] = prep_##name##_scaled_c; \
923*c0909341SAndroid Build Coastguard Worker } while (0)
924*c0909341SAndroid Build Coastguard Worker
925*c0909341SAndroid Build Coastguard Worker init_mc_fns(FILTER_2D_8TAP_REGULAR, 8tap_regular);
926*c0909341SAndroid Build Coastguard Worker init_mc_fns(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth);
927*c0909341SAndroid Build Coastguard Worker init_mc_fns(FILTER_2D_8TAP_REGULAR_SHARP, 8tap_regular_sharp);
928*c0909341SAndroid Build Coastguard Worker init_mc_fns(FILTER_2D_8TAP_SHARP_REGULAR, 8tap_sharp_regular);
929*c0909341SAndroid Build Coastguard Worker init_mc_fns(FILTER_2D_8TAP_SHARP_SMOOTH, 8tap_sharp_smooth);
930*c0909341SAndroid Build Coastguard Worker init_mc_fns(FILTER_2D_8TAP_SHARP, 8tap_sharp);
931*c0909341SAndroid Build Coastguard Worker init_mc_fns(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular);
932*c0909341SAndroid Build Coastguard Worker init_mc_fns(FILTER_2D_8TAP_SMOOTH, 8tap_smooth);
933*c0909341SAndroid Build Coastguard Worker init_mc_fns(FILTER_2D_8TAP_SMOOTH_SHARP, 8tap_smooth_sharp);
934*c0909341SAndroid Build Coastguard Worker init_mc_fns(FILTER_2D_BILINEAR, bilin);
935*c0909341SAndroid Build Coastguard Worker
936*c0909341SAndroid Build Coastguard Worker c->avg = avg_c;
937*c0909341SAndroid Build Coastguard Worker c->w_avg = w_avg_c;
938*c0909341SAndroid Build Coastguard Worker c->mask = mask_c;
939*c0909341SAndroid Build Coastguard Worker c->blend = blend_c;
940*c0909341SAndroid Build Coastguard Worker c->blend_v = blend_v_c;
941*c0909341SAndroid Build Coastguard Worker c->blend_h = blend_h_c;
942*c0909341SAndroid Build Coastguard Worker c->w_mask[0] = w_mask_444_c;
943*c0909341SAndroid Build Coastguard Worker c->w_mask[1] = w_mask_422_c;
944*c0909341SAndroid Build Coastguard Worker c->w_mask[2] = w_mask_420_c;
945*c0909341SAndroid Build Coastguard Worker c->warp8x8 = warp_affine_8x8_c;
946*c0909341SAndroid Build Coastguard Worker c->warp8x8t = warp_affine_8x8t_c;
947*c0909341SAndroid Build Coastguard Worker c->emu_edge = emu_edge_c;
948*c0909341SAndroid Build Coastguard Worker c->resize = resize_c;
949*c0909341SAndroid Build Coastguard Worker
950*c0909341SAndroid Build Coastguard Worker #if HAVE_ASM
951*c0909341SAndroid Build Coastguard Worker #if ARCH_AARCH64 || ARCH_ARM
952*c0909341SAndroid Build Coastguard Worker mc_dsp_init_arm(c);
953*c0909341SAndroid Build Coastguard Worker #elif ARCH_LOONGARCH64
954*c0909341SAndroid Build Coastguard Worker mc_dsp_init_loongarch(c);
955*c0909341SAndroid Build Coastguard Worker #elif ARCH_RISCV
956*c0909341SAndroid Build Coastguard Worker mc_dsp_init_riscv(c);
957*c0909341SAndroid Build Coastguard Worker #elif ARCH_X86
958*c0909341SAndroid Build Coastguard Worker mc_dsp_init_x86(c);
959*c0909341SAndroid Build Coastguard Worker #endif
960*c0909341SAndroid Build Coastguard Worker #endif
961*c0909341SAndroid Build Coastguard Worker }
962