xref: /aosp_15_r20/external/libdav1d/src/arm/ipred.h (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
26*c0909341SAndroid Build Coastguard Worker 
27*c0909341SAndroid Build Coastguard Worker #include "src/cpu.h"
28*c0909341SAndroid Build Coastguard Worker #include "src/ipred.h"
29*c0909341SAndroid Build Coastguard Worker 
30*c0909341SAndroid Build Coastguard Worker decl_angular_ipred_fn(BF(dav1d_ipred_dc, neon));
31*c0909341SAndroid Build Coastguard Worker decl_angular_ipred_fn(BF(dav1d_ipred_dc_128, neon));
32*c0909341SAndroid Build Coastguard Worker decl_angular_ipred_fn(BF(dav1d_ipred_dc_top, neon));
33*c0909341SAndroid Build Coastguard Worker decl_angular_ipred_fn(BF(dav1d_ipred_dc_left, neon));
34*c0909341SAndroid Build Coastguard Worker decl_angular_ipred_fn(BF(dav1d_ipred_h, neon));
35*c0909341SAndroid Build Coastguard Worker decl_angular_ipred_fn(BF(dav1d_ipred_v, neon));
36*c0909341SAndroid Build Coastguard Worker decl_angular_ipred_fn(BF(dav1d_ipred_paeth, neon));
37*c0909341SAndroid Build Coastguard Worker decl_angular_ipred_fn(BF(dav1d_ipred_smooth, neon));
38*c0909341SAndroid Build Coastguard Worker decl_angular_ipred_fn(BF(dav1d_ipred_smooth_v, neon));
39*c0909341SAndroid Build Coastguard Worker decl_angular_ipred_fn(BF(dav1d_ipred_smooth_h, neon));
40*c0909341SAndroid Build Coastguard Worker decl_angular_ipred_fn(BF(dav1d_ipred_filter, neon));
41*c0909341SAndroid Build Coastguard Worker 
42*c0909341SAndroid Build Coastguard Worker decl_cfl_pred_fn(BF(dav1d_ipred_cfl, neon));
43*c0909341SAndroid Build Coastguard Worker decl_cfl_pred_fn(BF(dav1d_ipred_cfl_128, neon));
44*c0909341SAndroid Build Coastguard Worker decl_cfl_pred_fn(BF(dav1d_ipred_cfl_top, neon));
45*c0909341SAndroid Build Coastguard Worker decl_cfl_pred_fn(BF(dav1d_ipred_cfl_left, neon));
46*c0909341SAndroid Build Coastguard Worker 
47*c0909341SAndroid Build Coastguard Worker decl_cfl_ac_fn(BF(dav1d_ipred_cfl_ac_420, neon));
48*c0909341SAndroid Build Coastguard Worker decl_cfl_ac_fn(BF(dav1d_ipred_cfl_ac_422, neon));
49*c0909341SAndroid Build Coastguard Worker decl_cfl_ac_fn(BF(dav1d_ipred_cfl_ac_444, neon));
50*c0909341SAndroid Build Coastguard Worker 
51*c0909341SAndroid Build Coastguard Worker decl_pal_pred_fn(BF(dav1d_pal_pred, neon));
52*c0909341SAndroid Build Coastguard Worker 
53*c0909341SAndroid Build Coastguard Worker #if ARCH_AARCH64
54*c0909341SAndroid Build Coastguard Worker void BF(dav1d_ipred_z1_upsample_edge, neon)(pixel *out, const int hsz,
55*c0909341SAndroid Build Coastguard Worker                                             const pixel *const in,
56*c0909341SAndroid Build Coastguard Worker                                             const int end HIGHBD_DECL_SUFFIX);
57*c0909341SAndroid Build Coastguard Worker void BF(dav1d_ipred_z1_filter_edge, neon)(pixel *out, const int sz,
58*c0909341SAndroid Build Coastguard Worker                                           const pixel *const in,
59*c0909341SAndroid Build Coastguard Worker                                           const int end, const int strength);
60*c0909341SAndroid Build Coastguard Worker void BF(dav1d_ipred_pixel_set, neon)(pixel *out, const pixel px,
61*c0909341SAndroid Build Coastguard Worker                                      const int n);
62*c0909341SAndroid Build Coastguard Worker void BF(dav1d_ipred_z1_fill1, neon)(pixel *dst, ptrdiff_t stride,
63*c0909341SAndroid Build Coastguard Worker                                     const pixel *const top, const int width,
64*c0909341SAndroid Build Coastguard Worker                                     const int height, const int dx,
65*c0909341SAndroid Build Coastguard Worker                                     const int max_base_x);
66*c0909341SAndroid Build Coastguard Worker void BF(dav1d_ipred_z1_fill2, neon)(pixel *dst, ptrdiff_t stride,
67*c0909341SAndroid Build Coastguard Worker                                     const pixel *const top, const int width,
68*c0909341SAndroid Build Coastguard Worker                                     const int height, const int dx,
69*c0909341SAndroid Build Coastguard Worker                                     const int max_base_x);
70*c0909341SAndroid Build Coastguard Worker 
/*
 * Z1 directional intra prediction: C glue around the NEON helpers.
 *
 * `angle` is a packed value: the low 9 bits hold the prediction angle,
 * bit 9 is the is_sm flag and the bits from 10 upwards enable the intra
 * edge filter.
 *
 * The edge reachable through topleft_in is upsampled, filtered or copied
 * verbatim into a stack buffer. The buffer is then padded after max_base_x
 * with its last valid pixel, and the matching z1 fill routine writes the
 * width x height block to dst.
 *
 * max_width and max_height are not used by this function.
 */
static void ipred_z1_neon(pixel *dst, const ptrdiff_t stride,
                          const pixel *const topleft_in,
                          const int width, const int height, int angle,
                          const int max_width, const int max_height
                          HIGHBD_DECL_SUFFIX)
{
    // Unpack the flags before reducing angle to its 9-bit value.
    const int is_sm = (angle >> 9) & 0x1;
    const int enable_intra_edge_filter = angle >> 10;
    angle &= 511;
    // Same kind of range check as in ipred_z2_neon()/ipred_z3_neon();
    // 90 - angle is passed on to the helpers below.
    assert(angle < 90);
    int dx = dav1d_dr_intra_derivative[angle >> 1];
    pixel top_out[64 + 64 + (64+15)*2 + 16];
    int max_base_x;
    const int upsample_above = enable_intra_edge_filter ?
        get_upsample(width + height, 90 - angle, is_sm) : 0;
    if (upsample_above) {
        BF(dav1d_ipred_z1_upsample_edge, neon)(top_out, width + height,
                                               topleft_in,
                                               width + imin(width, height)
                                               HIGHBD_TAIL_SUFFIX);
        max_base_x = 2 * (width + height) - 2;
        // The upsampled edge is walked at twice the step.
        dx <<= 1;
    } else {
        const int filter_strength = enable_intra_edge_filter ?
            get_filter_strength(width + height, 90 - angle, is_sm) : 0;
        if (filter_strength) {
            BF(dav1d_ipred_z1_filter_edge, neon)(top_out, width + height,
                                                 topleft_in,
                                                 width + imin(width, height),
                                                 filter_strength);
            max_base_x = width + height - 1;
        } else {
            // No preprocessing: copy the pixels following topleft_in[0].
            max_base_x = width + imin(width, height) - 1;
            memcpy(top_out, &topleft_in[1], (max_base_x + 1) * sizeof(pixel));
        }
    }
    const int base_inc = 1 + upsample_above;
    int pad_pixels = width + 15; // max(dx >> 6) == 15
    // Replicate the last valid edge pixel behind max_base_x.
    BF(dav1d_ipred_pixel_set, neon)(&top_out[max_base_x + 1],
                                    top_out[max_base_x], pad_pixels * base_inc);
    if (upsample_above)
        BF(dav1d_ipred_z1_fill2, neon)(dst, stride, top_out, width, height,
                                       dx, max_base_x);
    else
        BF(dav1d_ipred_z1_fill1, neon)(dst, stride, top_out, width, height,
                                       dx, max_base_x);
}
117*c0909341SAndroid Build Coastguard Worker 
118*c0909341SAndroid Build Coastguard Worker void BF(dav1d_ipred_reverse, neon)(pixel *dst, const pixel *const src,
119*c0909341SAndroid Build Coastguard Worker                                    const int n);
120*c0909341SAndroid Build Coastguard Worker 
121*c0909341SAndroid Build Coastguard Worker void BF(dav1d_ipred_z2_upsample_edge, neon)(pixel *out, const int sz,
122*c0909341SAndroid Build Coastguard Worker                                             const pixel *const in
123*c0909341SAndroid Build Coastguard Worker                                             HIGHBD_DECL_SUFFIX);
124*c0909341SAndroid Build Coastguard Worker 
125*c0909341SAndroid Build Coastguard Worker void BF(dav1d_ipred_z2_fill1, neon)(pixel *dst, ptrdiff_t stride,
126*c0909341SAndroid Build Coastguard Worker                                     const pixel *const top,
127*c0909341SAndroid Build Coastguard Worker                                     const pixel *const left,
128*c0909341SAndroid Build Coastguard Worker                                     const int width, const int height,
129*c0909341SAndroid Build Coastguard Worker                                     const int dx, const int dy);
130*c0909341SAndroid Build Coastguard Worker void BF(dav1d_ipred_z2_fill2, neon)(pixel *dst, ptrdiff_t stride,
131*c0909341SAndroid Build Coastguard Worker                                     const pixel *const top,
132*c0909341SAndroid Build Coastguard Worker                                     const pixel *const left,
133*c0909341SAndroid Build Coastguard Worker                                     const int width, const int height,
134*c0909341SAndroid Build Coastguard Worker                                     const int dx, const int dy);
135*c0909341SAndroid Build Coastguard Worker void BF(dav1d_ipred_z2_fill3, neon)(pixel *dst, ptrdiff_t stride,
136*c0909341SAndroid Build Coastguard Worker                                     const pixel *const top,
137*c0909341SAndroid Build Coastguard Worker                                     const pixel *const left,
138*c0909341SAndroid Build Coastguard Worker                                     const int width, const int height,
139*c0909341SAndroid Build Coastguard Worker                                     const int dx, const int dy);
140*c0909341SAndroid Build Coastguard Worker 
/*
 * Z2 directional intra prediction (90 < angle < 180): C glue around the
 * NEON helpers.
 *
 * `angle` is a packed value: the low 9 bits hold the prediction angle,
 * bit 9 is the is_sm flag and the bits from 10 upwards enable the intra
 * edge filter.
 *
 * Both the top and the left edge are prepared independently (upsampled,
 * filtered or copied) into one shared stack buffer, then one of three fill
 * routines is chosen depending on which edge, if any, was upsampled.
 * max_width/max_height limit how many edge pixels are filtered; the pixels
 * beyond that limit are copied unfiltered.
 */
static void ipred_z2_neon(pixel *dst, const ptrdiff_t stride,
                          const pixel *const topleft_in,
                          const int width, const int height, int angle,
                          const int max_width, const int max_height
                          HIGHBD_DECL_SUFFIX)
{
    // Unpack the flags before reducing angle to its 9-bit value.
    const int is_sm = (angle >> 9) & 0x1;
    const int enable_intra_edge_filter = angle >> 10;
    angle &= 511;
    assert(angle > 90 && angle < 180);
    int dy = dav1d_dr_intra_derivative[(angle - 90) >> 1];
    int dx = dav1d_dr_intra_derivative[(180 - angle) >> 1];
    const int upsample_left = enable_intra_edge_filter ?
        get_upsample(width + height, 180 - angle, is_sm) : 0;
    const int upsample_above = enable_intra_edge_filter ?
        get_upsample(width + height, angle - 90, is_sm) : 0;
    // One buffer split into three (64+1)-pixel regions, in memory order:
    // flipped, top, left.
    pixel buf[3*(64+1)];
    pixel *left = &buf[2*(64+1)];
    // The asm can underread below the start of top[] and left[]; to avoid
    // surprising behaviour, make sure this is within the allocated stack space.
    pixel *top = &buf[1*(64+1)];
    pixel *flipped = &buf[0*(64+1)];

    // Prepare the top edge.
    if (upsample_above) {
        BF(dav1d_ipred_z2_upsample_edge, neon)(top, width, topleft_in
                                               HIGHBD_TAIL_SUFFIX);
        dx <<= 1;
    } else {
        const int filter_strength = enable_intra_edge_filter ?
            get_filter_strength(width + height, angle - 90, is_sm) : 0;

        if (filter_strength) {
            BF(dav1d_ipred_z1_filter_edge, neon)(&top[1], imin(max_width, width),
                                                 topleft_in, width,
                                                 filter_strength);
            // Pixels past max_width are taken over unfiltered.
            if (max_width < width)
                memcpy(&top[1 + max_width], &topleft_in[1 + max_width],
                       (width - max_width) * sizeof(pixel));
        } else {
            pixel_copy(&top[1], &topleft_in[1], width);
        }
    }
    // Prepare the left edge; it is reversed first so that the helpers can
    // process it like a top edge.
    if (upsample_left) {
        flipped[0] = topleft_in[0];
        BF(dav1d_ipred_reverse, neon)(&flipped[1], &topleft_in[0],
                                      height);
        BF(dav1d_ipred_z2_upsample_edge, neon)(left, height, flipped
                                               HIGHBD_TAIL_SUFFIX);
        dy <<= 1;
    } else {
        const int filter_strength = enable_intra_edge_filter ?
            get_filter_strength(width + height, 180 - angle, is_sm) : 0;

        if (filter_strength) {
            flipped[0] = topleft_in[0];
            BF(dav1d_ipred_reverse, neon)(&flipped[1], &topleft_in[0],
                                          height);
            BF(dav1d_ipred_z1_filter_edge, neon)(&left[1], imin(max_height, height),
                                                 flipped, height,
                                                 filter_strength);
            // Pixels past max_height are taken over unfiltered.
            if (max_height < height)
                memcpy(&left[1 + max_height], &flipped[1 + max_height],
                       (height - max_height) * sizeof(pixel));
        } else {
            BF(dav1d_ipred_reverse, neon)(&left[1], &topleft_in[0],
                                          height);
        }
    }
    // Both prepared edges start with the top-left pixel; this is stored
    // after the edge preparation above.
    top[0] = left[0] = *topleft_in;

    assert(!(upsample_above && upsample_left));
    if (!upsample_above && !upsample_left) {
        BF(dav1d_ipred_z2_fill1, neon)(dst, stride, top, left, width, height,
                                       dx, dy);
    } else if (upsample_above) {
        BF(dav1d_ipred_z2_fill2, neon)(dst, stride, top, left, width, height,
                                       dx, dy);
    } else /*if (upsample_left)*/ {
        BF(dav1d_ipred_z2_fill3, neon)(dst, stride, top, left, width, height,
                                       dx, dy);
    }
}
223*c0909341SAndroid Build Coastguard Worker 
224*c0909341SAndroid Build Coastguard Worker void BF(dav1d_ipred_z3_fill1, neon)(pixel *dst, ptrdiff_t stride,
225*c0909341SAndroid Build Coastguard Worker                                     const pixel *const left, const int width,
226*c0909341SAndroid Build Coastguard Worker                                     const int height, const int dy,
227*c0909341SAndroid Build Coastguard Worker                                     const int max_base_y);
228*c0909341SAndroid Build Coastguard Worker void BF(dav1d_ipred_z3_fill2, neon)(pixel *dst, ptrdiff_t stride,
229*c0909341SAndroid Build Coastguard Worker                                     const pixel *const left, const int width,
230*c0909341SAndroid Build Coastguard Worker                                     const int height, const int dy,
231*c0909341SAndroid Build Coastguard Worker                                     const int max_base_y);
232*c0909341SAndroid Build Coastguard Worker 
ipred_z3_neon(pixel * dst,const ptrdiff_t stride,const pixel * const topleft_in,const int width,const int height,int angle,const int max_width,const int max_height HIGHBD_DECL_SUFFIX)233*c0909341SAndroid Build Coastguard Worker static void ipred_z3_neon(pixel *dst, const ptrdiff_t stride,
234*c0909341SAndroid Build Coastguard Worker                           const pixel *const topleft_in,
235*c0909341SAndroid Build Coastguard Worker                           const int width, const int height, int angle,
236*c0909341SAndroid Build Coastguard Worker                           const int max_width, const int max_height
237*c0909341SAndroid Build Coastguard Worker                           HIGHBD_DECL_SUFFIX)
238*c0909341SAndroid Build Coastguard Worker {
239*c0909341SAndroid Build Coastguard Worker     const int is_sm = (angle >> 9) & 0x1;
240*c0909341SAndroid Build Coastguard Worker     const int enable_intra_edge_filter = angle >> 10;
241*c0909341SAndroid Build Coastguard Worker     angle &= 511;
242*c0909341SAndroid Build Coastguard Worker     assert(angle > 180);
243*c0909341SAndroid Build Coastguard Worker     int dy = dav1d_dr_intra_derivative[(270 - angle) >> 1];
244*c0909341SAndroid Build Coastguard Worker     pixel flipped[64 + 64 + 16];
245*c0909341SAndroid Build Coastguard Worker     pixel left_out[64 + 64 + (64+15)*2];
246*c0909341SAndroid Build Coastguard Worker     int max_base_y;
247*c0909341SAndroid Build Coastguard Worker     const int upsample_left = enable_intra_edge_filter ?
248*c0909341SAndroid Build Coastguard Worker         get_upsample(width + height, angle - 180, is_sm) : 0;
249*c0909341SAndroid Build Coastguard Worker     if (upsample_left) {
250*c0909341SAndroid Build Coastguard Worker         flipped[0] = topleft_in[0];
251*c0909341SAndroid Build Coastguard Worker         BF(dav1d_ipred_reverse, neon)(&flipped[1], &topleft_in[0],
252*c0909341SAndroid Build Coastguard Worker                                       height + imax(width, height));
253*c0909341SAndroid Build Coastguard Worker         BF(dav1d_ipred_z1_upsample_edge, neon)(left_out, width + height,
254*c0909341SAndroid Build Coastguard Worker                                                flipped,
255*c0909341SAndroid Build Coastguard Worker                                                height + imin(width, height)
256*c0909341SAndroid Build Coastguard Worker                                                HIGHBD_TAIL_SUFFIX);
257*c0909341SAndroid Build Coastguard Worker         max_base_y = 2 * (width + height) - 2;
258*c0909341SAndroid Build Coastguard Worker         dy <<= 1;
259*c0909341SAndroid Build Coastguard Worker     } else {
260*c0909341SAndroid Build Coastguard Worker         const int filter_strength = enable_intra_edge_filter ?
261*c0909341SAndroid Build Coastguard Worker             get_filter_strength(width + height, angle - 180, is_sm) : 0;
262*c0909341SAndroid Build Coastguard Worker 
263*c0909341SAndroid Build Coastguard Worker         if (filter_strength) {
264*c0909341SAndroid Build Coastguard Worker             flipped[0] = topleft_in[0];
265*c0909341SAndroid Build Coastguard Worker             BF(dav1d_ipred_reverse, neon)(&flipped[1], &topleft_in[0],
266*c0909341SAndroid Build Coastguard Worker                                           height + imax(width, height));
267*c0909341SAndroid Build Coastguard Worker             BF(dav1d_ipred_z1_filter_edge, neon)(left_out, width + height,
268*c0909341SAndroid Build Coastguard Worker                                                  flipped,
269*c0909341SAndroid Build Coastguard Worker                                                  height + imin(width, height),
270*c0909341SAndroid Build Coastguard Worker                                                  filter_strength);
271*c0909341SAndroid Build Coastguard Worker             max_base_y = width + height - 1;
272*c0909341SAndroid Build Coastguard Worker         } else {
273*c0909341SAndroid Build Coastguard Worker             BF(dav1d_ipred_reverse, neon)(left_out, &topleft_in[0],
274*c0909341SAndroid Build Coastguard Worker                                           height + imin(width, height));
275*c0909341SAndroid Build Coastguard Worker             max_base_y = height + imin(width, height) - 1;
276*c0909341SAndroid Build Coastguard Worker         }
277*c0909341SAndroid Build Coastguard Worker     }
278*c0909341SAndroid Build Coastguard Worker     const int base_inc = 1 + upsample_left;
279*c0909341SAndroid Build Coastguard Worker     // The tbx based implementation needs left[] to have 64 bytes intitialized,
280*c0909341SAndroid Build Coastguard Worker     // the other implementation can read height + max(dy >> 6) past the end.
281*c0909341SAndroid Build Coastguard Worker     int pad_pixels = imax(64 - max_base_y - 1, height + 15);
282*c0909341SAndroid Build Coastguard Worker 
283*c0909341SAndroid Build Coastguard Worker     BF(dav1d_ipred_pixel_set, neon)(&left_out[max_base_y + 1],
284*c0909341SAndroid Build Coastguard Worker                                     left_out[max_base_y], pad_pixels * base_inc);
285*c0909341SAndroid Build Coastguard Worker     if (upsample_left)
286*c0909341SAndroid Build Coastguard Worker         BF(dav1d_ipred_z3_fill2, neon)(dst, stride, left_out, width, height,
287*c0909341SAndroid Build Coastguard Worker                                        dy, max_base_y);
288*c0909341SAndroid Build Coastguard Worker     else
289*c0909341SAndroid Build Coastguard Worker         BF(dav1d_ipred_z3_fill1, neon)(dst, stride, left_out, width, height,
290*c0909341SAndroid Build Coastguard Worker                                        dy, max_base_y);
291*c0909341SAndroid Build Coastguard Worker }
292*c0909341SAndroid Build Coastguard Worker #endif
293*c0909341SAndroid Build Coastguard Worker 
intra_pred_dsp_init_arm(Dav1dIntraPredDSPContext * const c)294*c0909341SAndroid Build Coastguard Worker static ALWAYS_INLINE void intra_pred_dsp_init_arm(Dav1dIntraPredDSPContext *const c) {
295*c0909341SAndroid Build Coastguard Worker     const unsigned flags = dav1d_get_cpu_flags();
296*c0909341SAndroid Build Coastguard Worker 
297*c0909341SAndroid Build Coastguard Worker     if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
298*c0909341SAndroid Build Coastguard Worker 
299*c0909341SAndroid Build Coastguard Worker     c->intra_pred[DC_PRED]       = BF(dav1d_ipred_dc, neon);
300*c0909341SAndroid Build Coastguard Worker     c->intra_pred[DC_128_PRED]   = BF(dav1d_ipred_dc_128, neon);
301*c0909341SAndroid Build Coastguard Worker     c->intra_pred[TOP_DC_PRED]   = BF(dav1d_ipred_dc_top, neon);
302*c0909341SAndroid Build Coastguard Worker     c->intra_pred[LEFT_DC_PRED]  = BF(dav1d_ipred_dc_left, neon);
303*c0909341SAndroid Build Coastguard Worker     c->intra_pred[HOR_PRED]      = BF(dav1d_ipred_h, neon);
304*c0909341SAndroid Build Coastguard Worker     c->intra_pred[VERT_PRED]     = BF(dav1d_ipred_v, neon);
305*c0909341SAndroid Build Coastguard Worker     c->intra_pred[PAETH_PRED]    = BF(dav1d_ipred_paeth, neon);
306*c0909341SAndroid Build Coastguard Worker     c->intra_pred[SMOOTH_PRED]   = BF(dav1d_ipred_smooth, neon);
307*c0909341SAndroid Build Coastguard Worker     c->intra_pred[SMOOTH_V_PRED] = BF(dav1d_ipred_smooth_v, neon);
308*c0909341SAndroid Build Coastguard Worker     c->intra_pred[SMOOTH_H_PRED] = BF(dav1d_ipred_smooth_h, neon);
309*c0909341SAndroid Build Coastguard Worker #if ARCH_AARCH64
310*c0909341SAndroid Build Coastguard Worker     c->intra_pred[Z1_PRED]       = ipred_z1_neon;
311*c0909341SAndroid Build Coastguard Worker     c->intra_pred[Z2_PRED]       = ipred_z2_neon;
312*c0909341SAndroid Build Coastguard Worker     c->intra_pred[Z3_PRED]       = ipred_z3_neon;
313*c0909341SAndroid Build Coastguard Worker #endif
314*c0909341SAndroid Build Coastguard Worker     c->intra_pred[FILTER_PRED]   = BF(dav1d_ipred_filter, neon);
315*c0909341SAndroid Build Coastguard Worker 
316*c0909341SAndroid Build Coastguard Worker     c->cfl_pred[DC_PRED]         = BF(dav1d_ipred_cfl, neon);
317*c0909341SAndroid Build Coastguard Worker     c->cfl_pred[DC_128_PRED]     = BF(dav1d_ipred_cfl_128, neon);
318*c0909341SAndroid Build Coastguard Worker     c->cfl_pred[TOP_DC_PRED]     = BF(dav1d_ipred_cfl_top, neon);
319*c0909341SAndroid Build Coastguard Worker     c->cfl_pred[LEFT_DC_PRED]    = BF(dav1d_ipred_cfl_left, neon);
320*c0909341SAndroid Build Coastguard Worker 
321*c0909341SAndroid Build Coastguard Worker     c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = BF(dav1d_ipred_cfl_ac_420, neon);
322*c0909341SAndroid Build Coastguard Worker     c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = BF(dav1d_ipred_cfl_ac_422, neon);
323*c0909341SAndroid Build Coastguard Worker     c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = BF(dav1d_ipred_cfl_ac_444, neon);
324*c0909341SAndroid Build Coastguard Worker 
325*c0909341SAndroid Build Coastguard Worker     c->pal_pred                  = BF(dav1d_pal_pred, neon);
326*c0909341SAndroid Build Coastguard Worker }
327