xref: /aosp_15_r20/external/libdav1d/src/ppc/cdef_tmpl.c (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
/*
 * Copyright © 2019, Luca Barbato
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
26*c0909341SAndroid Build Coastguard Worker 
27*c0909341SAndroid Build Coastguard Worker #include "src/ppc/dav1d_types.h"
28*c0909341SAndroid Build Coastguard Worker #include "src/ppc/cdef.h"
29*c0909341SAndroid Build Coastguard Worker 
30*c0909341SAndroid Build Coastguard Worker #if BITDEPTH == 8
vconstrain(const i16x8 diff,const int16_t threshold,const uint16_t shift)31*c0909341SAndroid Build Coastguard Worker static inline i16x8 vconstrain(const i16x8 diff, const int16_t threshold,
32*c0909341SAndroid Build Coastguard Worker                                const uint16_t shift)
33*c0909341SAndroid Build Coastguard Worker {
34*c0909341SAndroid Build Coastguard Worker     const i16x8 zero = vec_splat_s16(0);
35*c0909341SAndroid Build Coastguard Worker     if (!threshold) return zero;
36*c0909341SAndroid Build Coastguard Worker     const i16x8 abs_diff = vec_abs(diff);
37*c0909341SAndroid Build Coastguard Worker     const b16x8 mask = vec_cmplt(diff, zero);
38*c0909341SAndroid Build Coastguard Worker     const i16x8 thr = vec_splats(threshold);
39*c0909341SAndroid Build Coastguard Worker     const i16x8 sub = vec_sub(thr, vec_sra(abs_diff, vec_splats(shift)));
40*c0909341SAndroid Build Coastguard Worker     const i16x8 max = vec_max(zero, sub);
41*c0909341SAndroid Build Coastguard Worker     const i16x8 min = vec_min(abs_diff, max);
42*c0909341SAndroid Build Coastguard Worker     const i16x8 neg = vec_sub(zero, min);
43*c0909341SAndroid Build Coastguard Worker     return vec_sel(min, neg, mask);
44*c0909341SAndroid Build Coastguard Worker }
45*c0909341SAndroid Build Coastguard Worker 
copy4xN(uint16_t * tmp,const uint8_t * src,const ptrdiff_t src_stride,const uint8_t (* left)[2],const uint8_t * const top,const uint8_t * const bottom,const int w,const int h,const enum CdefEdgeFlags edges)46*c0909341SAndroid Build Coastguard Worker static inline void copy4xN(uint16_t *tmp,
47*c0909341SAndroid Build Coastguard Worker                            const uint8_t *src, const ptrdiff_t src_stride,
48*c0909341SAndroid Build Coastguard Worker                            const uint8_t (*left)[2], const uint8_t *const top,
49*c0909341SAndroid Build Coastguard Worker                            const uint8_t *const bottom, const int w, const int h,
50*c0909341SAndroid Build Coastguard Worker                            const enum CdefEdgeFlags edges)
51*c0909341SAndroid Build Coastguard Worker {
52*c0909341SAndroid Build Coastguard Worker     const u16x8 fill = vec_splats((uint16_t)INT16_MAX);
53*c0909341SAndroid Build Coastguard Worker 
54*c0909341SAndroid Build Coastguard Worker     u16x8 l0;
55*c0909341SAndroid Build Coastguard Worker     u16x8 l1;
56*c0909341SAndroid Build Coastguard Worker 
57*c0909341SAndroid Build Coastguard Worker     int y_start = -2, y_end = h + 2;
58*c0909341SAndroid Build Coastguard Worker 
59*c0909341SAndroid Build Coastguard Worker     // Copy top and bottom first
60*c0909341SAndroid Build Coastguard Worker     if (!(edges & CDEF_HAVE_TOP)) {
61*c0909341SAndroid Build Coastguard Worker         l0 = fill;
62*c0909341SAndroid Build Coastguard Worker         l1 = fill;
63*c0909341SAndroid Build Coastguard Worker         y_start = 0;
64*c0909341SAndroid Build Coastguard Worker     } else {
65*c0909341SAndroid Build Coastguard Worker         l0 = u8h_to_u16(vec_vsx_ld(0, top + 0 * src_stride - 2));
66*c0909341SAndroid Build Coastguard Worker         l1 = u8h_to_u16(vec_vsx_ld(0, top + 1 * src_stride - 2));
67*c0909341SAndroid Build Coastguard Worker     }
68*c0909341SAndroid Build Coastguard Worker 
69*c0909341SAndroid Build Coastguard Worker     vec_st(l0, 0, tmp - 2 * 8);
70*c0909341SAndroid Build Coastguard Worker     vec_st(l1, 0, tmp - 1 * 8);
71*c0909341SAndroid Build Coastguard Worker 
72*c0909341SAndroid Build Coastguard Worker     if (!(edges & CDEF_HAVE_BOTTOM)) {
73*c0909341SAndroid Build Coastguard Worker         l0 = fill;
74*c0909341SAndroid Build Coastguard Worker         l1 = fill;
75*c0909341SAndroid Build Coastguard Worker         y_end -= 2;
76*c0909341SAndroid Build Coastguard Worker     } else {
77*c0909341SAndroid Build Coastguard Worker         l0 = u8h_to_u16(vec_vsx_ld(0, bottom + 0 * src_stride - 2));
78*c0909341SAndroid Build Coastguard Worker         l1 = u8h_to_u16(vec_vsx_ld(0, bottom + 1 * src_stride - 2));
79*c0909341SAndroid Build Coastguard Worker     }
80*c0909341SAndroid Build Coastguard Worker 
81*c0909341SAndroid Build Coastguard Worker     vec_st(l0, 0, tmp + (h + 0) * 8);
82*c0909341SAndroid Build Coastguard Worker     vec_st(l1, 0, tmp + (h + 1) * 8);
83*c0909341SAndroid Build Coastguard Worker 
84*c0909341SAndroid Build Coastguard Worker     int y_with_left_edge = 0;
85*c0909341SAndroid Build Coastguard Worker     if (!(edges & CDEF_HAVE_LEFT)) {
86*c0909341SAndroid Build Coastguard Worker         u16x8 l = u8h_to_u16(vec_vsx_ld(0, src));
87*c0909341SAndroid Build Coastguard Worker         vec_vsx_st(l, 0, tmp + 2);
88*c0909341SAndroid Build Coastguard Worker 
89*c0909341SAndroid Build Coastguard Worker         y_with_left_edge = 1;
90*c0909341SAndroid Build Coastguard Worker     }
91*c0909341SAndroid Build Coastguard Worker 
92*c0909341SAndroid Build Coastguard Worker     for (int y = y_with_left_edge; y < h; y++) {
93*c0909341SAndroid Build Coastguard Worker         u16x8 l = u8h_to_u16(vec_vsx_ld(0, src - 2 + y * src_stride));
94*c0909341SAndroid Build Coastguard Worker         vec_st(l, 0, tmp + y * 8);
95*c0909341SAndroid Build Coastguard Worker     }
96*c0909341SAndroid Build Coastguard Worker 
97*c0909341SAndroid Build Coastguard Worker     if (!(edges & CDEF_HAVE_LEFT)) {
98*c0909341SAndroid Build Coastguard Worker         for (int y = y_start; y < y_end; y++) {
99*c0909341SAndroid Build Coastguard Worker             tmp[y * 8] = INT16_MAX;
100*c0909341SAndroid Build Coastguard Worker             tmp[1 + y * 8] = INT16_MAX;
101*c0909341SAndroid Build Coastguard Worker         }
102*c0909341SAndroid Build Coastguard Worker     } else {
103*c0909341SAndroid Build Coastguard Worker         for (int y = 0; y < h; y++) {
104*c0909341SAndroid Build Coastguard Worker             tmp[y * 8] = left[y][0];
105*c0909341SAndroid Build Coastguard Worker             tmp[1 + y * 8] = left[y][1];
106*c0909341SAndroid Build Coastguard Worker         }
107*c0909341SAndroid Build Coastguard Worker     }
108*c0909341SAndroid Build Coastguard Worker     if (!(edges & CDEF_HAVE_RIGHT)) {
109*c0909341SAndroid Build Coastguard Worker         for (int y = y_start; y < y_end; y++) {
110*c0909341SAndroid Build Coastguard Worker             tmp[- 2 + (y + 1) * 8] = INT16_MAX;
111*c0909341SAndroid Build Coastguard Worker             tmp[- 1 + (y + 1) * 8] = INT16_MAX;
112*c0909341SAndroid Build Coastguard Worker         }
113*c0909341SAndroid Build Coastguard Worker     }
114*c0909341SAndroid Build Coastguard Worker }
115*c0909341SAndroid Build Coastguard Worker 
copy8xN(uint16_t * tmp,const uint8_t * src,const ptrdiff_t src_stride,const uint8_t (* left)[2],const uint8_t * const top,const uint8_t * const bottom,const int w,const int h,const enum CdefEdgeFlags edges)116*c0909341SAndroid Build Coastguard Worker static inline void copy8xN(uint16_t *tmp,
117*c0909341SAndroid Build Coastguard Worker                            const uint8_t *src, const ptrdiff_t src_stride,
118*c0909341SAndroid Build Coastguard Worker                            const uint8_t (*left)[2], const uint8_t *const top,
119*c0909341SAndroid Build Coastguard Worker                            const uint8_t *const bottom, const int w, const int h,
120*c0909341SAndroid Build Coastguard Worker                            const enum CdefEdgeFlags edges)
121*c0909341SAndroid Build Coastguard Worker {
122*c0909341SAndroid Build Coastguard Worker     const u16x8 fill = vec_splats((uint16_t)INT16_MAX);
123*c0909341SAndroid Build Coastguard Worker 
124*c0909341SAndroid Build Coastguard Worker     u16x8 l0h, l0l;
125*c0909341SAndroid Build Coastguard Worker     u16x8 l1h, l1l;
126*c0909341SAndroid Build Coastguard Worker 
127*c0909341SAndroid Build Coastguard Worker     int y_start = -2, y_end = h + 2;
128*c0909341SAndroid Build Coastguard Worker 
129*c0909341SAndroid Build Coastguard Worker     // Copy top and bottom first
130*c0909341SAndroid Build Coastguard Worker     if (!(edges & CDEF_HAVE_TOP)) {
131*c0909341SAndroid Build Coastguard Worker         l0h = fill;
132*c0909341SAndroid Build Coastguard Worker         l0l = fill;
133*c0909341SAndroid Build Coastguard Worker         l1h = fill;
134*c0909341SAndroid Build Coastguard Worker         l1l = fill;
135*c0909341SAndroid Build Coastguard Worker         y_start = 0;
136*c0909341SAndroid Build Coastguard Worker     } else {
137*c0909341SAndroid Build Coastguard Worker         u8x16 l0 = vec_vsx_ld(0, top + 0 * src_stride - 2);
138*c0909341SAndroid Build Coastguard Worker         u8x16 l1 = vec_vsx_ld(0, top + 1 * src_stride - 2);
139*c0909341SAndroid Build Coastguard Worker         l0h = u8h_to_u16(l0);
140*c0909341SAndroid Build Coastguard Worker         l0l = u8l_to_u16(l0);
141*c0909341SAndroid Build Coastguard Worker         l1h = u8h_to_u16(l1);
142*c0909341SAndroid Build Coastguard Worker         l1l = u8l_to_u16(l1);
143*c0909341SAndroid Build Coastguard Worker     }
144*c0909341SAndroid Build Coastguard Worker 
145*c0909341SAndroid Build Coastguard Worker     vec_st(l0h, 0, tmp - 4 * 8);
146*c0909341SAndroid Build Coastguard Worker     vec_st(l0l, 0, tmp - 3 * 8);
147*c0909341SAndroid Build Coastguard Worker     vec_st(l1h, 0, tmp - 2 * 8);
148*c0909341SAndroid Build Coastguard Worker     vec_st(l1l, 0, tmp - 1 * 8);
149*c0909341SAndroid Build Coastguard Worker 
150*c0909341SAndroid Build Coastguard Worker     if (!(edges & CDEF_HAVE_BOTTOM)) {
151*c0909341SAndroid Build Coastguard Worker         l0h = fill;
152*c0909341SAndroid Build Coastguard Worker         l0l = fill;
153*c0909341SAndroid Build Coastguard Worker         l1h = fill;
154*c0909341SAndroid Build Coastguard Worker         l1l = fill;
155*c0909341SAndroid Build Coastguard Worker         y_end -= 2;
156*c0909341SAndroid Build Coastguard Worker     } else {
157*c0909341SAndroid Build Coastguard Worker         u8x16 l0 = vec_vsx_ld(0, bottom + 0 * src_stride - 2);
158*c0909341SAndroid Build Coastguard Worker         u8x16 l1 = vec_vsx_ld(0, bottom + 1 * src_stride - 2);
159*c0909341SAndroid Build Coastguard Worker         l0h = u8h_to_u16(l0);
160*c0909341SAndroid Build Coastguard Worker         l0l = u8l_to_u16(l0);
161*c0909341SAndroid Build Coastguard Worker         l1h = u8h_to_u16(l1);
162*c0909341SAndroid Build Coastguard Worker         l1l = u8l_to_u16(l1);
163*c0909341SAndroid Build Coastguard Worker     }
164*c0909341SAndroid Build Coastguard Worker 
165*c0909341SAndroid Build Coastguard Worker     vec_st(l0h, 0, tmp + (h + 0) * 16);
166*c0909341SAndroid Build Coastguard Worker     vec_st(l0l, 0, tmp + (h + 0) * 16 + 8);
167*c0909341SAndroid Build Coastguard Worker     vec_st(l1h, 0, tmp + (h + 1) * 16);
168*c0909341SAndroid Build Coastguard Worker     vec_st(l1l, 0, tmp + (h + 1) * 16 + 8);
169*c0909341SAndroid Build Coastguard Worker 
170*c0909341SAndroid Build Coastguard Worker     int y_with_left_edge = 0;
171*c0909341SAndroid Build Coastguard Worker     if (!(edges & CDEF_HAVE_LEFT)) {
172*c0909341SAndroid Build Coastguard Worker         u8x16 l = vec_vsx_ld(0, src);
173*c0909341SAndroid Build Coastguard Worker         u16x8 lh = u8h_to_u16(l);
174*c0909341SAndroid Build Coastguard Worker         u16x8 ll = u8l_to_u16(l);
175*c0909341SAndroid Build Coastguard Worker         vec_vsx_st(lh, 0, tmp + 2);
176*c0909341SAndroid Build Coastguard Worker         vec_vsx_st(ll, 0, tmp + 8 + 2);
177*c0909341SAndroid Build Coastguard Worker 
178*c0909341SAndroid Build Coastguard Worker         y_with_left_edge = 1;
179*c0909341SAndroid Build Coastguard Worker     }
180*c0909341SAndroid Build Coastguard Worker 
181*c0909341SAndroid Build Coastguard Worker     for (int y = y_with_left_edge; y < h; y++) {
182*c0909341SAndroid Build Coastguard Worker         u8x16 l = vec_vsx_ld(0, src - 2 + y * src_stride);
183*c0909341SAndroid Build Coastguard Worker         u16x8 lh = u8h_to_u16(l);
184*c0909341SAndroid Build Coastguard Worker         u16x8 ll = u8l_to_u16(l);
185*c0909341SAndroid Build Coastguard Worker         vec_st(lh, 0, tmp + y * 16);
186*c0909341SAndroid Build Coastguard Worker         vec_st(ll, 0, tmp + 8 + y * 16);
187*c0909341SAndroid Build Coastguard Worker     }
188*c0909341SAndroid Build Coastguard Worker 
189*c0909341SAndroid Build Coastguard Worker     if (!(edges & CDEF_HAVE_LEFT)) {
190*c0909341SAndroid Build Coastguard Worker         for (int y = y_start; y < y_end; y++) {
191*c0909341SAndroid Build Coastguard Worker             tmp[y * 16] = INT16_MAX;
192*c0909341SAndroid Build Coastguard Worker             tmp[1 + y * 16] = INT16_MAX;
193*c0909341SAndroid Build Coastguard Worker         }
194*c0909341SAndroid Build Coastguard Worker     } else {
195*c0909341SAndroid Build Coastguard Worker         for (int y = 0; y < h; y++) {
196*c0909341SAndroid Build Coastguard Worker             tmp[y * 16] = left[y][0];
197*c0909341SAndroid Build Coastguard Worker             tmp[1 + y * 16] = left[y][1];
198*c0909341SAndroid Build Coastguard Worker         }
199*c0909341SAndroid Build Coastguard Worker     }
200*c0909341SAndroid Build Coastguard Worker     if (!(edges & CDEF_HAVE_RIGHT)) {
201*c0909341SAndroid Build Coastguard Worker         for (int y = y_start; y < y_end; y++) {
202*c0909341SAndroid Build Coastguard Worker             tmp[- 6 + (y + 1) * 16] = INT16_MAX;
203*c0909341SAndroid Build Coastguard Worker             tmp[- 5 + (y + 1) * 16] = INT16_MAX;
204*c0909341SAndroid Build Coastguard Worker         }
205*c0909341SAndroid Build Coastguard Worker     }
206*c0909341SAndroid Build Coastguard Worker }
207*c0909341SAndroid Build Coastguard Worker 
/*
 * Per-lane max(a, b) that ignores padding in `a`: lanes of `a` equal to
 * INT16_MAX (the value the copy routines use for missing pixels) are
 * replaced by the matching lane of `b` before the maximum is taken, so those
 * lanes simply return `b`.
 */
static inline i16x8 max_mask(i16x8 a, i16x8 b) {
    const b16x8 is_padding = vec_cmpeq(a, vec_splats((int16_t)INT16_MAX));

    return vec_max(vec_sel(a, b, is_padding), b);
}
217*c0909341SAndroid Build Coastguard Worker 
/*
 * LOAD_PIX(addr): declare `px` as the eight 16-bit values at `addr` and a
 * zeroed accumulator `sum` in the enclosing scope.
 *
 * `addr` is parenthesized so that an expression argument keeps its meaning.
 */
#define LOAD_PIX(addr) \
    const i16x8 px = (i16x8)vec_vsx_ld(0, (addr)); \
    i16x8 sum = vec_splat_s16(0);

/*
 * LOAD_PIX4(addr): 4-wide variant. Loads the vectors at `addr` and
 * `addr + 8` (declared as `a` and `b`), merges them with vec_xxpermdi(.., 0)
 * into `px`, and declares a zeroed `sum`. STORE4 later writes lanes 0-3 and
 * lanes 4-7 of the result to two consecutive destination rows.
 */
#define LOAD_PIX4(addr) \
    const i16x8 a = (i16x8)vec_vsx_ld(0, (addr)); \
    const i16x8 b = (i16x8)vec_vsx_ld(0, (addr) + 8); \
    const i16x8 px = vec_xxpermdi(a, b, 0); \
    i16x8 sum = vec_splat_s16(0);
227*c0909341SAndroid Build Coastguard Worker 
/*
 * LOAD_DIR(p, addr, o0, o1): declare four tap vectors named <p>0..<p>3,
 * loaded from addr + o0, addr - o0, addr + o1 and addr - o1 respectively.
 *
 * `addr`, `o0` and `o1` are parenthesized so that expression arguments
 * (e.g. an offset written as `x - y`) are added/subtracted as a whole.
 */
#define LOAD_DIR(p, addr, o0, o1) \
    const i16x8 p ## 0 = (i16x8)vec_vsx_ld(0, (addr) + (o0)); \
    const i16x8 p ## 1 = (i16x8)vec_vsx_ld(0, (addr) - (o0)); \
    const i16x8 p ## 2 = (i16x8)vec_vsx_ld(0, (addr) + (o1)); \
    const i16x8 p ## 3 = (i16x8)vec_vsx_ld(0, (addr) - (o1));

/*
 * LOAD_DIR4(p, addr, o0, o1): 4-wide variant. Runs LOAD_DIR once at `addr`
 * (names <p>a0..3) and once at `addr + 8` (names <p>b0..3), then merges each
 * pair with vec_xxpermdi(.., 0) into <p>0..<p>3.
 */
#define LOAD_DIR4(p, addr, o0, o1) \
    LOAD_DIR(p ## a, (addr), (o0), (o1)) \
    LOAD_DIR(p ## b, (addr) + 8, (o0), (o1)) \
    const i16x8 p ## 0 = vec_xxpermdi(p ## a ## 0, p ## b ## 0, 0); \
    const i16x8 p ## 1 = vec_xxpermdi(p ## a ## 1, p ## b ## 1, 0); \
    const i16x8 p ## 2 = vec_xxpermdi(p ## a ## 2, p ## b ## 2, 0); \
    const i16x8 p ## 3 = vec_xxpermdi(p ## a ## 3, p ## b ## 3, 0);
241*c0909341SAndroid Build Coastguard Worker 
/*
 * CONSTRAIN(p, strength, shift): for each tap <p>0..<p>3 declare the
 * difference to the centre pixel `px` as <p>_d0..3 and its constrained value
 * (see vconstrain) as <p>_c0..3. Expects `px` in scope.
 */
#define CONSTRAIN(p, strength, shift) \
    const i16x8 p ## _d0 = vec_sub(p ## 0, px); \
    i16x8 p ## _c0 = vconstrain(p ## _d0, strength, shift); \
\
    const i16x8 p ## _d1 = vec_sub(p ## 1, px); \
    i16x8 p ## _c1 = vconstrain(p ## _d1, strength, shift); \
\
    const i16x8 p ## _d2 = vec_sub(p ## 2, px); \
    i16x8 p ## _c2 = vconstrain(p ## _d2, strength, shift); \
\
    const i16x8 p ## _d3 = vec_sub(p ## 3, px); \
    i16x8 p ## _c3 = vconstrain(p ## _d3, strength, shift);
252*c0909341SAndroid Build Coastguard Worker 
/*
 * SETUP_MINMAX: declare the running `max` and `min` vectors, both seeded
 * with the centre pixel `px`.
 *
 * The last line deliberately has no line continuation, so the macro ends
 * here regardless of what follows it in the file.
 */
#define SETUP_MINMAX \
    i16x8 max = px; \
    i16x8 min = px;
256*c0909341SAndroid Build Coastguard Worker 
/*
 * MIN_MAX(p): fold the four taps <p>0..<p>3 into the running `max` and `min`.
 * The maximum goes through max_mask so that INT16_MAX padding lanes do not
 * raise it; the minimum needs no such care since padding can never lower it.
 */
#define MIN_MAX(p) \
    max = max_mask(p ## 0, max); \
    max = max_mask(p ## 1, max); \
    max = max_mask(p ## 2, max); \
    max = max_mask(p ## 3, max); \
\
    min = vec_min(p ## 0, min); \
    min = vec_min(p ## 1, min); \
    min = vec_min(p ## 2, min); \
    min = vec_min(p ## 3, min);
266*c0909341SAndroid Build Coastguard Worker 
/*
 * MAKE_TAPS: declare the two primary tap weights as splatted vectors.
 * `tap_odd` is bit 0 of (pri_strength >> bitdepth_min_8); the weights are
 * tap0 = 4 - tap_odd and tap1 = 2 + tap_odd. Expects `pri_strength` and
 * `bitdepth_min_8` in the enclosing scope.
 */
#define MAKE_TAPS \
    const int16_t tap_odd = (pri_strength >> bitdepth_min_8) & 1; \
    const i16x8 tap0 = vec_splats((int16_t)(4 - tap_odd)); \
    const i16x8 tap1 = vec_splats((int16_t)(2 + tap_odd));
271*c0909341SAndroid Build Coastguard Worker 
/*
 * PRI_0_UPDATE_SUM(p): accumulate the constrained primary taps into `sum`,
 * weighting <p>_c0/<p>_c1 by tap0 and <p>_c2/<p>_c3 by tap1 (see MAKE_TAPS).
 */
#define PRI_0_UPDATE_SUM(p) \
    sum = vec_madd(tap0, p ## _c0, sum); \
    sum = vec_madd(tap0, p ## _c1, sum); \
    sum = vec_madd(tap1, p ## _c2, sum); \
    sum = vec_madd(tap1, p ## _c3, sum);

/*
 * UPDATE_SUM(p): accumulate the four constrained taps <p>_c0..3 into `sum`
 * with weight 1. Declares the pairwise partial sums <p>sum0 and <p>sum1.
 */
#define UPDATE_SUM(p) \
    const i16x8 p ## sum0 = vec_add(p ## _c0, p ## _c1); \
    const i16x8 p ## sum1 = vec_add(p ## _c2, p ## _c3); \
    sum = vec_add(sum, p ## sum0); \
    sum = vec_add(sum, p ## sum1);

/*
 * SEC_0_UPDATE_SUM(p): accumulate the four constrained taps <p>_c0..3 into
 * `sum`, each with weight 2.
 */
#define SEC_0_UPDATE_SUM(p) \
    sum = vec_madd(vec_splat_s16(2), p ## _c0, sum); \
    sum = vec_madd(vec_splat_s16(2), p ## _c1, sum); \
    sum = vec_madd(vec_splat_s16(2), p ## _c2, sum); \
    sum = vec_madd(vec_splat_s16(2), p ## _c3, sum);
289*c0909341SAndroid Build Coastguard Worker 
/*
 * BIAS: declare the per-lane rounding term `bias` added to `sum` before the
 * final >> 4 in the STORE* macros: 8 where sum >= 0 and 7 where sum < 0.
 *
 * The last line deliberately has no line continuation, so the macro ends
 * here regardless of what follows it in the file.
 */
#define BIAS \
    i16x8 bias = vec_and((i16x8)vec_cmplt(sum, vec_splat_s16(0)), vec_splat_s16(1)); \
    bias = vec_sub(vec_splat_s16(8), bias);
293*c0909341SAndroid Build Coastguard Worker 
/*
 * STORE4: write the eight lanes of `vdst` as two rows of four pixels
 * (lanes 0-3, then lanes 4-7). After each row, `tmp` advances by one 8-entry
 * scratch row and `dst` by one destination row.
 */
#define STORE4 \
    dst[0] = vdst[0]; \
    dst[1] = vdst[1]; \
    dst[2] = vdst[2]; \
    dst[3] = vdst[3]; \
\
    tmp += 8; \
    dst += PXSTRIDE(dst_stride); \
    dst[0] = vdst[4]; \
    dst[1] = vdst[5]; \
    dst[2] = vdst[6]; \
    dst[3] = vdst[7]; \
\
    tmp += 8; \
    dst += PXSTRIDE(dst_stride);

/*
 * STORE4_CLAMPED: compute px + ((sum + bias) >> 4), clamp it to the running
 * [min, max] range and store it with STORE4.
 */
#define STORE4_CLAMPED \
    BIAS \
    i16x8 unclamped = vec_add(px, vec_sra(vec_add(sum, bias), vec_splat_u16(4))); \
    i16x8 vdst = vec_max(vec_min(unclamped, max), min); \
    STORE4

/*
 * STORE4_UNCLAMPED: compute px + ((sum + bias) >> 4) and store it with
 * STORE4 without clamping.
 */
#define STORE4_UNCLAMPED \
    BIAS \
    i16x8 vdst = vec_add(px, vec_sra(vec_add(sum, bias), vec_splat_u16(4))); \
    STORE4
320*c0909341SAndroid Build Coastguard Worker 
/*
 * STORE8: write the eight lanes of `vdst` as one row of eight pixels, then
 * advance `tmp` by one 16-entry scratch row and `dst` by one destination row.
 */
#define STORE8 \
    dst[0] = vdst[0]; \
    dst[1] = vdst[1]; \
    dst[2] = vdst[2]; \
    dst[3] = vdst[3]; \
    dst[4] = vdst[4]; \
    dst[5] = vdst[5]; \
    dst[6] = vdst[6]; \
    dst[7] = vdst[7]; \
\
    tmp += 16; \
    dst += PXSTRIDE(dst_stride);

/*
 * STORE8_CLAMPED: compute px + ((sum + bias) >> 4), clamp it to the running
 * [min, max] range and store it with STORE8.
 */
#define STORE8_CLAMPED \
    BIAS \
    i16x8 unclamped = vec_add(px, vec_sra(vec_add(sum, bias), vec_splat_u16(4))); \
    i16x8 vdst = vec_max(vec_min(unclamped, max), min); \
    STORE8

/*
 * STORE8_UNCLAMPED: compute px + ((sum + bias) >> 4) and store it with
 * STORE8 without clamping.
 */
#define STORE8_UNCLAMPED \
    BIAS \
    i16x8 vdst = vec_add(px, vec_sra(vec_add(sum, bias), vec_splat_u16(4))); \
    STORE8
344*c0909341SAndroid Build Coastguard Worker 
/*
 * DIRECTIONS(w, tmp_stride): define cdef_directions<w>, a table of element
 * offsets into a scratch buffer whose rows are `tmp_stride` entries apart,
 * indexed by [direction 0-7][pass 0-1].
 *
 * `tmp_stride` is parenthesized so that an expression argument is multiplied
 * as a whole. Entries are int8_t: with the strides instantiated below the
 * largest magnitude is 2 * 16 + 2 = 34.
 */
#define DIRECTIONS(w, tmp_stride) \
    static const int8_t cdef_directions##w[8 /* dir */][2 /* pass */] = { \
        { -1 * (tmp_stride) + 1, -2 * (tmp_stride) + 2 }, \
        {  0 * (tmp_stride) + 1, -1 * (tmp_stride) + 2 }, \
        {  0 * (tmp_stride) + 1,  0 * (tmp_stride) + 2 }, \
        {  0 * (tmp_stride) + 1,  1 * (tmp_stride) + 2 }, \
        {  1 * (tmp_stride) + 1,  2 * (tmp_stride) + 2 }, \
        {  1 * (tmp_stride) + 0,  2 * (tmp_stride) + 1 }, \
        {  1 * (tmp_stride) + 0,  2 * (tmp_stride) + 0 }, \
        {  1 * (tmp_stride) + 0,  2 * (tmp_stride) - 1 } \
    };

/* Offset tables for the 4-wide (stride 8) and 8-wide (stride 16) scratch layouts */
DIRECTIONS(4, 8)
DIRECTIONS(8, 16)
359*c0909341SAndroid Build Coastguard Worker 
360*c0909341SAndroid Build Coastguard Worker static inline void
filter_4xN(pixel * dst,const ptrdiff_t dst_stride,const pixel (* left)[2],const pixel * const top,const pixel * const bottom,const int w,const int h,const int pri_strength,const int sec_strength,const int dir,const int pri_shift,const int sec_shift,const enum CdefEdgeFlags edges,uint16_t * tmp)361*c0909341SAndroid Build Coastguard Worker filter_4xN(pixel *dst, const ptrdiff_t dst_stride,
362*c0909341SAndroid Build Coastguard Worker            const pixel (*left)[2], const pixel *const top,
363*c0909341SAndroid Build Coastguard Worker            const pixel *const bottom, const int w, const int h,
364*c0909341SAndroid Build Coastguard Worker            const int pri_strength, const int sec_strength, const int dir,
365*c0909341SAndroid Build Coastguard Worker            const int pri_shift, const int sec_shift,
366*c0909341SAndroid Build Coastguard Worker            const enum CdefEdgeFlags edges, uint16_t *tmp)
367*c0909341SAndroid Build Coastguard Worker {
368*c0909341SAndroid Build Coastguard Worker     const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
369*c0909341SAndroid Build Coastguard Worker     const int off1 = cdef_directions4[dir][0];
370*c0909341SAndroid Build Coastguard Worker     const int off1_1 = cdef_directions4[dir][1];
371*c0909341SAndroid Build Coastguard Worker 
372*c0909341SAndroid Build Coastguard Worker     const int off2 = cdef_directions4[(dir + 2) & 7][0];
373*c0909341SAndroid Build Coastguard Worker     const int off3 = cdef_directions4[(dir + 6) & 7][0];
374*c0909341SAndroid Build Coastguard Worker 
375*c0909341SAndroid Build Coastguard Worker     const int off2_1 = cdef_directions4[(dir + 2) & 7][1];
376*c0909341SAndroid Build Coastguard Worker     const int off3_1 = cdef_directions4[(dir + 6) & 7][1];
377*c0909341SAndroid Build Coastguard Worker 
378*c0909341SAndroid Build Coastguard Worker     MAKE_TAPS
379*c0909341SAndroid Build Coastguard Worker 
380*c0909341SAndroid Build Coastguard Worker     for (int y = 0; y < h / 2; y++) {
381*c0909341SAndroid Build Coastguard Worker         LOAD_PIX4(tmp)
382*c0909341SAndroid Build Coastguard Worker 
383*c0909341SAndroid Build Coastguard Worker         SETUP_MINMAX
384*c0909341SAndroid Build Coastguard Worker 
385*c0909341SAndroid Build Coastguard Worker         // Primary pass
386*c0909341SAndroid Build Coastguard Worker         LOAD_DIR4(p, tmp, off1, off1_1)
387*c0909341SAndroid Build Coastguard Worker 
388*c0909341SAndroid Build Coastguard Worker         CONSTRAIN(p, pri_strength, pri_shift)
389*c0909341SAndroid Build Coastguard Worker 
390*c0909341SAndroid Build Coastguard Worker         MIN_MAX(p)
391*c0909341SAndroid Build Coastguard Worker 
392*c0909341SAndroid Build Coastguard Worker         PRI_0_UPDATE_SUM(p)
393*c0909341SAndroid Build Coastguard Worker 
394*c0909341SAndroid Build Coastguard Worker         // Secondary pass 1
395*c0909341SAndroid Build Coastguard Worker         LOAD_DIR4(s, tmp, off2, off3)
396*c0909341SAndroid Build Coastguard Worker 
397*c0909341SAndroid Build Coastguard Worker         CONSTRAIN(s, sec_strength, sec_shift)
398*c0909341SAndroid Build Coastguard Worker 
399*c0909341SAndroid Build Coastguard Worker         MIN_MAX(s)
400*c0909341SAndroid Build Coastguard Worker 
401*c0909341SAndroid Build Coastguard Worker         SEC_0_UPDATE_SUM(s)
402*c0909341SAndroid Build Coastguard Worker 
403*c0909341SAndroid Build Coastguard Worker         // Secondary pass 2
404*c0909341SAndroid Build Coastguard Worker         LOAD_DIR4(s2, tmp, off2_1, off3_1)
405*c0909341SAndroid Build Coastguard Worker 
406*c0909341SAndroid Build Coastguard Worker         CONSTRAIN(s2, sec_strength, sec_shift)
407*c0909341SAndroid Build Coastguard Worker 
408*c0909341SAndroid Build Coastguard Worker         MIN_MAX(s2)
409*c0909341SAndroid Build Coastguard Worker 
410*c0909341SAndroid Build Coastguard Worker         UPDATE_SUM(s2)
411*c0909341SAndroid Build Coastguard Worker 
412*c0909341SAndroid Build Coastguard Worker         // Store
413*c0909341SAndroid Build Coastguard Worker         STORE4_CLAMPED
414*c0909341SAndroid Build Coastguard Worker     }
415*c0909341SAndroid Build Coastguard Worker }
416*c0909341SAndroid Build Coastguard Worker 
417*c0909341SAndroid Build Coastguard Worker static inline void
filter_4xN_pri(pixel * dst,const ptrdiff_t dst_stride,const pixel (* left)[2],const pixel * const top,const pixel * const bottom,const int w,const int h,const int pri_strength,const int dir,const int pri_shift,const enum CdefEdgeFlags edges,uint16_t * tmp)418*c0909341SAndroid Build Coastguard Worker filter_4xN_pri(pixel *dst, const ptrdiff_t dst_stride,
419*c0909341SAndroid Build Coastguard Worker            const pixel (*left)[2], const pixel *const top,
420*c0909341SAndroid Build Coastguard Worker            const pixel *const bottom, const int w, const int h,
421*c0909341SAndroid Build Coastguard Worker            const int pri_strength, const int dir,
422*c0909341SAndroid Build Coastguard Worker            const int pri_shift, const enum CdefEdgeFlags edges,
423*c0909341SAndroid Build Coastguard Worker            uint16_t *tmp)
424*c0909341SAndroid Build Coastguard Worker {
425*c0909341SAndroid Build Coastguard Worker     const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
426*c0909341SAndroid Build Coastguard Worker     const int off1 = cdef_directions4[dir][0];
427*c0909341SAndroid Build Coastguard Worker     const int off1_1 = cdef_directions4[dir][1];
428*c0909341SAndroid Build Coastguard Worker 
429*c0909341SAndroid Build Coastguard Worker     MAKE_TAPS
430*c0909341SAndroid Build Coastguard Worker 
431*c0909341SAndroid Build Coastguard Worker     for (int y = 0; y < h / 2; y++) {
432*c0909341SAndroid Build Coastguard Worker         LOAD_PIX4(tmp)
433*c0909341SAndroid Build Coastguard Worker 
434*c0909341SAndroid Build Coastguard Worker         // Primary pass
435*c0909341SAndroid Build Coastguard Worker         LOAD_DIR4(p, tmp, off1, off1_1)
436*c0909341SAndroid Build Coastguard Worker 
437*c0909341SAndroid Build Coastguard Worker         CONSTRAIN(p, pri_strength, pri_shift)
438*c0909341SAndroid Build Coastguard Worker 
439*c0909341SAndroid Build Coastguard Worker         PRI_0_UPDATE_SUM(p)
440*c0909341SAndroid Build Coastguard Worker 
441*c0909341SAndroid Build Coastguard Worker         STORE4_UNCLAMPED
442*c0909341SAndroid Build Coastguard Worker     }
443*c0909341SAndroid Build Coastguard Worker }
444*c0909341SAndroid Build Coastguard Worker 
445*c0909341SAndroid Build Coastguard Worker static inline void
filter_4xN_sec(pixel * dst,const ptrdiff_t dst_stride,const pixel (* left)[2],const pixel * const top,const pixel * const bottom,const int w,const int h,const int sec_strength,const int dir,const int sec_shift,const enum CdefEdgeFlags edges,uint16_t * tmp)446*c0909341SAndroid Build Coastguard Worker filter_4xN_sec(pixel *dst, const ptrdiff_t dst_stride,
447*c0909341SAndroid Build Coastguard Worker            const pixel (*left)[2], const pixel *const top,
448*c0909341SAndroid Build Coastguard Worker            const pixel *const bottom, const int w, const int h,
449*c0909341SAndroid Build Coastguard Worker            const int sec_strength, const int dir,
450*c0909341SAndroid Build Coastguard Worker            const int sec_shift, const enum CdefEdgeFlags edges,
451*c0909341SAndroid Build Coastguard Worker            uint16_t *tmp)
452*c0909341SAndroid Build Coastguard Worker {
453*c0909341SAndroid Build Coastguard Worker     const int off2 = cdef_directions4[(dir + 2) & 7][0];
454*c0909341SAndroid Build Coastguard Worker     const int off3 = cdef_directions4[(dir + 6) & 7][0];
455*c0909341SAndroid Build Coastguard Worker 
456*c0909341SAndroid Build Coastguard Worker     const int off2_1 = cdef_directions4[(dir + 2) & 7][1];
457*c0909341SAndroid Build Coastguard Worker     const int off3_1 = cdef_directions4[(dir + 6) & 7][1];
458*c0909341SAndroid Build Coastguard Worker 
459*c0909341SAndroid Build Coastguard Worker     for (int y = 0; y < h / 2; y++) {
460*c0909341SAndroid Build Coastguard Worker         LOAD_PIX4(tmp)
461*c0909341SAndroid Build Coastguard Worker         // Secondary pass 1
462*c0909341SAndroid Build Coastguard Worker         LOAD_DIR4(s, tmp, off2, off3)
463*c0909341SAndroid Build Coastguard Worker 
464*c0909341SAndroid Build Coastguard Worker         CONSTRAIN(s, sec_strength, sec_shift)
465*c0909341SAndroid Build Coastguard Worker 
466*c0909341SAndroid Build Coastguard Worker         SEC_0_UPDATE_SUM(s)
467*c0909341SAndroid Build Coastguard Worker 
468*c0909341SAndroid Build Coastguard Worker         // Secondary pass 2
469*c0909341SAndroid Build Coastguard Worker         LOAD_DIR4(s2, tmp, off2_1, off3_1)
470*c0909341SAndroid Build Coastguard Worker 
471*c0909341SAndroid Build Coastguard Worker         CONSTRAIN(s2, sec_strength, sec_shift)
472*c0909341SAndroid Build Coastguard Worker 
473*c0909341SAndroid Build Coastguard Worker         UPDATE_SUM(s2)
474*c0909341SAndroid Build Coastguard Worker 
475*c0909341SAndroid Build Coastguard Worker         STORE4_UNCLAMPED
476*c0909341SAndroid Build Coastguard Worker     }
477*c0909341SAndroid Build Coastguard Worker }
478*c0909341SAndroid Build Coastguard Worker 
479*c0909341SAndroid Build Coastguard Worker static inline void
filter_8xN(pixel * dst,const ptrdiff_t dst_stride,const pixel (* left)[2],const pixel * const top,const pixel * const bottom,const int w,const int h,const int pri_strength,const int sec_strength,const int dir,const int pri_shift,const int sec_shift,const enum CdefEdgeFlags edges,uint16_t * tmp)480*c0909341SAndroid Build Coastguard Worker filter_8xN(pixel *dst, const ptrdiff_t dst_stride,
481*c0909341SAndroid Build Coastguard Worker            const pixel (*left)[2], const pixel *const top,
482*c0909341SAndroid Build Coastguard Worker            const pixel *const bottom, const int w, const int h,
483*c0909341SAndroid Build Coastguard Worker            const int pri_strength, const int sec_strength, const int dir,
484*c0909341SAndroid Build Coastguard Worker            const int pri_shift, const int sec_shift, const enum CdefEdgeFlags edges,
485*c0909341SAndroid Build Coastguard Worker            uint16_t *tmp)
486*c0909341SAndroid Build Coastguard Worker {
487*c0909341SAndroid Build Coastguard Worker     const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
488*c0909341SAndroid Build Coastguard Worker 
489*c0909341SAndroid Build Coastguard Worker     const int off1 = cdef_directions8[dir][0];
490*c0909341SAndroid Build Coastguard Worker     const int off1_1 = cdef_directions8[dir][1];
491*c0909341SAndroid Build Coastguard Worker 
492*c0909341SAndroid Build Coastguard Worker     const int off2 = cdef_directions8[(dir + 2) & 7][0];
493*c0909341SAndroid Build Coastguard Worker     const int off3 = cdef_directions8[(dir + 6) & 7][0];
494*c0909341SAndroid Build Coastguard Worker 
495*c0909341SAndroid Build Coastguard Worker     const int off2_1 = cdef_directions8[(dir + 2) & 7][1];
496*c0909341SAndroid Build Coastguard Worker     const int off3_1 = cdef_directions8[(dir + 6) & 7][1];
497*c0909341SAndroid Build Coastguard Worker 
498*c0909341SAndroid Build Coastguard Worker     MAKE_TAPS
499*c0909341SAndroid Build Coastguard Worker 
500*c0909341SAndroid Build Coastguard Worker     for (int y = 0; y < h; y++) {
501*c0909341SAndroid Build Coastguard Worker         LOAD_PIX(tmp)
502*c0909341SAndroid Build Coastguard Worker 
503*c0909341SAndroid Build Coastguard Worker         SETUP_MINMAX
504*c0909341SAndroid Build Coastguard Worker 
505*c0909341SAndroid Build Coastguard Worker         // Primary pass
506*c0909341SAndroid Build Coastguard Worker         LOAD_DIR(p, tmp, off1, off1_1)
507*c0909341SAndroid Build Coastguard Worker 
508*c0909341SAndroid Build Coastguard Worker         CONSTRAIN(p, pri_strength, pri_shift)
509*c0909341SAndroid Build Coastguard Worker 
510*c0909341SAndroid Build Coastguard Worker         MIN_MAX(p)
511*c0909341SAndroid Build Coastguard Worker 
512*c0909341SAndroid Build Coastguard Worker         PRI_0_UPDATE_SUM(p)
513*c0909341SAndroid Build Coastguard Worker 
514*c0909341SAndroid Build Coastguard Worker         // Secondary pass 1
515*c0909341SAndroid Build Coastguard Worker         LOAD_DIR(s, tmp, off2, off3)
516*c0909341SAndroid Build Coastguard Worker 
517*c0909341SAndroid Build Coastguard Worker         CONSTRAIN(s, sec_strength, sec_shift)
518*c0909341SAndroid Build Coastguard Worker 
519*c0909341SAndroid Build Coastguard Worker         MIN_MAX(s)
520*c0909341SAndroid Build Coastguard Worker 
521*c0909341SAndroid Build Coastguard Worker         SEC_0_UPDATE_SUM(s)
522*c0909341SAndroid Build Coastguard Worker 
523*c0909341SAndroid Build Coastguard Worker         // Secondary pass 2
524*c0909341SAndroid Build Coastguard Worker         LOAD_DIR(s2, tmp, off2_1, off3_1)
525*c0909341SAndroid Build Coastguard Worker 
526*c0909341SAndroid Build Coastguard Worker         CONSTRAIN(s2, sec_strength, sec_shift)
527*c0909341SAndroid Build Coastguard Worker 
528*c0909341SAndroid Build Coastguard Worker         MIN_MAX(s2)
529*c0909341SAndroid Build Coastguard Worker 
530*c0909341SAndroid Build Coastguard Worker         UPDATE_SUM(s2)
531*c0909341SAndroid Build Coastguard Worker 
532*c0909341SAndroid Build Coastguard Worker         // Store
533*c0909341SAndroid Build Coastguard Worker         STORE8_CLAMPED
534*c0909341SAndroid Build Coastguard Worker     }
535*c0909341SAndroid Build Coastguard Worker 
536*c0909341SAndroid Build Coastguard Worker }
537*c0909341SAndroid Build Coastguard Worker 
538*c0909341SAndroid Build Coastguard Worker static inline void
filter_8xN_pri(pixel * dst,const ptrdiff_t dst_stride,const pixel (* left)[2],const pixel * const top,const pixel * const bottom,const int w,const int h,const int pri_strength,const int dir,const int pri_shift,const enum CdefEdgeFlags edges,uint16_t * tmp)539*c0909341SAndroid Build Coastguard Worker filter_8xN_pri(pixel *dst, const ptrdiff_t dst_stride,
540*c0909341SAndroid Build Coastguard Worker            const pixel (*left)[2], const pixel *const top,
541*c0909341SAndroid Build Coastguard Worker            const pixel *const bottom, const int w, const int h,
542*c0909341SAndroid Build Coastguard Worker            const int pri_strength, const int dir,
543*c0909341SAndroid Build Coastguard Worker            const int pri_shift, const enum CdefEdgeFlags edges,
544*c0909341SAndroid Build Coastguard Worker            uint16_t *tmp)
545*c0909341SAndroid Build Coastguard Worker {
546*c0909341SAndroid Build Coastguard Worker     const int bitdepth_min_8 = bitdepth_from_max(bitdepth_max) - 8;
547*c0909341SAndroid Build Coastguard Worker     const int off1 = cdef_directions8[dir][0];
548*c0909341SAndroid Build Coastguard Worker     const int off1_1 = cdef_directions8[dir][1];
549*c0909341SAndroid Build Coastguard Worker 
550*c0909341SAndroid Build Coastguard Worker     MAKE_TAPS
551*c0909341SAndroid Build Coastguard Worker 
552*c0909341SAndroid Build Coastguard Worker     for (int y = 0; y < h; y++) {
553*c0909341SAndroid Build Coastguard Worker         LOAD_PIX(tmp)
554*c0909341SAndroid Build Coastguard Worker 
555*c0909341SAndroid Build Coastguard Worker         // Primary pass
556*c0909341SAndroid Build Coastguard Worker         LOAD_DIR(p, tmp, off1, off1_1)
557*c0909341SAndroid Build Coastguard Worker 
558*c0909341SAndroid Build Coastguard Worker         CONSTRAIN(p, pri_strength, pri_shift)
559*c0909341SAndroid Build Coastguard Worker 
560*c0909341SAndroid Build Coastguard Worker         PRI_0_UPDATE_SUM(p)
561*c0909341SAndroid Build Coastguard Worker 
562*c0909341SAndroid Build Coastguard Worker         STORE8_UNCLAMPED
563*c0909341SAndroid Build Coastguard Worker     }
564*c0909341SAndroid Build Coastguard Worker }
565*c0909341SAndroid Build Coastguard Worker 
566*c0909341SAndroid Build Coastguard Worker static inline void
filter_8xN_sec(pixel * dst,const ptrdiff_t dst_stride,const pixel (* left)[2],const pixel * const top,const pixel * const bottom,const int w,const int h,const int sec_strength,const int dir,const int sec_shift,const enum CdefEdgeFlags edges,uint16_t * tmp)567*c0909341SAndroid Build Coastguard Worker filter_8xN_sec(pixel *dst, const ptrdiff_t dst_stride,
568*c0909341SAndroid Build Coastguard Worker            const pixel (*left)[2], const pixel *const top,
569*c0909341SAndroid Build Coastguard Worker            const pixel *const bottom, const int w, const int h,
570*c0909341SAndroid Build Coastguard Worker            const int sec_strength, const int dir,
571*c0909341SAndroid Build Coastguard Worker            const int sec_shift, const enum CdefEdgeFlags edges,
572*c0909341SAndroid Build Coastguard Worker            uint16_t *tmp)
573*c0909341SAndroid Build Coastguard Worker {
574*c0909341SAndroid Build Coastguard Worker     const int off2 = cdef_directions8[(dir + 2) & 7][0];
575*c0909341SAndroid Build Coastguard Worker     const int off3 = cdef_directions8[(dir + 6) & 7][0];
576*c0909341SAndroid Build Coastguard Worker 
577*c0909341SAndroid Build Coastguard Worker     const int off2_1 = cdef_directions8[(dir + 2) & 7][1];
578*c0909341SAndroid Build Coastguard Worker     const int off3_1 = cdef_directions8[(dir + 6) & 7][1];
579*c0909341SAndroid Build Coastguard Worker 
580*c0909341SAndroid Build Coastguard Worker     for (int y = 0; y < h; y++) {
581*c0909341SAndroid Build Coastguard Worker         LOAD_PIX(tmp)
582*c0909341SAndroid Build Coastguard Worker 
583*c0909341SAndroid Build Coastguard Worker         // Secondary pass 1
584*c0909341SAndroid Build Coastguard Worker         LOAD_DIR(s, tmp, off2, off3)
585*c0909341SAndroid Build Coastguard Worker 
586*c0909341SAndroid Build Coastguard Worker         CONSTRAIN(s, sec_strength, sec_shift)
587*c0909341SAndroid Build Coastguard Worker 
588*c0909341SAndroid Build Coastguard Worker         SEC_0_UPDATE_SUM(s)
589*c0909341SAndroid Build Coastguard Worker 
590*c0909341SAndroid Build Coastguard Worker         // Secondary pass 2
591*c0909341SAndroid Build Coastguard Worker         LOAD_DIR(s2, tmp, off2_1, off3_1)
592*c0909341SAndroid Build Coastguard Worker 
593*c0909341SAndroid Build Coastguard Worker         CONSTRAIN(s2, sec_strength, sec_shift)
594*c0909341SAndroid Build Coastguard Worker 
595*c0909341SAndroid Build Coastguard Worker         UPDATE_SUM(s2)
596*c0909341SAndroid Build Coastguard Worker 
597*c0909341SAndroid Build Coastguard Worker         STORE8_UNCLAMPED
598*c0909341SAndroid Build Coastguard Worker     }
599*c0909341SAndroid Build Coastguard Worker }
600*c0909341SAndroid Build Coastguard Worker 
/*
 * cdef_fn(w, h, tmp_stride) defines the public entry point
 * dav1d_cdef_filter_<w>x<h>_vsx().
 *
 * The generated function:
 *   1. Declares a temporary uint16_t buffer `tmp_buf` of
 *      12 * tmp_stride + 8 elements through ALIGN_STK_16 (presumably a
 *      16-byte-aligned stack array -- TODO confirm) and points `tmp`
 *      2 rows and 2 columns into it.
 *   2. Calls copy<w>xN() with `tmp - 2` as destination, handing over dst,
 *      left, top, bottom and the edge flags.
 *   3. Dispatches on which strengths are non-zero:
 *        pri && sec  -> filter_<w>xN()
 *        pri only    -> filter_<w>xN_pri()
 *        otherwise   -> filter_<w>xN_sec()
 *
 * pri_shift is clamped to be non-negative with imax(); sec_shift is used as
 * computed. In the final branch ulog2(sec_strength) is evaluated without a
 * zero check, so callers are presumably expected never to pass both
 * strengths as zero -- TODO confirm.
 *
 * `tmp_stride` is parenthesised wherever it takes part in arithmetic so that
 * an expression argument cannot change the computed sizes.
 */
#define cdef_fn(w, h, tmp_stride) \
void dav1d_cdef_filter_##w##x##h##_vsx(pixel *const dst, \
                                       const ptrdiff_t dst_stride, \
                                       const pixel (*left)[2], \
                                       const pixel *const top, \
                                       const pixel *const bottom, \
                                       const int pri_strength, \
                                       const int sec_strength, \
                                       const int dir, \
                                       const int damping, \
                                       const enum CdefEdgeFlags edges) \
{ \
    ALIGN_STK_16(uint16_t, tmp_buf, 12 * (tmp_stride) + 8,); \
    uint16_t *tmp = tmp_buf + 2 * (tmp_stride) + 2; \
    copy##w##xN(tmp - 2, dst, dst_stride, left, top, bottom, w, h, edges); \
    if (pri_strength) { \
        const int pri_shift = imax(0, damping - ulog2(pri_strength)); \
        if (sec_strength) { \
            const int sec_shift = damping - ulog2(sec_strength); \
            filter_##w##xN(dst, dst_stride, left, top, bottom, w, h, pri_strength, \
                           sec_strength, dir, pri_shift, sec_shift, edges, tmp); \
        } else { \
            filter_##w##xN_pri(dst, dst_stride, left, top, bottom, w, h, pri_strength, \
                               dir, pri_shift, edges, tmp); \
        } \
    } else { \
        const int sec_shift = damping - ulog2(sec_strength); \
        filter_##w##xN_sec(dst, dst_stride, left, top, bottom, w, h, sec_strength, \
                           dir, sec_shift, edges, tmp); \
    } \
}
632*c0909341SAndroid Build Coastguard Worker 
633*c0909341SAndroid Build Coastguard Worker cdef_fn(4, 4, 8);
634*c0909341SAndroid Build Coastguard Worker cdef_fn(4, 8, 8);
635*c0909341SAndroid Build Coastguard Worker cdef_fn(8, 8, 16);
636*c0909341SAndroid Build Coastguard Worker #endif
637