xref: /aosp_15_r20/external/libdav1d/src/recon_tmpl.c (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
/*
 * Copyright © 2018-2021, VideoLAN and dav1d authors
 * Copyright © 2018, Two Orioles, LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
27*c0909341SAndroid Build Coastguard Worker 
28*c0909341SAndroid Build Coastguard Worker #include "config.h"
29*c0909341SAndroid Build Coastguard Worker 
30*c0909341SAndroid Build Coastguard Worker #include <string.h>
31*c0909341SAndroid Build Coastguard Worker #include <stdio.h>
32*c0909341SAndroid Build Coastguard Worker 
33*c0909341SAndroid Build Coastguard Worker #include "common/attributes.h"
34*c0909341SAndroid Build Coastguard Worker #include "common/bitdepth.h"
35*c0909341SAndroid Build Coastguard Worker #include "common/dump.h"
36*c0909341SAndroid Build Coastguard Worker #include "common/frame.h"
37*c0909341SAndroid Build Coastguard Worker #include "common/intops.h"
38*c0909341SAndroid Build Coastguard Worker 
39*c0909341SAndroid Build Coastguard Worker #include "src/cdef_apply.h"
40*c0909341SAndroid Build Coastguard Worker #include "src/ctx.h"
41*c0909341SAndroid Build Coastguard Worker #include "src/ipred_prepare.h"
42*c0909341SAndroid Build Coastguard Worker #include "src/lf_apply.h"
43*c0909341SAndroid Build Coastguard Worker #include "src/lr_apply.h"
44*c0909341SAndroid Build Coastguard Worker #include "src/recon.h"
45*c0909341SAndroid Build Coastguard Worker #include "src/scan.h"
46*c0909341SAndroid Build Coastguard Worker #include "src/tables.h"
47*c0909341SAndroid Build Coastguard Worker #include "src/wedge.h"
48*c0909341SAndroid Build Coastguard Worker 
/*
 * Decodes a Golomb-style value using equiprobable bools from `msac`.
 *
 * A run of zero bools forms the prefix; its length (capped at 32) is the
 * number of suffix bits that follow. The suffix bits are appended, most
 * significant first, below an implicit leading 1, and the result is that
 * value minus one.
 *
 * Note that the bool is decoded before the length cap is tested, so one
 * further bool is consumed when the cap is reached.
 */
static inline unsigned read_golomb(MsacContext *const msac) {
    int n_bits = 0;
    while (!dav1d_msac_decode_bool_equi(msac) && n_bits < 32)
        n_bits++;

    /* Start from the implicit leading 1 and shift in the suffix bits. */
    unsigned acc = 1;
    for (int remaining = n_bits; remaining > 0; remaining--)
        acc = (acc << 1) + dav1d_msac_decode_bool_equi(msac);

    return acc - 1;
}
58*c0909341SAndroid Build Coastguard Worker 
get_skip_ctx(const TxfmInfo * const t_dim,const enum BlockSize bs,const uint8_t * const a,const uint8_t * const l,const int chroma,const enum Dav1dPixelLayout layout)59*c0909341SAndroid Build Coastguard Worker static inline unsigned get_skip_ctx(const TxfmInfo *const t_dim,
60*c0909341SAndroid Build Coastguard Worker                                     const enum BlockSize bs,
61*c0909341SAndroid Build Coastguard Worker                                     const uint8_t *const a,
62*c0909341SAndroid Build Coastguard Worker                                     const uint8_t *const l,
63*c0909341SAndroid Build Coastguard Worker                                     const int chroma,
64*c0909341SAndroid Build Coastguard Worker                                     const enum Dav1dPixelLayout layout)
65*c0909341SAndroid Build Coastguard Worker {
66*c0909341SAndroid Build Coastguard Worker     const uint8_t *const b_dim = dav1d_block_dimensions[bs];
67*c0909341SAndroid Build Coastguard Worker 
68*c0909341SAndroid Build Coastguard Worker     if (chroma) {
69*c0909341SAndroid Build Coastguard Worker         const int ss_ver = layout == DAV1D_PIXEL_LAYOUT_I420;
70*c0909341SAndroid Build Coastguard Worker         const int ss_hor = layout != DAV1D_PIXEL_LAYOUT_I444;
71*c0909341SAndroid Build Coastguard Worker         const int not_one_blk = b_dim[2] - (!!b_dim[2] && ss_hor) > t_dim->lw ||
72*c0909341SAndroid Build Coastguard Worker                                 b_dim[3] - (!!b_dim[3] && ss_ver) > t_dim->lh;
73*c0909341SAndroid Build Coastguard Worker         unsigned ca, cl;
74*c0909341SAndroid Build Coastguard Worker 
75*c0909341SAndroid Build Coastguard Worker #define MERGE_CTX(dir, type, no_val) \
76*c0909341SAndroid Build Coastguard Worker         c##dir = *(const type *) dir != no_val; \
77*c0909341SAndroid Build Coastguard Worker         break
78*c0909341SAndroid Build Coastguard Worker 
79*c0909341SAndroid Build Coastguard Worker         switch (t_dim->lw) {
80*c0909341SAndroid Build Coastguard Worker         /* For some reason the MSVC CRT _wassert() function is not flagged as
81*c0909341SAndroid Build Coastguard Worker          * __declspec(noreturn), so when using those headers the compiler will
82*c0909341SAndroid Build Coastguard Worker          * expect execution to continue after an assertion has been triggered
83*c0909341SAndroid Build Coastguard Worker          * and will therefore complain about the use of uninitialized variables
84*c0909341SAndroid Build Coastguard Worker          * when compiled in debug mode if we put the default case at the end. */
85*c0909341SAndroid Build Coastguard Worker         default: assert(0); /* fall-through */
86*c0909341SAndroid Build Coastguard Worker         case TX_4X4:   MERGE_CTX(a, uint8_t,  0x40);
87*c0909341SAndroid Build Coastguard Worker         case TX_8X8:   MERGE_CTX(a, uint16_t, 0x4040);
88*c0909341SAndroid Build Coastguard Worker         case TX_16X16: MERGE_CTX(a, uint32_t, 0x40404040U);
89*c0909341SAndroid Build Coastguard Worker         case TX_32X32: MERGE_CTX(a, uint64_t, 0x4040404040404040ULL);
90*c0909341SAndroid Build Coastguard Worker         }
91*c0909341SAndroid Build Coastguard Worker         switch (t_dim->lh) {
92*c0909341SAndroid Build Coastguard Worker         default: assert(0); /* fall-through */
93*c0909341SAndroid Build Coastguard Worker         case TX_4X4:   MERGE_CTX(l, uint8_t,  0x40);
94*c0909341SAndroid Build Coastguard Worker         case TX_8X8:   MERGE_CTX(l, uint16_t, 0x4040);
95*c0909341SAndroid Build Coastguard Worker         case TX_16X16: MERGE_CTX(l, uint32_t, 0x40404040U);
96*c0909341SAndroid Build Coastguard Worker         case TX_32X32: MERGE_CTX(l, uint64_t, 0x4040404040404040ULL);
97*c0909341SAndroid Build Coastguard Worker         }
98*c0909341SAndroid Build Coastguard Worker #undef MERGE_CTX
99*c0909341SAndroid Build Coastguard Worker 
100*c0909341SAndroid Build Coastguard Worker         return 7 + not_one_blk * 3 + ca + cl;
101*c0909341SAndroid Build Coastguard Worker     } else if (b_dim[2] == t_dim->lw && b_dim[3] == t_dim->lh) {
102*c0909341SAndroid Build Coastguard Worker         return 0;
103*c0909341SAndroid Build Coastguard Worker     } else {
104*c0909341SAndroid Build Coastguard Worker         unsigned la, ll;
105*c0909341SAndroid Build Coastguard Worker 
106*c0909341SAndroid Build Coastguard Worker #define MERGE_CTX(dir, type, tx) \
107*c0909341SAndroid Build Coastguard Worker         if (tx == TX_64X64) { \
108*c0909341SAndroid Build Coastguard Worker             uint64_t tmp = *(const uint64_t *) dir; \
109*c0909341SAndroid Build Coastguard Worker             tmp |= *(const uint64_t *) &dir[8]; \
110*c0909341SAndroid Build Coastguard Worker             l##dir = (unsigned) (tmp >> 32) | (unsigned) tmp; \
111*c0909341SAndroid Build Coastguard Worker         } else \
112*c0909341SAndroid Build Coastguard Worker             l##dir = *(const type *) dir; \
113*c0909341SAndroid Build Coastguard Worker         if (tx == TX_32X32) l##dir |= *(const type *) &dir[sizeof(type)]; \
114*c0909341SAndroid Build Coastguard Worker         if (tx >= TX_16X16) l##dir |= l##dir >> 16; \
115*c0909341SAndroid Build Coastguard Worker         if (tx >= TX_8X8)   l##dir |= l##dir >> 8; \
116*c0909341SAndroid Build Coastguard Worker         break
117*c0909341SAndroid Build Coastguard Worker 
118*c0909341SAndroid Build Coastguard Worker         switch (t_dim->lw) {
119*c0909341SAndroid Build Coastguard Worker         default: assert(0); /* fall-through */
120*c0909341SAndroid Build Coastguard Worker         case TX_4X4:   MERGE_CTX(a, uint8_t,  TX_4X4);
121*c0909341SAndroid Build Coastguard Worker         case TX_8X8:   MERGE_CTX(a, uint16_t, TX_8X8);
122*c0909341SAndroid Build Coastguard Worker         case TX_16X16: MERGE_CTX(a, uint32_t, TX_16X16);
123*c0909341SAndroid Build Coastguard Worker         case TX_32X32: MERGE_CTX(a, uint32_t, TX_32X32);
124*c0909341SAndroid Build Coastguard Worker         case TX_64X64: MERGE_CTX(a, uint32_t, TX_64X64);
125*c0909341SAndroid Build Coastguard Worker         }
126*c0909341SAndroid Build Coastguard Worker         switch (t_dim->lh) {
127*c0909341SAndroid Build Coastguard Worker         default: assert(0); /* fall-through */
128*c0909341SAndroid Build Coastguard Worker         case TX_4X4:   MERGE_CTX(l, uint8_t,  TX_4X4);
129*c0909341SAndroid Build Coastguard Worker         case TX_8X8:   MERGE_CTX(l, uint16_t, TX_8X8);
130*c0909341SAndroid Build Coastguard Worker         case TX_16X16: MERGE_CTX(l, uint32_t, TX_16X16);
131*c0909341SAndroid Build Coastguard Worker         case TX_32X32: MERGE_CTX(l, uint32_t, TX_32X32);
132*c0909341SAndroid Build Coastguard Worker         case TX_64X64: MERGE_CTX(l, uint32_t, TX_64X64);
133*c0909341SAndroid Build Coastguard Worker         }
134*c0909341SAndroid Build Coastguard Worker #undef MERGE_CTX
135*c0909341SAndroid Build Coastguard Worker 
136*c0909341SAndroid Build Coastguard Worker         return dav1d_skip_ctx[umin(la & 0x3F, 4)][umin(ll & 0x3F, 4)];
137*c0909341SAndroid Build Coastguard Worker     }
138*c0909341SAndroid Build Coastguard Worker }
139*c0909341SAndroid Build Coastguard Worker 
get_dc_sign_ctx(const int tx,const uint8_t * const a,const uint8_t * const l)140*c0909341SAndroid Build Coastguard Worker static inline unsigned get_dc_sign_ctx(const int /*enum RectTxfmSize*/ tx,
141*c0909341SAndroid Build Coastguard Worker                                        const uint8_t *const a,
142*c0909341SAndroid Build Coastguard Worker                                        const uint8_t *const l)
143*c0909341SAndroid Build Coastguard Worker {
144*c0909341SAndroid Build Coastguard Worker     uint64_t mask = 0xC0C0C0C0C0C0C0C0ULL, mul = 0x0101010101010101ULL;
145*c0909341SAndroid Build Coastguard Worker     int s;
146*c0909341SAndroid Build Coastguard Worker 
147*c0909341SAndroid Build Coastguard Worker #if ARCH_X86_64 && defined(__GNUC__)
148*c0909341SAndroid Build Coastguard Worker     /* Coerce compilers into producing better code. For some reason
149*c0909341SAndroid Build Coastguard Worker      * every x86-64 compiler is awful at handling 64-bit constants. */
150*c0909341SAndroid Build Coastguard Worker     __asm__("" : "+r"(mask), "+r"(mul));
151*c0909341SAndroid Build Coastguard Worker #endif
152*c0909341SAndroid Build Coastguard Worker 
153*c0909341SAndroid Build Coastguard Worker     switch(tx) {
154*c0909341SAndroid Build Coastguard Worker     default: assert(0); /* fall-through */
155*c0909341SAndroid Build Coastguard Worker     case TX_4X4: {
156*c0909341SAndroid Build Coastguard Worker         int t = *(const uint8_t *) a >> 6;
157*c0909341SAndroid Build Coastguard Worker         t    += *(const uint8_t *) l >> 6;
158*c0909341SAndroid Build Coastguard Worker         s = t - 1 - 1;
159*c0909341SAndroid Build Coastguard Worker         break;
160*c0909341SAndroid Build Coastguard Worker     }
161*c0909341SAndroid Build Coastguard Worker     case TX_8X8: {
162*c0909341SAndroid Build Coastguard Worker         uint32_t t = *(const uint16_t *) a & (uint32_t) mask;
163*c0909341SAndroid Build Coastguard Worker         t         += *(const uint16_t *) l & (uint32_t) mask;
164*c0909341SAndroid Build Coastguard Worker         t *= 0x04040404U;
165*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 24) - 2 - 2;
166*c0909341SAndroid Build Coastguard Worker         break;
167*c0909341SAndroid Build Coastguard Worker     }
168*c0909341SAndroid Build Coastguard Worker     case TX_16X16: {
169*c0909341SAndroid Build Coastguard Worker         uint32_t t = (*(const uint32_t *) a & (uint32_t) mask) >> 6;
170*c0909341SAndroid Build Coastguard Worker         t         += (*(const uint32_t *) l & (uint32_t) mask) >> 6;
171*c0909341SAndroid Build Coastguard Worker         t *= (uint32_t) mul;
172*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 24) - 4 - 4;
173*c0909341SAndroid Build Coastguard Worker         break;
174*c0909341SAndroid Build Coastguard Worker     }
175*c0909341SAndroid Build Coastguard Worker     case TX_32X32: {
176*c0909341SAndroid Build Coastguard Worker         uint64_t t = (*(const uint64_t *) a & mask) >> 6;
177*c0909341SAndroid Build Coastguard Worker         t         += (*(const uint64_t *) l & mask) >> 6;
178*c0909341SAndroid Build Coastguard Worker         t *= mul;
179*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 56) - 8 - 8;
180*c0909341SAndroid Build Coastguard Worker         break;
181*c0909341SAndroid Build Coastguard Worker     }
182*c0909341SAndroid Build Coastguard Worker     case TX_64X64: {
183*c0909341SAndroid Build Coastguard Worker         uint64_t t = (*(const uint64_t *) &a[0] & mask) >> 6;
184*c0909341SAndroid Build Coastguard Worker         t         += (*(const uint64_t *) &a[8] & mask) >> 6;
185*c0909341SAndroid Build Coastguard Worker         t         += (*(const uint64_t *) &l[0] & mask) >> 6;
186*c0909341SAndroid Build Coastguard Worker         t         += (*(const uint64_t *) &l[8] & mask) >> 6;
187*c0909341SAndroid Build Coastguard Worker         t *= mul;
188*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 56) - 16 - 16;
189*c0909341SAndroid Build Coastguard Worker         break;
190*c0909341SAndroid Build Coastguard Worker     }
191*c0909341SAndroid Build Coastguard Worker     case RTX_4X8: {
192*c0909341SAndroid Build Coastguard Worker         uint32_t t = *(const uint8_t  *) a & (uint32_t) mask;
193*c0909341SAndroid Build Coastguard Worker         t         += *(const uint16_t *) l & (uint32_t) mask;
194*c0909341SAndroid Build Coastguard Worker         t *= 0x04040404U;
195*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 24) - 1 - 2;
196*c0909341SAndroid Build Coastguard Worker         break;
197*c0909341SAndroid Build Coastguard Worker     }
198*c0909341SAndroid Build Coastguard Worker     case RTX_8X4: {
199*c0909341SAndroid Build Coastguard Worker         uint32_t t = *(const uint16_t *) a & (uint32_t) mask;
200*c0909341SAndroid Build Coastguard Worker         t         += *(const uint8_t  *) l & (uint32_t) mask;
201*c0909341SAndroid Build Coastguard Worker         t *= 0x04040404U;
202*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 24) - 2 - 1;
203*c0909341SAndroid Build Coastguard Worker         break;
204*c0909341SAndroid Build Coastguard Worker     }
205*c0909341SAndroid Build Coastguard Worker     case RTX_8X16: {
206*c0909341SAndroid Build Coastguard Worker         uint32_t t = *(const uint16_t *) a & (uint32_t) mask;
207*c0909341SAndroid Build Coastguard Worker         t         += *(const uint32_t *) l & (uint32_t) mask;
208*c0909341SAndroid Build Coastguard Worker         t = (t >> 6) * (uint32_t) mul;
209*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 24) - 2 - 4;
210*c0909341SAndroid Build Coastguard Worker         break;
211*c0909341SAndroid Build Coastguard Worker     }
212*c0909341SAndroid Build Coastguard Worker     case RTX_16X8: {
213*c0909341SAndroid Build Coastguard Worker         uint32_t t = *(const uint32_t *) a & (uint32_t) mask;
214*c0909341SAndroid Build Coastguard Worker         t         += *(const uint16_t *) l & (uint32_t) mask;
215*c0909341SAndroid Build Coastguard Worker         t = (t >> 6) * (uint32_t) mul;
216*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 24) - 4 - 2;
217*c0909341SAndroid Build Coastguard Worker         break;
218*c0909341SAndroid Build Coastguard Worker     }
219*c0909341SAndroid Build Coastguard Worker     case RTX_16X32: {
220*c0909341SAndroid Build Coastguard Worker         uint64_t t = *(const uint32_t *) a & (uint32_t) mask;
221*c0909341SAndroid Build Coastguard Worker         t         += *(const uint64_t *) l & mask;
222*c0909341SAndroid Build Coastguard Worker         t = (t >> 6) * mul;
223*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 56) - 4 - 8;
224*c0909341SAndroid Build Coastguard Worker         break;
225*c0909341SAndroid Build Coastguard Worker     }
226*c0909341SAndroid Build Coastguard Worker     case RTX_32X16: {
227*c0909341SAndroid Build Coastguard Worker         uint64_t t = *(const uint64_t *) a & mask;
228*c0909341SAndroid Build Coastguard Worker         t         += *(const uint32_t *) l & (uint32_t) mask;
229*c0909341SAndroid Build Coastguard Worker         t = (t >> 6) * mul;
230*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 56) - 8 - 4;
231*c0909341SAndroid Build Coastguard Worker         break;
232*c0909341SAndroid Build Coastguard Worker     }
233*c0909341SAndroid Build Coastguard Worker     case RTX_32X64: {
234*c0909341SAndroid Build Coastguard Worker         uint64_t t = (*(const uint64_t *) &a[0] & mask) >> 6;
235*c0909341SAndroid Build Coastguard Worker         t         += (*(const uint64_t *) &l[0] & mask) >> 6;
236*c0909341SAndroid Build Coastguard Worker         t         += (*(const uint64_t *) &l[8] & mask) >> 6;
237*c0909341SAndroid Build Coastguard Worker         t *= mul;
238*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 56) - 8 - 16;
239*c0909341SAndroid Build Coastguard Worker         break;
240*c0909341SAndroid Build Coastguard Worker     }
241*c0909341SAndroid Build Coastguard Worker     case RTX_64X32: {
242*c0909341SAndroid Build Coastguard Worker         uint64_t t = (*(const uint64_t *) &a[0] & mask) >> 6;
243*c0909341SAndroid Build Coastguard Worker         t         += (*(const uint64_t *) &a[8] & mask) >> 6;
244*c0909341SAndroid Build Coastguard Worker         t         += (*(const uint64_t *) &l[0] & mask) >> 6;
245*c0909341SAndroid Build Coastguard Worker         t *= mul;
246*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 56) - 16 - 8;
247*c0909341SAndroid Build Coastguard Worker         break;
248*c0909341SAndroid Build Coastguard Worker     }
249*c0909341SAndroid Build Coastguard Worker     case RTX_4X16: {
250*c0909341SAndroid Build Coastguard Worker         uint32_t t = *(const uint8_t  *) a & (uint32_t) mask;
251*c0909341SAndroid Build Coastguard Worker         t         += *(const uint32_t *) l & (uint32_t) mask;
252*c0909341SAndroid Build Coastguard Worker         t = (t >> 6) * (uint32_t) mul;
253*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 24) - 1 - 4;
254*c0909341SAndroid Build Coastguard Worker         break;
255*c0909341SAndroid Build Coastguard Worker     }
256*c0909341SAndroid Build Coastguard Worker     case RTX_16X4: {
257*c0909341SAndroid Build Coastguard Worker         uint32_t t = *(const uint32_t *) a & (uint32_t) mask;
258*c0909341SAndroid Build Coastguard Worker         t         += *(const uint8_t  *) l & (uint32_t) mask;
259*c0909341SAndroid Build Coastguard Worker         t = (t >> 6) * (uint32_t) mul;
260*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 24) - 4 - 1;
261*c0909341SAndroid Build Coastguard Worker         break;
262*c0909341SAndroid Build Coastguard Worker     }
263*c0909341SAndroid Build Coastguard Worker     case RTX_8X32: {
264*c0909341SAndroid Build Coastguard Worker         uint64_t t = *(const uint16_t *) a & (uint32_t) mask;
265*c0909341SAndroid Build Coastguard Worker         t         += *(const uint64_t *) l & mask;
266*c0909341SAndroid Build Coastguard Worker         t = (t >> 6) * mul;
267*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 56) - 2 - 8;
268*c0909341SAndroid Build Coastguard Worker         break;
269*c0909341SAndroid Build Coastguard Worker     }
270*c0909341SAndroid Build Coastguard Worker     case RTX_32X8: {
271*c0909341SAndroid Build Coastguard Worker         uint64_t t = *(const uint64_t *) a & mask;
272*c0909341SAndroid Build Coastguard Worker         t         += *(const uint16_t *) l & (uint32_t) mask;
273*c0909341SAndroid Build Coastguard Worker         t = (t >> 6) * mul;
274*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 56) - 8 - 2;
275*c0909341SAndroid Build Coastguard Worker         break;
276*c0909341SAndroid Build Coastguard Worker     }
277*c0909341SAndroid Build Coastguard Worker     case RTX_16X64: {
278*c0909341SAndroid Build Coastguard Worker         uint64_t t = *(const uint32_t *) a & (uint32_t) mask;
279*c0909341SAndroid Build Coastguard Worker         t         += *(const uint64_t *) &l[0] & mask;
280*c0909341SAndroid Build Coastguard Worker         t = (t >> 6) + ((*(const uint64_t *) &l[8] & mask) >> 6);
281*c0909341SAndroid Build Coastguard Worker         t *= mul;
282*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 56) - 4 - 16;
283*c0909341SAndroid Build Coastguard Worker         break;
284*c0909341SAndroid Build Coastguard Worker     }
285*c0909341SAndroid Build Coastguard Worker     case RTX_64X16: {
286*c0909341SAndroid Build Coastguard Worker         uint64_t t = *(const uint64_t *) &a[0] & mask;
287*c0909341SAndroid Build Coastguard Worker         t         += *(const uint32_t *) l & (uint32_t) mask;
288*c0909341SAndroid Build Coastguard Worker         t = (t >> 6) + ((*(const uint64_t *) &a[8] & mask) >> 6);
289*c0909341SAndroid Build Coastguard Worker         t *= mul;
290*c0909341SAndroid Build Coastguard Worker         s = (int) (t >> 56) - 16 - 4;
291*c0909341SAndroid Build Coastguard Worker         break;
292*c0909341SAndroid Build Coastguard Worker     }
293*c0909341SAndroid Build Coastguard Worker     }
294*c0909341SAndroid Build Coastguard Worker 
295*c0909341SAndroid Build Coastguard Worker     return (s != 0) + (s > 0);
296*c0909341SAndroid Build Coastguard Worker }
297*c0909341SAndroid Build Coastguard Worker 
get_lo_ctx(const uint8_t * const levels,const enum TxClass tx_class,unsigned * const hi_mag,const uint8_t (* const ctx_offsets)[5],const unsigned x,const unsigned y,const ptrdiff_t stride)298*c0909341SAndroid Build Coastguard Worker static inline unsigned get_lo_ctx(const uint8_t *const levels,
299*c0909341SAndroid Build Coastguard Worker                                   const enum TxClass tx_class,
300*c0909341SAndroid Build Coastguard Worker                                   unsigned *const hi_mag,
301*c0909341SAndroid Build Coastguard Worker                                   const uint8_t (*const ctx_offsets)[5],
302*c0909341SAndroid Build Coastguard Worker                                   const unsigned x, const unsigned y,
303*c0909341SAndroid Build Coastguard Worker                                   const ptrdiff_t stride)
304*c0909341SAndroid Build Coastguard Worker {
305*c0909341SAndroid Build Coastguard Worker     unsigned mag = levels[0 * stride + 1] + levels[1 * stride + 0];
306*c0909341SAndroid Build Coastguard Worker     unsigned offset;
307*c0909341SAndroid Build Coastguard Worker     if (tx_class == TX_CLASS_2D) {
308*c0909341SAndroid Build Coastguard Worker         mag += levels[1 * stride + 1];
309*c0909341SAndroid Build Coastguard Worker         *hi_mag = mag;
310*c0909341SAndroid Build Coastguard Worker         mag += levels[0 * stride + 2] + levels[2 * stride + 0];
311*c0909341SAndroid Build Coastguard Worker         offset = ctx_offsets[umin(y, 4)][umin(x, 4)];
312*c0909341SAndroid Build Coastguard Worker     } else {
313*c0909341SAndroid Build Coastguard Worker         mag += levels[0 * stride + 2];
314*c0909341SAndroid Build Coastguard Worker         *hi_mag = mag;
315*c0909341SAndroid Build Coastguard Worker         mag += levels[0 * stride + 3] + levels[0 * stride + 4];
316*c0909341SAndroid Build Coastguard Worker         offset = 26 + (y > 1 ? 10 : y * 5);
317*c0909341SAndroid Build Coastguard Worker     }
318*c0909341SAndroid Build Coastguard Worker     return offset + (mag > 512 ? 4 : (mag + 64) >> 7);
319*c0909341SAndroid Build Coastguard Worker }
320*c0909341SAndroid Build Coastguard Worker 
decode_coefs(Dav1dTaskContext * const t,uint8_t * const a,uint8_t * const l,const enum RectTxfmSize tx,const enum BlockSize bs,const Av1Block * const b,const int intra,const int plane,coef * cf,enum TxfmType * const txtp,uint8_t * res_ctx)321*c0909341SAndroid Build Coastguard Worker static int decode_coefs(Dav1dTaskContext *const t,
322*c0909341SAndroid Build Coastguard Worker                         uint8_t *const a, uint8_t *const l,
323*c0909341SAndroid Build Coastguard Worker                         const enum RectTxfmSize tx, const enum BlockSize bs,
324*c0909341SAndroid Build Coastguard Worker                         const Av1Block *const b, const int intra,
325*c0909341SAndroid Build Coastguard Worker                         const int plane, coef *cf,
326*c0909341SAndroid Build Coastguard Worker                         enum TxfmType *const txtp, uint8_t *res_ctx)
327*c0909341SAndroid Build Coastguard Worker {
328*c0909341SAndroid Build Coastguard Worker     Dav1dTileState *const ts = t->ts;
329*c0909341SAndroid Build Coastguard Worker     const int chroma = !!plane;
330*c0909341SAndroid Build Coastguard Worker     const Dav1dFrameContext *const f = t->f;
331*c0909341SAndroid Build Coastguard Worker     const int lossless = f->frame_hdr->segmentation.lossless[b->seg_id];
332*c0909341SAndroid Build Coastguard Worker     const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[tx];
333*c0909341SAndroid Build Coastguard Worker     const int dbg = DEBUG_BLOCK_INFO && plane && 0;
334*c0909341SAndroid Build Coastguard Worker 
335*c0909341SAndroid Build Coastguard Worker     if (dbg)
336*c0909341SAndroid Build Coastguard Worker         printf("Start: r=%d\n", ts->msac.rng);
337*c0909341SAndroid Build Coastguard Worker 
338*c0909341SAndroid Build Coastguard Worker     // does this block have any non-zero coefficients
339*c0909341SAndroid Build Coastguard Worker     const int sctx = get_skip_ctx(t_dim, bs, a, l, chroma, f->cur.p.layout);
340*c0909341SAndroid Build Coastguard Worker     const int all_skip = dav1d_msac_decode_bool_adapt(&ts->msac,
341*c0909341SAndroid Build Coastguard Worker                              ts->cdf.coef.skip[t_dim->ctx][sctx]);
342*c0909341SAndroid Build Coastguard Worker     if (dbg)
343*c0909341SAndroid Build Coastguard Worker         printf("Post-non-zero[%d][%d][%d]: r=%d\n",
344*c0909341SAndroid Build Coastguard Worker                t_dim->ctx, sctx, all_skip, ts->msac.rng);
345*c0909341SAndroid Build Coastguard Worker     if (all_skip) {
346*c0909341SAndroid Build Coastguard Worker         *res_ctx = 0x40;
347*c0909341SAndroid Build Coastguard Worker         *txtp = lossless * WHT_WHT; /* lossless ? WHT_WHT : DCT_DCT */
348*c0909341SAndroid Build Coastguard Worker         return -1;
349*c0909341SAndroid Build Coastguard Worker     }
350*c0909341SAndroid Build Coastguard Worker 
351*c0909341SAndroid Build Coastguard Worker     // transform type (chroma: derived, luma: explicitly coded)
352*c0909341SAndroid Build Coastguard Worker     if (lossless) {
353*c0909341SAndroid Build Coastguard Worker         assert(t_dim->max == TX_4X4);
354*c0909341SAndroid Build Coastguard Worker         *txtp = WHT_WHT;
355*c0909341SAndroid Build Coastguard Worker     } else if (t_dim->max + intra >= TX_64X64) {
356*c0909341SAndroid Build Coastguard Worker         *txtp = DCT_DCT;
357*c0909341SAndroid Build Coastguard Worker     } else if (chroma) {
358*c0909341SAndroid Build Coastguard Worker         // inferred from either the luma txtp (inter) or a LUT (intra)
359*c0909341SAndroid Build Coastguard Worker         *txtp = intra ? dav1d_txtp_from_uvmode[b->uv_mode] :
360*c0909341SAndroid Build Coastguard Worker                         get_uv_inter_txtp(t_dim, *txtp);
361*c0909341SAndroid Build Coastguard Worker     } else if (!f->frame_hdr->segmentation.qidx[b->seg_id]) {
362*c0909341SAndroid Build Coastguard Worker         // In libaom, lossless is checked by a literal qidx == 0, but not all
363*c0909341SAndroid Build Coastguard Worker         // such blocks are actually lossless. The remainder gets an implicit
364*c0909341SAndroid Build Coastguard Worker         // transform type (for luma)
365*c0909341SAndroid Build Coastguard Worker         *txtp = DCT_DCT;
366*c0909341SAndroid Build Coastguard Worker     } else {
367*c0909341SAndroid Build Coastguard Worker         unsigned idx;
368*c0909341SAndroid Build Coastguard Worker         if (intra) {
369*c0909341SAndroid Build Coastguard Worker             const enum IntraPredMode y_mode_nofilt = b->y_mode == FILTER_PRED ?
370*c0909341SAndroid Build Coastguard Worker                 dav1d_filter_mode_to_y_mode[b->y_angle] : b->y_mode;
371*c0909341SAndroid Build Coastguard Worker             if (f->frame_hdr->reduced_txtp_set || t_dim->min == TX_16X16) {
372*c0909341SAndroid Build Coastguard Worker                 idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
373*c0909341SAndroid Build Coastguard Worker                           ts->cdf.m.txtp_intra2[t_dim->min][y_mode_nofilt], 4);
374*c0909341SAndroid Build Coastguard Worker                 *txtp = dav1d_tx_types_per_set[idx + 0];
375*c0909341SAndroid Build Coastguard Worker             } else {
376*c0909341SAndroid Build Coastguard Worker                 idx = dav1d_msac_decode_symbol_adapt8(&ts->msac,
377*c0909341SAndroid Build Coastguard Worker                           ts->cdf.m.txtp_intra1[t_dim->min][y_mode_nofilt], 6);
378*c0909341SAndroid Build Coastguard Worker                 *txtp = dav1d_tx_types_per_set[idx + 5];
379*c0909341SAndroid Build Coastguard Worker             }
380*c0909341SAndroid Build Coastguard Worker             if (dbg)
381*c0909341SAndroid Build Coastguard Worker                 printf("Post-txtp-intra[%d->%d][%d][%d->%d]: r=%d\n",
382*c0909341SAndroid Build Coastguard Worker                        tx, t_dim->min, y_mode_nofilt, idx, *txtp, ts->msac.rng);
383*c0909341SAndroid Build Coastguard Worker         } else {
384*c0909341SAndroid Build Coastguard Worker             if (f->frame_hdr->reduced_txtp_set || t_dim->max == TX_32X32) {
385*c0909341SAndroid Build Coastguard Worker                 idx = dav1d_msac_decode_bool_adapt(&ts->msac,
386*c0909341SAndroid Build Coastguard Worker                           ts->cdf.m.txtp_inter3[t_dim->min]);
387*c0909341SAndroid Build Coastguard Worker                 *txtp = (idx - 1) & IDTX; /* idx ? DCT_DCT : IDTX */
388*c0909341SAndroid Build Coastguard Worker             } else if (t_dim->min == TX_16X16) {
389*c0909341SAndroid Build Coastguard Worker                 idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
390*c0909341SAndroid Build Coastguard Worker                           ts->cdf.m.txtp_inter2, 11);
391*c0909341SAndroid Build Coastguard Worker                 *txtp = dav1d_tx_types_per_set[idx + 12];
392*c0909341SAndroid Build Coastguard Worker             } else {
393*c0909341SAndroid Build Coastguard Worker                 idx = dav1d_msac_decode_symbol_adapt16(&ts->msac,
394*c0909341SAndroid Build Coastguard Worker                           ts->cdf.m.txtp_inter1[t_dim->min], 15);
395*c0909341SAndroid Build Coastguard Worker                 *txtp = dav1d_tx_types_per_set[idx + 24];
396*c0909341SAndroid Build Coastguard Worker             }
397*c0909341SAndroid Build Coastguard Worker             if (dbg)
398*c0909341SAndroid Build Coastguard Worker                 printf("Post-txtp-inter[%d->%d][%d->%d]: r=%d\n",
399*c0909341SAndroid Build Coastguard Worker                        tx, t_dim->min, idx, *txtp, ts->msac.rng);
400*c0909341SAndroid Build Coastguard Worker         }
401*c0909341SAndroid Build Coastguard Worker     }
402*c0909341SAndroid Build Coastguard Worker 
403*c0909341SAndroid Build Coastguard Worker     // find end-of-block (eob)
404*c0909341SAndroid Build Coastguard Worker     int eob_bin;
405*c0909341SAndroid Build Coastguard Worker     const int slw = imin(t_dim->lw, TX_32X32), slh = imin(t_dim->lh, TX_32X32);
406*c0909341SAndroid Build Coastguard Worker     const int tx2dszctx = slw + slh;
407*c0909341SAndroid Build Coastguard Worker     const enum TxClass tx_class = dav1d_tx_type_class[*txtp];
408*c0909341SAndroid Build Coastguard Worker     const int is_1d = tx_class != TX_CLASS_2D;
409*c0909341SAndroid Build Coastguard Worker     switch (tx2dszctx) {
410*c0909341SAndroid Build Coastguard Worker #define case_sz(sz, bin, ns, is_1d) \
411*c0909341SAndroid Build Coastguard Worker     case sz: { \
412*c0909341SAndroid Build Coastguard Worker         uint16_t *const eob_bin_cdf = ts->cdf.coef.eob_bin_##bin[chroma]is_1d; \
413*c0909341SAndroid Build Coastguard Worker         eob_bin = dav1d_msac_decode_symbol_adapt##ns(&ts->msac, eob_bin_cdf, 4 + sz); \
414*c0909341SAndroid Build Coastguard Worker         break; \
415*c0909341SAndroid Build Coastguard Worker     }
416*c0909341SAndroid Build Coastguard Worker     case_sz(0,   16,  8, [is_1d]);
417*c0909341SAndroid Build Coastguard Worker     case_sz(1,   32,  8, [is_1d]);
418*c0909341SAndroid Build Coastguard Worker     case_sz(2,   64,  8, [is_1d]);
419*c0909341SAndroid Build Coastguard Worker     case_sz(3,  128,  8, [is_1d]);
420*c0909341SAndroid Build Coastguard Worker     case_sz(4,  256, 16, [is_1d]);
421*c0909341SAndroid Build Coastguard Worker     case_sz(5,  512, 16,        );
422*c0909341SAndroid Build Coastguard Worker     case_sz(6, 1024, 16,        );
423*c0909341SAndroid Build Coastguard Worker #undef case_sz
424*c0909341SAndroid Build Coastguard Worker     }
425*c0909341SAndroid Build Coastguard Worker     if (dbg)
426*c0909341SAndroid Build Coastguard Worker         printf("Post-eob_bin_%d[%d][%d][%d]: r=%d\n",
427*c0909341SAndroid Build Coastguard Worker                16 << tx2dszctx, chroma, is_1d, eob_bin, ts->msac.rng);
428*c0909341SAndroid Build Coastguard Worker     int eob;
429*c0909341SAndroid Build Coastguard Worker     if (eob_bin > 1) {
430*c0909341SAndroid Build Coastguard Worker         uint16_t *const eob_hi_bit_cdf =
431*c0909341SAndroid Build Coastguard Worker             ts->cdf.coef.eob_hi_bit[t_dim->ctx][chroma][eob_bin];
432*c0909341SAndroid Build Coastguard Worker         const int eob_hi_bit = dav1d_msac_decode_bool_adapt(&ts->msac, eob_hi_bit_cdf);
433*c0909341SAndroid Build Coastguard Worker         if (dbg)
434*c0909341SAndroid Build Coastguard Worker             printf("Post-eob_hi_bit[%d][%d][%d][%d]: r=%d\n",
435*c0909341SAndroid Build Coastguard Worker                    t_dim->ctx, chroma, eob_bin, eob_hi_bit, ts->msac.rng);
436*c0909341SAndroid Build Coastguard Worker         eob = ((eob_hi_bit | 2) << (eob_bin - 2)) |
437*c0909341SAndroid Build Coastguard Worker               dav1d_msac_decode_bools(&ts->msac, eob_bin - 2);
438*c0909341SAndroid Build Coastguard Worker         if (dbg)
439*c0909341SAndroid Build Coastguard Worker             printf("Post-eob[%d]: r=%d\n", eob, ts->msac.rng);
440*c0909341SAndroid Build Coastguard Worker     } else {
441*c0909341SAndroid Build Coastguard Worker         eob = eob_bin;
442*c0909341SAndroid Build Coastguard Worker     }
443*c0909341SAndroid Build Coastguard Worker     assert(eob >= 0);
444*c0909341SAndroid Build Coastguard Worker 
445*c0909341SAndroid Build Coastguard Worker     // base tokens
446*c0909341SAndroid Build Coastguard Worker     uint16_t (*const eob_cdf)[4] = ts->cdf.coef.eob_base_tok[t_dim->ctx][chroma];
447*c0909341SAndroid Build Coastguard Worker     uint16_t (*const hi_cdf)[4] = ts->cdf.coef.br_tok[imin(t_dim->ctx, 3)][chroma];
448*c0909341SAndroid Build Coastguard Worker     unsigned rc, dc_tok;
449*c0909341SAndroid Build Coastguard Worker 
450*c0909341SAndroid Build Coastguard Worker     if (eob) {
451*c0909341SAndroid Build Coastguard Worker         uint16_t (*const lo_cdf)[4] = ts->cdf.coef.base_tok[t_dim->ctx][chroma];
452*c0909341SAndroid Build Coastguard Worker         uint8_t *const levels = t->scratch.levels; // bits 0-5: tok, 6-7: lo_tok
453*c0909341SAndroid Build Coastguard Worker 
454*c0909341SAndroid Build Coastguard Worker         /* eob */
455*c0909341SAndroid Build Coastguard Worker         unsigned ctx = 1 + (eob > 2 << tx2dszctx) + (eob > 4 << tx2dszctx);
456*c0909341SAndroid Build Coastguard Worker         int eob_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, eob_cdf[ctx], 2);
457*c0909341SAndroid Build Coastguard Worker         int tok = eob_tok + 1;
458*c0909341SAndroid Build Coastguard Worker         int level_tok = tok * 0x41;
459*c0909341SAndroid Build Coastguard Worker         unsigned mag;
460*c0909341SAndroid Build Coastguard Worker 
461*c0909341SAndroid Build Coastguard Worker #define DECODE_COEFS_CLASS(tx_class) \
462*c0909341SAndroid Build Coastguard Worker         unsigned x, y; \
463*c0909341SAndroid Build Coastguard Worker         uint8_t *level; \
464*c0909341SAndroid Build Coastguard Worker         if (tx_class == TX_CLASS_2D) \
465*c0909341SAndroid Build Coastguard Worker             rc = scan[eob], x = rc >> shift, y = rc & mask; \
466*c0909341SAndroid Build Coastguard Worker         else if (tx_class == TX_CLASS_H) \
467*c0909341SAndroid Build Coastguard Worker             /* Transposing reduces the stride and padding requirements */ \
468*c0909341SAndroid Build Coastguard Worker             x = eob & mask, y = eob >> shift, rc = eob; \
469*c0909341SAndroid Build Coastguard Worker         else /* tx_class == TX_CLASS_V */ \
470*c0909341SAndroid Build Coastguard Worker             x = eob & mask, y = eob >> shift, rc = (x << shift2) | y; \
471*c0909341SAndroid Build Coastguard Worker         if (dbg) \
472*c0909341SAndroid Build Coastguard Worker             printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
473*c0909341SAndroid Build Coastguard Worker                    t_dim->ctx, chroma, ctx, eob, rc, tok, ts->msac.rng); \
474*c0909341SAndroid Build Coastguard Worker         if (eob_tok == 2) { \
475*c0909341SAndroid Build Coastguard Worker             ctx = (tx_class == TX_CLASS_2D ? (x | y) > 1 : y != 0) ? 14 : 7; \
476*c0909341SAndroid Build Coastguard Worker             tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
477*c0909341SAndroid Build Coastguard Worker             level_tok = tok + (3 << 6); \
478*c0909341SAndroid Build Coastguard Worker             if (dbg) \
479*c0909341SAndroid Build Coastguard Worker                 printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
480*c0909341SAndroid Build Coastguard Worker                        imin(t_dim->ctx, 3), chroma, ctx, eob, rc, tok, \
481*c0909341SAndroid Build Coastguard Worker                        ts->msac.rng); \
482*c0909341SAndroid Build Coastguard Worker         } \
483*c0909341SAndroid Build Coastguard Worker         cf[rc] = tok << 11; \
484*c0909341SAndroid Build Coastguard Worker         if (TX_CLASS_2D) \
485*c0909341SAndroid Build Coastguard Worker             level = levels + rc; \
486*c0909341SAndroid Build Coastguard Worker         else \
487*c0909341SAndroid Build Coastguard Worker             level = levels + x * stride + y; \
488*c0909341SAndroid Build Coastguard Worker         *level = (uint8_t) level_tok; \
489*c0909341SAndroid Build Coastguard Worker         for (int i = eob - 1; i > 0; i--) { /* ac */ \
490*c0909341SAndroid Build Coastguard Worker             unsigned rc_i; \
491*c0909341SAndroid Build Coastguard Worker             if (tx_class == TX_CLASS_2D) \
492*c0909341SAndroid Build Coastguard Worker                 rc_i = scan[i], x = rc_i >> shift, y = rc_i & mask; \
493*c0909341SAndroid Build Coastguard Worker             else if (tx_class == TX_CLASS_H) \
494*c0909341SAndroid Build Coastguard Worker                 x = i & mask, y = i >> shift, rc_i = i; \
495*c0909341SAndroid Build Coastguard Worker             else /* tx_class == TX_CLASS_V */ \
496*c0909341SAndroid Build Coastguard Worker                 x = i & mask, y = i >> shift, rc_i = (x << shift2) | y; \
497*c0909341SAndroid Build Coastguard Worker             assert(x < 32 && y < 32); \
498*c0909341SAndroid Build Coastguard Worker             if (TX_CLASS_2D) \
499*c0909341SAndroid Build Coastguard Worker                 level = levels + rc; \
500*c0909341SAndroid Build Coastguard Worker             else \
501*c0909341SAndroid Build Coastguard Worker                 level = levels + x * stride + y; \
502*c0909341SAndroid Build Coastguard Worker             ctx = get_lo_ctx(level, tx_class, &mag, lo_ctx_offsets, x, y, stride); \
503*c0909341SAndroid Build Coastguard Worker             if (tx_class == TX_CLASS_2D) \
504*c0909341SAndroid Build Coastguard Worker                 y |= x; \
505*c0909341SAndroid Build Coastguard Worker             tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
506*c0909341SAndroid Build Coastguard Worker             if (dbg) \
507*c0909341SAndroid Build Coastguard Worker                 printf("Post-lo_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
508*c0909341SAndroid Build Coastguard Worker                        t_dim->ctx, chroma, ctx, i, rc_i, tok, ts->msac.rng); \
509*c0909341SAndroid Build Coastguard Worker             if (tok == 3) { \
510*c0909341SAndroid Build Coastguard Worker                 mag &= 63; \
511*c0909341SAndroid Build Coastguard Worker                 ctx = (y > (tx_class == TX_CLASS_2D) ? 14 : 7) + \
512*c0909341SAndroid Build Coastguard Worker                       (mag > 12 ? 6 : (mag + 1) >> 1); \
513*c0909341SAndroid Build Coastguard Worker                 tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
514*c0909341SAndroid Build Coastguard Worker                 if (dbg) \
515*c0909341SAndroid Build Coastguard Worker                     printf("Post-hi_tok[%d][%d][%d][%d=%d=%d]: r=%d\n", \
516*c0909341SAndroid Build Coastguard Worker                            imin(t_dim->ctx, 3), chroma, ctx, i, rc_i, tok, \
517*c0909341SAndroid Build Coastguard Worker                            ts->msac.rng); \
518*c0909341SAndroid Build Coastguard Worker                 *level = (uint8_t) (tok + (3 << 6)); \
519*c0909341SAndroid Build Coastguard Worker                 cf[rc_i] = (tok << 11) | rc; \
520*c0909341SAndroid Build Coastguard Worker                 rc = rc_i; \
521*c0909341SAndroid Build Coastguard Worker             } else { \
522*c0909341SAndroid Build Coastguard Worker                 /* 0x1 for tok, 0x7ff as bitmask for rc, 0x41 for level_tok */ \
523*c0909341SAndroid Build Coastguard Worker                 tok *= 0x17ff41; \
524*c0909341SAndroid Build Coastguard Worker                 *level = (uint8_t) tok; \
525*c0909341SAndroid Build Coastguard Worker                 /* tok ? (tok << 11) | rc : 0 */ \
526*c0909341SAndroid Build Coastguard Worker                 tok = (tok >> 9) & (rc + ~0x7ffu); \
527*c0909341SAndroid Build Coastguard Worker                 if (tok) rc = rc_i; \
528*c0909341SAndroid Build Coastguard Worker                 cf[rc_i] = tok; \
529*c0909341SAndroid Build Coastguard Worker             } \
530*c0909341SAndroid Build Coastguard Worker         } \
531*c0909341SAndroid Build Coastguard Worker         /* dc */ \
532*c0909341SAndroid Build Coastguard Worker         ctx = (tx_class == TX_CLASS_2D) ? 0 : \
533*c0909341SAndroid Build Coastguard Worker             get_lo_ctx(levels, tx_class, &mag, lo_ctx_offsets, 0, 0, stride); \
534*c0909341SAndroid Build Coastguard Worker         dc_tok = dav1d_msac_decode_symbol_adapt4(&ts->msac, lo_cdf[ctx], 3); \
535*c0909341SAndroid Build Coastguard Worker         if (dbg) \
536*c0909341SAndroid Build Coastguard Worker             printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n", \
537*c0909341SAndroid Build Coastguard Worker                    t_dim->ctx, chroma, ctx, dc_tok, ts->msac.rng); \
538*c0909341SAndroid Build Coastguard Worker         if (dc_tok == 3) { \
539*c0909341SAndroid Build Coastguard Worker             if (tx_class == TX_CLASS_2D) \
540*c0909341SAndroid Build Coastguard Worker                 mag = levels[0 * stride + 1] + levels[1 * stride + 0] + \
541*c0909341SAndroid Build Coastguard Worker                       levels[1 * stride + 1]; \
542*c0909341SAndroid Build Coastguard Worker             mag &= 63; \
543*c0909341SAndroid Build Coastguard Worker             ctx = mag > 12 ? 6 : (mag + 1) >> 1; \
544*c0909341SAndroid Build Coastguard Worker             dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[ctx]); \
545*c0909341SAndroid Build Coastguard Worker             if (dbg) \
546*c0909341SAndroid Build Coastguard Worker                 printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n", \
547*c0909341SAndroid Build Coastguard Worker                        imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng); \
548*c0909341SAndroid Build Coastguard Worker         } \
549*c0909341SAndroid Build Coastguard Worker         break
550*c0909341SAndroid Build Coastguard Worker 
551*c0909341SAndroid Build Coastguard Worker         const uint16_t *scan;
552*c0909341SAndroid Build Coastguard Worker         switch (tx_class) {
553*c0909341SAndroid Build Coastguard Worker         case TX_CLASS_2D: {
554*c0909341SAndroid Build Coastguard Worker             const unsigned nonsquare_tx = tx >= RTX_4X8;
555*c0909341SAndroid Build Coastguard Worker             const uint8_t (*const lo_ctx_offsets)[5] =
556*c0909341SAndroid Build Coastguard Worker                 dav1d_lo_ctx_offsets[nonsquare_tx + (tx & nonsquare_tx)];
557*c0909341SAndroid Build Coastguard Worker             scan = dav1d_scans[tx];
558*c0909341SAndroid Build Coastguard Worker             const ptrdiff_t stride = 4 << slh;
559*c0909341SAndroid Build Coastguard Worker             const unsigned shift = slh + 2, shift2 = 0;
560*c0909341SAndroid Build Coastguard Worker             const unsigned mask = (4 << slh) - 1;
561*c0909341SAndroid Build Coastguard Worker             memset(levels, 0, stride * ((4 << slw) + 2));
562*c0909341SAndroid Build Coastguard Worker             DECODE_COEFS_CLASS(TX_CLASS_2D);
563*c0909341SAndroid Build Coastguard Worker         }
564*c0909341SAndroid Build Coastguard Worker         case TX_CLASS_H: {
565*c0909341SAndroid Build Coastguard Worker             const uint8_t (*const lo_ctx_offsets)[5] = NULL;
566*c0909341SAndroid Build Coastguard Worker             const ptrdiff_t stride = 16;
567*c0909341SAndroid Build Coastguard Worker             const unsigned shift = slh + 2, shift2 = 0;
568*c0909341SAndroid Build Coastguard Worker             const unsigned mask = (4 << slh) - 1;
569*c0909341SAndroid Build Coastguard Worker             memset(levels, 0, stride * ((4 << slh) + 2));
570*c0909341SAndroid Build Coastguard Worker             DECODE_COEFS_CLASS(TX_CLASS_H);
571*c0909341SAndroid Build Coastguard Worker         }
572*c0909341SAndroid Build Coastguard Worker         case TX_CLASS_V: {
573*c0909341SAndroid Build Coastguard Worker             const uint8_t (*const lo_ctx_offsets)[5] = NULL;
574*c0909341SAndroid Build Coastguard Worker             const ptrdiff_t stride = 16;
575*c0909341SAndroid Build Coastguard Worker             const unsigned shift = slw + 2, shift2 = slh + 2;
576*c0909341SAndroid Build Coastguard Worker             const unsigned mask = (4 << slw) - 1;
577*c0909341SAndroid Build Coastguard Worker             memset(levels, 0, stride * ((4 << slw) + 2));
578*c0909341SAndroid Build Coastguard Worker             DECODE_COEFS_CLASS(TX_CLASS_V);
579*c0909341SAndroid Build Coastguard Worker         }
580*c0909341SAndroid Build Coastguard Worker #undef DECODE_COEFS_CLASS
581*c0909341SAndroid Build Coastguard Worker         default: assert(0);
582*c0909341SAndroid Build Coastguard Worker         }
583*c0909341SAndroid Build Coastguard Worker     } else { // dc-only
584*c0909341SAndroid Build Coastguard Worker         int tok_br = dav1d_msac_decode_symbol_adapt4(&ts->msac, eob_cdf[0], 2);
585*c0909341SAndroid Build Coastguard Worker         dc_tok = 1 + tok_br;
586*c0909341SAndroid Build Coastguard Worker         if (dbg)
587*c0909341SAndroid Build Coastguard Worker             printf("Post-dc_lo_tok[%d][%d][%d][%d]: r=%d\n",
588*c0909341SAndroid Build Coastguard Worker                    t_dim->ctx, chroma, 0, dc_tok, ts->msac.rng);
589*c0909341SAndroid Build Coastguard Worker         if (tok_br == 2) {
590*c0909341SAndroid Build Coastguard Worker             dc_tok = dav1d_msac_decode_hi_tok(&ts->msac, hi_cdf[0]);
591*c0909341SAndroid Build Coastguard Worker             if (dbg)
592*c0909341SAndroid Build Coastguard Worker                 printf("Post-dc_hi_tok[%d][%d][0][%d]: r=%d\n",
593*c0909341SAndroid Build Coastguard Worker                        imin(t_dim->ctx, 3), chroma, dc_tok, ts->msac.rng);
594*c0909341SAndroid Build Coastguard Worker         }
595*c0909341SAndroid Build Coastguard Worker         rc = 0;
596*c0909341SAndroid Build Coastguard Worker     }
597*c0909341SAndroid Build Coastguard Worker 
598*c0909341SAndroid Build Coastguard Worker     // residual and sign
599*c0909341SAndroid Build Coastguard Worker     const uint16_t *const dq_tbl = ts->dq[b->seg_id][plane];
600*c0909341SAndroid Build Coastguard Worker     const uint8_t *const qm_tbl = *txtp < IDTX ? f->qm[tx][plane] : NULL;
601*c0909341SAndroid Build Coastguard Worker     const int dq_shift = imax(0, t_dim->ctx - 2);
602*c0909341SAndroid Build Coastguard Worker     const int cf_max = ~(~127U << (BITDEPTH == 8 ? 8 : f->cur.p.bpc));
603*c0909341SAndroid Build Coastguard Worker     unsigned cul_level, dc_sign_level;
604*c0909341SAndroid Build Coastguard Worker 
605*c0909341SAndroid Build Coastguard Worker     if (!dc_tok) {
606*c0909341SAndroid Build Coastguard Worker         cul_level = 0;
607*c0909341SAndroid Build Coastguard Worker         dc_sign_level = 1 << 6;
608*c0909341SAndroid Build Coastguard Worker         if (qm_tbl) goto ac_qm;
609*c0909341SAndroid Build Coastguard Worker         goto ac_noqm;
610*c0909341SAndroid Build Coastguard Worker     }
611*c0909341SAndroid Build Coastguard Worker 
612*c0909341SAndroid Build Coastguard Worker     const int dc_sign_ctx = get_dc_sign_ctx(tx, a, l);
613*c0909341SAndroid Build Coastguard Worker     uint16_t *const dc_sign_cdf = ts->cdf.coef.dc_sign[chroma][dc_sign_ctx];
614*c0909341SAndroid Build Coastguard Worker     const int dc_sign = dav1d_msac_decode_bool_adapt(&ts->msac, dc_sign_cdf);
615*c0909341SAndroid Build Coastguard Worker     if (dbg)
616*c0909341SAndroid Build Coastguard Worker         printf("Post-dc_sign[%d][%d][%d]: r=%d\n",
617*c0909341SAndroid Build Coastguard Worker                chroma, dc_sign_ctx, dc_sign, ts->msac.rng);
618*c0909341SAndroid Build Coastguard Worker 
619*c0909341SAndroid Build Coastguard Worker     int dc_dq = dq_tbl[0];
620*c0909341SAndroid Build Coastguard Worker     dc_sign_level = (dc_sign - 1) & (2 << 6);
621*c0909341SAndroid Build Coastguard Worker 
622*c0909341SAndroid Build Coastguard Worker     if (qm_tbl) {
623*c0909341SAndroid Build Coastguard Worker         dc_dq = (dc_dq * qm_tbl[0] + 16) >> 5;
624*c0909341SAndroid Build Coastguard Worker 
625*c0909341SAndroid Build Coastguard Worker         if (dc_tok == 15) {
626*c0909341SAndroid Build Coastguard Worker             dc_tok = read_golomb(&ts->msac) + 15;
627*c0909341SAndroid Build Coastguard Worker             if (dbg)
628*c0909341SAndroid Build Coastguard Worker                 printf("Post-dc_residual[%d->%d]: r=%d\n",
629*c0909341SAndroid Build Coastguard Worker                        dc_tok - 15, dc_tok, ts->msac.rng);
630*c0909341SAndroid Build Coastguard Worker 
631*c0909341SAndroid Build Coastguard Worker             dc_tok &= 0xfffff;
632*c0909341SAndroid Build Coastguard Worker             dc_dq = (dc_dq * dc_tok) & 0xffffff;
633*c0909341SAndroid Build Coastguard Worker         } else {
634*c0909341SAndroid Build Coastguard Worker             dc_dq *= dc_tok;
635*c0909341SAndroid Build Coastguard Worker             assert(dc_dq <= 0xffffff);
636*c0909341SAndroid Build Coastguard Worker         }
637*c0909341SAndroid Build Coastguard Worker         cul_level = dc_tok;
638*c0909341SAndroid Build Coastguard Worker         dc_dq >>= dq_shift;
639*c0909341SAndroid Build Coastguard Worker         dc_dq = umin(dc_dq, cf_max + dc_sign);
640*c0909341SAndroid Build Coastguard Worker         cf[0] = (coef) (dc_sign ? -dc_dq : dc_dq);
641*c0909341SAndroid Build Coastguard Worker 
642*c0909341SAndroid Build Coastguard Worker         if (rc) ac_qm: {
643*c0909341SAndroid Build Coastguard Worker             const unsigned ac_dq = dq_tbl[1];
644*c0909341SAndroid Build Coastguard Worker             do {
645*c0909341SAndroid Build Coastguard Worker                 const int sign = dav1d_msac_decode_bool_equi(&ts->msac);
646*c0909341SAndroid Build Coastguard Worker                 if (dbg)
647*c0909341SAndroid Build Coastguard Worker                     printf("Post-sign[%d=%d]: r=%d\n", rc, sign, ts->msac.rng);
648*c0909341SAndroid Build Coastguard Worker                 const unsigned rc_tok = cf[rc];
649*c0909341SAndroid Build Coastguard Worker                 unsigned tok, dq = (ac_dq * qm_tbl[rc] + 16) >> 5;
650*c0909341SAndroid Build Coastguard Worker                 int dq_sat;
651*c0909341SAndroid Build Coastguard Worker 
652*c0909341SAndroid Build Coastguard Worker                 if (rc_tok >= (15 << 11)) {
653*c0909341SAndroid Build Coastguard Worker                     tok = read_golomb(&ts->msac) + 15;
654*c0909341SAndroid Build Coastguard Worker                     if (dbg)
655*c0909341SAndroid Build Coastguard Worker                         printf("Post-residual[%d=%d->%d]: r=%d\n",
656*c0909341SAndroid Build Coastguard Worker                                rc, tok - 15, tok, ts->msac.rng);
657*c0909341SAndroid Build Coastguard Worker 
658*c0909341SAndroid Build Coastguard Worker                     tok &= 0xfffff;
659*c0909341SAndroid Build Coastguard Worker                     dq = (dq * tok) & 0xffffff;
660*c0909341SAndroid Build Coastguard Worker                 } else {
661*c0909341SAndroid Build Coastguard Worker                     tok = rc_tok >> 11;
662*c0909341SAndroid Build Coastguard Worker                     dq *= tok;
663*c0909341SAndroid Build Coastguard Worker                     assert(dq <= 0xffffff);
664*c0909341SAndroid Build Coastguard Worker                 }
665*c0909341SAndroid Build Coastguard Worker                 cul_level += tok;
666*c0909341SAndroid Build Coastguard Worker                 dq >>= dq_shift;
667*c0909341SAndroid Build Coastguard Worker                 dq_sat = umin(dq, cf_max + sign);
668*c0909341SAndroid Build Coastguard Worker                 cf[rc] = (coef) (sign ? -dq_sat : dq_sat);
669*c0909341SAndroid Build Coastguard Worker 
670*c0909341SAndroid Build Coastguard Worker                 rc = rc_tok & 0x3ff;
671*c0909341SAndroid Build Coastguard Worker             } while (rc);
672*c0909341SAndroid Build Coastguard Worker         }
673*c0909341SAndroid Build Coastguard Worker     } else {
674*c0909341SAndroid Build Coastguard Worker         // non-qmatrix is the common case and allows for additional optimizations
675*c0909341SAndroid Build Coastguard Worker         if (dc_tok == 15) {
676*c0909341SAndroid Build Coastguard Worker             dc_tok = read_golomb(&ts->msac) + 15;
677*c0909341SAndroid Build Coastguard Worker             if (dbg)
678*c0909341SAndroid Build Coastguard Worker                 printf("Post-dc_residual[%d->%d]: r=%d\n",
679*c0909341SAndroid Build Coastguard Worker                        dc_tok - 15, dc_tok, ts->msac.rng);
680*c0909341SAndroid Build Coastguard Worker 
681*c0909341SAndroid Build Coastguard Worker             dc_tok &= 0xfffff;
682*c0909341SAndroid Build Coastguard Worker             dc_dq = ((dc_dq * dc_tok) & 0xffffff) >> dq_shift;
683*c0909341SAndroid Build Coastguard Worker             dc_dq = umin(dc_dq, cf_max + dc_sign);
684*c0909341SAndroid Build Coastguard Worker         } else {
685*c0909341SAndroid Build Coastguard Worker             dc_dq = ((dc_dq * dc_tok) >> dq_shift);
686*c0909341SAndroid Build Coastguard Worker             assert(dc_dq <= cf_max);
687*c0909341SAndroid Build Coastguard Worker         }
688*c0909341SAndroid Build Coastguard Worker         cul_level = dc_tok;
689*c0909341SAndroid Build Coastguard Worker         cf[0] = (coef) (dc_sign ? -dc_dq : dc_dq);
690*c0909341SAndroid Build Coastguard Worker 
691*c0909341SAndroid Build Coastguard Worker         if (rc) ac_noqm: {
692*c0909341SAndroid Build Coastguard Worker             const unsigned ac_dq = dq_tbl[1];
693*c0909341SAndroid Build Coastguard Worker             do {
694*c0909341SAndroid Build Coastguard Worker                 const int sign = dav1d_msac_decode_bool_equi(&ts->msac);
695*c0909341SAndroid Build Coastguard Worker                 if (dbg)
696*c0909341SAndroid Build Coastguard Worker                     printf("Post-sign[%d=%d]: r=%d\n", rc, sign, ts->msac.rng);
697*c0909341SAndroid Build Coastguard Worker                 const unsigned rc_tok = cf[rc];
698*c0909341SAndroid Build Coastguard Worker                 unsigned tok;
699*c0909341SAndroid Build Coastguard Worker                 int dq;
700*c0909341SAndroid Build Coastguard Worker 
701*c0909341SAndroid Build Coastguard Worker                 // residual
702*c0909341SAndroid Build Coastguard Worker                 if (rc_tok >= (15 << 11)) {
703*c0909341SAndroid Build Coastguard Worker                     tok = read_golomb(&ts->msac) + 15;
704*c0909341SAndroid Build Coastguard Worker                     if (dbg)
705*c0909341SAndroid Build Coastguard Worker                         printf("Post-residual[%d=%d->%d]: r=%d\n",
706*c0909341SAndroid Build Coastguard Worker                                rc, tok - 15, tok, ts->msac.rng);
707*c0909341SAndroid Build Coastguard Worker 
708*c0909341SAndroid Build Coastguard Worker                     // coefficient parsing, see 5.11.39
709*c0909341SAndroid Build Coastguard Worker                     tok &= 0xfffff;
710*c0909341SAndroid Build Coastguard Worker 
711*c0909341SAndroid Build Coastguard Worker                     // dequant, see 7.12.3
712*c0909341SAndroid Build Coastguard Worker                     dq = ((ac_dq * tok) & 0xffffff) >> dq_shift;
713*c0909341SAndroid Build Coastguard Worker                     dq = umin(dq, cf_max + sign);
714*c0909341SAndroid Build Coastguard Worker                 } else {
715*c0909341SAndroid Build Coastguard Worker                     // cannot exceed cf_max, so we can avoid the clipping
716*c0909341SAndroid Build Coastguard Worker                     tok = rc_tok >> 11;
717*c0909341SAndroid Build Coastguard Worker                     dq = ((ac_dq * tok) >> dq_shift);
718*c0909341SAndroid Build Coastguard Worker                     assert(dq <= cf_max);
719*c0909341SAndroid Build Coastguard Worker                 }
720*c0909341SAndroid Build Coastguard Worker                 cul_level += tok;
721*c0909341SAndroid Build Coastguard Worker                 cf[rc] = (coef) (sign ? -dq : dq);
722*c0909341SAndroid Build Coastguard Worker 
723*c0909341SAndroid Build Coastguard Worker                 rc = rc_tok & 0x3ff; // next non-zero rc, zero if eob
724*c0909341SAndroid Build Coastguard Worker             } while (rc);
725*c0909341SAndroid Build Coastguard Worker         }
726*c0909341SAndroid Build Coastguard Worker     }
727*c0909341SAndroid Build Coastguard Worker 
728*c0909341SAndroid Build Coastguard Worker     // context
729*c0909341SAndroid Build Coastguard Worker     *res_ctx = umin(cul_level, 63) | dc_sign_level;
730*c0909341SAndroid Build Coastguard Worker 
731*c0909341SAndroid Build Coastguard Worker     return eob;
732*c0909341SAndroid Build Coastguard Worker }
733*c0909341SAndroid Build Coastguard Worker 
read_coef_tree(Dav1dTaskContext * const t,const enum BlockSize bs,const Av1Block * const b,const enum RectTxfmSize ytx,const int depth,const uint16_t * const tx_split,const int x_off,const int y_off,pixel * dst)734*c0909341SAndroid Build Coastguard Worker static void read_coef_tree(Dav1dTaskContext *const t,
735*c0909341SAndroid Build Coastguard Worker                            const enum BlockSize bs, const Av1Block *const b,
736*c0909341SAndroid Build Coastguard Worker                            const enum RectTxfmSize ytx, const int depth,
737*c0909341SAndroid Build Coastguard Worker                            const uint16_t *const tx_split,
738*c0909341SAndroid Build Coastguard Worker                            const int x_off, const int y_off, pixel *dst)
739*c0909341SAndroid Build Coastguard Worker {
740*c0909341SAndroid Build Coastguard Worker     const Dav1dFrameContext *const f = t->f;
741*c0909341SAndroid Build Coastguard Worker     Dav1dTileState *const ts = t->ts;
742*c0909341SAndroid Build Coastguard Worker     const Dav1dDSPContext *const dsp = f->dsp;
743*c0909341SAndroid Build Coastguard Worker     const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[ytx];
744*c0909341SAndroid Build Coastguard Worker     const int txw = t_dim->w, txh = t_dim->h;
745*c0909341SAndroid Build Coastguard Worker 
746*c0909341SAndroid Build Coastguard Worker     /* y_off can be larger than 3 since lossless blocks use TX_4X4 but can't
747*c0909341SAndroid Build Coastguard Worker      * be split. Avoids an undefined left shift. */
748*c0909341SAndroid Build Coastguard Worker     if (depth < 2 && tx_split[depth] &&
749*c0909341SAndroid Build Coastguard Worker         tx_split[depth] & (1 << (y_off * 4 + x_off)))
750*c0909341SAndroid Build Coastguard Worker     {
751*c0909341SAndroid Build Coastguard Worker         const enum RectTxfmSize sub = t_dim->sub;
752*c0909341SAndroid Build Coastguard Worker         const TxfmInfo *const sub_t_dim = &dav1d_txfm_dimensions[sub];
753*c0909341SAndroid Build Coastguard Worker         const int txsw = sub_t_dim->w, txsh = sub_t_dim->h;
754*c0909341SAndroid Build Coastguard Worker 
755*c0909341SAndroid Build Coastguard Worker         read_coef_tree(t, bs, b, sub, depth + 1, tx_split,
756*c0909341SAndroid Build Coastguard Worker                        x_off * 2 + 0, y_off * 2 + 0, dst);
757*c0909341SAndroid Build Coastguard Worker         t->bx += txsw;
758*c0909341SAndroid Build Coastguard Worker         if (txw >= txh && t->bx < f->bw)
759*c0909341SAndroid Build Coastguard Worker             read_coef_tree(t, bs, b, sub, depth + 1, tx_split, x_off * 2 + 1,
760*c0909341SAndroid Build Coastguard Worker                            y_off * 2 + 0, dst ? &dst[4 * txsw] : NULL);
761*c0909341SAndroid Build Coastguard Worker         t->bx -= txsw;
762*c0909341SAndroid Build Coastguard Worker         t->by += txsh;
763*c0909341SAndroid Build Coastguard Worker         if (txh >= txw && t->by < f->bh) {
764*c0909341SAndroid Build Coastguard Worker             if (dst)
765*c0909341SAndroid Build Coastguard Worker                 dst += 4 * txsh * PXSTRIDE(f->cur.stride[0]);
766*c0909341SAndroid Build Coastguard Worker             read_coef_tree(t, bs, b, sub, depth + 1, tx_split,
767*c0909341SAndroid Build Coastguard Worker                            x_off * 2 + 0, y_off * 2 + 1, dst);
768*c0909341SAndroid Build Coastguard Worker             t->bx += txsw;
769*c0909341SAndroid Build Coastguard Worker             if (txw >= txh && t->bx < f->bw)
770*c0909341SAndroid Build Coastguard Worker                 read_coef_tree(t, bs, b, sub, depth + 1, tx_split, x_off * 2 + 1,
771*c0909341SAndroid Build Coastguard Worker                                y_off * 2 + 1, dst ? &dst[4 * txsw] : NULL);
772*c0909341SAndroid Build Coastguard Worker             t->bx -= txsw;
773*c0909341SAndroid Build Coastguard Worker         }
774*c0909341SAndroid Build Coastguard Worker         t->by -= txsh;
775*c0909341SAndroid Build Coastguard Worker     } else {
776*c0909341SAndroid Build Coastguard Worker         const int bx4 = t->bx & 31, by4 = t->by & 31;
777*c0909341SAndroid Build Coastguard Worker         enum TxfmType txtp;
778*c0909341SAndroid Build Coastguard Worker         uint8_t cf_ctx;
779*c0909341SAndroid Build Coastguard Worker         int eob;
780*c0909341SAndroid Build Coastguard Worker         coef *cf;
781*c0909341SAndroid Build Coastguard Worker 
782*c0909341SAndroid Build Coastguard Worker         if (t->frame_thread.pass) {
783*c0909341SAndroid Build Coastguard Worker             const int p = t->frame_thread.pass & 1;
784*c0909341SAndroid Build Coastguard Worker             assert(ts->frame_thread[p].cf);
785*c0909341SAndroid Build Coastguard Worker             cf = ts->frame_thread[p].cf;
786*c0909341SAndroid Build Coastguard Worker             ts->frame_thread[p].cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
787*c0909341SAndroid Build Coastguard Worker         } else {
788*c0909341SAndroid Build Coastguard Worker             cf = bitfn(t->cf);
789*c0909341SAndroid Build Coastguard Worker         }
790*c0909341SAndroid Build Coastguard Worker         if (t->frame_thread.pass != 2) {
791*c0909341SAndroid Build Coastguard Worker             eob = decode_coefs(t, &t->a->lcoef[bx4], &t->l.lcoef[by4],
792*c0909341SAndroid Build Coastguard Worker                                ytx, bs, b, 0, 0, cf, &txtp, &cf_ctx);
793*c0909341SAndroid Build Coastguard Worker             if (DEBUG_BLOCK_INFO)
794*c0909341SAndroid Build Coastguard Worker                 printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
795*c0909341SAndroid Build Coastguard Worker                        ytx, txtp, eob, ts->msac.rng);
796*c0909341SAndroid Build Coastguard Worker             dav1d_memset_likely_pow2(&t->a->lcoef[bx4], cf_ctx, imin(txw, f->bw - t->bx));
797*c0909341SAndroid Build Coastguard Worker             dav1d_memset_likely_pow2(&t->l.lcoef[by4], cf_ctx, imin(txh, f->bh - t->by));
798*c0909341SAndroid Build Coastguard Worker #define set_ctx(rep_macro) \
799*c0909341SAndroid Build Coastguard Worker             for (int y = 0; y < txh; y++) { \
800*c0909341SAndroid Build Coastguard Worker                 rep_macro(txtp_map, 0, txtp); \
801*c0909341SAndroid Build Coastguard Worker                 txtp_map += 32; \
802*c0909341SAndroid Build Coastguard Worker             }
803*c0909341SAndroid Build Coastguard Worker             uint8_t *txtp_map = &t->scratch.txtp_map[by4 * 32 + bx4];
804*c0909341SAndroid Build Coastguard Worker             case_set_upto16(t_dim->lw);
805*c0909341SAndroid Build Coastguard Worker #undef set_ctx
806*c0909341SAndroid Build Coastguard Worker             if (t->frame_thread.pass == 1)
807*c0909341SAndroid Build Coastguard Worker                 *ts->frame_thread[1].cbi++ = eob * (1 << 5) + txtp;
808*c0909341SAndroid Build Coastguard Worker         } else {
809*c0909341SAndroid Build Coastguard Worker             const int cbi = *ts->frame_thread[0].cbi++;
810*c0909341SAndroid Build Coastguard Worker             eob  = cbi >> 5;
811*c0909341SAndroid Build Coastguard Worker             txtp = cbi & 0x1f;
812*c0909341SAndroid Build Coastguard Worker         }
813*c0909341SAndroid Build Coastguard Worker         if (!(t->frame_thread.pass & 1)) {
814*c0909341SAndroid Build Coastguard Worker             assert(dst);
815*c0909341SAndroid Build Coastguard Worker             if (eob >= 0) {
816*c0909341SAndroid Build Coastguard Worker                 if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
817*c0909341SAndroid Build Coastguard Worker                     coef_dump(cf, imin(t_dim->h, 8) * 4, imin(t_dim->w, 8) * 4, 3, "dq");
818*c0909341SAndroid Build Coastguard Worker                 dsp->itx.itxfm_add[ytx][txtp](dst, f->cur.stride[0], cf, eob
819*c0909341SAndroid Build Coastguard Worker                                               HIGHBD_CALL_SUFFIX);
820*c0909341SAndroid Build Coastguard Worker                 if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
821*c0909341SAndroid Build Coastguard Worker                     hex_dump(dst, f->cur.stride[0], t_dim->w * 4, t_dim->h * 4, "recon");
822*c0909341SAndroid Build Coastguard Worker             }
823*c0909341SAndroid Build Coastguard Worker         }
824*c0909341SAndroid Build Coastguard Worker     }
825*c0909341SAndroid Build Coastguard Worker }
826*c0909341SAndroid Build Coastguard Worker 
bytefn(dav1d_read_coef_blocks)827*c0909341SAndroid Build Coastguard Worker void bytefn(dav1d_read_coef_blocks)(Dav1dTaskContext *const t,
828*c0909341SAndroid Build Coastguard Worker                                     const enum BlockSize bs, const Av1Block *const b)
829*c0909341SAndroid Build Coastguard Worker {
830*c0909341SAndroid Build Coastguard Worker     const Dav1dFrameContext *const f = t->f;
831*c0909341SAndroid Build Coastguard Worker     const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
832*c0909341SAndroid Build Coastguard Worker     const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
833*c0909341SAndroid Build Coastguard Worker     const int bx4 = t->bx & 31, by4 = t->by & 31;
834*c0909341SAndroid Build Coastguard Worker     const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
835*c0909341SAndroid Build Coastguard Worker     const uint8_t *const b_dim = dav1d_block_dimensions[bs];
836*c0909341SAndroid Build Coastguard Worker     const int bw4 = b_dim[0], bh4 = b_dim[1];
837*c0909341SAndroid Build Coastguard Worker     const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
838*c0909341SAndroid Build Coastguard Worker     const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
839*c0909341SAndroid Build Coastguard Worker                            (bw4 > ss_hor || t->bx & 1) &&
840*c0909341SAndroid Build Coastguard Worker                            (bh4 > ss_ver || t->by & 1);
841*c0909341SAndroid Build Coastguard Worker 
842*c0909341SAndroid Build Coastguard Worker     if (b->skip) {
843*c0909341SAndroid Build Coastguard Worker         BlockContext *const a = t->a;
844*c0909341SAndroid Build Coastguard Worker         dav1d_memset_pow2[b_dim[2]](&a->lcoef[bx4], 0x40);
845*c0909341SAndroid Build Coastguard Worker         dav1d_memset_pow2[b_dim[3]](&t->l.lcoef[by4], 0x40);
846*c0909341SAndroid Build Coastguard Worker         if (has_chroma) {
847*c0909341SAndroid Build Coastguard Worker             dav1d_memset_pow2_fn memset_cw = dav1d_memset_pow2[ulog2(cbw4)];
848*c0909341SAndroid Build Coastguard Worker             dav1d_memset_pow2_fn memset_ch = dav1d_memset_pow2[ulog2(cbh4)];
849*c0909341SAndroid Build Coastguard Worker             memset_cw(&a->ccoef[0][cbx4], 0x40);
850*c0909341SAndroid Build Coastguard Worker             memset_cw(&a->ccoef[1][cbx4], 0x40);
851*c0909341SAndroid Build Coastguard Worker             memset_ch(&t->l.ccoef[0][cby4], 0x40);
852*c0909341SAndroid Build Coastguard Worker             memset_ch(&t->l.ccoef[1][cby4], 0x40);
853*c0909341SAndroid Build Coastguard Worker         }
854*c0909341SAndroid Build Coastguard Worker         return;
855*c0909341SAndroid Build Coastguard Worker     }
856*c0909341SAndroid Build Coastguard Worker 
857*c0909341SAndroid Build Coastguard Worker     Dav1dTileState *const ts = t->ts;
858*c0909341SAndroid Build Coastguard Worker     const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
859*c0909341SAndroid Build Coastguard Worker     const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
860*c0909341SAndroid Build Coastguard Worker     assert(t->frame_thread.pass == 1);
861*c0909341SAndroid Build Coastguard Worker     assert(!b->skip);
862*c0909341SAndroid Build Coastguard Worker     const TxfmInfo *const uv_t_dim = &dav1d_txfm_dimensions[b->uvtx];
863*c0909341SAndroid Build Coastguard Worker     const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[b->intra ? b->tx : b->max_ytx];
864*c0909341SAndroid Build Coastguard Worker     const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
865*c0909341SAndroid Build Coastguard Worker 
866*c0909341SAndroid Build Coastguard Worker     for (int init_y = 0; init_y < h4; init_y += 16) {
867*c0909341SAndroid Build Coastguard Worker         const int sub_h4 = imin(h4, 16 + init_y);
868*c0909341SAndroid Build Coastguard Worker         for (int init_x = 0; init_x < w4; init_x += 16) {
869*c0909341SAndroid Build Coastguard Worker             const int sub_w4 = imin(w4, init_x + 16);
870*c0909341SAndroid Build Coastguard Worker             int y_off = !!init_y, y, x;
871*c0909341SAndroid Build Coastguard Worker             for (y = init_y, t->by += init_y; y < sub_h4;
872*c0909341SAndroid Build Coastguard Worker                  y += t_dim->h, t->by += t_dim->h, y_off++)
873*c0909341SAndroid Build Coastguard Worker             {
874*c0909341SAndroid Build Coastguard Worker                 int x_off = !!init_x;
875*c0909341SAndroid Build Coastguard Worker                 for (x = init_x, t->bx += init_x; x < sub_w4;
876*c0909341SAndroid Build Coastguard Worker                      x += t_dim->w, t->bx += t_dim->w, x_off++)
877*c0909341SAndroid Build Coastguard Worker                 {
878*c0909341SAndroid Build Coastguard Worker                     if (!b->intra) {
879*c0909341SAndroid Build Coastguard Worker                         read_coef_tree(t, bs, b, b->max_ytx, 0, tx_split,
880*c0909341SAndroid Build Coastguard Worker                                        x_off, y_off, NULL);
881*c0909341SAndroid Build Coastguard Worker                     } else {
882*c0909341SAndroid Build Coastguard Worker                         uint8_t cf_ctx = 0x40;
883*c0909341SAndroid Build Coastguard Worker                         enum TxfmType txtp;
884*c0909341SAndroid Build Coastguard Worker                         const int eob =
885*c0909341SAndroid Build Coastguard Worker                             decode_coefs(t, &t->a->lcoef[bx4 + x],
886*c0909341SAndroid Build Coastguard Worker                                          &t->l.lcoef[by4 + y], b->tx, bs, b, 1,
887*c0909341SAndroid Build Coastguard Worker                                          0, ts->frame_thread[1].cf, &txtp, &cf_ctx);
888*c0909341SAndroid Build Coastguard Worker                         if (DEBUG_BLOCK_INFO)
889*c0909341SAndroid Build Coastguard Worker                             printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
890*c0909341SAndroid Build Coastguard Worker                                    b->tx, txtp, eob, ts->msac.rng);
891*c0909341SAndroid Build Coastguard Worker                         *ts->frame_thread[1].cbi++ = eob * (1 << 5) + txtp;
892*c0909341SAndroid Build Coastguard Worker                         ts->frame_thread[1].cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
893*c0909341SAndroid Build Coastguard Worker                         dav1d_memset_likely_pow2(&t->a->lcoef[bx4 + x], cf_ctx, imin(t_dim->w, f->bw - t->bx));
894*c0909341SAndroid Build Coastguard Worker                         dav1d_memset_likely_pow2(&t->l.lcoef[by4 + y], cf_ctx, imin(t_dim->h, f->bh - t->by));
895*c0909341SAndroid Build Coastguard Worker                     }
896*c0909341SAndroid Build Coastguard Worker                 }
897*c0909341SAndroid Build Coastguard Worker                 t->bx -= x;
898*c0909341SAndroid Build Coastguard Worker             }
899*c0909341SAndroid Build Coastguard Worker             t->by -= y;
900*c0909341SAndroid Build Coastguard Worker 
901*c0909341SAndroid Build Coastguard Worker             if (!has_chroma) continue;
902*c0909341SAndroid Build Coastguard Worker 
903*c0909341SAndroid Build Coastguard Worker             const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
904*c0909341SAndroid Build Coastguard Worker             const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);
905*c0909341SAndroid Build Coastguard Worker             for (int pl = 0; pl < 2; pl++) {
906*c0909341SAndroid Build Coastguard Worker                 for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;
907*c0909341SAndroid Build Coastguard Worker                      y += uv_t_dim->h, t->by += uv_t_dim->h << ss_ver)
908*c0909341SAndroid Build Coastguard Worker                 {
909*c0909341SAndroid Build Coastguard Worker                     for (x = init_x >> ss_hor, t->bx += init_x; x < sub_cw4;
910*c0909341SAndroid Build Coastguard Worker                          x += uv_t_dim->w, t->bx += uv_t_dim->w << ss_hor)
911*c0909341SAndroid Build Coastguard Worker                     {
912*c0909341SAndroid Build Coastguard Worker                         uint8_t cf_ctx = 0x40;
913*c0909341SAndroid Build Coastguard Worker                         enum TxfmType txtp;
914*c0909341SAndroid Build Coastguard Worker                         if (!b->intra)
915*c0909341SAndroid Build Coastguard Worker                             txtp = t->scratch.txtp_map[(by4 + (y << ss_ver)) * 32 +
916*c0909341SAndroid Build Coastguard Worker                                                         bx4 + (x << ss_hor)];
917*c0909341SAndroid Build Coastguard Worker                         const int eob =
918*c0909341SAndroid Build Coastguard Worker                             decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
919*c0909341SAndroid Build Coastguard Worker                                          &t->l.ccoef[pl][cby4 + y], b->uvtx, bs,
920*c0909341SAndroid Build Coastguard Worker                                          b, b->intra, 1 + pl, ts->frame_thread[1].cf,
921*c0909341SAndroid Build Coastguard Worker                                          &txtp, &cf_ctx);
922*c0909341SAndroid Build Coastguard Worker                         if (DEBUG_BLOCK_INFO)
923*c0909341SAndroid Build Coastguard Worker                             printf("Post-uv-cf-blk[pl=%d,tx=%d,"
924*c0909341SAndroid Build Coastguard Worker                                    "txtp=%d,eob=%d]: r=%d\n",
925*c0909341SAndroid Build Coastguard Worker                                    pl, b->uvtx, txtp, eob, ts->msac.rng);
926*c0909341SAndroid Build Coastguard Worker                         *ts->frame_thread[1].cbi++ = eob * (1 << 5) + txtp;
927*c0909341SAndroid Build Coastguard Worker                         ts->frame_thread[1].cf += uv_t_dim->w * uv_t_dim->h * 16;
928*c0909341SAndroid Build Coastguard Worker                         int ctw = imin(uv_t_dim->w, (f->bw - t->bx + ss_hor) >> ss_hor);
929*c0909341SAndroid Build Coastguard Worker                         int cth = imin(uv_t_dim->h, (f->bh - t->by + ss_ver) >> ss_ver);
930*c0909341SAndroid Build Coastguard Worker                         dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
931*c0909341SAndroid Build Coastguard Worker                         dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
932*c0909341SAndroid Build Coastguard Worker                     }
933*c0909341SAndroid Build Coastguard Worker                     t->bx -= x << ss_hor;
934*c0909341SAndroid Build Coastguard Worker                 }
935*c0909341SAndroid Build Coastguard Worker                 t->by -= y << ss_ver;
936*c0909341SAndroid Build Coastguard Worker             }
937*c0909341SAndroid Build Coastguard Worker         }
938*c0909341SAndroid Build Coastguard Worker     }
939*c0909341SAndroid Build Coastguard Worker }
940*c0909341SAndroid Build Coastguard Worker 
/* Motion-compensated prediction of one block of plane pl from reference
 * picture refp.
 *
 * Exactly one of dst8 / dst16 must be non-NULL (asserted): with dst8 the
 * result is written through the mc / mc_scaled DSP functions using
 * dst_stride, with dst16 through mct / mct_scaled.
 *
 * bw4 / bh4 give the block size and bx / by its position; they are multiplied
 * by h_mul / v_mul (4, or 2 along a subsampled chroma direction) to obtain
 * pixel units of the target plane.
 *
 * If the reference has the same dimensions as the current frame, the
 * unscaled path is taken; otherwise the scaled path using f->svc[refidx].
 * In both paths, when the reference area needed by the filter reaches
 * outside the picture, it is first gathered into t->scratch.emu_edge via
 * the emu_edge DSP function.
 *
 * Always returns 0. */
static int mc(Dav1dTaskContext *const t,
              pixel *const dst8, int16_t *const dst16, const ptrdiff_t dst_stride,
              const int bw4, const int bh4,
              const int bx, const int by, const int pl,
              const mv mv, const Dav1dThreadPicture *const refp, const int refidx,
              const enum Filter2d filter_2d)
{
    assert((dst8 != NULL) ^ (dst16 != NULL));
    const Dav1dFrameContext *const f = t->f;
    const int is_chroma = !!pl;
    const int ss_ver = is_chroma && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
    const int ss_hor = is_chroma && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
    const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
    /* Block dimensions in pixels of the target plane. */
    const int blk_w = bw4 * h_mul, blk_h = bh4 * v_mul;
    const int mvx = mv.x, mvy = mv.y;
    ptrdiff_t ref_stride = refp->p.stride[is_chroma];
    const pixel *ref;

    if (refp->p.p.w == f->cur.p.w && refp->p.p.h == f->cur.p.h) {
        /* ---- unscaled reference ---- */
        const int mx = mvx & (15 >> !ss_hor), my = mvy & (15 >> !ss_ver);
        /* 1 if the respective MV component has a fractional part. */
        const int frac_x = !!mx, frac_y = !!my;
        const int dx = bx * h_mul + (mvx >> (3 + ss_hor));
        const int dy = by * v_mul + (mvy >> (3 + ss_ver));
        /* Referencing the current picture itself means intrabc; the usable
         * area is then derived from f->bw / f->bh instead of the picture
         * size. */
        const int intrabc = refp->p.data[0] == f->cur.data[0];
        const int w = intrabc ? (f->bw * 4) >> ss_hor
                              : (f->cur.p.w + ss_hor) >> ss_hor;
        const int h = intrabc ? (f->bh * 4) >> ss_ver
                              : (f->cur.p.h + ss_ver) >> ss_ver;

        /* With a fractional MV component, 3 extra pixels before and 4 after
         * the block are read along that direction. */
        const int out_of_bounds = dx < frac_x * 3 || dy < frac_y * 3 ||
                                  dx + blk_w + frac_x * 4 > w ||
                                  dy + blk_h + frac_y * 4 > h;

        if (out_of_bounds) {
            pixel *const emu_edge_buf = bitfn(t->scratch.emu_edge);
            f->dsp->mc.emu_edge(blk_w + frac_x * 7, blk_h + frac_y * 7,
                                w, h, dx - frac_x * 3, dy - frac_y * 3,
                                emu_edge_buf, 192 * sizeof(pixel),
                                refp->p.data[pl], ref_stride);
            /* Skip the leading margin inside the emulated buffer. */
            ref = &emu_edge_buf[192 * frac_y * 3 + frac_x * 3];
            ref_stride = 192 * sizeof(pixel);
        } else {
            ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * dy + dx;
        }

        if (dst8 != NULL) {
            f->dsp->mc.mc[filter_2d](dst8, dst_stride, ref, ref_stride, blk_w,
                                     blk_h, mx << !ss_hor, my << !ss_ver
                                     HIGHBD_CALL_SUFFIX);
        } else {
            f->dsp->mc.mct[filter_2d](dst16, ref, ref_stride, blk_w,
                                      blk_h, mx << !ss_hor, my << !ss_ver
                                      HIGHBD_CALL_SUFFIX);
        }
        return 0;
    }

    /* ---- scaled reference ---- */
    assert(refp != &f->sr_cur);

    const int orig_pos_y = (by * v_mul << 4) + mvy * (1 << !ss_ver);
    const int orig_pos_x = (bx * h_mul << 4) + mvx * (1 << !ss_hor);
    /* Scale a position, rounding the magnitude of the intermediate value
     * before restoring its sign. */
#define scale_mv(res, val, scale) do { \
        const int64_t tmp = (int64_t)(val) * scale + (scale - 0x4000) * 8; \
        res = apply_sign64((int) ((llabs(tmp) + 128) >> 8), tmp) + 32;     \
    } while (0)
    int pos_y, pos_x;
    scale_mv(pos_x, orig_pos_x, f->svc[refidx][0].scale);
    scale_mv(pos_y, orig_pos_y, f->svc[refidx][1].scale);
#undef scale_mv
    /* pos_x / pos_y carry 10 fractional bits; derive the integer bounding
     * box of the samples touched in the reference. */
    const int left = pos_x >> 10;
    const int top = pos_y >> 10;
    const int right =
        ((pos_x + (blk_w - 1) * f->svc[refidx][0].step) >> 10) + 1;
    const int bottom =
        ((pos_y + (blk_h - 1) * f->svc[refidx][1].step) >> 10) + 1;

    if (DEBUG_BLOCK_INFO)
        printf("Off %dx%d [%d,%d,%d], size %dx%d [%d,%d]\n",
               left, top, orig_pos_x, f->svc[refidx][0].scale, refidx,
               right-left, bottom-top,
               f->svc[refidx][0].step, f->svc[refidx][1].step);

    const int w = (refp->p.p.w + ss_hor) >> ss_hor;
    const int h = (refp->p.p.h + ss_ver) >> ss_ver;
    const int out_of_bounds = left < 3 || top < 3 ||
                              right + 4 > w || bottom + 4 > h;

    if (out_of_bounds) {
        pixel *const emu_edge_buf = bitfn(t->scratch.emu_edge);
        f->dsp->mc.emu_edge(right - left + 7, bottom - top + 7,
                            w, h, left - 3, top - 3,
                            emu_edge_buf, 320 * sizeof(pixel),
                            refp->p.data[pl], ref_stride);
        ref = &emu_edge_buf[320 * 3 + 3];
        ref_stride = 320 * sizeof(pixel);
        if (DEBUG_BLOCK_INFO) printf("Emu\n");
    } else {
        ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * top + left;
    }

    if (dst8 != NULL) {
        f->dsp->mc.mc_scaled[filter_2d](dst8, dst_stride, ref, ref_stride,
                                        blk_w, blk_h,
                                        pos_x & 0x3ff, pos_y & 0x3ff,
                                        f->svc[refidx][0].step,
                                        f->svc[refidx][1].step
                                        HIGHBD_CALL_SUFFIX);
    } else {
        f->dsp->mc.mct_scaled[filter_2d](dst16, ref, ref_stride,
                                         blk_w, blk_h,
                                         pos_x & 0x3ff, pos_y & 0x3ff,
                                         f->svc[refidx][0].step,
                                         f->svc[refidx][1].step
                                         HIGHBD_CALL_SUFFIX);
    }

    return 0;
}
1054*c0909341SAndroid Build Coastguard Worker 
obmc(Dav1dTaskContext * const t,pixel * const dst,const ptrdiff_t dst_stride,const uint8_t * const b_dim,const int pl,const int bx4,const int by4,const int w4,const int h4)1055*c0909341SAndroid Build Coastguard Worker static int obmc(Dav1dTaskContext *const t,
1056*c0909341SAndroid Build Coastguard Worker                 pixel *const dst, const ptrdiff_t dst_stride,
1057*c0909341SAndroid Build Coastguard Worker                 const uint8_t *const b_dim, const int pl,
1058*c0909341SAndroid Build Coastguard Worker                 const int bx4, const int by4, const int w4, const int h4)
1059*c0909341SAndroid Build Coastguard Worker {
1060*c0909341SAndroid Build Coastguard Worker     assert(!(t->bx & 1) && !(t->by & 1));
1061*c0909341SAndroid Build Coastguard Worker     const Dav1dFrameContext *const f = t->f;
1062*c0909341SAndroid Build Coastguard Worker     /*const*/ refmvs_block **r = &t->rt.r[(t->by & 31) + 5];
1063*c0909341SAndroid Build Coastguard Worker     pixel *const lap = bitfn(t->scratch.lap);
1064*c0909341SAndroid Build Coastguard Worker     const int ss_ver = !!pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
1065*c0909341SAndroid Build Coastguard Worker     const int ss_hor = !!pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
1066*c0909341SAndroid Build Coastguard Worker     const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
1067*c0909341SAndroid Build Coastguard Worker     int res;
1068*c0909341SAndroid Build Coastguard Worker 
1069*c0909341SAndroid Build Coastguard Worker     if (t->by > t->ts->tiling.row_start &&
1070*c0909341SAndroid Build Coastguard Worker         (!pl || b_dim[0] * h_mul + b_dim[1] * v_mul >= 16))
1071*c0909341SAndroid Build Coastguard Worker     {
1072*c0909341SAndroid Build Coastguard Worker         for (int i = 0, x = 0; x < w4 && i < imin(b_dim[2], 4); ) {
1073*c0909341SAndroid Build Coastguard Worker             // only odd blocks are considered for overlap handling, hence +1
1074*c0909341SAndroid Build Coastguard Worker             const refmvs_block *const a_r = &r[-1][t->bx + x + 1];
1075*c0909341SAndroid Build Coastguard Worker             const uint8_t *const a_b_dim = dav1d_block_dimensions[a_r->bs];
1076*c0909341SAndroid Build Coastguard Worker             const int step4 = iclip(a_b_dim[0], 2, 16);
1077*c0909341SAndroid Build Coastguard Worker 
1078*c0909341SAndroid Build Coastguard Worker             if (a_r->ref.ref[0] > 0) {
1079*c0909341SAndroid Build Coastguard Worker                 const int ow4 = imin(step4, b_dim[0]);
1080*c0909341SAndroid Build Coastguard Worker                 const int oh4 = imin(b_dim[1], 16) >> 1;
1081*c0909341SAndroid Build Coastguard Worker                 res = mc(t, lap, NULL, ow4 * h_mul * sizeof(pixel), ow4, (oh4 * 3 + 3) >> 2,
1082*c0909341SAndroid Build Coastguard Worker                          t->bx + x, t->by, pl, a_r->mv.mv[0],
1083*c0909341SAndroid Build Coastguard Worker                          &f->refp[a_r->ref.ref[0] - 1], a_r->ref.ref[0] - 1,
1084*c0909341SAndroid Build Coastguard Worker                          dav1d_filter_2d[t->a->filter[1][bx4 + x + 1]][t->a->filter[0][bx4 + x + 1]]);
1085*c0909341SAndroid Build Coastguard Worker                 if (res) return res;
1086*c0909341SAndroid Build Coastguard Worker                 f->dsp->mc.blend_h(&dst[x * h_mul], dst_stride, lap,
1087*c0909341SAndroid Build Coastguard Worker                                    h_mul * ow4, v_mul * oh4);
1088*c0909341SAndroid Build Coastguard Worker                 i++;
1089*c0909341SAndroid Build Coastguard Worker             }
1090*c0909341SAndroid Build Coastguard Worker             x += step4;
1091*c0909341SAndroid Build Coastguard Worker         }
1092*c0909341SAndroid Build Coastguard Worker     }
1093*c0909341SAndroid Build Coastguard Worker 
1094*c0909341SAndroid Build Coastguard Worker     if (t->bx > t->ts->tiling.col_start)
1095*c0909341SAndroid Build Coastguard Worker         for (int i = 0, y = 0; y < h4 && i < imin(b_dim[3], 4); ) {
1096*c0909341SAndroid Build Coastguard Worker             // only odd blocks are considered for overlap handling, hence +1
1097*c0909341SAndroid Build Coastguard Worker             const refmvs_block *const l_r = &r[y + 1][t->bx - 1];
1098*c0909341SAndroid Build Coastguard Worker             const uint8_t *const l_b_dim = dav1d_block_dimensions[l_r->bs];
1099*c0909341SAndroid Build Coastguard Worker             const int step4 = iclip(l_b_dim[1], 2, 16);
1100*c0909341SAndroid Build Coastguard Worker 
1101*c0909341SAndroid Build Coastguard Worker             if (l_r->ref.ref[0] > 0) {
1102*c0909341SAndroid Build Coastguard Worker                 const int ow4 = imin(b_dim[0], 16) >> 1;
1103*c0909341SAndroid Build Coastguard Worker                 const int oh4 = imin(step4, b_dim[1]);
1104*c0909341SAndroid Build Coastguard Worker                 res = mc(t, lap, NULL, h_mul * ow4 * sizeof(pixel), ow4, oh4,
1105*c0909341SAndroid Build Coastguard Worker                          t->bx, t->by + y, pl, l_r->mv.mv[0],
1106*c0909341SAndroid Build Coastguard Worker                          &f->refp[l_r->ref.ref[0] - 1], l_r->ref.ref[0] - 1,
1107*c0909341SAndroid Build Coastguard Worker                          dav1d_filter_2d[t->l.filter[1][by4 + y + 1]][t->l.filter[0][by4 + y + 1]]);
1108*c0909341SAndroid Build Coastguard Worker                 if (res) return res;
1109*c0909341SAndroid Build Coastguard Worker                 f->dsp->mc.blend_v(&dst[y * v_mul * PXSTRIDE(dst_stride)],
1110*c0909341SAndroid Build Coastguard Worker                                    dst_stride, lap, h_mul * ow4, v_mul * oh4);
1111*c0909341SAndroid Build Coastguard Worker                 i++;
1112*c0909341SAndroid Build Coastguard Worker             }
1113*c0909341SAndroid Build Coastguard Worker             y += step4;
1114*c0909341SAndroid Build Coastguard Worker         }
1115*c0909341SAndroid Build Coastguard Worker     return 0;
1116*c0909341SAndroid Build Coastguard Worker }
1117*c0909341SAndroid Build Coastguard Worker 
/*
 * Affine-warped prediction of one block for plane `pl`.
 *
 * The block (b_dim[0] x b_dim[1] in 4-pixel units, scaled down for
 * subsampled chroma) is walked in 8x8 tiles. For every tile the matrix in
 * wmp->matrix is evaluated at the tile centre, expressed in luma pixel
 * units, giving a 64-bit position per axis. Bits 16 and up of that position
 * (minus 4) give the integer source coordinate; the low 16 bits, reduced by
 * the alpha/beta (horizontal) or gamma/delta (vertical) terms and with the
 * low six bits cleared, give the mx/my value passed to the warp kernel.
 *
 * Exactly one of dst8 / dst16 must be non-NULL (asserted): a non-NULL dst16
 * routes tiles to dsp->mc.warp8x8t, otherwise dsp->mc.warp8x8 writes to dst8.
 * dstride is passed to the kernel unchanged; rows of dst8 are advanced by
 * PXSTRIDE(dstride) and rows of dst16 by dstride elements.
 *
 * Always returns 0.
 */
static int warp_affine(Dav1dTaskContext *const t,
                       pixel *dst8, int16_t *dst16, const ptrdiff_t dstride,
                       const uint8_t *const b_dim, const int pl,
                       const Dav1dThreadPicture *const refp,
                       const Dav1dWarpedMotionParams *const wmp)
{
    assert((dst8 != NULL) ^ (dst16 != NULL));

    const Dav1dFrameContext *const f = t->f;
    const Dav1dDSPContext *const dsp = f->dsp;

    // Subsampling only applies to chroma planes (pl != 0).
    const int is_chroma = !!pl;
    const int ss_hor = is_chroma && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
    const int ss_ver = is_chroma && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;

    // Block size in pixels of this plane; must be a whole number of 8x8 tiles.
    const int blk_w = b_dim[0] * (4 >> ss_hor);
    const int blk_h = b_dim[1] * (4 >> ss_ver);
    assert(!(blk_w & 7) && !(blk_h & 7));

    const int32_t *const mat = wmp->matrix;
    const int ref_w = (refp->p.p.w + ss_hor) >> ss_hor;
    const int ref_h = (refp->p.p.h + ss_ver) >> ss_ver;
    const ptrdiff_t plane_stride = refp->p.stride[is_chroma];

    // Row cursors; only the one matching the active output is meaningful.
    pixel *row8 = dst8;
    int16_t *row16 = dst16;

    for (int ty = 0; ty < blk_h; ty += 8) {
        // Tile centre, vertical, in luma pixel units.
        const int src_y = t->by * 4 + ((ty + 4) << ss_ver);
        // Row-dependent halves of the two affine equations.
        const int64_t mat3_y = (int64_t) mat[3] * src_y + mat[0];
        const int64_t mat5_y = (int64_t) mat[5] * src_y + mat[1];

        for (int tx = 0; tx < blk_w; tx += 8) {
            // Tile centre, horizontal, in luma pixel units.
            const int src_x = t->bx * 4 + ((tx + 4) << ss_hor);
            const int64_t mvx = ((int64_t) mat[2] * src_x + mat3_y) >> ss_hor;
            const int64_t mvy = ((int64_t) mat[4] * src_x + mat5_y) >> ss_ver;

            // Integer source position of the tile.
            const int dx = (int) (mvx >> 16) - 4;
            const int dy = (int) (mvy >> 16) - 4;

            // Low 16 bits of the position, biased and with bits 0..5 cleared.
            const int frac_x = (int) mvx & 0xffff;
            const int frac_y = (int) mvy & 0xffff;
            const int mx = (frac_x - wmp->u.p.alpha * 4 -
                                     wmp->u.p.beta  * 7) & ~0x3f;
            const int my = (frac_y - wmp->u.p.gamma * 4 -
                                     wmp->u.p.delta * 4) & ~0x3f;

            const int in_bounds = dx >= 3 && dx + 8 + 4 <= ref_w &&
                                  dy >= 3 && dy + 8 + 4 <= ref_h;

            const pixel *src;
            ptrdiff_t src_stride;
            if (in_bounds) {
                // Source window lies inside the plane: read it in place.
                src = (pixel *) refp->p.data[pl] +
                      PXSTRIDE(plane_stride) * dy + dx;
                src_stride = plane_stride;
            } else {
                // Build a 15x15 window starting at (dx - 3, dy - 3) in the
                // scratch buffer (32 pixels per row) and point 3 rows and
                // 3 columns into it.
                pixel *const emu_buf = bitfn(t->scratch.emu_edge);
                const ptrdiff_t emu_stride = 32 * sizeof(pixel);
                dsp->mc.emu_edge(15, 15, ref_w, ref_h, dx - 3, dy - 3,
                                 emu_buf, emu_stride,
                                 refp->p.data[pl], plane_stride);
                src = emu_buf + 32 * 3 + 3;
                src_stride = emu_stride;
            }

            if (row16 != NULL) {
                dsp->mc.warp8x8t(&row16[tx], dstride, src, src_stride,
                                 wmp->u.abcd, mx, my HIGHBD_CALL_SUFFIX);
            } else {
                dsp->mc.warp8x8(&row8[tx], dstride, src, src_stride,
                                wmp->u.abcd, mx, my HIGHBD_CALL_SUFFIX);
            }
        }

        // Step the output down by one tile row.
        if (row8) {
            row8 += 8 * PXSTRIDE(dstride);
        } else {
            row16 += 8 * dstride;
        }
    }

    return 0;
}
1178*c0909341SAndroid Build Coastguard Worker 
bytefn(dav1d_recon_b_intra)1179*c0909341SAndroid Build Coastguard Worker void bytefn(dav1d_recon_b_intra)(Dav1dTaskContext *const t, const enum BlockSize bs,
1180*c0909341SAndroid Build Coastguard Worker                                  const enum EdgeFlags intra_edge_flags,
1181*c0909341SAndroid Build Coastguard Worker                                  const Av1Block *const b)
1182*c0909341SAndroid Build Coastguard Worker {
1183*c0909341SAndroid Build Coastguard Worker     Dav1dTileState *const ts = t->ts;
1184*c0909341SAndroid Build Coastguard Worker     const Dav1dFrameContext *const f = t->f;
1185*c0909341SAndroid Build Coastguard Worker     const Dav1dDSPContext *const dsp = f->dsp;
1186*c0909341SAndroid Build Coastguard Worker     const int bx4 = t->bx & 31, by4 = t->by & 31;
1187*c0909341SAndroid Build Coastguard Worker     const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
1188*c0909341SAndroid Build Coastguard Worker     const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
1189*c0909341SAndroid Build Coastguard Worker     const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
1190*c0909341SAndroid Build Coastguard Worker     const uint8_t *const b_dim = dav1d_block_dimensions[bs];
1191*c0909341SAndroid Build Coastguard Worker     const int bw4 = b_dim[0], bh4 = b_dim[1];
1192*c0909341SAndroid Build Coastguard Worker     const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
1193*c0909341SAndroid Build Coastguard Worker     const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
1194*c0909341SAndroid Build Coastguard Worker     const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
1195*c0909341SAndroid Build Coastguard Worker                            (bw4 > ss_hor || t->bx & 1) &&
1196*c0909341SAndroid Build Coastguard Worker                            (bh4 > ss_ver || t->by & 1);
1197*c0909341SAndroid Build Coastguard Worker     const TxfmInfo *const t_dim = &dav1d_txfm_dimensions[b->tx];
1198*c0909341SAndroid Build Coastguard Worker     const TxfmInfo *const uv_t_dim = &dav1d_txfm_dimensions[b->uvtx];
1199*c0909341SAndroid Build Coastguard Worker 
1200*c0909341SAndroid Build Coastguard Worker     // coefficient coding
1201*c0909341SAndroid Build Coastguard Worker     pixel *const edge = bitfn(t->scratch.edge) + 128;
1202*c0909341SAndroid Build Coastguard Worker     const int cbw4 = (bw4 + ss_hor) >> ss_hor, cbh4 = (bh4 + ss_ver) >> ss_ver;
1203*c0909341SAndroid Build Coastguard Worker 
1204*c0909341SAndroid Build Coastguard Worker     const int intra_edge_filter_flag = f->seq_hdr->intra_edge_filter << 10;
1205*c0909341SAndroid Build Coastguard Worker 
1206*c0909341SAndroid Build Coastguard Worker     for (int init_y = 0; init_y < h4; init_y += 16) {
1207*c0909341SAndroid Build Coastguard Worker         const int sub_h4 = imin(h4, 16 + init_y);
1208*c0909341SAndroid Build Coastguard Worker         const int sub_ch4 = imin(ch4, (init_y + 16) >> ss_ver);
1209*c0909341SAndroid Build Coastguard Worker         for (int init_x = 0; init_x < w4; init_x += 16) {
1210*c0909341SAndroid Build Coastguard Worker             if (b->pal_sz[0]) {
1211*c0909341SAndroid Build Coastguard Worker                 pixel *dst = ((pixel *) f->cur.data[0]) +
1212*c0909341SAndroid Build Coastguard Worker                              4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
1213*c0909341SAndroid Build Coastguard Worker                 const uint8_t *pal_idx;
1214*c0909341SAndroid Build Coastguard Worker                 if (t->frame_thread.pass) {
1215*c0909341SAndroid Build Coastguard Worker                     const int p = t->frame_thread.pass & 1;
1216*c0909341SAndroid Build Coastguard Worker                     assert(ts->frame_thread[p].pal_idx);
1217*c0909341SAndroid Build Coastguard Worker                     pal_idx = ts->frame_thread[p].pal_idx;
1218*c0909341SAndroid Build Coastguard Worker                     ts->frame_thread[p].pal_idx += bw4 * bh4 * 8;
1219*c0909341SAndroid Build Coastguard Worker                 } else {
1220*c0909341SAndroid Build Coastguard Worker                     pal_idx = t->scratch.pal_idx_y;
1221*c0909341SAndroid Build Coastguard Worker                 }
1222*c0909341SAndroid Build Coastguard Worker                 const pixel *const pal = t->frame_thread.pass ?
1223*c0909341SAndroid Build Coastguard Worker                     f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
1224*c0909341SAndroid Build Coastguard Worker                                         ((t->bx >> 1) + (t->by & 1))][0] :
1225*c0909341SAndroid Build Coastguard Worker                     bytefn(t->scratch.pal)[0];
1226*c0909341SAndroid Build Coastguard Worker                 f->dsp->ipred.pal_pred(dst, f->cur.stride[0], pal,
1227*c0909341SAndroid Build Coastguard Worker                                        pal_idx, bw4 * 4, bh4 * 4);
1228*c0909341SAndroid Build Coastguard Worker                 if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
1229*c0909341SAndroid Build Coastguard Worker                     hex_dump(dst, PXSTRIDE(f->cur.stride[0]),
1230*c0909341SAndroid Build Coastguard Worker                              bw4 * 4, bh4 * 4, "y-pal-pred");
1231*c0909341SAndroid Build Coastguard Worker             }
1232*c0909341SAndroid Build Coastguard Worker 
1233*c0909341SAndroid Build Coastguard Worker             const int intra_flags = (sm_flag(t->a, bx4) |
1234*c0909341SAndroid Build Coastguard Worker                                      sm_flag(&t->l, by4) |
1235*c0909341SAndroid Build Coastguard Worker                                      intra_edge_filter_flag);
1236*c0909341SAndroid Build Coastguard Worker             const int sb_has_tr = init_x + 16 < w4 ? 1 : init_y ? 0 :
1237*c0909341SAndroid Build Coastguard Worker                               intra_edge_flags & EDGE_I444_TOP_HAS_RIGHT;
1238*c0909341SAndroid Build Coastguard Worker             const int sb_has_bl = init_x ? 0 : init_y + 16 < h4 ? 1 :
1239*c0909341SAndroid Build Coastguard Worker                               intra_edge_flags & EDGE_I444_LEFT_HAS_BOTTOM;
1240*c0909341SAndroid Build Coastguard Worker             int y, x;
1241*c0909341SAndroid Build Coastguard Worker             const int sub_w4 = imin(w4, init_x + 16);
1242*c0909341SAndroid Build Coastguard Worker             for (y = init_y, t->by += init_y; y < sub_h4;
1243*c0909341SAndroid Build Coastguard Worker                  y += t_dim->h, t->by += t_dim->h)
1244*c0909341SAndroid Build Coastguard Worker             {
1245*c0909341SAndroid Build Coastguard Worker                 pixel *dst = ((pixel *) f->cur.data[0]) +
1246*c0909341SAndroid Build Coastguard Worker                                4 * (t->by * PXSTRIDE(f->cur.stride[0]) +
1247*c0909341SAndroid Build Coastguard Worker                                     t->bx + init_x);
1248*c0909341SAndroid Build Coastguard Worker                 for (x = init_x, t->bx += init_x; x < sub_w4;
1249*c0909341SAndroid Build Coastguard Worker                      x += t_dim->w, t->bx += t_dim->w)
1250*c0909341SAndroid Build Coastguard Worker                 {
1251*c0909341SAndroid Build Coastguard Worker                     if (b->pal_sz[0]) goto skip_y_pred;
1252*c0909341SAndroid Build Coastguard Worker 
1253*c0909341SAndroid Build Coastguard Worker                     int angle = b->y_angle;
1254*c0909341SAndroid Build Coastguard Worker                     const enum EdgeFlags edge_flags =
1255*c0909341SAndroid Build Coastguard Worker                         (((y > init_y || !sb_has_tr) && (x + t_dim->w >= sub_w4)) ?
1256*c0909341SAndroid Build Coastguard Worker                              0 : EDGE_I444_TOP_HAS_RIGHT) |
1257*c0909341SAndroid Build Coastguard Worker                         ((x > init_x || (!sb_has_bl && y + t_dim->h >= sub_h4)) ?
1258*c0909341SAndroid Build Coastguard Worker                              0 : EDGE_I444_LEFT_HAS_BOTTOM);
1259*c0909341SAndroid Build Coastguard Worker                     const pixel *top_sb_edge = NULL;
1260*c0909341SAndroid Build Coastguard Worker                     if (!(t->by & (f->sb_step - 1))) {
1261*c0909341SAndroid Build Coastguard Worker                         top_sb_edge = f->ipred_edge[0];
1262*c0909341SAndroid Build Coastguard Worker                         const int sby = t->by >> f->sb_shift;
1263*c0909341SAndroid Build Coastguard Worker                         top_sb_edge += f->sb128w * 128 * (sby - 1);
1264*c0909341SAndroid Build Coastguard Worker                     }
1265*c0909341SAndroid Build Coastguard Worker                     const enum IntraPredMode m =
1266*c0909341SAndroid Build Coastguard Worker                         bytefn(dav1d_prepare_intra_edges)(t->bx,
1267*c0909341SAndroid Build Coastguard Worker                                                           t->bx > ts->tiling.col_start,
1268*c0909341SAndroid Build Coastguard Worker                                                           t->by,
1269*c0909341SAndroid Build Coastguard Worker                                                           t->by > ts->tiling.row_start,
1270*c0909341SAndroid Build Coastguard Worker                                                           ts->tiling.col_end,
1271*c0909341SAndroid Build Coastguard Worker                                                           ts->tiling.row_end,
1272*c0909341SAndroid Build Coastguard Worker                                                           edge_flags, dst,
1273*c0909341SAndroid Build Coastguard Worker                                                           f->cur.stride[0], top_sb_edge,
1274*c0909341SAndroid Build Coastguard Worker                                                           b->y_mode, &angle,
1275*c0909341SAndroid Build Coastguard Worker                                                           t_dim->w, t_dim->h,
1276*c0909341SAndroid Build Coastguard Worker                                                           f->seq_hdr->intra_edge_filter,
1277*c0909341SAndroid Build Coastguard Worker                                                           edge HIGHBD_CALL_SUFFIX);
1278*c0909341SAndroid Build Coastguard Worker                     dsp->ipred.intra_pred[m](dst, f->cur.stride[0], edge,
1279*c0909341SAndroid Build Coastguard Worker                                              t_dim->w * 4, t_dim->h * 4,
1280*c0909341SAndroid Build Coastguard Worker                                              angle | intra_flags,
1281*c0909341SAndroid Build Coastguard Worker                                              4 * f->bw - 4 * t->bx,
1282*c0909341SAndroid Build Coastguard Worker                                              4 * f->bh - 4 * t->by
1283*c0909341SAndroid Build Coastguard Worker                                              HIGHBD_CALL_SUFFIX);
1284*c0909341SAndroid Build Coastguard Worker 
1285*c0909341SAndroid Build Coastguard Worker                     if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
1286*c0909341SAndroid Build Coastguard Worker                         hex_dump(edge - t_dim->h * 4, t_dim->h * 4,
1287*c0909341SAndroid Build Coastguard Worker                                  t_dim->h * 4, 2, "l");
1288*c0909341SAndroid Build Coastguard Worker                         hex_dump(edge, 0, 1, 1, "tl");
1289*c0909341SAndroid Build Coastguard Worker                         hex_dump(edge + 1, t_dim->w * 4,
1290*c0909341SAndroid Build Coastguard Worker                                  t_dim->w * 4, 2, "t");
1291*c0909341SAndroid Build Coastguard Worker                         hex_dump(dst, f->cur.stride[0],
1292*c0909341SAndroid Build Coastguard Worker                                  t_dim->w * 4, t_dim->h * 4, "y-intra-pred");
1293*c0909341SAndroid Build Coastguard Worker                     }
1294*c0909341SAndroid Build Coastguard Worker 
1295*c0909341SAndroid Build Coastguard Worker                 skip_y_pred: {}
1296*c0909341SAndroid Build Coastguard Worker                     if (!b->skip) {
1297*c0909341SAndroid Build Coastguard Worker                         coef *cf;
1298*c0909341SAndroid Build Coastguard Worker                         int eob;
1299*c0909341SAndroid Build Coastguard Worker                         enum TxfmType txtp;
1300*c0909341SAndroid Build Coastguard Worker                         if (t->frame_thread.pass) {
1301*c0909341SAndroid Build Coastguard Worker                             const int p = t->frame_thread.pass & 1;
1302*c0909341SAndroid Build Coastguard Worker                             const int cbi = *ts->frame_thread[p].cbi++;
1303*c0909341SAndroid Build Coastguard Worker                             cf = ts->frame_thread[p].cf;
1304*c0909341SAndroid Build Coastguard Worker                             ts->frame_thread[p].cf += imin(t_dim->w, 8) * imin(t_dim->h, 8) * 16;
1305*c0909341SAndroid Build Coastguard Worker                             eob  = cbi >> 5;
1306*c0909341SAndroid Build Coastguard Worker                             txtp = cbi & 0x1f;
1307*c0909341SAndroid Build Coastguard Worker                         } else {
1308*c0909341SAndroid Build Coastguard Worker                             uint8_t cf_ctx;
1309*c0909341SAndroid Build Coastguard Worker                             cf = bitfn(t->cf);
1310*c0909341SAndroid Build Coastguard Worker                             eob = decode_coefs(t, &t->a->lcoef[bx4 + x],
1311*c0909341SAndroid Build Coastguard Worker                                                &t->l.lcoef[by4 + y], b->tx, bs,
1312*c0909341SAndroid Build Coastguard Worker                                                b, 1, 0, cf, &txtp, &cf_ctx);
1313*c0909341SAndroid Build Coastguard Worker                             if (DEBUG_BLOCK_INFO)
1314*c0909341SAndroid Build Coastguard Worker                                 printf("Post-y-cf-blk[tx=%d,txtp=%d,eob=%d]: r=%d\n",
1315*c0909341SAndroid Build Coastguard Worker                                        b->tx, txtp, eob, ts->msac.rng);
1316*c0909341SAndroid Build Coastguard Worker                             dav1d_memset_likely_pow2(&t->a->lcoef[bx4 + x], cf_ctx, imin(t_dim->w, f->bw - t->bx));
1317*c0909341SAndroid Build Coastguard Worker                             dav1d_memset_likely_pow2(&t->l.lcoef[by4 + y], cf_ctx, imin(t_dim->h, f->bh - t->by));
1318*c0909341SAndroid Build Coastguard Worker                         }
1319*c0909341SAndroid Build Coastguard Worker                         if (eob >= 0) {
1320*c0909341SAndroid Build Coastguard Worker                             if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
1321*c0909341SAndroid Build Coastguard Worker                                 coef_dump(cf, imin(t_dim->h, 8) * 4,
1322*c0909341SAndroid Build Coastguard Worker                                           imin(t_dim->w, 8) * 4, 3, "dq");
1323*c0909341SAndroid Build Coastguard Worker                             dsp->itx.itxfm_add[b->tx]
1324*c0909341SAndroid Build Coastguard Worker                                               [txtp](dst,
1325*c0909341SAndroid Build Coastguard Worker                                                      f->cur.stride[0],
1326*c0909341SAndroid Build Coastguard Worker                                                      cf, eob HIGHBD_CALL_SUFFIX);
1327*c0909341SAndroid Build Coastguard Worker                             if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
1328*c0909341SAndroid Build Coastguard Worker                                 hex_dump(dst, f->cur.stride[0],
1329*c0909341SAndroid Build Coastguard Worker                                          t_dim->w * 4, t_dim->h * 4, "recon");
1330*c0909341SAndroid Build Coastguard Worker                         }
1331*c0909341SAndroid Build Coastguard Worker                     } else if (!t->frame_thread.pass) {
1332*c0909341SAndroid Build Coastguard Worker                         dav1d_memset_pow2[t_dim->lw](&t->a->lcoef[bx4 + x], 0x40);
1333*c0909341SAndroid Build Coastguard Worker                         dav1d_memset_pow2[t_dim->lh](&t->l.lcoef[by4 + y], 0x40);
1334*c0909341SAndroid Build Coastguard Worker                     }
1335*c0909341SAndroid Build Coastguard Worker                     dst += 4 * t_dim->w;
1336*c0909341SAndroid Build Coastguard Worker                 }
1337*c0909341SAndroid Build Coastguard Worker                 t->bx -= x;
1338*c0909341SAndroid Build Coastguard Worker             }
1339*c0909341SAndroid Build Coastguard Worker             t->by -= y;
1340*c0909341SAndroid Build Coastguard Worker 
1341*c0909341SAndroid Build Coastguard Worker             if (!has_chroma) continue;
1342*c0909341SAndroid Build Coastguard Worker 
1343*c0909341SAndroid Build Coastguard Worker             const ptrdiff_t stride = f->cur.stride[1];
1344*c0909341SAndroid Build Coastguard Worker 
1345*c0909341SAndroid Build Coastguard Worker             if (b->uv_mode == CFL_PRED) {
1346*c0909341SAndroid Build Coastguard Worker                 assert(!init_x && !init_y);
1347*c0909341SAndroid Build Coastguard Worker 
1348*c0909341SAndroid Build Coastguard Worker                 int16_t *const ac = t->scratch.ac;
1349*c0909341SAndroid Build Coastguard Worker                 pixel *y_src = ((pixel *) f->cur.data[0]) + 4 * (t->bx & ~ss_hor) +
1350*c0909341SAndroid Build Coastguard Worker                                  4 * (t->by & ~ss_ver) * PXSTRIDE(f->cur.stride[0]);
1351*c0909341SAndroid Build Coastguard Worker                 const ptrdiff_t uv_off = 4 * ((t->bx >> ss_hor) +
1352*c0909341SAndroid Build Coastguard Worker                                               (t->by >> ss_ver) * PXSTRIDE(stride));
1353*c0909341SAndroid Build Coastguard Worker                 pixel *const uv_dst[2] = { ((pixel *) f->cur.data[1]) + uv_off,
1354*c0909341SAndroid Build Coastguard Worker                                            ((pixel *) f->cur.data[2]) + uv_off };
1355*c0909341SAndroid Build Coastguard Worker 
1356*c0909341SAndroid Build Coastguard Worker                 const int furthest_r =
1357*c0909341SAndroid Build Coastguard Worker                     ((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
1358*c0909341SAndroid Build Coastguard Worker                 const int furthest_b =
1359*c0909341SAndroid Build Coastguard Worker                     ((ch4 << ss_ver) + t_dim->h - 1) & ~(t_dim->h - 1);
1360*c0909341SAndroid Build Coastguard Worker                 dsp->ipred.cfl_ac[f->cur.p.layout - 1](ac, y_src, f->cur.stride[0],
1361*c0909341SAndroid Build Coastguard Worker                                                          cbw4 - (furthest_r >> ss_hor),
1362*c0909341SAndroid Build Coastguard Worker                                                          cbh4 - (furthest_b >> ss_ver),
1363*c0909341SAndroid Build Coastguard Worker                                                          cbw4 * 4, cbh4 * 4);
1364*c0909341SAndroid Build Coastguard Worker                 for (int pl = 0; pl < 2; pl++) {
1365*c0909341SAndroid Build Coastguard Worker                     if (!b->cfl_alpha[pl]) continue;
1366*c0909341SAndroid Build Coastguard Worker                     int angle = 0;
1367*c0909341SAndroid Build Coastguard Worker                     const pixel *top_sb_edge = NULL;
1368*c0909341SAndroid Build Coastguard Worker                     if (!((t->by & ~ss_ver) & (f->sb_step - 1))) {
1369*c0909341SAndroid Build Coastguard Worker                         top_sb_edge = f->ipred_edge[pl + 1];
1370*c0909341SAndroid Build Coastguard Worker                         const int sby = t->by >> f->sb_shift;
1371*c0909341SAndroid Build Coastguard Worker                         top_sb_edge += f->sb128w * 128 * (sby - 1);
1372*c0909341SAndroid Build Coastguard Worker                     }
1373*c0909341SAndroid Build Coastguard Worker                     const int xpos = t->bx >> ss_hor, ypos = t->by >> ss_ver;
1374*c0909341SAndroid Build Coastguard Worker                     const int xstart = ts->tiling.col_start >> ss_hor;
1375*c0909341SAndroid Build Coastguard Worker                     const int ystart = ts->tiling.row_start >> ss_ver;
1376*c0909341SAndroid Build Coastguard Worker                     const enum IntraPredMode m =
1377*c0909341SAndroid Build Coastguard Worker                         bytefn(dav1d_prepare_intra_edges)(xpos, xpos > xstart,
1378*c0909341SAndroid Build Coastguard Worker                                                           ypos, ypos > ystart,
1379*c0909341SAndroid Build Coastguard Worker                                                           ts->tiling.col_end >> ss_hor,
1380*c0909341SAndroid Build Coastguard Worker                                                           ts->tiling.row_end >> ss_ver,
1381*c0909341SAndroid Build Coastguard Worker                                                           0, uv_dst[pl], stride,
1382*c0909341SAndroid Build Coastguard Worker                                                           top_sb_edge, DC_PRED, &angle,
1383*c0909341SAndroid Build Coastguard Worker                                                           uv_t_dim->w, uv_t_dim->h, 0,
1384*c0909341SAndroid Build Coastguard Worker                                                           edge HIGHBD_CALL_SUFFIX);
1385*c0909341SAndroid Build Coastguard Worker                     dsp->ipred.cfl_pred[m](uv_dst[pl], stride, edge,
1386*c0909341SAndroid Build Coastguard Worker                                            uv_t_dim->w * 4,
1387*c0909341SAndroid Build Coastguard Worker                                            uv_t_dim->h * 4,
1388*c0909341SAndroid Build Coastguard Worker                                            ac, b->cfl_alpha[pl]
1389*c0909341SAndroid Build Coastguard Worker                                            HIGHBD_CALL_SUFFIX);
1390*c0909341SAndroid Build Coastguard Worker                 }
1391*c0909341SAndroid Build Coastguard Worker                 if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
1392*c0909341SAndroid Build Coastguard Worker                     ac_dump(ac, 4*cbw4, 4*cbh4, "ac");
1393*c0909341SAndroid Build Coastguard Worker                     hex_dump(uv_dst[0], stride, cbw4 * 4, cbh4 * 4, "u-cfl-pred");
1394*c0909341SAndroid Build Coastguard Worker                     hex_dump(uv_dst[1], stride, cbw4 * 4, cbh4 * 4, "v-cfl-pred");
1395*c0909341SAndroid Build Coastguard Worker                 }
1396*c0909341SAndroid Build Coastguard Worker             } else if (b->pal_sz[1]) {
1397*c0909341SAndroid Build Coastguard Worker                 const ptrdiff_t uv_dstoff = 4 * ((t->bx >> ss_hor) +
1398*c0909341SAndroid Build Coastguard Worker                                               (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
1399*c0909341SAndroid Build Coastguard Worker                 const pixel (*pal)[8];
1400*c0909341SAndroid Build Coastguard Worker                 const uint8_t *pal_idx;
1401*c0909341SAndroid Build Coastguard Worker                 if (t->frame_thread.pass) {
1402*c0909341SAndroid Build Coastguard Worker                     const int p = t->frame_thread.pass & 1;
1403*c0909341SAndroid Build Coastguard Worker                     assert(ts->frame_thread[p].pal_idx);
1404*c0909341SAndroid Build Coastguard Worker                     pal = f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
1405*c0909341SAndroid Build Coastguard Worker                                               ((t->bx >> 1) + (t->by & 1))];
1406*c0909341SAndroid Build Coastguard Worker                     pal_idx = ts->frame_thread[p].pal_idx;
1407*c0909341SAndroid Build Coastguard Worker                     ts->frame_thread[p].pal_idx += cbw4 * cbh4 * 8;
1408*c0909341SAndroid Build Coastguard Worker                 } else {
1409*c0909341SAndroid Build Coastguard Worker                     pal = bytefn(t->scratch.pal);
1410*c0909341SAndroid Build Coastguard Worker                     pal_idx = t->scratch.pal_idx_uv;
1411*c0909341SAndroid Build Coastguard Worker                 }
1412*c0909341SAndroid Build Coastguard Worker 
1413*c0909341SAndroid Build Coastguard Worker                 f->dsp->ipred.pal_pred(((pixel *) f->cur.data[1]) + uv_dstoff,
1414*c0909341SAndroid Build Coastguard Worker                                        f->cur.stride[1], pal[1],
1415*c0909341SAndroid Build Coastguard Worker                                        pal_idx, cbw4 * 4, cbh4 * 4);
1416*c0909341SAndroid Build Coastguard Worker                 f->dsp->ipred.pal_pred(((pixel *) f->cur.data[2]) + uv_dstoff,
1417*c0909341SAndroid Build Coastguard Worker                                        f->cur.stride[1], pal[2],
1418*c0909341SAndroid Build Coastguard Worker                                        pal_idx, cbw4 * 4, cbh4 * 4);
1419*c0909341SAndroid Build Coastguard Worker                 if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
1420*c0909341SAndroid Build Coastguard Worker                     hex_dump(((pixel *) f->cur.data[1]) + uv_dstoff,
1421*c0909341SAndroid Build Coastguard Worker                              PXSTRIDE(f->cur.stride[1]),
1422*c0909341SAndroid Build Coastguard Worker                              cbw4 * 4, cbh4 * 4, "u-pal-pred");
1423*c0909341SAndroid Build Coastguard Worker                     hex_dump(((pixel *) f->cur.data[2]) + uv_dstoff,
1424*c0909341SAndroid Build Coastguard Worker                              PXSTRIDE(f->cur.stride[1]),
1425*c0909341SAndroid Build Coastguard Worker                              cbw4 * 4, cbh4 * 4, "v-pal-pred");
1426*c0909341SAndroid Build Coastguard Worker                 }
1427*c0909341SAndroid Build Coastguard Worker             }
1428*c0909341SAndroid Build Coastguard Worker 
1429*c0909341SAndroid Build Coastguard Worker             const int sm_uv_fl = sm_uv_flag(t->a, cbx4) |
1430*c0909341SAndroid Build Coastguard Worker                                  sm_uv_flag(&t->l, cby4);
1431*c0909341SAndroid Build Coastguard Worker             const int uv_sb_has_tr =
1432*c0909341SAndroid Build Coastguard Worker                 ((init_x + 16) >> ss_hor) < cw4 ? 1 : init_y ? 0 :
1433*c0909341SAndroid Build Coastguard Worker                 intra_edge_flags & (EDGE_I420_TOP_HAS_RIGHT >> (f->cur.p.layout - 1));
1434*c0909341SAndroid Build Coastguard Worker             const int uv_sb_has_bl =
1435*c0909341SAndroid Build Coastguard Worker                 init_x ? 0 : ((init_y + 16) >> ss_ver) < ch4 ? 1 :
1436*c0909341SAndroid Build Coastguard Worker                 intra_edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM >> (f->cur.p.layout - 1));
1437*c0909341SAndroid Build Coastguard Worker             const int sub_cw4 = imin(cw4, (init_x + 16) >> ss_hor);
1438*c0909341SAndroid Build Coastguard Worker             for (int pl = 0; pl < 2; pl++) {
1439*c0909341SAndroid Build Coastguard Worker                 for (y = init_y >> ss_ver, t->by += init_y; y < sub_ch4;
1440*c0909341SAndroid Build Coastguard Worker                      y += uv_t_dim->h, t->by += uv_t_dim->h << ss_ver)
1441*c0909341SAndroid Build Coastguard Worker                 {
1442*c0909341SAndroid Build Coastguard Worker                     pixel *dst = ((pixel *) f->cur.data[1 + pl]) +
1443*c0909341SAndroid Build Coastguard Worker                                    4 * ((t->by >> ss_ver) * PXSTRIDE(stride) +
1444*c0909341SAndroid Build Coastguard Worker                                         ((t->bx + init_x) >> ss_hor));
1445*c0909341SAndroid Build Coastguard Worker                     for (x = init_x >> ss_hor, t->bx += init_x; x < sub_cw4;
1446*c0909341SAndroid Build Coastguard Worker                          x += uv_t_dim->w, t->bx += uv_t_dim->w << ss_hor)
1447*c0909341SAndroid Build Coastguard Worker                     {
1448*c0909341SAndroid Build Coastguard Worker                         if ((b->uv_mode == CFL_PRED && b->cfl_alpha[pl]) ||
1449*c0909341SAndroid Build Coastguard Worker                             b->pal_sz[1])
1450*c0909341SAndroid Build Coastguard Worker                         {
1451*c0909341SAndroid Build Coastguard Worker                             goto skip_uv_pred;
1452*c0909341SAndroid Build Coastguard Worker                         }
1453*c0909341SAndroid Build Coastguard Worker 
1454*c0909341SAndroid Build Coastguard Worker                         int angle = b->uv_angle;
1455*c0909341SAndroid Build Coastguard Worker                         // this probably looks weird because we're using
1456*c0909341SAndroid Build Coastguard Worker                         // luma flags in a chroma loop, but that's because
1457*c0909341SAndroid Build Coastguard Worker                         // prepare_intra_edges() expects luma flags as input
1458*c0909341SAndroid Build Coastguard Worker                         const enum EdgeFlags edge_flags =
1459*c0909341SAndroid Build Coastguard Worker                             (((y > (init_y >> ss_ver) || !uv_sb_has_tr) &&
1460*c0909341SAndroid Build Coastguard Worker                               (x + uv_t_dim->w >= sub_cw4)) ?
1461*c0909341SAndroid Build Coastguard Worker                                  0 : EDGE_I444_TOP_HAS_RIGHT) |
1462*c0909341SAndroid Build Coastguard Worker                             ((x > (init_x >> ss_hor) ||
1463*c0909341SAndroid Build Coastguard Worker                               (!uv_sb_has_bl && y + uv_t_dim->h >= sub_ch4)) ?
1464*c0909341SAndroid Build Coastguard Worker                                  0 : EDGE_I444_LEFT_HAS_BOTTOM);
1465*c0909341SAndroid Build Coastguard Worker                         const pixel *top_sb_edge = NULL;
1466*c0909341SAndroid Build Coastguard Worker                         if (!((t->by & ~ss_ver) & (f->sb_step - 1))) {
1467*c0909341SAndroid Build Coastguard Worker                             top_sb_edge = f->ipred_edge[1 + pl];
1468*c0909341SAndroid Build Coastguard Worker                             const int sby = t->by >> f->sb_shift;
1469*c0909341SAndroid Build Coastguard Worker                             top_sb_edge += f->sb128w * 128 * (sby - 1);
1470*c0909341SAndroid Build Coastguard Worker                         }
1471*c0909341SAndroid Build Coastguard Worker                         const enum IntraPredMode uv_mode =
1472*c0909341SAndroid Build Coastguard Worker                              b->uv_mode == CFL_PRED ? DC_PRED : b->uv_mode;
1473*c0909341SAndroid Build Coastguard Worker                         const int xpos = t->bx >> ss_hor, ypos = t->by >> ss_ver;
1474*c0909341SAndroid Build Coastguard Worker                         const int xstart = ts->tiling.col_start >> ss_hor;
1475*c0909341SAndroid Build Coastguard Worker                         const int ystart = ts->tiling.row_start >> ss_ver;
1476*c0909341SAndroid Build Coastguard Worker                         const enum IntraPredMode m =
1477*c0909341SAndroid Build Coastguard Worker                             bytefn(dav1d_prepare_intra_edges)(xpos, xpos > xstart,
1478*c0909341SAndroid Build Coastguard Worker                                                               ypos, ypos > ystart,
1479*c0909341SAndroid Build Coastguard Worker                                                               ts->tiling.col_end >> ss_hor,
1480*c0909341SAndroid Build Coastguard Worker                                                               ts->tiling.row_end >> ss_ver,
1481*c0909341SAndroid Build Coastguard Worker                                                               edge_flags, dst, stride,
1482*c0909341SAndroid Build Coastguard Worker                                                               top_sb_edge, uv_mode,
1483*c0909341SAndroid Build Coastguard Worker                                                               &angle, uv_t_dim->w,
1484*c0909341SAndroid Build Coastguard Worker                                                               uv_t_dim->h,
1485*c0909341SAndroid Build Coastguard Worker                                                               f->seq_hdr->intra_edge_filter,
1486*c0909341SAndroid Build Coastguard Worker                                                               edge HIGHBD_CALL_SUFFIX);
1487*c0909341SAndroid Build Coastguard Worker                         angle |= intra_edge_filter_flag;
1488*c0909341SAndroid Build Coastguard Worker                         dsp->ipred.intra_pred[m](dst, stride, edge,
1489*c0909341SAndroid Build Coastguard Worker                                                  uv_t_dim->w * 4,
1490*c0909341SAndroid Build Coastguard Worker                                                  uv_t_dim->h * 4,
1491*c0909341SAndroid Build Coastguard Worker                                                  angle | sm_uv_fl,
1492*c0909341SAndroid Build Coastguard Worker                                                  (4 * f->bw + ss_hor -
1493*c0909341SAndroid Build Coastguard Worker                                                   4 * (t->bx & ~ss_hor)) >> ss_hor,
1494*c0909341SAndroid Build Coastguard Worker                                                  (4 * f->bh + ss_ver -
1495*c0909341SAndroid Build Coastguard Worker                                                   4 * (t->by & ~ss_ver)) >> ss_ver
1496*c0909341SAndroid Build Coastguard Worker                                                  HIGHBD_CALL_SUFFIX);
1497*c0909341SAndroid Build Coastguard Worker                         if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
1498*c0909341SAndroid Build Coastguard Worker                             hex_dump(edge - uv_t_dim->h * 4, uv_t_dim->h * 4,
1499*c0909341SAndroid Build Coastguard Worker                                      uv_t_dim->h * 4, 2, "l");
1500*c0909341SAndroid Build Coastguard Worker                             hex_dump(edge, 0, 1, 1, "tl");
1501*c0909341SAndroid Build Coastguard Worker                             hex_dump(edge + 1, uv_t_dim->w * 4,
1502*c0909341SAndroid Build Coastguard Worker                                      uv_t_dim->w * 4, 2, "t");
1503*c0909341SAndroid Build Coastguard Worker                             hex_dump(dst, stride, uv_t_dim->w * 4,
1504*c0909341SAndroid Build Coastguard Worker                                      uv_t_dim->h * 4, pl ? "v-intra-pred" : "u-intra-pred");
1505*c0909341SAndroid Build Coastguard Worker                         }
1506*c0909341SAndroid Build Coastguard Worker 
1507*c0909341SAndroid Build Coastguard Worker                     skip_uv_pred: {}
1508*c0909341SAndroid Build Coastguard Worker                         if (!b->skip) {
1509*c0909341SAndroid Build Coastguard Worker                             enum TxfmType txtp;
1510*c0909341SAndroid Build Coastguard Worker                             int eob;
1511*c0909341SAndroid Build Coastguard Worker                             coef *cf;
1512*c0909341SAndroid Build Coastguard Worker                             if (t->frame_thread.pass) {
1513*c0909341SAndroid Build Coastguard Worker                                 const int p = t->frame_thread.pass & 1;
1514*c0909341SAndroid Build Coastguard Worker                                 const int cbi = *ts->frame_thread[p].cbi++;
1515*c0909341SAndroid Build Coastguard Worker                                 cf = ts->frame_thread[p].cf;
1516*c0909341SAndroid Build Coastguard Worker                                 ts->frame_thread[p].cf += uv_t_dim->w * uv_t_dim->h * 16;
1517*c0909341SAndroid Build Coastguard Worker                                 eob  = cbi >> 5;
1518*c0909341SAndroid Build Coastguard Worker                                 txtp = cbi & 0x1f;
1519*c0909341SAndroid Build Coastguard Worker                             } else {
1520*c0909341SAndroid Build Coastguard Worker                                 uint8_t cf_ctx;
1521*c0909341SAndroid Build Coastguard Worker                                 cf = bitfn(t->cf);
1522*c0909341SAndroid Build Coastguard Worker                                 eob = decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
1523*c0909341SAndroid Build Coastguard Worker                                                    &t->l.ccoef[pl][cby4 + y],
1524*c0909341SAndroid Build Coastguard Worker                                                    b->uvtx, bs, b, 1, 1 + pl, cf,
1525*c0909341SAndroid Build Coastguard Worker                                                    &txtp, &cf_ctx);
1526*c0909341SAndroid Build Coastguard Worker                                 if (DEBUG_BLOCK_INFO)
1527*c0909341SAndroid Build Coastguard Worker                                     printf("Post-uv-cf-blk[pl=%d,tx=%d,"
1528*c0909341SAndroid Build Coastguard Worker                                            "txtp=%d,eob=%d]: r=%d [x=%d,cbx4=%d]\n",
1529*c0909341SAndroid Build Coastguard Worker                                            pl, b->uvtx, txtp, eob, ts->msac.rng, x, cbx4);
1530*c0909341SAndroid Build Coastguard Worker                                 int ctw = imin(uv_t_dim->w, (f->bw - t->bx + ss_hor) >> ss_hor);
1531*c0909341SAndroid Build Coastguard Worker                                 int cth = imin(uv_t_dim->h, (f->bh - t->by + ss_ver) >> ss_ver);
1532*c0909341SAndroid Build Coastguard Worker                                 dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
1533*c0909341SAndroid Build Coastguard Worker                                 dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
1534*c0909341SAndroid Build Coastguard Worker                             }
1535*c0909341SAndroid Build Coastguard Worker                             if (eob >= 0) {
1536*c0909341SAndroid Build Coastguard Worker                                 if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
1537*c0909341SAndroid Build Coastguard Worker                                     coef_dump(cf, uv_t_dim->h * 4,
1538*c0909341SAndroid Build Coastguard Worker                                               uv_t_dim->w * 4, 3, "dq");
1539*c0909341SAndroid Build Coastguard Worker                                 dsp->itx.itxfm_add[b->uvtx]
1540*c0909341SAndroid Build Coastguard Worker                                                   [txtp](dst, stride,
1541*c0909341SAndroid Build Coastguard Worker                                                          cf, eob HIGHBD_CALL_SUFFIX);
1542*c0909341SAndroid Build Coastguard Worker                                 if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
1543*c0909341SAndroid Build Coastguard Worker                                     hex_dump(dst, stride, uv_t_dim->w * 4,
1544*c0909341SAndroid Build Coastguard Worker                                              uv_t_dim->h * 4, "recon");
1545*c0909341SAndroid Build Coastguard Worker                             }
1546*c0909341SAndroid Build Coastguard Worker                         } else if (!t->frame_thread.pass) {
1547*c0909341SAndroid Build Coastguard Worker                             dav1d_memset_pow2[uv_t_dim->lw](&t->a->ccoef[pl][cbx4 + x], 0x40);
1548*c0909341SAndroid Build Coastguard Worker                             dav1d_memset_pow2[uv_t_dim->lh](&t->l.ccoef[pl][cby4 + y], 0x40);
1549*c0909341SAndroid Build Coastguard Worker                         }
1550*c0909341SAndroid Build Coastguard Worker                         dst += uv_t_dim->w * 4;
1551*c0909341SAndroid Build Coastguard Worker                     }
1552*c0909341SAndroid Build Coastguard Worker                     t->bx -= x << ss_hor;
1553*c0909341SAndroid Build Coastguard Worker                 }
1554*c0909341SAndroid Build Coastguard Worker                 t->by -= y << ss_ver;
1555*c0909341SAndroid Build Coastguard Worker             }
1556*c0909341SAndroid Build Coastguard Worker         }
1557*c0909341SAndroid Build Coastguard Worker     }
1558*c0909341SAndroid Build Coastguard Worker }
1559*c0909341SAndroid Build Coastguard Worker 
bytefn(dav1d_recon_b_inter)1560*c0909341SAndroid Build Coastguard Worker int bytefn(dav1d_recon_b_inter)(Dav1dTaskContext *const t, const enum BlockSize bs,
1561*c0909341SAndroid Build Coastguard Worker                                 const Av1Block *const b)
1562*c0909341SAndroid Build Coastguard Worker {
1563*c0909341SAndroid Build Coastguard Worker     Dav1dTileState *const ts = t->ts;
1564*c0909341SAndroid Build Coastguard Worker     const Dav1dFrameContext *const f = t->f;
1565*c0909341SAndroid Build Coastguard Worker     const Dav1dDSPContext *const dsp = f->dsp;
1566*c0909341SAndroid Build Coastguard Worker     const int bx4 = t->bx & 31, by4 = t->by & 31;
1567*c0909341SAndroid Build Coastguard Worker     const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
1568*c0909341SAndroid Build Coastguard Worker     const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
1569*c0909341SAndroid Build Coastguard Worker     const int cbx4 = bx4 >> ss_hor, cby4 = by4 >> ss_ver;
1570*c0909341SAndroid Build Coastguard Worker     const uint8_t *const b_dim = dav1d_block_dimensions[bs];
1571*c0909341SAndroid Build Coastguard Worker     const int bw4 = b_dim[0], bh4 = b_dim[1];
1572*c0909341SAndroid Build Coastguard Worker     const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by);
1573*c0909341SAndroid Build Coastguard Worker     const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400 &&
1574*c0909341SAndroid Build Coastguard Worker                            (bw4 > ss_hor || t->bx & 1) &&
1575*c0909341SAndroid Build Coastguard Worker                            (bh4 > ss_ver || t->by & 1);
1576*c0909341SAndroid Build Coastguard Worker     const int chr_layout_idx = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I400 ? 0 :
1577*c0909341SAndroid Build Coastguard Worker                                DAV1D_PIXEL_LAYOUT_I444 - f->cur.p.layout;
1578*c0909341SAndroid Build Coastguard Worker     int res;
1579*c0909341SAndroid Build Coastguard Worker 
1580*c0909341SAndroid Build Coastguard Worker     // prediction
1581*c0909341SAndroid Build Coastguard Worker     const int cbh4 = (bh4 + ss_ver) >> ss_ver, cbw4 = (bw4 + ss_hor) >> ss_hor;
1582*c0909341SAndroid Build Coastguard Worker     pixel *dst = ((pixel *) f->cur.data[0]) +
1583*c0909341SAndroid Build Coastguard Worker         4 * (t->by * PXSTRIDE(f->cur.stride[0]) + t->bx);
1584*c0909341SAndroid Build Coastguard Worker     const ptrdiff_t uvdstoff =
1585*c0909341SAndroid Build Coastguard Worker         4 * ((t->bx >> ss_hor) + (t->by >> ss_ver) * PXSTRIDE(f->cur.stride[1]));
1586*c0909341SAndroid Build Coastguard Worker     if (IS_KEY_OR_INTRA(f->frame_hdr)) {
1587*c0909341SAndroid Build Coastguard Worker         // intrabc
1588*c0909341SAndroid Build Coastguard Worker         assert(!f->frame_hdr->super_res.enabled);
1589*c0909341SAndroid Build Coastguard Worker         res = mc(t, dst, NULL, f->cur.stride[0], bw4, bh4, t->bx, t->by, 0,
1590*c0909341SAndroid Build Coastguard Worker                  b->mv[0], &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
1591*c0909341SAndroid Build Coastguard Worker         if (res) return res;
1592*c0909341SAndroid Build Coastguard Worker         if (has_chroma) for (int pl = 1; pl < 3; pl++) {
1593*c0909341SAndroid Build Coastguard Worker             res = mc(t, ((pixel *)f->cur.data[pl]) + uvdstoff, NULL, f->cur.stride[1],
1594*c0909341SAndroid Build Coastguard Worker                      bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
1595*c0909341SAndroid Build Coastguard Worker                      t->bx & ~ss_hor, t->by & ~ss_ver, pl, b->mv[0],
1596*c0909341SAndroid Build Coastguard Worker                      &f->sr_cur, 0 /* unused */, FILTER_2D_BILINEAR);
1597*c0909341SAndroid Build Coastguard Worker             if (res) return res;
1598*c0909341SAndroid Build Coastguard Worker         }
1599*c0909341SAndroid Build Coastguard Worker     } else if (b->comp_type == COMP_INTER_NONE) {
1600*c0909341SAndroid Build Coastguard Worker         const Dav1dThreadPicture *const refp = &f->refp[b->ref[0]];
1601*c0909341SAndroid Build Coastguard Worker         const enum Filter2d filter_2d = b->filter2d;
1602*c0909341SAndroid Build Coastguard Worker 
1603*c0909341SAndroid Build Coastguard Worker         if (imin(bw4, bh4) > 1 &&
1604*c0909341SAndroid Build Coastguard Worker             ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
1605*c0909341SAndroid Build Coastguard Worker              (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
1606*c0909341SAndroid Build Coastguard Worker         {
1607*c0909341SAndroid Build Coastguard Worker             res = warp_affine(t, dst, NULL, f->cur.stride[0], b_dim, 0, refp,
1608*c0909341SAndroid Build Coastguard Worker                               b->motion_mode == MM_WARP ? &t->warpmv :
1609*c0909341SAndroid Build Coastguard Worker                                   &f->frame_hdr->gmv[b->ref[0]]);
1610*c0909341SAndroid Build Coastguard Worker             if (res) return res;
1611*c0909341SAndroid Build Coastguard Worker         } else {
1612*c0909341SAndroid Build Coastguard Worker             res = mc(t, dst, NULL, f->cur.stride[0],
1613*c0909341SAndroid Build Coastguard Worker                      bw4, bh4, t->bx, t->by, 0, b->mv[0], refp, b->ref[0], filter_2d);
1614*c0909341SAndroid Build Coastguard Worker             if (res) return res;
1615*c0909341SAndroid Build Coastguard Worker             if (b->motion_mode == MM_OBMC) {
1616*c0909341SAndroid Build Coastguard Worker                 res = obmc(t, dst, f->cur.stride[0], b_dim, 0, bx4, by4, w4, h4);
1617*c0909341SAndroid Build Coastguard Worker                 if (res) return res;
1618*c0909341SAndroid Build Coastguard Worker             }
1619*c0909341SAndroid Build Coastguard Worker         }
1620*c0909341SAndroid Build Coastguard Worker         if (b->interintra_type) {
1621*c0909341SAndroid Build Coastguard Worker             pixel *const tl_edge = bitfn(t->scratch.edge) + 32;
1622*c0909341SAndroid Build Coastguard Worker             enum IntraPredMode m = b->interintra_mode == II_SMOOTH_PRED ?
1623*c0909341SAndroid Build Coastguard Worker                                    SMOOTH_PRED : b->interintra_mode;
1624*c0909341SAndroid Build Coastguard Worker             pixel *const tmp = bitfn(t->scratch.interintra);
1625*c0909341SAndroid Build Coastguard Worker             int angle = 0;
1626*c0909341SAndroid Build Coastguard Worker             const pixel *top_sb_edge = NULL;
1627*c0909341SAndroid Build Coastguard Worker             if (!(t->by & (f->sb_step - 1))) {
1628*c0909341SAndroid Build Coastguard Worker                 top_sb_edge = f->ipred_edge[0];
1629*c0909341SAndroid Build Coastguard Worker                 const int sby = t->by >> f->sb_shift;
1630*c0909341SAndroid Build Coastguard Worker                 top_sb_edge += f->sb128w * 128 * (sby - 1);
1631*c0909341SAndroid Build Coastguard Worker             }
1632*c0909341SAndroid Build Coastguard Worker             m = bytefn(dav1d_prepare_intra_edges)(t->bx, t->bx > ts->tiling.col_start,
1633*c0909341SAndroid Build Coastguard Worker                                                   t->by, t->by > ts->tiling.row_start,
1634*c0909341SAndroid Build Coastguard Worker                                                   ts->tiling.col_end, ts->tiling.row_end,
1635*c0909341SAndroid Build Coastguard Worker                                                   0, dst, f->cur.stride[0], top_sb_edge,
1636*c0909341SAndroid Build Coastguard Worker                                                   m, &angle, bw4, bh4, 0, tl_edge
1637*c0909341SAndroid Build Coastguard Worker                                                   HIGHBD_CALL_SUFFIX);
1638*c0909341SAndroid Build Coastguard Worker             dsp->ipred.intra_pred[m](tmp, 4 * bw4 * sizeof(pixel),
1639*c0909341SAndroid Build Coastguard Worker                                      tl_edge, bw4 * 4, bh4 * 4, 0, 0, 0
1640*c0909341SAndroid Build Coastguard Worker                                      HIGHBD_CALL_SUFFIX);
1641*c0909341SAndroid Build Coastguard Worker             dsp->mc.blend(dst, f->cur.stride[0], tmp,
1642*c0909341SAndroid Build Coastguard Worker                           bw4 * 4, bh4 * 4, II_MASK(0, bs, b));
1643*c0909341SAndroid Build Coastguard Worker         }
1644*c0909341SAndroid Build Coastguard Worker 
1645*c0909341SAndroid Build Coastguard Worker         if (!has_chroma) goto skip_inter_chroma_pred;
1646*c0909341SAndroid Build Coastguard Worker 
1647*c0909341SAndroid Build Coastguard Worker         // sub8x8 derivation
1648*c0909341SAndroid Build Coastguard Worker         int is_sub8x8 = bw4 == ss_hor || bh4 == ss_ver;
1649*c0909341SAndroid Build Coastguard Worker         refmvs_block *const *r;
1650*c0909341SAndroid Build Coastguard Worker         if (is_sub8x8) {
1651*c0909341SAndroid Build Coastguard Worker             assert(ss_hor == 1);
1652*c0909341SAndroid Build Coastguard Worker             r = &t->rt.r[(t->by & 31) + 5];
1653*c0909341SAndroid Build Coastguard Worker             if (bw4 == 1) is_sub8x8 &= r[0][t->bx - 1].ref.ref[0] > 0;
1654*c0909341SAndroid Build Coastguard Worker             if (bh4 == ss_ver) is_sub8x8 &= r[-1][t->bx].ref.ref[0] > 0;
1655*c0909341SAndroid Build Coastguard Worker             if (bw4 == 1 && bh4 == ss_ver)
1656*c0909341SAndroid Build Coastguard Worker                 is_sub8x8 &= r[-1][t->bx - 1].ref.ref[0] > 0;
1657*c0909341SAndroid Build Coastguard Worker         }
1658*c0909341SAndroid Build Coastguard Worker 
1659*c0909341SAndroid Build Coastguard Worker         // chroma prediction
1660*c0909341SAndroid Build Coastguard Worker         if (is_sub8x8) {
1661*c0909341SAndroid Build Coastguard Worker             assert(ss_hor == 1);
1662*c0909341SAndroid Build Coastguard Worker             ptrdiff_t h_off = 0, v_off = 0;
1663*c0909341SAndroid Build Coastguard Worker             if (bw4 == 1 && bh4 == ss_ver) {
1664*c0909341SAndroid Build Coastguard Worker                 for (int pl = 0; pl < 2; pl++) {
1665*c0909341SAndroid Build Coastguard Worker                     res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
1666*c0909341SAndroid Build Coastguard Worker                              NULL, f->cur.stride[1],
1667*c0909341SAndroid Build Coastguard Worker                              bw4, bh4, t->bx - 1, t->by - 1, 1 + pl,
1668*c0909341SAndroid Build Coastguard Worker                              r[-1][t->bx - 1].mv.mv[0],
1669*c0909341SAndroid Build Coastguard Worker                              &f->refp[r[-1][t->bx - 1].ref.ref[0] - 1],
1670*c0909341SAndroid Build Coastguard Worker                              r[-1][t->bx - 1].ref.ref[0] - 1,
1671*c0909341SAndroid Build Coastguard Worker                              t->frame_thread.pass != 2 ? t->tl_4x4_filter :
1672*c0909341SAndroid Build Coastguard Worker                                  f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx - 1].filter2d);
1673*c0909341SAndroid Build Coastguard Worker                     if (res) return res;
1674*c0909341SAndroid Build Coastguard Worker                 }
1675*c0909341SAndroid Build Coastguard Worker                 v_off = 2 * PXSTRIDE(f->cur.stride[1]);
1676*c0909341SAndroid Build Coastguard Worker                 h_off = 2;
1677*c0909341SAndroid Build Coastguard Worker             }
1678*c0909341SAndroid Build Coastguard Worker             if (bw4 == 1) {
1679*c0909341SAndroid Build Coastguard Worker                 const enum Filter2d left_filter_2d =
1680*c0909341SAndroid Build Coastguard Worker                     dav1d_filter_2d[t->l.filter[1][by4]][t->l.filter[0][by4]];
1681*c0909341SAndroid Build Coastguard Worker                 for (int pl = 0; pl < 2; pl++) {
1682*c0909341SAndroid Build Coastguard Worker                     res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + v_off, NULL,
1683*c0909341SAndroid Build Coastguard Worker                              f->cur.stride[1], bw4, bh4, t->bx - 1,
1684*c0909341SAndroid Build Coastguard Worker                              t->by, 1 + pl, r[0][t->bx - 1].mv.mv[0],
1685*c0909341SAndroid Build Coastguard Worker                              &f->refp[r[0][t->bx - 1].ref.ref[0] - 1],
1686*c0909341SAndroid Build Coastguard Worker                              r[0][t->bx - 1].ref.ref[0] - 1,
1687*c0909341SAndroid Build Coastguard Worker                              t->frame_thread.pass != 2 ? left_filter_2d :
1688*c0909341SAndroid Build Coastguard Worker                                  f->frame_thread.b[(t->by * f->b4_stride) + t->bx - 1].filter2d);
1689*c0909341SAndroid Build Coastguard Worker                     if (res) return res;
1690*c0909341SAndroid Build Coastguard Worker                 }
1691*c0909341SAndroid Build Coastguard Worker                 h_off = 2;
1692*c0909341SAndroid Build Coastguard Worker             }
1693*c0909341SAndroid Build Coastguard Worker             if (bh4 == ss_ver) {
1694*c0909341SAndroid Build Coastguard Worker                 const enum Filter2d top_filter_2d =
1695*c0909341SAndroid Build Coastguard Worker                     dav1d_filter_2d[t->a->filter[1][bx4]][t->a->filter[0][bx4]];
1696*c0909341SAndroid Build Coastguard Worker                 for (int pl = 0; pl < 2; pl++) {
1697*c0909341SAndroid Build Coastguard Worker                     res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off, NULL,
1698*c0909341SAndroid Build Coastguard Worker                              f->cur.stride[1], bw4, bh4, t->bx, t->by - 1,
1699*c0909341SAndroid Build Coastguard Worker                              1 + pl, r[-1][t->bx].mv.mv[0],
1700*c0909341SAndroid Build Coastguard Worker                              &f->refp[r[-1][t->bx].ref.ref[0] - 1],
1701*c0909341SAndroid Build Coastguard Worker                              r[-1][t->bx].ref.ref[0] - 1,
1702*c0909341SAndroid Build Coastguard Worker                              t->frame_thread.pass != 2 ? top_filter_2d :
1703*c0909341SAndroid Build Coastguard Worker                                  f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx].filter2d);
1704*c0909341SAndroid Build Coastguard Worker                     if (res) return res;
1705*c0909341SAndroid Build Coastguard Worker                 }
1706*c0909341SAndroid Build Coastguard Worker                 v_off = 2 * PXSTRIDE(f->cur.stride[1]);
1707*c0909341SAndroid Build Coastguard Worker             }
1708*c0909341SAndroid Build Coastguard Worker             for (int pl = 0; pl < 2; pl++) {
1709*c0909341SAndroid Build Coastguard Worker                 res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff + h_off + v_off, NULL, f->cur.stride[1],
1710*c0909341SAndroid Build Coastguard Worker                          bw4, bh4, t->bx, t->by, 1 + pl, b->mv[0],
1711*c0909341SAndroid Build Coastguard Worker                          refp, b->ref[0], filter_2d);
1712*c0909341SAndroid Build Coastguard Worker                 if (res) return res;
1713*c0909341SAndroid Build Coastguard Worker             }
1714*c0909341SAndroid Build Coastguard Worker         } else {
1715*c0909341SAndroid Build Coastguard Worker             if (imin(cbw4, cbh4) > 1 &&
1716*c0909341SAndroid Build Coastguard Worker                 ((b->inter_mode == GLOBALMV && f->gmv_warp_allowed[b->ref[0]]) ||
1717*c0909341SAndroid Build Coastguard Worker                  (b->motion_mode == MM_WARP && t->warpmv.type > DAV1D_WM_TYPE_TRANSLATION)))
1718*c0909341SAndroid Build Coastguard Worker             {
1719*c0909341SAndroid Build Coastguard Worker                 for (int pl = 0; pl < 2; pl++) {
1720*c0909341SAndroid Build Coastguard Worker                     res = warp_affine(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff, NULL,
1721*c0909341SAndroid Build Coastguard Worker                                       f->cur.stride[1], b_dim, 1 + pl, refp,
1722*c0909341SAndroid Build Coastguard Worker                                       b->motion_mode == MM_WARP ? &t->warpmv :
1723*c0909341SAndroid Build Coastguard Worker                                           &f->frame_hdr->gmv[b->ref[0]]);
1724*c0909341SAndroid Build Coastguard Worker                     if (res) return res;
1725*c0909341SAndroid Build Coastguard Worker                 }
1726*c0909341SAndroid Build Coastguard Worker             } else {
1727*c0909341SAndroid Build Coastguard Worker                 for (int pl = 0; pl < 2; pl++) {
1728*c0909341SAndroid Build Coastguard Worker                     res = mc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
1729*c0909341SAndroid Build Coastguard Worker                              NULL, f->cur.stride[1],
1730*c0909341SAndroid Build Coastguard Worker                              bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
1731*c0909341SAndroid Build Coastguard Worker                              t->bx & ~ss_hor, t->by & ~ss_ver,
1732*c0909341SAndroid Build Coastguard Worker                              1 + pl, b->mv[0], refp, b->ref[0], filter_2d);
1733*c0909341SAndroid Build Coastguard Worker                     if (res) return res;
1734*c0909341SAndroid Build Coastguard Worker                     if (b->motion_mode == MM_OBMC) {
1735*c0909341SAndroid Build Coastguard Worker                         res = obmc(t, ((pixel *) f->cur.data[1 + pl]) + uvdstoff,
1736*c0909341SAndroid Build Coastguard Worker                                    f->cur.stride[1], b_dim, 1 + pl, bx4, by4, w4, h4);
1737*c0909341SAndroid Build Coastguard Worker                         if (res) return res;
1738*c0909341SAndroid Build Coastguard Worker                     }
1739*c0909341SAndroid Build Coastguard Worker                 }
1740*c0909341SAndroid Build Coastguard Worker             }
1741*c0909341SAndroid Build Coastguard Worker             if (b->interintra_type) {
1742*c0909341SAndroid Build Coastguard Worker                 // FIXME for 8x32 with 4:2:2 subsampling, this probably does
1743*c0909341SAndroid Build Coastguard Worker                 // the wrong thing since it will select 4x16, not 4x32, as a
1744*c0909341SAndroid Build Coastguard Worker                 // transform size...
1745*c0909341SAndroid Build Coastguard Worker                 const uint8_t *const ii_mask = II_MASK(chr_layout_idx, bs, b);
1746*c0909341SAndroid Build Coastguard Worker 
1747*c0909341SAndroid Build Coastguard Worker                 for (int pl = 0; pl < 2; pl++) {
1748*c0909341SAndroid Build Coastguard Worker                     pixel *const tmp = bitfn(t->scratch.interintra);
1749*c0909341SAndroid Build Coastguard Worker                     pixel *const tl_edge = bitfn(t->scratch.edge) + 32;
1750*c0909341SAndroid Build Coastguard Worker                     enum IntraPredMode m =
1751*c0909341SAndroid Build Coastguard Worker                         b->interintra_mode == II_SMOOTH_PRED ?
1752*c0909341SAndroid Build Coastguard Worker                         SMOOTH_PRED : b->interintra_mode;
1753*c0909341SAndroid Build Coastguard Worker                     int angle = 0;
1754*c0909341SAndroid Build Coastguard Worker                     pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
1755*c0909341SAndroid Build Coastguard Worker                     const pixel *top_sb_edge = NULL;
1756*c0909341SAndroid Build Coastguard Worker                     if (!(t->by & (f->sb_step - 1))) {
1757*c0909341SAndroid Build Coastguard Worker                         top_sb_edge = f->ipred_edge[pl + 1];
1758*c0909341SAndroid Build Coastguard Worker                         const int sby = t->by >> f->sb_shift;
1759*c0909341SAndroid Build Coastguard Worker                         top_sb_edge += f->sb128w * 128 * (sby - 1);
1760*c0909341SAndroid Build Coastguard Worker                     }
1761*c0909341SAndroid Build Coastguard Worker                     m = bytefn(dav1d_prepare_intra_edges)(t->bx >> ss_hor,
1762*c0909341SAndroid Build Coastguard Worker                                                           (t->bx >> ss_hor) >
1763*c0909341SAndroid Build Coastguard Worker                                                               (ts->tiling.col_start >> ss_hor),
1764*c0909341SAndroid Build Coastguard Worker                                                           t->by >> ss_ver,
1765*c0909341SAndroid Build Coastguard Worker                                                           (t->by >> ss_ver) >
1766*c0909341SAndroid Build Coastguard Worker                                                               (ts->tiling.row_start >> ss_ver),
1767*c0909341SAndroid Build Coastguard Worker                                                           ts->tiling.col_end >> ss_hor,
1768*c0909341SAndroid Build Coastguard Worker                                                           ts->tiling.row_end >> ss_ver,
1769*c0909341SAndroid Build Coastguard Worker                                                           0, uvdst, f->cur.stride[1],
1770*c0909341SAndroid Build Coastguard Worker                                                           top_sb_edge, m,
1771*c0909341SAndroid Build Coastguard Worker                                                           &angle, cbw4, cbh4, 0, tl_edge
1772*c0909341SAndroid Build Coastguard Worker                                                           HIGHBD_CALL_SUFFIX);
1773*c0909341SAndroid Build Coastguard Worker                     dsp->ipred.intra_pred[m](tmp, cbw4 * 4 * sizeof(pixel),
1774*c0909341SAndroid Build Coastguard Worker                                              tl_edge, cbw4 * 4, cbh4 * 4, 0, 0, 0
1775*c0909341SAndroid Build Coastguard Worker                                              HIGHBD_CALL_SUFFIX);
1776*c0909341SAndroid Build Coastguard Worker                     dsp->mc.blend(uvdst, f->cur.stride[1], tmp,
1777*c0909341SAndroid Build Coastguard Worker                                   cbw4 * 4, cbh4 * 4, ii_mask);
1778*c0909341SAndroid Build Coastguard Worker                 }
1779*c0909341SAndroid Build Coastguard Worker             }
1780*c0909341SAndroid Build Coastguard Worker         }
1781*c0909341SAndroid Build Coastguard Worker 
1782*c0909341SAndroid Build Coastguard Worker     skip_inter_chroma_pred: {}
1783*c0909341SAndroid Build Coastguard Worker         t->tl_4x4_filter = filter_2d;
1784*c0909341SAndroid Build Coastguard Worker     } else {
1785*c0909341SAndroid Build Coastguard Worker         const enum Filter2d filter_2d = b->filter2d;
1786*c0909341SAndroid Build Coastguard Worker         // Maximum super block size is 128x128
1787*c0909341SAndroid Build Coastguard Worker         int16_t (*tmp)[128 * 128] = t->scratch.compinter;
1788*c0909341SAndroid Build Coastguard Worker         int jnt_weight;
1789*c0909341SAndroid Build Coastguard Worker         uint8_t *const seg_mask = t->scratch.seg_mask;
1790*c0909341SAndroid Build Coastguard Worker         const uint8_t *mask;
1791*c0909341SAndroid Build Coastguard Worker 
1792*c0909341SAndroid Build Coastguard Worker         for (int i = 0; i < 2; i++) {
1793*c0909341SAndroid Build Coastguard Worker             const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
1794*c0909341SAndroid Build Coastguard Worker 
1795*c0909341SAndroid Build Coastguard Worker             if (b->inter_mode == GLOBALMV_GLOBALMV && f->gmv_warp_allowed[b->ref[i]]) {
1796*c0909341SAndroid Build Coastguard Worker                 res = warp_affine(t, NULL, tmp[i], bw4 * 4, b_dim, 0, refp,
1797*c0909341SAndroid Build Coastguard Worker                                   &f->frame_hdr->gmv[b->ref[i]]);
1798*c0909341SAndroid Build Coastguard Worker                 if (res) return res;
1799*c0909341SAndroid Build Coastguard Worker             } else {
1800*c0909341SAndroid Build Coastguard Worker                 res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0,
1801*c0909341SAndroid Build Coastguard Worker                          b->mv[i], refp, b->ref[i], filter_2d);
1802*c0909341SAndroid Build Coastguard Worker                 if (res) return res;
1803*c0909341SAndroid Build Coastguard Worker             }
1804*c0909341SAndroid Build Coastguard Worker         }
1805*c0909341SAndroid Build Coastguard Worker         switch (b->comp_type) {
1806*c0909341SAndroid Build Coastguard Worker         case COMP_INTER_AVG:
1807*c0909341SAndroid Build Coastguard Worker             dsp->mc.avg(dst, f->cur.stride[0], tmp[0], tmp[1],
1808*c0909341SAndroid Build Coastguard Worker                         bw4 * 4, bh4 * 4 HIGHBD_CALL_SUFFIX);
1809*c0909341SAndroid Build Coastguard Worker             break;
1810*c0909341SAndroid Build Coastguard Worker         case COMP_INTER_WEIGHTED_AVG:
1811*c0909341SAndroid Build Coastguard Worker             jnt_weight = f->jnt_weights[b->ref[0]][b->ref[1]];
1812*c0909341SAndroid Build Coastguard Worker             dsp->mc.w_avg(dst, f->cur.stride[0], tmp[0], tmp[1],
1813*c0909341SAndroid Build Coastguard Worker                           bw4 * 4, bh4 * 4, jnt_weight HIGHBD_CALL_SUFFIX);
1814*c0909341SAndroid Build Coastguard Worker             break;
1815*c0909341SAndroid Build Coastguard Worker         case COMP_INTER_SEG:
1816*c0909341SAndroid Build Coastguard Worker             dsp->mc.w_mask[chr_layout_idx](dst, f->cur.stride[0],
1817*c0909341SAndroid Build Coastguard Worker                                            tmp[b->mask_sign], tmp[!b->mask_sign],
1818*c0909341SAndroid Build Coastguard Worker                                            bw4 * 4, bh4 * 4, seg_mask,
1819*c0909341SAndroid Build Coastguard Worker                                            b->mask_sign HIGHBD_CALL_SUFFIX);
1820*c0909341SAndroid Build Coastguard Worker             mask = seg_mask;
1821*c0909341SAndroid Build Coastguard Worker             break;
1822*c0909341SAndroid Build Coastguard Worker         case COMP_INTER_WEDGE:
1823*c0909341SAndroid Build Coastguard Worker             mask = WEDGE_MASK(0, bs, 0, b->wedge_idx);
1824*c0909341SAndroid Build Coastguard Worker             dsp->mc.mask(dst, f->cur.stride[0],
1825*c0909341SAndroid Build Coastguard Worker                          tmp[b->mask_sign], tmp[!b->mask_sign],
1826*c0909341SAndroid Build Coastguard Worker                          bw4 * 4, bh4 * 4, mask HIGHBD_CALL_SUFFIX);
1827*c0909341SAndroid Build Coastguard Worker             if (has_chroma)
1828*c0909341SAndroid Build Coastguard Worker                 mask = WEDGE_MASK(chr_layout_idx, bs, b->mask_sign, b->wedge_idx);
1829*c0909341SAndroid Build Coastguard Worker             break;
1830*c0909341SAndroid Build Coastguard Worker         }
1831*c0909341SAndroid Build Coastguard Worker 
1832*c0909341SAndroid Build Coastguard Worker         // chroma
1833*c0909341SAndroid Build Coastguard Worker         if (has_chroma) for (int pl = 0; pl < 2; pl++) {
1834*c0909341SAndroid Build Coastguard Worker             for (int i = 0; i < 2; i++) {
1835*c0909341SAndroid Build Coastguard Worker                 const Dav1dThreadPicture *const refp = &f->refp[b->ref[i]];
1836*c0909341SAndroid Build Coastguard Worker                 if (b->inter_mode == GLOBALMV_GLOBALMV &&
1837*c0909341SAndroid Build Coastguard Worker                     imin(cbw4, cbh4) > 1 && f->gmv_warp_allowed[b->ref[i]])
1838*c0909341SAndroid Build Coastguard Worker                 {
1839*c0909341SAndroid Build Coastguard Worker                     res = warp_affine(t, NULL, tmp[i], bw4 * 4 >> ss_hor,
1840*c0909341SAndroid Build Coastguard Worker                                       b_dim, 1 + pl,
1841*c0909341SAndroid Build Coastguard Worker                                       refp, &f->frame_hdr->gmv[b->ref[i]]);
1842*c0909341SAndroid Build Coastguard Worker                     if (res) return res;
1843*c0909341SAndroid Build Coastguard Worker                 } else {
1844*c0909341SAndroid Build Coastguard Worker                     res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by,
1845*c0909341SAndroid Build Coastguard Worker                              1 + pl, b->mv[i], refp, b->ref[i], filter_2d);
1846*c0909341SAndroid Build Coastguard Worker                     if (res) return res;
1847*c0909341SAndroid Build Coastguard Worker                 }
1848*c0909341SAndroid Build Coastguard Worker             }
1849*c0909341SAndroid Build Coastguard Worker             pixel *const uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff;
1850*c0909341SAndroid Build Coastguard Worker             switch (b->comp_type) {
1851*c0909341SAndroid Build Coastguard Worker             case COMP_INTER_AVG:
1852*c0909341SAndroid Build Coastguard Worker                 dsp->mc.avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
1853*c0909341SAndroid Build Coastguard Worker                             bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver
1854*c0909341SAndroid Build Coastguard Worker                             HIGHBD_CALL_SUFFIX);
1855*c0909341SAndroid Build Coastguard Worker                 break;
1856*c0909341SAndroid Build Coastguard Worker             case COMP_INTER_WEIGHTED_AVG:
1857*c0909341SAndroid Build Coastguard Worker                 dsp->mc.w_avg(uvdst, f->cur.stride[1], tmp[0], tmp[1],
1858*c0909341SAndroid Build Coastguard Worker                               bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, jnt_weight
1859*c0909341SAndroid Build Coastguard Worker                               HIGHBD_CALL_SUFFIX);
1860*c0909341SAndroid Build Coastguard Worker                 break;
1861*c0909341SAndroid Build Coastguard Worker             case COMP_INTER_WEDGE:
1862*c0909341SAndroid Build Coastguard Worker             case COMP_INTER_SEG:
1863*c0909341SAndroid Build Coastguard Worker                 dsp->mc.mask(uvdst, f->cur.stride[1],
1864*c0909341SAndroid Build Coastguard Worker                              tmp[b->mask_sign], tmp[!b->mask_sign],
1865*c0909341SAndroid Build Coastguard Worker                              bw4 * 4 >> ss_hor, bh4 * 4 >> ss_ver, mask
1866*c0909341SAndroid Build Coastguard Worker                              HIGHBD_CALL_SUFFIX);
1867*c0909341SAndroid Build Coastguard Worker                 break;
1868*c0909341SAndroid Build Coastguard Worker             }
1869*c0909341SAndroid Build Coastguard Worker         }
1870*c0909341SAndroid Build Coastguard Worker     }
1871*c0909341SAndroid Build Coastguard Worker 
1872*c0909341SAndroid Build Coastguard Worker     if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
1873*c0909341SAndroid Build Coastguard Worker         hex_dump(dst, f->cur.stride[0], b_dim[0] * 4, b_dim[1] * 4, "y-pred");
1874*c0909341SAndroid Build Coastguard Worker         if (has_chroma) {
1875*c0909341SAndroid Build Coastguard Worker             hex_dump(&((pixel *) f->cur.data[1])[uvdstoff], f->cur.stride[1],
1876*c0909341SAndroid Build Coastguard Worker                      cbw4 * 4, cbh4 * 4, "u-pred");
1877*c0909341SAndroid Build Coastguard Worker             hex_dump(&((pixel *) f->cur.data[2])[uvdstoff], f->cur.stride[1],
1878*c0909341SAndroid Build Coastguard Worker                      cbw4 * 4, cbh4 * 4, "v-pred");
1879*c0909341SAndroid Build Coastguard Worker         }
1880*c0909341SAndroid Build Coastguard Worker     }
1881*c0909341SAndroid Build Coastguard Worker 
1882*c0909341SAndroid Build Coastguard Worker     const int cw4 = (w4 + ss_hor) >> ss_hor, ch4 = (h4 + ss_ver) >> ss_ver;
1883*c0909341SAndroid Build Coastguard Worker 
1884*c0909341SAndroid Build Coastguard Worker     if (b->skip) {
1885*c0909341SAndroid Build Coastguard Worker         // reset coef contexts
1886*c0909341SAndroid Build Coastguard Worker         BlockContext *const a = t->a;
1887*c0909341SAndroid Build Coastguard Worker         dav1d_memset_pow2[b_dim[2]](&a->lcoef[bx4], 0x40);
1888*c0909341SAndroid Build Coastguard Worker         dav1d_memset_pow2[b_dim[3]](&t->l.lcoef[by4], 0x40);
1889*c0909341SAndroid Build Coastguard Worker         if (has_chroma) {
1890*c0909341SAndroid Build Coastguard Worker             dav1d_memset_pow2_fn memset_cw = dav1d_memset_pow2[ulog2(cbw4)];
1891*c0909341SAndroid Build Coastguard Worker             dav1d_memset_pow2_fn memset_ch = dav1d_memset_pow2[ulog2(cbh4)];
1892*c0909341SAndroid Build Coastguard Worker             memset_cw(&a->ccoef[0][cbx4], 0x40);
1893*c0909341SAndroid Build Coastguard Worker             memset_cw(&a->ccoef[1][cbx4], 0x40);
1894*c0909341SAndroid Build Coastguard Worker             memset_ch(&t->l.ccoef[0][cby4], 0x40);
1895*c0909341SAndroid Build Coastguard Worker             memset_ch(&t->l.ccoef[1][cby4], 0x40);
1896*c0909341SAndroid Build Coastguard Worker         }
1897*c0909341SAndroid Build Coastguard Worker         return 0;
1898*c0909341SAndroid Build Coastguard Worker     }
1899*c0909341SAndroid Build Coastguard Worker 
1900*c0909341SAndroid Build Coastguard Worker     const TxfmInfo *const uvtx = &dav1d_txfm_dimensions[b->uvtx];
1901*c0909341SAndroid Build Coastguard Worker     const TxfmInfo *const ytx = &dav1d_txfm_dimensions[b->max_ytx];
1902*c0909341SAndroid Build Coastguard Worker     const uint16_t tx_split[2] = { b->tx_split0, b->tx_split1 };
1903*c0909341SAndroid Build Coastguard Worker 
1904*c0909341SAndroid Build Coastguard Worker     for (int init_y = 0; init_y < bh4; init_y += 16) {
1905*c0909341SAndroid Build Coastguard Worker         for (int init_x = 0; init_x < bw4; init_x += 16) {
1906*c0909341SAndroid Build Coastguard Worker             // coefficient coding & inverse transforms
1907*c0909341SAndroid Build Coastguard Worker             int y_off = !!init_y, y;
1908*c0909341SAndroid Build Coastguard Worker             dst += PXSTRIDE(f->cur.stride[0]) * 4 * init_y;
1909*c0909341SAndroid Build Coastguard Worker             for (y = init_y, t->by += init_y; y < imin(h4, init_y + 16);
1910*c0909341SAndroid Build Coastguard Worker                  y += ytx->h, y_off++)
1911*c0909341SAndroid Build Coastguard Worker             {
1912*c0909341SAndroid Build Coastguard Worker                 int x, x_off = !!init_x;
1913*c0909341SAndroid Build Coastguard Worker                 for (x = init_x, t->bx += init_x; x < imin(w4, init_x + 16);
1914*c0909341SAndroid Build Coastguard Worker                      x += ytx->w, x_off++)
1915*c0909341SAndroid Build Coastguard Worker                 {
1916*c0909341SAndroid Build Coastguard Worker                     read_coef_tree(t, bs, b, b->max_ytx, 0, tx_split,
1917*c0909341SAndroid Build Coastguard Worker                                    x_off, y_off, &dst[x * 4]);
1918*c0909341SAndroid Build Coastguard Worker                     t->bx += ytx->w;
1919*c0909341SAndroid Build Coastguard Worker                 }
1920*c0909341SAndroid Build Coastguard Worker                 dst += PXSTRIDE(f->cur.stride[0]) * 4 * ytx->h;
1921*c0909341SAndroid Build Coastguard Worker                 t->bx -= x;
1922*c0909341SAndroid Build Coastguard Worker                 t->by += ytx->h;
1923*c0909341SAndroid Build Coastguard Worker             }
1924*c0909341SAndroid Build Coastguard Worker             dst -= PXSTRIDE(f->cur.stride[0]) * 4 * y;
1925*c0909341SAndroid Build Coastguard Worker             t->by -= y;
1926*c0909341SAndroid Build Coastguard Worker 
1927*c0909341SAndroid Build Coastguard Worker             // chroma coefs and inverse transform
1928*c0909341SAndroid Build Coastguard Worker             if (has_chroma) for (int pl = 0; pl < 2; pl++) {
1929*c0909341SAndroid Build Coastguard Worker                 pixel *uvdst = ((pixel *) f->cur.data[1 + pl]) + uvdstoff +
1930*c0909341SAndroid Build Coastguard Worker                     (PXSTRIDE(f->cur.stride[1]) * init_y * 4 >> ss_ver);
1931*c0909341SAndroid Build Coastguard Worker                 for (y = init_y >> ss_ver, t->by += init_y;
1932*c0909341SAndroid Build Coastguard Worker                      y < imin(ch4, (init_y + 16) >> ss_ver); y += uvtx->h)
1933*c0909341SAndroid Build Coastguard Worker                 {
1934*c0909341SAndroid Build Coastguard Worker                     int x;
1935*c0909341SAndroid Build Coastguard Worker                     for (x = init_x >> ss_hor, t->bx += init_x;
1936*c0909341SAndroid Build Coastguard Worker                          x < imin(cw4, (init_x + 16) >> ss_hor); x += uvtx->w)
1937*c0909341SAndroid Build Coastguard Worker                     {
1938*c0909341SAndroid Build Coastguard Worker                         coef *cf;
1939*c0909341SAndroid Build Coastguard Worker                         int eob;
1940*c0909341SAndroid Build Coastguard Worker                         enum TxfmType txtp;
1941*c0909341SAndroid Build Coastguard Worker                         if (t->frame_thread.pass) {
1942*c0909341SAndroid Build Coastguard Worker                             const int p = t->frame_thread.pass & 1;
1943*c0909341SAndroid Build Coastguard Worker                             const int cbi = *ts->frame_thread[p].cbi++;
1944*c0909341SAndroid Build Coastguard Worker                             cf = ts->frame_thread[p].cf;
1945*c0909341SAndroid Build Coastguard Worker                             ts->frame_thread[p].cf += uvtx->w * uvtx->h * 16;
1946*c0909341SAndroid Build Coastguard Worker                             eob  = cbi >> 5;
1947*c0909341SAndroid Build Coastguard Worker                             txtp = cbi & 0x1f;
1948*c0909341SAndroid Build Coastguard Worker                         } else {
1949*c0909341SAndroid Build Coastguard Worker                             uint8_t cf_ctx;
1950*c0909341SAndroid Build Coastguard Worker                             cf = bitfn(t->cf);
1951*c0909341SAndroid Build Coastguard Worker                             txtp = t->scratch.txtp_map[(by4 + (y << ss_ver)) * 32 +
1952*c0909341SAndroid Build Coastguard Worker                                                         bx4 + (x << ss_hor)];
1953*c0909341SAndroid Build Coastguard Worker                             eob = decode_coefs(t, &t->a->ccoef[pl][cbx4 + x],
1954*c0909341SAndroid Build Coastguard Worker                                                &t->l.ccoef[pl][cby4 + y],
1955*c0909341SAndroid Build Coastguard Worker                                                b->uvtx, bs, b, 0, 1 + pl,
1956*c0909341SAndroid Build Coastguard Worker                                                cf, &txtp, &cf_ctx);
1957*c0909341SAndroid Build Coastguard Worker                             if (DEBUG_BLOCK_INFO)
1958*c0909341SAndroid Build Coastguard Worker                                 printf("Post-uv-cf-blk[pl=%d,tx=%d,"
1959*c0909341SAndroid Build Coastguard Worker                                        "txtp=%d,eob=%d]: r=%d\n",
1960*c0909341SAndroid Build Coastguard Worker                                        pl, b->uvtx, txtp, eob, ts->msac.rng);
1961*c0909341SAndroid Build Coastguard Worker                             int ctw = imin(uvtx->w, (f->bw - t->bx + ss_hor) >> ss_hor);
1962*c0909341SAndroid Build Coastguard Worker                             int cth = imin(uvtx->h, (f->bh - t->by + ss_ver) >> ss_ver);
1963*c0909341SAndroid Build Coastguard Worker                             dav1d_memset_likely_pow2(&t->a->ccoef[pl][cbx4 + x], cf_ctx, ctw);
1964*c0909341SAndroid Build Coastguard Worker                             dav1d_memset_likely_pow2(&t->l.ccoef[pl][cby4 + y], cf_ctx, cth);
1965*c0909341SAndroid Build Coastguard Worker                         }
1966*c0909341SAndroid Build Coastguard Worker                         if (eob >= 0) {
1967*c0909341SAndroid Build Coastguard Worker                             if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
1968*c0909341SAndroid Build Coastguard Worker                                 coef_dump(cf, uvtx->h * 4, uvtx->w * 4, 3, "dq");
1969*c0909341SAndroid Build Coastguard Worker                             dsp->itx.itxfm_add[b->uvtx]
1970*c0909341SAndroid Build Coastguard Worker                                               [txtp](&uvdst[4 * x],
1971*c0909341SAndroid Build Coastguard Worker                                                      f->cur.stride[1],
1972*c0909341SAndroid Build Coastguard Worker                                                      cf, eob HIGHBD_CALL_SUFFIX);
1973*c0909341SAndroid Build Coastguard Worker                             if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS)
1974*c0909341SAndroid Build Coastguard Worker                                 hex_dump(&uvdst[4 * x], f->cur.stride[1],
1975*c0909341SAndroid Build Coastguard Worker                                          uvtx->w * 4, uvtx->h * 4, "recon");
1976*c0909341SAndroid Build Coastguard Worker                         }
1977*c0909341SAndroid Build Coastguard Worker                         t->bx += uvtx->w << ss_hor;
1978*c0909341SAndroid Build Coastguard Worker                     }
1979*c0909341SAndroid Build Coastguard Worker                     uvdst += PXSTRIDE(f->cur.stride[1]) * 4 * uvtx->h;
1980*c0909341SAndroid Build Coastguard Worker                     t->bx -= x << ss_hor;
1981*c0909341SAndroid Build Coastguard Worker                     t->by += uvtx->h << ss_ver;
1982*c0909341SAndroid Build Coastguard Worker                 }
1983*c0909341SAndroid Build Coastguard Worker                 t->by -= y << ss_ver;
1984*c0909341SAndroid Build Coastguard Worker             }
1985*c0909341SAndroid Build Coastguard Worker         }
1986*c0909341SAndroid Build Coastguard Worker     }
1987*c0909341SAndroid Build Coastguard Worker     return 0;
1988*c0909341SAndroid Build Coastguard Worker }
1989*c0909341SAndroid Build Coastguard Worker 
bytefn(dav1d_filter_sbrow_deblock_cols)1990*c0909341SAndroid Build Coastguard Worker void bytefn(dav1d_filter_sbrow_deblock_cols)(Dav1dFrameContext *const f, const int sby) {
1991*c0909341SAndroid Build Coastguard Worker     if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_DEBLOCK) ||
1992*c0909341SAndroid Build Coastguard Worker         (!f->frame_hdr->loopfilter.level_y[0] && !f->frame_hdr->loopfilter.level_y[1]))
1993*c0909341SAndroid Build Coastguard Worker     {
1994*c0909341SAndroid Build Coastguard Worker         return;
1995*c0909341SAndroid Build Coastguard Worker     }
1996*c0909341SAndroid Build Coastguard Worker     const int y = sby * f->sb_step * 4;
1997*c0909341SAndroid Build Coastguard Worker     const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
1998*c0909341SAndroid Build Coastguard Worker     pixel *const p[3] = {
1999*c0909341SAndroid Build Coastguard Worker         f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
2000*c0909341SAndroid Build Coastguard Worker         f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
2001*c0909341SAndroid Build Coastguard Worker         f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
2002*c0909341SAndroid Build Coastguard Worker     };
2003*c0909341SAndroid Build Coastguard Worker     Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
2004*c0909341SAndroid Build Coastguard Worker     bytefn(dav1d_loopfilter_sbrow_cols)(f, p, mask, sby,
2005*c0909341SAndroid Build Coastguard Worker                                         f->lf.start_of_tile_row[sby]);
2006*c0909341SAndroid Build Coastguard Worker }
2007*c0909341SAndroid Build Coastguard Worker 
bytefn(dav1d_filter_sbrow_deblock_rows)2008*c0909341SAndroid Build Coastguard Worker void bytefn(dav1d_filter_sbrow_deblock_rows)(Dav1dFrameContext *const f, const int sby) {
2009*c0909341SAndroid Build Coastguard Worker     const int y = sby * f->sb_step * 4;
2010*c0909341SAndroid Build Coastguard Worker     const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
2011*c0909341SAndroid Build Coastguard Worker     pixel *const p[3] = {
2012*c0909341SAndroid Build Coastguard Worker         f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
2013*c0909341SAndroid Build Coastguard Worker         f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
2014*c0909341SAndroid Build Coastguard Worker         f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
2015*c0909341SAndroid Build Coastguard Worker     };
2016*c0909341SAndroid Build Coastguard Worker     Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
2017*c0909341SAndroid Build Coastguard Worker     if (f->c->inloop_filters & DAV1D_INLOOPFILTER_DEBLOCK &&
2018*c0909341SAndroid Build Coastguard Worker         (f->frame_hdr->loopfilter.level_y[0] || f->frame_hdr->loopfilter.level_y[1]))
2019*c0909341SAndroid Build Coastguard Worker     {
2020*c0909341SAndroid Build Coastguard Worker         bytefn(dav1d_loopfilter_sbrow_rows)(f, p, mask, sby);
2021*c0909341SAndroid Build Coastguard Worker     }
2022*c0909341SAndroid Build Coastguard Worker     if (f->seq_hdr->cdef || f->lf.restore_planes) {
2023*c0909341SAndroid Build Coastguard Worker         // Store loop filtered pixels required by CDEF / LR
2024*c0909341SAndroid Build Coastguard Worker         bytefn(dav1d_copy_lpf)(f, p, sby);
2025*c0909341SAndroid Build Coastguard Worker     }
2026*c0909341SAndroid Build Coastguard Worker }
2027*c0909341SAndroid Build Coastguard Worker 
bytefn(dav1d_filter_sbrow_cdef)2028*c0909341SAndroid Build Coastguard Worker void bytefn(dav1d_filter_sbrow_cdef)(Dav1dTaskContext *const tc, const int sby) {
2029*c0909341SAndroid Build Coastguard Worker     const Dav1dFrameContext *const f = tc->f;
2030*c0909341SAndroid Build Coastguard Worker     if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_CDEF)) return;
2031*c0909341SAndroid Build Coastguard Worker     const int sbsz = f->sb_step;
2032*c0909341SAndroid Build Coastguard Worker     const int y = sby * sbsz * 4;
2033*c0909341SAndroid Build Coastguard Worker     const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
2034*c0909341SAndroid Build Coastguard Worker     pixel *const p[3] = {
2035*c0909341SAndroid Build Coastguard Worker         f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
2036*c0909341SAndroid Build Coastguard Worker         f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
2037*c0909341SAndroid Build Coastguard Worker         f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
2038*c0909341SAndroid Build Coastguard Worker     };
2039*c0909341SAndroid Build Coastguard Worker     Av1Filter *prev_mask = f->lf.mask + ((sby - 1) >> !f->seq_hdr->sb128) * f->sb128w;
2040*c0909341SAndroid Build Coastguard Worker     Av1Filter *mask = f->lf.mask + (sby >> !f->seq_hdr->sb128) * f->sb128w;
2041*c0909341SAndroid Build Coastguard Worker     const int start = sby * sbsz;
2042*c0909341SAndroid Build Coastguard Worker     if (sby) {
2043*c0909341SAndroid Build Coastguard Worker         const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
2044*c0909341SAndroid Build Coastguard Worker         pixel *p_up[3] = {
2045*c0909341SAndroid Build Coastguard Worker             p[0] - 8 * PXSTRIDE(f->cur.stride[0]),
2046*c0909341SAndroid Build Coastguard Worker             p[1] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
2047*c0909341SAndroid Build Coastguard Worker             p[2] - (8 * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
2048*c0909341SAndroid Build Coastguard Worker         };
2049*c0909341SAndroid Build Coastguard Worker         bytefn(dav1d_cdef_brow)(tc, p_up, prev_mask, start - 2, start, 1, sby);
2050*c0909341SAndroid Build Coastguard Worker     }
2051*c0909341SAndroid Build Coastguard Worker     const int n_blks = sbsz - 2 * (sby + 1 < f->sbh);
2052*c0909341SAndroid Build Coastguard Worker     const int end = imin(start + n_blks, f->bh);
2053*c0909341SAndroid Build Coastguard Worker     bytefn(dav1d_cdef_brow)(tc, p, mask, start, end, 0, sby);
2054*c0909341SAndroid Build Coastguard Worker }
2055*c0909341SAndroid Build Coastguard Worker 
bytefn(dav1d_filter_sbrow_resize)2056*c0909341SAndroid Build Coastguard Worker void bytefn(dav1d_filter_sbrow_resize)(Dav1dFrameContext *const f, const int sby) {
2057*c0909341SAndroid Build Coastguard Worker     const int sbsz = f->sb_step;
2058*c0909341SAndroid Build Coastguard Worker     const int y = sby * sbsz * 4;
2059*c0909341SAndroid Build Coastguard Worker     const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
2060*c0909341SAndroid Build Coastguard Worker     const pixel *const p[3] = {
2061*c0909341SAndroid Build Coastguard Worker         f->lf.p[0] + y * PXSTRIDE(f->cur.stride[0]),
2062*c0909341SAndroid Build Coastguard Worker         f->lf.p[1] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver),
2063*c0909341SAndroid Build Coastguard Worker         f->lf.p[2] + (y * PXSTRIDE(f->cur.stride[1]) >> ss_ver)
2064*c0909341SAndroid Build Coastguard Worker     };
2065*c0909341SAndroid Build Coastguard Worker     pixel *const sr_p[3] = {
2066*c0909341SAndroid Build Coastguard Worker         f->lf.sr_p[0] + y * PXSTRIDE(f->sr_cur.p.stride[0]),
2067*c0909341SAndroid Build Coastguard Worker         f->lf.sr_p[1] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver),
2068*c0909341SAndroid Build Coastguard Worker         f->lf.sr_p[2] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver)
2069*c0909341SAndroid Build Coastguard Worker     };
2070*c0909341SAndroid Build Coastguard Worker     const int has_chroma = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400;
2071*c0909341SAndroid Build Coastguard Worker     for (int pl = 0; pl < 1 + 2 * has_chroma; pl++) {
2072*c0909341SAndroid Build Coastguard Worker         const int ss_ver = pl && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
2073*c0909341SAndroid Build Coastguard Worker         const int h_start = 8 * !!sby >> ss_ver;
2074*c0909341SAndroid Build Coastguard Worker         const ptrdiff_t dst_stride = f->sr_cur.p.stride[!!pl];
2075*c0909341SAndroid Build Coastguard Worker         pixel *dst = sr_p[pl] - h_start * PXSTRIDE(dst_stride);
2076*c0909341SAndroid Build Coastguard Worker         const ptrdiff_t src_stride = f->cur.stride[!!pl];
2077*c0909341SAndroid Build Coastguard Worker         const pixel *src = p[pl] - h_start * PXSTRIDE(src_stride);
2078*c0909341SAndroid Build Coastguard Worker         const int h_end = 4 * (sbsz - 2 * (sby + 1 < f->sbh)) >> ss_ver;
2079*c0909341SAndroid Build Coastguard Worker         const int ss_hor = pl && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
2080*c0909341SAndroid Build Coastguard Worker         const int dst_w = (f->sr_cur.p.p.w + ss_hor) >> ss_hor;
2081*c0909341SAndroid Build Coastguard Worker         const int src_w = (4 * f->bw + ss_hor) >> ss_hor;
2082*c0909341SAndroid Build Coastguard Worker         const int img_h = (f->cur.p.h - sbsz * 4 * sby + ss_ver) >> ss_ver;
2083*c0909341SAndroid Build Coastguard Worker 
2084*c0909341SAndroid Build Coastguard Worker         f->dsp->mc.resize(dst, dst_stride, src, src_stride, dst_w,
2085*c0909341SAndroid Build Coastguard Worker                           imin(img_h, h_end) + h_start, src_w,
2086*c0909341SAndroid Build Coastguard Worker                           f->resize_step[!!pl], f->resize_start[!!pl]
2087*c0909341SAndroid Build Coastguard Worker                           HIGHBD_CALL_SUFFIX);
2088*c0909341SAndroid Build Coastguard Worker     }
2089*c0909341SAndroid Build Coastguard Worker }
2090*c0909341SAndroid Build Coastguard Worker 
bytefn(dav1d_filter_sbrow_lr)2091*c0909341SAndroid Build Coastguard Worker void bytefn(dav1d_filter_sbrow_lr)(Dav1dFrameContext *const f, const int sby) {
2092*c0909341SAndroid Build Coastguard Worker     if (!(f->c->inloop_filters & DAV1D_INLOOPFILTER_RESTORATION)) return;
2093*c0909341SAndroid Build Coastguard Worker     const int y = sby * f->sb_step * 4;
2094*c0909341SAndroid Build Coastguard Worker     const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
2095*c0909341SAndroid Build Coastguard Worker     pixel *const sr_p[3] = {
2096*c0909341SAndroid Build Coastguard Worker         f->lf.sr_p[0] + y * PXSTRIDE(f->sr_cur.p.stride[0]),
2097*c0909341SAndroid Build Coastguard Worker         f->lf.sr_p[1] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver),
2098*c0909341SAndroid Build Coastguard Worker         f->lf.sr_p[2] + (y * PXSTRIDE(f->sr_cur.p.stride[1]) >> ss_ver)
2099*c0909341SAndroid Build Coastguard Worker     };
2100*c0909341SAndroid Build Coastguard Worker     bytefn(dav1d_lr_sbrow)(f, sr_p, sby);
2101*c0909341SAndroid Build Coastguard Worker }
2102*c0909341SAndroid Build Coastguard Worker 
bytefn(dav1d_filter_sbrow)2103*c0909341SAndroid Build Coastguard Worker void bytefn(dav1d_filter_sbrow)(Dav1dFrameContext *const f, const int sby) {
2104*c0909341SAndroid Build Coastguard Worker     bytefn(dav1d_filter_sbrow_deblock_cols)(f, sby);
2105*c0909341SAndroid Build Coastguard Worker     bytefn(dav1d_filter_sbrow_deblock_rows)(f, sby);
2106*c0909341SAndroid Build Coastguard Worker     if (f->seq_hdr->cdef)
2107*c0909341SAndroid Build Coastguard Worker         bytefn(dav1d_filter_sbrow_cdef)(f->c->tc, sby);
2108*c0909341SAndroid Build Coastguard Worker     if (f->frame_hdr->width[0] != f->frame_hdr->width[1])
2109*c0909341SAndroid Build Coastguard Worker         bytefn(dav1d_filter_sbrow_resize)(f, sby);
2110*c0909341SAndroid Build Coastguard Worker     if (f->lf.restore_planes)
2111*c0909341SAndroid Build Coastguard Worker         bytefn(dav1d_filter_sbrow_lr)(f, sby);
2112*c0909341SAndroid Build Coastguard Worker }
2113*c0909341SAndroid Build Coastguard Worker 
bytefn(dav1d_backup_ipred_edge)2114*c0909341SAndroid Build Coastguard Worker void bytefn(dav1d_backup_ipred_edge)(Dav1dTaskContext *const t) {
2115*c0909341SAndroid Build Coastguard Worker     const Dav1dFrameContext *const f = t->f;
2116*c0909341SAndroid Build Coastguard Worker     Dav1dTileState *const ts = t->ts;
2117*c0909341SAndroid Build Coastguard Worker     const int sby = t->by >> f->sb_shift;
2118*c0909341SAndroid Build Coastguard Worker     const int sby_off = f->sb128w * 128 * sby;
2119*c0909341SAndroid Build Coastguard Worker     const int x_off = ts->tiling.col_start;
2120*c0909341SAndroid Build Coastguard Worker 
2121*c0909341SAndroid Build Coastguard Worker     const pixel *const y =
2122*c0909341SAndroid Build Coastguard Worker         ((const pixel *) f->cur.data[0]) + x_off * 4 +
2123*c0909341SAndroid Build Coastguard Worker                     ((t->by + f->sb_step) * 4 - 1) * PXSTRIDE(f->cur.stride[0]);
2124*c0909341SAndroid Build Coastguard Worker     pixel_copy(&f->ipred_edge[0][sby_off + x_off * 4], y,
2125*c0909341SAndroid Build Coastguard Worker                4 * (ts->tiling.col_end - x_off));
2126*c0909341SAndroid Build Coastguard Worker 
2127*c0909341SAndroid Build Coastguard Worker     if (f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I400) {
2128*c0909341SAndroid Build Coastguard Worker         const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420;
2129*c0909341SAndroid Build Coastguard Worker         const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444;
2130*c0909341SAndroid Build Coastguard Worker 
2131*c0909341SAndroid Build Coastguard Worker         const ptrdiff_t uv_off = (x_off * 4 >> ss_hor) +
2132*c0909341SAndroid Build Coastguard Worker             (((t->by + f->sb_step) * 4 >> ss_ver) - 1) * PXSTRIDE(f->cur.stride[1]);
2133*c0909341SAndroid Build Coastguard Worker         for (int pl = 1; pl <= 2; pl++)
2134*c0909341SAndroid Build Coastguard Worker             pixel_copy(&f->ipred_edge[pl][sby_off + (x_off * 4 >> ss_hor)],
2135*c0909341SAndroid Build Coastguard Worker                        &((const pixel *) f->cur.data[pl])[uv_off],
2136*c0909341SAndroid Build Coastguard Worker                        4 * (ts->tiling.col_end - x_off) >> ss_hor);
2137*c0909341SAndroid Build Coastguard Worker     }
2138*c0909341SAndroid Build Coastguard Worker }
2139*c0909341SAndroid Build Coastguard Worker 
bytefn(dav1d_copy_pal_block_y)2140*c0909341SAndroid Build Coastguard Worker void bytefn(dav1d_copy_pal_block_y)(Dav1dTaskContext *const t,
2141*c0909341SAndroid Build Coastguard Worker                                     const int bx4, const int by4,
2142*c0909341SAndroid Build Coastguard Worker                                     const int bw4, const int bh4)
2143*c0909341SAndroid Build Coastguard Worker 
2144*c0909341SAndroid Build Coastguard Worker {
2145*c0909341SAndroid Build Coastguard Worker     const Dav1dFrameContext *const f = t->f;
2146*c0909341SAndroid Build Coastguard Worker     pixel *const pal = t->frame_thread.pass ?
2147*c0909341SAndroid Build Coastguard Worker         f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
2148*c0909341SAndroid Build Coastguard Worker                             ((t->bx >> 1) + (t->by & 1))][0] :
2149*c0909341SAndroid Build Coastguard Worker         bytefn(t->scratch.pal)[0];
2150*c0909341SAndroid Build Coastguard Worker     for (int x = 0; x < bw4; x++)
2151*c0909341SAndroid Build Coastguard Worker         memcpy(bytefn(t->al_pal)[0][bx4 + x][0], pal, 8 * sizeof(pixel));
2152*c0909341SAndroid Build Coastguard Worker     for (int y = 0; y < bh4; y++)
2153*c0909341SAndroid Build Coastguard Worker         memcpy(bytefn(t->al_pal)[1][by4 + y][0], pal, 8 * sizeof(pixel));
2154*c0909341SAndroid Build Coastguard Worker }
2155*c0909341SAndroid Build Coastguard Worker 
bytefn(dav1d_copy_pal_block_uv)2156*c0909341SAndroid Build Coastguard Worker void bytefn(dav1d_copy_pal_block_uv)(Dav1dTaskContext *const t,
2157*c0909341SAndroid Build Coastguard Worker                                      const int bx4, const int by4,
2158*c0909341SAndroid Build Coastguard Worker                                      const int bw4, const int bh4)
2159*c0909341SAndroid Build Coastguard Worker 
2160*c0909341SAndroid Build Coastguard Worker {
2161*c0909341SAndroid Build Coastguard Worker     const Dav1dFrameContext *const f = t->f;
2162*c0909341SAndroid Build Coastguard Worker     const pixel (*const pal)[8] = t->frame_thread.pass ?
2163*c0909341SAndroid Build Coastguard Worker         f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
2164*c0909341SAndroid Build Coastguard Worker                             ((t->bx >> 1) + (t->by & 1))] :
2165*c0909341SAndroid Build Coastguard Worker         bytefn(t->scratch.pal);
2166*c0909341SAndroid Build Coastguard Worker     // see aomedia bug 2183 for why we use luma coordinates here
2167*c0909341SAndroid Build Coastguard Worker     for (int pl = 1; pl <= 2; pl++) {
2168*c0909341SAndroid Build Coastguard Worker         for (int x = 0; x < bw4; x++)
2169*c0909341SAndroid Build Coastguard Worker             memcpy(bytefn(t->al_pal)[0][bx4 + x][pl], pal[pl], 8 * sizeof(pixel));
2170*c0909341SAndroid Build Coastguard Worker         for (int y = 0; y < bh4; y++)
2171*c0909341SAndroid Build Coastguard Worker             memcpy(bytefn(t->al_pal)[1][by4 + y][pl], pal[pl], 8 * sizeof(pixel));
2172*c0909341SAndroid Build Coastguard Worker     }
2173*c0909341SAndroid Build Coastguard Worker }
2174*c0909341SAndroid Build Coastguard Worker 
bytefn(dav1d_read_pal_plane)2175*c0909341SAndroid Build Coastguard Worker void bytefn(dav1d_read_pal_plane)(Dav1dTaskContext *const t, Av1Block *const b,
2176*c0909341SAndroid Build Coastguard Worker                                   const int pl, const int sz_ctx,
2177*c0909341SAndroid Build Coastguard Worker                                   const int bx4, const int by4)
2178*c0909341SAndroid Build Coastguard Worker {
2179*c0909341SAndroid Build Coastguard Worker     Dav1dTileState *const ts = t->ts;
2180*c0909341SAndroid Build Coastguard Worker     const Dav1dFrameContext *const f = t->f;
2181*c0909341SAndroid Build Coastguard Worker     const int pal_sz = b->pal_sz[pl] = dav1d_msac_decode_symbol_adapt8(&ts->msac,
2182*c0909341SAndroid Build Coastguard Worker                                            ts->cdf.m.pal_sz[pl][sz_ctx], 6) + 2;
2183*c0909341SAndroid Build Coastguard Worker     pixel cache[16], used_cache[8];
2184*c0909341SAndroid Build Coastguard Worker     int l_cache = pl ? t->pal_sz_uv[1][by4] : t->l.pal_sz[by4];
2185*c0909341SAndroid Build Coastguard Worker     int n_cache = 0;
2186*c0909341SAndroid Build Coastguard Worker     // don't reuse above palette outside SB64 boundaries
2187*c0909341SAndroid Build Coastguard Worker     int a_cache = by4 & 15 ? pl ? t->pal_sz_uv[0][bx4] : t->a->pal_sz[bx4] : 0;
2188*c0909341SAndroid Build Coastguard Worker     const pixel *l = bytefn(t->al_pal)[1][by4][pl];
2189*c0909341SAndroid Build Coastguard Worker     const pixel *a = bytefn(t->al_pal)[0][bx4][pl];
2190*c0909341SAndroid Build Coastguard Worker 
2191*c0909341SAndroid Build Coastguard Worker     // fill/sort cache
2192*c0909341SAndroid Build Coastguard Worker     while (l_cache && a_cache) {
2193*c0909341SAndroid Build Coastguard Worker         if (*l < *a) {
2194*c0909341SAndroid Build Coastguard Worker             if (!n_cache || cache[n_cache - 1] != *l)
2195*c0909341SAndroid Build Coastguard Worker                 cache[n_cache++] = *l;
2196*c0909341SAndroid Build Coastguard Worker             l++;
2197*c0909341SAndroid Build Coastguard Worker             l_cache--;
2198*c0909341SAndroid Build Coastguard Worker         } else {
2199*c0909341SAndroid Build Coastguard Worker             if (*a == *l) {
2200*c0909341SAndroid Build Coastguard Worker                 l++;
2201*c0909341SAndroid Build Coastguard Worker                 l_cache--;
2202*c0909341SAndroid Build Coastguard Worker             }
2203*c0909341SAndroid Build Coastguard Worker             if (!n_cache || cache[n_cache - 1] != *a)
2204*c0909341SAndroid Build Coastguard Worker                 cache[n_cache++] = *a;
2205*c0909341SAndroid Build Coastguard Worker             a++;
2206*c0909341SAndroid Build Coastguard Worker             a_cache--;
2207*c0909341SAndroid Build Coastguard Worker         }
2208*c0909341SAndroid Build Coastguard Worker     }
2209*c0909341SAndroid Build Coastguard Worker     if (l_cache) {
2210*c0909341SAndroid Build Coastguard Worker         do {
2211*c0909341SAndroid Build Coastguard Worker             if (!n_cache || cache[n_cache - 1] != *l)
2212*c0909341SAndroid Build Coastguard Worker                 cache[n_cache++] = *l;
2213*c0909341SAndroid Build Coastguard Worker             l++;
2214*c0909341SAndroid Build Coastguard Worker         } while (--l_cache > 0);
2215*c0909341SAndroid Build Coastguard Worker     } else if (a_cache) {
2216*c0909341SAndroid Build Coastguard Worker         do {
2217*c0909341SAndroid Build Coastguard Worker             if (!n_cache || cache[n_cache - 1] != *a)
2218*c0909341SAndroid Build Coastguard Worker                 cache[n_cache++] = *a;
2219*c0909341SAndroid Build Coastguard Worker             a++;
2220*c0909341SAndroid Build Coastguard Worker         } while (--a_cache > 0);
2221*c0909341SAndroid Build Coastguard Worker     }
2222*c0909341SAndroid Build Coastguard Worker 
2223*c0909341SAndroid Build Coastguard Worker     // find reused cache entries
2224*c0909341SAndroid Build Coastguard Worker     int i = 0;
2225*c0909341SAndroid Build Coastguard Worker     for (int n = 0; n < n_cache && i < pal_sz; n++)
2226*c0909341SAndroid Build Coastguard Worker         if (dav1d_msac_decode_bool_equi(&ts->msac))
2227*c0909341SAndroid Build Coastguard Worker             used_cache[i++] = cache[n];
2228*c0909341SAndroid Build Coastguard Worker     const int n_used_cache = i;
2229*c0909341SAndroid Build Coastguard Worker 
2230*c0909341SAndroid Build Coastguard Worker     // parse new entries
2231*c0909341SAndroid Build Coastguard Worker     pixel *const pal = t->frame_thread.pass ?
2232*c0909341SAndroid Build Coastguard Worker         f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
2233*c0909341SAndroid Build Coastguard Worker                             ((t->bx >> 1) + (t->by & 1))][pl] :
2234*c0909341SAndroid Build Coastguard Worker         bytefn(t->scratch.pal)[pl];
2235*c0909341SAndroid Build Coastguard Worker     if (i < pal_sz) {
2236*c0909341SAndroid Build Coastguard Worker         const int bpc = BITDEPTH == 8 ? 8 : f->cur.p.bpc;
2237*c0909341SAndroid Build Coastguard Worker         int prev = pal[i++] = dav1d_msac_decode_bools(&ts->msac, bpc);
2238*c0909341SAndroid Build Coastguard Worker 
2239*c0909341SAndroid Build Coastguard Worker         if (i < pal_sz) {
2240*c0909341SAndroid Build Coastguard Worker             int bits = bpc - 3 + dav1d_msac_decode_bools(&ts->msac, 2);
2241*c0909341SAndroid Build Coastguard Worker             const int max = (1 << bpc) - 1;
2242*c0909341SAndroid Build Coastguard Worker 
2243*c0909341SAndroid Build Coastguard Worker             do {
2244*c0909341SAndroid Build Coastguard Worker                 const int delta = dav1d_msac_decode_bools(&ts->msac, bits);
2245*c0909341SAndroid Build Coastguard Worker                 prev = pal[i++] = imin(prev + delta + !pl, max);
2246*c0909341SAndroid Build Coastguard Worker                 if (prev + !pl >= max) {
2247*c0909341SAndroid Build Coastguard Worker                     for (; i < pal_sz; i++)
2248*c0909341SAndroid Build Coastguard Worker                         pal[i] = max;
2249*c0909341SAndroid Build Coastguard Worker                     break;
2250*c0909341SAndroid Build Coastguard Worker                 }
2251*c0909341SAndroid Build Coastguard Worker                 bits = imin(bits, 1 + ulog2(max - prev - !pl));
2252*c0909341SAndroid Build Coastguard Worker             } while (i < pal_sz);
2253*c0909341SAndroid Build Coastguard Worker         }
2254*c0909341SAndroid Build Coastguard Worker 
2255*c0909341SAndroid Build Coastguard Worker         // merge cache+new entries
2256*c0909341SAndroid Build Coastguard Worker         int n = 0, m = n_used_cache;
2257*c0909341SAndroid Build Coastguard Worker         for (i = 0; i < pal_sz; i++) {
2258*c0909341SAndroid Build Coastguard Worker             if (n < n_used_cache && (m >= pal_sz || used_cache[n] <= pal[m])) {
2259*c0909341SAndroid Build Coastguard Worker                 pal[i] = used_cache[n++];
2260*c0909341SAndroid Build Coastguard Worker             } else {
2261*c0909341SAndroid Build Coastguard Worker                 assert(m < pal_sz);
2262*c0909341SAndroid Build Coastguard Worker                 pal[i] = pal[m++];
2263*c0909341SAndroid Build Coastguard Worker             }
2264*c0909341SAndroid Build Coastguard Worker         }
2265*c0909341SAndroid Build Coastguard Worker     } else {
2266*c0909341SAndroid Build Coastguard Worker         memcpy(pal, used_cache, n_used_cache * sizeof(*used_cache));
2267*c0909341SAndroid Build Coastguard Worker     }
2268*c0909341SAndroid Build Coastguard Worker 
2269*c0909341SAndroid Build Coastguard Worker     if (DEBUG_BLOCK_INFO) {
2270*c0909341SAndroid Build Coastguard Worker         printf("Post-pal[pl=%d,sz=%d,cache_size=%d,used_cache=%d]: r=%d, cache=",
2271*c0909341SAndroid Build Coastguard Worker                pl, pal_sz, n_cache, n_used_cache, ts->msac.rng);
2272*c0909341SAndroid Build Coastguard Worker         for (int n = 0; n < n_cache; n++)
2273*c0909341SAndroid Build Coastguard Worker             printf("%c%02x", n ? ' ' : '[', cache[n]);
2274*c0909341SAndroid Build Coastguard Worker         printf("%s, pal=", n_cache ? "]" : "[]");
2275*c0909341SAndroid Build Coastguard Worker         for (int n = 0; n < pal_sz; n++)
2276*c0909341SAndroid Build Coastguard Worker             printf("%c%02x", n ? ' ' : '[', pal[n]);
2277*c0909341SAndroid Build Coastguard Worker         printf("]\n");
2278*c0909341SAndroid Build Coastguard Worker     }
2279*c0909341SAndroid Build Coastguard Worker }
2280*c0909341SAndroid Build Coastguard Worker 
bytefn(dav1d_read_pal_uv)2281*c0909341SAndroid Build Coastguard Worker void bytefn(dav1d_read_pal_uv)(Dav1dTaskContext *const t, Av1Block *const b,
2282*c0909341SAndroid Build Coastguard Worker                                const int sz_ctx, const int bx4, const int by4)
2283*c0909341SAndroid Build Coastguard Worker {
2284*c0909341SAndroid Build Coastguard Worker     bytefn(dav1d_read_pal_plane)(t, b, 1, sz_ctx, bx4, by4);
2285*c0909341SAndroid Build Coastguard Worker 
2286*c0909341SAndroid Build Coastguard Worker     // V pal coding
2287*c0909341SAndroid Build Coastguard Worker     Dav1dTileState *const ts = t->ts;
2288*c0909341SAndroid Build Coastguard Worker     const Dav1dFrameContext *const f = t->f;
2289*c0909341SAndroid Build Coastguard Worker     pixel *const pal = t->frame_thread.pass ?
2290*c0909341SAndroid Build Coastguard Worker         f->frame_thread.pal[((t->by >> 1) + (t->bx & 1)) * (f->b4_stride >> 1) +
2291*c0909341SAndroid Build Coastguard Worker                             ((t->bx >> 1) + (t->by & 1))][2] :
2292*c0909341SAndroid Build Coastguard Worker         bytefn(t->scratch.pal)[2];
2293*c0909341SAndroid Build Coastguard Worker     const int bpc = BITDEPTH == 8 ? 8 : f->cur.p.bpc;
2294*c0909341SAndroid Build Coastguard Worker     if (dav1d_msac_decode_bool_equi(&ts->msac)) {
2295*c0909341SAndroid Build Coastguard Worker         const int bits = bpc - 4 + dav1d_msac_decode_bools(&ts->msac, 2);
2296*c0909341SAndroid Build Coastguard Worker         int prev = pal[0] = dav1d_msac_decode_bools(&ts->msac, bpc);
2297*c0909341SAndroid Build Coastguard Worker         const int max = (1 << bpc) - 1;
2298*c0909341SAndroid Build Coastguard Worker         for (int i = 1; i < b->pal_sz[1]; i++) {
2299*c0909341SAndroid Build Coastguard Worker             int delta = dav1d_msac_decode_bools(&ts->msac, bits);
2300*c0909341SAndroid Build Coastguard Worker             if (delta && dav1d_msac_decode_bool_equi(&ts->msac)) delta = -delta;
2301*c0909341SAndroid Build Coastguard Worker             prev = pal[i] = (prev + delta) & max;
2302*c0909341SAndroid Build Coastguard Worker         }
2303*c0909341SAndroid Build Coastguard Worker     } else {
2304*c0909341SAndroid Build Coastguard Worker         for (int i = 0; i < b->pal_sz[1]; i++)
2305*c0909341SAndroid Build Coastguard Worker             pal[i] = dav1d_msac_decode_bools(&ts->msac, bpc);
2306*c0909341SAndroid Build Coastguard Worker     }
2307*c0909341SAndroid Build Coastguard Worker     if (DEBUG_BLOCK_INFO) {
2308*c0909341SAndroid Build Coastguard Worker         printf("Post-pal[pl=2]: r=%d ", ts->msac.rng);
2309*c0909341SAndroid Build Coastguard Worker         for (int n = 0; n < b->pal_sz[1]; n++)
2310*c0909341SAndroid Build Coastguard Worker             printf("%c%02x", n ? ' ' : '[', pal[n]);
2311*c0909341SAndroid Build Coastguard Worker         printf("]\n");
2312*c0909341SAndroid Build Coastguard Worker     }
2313*c0909341SAndroid Build Coastguard Worker }
2314