xref: /aosp_15_r20/external/libdav1d/src/arm/32/ipred.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2020, Martin Storsjo
 * Copyright © 2019, B Krishnan Iyer
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
28*c0909341SAndroid Build Coastguard Worker
29*c0909341SAndroid Build Coastguard Worker#include "src/arm/asm.S"
30*c0909341SAndroid Build Coastguard Worker#include "util.S"
31*c0909341SAndroid Build Coastguard Worker
// void ipred_dc_128_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                             const pixel *const topleft,
//                             const int width, const int height, const int a,
//                             const int max_width, const int max_height);
//
// Fills a width x height block of 8-bit pixels at dst with the constant 128.
//
// In:    r0 = dst, r1 = stride, r2 = topleft (never read), r3 = width,
//        [sp] = height (first stack argument).
//        a, max_width and max_height are not read.
// Uses:  r2 = jump table base, then branch target
//        r3 = table index, then table entry
//        r4 = rows remaining (saved/restored)
//        r12 = pointer to the odd rows (dst + stride)
//        q0, q1 = fill pattern
//
// Dispatch: index = clz(width) - 25, which maps width 64/32/16/8/4 to table
// slots 0..4. There is no range check, so width is assumed to be one of
// those five values.
// Every loop writes four rows per iteration (two through r0, two through
// r12, each pointer advancing by 2 * stride) and repeats while the row
// counter is still positive, so height is assumed to be a multiple of 4.
// The :32/:64/:128 qualifiers on the stores assert that each row start is
// aligned to 4/8/16 bytes; that is a requirement on dst and stride.
function ipred_dc_128_8bpc_neon, export=1
        push            {r4, lr}
        // The push moved sp down by 8, so the first stack argument is at sp + 8.
        ldr             r4,  [sp, #8]
        clz             r3,  r3
        adr             r2,  L(ipred_dc_128_tbl)
        sub             r3,  r3,  #25
        ldr             r3,  [r2,  r3,  lsl #2]
        vmov.i8         q0,  #128
        add             r2,  r2,  r3
        add             r12, r0,  r1            // r12 = second row
        lsl             r1,  r1,  #1            // r1 = 2 * stride
        bx              r2

        .align 2
// Offsets are relative to the table itself. CONFIG_THUMB is defined outside
// this file; presumably it sets bit 0 so that bx stays in Thumb state when
// the file is assembled as Thumb -- confirm in src/arm/asm.S.
L(ipred_dc_128_tbl):
        .word 640f - L(ipred_dc_128_tbl) + CONFIG_THUMB
        .word 320f - L(ipred_dc_128_tbl) + CONFIG_THUMB
        .word 16f  - L(ipred_dc_128_tbl) + CONFIG_THUMB
        .word 8f   - L(ipred_dc_128_tbl) + CONFIG_THUMB
        .word 4f   - L(ipred_dc_128_tbl) + CONFIG_THUMB
4:
        // Width 4: one 32-bit lane per row.
        vst1.32         {d0[0]},  [r0,  :32], r1
        vst1.32         {d0[0]},  [r12, :32], r1
        subs            r4,  r4,  #4
        vst1.32         {d0[0]},  [r0,  :32], r1
        vst1.32         {d0[0]},  [r12, :32], r1
        bgt             4b
        pop             {r4, pc}                // loading the saved lr into pc returns
8:
        // Width 8: one d register per row.
        vst1.8          {d0},  [r0,  :64], r1
        vst1.8          {d0},  [r12, :64], r1
        subs            r4,  r4,  #4
        vst1.8          {d0},  [r0,  :64], r1
        vst1.8          {d0},  [r12, :64], r1
        bgt             8b
        pop             {r4, pc}
16:
        // Width 16: one q register per row.
        vst1.8          {d0,  d1}, [r0,  :128], r1
        vst1.8          {d0,  d1}, [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.8          {d0,  d1}, [r0,  :128], r1
        vst1.8          {d0,  d1}, [r12, :128], r1
        bgt             16b
        pop             {r4, pc}
320:
        // Width 32 needs a second q register holding the same pattern.
        vmov.i8         q1,  #128
32:
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             32b
        pop             {r4, pc}
640:
        vmov.i8         q1,  #128
        // Each 64-byte row is written as two 32-byte stores. The first one
        // post-increments its pointer by 32, so the step applied by the
        // second one is shortened by the same amount.
        sub             r1,  r1,  #32
64:
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             64b
        pop             {r4, pc}
endfunc
106*c0909341SAndroid Build Coastguard Worker
// void ipred_v_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                        const pixel *const topleft,
//                        const int width, const int height, const int a,
//                        const int max_width, const int max_height);
//
// Vertical prediction: loads `width` bytes starting at topleft + 1 once and
// writes that same row into every one of the `height` rows at dst.
//
// In:    r0 = dst, r1 = stride, r2 = topleft, r3 = width,
//        [sp] = height (first stack argument).
//        a, max_width and max_height are not read.
// Uses:  r2 = topleft + 1 (source row)
//        r3 = table index, then table entry
//        r4 = jump table base, then branch target (saved/restored)
//        lr = rows remaining; the saved copy is popped straight into pc
//        r12 = pointer to the odd rows (dst + stride)
//        q0-q3 = the source row
//
// Dispatch: index = clz(width) - 25, which maps width 64/32/16/8/4 to table
// slots 0..4. There is no range check, so width is assumed to be one of
// those five values.
// Every loop writes four rows per iteration and repeats while the row
// counter is still positive, so height is assumed to be a multiple of 4.
// The loads carry no alignment qualifier; the stores assert 4/8/16-byte
// alignment of each row start through :32/:64/:128.
function ipred_v_8bpc_neon, export=1
        push            {r4, lr}
        // The push moved sp down by 8, so the first stack argument is at sp + 8.
        ldr             lr,  [sp, #8]
        clz             r3,  r3
        adr             r4,  L(ipred_v_tbl)
        sub             r3,  r3,  #25
        ldr             r3,  [r4,  r3,  lsl #2]
        add             r2,  r2,  #1            // skip the byte at topleft itself
        add             r4,  r4,  r3
        add             r12, r0,  r1            // r12 = second row
        lsl             r1,  r1,  #1            // r1 = 2 * stride
        bx              r4

        .align 2
// Offsets are relative to the table itself. CONFIG_THUMB is defined outside
// this file; presumably it sets bit 0 so that bx stays in Thumb state when
// the file is assembled as Thumb -- confirm in src/arm/asm.S.
L(ipred_v_tbl):
        .word 640f - L(ipred_v_tbl) + CONFIG_THUMB
        .word 320f - L(ipred_v_tbl) + CONFIG_THUMB
        .word 160f - L(ipred_v_tbl) + CONFIG_THUMB
        .word 80f  - L(ipred_v_tbl) + CONFIG_THUMB
        .word 40f  - L(ipred_v_tbl) + CONFIG_THUMB
40:
        // Width 4: load one 32-bit element (replicated to both lanes of d0).
        vld1.32         {d0[]},   [r2]
4:
        vst1.32         {d0[0]},  [r0,  :32], r1
        vst1.32         {d0[0]},  [r12, :32], r1
        subs            lr,  lr,  #4
        vst1.32         {d0[0]},  [r0,  :32], r1
        vst1.32         {d0[0]},  [r12, :32], r1
        bgt             4b
        pop             {r4, pc}                // loading the saved lr into pc returns
80:
        // Width 8: the row fits in d0.
        vld1.8          {d0}, [r2]
8:
        vst1.8          {d0},  [r0,  :64], r1
        vst1.8          {d0},  [r12, :64], r1
        subs            lr,  lr,  #4
        vst1.8          {d0},  [r0,  :64], r1
        vst1.8          {d0},  [r12, :64], r1
        bgt             8b
        pop             {r4, pc}
160:
        // Width 16: the row fits in q0.
        vld1.8          {q0},  [r2]
16:
        vst1.8          {d0,  d1},  [r0,  :128], r1
        vst1.8          {d0,  d1},  [r12, :128], r1
        subs            lr,  lr,  #4
        vst1.8          {d0,  d1},  [r0,  :128], r1
        vst1.8          {d0,  d1},  [r12, :128], r1
        bgt             16b
        pop             {r4, pc}
320:
        // Width 32: the row fits in q0-q1.
        vld1.8          {q0,  q1},  [r2]
32:
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            lr,  lr,  #4
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             32b
        pop             {r4, pc}
640:
        // Width 64: bytes 0-31 go to q0-q1, bytes 32-63 to q2-q3.
        vld1.8          {q0,  q1},  [r2]!
        // The first store of each row post-increments its pointer by 32, so
        // the step applied by the second store is shortened by the same amount.
        sub             r1,  r1,  #32
        vld1.8          {q2,  q3},  [r2]
64:
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.8          {d4,  d5,  d6,  d7},  [r0,  :128], r1
        vst1.8          {d4,  d5,  d6,  d7},  [r12, :128], r1
        subs            lr,  lr,  #4
        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.8          {d4,  d5,  d6,  d7},  [r0,  :128], r1
        vst1.8          {d4,  d5,  d6,  d7},  [r12, :128], r1
        bgt             64b
        pop             {r4, pc}
endfunc
188*c0909341SAndroid Build Coastguard Worker
// void ipred_h_8bpc_neon(pixel *dst, const ptrdiff_t stride,
//                        const pixel *const topleft,
//                        const int width, const int height, const int a,
//                        const int max_width, const int max_height);
//
// Horizontal prediction: row y of the block is filled with the single byte
// found at topleft[-1 - y], i.e. the source bytes are walked downwards in
// memory from just below topleft.
//
// In:    r0 = dst, r1 = stride, r2 = topleft, r3 = width,
//        [sp] = height (first stack argument).
//        a, max_width and max_height are not read.
// Uses:  r2 = source pointer, moving towards lower addresses
//        r3 = table index, then table entry
//        r4 = rows remaining (saved/restored)
//        r5 = jump table base, then branch target (saved/restored)
//        lr = source post-increment (-4 or -1); the saved copy is popped
//             straight into pc
//        r12 = pointer to the odd rows (dst + stride)
//        q0-q3 = the four row values of the current iteration
//
// Dispatch: index = clz(width) - 25, which maps width 64/32/16/8/4 to table
// slots 0..4. There is no range check, so width is assumed to be one of
// those five values.
// Every loop consumes four source bytes and writes four rows per iteration,
// repeating while the row counter is still positive, so height is assumed
// to be a multiple of 4.
// The stores assert 4/8/16-byte alignment of each row start through
// :32/:64/:128. The width 4 and 8 paths also assert, through :32 on the
// load, that topleft - 4 is 4-byte aligned.
function ipred_h_8bpc_neon, export=1
        push            {r4-r5, lr}
        // The push moved sp down by 12, so the first stack argument is at sp + 12.
        ldr             r4,  [sp, #12]
        clz             r3,  r3
        adr             r5,  L(ipred_h_tbl)
        sub             r3,  r3,  #25
        ldr             r3,  [r5,  r3,  lsl #2]
        // Set up for the 4-bytes-at-a-time paths (width 4 and 8); the wider
        // paths readjust r2 and lr after the dispatch.
        sub             r2,  r2,  #4
        mov             lr,  #-4
        add             r5,  r5,  r3
        add             r12, r0,  r1            // r12 = second row
        lsl             r1,  r1,  #1            // r1 = 2 * stride
        bx              r5

        .align 2
// Offsets are relative to the table itself. CONFIG_THUMB is defined outside
// this file; presumably it sets bit 0 so that bx stays in Thumb state when
// the file is assembled as Thumb -- confirm in src/arm/asm.S.
L(ipred_h_tbl):
        .word 640f - L(ipred_h_tbl) + CONFIG_THUMB
        .word 320f - L(ipred_h_tbl) + CONFIG_THUMB
        .word 160f - L(ipred_h_tbl) + CONFIG_THUMB
        .word 8f   - L(ipred_h_tbl) + CONFIG_THUMB
        .word 4f   - L(ipred_h_tbl) + CONFIG_THUMB
4:
        // vld4.8 to all lanes: the four bytes at r2 are broadcast into d0, d1,
        // d2 and d3 respectively, then r2 steps back by 4. The highest address
        // (d3) belongs to the first row, hence the d3, d2, d1, d0 store order.
        vld4.8          {d0[],  d1[],  d2[],  d3[]},  [r2, :32],  lr
        vst1.32         {d3[0]},  [r0,  :32], r1
        vst1.32         {d2[0]},  [r12, :32], r1
        subs            r4,  r4,  #4
        vst1.32         {d1[0]},  [r0,  :32], r1
        vst1.32         {d0[0]},  [r12, :32], r1
        bgt             4b
        pop             {r4-r5, pc}             // loading the saved lr into pc returns
8:
        // Same load as the width 4 path; each row is a whole d register.
        vld4.8          {d0[],  d1[],  d2[],  d3[]},  [r2, :32],  lr
        vst1.8          {d3},  [r0,  :64], r1
        vst1.8          {d2},  [r12, :64], r1
        subs            r4,  r4,  #4
        vst1.8          {d1},  [r0,  :64], r1
        vst1.8          {d0},  [r12, :64], r1
        bgt             8b
        pop             {r4-r5, pc}
160:
        // Wider rows need q registers, which vld4.8 cannot fill, so switch to
        // one broadcast byte per load: r2 = topleft - 1, stepping by -1.
        add             r2,  r2,  #3
        mov             lr,  #-1
16:
        // Loads for later rows are interleaved with stores of earlier rows.
        vld1.8          {d0[],  d1[]},  [r2],  lr
        subs            r4,  r4,  #4
        vld1.8          {d2[],  d3[]},  [r2],  lr
        vst1.8          {q0},  [r0,    :128],  r1
        vld1.8          {d4[],  d5[]},  [r2],  lr
        vst1.8          {q1},  [r12,   :128],  r1
        vld1.8          {d6[],  d7[]},  [r2],  lr
        vst1.8          {q2},  [r0,    :128],  r1
        vst1.8          {q3},  [r12,   :128],  r1
        bgt             16b
        pop             {r4-r5, pc}
320:
        add             r2,  r2,  #3
        mov             lr,  #-1
        // Each 32-byte row is two 16-byte stores. The first post-increments
        // its pointer by 16, so the step of the second is shortened by 16.
        sub             r1,  r1,  #16
32:
        vld1.8          {d0[],  d1[]}, [r2],  lr
        subs            r4,  r4,  #4
        vld1.8          {d2[],  d3[]}, [r2],  lr
        vst1.8          {q0},  [r0,   :128]!
        vld1.8          {d4[],  d5[]}, [r2],  lr
        vst1.8          {q1},  [r12,  :128]!
        vld1.8          {d6[],  d7[]}, [r2],  lr
        vst1.8          {q0},  [r0,   :128],  r1
        vst1.8          {q1},  [r12,  :128],  r1
        vst1.8          {q2},  [r0,   :128]!
        vst1.8          {q3},  [r12,  :128]!
        vst1.8          {q2},  [r0,   :128],  r1
        vst1.8          {q3},  [r12,  :128],  r1
        bgt             32b
        pop             {r4-r5, pc}
640:
        add             r2,  r2,  #3
        mov             lr,  #-1
        // Each 64-byte row is four 16-byte stores. The first three
        // post-increment their pointer by 16 each, so the step of the fourth
        // is shortened by 48.
        sub             r1,  r1,  #48
64:
        vld1.8          {d0[],  d1[]},  [r2],  lr
        subs            r4,  r4,  #4
        vld1.8          {d2[],  d3[]},  [r2],  lr
        vst1.8          {q0},  [r0,    :128]!
        vld1.8          {d4[],  d5[]},  [r2],  lr
        vst1.8          {q1},  [r12,   :128]!
        vld1.8          {d6[],  d7[]},  [r2],  lr
        vst1.8          {q0},  [r0,    :128]!
        vst1.8          {q1},  [r12,   :128]!
        vst1.8          {q0},  [r0,    :128]!
        vst1.8          {q1},  [r12,   :128]!
        vst1.8          {q0},  [r0,    :128],  r1
        vst1.8          {q1},  [r12,   :128],  r1
        vst1.8          {q2},  [r0,    :128]!
        vst1.8          {q3},  [r12,   :128]!
        vst1.8          {q2},  [r0,    :128]!
        vst1.8          {q3},  [r12,   :128]!
        vst1.8          {q2},  [r0,    :128]!
        vst1.8          {q3},  [r12,   :128]!
        vst1.8          {q2},  [r0,    :128],  r1
        vst1.8          {q3},  [r12,   :128],  r1
        bgt             64b
        pop             {r4-r5, pc}
endfunc
296*c0909341SAndroid Build Coastguard Worker
297*c0909341SAndroid Build Coastguard Worker// void ipred_dc_top_8bpc_neon(pixel *dst, const ptrdiff_t stride,
298*c0909341SAndroid Build Coastguard Worker//                             const pixel *const topleft,
299*c0909341SAndroid Build Coastguard Worker//                             const int width, const int height, const int a,
300*c0909341SAndroid Build Coastguard Worker//                             const int max_width, const int max_height);
301*c0909341SAndroid Build Coastguard Workerfunction ipred_dc_top_8bpc_neon, export=1
302*c0909341SAndroid Build Coastguard Worker        push            {r4-r5, lr}
303*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #12]
304*c0909341SAndroid Build Coastguard Worker        clz             r3,  r3
305*c0909341SAndroid Build Coastguard Worker        adr             r5,  L(ipred_dc_top_tbl)
306*c0909341SAndroid Build Coastguard Worker        sub             r3,  r3,  #25
307*c0909341SAndroid Build Coastguard Worker        ldr             r3,  [r5,  r3,  lsl #2]
308*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #1
309*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  r3
310*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1
311*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
312*c0909341SAndroid Build Coastguard Worker        bx              r5
313*c0909341SAndroid Build Coastguard Worker
314*c0909341SAndroid Build Coastguard Worker        .align 2
315*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_top_tbl):
316*c0909341SAndroid Build Coastguard Worker        .word 640f - L(ipred_dc_top_tbl) + CONFIG_THUMB
317*c0909341SAndroid Build Coastguard Worker        .word 320f - L(ipred_dc_top_tbl) + CONFIG_THUMB
318*c0909341SAndroid Build Coastguard Worker        .word 160f - L(ipred_dc_top_tbl) + CONFIG_THUMB
319*c0909341SAndroid Build Coastguard Worker        .word 80f  - L(ipred_dc_top_tbl) + CONFIG_THUMB
320*c0909341SAndroid Build Coastguard Worker        .word 40f  - L(ipred_dc_top_tbl) + CONFIG_THUMB
321*c0909341SAndroid Build Coastguard Worker40:
322*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]},  [r2]
323*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d0,  d0
324*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
325*c0909341SAndroid Build Coastguard Worker        vrshrn.u16      d0,  q0,  #2
326*c0909341SAndroid Build Coastguard Worker        vdup.8          d0,  d0[0]
327*c0909341SAndroid Build Coastguard Worker4:
328*c0909341SAndroid Build Coastguard Worker        vst1.32         {d0[0]},  [r0,  :32], r1
329*c0909341SAndroid Build Coastguard Worker        vst1.32         {d0[0]},  [r12, :32], r1
330*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
331*c0909341SAndroid Build Coastguard Worker        vst1.32         {d0[0]},  [r0,  :32], r1
332*c0909341SAndroid Build Coastguard Worker        vst1.32         {d0[0]},  [r12, :32], r1
333*c0909341SAndroid Build Coastguard Worker        bgt             4b
334*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
335*c0909341SAndroid Build Coastguard Worker80:
336*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0},  [r2]
337*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d0,  d0
338*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
339*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
340*c0909341SAndroid Build Coastguard Worker        vrshrn.u16      d0,  q0,  #3
341*c0909341SAndroid Build Coastguard Worker        vdup.8          d0,  d0[0]
342*c0909341SAndroid Build Coastguard Worker8:
343*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0},  [r0,  :64], r1
344*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0},  [r12, :64], r1
345*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
346*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0},  [r0,  :64], r1
347*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0},  [r12, :64], r1
348*c0909341SAndroid Build Coastguard Worker        bgt             8b
349*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
350*c0909341SAndroid Build Coastguard Worker160:
351*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0,  d1},  [r2]
352*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q0,  d0,  d1
353*c0909341SAndroid Build Coastguard Worker        vadd.u16        d0,  d0,  d1
354*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
355*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
356*c0909341SAndroid Build Coastguard Worker        vrshrn.u16      d0,  q0,  #4
357*c0909341SAndroid Build Coastguard Worker        vdup.8          q0,  d0[0]
358*c0909341SAndroid Build Coastguard Worker16:
359*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1},  [r0,  :128], r1
360*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1},  [r12, :128], r1
361*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
362*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1},  [r0,  :128], r1
363*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1},  [r12, :128], r1
364*c0909341SAndroid Build Coastguard Worker        bgt             16b
365*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
366*c0909341SAndroid Build Coastguard Worker320:
367*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0,  d1,  d2,  d3},  [r2]
368*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q0,  d0,  d1
369*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q1,  d2,  d3
370*c0909341SAndroid Build Coastguard Worker        vadd.u16        q0,  q0,  q1
371*c0909341SAndroid Build Coastguard Worker        vadd.u16        d0,  d0,  d1
372*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
373*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
374*c0909341SAndroid Build Coastguard Worker        vrshrn.u16      d4,  q0,  #5
375*c0909341SAndroid Build Coastguard Worker        vdup.8          q0,  d4[0]
376*c0909341SAndroid Build Coastguard Worker        vdup.8          q1,  d4[0]
377*c0909341SAndroid Build Coastguard Worker32:
378*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
379*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
380*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
381*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
382*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
383*c0909341SAndroid Build Coastguard Worker        bgt             32b
384*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
385*c0909341SAndroid Build Coastguard Worker640:
386*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0,  d1,  d2,  d3},  [r2]!
387*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q0,  d0,  d1
388*c0909341SAndroid Build Coastguard Worker        vld1.8          {d4,  d5,  d6,  d7},  [r2]
389*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q1,  d2,  d3
390*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q2,  d4,  d5
391*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q3,  d6,  d7
392*c0909341SAndroid Build Coastguard Worker        vadd.u16        q0,  q0,  q1
393*c0909341SAndroid Build Coastguard Worker        vadd.u16        q1,  q2,  q3
394*c0909341SAndroid Build Coastguard Worker        vadd.u16        q0,  q0,  q1
395*c0909341SAndroid Build Coastguard Worker        vadd.u16        d0,  d0,  d1
396*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
397*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
398*c0909341SAndroid Build Coastguard Worker        vrshrn.u16      d18, q0,  #6
399*c0909341SAndroid Build Coastguard Worker        vdup.8          q0,  d18[0]
400*c0909341SAndroid Build Coastguard Worker        vdup.8          q1,  d18[0]
401*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #32
402*c0909341SAndroid Build Coastguard Worker64:
403*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128]!
404*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128]!
405*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
406*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
407*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
408*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128]!
409*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128]!
410*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
411*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
412*c0909341SAndroid Build Coastguard Worker        bgt             64b
413*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
414*c0909341SAndroid Build Coastguard Workerendfunc
415*c0909341SAndroid Build Coastguard Worker
416*c0909341SAndroid Build Coastguard Worker// void ipred_dc_left_8bpc_neon(pixel *dst, const ptrdiff_t stride,
417*c0909341SAndroid Build Coastguard Worker//                              const pixel *const topleft,
418*c0909341SAndroid Build Coastguard Worker//                              const int width, const int height, const int a,
419*c0909341SAndroid Build Coastguard Worker//                              const int max_width, const int max_height);
420*c0909341SAndroid Build Coastguard Workerfunction ipred_dc_left_8bpc_neon, export=1  // fill the block with the rounded average of the `height` bytes that end just before topleft
421*c0909341SAndroid Build Coastguard Worker        push            {r4-r5, lr}         // 12 bytes pushed, so the first stack argument is now at [sp, #12]
422*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #12]      // r4 = height (fifth argument in the prototype above)
423*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r4        // r2 = topleft - height: first byte loaded by the h* code below
424*c0909341SAndroid Build Coastguard Worker        clz             r3,  r3             // clz(width)
425*c0909341SAndroid Build Coastguard Worker        clz             lr,  r4             // clz(height)
426*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #25       // height index into the table: 64 -> 0, 32 -> 1, 16 -> 2, 8 -> 3, 4 -> 4
427*c0909341SAndroid Build Coastguard Worker        adr             r5,  L(ipred_dc_left_tbl)
428*c0909341SAndroid Build Coastguard Worker        sub             r3,  r3,  #20       // width index into the table: 64 -> 5, 32 -> 6, 16 -> 7, 8 -> 8, 4 -> 9
429*c0909341SAndroid Build Coastguard Worker        ldr             r3,  [r5,  r3,  lsl #2]     // table-relative offset of the width-specific store loop
430*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r5,  lr,  lsl #2]     // table-relative offset of the height-specific averaging code
431*c0909341SAndroid Build Coastguard Worker        add             r3,  r5,  r3        // r3 = address of the w* loop, reached later through "bx r3"
432*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  lr        // r5 = address of the h* code
433*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1        // r12 = dst + stride: r0 writes rows 0, 2, ... and r12 rows 1, 3, ...
434*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1        // stride *= 2 so each pointer steps over the other one's row
435*c0909341SAndroid Build Coastguard Worker        bx              r5                  // run the averaging code selected by height
436*c0909341SAndroid Build Coastguard Worker
437*c0909341SAndroid Build Coastguard Worker        .align 2
438*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_tbl):  // entries are offsets from this label; 0-4 select the h* code, 5-9 the w* store loop
439*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_h64) - L(ipred_dc_left_tbl) + CONFIG_THUMB  // NOTE(review): CONFIG_THUMB is defined outside this chunk; presumably it sets the Thumb bit for bx - confirm
440*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_h32) - L(ipred_dc_left_tbl) + CONFIG_THUMB
441*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_h16) - L(ipred_dc_left_tbl) + CONFIG_THUMB
442*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_h8)  - L(ipred_dc_left_tbl) + CONFIG_THUMB
443*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_h4)  - L(ipred_dc_left_tbl) + CONFIG_THUMB
444*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_w64) - L(ipred_dc_left_tbl) + CONFIG_THUMB
445*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_w32) - L(ipred_dc_left_tbl) + CONFIG_THUMB
446*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_w16) - L(ipred_dc_left_tbl) + CONFIG_THUMB
447*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_w8)  - L(ipred_dc_left_tbl) + CONFIG_THUMB
448*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_w4)  - L(ipred_dc_left_tbl) + CONFIG_THUMB
449*c0909341SAndroid Build Coastguard Worker
450*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h4):  // height == 4: average 4 bytes
451*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]},  [r2, :32]  // load 4 bytes and replicate them into both 32-bit lanes of d0
452*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d0,  d0             // widen to u16 while adding adjacent byte pairs
453*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0             // lane 0 = sum of the 4 bytes
454*c0909341SAndroid Build Coastguard Worker        vrshrn.u16      d0,  q0,  #2        // rounding shift: lane 0 = (sum + 2) >> 2; only lane 0 is used afterwards
455*c0909341SAndroid Build Coastguard Worker        vdup.8          q0,  d0[0]          // broadcast the DC byte to all 16 lanes of q0
456*c0909341SAndroid Build Coastguard Worker        bx              r3                  // continue in the store loop selected by width
457*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w4):  // width == 4: write 4 bytes per row, four rows per iteration
458*c0909341SAndroid Build Coastguard Worker        vst1.32         {d0[0]},  [r0,  :32], r1
459*c0909341SAndroid Build Coastguard Worker        vst1.32         {d0[0]},  [r12, :32], r1
460*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4        // four rows done; NEON stores leave the flags intact for bgt
461*c0909341SAndroid Build Coastguard Worker        vst1.32         {d0[0]},  [r0,  :32], r1
462*c0909341SAndroid Build Coastguard Worker        vst1.32         {d0[0]},  [r12, :32], r1
463*c0909341SAndroid Build Coastguard Worker        bgt             L(ipred_dc_left_w4)
464*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}         // restore r4-r5 and return (saved lr is popped into pc)
465*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h8):  // height == 8: average 8 bytes
466*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0},  [r2, :64]
467*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d0,  d0             // 8 bytes -> 4 u16 pair sums
468*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
469*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0             // lane 0 = sum of the 8 bytes
470*c0909341SAndroid Build Coastguard Worker        vrshrn.u16      d0,  q0,  #3        // lane 0 = (sum + 4) >> 3
471*c0909341SAndroid Build Coastguard Worker        vdup.8          q0,  d0[0]          // broadcast the DC byte
472*c0909341SAndroid Build Coastguard Worker        bx              r3                  // continue in the store loop selected by width
473*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w8):  // width == 8: write 8 bytes per row, four rows per iteration
474*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0},  [r0,  :64], r1
475*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0},  [r12, :64], r1
476*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4        // four rows done per iteration
477*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0},  [r0,  :64], r1
478*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0},  [r12, :64], r1
479*c0909341SAndroid Build Coastguard Worker        bgt             L(ipred_dc_left_w8)
480*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
481*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h16):  // height == 16: average 16 bytes
482*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0,  d1},  [r2, :128]
483*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q0,  d0,  d1        // widening add of the two 8-byte halves -> 8 u16 lanes
484*c0909341SAndroid Build Coastguard Worker        vadd.u16        d0,  d0,  d1        // 8 lanes -> 4 lanes
485*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
486*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0             // lane 0 = sum of the 16 bytes
487*c0909341SAndroid Build Coastguard Worker        vrshrn.u16      d0,  q0,  #4        // lane 0 = (sum + 8) >> 4
488*c0909341SAndroid Build Coastguard Worker        vdup.8          q0,  d0[0]          // broadcast the DC byte
489*c0909341SAndroid Build Coastguard Worker        bx              r3                  // continue in the store loop selected by width
490*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w16):  // width == 16: write 16 bytes per row, four rows per iteration
491*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1},  [r0,  :128], r1
492*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1},  [r12, :128], r1
493*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4        // four rows done per iteration
494*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1},  [r0,  :128], r1
495*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1},  [r12, :128], r1
496*c0909341SAndroid Build Coastguard Worker        bgt             L(ipred_dc_left_w16)
497*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
498*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h32):  // height == 32: average 32 bytes
499*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0,  d1,  d2,  d3},  [r2, :128]
500*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q0,  d0,  d1        // bytes 0-15 -> 8 u16 lanes
501*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q1,  d2,  d3        // bytes 16-31 -> 8 u16 lanes
502*c0909341SAndroid Build Coastguard Worker        vadd.u16        q0,  q0,  q1
503*c0909341SAndroid Build Coastguard Worker        vadd.u16        d0,  d0,  d1        // 8 lanes -> 4 lanes
504*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
505*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0             // lane 0 = sum of the 32 bytes
506*c0909341SAndroid Build Coastguard Worker        vrshrn.u16      d0,  q0,  #5        // lane 0 = (sum + 16) >> 5
507*c0909341SAndroid Build Coastguard Worker        vdup.8          q0,  d0[0]          // broadcast the DC byte
508*c0909341SAndroid Build Coastguard Worker        bx              r3                  // continue in the store loop selected by width
509*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w32):  // width == 32: write 32 bytes per row, four rows per iteration
510*c0909341SAndroid Build Coastguard Worker        vmov.8          q1,  q0             // d0-d3 now hold 32 copies of the DC byte
511*c0909341SAndroid Build Coastguard Worker1:
512*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
513*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
514*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4        // four rows done per iteration
515*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
516*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
517*c0909341SAndroid Build Coastguard Worker        bgt             1b
518*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
519*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h64):  // height == 64: average 64 bytes
520*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0,  d1,  d2,  d3},  [r2, :128]!   // bytes 0-31; writeback advances r2 by 32
521*c0909341SAndroid Build Coastguard Worker        vld1.8          {d4,  d5,  d6,  d7},  [r2, :128]    // bytes 32-63
522*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q0,  d0,  d1        // each vaddl folds 16 bytes into 8 u16 lanes
523*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q1,  d2,  d3
524*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q2,  d4,  d5
525*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q3,  d6,  d7
526*c0909341SAndroid Build Coastguard Worker        vadd.u16        q0,  q0,  q1
527*c0909341SAndroid Build Coastguard Worker        vadd.u16        q1,  q2,  q3
528*c0909341SAndroid Build Coastguard Worker        vadd.u16        q0,  q0,  q1        // 8 u16 lanes covering all 64 bytes
529*c0909341SAndroid Build Coastguard Worker        vadd.u16        d0,  d0,  d1        // 8 lanes -> 4 lanes
530*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
531*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0             // lane 0 = sum of the 64 bytes
532*c0909341SAndroid Build Coastguard Worker        vrshrn.u16      d0,  q0,  #6        // lane 0 = (sum + 32) >> 6
533*c0909341SAndroid Build Coastguard Worker        vdup.8          q0,  d0[0]          // broadcast the DC byte
534*c0909341SAndroid Build Coastguard Worker        bx              r3                  // continue in the store loop selected by width
535*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w64):  // width == 64: two 32-byte stores per row, four rows per iteration
536*c0909341SAndroid Build Coastguard Worker        vmov.8          q1,  q0             // d0-d3 now hold 32 copies of the DC byte
537*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #32       // the first store of each row already advances the pointer by 32
538*c0909341SAndroid Build Coastguard Worker1:
539*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128]!      // left 32 bytes of the row; pointer += 32
540*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128]!
541*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1   // right 32 bytes; pointer += 2*stride - 32
542*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
543*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4, #4         // four rows done per iteration
544*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128]!
545*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128]!
546*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
547*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
548*c0909341SAndroid Build Coastguard Worker        bgt             1b
549*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
550*c0909341SAndroid Build Coastguard Workerendfunc
551*c0909341SAndroid Build Coastguard Worker
552*c0909341SAndroid Build Coastguard Worker// void ipred_dc_8bpc_neon(pixel *dst, const ptrdiff_t stride,
553*c0909341SAndroid Build Coastguard Worker//                         const pixel *const topleft,
554*c0909341SAndroid Build Coastguard Worker//                         const int width, const int height, const int a,
555*c0909341SAndroid Build Coastguard Worker//                         const int max_width, const int max_height);
556*c0909341SAndroid Build Coastguard Workerfunction ipred_dc_8bpc_neon, export=1
557*c0909341SAndroid Build Coastguard Worker        push            {r4-r6, lr}
558*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #16]
559*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r4
560*c0909341SAndroid Build Coastguard Worker        add             lr,  r3,  r4        // width + height
561*c0909341SAndroid Build Coastguard Worker        clz             r3,  r3
562*c0909341SAndroid Build Coastguard Worker        clz             r12, r4
563*c0909341SAndroid Build Coastguard Worker        vdup.16         q15, lr             // width + height
564*c0909341SAndroid Build Coastguard Worker        adr             r5,  L(ipred_dc_tbl)
565*c0909341SAndroid Build Coastguard Worker        rbit            lr,  lr             // rbit(width + height)
566*c0909341SAndroid Build Coastguard Worker        sub             r3,  r3,  #20       // 25 leading bits, minus table offset 5
567*c0909341SAndroid Build Coastguard Worker        sub             r12, r12, #25
568*c0909341SAndroid Build Coastguard Worker        clz             lr,  lr             // ctz(width + height)
569*c0909341SAndroid Build Coastguard Worker        ldr             r3,  [r5,  r3,  lsl #2]
570*c0909341SAndroid Build Coastguard Worker        ldr             r12, [r5,  r12, lsl #2]
571*c0909341SAndroid Build Coastguard Worker        neg             lr,  lr             // -ctz(width + height)
572*c0909341SAndroid Build Coastguard Worker        add             r3,  r5,  r3
573*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  r12
574*c0909341SAndroid Build Coastguard Worker        vshr.u16        q15, q15, #1        // (width + height) >> 1
575*c0909341SAndroid Build Coastguard Worker        vdup.16         q14, lr             // -ctz(width + height)
576*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1
577*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
578*c0909341SAndroid Build Coastguard Worker        bx              r5
579*c0909341SAndroid Build Coastguard Worker
580*c0909341SAndroid Build Coastguard Worker        .align 2
581*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_tbl):
582*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_h64) - L(ipred_dc_tbl) + CONFIG_THUMB
583*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_h32) - L(ipred_dc_tbl) + CONFIG_THUMB
584*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_h16) - L(ipred_dc_tbl) + CONFIG_THUMB
585*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_h8)  - L(ipred_dc_tbl) + CONFIG_THUMB
586*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_h4)  - L(ipred_dc_tbl) + CONFIG_THUMB
587*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_w64) - L(ipred_dc_tbl) + CONFIG_THUMB
588*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_w32) - L(ipred_dc_tbl) + CONFIG_THUMB
589*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_w16) - L(ipred_dc_tbl) + CONFIG_THUMB
590*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_w8)  - L(ipred_dc_tbl) + CONFIG_THUMB
591*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_w4)  - L(ipred_dc_tbl) + CONFIG_THUMB
592*c0909341SAndroid Build Coastguard Worker
593*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h4):
594*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]},  [r2, :32]!
595*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d0,  d0
596*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #1
597*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
598*c0909341SAndroid Build Coastguard Worker        bx              r3
599*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w4):
600*c0909341SAndroid Build Coastguard Worker        vld1.32         {d1[]},  [r2]
601*c0909341SAndroid Build Coastguard Worker        vadd.s16        d0,  d0,  d30
602*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d1,  d1
603*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d1,  d1
604*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #4
605*c0909341SAndroid Build Coastguard Worker        vadd.s16        d0,  d0,  d1
606*c0909341SAndroid Build Coastguard Worker        vshl.u16        d0,  d0,  d28
607*c0909341SAndroid Build Coastguard Worker        beq             1f
608*c0909341SAndroid Build Coastguard Worker        // h = 8/16
609*c0909341SAndroid Build Coastguard Worker        movw            lr,  #(0x3334/2)
610*c0909341SAndroid Build Coastguard Worker        movw            r5,  #(0x5556/2)
611*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #16
612*c0909341SAndroid Build Coastguard Worker        it              ne
613*c0909341SAndroid Build Coastguard Worker        movne           lr,  r5
614*c0909341SAndroid Build Coastguard Worker        vdup.16         d30, lr
615*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     d0,  d0,  d30
616*c0909341SAndroid Build Coastguard Worker1:
617*c0909341SAndroid Build Coastguard Worker        vdup.8          d0,  d0[0]
618*c0909341SAndroid Build Coastguard Worker2:
619*c0909341SAndroid Build Coastguard Worker        vst1.32         {d0[0]},  [r0,  :32], r1
620*c0909341SAndroid Build Coastguard Worker        vst1.32         {d0[0]},  [r12, :32], r1
621*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
622*c0909341SAndroid Build Coastguard Worker        vst1.32         {d0[0]},  [r0,  :32], r1
623*c0909341SAndroid Build Coastguard Worker        vst1.32         {d0[0]},  [r12, :32], r1
624*c0909341SAndroid Build Coastguard Worker        bgt             2b
625*c0909341SAndroid Build Coastguard Worker        pop             {r4-r6, pc}
626*c0909341SAndroid Build Coastguard Worker
627*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h8):
628*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0},  [r2, :64]!
629*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d0,  d0
630*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
631*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #1
632*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
633*c0909341SAndroid Build Coastguard Worker        bx              r3
634*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w8):
635*c0909341SAndroid Build Coastguard Worker        vld1.8          {d2},  [r2]
636*c0909341SAndroid Build Coastguard Worker        vadd.s16        d0,  d0,  d30
637*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d2,  d2
638*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d2,  d2
639*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d2,  d2
640*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #8
641*c0909341SAndroid Build Coastguard Worker        vadd.s16        d0,  d0,  d2
642*c0909341SAndroid Build Coastguard Worker        vshl.u16        d0,  d0,  d28
643*c0909341SAndroid Build Coastguard Worker        beq             1f
644*c0909341SAndroid Build Coastguard Worker        // h = 4/16/32
645*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #32
646*c0909341SAndroid Build Coastguard Worker        movw            lr,  #(0x3334/2)
647*c0909341SAndroid Build Coastguard Worker        movw            r5,  #(0x5556/2)
648*c0909341SAndroid Build Coastguard Worker        it              ne
649*c0909341SAndroid Build Coastguard Worker        movne           lr,  r5
650*c0909341SAndroid Build Coastguard Worker        vdup.16         d24, lr
651*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     d0,  d0,  d24
652*c0909341SAndroid Build Coastguard Worker1:
653*c0909341SAndroid Build Coastguard Worker        vdup.8          d0,  d0[0]
654*c0909341SAndroid Build Coastguard Worker2:
655*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0},  [r0,  :64], r1
656*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0},  [r12, :64], r1
657*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
658*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0},  [r0,  :64], r1
659*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0},  [r12, :64], r1
660*c0909341SAndroid Build Coastguard Worker        bgt             2b
661*c0909341SAndroid Build Coastguard Worker        pop             {r4-r6, pc}
662*c0909341SAndroid Build Coastguard Worker
663*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h16):
664*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0,  d1},  [r2, :128]!
665*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q0,  d0,  d1
666*c0909341SAndroid Build Coastguard Worker        vadd.u16        d0,  d0,  d1
667*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
668*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #1
669*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
670*c0909341SAndroid Build Coastguard Worker        bx              r3
671*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w16):
672*c0909341SAndroid Build Coastguard Worker        vld1.8          {d2,  d3},  [r2]
673*c0909341SAndroid Build Coastguard Worker        vadd.s16        d0,  d0,  d30
674*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q1,  d2,  d3
675*c0909341SAndroid Build Coastguard Worker        vadd.u16        d2,  d2,  d3
676*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d2,  d2
677*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d2,  d2
678*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #16
679*c0909341SAndroid Build Coastguard Worker        vadd.s16        d0,  d0,  d2
680*c0909341SAndroid Build Coastguard Worker        vshl.u16        d0,  d0,  d28
681*c0909341SAndroid Build Coastguard Worker        beq             1f
682*c0909341SAndroid Build Coastguard Worker        // h = 4/8/32/64
683*c0909341SAndroid Build Coastguard Worker        tst             r4,  #(32+16+8)     // 16 added to make a consecutive bitmask
684*c0909341SAndroid Build Coastguard Worker        movw            lr,  #(0x3334/2)
685*c0909341SAndroid Build Coastguard Worker        movw            r5,  #(0x5556/2)
686*c0909341SAndroid Build Coastguard Worker        it              ne
687*c0909341SAndroid Build Coastguard Worker        movne           lr,  r5
688*c0909341SAndroid Build Coastguard Worker        vdup.16         d24, lr
689*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     d0,  d0,  d24
690*c0909341SAndroid Build Coastguard Worker1:
691*c0909341SAndroid Build Coastguard Worker        vdup.8          q0,  d0[0]
692*c0909341SAndroid Build Coastguard Worker2:
693*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1},  [r0,  :128], r1
694*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1},  [r12, :128], r1
695*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4, #4
696*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1},  [r0,  :128], r1
697*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1},  [r12, :128], r1
698*c0909341SAndroid Build Coastguard Worker        bgt             2b
699*c0909341SAndroid Build Coastguard Worker        pop             {r4-r6, pc}
700*c0909341SAndroid Build Coastguard Worker
701*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h32):
702*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0,  d1,  d2,  d3},  [r2, :128]!
703*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q0,  d0,  d1
704*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q1,  d2,  d3
705*c0909341SAndroid Build Coastguard Worker        vadd.u16        q0,  q0,  q1
706*c0909341SAndroid Build Coastguard Worker        vadd.u16        d0,  d0,  d1
707*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
708*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #1
709*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
710*c0909341SAndroid Build Coastguard Worker        bx              r3
711*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w32):
712*c0909341SAndroid Build Coastguard Worker        vld1.8          {d2,  d3,  d4,  d5},  [r2]
713*c0909341SAndroid Build Coastguard Worker        vadd.s16        d0,  d0,  d30
714*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q1,  d2,  d3
715*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q2,  d4,  d5
716*c0909341SAndroid Build Coastguard Worker        vadd.u16        q1,  q1,  q2
717*c0909341SAndroid Build Coastguard Worker        vadd.u16        d2,  d2,  d3
718*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d2,  d2
719*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d2,  d2
720*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #32
721*c0909341SAndroid Build Coastguard Worker        vadd.s16        d0,  d0,  d2
722*c0909341SAndroid Build Coastguard Worker        vshl.u16        d4,  d0,  d28
723*c0909341SAndroid Build Coastguard Worker        beq             1f
724*c0909341SAndroid Build Coastguard Worker        // h = 8/16/64
725*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #8
726*c0909341SAndroid Build Coastguard Worker        movw            lr,  #(0x3334/2)
727*c0909341SAndroid Build Coastguard Worker        movw            r5,  #(0x5556/2)
728*c0909341SAndroid Build Coastguard Worker        it              ne
729*c0909341SAndroid Build Coastguard Worker        movne           lr,  r5
730*c0909341SAndroid Build Coastguard Worker        vdup.16         d24, lr
731*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     d4,  d4,  d24
732*c0909341SAndroid Build Coastguard Worker1:
733*c0909341SAndroid Build Coastguard Worker        vdup.8          q0,  d4[0]
734*c0909341SAndroid Build Coastguard Worker        vdup.8          q1,  d4[0]
735*c0909341SAndroid Build Coastguard Worker2:
736*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
737*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
738*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
739*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
740*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
741*c0909341SAndroid Build Coastguard Worker        bgt             2b
742*c0909341SAndroid Build Coastguard Worker        pop             {r4-r6, pc}
743*c0909341SAndroid Build Coastguard Worker
744*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h64):
745*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0,  d1,  d2,  d3},  [r2, :128]!
746*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q0,  d0,  d1
747*c0909341SAndroid Build Coastguard Worker        vld1.8          {d4,  d5,  d6,  d7},  [r2, :128]!
748*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q1,  d2,  d3
749*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q2,  d4,  d5
750*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q3,  d6,  d7
751*c0909341SAndroid Build Coastguard Worker        vadd.u16        q0,  q0,  q1
752*c0909341SAndroid Build Coastguard Worker        vadd.u16        q1,  q2,  q3
753*c0909341SAndroid Build Coastguard Worker        vadd.u16        q0,  q0,  q1
754*c0909341SAndroid Build Coastguard Worker        vadd.u16        d0,  d0,  d1
755*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
756*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #1
757*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
758*c0909341SAndroid Build Coastguard Worker        bx              r3
759*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w64):
760*c0909341SAndroid Build Coastguard Worker        vld1.8          {d2,  d3,  d4,  d5},  [r2]!
761*c0909341SAndroid Build Coastguard Worker        vadd.s16        d0,  d0,  d30
762*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q2,  d4,  d5
763*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q1,  d2,  d3
764*c0909341SAndroid Build Coastguard Worker        vadd.u16        d4,  d4,  d5
765*c0909341SAndroid Build Coastguard Worker        vadd.u16        d2,  d2,  d3
766*c0909341SAndroid Build Coastguard Worker        vld1.8          {d16, d17, d18, d19}, [r2]
767*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d4,  d4
768*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d2,  d2
769*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d4,  d4
770*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d2,  d2
771*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q8,  d16, d17
772*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q9,  d18, d19
773*c0909341SAndroid Build Coastguard Worker        vadd.u16        d16, d16, d17
774*c0909341SAndroid Build Coastguard Worker        vadd.u16        d18, d18, d19
775*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d16, d16
776*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d18, d18
777*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d16, d16
778*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d18, d18
779*c0909341SAndroid Build Coastguard Worker        vadd.u16        d2,  d2,  d4
780*c0909341SAndroid Build Coastguard Worker        vadd.u16        d3,  d16, d18
781*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #64
782*c0909341SAndroid Build Coastguard Worker        vadd.s16        d0,  d0,  d2
783*c0909341SAndroid Build Coastguard Worker        vadd.s16        d0,  d0,  d3
784*c0909341SAndroid Build Coastguard Worker        vshl.u16        d18, d0,  d28
785*c0909341SAndroid Build Coastguard Worker        beq             1f
786*c0909341SAndroid Build Coastguard Worker        // h = 16/32
787*c0909341SAndroid Build Coastguard Worker        movw            lr,  #(0x5556/2)
788*c0909341SAndroid Build Coastguard Worker        movt            lr,  #(0x3334/2)
789*c0909341SAndroid Build Coastguard Worker        and             r5,  r4,  #31
790*c0909341SAndroid Build Coastguard Worker        lsr             lr,  lr,  r5
791*c0909341SAndroid Build Coastguard Worker        vdup.16         d30, lr
792*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     d18, d18, d30
793*c0909341SAndroid Build Coastguard Worker1:
794*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #32
795*c0909341SAndroid Build Coastguard Worker        vdup.8          q0,  d18[0]
796*c0909341SAndroid Build Coastguard Worker        vdup.8          q1,  d18[0]
797*c0909341SAndroid Build Coastguard Worker2:
798*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128]!
799*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128]!
800*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
801*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
802*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
803*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128]!
804*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128]!
805*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r0,  :128], r1
806*c0909341SAndroid Build Coastguard Worker        vst1.8          {d0,  d1,  d2,  d3},  [r12, :128], r1
807*c0909341SAndroid Build Coastguard Worker        bgt             2b
808*c0909341SAndroid Build Coastguard Worker        pop             {r4-r6, pc}
809*c0909341SAndroid Build Coastguard Workerendfunc
810*c0909341SAndroid Build Coastguard Worker
811*c0909341SAndroid Build Coastguard Worker// void ipred_paeth_8bpc_neon(pixel *dst, const ptrdiff_t stride,
812*c0909341SAndroid Build Coastguard Worker//                            const pixel *const topleft,
813*c0909341SAndroid Build Coastguard Worker//                            const int width, const int height, const int a,
814*c0909341SAndroid Build Coastguard Worker//                            const int max_width, const int max_height);
//
// Paeth intra predictor for 8-bit pixels. For every output pixel,
// base = left + top - topleft is formed and narrowed with unsigned saturation,
// and the output is whichever of left, top and topleft has the smallest
// absolute difference to base. Ties are resolved in the order
// left, top, topleft.
//
// Register use, following the argument order of the prototype above:
//   r0 = dst, r1 = stride, r2 = topleft, r3 = width, r4 = height (from stack).
// The arguments a, max_width and max_height are never read here.
// The top row is read from topleft + 1 upwards, the left column from
// topleft - 1 downwards (four pixels per load).
// Four rows are produced per loop iteration in every width case.
815*c0909341SAndroid Build Coastguard Workerfunction ipred_paeth_8bpc_neon, export=1
816*c0909341SAndroid Build Coastguard Worker        push            {r4-r8, lr}
817*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #24]  // r4 = height: first stack argument, above the 6 registers (24 bytes) just pushed
818*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3
819*c0909341SAndroid Build Coastguard Worker        adr             r5,  L(ipred_paeth_tbl)
820*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #25  // table index = clz(width) - 25: 0 for width 64 ... 4 for width 4
821*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r5, lr, lsl #2]  // offset of the width-specific code, relative to the table
822*c0909341SAndroid Build Coastguard Worker        vld1.8          {d4[], d5[]},  [r2]  // q2 = topleft pixel replicated to all 16 lanes
823*c0909341SAndroid Build Coastguard Worker        add             r8,  r2,  #1  // r8 = pointer to the top row
824*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #4  // r2 = pointer to the four left pixels just below topleft in memory
825*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  lr  // r5 = branch target
826*c0909341SAndroid Build Coastguard Worker        mov             r7,  #-4  // post-index step for r2: move back four left pixels per iteration
827*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1  // r6 = dst + stride (second output row)
828*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1  // r1 = 2*stride; r0 and r6 each step two rows at a time
829*c0909341SAndroid Build Coastguard Worker        bx              r5
830*c0909341SAndroid Build Coastguard Worker
831*c0909341SAndroid Build Coastguard Worker        .align 2
        // Jump table of offsets relative to the table itself, ordered from
        // the widest case to the narrowest (matching the clz-based index).
        // NOTE(review): CONFIG_THUMB is presumably 1 in Thumb builds so that
        // bx keeps the Thumb state - confirm against the build config.
832*c0909341SAndroid Build Coastguard WorkerL(ipred_paeth_tbl):
833*c0909341SAndroid Build Coastguard Worker        .word 640f - L(ipred_paeth_tbl) + CONFIG_THUMB
834*c0909341SAndroid Build Coastguard Worker        .word 320f - L(ipred_paeth_tbl) + CONFIG_THUMB
835*c0909341SAndroid Build Coastguard Worker        .word 160f - L(ipred_paeth_tbl) + CONFIG_THUMB
836*c0909341SAndroid Build Coastguard Worker        .word 80f  - L(ipred_paeth_tbl) + CONFIG_THUMB
837*c0909341SAndroid Build Coastguard Worker        .word 40f  - L(ipred_paeth_tbl) + CONFIG_THUMB
838*c0909341SAndroid Build Coastguard Worker
        // Width 4: four rows of four pixels are handled in one q register.
839*c0909341SAndroid Build Coastguard Worker40:
840*c0909341SAndroid Build Coastguard Worker        vld1.32         {d6[], d7[]},  [r8]  // q3 = the 4 top pixels repeated in every 32-bit lane
841*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q8,  d6,  d4  // top - topleft
842*c0909341SAndroid Build Coastguard Worker4:
843*c0909341SAndroid Build Coastguard Worker        vld4.8          {d0[], d1[], d2[], d3[]},  [r2, :32], r7  // four left pixels, one replicated per d register; d3 holds the one nearest topleft
844*c0909341SAndroid Build Coastguard Worker        vzip.32         d0,  d1  // d0 = left of one row in the low half, left of the next row in the high half
845*c0909341SAndroid Build Coastguard Worker        vzip.32         d2,  d3  // same for the other two rows, in d2
846*c0909341SAndroid Build Coastguard Worker        vaddw.u8        q9,  q8,  d0  // left + top - topleft
847*c0909341SAndroid Build Coastguard Worker        vaddw.u8        q10, q8,  d2
848*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d18, q9       // base
849*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d19, q10
850*c0909341SAndroid Build Coastguard Worker        vmov            d1,  d2  // q0 = left pixels for all four rows, matching the layout of base in q9
851*c0909341SAndroid Build Coastguard Worker        vabd.u8         q10, q3,  q9  // tdiff
852*c0909341SAndroid Build Coastguard Worker        vabd.u8         q11, q2,  q9  // tldiff
853*c0909341SAndroid Build Coastguard Worker        vabd.u8         q9,  q0,  q9  // ldiff
854*c0909341SAndroid Build Coastguard Worker        vmin.u8         q12, q10, q11 // min(tdiff, tldiff)
855*c0909341SAndroid Build Coastguard Worker        vcge.u8         q10, q11, q10 // tldiff >= tdiff
856*c0909341SAndroid Build Coastguard Worker        vcge.u8         q9,  q12, q9  // min(tdiff, tldiff) >= ldiff
857*c0909341SAndroid Build Coastguard Worker        vbsl            q10, q3,  q2  // tdiff <= tldiff ? top : topleft
858*c0909341SAndroid Build Coastguard Worker        vbit            q10, q0,  q9  // ldiff <= min ? left : ...
        // The left pixels were loaded from lower to higher addresses, so the
        // rows sit in q10 in reverse order; store the highest lane first.
859*c0909341SAndroid Build Coastguard Worker        vst1.32         {d21[1]}, [r0, :32], r1
860*c0909341SAndroid Build Coastguard Worker        vst1.32         {d21[0]}, [r6, :32], r1
861*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4  // four rows done; the stores below do not change the flags
862*c0909341SAndroid Build Coastguard Worker        vst1.32         {d20[1]}, [r0, :32], r1
863*c0909341SAndroid Build Coastguard Worker        vst1.32         {d20[0]}, [r6, :32], r1
864*c0909341SAndroid Build Coastguard Worker        bgt             4b
865*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
        // Width 8: two q registers hold four rows of eight pixels.
866*c0909341SAndroid Build Coastguard Worker80:
867*c0909341SAndroid Build Coastguard Worker        vld1.8          {d6},  [r8]  // the 8 top pixels
868*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q8,  d6,  d4  // top - topleft
869*c0909341SAndroid Build Coastguard Worker        vmov            d7,  d6  // q3 = top row in both halves
870*c0909341SAndroid Build Coastguard Worker8:
871*c0909341SAndroid Build Coastguard Worker        vld4.8          {d0[], d1[], d2[], d3[]},  [r2, :32], r7  // four left pixels, one replicated per d register; d3 holds the one nearest topleft
872*c0909341SAndroid Build Coastguard Worker        vaddw.u8        q9,  q8,  d0  // left + top - topleft, one row per q register
873*c0909341SAndroid Build Coastguard Worker        vaddw.u8        q10, q8,  d1
874*c0909341SAndroid Build Coastguard Worker        vaddw.u8        q11, q8,  d2
875*c0909341SAndroid Build Coastguard Worker        vaddw.u8        q12, q8,  d3
876*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d18, q9       // base
877*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d19, q10
878*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d20, q11
879*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d21, q12
880*c0909341SAndroid Build Coastguard Worker        vabd.u8         q11, q3,  q9  // tdiff
881*c0909341SAndroid Build Coastguard Worker        vabd.u8         q12, q3,  q10
882*c0909341SAndroid Build Coastguard Worker        vabd.u8         q13, q2,  q9  // tldiff
883*c0909341SAndroid Build Coastguard Worker        vabd.u8         q14, q2,  q10
884*c0909341SAndroid Build Coastguard Worker        vabd.u8         q10, q1,  q10 // ldiff
885*c0909341SAndroid Build Coastguard Worker        vabd.u8         q9,  q0,  q9
886*c0909341SAndroid Build Coastguard Worker        vmin.u8         q15, q12, q14 // min(tdiff, tldiff)
887*c0909341SAndroid Build Coastguard Worker        vcge.u8         q12, q14, q12 // tldiff >= tdiff
888*c0909341SAndroid Build Coastguard Worker        vmin.u8         q14, q11, q13 // min(tdiff, tldiff)
889*c0909341SAndroid Build Coastguard Worker        vcge.u8         q11, q13, q11 // tldiff >= tdiff
890*c0909341SAndroid Build Coastguard Worker        vcge.u8         q10, q15, q10 // min(tdiff, tldiff) >= ldiff
891*c0909341SAndroid Build Coastguard Worker        vcge.u8         q9,  q14, q9
892*c0909341SAndroid Build Coastguard Worker        vbsl            q12, q3,  q2  // tdiff <= tldiff ? top : topleft
893*c0909341SAndroid Build Coastguard Worker        vbsl            q11, q3,  q2
894*c0909341SAndroid Build Coastguard Worker        vbit            q12, q1,  q10 // ldiff <= min ? left : ...
895*c0909341SAndroid Build Coastguard Worker        vbit            q11, q0,  q9
        // d25 was built from d3 (the left pixel nearest topleft), so it is
        // the first row; d24, d23, d22 follow in that order.
896*c0909341SAndroid Build Coastguard Worker        vst1.8          {d25}, [r0, :64], r1
897*c0909341SAndroid Build Coastguard Worker        vst1.8          {d24}, [r6, :64], r1
898*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4  // four rows done; the stores below do not change the flags
899*c0909341SAndroid Build Coastguard Worker        vst1.8          {d23}, [r0, :64], r1
900*c0909341SAndroid Build Coastguard Worker        vst1.8          {d22}, [r6, :64], r1
901*c0909341SAndroid Build Coastguard Worker        bgt             8b
902*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
        // Widths 16, 32 and 64 share one path: an outer loop over groups of
        // four rows and an inner loop over 8-pixel-wide columns.
903*c0909341SAndroid Build Coastguard Worker160:
904*c0909341SAndroid Build Coastguard Worker320:
905*c0909341SAndroid Build Coastguard Worker640:
906*c0909341SAndroid Build Coastguard Worker        vld1.8          {d6},  [r8]!  // first 8 top pixels
907*c0909341SAndroid Build Coastguard Worker        mov             r12, r3  // r12 = width, kept to restart the inner loop and rewind r8
908*c0909341SAndroid Build Coastguard Worker        // Set up pointers for four rows in parallel; r0, r6, r5, lr
909*c0909341SAndroid Build Coastguard Worker        add             r5,  r0,  r1  // r5 = dst + 2*stride (r1 is already 2*stride here)
910*c0909341SAndroid Build Coastguard Worker        add             lr,  r6,  r1  // lr = dst + 3*stride
911*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1  // r1 = 4*stride
912*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  r3  // r1 = 4*stride - width: step from the end of a row to the start of the row four lines below
913*c0909341SAndroid Build Coastguard Worker1:
914*c0909341SAndroid Build Coastguard Worker        vld4.8          {d0[], d1[], d2[], d3[]},  [r2, :32], r7  // left pixels for the next four rows; d3 holds the one nearest topleft
915*c0909341SAndroid Build Coastguard Worker2:
916*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q8,  d6,  d4  // top - topleft
917*c0909341SAndroid Build Coastguard Worker        vmov            d7,  d6  // q3 = current 8 top pixels in both halves
918*c0909341SAndroid Build Coastguard Worker        vaddw.u8        q9,  q8,  d0  // left + top - topleft, one row per q register
919*c0909341SAndroid Build Coastguard Worker        vaddw.u8        q10, q8,  d1
920*c0909341SAndroid Build Coastguard Worker        vaddw.u8        q11, q8,  d2
921*c0909341SAndroid Build Coastguard Worker        vaddw.u8        q12, q8,  d3
922*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d18, q9       // base
923*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d19, q10
924*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d20, q11
925*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d21, q12
926*c0909341SAndroid Build Coastguard Worker        vabd.u8         q11, q3,  q9  // tdiff
927*c0909341SAndroid Build Coastguard Worker        vabd.u8         q12, q3,  q10
928*c0909341SAndroid Build Coastguard Worker        vabd.u8         q13, q2,  q9  // tldiff
929*c0909341SAndroid Build Coastguard Worker        vabd.u8         q14, q2,  q10
930*c0909341SAndroid Build Coastguard Worker        vabd.u8         q10, q1,  q10 // ldiff
931*c0909341SAndroid Build Coastguard Worker        vabd.u8         q9,  q0,  q9
932*c0909341SAndroid Build Coastguard Worker        vmin.u8         q15, q12, q14 // min(tdiff, tldiff)
933*c0909341SAndroid Build Coastguard Worker        vcge.u8         q12, q14, q12 // tldiff >= tdiff
934*c0909341SAndroid Build Coastguard Worker        vmin.u8         q14, q11, q13 // min(tdiff, tldiff)
935*c0909341SAndroid Build Coastguard Worker        vcge.u8         q11, q13, q11 // tldiff >= tdiff
936*c0909341SAndroid Build Coastguard Worker        vcge.u8         q10, q15, q10 // min(tdiff, tldiff) >= ldiff
937*c0909341SAndroid Build Coastguard Worker        vcge.u8         q9,  q14, q9
938*c0909341SAndroid Build Coastguard Worker        vbsl            q12, q3,  q2  // tdiff <= tldiff ? top : topleft
939*c0909341SAndroid Build Coastguard Worker        vbsl            q11, q3,  q2
940*c0909341SAndroid Build Coastguard Worker        vbit            q12, q1,  q10 // ldiff <= min ? left : ...
941*c0909341SAndroid Build Coastguard Worker        vbit            q11, q0,  q9
942*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #8  // 8 columns done; the stores below do not change the flags
943*c0909341SAndroid Build Coastguard Worker        vst1.8          {d25}, [r0, :64]!
944*c0909341SAndroid Build Coastguard Worker        vst1.8          {d24}, [r6, :64]!
945*c0909341SAndroid Build Coastguard Worker        vst1.8          {d23}, [r5, :64]!
946*c0909341SAndroid Build Coastguard Worker        vst1.8          {d22}, [lr, :64]!
947*c0909341SAndroid Build Coastguard Worker        ble             8f  // all columns of these four rows are written
948*c0909341SAndroid Build Coastguard Worker        vld1.8          {d6},  [r8]!  // next 8 top pixels
949*c0909341SAndroid Build Coastguard Worker        b               2b
950*c0909341SAndroid Build Coastguard Worker8:
951*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4  // four rows done
952*c0909341SAndroid Build Coastguard Worker        ble             9f
953*c0909341SAndroid Build Coastguard Worker        // End of horizontal loop, move pointers to next four rows
954*c0909341SAndroid Build Coastguard Worker        sub             r8,  r8,  r12  // rewind the top pointer by width, back to the start of the top row
955*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1
956*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r1
957*c0909341SAndroid Build Coastguard Worker        vld1.8          {d6},  [r8]!  // reload the first 8 top pixels
958*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  r1
959*c0909341SAndroid Build Coastguard Worker        add             lr,  lr,  r1
960*c0909341SAndroid Build Coastguard Worker        mov             r3,  r12  // restore the column counter
961*c0909341SAndroid Build Coastguard Worker        b               1b
962*c0909341SAndroid Build Coastguard Worker9:
963*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
964*c0909341SAndroid Build Coastguard Workerendfunc
965*c0909341SAndroid Build Coastguard Worker
966*c0909341SAndroid Build Coastguard Worker// void ipred_smooth_8bpc_neon(pixel *dst, const ptrdiff_t stride,
967*c0909341SAndroid Build Coastguard Worker//                             const pixel *const topleft,
968*c0909341SAndroid Build Coastguard Worker//                             const int width, const int height, const int a,
969*c0909341SAndroid Build Coastguard Worker//                             const int max_width, const int max_height);
//
// Smooth intra predictor for 8-bit pixels. Two 16-bit blends are computed
// per pixel:
//   hor = right*256  + (left - right)*weights_hor
//   ver = bottom*256 + (top - bottom)*weights_ver
// They are combined with a halving add and narrowed with a rounding right
// shift by 8.
//
// right is the last pixel of the top row, bottom is the pixel at
// topleft - height. weights_hor is read from sm_weights + width and
// weights_ver from sm_weights + height (sm_weights is defined outside this
// function).
//
// Register use, following the argument order of the prototype above:
//   r0 = dst, r1 = stride, r2 = topleft, r3 = width, r4 = height (from stack).
// The arguments a, max_width and max_height are never read here.
970*c0909341SAndroid Build Coastguard Workerfunction ipred_smooth_8bpc_neon, export=1
971*c0909341SAndroid Build Coastguard Worker        push            {r4-r10, lr}
972*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #32]  // r4 = height: first stack argument, above the 8 registers (32 bytes) just pushed
973*c0909341SAndroid Build Coastguard Worker        movrel          r10, X(sm_weights)
974*c0909341SAndroid Build Coastguard Worker        add             r12, r10, r4  // r12 = sm_weights + height (weights_ver)
975*c0909341SAndroid Build Coastguard Worker        add             r10, r10, r3  // r10 = sm_weights + width (weights_hor)
976*c0909341SAndroid Build Coastguard Worker        clz             r9,  r3
977*c0909341SAndroid Build Coastguard Worker        adr             r5,  L(ipred_smooth_tbl)
978*c0909341SAndroid Build Coastguard Worker        sub             lr,  r2,  r4  // lr = topleft - height: address of the bottom pixel
979*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  #25  // table index = clz(width) - 25: 0 for width 64 ... 4 for width 4
980*c0909341SAndroid Build Coastguard Worker        ldr             r9,  [r5, r9, lsl #2]  // offset of the width-specific code, relative to the table
981*c0909341SAndroid Build Coastguard Worker        vld1.8          {d4[]},  [lr] // bottom
982*c0909341SAndroid Build Coastguard Worker        add             r8,  r2,  #1  // r8 = pointer to the top row
983*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  r9  // r5 = branch target
984*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1  // r6 = dst + stride (second output row)
985*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1  // r1 = 2*stride; r0 and r6 each step two rows at a time
986*c0909341SAndroid Build Coastguard Worker        bx              r5
987*c0909341SAndroid Build Coastguard Worker
988*c0909341SAndroid Build Coastguard Worker        .align 2
        // Jump table of offsets relative to the table itself, ordered from
        // the widest case to the narrowest (matching the clz-based index).
        // NOTE(review): CONFIG_THUMB is presumably 1 in Thumb builds so that
        // bx keeps the Thumb state - confirm against the build config.
989*c0909341SAndroid Build Coastguard WorkerL(ipred_smooth_tbl):
990*c0909341SAndroid Build Coastguard Worker        .word 640f - L(ipred_smooth_tbl) + CONFIG_THUMB
991*c0909341SAndroid Build Coastguard Worker        .word 320f - L(ipred_smooth_tbl) + CONFIG_THUMB
992*c0909341SAndroid Build Coastguard Worker        .word 160f - L(ipred_smooth_tbl) + CONFIG_THUMB
993*c0909341SAndroid Build Coastguard Worker        .word 80f  - L(ipred_smooth_tbl) + CONFIG_THUMB
994*c0909341SAndroid Build Coastguard Worker        .word 40f  - L(ipred_smooth_tbl) + CONFIG_THUMB
995*c0909341SAndroid Build Coastguard Worker
        // Width 4: four rows per iteration, two rows of four pixels per
        // q register.
996*c0909341SAndroid Build Coastguard Worker40:
997*c0909341SAndroid Build Coastguard Worker        vld1.32         {d16[]}, [r8]       // top
998*c0909341SAndroid Build Coastguard Worker        vld1.32         {d18[]}, [r10, :32] // weights_hor
999*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #4  // r2 = pointer to the four left pixels just below topleft in memory
1000*c0909341SAndroid Build Coastguard Worker        mov             r7,  #-4  // post-index step for r2: move back four left pixels per iteration
1001*c0909341SAndroid Build Coastguard Worker        vdup.8          q3,  d16[3]   // right
1002*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q8,  d16, d4  // top-bottom
1003*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q9,  d18      // weights_hor
1004*c0909341SAndroid Build Coastguard Worker4:
1005*c0909341SAndroid Build Coastguard Worker        vld4.8          {d0[],  d1[],  d2[],  d3[]},  [r2,  :32], r7 // left
1006*c0909341SAndroid Build Coastguard Worker        vld4.8          {d20[], d21[], d22[], d23[]}, [r12, :32]!    // weights_ver
1007*c0909341SAndroid Build Coastguard Worker        vshll.i8        q12, d6,  #8  // right*256
1008*c0909341SAndroid Build Coastguard Worker        vshll.i8        q13, d6,  #8
1009*c0909341SAndroid Build Coastguard Worker        vzip.32         d1,  d0       // left, flipped
1010*c0909341SAndroid Build Coastguard Worker        vzip.32         d3,  d2
1011*c0909341SAndroid Build Coastguard Worker        vzip.32         d20, d21      // weights_ver
1012*c0909341SAndroid Build Coastguard Worker        vzip.32         d22, d23
1013*c0909341SAndroid Build Coastguard Worker        vshll.i8        q14, d4,  #8  // bottom*256
1014*c0909341SAndroid Build Coastguard Worker        vshll.i8        q15, d4,  #8
1015*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q0,  d1,  d6  // left-right
1016*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q1,  d3,  d6
1017*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q10, d20      // weights_ver
1018*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q11, d22
1019*c0909341SAndroid Build Coastguard Worker        vmla.i16        q12, q1,  q9  // right*256  + (left-right)*weights_hor
1020*c0909341SAndroid Build Coastguard Worker        vmla.i16        q13, q0,  q9  // (left flipped)
1021*c0909341SAndroid Build Coastguard Worker        vmla.i16        q14, q8,  q10 // bottom*256 + (top-bottom)*weights_ver
1022*c0909341SAndroid Build Coastguard Worker        vmla.i16        q15, q8,  q11
1023*c0909341SAndroid Build Coastguard Worker        vhadd.u16       q12, q12, q14  // (hor + ver) >> 1
1024*c0909341SAndroid Build Coastguard Worker        vhadd.u16       q13, q13, q15
1025*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d24, q12, #8  // rounding shift by 8, narrowed to 8 bits
1026*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d25, q13, #8
1027*c0909341SAndroid Build Coastguard Worker        vst1.32         {d24[0]}, [r0, :32], r1
1028*c0909341SAndroid Build Coastguard Worker        vst1.32         {d24[1]}, [r6, :32], r1
1029*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4  // four rows done; the stores below do not change the flags
1030*c0909341SAndroid Build Coastguard Worker        vst1.32         {d25[0]}, [r0, :32], r1
1031*c0909341SAndroid Build Coastguard Worker        vst1.32         {d25[1]}, [r6, :32], r1
1032*c0909341SAndroid Build Coastguard Worker        bgt             4b
1033*c0909341SAndroid Build Coastguard Worker        pop             {r4-r10, pc}
        // Width 8: two rows per iteration, one row of eight pixels per
        // q register.
1034*c0909341SAndroid Build Coastguard Worker80:
1035*c0909341SAndroid Build Coastguard Worker        vld1.8          {d16}, [r8]       // top
1036*c0909341SAndroid Build Coastguard Worker        vld1.8          {d18}, [r10, :64] // weights_hor
1037*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #2  // r2 = pointer to the two left pixels just below topleft in memory
1038*c0909341SAndroid Build Coastguard Worker        mov             r7,  #-2  // post-index step for r2: move back two left pixels per iteration
1039*c0909341SAndroid Build Coastguard Worker        vdup.8          q3,  d16[7]   // right
1040*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q8,  d16, d4  // top-bottom
1041*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q9,  d18      // weights_hor
1042*c0909341SAndroid Build Coastguard Worker8:
1043*c0909341SAndroid Build Coastguard Worker        vld2.8          {d0[],  d1[]},  [r2,  :16], r7 // left
1044*c0909341SAndroid Build Coastguard Worker        vld2.8          {d20[], d22[]}, [r12, :16]!    // weights_ver
1045*c0909341SAndroid Build Coastguard Worker        vshll.i8        q12, d6,  #8  // right*256
1046*c0909341SAndroid Build Coastguard Worker        vshll.i8        q13, d6,  #8
1047*c0909341SAndroid Build Coastguard Worker        vshll.i8        q14, d4,  #8  // bottom*256
1048*c0909341SAndroid Build Coastguard Worker        vshll.i8        q15, d4,  #8
1049*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q1,  d0,  d6  // left-right (left flipped)
1050*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q0,  d1,  d6
1051*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q10, d20      // weights_ver
1052*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q11, d22
1053*c0909341SAndroid Build Coastguard Worker        vmla.i16        q12, q0,  q9  // right*256  + (left-right)*weights_hor
1054*c0909341SAndroid Build Coastguard Worker        vmla.i16        q13, q1,  q9
1055*c0909341SAndroid Build Coastguard Worker        vmla.i16        q14, q8,  q10 // bottom*256 + (top-bottom)*weights_ver
1056*c0909341SAndroid Build Coastguard Worker        vmla.i16        q15, q8,  q11
1057*c0909341SAndroid Build Coastguard Worker        vhadd.u16       q12, q12, q14  // (hor + ver) >> 1
1058*c0909341SAndroid Build Coastguard Worker        vhadd.u16       q13, q13, q15
1059*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d24, q12, #8  // rounding shift by 8, narrowed to 8 bits
1060*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d25, q13, #8
1061*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2  // two rows done; the stores below do not change the flags
1062*c0909341SAndroid Build Coastguard Worker        vst1.8          {d24}, [r0, :64], r1
1063*c0909341SAndroid Build Coastguard Worker        vst1.8          {d25}, [r6, :64], r1
1064*c0909341SAndroid Build Coastguard Worker        bgt             8b
1065*c0909341SAndroid Build Coastguard Worker        pop             {r4-r10, pc}
        // Widths 16, 32 and 64 share one path: an outer loop over pairs of
        // rows and an inner loop over 8-pixel-wide columns.
1066*c0909341SAndroid Build Coastguard Worker160:
1067*c0909341SAndroid Build Coastguard Worker320:
1068*c0909341SAndroid Build Coastguard Worker640:
1069*c0909341SAndroid Build Coastguard Worker        add             lr,  r2,  r3  // lr = topleft + width: address of the last top pixel
1070*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #2  // r2 = pointer to the two left pixels just below topleft in memory
1071*c0909341SAndroid Build Coastguard Worker        mov             r7,  #-2  // post-index step for r2: move back two left pixels per iteration
1072*c0909341SAndroid Build Coastguard Worker        vld1.8          {d6[], d7[]}, [lr] // right
1073*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  r3  // r1 = 2*stride - width: step from the end of a row to the start of the row two lines below
1074*c0909341SAndroid Build Coastguard Worker        mov             r9,  r3  // r9 = width, kept to rewind r8/r10 and restart the inner loop
1075*c0909341SAndroid Build Coastguard Worker
1076*c0909341SAndroid Build Coastguard Worker1:
1077*c0909341SAndroid Build Coastguard Worker        vld2.8          {d0[],  d1[]},  [r2,  :16], r7 // left
1078*c0909341SAndroid Build Coastguard Worker        vld2.8          {d20[], d22[]}, [r12, :16]!    // weights_ver
1079*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q1,  d0,  d6  // left-right (left flipped)
1080*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q0,  d1,  d6
1081*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q10, d20      // weights_ver
1082*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q11, d22
1083*c0909341SAndroid Build Coastguard Worker2:
1084*c0909341SAndroid Build Coastguard Worker        vld1.8          {d16}, [r8]!       // top
1085*c0909341SAndroid Build Coastguard Worker        vld1.8          {d18}, [r10, :64]! // weights_hor
1086*c0909341SAndroid Build Coastguard Worker        vshll.i8        q12, d6,  #8  // right*256
1087*c0909341SAndroid Build Coastguard Worker        vshll.i8        q13, d6,  #8
1088*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q9,  d18      // weights_hor
1089*c0909341SAndroid Build Coastguard Worker        vshll.i8        q14, d4,  #8  // bottom*256
1090*c0909341SAndroid Build Coastguard Worker        vshll.i8        q15, d4,  #8
1091*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q8,  d16, d4  // top-bottom
1092*c0909341SAndroid Build Coastguard Worker        vmla.i16        q12, q0,  q9  // right*256  + (left-right)*weights_hor
1093*c0909341SAndroid Build Coastguard Worker        vmla.i16        q13, q1,  q9
1094*c0909341SAndroid Build Coastguard Worker        vmla.i16        q14, q8,  q10 // bottom*256 + (top-bottom)*weights_ver
1095*c0909341SAndroid Build Coastguard Worker        vmla.i16        q15, q8,  q11
1096*c0909341SAndroid Build Coastguard Worker        vhadd.u16       q12, q12, q14  // (hor + ver) >> 1
1097*c0909341SAndroid Build Coastguard Worker        vhadd.u16       q13, q13, q15
1098*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d24, q12, #8  // rounding shift by 8, narrowed to 8 bits
1099*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d25, q13, #8
1100*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #8  // 8 columns done; the stores below do not change the flags
1101*c0909341SAndroid Build Coastguard Worker        vst1.8          {d24}, [r0, :64]!
1102*c0909341SAndroid Build Coastguard Worker        vst1.8          {d25}, [r6, :64]!
1103*c0909341SAndroid Build Coastguard Worker        bgt             2b
1104*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2  // two rows done
1105*c0909341SAndroid Build Coastguard Worker        ble             9f
1106*c0909341SAndroid Build Coastguard Worker        sub             r8,  r8,  r9  // rewind the top pointer by width
1107*c0909341SAndroid Build Coastguard Worker        sub             r10, r10, r9  // rewind the weights_hor pointer by width
1108*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1  // advance both row pointers to the next pair of rows
1109*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r1
1110*c0909341SAndroid Build Coastguard Worker        mov             r3,  r9  // restore the column counter
1111*c0909341SAndroid Build Coastguard Worker        b               1b
1112*c0909341SAndroid Build Coastguard Worker9:
1113*c0909341SAndroid Build Coastguard Worker        pop             {r4-r10, pc}
1114*c0909341SAndroid Build Coastguard Workerendfunc
1115*c0909341SAndroid Build Coastguard Worker
1116*c0909341SAndroid Build Coastguard Worker// void ipred_smooth_v_8bpc_neon(pixel *dst, const ptrdiff_t stride,
1117*c0909341SAndroid Build Coastguard Worker//                               const pixel *const topleft,
1118*c0909341SAndroid Build Coastguard Worker//                               const int width, const int height, const int a,
1119*c0909341SAndroid Build Coastguard Worker//                               const int max_width, const int max_height);
1120*c0909341SAndroid Build Coastguard Workerfunction ipred_smooth_v_8bpc_neon, export=1
1121*c0909341SAndroid Build Coastguard Worker        push            {r4-r7, lr}
1122*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #20]
1123*c0909341SAndroid Build Coastguard Worker        movrel          r7,  X(sm_weights)
1124*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r4
1125*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3
1126*c0909341SAndroid Build Coastguard Worker        adr             r5,  L(ipred_smooth_v_tbl)
1127*c0909341SAndroid Build Coastguard Worker        sub             r12, r2,  r4
1128*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #25
1129*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r5, lr, lsl #2]
1130*c0909341SAndroid Build Coastguard Worker        vld1.8          {d4[]},  [r12] // bottom
1131*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #1
1132*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  lr
1133*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1
1134*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
1135*c0909341SAndroid Build Coastguard Worker        bx              r5
1136*c0909341SAndroid Build Coastguard Worker
1137*c0909341SAndroid Build Coastguard Worker        .align 2
1138*c0909341SAndroid Build Coastguard WorkerL(ipred_smooth_v_tbl):
1139*c0909341SAndroid Build Coastguard Worker        .word 640f - L(ipred_smooth_v_tbl) + CONFIG_THUMB
1140*c0909341SAndroid Build Coastguard Worker        .word 320f - L(ipred_smooth_v_tbl) + CONFIG_THUMB
1141*c0909341SAndroid Build Coastguard Worker        .word 160f - L(ipred_smooth_v_tbl) + CONFIG_THUMB
1142*c0909341SAndroid Build Coastguard Worker        .word 80f  - L(ipred_smooth_v_tbl) + CONFIG_THUMB
1143*c0909341SAndroid Build Coastguard Worker        .word 40f  - L(ipred_smooth_v_tbl) + CONFIG_THUMB
1144*c0909341SAndroid Build Coastguard Worker
1145*c0909341SAndroid Build Coastguard Worker40:
1146*c0909341SAndroid Build Coastguard Worker        vld1.32         {d6[]}, [r2]  // top
1147*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q3,  d6,  d4  // top-bottom
1148*c0909341SAndroid Build Coastguard Worker4:
1149*c0909341SAndroid Build Coastguard Worker        vld4.8          {d16[], d17[], d18[], d19[]}, [r7, :32]! // weights_ver
1150*c0909341SAndroid Build Coastguard Worker        vshll.i8        q10, d4,  #8  // bottom*256
1151*c0909341SAndroid Build Coastguard Worker        vshll.i8        q11, d4,  #8
1152*c0909341SAndroid Build Coastguard Worker        vzip.32         d16, d17      // weights_ver
1153*c0909341SAndroid Build Coastguard Worker        vzip.32         d18, d19
1154*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q8,  d16      // weights_ver
1155*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q9,  d18
1156*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
1157*c0909341SAndroid Build Coastguard Worker        vmla.i16        q10, q3,  q8  // bottom*256 + (top-bottom)*weights_ver
1158*c0909341SAndroid Build Coastguard Worker        vmla.i16        q11, q3,  q9
1159*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d20, q10, #8
1160*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d21, q11, #8
1161*c0909341SAndroid Build Coastguard Worker        vst1.32         {d20[0]}, [r0, :32], r1
1162*c0909341SAndroid Build Coastguard Worker        vst1.32         {d20[1]}, [r6, :32], r1
1163*c0909341SAndroid Build Coastguard Worker        vst1.32         {d21[0]}, [r0, :32], r1
1164*c0909341SAndroid Build Coastguard Worker        vst1.32         {d21[1]}, [r6, :32], r1
1165*c0909341SAndroid Build Coastguard Worker        bgt             4b
1166*c0909341SAndroid Build Coastguard Worker        pop             {r4-r7, pc}
1167*c0909341SAndroid Build Coastguard Worker80:
1168*c0909341SAndroid Build Coastguard Worker        vld1.8          {d6}, [r2]    // top
1169*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q3,  d6,  d4  // top-bottom
1170*c0909341SAndroid Build Coastguard Worker8:
1171*c0909341SAndroid Build Coastguard Worker        vld4.8          {d16[], d18[], d20[], d22[]}, [r7, :32]! // weights_ver
1172*c0909341SAndroid Build Coastguard Worker        vshll.i8        q12, d4,  #8  // bottom*256
1173*c0909341SAndroid Build Coastguard Worker        vshll.i8        q13, d4,  #8
1174*c0909341SAndroid Build Coastguard Worker        vshll.i8        q14, d4,  #8
1175*c0909341SAndroid Build Coastguard Worker        vshll.i8        q15, d4,  #8
1176*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q8,  d16      // weights_ver
1177*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q9,  d18
1178*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q10, d20
1179*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q11, d22
1180*c0909341SAndroid Build Coastguard Worker        vmla.i16        q12, q3,  q8  // bottom*256 + (top-bottom)*weights_ver
1181*c0909341SAndroid Build Coastguard Worker        vmla.i16        q13, q3,  q9
1182*c0909341SAndroid Build Coastguard Worker        vmla.i16        q14, q3,  q10
1183*c0909341SAndroid Build Coastguard Worker        vmla.i16        q15, q3,  q11
1184*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d24, q12, #8
1185*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d25, q13, #8
1186*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d26, q14, #8
1187*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d27, q15, #8
1188*c0909341SAndroid Build Coastguard Worker        vst1.8          {d24}, [r0, :64], r1
1189*c0909341SAndroid Build Coastguard Worker        vst1.8          {d25}, [r6, :64], r1
1190*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
1191*c0909341SAndroid Build Coastguard Worker        vst1.8          {d26}, [r0, :64], r1
1192*c0909341SAndroid Build Coastguard Worker        vst1.8          {d27}, [r6, :64], r1
1193*c0909341SAndroid Build Coastguard Worker        bgt             8b
1194*c0909341SAndroid Build Coastguard Worker        pop             {r4-r7, pc}
1195*c0909341SAndroid Build Coastguard Worker160:
1196*c0909341SAndroid Build Coastguard Worker320:
1197*c0909341SAndroid Build Coastguard Worker640:
1198*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
1199*c0909341SAndroid Build Coastguard Worker        // Set up pointers for four rows in parallel; r0, r6, r5, lr
1200*c0909341SAndroid Build Coastguard Worker        add             r5,  r0,  r1
1201*c0909341SAndroid Build Coastguard Worker        add             lr,  r6,  r1
1202*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
1203*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  r3
1204*c0909341SAndroid Build Coastguard Worker        mov             r12, r3
1205*c0909341SAndroid Build Coastguard Worker
1206*c0909341SAndroid Build Coastguard Worker1:
1207*c0909341SAndroid Build Coastguard Worker        vld4.8          {d8[], d10[], d12[], d14[]}, [r7, :32]! // weights_ver
1208*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q4,  d8       // weights_ver
1209*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q5,  d10
1210*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q6,  d12
1211*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q7,  d14
1212*c0909341SAndroid Build Coastguard Worker2:
1213*c0909341SAndroid Build Coastguard Worker        vld1.8          {q3}, [r2]!   // top
1214*c0909341SAndroid Build Coastguard Worker        vshll.i8        q8,  d4,  #8  // bottom*256
1215*c0909341SAndroid Build Coastguard Worker        vshll.i8        q9,  d4,  #8
1216*c0909341SAndroid Build Coastguard Worker        vshll.i8        q10, d4,  #8
1217*c0909341SAndroid Build Coastguard Worker        vshll.i8        q11, d4,  #8
1218*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q0,  d6,  d4  // top-bottom
1219*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q1,  d7,  d4
1220*c0909341SAndroid Build Coastguard Worker        vshll.i8        q12, d4,  #8
1221*c0909341SAndroid Build Coastguard Worker        vshll.i8        q13, d4,  #8
1222*c0909341SAndroid Build Coastguard Worker        vshll.i8        q14, d4,  #8
1223*c0909341SAndroid Build Coastguard Worker        vshll.i8        q15, d4,  #8
1224*c0909341SAndroid Build Coastguard Worker        vmla.i16        q8,  q0,  q4  // bottom*256 + (top-bottom)*weights_ver
1225*c0909341SAndroid Build Coastguard Worker        vmla.i16        q9,  q1,  q4
1226*c0909341SAndroid Build Coastguard Worker        vmla.i16        q10, q0,  q5
1227*c0909341SAndroid Build Coastguard Worker        vmla.i16        q11, q1,  q5
1228*c0909341SAndroid Build Coastguard Worker        vmla.i16        q12, q0,  q6  // bottom*256 + (top-bottom)*weights_ver
1229*c0909341SAndroid Build Coastguard Worker        vmla.i16        q13, q1,  q6
1230*c0909341SAndroid Build Coastguard Worker        vmla.i16        q14, q0,  q7
1231*c0909341SAndroid Build Coastguard Worker        vmla.i16        q15, q1,  q7
1232*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d16, q8,  #8
1233*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d17, q9,  #8
1234*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d18, q10, #8
1235*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d19, q11, #8
1236*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d20, q12, #8
1237*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d21, q13, #8
1238*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d22, q14, #8
1239*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d23, q15, #8
1240*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #16
1241*c0909341SAndroid Build Coastguard Worker        vst1.8          {q8},  [r0, :128]!
1242*c0909341SAndroid Build Coastguard Worker        vst1.8          {q9},  [r6, :128]!
1243*c0909341SAndroid Build Coastguard Worker        vst1.8          {q10}, [r5, :128]!
1244*c0909341SAndroid Build Coastguard Worker        vst1.8          {q11}, [lr, :128]!
1245*c0909341SAndroid Build Coastguard Worker        bgt             2b
1246*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
1247*c0909341SAndroid Build Coastguard Worker        ble             9f
1248*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r12
1249*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1
1250*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r1
1251*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  r1
1252*c0909341SAndroid Build Coastguard Worker        add             lr,  lr,  r1
1253*c0909341SAndroid Build Coastguard Worker        mov             r3,  r12
1254*c0909341SAndroid Build Coastguard Worker        b               1b
1255*c0909341SAndroid Build Coastguard Worker9:
1256*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1257*c0909341SAndroid Build Coastguard Worker        pop             {r4-r7, pc}
1258*c0909341SAndroid Build Coastguard Workerendfunc
1259*c0909341SAndroid Build Coastguard Worker
1260*c0909341SAndroid Build Coastguard Worker// void ipred_smooth_h_8bpc_neon(pixel *dst, const ptrdiff_t stride,
1261*c0909341SAndroid Build Coastguard Worker//                               const pixel *const topleft,
1262*c0909341SAndroid Build Coastguard Worker//                               const int width, const int height, const int a,
1263*c0909341SAndroid Build Coastguard Worker//                               const int max_width, const int max_height);
//
// Horizontal "smooth" intra prediction for 8-bit pixels. Each output pixel is
//   (right*256 + (left - right)*weights_hor[x] + 128) >> 8
// evaluated in 16-bit lanes, where "right" is the byte at topleft[width],
// "left" is the byte at topleft[-1 - y] for output row y, and weights_hor is
// read starting at sm_weights + width.
//
// Arguments as used below: r0 = dst, r1 = stride, r2 = topleft, r3 = width,
// first stack argument = height. The remaining prototype arguments are not
// read by this function.
// NOTE(review): the dispatch assumes width is one of 4, 8, 16, 32, 64 and
// every path emits four rows per iteration, so height is presumably a
// multiple of 4 - confirm against the callers.
1264*c0909341SAndroid Build Coastguard Workerfunction ipred_smooth_h_8bpc_neon, export=1
1265*c0909341SAndroid Build Coastguard Worker        push            {r4-r8, lr}
1266*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #24] // r4 = height; 6 registers (24 bytes) were just pushed
1267*c0909341SAndroid Build Coastguard Worker        movrel          r8,  X(sm_weights)
1268*c0909341SAndroid Build Coastguard Worker        add             r8,  r8,  r3 // r8 = sm_weights + width = weights_hor
1269*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3
1270*c0909341SAndroid Build Coastguard Worker        adr             r5,  L(ipred_smooth_h_tbl)
1271*c0909341SAndroid Build Coastguard Worker        add             r12, r2,  r3 // r12 = &topleft[width]
1272*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #25 // table index: clz(64)-25 = 0 ... clz(4)-25 = 4
1273*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r5, lr, lsl #2] // offset of the handler relative to the table
1274*c0909341SAndroid Build Coastguard Worker        vld1.8          {d4[]},  [r12] // right, broadcast to all 8 lanes of d4
1275*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  lr // absolute address of the width-specific handler
1276*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1 // r6 = second row pointer (dst + stride)
1277*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1 // r1 = 2*stride, since r0 and r6 each cover every other row
1278*c0909341SAndroid Build Coastguard Worker        bx              r5
1279*c0909341SAndroid Build Coastguard Worker
1280*c0909341SAndroid Build Coastguard Worker        .align 2
        // Handler offsets relative to the table itself, ordered from the
        // widest block to the narrowest. CONFIG_THUMB is added to each
        // entry; presumably it is 1 in Thumb builds so that bx stays in
        // Thumb state (defined outside this function - confirm).
1281*c0909341SAndroid Build Coastguard WorkerL(ipred_smooth_h_tbl):
1282*c0909341SAndroid Build Coastguard Worker        .word 640f - L(ipred_smooth_h_tbl) + CONFIG_THUMB
1283*c0909341SAndroid Build Coastguard Worker        .word 320f - L(ipred_smooth_h_tbl) + CONFIG_THUMB
1284*c0909341SAndroid Build Coastguard Worker        .word 160f - L(ipred_smooth_h_tbl) + CONFIG_THUMB
1285*c0909341SAndroid Build Coastguard Worker        .word 80f  - L(ipred_smooth_h_tbl) + CONFIG_THUMB
1286*c0909341SAndroid Build Coastguard Worker        .word 40f  - L(ipred_smooth_h_tbl) + CONFIG_THUMB
1287*c0909341SAndroid Build Coastguard Worker
        // Width 4: two rows share one q register, four rows per iteration.
1288*c0909341SAndroid Build Coastguard Worker40:
1289*c0909341SAndroid Build Coastguard Worker        vld1.32         {d6[]}, [r8, :32] // weights_hor, the 4 weights replicated in both halves of d6
1290*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #4 // r2 = &topleft[-4], the first group of four left pixels
1291*c0909341SAndroid Build Coastguard Worker        mov             r7,  #-4 // the left pointer walks backwards, 4 bytes per iteration
1292*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q3,  d6       // weights_hor, widened to 16 bit
1293*c0909341SAndroid Build Coastguard Worker4:
1294*c0909341SAndroid Build Coastguard Worker        vld4.8          {d0[], d1[], d2[], d3[]},  [r2, :32], r7 // left, one byte broadcast per register; d3 holds the highest address
1295*c0909341SAndroid Build Coastguard Worker        vshll.i8        q8,  d4,  #8  // right*256
1296*c0909341SAndroid Build Coastguard Worker        vshll.i8        q9,  d4,  #8
1297*c0909341SAndroid Build Coastguard Worker        vzip.32         d3,  d2       // left, flipped; d3 = {4x byte 3, 4x byte 2}
1298*c0909341SAndroid Build Coastguard Worker        vzip.32         d1,  d0       // d1 = {4x byte 1, 4x byte 0}
1299*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q1,  d3,  d4  // left-right
1300*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q0,  d1,  d4
1301*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4 // four rows done; flags are consumed by the bgt below
1302*c0909341SAndroid Build Coastguard Worker        vmla.i16        q8,  q1,  q3  // right*256  + (left-right)*weights_hor
1303*c0909341SAndroid Build Coastguard Worker        vmla.i16        q9,  q0,  q3
1304*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d16, q8,  #8 // (x + 128) >> 8, narrowed back to 8 bit
1305*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d17, q9,  #8
1306*c0909341SAndroid Build Coastguard Worker        vst1.32         {d16[0]}, [r0, :32], r1 // stores alternate between the two row pointers
1307*c0909341SAndroid Build Coastguard Worker        vst1.32         {d16[1]}, [r6, :32], r1
1308*c0909341SAndroid Build Coastguard Worker        vst1.32         {d17[0]}, [r0, :32], r1
1309*c0909341SAndroid Build Coastguard Worker        vst1.32         {d17[1]}, [r6, :32], r1
1310*c0909341SAndroid Build Coastguard Worker        bgt             4b
1311*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
        // Width 8: one q register per row, four rows per iteration.
1312*c0909341SAndroid Build Coastguard Worker80:
1313*c0909341SAndroid Build Coastguard Worker        vld1.8          {d6}, [r8, :64] // weights_hor, 8 weights
1314*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #4 // r2 = &topleft[-4]
1315*c0909341SAndroid Build Coastguard Worker        mov             r7,  #-4 // post-decrement step for the left pointer
1316*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q3,  d6       // weights_hor, widened to 16 bit
1317*c0909341SAndroid Build Coastguard Worker8:
1318*c0909341SAndroid Build Coastguard Worker        vld4.8          {d16[], d18[], d20[], d22[]},  [r2, :32], r7 // left, one byte broadcast per register; d22 holds the highest address
1319*c0909341SAndroid Build Coastguard Worker        vshll.i8        q12, d4,  #8  // right*256
1320*c0909341SAndroid Build Coastguard Worker        vshll.i8        q13, d4,  #8
1321*c0909341SAndroid Build Coastguard Worker        vshll.i8        q14, d4,  #8
1322*c0909341SAndroid Build Coastguard Worker        vshll.i8        q15, d4,  #8
1323*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q11, d22, d4  // left-right
1324*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q10, d20, d4
1325*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q9,  d18, d4
1326*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q8,  d16, d4
1327*c0909341SAndroid Build Coastguard Worker        vmla.i16        q12, q11, q3  // right*256  + (left-right)*weights_hor
1328*c0909341SAndroid Build Coastguard Worker        vmla.i16        q13, q10, q3  // (left flipped)
1329*c0909341SAndroid Build Coastguard Worker        vmla.i16        q14, q9,  q3
1330*c0909341SAndroid Build Coastguard Worker        vmla.i16        q15, q8,  q3
1331*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d24, q12, #8 // (x + 128) >> 8, narrowed back to 8 bit
1332*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d25, q13, #8
1333*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d26, q14, #8
1334*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d27, q15, #8
1335*c0909341SAndroid Build Coastguard Worker        vst1.8          {d24}, [r0, :64], r1
1336*c0909341SAndroid Build Coastguard Worker        vst1.8          {d25}, [r6, :64], r1
1337*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4 // four rows done; flags are consumed by the bgt below
1338*c0909341SAndroid Build Coastguard Worker        vst1.8          {d26}, [r0, :64], r1
1339*c0909341SAndroid Build Coastguard Worker        vst1.8          {d27}, [r6, :64], r1
1340*c0909341SAndroid Build Coastguard Worker        bgt             8b
1341*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
        // Widths 16, 32 and 64 share one handler: the outer loop (1:) steps
        // four rows at a time, the inner loop (2:) steps 16 columns.
1342*c0909341SAndroid Build Coastguard Worker160:
1343*c0909341SAndroid Build Coastguard Worker320:
1344*c0909341SAndroid Build Coastguard Worker640:
1345*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7} // d8-d15 are callee-saved under the AAPCS and are used below
1346*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #4 // r2 = &topleft[-4]
1347*c0909341SAndroid Build Coastguard Worker        mov             r7,  #-4 // post-decrement step for the left pointer
1348*c0909341SAndroid Build Coastguard Worker        // Set up pointers for four rows in parallel; r0, r6, r5, lr
1349*c0909341SAndroid Build Coastguard Worker        add             r5,  r0,  r1 // r1 is already 2*stride here, so r5 = row 2
1350*c0909341SAndroid Build Coastguard Worker        add             lr,  r6,  r1 // lr = row 3
1351*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1 // r1 = 4*stride
1352*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  r3 // minus width: step from the end of a row to the start of the row 4 lines down
1353*c0909341SAndroid Build Coastguard Worker        mov             r12, r3 // keep width; r3 is used as the column counter
1354*c0909341SAndroid Build Coastguard Worker
1355*c0909341SAndroid Build Coastguard Worker1:
1356*c0909341SAndroid Build Coastguard Worker        vld4.8          {d8[], d10[], d12[], d14[]},  [r2, :32], r7 // left, one byte broadcast per register; d14 holds the highest address
1357*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q4,  d8,  d4  // left-right
1358*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q5,  d10, d4
1359*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q6,  d12, d4
1360*c0909341SAndroid Build Coastguard Worker        vsubl.u8        q7,  d14, d4
1361*c0909341SAndroid Build Coastguard Worker2:
1362*c0909341SAndroid Build Coastguard Worker        vld1.8          {q1}, [r8, :128]! // weights_hor, next 16 weights
1363*c0909341SAndroid Build Coastguard Worker        vshll.i8        q8,  d4,  #8  // right*256
1364*c0909341SAndroid Build Coastguard Worker        vshll.i8        q9,  d4,  #8
1365*c0909341SAndroid Build Coastguard Worker        vshll.i8        q10, d4,  #8
1366*c0909341SAndroid Build Coastguard Worker        vshll.i8        q11, d4,  #8
1367*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d2       // weights_hor, columns 0-7
1368*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q1,  d3       // columns 8-15; widened in place, so d2 must be consumed first
1369*c0909341SAndroid Build Coastguard Worker        vshll.i8        q12, d4,  #8
1370*c0909341SAndroid Build Coastguard Worker        vshll.i8        q13, d4,  #8
1371*c0909341SAndroid Build Coastguard Worker        vshll.i8        q14, d4,  #8
1372*c0909341SAndroid Build Coastguard Worker        vshll.i8        q15, d4,  #8
1373*c0909341SAndroid Build Coastguard Worker        vmla.i16        q8,  q7,  q0  // right*256  + (left-right)*weights_hor
1374*c0909341SAndroid Build Coastguard Worker        vmla.i16        q9,  q7,  q1  // (left flipped)
1375*c0909341SAndroid Build Coastguard Worker        vmla.i16        q10, q6,  q0
1376*c0909341SAndroid Build Coastguard Worker        vmla.i16        q11, q6,  q1
1377*c0909341SAndroid Build Coastguard Worker        vmla.i16        q12, q5,  q0
1378*c0909341SAndroid Build Coastguard Worker        vmla.i16        q13, q5,  q1
1379*c0909341SAndroid Build Coastguard Worker        vmla.i16        q14, q4,  q0
1380*c0909341SAndroid Build Coastguard Worker        vmla.i16        q15, q4,  q1
1381*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d16, q8,  #8 // narrow the 8 accumulators down into q8-q11;
1382*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d17, q9,  #8 // each source is consumed before its register is overwritten
1383*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d18, q10, #8
1384*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d19, q11, #8
1385*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d20, q12, #8
1386*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d21, q13, #8
1387*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d22, q14, #8
1388*c0909341SAndroid Build Coastguard Worker        vrshrn.i16      d23, q15, #8
1389*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #16 // 16 columns done; flags are consumed by the bgt below
1390*c0909341SAndroid Build Coastguard Worker        vst1.8          {q8},  [r0, :128]! // row 0
1391*c0909341SAndroid Build Coastguard Worker        vst1.8          {q9},  [r6, :128]! // row 1
1392*c0909341SAndroid Build Coastguard Worker        vst1.8          {q10}, [r5, :128]! // row 2
1393*c0909341SAndroid Build Coastguard Worker        vst1.8          {q11}, [lr, :128]! // row 3
1394*c0909341SAndroid Build Coastguard Worker        bgt             2b
1395*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4 // four rows done
1396*c0909341SAndroid Build Coastguard Worker        ble             9f
1397*c0909341SAndroid Build Coastguard Worker        sub             r8,  r8,  r12 // rewind the weights pointer to column 0
1398*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1 // advance all four row pointers by 4 rows
1399*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r1
1400*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  r1
1401*c0909341SAndroid Build Coastguard Worker        add             lr,  lr,  r1
1402*c0909341SAndroid Build Coastguard Worker        mov             r3,  r12 // reload the column counter with width
1403*c0909341SAndroid Build Coastguard Worker        b               1b
1404*c0909341SAndroid Build Coastguard Worker9:
1405*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1406*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
1407*c0909341SAndroid Build Coastguard Workerendfunc
1408*c0909341SAndroid Build Coastguard Worker
1409*c0909341SAndroid Build Coastguard Worker// void ipred_filter_8bpc_neon(pixel *dst, const ptrdiff_t stride,
1410*c0909341SAndroid Build Coastguard Worker//                             const pixel *const topleft,
1411*c0909341SAndroid Build Coastguard Worker//                             const int width, const int height, const int filt_idx,
1412*c0909341SAndroid Build Coastguard Worker//                             const int max_width, const int max_height);
1413*c0909341SAndroid Build Coastguard Workerfunction ipred_filter_8bpc_neon, export=1
1414*c0909341SAndroid Build Coastguard Worker        push            {r4-r8, lr}
1415*c0909341SAndroid Build Coastguard Worker        movw            r12, #511
1416*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #24]
1417*c0909341SAndroid Build Coastguard Worker        and             r5,  r5,  r12 // 511
1418*c0909341SAndroid Build Coastguard Worker        movrel          r6,  X(filter_intra_taps)
1419*c0909341SAndroid Build Coastguard Worker        lsl             r5,  r5,  #6
1420*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r5
1421*c0909341SAndroid Build Coastguard Worker        vld1.8          {d20, d21, d22, d23}, [r6, :128]!
1422*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3
1423*c0909341SAndroid Build Coastguard Worker        adr             r5,  L(ipred_filter_tbl)
1424*c0909341SAndroid Build Coastguard Worker        vld1.8          {d27, d28, d29}, [r6, :64]
1425*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #26
1426*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r5, lr, lsl #2]
1427*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q8,  d20
1428*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q9,  d21
1429*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  lr
1430*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q10, d22
1431*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q11, d23
1432*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1
1433*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
1434*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q12, d27
1435*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q13, d28
1436*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q14, d29
1437*c0909341SAndroid Build Coastguard Worker        add             r8,  r2,  #1
1438*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #2
1439*c0909341SAndroid Build Coastguard Worker        mov             r7,  #-2
1440*c0909341SAndroid Build Coastguard Worker        bx              r5
1441*c0909341SAndroid Build Coastguard Worker
1442*c0909341SAndroid Build Coastguard Worker        .align 2
1443*c0909341SAndroid Build Coastguard WorkerL(ipred_filter_tbl):
1444*c0909341SAndroid Build Coastguard Worker        .word 320f - L(ipred_filter_tbl) + CONFIG_THUMB
1445*c0909341SAndroid Build Coastguard Worker        .word 160f - L(ipred_filter_tbl) + CONFIG_THUMB
1446*c0909341SAndroid Build Coastguard Worker        .word 80f  - L(ipred_filter_tbl) + CONFIG_THUMB
1447*c0909341SAndroid Build Coastguard Worker        .word 40f  - L(ipred_filter_tbl) + CONFIG_THUMB
1448*c0909341SAndroid Build Coastguard Worker
1449*c0909341SAndroid Build Coastguard Worker40:
1450*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]}, [r8]     // top (0-3)
1451*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d0          // top (0-3)
1452*c0909341SAndroid Build Coastguard Worker4:
1453*c0909341SAndroid Build Coastguard Worker        vld1.32         {d2[]}, [r2], r7 // left (0-1) + topleft (2)
1454*c0909341SAndroid Build Coastguard Worker        vmul.i16        q2,  q9,  d0[0]  // p1(top[0]) * filter(1)
1455*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q10, d0[1]  // p2(top[1]) * filter(2)
1456*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q11, d0[2]  // p3(top[2]) * filter(3)
1457*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q1,  d2          // left (0-1) + topleft (2)
1458*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q12, d0[3]  // p4(top[3]) * filter(4)
1459*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q8,  d2[2]  // p0(topleft) * filter(0)
1460*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q13, d2[1]  // p5(left[0]) * filter(5)
1461*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q14, d2[0]  // p6(left[1]) * filter(6)
1462*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d4,  q2,  #4
1463*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
1464*c0909341SAndroid Build Coastguard Worker        vst1.32         {d4[0]}, [r0, :32], r1
1465*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d4
1466*c0909341SAndroid Build Coastguard Worker        vst1.32         {d4[1]}, [r6, :32], r1
1467*c0909341SAndroid Build Coastguard Worker        vmov            d0,  d1          // move top from [4-7] to [0-3]
1468*c0909341SAndroid Build Coastguard Worker        bgt             4b
1469*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
1470*c0909341SAndroid Build Coastguard Worker80:
1471*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0},  [r8]      // top (0-7)
1472*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d0          // top (0-7)
1473*c0909341SAndroid Build Coastguard Worker8:
1474*c0909341SAndroid Build Coastguard Worker        vld1.32         {d2[]}, [r2], r7 // left (0-1) + topleft (2)
1475*c0909341SAndroid Build Coastguard Worker        vmul.i16        q2,  q9,  d0[0]  // p1(top[0]) * filter(1)
1476*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q10, d0[1]  // p2(top[1]) * filter(2)
1477*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q11, d0[2]  // p3(top[2]) * filter(3)
1478*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q1,  d2          // left (0-1) + topleft (2)
1479*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q12, d0[3]  // p4(top[3]) * filter(4)
1480*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q8,  d2[2]  // p0(topleft) * filter(0)
1481*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q13, d2[1]  // p5(left[0]) * filter(5)
1482*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q14, d2[0]  // p6(left[1]) * filter(6)
1483*c0909341SAndroid Build Coastguard Worker        vmul.i16        q3,  q9,  d1[0]  // p1(top[0]) * filter(1)
1484*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q10, d1[1]  // p2(top[1]) * filter(2)
1485*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q11, d1[2]  // p3(top[2]) * filter(3)
1486*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d4,  q2,  #4
1487*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q1,  d4          // first block, in 16 bit
1488*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q12, d1[3]  // p4(top[3]) * filter(4)
1489*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q8,  d0[3]  // p0(topleft) * filter(0)
1490*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q13, d2[3]  // p5(left[0]) * filter(5)
1491*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q14, d3[3]  // p6(left[1]) * filter(6)
1492*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d5,  q3,  #4
1493*c0909341SAndroid Build Coastguard Worker        vzip.32         d4,  d5
1494*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
1495*c0909341SAndroid Build Coastguard Worker        vst1.8          {d4}, [r0, :64], r1
1496*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d5
1497*c0909341SAndroid Build Coastguard Worker        vst1.8          {d5}, [r6, :64], r1
1498*c0909341SAndroid Build Coastguard Worker        bgt             8b
1499*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
1500*c0909341SAndroid Build Coastguard Worker160:
1501*c0909341SAndroid Build Coastguard Worker320:
1502*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q5}
1503*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  r3
1504*c0909341SAndroid Build Coastguard Worker        mov             lr,  r3
1505*c0909341SAndroid Build Coastguard Worker
1506*c0909341SAndroid Build Coastguard Worker1:
1507*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]}, [r2], r7 // left (0-1) + topleft (2)
1508*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d0          // left (0-1) + topleft (2)
1509*c0909341SAndroid Build Coastguard Worker2:
1510*c0909341SAndroid Build Coastguard Worker        vld1.8          {q2}, [r8]!      // top(0-15)
1511*c0909341SAndroid Build Coastguard Worker        vmul.i16        q3,  q8,  d0[2]  // p0(topleft) * filter(0)
1512*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q13, d0[1]  // p5(left[0]) * filter(5)
1513*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q1,  d4          // top(0-7)
1514*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q2,  d5          // top(8-15)
1515*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q14, d0[0]  // p6(left[1]) * filter(6)
1516*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q9,  d2[0]  // p1(top[0]) * filter(1)
1517*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q10, d2[1]  // p2(top[1]) * filter(2)
1518*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q11, d2[2]  // p3(top[2]) * filter(3)
1519*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q12, d2[3]  // p4(top[3]) * filter(4)
1520*c0909341SAndroid Build Coastguard Worker
1521*c0909341SAndroid Build Coastguard Worker        vmul.i16        q4,  q9,  d3[0]  // p1(top[0]) * filter(1)
1522*c0909341SAndroid Build Coastguard Worker        vmla.i16        q4,  q10, d3[1]  // p2(top[1]) * filter(2)
1523*c0909341SAndroid Build Coastguard Worker        vmla.i16        q4,  q11, d3[2]  // p3(top[2]) * filter(3)
1524*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d6,  q3,  #4
1525*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d6          // first block, in 16 bit
1526*c0909341SAndroid Build Coastguard Worker        vmla.i16        q4,  q12, d3[3]  // p4(top[3]) * filter(4)
1527*c0909341SAndroid Build Coastguard Worker        vmla.i16        q4,  q8,  d2[3]  // p0(topleft) * filter(0)
1528*c0909341SAndroid Build Coastguard Worker        vmla.i16        q4,  q13, d0[3]  // p5(left[0]) * filter(5)
1529*c0909341SAndroid Build Coastguard Worker        vmla.i16        q4,  q14, d1[3]  // p6(left[1]) * filter(6)
1530*c0909341SAndroid Build Coastguard Worker
1531*c0909341SAndroid Build Coastguard Worker        vmul.i16        q5,  q9,  d4[0]  // p1(top[0]) * filter(1)
1532*c0909341SAndroid Build Coastguard Worker        vmla.i16        q5,  q10, d4[1]  // p2(top[1]) * filter(2)
1533*c0909341SAndroid Build Coastguard Worker        vmla.i16        q5,  q11, d4[2]  // p3(top[2]) * filter(3)
1534*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d7,  q4,  #4
1535*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d7          // second block, in 16 bit
1536*c0909341SAndroid Build Coastguard Worker        vmla.i16        q5,  q12, d4[3]  // p4(top[3]) * filter(4)
1537*c0909341SAndroid Build Coastguard Worker        vmla.i16        q5,  q8,  d3[3]  // p0(topleft) * filter(0)
1538*c0909341SAndroid Build Coastguard Worker        vmla.i16        q5,  q13, d0[3]  // p5(left[0]) * filter(5)
1539*c0909341SAndroid Build Coastguard Worker        vmla.i16        q5,  q14, d1[3]  // p6(left[1]) * filter(6)
1540*c0909341SAndroid Build Coastguard Worker
1541*c0909341SAndroid Build Coastguard Worker        vmul.i16        q15, q9,  d5[0]  // p1(top[0]) * filter(1)
1542*c0909341SAndroid Build Coastguard Worker        vmla.i16        q15, q10, d5[1]  // p2(top[1]) * filter(2)
1543*c0909341SAndroid Build Coastguard Worker        vmla.i16        q15, q11, d5[2]  // p3(top[2]) * filter(3)
1544*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d8,  q5,  #4
1545*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q0,  d8          // third block, in 16 bit
1546*c0909341SAndroid Build Coastguard Worker        vmov.u8         r12, d5[6]
1547*c0909341SAndroid Build Coastguard Worker        vmla.i16        q15, q12, d5[3]  // p4(top[3]) * filter(4)
1548*c0909341SAndroid Build Coastguard Worker        vmla.i16        q15, q8,  d4[3]  // p0(topleft) * filter(0)
1549*c0909341SAndroid Build Coastguard Worker        vmla.i16        q15, q13, d0[3]  // p5(left[0]) * filter(5)
1550*c0909341SAndroid Build Coastguard Worker        vmla.i16        q15, q14, d1[3]  // p6(left[1]) * filter(6)
1551*c0909341SAndroid Build Coastguard Worker        vmov.8          d0[4], r12
1552*c0909341SAndroid Build Coastguard Worker
1553*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #16
1554*c0909341SAndroid Build Coastguard Worker        vqrshrun.s16    d9,  q15, #4
1555*c0909341SAndroid Build Coastguard Worker
1556*c0909341SAndroid Build Coastguard Worker        vst4.32         {d6[0], d7[0], d8[0], d9[0]}, [r0, :128]!
1557*c0909341SAndroid Build Coastguard Worker        vst4.32         {d6[1], d7[1], d8[1], d9[1]}, [r6, :128]!
1558*c0909341SAndroid Build Coastguard Worker        ble             8f
1559*c0909341SAndroid Build Coastguard Worker        vmov.u8         r12, d9[7]
1560*c0909341SAndroid Build Coastguard Worker        vmov.8          d0[0], r12
1561*c0909341SAndroid Build Coastguard Worker        vmov.u8         r12, d9[3]
1562*c0909341SAndroid Build Coastguard Worker        vmov.8          d0[2], r12
1563*c0909341SAndroid Build Coastguard Worker        b               2b
1564*c0909341SAndroid Build Coastguard Worker8:
1565*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
1566*c0909341SAndroid Build Coastguard Worker
1567*c0909341SAndroid Build Coastguard Worker        ble             9f
1568*c0909341SAndroid Build Coastguard Worker        sub             r8,  r6,  lr
1569*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1
1570*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r1
1571*c0909341SAndroid Build Coastguard Worker        mov             r3,  lr
1572*c0909341SAndroid Build Coastguard Worker        b               1b
1573*c0909341SAndroid Build Coastguard Worker9:
1574*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q5}
1575*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
1576*c0909341SAndroid Build Coastguard Workerendfunc
1577*c0909341SAndroid Build Coastguard Worker
1578*c0909341SAndroid Build Coastguard Worker// void pal_pred_8bpc_neon(pixel *dst, const ptrdiff_t stride,
1579*c0909341SAndroid Build Coastguard Worker//                         const pixel *const pal, const uint8_t *idx,
1580*c0909341SAndroid Build Coastguard Worker//                         const int w, const int h);
//
// Expands packed palette indices into pixels. Every byte read from idx holds
// two indices: the low bits give the first pixel, the high nibble the second.
// Each index selects one of the 8 palette bytes loaded from pal via vtbl.
//
// Registers: r0 = dst, r1 = stride, r2 = pal (reused as dst + stride once the
// palette is in d0), r3 = idx, r4 = w, r5 = h (w and h are read from the stack).
// The width picks one of five loops through L(pal_pred_tbl). The loops emit
// 4 rows per iteration for w = 4/8/16, 2 rows for w = 32 and 1 row for w = 64.
// NOTE(review): only the low nibble is masked with 7; the high nibble is used
// as-is after the shift - presumably callers guarantee indices < 8, confirm.
// NOTE(review): h is presumably a multiple of the rows emitted per iteration
// for the given width - confirm against the callers.
1581*c0909341SAndroid Build Coastguard Workerfunction pal_pred_8bpc_neon, export=1
1582*c0909341SAndroid Build Coastguard Worker        push            {r4-r5, lr}
1583*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #12]  // r4 = w, r5 = h (stack args above the 12 bytes just pushed)
1584*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0}, [r2, :64]  // d0 = the 8 palette entries
1585*c0909341SAndroid Build Coastguard Worker        clz             lr,  r4
1586*c0909341SAndroid Build Coastguard Worker        adr             r12, L(pal_pred_tbl)
1587*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #25  // table index = clz(w) - 25: 0 for w = 64 ... 4 for w = 4
1588*c0909341SAndroid Build Coastguard Worker        vmov.i8         q15, #7  // per-byte mask keeping the low 3 bits (d30 is its low half)
1589*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r12, lr, lsl #2]  // table entry = loop address relative to the table
1590*c0909341SAndroid Build Coastguard Worker        add             r12, r12, lr
1591*c0909341SAndroid Build Coastguard Worker        add             r2,  r0,  r1  // r2 = dst + stride; pal is no longer needed
1592*c0909341SAndroid Build Coastguard Worker        bx              r12
1593*c0909341SAndroid Build Coastguard Worker
1594*c0909341SAndroid Build Coastguard Worker        .align 2
1595*c0909341SAndroid Build Coastguard WorkerL(pal_pred_tbl):
1596*c0909341SAndroid Build Coastguard Worker        .word 640f - L(pal_pred_tbl) + CONFIG_THUMB  // w = 64
1597*c0909341SAndroid Build Coastguard Worker        .word 320f - L(pal_pred_tbl) + CONFIG_THUMB  // w = 32
1598*c0909341SAndroid Build Coastguard Worker        .word 160f - L(pal_pred_tbl) + CONFIG_THUMB  // w = 16
1599*c0909341SAndroid Build Coastguard Worker        .word 80f  - L(pal_pred_tbl) + CONFIG_THUMB  // w = 8
1600*c0909341SAndroid Build Coastguard Worker        .word 40f  - L(pal_pred_tbl) + CONFIG_THUMB  // w = 4
1601*c0909341SAndroid Build Coastguard Worker
1602*c0909341SAndroid Build Coastguard Worker40:
1603*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1  // r1 = 2 * stride: r0 and r2 each write every other row
1604*c0909341SAndroid Build Coastguard Worker4:
1605*c0909341SAndroid Build Coastguard Worker        vld1.8          {d2}, [r3, :64]!  // 8 packed bytes = 16 indices = 4 rows of 4
1606*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #4  // four rows per iteration; flags consumed by bgt below
1607*c0909341SAndroid Build Coastguard Worker        vshr.u8         d3,  d2,  #4  // second index of each byte (high nibble)
1608*c0909341SAndroid Build Coastguard Worker        vand.u8         d2,  d2,  d30  // first index of each byte (low 3 bits)
1609*c0909341SAndroid Build Coastguard Worker        vzip.8          d2,  d3  // interleave first/second indices back into pixel order
1610*c0909341SAndroid Build Coastguard Worker        vtbl.8          d2, {d0}, d2  // palette lookup, rows 0-1
1611*c0909341SAndroid Build Coastguard Worker        vtbl.8          d3, {d0}, d3  // palette lookup, rows 2-3
1612*c0909341SAndroid Build Coastguard Worker        vst1.32         {d2[0]}, [r0, :32], r1  // row 0
1613*c0909341SAndroid Build Coastguard Worker        vst1.32         {d2[1]}, [r2, :32], r1  // row 1
1614*c0909341SAndroid Build Coastguard Worker        vst1.32         {d3[0]}, [r0, :32], r1  // row 2
1615*c0909341SAndroid Build Coastguard Worker        vst1.32         {d3[1]}, [r2, :32], r1  // row 3
1616*c0909341SAndroid Build Coastguard Worker        bgt             4b
1617*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
1618*c0909341SAndroid Build Coastguard Worker80:
1619*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1  // r1 = 2 * stride
1620*c0909341SAndroid Build Coastguard Worker8:
1621*c0909341SAndroid Build Coastguard Worker        vld1.8          {q1}, [r3, :64]!  // 16 packed bytes = 32 indices = 4 rows of 8
1622*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #4  // four rows per iteration
1623*c0909341SAndroid Build Coastguard Worker        vshr.u8         q2,  q1,  #4  // second index of each byte
1624*c0909341SAndroid Build Coastguard Worker        vand.u8         q1,  q1,  q15  // first index of each byte
1625*c0909341SAndroid Build Coastguard Worker        vzip.8          q1,  q2  // pixel order: d2, d3, d4, d5 = rows 0, 1, 2, 3
1626*c0909341SAndroid Build Coastguard Worker        vtbl.8          d2, {d0}, d2
1627*c0909341SAndroid Build Coastguard Worker        vtbl.8          d3, {d0}, d3
1628*c0909341SAndroid Build Coastguard Worker        vst1.8          {d2}, [r0, :64], r1  // row 0
1629*c0909341SAndroid Build Coastguard Worker        vtbl.8          d4, {d0}, d4
1630*c0909341SAndroid Build Coastguard Worker        vst1.8          {d3}, [r2, :64], r1  // row 1
1631*c0909341SAndroid Build Coastguard Worker        vtbl.8          d5, {d0}, d5
1632*c0909341SAndroid Build Coastguard Worker        vst1.8          {d4}, [r0, :64], r1  // row 2
1633*c0909341SAndroid Build Coastguard Worker        vst1.8          {d5}, [r2, :64], r1  // row 3
1634*c0909341SAndroid Build Coastguard Worker        bgt             8b
1635*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
1636*c0909341SAndroid Build Coastguard Worker160:
1637*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1  // r1 = 2 * stride
1638*c0909341SAndroid Build Coastguard Worker16:
1639*c0909341SAndroid Build Coastguard Worker        vld1.8          {q10, q11}, [r3, :64]!  // 32 packed bytes = 64 indices = 4 rows of 16
1640*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #4  // four rows per iteration
1641*c0909341SAndroid Build Coastguard Worker        vand.u8         q8,  q10, q15  // first indices, bytes 0-15
1642*c0909341SAndroid Build Coastguard Worker        vshr.u8         q9,  q10, #4  // second indices, bytes 0-15
1643*c0909341SAndroid Build Coastguard Worker        vand.u8         q10, q11, q15  // first indices, bytes 16-31
1644*c0909341SAndroid Build Coastguard Worker        vshr.u8         q11, q11, #4  // second indices, bytes 16-31
1645*c0909341SAndroid Build Coastguard Worker        vzip.8          q8,  q9  // pixel order: q8 = row 0, q9 = row 1
1646*c0909341SAndroid Build Coastguard Worker        vzip.8          q10, q11  // pixel order: q10 = row 2, q11 = row 3
1647*c0909341SAndroid Build Coastguard Worker        vtbl.8          d16, {d0}, d16
1648*c0909341SAndroid Build Coastguard Worker        vtbl.8          d17, {d0}, d17
1649*c0909341SAndroid Build Coastguard Worker        vtbl.8          d18, {d0}, d18
1650*c0909341SAndroid Build Coastguard Worker        vtbl.8          d19, {d0}, d19
1651*c0909341SAndroid Build Coastguard Worker        vtbl.8          d20, {d0}, d20
1652*c0909341SAndroid Build Coastguard Worker        vtbl.8          d21, {d0}, d21
1653*c0909341SAndroid Build Coastguard Worker        vst1.8          {q8},  [r0, :128], r1  // row 0
1654*c0909341SAndroid Build Coastguard Worker        vtbl.8          d22, {d0}, d22
1655*c0909341SAndroid Build Coastguard Worker        vst1.8          {q9},  [r2, :128], r1  // row 1
1656*c0909341SAndroid Build Coastguard Worker        vtbl.8          d23, {d0}, d23
1657*c0909341SAndroid Build Coastguard Worker        vst1.8          {q10}, [r0, :128], r1  // row 2
1658*c0909341SAndroid Build Coastguard Worker        vst1.8          {q11}, [r2, :128], r1  // row 3
1659*c0909341SAndroid Build Coastguard Worker        bgt             16b
1660*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
1661*c0909341SAndroid Build Coastguard Worker320:
1662*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1  // r1 = 2 * stride
1663*c0909341SAndroid Build Coastguard Worker32:
1664*c0909341SAndroid Build Coastguard Worker        vld1.8          {q10, q11}, [r3, :64]!  // 32 packed bytes = 64 indices = 2 rows of 32
1665*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #2  // two rows per iteration
1666*c0909341SAndroid Build Coastguard Worker        vand.u8         q8,  q10, q15
1667*c0909341SAndroid Build Coastguard Worker        vshr.u8         q9,  q10, #4
1668*c0909341SAndroid Build Coastguard Worker        vand.u8         q10, q11, q15
1669*c0909341SAndroid Build Coastguard Worker        vshr.u8         q11, q11, #4
1670*c0909341SAndroid Build Coastguard Worker        vzip.8          q8,  q9  // pixel order: q8-q9 = row 0
1671*c0909341SAndroid Build Coastguard Worker        vzip.8          q10, q11  // pixel order: q10-q11 = row 1
1672*c0909341SAndroid Build Coastguard Worker        vtbl.8          d16, {d0}, d16
1673*c0909341SAndroid Build Coastguard Worker        vtbl.8          d17, {d0}, d17
1674*c0909341SAndroid Build Coastguard Worker        vtbl.8          d18, {d0}, d18
1675*c0909341SAndroid Build Coastguard Worker        vtbl.8          d19, {d0}, d19
1676*c0909341SAndroid Build Coastguard Worker        vtbl.8          d20, {d0}, d20
1677*c0909341SAndroid Build Coastguard Worker        vtbl.8          d21, {d0}, d21
1678*c0909341SAndroid Build Coastguard Worker        vst1.8          {q8,  q9},  [r0, :128], r1  // row 0
1679*c0909341SAndroid Build Coastguard Worker        vtbl.8          d22, {d0}, d22
1680*c0909341SAndroid Build Coastguard Worker        vtbl.8          d23, {d0}, d23
1681*c0909341SAndroid Build Coastguard Worker        vst1.8          {q10, q11}, [r2, :128], r1  // row 1
1682*c0909341SAndroid Build Coastguard Worker        bgt             32b
1683*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
1684*c0909341SAndroid Build Coastguard Worker640:
1685*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #32  // the first store below already advances r0 by 32 bytes
1686*c0909341SAndroid Build Coastguard Worker64:
1687*c0909341SAndroid Build Coastguard Worker        vld1.8          {q10, q11}, [r3, :64]!  // 32 packed bytes = 64 indices = 1 row of 64
1688*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #1  // one row per iteration
1689*c0909341SAndroid Build Coastguard Worker        vand.u8         q8,  q10, q15
1690*c0909341SAndroid Build Coastguard Worker        vshr.u8         q9,  q10, #4
1691*c0909341SAndroid Build Coastguard Worker        vand.u8         q10, q11, q15
1692*c0909341SAndroid Build Coastguard Worker        vshr.u8         q11, q11, #4
1693*c0909341SAndroid Build Coastguard Worker        vzip.8          q8,  q9  // pixel order: q8-q9 = pixels 0-31
1694*c0909341SAndroid Build Coastguard Worker        vzip.8          q10, q11  // pixel order: q10-q11 = pixels 32-63
1695*c0909341SAndroid Build Coastguard Worker        vtbl.8          d16, {d0}, d16
1696*c0909341SAndroid Build Coastguard Worker        vtbl.8          d17, {d0}, d17
1697*c0909341SAndroid Build Coastguard Worker        vtbl.8          d18, {d0}, d18
1698*c0909341SAndroid Build Coastguard Worker        vtbl.8          d19, {d0}, d19
1699*c0909341SAndroid Build Coastguard Worker        vtbl.8          d20, {d0}, d20
1700*c0909341SAndroid Build Coastguard Worker        vtbl.8          d21, {d0}, d21
1701*c0909341SAndroid Build Coastguard Worker        vst1.8          {q8,  q9},  [r0, :128]!  // pixels 0-31, r0 += 32
1702*c0909341SAndroid Build Coastguard Worker        vtbl.8          d22, {d0}, d22
1703*c0909341SAndroid Build Coastguard Worker        vtbl.8          d23, {d0}, d23
1704*c0909341SAndroid Build Coastguard Worker        vst1.8          {q10, q11}, [r0, :128], r1  // pixels 32-63, r0 += stride - 32 -> next row
1705*c0909341SAndroid Build Coastguard Worker        bgt             64b
1706*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
1707*c0909341SAndroid Build Coastguard Workerendfunc
1708*c0909341SAndroid Build Coastguard Worker
1709*c0909341SAndroid Build Coastguard Worker// void ipred_cfl_128_8bpc_neon(pixel *dst, const ptrdiff_t stride,
1710*c0909341SAndroid Build Coastguard Worker//                              const pixel *const topleft,
1711*c0909341SAndroid Build Coastguard Worker//                              const int width, const int height,
1712*c0909341SAndroid Build Coastguard Worker//                              const int16_t *ac, const int alpha);
//
// Writes dst = iclip_pixel(dc + apply_sign(ac * alpha)) with dc fixed at 128.
// The topleft argument (r2) is not read by this function.
//
// This function also hosts the shared "splat" loops. The
// L(ipred_cfl_splat_w4/w8/w16) labels are branch targets for
// ipred_cfl_top_8bpc_neon, and the jump table is reachable under the alias
// L(ipred_cfl_splat_tbl), which ipred_cfl_left_8bpc_neon loads from.
// Register state expected at those labels:
//   q0 = dc in every 16-bit lane, q1 = alpha in every 16-bit lane,
//   r0 = dst, r6 = dst + stride, r1 = 2 * stride,
//   r3 = width (read by the w16 path only), r4 = height, r5 = ac,
//   and {r4-r8, lr} pushed on the stack (each path ends in pop {r4-r8, pc}).
1713*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_128_8bpc_neon, export=1
1714*c0909341SAndroid Build Coastguard Worker        push            {r4-r8, lr}
1715*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #24]  // r4 = height, r5 = ac (stack args above the 24 bytes just pushed)
1716*c0909341SAndroid Build Coastguard Worker        ldr             r6,  [sp, #32]  // r6 = alpha
1717*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3
1718*c0909341SAndroid Build Coastguard Worker        adr             r12, L(ipred_cfl_128_tbl)
1719*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #26  // table index = clz(width) - 26: 0 for width = 32 ... 3 for width = 4
1720*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r12, lr, lsl #2]
1721*c0909341SAndroid Build Coastguard Worker        vmov.i16        q0,  #128     // dc
1722*c0909341SAndroid Build Coastguard Worker        vdup.i16        q1,  r6       // alpha
1723*c0909341SAndroid Build Coastguard Worker        add             r12, r12, lr
1724*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1  // r6 = dst + stride (second row pointer); alpha now lives in q1
1725*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1  // r1 = 2 * stride: r0 and r6 each write every other row
1726*c0909341SAndroid Build Coastguard Worker        bx              r12
1727*c0909341SAndroid Build Coastguard Worker
1728*c0909341SAndroid Build Coastguard Worker        .align 2
1729*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_128_tbl):
1730*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_splat_tbl):
1731*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_splat_w16) - L(ipred_cfl_128_tbl) + CONFIG_THUMB  // width = 32 (w16 path loops over the row)
1732*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_splat_w16) - L(ipred_cfl_128_tbl) + CONFIG_THUMB  // width = 16
1733*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_splat_w8)  - L(ipred_cfl_128_tbl) + CONFIG_THUMB  // width = 8
1734*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_splat_w4)  - L(ipred_cfl_128_tbl) + CONFIG_THUMB  // width = 4
1735*c0909341SAndroid Build Coastguard Worker
1736*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_splat_w4):
1737*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2, q3}, [r5, :128]!  // 16 ac coefficients = 4 rows of 4
1738*c0909341SAndroid Build Coastguard Worker        vmul.i16        q2,  q2,  q1  // diff = ac * alpha
1739*c0909341SAndroid Build Coastguard Worker        vmul.i16        q3,  q3,  q1
1740*c0909341SAndroid Build Coastguard Worker        vshr.s16        q8,  q2,  #15 // sign = diff >> 15
1741*c0909341SAndroid Build Coastguard Worker        vshr.s16        q9,  q3,  #15
1742*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q8  // diff + sign
1743*c0909341SAndroid Build Coastguard Worker        vadd.i16        q3,  q3,  q9
1744*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q2,  q2,  #6  // (diff + sign + 32) >> 6 = apply_sign()
1745*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q3,  q3,  #6
1746*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q0  // dc + apply_sign()
1747*c0909341SAndroid Build Coastguard Worker        vadd.i16        q3,  q3,  q0
1748*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d4,  q2       // iclip_pixel(dc + apply_sign())
1749*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d5,  q3
1750*c0909341SAndroid Build Coastguard Worker        vst1.32         {d4[0]}, [r0, :32], r1  // row 0
1751*c0909341SAndroid Build Coastguard Worker        vst1.32         {d4[1]}, [r6, :32], r1  // row 1
1752*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4  // four rows per iteration; flags consumed by bgt below
1753*c0909341SAndroid Build Coastguard Worker        vst1.32         {d5[0]}, [r0, :32], r1  // row 2
1754*c0909341SAndroid Build Coastguard Worker        vst1.32         {d5[1]}, [r6, :32], r1  // row 3
1755*c0909341SAndroid Build Coastguard Worker        bgt             L(ipred_cfl_splat_w4)
1756*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
1757*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_splat_w8):
1758*c0909341SAndroid Build Coastguard Worker        vld1.16         {q8, q9},   [r5, :128]!  // ac rows 0-1 (8 coefficients each)
1759*c0909341SAndroid Build Coastguard Worker        vld1.16         {q10, q11}, [r5, :128]!  // ac rows 2-3
1760*c0909341SAndroid Build Coastguard Worker        vmul.i16        q8,  q8,  q1  // diff = ac * alpha
1761*c0909341SAndroid Build Coastguard Worker        vmul.i16        q9,  q9,  q1
1762*c0909341SAndroid Build Coastguard Worker        vmul.i16        q10, q10, q1
1763*c0909341SAndroid Build Coastguard Worker        vmul.i16        q11, q11, q1
1764*c0909341SAndroid Build Coastguard Worker        vshr.s16        q12, q8,  #15 // sign = diff >> 15
1765*c0909341SAndroid Build Coastguard Worker        vshr.s16        q13, q9,  #15
1766*c0909341SAndroid Build Coastguard Worker        vshr.s16        q14, q10, #15
1767*c0909341SAndroid Build Coastguard Worker        vshr.s16        q15, q11, #15
1768*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q12 // diff + sign
1769*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q13
1770*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q14
1771*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q15
1772*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q8,  q8,  #6  // (diff + sign + 32) >> 6 = apply_sign()
1773*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q9,  q9,  #6
1774*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q10, q10, #6
1775*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q11, q11, #6
1776*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0  // dc + apply_sign()
1777*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q0
1778*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q0
1779*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q0
1780*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d16, q8       // iclip_pixel(dc + apply_sign())
1781*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d17, q9
1782*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d18, q10
1783*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d19, q11
1784*c0909341SAndroid Build Coastguard Worker        vst1.8          {d16}, [r0, :64], r1  // row 0
1785*c0909341SAndroid Build Coastguard Worker        vst1.8          {d17}, [r6, :64], r1  // row 1
1786*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4  // four rows per iteration; flags consumed by bgt below
1787*c0909341SAndroid Build Coastguard Worker        vst1.8          {d18}, [r0, :64], r1  // row 2
1788*c0909341SAndroid Build Coastguard Worker        vst1.8          {d19}, [r6, :64], r1  // row 3
1789*c0909341SAndroid Build Coastguard Worker        bgt             L(ipred_cfl_splat_w8)
1790*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
1791*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_splat_w16):
1792*c0909341SAndroid Build Coastguard Worker        add             r12, r5,  r3, lsl #1  // r12 = ac + width int16 elements = second ac row
1793*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  r3  // r1 = 2 * stride - width: row step left after the inner loop's post-increments
1794*c0909341SAndroid Build Coastguard Worker        mov             lr,  r3  // keep width to reload the column counter for each row pair
1795*c0909341SAndroid Build Coastguard Worker1:
1796*c0909341SAndroid Build Coastguard Worker        vld1.16         {q8, q9},   [r5, :128]!  // 16 coefficients of the even row
1797*c0909341SAndroid Build Coastguard Worker        vmul.i16        q8,  q8,  q1  // diff = ac * alpha
1798*c0909341SAndroid Build Coastguard Worker        vld1.16         {q10, q11}, [r12, :128]!  // 16 coefficients of the odd row
1799*c0909341SAndroid Build Coastguard Worker        vmul.i16        q9,  q9,  q1
1800*c0909341SAndroid Build Coastguard Worker        vmul.i16        q10, q10, q1
1801*c0909341SAndroid Build Coastguard Worker        vmul.i16        q11, q11, q1
1802*c0909341SAndroid Build Coastguard Worker        vshr.s16        q12, q8,  #15 // sign = diff >> 15
1803*c0909341SAndroid Build Coastguard Worker        vshr.s16        q13, q9,  #15
1804*c0909341SAndroid Build Coastguard Worker        vshr.s16        q14, q10, #15
1805*c0909341SAndroid Build Coastguard Worker        vshr.s16        q15, q11, #15
1806*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q12 // diff + sign
1807*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q13
1808*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q14
1809*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q15
1810*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q8,  q8,  #6  // (diff + sign + 32) >> 6 = apply_sign()
1811*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q9,  q9,  #6
1812*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q10, q10, #6
1813*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q11, q11, #6
1814*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0  // dc + apply_sign()
1815*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q0
1816*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q0
1817*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q0
1818*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d16, q8       // iclip_pixel(dc + apply_sign())
1819*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d17, q9
1820*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d18, q10
1821*c0909341SAndroid Build Coastguard Worker        vqmovun.s16     d19, q11
1822*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #16  // 16 columns done in both rows
1823*c0909341SAndroid Build Coastguard Worker        vst1.16         {q8}, [r0, :128]!  // q8 (d16-d17) = 16 pixels of the even row
1824*c0909341SAndroid Build Coastguard Worker        vst1.16         {q9}, [r6, :128]!  // q9 (d18-d19) = 16 pixels of the odd row
1825*c0909341SAndroid Build Coastguard Worker        bgt             1b  // more columns left in this row pair
1826*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2  // two rows per outer iteration; flags survive until the bgt below
1827*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  lr, lsl #1  // skip the row that r12 just consumed
1828*c0909341SAndroid Build Coastguard Worker        add             r12, r12, lr, lsl #1  // skip the row that r5 will consume next
1829*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1  // advance both dst pointers by two rows in total
1830*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r1
1831*c0909341SAndroid Build Coastguard Worker        mov             r3,  lr  // reload the column counter
1832*c0909341SAndroid Build Coastguard Worker        bgt             1b
1833*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
1834*c0909341SAndroid Build Coastguard Workerendfunc
1835*c0909341SAndroid Build Coastguard Worker
1836*c0909341SAndroid Build Coastguard Worker// void ipred_cfl_top_8bpc_neon(pixel *dst, const ptrdiff_t stride,
1837*c0909341SAndroid Build Coastguard Worker//                              const pixel *const topleft,
1838*c0909341SAndroid Build Coastguard Worker//                              const int width, const int height,
1839*c0909341SAndroid Build Coastguard Worker//                              const int16_t *ac, const int alpha);
//
// Computes dc as the rounded average of the `width` pixels starting at
// topleft + 1, broadcasts it to every 16-bit lane of q0, and then branches
// into the L(ipred_cfl_splat_w4/w8/w16) code, which performs the stores and
// returns (it pops the {r4-r8, lr} pushed here).
// Register setup before the branch: q1 = alpha, r0 = dst, r6 = dst + stride,
// r1 = 2 * stride, r3 = width, r4 = height, r5 = ac.
1840*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_top_8bpc_neon, export=1
1841*c0909341SAndroid Build Coastguard Worker        push            {r4-r8, lr}
1842*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #24]  // r4 = height, r5 = ac (stack args above the 24 bytes just pushed)
1843*c0909341SAndroid Build Coastguard Worker        ldr             r6,  [sp, #32]  // r6 = alpha
1844*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3
1845*c0909341SAndroid Build Coastguard Worker        adr             r12, L(ipred_cfl_top_tbl)
1846*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #26  // table index = clz(width) - 26: 0 for width = 32 ... 3 for width = 4
1847*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r12, lr, lsl #2]
1848*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  r6   // alpha
1849*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #1  // r2 = topleft + 1: first pixel that gets averaged
1850*c0909341SAndroid Build Coastguard Worker        add             r12, r12, lr
1851*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1  // r6 = dst + stride (second row pointer)
1852*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1  // r1 = 2 * stride
1853*c0909341SAndroid Build Coastguard Worker        bx              r12
1854*c0909341SAndroid Build Coastguard Worker
1855*c0909341SAndroid Build Coastguard Worker        .align 2
1856*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_top_tbl):
1857*c0909341SAndroid Build Coastguard Worker        .word 32f - L(ipred_cfl_top_tbl) + CONFIG_THUMB  // width = 32
1858*c0909341SAndroid Build Coastguard Worker        .word 16f - L(ipred_cfl_top_tbl) + CONFIG_THUMB  // width = 16
1859*c0909341SAndroid Build Coastguard Worker        .word 8f  - L(ipred_cfl_top_tbl) + CONFIG_THUMB  // width = 8
1860*c0909341SAndroid Build Coastguard Worker        .word 4f  - L(ipred_cfl_top_tbl) + CONFIG_THUMB  // width = 4
1861*c0909341SAndroid Build Coastguard Worker
1862*c0909341SAndroid Build Coastguard Worker4:
1863*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]}, [r2]  // 4 pixels, replicated into both halves of d0
1864*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d0,  d0  // widen to 16 bit while adding adjacent pairs
1865*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0  // every lane = sum of the 4 pixels
1866*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #2  // dc = (sum + 2) >> 2
1867*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]  // broadcast dc to all 8 lanes
1868*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_splat_w4)
1869*c0909341SAndroid Build Coastguard Worker8:
1870*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0}, [r2]  // 8 pixels
1871*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d0,  d0  // 4 pair sums, 16 bit
1872*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0  // 2 partial sums (repeated)
1873*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0  // every lane = sum of the 8 pixels
1874*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #3  // dc = (sum + 4) >> 3
1875*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]  // broadcast dc to all 8 lanes
1876*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_splat_w8)
1877*c0909341SAndroid Build Coastguard Worker16:
1878*c0909341SAndroid Build Coastguard Worker        vld1.8          {q0}, [r2]  // 16 pixels
1879*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q0,  d0,  d1  // 8 sums: pixel[i] + pixel[i + 8], 16 bit
1880*c0909341SAndroid Build Coastguard Worker        vadd.u16        d0,  d0,  d1  // 4 partial sums
1881*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0  // 2 partial sums (repeated)
1882*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0  // every lane = sum of the 16 pixels
1883*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #4  // dc = (sum + 8) >> 4
1884*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]  // broadcast dc to all 8 lanes
1885*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_splat_w16)
1886*c0909341SAndroid Build Coastguard Worker32:
1887*c0909341SAndroid Build Coastguard Worker        vld1.8          {q2, q3}, [r2]  // 32 pixels
1888*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q2,  d4,  d5  // 8 sums over pixels 0-15, 16 bit
1889*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q3,  d6,  d7  // 8 sums over pixels 16-31, 16 bit
1890*c0909341SAndroid Build Coastguard Worker        vadd.u16        q0,  q2,  q3  // 8 partial sums
1891*c0909341SAndroid Build Coastguard Worker        vadd.u16        d0,  d0,  d1  // 4 partial sums
1892*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0  // 2 partial sums (repeated)
1893*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0  // every lane = sum of the 32 pixels
1894*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #5  // dc = (sum + 16) >> 5
1895*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]  // broadcast dc to all 8 lanes
1896*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_splat_w16)  // the w16 path iterates over r3 = 32 columns
1897*c0909341SAndroid Build Coastguard Workerendfunc
1898*c0909341SAndroid Build Coastguard Worker
1899*c0909341SAndroid Build Coastguard Worker// void ipred_cfl_left_8bpc_neon(pixel *dst, const ptrdiff_t stride,
1900*c0909341SAndroid Build Coastguard Worker//                               const pixel *const topleft,
1901*c0909341SAndroid Build Coastguard Worker//                               const int width, const int height,
1902*c0909341SAndroid Build Coastguard Worker//                               const int16_t *ac, const int alpha);
//
// CfL prediction whose DC term is the rounded mean of the `height` bytes
// stored immediately before `topleft` (the left edge, going by the function
// name).
//
// Dispatch is two-staged:
//   1. clz(height) selects one of the L(ipred_cfl_left_h*) routines below,
//      which computes dc = (sum + height/2) >> log2(height).
//   2. That routine jumps through r12 to a width-specific entry fetched from
//      L(ipred_cfl_splat_tbl), which is defined outside this function.
//
// Register state at the jump through r12:
//   r0 = dst, r1 = 2*stride, r6 = dst + stride, r3 = width, r4 = height,
//   r5 = ac, q0 = dc in every 16-bit lane, q1 = alpha in every 16-bit lane.
// NOTE(review): this function has no pop/return of its own; the saved
// r4-r8/lr are presumably restored by the shared splat code - confirm there.
1903*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_left_8bpc_neon, export=1
1904*c0909341SAndroid Build Coastguard Worker        push            {r4-r8, lr}  // 6 registers = 24 bytes, so stack args start at sp+24
1905*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #24]  // r4 = height, r5 = ac (per the signature above)
1906*c0909341SAndroid Build Coastguard Worker        ldr             r6,  [sp, #32]  // r6 = alpha
1907*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r4  // r2 = topleft - height: first byte to be summed
1908*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3  // clz(width)
1909*c0909341SAndroid Build Coastguard Worker        clz             r8,  r4  // clz(height)
1910*c0909341SAndroid Build Coastguard Worker        adr             r12, L(ipred_cfl_splat_tbl)
1911*c0909341SAndroid Build Coastguard Worker        adr             r7,  L(ipred_cfl_left_tbl)
1912*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #26  // splat table index = clz(width) - 26
1913*c0909341SAndroid Build Coastguard Worker        sub             r8,  r8,  #26  // left table index: 32 -> 0, 16 -> 1, 8 -> 2, 4 -> 3
1914*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r12, lr, lsl #2]  // table entries are 32-bit offsets from the table label
1915*c0909341SAndroid Build Coastguard Worker        ldr             r8,  [r7,  r8, lsl #2]
1916*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  r6   // alpha
1917*c0909341SAndroid Build Coastguard Worker        add             r12, r12, lr  // r12 = address of the width-specific splat entry
1918*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8  // r7 = address of the height-specific averaging routine
1919*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1  // r6 = dst + stride (alpha already copied into q1)
1920*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1  // r1 = 2 * stride
1921*c0909341SAndroid Build Coastguard Worker        bx              r7
1922*c0909341SAndroid Build Coastguard Worker
1923*c0909341SAndroid Build Coastguard Worker        .align 2
// Offsets of the averaging routines relative to this table, ordered by
// clz(height) - 26. CONFIG_THUMB is added to every entry; presumably it sets
// bit 0 in Thumb builds so that `bx` stays in Thumb state - confirm against
// its definition.
1924*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_left_tbl):
1925*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_left_h32) - L(ipred_cfl_left_tbl) + CONFIG_THUMB
1926*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_left_h16) - L(ipred_cfl_left_tbl) + CONFIG_THUMB
1927*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_left_h8)  - L(ipred_cfl_left_tbl) + CONFIG_THUMB
1928*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_left_h4)  - L(ipred_cfl_left_tbl) + CONFIG_THUMB
1929*c0909341SAndroid Build Coastguard Worker
// height == 4: dc = (sum of 4 bytes + 2) >> 2
1930*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_left_h4):
1931*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]}, [r2, :32]  // load 4 bytes, replicated into both halves of d0
1932*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d0,  d0  // widen: 4 x u16 pair sums
1933*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0  // every lane now holds the sum of the 4 bytes
1934*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #2  // rounding shift: (sum + 2) >> 2
1935*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]  // broadcast dc to all 8 lanes of q0
1936*c0909341SAndroid Build Coastguard Worker        bx              r12
1937*c0909341SAndroid Build Coastguard Worker
// height == 8: dc = (sum of 8 bytes + 4) >> 3
1938*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_left_h8):
1939*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0}, [r2, :64]
1940*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d0,  d0  // 8 bytes -> 4 x u16 pair sums
1941*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
1942*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0  // every lane now holds the full sum
1943*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #3
1944*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]
1945*c0909341SAndroid Build Coastguard Worker        bx              r12
1946*c0909341SAndroid Build Coastguard Worker
// height == 16: dc = (sum of 16 bytes + 8) >> 4
1947*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_left_h16):
1948*c0909341SAndroid Build Coastguard Worker        vld1.8          {q0}, [r2, :128]
1949*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q0,  d0,  d1  // widening add of the two 8-byte halves -> 8 x u16
1950*c0909341SAndroid Build Coastguard Worker        vadd.u16        d0,  d0,  d1  // fold to 4 x u16
1951*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
1952*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0  // every lane now holds the full sum
1953*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #4
1954*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]
1955*c0909341SAndroid Build Coastguard Worker        bx              r12
1956*c0909341SAndroid Build Coastguard Worker
// height == 32: dc = (sum of 32 bytes + 16) >> 5
1957*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_left_h32):
1958*c0909341SAndroid Build Coastguard Worker        vld1.8          {q2, q3}, [r2, :128]
1959*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q2,  d4,  d5  // widen and add halves of the first 16 bytes
1960*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q3,  d6,  d7  // widen and add halves of the second 16 bytes
1961*c0909341SAndroid Build Coastguard Worker        vadd.u16        q0,  q2,  q3
1962*c0909341SAndroid Build Coastguard Worker        vadd.u16        d0,  d0,  d1  // fold to 4 x u16
1963*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0
1964*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d0,  d0  // every lane now holds the full sum
1965*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #5
1966*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]
1967*c0909341SAndroid Build Coastguard Worker        bx              r12
1968*c0909341SAndroid Build Coastguard Workerendfunc
1969*c0909341SAndroid Build Coastguard Worker
1970*c0909341SAndroid Build Coastguard Worker// void ipred_cfl_8bpc_neon(pixel *dst, const ptrdiff_t stride,
1971*c0909341SAndroid Build Coastguard Worker//                          const pixel *const topleft,
1972*c0909341SAndroid Build Coastguard Worker//                          const int width, const int height,
1973*c0909341SAndroid Build Coastguard Worker//                          const int16_t *ac, const int alpha);
//
// CfL prediction whose DC term is the rounded mean of the `height` bytes
// before `topleft` plus the `width` bytes starting at topleft + 1:
//   dc = (sum + ((width + height) >> 1)) / (width + height)
//
// The division is done as a right shift by ctz(width + height), followed,
// when width != height, by a fixed-point multiply by 1/3 or 1/5 (the only
// odd factors left for the size combinations handled here).
//
// Dispatch is two-staged through one combined table:
//   r7  -> L(ipred_cfl_h*): sums the bytes before topleft, leaves r2 at
//          topleft + 1, then jumps through r12.
//   r12 -> L(ipred_cfl_w*): sums the bytes from topleft + 1, finishes dc and
//          branches to an L(ipred_cfl_splat_w*) label defined outside this
//          function.
//
// Register state at the branch to the splat code:
//   r0 = dst, r1 = 2*stride, r6 = dst + stride, r3 = width, r4 = height,
//   r5 = ac, q0 = dc in every 16-bit lane, q1 = alpha in every 16-bit lane.
// NOTE(review): this function has no pop/return of its own; the saved
// r4-r8/lr are presumably restored by the shared splat code - confirm there.
1974*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_8bpc_neon, export=1
1975*c0909341SAndroid Build Coastguard Worker        push            {r4-r8, lr}  // 6 registers = 24 bytes, so stack args start at sp+24
1976*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #24]  // r4 = height, r5 = ac (per the signature above)
1977*c0909341SAndroid Build Coastguard Worker        ldr             r6,  [sp, #32]  // r6 = alpha
1978*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r4  // r2 = topleft - height: first byte summed by the h* stage
1979*c0909341SAndroid Build Coastguard Worker        add             r8,  r3,  r4  // width + height
1980*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  r6       // alpha
1981*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3  // clz(width)
1982*c0909341SAndroid Build Coastguard Worker        clz             r6,  r4  // clz(height); r6 is free again, alpha now lives in q1
1983*c0909341SAndroid Build Coastguard Worker        vdup.16         d16, r8       // width + height
1984*c0909341SAndroid Build Coastguard Worker        adr             r7,  L(ipred_cfl_tbl)
1985*c0909341SAndroid Build Coastguard Worker        rbit            r8,  r8       // rbit(width + height)
1986*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #22 // 26 leading bits, minus table offset 4
1987*c0909341SAndroid Build Coastguard Worker        sub             r6,  r6,  #26  // h* entries occupy table slots 0..3
1988*c0909341SAndroid Build Coastguard Worker        clz             r8,  r8       // ctz(width + height)
1989*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r7, lr, lsl #2]
1990*c0909341SAndroid Build Coastguard Worker        ldr             r6,  [r7, r6, lsl #2]
1991*c0909341SAndroid Build Coastguard Worker        neg             r8,  r8       // -ctz(width + height)
1992*c0909341SAndroid Build Coastguard Worker        add             r12, r7,  lr  // r12 = address of the width-specific stage
1993*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r6  // r7 = address of the height-specific stage
1994*c0909341SAndroid Build Coastguard Worker        vshr.u16        d16, d16, #1  // (width + height) >> 1
1995*c0909341SAndroid Build Coastguard Worker        vdup.16         d17, r8       // -ctz(width + height)
1996*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1  // r6 = dst + stride
1997*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1  // r1 = 2 * stride
1998*c0909341SAndroid Build Coastguard Worker        bx              r7
1999*c0909341SAndroid Build Coastguard Worker
2000*c0909341SAndroid Build Coastguard Worker        .align 2
// Offsets relative to this table. Slots 0..3 are indexed by clz(height) - 26,
// slots 4..7 by clz(width) - 26 + 4. CONFIG_THUMB is added to every entry;
// presumably it sets bit 0 in Thumb builds so that `bx` stays in Thumb state -
// confirm against its definition.
2001*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_tbl):
2002*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_h32) - L(ipred_cfl_tbl) + CONFIG_THUMB
2003*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_h16) - L(ipred_cfl_tbl) + CONFIG_THUMB
2004*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_h8)  - L(ipred_cfl_tbl) + CONFIG_THUMB
2005*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_h4)  - L(ipred_cfl_tbl) + CONFIG_THUMB
2006*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_w32) - L(ipred_cfl_tbl) + CONFIG_THUMB
2007*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_w16) - L(ipred_cfl_tbl) + CONFIG_THUMB
2008*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_w8)  - L(ipred_cfl_tbl) + CONFIG_THUMB
2009*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_w4)  - L(ipred_cfl_tbl) + CONFIG_THUMB
2010*c0909341SAndroid Build Coastguard Worker
// height == 4: d0 = sum of the 4 bytes before topleft (in every lane)
2011*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h4):
2012*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]}, [r2, :32]!  // load 4 bytes replicated; writeback leaves r2 = topleft
2013*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d0,  d0
2014*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #1  // skip the topleft byte: r2 = topleft + 1
2015*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0
2016*c0909341SAndroid Build Coastguard Worker        bx              r12
// width == 4: add the 4 bytes at topleft + 1 and finish dc
2017*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w4):
2018*c0909341SAndroid Build Coastguard Worker        vld1.32         {d1[]},  [r2]  // no alignment hint: r2 = topleft + 1
2019*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d16  // add rounding bias (width + height) >> 1
2020*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d1,  d1
2021*c0909341SAndroid Build Coastguard Worker        vpadd.u16       d1,  d1  // every lane of d1 = sum of the 4 bytes
2022*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #4  // flags consumed by the beq below (NEON ops leave them untouched)
2023*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
2024*c0909341SAndroid Build Coastguard Worker        vshl.u16        d0,  d0,  d17  // negative shift count = right shift by ctz(width + height)
2025*c0909341SAndroid Build Coastguard Worker        beq             1f  // square block: width + height is a power of two, shift was enough
2026*c0909341SAndroid Build Coastguard Worker        // h = 8/16
        // vqdmulh.s16 returns the high half of 2*a*b, hence the halved
        // multipliers: 0x3334 ~= 65536/5 (h == 16, w + h = 20) and
        // 0x5556 ~= 65536/3 (h == 8, w + h = 12).
2027*c0909341SAndroid Build Coastguard Worker        movw            lr,  #(0x3334/2)
2028*c0909341SAndroid Build Coastguard Worker        movw            r8,  #(0x5556/2)
2029*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #16
2030*c0909341SAndroid Build Coastguard Worker        it              ne
2031*c0909341SAndroid Build Coastguard Worker        movne           lr,  r8
2032*c0909341SAndroid Build Coastguard Worker        vdup.16         d18, lr
2033*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     d0,  d0,  d18
2034*c0909341SAndroid Build Coastguard Worker1:
2035*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]  // broadcast dc to all 8 lanes of q0
2036*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_splat_w4)
2037*c0909341SAndroid Build Coastguard Worker
// height == 8: d0 = sum of the 8 bytes before topleft (in every lane)
2038*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h8):
2039*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0}, [r2, :64]!  // writeback leaves r2 = topleft
2040*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d0,  d0
2041*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0
2042*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #1  // skip the topleft byte: r2 = topleft + 1
2043*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0
2044*c0909341SAndroid Build Coastguard Worker        bx              r12
// width == 8: add the 8 bytes at topleft + 1 and finish dc
2045*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w8):
2046*c0909341SAndroid Build Coastguard Worker        vld1.8          {d1}, [r2]
2047*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d16  // add rounding bias (width + height) >> 1
2048*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d1,  d1
2049*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d1,  d1
2050*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d1,  d1  // every lane of d1 = sum of the 8 bytes
2051*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #8
2052*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
2053*c0909341SAndroid Build Coastguard Worker        vshl.u16        d0,  d0,  d17  // right shift by ctz(width + height)
2054*c0909341SAndroid Build Coastguard Worker        beq             1f  // square block: no further division needed
2055*c0909341SAndroid Build Coastguard Worker        // h = 4/16/32
        // h == 32 (w + h = 40) needs 1/5; h == 4 or 16 (12 or 24) needs 1/3.
        // movw does not alter the flags set by the cmp.
2056*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #32
2057*c0909341SAndroid Build Coastguard Worker        movw            lr,  #(0x3334/2)
2058*c0909341SAndroid Build Coastguard Worker        movw            r8,  #(0x5556/2)
2059*c0909341SAndroid Build Coastguard Worker        it              ne
2060*c0909341SAndroid Build Coastguard Worker        movne           lr,  r8
2061*c0909341SAndroid Build Coastguard Worker        vdup.16         d18, lr
2062*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     d0,  d0,  d18
2063*c0909341SAndroid Build Coastguard Worker1:
2064*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]
2065*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_splat_w8)
2066*c0909341SAndroid Build Coastguard Worker
// height == 16: d0 = sum of the 16 bytes before topleft (in every lane)
2067*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h16):
2068*c0909341SAndroid Build Coastguard Worker        vld1.8          {q0}, [r2, :128]!  // writeback leaves r2 = topleft
2069*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q0,  d0,  d1  // widening add of the two 8-byte halves
2070*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
2071*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0
2072*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #1  // skip the topleft byte: r2 = topleft + 1
2073*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0
2074*c0909341SAndroid Build Coastguard Worker        bx              r12
// width == 16: add the 16 bytes at topleft + 1 and finish dc
2075*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w16):
2076*c0909341SAndroid Build Coastguard Worker        vld1.8          {q2}, [r2]
2077*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d16  // add rounding bias (width + height) >> 1
2078*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q2,  d4,  d5
2079*c0909341SAndroid Build Coastguard Worker        vadd.i16        d4,  d4,  d5
2080*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d4,  d4
2081*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d4,  d4  // every lane of d4 = sum of the 16 bytes
2082*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #16
2083*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d4
2084*c0909341SAndroid Build Coastguard Worker        vshl.u16        d0,  d0,  d17  // right shift by ctz(width + height)
2085*c0909341SAndroid Build Coastguard Worker        beq             1f  // square block: no further division needed
2086*c0909341SAndroid Build Coastguard Worker        // h = 4/8/32/64
        // Z is set for h == 4 or 64 (w + h = 20 or 80 -> 1/5) and clear for
        // h == 8 or 32 (24 or 48 -> 1/3); h == 16 never reaches this point.
2087*c0909341SAndroid Build Coastguard Worker        tst             r4,  #(32+16+8)  // 16 added to make a consecutive bitmask
2088*c0909341SAndroid Build Coastguard Worker        movw            lr,  #(0x3334/2)
2089*c0909341SAndroid Build Coastguard Worker        movw            r8,  #(0x5556/2)
2090*c0909341SAndroid Build Coastguard Worker        it              ne
2091*c0909341SAndroid Build Coastguard Worker        movne           lr,  r8
2092*c0909341SAndroid Build Coastguard Worker        vdup.16         d18, lr
2093*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     d0,  d0,  d18
2094*c0909341SAndroid Build Coastguard Worker1:
2095*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]
2096*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_splat_w16)
2097*c0909341SAndroid Build Coastguard Worker
// height == 32: d0 = sum of the 32 bytes before topleft (in every lane)
2098*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h32):
2099*c0909341SAndroid Build Coastguard Worker        vld1.8          {q2, q3}, [r2, :128]!  // writeback leaves r2 = topleft
2100*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q2,  d4,  d5
2101*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q3,  d6,  d7
2102*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q2,  q3
2103*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
2104*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0
2105*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #1  // skip the topleft byte: r2 = topleft + 1
2106*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0
2107*c0909341SAndroid Build Coastguard Worker        bx              r12
// width == 32: add the 32 bytes at topleft + 1 and finish dc
2108*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w32):
2109*c0909341SAndroid Build Coastguard Worker        vld1.8          {q2, q3},  [r2]
2110*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d16  // add rounding bias (width + height) >> 1
2111*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q2,  d4,  d5
2112*c0909341SAndroid Build Coastguard Worker        vaddl.u8        q3,  d6,  d7
2113*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q3
2114*c0909341SAndroid Build Coastguard Worker        vadd.i16        d4,  d4,  d5
2115*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d4,  d4
2116*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d4,  d4  // every lane of d4 = sum of the 32 bytes
2117*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #32
2118*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d4
2119*c0909341SAndroid Build Coastguard Worker        vshl.u16        d0,  d0,  d17  // right shift by ctz(width + height)
2120*c0909341SAndroid Build Coastguard Worker        beq             1f  // square block: no further division needed
2121*c0909341SAndroid Build Coastguard Worker        // h = 8/16/64
        // h == 8 (w + h = 40) needs 1/5; h == 16 or 64 (48 or 96) needs 1/3.
2122*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #8
2123*c0909341SAndroid Build Coastguard Worker        movw            lr,  #(0x3334/2)
2124*c0909341SAndroid Build Coastguard Worker        movw            r8,  #(0x5556/2)
2125*c0909341SAndroid Build Coastguard Worker        it              ne
2126*c0909341SAndroid Build Coastguard Worker        movne           lr,  r8
2127*c0909341SAndroid Build Coastguard Worker        vdup.16         d18, lr
2128*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     d0,  d0,  d18
2129*c0909341SAndroid Build Coastguard Worker1:
2130*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]
2131*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_splat_w16)  // width 32 uses the same splat entry as width 16
2132*c0909341SAndroid Build Coastguard Workerendfunc
2133*c0909341SAndroid Build Coastguard Worker
2134*c0909341SAndroid Build Coastguard Worker// void ipred_cfl_ac_420_8bpc_neon(int16_t *const ac, const pixel *const ypx,
2135*c0909341SAndroid Build Coastguard Worker//                                 const ptrdiff_t stride, const int w_pad,
2136*c0909341SAndroid Build Coastguard Worker//                                 const int h_pad, const int cw, const int ch);
2137*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_ac_420_8bpc_neon, export=1
2138*c0909341SAndroid Build Coastguard Worker        push            {r4-r8,lr}
2139*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #24]
2140*c0909341SAndroid Build Coastguard Worker        ldr             r6,  [sp, #32]
2141*c0909341SAndroid Build Coastguard Worker        clz             r8,  r5
2142*c0909341SAndroid Build Coastguard Worker        lsl             r4,  r4,  #2
2143*c0909341SAndroid Build Coastguard Worker        adr             r7,  L(ipred_cfl_ac_420_tbl)
2144*c0909341SAndroid Build Coastguard Worker        sub             r8,  r8,  #27
2145*c0909341SAndroid Build Coastguard Worker        ldr             r8,  [r7, r8, lsl #2]
2146*c0909341SAndroid Build Coastguard Worker        vmov.i16        q8,  #0
2147*c0909341SAndroid Build Coastguard Worker        vmov.i16        q9,  #0
2148*c0909341SAndroid Build Coastguard Worker        vmov.i16        q10, #0
2149*c0909341SAndroid Build Coastguard Worker        vmov.i16        q11, #0
2150*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8
2151*c0909341SAndroid Build Coastguard Worker        sub             r8,  r6,  r4  // height - h_pad
2152*c0909341SAndroid Build Coastguard Worker        rbit            lr,  r5       // rbit(width)
2153*c0909341SAndroid Build Coastguard Worker        rbit            r12, r6       // rbit(height)
2154*c0909341SAndroid Build Coastguard Worker        clz             lr,  lr       // ctz(width)
2155*c0909341SAndroid Build Coastguard Worker        clz             r12, r12      // ctz(height)
2156*c0909341SAndroid Build Coastguard Worker        add             lr,  lr,  r12 // log2sz
2157*c0909341SAndroid Build Coastguard Worker        add             r12, r1,  r2
2158*c0909341SAndroid Build Coastguard Worker        vdup.32         d31, lr
2159*c0909341SAndroid Build Coastguard Worker        lsl             r2,  r2,  #1
2160*c0909341SAndroid Build Coastguard Worker        vneg.s32        d31, d31      // -log2sz
2161*c0909341SAndroid Build Coastguard Worker        bx              r7
2162*c0909341SAndroid Build Coastguard Worker
2163*c0909341SAndroid Build Coastguard Worker        .align 2
2164*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_tbl):
2165*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_420_w16) - L(ipred_cfl_ac_420_tbl) + CONFIG_THUMB
2166*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_420_w8)  - L(ipred_cfl_ac_420_tbl) + CONFIG_THUMB
2167*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_420_w4)  - L(ipred_cfl_ac_420_tbl) + CONFIG_THUMB
2168*c0909341SAndroid Build Coastguard Worker
2169*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w4):
2170*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input
2171*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0}, [r1,  :64], r2
2172*c0909341SAndroid Build Coastguard Worker        vld1.8          {d2}, [r12, :64], r2
2173*c0909341SAndroid Build Coastguard Worker        vld1.8          {d1}, [r1,  :64], r2
2174*c0909341SAndroid Build Coastguard Worker        vld1.8          {d3}, [r12, :64], r2
2175*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q0,  q0
2176*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q1,  q1
2177*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q1
2178*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #1
2179*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
2180*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0}, [r0, :128]!
2181*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2182*c0909341SAndroid Build Coastguard Worker        bgt             1b
2183*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0
2184*c0909341SAndroid Build Coastguard Worker        vmov            d0,  d1
2185*c0909341SAndroid Build Coastguard Worker        vmov            d2,  d1
2186*c0909341SAndroid Build Coastguard Worker        vmov            d3,  d1
2187*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w4_hpad):
2188*c0909341SAndroid Build Coastguard Worker        beq             3f // This assumes that all callers already did "cmp r4, #0"
2189*c0909341SAndroid Build Coastguard Worker2:      // Vertical padding (h_pad > 0)
2190*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
2191*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2192*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2193*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q1
2194*c0909341SAndroid Build Coastguard Worker        bgt             2b
2195*c0909341SAndroid Build Coastguard Worker3:
2196*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w4_calc_subtract_dc):
2197*c0909341SAndroid Build Coastguard Worker        // Aggregate the sums
2198*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q8,  q9
2199*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q10, q11
2200*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      q0,  q0
2201*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      q1,  q1
2202*c0909341SAndroid Build Coastguard Worker        vadd.i32        q0,  q1
2203*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d1
2204*c0909341SAndroid Build Coastguard Worker        vpadd.i32       d0,  d0,  d0  // sum
2205*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  r6, lsl #3
2206*c0909341SAndroid Build Coastguard Worker        vrshl.u32       d16, d0,  d31 // (sum + (1 << (log2sz - 1))) >>= log2sz
2207*c0909341SAndroid Build Coastguard Worker        vdup.16         q8,  d16[0]
2208*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w4_subtract_dc):
2209*c0909341SAndroid Build Coastguard Worker6:      // Subtract dc from ac
2210*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0, q1}, [r0, :128]
2211*c0909341SAndroid Build Coastguard Worker        subs            r6,  r6,  #4
2212*c0909341SAndroid Build Coastguard Worker        vsub.i16        q0,  q0,  q8
2213*c0909341SAndroid Build Coastguard Worker        vsub.i16        q1,  q1,  q8
2214*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2215*c0909341SAndroid Build Coastguard Worker        bgt             6b
2216*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
2217*c0909341SAndroid Build Coastguard Worker
2218*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8):
2219*c0909341SAndroid Build Coastguard Worker        cmp             r3,  #0
2220*c0909341SAndroid Build Coastguard Worker        bne             L(ipred_cfl_ac_420_w8_wpad)
2221*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, without padding
2222*c0909341SAndroid Build Coastguard Worker        vld1.8          {q0}, [r1,  :128], r2
2223*c0909341SAndroid Build Coastguard Worker        vld1.8          {q1}, [r12, :128], r2
2224*c0909341SAndroid Build Coastguard Worker        vld1.8          {q2}, [r1,  :128], r2
2225*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q0,  q0
2226*c0909341SAndroid Build Coastguard Worker        vld1.8          {q3}, [r12, :128], r2
2227*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q1,  q1
2228*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q2,  q2
2229*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q3,  q3
2230*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q1
2231*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q3
2232*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #1
2233*c0909341SAndroid Build Coastguard Worker        vshl.i16        q1,  q2,  #1
2234*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
2235*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2236*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2237*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2238*c0909341SAndroid Build Coastguard Worker        bgt             1b
2239*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0
2240*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q1
2241*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w8_hpad)
2242*c0909341SAndroid Build Coastguard Worker
2243*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8_wpad):
2244*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, padding 4
2245*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0}, [r1,  :64], r2
2246*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2}, [r12, :64], r2
2247*c0909341SAndroid Build Coastguard Worker        vld1.16         {d1}, [r1,  :64], r2
2248*c0909341SAndroid Build Coastguard Worker        vld1.16         {d3}, [r12, :64], r2
2249*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q0,  q0
2250*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q1,  q1
2251*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q1
2252*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #1
2253*c0909341SAndroid Build Coastguard Worker        vdup.16         d3,  d1[3]
2254*c0909341SAndroid Build Coastguard Worker        vmov            d2,  d1
2255*c0909341SAndroid Build Coastguard Worker        vdup.16         d1,  d0[3]
2256*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
2257*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2258*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2259*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2260*c0909341SAndroid Build Coastguard Worker        bgt             1b
2261*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0
2262*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q1
2263*c0909341SAndroid Build Coastguard Worker
2264*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8_hpad):
2265*c0909341SAndroid Build Coastguard Worker        beq             3f // This assumes that all callers already did "cmp r4, #0"
2266*c0909341SAndroid Build Coastguard Worker2:      // Vertical padding (h_pad > 0)
2267*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
2268*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2269*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2270*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2271*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2272*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q0
2273*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q1
2274*c0909341SAndroid Build Coastguard Worker        bgt             2b
2275*c0909341SAndroid Build Coastguard Worker3:
2276*c0909341SAndroid Build Coastguard Worker
2277*c0909341SAndroid Build Coastguard Worker        // Double the height and reuse the w4 summing/subtracting
2278*c0909341SAndroid Build Coastguard Worker        lsl             r6,  r6,  #1
2279*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w4_calc_subtract_dc)
2280*c0909341SAndroid Build Coastguard Worker
2281*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16):
2282*c0909341SAndroid Build Coastguard Worker        adr             r7,  L(ipred_cfl_ac_420_w16_tbl)
2283*c0909341SAndroid Build Coastguard Worker        ldr             r3,  [r7, r3, lsl #2]
2284*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r3
2285*c0909341SAndroid Build Coastguard Worker        bx              r7
2286*c0909341SAndroid Build Coastguard Worker
2287*c0909341SAndroid Build Coastguard Worker        .align 2
2288*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_tbl):
2289*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_420_w16_wpad0) - L(ipred_cfl_ac_420_w16_tbl) + CONFIG_THUMB
2290*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_420_w16_wpad1) - L(ipred_cfl_ac_420_w16_tbl) + CONFIG_THUMB
2291*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_420_w16_wpad2) - L(ipred_cfl_ac_420_w16_tbl) + CONFIG_THUMB
2292*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_420_w16_wpad3) - L(ipred_cfl_ac_420_w16_tbl) + CONFIG_THUMB
2293*c0909341SAndroid Build Coastguard Worker
2294*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad0):
2295*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, without padding
2296*c0909341SAndroid Build Coastguard Worker        vld1.8          {q0, q1},   [r1,  :128], r2
2297*c0909341SAndroid Build Coastguard Worker        vld1.8          {q2, q3},   [r12, :128], r2
2298*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q0,  q0
2299*c0909341SAndroid Build Coastguard Worker        vld1.8          {q12, q13}, [r1,  :128], r2
2300*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q1,  q1
2301*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q2,  q2
2302*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q3,  q3
2303*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q2
2304*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q3
2305*c0909341SAndroid Build Coastguard Worker        vld1.8          {q2, q3},   [r12, :128], r2
2306*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q12, q12
2307*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q13, q13
2308*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q2,  q2
2309*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q3,  q3
2310*c0909341SAndroid Build Coastguard Worker        vadd.i16        q12, q12, q2
2311*c0909341SAndroid Build Coastguard Worker        vadd.i16        q13, q13, q3
2312*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #1
2313*c0909341SAndroid Build Coastguard Worker        vshl.i16        q1,  q1,  #1
2314*c0909341SAndroid Build Coastguard Worker        vshl.i16        q2,  q12, #1
2315*c0909341SAndroid Build Coastguard Worker        vshl.i16        q3,  q13, #1
2316*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
2317*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2318*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2319*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2320*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
2321*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2322*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2323*c0909341SAndroid Build Coastguard Worker        bgt             1b
2324*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0
2325*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q2
2326*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q3
2327*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w16_hpad)
2328*c0909341SAndroid Build Coastguard Worker
2329*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad1):
2330*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, padding 4
2331*c0909341SAndroid Build Coastguard Worker        vldr            d2,    [r1,  #16]
2332*c0909341SAndroid Build Coastguard Worker        vld1.8          {q0},  [r1,  :128], r2
2333*c0909341SAndroid Build Coastguard Worker        vldr            d6,    [r12, #16]
2334*c0909341SAndroid Build Coastguard Worker        vld1.8          {q2},  [r12, :128], r2
2335*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d2,  d2
2336*c0909341SAndroid Build Coastguard Worker        vldr            d26,   [r1,  #16]
2337*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q0,  q0
2338*c0909341SAndroid Build Coastguard Worker        vld1.8          {q12}, [r1,  :128], r2
2339*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d6,  d6
2340*c0909341SAndroid Build Coastguard Worker        vldr            d30,   [r12, #16]
2341*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q2,  q2
2342*c0909341SAndroid Build Coastguard Worker        vld1.8          {q14}, [r12, :128], r2
2343*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d26, d26
2344*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q12, q12
2345*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d30, d30
2346*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q14, q14
2347*c0909341SAndroid Build Coastguard Worker        vadd.i16        d2,  d2,  d6
2348*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q2
2349*c0909341SAndroid Build Coastguard Worker        vadd.i16        d26, d26, d30
2350*c0909341SAndroid Build Coastguard Worker        vadd.i16        q12, q12, q14
2351*c0909341SAndroid Build Coastguard Worker        vshl.i16        d2,  d2,  #1
2352*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #1
2353*c0909341SAndroid Build Coastguard Worker        vshl.i16        d6,  d26, #1
2354*c0909341SAndroid Build Coastguard Worker        vshl.i16        q2,  q12, #1
2355*c0909341SAndroid Build Coastguard Worker        vdup.16         d3,  d2[3]
2356*c0909341SAndroid Build Coastguard Worker        vdup.16         d7,  d6[3]
2357*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
2358*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2359*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2360*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2361*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
2362*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2363*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2364*c0909341SAndroid Build Coastguard Worker        bgt             1b
2365*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0
2366*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q2
2367*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q3
2368*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w16_hpad)
2369*c0909341SAndroid Build Coastguard Worker
2370*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad2):
2371*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, padding 8
2372*c0909341SAndroid Build Coastguard Worker        vld1.8          {q0}, [r1,  :128], r2
2373*c0909341SAndroid Build Coastguard Worker        vld1.8          {q1}, [r12, :128], r2
2374*c0909341SAndroid Build Coastguard Worker        vld1.8          {q2}, [r1,  :128], r2
2375*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q0,  q0
2376*c0909341SAndroid Build Coastguard Worker        vld1.8          {q3}, [r12, :128], r2
2377*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q1,  q1
2378*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q2,  q2
2379*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q3,  q3
2380*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q1
2381*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q3
2382*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #1
2383*c0909341SAndroid Build Coastguard Worker        vshl.i16        q2,  q2,  #1
2384*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d1[3]
2385*c0909341SAndroid Build Coastguard Worker        vdup.16         q3,  d5[3]
2386*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
2387*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2388*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2389*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2390*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
2391*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2392*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2393*c0909341SAndroid Build Coastguard Worker        bgt             1b
2394*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0
2395*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q2
2396*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q3
2397*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w16_hpad)
2398*c0909341SAndroid Build Coastguard Worker
2399*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad3):
2400*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, padding 12
2401*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0}, [r1,  :64], r2
2402*c0909341SAndroid Build Coastguard Worker        vld1.8          {d1}, [r12, :64], r2
2403*c0909341SAndroid Build Coastguard Worker        vld1.8          {d4}, [r1,  :64], r2
2404*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q0,  q0
2405*c0909341SAndroid Build Coastguard Worker        vld1.8          {d5}, [r12, :64], r2
2406*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q2,  q2
2407*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
2408*c0909341SAndroid Build Coastguard Worker        vadd.i16        d4,  d4,  d5
2409*c0909341SAndroid Build Coastguard Worker        vshl.i16        d0,  d0,  #1
2410*c0909341SAndroid Build Coastguard Worker        vshl.i16        d4,  d4,  #1
2411*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d0[3]
2412*c0909341SAndroid Build Coastguard Worker        vdup.16         q3,  d4[3]
2413*c0909341SAndroid Build Coastguard Worker        vdup.16         d1,  d0[3]
2414*c0909341SAndroid Build Coastguard Worker        vdup.16         d5,  d4[3]
2415*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
2416*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2417*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2418*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2419*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
2420*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2421*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2422*c0909341SAndroid Build Coastguard Worker        bgt             1b
2423*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0
2424*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q2
2425*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q3
2426*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w16_hpad)
2427*c0909341SAndroid Build Coastguard Worker
2428*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_hpad):
2429*c0909341SAndroid Build Coastguard Worker        beq             3f // This assumes that all callers already did "cmp r4, #0"
2430*c0909341SAndroid Build Coastguard Worker2:      // Vertical padding (h_pad > 0)
2431*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
2432*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2433*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2434*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2435*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
2436*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2437*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2438*c0909341SAndroid Build Coastguard Worker        bgt             2b
2439*c0909341SAndroid Build Coastguard Worker3:
2440*c0909341SAndroid Build Coastguard Worker
2441*c0909341SAndroid Build Coastguard Worker        // Quadruple the height and reuse the w4 summing/subtracting
2442*c0909341SAndroid Build Coastguard Worker        lsl             r6,  r6,  #2
2443*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w4_calc_subtract_dc)
2444*c0909341SAndroid Build Coastguard Workerendfunc
2445*c0909341SAndroid Build Coastguard Worker
2446*c0909341SAndroid Build Coastguard Worker// void cfl_ac_422_8bpc_neon(int16_t *const ac, const pixel *const ypx,
2447*c0909341SAndroid Build Coastguard Worker//                           const ptrdiff_t stride, const int w_pad,
2448*c0909341SAndroid Build Coastguard Worker//                           const int h_pad, const int cw, const int ch);
//
// Each value written to ac is the sum of two horizontally adjacent input
// bytes (vpaddl.u8) shifted left by 2. Rows map one-to-one, i.e. there is no
// vertical subsampling, which matches the 422 in the name.
//
// Register use once the prologue has run:
//   r0     = ac, advanced by the post-incrementing stores
//   r1     = input pointer for rows 0, 2, 4, ...
//   r12    = input pointer for rows 1, 3, 5, ... (ypx + stride)
//   r2     = 2 * stride, so r1 and r12 each step over the other pointer's row
//   r3     = w_pad (tested by the w8 path, table index in the w16 path)
//   r4     = h_pad * 4, r5 = cw, r6 = ch
//   r8     = ch - h_pad * 4, the number of rows produced by the copy loops
//   q8-q11 = running 16-bit sums of the values stored to ac
//   d31    = -(ctz(cw) + ctz(ch)) in both 32-bit lanes
//
// Every path ends by branching to an hpad label of the 420 function with the
// flags of "cmp r4, #0" still live (vmov does not change them). q8-q11 and d31
// are not read inside this function; presumably the shared tail code
// (w4_calc_subtract_dc, outside the visible source) consumes them - confirm.
// The push of r4-r8/lr is not undone here, so the matching pop is expected in
// that shared tail as well - confirm.
//
// In all loops the counter is decremented with subs several instructions
// before the bgt; the NEON instructions in between leave the flags untouched.
2449*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_ac_422_8bpc_neon, export=1
2450*c0909341SAndroid Build Coastguard Worker        push            {r4-r8,lr}              // 6 registers = 24 bytes
2451*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #24]     // r4 = h_pad, r5 = cw (first stack args)
2452*c0909341SAndroid Build Coastguard Worker        ldr             r6,  [sp, #32]          // r6 = ch
2453*c0909341SAndroid Build Coastguard Worker        clz             r8,  r5
2454*c0909341SAndroid Build Coastguard Worker        lsl             r4,  r4,  #2            // r4 = h_pad * 4
2455*c0909341SAndroid Build Coastguard Worker        adr             r7,  L(ipred_cfl_ac_422_tbl)
2456*c0909341SAndroid Build Coastguard Worker        sub             r8,  r8,  #27           // clz(16) = 27: cw 16/8/4 -> index 0/1/2
2457*c0909341SAndroid Build Coastguard Worker        ldr             r8,  [r7, r8, lsl #2]   // table entry = offset of the width handler
2458*c0909341SAndroid Build Coastguard Worker        vmov.i16        q8,  #0                 // clear the four sum accumulators
2459*c0909341SAndroid Build Coastguard Worker        vmov.i16        q9,  #0
2460*c0909341SAndroid Build Coastguard Worker        vmov.i16        q10, #0
2461*c0909341SAndroid Build Coastguard Worker        vmov.i16        q11, #0
2462*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8            // r7 = address of the width handler
2463*c0909341SAndroid Build Coastguard Worker        sub             r8,  r6,  r4  // height - h_pad (h_pad already scaled by 4)
2464*c0909341SAndroid Build Coastguard Worker        rbit            lr,  r5       // rbit(width)
2465*c0909341SAndroid Build Coastguard Worker        rbit            r12, r6       // rbit(height)
2466*c0909341SAndroid Build Coastguard Worker        clz             lr,  lr       // ctz(width)
2467*c0909341SAndroid Build Coastguard Worker        clz             r12, r12      // ctz(height)
2468*c0909341SAndroid Build Coastguard Worker        add             lr,  lr,  r12 // log2sz
2469*c0909341SAndroid Build Coastguard Worker        add             r12, r1,  r2            // r12 = second input row
2470*c0909341SAndroid Build Coastguard Worker        vdup.32         d31, lr
2471*c0909341SAndroid Build Coastguard Worker        lsl             r2,  r2,  #1            // r2 = 2 * stride
2472*c0909341SAndroid Build Coastguard Worker        vneg.s32        d31, d31      // -log2sz
2473*c0909341SAndroid Build Coastguard Worker        bx              r7
2474*c0909341SAndroid Build Coastguard Worker
2475*c0909341SAndroid Build Coastguard Worker        .align 2
        // Offsets of the width handlers relative to the table, ordered to match
        // the index clz(cw) - 27 computed above (w16, w8, w4).
        // NOTE(review): CONFIG_THUMB is presumably 1 in Thumb builds so that
        // the bx target has its Thumb bit set - confirm in the build config.
2476*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_422_tbl):
2477*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_422_w16) - L(ipred_cfl_ac_422_tbl) + CONFIG_THUMB
2478*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_422_w8) - L(ipred_cfl_ac_422_tbl) + CONFIG_THUMB
2479*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_422_w4) - L(ipred_cfl_ac_422_tbl) + CONFIG_THUMB
2480*c0909341SAndroid Build Coastguard Worker
        // cw == 4: four rows per iteration, 8 input bytes -> 4 outputs per row.
        // w_pad is not looked at on this path.
2481*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_422_w4):
2482*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input
2483*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0}, [r1,  :64], r2    // row 0
2484*c0909341SAndroid Build Coastguard Worker        vld1.8          {d1}, [r12, :64], r2    // row 1
2485*c0909341SAndroid Build Coastguard Worker        vld1.8          {d2}, [r1,  :64], r2    // row 2
2486*c0909341SAndroid Build Coastguard Worker        vld1.8          {d3}, [r12, :64], r2    // row 3
2487*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q0,  q0                 // pairwise sums: d0 = row 0, d1 = row 1
2488*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q1,  q1                 // d2 = row 2, d3 = row 3
2489*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #2
2490*c0909341SAndroid Build Coastguard Worker        vshl.i16        q1,  q1,  #2
2491*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #4            // four rows done
2492*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2493*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2494*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2495*c0909341SAndroid Build Coastguard Worker        bgt             1b
2496*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0                 // flags for the hpad code: any rows to pad?
        // Replicate the last row (d3) into d0-d2 so that q0 and q1 hold four
        // copies of it; presumably w4_hpad stores these as the padding rows
        // (that label is outside the visible source - confirm).
2497*c0909341SAndroid Build Coastguard Worker        vmov            d0,  d3
2498*c0909341SAndroid Build Coastguard Worker        vmov            d1,  d3
2499*c0909341SAndroid Build Coastguard Worker        vmov            d2,  d3
2500*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w4_hpad)
2501*c0909341SAndroid Build Coastguard Worker
        // cw == 8: four rows per iteration, one q register of outputs per row.
2502*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_422_w8):
2503*c0909341SAndroid Build Coastguard Worker        cmp             r3,  #0
2504*c0909341SAndroid Build Coastguard Worker        bne             L(ipred_cfl_ac_422_w8_wpad)     // w_pad != 0: padded variant
2505*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, without padding
2506*c0909341SAndroid Build Coastguard Worker        vld1.8          {q0}, [r1,  :128], r2   // row 0, 16 input bytes
2507*c0909341SAndroid Build Coastguard Worker        vld1.8          {q1}, [r12, :128], r2   // row 1
2508*c0909341SAndroid Build Coastguard Worker        vld1.8          {q2}, [r1,  :128], r2   // row 2
2509*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q0,  q0
2510*c0909341SAndroid Build Coastguard Worker        vld1.8          {q3}, [r12, :128], r2   // row 3
2511*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q1,  q1
2512*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q2,  q2
2513*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q3,  q3
2514*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #2
2515*c0909341SAndroid Build Coastguard Worker        vshl.i16        q1,  q1,  #2
2516*c0909341SAndroid Build Coastguard Worker        vshl.i16        q2,  q2,  #2
2517*c0909341SAndroid Build Coastguard Worker        vshl.i16        q3,  q3,  #2
2518*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #4            // four rows done
2519*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2520*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2521*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2522*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
2523*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2524*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2525*c0909341SAndroid Build Coastguard Worker        bgt             1b
2526*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0                 // flags for the hpad code
2527*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q3                 // q0 = q1 = last row written
2528*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q3
2529*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w8_hpad)
2530*c0909341SAndroid Build Coastguard Worker
        // cw == 8 with w_pad != 0: only 8 input bytes (4 outputs) are read per
        // row; outputs 4-7 repeat output 3 of the same row.
2531*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_422_w8_wpad):
2532*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, padding 4
2533*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0}, [r1,  :64], r2    // row 0
2534*c0909341SAndroid Build Coastguard Worker        vld1.8          {d1}, [r12, :64], r2    // row 1
2535*c0909341SAndroid Build Coastguard Worker        vld1.8          {d2}, [r1,  :64], r2    // row 2
2536*c0909341SAndroid Build Coastguard Worker        vld1.8          {d3}, [r12, :64], r2    // row 3
2537*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q0,  q0
2538*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q1,  q1
2539*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #2
2540*c0909341SAndroid Build Coastguard Worker        vshl.i16        q1,  q1,  #2
        // Widen d0-d3 (one row each) into q0-q3 (row + padding). The rows are
        // handled last to first so that no d register is overwritten before
        // it has been read.
2541*c0909341SAndroid Build Coastguard Worker        vdup.16         d7,  d3[3]              // q3 = row 3 + padding
2542*c0909341SAndroid Build Coastguard Worker        vmov            d6,  d3
2543*c0909341SAndroid Build Coastguard Worker        vdup.16         d5,  d2[3]              // q2 = row 2 + padding
2544*c0909341SAndroid Build Coastguard Worker        vmov            d4,  d2
2545*c0909341SAndroid Build Coastguard Worker        vdup.16         d3,  d1[3]              // q1 = row 1 + padding
2546*c0909341SAndroid Build Coastguard Worker        vmov            d2,  d1
2547*c0909341SAndroid Build Coastguard Worker        vdup.16         d1,  d0[3]              // q0 = row 0 + padding
2548*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #4            // four rows done
2549*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2550*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2551*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2552*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
2553*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2554*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2555*c0909341SAndroid Build Coastguard Worker        bgt             1b
2556*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0                 // flags for the hpad code
2557*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q3                 // q0 = q1 = last row written
2558*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q3
2559*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w8_hpad)
2560*c0909341SAndroid Build Coastguard Worker
        // cw == 16: second-level dispatch on w_pad (0-3) through a table of
        // offsets. r3 is overwritten with the table entry.
2561*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_422_w16):
2562*c0909341SAndroid Build Coastguard Worker        adr             r7,  L(ipred_cfl_ac_422_w16_tbl)
2563*c0909341SAndroid Build Coastguard Worker        ldr             r3,  [r7, r3, lsl #2]   // entry number w_pad
2564*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r3
2565*c0909341SAndroid Build Coastguard Worker        bx              r7
2566*c0909341SAndroid Build Coastguard Worker
2567*c0909341SAndroid Build Coastguard Worker        .align 2
2568*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_422_w16_tbl):
2569*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_422_w16_wpad0) - L(ipred_cfl_ac_422_w16_tbl) + CONFIG_THUMB
2570*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_422_w16_wpad1) - L(ipred_cfl_ac_422_w16_tbl) + CONFIG_THUMB
2571*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_422_w16_wpad2) - L(ipred_cfl_ac_422_w16_tbl) + CONFIG_THUMB
2572*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_422_w16_wpad3) - L(ipred_cfl_ac_422_w16_tbl) + CONFIG_THUMB
2573*c0909341SAndroid Build Coastguard Worker
        // All four cw == 16 variants process two rows per iteration: row 0
        // ends up in q0:q1, row 1 in q2:q3. After the loop q2:q3 is copied to
        // q0:q1, because w16_hpad stores both {q0, q1} and {q2, q3} for every
        // pair of padding rows.
2574*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_422_w16_wpad0):
2575*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, without padding
2576*c0909341SAndroid Build Coastguard Worker        vld1.8          {q0, q1}, [r1,  :128], r2       // row 0, 32 input bytes
2577*c0909341SAndroid Build Coastguard Worker        vld1.8          {q2, q3}, [r12, :128], r2       // row 1
2578*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q0,  q0
2579*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q1,  q1
2580*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q2,  q2
2581*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q3,  q3
2582*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #2
2583*c0909341SAndroid Build Coastguard Worker        vshl.i16        q1,  q1,  #2
2584*c0909341SAndroid Build Coastguard Worker        vshl.i16        q2,  q2,  #2
2585*c0909341SAndroid Build Coastguard Worker        vshl.i16        q3,  q3,  #2
2586*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2            // two rows done
2587*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2588*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2589*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2590*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
2591*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2592*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2593*c0909341SAndroid Build Coastguard Worker        bgt             1b
2594*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0                 // w16_hpad starts with beq on these flags
2595*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q2                 // q0:q1 = last row
2596*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q3
2597*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w16_hpad)
2598*c0909341SAndroid Build Coastguard Worker
        // w_pad == 1: 24 input bytes per row give outputs 0-11; outputs 12-15
        // repeat output 11.
2599*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_422_w16_wpad1):
2600*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, padding 4
2601*c0909341SAndroid Build Coastguard Worker        vldr            d2,   [r1,  #16]        // row 0 bytes 16-23, read before r1 advances
2602*c0909341SAndroid Build Coastguard Worker        vld1.8          {q0}, [r1,  :128], r2   // row 0 bytes 0-15
2603*c0909341SAndroid Build Coastguard Worker        vldr            d6,   [r12, #16]        // row 1 bytes 16-23
2604*c0909341SAndroid Build Coastguard Worker        vld1.8          {q2}, [r12, :128], r2   // row 1 bytes 0-15
2605*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d2,  d2
2606*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q0,  q0
2607*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       d6,  d6
2608*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q2,  q2
2609*c0909341SAndroid Build Coastguard Worker        vshl.i16        d2,  d2,  #2
2610*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #2
2611*c0909341SAndroid Build Coastguard Worker        vshl.i16        d6,  d6,  #2
2612*c0909341SAndroid Build Coastguard Worker        vshl.i16        q2,  q2,  #2
2613*c0909341SAndroid Build Coastguard Worker        vdup.16         d3,  d2[3]              // row 0 outputs 12-15
2614*c0909341SAndroid Build Coastguard Worker        vdup.16         d7,  d6[3]              // row 1 outputs 12-15
2615*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2            // two rows done
2616*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2617*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2618*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2619*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
2620*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2621*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2622*c0909341SAndroid Build Coastguard Worker        bgt             1b
2623*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0                 // w16_hpad starts with beq on these flags
2624*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q2                 // q0:q1 = last row
2625*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q3
2626*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w16_hpad)
2627*c0909341SAndroid Build Coastguard Worker
        // w_pad == 2: 16 input bytes per row give outputs 0-7; outputs 8-15
        // repeat output 7.
2628*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_422_w16_wpad2):
2629*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, padding 8
2630*c0909341SAndroid Build Coastguard Worker        vld1.8          {q0}, [r1,  :128], r2   // row 0
2631*c0909341SAndroid Build Coastguard Worker        vld1.8          {q2}, [r12, :128], r2   // row 1
2632*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q0,  q0
2633*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q2,  q2
2634*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #2
2635*c0909341SAndroid Build Coastguard Worker        vshl.i16        q2,  q2,  #2
2636*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d1[3]              // row 0 outputs 8-15
2637*c0909341SAndroid Build Coastguard Worker        vdup.16         q3,  d5[3]              // row 1 outputs 8-15
2638*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2            // two rows done
2639*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2640*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2641*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2642*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
2643*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2644*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2645*c0909341SAndroid Build Coastguard Worker        bgt             1b
2646*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0                 // w16_hpad starts with beq on these flags
2647*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q2                 // q0:q1 = last row
2648*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q3
2649*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w16_hpad)
2650*c0909341SAndroid Build Coastguard Worker
        // w_pad == 3: 8 input bytes per row give outputs 0-3; outputs 4-15
        // repeat output 3.
2651*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_422_w16_wpad3):
2652*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, padding 12
2653*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0}, [r1,  :64], r2    // row 0
2654*c0909341SAndroid Build Coastguard Worker        vld1.8          {d1}, [r12, :64], r2    // row 1
2655*c0909341SAndroid Build Coastguard Worker        vpaddl.u8       q0,  q0                 // d0 = row 0 outputs, d1 = row 1 outputs
2656*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #2
        // Spread the rows over q0:q1 (row 0) and q2:q3 (row 1). d1 is
        // overwritten last because it is the source of all row 1 values.
2657*c0909341SAndroid Build Coastguard Worker        vdup.16         q3,  d1[3]              // row 1 outputs 8-15
2658*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d0[3]              // row 0 outputs 8-15
2659*c0909341SAndroid Build Coastguard Worker        vdup.16         d5,  d1[3]              // row 1 outputs 4-7
2660*c0909341SAndroid Build Coastguard Worker        vmov            d4,  d1                 // row 1 outputs 0-3
2661*c0909341SAndroid Build Coastguard Worker        vdup.16         d1,  d0[3]              // row 0 outputs 4-7
2662*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2            // two rows done
2663*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2664*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2665*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2666*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
2667*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2668*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2669*c0909341SAndroid Build Coastguard Worker        bgt             1b
2670*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0                 // w16_hpad starts with beq on these flags
2671*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q2                 // q0:q1 = last row
2672*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q3
2673*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w16_hpad)
2674*c0909341SAndroid Build Coastguard Workerendfunc
2675*c0909341SAndroid Build Coastguard Worker
2676*c0909341SAndroid Build Coastguard Worker// void cfl_ac_444_8bpc_neon(int16_t *const ac, const pixel *const ypx,
2677*c0909341SAndroid Build Coastguard Worker//                           const ptrdiff_t stride, const int w_pad,
2678*c0909341SAndroid Build Coastguard Worker//                           const int h_pad, const int cw, const int ch);
//
// Reads 8-bit pixels from ypx, widens each one to 16 bits as (pixel << 3) and
// stores the result contiguously to ac. Columns beyond the valid width are
// filled by replicating the last valid pixel of each row, and rows beyond the
// valid height are filled by repeating the last computed row. All stored
// values are summed on the way; the rounded average (sum >> log2sz) is then
// broadcast into q8 and control is transferred to
// L(ipred_cfl_ac_420_w4_subtract_dc), which lives outside this section.
// NOTE(review): judging by its name that code subtracts q8 from the stored
// values and returns to the caller - confirm against its definition.
//
// Register usage after the prologue:
//   r0      ac write pointer (post-incremented by every store)
//   r1/r12  input pointers for even/odd rows (r12 = ypx + stride)
//   r2      2 * stride, so each pointer advances two rows per load
//   r3      w_pad (only consulted for widths 16 and 32)
//   r4      h_pad * 4 = number of rows to replicate at the bottom
//   r5, r6  cw, ch
//   r8      ch - h_pad * 4 = number of rows actually read from the input
//   q8-q11  16-bit running sums of everything stored
//   d31     -(ctz(cw) + ctz(ch)) in both 32-bit lanes, used as shift amount
2679*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_ac_444_8bpc_neon, export=1
2680*c0909341SAndroid Build Coastguard Worker        push            {r4-r8,lr}            // 6 registers = 24 bytes pushed
2681*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #24]   // r4 = h_pad, r5 = cw
2682*c0909341SAndroid Build Coastguard Worker        ldr             r6,  [sp, #32]        // r6 = ch
2683*c0909341SAndroid Build Coastguard Worker        clz             r8,  r5               // 26/27/28/29 for cw = 32/16/8/4
2684*c0909341SAndroid Build Coastguard Worker        lsl             r4,  r4,  #2          // r4 = h_pad * 4 (rows to pad)
2685*c0909341SAndroid Build Coastguard Worker        adr             r7,  L(ipred_cfl_ac_444_tbl)
2686*c0909341SAndroid Build Coastguard Worker        sub             r8,  r8,  #26         // table index: 0 = w32 ... 3 = w4
2687*c0909341SAndroid Build Coastguard Worker        ldr             r8,  [r7, r8, lsl #2] // handler offset relative to the table
2688*c0909341SAndroid Build Coastguard Worker        vmov.i16        q8,  #0               // clear the four running sums
2689*c0909341SAndroid Build Coastguard Worker        vmov.i16        q9,  #0
2690*c0909341SAndroid Build Coastguard Worker        vmov.i16        q10, #0
2691*c0909341SAndroid Build Coastguard Worker        vmov.i16        q11, #0
2692*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8          // r7 = address of the width handler
2693*c0909341SAndroid Build Coastguard Worker        sub             r8,  r6,  r4  // height - h_pad
2694*c0909341SAndroid Build Coastguard Worker        rbit            lr,  r5       // rbit(width)
2695*c0909341SAndroid Build Coastguard Worker        rbit            r12, r6       // rbit(height)
2696*c0909341SAndroid Build Coastguard Worker        clz             lr,  lr       // ctz(width)
2697*c0909341SAndroid Build Coastguard Worker        clz             r12, r12      // ctz(height)
2698*c0909341SAndroid Build Coastguard Worker        add             lr,  lr,  r12 // log2sz
2699*c0909341SAndroid Build Coastguard Worker        add             r12, r1,  r2          // r12 = second input row
2700*c0909341SAndroid Build Coastguard Worker        vdup.32         d31, lr
2701*c0909341SAndroid Build Coastguard Worker        lsl             r2,  r2,  #1          // stride *= 2: r1 and r12 each skip a row
2702*c0909341SAndroid Build Coastguard Worker        vneg.s32        d31, d31      // -log2sz
2703*c0909341SAndroid Build Coastguard Worker        bx              r7                    // dispatch on width
2704*c0909341SAndroid Build Coastguard Worker
        // Jump table indexed by clz(cw) - 26. Entries are offsets relative to
        // the table itself; CONFIG_THUMB is added to each entry.
2705*c0909341SAndroid Build Coastguard Worker        .align 2
2706*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_tbl):
2707*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_444_w32) - L(ipred_cfl_ac_444_tbl) + CONFIG_THUMB
2708*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_444_w16) - L(ipred_cfl_ac_444_tbl) + CONFIG_THUMB
2709*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_444_w8)  - L(ipred_cfl_ac_444_tbl) + CONFIG_THUMB
2710*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_444_w4)  - L(ipred_cfl_ac_444_tbl) + CONFIG_THUMB
2711*c0909341SAndroid Build Coastguard Worker
        // Width 4: four rows (4 pixels each) per iteration. w_pad is not
        // consulted on this path.
2712*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w4):
2713*c0909341SAndroid Build Coastguard Worker1:      // Copy and expand input
2714*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[]},  [r1,  :32], r2   // row 0 into both halves of d0
2715*c0909341SAndroid Build Coastguard Worker        vld1.32         {d0[1]}, [r12, :32], r2   // row 1 replaces the upper half
2716*c0909341SAndroid Build Coastguard Worker        vld1.32         {d2[]},  [r1,  :32], r2   // row 2
2717*c0909341SAndroid Build Coastguard Worker        vld1.32         {d2[1]}, [r12, :32], r2   // row 3
2718*c0909341SAndroid Build Coastguard Worker        vshll.u8        q0,  d0,  #3              // q0 = rows 0-1, widened, << 3
2719*c0909341SAndroid Build Coastguard Worker        vshll.u8        q1,  d2,  #3              // q1 = rows 2-3, widened, << 3
2720*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #4              // four input rows consumed
2721*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2722*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2723*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2724*c0909341SAndroid Build Coastguard Worker        bgt             1b
2725*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0                   // flags are consumed at the branch target
        // d3 holds the last row read (row 3); copy it over d0-d2 so that q0
        // and q1 consist solely of that row.
        // NOTE(review): L(ipred_cfl_ac_420_w4_hpad) is defined outside this
        // section; presumably it stores q0/q1 for the padded rows - confirm.
2726*c0909341SAndroid Build Coastguard Worker        vmov            d0,  d3
2727*c0909341SAndroid Build Coastguard Worker        vmov            d1,  d3
2728*c0909341SAndroid Build Coastguard Worker        vmov            d2,  d3
2729*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w4_hpad)
2730*c0909341SAndroid Build Coastguard Worker
        // Width 8: four rows (8 pixels each) per iteration, alternating
        // between the even-row pointer r1 and the odd-row pointer r12.
        // w_pad is not consulted on this path.
2731*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w8):
2732*c0909341SAndroid Build Coastguard Worker1:      // Copy and expand input
2733*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0}, [r1,  :64], r2      // row 0
2734*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2}, [r12, :64], r2      // row 1
2735*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4}, [r1,  :64], r2      // row 2
2736*c0909341SAndroid Build Coastguard Worker        vshll.u8        q0,  d0,  #3
2737*c0909341SAndroid Build Coastguard Worker        vld1.16         {d6}, [r12, :64], r2      // row 3
2738*c0909341SAndroid Build Coastguard Worker        vshll.u8        q1,  d2,  #3
2739*c0909341SAndroid Build Coastguard Worker        vshll.u8        q2,  d4,  #3
2740*c0909341SAndroid Build Coastguard Worker        vshll.u8        q3,  d6,  #3
2741*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #4              // four input rows consumed
2742*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2743*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2744*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2745*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
2746*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2747*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2748*c0909341SAndroid Build Coastguard Worker        bgt             1b
2749*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0                   // flags are consumed at the branch target
        // q3 is the last row read; hand it over in both q0 and q1.
        // NOTE(review): L(ipred_cfl_ac_420_w8_hpad) is defined outside this
        // section - confirm the registers it expects.
2750*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q3
2751*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q3
2752*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w8_hpad)
2753*c0909341SAndroid Build Coastguard Worker
        // Width 16: two rows per iteration. Any nonzero w_pad selects the
        // variant that reads only 8 pixels per row.
2754*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w16):
2755*c0909341SAndroid Build Coastguard Worker        cmp             r3,  #0
2756*c0909341SAndroid Build Coastguard Worker        bne             L(ipred_cfl_ac_444_w16_wpad)
2757*c0909341SAndroid Build Coastguard Worker1:      // Copy and expand input, without padding
2758*c0909341SAndroid Build Coastguard Worker        vld1.8          {q1}, [r1,  :128], r2     // row 0, 16 pixels
2759*c0909341SAndroid Build Coastguard Worker        vld1.8          {q3}, [r12, :128], r2     // row 1, 16 pixels
2760*c0909341SAndroid Build Coastguard Worker        vshll.u8        q0,  d2,  #3              // q0/q1 = row 0 widened
2761*c0909341SAndroid Build Coastguard Worker        vshll.u8        q1,  d3,  #3
2762*c0909341SAndroid Build Coastguard Worker        vshll.u8        q2,  d6,  #3              // q2/q3 = row 1 widened
2763*c0909341SAndroid Build Coastguard Worker        vshll.u8        q3,  d7,  #3
2764*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2              // two input rows consumed
2765*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2766*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2767*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2768*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
2769*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2770*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2771*c0909341SAndroid Build Coastguard Worker        bgt             1b
2772*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0                   // flags are consumed at the branch target
        // Move the last row read (q2/q3) into q0/q1.
        // NOTE(review): L(ipred_cfl_ac_420_w16_hpad) is defined outside this
        // section - confirm the registers it expects.
2773*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q2
2774*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q3
2775*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w16_hpad)
2776*c0909341SAndroid Build Coastguard Worker
        // Width 16 with horizontal padding: only the left 8 pixels of each
        // row are read; the right 8 outputs replicate the last widened pixel.
2777*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w16_wpad):
2778*c0909341SAndroid Build Coastguard Worker1:      // Copy and expand input, padding 8
2779*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0}, [r1,  :64], r2
2780*c0909341SAndroid Build Coastguard Worker        vld1.8          {d4}, [r12, :64], r2
2781*c0909341SAndroid Build Coastguard Worker        vshll.u8        q0,  d0,  #3
2782*c0909341SAndroid Build Coastguard Worker        vshll.u8        q2,  d4,  #3
2783*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d1[3]                // last lane of q0 = rightmost pixel read
2784*c0909341SAndroid Build Coastguard Worker        vdup.16         q3,  d5[3]                // last lane of q2 = rightmost pixel read
2785*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2              // two input rows consumed
2786*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2787*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2788*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2789*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
2790*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2791*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2792*c0909341SAndroid Build Coastguard Worker        bgt             1b
2793*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0                   // flags are consumed at the branch target
2794*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q2                   // q0/q1 = last row read
2795*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q3
2796*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w16_hpad)
2797*c0909341SAndroid Build Coastguard Worker
        // Width 32: second-level dispatch on w_pad. The table below has one
        // 4-byte entry for each of w_pad = 0, 2, 4 and 6, so w_pad << 1 is
        // the byte offset of the entry.
2798*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32):
2799*c0909341SAndroid Build Coastguard Worker        adr             r7,  L(ipred_cfl_ac_444_w32_tbl)
2800*c0909341SAndroid Build Coastguard Worker        ldr             r3,  [r7, r3, lsl #1] // (w3>>1) << 2
2801*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r3
2802*c0909341SAndroid Build Coastguard Worker        bx              r7
2803*c0909341SAndroid Build Coastguard Worker
2804*c0909341SAndroid Build Coastguard Worker        .align 2
2805*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_tbl):
2806*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_444_w32_wpad0) - L(ipred_cfl_ac_444_w32_tbl) + CONFIG_THUMB
2807*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_444_w32_wpad2) - L(ipred_cfl_ac_444_w32_tbl) + CONFIG_THUMB
2808*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_444_w32_wpad4) - L(ipred_cfl_ac_444_w32_tbl) + CONFIG_THUMB
2809*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_444_w32_wpad6) - L(ipred_cfl_ac_444_w32_tbl) + CONFIG_THUMB
2810*c0909341SAndroid Build Coastguard Worker
        // All four width-32 loops handle two rows per iteration and leave
        // the second (last) row in q12, q13, q0, q1 (left to right), which
        // is the layout L(ipred_cfl_ac_444_w32_hpad) stores for padded rows.
2811*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_wpad0):
2812*c0909341SAndroid Build Coastguard Worker1:      // Copy and expand input, without padding
2813*c0909341SAndroid Build Coastguard Worker        vld1.8          {q2, q3},   [r1,  :128], r2   // row 0, 32 pixels
2814*c0909341SAndroid Build Coastguard Worker        vld1.8          {q13, q14}, [r12, :128], r2   // row 1, 32 pixels
2815*c0909341SAndroid Build Coastguard Worker        vshll.u8        q0,  d4,  #3                  // q0-q3 = row 0 widened
2816*c0909341SAndroid Build Coastguard Worker        vshll.u8        q1,  d5,  #3
2817*c0909341SAndroid Build Coastguard Worker        vshll.u8        q2,  d6,  #3
2818*c0909341SAndroid Build Coastguard Worker        vshll.u8        q3,  d7,  #3
2819*c0909341SAndroid Build Coastguard Worker        vshll.u8        q12, d26, #3                  // q12/q13 = row 1, pixels 0-15
2820*c0909341SAndroid Build Coastguard Worker        vshll.u8        q13, d27, #3
2821*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2                  // two input rows consumed
2822*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1},   [r0, :128]!
2823*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2824*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2825*c0909341SAndroid Build Coastguard Worker        vshll.u8        q0,  d28, #3                  // q0/q1 reused: row 1, pixels 16-31
2826*c0909341SAndroid Build Coastguard Worker        vshll.u8        q1,  d29, #3
2827*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3},   [r0, :128]!
2828*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2829*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2830*c0909341SAndroid Build Coastguard Worker        vst1.16         {q12, q13}, [r0, :128]!
2831*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q12
2832*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q13
2833*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1},   [r0, :128]!
2834*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q0
2835*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q1
2836*c0909341SAndroid Build Coastguard Worker        bgt             1b
2837*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0                       // flags for the beq at the hpad label
2838*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_444_w32_hpad)
2839*c0909341SAndroid Build Coastguard Worker
2840*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_wpad2):
2841*c0909341SAndroid Build Coastguard Worker1:      // Copy and expand input, padding 8
2842*c0909341SAndroid Build Coastguard Worker        vldr            d4,    [r1,  #16]             // row 0, pixels 16-23 (before r1 advances)
2843*c0909341SAndroid Build Coastguard Worker        vld1.8          {q1},  [r1,  :128], r2        // row 0, pixels 0-15
2844*c0909341SAndroid Build Coastguard Worker        vldr            d28,   [r12, #16]             // row 1, pixels 16-23
2845*c0909341SAndroid Build Coastguard Worker        vld1.8          {q13}, [r12, :128], r2        // row 1, pixels 0-15
2846*c0909341SAndroid Build Coastguard Worker        vshll.u8        q2,  d4,  #3
2847*c0909341SAndroid Build Coastguard Worker        vshll.u8        q0,  d2,  #3
2848*c0909341SAndroid Build Coastguard Worker        vshll.u8        q1,  d3,  #3
2849*c0909341SAndroid Build Coastguard Worker        vshll.u8        q12, d26, #3
2850*c0909341SAndroid Build Coastguard Worker        vshll.u8        q13, d27, #3
2851*c0909341SAndroid Build Coastguard Worker        vdup.16         q3,  d5[3]                    // row 0 padding: replicate pixel 23
2852*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2                  // two input rows consumed
2853*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1},   [r0, :128]!
2854*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2855*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2856*c0909341SAndroid Build Coastguard Worker        vshll.u8        q0,  d28, #3                  // q0 reused: row 1, pixels 16-23
2857*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3},   [r0, :128]!
2858*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2859*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2860*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d1[3]                    // row 1 padding: replicate pixel 23
2861*c0909341SAndroid Build Coastguard Worker        vst1.16         {q12, q13}, [r0, :128]!
2862*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q12
2863*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q13
2864*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1},   [r0, :128]!
2865*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q0
2866*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q1
2867*c0909341SAndroid Build Coastguard Worker        bgt             1b
2868*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0                       // flags for the beq at the hpad label
2869*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_444_w32_hpad)
2870*c0909341SAndroid Build Coastguard Worker
2871*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_wpad4):
2872*c0909341SAndroid Build Coastguard Worker1:      // Copy and expand input, padding 16
2873*c0909341SAndroid Build Coastguard Worker        vld1.8          {q1},  [r1,  :128], r2        // row 0, pixels 0-15
2874*c0909341SAndroid Build Coastguard Worker        vld1.8          {q13}, [r12, :128], r2        // row 1, pixels 0-15
2875*c0909341SAndroid Build Coastguard Worker        vshll.u8        q0,  d2,  #3
2876*c0909341SAndroid Build Coastguard Worker        vshll.u8        q1,  d3,  #3
2877*c0909341SAndroid Build Coastguard Worker        vshll.u8        q12, d26, #3
2878*c0909341SAndroid Build Coastguard Worker        vshll.u8        q13, d27, #3
2879*c0909341SAndroid Build Coastguard Worker        vdup.16         q2,  d3[3]                    // row 0 padding: replicate pixel 15
2880*c0909341SAndroid Build Coastguard Worker        vdup.16         q3,  d3[3]
2881*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2                  // two input rows consumed
2882*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1},   [r0, :128]!
2883*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2884*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2885*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d27[3]                   // row 1 padding: replicate pixel 15
2886*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d27[3]
2887*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3},   [r0, :128]!
2888*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2889*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2890*c0909341SAndroid Build Coastguard Worker        vst1.16         {q12, q13}, [r0, :128]!
2891*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q12
2892*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q13
2893*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1},   [r0, :128]!
2894*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q0
2895*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q1
2896*c0909341SAndroid Build Coastguard Worker        bgt             1b
2897*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0                       // flags for the beq at the hpad label
2898*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_444_w32_hpad)
2899*c0909341SAndroid Build Coastguard Worker
2900*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_wpad6):
2901*c0909341SAndroid Build Coastguard Worker1:      // Copy and expand input, padding 24
2902*c0909341SAndroid Build Coastguard Worker        vld1.8          {d0},  [r1,  :64], r2         // row 0, pixels 0-7
2903*c0909341SAndroid Build Coastguard Worker        vld1.8          {d24}, [r12, :64], r2         // row 1, pixels 0-7
2904*c0909341SAndroid Build Coastguard Worker        vshll.u8        q0,  d0,  #3
2905*c0909341SAndroid Build Coastguard Worker        vshll.u8        q12, d24, #3
2906*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2                  // two input rows consumed
2907*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d1[3]                    // row 0 padding: replicate pixel 7
2908*c0909341SAndroid Build Coastguard Worker        vdup.16         q2,  d1[3]
2909*c0909341SAndroid Build Coastguard Worker        vdup.16         q3,  d1[3]
2910*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1},   [r0, :128]!
2911*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q0
2912*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q1
2913*c0909341SAndroid Build Coastguard Worker        vdup.16         q13, d25[3]                   // row 1 padding: replicate pixel 7
2914*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d25[3]
2915*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d25[3]
2916*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3},   [r0, :128]!
2917*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
2918*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q3
2919*c0909341SAndroid Build Coastguard Worker        vst1.16         {q12, q13}, [r0, :128]!
2920*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q12
2921*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q13
2922*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1},   [r0, :128]!
2923*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q0
2924*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q1
2925*c0909341SAndroid Build Coastguard Worker        bgt             1b
2926*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0                       // falls through to the hpad label
2927*c0909341SAndroid Build Coastguard Worker
        // Bottom padding for width 32: store the last row (q12, q13, q0, q1)
        // once per padded row, still adding it to the running sums.
2928*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_hpad):
2929*c0909341SAndroid Build Coastguard Worker        beq             3f // This assumes that all callers already did "cmp r4, #0"
2930*c0909341SAndroid Build Coastguard Worker2:      // Vertical padding (h_pad > 0)
2931*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #1                  // one padded row per iteration
2932*c0909341SAndroid Build Coastguard Worker        vst1.16         {q12, q13}, [r0, :128]!
2933*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q12
2934*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q13
2935*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1},   [r0, :128]!
2936*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q0
2937*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q1
2938*c0909341SAndroid Build Coastguard Worker        bgt             2b
2939*c0909341SAndroid Build Coastguard Worker3:
2940*c0909341SAndroid Build Coastguard Worker
2941*c0909341SAndroid Build Coastguard Worker        // Multiply the height by eight and reuse the w4 subtracting
        // NOTE(review): the w4 subtract loop is defined outside this section;
        // presumably ch * 8 makes its row counter cover 32-wide rows - confirm.
2942*c0909341SAndroid Build Coastguard Worker        lsl             r6,  r6,  #3
2943*c0909341SAndroid Build Coastguard Worker        // Aggregate the sums, with wider intermediates earlier than in
2944*c0909341SAndroid Build Coastguard Worker        // ipred_cfl_ac_420_w8_calc_subtract_dc.
2945*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      q0,  q8               // pairwise add, widening 16 -> 32 bits
2946*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      q1,  q9
2947*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      q2,  q10
2948*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      q3,  q11
2949*c0909341SAndroid Build Coastguard Worker        vadd.i32        q0,  q0,  q1
2950*c0909341SAndroid Build Coastguard Worker        vadd.i32        q2,  q2,  q3
2951*c0909341SAndroid Build Coastguard Worker        vadd.i32        q0,  q0,  q2
2952*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d1
2953*c0909341SAndroid Build Coastguard Worker        vpadd.i32       d0,  d0,  d0  // sum
        // r6 is already ch * 8, so r6 << 3 = ch * 64 bytes = ch rows of 32
        // int16 values: this rewinds r0 to the start of the ac buffer.
2954*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  r6, lsl #3
2955*c0909341SAndroid Build Coastguard Worker        vrshl.u32       d16, d0,  d31 // (sum + (1 << (log2sz - 1))) >>= log2sz
2956*c0909341SAndroid Build Coastguard Worker        vdup.16         q8,  d16[0]           // broadcast the average to all 16-bit lanes
2957*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w4_subtract_dc)
2958*c0909341SAndroid Build Coastguard Workerendfunc
2959