xref: /aosp_15_r20/external/libdav1d/src/arm/32/ipred16.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2019, B Krishnan Iyer
 * Copyright © 2020, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
28*c0909341SAndroid Build Coastguard Worker
29*c0909341SAndroid Build Coastguard Worker#include "src/arm/asm.S"
30*c0909341SAndroid Build Coastguard Worker#include "util.S"
31*c0909341SAndroid Build Coastguard Worker
// void ipred_dc_128_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                              const pixel *const topleft,
//                              const int width, const int height, const int a,
//                              const int max_width, const int max_height,
//                              const int bitdepth_max);
//
// Fills a width x height block of 16-bit pixels with the constant
// (bitdepth_max + 1) >> 1. topleft, a, max_width and max_height are not read.
//
// Register usage:
//   r0   dst, walks rows 0, 2, 4, ...
//   r12  dst + stride, walks rows 1, 3, 5, ... (holds bitdepth_max before that)
//   r1   stride in bytes, doubled before the dispatch; the 32 and 64 wide
//        paths additionally subtract the bytes already advanced by the
//        writeback (!) stores of the same row
//   r2   jump table base, then the branch target (topleft is discarded)
//   r3   width -> clz(width) - 25 -> table entry
//   r4   height (first stack argument), used as the row counter
//   q0   fill value; q1 is a copy of q0 for the paths storing 32 bytes at once
//
// The stores carry :64 / :128 alignment qualifiers, so every row start must
// be aligned accordingly. Each loop iteration writes 4 rows (2 rows for
// width 64) and there is no tail handling, so height is presumably always a
// multiple of that -- confirm against the callers.
function ipred_dc_128_16bpc_neon, export=1
        push            {r4, lr}
        ldr             r4,  [sp, #8]           // height: first stack arg, above the 8 bytes just pushed
        ldr             r12, [sp, #24]          // bitdepth_max: fifth stack arg
        clz             r3,  r3
        adr             r2,  L(ipred_dc_128_tbl)
        sub             r3,  r3,  #25           // width 64/32/16/8/4 -> index 0/1/2/3/4
        vdup.16         q0,  r12
        ldr             r3,  [r2,  r3,  lsl #2] // offset of the handler relative to the table
        add             r12, r0,  r1            // r12 = start of the second row
        vrshr.u16       q0,  q0,  #1            // rounding shift: (bitdepth_max + 1) >> 1
        add             r2,  r2,  r3
        lsl             r1,  r1,  #1            // r0 and r12 each step two rows at a time
        bx              r2

        .align 2
// Handler offsets relative to the table, ordered from width 64 down to 4.
// CONFIG_THUMB is added to each entry, presumably to set the Thumb bit of
// the bx target in Thumb builds.
L(ipred_dc_128_tbl):
        .word 640f - L(ipred_dc_128_tbl) + CONFIG_THUMB
        .word 320f - L(ipred_dc_128_tbl) + CONFIG_THUMB
        .word 160f - L(ipred_dc_128_tbl) + CONFIG_THUMB
        .word 8f   - L(ipred_dc_128_tbl) + CONFIG_THUMB
        .word 4f   - L(ipred_dc_128_tbl) + CONFIG_THUMB
4:      // width 4: 8 bytes per row, 4 rows per iteration
        vst1.16         {d0},  [r0,  :64], r1
        vst1.16         {d0},  [r12, :64], r1
        subs            r4,  r4,  #4
        vst1.16         {d0},  [r0,  :64], r1
        vst1.16         {d0},  [r12, :64], r1
        bgt             4b
        pop             {r4, pc}
8:      // width 8: 16 bytes per row, 4 rows per iteration
        vst1.16         {d0,  d1}, [r0,  :128], r1
        vst1.16         {d0,  d1}, [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.16         {d0,  d1}, [r0,  :128], r1
        vst1.16         {d0,  d1}, [r12, :128], r1
        bgt             8b
        pop             {r4, pc}
160:    // width 16: 32 bytes per row, stored as q0:q1
        vmov            q1,  q0
16:
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             16b
        pop             {r4, pc}
320:    // width 32: 64 bytes per row as two 32 byte stores
        vmov            q1,  q0
        sub             r1,  r1,  #32           // the first store of each row already advanced 32 bytes
32:
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            r4,  r4,  #4
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             32b
        pop             {r4, pc}
640:    // width 64: 128 bytes per row as four 32 byte stores, 2 rows per iteration
        vmov            q1,  q0
        sub             r1,  r1,  #96           // three writeback stores per row already advanced 96 bytes
64:
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        subs            r4,  r4,  #2
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             64b
        pop             {r4, pc}
endfunc
116*c0909341SAndroid Build Coastguard Worker
// void ipred_v_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                         const pixel *const topleft,
//                         const int width, const int height, const int a,
//                         const int max_width, const int max_height);
//
// Copies the width 16-bit pixels that start one pixel past topleft into
// every row of the destination block. a, max_width and max_height are not
// read.
//
// Register usage:
//   r0   dst, walks rows 0, 2, 4, ...
//   r12  dst + stride, walks rows 1, 3, 5, ...
//   r1   stride in bytes, doubled before the dispatch; the 32 and 64 wide
//        paths additionally subtract the bytes already advanced by the
//        writeback (!) stores of the same row
//   r2   topleft + 2 bytes, the source of the row that is replicated
//   r3   width -> clz(width) - 25 -> table entry
//   r4   jump table base, then the branch target
//   lr   height (first stack argument), used as the row counter
//   q0-q3, q8-q11  the source row, loaded once before the store loop
//
// The loads from r2 carry no alignment qualifier; the stores use :64 / :128,
// so every destination row start must be aligned accordingly. Each loop
// iteration writes 4 rows (2 rows for width 64) and there is no tail
// handling, so height is presumably always a multiple of that -- confirm
// against the callers.
function ipred_v_16bpc_neon, export=1
        push            {r4, lr}
        ldr             lr,  [sp, #8]           // height: first stack arg, above the 8 bytes just pushed
        clz             r3,  r3
        adr             r4,  L(ipred_v_tbl)
        sub             r3,  r3,  #25           // width 64/32/16/8/4 -> index 0/1/2/3/4
        ldr             r3,  [r4,  r3,  lsl #2] // offset of the handler relative to the table
        add             r2,  r2,  #2            // skip the topleft pixel itself
        add             r4,  r4,  r3
        add             r12, r0,  r1            // r12 = start of the second row
        lsl             r1,  r1,  #1            // r0 and r12 each step two rows at a time
        bx              r4

        .align 2
// Handler offsets relative to the table, ordered from width 64 down to 4.
// CONFIG_THUMB is added to each entry, presumably to set the Thumb bit of
// the bx target in Thumb builds.
L(ipred_v_tbl):
        .word 640f - L(ipred_v_tbl) + CONFIG_THUMB
        .word 320f - L(ipred_v_tbl) + CONFIG_THUMB
        .word 160f - L(ipred_v_tbl) + CONFIG_THUMB
        .word 80f  - L(ipred_v_tbl) + CONFIG_THUMB
        .word 40f  - L(ipred_v_tbl) + CONFIG_THUMB

40:     // width 4: the source row fits in d0
        vld1.16         {d0}, [r2]
4:
        vst1.16         {d0},  [r0,  :64], r1
        vst1.16         {d0},  [r12, :64], r1
        subs            lr,  lr,  #4
        vst1.16         {d0},  [r0,  :64], r1
        vst1.16         {d0},  [r12, :64], r1
        bgt             4b
        pop             {r4, pc}
80:     // width 8: the source row fits in q0
        vld1.16         {q0},  [r2]
8:
        vst1.16         {d0,  d1},  [r0,  :128], r1
        vst1.16         {d0,  d1},  [r12, :128], r1
        subs            lr,  lr,  #4
        vst1.16         {d0,  d1},  [r0,  :128], r1
        vst1.16         {d0,  d1},  [r12, :128], r1
        bgt             8b
        pop             {r4, pc}
160:    // width 16: the source row fits in q0:q1
        vld1.16         {q0,  q1},  [r2]
16:
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        subs            lr,  lr,  #4
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
        bgt             16b
        pop             {r4, pc}
320:    // width 32: the source row occupies q0-q3, stored as two 32 byte halves
        vld1.16         {q0,  q1},  [r2]!
        sub             r1,  r1,  #32           // the first store of each row already advanced 32 bytes
        vld1.16         {q2,  q3},  [r2]
32:
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d4,  d5,  d6,  d7},  [r0,  :128], r1
        vst1.16         {d4,  d5,  d6,  d7},  [r12, :128], r1
        subs            lr,  lr,  #4
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d4,  d5,  d6,  d7},  [r0,  :128], r1
        vst1.16         {d4,  d5,  d6,  d7},  [r12, :128], r1
        bgt             32b
        pop             {r4, pc}
640:    // width 64: the source row occupies q0-q3 and q8-q11, 2 rows per iteration.
        // q4-q7 are skipped, presumably because d8-d15 are callee-saved
        // under the AAPCS and this function saves no NEON registers.
        vld1.16         {q0,  q1},  [r2]!
        sub             r1,  r1,  #96           // three writeback stores per row already advanced 96 bytes
        vld1.16         {q2,  q3},  [r2]!
        vld1.16         {q8,  q9},  [r2]!
        vld1.16         {q10, q11}, [r2]!
64:
        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
        vst1.16         {d4,  d5,  d6,  d7},  [r0,  :128]!
        vst1.16         {d4,  d5,  d6,  d7},  [r12, :128]!
        subs            lr,  lr,  #2
        vst1.16         {d16, d17, d18, d19}, [r0,  :128]!
        vst1.16         {d16, d17, d18, d19}, [r12, :128]!
        vst1.16         {d20, d21, d22, d23}, [r0,  :128], r1
        vst1.16         {d20, d21, d22, d23}, [r12, :128], r1
        bgt             64b
        pop             {r4, pc}
endfunc
207*c0909341SAndroid Build Coastguard Worker
// void ipred_h_16bpc_neon(pixel *dst, const ptrdiff_t stride,
//                         const pixel *const topleft,
//                         const int width, const int height, const int a,
//                         const int max_width, const int max_height);
//
// Fills each destination row with a single 16-bit pixel read from just
// below topleft in memory: row y uses the pixel at byte offset -2 * (y + 1)
// from topleft. a, max_width and max_height are not read.
//
// Register usage:
//   r0   dst, walks rows 0, 2, 4, ...
//   r12  dst + stride, walks rows 1, 3, 5, ...
//   r1   stride in bytes, doubled before the dispatch; the 16, 32 and 64
//        wide paths additionally subtract the bytes already advanced by the
//        writeback (!) stores of the same row
//   r2   source pointer, walks towards lower addresses
//   lr   post-index step for r2: -2 (one pixel), or -8 for width 4
//   r3   width -> clz(width) - 25 -> table entry
//   r4   height (first stack argument), used as the row counter
//   r5   jump table base, then the branch target
//   q0-q3  broadcast source pixels for the rows of one iteration
//
// The stores carry :64 / :128 alignment qualifiers, so every row start must
// be aligned accordingly. Each loop iteration writes 4 rows (2 rows for
// width 64) and there is no tail handling, so height is presumably always a
// multiple of that -- confirm against the callers.
function ipred_h_16bpc_neon, export=1
        push            {r4-r5, lr}
        ldr             r4,  [sp, #12]          // height: first stack arg, above the 12 bytes just pushed
        clz             r3,  r3
        adr             r5,  L(ipred_h_tbl)
        sub             r3,  r3,  #25           // width 64/32/16/8/4 -> index 0/1/2/3/4
        ldr             r3,  [r5,  r3,  lsl #2] // offset of the handler relative to the table
        sub             r2,  r2,  #2            // source pixel for row 0
        mov             lr,  #-2                // default: step back one pixel per load
        add             r5,  r5,  r3
        add             r12, r0,  r1            // r12 = start of the second row
        lsl             r1,  r1,  #1            // r0 and r12 each step two rows at a time
        bx              r5

        .align 2
// Handler offsets relative to the table, ordered from width 64 down to 4.
// CONFIG_THUMB is added to each entry, presumably to set the Thumb bit of
// the bx target in Thumb builds.
L(ipred_h_tbl):
        .word 640f - L(ipred_h_tbl) + CONFIG_THUMB
        .word 320f - L(ipred_h_tbl) + CONFIG_THUMB
        .word 160f - L(ipred_h_tbl) + CONFIG_THUMB
        .word 8f   - L(ipred_h_tbl) + CONFIG_THUMB
        .word 40f  - L(ipred_h_tbl) + CONFIG_THUMB
40:     // width 4: fetch the pixels for 4 rows with one vld4 of 4 consecutive halfwords
        sub             r2,  r2,  #6            // r2 = topleft - 8, lowest of the 4 pixels
        mov             lr,  #-8                // step back 4 pixels per iteration
4:
        // d0..d3 = the 4 halfwords in ascending address order, each
        // broadcast to all lanes; d3 is therefore the pixel for the first
        // row of this group, hence the reversed store order below.
        vld4.16         {d0[],  d1[],  d2[],  d3[]},  [r2],  lr
        vst1.16         {d3},  [r0,  :64], r1
        vst1.16         {d2},  [r12, :64], r1
        subs            r4,  r4,  #4
        vst1.16         {d1},  [r0,  :64], r1
        vst1.16         {d0},  [r12, :64], r1
        bgt             4b
        pop             {r4-r5, pc}
8:      // width 8: one broadcast q register is one 16 byte row
        vld1.16         {d0[],  d1[]},  [r2],  lr
        subs            r4,  r4,  #4
        vld1.16         {d2[],  d3[]},  [r2],  lr
        vst1.16         {q0},  [r0,    :128],  r1
        vld1.16         {d4[],  d5[]},  [r2],  lr
        vst1.16         {q1},  [r12,   :128],  r1
        vld1.16         {d6[],  d7[]},  [r2],  lr
        vst1.16         {q2},  [r0,    :128],  r1
        vst1.16         {q3},  [r12,   :128],  r1
        bgt             8b
        pop             {r4-r5, pc}
160:    // width 16: 32 bytes per row, each broadcast q register stored twice
        sub             r1,  r1,  #16           // the first store of each row already advanced 16 bytes
16:
        vld1.16         {d0[],  d1[]}, [r2],  lr
        subs            r4,  r4,  #4
        vld1.16         {d2[],  d3[]}, [r2],  lr
        vst1.16         {q0},  [r0,   :128]!
        vld1.16         {d4[],  d5[]}, [r2],  lr
        vst1.16         {q1},  [r12,  :128]!
        vld1.16         {d6[],  d7[]}, [r2],  lr
        vst1.16         {q0},  [r0,   :128],  r1
        vst1.16         {q1},  [r12,  :128],  r1
        vst1.16         {q2},  [r0,   :128]!
        vst1.16         {q3},  [r12,  :128]!
        vst1.16         {q2},  [r0,   :128],  r1
        vst1.16         {q3},  [r12,  :128],  r1
        bgt             16b
        pop             {r4-r5, pc}
320:    // width 32: 64 bytes per row, each broadcast q register stored four times
        sub             r1,  r1,  #48           // three writeback stores per row already advanced 48 bytes
32:
        vld1.16         {d0[],  d1[]},  [r2],  lr
        subs            r4,  r4,  #4
        vld1.16         {d2[],  d3[]},  [r2],  lr
        vst1.16         {q0},  [r0,    :128]!
        vld1.16         {d4[],  d5[]},  [r2],  lr
        vst1.16         {q1},  [r12,   :128]!
        vld1.16         {d6[],  d7[]},  [r2],  lr
        vst1.16         {q0},  [r0,    :128]!
        vst1.16         {q1},  [r12,   :128]!
        vst1.16         {q0},  [r0,    :128]!
        vst1.16         {q1},  [r12,   :128]!
        vst1.16         {q0},  [r0,    :128],  r1
        vst1.16         {q1},  [r12,   :128],  r1
        vst1.16         {q2},  [r0,    :128]!
        vst1.16         {q3},  [r12,   :128]!
        vst1.16         {q2},  [r0,    :128]!
        vst1.16         {q3},  [r12,   :128]!
        vst1.16         {q2},  [r0,    :128]!
        vst1.16         {q3},  [r12,   :128]!
        vst1.16         {q2},  [r0,    :128],  r1
        vst1.16         {q3},  [r12,   :128],  r1
        bgt             32b
        pop             {r4-r5, pc}
640:    // width 64: 128 bytes per row as four 32 byte stores, 2 rows per iteration
        sub             r1,  r1,  #96           // three writeback stores per row already advanced 96 bytes
64:
        vld1.16         {d0[],  d1[]},  [r2],  lr
        subs            r4,  r4,  #2
        vld1.16         {d4[],  d5[]},  [r2],  lr
        vmov            q1,  q0                 // q0:q1 = 32 bytes of the first row's pixel
        vmov            q3,  q2                 // q2:q3 = 32 bytes of the second row's pixel
        vst1.16         {q0, q1}, [r0,  :128]!
        vst1.16         {q2, q3}, [r12, :128]!
        vst1.16         {q0, q1}, [r0,  :128]!
        vst1.16         {q2, q3}, [r12, :128]!
        vst1.16         {q0, q1}, [r0,  :128]!
        vst1.16         {q2, q3}, [r12, :128]!
        vst1.16         {q0, q1}, [r0,  :128],  r1
        vst1.16         {q2, q3}, [r12, :128],  r1
        bgt             64b
        pop             {r4-r5, pc}
endfunc
320*c0909341SAndroid Build Coastguard Worker
321*c0909341SAndroid Build Coastguard Worker// void ipred_dc_top_16bpc_neon(pixel *dst, const ptrdiff_t stride,
322*c0909341SAndroid Build Coastguard Worker//                              const pixel *const topleft,
323*c0909341SAndroid Build Coastguard Worker//                              const int width, const int height, const int a,
324*c0909341SAndroid Build Coastguard Worker//                              const int max_width, const int max_height);
325*c0909341SAndroid Build Coastguard Workerfunction ipred_dc_top_16bpc_neon, export=1
326*c0909341SAndroid Build Coastguard Worker        push            {r4-r5, lr}
327*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #12]
328*c0909341SAndroid Build Coastguard Worker        clz             r3,  r3
329*c0909341SAndroid Build Coastguard Worker        adr             r5,  L(ipred_dc_top_tbl)
330*c0909341SAndroid Build Coastguard Worker        sub             r3,  r3,  #25
331*c0909341SAndroid Build Coastguard Worker        ldr             r3,  [r5,  r3,  lsl #2]
332*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2
333*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  r3
334*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1
335*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
336*c0909341SAndroid Build Coastguard Worker        bx              r5
337*c0909341SAndroid Build Coastguard Worker
338*c0909341SAndroid Build Coastguard Worker        .align 2
339*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_top_tbl):
340*c0909341SAndroid Build Coastguard Worker        .word 640f - L(ipred_dc_top_tbl) + CONFIG_THUMB
341*c0909341SAndroid Build Coastguard Worker        .word 320f - L(ipred_dc_top_tbl) + CONFIG_THUMB
342*c0909341SAndroid Build Coastguard Worker        .word 160f - L(ipred_dc_top_tbl) + CONFIG_THUMB
343*c0909341SAndroid Build Coastguard Worker        .word 80f  - L(ipred_dc_top_tbl) + CONFIG_THUMB
344*c0909341SAndroid Build Coastguard Worker        .word 40f  - L(ipred_dc_top_tbl) + CONFIG_THUMB
345*c0909341SAndroid Build Coastguard Worker
346*c0909341SAndroid Build Coastguard Worker40:
347*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0},  [r2]
348*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
349*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
350*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #2
351*c0909341SAndroid Build Coastguard Worker        vdup.16         d0,  d0[0]
352*c0909341SAndroid Build Coastguard Worker4:
353*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0},  [r0,  :64], r1
354*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0},  [r12, :64], r1
355*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
356*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0},  [r0,  :64], r1
357*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0},  [r12, :64], r1
358*c0909341SAndroid Build Coastguard Worker        bgt             4b
359*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
360*c0909341SAndroid Build Coastguard Worker80:
361*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0,  d1},  [r2]
362*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
363*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
364*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
365*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #3
366*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]
367*c0909341SAndroid Build Coastguard Worker8:
368*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1},  [r0,  :128], r1
369*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1},  [r12, :128], r1
370*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
371*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1},  [r0,  :128], r1
372*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1},  [r12, :128], r1
373*c0909341SAndroid Build Coastguard Worker        bgt             8b
374*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
375*c0909341SAndroid Build Coastguard Worker160:
376*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0,  d1,  d2,  d3},  [r2]
377*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q1
378*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
379*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
380*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
381*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d4,  d0,  #4
382*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d4[0]
383*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d4[0]
384*c0909341SAndroid Build Coastguard Worker16:
385*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
386*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
387*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
388*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
389*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
390*c0909341SAndroid Build Coastguard Worker        bgt             16b
391*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
392*c0909341SAndroid Build Coastguard Worker320:
393*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0,  d1,  d2,  d3},  [r2]!
394*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4,  d5,  d6,  d7},  [r2]
395*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q1
396*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q3
397*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q2
398*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
399*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
400*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d0,  d0
401*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d18, q0,  #5
402*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d18[0]
403*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d18[0]
404*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #32
405*c0909341SAndroid Build Coastguard Worker32:
406*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
407*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
408*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
409*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
410*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
411*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
412*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
413*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
414*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
415*c0909341SAndroid Build Coastguard Worker        bgt             32b
416*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
417*c0909341SAndroid Build Coastguard Worker640:
418*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0,  d1,  d2,  d3},  [r2]!
419*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4,  d5,  d6,  d7},  [r2]!
420*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q1
421*c0909341SAndroid Build Coastguard Worker        vld1.16         {d16, d17, d18, d19}, [r2]!
422*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q3
423*c0909341SAndroid Build Coastguard Worker        vld1.16         {d20, d21, d22, d23}, [r2]
424*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q9
425*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q11
426*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q2
427*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q10
428*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q8
429*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
430*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d0,  d0
431*c0909341SAndroid Build Coastguard Worker        vpadd.i32       d0,  d0,  d0
432*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d18, q0,  #6
433*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d18[0]
434*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d18[0]
435*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #96
436*c0909341SAndroid Build Coastguard Worker64:
437*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
438*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
439*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
440*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
441*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
442*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
443*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
444*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
445*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
446*c0909341SAndroid Build Coastguard Worker        bgt             64b
447*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
448*c0909341SAndroid Build Coastguard Workerendfunc
449*c0909341SAndroid Build Coastguard Worker
450*c0909341SAndroid Build Coastguard Worker// void ipred_dc_left_16bpc_neon(pixel *dst, const ptrdiff_t stride,
451*c0909341SAndroid Build Coastguard Worker//                               const pixel *const topleft,
452*c0909341SAndroid Build Coastguard Worker//                               const int width, const int height, const int a,
453*c0909341SAndroid Build Coastguard Worker//                               const int max_width, const int max_height);
//
// Fills a width x height block of 16-bit pixels at dst with a single DC
// value: the rounded average of the `height` pixels stored immediately
// before topleft.
//
// Register use once the prologue has run:
//   r0  = dst, write pointer for rows 0, 2, 4, ...
//   r12 = dst + stride, write pointer for rows 1, 3, 5, ...
//   r1  = 2 * stride (reduced further by the w32/w64 store loops)
//   r2  = topleft - 2 * height bytes, the first pixel to be summed
//   r3  = address of the width-specific store loop
//   r4  = height, counted down by the store loops
//   r5  = address of the height-specific summing code
//
// Control flow: the prologue jumps to one of the L(ipred_dc_left_h*) blocks,
// which leaves the DC value in every lane of q0 and then jumps through r3 to
// the L(ipred_dc_left_w*) store loop selected by width.
//
// The jump table only has entries for 4, 8, 16, 32 and 64, so width and
// height must each be one of those values. The arguments a, max_width and
// max_height are never read. NEON registers written: q0-q3 and q8-q11.
454*c0909341SAndroid Build Coastguard Workerfunction ipred_dc_left_16bpc_neon, export=1
455*c0909341SAndroid Build Coastguard Worker        push            {r4-r5, lr}
        // r4 = height, the fifth argument: it is read from the stack, just
        // above the 12 bytes of registers pushed above.
456*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #12]
        // Step r2 back by `height` pixels of 2 bytes each.
457*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r4,  lsl #1
        // Turn the power-of-two sizes into table indices via clz:
        // clz(64) = 25 ... clz(4) = 29, so height maps to entries 0..4 and
        // width (offset by 5, hence 25 - 5 = 20) to entries 5..9.
458*c0909341SAndroid Build Coastguard Worker        clz             r3,  r3
459*c0909341SAndroid Build Coastguard Worker        clz             lr,  r4
460*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #25
461*c0909341SAndroid Build Coastguard Worker        adr             r5,  L(ipred_dc_left_tbl)
462*c0909341SAndroid Build Coastguard Worker        sub             r3,  r3,  #20
        // The table holds offsets relative to its own address; adding the
        // table address back yields the absolute branch targets.
463*c0909341SAndroid Build Coastguard Worker        ldr             r3,  [r5,  r3,  lsl #2]
464*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r5,  lr,  lsl #2]
465*c0909341SAndroid Build Coastguard Worker        add             r3,  r5,  r3
466*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  lr
        // Two interleaved row pointers, each advancing by two rows per store.
467*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1
468*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
469*c0909341SAndroid Build Coastguard Worker        bx              r5
470*c0909341SAndroid Build Coastguard Worker
        // Jump table: five height entries (64 down to 4) followed by five
        // width entries (64 down to 4).
        // NOTE(review): CONFIG_THUMB is defined outside this chunk; presumably
        // it sets bit 0 of the target so that bx stays in Thumb state when the
        // file is built as Thumb code - confirm against its definition.
471*c0909341SAndroid Build Coastguard Worker        .align 2
472*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_tbl):
473*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_h64) - L(ipred_dc_left_tbl) + CONFIG_THUMB
474*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_h32) - L(ipred_dc_left_tbl) + CONFIG_THUMB
475*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_h16) - L(ipred_dc_left_tbl) + CONFIG_THUMB
476*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_h8)  - L(ipred_dc_left_tbl) + CONFIG_THUMB
477*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_h4)  - L(ipred_dc_left_tbl) + CONFIG_THUMB
478*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_w64) - L(ipred_dc_left_tbl) + CONFIG_THUMB
479*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_w32) - L(ipred_dc_left_tbl) + CONFIG_THUMB
480*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_w16) - L(ipred_dc_left_tbl) + CONFIG_THUMB
481*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_w8)  - L(ipred_dc_left_tbl) + CONFIG_THUMB
482*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_left_w4)  - L(ipred_dc_left_tbl) + CONFIG_THUMB
483*c0909341SAndroid Build Coastguard Worker
        // height == 4: load four pixels (the load asserts 64-bit alignment of
        // r2), fold them with two pairwise adds so every lane of d0 holds the
        // sum, then (sum + 2) >> 2 and broadcast lane 0 across q0.
484*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h4):
485*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0},  [r2, :64]
486*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
487*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
488*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #2
489*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]
490*c0909341SAndroid Build Coastguard Worker        bx              r3
        // width == 4: one 8-byte store (d0) per row, four rows per iteration.
        // The flags set by subs are consumed by bgt; the stores in between do
        // not modify them. The final pop restores r4-r5 and returns by
        // loading the saved lr into pc.
491*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w4):
492*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0},  [r0,  :64], r1
493*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0},  [r12, :64], r1
494*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
495*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0},  [r0,  :64], r1
496*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0},  [r12, :64], r1
497*c0909341SAndroid Build Coastguard Worker        bgt             L(ipred_dc_left_w4)
498*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
        // height == 8: load eight pixels (the load asserts 128-bit alignment
        // of r2), fold 8 -> 4 lanes with a vertical add, then reduce as for
        // height 4 and divide by 8 with rounding.
499*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h8):
500*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0,  d1},  [r2, :128]
501*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
502*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
503*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
504*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #3
505*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]
506*c0909341SAndroid Build Coastguard Worker        bx              r3
        // width == 8: one 16-byte store (q0) per row, four rows per iteration.
507*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w8):
508*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1},  [r0,  :128], r1
509*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1},  [r12, :128], r1
510*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
511*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1},  [r0,  :128], r1
512*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1},  [r12, :128], r1
513*c0909341SAndroid Build Coastguard Worker        bgt             L(ipred_dc_left_w8)
514*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
        // height == 16: load sixteen pixels, fold 16 -> 8 -> 4 lanes with
        // vertical adds, finish with two pairwise adds, divide by 16 with
        // rounding. All arithmetic stays in 16-bit lanes.
515*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h16):
516*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0,  d1,  d2,  d3},  [r2, :128]
517*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q1
518*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
519*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
520*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
521*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #4
522*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]
523*c0909341SAndroid Build Coastguard Worker        bx              r3
        // width == 16: copy q0 into q1 so that d0-d3 hold one full row
        // (16 pixels, 32 bytes), then one store per row, four rows per
        // iteration.
524*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w16):
525*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q0
526*c0909341SAndroid Build Coastguard Worker1:
527*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
528*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
529*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
530*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
531*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
532*c0909341SAndroid Build Coastguard Worker        bgt             1b
533*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
        // height == 32: two 32-byte loads (the first advances r2), vertical
        // adds fold 32 -> 4 lanes, one pairwise add in 16 bits, then the last
        // pairwise add widens to 32 bits. vrshrn narrows back to 16 bits
        // with a rounding shift by 5; it narrows all of q0 (d1 is stale) but
        // only lane 0 of the result is used by vdup.
        // NOTE(review): the final add is presumably widened because the sum
        // of 32 pixels may not fit in 16 bits - the pixel range is not
        // visible in this chunk.
534*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h32):
535*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0,  d1,  d2,  d3},  [r2, :128]!
536*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4,  d5,  d6,  d7},  [r2, :128]
537*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q1
538*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q3
539*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q2
540*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
541*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
542*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d0,  d0
543*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d0,  q0,  #5
544*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]
545*c0909341SAndroid Build Coastguard Worker        bx              r3
        // width == 32: a row is 64 bytes, written as two 32-byte stores. The
        // first store of each row post-increments its pointer by 32 bytes,
        // so r1 is reduced by 32 up front to make the second store land on
        // the start of the row two lines further down. Four rows per
        // iteration.
546*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w32):
547*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #32
548*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q0
549*c0909341SAndroid Build Coastguard Worker1:
550*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
551*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
552*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
553*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
554*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
555*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
556*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
557*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
558*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
559*c0909341SAndroid Build Coastguard Worker        bgt             1b
560*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
        // height == 64: four 32-byte loads into q0-q3 and q8-q11, interleaved
        // with the first adds. Vertical adds fold 64 -> 4 lanes in 16 bits,
        // vpaddl widens to two 32-bit partial sums and vpadd.i32 combines
        // them. vrshrn then divides by 64 with rounding; as for height 32,
        // only lane 0 of the narrowed result is used.
561*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_h64):
562*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0,  d1,  d2,  d3},  [r2, :128]!
563*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4,  d5,  d6,  d7},  [r2, :128]!
564*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q1
565*c0909341SAndroid Build Coastguard Worker        vld1.16         {d16, d17, d18, d19}, [r2, :128]!
566*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q3
567*c0909341SAndroid Build Coastguard Worker        vld1.16         {d20, d21, d22, d23}, [r2, :128]
568*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q9
569*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q11
570*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q2
571*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q10
572*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q8
573*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
574*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d0,  d0
575*c0909341SAndroid Build Coastguard Worker        vpadd.i32       d0,  d0,  d0
576*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d0,  q0,  #6
577*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]
578*c0909341SAndroid Build Coastguard Worker        bx              r3
        // width == 64: a row is 128 bytes, written as four 32-byte stores.
        // Three of them post-increment by 32 bytes each, so r1 is reduced by
        // 96 to make the fourth store step to the row two lines further
        // down. Only two rows are written per iteration, hence subs by 2.
579*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_left_w64):
580*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #96
581*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q0
582*c0909341SAndroid Build Coastguard Worker1:
583*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
584*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
585*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
586*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
587*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
588*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
589*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
590*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
591*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
592*c0909341SAndroid Build Coastguard Worker        bgt             1b
593*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
594*c0909341SAndroid Build Coastguard Workerendfunc
595*c0909341SAndroid Build Coastguard Worker
596*c0909341SAndroid Build Coastguard Worker// void ipred_dc_16bpc_neon(pixel *dst, const ptrdiff_t stride,
597*c0909341SAndroid Build Coastguard Worker//                          const pixel *const topleft,
598*c0909341SAndroid Build Coastguard Worker//                          const int width, const int height, const int a,
599*c0909341SAndroid Build Coastguard Worker//                          const int max_width, const int max_height);
600*c0909341SAndroid Build Coastguard Workerfunction ipred_dc_16bpc_neon, export=1
601*c0909341SAndroid Build Coastguard Worker        push            {r4-r6, lr}
602*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #16]
603*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r4,  lsl #1
604*c0909341SAndroid Build Coastguard Worker        add             lr,  r3,  r4        // width + height
605*c0909341SAndroid Build Coastguard Worker        clz             r3,  r3
606*c0909341SAndroid Build Coastguard Worker        clz             r12, r4
607*c0909341SAndroid Build Coastguard Worker        vdup.32         q15, lr             // width + height
608*c0909341SAndroid Build Coastguard Worker        adr             r5,  L(ipred_dc_tbl)
609*c0909341SAndroid Build Coastguard Worker        rbit            lr,  lr             // rbit(width + height)
610*c0909341SAndroid Build Coastguard Worker        sub             r3,  r3,  #20       // 25 leading bits, minus table offset 5
611*c0909341SAndroid Build Coastguard Worker        sub             r12, r12, #25
612*c0909341SAndroid Build Coastguard Worker        clz             lr,  lr             // ctz(width + height)
613*c0909341SAndroid Build Coastguard Worker        ldr             r3,  [r5,  r3,  lsl #2]
614*c0909341SAndroid Build Coastguard Worker        ldr             r12, [r5,  r12, lsl #2]
615*c0909341SAndroid Build Coastguard Worker        neg             lr,  lr             // -ctz(width + height)
616*c0909341SAndroid Build Coastguard Worker        add             r3,  r5,  r3
617*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  r12
618*c0909341SAndroid Build Coastguard Worker        vshr.u32        q15, q15, #1        // (width + height) >> 1
619*c0909341SAndroid Build Coastguard Worker        vdup.32         q14, lr             // -ctz(width + height)
620*c0909341SAndroid Build Coastguard Worker        add             r12, r0,  r1
621*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
622*c0909341SAndroid Build Coastguard Worker        bx              r5
623*c0909341SAndroid Build Coastguard Worker
624*c0909341SAndroid Build Coastguard Worker        .align 2
625*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_tbl):
626*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_h64) - L(ipred_dc_tbl) + CONFIG_THUMB
627*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_h32) - L(ipred_dc_tbl) + CONFIG_THUMB
628*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_h16) - L(ipred_dc_tbl) + CONFIG_THUMB
629*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_h8)  - L(ipred_dc_tbl) + CONFIG_THUMB
630*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_h4)  - L(ipred_dc_tbl) + CONFIG_THUMB
631*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_w64) - L(ipred_dc_tbl) + CONFIG_THUMB
632*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_w32) - L(ipred_dc_tbl) + CONFIG_THUMB
633*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_w16) - L(ipred_dc_tbl) + CONFIG_THUMB
634*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_w8)  - L(ipred_dc_tbl) + CONFIG_THUMB
635*c0909341SAndroid Build Coastguard Worker        .word L(ipred_dc_w4)  - L(ipred_dc_tbl) + CONFIG_THUMB
636*c0909341SAndroid Build Coastguard Worker
637*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h4):
638*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0},  [r2, :64]!
639*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
640*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2
641*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d0,  d0
642*c0909341SAndroid Build Coastguard Worker        bx              r3
643*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w4):
644*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2},  [r2]
645*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d30
646*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d2,  d2,  d2
647*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d2,  d2
648*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #4
649*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d2
650*c0909341SAndroid Build Coastguard Worker        vshl.u32        d0,  d0,  d28
651*c0909341SAndroid Build Coastguard Worker        beq             1f
652*c0909341SAndroid Build Coastguard Worker        // h = 8/16
653*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #16
654*c0909341SAndroid Build Coastguard Worker        movw            lr,  #0x6667
655*c0909341SAndroid Build Coastguard Worker        movw            r5,  #0xAAAB
656*c0909341SAndroid Build Coastguard Worker        it              ne
657*c0909341SAndroid Build Coastguard Worker        movne           lr,  r5
658*c0909341SAndroid Build Coastguard Worker        vdup.32         d24, lr
659*c0909341SAndroid Build Coastguard Worker        vmul.i32        d0,  d0,  d24
660*c0909341SAndroid Build Coastguard Worker        vshr.u32        d0,  d0,  #17
661*c0909341SAndroid Build Coastguard Worker1:
662*c0909341SAndroid Build Coastguard Worker        vdup.16         d0,  d0[0]
663*c0909341SAndroid Build Coastguard Worker2:
664*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0},  [r0,  :64], r1
665*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0},  [r12, :64], r1
666*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
667*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0},  [r0,  :64], r1
668*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0},  [r12, :64], r1
669*c0909341SAndroid Build Coastguard Worker        bgt             2b
670*c0909341SAndroid Build Coastguard Worker        pop             {r4-r6, pc}
671*c0909341SAndroid Build Coastguard Worker
672*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h8):
673*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0,  d1},  [r2, :128]!
674*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
675*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
676*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2
677*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d0,  d0
678*c0909341SAndroid Build Coastguard Worker        bx              r3
679*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w8):
680*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2,  d3},  [r2]
681*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d30
682*c0909341SAndroid Build Coastguard Worker        vadd.i16        d2,  d2,  d3
683*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d2,  d2,  d2
684*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d2,  d2
685*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #8
686*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d2
687*c0909341SAndroid Build Coastguard Worker        vshl.u32        d0,  d0,  d28
688*c0909341SAndroid Build Coastguard Worker        beq             1f
689*c0909341SAndroid Build Coastguard Worker        // h = 4/16/32
690*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #32
691*c0909341SAndroid Build Coastguard Worker        movw            lr,  #0x6667
692*c0909341SAndroid Build Coastguard Worker        movw            r5,  #0xAAAB
693*c0909341SAndroid Build Coastguard Worker        it              ne
694*c0909341SAndroid Build Coastguard Worker        movne           lr,  r5
695*c0909341SAndroid Build Coastguard Worker        vdup.32         d24, lr
696*c0909341SAndroid Build Coastguard Worker        vmul.i32        d0,  d0,  d24
697*c0909341SAndroid Build Coastguard Worker        vshr.u32        d0,  d0,  #17
698*c0909341SAndroid Build Coastguard Worker1:
699*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]
700*c0909341SAndroid Build Coastguard Worker2:
701*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1},  [r0,  :128], r1
702*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1},  [r12, :128], r1
703*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
704*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1},  [r0,  :128], r1
705*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1},  [r12, :128], r1
706*c0909341SAndroid Build Coastguard Worker        bgt             2b
707*c0909341SAndroid Build Coastguard Worker        pop             {r4-r6, pc}
708*c0909341SAndroid Build Coastguard Worker
709*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h16):
710*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0,  d1,  d2,  d3},  [r2, :128]!
711*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q1
712*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
713*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
714*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2
715*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d0,  d0
716*c0909341SAndroid Build Coastguard Worker        bx              r3
717*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w16):
718*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2,  d3,  d4,  d5},  [r2]
719*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d30
720*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q2
721*c0909341SAndroid Build Coastguard Worker        vadd.i16        d2,  d2,  d3
722*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d2,  d2,  d1
723*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d2,  d2
724*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #16
725*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d2
726*c0909341SAndroid Build Coastguard Worker        vshl.u32        d4,  d0,  d28
727*c0909341SAndroid Build Coastguard Worker        beq             1f
728*c0909341SAndroid Build Coastguard Worker        // h = 4/8/32/64
729*c0909341SAndroid Build Coastguard Worker        tst             r4,  #(32+16+8)     // 16 added to make a consecutive bitmask
730*c0909341SAndroid Build Coastguard Worker        movw            lr,  #0x6667
731*c0909341SAndroid Build Coastguard Worker        movw            r5,  #0xAAAB
732*c0909341SAndroid Build Coastguard Worker        it              ne
733*c0909341SAndroid Build Coastguard Worker        movne           lr,  r5
734*c0909341SAndroid Build Coastguard Worker        vdup.32         d24, lr
735*c0909341SAndroid Build Coastguard Worker        vmul.i32        d4,  d4,  d24
736*c0909341SAndroid Build Coastguard Worker        vshr.u32        d4,  d4,  #17
737*c0909341SAndroid Build Coastguard Worker1:
738*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d4[0]
739*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d4[0]
740*c0909341SAndroid Build Coastguard Worker2:
741*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
742*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
743*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
744*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
745*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
746*c0909341SAndroid Build Coastguard Worker        bgt             2b
747*c0909341SAndroid Build Coastguard Worker        pop             {r4-r6, pc}
748*c0909341SAndroid Build Coastguard Worker
749*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h32):
750*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0,  d1,  d2,  d3},  [r2, :128]!
751*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4,  d5,  d6,  d7},  [r2, :128]!
752*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q1
753*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q3
754*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q2
755*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
756*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
757*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2
758*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d0,  d0
759*c0909341SAndroid Build Coastguard Worker        bx              r3
760*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w32):
761*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2,  d3,  d4,  d5},  [r2]!
762*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d30
763*c0909341SAndroid Build Coastguard Worker        vld1.16         {d16, d17, d18, d19}, [r2]
764*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q2
765*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q9
766*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q8
767*c0909341SAndroid Build Coastguard Worker        vadd.i16        d2,  d2,  d3
768*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d2,  d2,  d2
769*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d2,  d2
770*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #32
771*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d2
772*c0909341SAndroid Build Coastguard Worker        vshl.u32        d4,  d0,  d28
773*c0909341SAndroid Build Coastguard Worker        beq             1f
774*c0909341SAndroid Build Coastguard Worker        // h = 8/16/64
775*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #8
776*c0909341SAndroid Build Coastguard Worker        movw            lr,  #0x6667
777*c0909341SAndroid Build Coastguard Worker        movw            r5,  #0xAAAB
778*c0909341SAndroid Build Coastguard Worker        it              ne
779*c0909341SAndroid Build Coastguard Worker        movne           lr,  r5
780*c0909341SAndroid Build Coastguard Worker        vdup.32         d24, lr
781*c0909341SAndroid Build Coastguard Worker        vmul.i32        d4,  d4,  d24
782*c0909341SAndroid Build Coastguard Worker        vshr.u32        d4,  d4,  #17
783*c0909341SAndroid Build Coastguard Worker1:
784*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #32
785*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d4[0]
786*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d4[0]
787*c0909341SAndroid Build Coastguard Worker2:
788*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
789*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
790*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
791*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
792*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
793*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
794*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
795*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
796*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
797*c0909341SAndroid Build Coastguard Worker        bgt             2b
798*c0909341SAndroid Build Coastguard Worker        pop             {r4-r6, pc}
799*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_h64):
800*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0,  d1,  d2,  d3},  [r2, :128]!
801*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4,  d5,  d6,  d7},  [r2, :128]!
802*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q1
803*c0909341SAndroid Build Coastguard Worker        vld1.16         {d16, d17, d18, d19}, [r2, :128]!
804*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q3
805*c0909341SAndroid Build Coastguard Worker        vld1.16         {d20, d21, d22, d23}, [r2, :128]!
806*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q9
807*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q11
808*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q2
809*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q10
810*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q8
811*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1
812*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d0,  d0
813*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2
814*c0909341SAndroid Build Coastguard Worker        vpadd.i32       d0,  d0,  d0
815*c0909341SAndroid Build Coastguard Worker        bx              r3
816*c0909341SAndroid Build Coastguard WorkerL(ipred_dc_w64):
817*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2,  d3,  d4,  d5},  [r2]!
818*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d30
819*c0909341SAndroid Build Coastguard Worker        vld1.16         {d16, d17, d18, d19}, [r2]!
820*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q2
821*c0909341SAndroid Build Coastguard Worker        vld1.16         {d20, d21, d22, d23}, [r2]!
822*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q9
823*c0909341SAndroid Build Coastguard Worker        vld1.16         {d24, d25, d26, d27}, [r2]!
824*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q11
825*c0909341SAndroid Build Coastguard Worker        vadd.i16        q12, q12, q13
826*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q8
827*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q12
828*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q10
829*c0909341SAndroid Build Coastguard Worker        vadd.i16        d2,  d2,  d3
830*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d2,  d2
831*c0909341SAndroid Build Coastguard Worker        vpadd.i32       d2,  d2,  d2
832*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #64
833*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d2
834*c0909341SAndroid Build Coastguard Worker        vshl.u32        d4,  d0,  d28
835*c0909341SAndroid Build Coastguard Worker        beq             1f
836*c0909341SAndroid Build Coastguard Worker        // h = 16/32
837*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #16
838*c0909341SAndroid Build Coastguard Worker        movw            lr,  #0x6667
839*c0909341SAndroid Build Coastguard Worker        movw            r5,  #0xAAAB
840*c0909341SAndroid Build Coastguard Worker        it              ne
841*c0909341SAndroid Build Coastguard Worker        movne           lr,  r5
842*c0909341SAndroid Build Coastguard Worker        vdup.32         d24, lr
843*c0909341SAndroid Build Coastguard Worker        vmul.i32        d4,  d4,  d24
844*c0909341SAndroid Build Coastguard Worker        vshr.u32        d4,  d4,  #17
845*c0909341SAndroid Build Coastguard Worker1:
846*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #96
847*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d4[0]
848*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d4[0]
849*c0909341SAndroid Build Coastguard Worker2:
850*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
851*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
852*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
853*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
854*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
855*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128]!
856*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128]!
857*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r0,  :128], r1
858*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0,  d1,  d2,  d3},  [r12, :128], r1
859*c0909341SAndroid Build Coastguard Worker        bgt             2b
860*c0909341SAndroid Build Coastguard Worker        pop             {r4-r6, pc}
861*c0909341SAndroid Build Coastguard Workerendfunc
862*c0909341SAndroid Build Coastguard Worker
863*c0909341SAndroid Build Coastguard Worker// void ipred_paeth_16bpc_neon(pixel *dst, const ptrdiff_t stride,
864*c0909341SAndroid Build Coastguard Worker//                             const pixel *const topleft,
865*c0909341SAndroid Build Coastguard Worker//                             const int width, const int height, const int a,
866*c0909341SAndroid Build Coastguard Worker//                             const int max_width, const int max_height);
//
// Paeth intra predictor for 16-bit pixels. For every output pixel:
//   base   = left + top - topleft
//   ldiff  = |left - base|, tdiff = |top - base|, tldiff = |topleft - base|
//   result = ldiff <= min(tdiff, tldiff) ? left
//          : tdiff <= tldiff             ? top : topleft
//
// Only dst (r0), stride (r1), topleft (r2), width (r3) and height (first
// stack argument) are read; a, max_width and max_height are not touched.
//
// Register roles after the common setup below:
//   r0     = write pointer for rows 0, 2, 4, ...
//   lr     = write pointer for rows 1, 3, 5, ... (dst + stride)
//   r1     = 2 * stride (further reduced by the row length in the wide path)
//   r2, r5 = left-edge read pointer and its (negative) post-increment
//   r4     = remaining rows
//   r6     = topleft + 2 bytes, i.e. the first pixel of the top row
//   q2     = topleft replicated into all lanes
// q4 (d8/d9) is used as scratch and therefore saved/restored with vpush/vpop.
867*c0909341SAndroid Build Coastguard Workerfunction ipred_paeth_16bpc_neon, export=1
868*c0909341SAndroid Build Coastguard Worker        push            {r4-r6, lr}
869*c0909341SAndroid Build Coastguard Worker        vpush           {q4}
        // 16 bytes of core registers + 16 bytes for q4 were pushed, so the
        // first stack argument (height) now lives at sp + 32.
870*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #32]
        // Table index = clz(width) - 25: width 64 -> entry 0, width 4 -> entry 4.
871*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3
872*c0909341SAndroid Build Coastguard Worker        adr             r12, L(ipred_paeth_tbl)
873*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #25
874*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r12, lr, lsl #2]
        // q2 = topleft pixel broadcast to all eight lanes
875*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4[], d5[]},  [r2]
        // r6 -> top row; r2 -> the two 16-bit pixels stored just below topleft
876*c0909341SAndroid Build Coastguard Worker        add             r6,  r2,  #2
877*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #4
        // Table entries are offsets relative to the table itself
878*c0909341SAndroid Build Coastguard Worker        add             r12, r12, lr
        // Default left-pointer step: two pixels (4 bytes) backwards per iteration
879*c0909341SAndroid Build Coastguard Worker        mov             r5,  #-4
        // Two rows are written per step: r0/lr point at alternating rows and
        // each one advances by 2 * stride.
880*c0909341SAndroid Build Coastguard Worker        add             lr,  r0,  r1
881*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
882*c0909341SAndroid Build Coastguard Worker        bx              r12
883*c0909341SAndroid Build Coastguard Worker
884*c0909341SAndroid Build Coastguard Worker        .align 2
        // Width dispatch table, ordered from width 64 down to width 4.
885*c0909341SAndroid Build Coastguard WorkerL(ipred_paeth_tbl):
886*c0909341SAndroid Build Coastguard Worker        .word 640f - L(ipred_paeth_tbl) + CONFIG_THUMB
887*c0909341SAndroid Build Coastguard Worker        .word 320f - L(ipred_paeth_tbl) + CONFIG_THUMB
888*c0909341SAndroid Build Coastguard Worker        .word 160f - L(ipred_paeth_tbl) + CONFIG_THUMB
889*c0909341SAndroid Build Coastguard Worker        .word 80f  - L(ipred_paeth_tbl) + CONFIG_THUMB
890*c0909341SAndroid Build Coastguard Worker        .word 40f  - L(ipred_paeth_tbl) + CONFIG_THUMB
891*c0909341SAndroid Build Coastguard Worker
        // Width 4: four rows per iteration. Each q register holds two rows of
        // four pixels, so top and (top - topleft) are duplicated into both
        // halves of q3 and q8.
892*c0909341SAndroid Build Coastguard Worker40:
        // Read four left pixels (8 bytes) per iteration instead of two
893*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #4
894*c0909341SAndroid Build Coastguard Worker        mov             r5,  #-8
895*c0909341SAndroid Build Coastguard Worker        vld1.16         {d6},  [r6]
896*c0909341SAndroid Build Coastguard Worker        vsub.i16        d16, d6,  d4  // top - topleft
897*c0909341SAndroid Build Coastguard Worker        vmov            d7,  d6
898*c0909341SAndroid Build Coastguard Worker        vmov            d17, d16
899*c0909341SAndroid Build Coastguard Worker4:
        // d0..d3 = four consecutive left pixels, each replicated across its
        // register. d3 comes from the highest address (nearest to topleft)
        // and ends up in the first row stored below.
900*c0909341SAndroid Build Coastguard Worker        vld4.16         {d0[], d1[], d2[], d3[]},  [r2, :64], r5
901*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q8,  q0  // base
902*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q8,  q1
903*c0909341SAndroid Build Coastguard Worker        vabd.s16        q11, q3,  q9  // tdiff
904*c0909341SAndroid Build Coastguard Worker        vabd.s16        q12, q3,  q10
905*c0909341SAndroid Build Coastguard Worker        vabd.s16        q13, q2,  q9  // tldiff
906*c0909341SAndroid Build Coastguard Worker        vabd.s16        q14, q2,  q10
907*c0909341SAndroid Build Coastguard Worker        vabd.s16        q9,  q0,  q9  // ldiff
908*c0909341SAndroid Build Coastguard Worker        vabd.s16        q10, q1,  q10
909*c0909341SAndroid Build Coastguard Worker        vmin.u16        q15, q11, q13 // min(tdiff, tldiff)
910*c0909341SAndroid Build Coastguard Worker        vmin.u16        q4,  q12, q14
        // From here on q11/q12 and q9/q10 are reused as selection masks
911*c0909341SAndroid Build Coastguard Worker        vcge.u16        q11, q13, q11 // tldiff >= tdiff
912*c0909341SAndroid Build Coastguard Worker        vcge.u16        q12, q14, q12
913*c0909341SAndroid Build Coastguard Worker        vcge.u16        q9,  q15, q9  // min(tdiff, tldiff) >= ldiff
914*c0909341SAndroid Build Coastguard Worker        vcge.u16        q10, q4,  q10
915*c0909341SAndroid Build Coastguard Worker        vbsl            q12, q3,  q2  // tdiff <= tldiff ? top : topleft
916*c0909341SAndroid Build Coastguard Worker        vbsl            q11, q3,  q2
917*c0909341SAndroid Build Coastguard Worker        vbit            q12, q1,  q10 // ldiff <= min ? left : ...
918*c0909341SAndroid Build Coastguard Worker        vbit            q11, q0,  q9
        // Store order d25, d24, d23, d22 undoes the reversed left ordering:
        // rows 0 and 2 go through r0, rows 1 and 3 through lr.
919*c0909341SAndroid Build Coastguard Worker        vst1.16         {d25}, [r0, :64], r1
920*c0909341SAndroid Build Coastguard Worker        vst1.16         {d24}, [lr, :64], r1
921*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
922*c0909341SAndroid Build Coastguard Worker        vst1.16         {d23}, [r0, :64], r1
923*c0909341SAndroid Build Coastguard Worker        vst1.16         {d22}, [lr, :64], r1
924*c0909341SAndroid Build Coastguard Worker        bgt             4b
925*c0909341SAndroid Build Coastguard Worker        vpop            {q4}
926*c0909341SAndroid Build Coastguard Worker        pop             {r4-r6, pc}
        // Widths 8..64 share one path: two rows per outer iteration (label 1),
        // eight columns per inner iteration (label 2).
927*c0909341SAndroid Build Coastguard Worker80:
928*c0909341SAndroid Build Coastguard Worker160:
929*c0909341SAndroid Build Coastguard Worker320:
930*c0909341SAndroid Build Coastguard Worker640:
931*c0909341SAndroid Build Coastguard Worker        vld1.16         {q3},  [r6]!
        // r12 keeps the width so r3 can serve as the column counter
932*c0909341SAndroid Build Coastguard Worker        mov             r12, r3
        // r0/lr advance by width * 2 bytes inside the row, so the row-to-row
        // step becomes 2 * stride - 2 * width.
933*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  r3, lsl #1
934*c0909341SAndroid Build Coastguard Worker1:
        // d0 = left pixel at the lower address, d2 = the one nearer to
        // topleft; widen both to full q registers (q0 / q1).
935*c0909341SAndroid Build Coastguard Worker        vld2.16         {d0[], d2[]},  [r2, :32], r5
936*c0909341SAndroid Build Coastguard Worker        vmov            d1,  d0
937*c0909341SAndroid Build Coastguard Worker        vmov            d3,  d2
938*c0909341SAndroid Build Coastguard Worker2:
939*c0909341SAndroid Build Coastguard Worker        vsub.i16        q8,  q3,  q2  // top - topleft
940*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q8,  q0  // base
941*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q8,  q1
942*c0909341SAndroid Build Coastguard Worker        vabd.s16        q11, q3,  q9  // tdiff
943*c0909341SAndroid Build Coastguard Worker        vabd.s16        q12, q3,  q10
944*c0909341SAndroid Build Coastguard Worker        vabd.s16        q13, q2,  q9  // tldiff
945*c0909341SAndroid Build Coastguard Worker        vabd.s16        q14, q2,  q10
946*c0909341SAndroid Build Coastguard Worker        vabd.s16        q9,  q0,  q9  // ldiff
947*c0909341SAndroid Build Coastguard Worker        vabd.s16        q10, q1,  q10
948*c0909341SAndroid Build Coastguard Worker        vmin.u16        q15, q11, q13 // min(tdiff, tldiff)
949*c0909341SAndroid Build Coastguard Worker        vmin.u16        q4,  q12, q14
950*c0909341SAndroid Build Coastguard Worker        vcge.u16        q11, q13, q11 // tldiff >= tdiff
951*c0909341SAndroid Build Coastguard Worker        vcge.u16        q12, q14, q12
952*c0909341SAndroid Build Coastguard Worker        vcge.u16        q9,  q15, q9  // min(tdiff, tldiff) >= ldiff
953*c0909341SAndroid Build Coastguard Worker        vcge.u16        q10, q4,  q10
954*c0909341SAndroid Build Coastguard Worker        vbsl            q12, q3,  q2  // tdiff <= tldiff ? top : topleft
955*c0909341SAndroid Build Coastguard Worker        vbsl            q11, q3,  q2
956*c0909341SAndroid Build Coastguard Worker        vbit            q12, q1,  q10 // ldiff <= min ? left : ...
957*c0909341SAndroid Build Coastguard Worker        vbit            q11, q0,  q9
958*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #8
        // q12 (built from q1) is the upper row, q11 (built from q0) the lower
959*c0909341SAndroid Build Coastguard Worker        vst1.16         {q12}, [r0, :128]!
960*c0909341SAndroid Build Coastguard Worker        vst1.16         {q11}, [lr, :128]!
961*c0909341SAndroid Build Coastguard Worker        ble             8f
        // More columns left in this row pair: fetch the next 8 top pixels
962*c0909341SAndroid Build Coastguard Worker        vld1.16         {q3},  [r6]!
963*c0909341SAndroid Build Coastguard Worker        b               2b
964*c0909341SAndroid Build Coastguard Worker8:
965*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
966*c0909341SAndroid Build Coastguard Worker        ble             9f
967*c0909341SAndroid Build Coastguard Worker        // End of horizontal loop, move pointers to next two rows
        // Rewind the top pointer by width * 2 bytes and reload its first 8 pixels
968*c0909341SAndroid Build Coastguard Worker        sub             r6,  r6,  r12, lsl #1
969*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1
970*c0909341SAndroid Build Coastguard Worker        add             lr,  lr,  r1
971*c0909341SAndroid Build Coastguard Worker        vld1.16         {q3},  [r6]!
972*c0909341SAndroid Build Coastguard Worker        mov             r3,  r12
973*c0909341SAndroid Build Coastguard Worker        b               1b
974*c0909341SAndroid Build Coastguard Worker9:
975*c0909341SAndroid Build Coastguard Worker        vpop            {q4}
976*c0909341SAndroid Build Coastguard Worker        pop             {r4-r6, pc}
977*c0909341SAndroid Build Coastguard Workerendfunc
978*c0909341SAndroid Build Coastguard Worker
979*c0909341SAndroid Build Coastguard Worker// void ipred_smooth_16bpc_neon(pixel *dst, const ptrdiff_t stride,
980*c0909341SAndroid Build Coastguard Worker//                              const pixel *const topleft,
981*c0909341SAndroid Build Coastguard Worker//                              const int width, const int height, const int a,
982*c0909341SAndroid Build Coastguard Worker//                              const int max_width, const int max_height);
//
// Smooth intra predictor for 16-bit pixels, blending horizontally and
// vertically. Every output pixel is computed as
//   ((bottom + right) * 256
//    + (left[y] - right) * weights_hor[x]
//    + (top[x] - bottom) * weights_ver[y]
//    + 256) >> 9
// where the final "+ 256 ... >> 9" is the rounding narrowing shift (vrshrn).
//   bottom = 16-bit pixel at topleft - 2 * height bytes
//   right  = last pixel of the top row (top[width - 1])
//   weights_hor / weights_ver = byte tables at sm_weights + width / + height
//
// Only dst (r0), stride (r1), topleft (r2), width (r3) and height (first
// stack argument) are read; a, max_width and max_height are not touched.
//
// Register roles after the common setup below:
//   r0  = write pointer for rows 0, 2, ...;  r6 = dst + stride (rows 1, 3, ...)
//   r1  = 2 * stride (further reduced by the row length in the wide path)
//   r2, r7 = left-edge read pointer and its (negative) post-increment
//   r4  = remaining rows
//   r8  = top row pointer (topleft + 2 bytes)
//   r10 = weights_hor pointer, r12 = weights_ver pointer
//   q2  = bottom in all lanes, q3 = right in all lanes
983*c0909341SAndroid Build Coastguard Workerfunction ipred_smooth_16bpc_neon, export=1
984*c0909341SAndroid Build Coastguard Worker        push            {r4-r10, lr}
        // Eight registers (32 bytes) were pushed, so height is at sp + 32
985*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #32]
986*c0909341SAndroid Build Coastguard Worker        movrel          r10, X(sm_weights)
        // The weight table is indexed by block dimension: r12 -> vertical
        // weights (offset = height), r10 -> horizontal weights (offset = width)
987*c0909341SAndroid Build Coastguard Worker        add             r12, r10, r4
988*c0909341SAndroid Build Coastguard Worker        add             r10, r10, r3
        // Table index = clz(width) - 25: width 64 -> entry 0, width 4 -> entry 4
989*c0909341SAndroid Build Coastguard Worker        clz             r9,  r3
990*c0909341SAndroid Build Coastguard Worker        adr             r5,  L(ipred_smooth_tbl)
        // lr -> the pixel "height" entries below topleft (the bottom pixel)
991*c0909341SAndroid Build Coastguard Worker        sub             lr,  r2,  r4, lsl #1
992*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  #25
993*c0909341SAndroid Build Coastguard Worker        ldr             r9,  [r5, r9, lsl #2]
994*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4[], d5[]},  [lr] // bottom
        // r8 -> first pixel of the top row
995*c0909341SAndroid Build Coastguard Worker        add             r8,  r2,  #2
        // Table entries are offsets relative to the table itself
996*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  r9
        // Two rows are written per step through r0 and r6, each advancing by
        // 2 * stride
997*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1
998*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
999*c0909341SAndroid Build Coastguard Worker        bx              r5
1000*c0909341SAndroid Build Coastguard Worker
1001*c0909341SAndroid Build Coastguard Worker        .align 2
        // Width dispatch table, ordered from width 64 down to width 4.
1002*c0909341SAndroid Build Coastguard WorkerL(ipred_smooth_tbl):
1003*c0909341SAndroid Build Coastguard Worker        .word 640f - L(ipred_smooth_tbl) + CONFIG_THUMB
1004*c0909341SAndroid Build Coastguard Worker        .word 320f - L(ipred_smooth_tbl) + CONFIG_THUMB
1005*c0909341SAndroid Build Coastguard Worker        .word 160f - L(ipred_smooth_tbl) + CONFIG_THUMB
1006*c0909341SAndroid Build Coastguard Worker        .word 80f  - L(ipred_smooth_tbl) + CONFIG_THUMB
1007*c0909341SAndroid Build Coastguard Worker        .word 40f  - L(ipred_smooth_tbl) + CONFIG_THUMB
1008*c0909341SAndroid Build Coastguard Worker
        // Width 4: four rows per iteration, one d register (4 pixels) per row.
1009*c0909341SAndroid Build Coastguard Worker40:
1010*c0909341SAndroid Build Coastguard Worker        vld1.16         {d16},   [r8]       // top
1011*c0909341SAndroid Build Coastguard Worker        vld1.32         {d18[]}, [r10, :32] // weights_hor
        // Four left pixels (8 bytes) are consumed per iteration
1012*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #8
1013*c0909341SAndroid Build Coastguard Worker        mov             r7,  #-8
1014*c0909341SAndroid Build Coastguard Worker        vdup.16         q3,  d16[3]   // right
        // Only d16 (the low half of q8) is used in this path
1015*c0909341SAndroid Build Coastguard Worker        vsub.i16        q8,  q8,  q2  // top-bottom
1016*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q9,  d18      // weights_hor
        // d19 held a duplicate of the four weights; reuse it for the constant
1017*c0909341SAndroid Build Coastguard Worker        vadd.i16        d19, d4,  d6  // bottom+right
1018*c0909341SAndroid Build Coastguard Worker4:
1019*c0909341SAndroid Build Coastguard Worker        vld4.16         {d0[],  d1[],  d2[],  d3[]},  [r2,  :64], r7 // left
1020*c0909341SAndroid Build Coastguard Worker        vld4.8          {d20[], d21[], d22[], d23[]}, [r12, :32]!    // weights_ver
1021*c0909341SAndroid Build Coastguard Worker        vshll.u16       q12, d19, #8  // (bottom+right)*256
1022*c0909341SAndroid Build Coastguard Worker        vshll.u16       q13, d19, #8
1023*c0909341SAndroid Build Coastguard Worker        vshll.u16       q14, d19, #8
1024*c0909341SAndroid Build Coastguard Worker        vshll.u16       q15, d19, #8
        // Pack two rows' replicated byte weights into one d register each so
        // that the vmovl below widens them into d20..d23 = one weight per row
1025*c0909341SAndroid Build Coastguard Worker        vzip.32         d20, d21      // weights_ver
1026*c0909341SAndroid Build Coastguard Worker        vzip.32         d22, d23
1027*c0909341SAndroid Build Coastguard Worker        vsub.i16        q1,  q1,  q3  // left-right
1028*c0909341SAndroid Build Coastguard Worker        vsub.i16        q0,  q0,  q3
1029*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q10, d20      // weights_ver
1030*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q11, d22
        // Left pixels were read at ascending addresses, so d3 belongs to the
        // first row of the group and d0 to the last
1031*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q12, d3,  d18 // += (left-right)*weights_hor
1032*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q13, d2,  d18 // (left flipped)
1033*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q14, d1,  d18
1034*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q15, d0,  d18
1035*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q12, d16, d20 // += (top-bottom)*weights_ver
1036*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q13, d16, d21
1037*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q14, d16, d22
1038*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q15, d16, d23
        // Round, divide by 512 and narrow back to 16 bits
1039*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d24, q12, #9
1040*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d25, q13, #9
1041*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d26, q14, #9
1042*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d27, q15, #9
1043*c0909341SAndroid Build Coastguard Worker        vst1.16         {d24}, [r0, :64], r1
1044*c0909341SAndroid Build Coastguard Worker        vst1.16         {d25}, [r6, :64], r1
1045*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
1046*c0909341SAndroid Build Coastguard Worker        vst1.16         {d26}, [r0, :64], r1
1047*c0909341SAndroid Build Coastguard Worker        vst1.16         {d27}, [r6, :64], r1
1048*c0909341SAndroid Build Coastguard Worker        bgt             4b
1049*c0909341SAndroid Build Coastguard Worker        pop             {r4-r10, pc}
        // Width 8: two rows per iteration; top, weights_hor and the constant
        // term are loop-invariant and prepared once.
1050*c0909341SAndroid Build Coastguard Worker80:
1051*c0909341SAndroid Build Coastguard Worker        vld1.16         {q8},  [r8]       // top
1052*c0909341SAndroid Build Coastguard Worker        vld1.8          {d18}, [r10, :64] // weights_hor
        // Two left pixels (4 bytes) are consumed per iteration
1053*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #4
1054*c0909341SAndroid Build Coastguard Worker        mov             r7,  #-4
1055*c0909341SAndroid Build Coastguard Worker        vdup.16         q3,  d17[3]   // right
1056*c0909341SAndroid Build Coastguard Worker        vsub.i16        q8,  q8,  q2  // top-bottom
1057*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q9,  d18      // weights_hor
1058*c0909341SAndroid Build Coastguard Worker        vadd.i16        d3,  d4,  d6  // bottom+right
1059*c0909341SAndroid Build Coastguard Worker8:
1060*c0909341SAndroid Build Coastguard Worker        vld2.16         {d0[],  d1[]},  [r2,  :32], r7 // left
1061*c0909341SAndroid Build Coastguard Worker        vld2.8          {d20[], d22[]}, [r12, :16]!    // weights_ver
        // q12/q13 accumulate the low/high four pixels of the upper row,
        // q14/q15 those of the lower row
1062*c0909341SAndroid Build Coastguard Worker        vshll.u16       q12, d3,  #8  // (bottom+right)*256
1063*c0909341SAndroid Build Coastguard Worker        vshll.u16       q13, d3,  #8
1064*c0909341SAndroid Build Coastguard Worker        vshll.u16       q14, d3,  #8
1065*c0909341SAndroid Build Coastguard Worker        vshll.u16       q15, d3,  #8
1066*c0909341SAndroid Build Coastguard Worker        vsub.i16        q0,  q0,  q3  // left-right
1067*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q10, d20      // weights_ver
1068*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q11, d22
1069*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q12, d1,  d18 // += (left-right)*weights_hor
1070*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q13, d1,  d19 // (left flipped)
1071*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q14, d0,  d18
1072*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q15, d0,  d19
1073*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q12, d16, d20 // += (top-bottom)*weights_ver
1074*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q13, d17, d20
1075*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q14, d16, d22
1076*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q15, d17, d22
        // After narrowing, q12 = d24:d25 is the upper row and q13 = d26:d27
        // the lower row
1077*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d24, q12, #9
1078*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d25, q13, #9
1079*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d26, q14, #9
1080*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d27, q15, #9
1081*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
1082*c0909341SAndroid Build Coastguard Worker        vst1.16         {q12}, [r0, :128], r1
1083*c0909341SAndroid Build Coastguard Worker        vst1.16         {q13}, [r6, :128], r1
1084*c0909341SAndroid Build Coastguard Worker        bgt             8b
1085*c0909341SAndroid Build Coastguard Worker        pop             {r4-r10, pc}
        // Widths 16..64 share one path: two rows per outer iteration (label 1),
        // eight columns per inner iteration (label 2).
1086*c0909341SAndroid Build Coastguard Worker160:
1087*c0909341SAndroid Build Coastguard Worker320:
1088*c0909341SAndroid Build Coastguard Worker640:
        // lr = topleft + 2 * width bytes = address of top[width - 1]
1089*c0909341SAndroid Build Coastguard Worker        add             lr,  r2,  r3, lsl #1
1090*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #4
1091*c0909341SAndroid Build Coastguard Worker        mov             r7,  #-4
1092*c0909341SAndroid Build Coastguard Worker        vld1.16         {d6[], d7[]}, [lr] // right
        // r0/r6 advance by width * 2 bytes inside the row, so the row-to-row
        // step becomes 2 * stride - 2 * width; r9 keeps the width for reloads
1093*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  r3, lsl #1
1094*c0909341SAndroid Build Coastguard Worker        mov             r9,  r3
1095*c0909341SAndroid Build Coastguard Worker        vadd.i16        d3,  d4,  d6  // bottom+right
1096*c0909341SAndroid Build Coastguard Worker
1097*c0909341SAndroid Build Coastguard Worker1:
        // Per row pair: left pixels and vertical weights are constant across
        // the row, so they are prepared outside the column loop
1098*c0909341SAndroid Build Coastguard Worker        vld2.16         {d0[],  d1[]},  [r2,  :32], r7 // left
1099*c0909341SAndroid Build Coastguard Worker        vld2.8          {d20[], d22[]}, [r12, :16]!    // weights_ver
1100*c0909341SAndroid Build Coastguard Worker        vsub.i16        q0,  q0,  q3  // left-right
1101*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q10, d20      // weights_ver
1102*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q11, d22
1103*c0909341SAndroid Build Coastguard Worker2:
1104*c0909341SAndroid Build Coastguard Worker        vld1.8          {d18}, [r10, :64]! // weights_hor
1105*c0909341SAndroid Build Coastguard Worker        vld1.16         {q8},  [r8]!       // top
1106*c0909341SAndroid Build Coastguard Worker        vshll.u16       q12, d3,  #8  // (bottom+right)*256
1107*c0909341SAndroid Build Coastguard Worker        vshll.u16       q13, d3,  #8
1108*c0909341SAndroid Build Coastguard Worker        vmovl.u8        q9,  d18      // weights_hor
1109*c0909341SAndroid Build Coastguard Worker        vshll.u16       q14, d3,  #8
1110*c0909341SAndroid Build Coastguard Worker        vshll.u16       q15, d3,  #8
1111*c0909341SAndroid Build Coastguard Worker        vsub.i16        q8,  q8,  q2  // top-bottom
1112*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q12, d1,  d18 // += (left-right)*weights_hor
1113*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q13, d1,  d19 // (left flipped)
1114*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q14, d0,  d18
1115*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q15, d0,  d19
1116*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q12, d16, d20 // += (top-bottom)*weights_ver
1117*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q13, d17, d20
1118*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q14, d16, d22
1119*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q15, d17, d22
1120*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d24, q12, #9
1121*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d25, q13, #9
1122*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d26, q14, #9
1123*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d27, q15, #9
        // r3 counts the columns still to be produced in this row pair
1124*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #8
1125*c0909341SAndroid Build Coastguard Worker        vst1.16         {q12}, [r0, :128]!
1126*c0909341SAndroid Build Coastguard Worker        vst1.16         {q13}, [r6, :128]!
1127*c0909341SAndroid Build Coastguard Worker        bgt             2b
1128*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
1129*c0909341SAndroid Build Coastguard Worker        ble             9f
        // Next row pair: rewind top (width * 2 bytes) and weights_hor (width
        // bytes), step the output pointers down and restore the column count
1130*c0909341SAndroid Build Coastguard Worker        sub             r8,  r8,  r9, lsl #1
1131*c0909341SAndroid Build Coastguard Worker        sub             r10, r10, r9
1132*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1
1133*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r1
1134*c0909341SAndroid Build Coastguard Worker        mov             r3,  r9
1135*c0909341SAndroid Build Coastguard Worker        b               1b
1136*c0909341SAndroid Build Coastguard Worker9:
1137*c0909341SAndroid Build Coastguard Worker        pop             {r4-r10, pc}
1138*c0909341SAndroid Build Coastguard Workerendfunc
1139*c0909341SAndroid Build Coastguard Worker
1140*c0909341SAndroid Build Coastguard Worker// void ipred_smooth_v_16bpc_neon(pixel *dst, const ptrdiff_t stride,
1141*c0909341SAndroid Build Coastguard Worker//                                const pixel *const topleft,
1142*c0909341SAndroid Build Coastguard Worker//                                const int width, const int height, const int a,
1143*c0909341SAndroid Build Coastguard Worker//                                const int max_width, const int max_height);
//
// Vertical "smooth" intra prediction for 16-bit pixels. Every output pixel is
//     bottom + (((top[x] - bottom) * weights_ver[y] + 128) >> 8)
// where bottom is the single pixel read from topleft - height, top[] starts
// at topleft + 1 pixel and weights_ver[] are bytes read from
// sm_weights + height.
//
// Register use after the prologue (argument mapping per the prototype above
// and the AAPCS):
//   r0  = dst row 0 pointer      r6  = dst row 1 pointer
//   r1  = stride (doubled before dispatch)
//   r2  = top row pointer        r3  = width
//   r4  = height (row counter)   r7  = weights_ver pointer
//   q2  = bottom pixel replicated into all 8 lanes
// The arguments a, max_width and max_height are never read here.
1144*c0909341SAndroid Build Coastguard Workerfunction ipred_smooth_v_16bpc_neon, export=1
1145*c0909341SAndroid Build Coastguard Worker        push            {r4-r7, lr}
        // Five registers (20 bytes) were pushed, so the first stack argument
        // (height) now lives at sp + 20.
1146*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #20]
1147*c0909341SAndroid Build Coastguard Worker        movrel          r7,  X(sm_weights)
        // r7 = sm_weights + height: start of the per-row weights.
1148*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r4
        // Jump table index = clz(width) - 25: 64 -> 0, 32 -> 1, 16 -> 2,
        // 8 -> 3, 4 -> 4, matching the table order below.
1149*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3
1150*c0909341SAndroid Build Coastguard Worker        adr             r5,  L(ipred_smooth_v_tbl)
        // r12 = topleft - height pixels (2 bytes per pixel).
1151*c0909341SAndroid Build Coastguard Worker        sub             r12, r2,  r4,  lsl #1
1152*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #25
1153*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r5, lr, lsl #2]
1154*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4[], d5[]},  [r12] // bottom
        // Skip the topleft pixel itself; r2 now points at top[0].
1155*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2
        // Table entries are offsets relative to the table label.
1156*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  lr
        // Two row pointers (r0 = row 0, r6 = row 1), each stepping two rows.
1157*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1
1158*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
1159*c0909341SAndroid Build Coastguard Worker        bx              r5
1160*c0909341SAndroid Build Coastguard Worker
1161*c0909341SAndroid Build Coastguard Worker        .align 2
        // Width dispatch table, widest first. CONFIG_THUMB is defined outside
        // this chunk; presumably it sets the Thumb bit of the bx target in
        // Thumb builds -- confirm against the build configuration.
1162*c0909341SAndroid Build Coastguard WorkerL(ipred_smooth_v_tbl):
1163*c0909341SAndroid Build Coastguard Worker        .word 640f - L(ipred_smooth_v_tbl) + CONFIG_THUMB
1164*c0909341SAndroid Build Coastguard Worker        .word 320f - L(ipred_smooth_v_tbl) + CONFIG_THUMB
1165*c0909341SAndroid Build Coastguard Worker        .word 160f - L(ipred_smooth_v_tbl) + CONFIG_THUMB
1166*c0909341SAndroid Build Coastguard Worker        .word 80f  - L(ipred_smooth_v_tbl) + CONFIG_THUMB
1167*c0909341SAndroid Build Coastguard Worker        .word 40f  - L(ipred_smooth_v_tbl) + CONFIG_THUMB
1168*c0909341SAndroid Build Coastguard Worker
        // Width 4: q3 holds two copies of (top - bottom) so that one q-sized
        // multiply produces two output rows.
1169*c0909341SAndroid Build Coastguard Worker40:
1170*c0909341SAndroid Build Coastguard Worker        vld1.16         {d6}, [r2]    // top
1171*c0909341SAndroid Build Coastguard Worker        vsub.i16        d6,  d6,  d4  // top-bottom
1172*c0909341SAndroid Build Coastguard Worker        vmov            d7,  d6
        // Four rows per iteration.
1173*c0909341SAndroid Build Coastguard Worker4:
        // Load four weight bytes, each replicated across one d register, then
        // zip 32-bit halves so d16 = {w0 x4, w1 x4} and d18 = {w2 x4, w3 x4}.
1174*c0909341SAndroid Build Coastguard Worker        vld4.8          {d16[], d17[], d18[], d19[]}, [r7, :32]! // weights_ver
1175*c0909341SAndroid Build Coastguard Worker        vzip.32         d16, d17      // weights_ver
1176*c0909341SAndroid Build Coastguard Worker        vzip.32         d18, d19
        // Widening by << 7 lets vqrdmulh (doubling, rounding, high half)
        // evaluate (diff * w + 128) >> 8.
1177*c0909341SAndroid Build Coastguard Worker        vshll.u8        q8,  d16, #7  // weights_ver << 7
1178*c0909341SAndroid Build Coastguard Worker        vshll.u8        q9,  d18, #7
1179*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q10, q3,  q8  // ((top-bottom)*weights_ver + 128) >> 8
1180*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q11, q3,  q9
1181*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
1182*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q2
        // d20..d23 are rows 0..3; r0 takes the even rows, r6 the odd rows.
1183*c0909341SAndroid Build Coastguard Worker        vst1.16         {d20}, [r0, :64], r1
1184*c0909341SAndroid Build Coastguard Worker        vst1.16         {d21}, [r6, :64], r1
1185*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
1186*c0909341SAndroid Build Coastguard Worker        vst1.16         {d22}, [r0, :64], r1
1187*c0909341SAndroid Build Coastguard Worker        vst1.16         {d23}, [r6, :64], r1
1188*c0909341SAndroid Build Coastguard Worker        bgt             4b
1189*c0909341SAndroid Build Coastguard Worker        pop             {r4-r7, pc}
        // Width 8: q3 = (top - bottom) for the whole row, computed once.
1190*c0909341SAndroid Build Coastguard Worker80:
1191*c0909341SAndroid Build Coastguard Worker        vld1.16         {q3}, [r2]    // top
1192*c0909341SAndroid Build Coastguard Worker        vsub.i16        q3,  q3,  q2  // top-bottom
        // Four rows per iteration, one weight per row broadcast to 8 lanes.
1193*c0909341SAndroid Build Coastguard Worker8:
1194*c0909341SAndroid Build Coastguard Worker        vld4.8          {d16[], d18[], d20[], d22[]}, [r7, :32]! // weights_ver
1195*c0909341SAndroid Build Coastguard Worker        vshll.u8        q8,  d16, #7  // weights_ver << 7
1196*c0909341SAndroid Build Coastguard Worker        vshll.u8        q9,  d18, #7
1197*c0909341SAndroid Build Coastguard Worker        vshll.u8        q10, d20, #7
1198*c0909341SAndroid Build Coastguard Worker        vshll.u8        q11, d22, #7
1199*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q8,  q3,  q8  // ((top-bottom)*weights_ver + 128) >> 8
1200*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q9,  q3,  q9
1201*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q10, q3,  q10
1202*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q11, q3,  q11
1203*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q2
1204*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q2
1205*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
1206*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q2
1207*c0909341SAndroid Build Coastguard Worker        vst1.16         {q8},  [r0, :128], r1
1208*c0909341SAndroid Build Coastguard Worker        vst1.16         {q9},  [r6, :128], r1
1209*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
1210*c0909341SAndroid Build Coastguard Worker        vst1.16         {q10}, [r0, :128], r1
1211*c0909341SAndroid Build Coastguard Worker        vst1.16         {q11}, [r6, :128], r1
1212*c0909341SAndroid Build Coastguard Worker        bgt             8b
1213*c0909341SAndroid Build Coastguard Worker        pop             {r4-r7, pc}
        // Widths 16, 32 and 64 share one path: four rows at a time, 16 pixels
        // per inner iteration.
1214*c0909341SAndroid Build Coastguard Worker160:
1215*c0909341SAndroid Build Coastguard Worker320:
1216*c0909341SAndroid Build Coastguard Worker640:
        // q4-q7 (d8-d15) are callee-saved under the AAPCS and are used below.
1217*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
1218*c0909341SAndroid Build Coastguard Worker        // Set up pointers for four rows in parallel; r0, r6, r5, lr
        // r1 is already 2*stride here, so r5 = row 2 and lr = row 3.
1219*c0909341SAndroid Build Coastguard Worker        add             r5,  r0,  r1
1220*c0909341SAndroid Build Coastguard Worker        add             lr,  r6,  r1
        // r1 = 4*stride - width*2 bytes: the row pointers post-increment
        // across a full row, so this takes them to the start of the row four
        // lines further down.
1221*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
1222*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  r3, lsl #1
        // Keep the width so r3 and r2 can be reset after each group of rows.
1223*c0909341SAndroid Build Coastguard Worker        mov             r12, r3
1224*c0909341SAndroid Build Coastguard Worker
        // Outer loop: load and widen the weights for the next four rows.
1225*c0909341SAndroid Build Coastguard Worker1:
1226*c0909341SAndroid Build Coastguard Worker        vld4.8          {d8[], d10[], d12[], d14[]}, [r7, :32]! // weights_ver
1227*c0909341SAndroid Build Coastguard Worker        vshll.u8        q4,  d8,  #7  // weights_ver << 7
1228*c0909341SAndroid Build Coastguard Worker        vshll.u8        q5,  d10, #7
1229*c0909341SAndroid Build Coastguard Worker        vshll.u8        q6,  d12, #7
1230*c0909341SAndroid Build Coastguard Worker        vshll.u8        q7,  d14, #7
        // Inner loop: 16 top pixels against the four row weights.
1231*c0909341SAndroid Build Coastguard Worker2:
1232*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0, q1}, [r2]!  // top
1233*c0909341SAndroid Build Coastguard Worker        vsub.i16        q0,  q0,  q2  // top-bottom
1234*c0909341SAndroid Build Coastguard Worker        vsub.i16        q1,  q1,  q2
1235*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q8,  q0,  q4  // ((top-bottom)*weights_ver + 128) >> 8
1236*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q9,  q1,  q4
1237*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q10, q0,  q5
1238*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q11, q1,  q5
1239*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q12, q0,  q6
1240*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q13, q1,  q6
1241*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q14, q0,  q7
1242*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q15, q1,  q7
1243*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q2
1244*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q2
1245*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
1246*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q2
1247*c0909341SAndroid Build Coastguard Worker        vadd.i16        q12, q12, q2
1248*c0909341SAndroid Build Coastguard Worker        vadd.i16        q13, q13, q2
1249*c0909341SAndroid Build Coastguard Worker        vadd.i16        q14, q14, q2
1250*c0909341SAndroid Build Coastguard Worker        vadd.i16        q15, q15, q2
1251*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #16
        // Rows 0..3 go to r0, r6, r5 and lr respectively.
1252*c0909341SAndroid Build Coastguard Worker        vst1.16         {q8,  q9},  [r0, :128]!
1253*c0909341SAndroid Build Coastguard Worker        vst1.16         {q10, q11}, [r6, :128]!
1254*c0909341SAndroid Build Coastguard Worker        vst1.16         {q12, q13}, [r5, :128]!
1255*c0909341SAndroid Build Coastguard Worker        vst1.16         {q14, q15}, [lr, :128]!
1256*c0909341SAndroid Build Coastguard Worker        bgt             2b
1257*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
1258*c0909341SAndroid Build Coastguard Worker        ble             9f
        // More rows remain: rewind the top pointer by width pixels, move the
        // four row pointers down and restore the column counter.
1259*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r12, lsl #1
1260*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1
1261*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r1
1262*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  r1
1263*c0909341SAndroid Build Coastguard Worker        add             lr,  lr,  r1
1264*c0909341SAndroid Build Coastguard Worker        mov             r3,  r12
1265*c0909341SAndroid Build Coastguard Worker        b               1b
1266*c0909341SAndroid Build Coastguard Worker9:
1267*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1268*c0909341SAndroid Build Coastguard Worker        pop             {r4-r7, pc}
1269*c0909341SAndroid Build Coastguard Workerendfunc
1270*c0909341SAndroid Build Coastguard Worker
1271*c0909341SAndroid Build Coastguard Worker// void ipred_smooth_h_16bpc_neon(pixel *dst, const ptrdiff_t stride,
1272*c0909341SAndroid Build Coastguard Worker//                                const pixel *const topleft,
1273*c0909341SAndroid Build Coastguard Worker//                                const int width, const int height, const int a,
1274*c0909341SAndroid Build Coastguard Worker//                                const int max_width, const int max_height);
//
// Horizontal "smooth" intra prediction for 16-bit pixels. Every output pixel
// is
//     right + (((left[y] - right) * weights_hor[x] + 128) >> 8)
// where right is the single pixel read from topleft + width, weights_hor[]
// are bytes read from sm_weights + width, and the left pixels are read at
// descending addresses below topleft (the pixel just below topleft in memory
// feeds the first output row).
//
// Register use after the prologue (argument mapping per the prototype above
// and the AAPCS):
//   r0  = dst row 0 pointer      r6  = dst row 1 pointer
//   r1  = stride (doubled before dispatch)
//   r2  = left pixel pointer     r3  = width
//   r4  = height (row counter)   r8  = weights_hor pointer
//   q2  = right pixel replicated into all 8 lanes
// The arguments a, max_width and max_height are never read here.
1275*c0909341SAndroid Build Coastguard Workerfunction ipred_smooth_h_16bpc_neon, export=1
1276*c0909341SAndroid Build Coastguard Worker        push            {r4-r8, lr}
        // Six registers (24 bytes) were pushed, so the first stack argument
        // (height) now lives at sp + 24.
1277*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #24]
1278*c0909341SAndroid Build Coastguard Worker        movrel          r8,  X(sm_weights)
        // r8 = sm_weights + width: start of the per-column weights.
1279*c0909341SAndroid Build Coastguard Worker        add             r8,  r8,  r3
        // Jump table index = clz(width) - 25: 64 -> 0, 32 -> 1, 16 -> 2,
        // 8 -> 3, 4 -> 4, matching the table order below.
1280*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3
1281*c0909341SAndroid Build Coastguard Worker        adr             r5,  L(ipred_smooth_h_tbl)
        // r12 = topleft + width pixels (2 bytes per pixel).
1282*c0909341SAndroid Build Coastguard Worker        add             r12, r2,  r3, lsl #1
1283*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #25
1284*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r5, lr, lsl #2]
1285*c0909341SAndroid Build Coastguard Worker        vld1.16         {d4[], d5[]},  [r12] // right
        // Table entries are offsets relative to the table label.
1286*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  lr
        // Two row pointers (r0 = row 0, r6 = row 1), each stepping two rows.
1287*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1
1288*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
1289*c0909341SAndroid Build Coastguard Worker        bx              r5
1290*c0909341SAndroid Build Coastguard Worker
1291*c0909341SAndroid Build Coastguard Worker        .align 2
        // Width dispatch table, widest first. CONFIG_THUMB is defined outside
        // this chunk; presumably it sets the Thumb bit of the bx target in
        // Thumb builds -- confirm against the build configuration.
1292*c0909341SAndroid Build Coastguard WorkerL(ipred_smooth_h_tbl):
1293*c0909341SAndroid Build Coastguard Worker        .word 640f - L(ipred_smooth_h_tbl) + CONFIG_THUMB
1294*c0909341SAndroid Build Coastguard Worker        .word 320f - L(ipred_smooth_h_tbl) + CONFIG_THUMB
1295*c0909341SAndroid Build Coastguard Worker        .word 160f - L(ipred_smooth_h_tbl) + CONFIG_THUMB
1296*c0909341SAndroid Build Coastguard Worker        .word 80f  - L(ipred_smooth_h_tbl) + CONFIG_THUMB
1297*c0909341SAndroid Build Coastguard Worker        .word 40f  - L(ipred_smooth_h_tbl) + CONFIG_THUMB
1298*c0909341SAndroid Build Coastguard Worker
        // Width 4: the four weights are loaded into both halves of d6, so
        // after widening q3 = {w0..w3, w0..w3}.
1299*c0909341SAndroid Build Coastguard Worker40:
1300*c0909341SAndroid Build Coastguard Worker        vld1.32         {d6[]}, [r8, :32] // weights_hor
        // Point r2 at the four pixels just below topleft and walk downwards
        // in memory by four pixels (8 bytes) per iteration.
1301*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #8
1302*c0909341SAndroid Build Coastguard Worker        mov             r7,  #-8
1303*c0909341SAndroid Build Coastguard Worker        vshll.u8        q3,  d6,  #7  // weights_hor << 7
        // Four rows per iteration.
1304*c0909341SAndroid Build Coastguard Worker4:
        // Each of the four left pixels is replicated across one d register;
        // d3 holds the highest-address pixel, which belongs to the first row.
1305*c0909341SAndroid Build Coastguard Worker        vld4.16         {d0[], d1[], d2[], d3[]},  [r2, :64], r7 // left
1306*c0909341SAndroid Build Coastguard Worker        vsub.i16        q0,  q0,  q2  // left-right
1307*c0909341SAndroid Build Coastguard Worker        vsub.i16        q1,  q1,  q2
1308*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
1309*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q8,  q1,  q3  // ((left-right)*weights_hor + 128) >> 8
1310*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q9,  q0,  q3  // (left flipped)
1311*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q2
1312*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q2
        // Store in reversed register order (d17, d16, d19, d18) so rows come
        // out top to bottom.
1313*c0909341SAndroid Build Coastguard Worker        vst1.16         {d17}, [r0, :64], r1
1314*c0909341SAndroid Build Coastguard Worker        vst1.16         {d16}, [r6, :64], r1
1315*c0909341SAndroid Build Coastguard Worker        vst1.16         {d19}, [r0, :64], r1
1316*c0909341SAndroid Build Coastguard Worker        vst1.16         {d18}, [r6, :64], r1
1317*c0909341SAndroid Build Coastguard Worker        bgt             4b
1318*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
        // Width 8: q3 = the eight column weights, widened once up front.
1319*c0909341SAndroid Build Coastguard Worker80:
1320*c0909341SAndroid Build Coastguard Worker        vld1.8          {d6}, [r8, :64] // weights_hor
1321*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #8
1322*c0909341SAndroid Build Coastguard Worker        mov             r7,  #-8
1323*c0909341SAndroid Build Coastguard Worker        vshll.u8        q3,  d6,  #7  // weights_hor << 7
        // Four rows per iteration.
1324*c0909341SAndroid Build Coastguard Worker8:
1325*c0909341SAndroid Build Coastguard Worker        vld1.16         {d23},  [r2, :64], r7 // left
1326*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
1327*c0909341SAndroid Build Coastguard Worker        vsub.i16        d23, d23, d4  // left-right
        // Broadcast each lane to a full q register, highest lane first.
        // q11 aliases d22/d23, so the lane-0 broadcast must come last because
        // it overwrites the source register.
1328*c0909341SAndroid Build Coastguard Worker        vdup.16         q8,  d23[3]   // flip left
1329*c0909341SAndroid Build Coastguard Worker        vdup.16         q9,  d23[2]
1330*c0909341SAndroid Build Coastguard Worker        vdup.16         q10, d23[1]
1331*c0909341SAndroid Build Coastguard Worker        vdup.16         q11, d23[0]
1332*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q8,  q8,  q3  // ((left-right)*weights_hor + 128) >> 8
1333*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q9,  q9,  q3
1334*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q10, q10, q3
1335*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q11, q11, q3
1336*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q2
1337*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q2
1338*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
1339*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q2
1340*c0909341SAndroid Build Coastguard Worker        vst1.16         {q8},  [r0, :128], r1
1341*c0909341SAndroid Build Coastguard Worker        vst1.16         {q9},  [r6, :128], r1
1342*c0909341SAndroid Build Coastguard Worker        vst1.16         {q10}, [r0, :128], r1
1343*c0909341SAndroid Build Coastguard Worker        vst1.16         {q11}, [r6, :128], r1
1344*c0909341SAndroid Build Coastguard Worker        bgt             8b
1345*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
        // Widths 16, 32 and 64 share one path: four rows at a time, 16 pixels
        // per inner iteration.
1346*c0909341SAndroid Build Coastguard Worker160:
1347*c0909341SAndroid Build Coastguard Worker320:
1348*c0909341SAndroid Build Coastguard Worker640:
        // q4-q7 (d8-d15) are callee-saved under the AAPCS and are used below.
1349*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}
1350*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #8
1351*c0909341SAndroid Build Coastguard Worker        mov             r7,  #-8
1352*c0909341SAndroid Build Coastguard Worker        // Set up pointers for four rows in parallel; r0, r6, r5, lr
        // r1 is already 2*stride here, so r5 = row 2 and lr = row 3.
1353*c0909341SAndroid Build Coastguard Worker        add             r5,  r0,  r1
1354*c0909341SAndroid Build Coastguard Worker        add             lr,  r6,  r1
        // r1 = 4*stride - width*2 bytes: the row pointers post-increment
        // across a full row, so this takes them to the start of the row four
        // lines further down.
1355*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
1356*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  r3, lsl #1
        // Keep the width so r3 and r8 can be reset after each group of rows.
1357*c0909341SAndroid Build Coastguard Worker        mov             r12, r3
1358*c0909341SAndroid Build Coastguard Worker
        // Outer loop: load the four left pixels for the next four rows and
        // broadcast (left - right) per row into q4..q7.
1359*c0909341SAndroid Build Coastguard Worker1:
1360*c0909341SAndroid Build Coastguard Worker        vld1.16         {d15},  [r2, :64], r7 // left
1361*c0909341SAndroid Build Coastguard Worker        vsub.i16        d15, d15, d4  // left-right
        // q7 aliases d14/d15, so the lane-0 broadcast must come last because
        // it overwrites the source register.
1362*c0909341SAndroid Build Coastguard Worker        vdup.16         q4,  d15[3]   // flip left
1363*c0909341SAndroid Build Coastguard Worker        vdup.16         q5,  d15[2]
1364*c0909341SAndroid Build Coastguard Worker        vdup.16         q6,  d15[1]
1365*c0909341SAndroid Build Coastguard Worker        vdup.16         q7,  d15[0]
        // Inner loop: 16 column weights against the four row differences.
1366*c0909341SAndroid Build Coastguard Worker2:
1367*c0909341SAndroid Build Coastguard Worker        vld1.8          {q1}, [r8, :128]! // weights_hor
1368*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #16
        // d2 is widened into q0 first; q1 is then widened in place from d3.
1369*c0909341SAndroid Build Coastguard Worker        vshll.u8        q0,  d2,  #7  // weights_hor << 7
1370*c0909341SAndroid Build Coastguard Worker        vshll.u8        q1,  d3,  #7
1371*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q8,  q0,  q4  // ((left-right)*weights_hor + 128) >> 8
1372*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q9,  q1,  q4
1373*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q10, q0,  q5
1374*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q11, q1,  q5
1375*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q12, q0,  q6
1376*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q13, q1,  q6
1377*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q14, q0,  q7
1378*c0909341SAndroid Build Coastguard Worker        vqrdmulh.s16    q15, q1,  q7
1379*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q2
1380*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q2
1381*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q2
1382*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q2
1383*c0909341SAndroid Build Coastguard Worker        vadd.i16        q12, q12, q2
1384*c0909341SAndroid Build Coastguard Worker        vadd.i16        q13, q13, q2
1385*c0909341SAndroid Build Coastguard Worker        vadd.i16        q14, q14, q2
1386*c0909341SAndroid Build Coastguard Worker        vadd.i16        q15, q15, q2
        // Rows 0..3 go to r0, r6, r5 and lr respectively. The flags set by
        // the subs above are still intact at the bgt because the NEON
        // instructions in between do not modify the condition flags.
1387*c0909341SAndroid Build Coastguard Worker        vst1.16         {q8,  q9},  [r0, :128]!
1388*c0909341SAndroid Build Coastguard Worker        vst1.16         {q10, q11}, [r6, :128]!
1389*c0909341SAndroid Build Coastguard Worker        vst1.16         {q12, q13}, [r5, :128]!
1390*c0909341SAndroid Build Coastguard Worker        vst1.16         {q14, q15}, [lr, :128]!
1391*c0909341SAndroid Build Coastguard Worker        bgt             2b
1392*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
1393*c0909341SAndroid Build Coastguard Worker        ble             9f
        // More rows remain: rewind the weights pointer by width bytes, move
        // the four row pointers down and restore the column counter.
1394*c0909341SAndroid Build Coastguard Worker        sub             r8,  r8,  r12
1395*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1
1396*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r1
1397*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  r1
1398*c0909341SAndroid Build Coastguard Worker        add             lr,  lr,  r1
1399*c0909341SAndroid Build Coastguard Worker        mov             r3,  r12
1400*c0909341SAndroid Build Coastguard Worker        b               1b
1401*c0909341SAndroid Build Coastguard Worker9:
1402*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1403*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
1404*c0909341SAndroid Build Coastguard Workerendfunc
1405*c0909341SAndroid Build Coastguard Worker
1406*c0909341SAndroid Build Coastguard Worker// void ipred_filter_16bpc_neon(pixel *dst, const ptrdiff_t stride,
1407*c0909341SAndroid Build Coastguard Worker//                              const pixel *const topleft,
1408*c0909341SAndroid Build Coastguard Worker//                              const int width, const int height, const int filt_idx,
1409*c0909341SAndroid Build Coastguard Worker//                              const int max_width, const int max_height,
1410*c0909341SAndroid Build Coastguard Worker//                              const int bitdepth_max);
1411*c0909341SAndroid Build Coastguard Worker.macro filter_fn bpc
1412*c0909341SAndroid Build Coastguard Workerfunction ipred_filter_\bpc\()bpc_neon, export=1
1413*c0909341SAndroid Build Coastguard Worker        movw            r12, #511
1414*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,   [sp, #88]
1415*c0909341SAndroid Build Coastguard Worker        and             r5,  r5,  r12 // 511
1416*c0909341SAndroid Build Coastguard Worker        movrel          r6,  X(filter_intra_taps)
1417*c0909341SAndroid Build Coastguard Worker        lsl             r5,  r5,  #6
1418*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r5
1419*c0909341SAndroid Build Coastguard Worker        vld1.8          {d20, d21, d22, d23}, [r6, :128]!
1420*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3
1421*c0909341SAndroid Build Coastguard Worker        adr             r5,  L(ipred_filter\bpc\()_tbl)
1422*c0909341SAndroid Build Coastguard Worker        vld1.8          {d27, d28, d29}, [r6, :64]
1423*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #26
1424*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r5, lr, lsl #2]
1425*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q8,  d20
1426*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q9,  d21
1427*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  lr
1428*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q10, d22
1429*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q11, d23
1430*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1
1431*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
1432*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q12, d27
1433*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q13, d28
1434*c0909341SAndroid Build Coastguard Worker        vmovl.s8        q14, d29
1435*c0909341SAndroid Build Coastguard Worker        mov             r7,  #-4
1436*c0909341SAndroid Build Coastguard Worker        vdup.16         q15, r8
1437*c0909341SAndroid Build Coastguard Worker        add             r8,  r2,  #2
1438*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #4
1439*c0909341SAndroid Build Coastguard Worker.if \bpc == 10
1440*c0909341SAndroid Build Coastguard Worker        vmov.i16        q7,  #0
1441*c0909341SAndroid Build Coastguard Worker.endif
1442*c0909341SAndroid Build Coastguard Worker        bx              r5
1443*c0909341SAndroid Build Coastguard Worker
1444*c0909341SAndroid Build Coastguard Worker        .align 2
1445*c0909341SAndroid Build Coastguard WorkerL(ipred_filter\bpc\()_tbl):
1446*c0909341SAndroid Build Coastguard Worker        .word 320f - L(ipred_filter\bpc\()_tbl) + CONFIG_THUMB
1447*c0909341SAndroid Build Coastguard Worker        .word 160f - L(ipred_filter\bpc\()_tbl) + CONFIG_THUMB
1448*c0909341SAndroid Build Coastguard Worker        .word 80f  - L(ipred_filter\bpc\()_tbl) + CONFIG_THUMB
1449*c0909341SAndroid Build Coastguard Worker        .word 40f  - L(ipred_filter\bpc\()_tbl) + CONFIG_THUMB
1450*c0909341SAndroid Build Coastguard Worker
1451*c0909341SAndroid Build Coastguard Worker40:
1452*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0}, [r8]       // top (0-3)
1453*c0909341SAndroid Build Coastguard Worker4:
1454*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2}, [r2], r7   // left (0-1) + topleft (2)
1455*c0909341SAndroid Build Coastguard Worker.if \bpc == 10
1456*c0909341SAndroid Build Coastguard Worker        vmul.i16        q2,  q9,  d0[0]  // p1(top[0]) * filter(1)
1457*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q10, d0[1]  // p2(top[1]) * filter(2)
1458*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q11, d0[2]  // p3(top[2]) * filter(3)
1459*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q12, d0[3]  // p4(top[3]) * filter(4)
1460*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q8,  d2[2]  // p0(topleft) * filter(0)
1461*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q13, d2[1]  // p5(left[0]) * filter(5)
1462*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q14, d2[0]  // p6(left[1]) * filter(6)
1463*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q2,  q2,  #4
1464*c0909341SAndroid Build Coastguard Worker        vmax.s16        q2,  q2,  q7
1465*c0909341SAndroid Build Coastguard Worker.else
1466*c0909341SAndroid Build Coastguard Worker        vmull.s16       q2,  d18, d0[0]  // p1(top[0]) * filter(1)
1467*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d20, d0[1]  // p2(top[1]) * filter(2)
1468*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d22, d0[2]  // p3(top[2]) * filter(3)
1469*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d24, d0[3]  // p4(top[3]) * filter(4)
1470*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d16, d2[2]  // p0(topleft) * filter(0)
1471*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d26, d2[1]  // p5(left[0]) * filter(5)
1472*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d28, d2[0]  // p6(left[1]) * filter(6)
1473*c0909341SAndroid Build Coastguard Worker        vmull.s16       q3,  d19, d0[0]  // p1(top[0]) * filter(1)
1474*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d21, d0[1]  // p2(top[1]) * filter(2)
1475*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d23, d0[2]  // p3(top[2]) * filter(3)
1476*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d25, d0[3]  // p4(top[3]) * filter(4)
1477*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d17, d2[2]  // p0(topleft) * filter(0)
1478*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d27, d2[1]  // p5(left[0]) * filter(5)
1479*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d29, d2[0]  // p6(left[1]) * filter(6)
1480*c0909341SAndroid Build Coastguard Worker        vqrshrun.s32    d4,  q2,  #4
1481*c0909341SAndroid Build Coastguard Worker        vqrshrun.s32    d5,  q3,  #4
1482*c0909341SAndroid Build Coastguard Worker.endif
1483*c0909341SAndroid Build Coastguard Worker        vmin.s16        q2,  q2,  q15
1484*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
1485*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4}, [r0, :64], r1
1486*c0909341SAndroid Build Coastguard Worker        vst1.16         {d5}, [r6, :64], r1
1487*c0909341SAndroid Build Coastguard Worker        vmov            d0,  d5          // move top from [4-7] to [0-3]
1488*c0909341SAndroid Build Coastguard Worker        bgt             4b
1489*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1490*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
1491*c0909341SAndroid Build Coastguard Worker80:
1492*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0},  [r8]      // top (0-7)
1493*c0909341SAndroid Build Coastguard Worker8:
1494*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2}, [r2], r7   // left (0-1) + topleft (2)
1495*c0909341SAndroid Build Coastguard Worker.if \bpc == 10
1496*c0909341SAndroid Build Coastguard Worker        vmul.i16        q2,  q9,  d0[0]  // p1(top[0]) * filter(1)
1497*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q10, d0[1]  // p2(top[1]) * filter(2)
1498*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q11, d0[2]  // p3(top[2]) * filter(3)
1499*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q12, d0[3]  // p4(top[3]) * filter(4)
1500*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q8,  d2[2]  // p0(topleft) * filter(0)
1501*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q13, d2[1]  // p5(left[0]) * filter(5)
1502*c0909341SAndroid Build Coastguard Worker        vmla.i16        q2,  q14, d2[0]  // p6(left[1]) * filter(6)
1503*c0909341SAndroid Build Coastguard Worker        vmul.i16        q3,  q9,  d1[0]  // p1(top[0]) * filter(1)
1504*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q10, d1[1]  // p2(top[1]) * filter(2)
1505*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q11, d1[2]  // p3(top[2]) * filter(3)
1506*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q2,  q2,  #4
1507*c0909341SAndroid Build Coastguard Worker        vmax.s16        q2,  q2,  q7
1508*c0909341SAndroid Build Coastguard Worker        vmin.s16        q2,  q2,  q15
1509*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q12, d1[3]  // p4(top[3]) * filter(4)
1510*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q8,  d0[3]  // p0(topleft) * filter(0)
1511*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q13, d4[3]  // p5(left[0]) * filter(5)
1512*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q14, d5[3]  // p6(left[1]) * filter(6)
1513*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q3,  q3,  #4
1514*c0909341SAndroid Build Coastguard Worker        vmax.s16        q3,  q3,  q7
1515*c0909341SAndroid Build Coastguard Worker.else
1516*c0909341SAndroid Build Coastguard Worker        vmull.s16       q2,  d18, d0[0]  // p1(top[0]) * filter(1)
1517*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d20, d0[1]  // p2(top[1]) * filter(2)
1518*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d22, d0[2]  // p3(top[2]) * filter(3)
1519*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d24, d0[3]  // p4(top[3]) * filter(4)
1520*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d16, d2[2]  // p0(topleft) * filter(0)
1521*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d26, d2[1]  // p5(left[0]) * filter(5)
1522*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q2,  d28, d2[0]  // p6(left[1]) * filter(6)
1523*c0909341SAndroid Build Coastguard Worker        vmull.s16       q3,  d19, d0[0]  // p1(top[0]) * filter(1)
1524*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d21, d0[1]  // p2(top[1]) * filter(2)
1525*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d23, d0[2]  // p3(top[2]) * filter(3)
1526*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d25, d0[3]  // p4(top[3]) * filter(4)
1527*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d17, d2[2]  // p0(topleft) * filter(0)
1528*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d27, d2[1]  // p5(left[0]) * filter(5)
1529*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d29, d2[0]  // p6(left[1]) * filter(6)
1530*c0909341SAndroid Build Coastguard Worker        vqrshrun.s32    d4,  q2,  #4
1531*c0909341SAndroid Build Coastguard Worker        vmull.s16       q4,  d18, d1[0]  // p1(top[0]) * filter(1)
1532*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q4,  d20, d1[1]  // p2(top[1]) * filter(2)
1533*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q4,  d22, d1[2]  // p3(top[2]) * filter(3)
1534*c0909341SAndroid Build Coastguard Worker        vqrshrun.s32    d5,  q3,  #4
1535*c0909341SAndroid Build Coastguard Worker        vmin.s16        q2,  q2,  q15
1536*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q4,  d24, d1[3]  // p4(top[3]) * filter(4)
1537*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q4,  d16, d0[3]  // p0(topleft) * filter(0)
1538*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q4,  d26, d4[3]  // p5(left[0]) * filter(5)
1539*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q4,  d28, d5[3]  // p6(left[1]) * filter(6)
1540*c0909341SAndroid Build Coastguard Worker        vmull.s16       q5,  d19, d1[0]  // p1(top[0]) * filter(1)
1541*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q5,  d21, d1[1]  // p2(top[1]) * filter(2)
1542*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q5,  d23, d1[2]  // p3(top[2]) * filter(3)
1543*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q5,  d25, d1[3]  // p4(top[3]) * filter(4)
1544*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q5,  d17, d0[3]  // p0(topleft) * filter(0)
1545*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q5,  d27, d4[3]  // p5(left[0]) * filter(5)
1546*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q5,  d29, d5[3]  // p6(left[1]) * filter(6)
1547*c0909341SAndroid Build Coastguard Worker        vqrshrun.s32    d6,  q4,  #4
1548*c0909341SAndroid Build Coastguard Worker        vqrshrun.s32    d7,  q5,  #4
1549*c0909341SAndroid Build Coastguard Worker.endif
1550*c0909341SAndroid Build Coastguard Worker        vmin.s16        q3,  q3,  q15
1551*c0909341SAndroid Build Coastguard Worker        vswp            d5,  d6
1552*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
1553*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2}, [r0, :128], r1
1554*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q3
1555*c0909341SAndroid Build Coastguard Worker        vst1.16         {q3}, [r6, :128], r1
1556*c0909341SAndroid Build Coastguard Worker        bgt             8b
1557*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1558*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
1559*c0909341SAndroid Build Coastguard Worker160:
1560*c0909341SAndroid Build Coastguard Worker320:
1561*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  r3, lsl #1
1562*c0909341SAndroid Build Coastguard Worker        mov             lr,  r3
1563*c0909341SAndroid Build Coastguard Worker
1564*c0909341SAndroid Build Coastguard Worker1:
1565*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0}, [r2], r7   // left (0-1) + topleft (2)
1566*c0909341SAndroid Build Coastguard Worker2:
1567*c0909341SAndroid Build Coastguard Worker        vld1.16         {q1, q2}, [r8]!  // top(0-15)
1568*c0909341SAndroid Build Coastguard Worker.if \bpc == 10
1569*c0909341SAndroid Build Coastguard Worker        vmul.i16        q3,  q8,  d0[2]  // p0(topleft) * filter(0)
1570*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q13, d0[1]  // p5(left[0]) * filter(5)
1571*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q14, d0[0]  // p6(left[1]) * filter(6)
1572*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q9,  d2[0]  // p1(top[0]) * filter(1)
1573*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q10, d2[1]  // p2(top[1]) * filter(2)
1574*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q11, d2[2]  // p3(top[2]) * filter(3)
1575*c0909341SAndroid Build Coastguard Worker        vmla.i16        q3,  q12, d2[3]  // p4(top[3]) * filter(4)
1576*c0909341SAndroid Build Coastguard Worker
1577*c0909341SAndroid Build Coastguard Worker        vmul.i16        q4,  q9,  d3[0]  // p1(top[0]) * filter(1)
1578*c0909341SAndroid Build Coastguard Worker        vmla.i16        q4,  q10, d3[1]  // p2(top[1]) * filter(2)
1579*c0909341SAndroid Build Coastguard Worker        vmla.i16        q4,  q11, d3[2]  // p3(top[2]) * filter(3)
1580*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q3,  q3,  #4
1581*c0909341SAndroid Build Coastguard Worker        vmax.s16        q3,  q3,  q7
1582*c0909341SAndroid Build Coastguard Worker        vmin.s16        q3,  q3,  q15
1583*c0909341SAndroid Build Coastguard Worker        vmla.i16        q4,  q12, d3[3]  // p4(top[3]) * filter(4)
1584*c0909341SAndroid Build Coastguard Worker        vmla.i16        q4,  q8,  d2[3]  // p0(topleft) * filter(0)
1585*c0909341SAndroid Build Coastguard Worker        vmla.i16        q4,  q13, d6[3]  // p5(left[0]) * filter(5)
1586*c0909341SAndroid Build Coastguard Worker        vmla.i16        q4,  q14, d7[3]  // p6(left[1]) * filter(6)
1587*c0909341SAndroid Build Coastguard Worker
1588*c0909341SAndroid Build Coastguard Worker        vmul.i16        q5,  q9,  d4[0]  // p1(top[0]) * filter(1)
1589*c0909341SAndroid Build Coastguard Worker        vmla.i16        q5,  q10, d4[1]  // p2(top[1]) * filter(2)
1590*c0909341SAndroid Build Coastguard Worker        vmla.i16        q5,  q11, d4[2]  // p3(top[2]) * filter(3)
1591*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q4,  q4,  #4
1592*c0909341SAndroid Build Coastguard Worker        vmax.s16        q4,  q4,  q7
1593*c0909341SAndroid Build Coastguard Worker        vmin.s16        q4,  q4,  q15
1594*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q4
1595*c0909341SAndroid Build Coastguard Worker        vmla.i16        q5,  q12, d4[3]  // p4(top[3]) * filter(4)
1596*c0909341SAndroid Build Coastguard Worker        vmla.i16        q5,  q8,  d3[3]  // p0(topleft) * filter(0)
1597*c0909341SAndroid Build Coastguard Worker        vmla.i16        q5,  q13, d0[3]  // p5(left[0]) * filter(5)
1598*c0909341SAndroid Build Coastguard Worker        vmla.i16        q5,  q14, d1[3]  // p6(left[1]) * filter(6)
1599*c0909341SAndroid Build Coastguard Worker
1600*c0909341SAndroid Build Coastguard Worker        vmul.i16        q6,  q9,  d5[0]  // p1(top[0]) * filter(1)
1601*c0909341SAndroid Build Coastguard Worker        vmla.i16        q6,  q10, d5[1]  // p2(top[1]) * filter(2)
1602*c0909341SAndroid Build Coastguard Worker        vmla.i16        q6,  q11, d5[2]  // p3(top[2]) * filter(3)
1603*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q5,  q5,  #4
1604*c0909341SAndroid Build Coastguard Worker        vmax.s16        q5,  q5,  q7
1605*c0909341SAndroid Build Coastguard Worker        vmin.s16        q5,  q5,  q15
1606*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q5
1607*c0909341SAndroid Build Coastguard Worker        vmov.u16        r12, d5[3]
1608*c0909341SAndroid Build Coastguard Worker        vmla.i16        q6,  q12, d5[3]  // p4(top[3]) * filter(4)
1609*c0909341SAndroid Build Coastguard Worker        vmla.i16        q6,  q8,  d4[3]  // p0(topleft) * filter(0)
1610*c0909341SAndroid Build Coastguard Worker        vmla.i16        q6,  q13, d0[3]  // p5(left[0]) * filter(5)
1611*c0909341SAndroid Build Coastguard Worker        vmla.i16        q6,  q14, d1[3]  // p6(left[1]) * filter(6)
1612*c0909341SAndroid Build Coastguard Worker        vmov.16         d0[2], r12
1613*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #16
1614*c0909341SAndroid Build Coastguard Worker        vrshr.s16       q6,  q6,  #4
1615*c0909341SAndroid Build Coastguard Worker.else
1616*c0909341SAndroid Build Coastguard Worker        vmull.s16       q3,  d16, d0[2]  // p0(topleft) * filter(0)
1617*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d26, d0[1]  // p5(left[0]) * filter(5)
1618*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d28, d0[0]  // p6(left[1]) * filter(6)
1619*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d18, d2[0]  // p1(top[0]) * filter(1)
1620*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d20, d2[1]  // p2(top[1]) * filter(2)
1621*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d22, d2[2]  // p3(top[2]) * filter(3)
1622*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q3,  d24, d2[3]  // p4(top[3]) * filter(4)
1623*c0909341SAndroid Build Coastguard Worker        vmull.s16       q4,  d17, d0[2]  // p0(topleft) * filter(0)
1624*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q4,  d27, d0[1]  // p5(left[0]) * filter(5)
1625*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q4,  d29, d0[0]  // p6(left[1]) * filter(6)
1626*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q4,  d19, d2[0]  // p1(top[0]) * filter(1)
1627*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q4,  d21, d2[1]  // p2(top[1]) * filter(2)
1628*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q4,  d23, d2[2]  // p3(top[2]) * filter(3)
1629*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q4,  d25, d2[3]  // p4(top[3]) * filter(4)
1630*c0909341SAndroid Build Coastguard Worker        vqrshrun.s32    d6,  q3,  #4
1631*c0909341SAndroid Build Coastguard Worker        vmull.s16       q5,  d18, d3[0]  // p1(top[0]) * filter(1)
1632*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q5,  d20, d3[1]  // p2(top[1]) * filter(2)
1633*c0909341SAndroid Build Coastguard Worker        vqrshrun.s32    d7,  q4,  #4
1634*c0909341SAndroid Build Coastguard Worker        vmin.s16        q3,  q3,  q15
1635*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q5,  d22, d3[2]  // p3(top[2]) * filter(3)
1636*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q5,  d24, d3[3]  // p4(top[3]) * filter(4)
1637*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q5,  d16, d2[3]  // p0(topleft) * filter(0)
1638*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q5,  d26, d6[3]  // p5(left[0]) * filter(5)
1639*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q5,  d28, d7[3]  // p6(left[1]) * filter(6)
1640*c0909341SAndroid Build Coastguard Worker        vmull.s16       q6,  d19, d3[0]  // p1(top[0]) * filter(1)
1641*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q6,  d21, d3[1]  // p2(top[1]) * filter(2)
1642*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q6,  d23, d3[2]  // p3(top[2]) * filter(3)
1643*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q6,  d25, d3[3]  // p4(top[3]) * filter(4)
1644*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q6,  d17, d2[3]  // p0(topleft) * filter(0)
1645*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q6,  d27, d6[3]  // p5(left[0]) * filter(5)
1646*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q6,  d29, d7[3]  // p6(left[1]) * filter(6)
1647*c0909341SAndroid Build Coastguard Worker        vqrshrun.s32    d8,  q5,  #4
1648*c0909341SAndroid Build Coastguard Worker        vmull.s16       q7,  d18, d4[0]  // p1(top[0]) * filter(1)
1649*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q7,  d20, d4[1]  // p2(top[1]) * filter(2)
1650*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q7,  d22, d4[2]  // p3(top[2]) * filter(3)
1651*c0909341SAndroid Build Coastguard Worker        vqrshrun.s32    d9,  q6,  #4
1652*c0909341SAndroid Build Coastguard Worker        vmin.s16        q0,  q4,  q15
1653*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q7,  d24, d4[3]  // p4(top[3]) * filter(4)
1654*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q7,  d16, d3[3]  // p0(topleft) * filter(0)
1655*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q7,  d26, d0[3]  // p5(left[0]) * filter(5)
1656*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q7,  d28, d1[3]  // p6(left[1]) * filter(6)
1657*c0909341SAndroid Build Coastguard Worker        vmin.s16        q4,  q4,  q15
1658*c0909341SAndroid Build Coastguard Worker        vmull.s16       q6,  d19, d4[0]  // p1(top[0]) * filter(1)
1659*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q6,  d21, d4[1]  // p2(top[1]) * filter(2)
1660*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q6,  d23, d4[2]  // p3(top[2]) * filter(3)
1661*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q6,  d25, d4[3]  // p4(top[3]) * filter(4)
1662*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q6,  d17, d3[3]  // p0(topleft) * filter(0)
1663*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q6,  d27, d0[3]  // p5(left[0]) * filter(5)
1664*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q6,  d29, d1[3]  // p6(left[1]) * filter(6)
1665*c0909341SAndroid Build Coastguard Worker        vqrshrun.s32    d10, q7,  #4
1666*c0909341SAndroid Build Coastguard Worker        vmull.s16       q1,  d18, d5[0]  // p1(top[0]) * filter(1)
1667*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q1,  d20, d5[1]  // p2(top[1]) * filter(2)
1668*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q1,  d22, d5[2]  // p3(top[2]) * filter(3)
1669*c0909341SAndroid Build Coastguard Worker        vqrshrun.s32    d11, q6,  #4
1670*c0909341SAndroid Build Coastguard Worker        vmin.s16        q0,  q5,  q15
1671*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q1,  d24, d5[3]  // p4(top[3]) * filter(4)
1672*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q1,  d16, d4[3]  // p0(topleft) * filter(0)
1673*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q1,  d26, d0[3]  // p5(left[0]) * filter(5)
1674*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q1,  d28, d1[3]  // p6(left[1]) * filter(6)
1675*c0909341SAndroid Build Coastguard Worker        vmin.s16        q5,  q5,  q15
1676*c0909341SAndroid Build Coastguard Worker        vmov.u16        r12, d5[3]
1677*c0909341SAndroid Build Coastguard Worker        vmull.s16       q7,  d19, d5[0]  // p1(top[0]) * filter(1)
1678*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q7,  d21, d5[1]  // p2(top[1]) * filter(2)
1679*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q7,  d23, d5[2]  // p3(top[2]) * filter(3)
1680*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q7,  d25, d5[3]  // p4(top[3]) * filter(4)
1681*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q7,  d17, d4[3]  // p0(topleft) * filter(0)
1682*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q7,  d27, d0[3]  // p5(left[0]) * filter(5)
1683*c0909341SAndroid Build Coastguard Worker        vmlal.s16       q7,  d29, d1[3]  // p6(left[1]) * filter(6)
1684*c0909341SAndroid Build Coastguard Worker        vmov.16         d0[2], r12
1685*c0909341SAndroid Build Coastguard Worker        vqrshrun.s32    d12, q1,  #4
1686*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #16
1687*c0909341SAndroid Build Coastguard Worker        vqrshrun.s32    d13, q7,  #4
1688*c0909341SAndroid Build Coastguard Worker.endif
1689*c0909341SAndroid Build Coastguard Worker        vswp            q4,  q5
1690*c0909341SAndroid Build Coastguard Worker.if \bpc == 10
1691*c0909341SAndroid Build Coastguard Worker        vmax.s16        q6,  q6,  q7
1692*c0909341SAndroid Build Coastguard Worker.endif
1693*c0909341SAndroid Build Coastguard Worker        vswp            d7,  d10
1694*c0909341SAndroid Build Coastguard Worker        vmin.s16        q6,  q6,  q15
1695*c0909341SAndroid Build Coastguard Worker
1696*c0909341SAndroid Build Coastguard Worker        vswp            d9,  d12
1697*c0909341SAndroid Build Coastguard Worker
1698*c0909341SAndroid Build Coastguard Worker        vst1.16         {q3, q4}, [r0, :128]!
1699*c0909341SAndroid Build Coastguard Worker        vst1.16         {q5, q6}, [r6, :128]!
1700*c0909341SAndroid Build Coastguard Worker        ble             8f
1701*c0909341SAndroid Build Coastguard Worker        vmov.u16        r12, d13[3]
1702*c0909341SAndroid Build Coastguard Worker        vmov.16         d0[0], r12
1703*c0909341SAndroid Build Coastguard Worker        vmov.u16        r12, d9[3]
1704*c0909341SAndroid Build Coastguard Worker        vmov.16         d0[1], r12
1705*c0909341SAndroid Build Coastguard Worker        b               2b
1706*c0909341SAndroid Build Coastguard Worker8:
1707*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
1708*c0909341SAndroid Build Coastguard Worker
1709*c0909341SAndroid Build Coastguard Worker        ble             9f
1710*c0909341SAndroid Build Coastguard Worker        sub             r8,  r6,  lr, lsl #1
1711*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1
1712*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r1
1713*c0909341SAndroid Build Coastguard Worker        mov             r3,  lr
1714*c0909341SAndroid Build Coastguard Worker        b               1b
1715*c0909341SAndroid Build Coastguard Worker9:
1716*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
1717*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
1718*c0909341SAndroid Build Coastguard Workerendfunc
1719*c0909341SAndroid Build Coastguard Worker.endm
1720*c0909341SAndroid Build Coastguard Worker
// Expand the filter_fn macro once per supported bit depth. Inside the macro
// body, `.if \bpc == 10` selects the 16-bit vmul/vmla accumulation path and
// the `.else` branch (taken for 12) selects the widening vmull/vmlal path.
// NOTE(review): these expansions presumably define ipred_filter_10bpc_neon
// and ipred_filter_12bpc_neon, the branch targets of ipred_filter_16bpc_neon;
// the macro header is outside this view - confirm.
1721*c0909341SAndroid Build Coastguard Workerfilter_fn 10
1722*c0909341SAndroid Build Coastguard Workerfilter_fn 12
1723*c0909341SAndroid Build Coastguard Worker
// Bit-depth dispatcher for the 16 bpc filter intra predictor.
//
// Pushes r4-r8/lr and q4-q7 (24 + 64 = 88 bytes), then loads into r8 the
// stack word that was at [sp, #16] on entry (now [sp, #104]). Values
// <= 0x3ff (signed compare) branch to the 10 bpc variant, anything larger
// to the 12 bpc variant. Nothing is popped here: both targets are entered
// with this frame still on the stack, with r8 holding the loaded value and
// r12 holding 0x3ff.
// NOTE(review): r8 is presumably bitdepth_max, and the targets are
// presumably responsible for unwinding this frame - confirm against the
// C prototype and the filter_fn macro.
function ipred_filter_16bpc_neon, export=1
        push            {r4-r8, lr}
        vpush           {q4-q7}
        ldr             r8,  [sp, #104]  // entry-time [sp, #16]: 104 - 88 bytes just pushed
        movw            r12, #0x3ff      // largest value routed to the 10 bpc path
        cmp             r8,  r12
        bgt             ipred_filter_12bpc_neon
        b               ipred_filter_10bpc_neon
endfunc
1733*c0909341SAndroid Build Coastguard Worker
1734*c0909341SAndroid Build Coastguard Worker// void pal_pred_16bpc_neon(pixel *dst, const ptrdiff_t stride,
1735*c0909341SAndroid Build Coastguard Worker//                          const pixel *const pal, const uint8_t *idx,
1736*c0909341SAndroid Build Coastguard Worker//                          const int w, const int h);
1737*c0909341SAndroid Build Coastguard Workerfunction pal_pred_16bpc_neon, export=1
1738*c0909341SAndroid Build Coastguard Worker        push            {r4-r5, lr}
1739*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [sp, #12]
1740*c0909341SAndroid Build Coastguard Worker        ldr             r5,  [sp, #16]
1741*c0909341SAndroid Build Coastguard Worker        vld1.16         {q14}, [r2, :128]
1742*c0909341SAndroid Build Coastguard Worker        clz             lr,  r4
1743*c0909341SAndroid Build Coastguard Worker        adr             r12, L(pal_pred_tbl)
1744*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #25
1745*c0909341SAndroid Build Coastguard Worker        vmov.i8         q13, #7
1746*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r12, lr, lsl #2]
1747*c0909341SAndroid Build Coastguard Worker        vmov.i16        q15, #0x100
1748*c0909341SAndroid Build Coastguard Worker        add             r12, r12, lr
1749*c0909341SAndroid Build Coastguard Worker        add             r2,  r0,  r1
1750*c0909341SAndroid Build Coastguard Worker        bx              r12
1751*c0909341SAndroid Build Coastguard Worker
1752*c0909341SAndroid Build Coastguard Worker        .align 2
1753*c0909341SAndroid Build Coastguard WorkerL(pal_pred_tbl):
1754*c0909341SAndroid Build Coastguard Worker        .word 640f - L(pal_pred_tbl) + CONFIG_THUMB
1755*c0909341SAndroid Build Coastguard Worker        .word 320f - L(pal_pred_tbl) + CONFIG_THUMB
1756*c0909341SAndroid Build Coastguard Worker        .word 160f - L(pal_pred_tbl) + CONFIG_THUMB
1757*c0909341SAndroid Build Coastguard Worker        .word 80f  - L(pal_pred_tbl) + CONFIG_THUMB
1758*c0909341SAndroid Build Coastguard Worker        .word 40f  - L(pal_pred_tbl) + CONFIG_THUMB
1759*c0909341SAndroid Build Coastguard Worker
1760*c0909341SAndroid Build Coastguard Worker40:
1761*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
1762*c0909341SAndroid Build Coastguard Worker4:
1763*c0909341SAndroid Build Coastguard Worker        vld1.8          {d2}, [r3, :64]!
1764*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #4
1765*c0909341SAndroid Build Coastguard Worker        vshr.u8         d3,  d2,  #4
1766*c0909341SAndroid Build Coastguard Worker        vand.u8         d2,  d2,  d26
1767*c0909341SAndroid Build Coastguard Worker        vzip.8          d2,  d3
1768*c0909341SAndroid Build Coastguard Worker        // Restructure q1 from a, b, c, ... into 2*a, 2*a+1, 2*b, 2*b+1, 2*c, 2*c+1, ...
1769*c0909341SAndroid Build Coastguard Worker        vadd.i8         q0,  q1,  q1
1770*c0909341SAndroid Build Coastguard Worker        vadd.i8         q1,  q1,  q1
1771*c0909341SAndroid Build Coastguard Worker        vzip.8          q0,  q1
1772*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q15
1773*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q15
1774*c0909341SAndroid Build Coastguard Worker        vtbl.8          d0, {q14}, d0
1775*c0909341SAndroid Build Coastguard Worker        vtbl.8          d1, {q14}, d1
1776*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0}, [r0, :64], r1
1777*c0909341SAndroid Build Coastguard Worker        vtbl.8          d2, {q14}, d2
1778*c0909341SAndroid Build Coastguard Worker        vst1.16         {d1}, [r2, :64], r1
1779*c0909341SAndroid Build Coastguard Worker        vtbl.8          d3, {q14}, d3
1780*c0909341SAndroid Build Coastguard Worker        vst1.16         {d2}, [r0, :64], r1
1781*c0909341SAndroid Build Coastguard Worker        vst1.16         {d3}, [r2, :64], r1
1782*c0909341SAndroid Build Coastguard Worker        bgt             4b
1783*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
1784*c0909341SAndroid Build Coastguard Worker80:
1785*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
1786*c0909341SAndroid Build Coastguard Worker8:
1787*c0909341SAndroid Build Coastguard Worker        vld1.8          {q1}, [r3, :64]!
1788*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #4
1789*c0909341SAndroid Build Coastguard Worker        vshr.u8         q2,  q1,  #4
1790*c0909341SAndroid Build Coastguard Worker        vand.u8         q1,  q1,  q13
1791*c0909341SAndroid Build Coastguard Worker        vzip.8          q1,  q2
1792*c0909341SAndroid Build Coastguard Worker        // Prefer doing the adds twice, instead of chaining a vmov after
1793*c0909341SAndroid Build Coastguard Worker        // the add.
1794*c0909341SAndroid Build Coastguard Worker        vadd.i8         q0,  q1,  q1
1795*c0909341SAndroid Build Coastguard Worker        vadd.i8         q1,  q1,  q1
1796*c0909341SAndroid Build Coastguard Worker        vadd.i8         q3,  q2,  q2
1797*c0909341SAndroid Build Coastguard Worker        vadd.i8         q2,  q2,  q2
1798*c0909341SAndroid Build Coastguard Worker        vzip.8          q0,  q1
1799*c0909341SAndroid Build Coastguard Worker        vzip.8          q2,  q3
1800*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q15
1801*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q15
1802*c0909341SAndroid Build Coastguard Worker        vtbl.8          d0, {q14}, d0
1803*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q15
1804*c0909341SAndroid Build Coastguard Worker        vtbl.8          d1, {q14}, d1
1805*c0909341SAndroid Build Coastguard Worker        vadd.i16        q3,  q3,  q15
1806*c0909341SAndroid Build Coastguard Worker        vtbl.8          d2, {q14}, d2
1807*c0909341SAndroid Build Coastguard Worker        vtbl.8          d3, {q14}, d3
1808*c0909341SAndroid Build Coastguard Worker        vtbl.8          d4, {q14}, d4
1809*c0909341SAndroid Build Coastguard Worker        vtbl.8          d5, {q14}, d5
1810*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0}, [r0, :128], r1
1811*c0909341SAndroid Build Coastguard Worker        vtbl.8          d6, {q14}, d6
1812*c0909341SAndroid Build Coastguard Worker        vst1.16         {q1}, [r2, :128], r1
1813*c0909341SAndroid Build Coastguard Worker        vtbl.8          d7, {q14}, d7
1814*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2}, [r0, :128], r1
1815*c0909341SAndroid Build Coastguard Worker        vst1.16         {q3}, [r2, :128], r1
1816*c0909341SAndroid Build Coastguard Worker        bgt             8b
1817*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
1818*c0909341SAndroid Build Coastguard Worker160:
1819*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
1820*c0909341SAndroid Build Coastguard Worker16:
1821*c0909341SAndroid Build Coastguard Worker        vld1.8          {q10, q11}, [r3, :64]!
1822*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #4
1823*c0909341SAndroid Build Coastguard Worker        vand.u8         q2,  q10, q13
1824*c0909341SAndroid Build Coastguard Worker        vshr.u8         q3,  q10, #4
1825*c0909341SAndroid Build Coastguard Worker        vand.u8         q10, q11, q13
1826*c0909341SAndroid Build Coastguard Worker        vshr.u8         q11, q11, #4
1827*c0909341SAndroid Build Coastguard Worker        vzip.8          q2,  q3
1828*c0909341SAndroid Build Coastguard Worker        vzip.8          q10, q11
1829*c0909341SAndroid Build Coastguard Worker        vadd.i8         q0,  q2,  q2
1830*c0909341SAndroid Build Coastguard Worker        vadd.i8         q1,  q2,  q2
1831*c0909341SAndroid Build Coastguard Worker        vadd.i8         q2,  q3,  q3
1832*c0909341SAndroid Build Coastguard Worker        vadd.i8         q3,  q3,  q3
1833*c0909341SAndroid Build Coastguard Worker        vadd.i8         q8,  q10, q10
1834*c0909341SAndroid Build Coastguard Worker        vadd.i8         q9,  q10, q10
1835*c0909341SAndroid Build Coastguard Worker        vadd.i8         q10, q11, q11
1836*c0909341SAndroid Build Coastguard Worker        vzip.8          q0,  q1
1837*c0909341SAndroid Build Coastguard Worker        vadd.i8         q11, q11, q11
1838*c0909341SAndroid Build Coastguard Worker        vzip.8          q2,  q3
1839*c0909341SAndroid Build Coastguard Worker        vzip.8          q8,  q9
1840*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q15
1841*c0909341SAndroid Build Coastguard Worker        vzip.8          q10, q11
1842*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q15
1843*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q15
1844*c0909341SAndroid Build Coastguard Worker        vadd.i16        q3,  q3,  q15
1845*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q15
1846*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q15
1847*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q15
1848*c0909341SAndroid Build Coastguard Worker        vtbl.8          d0,  {q14}, d0
1849*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q15
1850*c0909341SAndroid Build Coastguard Worker        vtbl.8          d1,  {q14}, d1
1851*c0909341SAndroid Build Coastguard Worker        vtbl.8          d2,  {q14}, d2
1852*c0909341SAndroid Build Coastguard Worker        vtbl.8          d3,  {q14}, d3
1853*c0909341SAndroid Build Coastguard Worker        vtbl.8          d4,  {q14}, d4
1854*c0909341SAndroid Build Coastguard Worker        vtbl.8          d5,  {q14}, d5
1855*c0909341SAndroid Build Coastguard Worker        vtbl.8          d6,  {q14}, d6
1856*c0909341SAndroid Build Coastguard Worker        vtbl.8          d7,  {q14}, d7
1857*c0909341SAndroid Build Coastguard Worker        vtbl.8          d16, {q14}, d16
1858*c0909341SAndroid Build Coastguard Worker        vtbl.8          d17, {q14}, d17
1859*c0909341SAndroid Build Coastguard Worker        vtbl.8          d18, {q14}, d18
1860*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0,  q1},  [r0, :128], r1
1861*c0909341SAndroid Build Coastguard Worker        vtbl.8          d19, {q14}, d19
1862*c0909341SAndroid Build Coastguard Worker        vtbl.8          d20, {q14}, d20
1863*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2,  q3},  [r2, :128], r1
1864*c0909341SAndroid Build Coastguard Worker        vtbl.8          d21, {q14}, d21
1865*c0909341SAndroid Build Coastguard Worker        vtbl.8          d22, {q14}, d22
1866*c0909341SAndroid Build Coastguard Worker        vst1.16         {q8,  q9},  [r0, :128], r1
1867*c0909341SAndroid Build Coastguard Worker        vtbl.8          d23, {q14}, d23
1868*c0909341SAndroid Build Coastguard Worker        vst1.16         {q10, q11}, [r2, :128], r1
1869*c0909341SAndroid Build Coastguard Worker        bgt             16b
1870*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
1871*c0909341SAndroid Build Coastguard Worker320:
1872*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1
1873*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #32
1874*c0909341SAndroid Build Coastguard Worker32:
1875*c0909341SAndroid Build Coastguard Worker        vld1.8          {q10, q11}, [r3, :64]!
1876*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #2
1877*c0909341SAndroid Build Coastguard Worker        vand.u8         q2,  q10, q13
1878*c0909341SAndroid Build Coastguard Worker        vshr.u8         q3,  q10, #4
1879*c0909341SAndroid Build Coastguard Worker        vand.u8         q10, q11, q13
1880*c0909341SAndroid Build Coastguard Worker        vshr.u8         q11, q11, #4
1881*c0909341SAndroid Build Coastguard Worker        vzip.8          q2,  q3
1882*c0909341SAndroid Build Coastguard Worker        vzip.8          q10, q11
1883*c0909341SAndroid Build Coastguard Worker        vadd.i8         q0,  q2,  q2
1884*c0909341SAndroid Build Coastguard Worker        vadd.i8         q1,  q2,  q2
1885*c0909341SAndroid Build Coastguard Worker        vadd.i8         q2,  q3,  q3
1886*c0909341SAndroid Build Coastguard Worker        vadd.i8         q3,  q3,  q3
1887*c0909341SAndroid Build Coastguard Worker        vadd.i8         q8,  q10, q10
1888*c0909341SAndroid Build Coastguard Worker        vadd.i8         q9,  q10, q10
1889*c0909341SAndroid Build Coastguard Worker        vadd.i8         q10, q11, q11
1890*c0909341SAndroid Build Coastguard Worker        vzip.8          q0,  q1
1891*c0909341SAndroid Build Coastguard Worker        vadd.i8         q11, q11, q11
1892*c0909341SAndroid Build Coastguard Worker        vzip.8          q2,  q3
1893*c0909341SAndroid Build Coastguard Worker        vzip.8          q8,  q9
1894*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q15
1895*c0909341SAndroid Build Coastguard Worker        vzip.8          q10, q11
1896*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q15
1897*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q15
1898*c0909341SAndroid Build Coastguard Worker        vadd.i16        q3,  q3,  q15
1899*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q15
1900*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q15
1901*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q15
1902*c0909341SAndroid Build Coastguard Worker        vtbl.8          d0,  {q14}, d0
1903*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q15
1904*c0909341SAndroid Build Coastguard Worker        vtbl.8          d1,  {q14}, d1
1905*c0909341SAndroid Build Coastguard Worker        vtbl.8          d2,  {q14}, d2
1906*c0909341SAndroid Build Coastguard Worker        vtbl.8          d3,  {q14}, d3
1907*c0909341SAndroid Build Coastguard Worker        vtbl.8          d4,  {q14}, d4
1908*c0909341SAndroid Build Coastguard Worker        vtbl.8          d5,  {q14}, d5
1909*c0909341SAndroid Build Coastguard Worker        vtbl.8          d6,  {q14}, d6
1910*c0909341SAndroid Build Coastguard Worker        vtbl.8          d7,  {q14}, d7
1911*c0909341SAndroid Build Coastguard Worker        vtbl.8          d16, {q14}, d16
1912*c0909341SAndroid Build Coastguard Worker        vtbl.8          d17, {q14}, d17
1913*c0909341SAndroid Build Coastguard Worker        vtbl.8          d18, {q14}, d18
1914*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0,  q1},  [r0, :128]!
1915*c0909341SAndroid Build Coastguard Worker        vtbl.8          d19, {q14}, d19
1916*c0909341SAndroid Build Coastguard Worker        vtbl.8          d20, {q14}, d20
1917*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2,  q3},  [r0, :128], r1
1918*c0909341SAndroid Build Coastguard Worker        vtbl.8          d21, {q14}, d21
1919*c0909341SAndroid Build Coastguard Worker        vtbl.8          d22, {q14}, d22
1920*c0909341SAndroid Build Coastguard Worker        vst1.16         {q8,  q9},  [r2, :128]!
1921*c0909341SAndroid Build Coastguard Worker        vtbl.8          d23, {q14}, d23
1922*c0909341SAndroid Build Coastguard Worker        vst1.16         {q10, q11}, [r2, :128], r1
1923*c0909341SAndroid Build Coastguard Worker        bgt             32b
1924*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
1925*c0909341SAndroid Build Coastguard Worker640:
1926*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  #96
1927*c0909341SAndroid Build Coastguard Worker64:
1928*c0909341SAndroid Build Coastguard Worker        vld1.8          {q10, q11}, [r3, :64]!
1929*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  #1
1930*c0909341SAndroid Build Coastguard Worker        vand.u8         q2,  q10, q13
1931*c0909341SAndroid Build Coastguard Worker        vshr.u8         q3,  q10, #4
1932*c0909341SAndroid Build Coastguard Worker        vand.u8         q10, q11, q13
1933*c0909341SAndroid Build Coastguard Worker        vshr.u8         q11, q11, #4
1934*c0909341SAndroid Build Coastguard Worker        vzip.8          q2,  q3
1935*c0909341SAndroid Build Coastguard Worker        vzip.8          q10, q11
1936*c0909341SAndroid Build Coastguard Worker        vadd.i8         q0,  q2,  q2
1937*c0909341SAndroid Build Coastguard Worker        vadd.i8         q1,  q2,  q2
1938*c0909341SAndroid Build Coastguard Worker        vadd.i8         q2,  q3,  q3
1939*c0909341SAndroid Build Coastguard Worker        vadd.i8         q3,  q3,  q3
1940*c0909341SAndroid Build Coastguard Worker        vadd.i8         q8,  q10, q10
1941*c0909341SAndroid Build Coastguard Worker        vadd.i8         q9,  q10, q10
1942*c0909341SAndroid Build Coastguard Worker        vadd.i8         q10, q11, q11
1943*c0909341SAndroid Build Coastguard Worker        vzip.8          q0,  q1
1944*c0909341SAndroid Build Coastguard Worker        vadd.i8         q11, q11, q11
1945*c0909341SAndroid Build Coastguard Worker        vzip.8          q2,  q3
1946*c0909341SAndroid Build Coastguard Worker        vzip.8          q8,  q9
1947*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q15
1948*c0909341SAndroid Build Coastguard Worker        vzip.8          q10, q11
1949*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q15
1950*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q15
1951*c0909341SAndroid Build Coastguard Worker        vadd.i16        q3,  q3,  q15
1952*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q15
1953*c0909341SAndroid Build Coastguard Worker        vadd.i16        q9,  q9,  q15
1954*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q15
1955*c0909341SAndroid Build Coastguard Worker        vtbl.8          d0,  {q14}, d0
1956*c0909341SAndroid Build Coastguard Worker        vadd.i16        q11, q11, q15
1957*c0909341SAndroid Build Coastguard Worker        vtbl.8          d1,  {q14}, d1
1958*c0909341SAndroid Build Coastguard Worker        vtbl.8          d2,  {q14}, d2
1959*c0909341SAndroid Build Coastguard Worker        vtbl.8          d3,  {q14}, d3
1960*c0909341SAndroid Build Coastguard Worker        vtbl.8          d4,  {q14}, d4
1961*c0909341SAndroid Build Coastguard Worker        vtbl.8          d5,  {q14}, d5
1962*c0909341SAndroid Build Coastguard Worker        vtbl.8          d6,  {q14}, d6
1963*c0909341SAndroid Build Coastguard Worker        vtbl.8          d7,  {q14}, d7
1964*c0909341SAndroid Build Coastguard Worker        vtbl.8          d16, {q14}, d16
1965*c0909341SAndroid Build Coastguard Worker        vtbl.8          d17, {q14}, d17
1966*c0909341SAndroid Build Coastguard Worker        vtbl.8          d18, {q14}, d18
1967*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0,  q1},  [r0, :128]!
1968*c0909341SAndroid Build Coastguard Worker        vtbl.8          d19, {q14}, d19
1969*c0909341SAndroid Build Coastguard Worker        vtbl.8          d20, {q14}, d20
1970*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2,  q3},  [r0, :128]!
1971*c0909341SAndroid Build Coastguard Worker        vtbl.8          d21, {q14}, d21
1972*c0909341SAndroid Build Coastguard Worker        vtbl.8          d22, {q14}, d22
1973*c0909341SAndroid Build Coastguard Worker        vst1.16         {q8,  q9},  [r0, :128]!
1974*c0909341SAndroid Build Coastguard Worker        vtbl.8          d23, {q14}, d23
1975*c0909341SAndroid Build Coastguard Worker        vst1.16         {q10, q11}, [r0, :128], r1
1976*c0909341SAndroid Build Coastguard Worker        bgt             64b
1977*c0909341SAndroid Build Coastguard Worker        pop             {r4-r5, pc}
1978*c0909341SAndroid Build Coastguard Workerendfunc
1979*c0909341SAndroid Build Coastguard Worker
1980*c0909341SAndroid Build Coastguard Worker// void ipred_cfl_128_16bpc_neon(pixel *dst, const ptrdiff_t stride,
1981*c0909341SAndroid Build Coastguard Worker//                               const pixel *const topleft,
1982*c0909341SAndroid Build Coastguard Worker//                               const int width, const int height,
1983*c0909341SAndroid Build Coastguard Worker//                               const int16_t *ac, const int alpha,
1984*c0909341SAndroid Build Coastguard Worker//                               const int bitdepth_max);
//
// Writes a width x height block where every pixel is
//     clamp(dc + ((ac[i] * alpha + sign + 32) >> 6), 0, bitdepth_max)
// with sign = -1 when ac[i] * alpha is negative (0 otherwise) and, for this
// entry point, dc = (bitdepth_max + 1) >> 1. topleft (r2) is not read here.
//
// Register arguments: r0 = dst, r1 = stride, r2 = topleft, r3 = width;
// height, ac, alpha and bitdepth_max are fetched from the stack.
//
// The L(ipred_cfl_splat_w*) loops below are also entered from
// ipred_cfl_top_16bpc_neon and ipred_cfl_left_16bpc_neon. State they rely on:
//     r0 = even-row pointer, r6 = odd-row pointer, r1 = 2 * stride,
//     r3 = width, r4 = height, r5 = ac,
//     q0 = dc, q1 = alpha, q14 = 0, q15 = bitdepth_max (all splatted .16)
1985*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_128_16bpc_neon, export=1
1986*c0909341SAndroid Build Coastguard Worker        push            {r4-r8, lr}   // 6 registers = 24 bytes, so stack args start at sp+24
1987*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #24] // r4 = height, r5 = ac
1988*c0909341SAndroid Build Coastguard Worker        ldrd            r6,  r7,  [sp, #32] // r6 = alpha, r7 = bitdepth_max
1989*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3       // lr = clz(width)
1990*c0909341SAndroid Build Coastguard Worker        vdup.16         q15, r7       // bitdepth_max
1991*c0909341SAndroid Build Coastguard Worker        adr             r12, L(ipred_cfl_128_tbl)
1992*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #26 // table index: width 32 -> 0, 16 -> 1, 8 -> 2, 4 -> 3
1993*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r12, lr, lsl #2] // table-relative offset of the target loop
1994*c0909341SAndroid Build Coastguard Worker        vrshr.u16       q0,  q15, #1  // dc = (bitdepth_max + 1) >> 1
1995*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  r6       // alpha
1996*c0909341SAndroid Build Coastguard Worker        add             r12, r12, lr  // r12 = address of the width-specific loop
1997*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1  // r6 = dst + stride (alpha already copied to q1)
1998*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1  // r0 and r6 each step two rows at a time
1999*c0909341SAndroid Build Coastguard Worker        vmov.i16        q14, #0       // lower clamp bound
2000*c0909341SAndroid Build Coastguard Worker        bx              r12
2001*c0909341SAndroid Build Coastguard Worker
2002*c0909341SAndroid Build Coastguard Worker        .align 2
// Both labels name the same table; entries are offsets from the table start.
// Index 0 and 1 (width 32 and 16) share the w16 loop, which iterates over the
// row in chunks of 16 pixels.
// NOTE(review): CONFIG_THUMB is defined outside this chunk; presumably it sets
// the Thumb bit of the bx target when assembling as Thumb - confirm.
2003*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_128_tbl):
2004*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_splat_tbl):
2005*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_splat_w16) - L(ipred_cfl_128_tbl) + CONFIG_THUMB
2006*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_splat_w16) - L(ipred_cfl_128_tbl) + CONFIG_THUMB
2007*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_splat_w8)  - L(ipred_cfl_128_tbl) + CONFIG_THUMB
2008*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_splat_w4)  - L(ipred_cfl_128_tbl) + CONFIG_THUMB
2009*c0909341SAndroid Build Coastguard Worker
// Width 4: each iteration consumes 16 ac values and writes 4 rows of 4 pixels.
2010*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_splat_w4):
2011*c0909341SAndroid Build Coastguard Worker        vld1.16         {q8, q9}, [r5, :128]! // 16 ac coefficients, ac pointer advanced
2012*c0909341SAndroid Build Coastguard Worker        vmull.s16       q2,  d16, d2  // diff = ac * alpha
2013*c0909341SAndroid Build Coastguard Worker        vmull.s16       q3,  d17, d3
2014*c0909341SAndroid Build Coastguard Worker        vmull.s16       q8,  d18, d2
2015*c0909341SAndroid Build Coastguard Worker        vmull.s16       q9,  d19, d3
2016*c0909341SAndroid Build Coastguard Worker        vshr.s32        q10, q2,  #31 // sign = diff >> 31 (arithmetic: 0 or -1 per lane)
2017*c0909341SAndroid Build Coastguard Worker        vshr.s32        q11, q3,  #31
2018*c0909341SAndroid Build Coastguard Worker        vshr.s32        q12, q8,  #31
2019*c0909341SAndroid Build Coastguard Worker        vshr.s32        q13, q9,  #31
2020*c0909341SAndroid Build Coastguard Worker        vadd.i32        q2,  q2,  q10 // diff + sign
2021*c0909341SAndroid Build Coastguard Worker        vadd.i32        q3,  q3,  q11
2022*c0909341SAndroid Build Coastguard Worker        vadd.i32        q8,  q8,  q12
2023*c0909341SAndroid Build Coastguard Worker        vadd.i32        q9,  q9,  q13
2024*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d4,  q2,  #6  // (diff + sign + 32) >> 6 = apply_sign()
2025*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d5,  q3,  #6
2026*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d6,  q8,  #6
2027*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d7,  q9,  #6
2028*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q0  // dc + apply_sign()
2029*c0909341SAndroid Build Coastguard Worker        vadd.i16        q3,  q3,  q0
2030*c0909341SAndroid Build Coastguard Worker        vmax.s16        q2,  q2,  q14 // clamp to >= 0
2031*c0909341SAndroid Build Coastguard Worker        vmax.s16        q3,  q3,  q14
2032*c0909341SAndroid Build Coastguard Worker        vmin.s16        q2,  q2,  q15 // clamp to <= bitdepth_max
2033*c0909341SAndroid Build Coastguard Worker        vmin.s16        q3,  q3,  q15
2034*c0909341SAndroid Build Coastguard Worker        vst1.16         {d4}, [r0, :64], r1 // row 0
2035*c0909341SAndroid Build Coastguard Worker        vst1.16         {d5}, [r6, :64], r1 // row 1
2036*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4  // four rows done; NEON stores leave the flags intact
2037*c0909341SAndroid Build Coastguard Worker        vst1.16         {d6}, [r0, :64], r1 // row 2
2038*c0909341SAndroid Build Coastguard Worker        vst1.16         {d7}, [r6, :64], r1 // row 3
2039*c0909341SAndroid Build Coastguard Worker        bgt             L(ipred_cfl_splat_w4)
2040*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
// Width 8: each iteration consumes 16 ac values and writes 2 rows of 8 pixels.
2041*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_splat_w8):
2042*c0909341SAndroid Build Coastguard Worker        vld1.16         {q8, q9}, [r5, :128]! // q8 = row 0 ac, q9 = row 1 ac
2043*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2  // two rows per iteration; flags consumed by bgt below
2044*c0909341SAndroid Build Coastguard Worker        vmull.s16       q2,  d16, d2  // diff = ac * alpha
2045*c0909341SAndroid Build Coastguard Worker        vmull.s16       q3,  d17, d3
2046*c0909341SAndroid Build Coastguard Worker        vmull.s16       q8,  d18, d2
2047*c0909341SAndroid Build Coastguard Worker        vmull.s16       q9,  d19, d3
2048*c0909341SAndroid Build Coastguard Worker        vshr.s32        q10, q2,  #31 // sign = diff >> 31 (arithmetic: 0 or -1 per lane)
2049*c0909341SAndroid Build Coastguard Worker        vshr.s32        q11, q3,  #31
2050*c0909341SAndroid Build Coastguard Worker        vshr.s32        q12, q8,  #31
2051*c0909341SAndroid Build Coastguard Worker        vshr.s32        q13, q9,  #31
2052*c0909341SAndroid Build Coastguard Worker        vadd.i32        q2,  q2,  q10 // diff + sign
2053*c0909341SAndroid Build Coastguard Worker        vadd.i32        q3,  q3,  q11
2054*c0909341SAndroid Build Coastguard Worker        vadd.i32        q8,  q8,  q12
2055*c0909341SAndroid Build Coastguard Worker        vadd.i32        q9,  q9,  q13
2056*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d4,  q2,  #6  // (diff + sign + 32) >> 6 = apply_sign()
2057*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d5,  q3,  #6
2058*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d6,  q8,  #6
2059*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d7,  q9,  #6
2060*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q0  // dc + apply_sign()
2061*c0909341SAndroid Build Coastguard Worker        vadd.i16        q3,  q3,  q0
2062*c0909341SAndroid Build Coastguard Worker        vmax.s16        q2,  q2,  q14 // clamp to >= 0
2063*c0909341SAndroid Build Coastguard Worker        vmax.s16        q3,  q3,  q14
2064*c0909341SAndroid Build Coastguard Worker        vmin.s16        q2,  q2,  q15 // clamp to <= bitdepth_max
2065*c0909341SAndroid Build Coastguard Worker        vmin.s16        q3,  q3,  q15
2066*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2}, [r0, :128], r1 // row 0
2067*c0909341SAndroid Build Coastguard Worker        vst1.16         {q3}, [r6, :128], r1 // row 1
2068*c0909341SAndroid Build Coastguard Worker        bgt             L(ipred_cfl_splat_w8)
2069*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
// Width 16 and 32: two rows are processed in parallel, 16 pixels of each per
// inner iteration; the inner loop repeats until the full width is covered.
2070*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_splat_w16):
2071*c0909341SAndroid Build Coastguard Worker        vpush           {q4-q7}       // q4-q7 are used as scratch below; restored by vpop before returning
2072*c0909341SAndroid Build Coastguard Worker        add             r12, r5,  r3, lsl #1 // r12 = ac + width int16s: ac of the second row
2073*c0909341SAndroid Build Coastguard Worker        sub             r1,  r1,  r3, lsl #1 // 2*stride minus the width*2 bytes the post-incrementing stores advance
2074*c0909341SAndroid Build Coastguard Worker        mov             lr,  r3       // keep width; r3 becomes the per-row countdown
2075*c0909341SAndroid Build Coastguard Worker1:
2076*c0909341SAndroid Build Coastguard Worker        vld1.16         {q6, q7}, [r5, :128]! // 16 ac values of the even row
2077*c0909341SAndroid Build Coastguard Worker        vmull.s16       q2,  d12, d2  // diff = ac * alpha
2078*c0909341SAndroid Build Coastguard Worker        vld1.16         {q8, q9}, [r12, :128]! // 16 ac values of the odd row
2079*c0909341SAndroid Build Coastguard Worker        vmull.s16       q3,  d13, d3
2080*c0909341SAndroid Build Coastguard Worker        vmull.s16       q4,  d14, d2
2081*c0909341SAndroid Build Coastguard Worker        vmull.s16       q5,  d15, d3
2082*c0909341SAndroid Build Coastguard Worker        vmull.s16       q6,  d16, d2  // q6/q7 inputs (d12-d15) were consumed above, so they can be overwritten
2083*c0909341SAndroid Build Coastguard Worker        vmull.s16       q7,  d17, d3
2084*c0909341SAndroid Build Coastguard Worker        vmull.s16       q8,  d18, d2
2085*c0909341SAndroid Build Coastguard Worker        vmull.s16       q9,  d19, d3
2086*c0909341SAndroid Build Coastguard Worker        vshr.s32        q10, q2,  #31 // sign = diff >> 31 (arithmetic: 0 or -1 per lane)
2087*c0909341SAndroid Build Coastguard Worker        vshr.s32        q11, q3,  #31
2088*c0909341SAndroid Build Coastguard Worker        vshr.s32        q12, q4,  #31
2089*c0909341SAndroid Build Coastguard Worker        vshr.s32        q13, q5,  #31
2090*c0909341SAndroid Build Coastguard Worker        vadd.i32        q2,  q2,  q10 // diff + sign
2091*c0909341SAndroid Build Coastguard Worker        vshr.s32        q10, q6,  #31 // q10-q13 are recycled for the odd-row signs as soon as each is consumed
2092*c0909341SAndroid Build Coastguard Worker        vadd.i32        q3,  q3,  q11
2093*c0909341SAndroid Build Coastguard Worker        vshr.s32        q11, q7,  #31
2094*c0909341SAndroid Build Coastguard Worker        vadd.i32        q4,  q4,  q12
2095*c0909341SAndroid Build Coastguard Worker        vshr.s32        q12, q8,  #31
2096*c0909341SAndroid Build Coastguard Worker        vadd.i32        q5,  q5,  q13
2097*c0909341SAndroid Build Coastguard Worker        vshr.s32        q13, q9,  #31
2098*c0909341SAndroid Build Coastguard Worker        vadd.i32        q6,  q6,  q10
2099*c0909341SAndroid Build Coastguard Worker        vadd.i32        q7,  q7,  q11
2100*c0909341SAndroid Build Coastguard Worker        vadd.i32        q8,  q8,  q12
2101*c0909341SAndroid Build Coastguard Worker        vadd.i32        q9,  q9,  q13
2102*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d4,  q2,  #6  // (diff + sign + 32) >> 6 = apply_sign()
2103*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d5,  q3,  #6
2104*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d6,  q4,  #6
2105*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d7,  q5,  #6
2106*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q0  // dc + apply_sign()
2107*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d8,  q6,  #6  // odd-row results are narrowed into q4/q5 (d8-d11)
2108*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d9,  q7,  #6
2109*c0909341SAndroid Build Coastguard Worker        vadd.i16        q3,  q3,  q0
2110*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d10, q8,  #6
2111*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d11, q9,  #6
2112*c0909341SAndroid Build Coastguard Worker        vadd.i16        q4,  q4,  q0
2113*c0909341SAndroid Build Coastguard Worker        vadd.i16        q5,  q5,  q0
2114*c0909341SAndroid Build Coastguard Worker        vmax.s16        q2,  q2,  q14 // clamp to >= 0
2115*c0909341SAndroid Build Coastguard Worker        vmax.s16        q3,  q3,  q14
2116*c0909341SAndroid Build Coastguard Worker        vmax.s16        q4,  q4,  q14
2117*c0909341SAndroid Build Coastguard Worker        vmax.s16        q5,  q5,  q14
2118*c0909341SAndroid Build Coastguard Worker        vmin.s16        q2,  q2,  q15 // clamp to <= bitdepth_max
2119*c0909341SAndroid Build Coastguard Worker        vmin.s16        q3,  q3,  q15
2120*c0909341SAndroid Build Coastguard Worker        vmin.s16        q4,  q4,  q15
2121*c0909341SAndroid Build Coastguard Worker        vmin.s16        q5,  q5,  q15
2122*c0909341SAndroid Build Coastguard Worker        subs            r3,  r3,  #16 // 16 columns done in both rows
2123*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]! // even row
2124*c0909341SAndroid Build Coastguard Worker        vst1.16         {q4, q5}, [r6, :128]! // odd row
2125*c0909341SAndroid Build Coastguard Worker        bgt             1b            // more columns left in this row pair
2126*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2  // two rows done; the non-flag-setting add/mov below keep these flags for bgt
2127*c0909341SAndroid Build Coastguard Worker        add             r5,  r5,  lr, lsl #1 // skip the ac row just consumed through r12
2128*c0909341SAndroid Build Coastguard Worker        add             r12, r12, lr, lsl #1 // skip the ac row the r5 pointer will read next
2129*c0909341SAndroid Build Coastguard Worker        add             r0,  r0,  r1  // move both output pointers down two rows
2130*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r1
2131*c0909341SAndroid Build Coastguard Worker        mov             r3,  lr       // reload the column countdown
2132*c0909341SAndroid Build Coastguard Worker        bgt             1b
2133*c0909341SAndroid Build Coastguard Worker        vpop            {q4-q7}
2134*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
2135*c0909341SAndroid Build Coastguard Workerendfunc
2136*c0909341SAndroid Build Coastguard Worker
2137*c0909341SAndroid Build Coastguard Worker// void ipred_cfl_top_16bpc_neon(pixel *dst, const ptrdiff_t stride,
2138*c0909341SAndroid Build Coastguard Worker//                               const pixel *const topleft,
2139*c0909341SAndroid Build Coastguard Worker//                               const int width, const int height,
2140*c0909341SAndroid Build Coastguard Worker//                               const int16_t *ac, const int alpha,
2141*c0909341SAndroid Build Coastguard Worker//                               const int bitdepth_max);
//
// Computes dc as the rounded average of the `width` 16-bit pixels that start
// one pixel after topleft, splats it into q0, and then branches into the
// shared L(ipred_cfl_splat_w*) loops defined in ipred_cfl_128_16bpc_neon,
// which produce the output block and return to the caller.
// Setup of r4-r7, r1, r6, q1, q14 and q15 mirrors ipred_cfl_128_16bpc_neon.
2142*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_top_16bpc_neon, export=1
2143*c0909341SAndroid Build Coastguard Worker        push            {r4-r8, lr}   // 24 bytes pushed, so stack args start at sp+24
2144*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #24] // r4 = height, r5 = ac
2145*c0909341SAndroid Build Coastguard Worker        ldrd            r6,  r7,  [sp, #32] // r6 = alpha, r7 = bitdepth_max
2146*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3       // lr = clz(width)
2147*c0909341SAndroid Build Coastguard Worker        vdup.16         q15, r7       // bitdepth_max
2148*c0909341SAndroid Build Coastguard Worker        adr             r12, L(ipred_cfl_top_tbl)
2149*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #26 // table index: width 32 -> 0, 16 -> 1, 8 -> 2, 4 -> 3
2150*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r12, lr, lsl #2] // table-relative offset of the averaging code
2151*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  r6   // alpha
2152*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2  // step past the pixel at topleft (one 16-bit pixel)
2153*c0909341SAndroid Build Coastguard Worker        add             r12, r12, lr  // r12 = address of the width-specific averaging code
2154*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1  // r6 = dst + stride (odd rows)
2155*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1  // r0 and r6 each step two rows at a time
2156*c0909341SAndroid Build Coastguard Worker        vmov.i16        q14, #0       // lower clamp bound
2157*c0909341SAndroid Build Coastguard Worker        bx              r12
2158*c0909341SAndroid Build Coastguard Worker
2159*c0909341SAndroid Build Coastguard Worker        .align 2
// NOTE(review): CONFIG_THUMB is defined outside this chunk; presumably it sets
// the Thumb bit of the bx target when assembling as Thumb - confirm.
2160*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_top_tbl):
2161*c0909341SAndroid Build Coastguard Worker        .word 32f - L(ipred_cfl_top_tbl) + CONFIG_THUMB
2162*c0909341SAndroid Build Coastguard Worker        .word 16f - L(ipred_cfl_top_tbl) + CONFIG_THUMB
2163*c0909341SAndroid Build Coastguard Worker        .word 8f  - L(ipred_cfl_top_tbl) + CONFIG_THUMB
2164*c0909341SAndroid Build Coastguard Worker        .word 4f  - L(ipred_cfl_top_tbl) + CONFIG_THUMB
2165*c0909341SAndroid Build Coastguard Worker
2166*c0909341SAndroid Build Coastguard Worker4:
2167*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0}, [r2]    // 4 pixels
2168*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0  // two pairwise adds leave the 4-pixel sum in every lane
2169*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
2170*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #2  // dc = (sum + 2) >> 2
2171*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]    // splat dc
2172*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_splat_w4)
2173*c0909341SAndroid Build Coastguard Worker8:
2174*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0}, [r2]    // 8 pixels
2175*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1  // fold 8 lanes to 4
2176*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
2177*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
2178*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #3  // dc = (sum + 4) >> 3
2179*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]    // splat dc
2180*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_splat_w8)
2181*c0909341SAndroid Build Coastguard Worker16:
2182*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2, q3}, [r2] // 16 pixels
2183*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q2,  q3  // fold 16 lanes to 8
2184*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1  // fold 8 lanes to 4
2185*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
2186*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
2187*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #4  // dc = (sum + 8) >> 4
2188*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]    // splat dc
2189*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_splat_w16)
2190*c0909341SAndroid Build Coastguard Worker32:
2191*c0909341SAndroid Build Coastguard Worker        vld1.16         {q8,  q9},  [r2]! // pixels 0-15
2192*c0909341SAndroid Build Coastguard Worker        vld1.16         {q10, q11}, [r2]  // pixels 16-31
2193*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q9
2194*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q11
2195*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q8,  q10 // 8 lanes, each the sum of 4 pixels
2196*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1  // 4 lanes, each the sum of 8 pixels
2197*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0  // lanes now hold sums of 16 pixels
2198*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d0,  d0       // final add widened to 32 bits (presumably so the 32-pixel sum cannot wrap - confirm range)
2199*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d0,  q0,  #5  // dc = (sum + 16) >> 5; d1 is also read but only d0[0] is used below
2200*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]    // splat dc
2201*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_splat_w16) // the w16 loop also handles width 32
2202*c0909341SAndroid Build Coastguard Workerendfunc
2203*c0909341SAndroid Build Coastguard Worker
2204*c0909341SAndroid Build Coastguard Worker// void ipred_cfl_left_16bpc_neon(pixel *dst, const ptrdiff_t stride,
2205*c0909341SAndroid Build Coastguard Worker//                                const pixel *const topleft,
2206*c0909341SAndroid Build Coastguard Worker//                                const int width, const int height,
2207*c0909341SAndroid Build Coastguard Worker//                                const int16_t *ac, const int alpha,
2208*c0909341SAndroid Build Coastguard Worker//                                const int bitdepth_max);
//
// Computes dc as the rounded average of the `height` 16-bit pixels stored
// immediately before topleft, splats it into q0, and then jumps into the
// shared L(ipred_cfl_splat_w*) loops defined in ipred_cfl_128_16bpc_neon.
// Two table lookups are resolved up front: r7 = averaging code selected by
// height, r12 = output loop selected by width.
2209*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_left_16bpc_neon, export=1
2210*c0909341SAndroid Build Coastguard Worker        push            {r4-r8, lr}   // 24 bytes pushed, so stack args start at sp+24
2211*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #24] // r4 = height, r5 = ac
2212*c0909341SAndroid Build Coastguard Worker        ldrd            r6,  r7,  [sp, #32] // r6 = alpha, r7 = bitdepth_max
2213*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r4, lsl #1 // r2 = topleft - height pixels (2 bytes each)
2214*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3       // lr = clz(width)
2215*c0909341SAndroid Build Coastguard Worker        clz             r8,  r4       // r8 = clz(height)
2216*c0909341SAndroid Build Coastguard Worker        vdup.16         q15, r7       // bitdepth_max
2217*c0909341SAndroid Build Coastguard Worker        adr             r12, L(ipred_cfl_splat_tbl) // same address as L(ipred_cfl_128_tbl), which its entries are relative to
2218*c0909341SAndroid Build Coastguard Worker        adr             r7,  L(ipred_cfl_left_tbl)  // r7 is free again: bitdepth_max now lives in q15
2219*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #26 // width index:  32 -> 0, 16 -> 1, 8 -> 2, 4 -> 3
2220*c0909341SAndroid Build Coastguard Worker        sub             r8,  r8,  #26 // height index: 32 -> 0, 16 -> 1, 8 -> 2, 4 -> 3
2221*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r12, lr, lsl #2]
2222*c0909341SAndroid Build Coastguard Worker        ldr             r8,  [r7,  r8, lsl #2]
2223*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  r6   // alpha
2224*c0909341SAndroid Build Coastguard Worker        add             r12, r12, lr  // r12 = output loop for this width
2225*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8  // r7 = averaging code for this height
2226*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1  // r6 = dst + stride (odd rows)
2227*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1  // r0 and r6 each step two rows at a time
2228*c0909341SAndroid Build Coastguard Worker        vmov.i16        q14, #0       // lower clamp bound
2229*c0909341SAndroid Build Coastguard Worker        bx              r7            // compute dc first; each variant ends with bx r12
2230*c0909341SAndroid Build Coastguard Worker
2231*c0909341SAndroid Build Coastguard Worker        .align 2
// NOTE(review): CONFIG_THUMB is defined outside this chunk; presumably it sets
// the Thumb bit of the bx target when assembling as Thumb - confirm.
2232*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_left_tbl):
2233*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_left_h32) - L(ipred_cfl_left_tbl) + CONFIG_THUMB
2234*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_left_h16) - L(ipred_cfl_left_tbl) + CONFIG_THUMB
2235*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_left_h8)  - L(ipred_cfl_left_tbl) + CONFIG_THUMB
2236*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_left_h4)  - L(ipred_cfl_left_tbl) + CONFIG_THUMB
2237*c0909341SAndroid Build Coastguard Worker
2238*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_left_h4):
2239*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0}, [r2, :64] // 4 pixels; the :64 qualifier requires r2 to be 8-byte aligned
2240*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0  // two pairwise adds leave the 4-pixel sum in every lane
2241*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
2242*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #2  // dc = (sum + 2) >> 2
2243*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]    // splat dc
2244*c0909341SAndroid Build Coastguard Worker        bx              r12           // continue in the width-specific output loop
2245*c0909341SAndroid Build Coastguard Worker
2246*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_left_h8):
2247*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0}, [r2, :128] // 8 pixels; the :128 qualifier requires r2 to be 16-byte aligned
2248*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1  // fold 8 lanes to 4
2249*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
2250*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
2251*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #3  // dc = (sum + 4) >> 3
2252*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]    // splat dc
2253*c0909341SAndroid Build Coastguard Worker        bx              r12           // continue in the width-specific output loop
2254*c0909341SAndroid Build Coastguard Worker
2255*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_left_h16):
2256*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2, q3}, [r2, :128] // 16 pixels
2257*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q2,  q3  // fold 16 lanes to 8
2258*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1  // fold 8 lanes to 4
2259*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
2260*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0
2261*c0909341SAndroid Build Coastguard Worker        vrshr.u16       d0,  d0,  #4  // dc = (sum + 8) >> 4
2262*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]    // splat dc
2263*c0909341SAndroid Build Coastguard Worker        bx              r12           // continue in the width-specific output loop
2264*c0909341SAndroid Build Coastguard Worker
2265*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_left_h32):
2266*c0909341SAndroid Build Coastguard Worker        vld1.16         {q8,  q9},  [r2, :128]! // pixels 0-15
2267*c0909341SAndroid Build Coastguard Worker        vld1.16         {q10, q11}, [r2, :128]  // pixels 16-31
2268*c0909341SAndroid Build Coastguard Worker        vadd.i16        q8,  q8,  q9
2269*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q11
2270*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q8,  q10 // 8 lanes, each the sum of 4 pixels
2271*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1  // 4 lanes, each the sum of 8 pixels
2272*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0  // lanes now hold sums of 16 pixels
2273*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d0,  d0       // final add widened to 32 bits (presumably so the 32-pixel sum cannot wrap - confirm range)
2274*c0909341SAndroid Build Coastguard Worker        vrshrn.i32      d0,  q0,  #5  // dc = (sum + 16) >> 5; d1 is also read but only d0[0] is used below
2275*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]    // splat dc
2276*c0909341SAndroid Build Coastguard Worker        bx              r12           // continue in the width-specific output loop
2277*c0909341SAndroid Build Coastguard Workerendfunc
2278*c0909341SAndroid Build Coastguard Worker
2279*c0909341SAndroid Build Coastguard Worker// void ipred_cfl_16bpc_neon(pixel *dst, const ptrdiff_t stride,
2280*c0909341SAndroid Build Coastguard Worker//                           const pixel *const topleft,
2281*c0909341SAndroid Build Coastguard Worker//                           const int width, const int height,
2282*c0909341SAndroid Build Coastguard Worker//                           const int16_t *ac, const int alpha,
2283*c0909341SAndroid Build Coastguard Worker//                           const int bitdepth_max);
//
// Computes the rounded average of the `height` 16-bit pixels stored directly
// before `topleft` and the `width` 16-bit pixels stored directly after it:
//     dc = (sum + ((width + height) >> 1)) / (width + height)
// The result is broadcast to every 16-bit lane of q0 and control is passed
// to L(ipred_cfl_splat_w*), which is defined outside this function.
//
// On entry: r0 = dst, r1 = stride, r2 = topleft, r3 = width; height, ac,
// alpha and bitdepth_max are on the stack.
// State left for the splat code: q0 = dc, q1 = alpha, q14 = 0,
// q15 = bitdepth_max, r0 = dst, r1 = 2 * stride, r3 = width, r4 = height,
// r5 = ac, r6 = dst + stride.
// NOTE(review): r4-r8 and lr are pushed here but never popped in this block;
// presumably the splat code restores them and returns - confirm.
2284*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_16bpc_neon, export=1
2285*c0909341SAndroid Build Coastguard Worker        push            {r4-r8, lr}  // 6 registers = 24 bytes, so the stack arguments start at [sp, #24]
2286*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #24]  // r4 = height, r5 = ac
2287*c0909341SAndroid Build Coastguard Worker        ldrd            r6,  r7,  [sp, #32]  // r6 = alpha, r7 = bitdepth_max
2288*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  r4, lsl #1  // r2 = topleft - height pixels (2 bytes each)
2289*c0909341SAndroid Build Coastguard Worker        add             r8,  r3,  r4  // width + height
2290*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  r6       // alpha
2291*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3  // clz(width): 26 for 32 ... 29 for 4
2292*c0909341SAndroid Build Coastguard Worker        clz             r6,  r4  // clz(height); r6 is free again, alpha now lives in q1
2293*c0909341SAndroid Build Coastguard Worker        vdup.32         d16, r8       // width + height
2294*c0909341SAndroid Build Coastguard Worker        vdup.16         q15, r7       // bitdepth_max
2295*c0909341SAndroid Build Coastguard Worker        adr             r7,  L(ipred_cfl_tbl)  // r7 = jump table base (bitdepth_max already copied to q15)
2296*c0909341SAndroid Build Coastguard Worker        rbit            r8,  r8       // rbit(width + height)
2297*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #22 // 26 leading bits, minus table offset 4
2298*c0909341SAndroid Build Coastguard Worker        sub             r6,  r6,  #26  // index of the height handler (table entries 0-3)
2299*c0909341SAndroid Build Coastguard Worker        clz             r8,  r8       // ctz(width + height)
2300*c0909341SAndroid Build Coastguard Worker        ldr             lr,  [r7, lr, lsl #2]  // table-relative offset of the width handler
2301*c0909341SAndroid Build Coastguard Worker        ldr             r6,  [r7, r6, lsl #2]  // table-relative offset of the height handler
2302*c0909341SAndroid Build Coastguard Worker        neg             r8,  r8       // -ctz(width + height)
2303*c0909341SAndroid Build Coastguard Worker        add             r12, r7,  lr  // r12 = address of L(ipred_cfl_w*)
2304*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r6  // r7 = address of L(ipred_cfl_h*)
2305*c0909341SAndroid Build Coastguard Worker        vshr.u32        d16, d16, #1  // (width + height) >> 1
2306*c0909341SAndroid Build Coastguard Worker        vdup.32         d17, r8       // -ctz(width + height)
2307*c0909341SAndroid Build Coastguard Worker        add             r6,  r0,  r1  // r6 = dst + stride; not read in this block, presumably for the splat code
2308*c0909341SAndroid Build Coastguard Worker        lsl             r1,  r1,  #1  // r1 = 2 * stride
2309*c0909341SAndroid Build Coastguard Worker        vmov.i16        q14, #0  // q14 = 0; not read in this block, presumably a constant for the splat code
2310*c0909341SAndroid Build Coastguard Worker        bx              r7  // run the height handler first; it continues with bx r12
2311*c0909341SAndroid Build Coastguard Worker
        // Jump table of offsets relative to L(ipred_cfl_tbl). Entries 0-3 are
        // the height handlers (indexed by clz(height) - 26), entries 4-7 the
        // width handlers (indexed by clz(width) - 22).
        // NOTE(review): CONFIG_THUMB is defined outside this view; presumably
        // it sets the Thumb bit of the bx target in Thumb builds - confirm.
2312*c0909341SAndroid Build Coastguard Worker        .align 2
2313*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_tbl):
2314*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_h32) - L(ipred_cfl_tbl) + CONFIG_THUMB
2315*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_h16) - L(ipred_cfl_tbl) + CONFIG_THUMB
2316*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_h8)  - L(ipred_cfl_tbl) + CONFIG_THUMB
2317*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_h4)  - L(ipred_cfl_tbl) + CONFIG_THUMB
2318*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_w32) - L(ipred_cfl_tbl) + CONFIG_THUMB
2319*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_w16) - L(ipred_cfl_tbl) + CONFIG_THUMB
2320*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_w8)  - L(ipred_cfl_tbl) + CONFIG_THUMB
2321*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_w4)  - L(ipred_cfl_tbl) + CONFIG_THUMB
2322*c0909341SAndroid Build Coastguard Worker
        // Height handlers (h*): sum the `height` pixels before topleft into
        // both 32-bit lanes of d0, leave r2 pointing one pixel past topleft,
        // then jump to the width handler in r12.
        // Width handlers (w*): add the rounding term and the sum of the
        // `width` pixels at r2, divide by (width + height), broadcast to q0.
2323*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h4):
2324*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0}, [r2, :64]!  // 4 pixels; the post-increment brings r2 back to topleft
2325*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0  // pairwise sums, 2 pixels per 16-bit lane
2326*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2  // skip the pixel at topleft itself
2327*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d0,  d0  // widen: each 32-bit lane = sum of all 4 pixels
2328*c0909341SAndroid Build Coastguard Worker        bx              r12  // continue in the width handler
2329*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w4):
2330*c0909341SAndroid Build Coastguard Worker        vld1.16         {d1}, [r2]  // 4 pixels after topleft; no alignment hint since r2 = topleft + 2 bytes
2331*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d16  // add the rounding term (width + height) >> 1
2332*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d1,  d1,  d1
2333*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d1,  d1  // each 32-bit lane of d1 = sum of the 4 pixels
2334*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #4  // height == width? (the NEON instructions below leave the flags alone)
2335*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d1  // total sum + rounding
2336*c0909341SAndroid Build Coastguard Worker        vshl.u32        d0,  d0,  d17  // negative shift count: >> ctz(width + height)
2337*c0909341SAndroid Build Coastguard Worker        beq             1f  // square block: the divisor is a power of two, nothing left to do
2338*c0909341SAndroid Build Coastguard Worker        // h = 8/16
2339*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #16
2340*c0909341SAndroid Build Coastguard Worker        movw            lr,  #0x6667  // ~2^17 / 5; lr is free as scratch, it was saved by the push
2341*c0909341SAndroid Build Coastguard Worker        movw            r8,  #0xAAAB  // ~2^17 / 3
2342*c0909341SAndroid Build Coastguard Worker        it              ne  // IT block for the conditional move below
2343*c0909341SAndroid Build Coastguard Worker        movne           lr,  r8  // h = 8: 4 + 8 = 4 * 3, use the 1/3 multiplier (h = 16: 20 = 4 * 5)
2344*c0909341SAndroid Build Coastguard Worker        vdup.32         d18, lr
2345*c0909341SAndroid Build Coastguard Worker        vmul.i32        d0,  d0,  d18
2346*c0909341SAndroid Build Coastguard Worker        vshr.u32        d0,  d0,  #17  // finish the multiply-by-reciprocal division by 3 or 5
2347*c0909341SAndroid Build Coastguard Worker1:
2348*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]  // broadcast dc to all 16-bit lanes of q0
2349*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_splat_w4)  // label defined outside this function
2350*c0909341SAndroid Build Coastguard Worker
2351*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h8):
2352*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0}, [r2, :128]!  // 8 pixels; the post-increment brings r2 back to topleft
2353*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1  // 2 pixels per 16-bit lane
2354*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0  // 4 pixels per 16-bit lane
2355*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2  // skip the pixel at topleft itself
2356*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d0,  d0  // widen: each 32-bit lane = sum of all 8 pixels
2357*c0909341SAndroid Build Coastguard Worker        bx              r12  // continue in the width handler
2358*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w8):
2359*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2}, [r2]  // 8 pixels after topleft (unaligned)
2360*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d16  // add the rounding term (width + height) >> 1
2361*c0909341SAndroid Build Coastguard Worker        vadd.i16        d1,  d4,  d5
2362*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d1,  d1,  d1
2363*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d1,  d1  // each 32-bit lane of d1 = sum of the 8 pixels
2364*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #8  // height == width?
2365*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d1  // total sum + rounding
2366*c0909341SAndroid Build Coastguard Worker        vshl.u32        d0,  d0,  d17  // negative shift count: >> ctz(width + height)
2367*c0909341SAndroid Build Coastguard Worker        beq             1f  // square block: done
2368*c0909341SAndroid Build Coastguard Worker        // h = 4/16/32
2369*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #32
2370*c0909341SAndroid Build Coastguard Worker        movw            lr,  #0x6667  // ~2^17 / 5
2371*c0909341SAndroid Build Coastguard Worker        movw            r8,  #0xAAAB  // ~2^17 / 3
2372*c0909341SAndroid Build Coastguard Worker        it              ne
2373*c0909341SAndroid Build Coastguard Worker        movne           lr,  r8  // h = 4/16: 12 or 24 leaves a factor 3 (h = 32: 40 = 8 * 5)
2374*c0909341SAndroid Build Coastguard Worker        vdup.32         d18, lr
2375*c0909341SAndroid Build Coastguard Worker        vmul.i32        d0,  d0,  d18
2376*c0909341SAndroid Build Coastguard Worker        vshr.u32        d0,  d0,  #17  // finish the division by 3 or 5
2377*c0909341SAndroid Build Coastguard Worker1:
2378*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]  // broadcast dc to all 16-bit lanes of q0
2379*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_splat_w8)  // label defined outside this function
2380*c0909341SAndroid Build Coastguard Worker
2381*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h16):
2382*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2, q3}, [r2, :128]!  // 16 pixels; the post-increment brings r2 back to topleft
2383*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q2,  q3  // 2 pixels per 16-bit lane
2384*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1  // 4 pixels per lane
2385*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0  // 8 pixels per lane
2386*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2  // skip the pixel at topleft itself
2387*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d0,  d0  // widen: each 32-bit lane = sum of all 16 pixels
2388*c0909341SAndroid Build Coastguard Worker        bx              r12  // continue in the width handler
2389*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w16):
2390*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2, q3}, [r2]  // 16 pixels after topleft (unaligned)
2391*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d16  // add the rounding term (width + height) >> 1
2392*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q3
2393*c0909341SAndroid Build Coastguard Worker        vadd.i16        d1,  d4,  d5
2394*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d1,  d1,  d1
2395*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d1,  d1  // each 32-bit lane of d1 = sum of the 16 pixels
2396*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #16  // height == width?
2397*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d1  // total sum + rounding
2398*c0909341SAndroid Build Coastguard Worker        vshl.u32        d0,  d0,  d17  // negative shift count: >> ctz(width + height)
2399*c0909341SAndroid Build Coastguard Worker        beq             1f  // square block: done
2400*c0909341SAndroid Build Coastguard Worker        // h = 4/8/32/64
2401*c0909341SAndroid Build Coastguard Worker        tst             r4,  #(32+16+8)  // 16 added to make a consecutive bitmask
2402*c0909341SAndroid Build Coastguard Worker        movw            lr,  #0x6667  // ~2^17 / 5
2403*c0909341SAndroid Build Coastguard Worker        movw            r8,  #0xAAAB  // ~2^17 / 3
2404*c0909341SAndroid Build Coastguard Worker        it              ne
2405*c0909341SAndroid Build Coastguard Worker        movne           lr,  r8  // h = 8/32: 24 or 48 leaves a factor 3 (h = 4/64: 20 or 80 leaves 5)
2406*c0909341SAndroid Build Coastguard Worker        vdup.32         d18, lr
2407*c0909341SAndroid Build Coastguard Worker        vmul.i32        d0,  d0,  d18
2408*c0909341SAndroid Build Coastguard Worker        vshr.u32        d0,  d0,  #17  // finish the division by 3 or 5
2409*c0909341SAndroid Build Coastguard Worker1:
2410*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]  // broadcast dc to all 16-bit lanes of q0
2411*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_splat_w16)  // label defined outside this function
2412*c0909341SAndroid Build Coastguard Worker
2413*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_h32):
2414*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2, q3},   [r2, :128]!  // first 16 of the 32 pixels
2415*c0909341SAndroid Build Coastguard Worker        vld1.16         {q10, q11}, [r2, :128]!  // last 16; r2 is back at topleft afterwards
2416*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q3
2417*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q11
2418*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q2,  q10  // 4 pixels per 16-bit lane
2419*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d1  // 8 pixels per lane
2420*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d0  // 16 pixels per lane; assumes this still fits in 16 bits - TODO confirm against the maximum bitdepth
2421*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #2  // skip the pixel at topleft itself
2422*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d0,  d0  // widen: each 32-bit lane = sum of all 32 pixels
2423*c0909341SAndroid Build Coastguard Worker        bx              r12  // continue in the width handler
2424*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_w32):
2425*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2, q3},   [r2]!  // first 16 pixels after topleft (unaligned)
2426*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d16  // add the rounding term (width + height) >> 1
2427*c0909341SAndroid Build Coastguard Worker        vld1.16         {q10, q11}, [r2]!  // remaining 16 pixels
2428*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q3
2429*c0909341SAndroid Build Coastguard Worker        vadd.i16        q10, q10, q11
2430*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q10
2431*c0909341SAndroid Build Coastguard Worker        vadd.i16        d1,  d4,  d5
2432*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d1,  d1,  d1
2433*c0909341SAndroid Build Coastguard Worker        vpaddl.u16      d1,  d1  // each 32-bit lane of d1 = sum of the 32 pixels
2434*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #32  // height == width?
2435*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d1  // total sum + rounding
2436*c0909341SAndroid Build Coastguard Worker        vshl.u32        d0,  d0,  d17  // negative shift count: >> ctz(width + height)
2437*c0909341SAndroid Build Coastguard Worker        beq             1f  // square block: done
2438*c0909341SAndroid Build Coastguard Worker        // h = 8/16/64
2439*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #8
2440*c0909341SAndroid Build Coastguard Worker        movw            lr,  #0x6667  // ~2^17 / 5
2441*c0909341SAndroid Build Coastguard Worker        movw            r8,  #0xAAAB  // ~2^17 / 3
2442*c0909341SAndroid Build Coastguard Worker        it              ne
2443*c0909341SAndroid Build Coastguard Worker        movne           lr,  r8  // h = 16/64: 48 or 96 leaves a factor 3 (h = 8: 40 = 8 * 5)
2444*c0909341SAndroid Build Coastguard Worker        vdup.32         d18, lr
2445*c0909341SAndroid Build Coastguard Worker        vmul.i32        d0,  d0,  d18
2446*c0909341SAndroid Build Coastguard Worker        vshr.u32        d0,  d0,  #17  // finish the division by 3 or 5
2447*c0909341SAndroid Build Coastguard Worker1:
2448*c0909341SAndroid Build Coastguard Worker        vdup.16         q0,  d0[0]  // broadcast dc to all 16-bit lanes of q0
2449*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_splat_w16)  // width 32 enters the w16 splat label (defined outside this function); NOTE(review): presumably that code iterates over the width - confirm
2450*c0909341SAndroid Build Coastguard Workerendfunc
2451*c0909341SAndroid Build Coastguard Worker
2452*c0909341SAndroid Build Coastguard Worker// void ipred_cfl_ac_420_16bpc_neon(int16_t *const ac, const pixel *const ypx,
2453*c0909341SAndroid Build Coastguard Worker//                                  const ptrdiff_t stride, const int w_pad,
2454*c0909341SAndroid Build Coastguard Worker//                                  const int h_pad, const int cw, const int ch);
2455*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_ac_420_16bpc_neon, export=1
2456*c0909341SAndroid Build Coastguard Worker        push            {r4-r8,lr}
2457*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #24]
2458*c0909341SAndroid Build Coastguard Worker        ldr             r6,  [sp, #32]
2459*c0909341SAndroid Build Coastguard Worker        clz             r8,  r5
2460*c0909341SAndroid Build Coastguard Worker        lsl             r4,  r4,  #2
2461*c0909341SAndroid Build Coastguard Worker        adr             r7,  L(ipred_cfl_ac_420_tbl)
2462*c0909341SAndroid Build Coastguard Worker        sub             r8,  r8,  #27
2463*c0909341SAndroid Build Coastguard Worker        ldr             r8,  [r7, r8, lsl #2]
2464*c0909341SAndroid Build Coastguard Worker        vmov.i32        q8,  #0
2465*c0909341SAndroid Build Coastguard Worker        vmov.i32        q9,  #0
2466*c0909341SAndroid Build Coastguard Worker        vmov.i32        q10, #0
2467*c0909341SAndroid Build Coastguard Worker        vmov.i32        q11, #0
2468*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8
2469*c0909341SAndroid Build Coastguard Worker        sub             r8,  r6,  r4  // height - h_pad
2470*c0909341SAndroid Build Coastguard Worker        rbit            lr,  r5       // rbit(width)
2471*c0909341SAndroid Build Coastguard Worker        rbit            r12, r6       // rbit(height)
2472*c0909341SAndroid Build Coastguard Worker        clz             lr,  lr       // ctz(width)
2473*c0909341SAndroid Build Coastguard Worker        clz             r12, r12      // ctz(height)
2474*c0909341SAndroid Build Coastguard Worker        add             lr,  lr,  r12 // log2sz
2475*c0909341SAndroid Build Coastguard Worker        add             r12, r1,  r2
2476*c0909341SAndroid Build Coastguard Worker        vdup.32         d31, lr
2477*c0909341SAndroid Build Coastguard Worker        lsl             r2,  r2,  #1
2478*c0909341SAndroid Build Coastguard Worker        vneg.s32        d31, d31      // -log2sz
2479*c0909341SAndroid Build Coastguard Worker        bx              r7
2480*c0909341SAndroid Build Coastguard Worker
2481*c0909341SAndroid Build Coastguard Worker        .align 2
2482*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_tbl):
2483*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_420_w16) - L(ipred_cfl_ac_420_tbl) + CONFIG_THUMB
2484*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_420_w8)  - L(ipred_cfl_ac_420_tbl) + CONFIG_THUMB
2485*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_420_w4)  - L(ipred_cfl_ac_420_tbl) + CONFIG_THUMB
2486*c0909341SAndroid Build Coastguard Worker
2487*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w4):
2488*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input
2489*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0}, [r1,  :128], r2
2490*c0909341SAndroid Build Coastguard Worker        vld1.16         {q1}, [r12, :128], r2
2491*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2}, [r1,  :128], r2
2492*c0909341SAndroid Build Coastguard Worker        vld1.16         {q3}, [r12, :128], r2
2493*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q1
2494*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q3
2495*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d1
2496*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d1,  d4,  d5
2497*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #1
2498*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
2499*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0}, [r0, :128]!
2500*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
2501*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
2502*c0909341SAndroid Build Coastguard Worker        bgt             1b
2503*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0
2504*c0909341SAndroid Build Coastguard Worker        vmov            d0,  d1
2505*c0909341SAndroid Build Coastguard Worker        vmov            d2,  d1
2506*c0909341SAndroid Build Coastguard Worker        vmov            d3,  d1
2507*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w4_hpad):
2508*c0909341SAndroid Build Coastguard Worker        beq             3f // This assumes that all callers already did "cmp r4, #0"
2509*c0909341SAndroid Build Coastguard Worker2:      // Vertical padding (h_pad > 0)
2510*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
2511*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2512*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
2513*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
2514*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
2515*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
2516*c0909341SAndroid Build Coastguard Worker        bgt             2b
2517*c0909341SAndroid Build Coastguard Worker3:
2518*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w4_calc_subtract_dc):
2519*c0909341SAndroid Build Coastguard Worker        // Aggregate the sums
2520*c0909341SAndroid Build Coastguard Worker        vadd.i32        q8,  q8,  q9
2521*c0909341SAndroid Build Coastguard Worker        vadd.i32        q10, q10, q11
2522*c0909341SAndroid Build Coastguard Worker        vadd.i32        q0,  q8,  q10
2523*c0909341SAndroid Build Coastguard Worker        vadd.i32        d0,  d0,  d1
2524*c0909341SAndroid Build Coastguard Worker        vpadd.i32       d0,  d0,  d0  // sum
2525*c0909341SAndroid Build Coastguard Worker        sub             r0,  r0,  r6, lsl #3
2526*c0909341SAndroid Build Coastguard Worker        vrshl.u32       d16, d0,  d31 // (sum + (1 << (log2sz - 1))) >>= log2sz
2527*c0909341SAndroid Build Coastguard Worker        vdup.16         q8,  d16[0]
2528*c0909341SAndroid Build Coastguard Worker6:      // Subtract dc from ac
2529*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0, q1}, [r0, :128]
2530*c0909341SAndroid Build Coastguard Worker        subs            r6,  r6,  #4
2531*c0909341SAndroid Build Coastguard Worker        vsub.i16        q0,  q0,  q8
2532*c0909341SAndroid Build Coastguard Worker        vsub.i16        q1,  q1,  q8
2533*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2534*c0909341SAndroid Build Coastguard Worker        bgt             6b
2535*c0909341SAndroid Build Coastguard Worker        pop             {r4-r8, pc}
2536*c0909341SAndroid Build Coastguard Worker
2537*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8):
2538*c0909341SAndroid Build Coastguard Worker        cmp             r3,  #0
2539*c0909341SAndroid Build Coastguard Worker        bne             L(ipred_cfl_ac_420_w8_wpad)
2540*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, without padding
2541*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0,  q1},  [r1,  :128], r2
2542*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2,  q3},  [r12, :128], r2
2543*c0909341SAndroid Build Coastguard Worker        vld1.16         {q12, q13}, [r1,  :128], r2
2544*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q2
2545*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q3
2546*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2,  q3},  [r12, :128], r2
2547*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d1
2548*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d1,  d2,  d3
2549*c0909341SAndroid Build Coastguard Worker        vadd.i16        q12, q12, q2
2550*c0909341SAndroid Build Coastguard Worker        vadd.i16        q13, q13, q3
2551*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d2,  d24, d25
2552*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d3,  d26, d27
2553*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #1
2554*c0909341SAndroid Build Coastguard Worker        vshl.i16        q1,  q1,  #1
2555*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
2556*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2557*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
2558*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
2559*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
2560*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
2561*c0909341SAndroid Build Coastguard Worker        bgt             1b
2562*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0
2563*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q1
2564*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w8_hpad)
2565*c0909341SAndroid Build Coastguard Worker
2566*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8_wpad):
2567*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, padding 4
2568*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0}, [r1,  :128], r2
2569*c0909341SAndroid Build Coastguard Worker        vld1.16         {q1}, [r12, :128], r2
2570*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2}, [r1,  :128], r2
2571*c0909341SAndroid Build Coastguard Worker        vld1.16         {q3}, [r12, :128], r2
2572*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q1
2573*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q3
2574*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d1
2575*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d1,  d4,  d5
2576*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #1
2577*c0909341SAndroid Build Coastguard Worker        vdup.16         d3,  d1[3]
2578*c0909341SAndroid Build Coastguard Worker        vmov            d2,  d1
2579*c0909341SAndroid Build Coastguard Worker        vdup.16         d1,  d0[3]
2580*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2
2581*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2582*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
2583*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
2584*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
2585*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
2586*c0909341SAndroid Build Coastguard Worker        bgt             1b
2587*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0
2588*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q1
2589*c0909341SAndroid Build Coastguard Worker
2590*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w8_hpad):
2591*c0909341SAndroid Build Coastguard Worker        beq             3f // This assumes that all callers already did "cmp r4, #0"
2592*c0909341SAndroid Build Coastguard Worker2:      // Vertical padding (h_pad > 0)
2593*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #4
2594*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2595*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
2596*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
2597*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
2598*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
2599*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2600*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
2601*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
2602*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
2603*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
2604*c0909341SAndroid Build Coastguard Worker        bgt             2b
2605*c0909341SAndroid Build Coastguard Worker3:
2606*c0909341SAndroid Build Coastguard Worker
2607*c0909341SAndroid Build Coastguard Worker        // Double the height and reuse the w4 summing/subtracting
2608*c0909341SAndroid Build Coastguard Worker        lsl             r6,  r6,  #1
2609*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w4_calc_subtract_dc)
2610*c0909341SAndroid Build Coastguard Worker
2611*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16):
2612*c0909341SAndroid Build Coastguard Worker        adr             r7,  L(ipred_cfl_ac_420_w16_tbl)
2613*c0909341SAndroid Build Coastguard Worker        ldr             r3,  [r7, r3, lsl #2]
2614*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r3
2615*c0909341SAndroid Build Coastguard Worker        bx              r7
2616*c0909341SAndroid Build Coastguard Worker
2617*c0909341SAndroid Build Coastguard Worker        .align 2
2618*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_tbl):
2619*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_420_w16_wpad0) - L(ipred_cfl_ac_420_w16_tbl) + CONFIG_THUMB
2620*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_420_w16_wpad1) - L(ipred_cfl_ac_420_w16_tbl) + CONFIG_THUMB
2621*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_420_w16_wpad2) - L(ipred_cfl_ac_420_w16_tbl) + CONFIG_THUMB
2622*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_420_w16_wpad3) - L(ipred_cfl_ac_420_w16_tbl) + CONFIG_THUMB
2623*c0909341SAndroid Build Coastguard Worker
2624*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad0):
2625*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #32
2626*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, without padding
2627*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0,  q1},  [r1,  :128]!
2628*c0909341SAndroid Build Coastguard Worker        vld1.16         {q12, q13}, [r12, :128]!
2629*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2,  q3},  [r1,  :128], r2
2630*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q12
2631*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q13
2632*c0909341SAndroid Build Coastguard Worker        vld1.16         {q12, q13}, [r12, :128], r2
2633*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d1
2634*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d1,  d2,  d3
2635*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q12
2636*c0909341SAndroid Build Coastguard Worker        vadd.i16        q3,  q3,  q13
2637*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d2,  d4,  d5
2638*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d3,  d6,  d7
2639*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #1
2640*c0909341SAndroid Build Coastguard Worker        vshl.i16        q1,  q1,  #1
2641*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #1
2642*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2643*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
2644*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
2645*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
2646*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
2647*c0909341SAndroid Build Coastguard Worker        bgt             1b
2648*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0
2649*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w16_hpad)
2650*c0909341SAndroid Build Coastguard Worker
2651*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad1):
2652*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #32
2653*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, padding 4
2654*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0,  q1},  [r1,  :128]!
2655*c0909341SAndroid Build Coastguard Worker        vld1.16         {q12, q13}, [r12, :128]!
2656*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2},       [r1,  :128], r2
2657*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q12
2658*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q13
2659*c0909341SAndroid Build Coastguard Worker        vld1.16         {q12},     [r12, :128], r2
2660*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d1
2661*c0909341SAndroid Build Coastguard Worker        vadd.i16        q2,  q2,  q12
2662*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d1,  d2,  d3
2663*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d2,  d4,  d5
2664*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #1
2665*c0909341SAndroid Build Coastguard Worker        vshl.i16        d2,  d2,  #1
2666*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #1
2667*c0909341SAndroid Build Coastguard Worker        vdup.16         d3,  d2[3]
2668*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2669*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
2670*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
2671*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
2672*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
2673*c0909341SAndroid Build Coastguard Worker        bgt             1b
2674*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0
2675*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w16_hpad)
2676*c0909341SAndroid Build Coastguard Worker
2677*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad2):
2678*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, padding 8
2679*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0,  q1},  [r1,  :128], r2
2680*c0909341SAndroid Build Coastguard Worker        vld1.16         {q12, q13}, [r12, :128], r2
2681*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q12
2682*c0909341SAndroid Build Coastguard Worker        vadd.i16        q1,  q1,  q13
2683*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d1
2684*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d1,  d2,  d3
2685*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #1
2686*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #1
2687*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d1[3]
2688*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2689*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
2690*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
2691*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
2692*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
2693*c0909341SAndroid Build Coastguard Worker        bgt             1b
2694*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0
2695*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w16_hpad)
2696*c0909341SAndroid Build Coastguard Worker
2697*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_wpad3):
2698*c0909341SAndroid Build Coastguard Worker1:      // Copy and subsample input, padding 12
2699*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0},  [r1,  :128], r2
2700*c0909341SAndroid Build Coastguard Worker        vld1.16         {q12}, [r12, :128], r2
2701*c0909341SAndroid Build Coastguard Worker        vadd.i16        q0,  q0,  q12
2702*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d0,  d0,  d1
2703*c0909341SAndroid Build Coastguard Worker        vshl.i16        d0,  d0,  #1
2704*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #1
2705*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d0[3]
2706*c0909341SAndroid Build Coastguard Worker        vdup.16         d1,  d0[3]
2707*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2708*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
2709*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
2710*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
2711*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
2712*c0909341SAndroid Build Coastguard Worker        bgt             1b
2713*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0
2714*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w16_hpad)
2715*c0909341SAndroid Build Coastguard Worker
2716*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_420_w16_hpad):
2717*c0909341SAndroid Build Coastguard Worker        beq             3f // This assumes that all callers already did "cmp r4, #0"
2718*c0909341SAndroid Build Coastguard Worker2:      // Vertical padding (h_pad > 0)
2719*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #2
2720*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2721*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
2722*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
2723*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
2724*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
2725*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
2726*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
2727*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
2728*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
2729*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
2730*c0909341SAndroid Build Coastguard Worker        bgt             2b
2731*c0909341SAndroid Build Coastguard Worker3:
2732*c0909341SAndroid Build Coastguard Worker
2733*c0909341SAndroid Build Coastguard Worker        // Quadruple the height and reuse the w4 summing/subtracting
2734*c0909341SAndroid Build Coastguard Worker        lsl             r6,  r6,  #2
2735*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w4_calc_subtract_dc)
2736*c0909341SAndroid Build Coastguard Workerendfunc
2737*c0909341SAndroid Build Coastguard Worker
// void cfl_ac_422_16bpc_neon(int16_t *const ac, const pixel *const ypx,
//                            const ptrdiff_t stride, const int w_pad,
//                            const int h_pad, const int cw, const int ch);
//
// Builds the 4:2:2 CfL "ac" buffer: every pair of horizontally adjacent
// input pixels is summed and shifted left by 2, one output row per input
// row (there is no vertical subsampling in this function).
//
// Register usage after the prologue:
//   r0       ac output pointer, advanced by 32 bytes per store
//   r1, r12  input pointers for even/odd rows, both stepped by 2*stride
//   r2       2*stride (minus 32 in the w16 paths that post-increment by 32)
//   r3       w_pad, used to select the horizontal padding variant
//   r4       h_pad*4, number of rows to synthesize by vertical padding
//   r5, r6   cw, ch
//   r8       ch - h_pad*4, number of rows still to read from the input
//   q8-q11   32-bit running sums of everything written to ac
//   d31      -log2(cw*ch), prepared for the shared tail code
//
// Every path ends by branching into the L(ipred_cfl_ac_420_*_hpad) tails
// of the 4:2:0 function with the flags set by "cmp r4, #0" and with q0/q1
// holding the data to replicate for vertical padding. The matching
// "pop {r4-r8,pc}" is not part of this block; presumably it lives in that
// shared tail.
function ipred_cfl_ac_422_16bpc_neon, export=1
        push            {r4-r8,lr}
        ldrd            r4,  r5,  [sp, #24]     // r4 = h_pad, r5 = cw
        ldr             r6,  [sp, #32]          // r6 = ch
        clz             r8,  r5
        lsl             r4,  r4,  #2            // h_pad * 4
        adr             r7,  L(ipred_cfl_ac_422_tbl)
        sub             r8,  r8,  #27           // table index: cw 16 -> 0, 8 -> 1, 4 -> 2
        ldr             r8,  [r7, r8, lsl #2]
        vmov.i16        q8,  #0                 // clear the four sum accumulators
        vmov.i16        q9,  #0
        vmov.i16        q10, #0
        vmov.i16        q11, #0
        add             r7,  r7,  r8            // r7 = address of the width handler
        sub             r8,  r6,  r4  // height - h_pad
        rbit            lr,  r5       // rbit(width)
        rbit            r12, r6       // rbit(height)
        clz             lr,  lr       // ctz(width)
        clz             r12, r12      // ctz(height)
        add             lr,  lr,  r12 // log2sz
        add             r12, r1,  r2  // r12 = second input row
        vdup.32         d31, lr
        lsl             r2,  r2,  #1  // step two input rows at a time
        vneg.s32        d31, d31      // -log2sz
        bx              r7

        .align 2
L(ipred_cfl_ac_422_tbl):
        .word L(ipred_cfl_ac_422_w16) - L(ipred_cfl_ac_422_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_422_w8) - L(ipred_cfl_ac_422_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_422_w4) - L(ipred_cfl_ac_422_tbl) + CONFIG_THUMB

// cw == 4: 8 input pixels -> 4 outputs per row, 4 rows per iteration.
L(ipred_cfl_ac_422_w4):
1:      // Copy and subsample input
        vld1.16         {q0}, [r1,  :128], r2
        vld1.16         {q1}, [r12, :128], r2
        vld1.16         {q2}, [r1,  :128], r2
        vld1.16         {q3}, [r12, :128], r2
        vpadd.i16       d0,  d0,  d1            // d0..d3 = rows 0..3, pairwise sums
        vpadd.i16       d1,  d2,  d3
        vpadd.i16       d2,  d4,  d5
        vpadd.i16       d3,  d6,  d7
        vshl.i16        q0,  q0,  #2
        vshl.i16        q1,  q1,  #2
        subs            r8,  r8,  #4
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        bgt             1b
        cmp             r4,  #0
        // Replicate the last row (d3) into d0-d2 for the vertical padding.
        vmov            d0,  d3
        vmov            d1,  d3
        vmov            d2,  d3
        b               L(ipred_cfl_ac_420_w4_hpad)

// cw == 8: 16 input pixels -> 8 outputs per row, 4 rows per iteration.
L(ipred_cfl_ac_422_w8):
        cmp             r3,  #0
        bne             L(ipred_cfl_ac_422_w8_wpad)
1:      // Copy and subsample input, without padding
        vld1.16         {q0,  q1},  [r1,  :128], r2
        vld1.16         {q2,  q3},  [r12, :128], r2
        vld1.16         {q12, q13}, [r1,  :128], r2
        vpadd.i16       d0,  d0,  d1            // q0 = row 0
        vpadd.i16       d1,  d2,  d3
        vpadd.i16       d2,  d4,  d5            // q1 = row 1
        vpadd.i16       d3,  d6,  d7
        vld1.16         {q2, q3}, [r12, :128], r2
        vpadd.i16       d24, d24, d25           // q12 = row 2
        vpadd.i16       d25, d26, d27
        vpadd.i16       d26, d4,  d5            // q13 = row 3
        vpadd.i16       d27, d6,  d7
        vshl.i16        q0,  q0,  #2
        vshl.i16        q1,  q1,  #2
        vshl.i16        q2,  q12, #2
        vshl.i16        q3,  q13, #2
        subs            r8,  r8,  #4
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        // Both q0 and q1 get a copy of the last row (q3) for vertical padding.
        vmov            q0,  q3
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w8_hpad)

// cw == 8 with w_pad != 0: only 8 input pixels per row are read; the last
// of the 4 resulting outputs is replicated into the right half of the row.
L(ipred_cfl_ac_422_w8_wpad):
1:      // Copy and subsample input, padding 4
        vld1.16         {q0},  [r1,  :128], r2
        vld1.16         {q2},  [r12, :128], r2
        vld1.16         {q12}, [r1,  :128], r2
        vpadd.i16       d0,  d0,  d1            // d0 = row 0
        vpadd.i16       d1,  d4,  d5            // d1 = row 1
        // Load a single q register here, like the three loads above: only
        // 8 pixels of this row are consumed, and q3 is rebuilt from d25
        // below, so a 32-byte load would just read past the valid width.
        vld1.16         {q2},  [r12, :128], r2
        vpadd.i16       d24, d24, d25           // d24 = row 2
        vpadd.i16       d25, d4,  d5            // d25 = row 3
        vshl.i16        q0,  q0,  #2
        vshl.i16        q12, q12, #2
        // Expand each 4-element row into a full 8-element row, working from
        // row 3 back to row 0 so that sources are read before being
        // overwritten.
        vdup.16         d7,  d25[3]
        vmov            d6,  d25
        vdup.16         d5,  d24[3]
        vmov            d4,  d24
        vdup.16         d3,  d1[3]
        vmov            d2,  d1
        vdup.16         d1,  d0[3]
        subs            r8,  r8,  #4
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        // Both q0 and q1 get a copy of the last row (q3) for vertical padding.
        vmov            q0,  q3
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w8_hpad)

// cw == 16: dispatch on w_pad (r3) through a second offset table.
L(ipred_cfl_ac_422_w16):
        adr             r7,  L(ipred_cfl_ac_422_w16_tbl)
        ldr             r3,  [r7, r3, lsl #2]
        add             r7,  r7,  r3
        bx              r7

        .align 2
L(ipred_cfl_ac_422_w16_tbl):
        .word L(ipred_cfl_ac_422_w16_wpad0) - L(ipred_cfl_ac_422_w16_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_422_w16_wpad1) - L(ipred_cfl_ac_422_w16_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_422_w16_wpad2) - L(ipred_cfl_ac_422_w16_tbl) + CONFIG_THUMB
        .word L(ipred_cfl_ac_422_w16_wpad3) - L(ipred_cfl_ac_422_w16_tbl) + CONFIG_THUMB

// cw == 16, no horizontal padding: 32 input pixels per row, read as two
// loads; the first post-increments by 32 bytes, so r2 is reduced by 32 to
// keep the total step at 2*stride. Two rows per iteration.
L(ipred_cfl_ac_422_w16_wpad0):
        sub             r2,  r2,  #32
1:      // Copy and subsample input, without padding
        vld1.16         {q0,  q1},  [r1,  :128]!
        vld1.16         {q2,  q3},  [r12, :128]!
        vld1.16         {q12, q13}, [r1,  :128], r2
        vpadd.i16       d0,  d0,  d1            // q0, q1 = row 0
        vpadd.i16       d1,  d2,  d3
        vpadd.i16       d2,  d24, d25
        vpadd.i16       d3,  d26, d27
        vld1.16         {q12, q13}, [r12, :128], r2
        vpadd.i16       d4,  d4,  d5            // q2, q3 = row 1
        vpadd.i16       d5,  d6,  d7
        vpadd.i16       d6,  d24, d25
        vpadd.i16       d7,  d26, d27
        vshl.i16        q0,  q0,  #2
        vshl.i16        q1,  q1,  #2
        vshl.i16        q2,  q2,  #2
        vshl.i16        q3,  q3,  #2
        subs            r8,  r8,  #2
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        // q0, q1 = last row (q2, q3), as stored repeatedly by the w16 hpad tail.
        vmov            q0,  q2
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w16_hpad)

// cw == 16, w_pad == 1: 24 input pixels -> 12 outputs, last one replicated
// into the final 4 positions.
L(ipred_cfl_ac_422_w16_wpad1):
        sub             r2,  r2,  #32
1:      // Copy and subsample input, padding 4
        vld1.16         {q0,  q1},  [r1,  :128]!
        vld1.16         {q2,  q3},  [r12, :128]!
        vld1.16         {q12},      [r1,  :128], r2
        vpadd.i16       d0,  d0,  d1
        vpadd.i16       d1,  d2,  d3
        vpadd.i16       d2,  d24, d25
        vld1.16         {q12},      [r12, :128], r2
        vpadd.i16       d4,  d4,  d5
        vpadd.i16       d5,  d6,  d7
        vpadd.i16       d6,  d24, d25
        vshl.i16        q0,  q0,  #2
        vshl.i16        d2,  d2,  #2
        vshl.i16        q2,  q2,  #2
        vshl.i16        d6,  d6,  #2
        vdup.16         d3,  d2[3]              // pad row 0
        vdup.16         d7,  d6[3]              // pad row 1
        subs            r8,  r8,  #2
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        vmov            q0,  q2
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w16_hpad)

// cw == 16, w_pad == 2: 16 input pixels -> 8 outputs, last one replicated
// into the final 8 positions.
L(ipred_cfl_ac_422_w16_wpad2):
1:      // Copy and subsample input, padding 8
        vld1.16         {q0,  q1},  [r1,  :128], r2
        vld1.16         {q2,  q3},  [r12, :128], r2
        vpadd.i16       d0,  d0,  d1
        vpadd.i16       d1,  d2,  d3
        vpadd.i16       d4,  d4,  d5
        vpadd.i16       d5,  d6,  d7
        vshl.i16        q0,  q0,  #2
        vshl.i16        q2,  q2,  #2
        vdup.16         q1,  d1[3]              // pad row 0
        vdup.16         q3,  d5[3]              // pad row 1
        subs            r8,  r8,  #2
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        vmov            q0,  q2
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w16_hpad)

// cw == 16, w_pad == 3: 8 input pixels -> 4 outputs, last one replicated
// into the final 12 positions.
L(ipred_cfl_ac_422_w16_wpad3):
1:      // Copy and subsample input, padding 12
        vld1.16         {q0}, [r1,  :128], r2
        vld1.16         {q2}, [r12, :128], r2
        vpadd.i16       d0,  d0,  d1            // d0 = row 0
        vpadd.i16       d1,  d4,  d5            // d1 = row 1
        vshl.i16        q0,  q0,  #2
        // Build row 1 in q2/q3 first, since d1 (its source) is overwritten
        // with row 0 padding at the end.
        vdup.16         q3,  d1[3]
        vdup.16         q1,  d0[3]
        vdup.16         d5,  d1[3]
        vmov            d4,  d1
        vdup.16         d1,  d0[3]
        subs            r8,  r8,  #2
        vst1.16         {q0, q1}, [r0, :128]!
        vaddw.u16       q8,  q8,  d0
        vaddw.u16       q9,  q9,  d1
        vaddw.u16       q10, q10, d2
        vaddw.u16       q11, q11, d3
        vst1.16         {q2, q3}, [r0, :128]!
        vaddw.u16       q8,  q8,  d4
        vaddw.u16       q9,  q9,  d5
        vaddw.u16       q10, q10, d6
        vaddw.u16       q11, q11, d7
        bgt             1b
        cmp             r4,  #0
        vmov            q0,  q2
        vmov            q1,  q3
        b               L(ipred_cfl_ac_420_w16_hpad)
endfunc
3014*c0909341SAndroid Build Coastguard Worker
3015*c0909341SAndroid Build Coastguard Worker// void cfl_ac_444_16bpc_neon(int16_t *const ac, const pixel *const ypx,
3016*c0909341SAndroid Build Coastguard Worker//                            const ptrdiff_t stride, const int w_pad,
3017*c0909341SAndroid Build Coastguard Worker//                            const int h_pad, const int cw, const int ch);
//
// Copies the block at ypx into ac with every sample shifted left by 3 (x8).
// Columns removed by w_pad are filled with the last loaded pixel of the row,
// and rows removed by h_pad are filled with the last processed row. While
// storing, the sum of all stored samples is accumulated in q8-q11 (32-bit
// lanes). Every path ends by branching into labels of the 4:2:0 function,
// which lie outside this section of the file; presumably that code subtracts
// the rounded average (DC) from ac using the -log2sz value in d31 - TODO
// confirm against L(ipred_cfl_ac_420_w4_calc_subtract_dc).
//
// Register use, as set up by the prologue below:
//   r0     = ac, post-incremented by every store
//   r1     = ypx (current input row), r12 = ypx + stride (following row)
//   r2     = stride in bytes (doubled for w4/w8/w16, halved back for w32)
//   r3     = w_pad
//   r4     = h_pad << 2 (rows of vertical padding), r5 = cw, r6 = ch
//   r8     = ch - (h_pad << 2) = number of input rows actually read
//   q8-q11 = running sums, d31 = -log2sz in both 32-bit lanes
//
// The loops rely on the flags set by "subs" still being valid at the "bgt"
// that closes them: the NEON instructions in between do not modify the flags.
3018*c0909341SAndroid Build Coastguard Workerfunction ipred_cfl_ac_444_16bpc_neon, export=1
3019*c0909341SAndroid Build Coastguard Worker        push            {r4-r8,lr}  // 6 registers = 24 bytes, so the stack arguments start at [sp, #24]
3020*c0909341SAndroid Build Coastguard Worker        ldrd            r4,  r5,  [sp, #24]  // r4 = h_pad, r5 = cw
3021*c0909341SAndroid Build Coastguard Worker        ldr             r6,  [sp, #32]  // r6 = ch
3022*c0909341SAndroid Build Coastguard Worker        clz             r8,  r5  // clz(cw): 26 for cw = 32 ... 29 for cw = 4
3023*c0909341SAndroid Build Coastguard Worker        lsl             r4,  r4,  #2  // r4 = h_pad << 2, used below as a row count
3024*c0909341SAndroid Build Coastguard Worker        adr             r7,  L(ipred_cfl_ac_444_tbl)
3025*c0909341SAndroid Build Coastguard Worker        sub             r8,  r8,  #26  // table index: 0 = w32, 1 = w16, 2 = w8, 3 = w4
3026*c0909341SAndroid Build Coastguard Worker        ldr             r8,  [r7, r8, lsl #2]  // table-relative offset of the width-specific code
3027*c0909341SAndroid Build Coastguard Worker        vmov.i16        q8,  #0  // clear the four sum accumulators q8-q11
3028*c0909341SAndroid Build Coastguard Worker        vmov.i16        q9,  #0
3029*c0909341SAndroid Build Coastguard Worker        vmov.i16        q10, #0
3030*c0909341SAndroid Build Coastguard Worker        vmov.i16        q11, #0
3031*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r8  // r7 = address of the width-specific code
3032*c0909341SAndroid Build Coastguard Worker        sub             r8,  r6,  r4  // height - h_pad = number of rows to read
3033*c0909341SAndroid Build Coastguard Worker        rbit            lr,  r5       // rbit(width)
3034*c0909341SAndroid Build Coastguard Worker        rbit            r12, r6       // rbit(height)
3035*c0909341SAndroid Build Coastguard Worker        clz             lr,  lr       // ctz(width)
3036*c0909341SAndroid Build Coastguard Worker        clz             r12, r12      // ctz(height)
3037*c0909341SAndroid Build Coastguard Worker        add             lr,  lr,  r12 // log2sz
3038*c0909341SAndroid Build Coastguard Worker        add             r12, r1,  r2  // r12 = pointer to the second input row
3039*c0909341SAndroid Build Coastguard Worker        vdup.32         d31, lr
3040*c0909341SAndroid Build Coastguard Worker        lsl             r2,  r2,  #1  // stride * 2: r1 and r12 each step over two rows
3041*c0909341SAndroid Build Coastguard Worker        vneg.s32        d31, d31      // -log2sz
3042*c0909341SAndroid Build Coastguard Worker        bx              r7  // dispatch on the block width
3043*c0909341SAndroid Build Coastguard Worker
3044*c0909341SAndroid Build Coastguard Worker        .align 2
3045*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_tbl):  // Table-relative offsets, ordered by clz(cw) - 26 (widest first)
3046*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_444_w32) - L(ipred_cfl_ac_444_tbl) + CONFIG_THUMB
3047*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_444_w16) - L(ipred_cfl_ac_444_tbl) + CONFIG_THUMB
3048*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_444_w8)  - L(ipred_cfl_ac_444_tbl) + CONFIG_THUMB
3049*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_444_w4)  - L(ipred_cfl_ac_444_tbl) + CONFIG_THUMB
3050*c0909341SAndroid Build Coastguard Worker
3051*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w4):
3052*c0909341SAndroid Build Coastguard Worker1:      // Copy and expand input: four rows of 4 pixels per iteration
3053*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0}, [r1,  :64], r2  // row 0
3054*c0909341SAndroid Build Coastguard Worker        vld1.16         {d1}, [r12, :64], r2  // row 1
3055*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2}, [r1,  :64], r2  // row 2
3056*c0909341SAndroid Build Coastguard Worker        vld1.16         {d3}, [r12, :64], r2  // row 3
3057*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #3  // samples * 8
3058*c0909341SAndroid Build Coastguard Worker        vshl.i16        q1,  q1,  #3
3059*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #4  // four rows consumed; flags are used by the bgt below
3060*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
3061*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0  // widen to 32 bit and add to the running sums
3062*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
3063*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
3064*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
3065*c0909341SAndroid Build Coastguard Worker        bgt             1b
3066*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0  // set flags for the hpad code branched to below (vmov leaves them intact)
3067*c0909341SAndroid Build Coastguard Worker        vmov            d0,  d3  // replicate the last row (d3) into d0-d2
3068*c0909341SAndroid Build Coastguard Worker        vmov            d1,  d3
3069*c0909341SAndroid Build Coastguard Worker        vmov            d2,  d3
3070*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w4_hpad)  // defined outside this section of the file
3071*c0909341SAndroid Build Coastguard Worker
3072*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w8):
3073*c0909341SAndroid Build Coastguard Worker1:      // Copy and expand input: four rows of 8 pixels per iteration
3074*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0}, [r1,  :128], r2  // row 0
3075*c0909341SAndroid Build Coastguard Worker        vld1.16         {q1}, [r12, :128], r2  // row 1
3076*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2}, [r1,  :128], r2  // row 2
3077*c0909341SAndroid Build Coastguard Worker        vld1.16         {q3}, [r12, :128], r2  // row 3
3078*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #3
3079*c0909341SAndroid Build Coastguard Worker        vshl.i16        q1,  q1,  #3
3080*c0909341SAndroid Build Coastguard Worker        vshl.i16        q2,  q2,  #3
3081*c0909341SAndroid Build Coastguard Worker        vshl.i16        q3,  q3,  #3
3082*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #4  // four rows consumed
3083*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
3084*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
3085*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
3086*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
3087*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
3088*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
3089*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d4
3090*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d5
3091*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d6
3092*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d7
3093*c0909341SAndroid Build Coastguard Worker        bgt             1b
3094*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0  // set flags for the hpad code branched to below
3095*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q3  // q0 = q1 = last row (q3)
3096*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q3
3097*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w8_hpad)  // defined outside this section of the file
3098*c0909341SAndroid Build Coastguard Worker
3099*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w16):
3100*c0909341SAndroid Build Coastguard Worker        cmp             r3,  #0  // any non-zero w_pad takes the padded variant
3101*c0909341SAndroid Build Coastguard Worker        bne             L(ipred_cfl_ac_444_w16_wpad)
3102*c0909341SAndroid Build Coastguard Worker1:      // Copy and expand input, without padding (two rows of 16 pixels per iteration)
3103*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0, q1}, [r1,  :128], r2  // row 0
3104*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2, q3}, [r12, :128], r2  // row 1
3105*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #3
3106*c0909341SAndroid Build Coastguard Worker        vshl.i16        q1,  q1,  #3
3107*c0909341SAndroid Build Coastguard Worker        vshl.i16        q2,  q2,  #3
3108*c0909341SAndroid Build Coastguard Worker        vshl.i16        q3,  q3,  #3
3109*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2  // two rows consumed
3110*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
3111*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
3112*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
3113*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
3114*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
3115*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
3116*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d4
3117*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d5
3118*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d6
3119*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d7
3120*c0909341SAndroid Build Coastguard Worker        bgt             1b
3121*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0  // set flags for the hpad code branched to below
3122*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q2  // q0/q1 = last row (q2/q3)
3123*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q3
3124*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w16_hpad)  // defined outside this section of the file
3125*c0909341SAndroid Build Coastguard Worker
3126*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w16_wpad):
3127*c0909341SAndroid Build Coastguard Worker1:      // Copy and expand input, padding 8 (only the left 8 pixels of each row are read)
3128*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0}, [r1,  :128], r2  // row 0, pixels 0-7
3129*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2}, [r12, :128], r2  // row 1, pixels 0-7
3130*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #3
3131*c0909341SAndroid Build Coastguard Worker        vshl.i16        q2,  q2,  #3
3132*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d1[3]  // right half of row 0 = last (already scaled) pixel of q0
3133*c0909341SAndroid Build Coastguard Worker        vdup.16         q3,  d5[3]  // right half of row 1 = last (already scaled) pixel of q2
3134*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #2  // two rows consumed
3135*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
3136*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
3137*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
3138*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
3139*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
3140*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
3141*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d4
3142*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d5
3143*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d6
3144*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d7
3145*c0909341SAndroid Build Coastguard Worker        bgt             1b
3146*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0  // set flags for the hpad code branched to below
3147*c0909341SAndroid Build Coastguard Worker        vmov            q0,  q2  // q0/q1 = last row (q2/q3)
3148*c0909341SAndroid Build Coastguard Worker        vmov            q1,  q3
3149*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w16_hpad)  // defined outside this section of the file
3150*c0909341SAndroid Build Coastguard Worker
3151*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32):
3152*c0909341SAndroid Build Coastguard Worker        adr             r7,  L(ipred_cfl_ac_444_w32_tbl)
3153*c0909341SAndroid Build Coastguard Worker        ldr             r3,  [r7, r3, lsl #1] // r3 = w_pad; w_pad << 1 == (w_pad >> 1) << 2, the byte offset of the table entry
3154*c0909341SAndroid Build Coastguard Worker        asr             r2,  r2,  #1  // undo the stride doubling: the w32 loops read one row per iteration via r1 only
3155*c0909341SAndroid Build Coastguard Worker        add             r7,  r7,  r3
3156*c0909341SAndroid Build Coastguard Worker        bx              r7  // dispatch on w_pad
3157*c0909341SAndroid Build Coastguard Worker
3158*c0909341SAndroid Build Coastguard Worker        .align 2
3159*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_tbl):  // Table-relative offsets for w_pad = 0, 2, 4, 6
3160*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_444_w32_wpad0) - L(ipred_cfl_ac_444_w32_tbl) + CONFIG_THUMB
3161*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_444_w32_wpad2) - L(ipred_cfl_ac_444_w32_tbl) + CONFIG_THUMB
3162*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_444_w32_wpad4) - L(ipred_cfl_ac_444_w32_tbl) + CONFIG_THUMB
3163*c0909341SAndroid Build Coastguard Worker        .word L(ipred_cfl_ac_444_w32_wpad6) - L(ipred_cfl_ac_444_w32_tbl) + CONFIG_THUMB
3164*c0909341SAndroid Build Coastguard Worker
3165*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_wpad0):
3166*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #32  // the first load below already advances r1 by 32 bytes
3167*c0909341SAndroid Build Coastguard Worker1:      // Copy and expand input, without padding
3168*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0, q1}, [r1, :128]!  // pixels 0-15
3169*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2, q3}, [r1, :128], r2  // pixels 16-31, then step to the next row
3170*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #3
3171*c0909341SAndroid Build Coastguard Worker        vshl.i16        q1,  q1,  #3
3172*c0909341SAndroid Build Coastguard Worker        vshl.i16        q2,  q2,  #3
3173*c0909341SAndroid Build Coastguard Worker        vshl.i16        q3,  q3,  #3
3174*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #1  // one row consumed
3175*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
3176*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
3177*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
3178*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
3179*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
3180*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
3181*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d4
3182*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d5
3183*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d6
3184*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d7
3185*c0909341SAndroid Build Coastguard Worker        bgt             1b
3186*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0  // flags are consumed by the beq at w32_hpad
3187*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_444_w32_hpad)
3188*c0909341SAndroid Build Coastguard Worker
3189*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_wpad2):
3190*c0909341SAndroid Build Coastguard Worker        sub             r2,  r2,  #32  // the first load below already advances r1 by 32 bytes
3191*c0909341SAndroid Build Coastguard Worker1:      // Copy and expand input, padding 8
3192*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0, q1}, [r1, :128]!  // pixels 0-15
3193*c0909341SAndroid Build Coastguard Worker        vld1.16         {q2},     [r1, :128], r2  // pixels 16-23, then step to the next row
3194*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #3
3195*c0909341SAndroid Build Coastguard Worker        vshl.i16        q1,  q1,  #3
3196*c0909341SAndroid Build Coastguard Worker        vshl.i16        q2,  q2,  #3
3197*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #1  // one row consumed
3198*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
3199*c0909341SAndroid Build Coastguard Worker        vdup.16         q3,  d5[3]  // pixels 24-31 = last (already scaled) pixel of q2
3200*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
3201*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
3202*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
3203*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
3204*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
3205*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d4
3206*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d5
3207*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d6
3208*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d7
3209*c0909341SAndroid Build Coastguard Worker        bgt             1b
3210*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0  // flags are consumed by the beq at w32_hpad
3211*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_444_w32_hpad)
3212*c0909341SAndroid Build Coastguard Worker
3213*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_wpad4):
3214*c0909341SAndroid Build Coastguard Worker1:      // Copy and expand input, padding 16
3215*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0, q1}, [r1, :128], r2  // pixels 0-15, then step to the next row
3216*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #3
3217*c0909341SAndroid Build Coastguard Worker        vshl.i16        q1,  q1,  #3
3218*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #1  // one row consumed
3219*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
3220*c0909341SAndroid Build Coastguard Worker        vdup.16         q2,  d3[3]  // pixels 16-31 = last (already scaled) pixel of q1
3221*c0909341SAndroid Build Coastguard Worker        vdup.16         q3,  d3[3]
3222*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
3223*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
3224*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
3225*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
3226*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
3227*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d4
3228*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d5
3229*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d6
3230*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d7
3231*c0909341SAndroid Build Coastguard Worker        bgt             1b
3232*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0  // flags are consumed by the beq at w32_hpad
3233*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_444_w32_hpad)
3234*c0909341SAndroid Build Coastguard Worker
3235*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_wpad6):
3236*c0909341SAndroid Build Coastguard Worker1:      // Copy and expand input, padding 24
3237*c0909341SAndroid Build Coastguard Worker        vld1.16         {q0}, [r1, :128], r2  // pixels 0-7, then step to the next row
3238*c0909341SAndroid Build Coastguard Worker        vshl.i16        q0,  q0,  #3
3239*c0909341SAndroid Build Coastguard Worker        subs            r8,  r8,  #1  // one row consumed
3240*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  d1[3]  // pixels 8-31 = last (already scaled) pixel of q0
3241*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
3242*c0909341SAndroid Build Coastguard Worker        vdup.16         q2,  d1[3]
3243*c0909341SAndroid Build Coastguard Worker        vdup.16         q3,  d1[3]
3244*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
3245*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
3246*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
3247*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
3248*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
3249*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d4
3250*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d5
3251*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d6
3252*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d7
3253*c0909341SAndroid Build Coastguard Worker        bgt             1b
3254*c0909341SAndroid Build Coastguard Worker        cmp             r4,  #0  // falls through into w32_hpad with the flags set
3255*c0909341SAndroid Build Coastguard Worker
3256*c0909341SAndroid Build Coastguard WorkerL(ipred_cfl_ac_444_w32_hpad):
3257*c0909341SAndroid Build Coastguard Worker        beq             3f // This assumes that all callers already did "cmp r4, #0"
3258*c0909341SAndroid Build Coastguard Worker2:      // Vertical padding (h_pad > 0): q0-q3 still hold the last processed row
3259*c0909341SAndroid Build Coastguard Worker        subs            r4,  r4,  #1  // one padded row per iteration
3260*c0909341SAndroid Build Coastguard Worker        vst1.16         {q0, q1}, [r0, :128]!
3261*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d0
3262*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d1
3263*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d2
3264*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d3
3265*c0909341SAndroid Build Coastguard Worker        vst1.16         {q2, q3}, [r0, :128]!
3266*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q8,  q8,  d4
3267*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q9,  q9,  d5
3268*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q10, q10, d6
3269*c0909341SAndroid Build Coastguard Worker        vaddw.u16       q11, q11, d7
3270*c0909341SAndroid Build Coastguard Worker        bgt             2b
3271*c0909341SAndroid Build Coastguard Worker3:
3272*c0909341SAndroid Build Coastguard Worker
3273*c0909341SAndroid Build Coastguard Worker        // Multiply the height by eight (32 x ch samples == 4 x (ch * 8) samples)
        // and reuse the w4 DC-subtraction code, which lies outside this section.
3274*c0909341SAndroid Build Coastguard Worker        lsl             r6,  r6,  #3
3275*c0909341SAndroid Build Coastguard Worker        b               L(ipred_cfl_ac_420_w4_calc_subtract_dc)
3276*c0909341SAndroid Build Coastguard Workerendfunc
3277