xref: /aosp_15_r20/external/libavc/encoder/arm/ih264e_half_pel.s (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1*495ae853SAndroid Build Coastguard Worker@/******************************************************************************
2*495ae853SAndroid Build Coastguard Worker@ *
3*495ae853SAndroid Build Coastguard Worker@ * Copyright (C) 2015 The Android Open Source Project
4*495ae853SAndroid Build Coastguard Worker@ *
5*495ae853SAndroid Build Coastguard Worker@ * Licensed under the Apache License, Version 2.0 (the "License");
6*495ae853SAndroid Build Coastguard Worker@ * you may not use this file except in compliance with the License.
7*495ae853SAndroid Build Coastguard Worker@ * You may obtain a copy of the License at:
8*495ae853SAndroid Build Coastguard Worker@ *
9*495ae853SAndroid Build Coastguard Worker@ * http://www.apache.org/licenses/LICENSE-2.0
10*495ae853SAndroid Build Coastguard Worker@ *
11*495ae853SAndroid Build Coastguard Worker@ * Unless required by applicable law or agreed to in writing, software
12*495ae853SAndroid Build Coastguard Worker@ * distributed under the License is distributed on an "AS IS" BASIS,
13*495ae853SAndroid Build Coastguard Worker@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*495ae853SAndroid Build Coastguard Worker@ * See the License for the specific language governing permissions and
15*495ae853SAndroid Build Coastguard Worker@ * limitations under the License.
16*495ae853SAndroid Build Coastguard Worker@ *
17*495ae853SAndroid Build Coastguard Worker@ *****************************************************************************
18*495ae853SAndroid Build Coastguard Worker@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*495ae853SAndroid Build Coastguard Worker@*/
20*495ae853SAndroid Build Coastguard Worker@/**
21*495ae853SAndroid Build Coastguard Worker@ *******************************************************************************
22*495ae853SAndroid Build Coastguard Worker@ * @file
23*495ae853SAndroid Build Coastguard Worker@ *  ih264e_half_pel.s
24*495ae853SAndroid Build Coastguard Worker@ *
25*495ae853SAndroid Build Coastguard Worker@ * @brief
26*495ae853SAndroid Build Coastguard Worker@ *
27*495ae853SAndroid Build Coastguard Worker@ *
28*495ae853SAndroid Build Coastguard Worker@ * @author
29*495ae853SAndroid Build Coastguard Worker@ *  Ittiam
30*495ae853SAndroid Build Coastguard Worker@ *
31*495ae853SAndroid Build Coastguard Worker@ * @par List of Functions:
32*495ae853SAndroid Build Coastguard Worker@ *  ih264e_sixtapfilter_horz
33*495ae853SAndroid Build Coastguard Worker@ *  ih264e_sixtap_filter_2dvh_vert
34*495ae853SAndroid Build Coastguard Worker@
35*495ae853SAndroid Build Coastguard Worker@ *
36*495ae853SAndroid Build Coastguard Worker@ * @remarks
37*495ae853SAndroid Build Coastguard Worker@ *  None
38*495ae853SAndroid Build Coastguard Worker@ *
39*495ae853SAndroid Build Coastguard Worker@ *******************************************************************************
40*495ae853SAndroid Build Coastguard Worker@ */
41*495ae853SAndroid Build Coastguard Worker
42*495ae853SAndroid Build Coastguard Worker
43*495ae853SAndroid Build Coastguard Worker.text
44*495ae853SAndroid Build Coastguard Worker.p2align 2
45*495ae853SAndroid Build Coastguard Worker
46*495ae853SAndroid Build Coastguard Worker@/*******************************************************************************
47*495ae853SAndroid Build Coastguard Worker@*
48*495ae853SAndroid Build Coastguard Worker@* @brief
49*495ae853SAndroid Build Coastguard Worker@*     Interprediction luma filter for horizontal input(Filter run for width = 17 and height =16)
50*495ae853SAndroid Build Coastguard Worker@*
51*495ae853SAndroid Build Coastguard Worker@* @par Description:
52*495ae853SAndroid Build Coastguard Worker@*    Applies a 6-tap horizontal filter. The output is clipped to 8 bits
53*495ae853SAndroid Build Coastguard Worker@*    sec 8.4.2.2.1 titled "Luma sample interpolation process"
54*495ae853SAndroid Build Coastguard Worker@*
55*495ae853SAndroid Build Coastguard Worker@* @param[in] pu1_src
56*495ae853SAndroid Build Coastguard Worker@*  UWORD8 pointer to the source
57*495ae853SAndroid Build Coastguard Worker@*
58*495ae853SAndroid Build Coastguard Worker@* @param[out] pu1_dst
59*495ae853SAndroid Build Coastguard Worker@*  UWORD8 pointer to the destination
60*495ae853SAndroid Build Coastguard Worker@*
61*495ae853SAndroid Build Coastguard Worker@* @param[in] src_strd
62*495ae853SAndroid Build Coastguard Worker@*  integer source stride
63*495ae853SAndroid Build Coastguard Worker@*
64*495ae853SAndroid Build Coastguard Worker@* @param[in] dst_strd
65*495ae853SAndroid Build Coastguard Worker@*  integer destination stride
66*495ae853SAndroid Build Coastguard Worker@*
67*495ae853SAndroid Build Coastguard Worker@*
68*495ae853SAndroid Build Coastguard Worker@* @returns
69*495ae853SAndroid Build Coastguard Worker@*
70*495ae853SAndroid Build Coastguard Worker@* @remarks
71*495ae853SAndroid Build Coastguard Worker@*  None
72*495ae853SAndroid Build Coastguard Worker@*
73*495ae853SAndroid Build Coastguard Worker@*******************************************************************************
74*495ae853SAndroid Build Coastguard Worker@*/
75*495ae853SAndroid Build Coastguard Worker@void ih264e_sixtapfilter_horz(UWORD8 *pu1_src,
76*495ae853SAndroid Build Coastguard Worker@                                UWORD8 *pu1_dst,
77*495ae853SAndroid Build Coastguard Worker@                                WORD32 src_strd,
78*495ae853SAndroid Build Coastguard Worker@                                WORD32 dst_strd);
79*495ae853SAndroid Build Coastguard Worker
80*495ae853SAndroid Build Coastguard Worker
81*495ae853SAndroid Build Coastguard Worker.equ HALFPEL_WIDTH ,  17 + 1            @// loop row count: 17 rounded up to even, because two rows are processed per iteration
82*495ae853SAndroid Build Coastguard Worker@// ih264e_sixtapfilter_horz_a9q: 6-tap horizontal filter, (a0 - 5a1 + 20a2 + 20a3 - 5a4 + a5 + 16) >> 5, saturated to 8 bits.
83*495ae853SAndroid Build Coastguard Worker@// r0 = load address (stepped by r2 per row), r1 = store address (stepped by r3 per row); presumably pu1_src, pu1_dst, src_strd, dst_strd of the prototype above - confirm against the caller.
84*495ae853SAndroid Build Coastguard Worker    .global ih264e_sixtapfilter_horz_a9q
85*495ae853SAndroid Build Coastguard Workerih264e_sixtapfilter_horz_a9q:
86*495ae853SAndroid Build Coastguard Worker    stmfd         sp!, {lr}             @// save lr; r14 is reused below as the row counter
87*495ae853SAndroid Build Coastguard Worker
88*495ae853SAndroid Build Coastguard Worker    vmov.i8       d0, #5                @// d0 = 5 in every byte lane (weight subtracted for taps a1 and a4)
89*495ae853SAndroid Build Coastguard Worker    sub           r0, r0, #2            @// step the load address back 2 bytes so lane 0 of each load is tap a0
90*495ae853SAndroid Build Coastguard Worker
91*495ae853SAndroid Build Coastguard Worker    vmov.i8       d1, #20               @// d1 = 20 in every byte lane (weight added for taps a2 and a3)
92*495ae853SAndroid Build Coastguard Worker    mov           r14, #HALFPEL_WIDTH   @// r14 = number of rows still to process
93*495ae853SAndroid Build Coastguard Worker    vpush         {d8-d15}              @// preserve d8-d15: q4-q7 serve as accumulators in the loop
94*495ae853SAndroid Build Coastguard Worker
95*495ae853SAndroid Build Coastguard Workerfilter_horz_loop:
96*495ae853SAndroid Build Coastguard Worker    @// Each pass filters two source rows (row0 in d2-d4, row1 in d5-d7) and stores 24 bytes per row.
97*495ae853SAndroid Build Coastguard Worker    @// NOTE(review): r14 starts at HALFPEL_WIDTH (18), so 18 rows are processed although the header above mentions height = 16 - confirm the buffers are sized for this.
98*495ae853SAndroid Build Coastguard Worker    vld1.8        {d2, d3, d4}, [r0], r2 @// Load 24 bytes of row0, then advance r0 by r2
99*495ae853SAndroid Build Coastguard Worker    vld1.8        {d5, d6, d7}, [r0], r2 @// Load 24 bytes of row1, then advance r0 by r2
100*495ae853SAndroid Build Coastguard Worker
101*495ae853SAndroid Build Coastguard Worker    @// Processing row0 and row1
102*495ae853SAndroid Build Coastguard Worker
103*495ae853SAndroid Build Coastguard Worker    vext.8        d31, d2, d3, #5       @//extract a[5]                         (column1,row0)
104*495ae853SAndroid Build Coastguard Worker    vext.8        d30, d3, d4, #5       @//extract a[5]                         (column2,row0)
105*495ae853SAndroid Build Coastguard Worker    @// Column 3 pairs d4 (d7 for row1) with itself, so vext wraps around; for the #5 extract only lanes 0-2 hold true a5 neighbours.
106*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q4, d31, d2           @// a0 + a5                             (column1,row0)
107*495ae853SAndroid Build Coastguard Worker    vext.8        d29, d4, d4, #5       @//extract a[5]                         (column3,row0)
108*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q5, d30, d3           @// a0 + a5                             (column2,row0)
109*495ae853SAndroid Build Coastguard Worker    vext.8        d28, d5, d6, #5       @//extract a[5]                         (column1,row1)
110*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q6, d29, d4           @// a0 + a5                             (column3,row0)
111*495ae853SAndroid Build Coastguard Worker    vext.8        d27, d6, d7, #5       @//extract a[5]                         (column2,row1)
112*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q7, d28, d5           @// a0 + a5                             (column1,row1)
113*495ae853SAndroid Build Coastguard Worker    vext.8        d26, d7, d7, #5       @//extract a[5]                         (column3,row1)
114*495ae853SAndroid Build Coastguard Worker    @// Taps a2 and a3 are accumulated with weight +20 (d1); d26-d31 are reused as scratch for every tap.
115*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q8, d27, d6           @// a0 + a5                             (column2,row1)
116*495ae853SAndroid Build Coastguard Worker    vext.8        d31, d2, d3, #2       @//extract a[2]                         (column1,row0)
117*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q9, d26, d7           @// a0 + a5                             (column3,row1)
118*495ae853SAndroid Build Coastguard Worker    vext.8        d30, d3, d4, #2       @//extract a[2]                         (column2,row0)
119*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q4, d31, d1           @// a0 + a5 + 20a2                      (column1,row0)
120*495ae853SAndroid Build Coastguard Worker    vext.8        d29, d4, d4, #2       @//extract a[2]                         (column3,row0)
121*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q5, d30, d1           @// a0 + a5 + 20a2                      (column2,row0)
122*495ae853SAndroid Build Coastguard Worker    vext.8        d28, d5, d6, #2       @//extract a[2]                         (column1,row1)
123*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q6, d29, d1           @// a0 + a5 + 20a2                      (column3,row0)
124*495ae853SAndroid Build Coastguard Worker    vext.8        d27, d6, d7, #2       @//extract a[2]                         (column2,row1)
125*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q7, d28, d1           @// a0 + a5 + 20a2                      (column1,row1)
126*495ae853SAndroid Build Coastguard Worker    vext.8        d26, d7, d7, #2       @//extract a[2]                         (column3,row1)
127*495ae853SAndroid Build Coastguard Worker
128*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q8, d27, d1           @// a0 + a5 + 20a2                      (column2,row1)
129*495ae853SAndroid Build Coastguard Worker    vext.8        d31, d2, d3, #3       @//extract a[3]                         (column1,row0)
130*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q9, d26, d1           @// a0 + a5 + 20a2                      (column3,row1)
131*495ae853SAndroid Build Coastguard Worker    vext.8        d30, d3, d4, #3       @//extract a[3]                         (column2,row0)
132*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q4, d31, d1           @// a0 + a5 + 20a2 + 20a3               (column1,row0)
133*495ae853SAndroid Build Coastguard Worker    vext.8        d29, d4, d4, #3       @//extract a[3]                         (column3,row0)
134*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q5, d30, d1           @// a0 + a5 + 20a2 + 20a3               (column2,row0)
135*495ae853SAndroid Build Coastguard Worker    vext.8        d28, d5, d6, #3       @//extract a[3]                         (column1,row1)
136*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q6, d29, d1           @// a0 + a5 + 20a2 + 20a3               (column3,row0)
137*495ae853SAndroid Build Coastguard Worker    vext.8        d27, d6, d7, #3       @//extract a[3]                         (column2,row1)
138*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q7, d28, d1           @// a0 + a5 + 20a2 + 20a3               (column1,row1)
139*495ae853SAndroid Build Coastguard Worker    vext.8        d26, d7, d7, #3       @//extract a[3]                         (column3,row1)
140*495ae853SAndroid Build Coastguard Worker    @// Taps a1 and a4 are subtracted with weight 5 (vmlsl with d0).
141*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q8, d27, d1           @// a0 + a5 + 20a2 + 20a3               (column2,row1)
142*495ae853SAndroid Build Coastguard Worker    vext.8        d31, d2, d3, #1       @//extract a[1]                         (column1,row0)
143*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q9, d26, d1           @// a0 + a5 + 20a2 + 20a3               (column3,row1)
144*495ae853SAndroid Build Coastguard Worker    vext.8        d30, d3, d4, #1       @//extract a[1]                         (column2,row0)
145*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q4, d31, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1,row0)
146*495ae853SAndroid Build Coastguard Worker    vext.8        d29, d4, d4, #1       @//extract a[1]                         (column3,row0)
147*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q5, d30, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2,row0)
148*495ae853SAndroid Build Coastguard Worker    vext.8        d28, d5, d6, #1       @//extract a[1]                         (column1,row1)
149*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q6, d29, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column3,row0)
150*495ae853SAndroid Build Coastguard Worker    vext.8        d27, d6, d7, #1       @//extract a[1]                         (column2,row1)
151*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q7, d28, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1,row1)
152*495ae853SAndroid Build Coastguard Worker    vext.8        d26, d7, d7, #1       @//extract a[1]                         (column3,row1)
153*495ae853SAndroid Build Coastguard Worker
154*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q8, d27, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2,row1)
155*495ae853SAndroid Build Coastguard Worker    vext.8        d31, d2, d3, #4       @//extract a[4]                         (column1,row0)
156*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q9, d26, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1         (column3,row1)
157*495ae853SAndroid Build Coastguard Worker    vext.8        d30, d3, d4, #4       @//extract a[4]                         (column2,row0)
158*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q4, d31, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1,row0)
159*495ae853SAndroid Build Coastguard Worker    vext.8        d29, d4, d4, #4       @//extract a[4]                         (column3,row0)
160*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q5, d30, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2,row0)
161*495ae853SAndroid Build Coastguard Worker    vext.8        d28, d5, d6, #4       @//extract a[4]                         (column1,row1)
162*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q6, d29, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column3,row0)
163*495ae853SAndroid Build Coastguard Worker    vext.8        d27, d6, d7, #4       @//extract a[4]                         (column2,row1)
164*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q7, d28, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1,row1)
165*495ae853SAndroid Build Coastguard Worker    vext.8        d26, d7, d7, #4       @//extract a[4]                         (column3,row1)
166*495ae853SAndroid Build Coastguard Worker
167*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q8, d27, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2,row1)
168*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q9, d26, d0           @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column3,row1)
169*495ae853SAndroid Build Coastguard Worker    @// Round (+16), shift right by 5 and saturate each signed 16-bit sum to an unsigned byte.
170*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d20, q4, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column1,row0)
171*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d21, q5, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column2,row0)
172*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d22, q6, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column3,row0)
173*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d23, q7, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column1,row1)
174*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d24, q8, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column2,row1)
175*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d25, q9, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column3,row1)
176*495ae853SAndroid Build Coastguard Worker
177*495ae853SAndroid Build Coastguard Worker    vst1.8        {d20, d21, d22}, [r1], r3 @//Store 24 bytes of dest row0, then advance r1 by r3
178*495ae853SAndroid Build Coastguard Worker    vst1.8        {d23, d24, d25}, [r1], r3 @//Store 24 bytes of dest row1, then advance r1 by r3
179*495ae853SAndroid Build Coastguard Worker
180*495ae853SAndroid Build Coastguard Worker    subs          r14, r14, #2          @// two rows finished; Z flag is set once the row count reaches zero
181*495ae853SAndroid Build Coastguard Worker
182*495ae853SAndroid Build Coastguard Worker    bne           filter_horz_loop      @// repeat while rows remain
183*495ae853SAndroid Build Coastguard Worker
184*495ae853SAndroid Build Coastguard Worker    vpop          {d8-d15}              @// restore the registers saved on entry
185*495ae853SAndroid Build Coastguard Worker    ldmfd         sp!, {pc}             @// return by popping the saved lr straight into pc
186*495ae853SAndroid Build Coastguard Worker
187*495ae853SAndroid Build Coastguard Worker
188*495ae853SAndroid Build Coastguard Worker
189*495ae853SAndroid Build Coastguard Worker
190*495ae853SAndroid Build Coastguard Worker
191*495ae853SAndroid Build Coastguard Worker
192*495ae853SAndroid Build Coastguard Worker
193*495ae853SAndroid Build Coastguard Worker
194*495ae853SAndroid Build Coastguard Worker
195*495ae853SAndroid Build Coastguard Worker@/**
196*495ae853SAndroid Build Coastguard Worker@*******************************************************************************
197*495ae853SAndroid Build Coastguard Worker@*
198*495ae853SAndroid Build Coastguard Worker@* @brief
199*495ae853SAndroid Build Coastguard Worker@*   This function implements a two stage cascaded six tap filter. It
200*495ae853SAndroid Build Coastguard Worker@*    applies the six tap filter in the vertical direction on the
201*495ae853SAndroid Build Coastguard Worker@*    predictor values, followed by applying the same filter in the
202*495ae853SAndroid Build Coastguard Worker@*    horizontal direction on the output of the first stage. The six tap
203*495ae853SAndroid Build Coastguard Worker@*    filtering operation is described in sec 8.4.2.2.1 titled "Luma sample
204*495ae853SAndroid Build Coastguard Worker@*    interpolation process"
205*495ae853SAndroid Build Coastguard Worker@*    (Filter run for width = 17 and height =17)
206*495ae853SAndroid Build Coastguard Worker@* @par Description:
207*495ae853SAndroid Build Coastguard Worker@*    The function interpolates
208*495ae853SAndroid Build Coastguard Worker@*    the predictors first in the vertical direction and then in the
209*495ae853SAndroid Build Coastguard Worker@*    horizontal direction to output the (1/2,1/2). The output of the first
210*495ae853SAndroid Build Coastguard Worker@*    stage of the filter is stored in the buffer pointed to by pi16_pred1(only in C)
211*495ae853SAndroid Build Coastguard Worker@*    in 16 bit precision.
212*495ae853SAndroid Build Coastguard Worker@*
213*495ae853SAndroid Build Coastguard Worker@*
214*495ae853SAndroid Build Coastguard Worker@* @param[in] pu1_src
215*495ae853SAndroid Build Coastguard Worker@*  UWORD8 pointer to the source
216*495ae853SAndroid Build Coastguard Worker@*
217*495ae853SAndroid Build Coastguard Worker@* @param[out] pu1_dst1
218*495ae853SAndroid Build Coastguard Worker@*  UWORD8 pointer to the destination(vertical filtered output)
219*495ae853SAndroid Build Coastguard Worker@*
220*495ae853SAndroid Build Coastguard Worker@* @param[out] pu1_dst2
221*495ae853SAndroid Build Coastguard Worker@*  UWORD8 pointer to the destination (output after applying the horizontal filter to the intermediate vertical output)
222*495ae853SAndroid Build Coastguard Worker@*
223*495ae853SAndroid Build Coastguard Worker@* @param[in] src_strd
224*495ae853SAndroid Build Coastguard Worker@*  integer source stride
225*495ae853SAndroid Build Coastguard Worker@*
226*495ae853SAndroid Build Coastguard Worker@* @param[in] dst_strd
227*495ae853SAndroid Build Coastguard Worker@*  integer destination stride of pu1_dst
228*495ae853SAndroid Build Coastguard Worker@*
229*495ae853SAndroid Build Coastguard Worker@* @param[in] pi16_pred1
230*495ae853SAndroid Build Coastguard Worker@*  Pointer to 16bit intermediate buffer(used only in c)
231*495ae853SAndroid Build Coastguard Worker@*
232*495ae853SAndroid Build Coastguard Worker@* @param[in] pi16_pred1_strd
233*495ae853SAndroid Build Coastguard Worker@*  integer destination stride of pi16_pred1
234*495ae853SAndroid Build Coastguard Worker@*
235*495ae853SAndroid Build Coastguard Worker@*
236*495ae853SAndroid Build Coastguard Worker@* @returns
237*495ae853SAndroid Build Coastguard Worker@*
238*495ae853SAndroid Build Coastguard Worker@* @remarks
239*495ae853SAndroid Build Coastguard Worker@*  None
240*495ae853SAndroid Build Coastguard Worker@*
241*495ae853SAndroid Build Coastguard Worker@*******************************************************************************
242*495ae853SAndroid Build Coastguard Worker@*/
243*495ae853SAndroid Build Coastguard Worker@void ih264e_sixtap_filter_2dvh_vert(UWORD8 *pu1_src,
244*495ae853SAndroid Build Coastguard Worker@                                UWORD8 *pu1_dst1,
245*495ae853SAndroid Build Coastguard Worker@                                UWORD8 *pu1_dst2,
246*495ae853SAndroid Build Coastguard Worker@                                WORD32 src_strd,
247*495ae853SAndroid Build Coastguard Worker@                                WORD32 dst_strd,
248*495ae853SAndroid Build Coastguard Worker@                                WORD32 *pi16_pred1,/* Pointer to 16-bit intermediate buffer (used only in C) */
249*495ae853SAndroid Build Coastguard Worker@                                WORD32 pi16_pred1_strd)
250*495ae853SAndroid Build Coastguard Worker
251*495ae853SAndroid Build Coastguard Worker
252*495ae853SAndroid Build Coastguard Worker
253*495ae853SAndroid Build Coastguard Worker
254*495ae853SAndroid Build Coastguard Worker    .global ih264e_sixtap_filter_2dvh_vert_a9q
255*495ae853SAndroid Build Coastguard Worker
256*495ae853SAndroid Build Coastguard Workerih264e_sixtap_filter_2dvh_vert_a9q:
257*495ae853SAndroid Build Coastguard Worker    stmfd         sp!, {r10, r11, r12, lr}
258*495ae853SAndroid Build Coastguard Worker
259*495ae853SAndroid Build Coastguard Worker@//r0 - pu1_ref
260*495ae853SAndroid Build Coastguard Worker@//r3 - u4_ref_width
261*495ae853SAndroid Build Coastguard Worker    vpush         {d8-d15}
262*495ae853SAndroid Build Coastguard Worker    @// Load six rows for vertical interpolation
263*495ae853SAndroid Build Coastguard Worker    lsl           r12, r3, #1
264*495ae853SAndroid Build Coastguard Worker    sub           r0, r0, r12
265*495ae853SAndroid Build Coastguard Worker    sub           r0, r0, #2
266*495ae853SAndroid Build Coastguard Worker    vld1.8        {d2, d3, d4}, [r0], r3
267*495ae853SAndroid Build Coastguard Worker    vld1.8        {d5, d6, d7}, [r0], r3
268*495ae853SAndroid Build Coastguard Worker    vld1.8        {d8, d9, d10}, [r0], r3
269*495ae853SAndroid Build Coastguard Worker    mov           r12, #5
270*495ae853SAndroid Build Coastguard Worker    vld1.8        {d11, d12, d13}, [r0], r3
271*495ae853SAndroid Build Coastguard Worker    mov           r14, #20
272*495ae853SAndroid Build Coastguard Worker    vld1.8        {d14, d15, d16}, [r0], r3
273*495ae853SAndroid Build Coastguard Worker    vmov.16       d0[0], r12
274*495ae853SAndroid Build Coastguard Worker    vmov.16       d0[1], r14
275*495ae853SAndroid Build Coastguard Worker    vld1.8        {d17, d18, d19}, [r0], r3
276*495ae853SAndroid Build Coastguard Worker    vmov.i8       d1, #20
277*495ae853SAndroid Build Coastguard Worker
278*495ae853SAndroid Build Coastguard Worker@// r12 - u2_buff1_width
279*495ae853SAndroid Build Coastguard Worker@// r14 - u2_buff2_width
280*495ae853SAndroid Build Coastguard Worker    ldr           r12, [sp, #80]
281*495ae853SAndroid Build Coastguard Worker    add           r11, r1, #6
282*495ae853SAndroid Build Coastguard Worker
283*495ae853SAndroid Build Coastguard Worker    mov           r14, r12
284*495ae853SAndroid Build Coastguard Worker
285*495ae853SAndroid Build Coastguard Worker    mov           r10, #3               @loop counter
286*495ae853SAndroid Build Coastguard Worker
287*495ae853SAndroid Build Coastguard Worker
288*495ae853SAndroid Build Coastguard Workerfilter_2dvh_loop:
289*495ae853SAndroid Build Coastguard Worker
290*495ae853SAndroid Build Coastguard Worker    @// ////////////// ROW 1 ///////////////////////
291*495ae853SAndroid Build Coastguard Worker
292*495ae853SAndroid Build Coastguard Worker@// Process first vertical interpolated row
293*495ae853SAndroid Build Coastguard Worker@// each column is
294*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q10, d2, d17          @// a0 + a5                             (column1,row0)
295*495ae853SAndroid Build Coastguard Worker    vmov.i8       d31, #5
296*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q10, d8, d1           @// a0 + a5 + 20a2                      (column1,row0)
297*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q10, d11, d1          @// a0 + a5 + 20a2 + 20a3               (column1,row0)
298*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q10, d5, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1,row0)
299*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q10, d14, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1,row0)
300*495ae853SAndroid Build Coastguard Worker
301*495ae853SAndroid Build Coastguard Worker
302*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q11, d3, d18          @// a0 + a5                             (column2,row0)
303*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q11, d9, d1           @// a0 + a5 + 20a2                      (column2,row0)
304*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q11, d12, d1          @// a0 + a5 + 20a2 + 20a3               (column2,row0)
305*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q11, d6, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2,row0)
306*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q11, d15, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2,row0)
307*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d20, d21, #2     @//extract a[2]                         (set1)
308*495ae853SAndroid Build Coastguard Worker
309*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q12, d4, d19          @// a0 + a5                             (column3,row0)
310*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d20, d21, #3     @//extract a[3]                         (set1)
311*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q12, d10, d1          @// a0 + a5 + 20a2                      (column3,row0)
312*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q12, d13, d1          @// a0 + a5 + 20a2 + 20a3               (column3,row0)
313*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q12, d7, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1         (column3,row0)
314*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q12, d16, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column3,row0)
315*495ae853SAndroid Build Coastguard Worker
316*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d2, q10, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column1,row0)
317*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d21, d22, #1     @//extract a[5]                         (set1)
318*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d3, q11, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column2,row0)
319*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d20, d21, #1     @//extract a[1]                         (set1)
320*495ae853SAndroid Build Coastguard Worker
321*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q13, d31, d20         @// a0 + a5                             (set1)
322*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d22, d23, #1     @//extract a[5]                         (set2)
323*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set1)
324*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set1)
325*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set1)
326*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d21, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set1)
327*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d21, d22, #2     @//extract a[2]                         (set2)
328*495ae853SAndroid Build Coastguard Worker
329*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d4, q12, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column3,row0)
330*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d21, d22, #3     @//extract a[3]                         (set2)
331*495ae853SAndroid Build Coastguard Worker
332*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d21, d22, #1     @//extract a[1]                         (set2)
333*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q10, d31, d21         @// a0 + a5                             (set2)
334*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q10, d30, d0[1]       @// a0 + a5 + 20a2                      (set2)
335*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q10, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set2)
336*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q10, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set2)
337*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q10, d22, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set2)
338*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d23, d24, #1     @//extract a[5]                         (set3)
339*495ae853SAndroid Build Coastguard Worker
340*495ae853SAndroid Build Coastguard Worker    vext.8        d2, d2, d3, #2
341*495ae853SAndroid Build Coastguard Worker    vst1.8        {d3, d4}, [r11], r12  @// store row1 - 1,1/2 grid
342*495ae853SAndroid Build Coastguard Worker    vst1.8        {d2}, [r1], r12       @// store row1 - 1,1/2 grid
343*495ae853SAndroid Build Coastguard Worker
344*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d22, d23, #2     @//extract a[2]                         (set3)
345*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d22, d23, #3     @//extract a[3]                         (set3)
346*495ae853SAndroid Build Coastguard Worker
347*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q1, d31, d22          @// a0 + a5                             (set3)
348*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d22, d23, #1     @//extract a[1]                         (set3)
349*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q1, d30, d0[1]        @// a0 + a5 + 20a2                      (set3)
350*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q1, d29, d0[1]        @// a0 + a5 + 20a2 + 20a3               (set3)
351*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q1, d28, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1         (set3)
352*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q1, d23, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set3)
353*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d24, d25, #1     @//extract a[5]                         (set4)
354*495ae853SAndroid Build Coastguard Worker
355*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d21, q10, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set2)
356*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d23, d24, #2     @//extract a[2]                         (set4)
357*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d20, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set1)
358*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d23, d24, #3     @//extract a[3]                         (set4)
359*495ae853SAndroid Build Coastguard Worker
360*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q13, d31, d23         @// a0 + a5                             (set4)
361*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d23, d24, #1     @//extract a[1]                         (set4)
362*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d25, d25, #1     @//extract a[5]                         (set5) ;//here only first element in the row is valid
363*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set4)
364*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set4)
365*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set4)
366*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d24, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set4)
367*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d24, d25, #2     @//extract a[2]                         (set5)
368*495ae853SAndroid Build Coastguard Worker
369*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q11, d31, d24         @// a0 + a5                             (set5)
370*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d24, d25, #3     @//extract a[3]                         (set5)
371*495ae853SAndroid Build Coastguard Worker
372*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d24, d25, #1     @//extract a[1]                         (set5)
373*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d28, q1, #8           @// shift by 8 and later we will shift by 2 more with rounding  (set3)
374*495ae853SAndroid Build Coastguard Worker
375*495ae853SAndroid Build Coastguard Worker    vld1.8        {d2, d3, d4}, [r0], r3 @// Load next Row data
376*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q11, d30, d0[1]       @// a0 + a5 + 20a2                      (set5)
377*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q11, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set5)
378*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q11, d31, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set5)
379*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q11, d25, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set5)
380*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d29, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set4)
381*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d26, q10, #2          @// half,half gird set1,2
382*495ae853SAndroid Build Coastguard Worker
383*495ae853SAndroid Build Coastguard Worker
384*495ae853SAndroid Build Coastguard Worker    @//VQRSHRUN.s16 D27,Q14,#2          ;// half,half grid set3,4
385*495ae853SAndroid Build Coastguard Worker    @//VSHRN.s32        D28,Q11,#8          ;// shift by 8 and later we will shift by 2 more with rounding  (set5)
386*495ae853SAndroid Build Coastguard Worker
387*495ae853SAndroid Build Coastguard Worker    @//VQRSHRUN.s16 D28,Q14,#2          ;// half,half grid set5
388*495ae853SAndroid Build Coastguard Worker
389*495ae853SAndroid Build Coastguard Worker    @//VST1.8       {D26,D27,D28},[r2],r14  ;// store 1/2,1/2 grid values
390*495ae853SAndroid Build Coastguard Worker    @// ////////////// ROW 2 ///////////////////////
391*495ae853SAndroid Build Coastguard Worker
392*495ae853SAndroid Build Coastguard Worker@// Vertical interpolation for this row: each of the three 8-pixel columns is
393*495ae853SAndroid Build Coastguard Worker@// filtered as a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4
394*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q10, d5, d2           @// a0 + a5                             (column1,row0)
395*495ae853SAndroid Build Coastguard Worker    vmov.i8       d31, #5
396*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q10, d11, d1          @// a0 + a5 + 20a2                      (column1,row0)
397*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q10, d14, d1          @// a0 + a5 + 20a2 + 20a3               (column1,row0)
398*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q10, d8, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1,row0)
399*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q10, d17, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1,row0)
400*495ae853SAndroid Build Coastguard Worker
401*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d27, q14, #2          @// half,half gird set3,4
402*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d28, q11, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set5)
403*495ae853SAndroid Build Coastguard Worker
404*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q11, d6, d3           @// a0 + a5                             (column2,row0)
405*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q11, d12, d1          @// a0 + a5 + 20a2                      (column2,row0)
406*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q11, d15, d1          @// a0 + a5 + 20a2 + 20a3               (column2,row0)
407*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q11, d9, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2,row0)
408*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q11, d18, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2,row0)
409*495ae853SAndroid Build Coastguard Worker
410*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d28, q14, #2          @// half,half gird set5
411*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d20, d21, #2     @//extract a[2]                         (set1)
412*495ae853SAndroid Build Coastguard Worker
413*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q12, d7, d4           @// a0 + a5                             (column3,row0)
414*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d20, d21, #3     @//extract a[3]                         (set1)
415*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q12, d13, d1          @// a0 + a5 + 20a2                      (column3,row0)
416*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q12, d16, d1          @// a0 + a5 + 20a2 + 20a3               (column3,row0)
417*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q12, d10, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column3,row0)
418*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q12, d19, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column3,row0)
419*495ae853SAndroid Build Coastguard Worker    vst1.8        {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
420*495ae853SAndroid Build Coastguard Worker
421*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d5, q10, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column1,row0)
422*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d21, d22, #1     @//extract a[5]                         (set1)
423*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d6, q11, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column2,row0)
424*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d20, d21, #1     @//extract a[1]                         (set1)
425*495ae853SAndroid Build Coastguard Worker
426*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q13, d31, d20         @// a0 + a5                             (set1)
427*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d22, d23, #1     @//extract a[5]                         (set2)
428*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set1)
429*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set1)
430*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set1)
431*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d21, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set1)
432*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d21, d22, #2     @//extract a[2]                         (set2)
433*495ae853SAndroid Build Coastguard Worker
434*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d7, q12, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column3,row0)
435*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d21, d22, #3     @//extract a[3]                         (set2)
436*495ae853SAndroid Build Coastguard Worker
437*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d21, d22, #1     @//extract a[1]                         (set2)
438*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q10, d31, d21         @// a0 + a5                             (set2)
439*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q10, d30, d0[1]       @// a0 + a5 + 20a2                      (set2)
440*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q10, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set2)
441*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q10, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set2)
442*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q10, d22, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set2)
443*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d23, d24, #1     @//extract a[5]                         (set3)
444*495ae853SAndroid Build Coastguard Worker
445*495ae853SAndroid Build Coastguard Worker    vext.8        d5, d5, d6, #2
446*495ae853SAndroid Build Coastguard Worker    vst1.8        {d6, d7}, [r11], r12  @// store row1 - 1,1/2 grid
447*495ae853SAndroid Build Coastguard Worker    vst1.8        {d5}, [r1], r12       @// store row1 - 1,1/2 grid
448*495ae853SAndroid Build Coastguard Worker
449*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d22, d23, #2     @//extract a[2]                         (set3)
450*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d22, d23, #3     @//extract a[3]                         (set3)
451*495ae853SAndroid Build Coastguard Worker
452*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q3, d31, d22          @// a0 + a5                             (set3)
453*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d22, d23, #1     @//extract a[1]                         (set3)
454*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q3, d30, d0[1]        @// a0 + a5 + 20a2                      (set3)
455*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q3, d29, d0[1]        @// a0 + a5 + 20a2 + 20a3               (set3)
456*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q3, d28, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1         (set3)
457*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q3, d23, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set3)
458*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d24, d25, #1     @//extract a[5]                         (set4)
459*495ae853SAndroid Build Coastguard Worker
460*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d21, q10, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set2)
461*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d23, d24, #2     @//extract a[2]                         (set4)
462*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d20, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set1)
463*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d23, d24, #3     @//extract a[3]                         (set4)
464*495ae853SAndroid Build Coastguard Worker
465*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q13, d31, d23         @// a0 + a5                             (set4)
466*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d23, d24, #1     @//extract a[1]                         (set4)
467*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d25, d25, #1     @//extract a[5]                         (set5) ;//here only first element in the row is valid
468*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set4)
469*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set4)
470*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set4)
471*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d24, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set4)
472*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d24, d25, #2     @//extract a[2]                         (set5)
473*495ae853SAndroid Build Coastguard Worker
474*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q11, d31, d24         @// a0 + a5                             (set5)
475*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d24, d25, #3     @//extract a[3]                         (set5)
476*495ae853SAndroid Build Coastguard Worker
477*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d24, d25, #1     @//extract a[1]                         (set5)
478*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d28, q3, #8           @// shift by 8 and later we will shift by 2 more with rounding  (set3)
479*495ae853SAndroid Build Coastguard Worker
480*495ae853SAndroid Build Coastguard Worker    vld1.8        {d5, d6, d7}, [r0], r3 @// Load next Row data
481*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q11, d30, d0[1]       @// a0 + a5 + 20a2                      (set5)
482*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q11, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set5)
483*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q11, d31, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set5)
484*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q11, d25, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set5)
485*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d29, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set4)
486*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d26, q10, #2          @// half,half gird set1,2
487*495ae853SAndroid Build Coastguard Worker
488*495ae853SAndroid Build Coastguard Worker
489*495ae853SAndroid Build Coastguard Worker    @//VQRSHRUN.s16 D27,Q14,#2          ;// half,half grid set3,4
490*495ae853SAndroid Build Coastguard Worker    @//VSHRN.s32        D28,Q11,#8          ;// shift by 8 and later we will shift by 2 more with rounding  (set5)
491*495ae853SAndroid Build Coastguard Worker
492*495ae853SAndroid Build Coastguard Worker    @//VQRSHRUN.s16 D28,Q14,#2          ;// half,half grid set5
493*495ae853SAndroid Build Coastguard Worker
494*495ae853SAndroid Build Coastguard Worker    @//VST1.8       {D26,D27,D28},[r2],r14  ;// store 1/2,1/2 grid values
495*495ae853SAndroid Build Coastguard Worker    @// ////////////// ROW 3 ///////////////////////
496*495ae853SAndroid Build Coastguard Worker
497*495ae853SAndroid Build Coastguard Worker@// Vertical interpolation for this row: each of the three 8-pixel columns is
498*495ae853SAndroid Build Coastguard Worker@// filtered as a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4
499*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q10, d8, d5           @// a0 + a5                             (column1,row0)
500*495ae853SAndroid Build Coastguard Worker    vmov.i8       d31, #5
501*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q10, d14, d1          @// a0 + a5 + 20a2                      (column1,row0)
502*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q10, d17, d1          @// a0 + a5 + 20a2 + 20a3               (column1,row0)
503*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q10, d11, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1,row0)
504*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q10, d2, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1,row0)
505*495ae853SAndroid Build Coastguard Worker
506*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d27, q14, #2          @// half,half gird set3,4
507*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d28, q11, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set5)
508*495ae853SAndroid Build Coastguard Worker
509*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q11, d9, d6           @// a0 + a5                             (column2,row0)
510*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q11, d15, d1          @// a0 + a5 + 20a2                      (column2,row0)
511*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q11, d18, d1          @// a0 + a5 + 20a2 + 20a3               (column2,row0)
512*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q11, d12, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2,row0)
513*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q11, d3, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2,row0)
514*495ae853SAndroid Build Coastguard Worker
515*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d28, q14, #2          @// half,half gird set5
516*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d20, d21, #2     @//extract a[2]                         (set1)
517*495ae853SAndroid Build Coastguard Worker
518*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q12, d10, d7          @// a0 + a5                             (column3,row0)
519*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d20, d21, #3     @//extract a[3]                         (set1)
520*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q12, d16, d1          @// a0 + a5 + 20a2                      (column3,row0)
521*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q12, d19, d1          @// a0 + a5 + 20a2 + 20a3               (column3,row0)
522*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q12, d13, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column3,row0)
523*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q12, d4, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column3,row0)
524*495ae853SAndroid Build Coastguard Worker
525*495ae853SAndroid Build Coastguard Worker    vst1.8        {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
526*495ae853SAndroid Build Coastguard Worker
527*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d8, q10, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column1,row0)
528*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d21, d22, #1     @//extract a[5]                         (set1)
529*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d9, q11, #5           @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column2,row0)
530*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d20, d21, #1     @//extract a[1]                         (set1)
531*495ae853SAndroid Build Coastguard Worker
532*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q13, d31, d20         @// a0 + a5                             (set1)
533*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d22, d23, #1     @//extract a[5]                         (set2)
534*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set1)
535*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set1)
536*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set1)
537*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d21, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set1)
538*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d21, d22, #2     @//extract a[2]                         (set2)
539*495ae853SAndroid Build Coastguard Worker
540*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d10, q12, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column3,row0)
541*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d21, d22, #3     @//extract a[3]                         (set2)
542*495ae853SAndroid Build Coastguard Worker
543*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d21, d22, #1     @//extract a[1]                         (set2)
544*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q10, d31, d21         @// a0 + a5                             (set2)
545*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q10, d30, d0[1]       @// a0 + a5 + 20a2                      (set2)
546*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q10, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set2)
547*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q10, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set2)
548*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q10, d22, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set2)
549*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d23, d24, #1     @//extract a[5]                         (set3)
550*495ae853SAndroid Build Coastguard Worker
551*495ae853SAndroid Build Coastguard Worker    vext.8        d8, d8, d9, #2
552*495ae853SAndroid Build Coastguard Worker    vst1.8        {d9, d10}, [r11], r12 @// store row1 - 1,1/2 grid
553*495ae853SAndroid Build Coastguard Worker    vst1.8        {d8}, [r1], r12       @// store row1 - 1,1/2 grid
554*495ae853SAndroid Build Coastguard Worker
555*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d22, d23, #2     @//extract a[2]                         (set3)
556*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d22, d23, #3     @//extract a[3]                         (set3)
557*495ae853SAndroid Build Coastguard Worker
558*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q4, d31, d22          @// a0 + a5                             (set3)
559*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d22, d23, #1     @//extract a[1]                         (set3)
560*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q4, d30, d0[1]        @// a0 + a5 + 20a2                      (set3)
561*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q4, d29, d0[1]        @// a0 + a5 + 20a2 + 20a3               (set3)
562*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q4, d28, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1         (set3)
563*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q4, d23, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set3)
564*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d24, d25, #1     @//extract a[5]                         (set4)
565*495ae853SAndroid Build Coastguard Worker
566*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d21, q10, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set2)
567*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d23, d24, #2     @//extract a[2]                         (set4)
568*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d20, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set1)
569*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d23, d24, #3     @//extract a[3]                         (set4)
570*495ae853SAndroid Build Coastguard Worker
571*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q13, d31, d23         @// a0 + a5                             (set4)
572*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d23, d24, #1     @//extract a[1]                         (set4)
573*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d25, d25, #1     @//extract a[5]                         (set5) ;//here only first element in the row is valid
574*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set4)
575*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set4)
576*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set4)
577*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d24, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set4)
578*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d24, d25, #2     @//extract a[2]                         (set5)
579*495ae853SAndroid Build Coastguard Worker
580*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q11, d31, d24         @// a0 + a5                             (set5)
581*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d24, d25, #3     @//extract a[3]                         (set5)
582*495ae853SAndroid Build Coastguard Worker
583*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d24, d25, #1     @//extract a[1]                         (set5)
584*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d28, q4, #8           @// shift by 8 and later we will shift by 2 more with rounding  (set3)
585*495ae853SAndroid Build Coastguard Worker
586*495ae853SAndroid Build Coastguard Worker    vld1.8        {d8, d9, d10}, [r0], r3 @// Load next Row data
587*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q11, d30, d0[1]       @// a0 + a5 + 20a2                      (set5)
588*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q11, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set5)
589*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q11, d31, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set5)
590*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q11, d25, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set5)
591*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d29, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set4)
592*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d26, q10, #2          @// half,half gird set1,2
593*495ae853SAndroid Build Coastguard Worker
594*495ae853SAndroid Build Coastguard Worker
595*495ae853SAndroid Build Coastguard Worker    @//VQRSHRUN.s16 D27,Q14,#2          ;// half,half grid set3,4
596*495ae853SAndroid Build Coastguard Worker    @//VSHRN.s32        D28,Q11,#8          ;// shift by 8 and later we will shift by 2 more with rounding  (set5)
597*495ae853SAndroid Build Coastguard Worker
598*495ae853SAndroid Build Coastguard Worker    @//VQRSHRUN.s16 D28,Q14,#2          ;// half,half grid set5
599*495ae853SAndroid Build Coastguard Worker
600*495ae853SAndroid Build Coastguard Worker    @//VST1.8       {D26,D27,D28},[r2],r14  ;// store 1/2,1/2 grid values
601*495ae853SAndroid Build Coastguard Worker    @// ////////////// ROW 4 ///////////////////////
602*495ae853SAndroid Build Coastguard Worker
603*495ae853SAndroid Build Coastguard Worker@// Vertical interpolation for this row: each of the three 8-pixel columns is
604*495ae853SAndroid Build Coastguard Worker@// filtered as a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4
605*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q10, d11, d8          @// a0 + a5                             (column1,row0)
606*495ae853SAndroid Build Coastguard Worker    vmov.i8       d31, #5
607*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q10, d17, d1          @// a0 + a5 + 20a2                      (column1,row0)
608*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q10, d2, d1           @// a0 + a5 + 20a2 + 20a3               (column1,row0)
609*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q10, d14, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1,row0)
610*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q10, d5, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1,row0)
611*495ae853SAndroid Build Coastguard Worker
612*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d27, q14, #2          @// half,half gird set3,4
613*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d28, q11, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set5)
614*495ae853SAndroid Build Coastguard Worker
615*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q11, d12, d9          @// a0 + a5                             (column2,row0)
616*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q11, d18, d1          @// a0 + a5 + 20a2                      (column2,row0)
617*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q11, d3, d1           @// a0 + a5 + 20a2 + 20a3               (column2,row0)
618*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q11, d15, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2,row0)
619*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q11, d6, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2,row0)
620*495ae853SAndroid Build Coastguard Worker
621*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d28, q14, #2          @// half,half gird set5
622*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d20, d21, #2     @//extract a[2]                         (set1)
623*495ae853SAndroid Build Coastguard Worker
624*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q12, d13, d10         @// a0 + a5                             (column3,row0)
625*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d20, d21, #3     @//extract a[3]                         (set1)
626*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q12, d19, d1          @// a0 + a5 + 20a2                      (column3,row0)
627*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q12, d4, d1           @// a0 + a5 + 20a2 + 20a3               (column3,row0)
628*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q12, d16, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column3,row0)
629*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q12, d7, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column3,row0)
630*495ae853SAndroid Build Coastguard Worker
631*495ae853SAndroid Build Coastguard Worker    vst1.8        {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
632*495ae853SAndroid Build Coastguard Worker
633*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d11, q10, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column1,row0)
634*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d21, d22, #1     @//extract a[5]                         (set1)
635*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d12, q11, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column2,row0)
636*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d20, d21, #1     @//extract a[1]                         (set1)
637*495ae853SAndroid Build Coastguard Worker
638*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q13, d31, d20         @// a0 + a5                             (set1)
639*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d22, d23, #1     @//extract a[5]                         (set2)
640*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set1)
641*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set1)
642*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set1)
643*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d21, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set1)
644*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d21, d22, #2     @//extract a[2]                         (set2)
645*495ae853SAndroid Build Coastguard Worker
646*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d13, q12, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column3,row0)
647*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d21, d22, #3     @//extract a[3]                         (set2)
648*495ae853SAndroid Build Coastguard Worker
649*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d21, d22, #1     @//extract a[1]                         (set2)
650*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q10, d31, d21         @// a0 + a5                             (set2)
651*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q10, d30, d0[1]       @// a0 + a5 + 20a2                      (set2)
652*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q10, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set2)
653*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q10, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set2)
654*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q10, d22, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set2)
655*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d23, d24, #1     @//extract a[5]                         (set3)
656*495ae853SAndroid Build Coastguard Worker
657*495ae853SAndroid Build Coastguard Worker    vext.8        d11, d11, d12, #2
658*495ae853SAndroid Build Coastguard Worker    vst1.8        {d12, d13}, [r11], r12 @// store row1 - 1,1/2 grid
659*495ae853SAndroid Build Coastguard Worker    vst1.8        {d11}, [r1], r12      @// store row1 - 1,1/2 grid
660*495ae853SAndroid Build Coastguard Worker
661*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d22, d23, #2     @//extract a[2]                         (set3)
662*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d22, d23, #3     @//extract a[3]                         (set3)
663*495ae853SAndroid Build Coastguard Worker
664*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q6, d31, d22          @// a0 + a5                             (set3)
665*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d22, d23, #1     @//extract a[1]                         (set3)
666*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q6, d30, d0[1]        @// a0 + a5 + 20a2                      (set3)
667*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q6, d29, d0[1]        @// a0 + a5 + 20a2 + 20a3               (set3)
668*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q6, d28, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1         (set3)
669*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q6, d23, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set3)
670*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d24, d25, #1     @//extract a[5]                         (set4)
671*495ae853SAndroid Build Coastguard Worker
672*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d21, q10, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set2)
673*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d23, d24, #2     @//extract a[2]                         (set4)
674*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d20, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set1)
675*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d23, d24, #3     @//extract a[3]                         (set4)
676*495ae853SAndroid Build Coastguard Worker
677*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q13, d31, d23         @// a0 + a5                             (set4)
678*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d23, d24, #1     @//extract a[1]                         (set4)
679*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d25, d25, #1     @//extract a[5]                         (set5) ;//here only first element in the row is valid
680*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set4)
681*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set4)
682*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set4)
683*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d24, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set4)
684*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d24, d25, #2     @//extract a[2]                         (set5)
685*495ae853SAndroid Build Coastguard Worker
686*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q11, d31, d24         @// a0 + a5                             (set5)
687*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d24, d25, #3     @//extract a[3]                         (set5)
688*495ae853SAndroid Build Coastguard Worker
689*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d24, d25, #1     @//extract a[1]                         (set5)
690*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d28, q6, #8           @// shift by 8 and later we will shift by 2 more with rounding  (set3)
691*495ae853SAndroid Build Coastguard Worker
692*495ae853SAndroid Build Coastguard Worker    vld1.8        {d11, d12, d13}, [r0], r3 @// Load next Row data
693*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q11, d30, d0[1]       @// a0 + a5 + 20a2                      (set5)
694*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q11, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set5)
695*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q11, d31, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set5)
696*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q11, d25, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set5)
697*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d29, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set4)
698*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d26, q10, #2          @// half,half gird set1,2
699*495ae853SAndroid Build Coastguard Worker
700*495ae853SAndroid Build Coastguard Worker
701*495ae853SAndroid Build Coastguard Worker    @//VQRSHRUN.s16 D27,Q14,#2          ;// half,half gird set3,4
702*495ae853SAndroid Build Coastguard Worker    @//VSHRN.s32        D28,Q11,#8          ;// shift by 8 and later we will shift by 2 more with rounding  (set5)
703*495ae853SAndroid Build Coastguard Worker
704*495ae853SAndroid Build Coastguard Worker    @//VQRSHRUN.s16 D28,Q14,#2          ;// half,half gird set5
705*495ae853SAndroid Build Coastguard Worker
706*495ae853SAndroid Build Coastguard Worker    @//VST1.8       {D26,D27,D28},[r2],r14  ;// store 1/2,1,2 grif values
707*495ae853SAndroid Build Coastguard Worker    @// ////////////// ROW 5 ///////////////////////
708*495ae853SAndroid Build Coastguard Worker
709*495ae853SAndroid Build Coastguard Worker@// Vertical six-tap filter for this row (8-bit inputs widened to 16-bit sums);
710*495ae853SAndroid Build Coastguard Worker@// each column is a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4
711*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q10, d14, d11         @// a0 + a5                             (column1,row0)
712*495ae853SAndroid Build Coastguard Worker    vmov.i8       d31, #5
713*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q10, d2, d1           @// a0 + a5 + 20a2                      (column1,row0)
714*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q10, d5, d1           @// a0 + a5 + 20a2 + 20a3               (column1,row0)
715*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q10, d17, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1,row0)
716*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q10, d8, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1,row0)
717*495ae853SAndroid Build Coastguard Worker
718*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d27, q14, #2          @// half,half gird set3,4
719*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d28, q11, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set5)
720*495ae853SAndroid Build Coastguard Worker
721*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q11, d15, d12         @// a0 + a5                             (column2,row0)
722*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q11, d3, d1           @// a0 + a5 + 20a2                      (column2,row0)
723*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q11, d6, d1           @// a0 + a5 + 20a2 + 20a3               (column2,row0)
724*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q11, d18, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2,row0)
725*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q11, d9, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2,row0)
726*495ae853SAndroid Build Coastguard Worker
727*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d28, q14, #2          @// half,half gird set5
728*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d20, d21, #2     @//extract a[2]                         (set1)
729*495ae853SAndroid Build Coastguard Worker
730*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q12, d16, d13         @// a0 + a5                             (column3,row0)
731*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d20, d21, #3     @//extract a[3]                         (set1)
732*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q12, d4, d1           @// a0 + a5 + 20a2                      (column3,row0)
733*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q12, d7, d1           @// a0 + a5 + 20a2 + 20a3               (column3,row0)
734*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q12, d19, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1         (column3,row0)
735*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q12, d10, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column3,row0)
736*495ae853SAndroid Build Coastguard Worker
737*495ae853SAndroid Build Coastguard Worker    vst1.8        {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
738*495ae853SAndroid Build Coastguard Worker
739*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d14, q10, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column1,row0)
740*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d21, d22, #1     @//extract a[5]                         (set1)
741*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d15, q11, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column2,row0)
742*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d20, d21, #1     @//extract a[1]                         (set1)
743*495ae853SAndroid Build Coastguard Worker
744*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q13, d31, d20         @// a0 + a5                             (set1)
745*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d22, d23, #1     @//extract a[5]                         (set2)
746*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set1)
747*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set1)
748*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set1)
749*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d21, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set1)
750*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d21, d22, #2     @//extract a[2]                         (set2)
751*495ae853SAndroid Build Coastguard Worker
752*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d16, q12, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column3,row0)
753*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d21, d22, #3     @//extract a[3]                         (set2)
754*495ae853SAndroid Build Coastguard Worker
755*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d21, d22, #1     @//extract a[1]                         (set2)
756*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q10, d31, d21         @// a0 + a5                             (set2)
757*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q10, d30, d0[1]       @// a0 + a5 + 20a2                      (set2)
758*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q10, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set2)
759*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q10, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set2)
760*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q10, d22, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set2)
761*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d23, d24, #1     @//extract a[5]                         (set3)
762*495ae853SAndroid Build Coastguard Worker
763*495ae853SAndroid Build Coastguard Worker    vext.8        d14, d14, d15, #2
764*495ae853SAndroid Build Coastguard Worker    vst1.8        {d15, d16}, [r11], r12 @// store row1 - 1,1/2 grid
765*495ae853SAndroid Build Coastguard Worker    vst1.8        {d14}, [r1], r12      @// store row1 - 1,1/2 grid
766*495ae853SAndroid Build Coastguard Worker
767*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d22, d23, #2     @//extract a[2]                         (set3)
768*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d22, d23, #3     @//extract a[3]                         (set3)
769*495ae853SAndroid Build Coastguard Worker
770*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q7, d31, d22          @// a0 + a5                             (set3)
771*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d22, d23, #1     @//extract a[1]                         (set3)
772*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q7, d30, d0[1]        @// a0 + a5 + 20a2                      (set3)
773*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q7, d29, d0[1]        @// a0 + a5 + 20a2 + 20a3               (set3)
774*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q7, d28, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1         (set3)
775*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q7, d23, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set3)
776*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d24, d25, #1     @//extract a[5]                         (set4)
777*495ae853SAndroid Build Coastguard Worker
778*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d21, q10, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set2)
779*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d23, d24, #2     @//extract a[2]                         (set4)
780*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d20, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set1)
781*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d23, d24, #3     @//extract a[3]                         (set4)
782*495ae853SAndroid Build Coastguard Worker
783*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q13, d31, d23         @// a0 + a5                             (set4)
784*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d23, d24, #1     @//extract a[1]                         (set4)
785*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d25, d25, #1     @//extract a[5]                         (set5) ;//here only first element in the row is valid
786*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set4)
787*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set4)
788*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set4)
789*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d24, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set4)
790*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d24, d25, #2     @//extract a[2]                         (set5)
791*495ae853SAndroid Build Coastguard Worker
792*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q11, d31, d24         @// a0 + a5                             (set5)
793*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d24, d25, #3     @//extract a[3]                         (set5)
794*495ae853SAndroid Build Coastguard Worker
795*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d24, d25, #1     @//extract a[1]                         (set5)
796*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d28, q7, #8           @// shift by 8 and later we will shift by 2 more with rounding  (set3)
797*495ae853SAndroid Build Coastguard Worker
798*495ae853SAndroid Build Coastguard Worker    vld1.8        {d14, d15, d16}, [r0], r3 @// Load next Row data
799*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q11, d30, d0[1]       @// a0 + a5 + 20a2                      (set5)
800*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q11, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set5)
801*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q11, d31, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set5)
802*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q11, d25, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set5)
803*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d29, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set4)
804*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d26, q10, #2          @// half,half gird set1,2
805*495ae853SAndroid Build Coastguard Worker
806*495ae853SAndroid Build Coastguard Worker
807*495ae853SAndroid Build Coastguard Worker    @//VQRSHRUN.s16 D27,Q14,#2          ;// half,half gird set3,4
808*495ae853SAndroid Build Coastguard Worker    @//VSHRN.s32        D28,Q11,#8          ;// shift by 8 and later we will shift by 2 more with rounding  (set5)
809*495ae853SAndroid Build Coastguard Worker
810*495ae853SAndroid Build Coastguard Worker    @//VQRSHRUN.s16 D28,Q14,#2          ;// half,half gird set5
811*495ae853SAndroid Build Coastguard Worker
812*495ae853SAndroid Build Coastguard Worker    @//VST1.8       {D26,D27,D28},[r2],r14  ;// store 1/2,1,2 grif values
813*495ae853SAndroid Build Coastguard Worker    @// ////////////// ROW 6 ///////////////////////
814*495ae853SAndroid Build Coastguard Worker
815*495ae853SAndroid Build Coastguard Worker@// Vertical six-tap filter for this row (8-bit inputs widened to 16-bit sums);
816*495ae853SAndroid Build Coastguard Worker@// each column is a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4
817*495ae853SAndroid Build Coastguard Worker
818*495ae853SAndroid Build Coastguard Worker    cmp           r10, #1               @// if it 17 rows are complete skip
819*495ae853SAndroid Build Coastguard Worker    beq           filter_2dvh_skip_row
820*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q10, d17, d14         @// a0 + a5                             (column1,row0)
821*495ae853SAndroid Build Coastguard Worker    vmov.i8       d31, #5
822*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q10, d5, d1           @// a0 + a5 + 20a2                      (column1,row0)
823*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q10, d8, d1           @// a0 + a5 + 20a2 + 20a3               (column1,row0)
824*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q10, d2, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1         (column1,row0)
825*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q10, d11, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column1,row0)
826*495ae853SAndroid Build Coastguard Worker
827*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d27, q14, #2          @// half,half gird set3,4
828*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d28, q11, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set5)
829*495ae853SAndroid Build Coastguard Worker
830*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q11, d18, d15         @// a0 + a5                             (column2,row0)
831*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q11, d6, d1           @// a0 + a5 + 20a2                      (column2,row0)
832*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q11, d9, d1           @// a0 + a5 + 20a2 + 20a3               (column2,row0)
833*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q11, d3, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1         (column2,row0)
834*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q11, d12, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column2,row0)
835*495ae853SAndroid Build Coastguard Worker
836*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d28, q14, #2          @// half,half gird set5
837*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d20, d21, #2     @//extract a[2]                         (set1)
838*495ae853SAndroid Build Coastguard Worker
839*495ae853SAndroid Build Coastguard Worker    vaddl.u8      q12, d19, d16         @// a0 + a5                             (column3,row0)
840*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d20, d21, #3     @//extract a[3]                         (set1)
841*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q12, d7, d1           @// a0 + a5 + 20a2                      (column3,row0)
842*495ae853SAndroid Build Coastguard Worker    vmlal.u8      q12, d10, d1          @// a0 + a5 + 20a2 + 20a3               (column3,row0)
843*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q12, d4, d31          @// a0 + a5 + 20a2 + 20a3 - 5a1         (column3,row0)
844*495ae853SAndroid Build Coastguard Worker    vmlsl.u8      q12, d13, d31         @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (column3,row0)
845*495ae853SAndroid Build Coastguard Worker
846*495ae853SAndroid Build Coastguard Worker    vst1.8        {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
847*495ae853SAndroid Build Coastguard Worker
848*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d17, q10, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column1,row0)
849*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d21, d22, #1     @//extract a[5]                         (set1)
850*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d18, q11, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column2,row0)
851*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d20, d21, #1     @//extract a[1]                         (set1)
852*495ae853SAndroid Build Coastguard Worker
853*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q13, d31, d20         @// a0 + a5                             (set1)
854*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d22, d23, #1     @//extract a[5]                         (set2)
855*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set1)
856*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set1)
857*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set1)
858*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d21, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set1)
859*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d21, d22, #2     @//extract a[2]                         (set2)
860*495ae853SAndroid Build Coastguard Worker
861*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d19, q12, #5          @// (a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4 + 16) >> 5   (column3,row0)
862*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d21, d22, #3     @//extract a[3]                         (set2)
863*495ae853SAndroid Build Coastguard Worker
864*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d21, d22, #1     @//extract a[1]                         (set2)
865*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q10, d31, d21         @// a0 + a5                             (set2)
866*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q10, d30, d0[1]       @// a0 + a5 + 20a2                      (set2)
867*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q10, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set2)
868*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q10, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set2)
869*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q10, d22, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set2)
870*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d23, d24, #1     @//extract a[5]                         (set3)
871*495ae853SAndroid Build Coastguard Worker
872*495ae853SAndroid Build Coastguard Worker    vext.8        d17, d17, d18, #2
873*495ae853SAndroid Build Coastguard Worker    vst1.8        {d18, d19}, [r11], r12 @// store row1 - 1,1/2 grid
874*495ae853SAndroid Build Coastguard Worker    vst1.8        {d17}, [r1], r12      @// store row1 - 1,1/2 grid
875*495ae853SAndroid Build Coastguard Worker
876*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d22, d23, #2     @//extract a[2]                         (set3)
877*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d22, d23, #3     @//extract a[3]                         (set3)
878*495ae853SAndroid Build Coastguard Worker
879*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q9, d31, d22          @// a0 + a5                             (set3)
880*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d22, d23, #1     @//extract a[1]                         (set3)
881*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q9, d30, d0[1]        @// a0 + a5 + 20a2                      (set3)
882*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q9, d29, d0[1]        @// a0 + a5 + 20a2 + 20a3               (set3)
883*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q9, d28, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1         (set3)
884*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q9, d23, d0[0]        @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set3)
885*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d24, d25, #1     @//extract a[5]                         (set4)
886*495ae853SAndroid Build Coastguard Worker
887*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d21, q10, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set2)
888*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d23, d24, #2     @//extract a[2]                         (set4)
889*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d20, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set1)
890*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d23, d24, #3     @//extract a[3]                         (set4)
891*495ae853SAndroid Build Coastguard Worker
892*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q13, d31, d23         @// a0 + a5                             (set4)
893*495ae853SAndroid Build Coastguard Worker    vext.16       d28, d23, d24, #1     @//extract a[1]                         (set4)
894*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d25, d25, #1     @//extract a[5]                         (set5) ;//here only first element in the row is valid
895*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d30, d0[1]       @// a0 + a5 + 20a2                      (set4)
896*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q13, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set4)
897*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d28, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set4)
898*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q13, d24, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set4)
899*495ae853SAndroid Build Coastguard Worker    vext.16       d30, d24, d25, #2     @//extract a[2]                         (set5)
900*495ae853SAndroid Build Coastguard Worker
901*495ae853SAndroid Build Coastguard Worker    vaddl.s16     q11, d31, d24         @// a0 + a5                             (set5)
902*495ae853SAndroid Build Coastguard Worker    vext.16       d29, d24, d25, #3     @//extract a[3]                         (set5)
903*495ae853SAndroid Build Coastguard Worker
904*495ae853SAndroid Build Coastguard Worker    vext.16       d31, d24, d25, #1     @//extract a[1]                         (set5)
905*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d28, q9, #8           @// shift by 8 and later we will shift by 2 more with rounding  (set3)
906*495ae853SAndroid Build Coastguard Worker
907*495ae853SAndroid Build Coastguard Worker    vld1.8        {d17, d18, d19}, [r0], r3 @// Load next Row data
908*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q11, d30, d0[1]       @// a0 + a5 + 20a2                      (set5)
909*495ae853SAndroid Build Coastguard Worker    vmlal.s16     q11, d29, d0[1]       @// a0 + a5 + 20a2 + 20a3               (set5)
910*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q11, d31, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1         (set5)
911*495ae853SAndroid Build Coastguard Worker    vmlsl.s16     q11, d25, d0[0]       @// a0 + a5 + 20a2 + 20a3 - 5a1 - 5a4   (set5)
912*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d29, q13, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set4)
913*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d26, q10, #2          @// half,half gird set1,2
914*495ae853SAndroid Build Coastguard Worker
915*495ae853SAndroid Build Coastguard Worker
916*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d27, q14, #2          @// half,half gird set3,4
917*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d28, q11, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set5)
918*495ae853SAndroid Build Coastguard Worker
919*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d28, q14, #2          @// half,half gird set5
920*495ae853SAndroid Build Coastguard Worker
921*495ae853SAndroid Build Coastguard Worker    vst1.8        {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grif values
922*495ae853SAndroid Build Coastguard Worker
923*495ae853SAndroid Build Coastguard Worker    subs          r10, r10, #1          @//decrement loop counter
924*495ae853SAndroid Build Coastguard Worker
925*495ae853SAndroid Build Coastguard Worker    bne           filter_2dvh_loop
926*495ae853SAndroid Build Coastguard Worker
927*495ae853SAndroid Build Coastguard Worker
928*495ae853SAndroid Build Coastguard Worker@// Process first vertical interpolated row
929*495ae853SAndroid Build Coastguard Worker@// each column is
930*495ae853SAndroid Build Coastguard Worker    @// ////////////// ROW 13 ///////////////////////
931*495ae853SAndroid Build Coastguard Worker
932*495ae853SAndroid Build Coastguard Worker@// Loop finished (r10 counted down to zero): no further row is filtered here.
933*495ae853SAndroid Build Coastguard Worker@// Pop d8-d15 and r10-r12 from the stack and return by loading pc.
934*495ae853SAndroid Build Coastguard Worker    vpop          {d8-d15}
935*495ae853SAndroid Build Coastguard Worker    ldmfd         sp!, {r10, r11, r12, pc}
936*495ae853SAndroid Build Coastguard Worker
937*495ae853SAndroid Build Coastguard Workerfilter_2dvh_skip_row:
@// Exit path: finish the output row that is already in flight, store it and
@// return. The narrow/narrow/narrow/store sequence below is the same as the
@// one that precedes "bne filter_2dvh_loop", minus the loop-counter update
@// and the branch back.
@// NOTE(review): the branch that reaches this label is outside this view.
@// Presumably q14 (d28:d29) holds the set3/set4 16-bit intermediates, q11 the
@// set5 32-bit sums and d26 the finished set1,2 bytes, as they do at the same
@// point in the loop body -- confirm at the branch site.
938*495ae853SAndroid Build Coastguard Worker
939*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d27, q14, #2          @// half,half grid set3,4: rounding shift right by 2, saturate to unsigned 8 bit
940*495ae853SAndroid Build Coastguard Worker    vshrn.s32     d28, q11, #8          @// shift by 8 and later we will shift by 2 more with rounding  (set5)
941*495ae853SAndroid Build Coastguard Worker
942*495ae853SAndroid Build Coastguard Worker    vqrshrun.s16  d28, q14, #2          @// half,half grid set5 in the low 4 bytes; d29 was not rewritten above, so the high 4 bytes are narrowed from its current contents
943*495ae853SAndroid Build Coastguard Worker
944*495ae853SAndroid Build Coastguard Worker    vst1.8        {d26, d27, d28}, [r2], r14 @// store 1/2,1,2 grid values (24 bytes from d26, d27, d28), then r2 += r14
945*495ae853SAndroid Build Coastguard Worker    vpop          {d8-d15}              @// pop d8-d15 from the stack
946*495ae853SAndroid Build Coastguard Worker    ldmfd         sp!, {r10, r11, r12, pc} @// pop r10-r12 and load pc from the stack, i.e. return to the caller
947*495ae853SAndroid Build Coastguard Worker
948*495ae853SAndroid Build Coastguard Worker
949*495ae853SAndroid Build Coastguard Worker
950*495ae853SAndroid Build Coastguard Worker
951