xref: /aosp_15_r20/external/libhevc/common/arm/ihevc_padding.s (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1*c83a76b0SSuyog Pawar@/*****************************************************************************
2*c83a76b0SSuyog Pawar@*
3*c83a76b0SSuyog Pawar@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*c83a76b0SSuyog Pawar@*
5*c83a76b0SSuyog Pawar@* Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar@* you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar@* You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar@*
9*c83a76b0SSuyog Pawar@* http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar@*
11*c83a76b0SSuyog Pawar@* Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar@* distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar@* See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar@* limitations under the License.
16*c83a76b0SSuyog Pawar@*
17*c83a76b0SSuyog Pawar@*****************************************************************************/
18*c83a76b0SSuyog Pawar@/**
19*c83a76b0SSuyog Pawar@ *******************************************************************************
20*c83a76b0SSuyog Pawar@ * @file
21*c83a76b0SSuyog Pawar@ *  ihevc_padding_neon.s
22*c83a76b0SSuyog Pawar@ *
23*c83a76b0SSuyog Pawar@ * @brief
24*c83a76b0SSuyog Pawar@ *  contains function definitions padding
25*c83a76b0SSuyog Pawar@ *
26*c83a76b0SSuyog Pawar@ * @author
27*c83a76b0SSuyog Pawar@ *  naveen sr
28*c83a76b0SSuyog Pawar@ *
@ * @par list of functions:
@ *  - ihevc_pad_left_luma_a9q()
@ *  - ihevc_pad_left_chroma_a9q()
@ *  - ihevc_pad_right_luma_a9q()
@ *  - ihevc_pad_right_chroma_a9q()
32*c83a76b0SSuyog Pawar@ *
33*c83a76b0SSuyog Pawar@ * @remarks
34*c83a76b0SSuyog Pawar@ *  none
35*c83a76b0SSuyog Pawar@ *
36*c83a76b0SSuyog Pawar@ *******************************************************************************
37*c83a76b0SSuyog Pawar@*/
38*c83a76b0SSuyog Pawar
39*c83a76b0SSuyog Pawar@/**
40*c83a76b0SSuyog Pawar@*******************************************************************************
41*c83a76b0SSuyog Pawar@*
42*c83a76b0SSuyog Pawar@* @brief
43*c83a76b0SSuyog Pawar@*   padding (luma block) at the left of a 2d array
44*c83a76b0SSuyog Pawar@*
45*c83a76b0SSuyog Pawar@* @par description:
46*c83a76b0SSuyog Pawar@*   the left column of a 2d array is replicated for pad_size times at the left
47*c83a76b0SSuyog Pawar@*
48*c83a76b0SSuyog Pawar@*
49*c83a76b0SSuyog Pawar@* @param[in] pu1_src
50*c83a76b0SSuyog Pawar@*  uword8 pointer to the source
51*c83a76b0SSuyog Pawar@*
52*c83a76b0SSuyog Pawar@* @param[in] src_strd
53*c83a76b0SSuyog Pawar@*  integer source stride
54*c83a76b0SSuyog Pawar@*
@* @param[in] ht
@*  integer height of the array (number of rows to pad)
@*
@* @param[in] pad_size
@*  integer padding size in bytes (the implementation assumes 80)
69*c83a76b0SSuyog Pawar@*
70*c83a76b0SSuyog Pawar@* @returns
71*c83a76b0SSuyog Pawar@*
72*c83a76b0SSuyog Pawar@* @remarks
73*c83a76b0SSuyog Pawar@*  none
74*c83a76b0SSuyog Pawar@*
75*c83a76b0SSuyog Pawar@*******************************************************************************
76*c83a76b0SSuyog Pawar@*/
77*c83a76b0SSuyog Pawar@.if pad_left_luma == c
78*c83a76b0SSuyog Pawar@void ihevc_pad_left_luma(uword8 *pu1_src,
79*c83a76b0SSuyog Pawar@                        word32 src_strd,
80*c83a76b0SSuyog Pawar@                        word32 ht,
81*c83a76b0SSuyog Pawar@                        word32 pad_size)
82*c83a76b0SSuyog Pawar@**************variables vs registers*************************
83*c83a76b0SSuyog Pawar@   r0 => *pu1_src
84*c83a76b0SSuyog Pawar@   r1 => src_strd
85*c83a76b0SSuyog Pawar@   r2 => ht
86*c83a76b0SSuyog Pawar@   r3 => pad_size
87*c83a76b0SSuyog Pawar
88*c83a76b0SSuyog Pawar.text
89*c83a76b0SSuyog Pawar.align 4
90*c83a76b0SSuyog Pawar
91*c83a76b0SSuyog Pawar
92*c83a76b0SSuyog Pawar
93*c83a76b0SSuyog Pawar
.globl ihevc_pad_left_luma_a9q

.type ihevc_pad_left_luma_a9q, %function

@ void ihevc_pad_left_luma_a9q(uword8 *pu1_src,
@                              word32 src_strd,
@                              word32 ht,
@                              word32 pad_size)
@
@ For each of ht rows, replicates the byte at pu1_src into the 80 bytes that
@ start at (pu1_src - pad_size), then steps pu1_src down by src_strd.
@
@   in   : r0 = pu1_src, r1 = src_strd, r2 = ht, r3 = pad_size
@   out  : none
@   uses : r4-r11 (saved and restored here), q0-q3, flags
@
@ The store width is fixed at 80 bytes (five 16-byte stores per row); pad_size
@ only selects where the stores begin, so it is expected to be 80.
@ Rows are processed four at a time; a remainder of 1-3 rows is processed one
@ row at a time, and ht <= 0 returns without touching memory.
ihevc_pad_left_luma_a9q:

    stmfd       sp!, {r4-r11,lr}            @ save callee-saved registers and return address

    cmp         r2,#0
    ble         end_luma_left               @ nothing to pad for ht <= 0

    subs        r2,r2,#4                    @ r2 = rows left after the next 4-row pass
    blt         rem_rows_luma_left          @ fewer than 4 rows in total

loop_start_luma_left:
    @ pad size is assumed to be pad_left = 80
    sub         r4,r0,r3                    @ r4 = start of the pad area of row 0

    ldrb        r8,[r0]                     @ left-most pixel of row 0
    add         r0,r0,r1
    ldrb        r9,[r0]                     @ left-most pixel of row 1
    add         r0,r0,r1
    ldrb        r10,[r0]                    @ left-most pixel of row 2
    add         r0,r0,r1
    ldrb        r11,[r0]                    @ left-most pixel of row 3
    add         r0,r0,r1                    @ r0 now points at row 4

    vdup.u8     q0,r8
    vdup.u8     q1,r9
    vdup.u8     q2,r10
    vdup.u8     q3,r11

    add         r5,r4,r1                    @ r5 = pad area of row 1 (taken before r4 is advanced)

    vst1.8      {d0,d1},[r4]!               @ row 0: 5 x 16 bytes = 80 bytes
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]

    add         r6,r5,r1                    @ r6 = pad area of row 2

    vst1.8      {d2,d3},[r5]!               @ row 1: 80 bytes
    vst1.8      {d2,d3},[r5]!
    vst1.8      {d2,d3},[r5]!
    vst1.8      {d2,d3},[r5]!
    vst1.8      {d2,d3},[r5]

    add         r7,r6,r1                    @ r7 = pad area of row 3

    vst1.8      {d4,d5},[r6]!               @ row 2: 80 bytes
    vst1.8      {d4,d5},[r6]!
    vst1.8      {d4,d5},[r6]!
    vst1.8      {d4,d5},[r6]!
    vst1.8      {d4,d5},[r6]

    subs        r2,r2,#4                    @ flags survive the NEON stores below

    vst1.8      {d6,d7},[r7]!               @ row 3: 80 bytes
    vst1.8      {d6,d7},[r7]!
    vst1.8      {d6,d7},[r7]!
    vst1.8      {d6,d7},[r7]!
    vst1.8      {d6,d7},[r7]

    @ total of 4rows*(16*5) = 4 * 80 bytes stored per pass

    bge         loop_start_luma_left        @ at least 4 more rows remain

rem_rows_luma_left:
    adds        r2,r2,#4                    @ r2 = leftover rows (0..3)
    beq         end_luma_left

loop_row_luma_left:
    sub         r4,r0,r3                    @ r4 = start of the pad area of this row
    ldrb        r8,[r0]                     @ left-most pixel of this row
    add         r0,r0,r1
    vdup.u8     q0,r8

    vst1.8      {d0,d1},[r4]!               @ 5 x 16 bytes = 80 bytes
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]

    subs        r2,r2,#1
    bne         loop_row_luma_left

end_luma_left:
    ldmfd       sp!,{r4-r11,pc}             @ restore registers and return
157*c83a76b0SSuyog Pawar
158*c83a76b0SSuyog Pawar
159*c83a76b0SSuyog Pawar
160*c83a76b0SSuyog Pawar
161*c83a76b0SSuyog Pawar
162*c83a76b0SSuyog Pawar@/**
163*c83a76b0SSuyog Pawar@*******************************************************************************
164*c83a76b0SSuyog Pawar@*
165*c83a76b0SSuyog Pawar@* @brief
166*c83a76b0SSuyog Pawar@*   padding (chroma block) at the left of a 2d array
167*c83a76b0SSuyog Pawar@*
168*c83a76b0SSuyog Pawar@* @par description:
169*c83a76b0SSuyog Pawar@*   the left column of a 2d array is replicated for pad_size times at the left
170*c83a76b0SSuyog Pawar@*
171*c83a76b0SSuyog Pawar@*
172*c83a76b0SSuyog Pawar@* @param[in] pu1_src
173*c83a76b0SSuyog Pawar@*  uword8 pointer to the source
174*c83a76b0SSuyog Pawar@*
175*c83a76b0SSuyog Pawar@* @param[in] src_strd
176*c83a76b0SSuyog Pawar@*  integer source stride
177*c83a76b0SSuyog Pawar@*
@* @param[in] ht
@*  integer height of the array (number of rows to pad)
@*
@* @param[in] pad_size
@*  integer padding size in bytes (the implementation assumes 80)
192*c83a76b0SSuyog Pawar@*
193*c83a76b0SSuyog Pawar@* @returns
194*c83a76b0SSuyog Pawar@*
195*c83a76b0SSuyog Pawar@* @remarks
196*c83a76b0SSuyog Pawar@*  none
197*c83a76b0SSuyog Pawar@*
198*c83a76b0SSuyog Pawar@*******************************************************************************
199*c83a76b0SSuyog Pawar@*/
200*c83a76b0SSuyog Pawar@.if pad_left_chroma == c
201*c83a76b0SSuyog Pawar@void ihevc_pad_left_chroma(uword8 *pu1_src,
202*c83a76b0SSuyog Pawar@                            word32 src_strd,
203*c83a76b0SSuyog Pawar@                            word32 ht,
204*c83a76b0SSuyog Pawar@                            word32 pad_size)
205*c83a76b0SSuyog Pawar@{
206*c83a76b0SSuyog Pawar@   r0 => *pu1_src
207*c83a76b0SSuyog Pawar@   r1 => src_strd
208*c83a76b0SSuyog Pawar@   r2 => ht
209*c83a76b0SSuyog Pawar@   r3 => pad_size
210*c83a76b0SSuyog Pawar
211*c83a76b0SSuyog Pawar
212*c83a76b0SSuyog Pawar
.globl ihevc_pad_left_chroma_a9q

.type ihevc_pad_left_chroma_a9q, %function

@ void ihevc_pad_left_chroma_a9q(uword8 *pu1_src,
@                                word32 src_strd,
@                                word32 ht,
@                                word32 pad_size)
@
@ For each of ht rows, replicates the 2-byte pair at pu1_src into the 80 bytes
@ that start at (pu1_src - pad_size), then steps pu1_src down by src_strd.
@
@   in   : r0 = pu1_src, r1 = src_strd, r2 = ht, r3 = pad_size
@   out  : none
@   uses : r4-r11 (saved and restored here), q0-q3, flags
@
@ The store width is fixed at 80 bytes (five 16-byte stores per row); pad_size
@ only selects where the stores begin, so it is expected to be 80.
@ Rows are processed four at a time; a remainder of 1-3 rows is processed one
@ row at a time, and ht <= 0 returns without touching memory.
ihevc_pad_left_chroma_a9q:

    stmfd       sp!, {r4-r11, lr}           @ save callee-saved registers and return address

    cmp         r2,#0
    ble         end_chroma_left             @ nothing to pad for ht <= 0

    subs        r2,r2,#4                    @ r2 = rows left after the next 4-row pass
    blt         rem_rows_chroma_left        @ fewer than 4 rows in total

loop_start_chroma_left:
    @ pad size is assumed to be pad_left = 80
    sub         r4,r0,r3                    @ r4 = start of the pad area of row 0

    ldrh        r8,[r0]                     @ left-most 2-byte pair of row 0
    add         r0,r0,r1
    ldrh        r9,[r0]                     @ left-most 2-byte pair of row 1
    add         r0,r0,r1
    ldrh        r10,[r0]                    @ left-most 2-byte pair of row 2
    add         r0,r0,r1
    ldrh        r11,[r0]                    @ left-most 2-byte pair of row 3
    add         r0,r0,r1                    @ r0 now points at row 4

    vdup.u16    q0,r8                       @ 8 copies of the pair per q register
    vdup.u16    q1,r9
    vdup.u16    q2,r10
    vdup.u16    q3,r11

    add         r5,r4,r1                    @ r5 = pad area of row 1 (taken before r4 is advanced)

    vst1.8      {d0,d1},[r4]!               @ row 0: 5 x 16 bytes = 80 bytes
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]

    add         r6,r5,r1                    @ r6 = pad area of row 2

    vst1.8      {d2,d3},[r5]!               @ row 1: 80 bytes
    vst1.8      {d2,d3},[r5]!
    vst1.8      {d2,d3},[r5]!
    vst1.8      {d2,d3},[r5]!
    vst1.8      {d2,d3},[r5]

    add         r7,r6,r1                    @ r7 = pad area of row 3

    vst1.8      {d4,d5},[r6]!               @ row 2: 80 bytes
    vst1.8      {d4,d5},[r6]!
    vst1.8      {d4,d5},[r6]!
    vst1.8      {d4,d5},[r6]!
    vst1.8      {d4,d5},[r6]

    subs        r2,r2,#4                    @ flags survive the NEON stores below

    vst1.8      {d6,d7},[r7]!               @ row 3: 80 bytes
    vst1.8      {d6,d7},[r7]!
    vst1.8      {d6,d7},[r7]!
    vst1.8      {d6,d7},[r7]!
    vst1.8      {d6,d7},[r7]

    @ total of 4rows*(16*5) = 4 * 80 bytes stored per pass

    bge         loop_start_chroma_left      @ at least 4 more rows remain

rem_rows_chroma_left:
    adds        r2,r2,#4                    @ r2 = leftover rows (0..3)
    beq         end_chroma_left

loop_row_chroma_left:
    sub         r4,r0,r3                    @ r4 = start of the pad area of this row
    ldrh        r8,[r0]                     @ left-most 2-byte pair of this row
    add         r0,r0,r1
    vdup.u16    q0,r8

    vst1.8      {d0,d1},[r4]!               @ 5 x 16 bytes = 80 bytes
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]

    subs        r2,r2,#1
    bne         loop_row_chroma_left

end_chroma_left:
    ldmfd       sp!,{r4-r11,pc}             @ restore registers and return
276*c83a76b0SSuyog Pawar
277*c83a76b0SSuyog Pawar
278*c83a76b0SSuyog Pawar
279*c83a76b0SSuyog Pawar
280*c83a76b0SSuyog Pawar
281*c83a76b0SSuyog Pawar@/**
282*c83a76b0SSuyog Pawar@*******************************************************************************
283*c83a76b0SSuyog Pawar@*
284*c83a76b0SSuyog Pawar@* @brief
285*c83a76b0SSuyog Pawar@* padding (luma block) at the right of a 2d array
286*c83a76b0SSuyog Pawar@*
287*c83a76b0SSuyog Pawar@* @par description:
288*c83a76b0SSuyog Pawar@* the right column of a 2d array is replicated for pad_size times at the right
289*c83a76b0SSuyog Pawar@*
290*c83a76b0SSuyog Pawar@*
291*c83a76b0SSuyog Pawar@* @param[in] pu1_src
292*c83a76b0SSuyog Pawar@*  uword8 pointer to the source
293*c83a76b0SSuyog Pawar@*
294*c83a76b0SSuyog Pawar@* @param[in] src_strd
295*c83a76b0SSuyog Pawar@*  integer source stride
296*c83a76b0SSuyog Pawar@*
@* @param[in] ht
@*  integer height of the array (number of rows to pad)
@*
@* @param[in] pad_size
@*  integer padding size in bytes (the implementation always writes 80)
311*c83a76b0SSuyog Pawar@*
312*c83a76b0SSuyog Pawar@* @returns
313*c83a76b0SSuyog Pawar@*
314*c83a76b0SSuyog Pawar@* @remarks
315*c83a76b0SSuyog Pawar@*  none
316*c83a76b0SSuyog Pawar@*
317*c83a76b0SSuyog Pawar@*******************************************************************************
318*c83a76b0SSuyog Pawar@*/
319*c83a76b0SSuyog Pawar@.if pad_right_luma == c
320*c83a76b0SSuyog Pawar@void ihevc_pad_right_luma(uword8 *pu1_src,
321*c83a76b0SSuyog Pawar@                        word32 src_strd,
322*c83a76b0SSuyog Pawar@                        word32 ht,
323*c83a76b0SSuyog Pawar@                        word32 pad_size)
324*c83a76b0SSuyog Pawar@{
325*c83a76b0SSuyog Pawar@    word32 row@
326*c83a76b0SSuyog Pawar@
327*c83a76b0SSuyog Pawar@    for(row = 0@ row < ht@ row++)
328*c83a76b0SSuyog Pawar@    {
329*c83a76b0SSuyog Pawar@        memset(pu1_src, *(pu1_src -1), pad_size)@
330*c83a76b0SSuyog Pawar@
331*c83a76b0SSuyog Pawar@        pu1_src += src_strd@
332*c83a76b0SSuyog Pawar@    }
333*c83a76b0SSuyog Pawar@}
334*c83a76b0SSuyog Pawar@
335*c83a76b0SSuyog Pawar@   r0 => *pu1_src
336*c83a76b0SSuyog Pawar@   r1 => src_strd
337*c83a76b0SSuyog Pawar@   r2 => ht
338*c83a76b0SSuyog Pawar@   r3 => pad_size
339*c83a76b0SSuyog Pawar
340*c83a76b0SSuyog Pawar
341*c83a76b0SSuyog Pawar
.globl ihevc_pad_right_luma_a9q

.type ihevc_pad_right_luma_a9q, %function

@ void ihevc_pad_right_luma_a9q(uword8 *pu1_src,
@                               word32 src_strd,
@                               word32 ht,
@                               word32 pad_size)
@
@ For each of ht rows, replicates the byte at pu1_src[-1] into the 80 bytes
@ that start at pu1_src, then steps pu1_src down by src_strd.
@
@   in   : r0 = pu1_src, r1 = src_strd, r2 = ht, r3 = pad_size (not read)
@   out  : none
@   uses : r4-r11 (saved and restored here), q0-q3, flags
@
@ The store width is fixed at 80 bytes (five 16-byte stores per row)
@ regardless of pad_size.
@ Rows are processed four at a time; a remainder of 1-3 rows is processed one
@ row at a time, and ht <= 0 returns without touching memory.
ihevc_pad_right_luma_a9q:

    stmfd       sp!, {r4-r11, lr}           @ save callee-saved registers and return address

    cmp         r2,#0
    ble         end_luma_right              @ nothing to pad for ht <= 0

    subs        r2,r2,#4                    @ r2 = rows left after the next 4-row pass
    blt         rem_rows_luma_right         @ fewer than 4 rows in total

loop_start_luma_right:
    @ pad size is assumed to be pad_right = 80
    mov         r4,r0                       @ r4 = start of the pad area of row 0

    ldrb        r8,[r0, #-1]                @ right-most pixel of row 0
    add         r0,r0,r1
    ldrb        r9,[r0, #-1]                @ right-most pixel of row 1
    add         r0,r0,r1
    ldrb        r10,[r0, #-1]               @ right-most pixel of row 2
    add         r0,r0,r1
    ldrb        r11,[r0, #-1]               @ right-most pixel of row 3
    add         r0,r0,r1                    @ r0 now points at row 4

    add         r5,r4,r1                    @ r5/r6/r7 = pad areas of rows 1/2/3
    add         r6,r5,r1
    add         r7,r6,r1

    vdup.u8     q0,r8
    vdup.u8     q1,r9
    vdup.u8     q2,r10
    vdup.u8     q3,r11

    vst1.8      {d0,d1},[r4]!               @ row 0: 5 x 16 bytes = 80 bytes
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]

    vst1.8      {d2,d3},[r5]!               @ row 1: 80 bytes
    vst1.8      {d2,d3},[r5]!
    vst1.8      {d2,d3},[r5]!
    vst1.8      {d2,d3},[r5]!
    vst1.8      {d2,d3},[r5]

    subs        r2,r2,#4                    @ flags survive the NEON stores below

    vst1.8      {d4,d5},[r6]!               @ row 2: 80 bytes
    vst1.8      {d4,d5},[r6]!
    vst1.8      {d4,d5},[r6]!
    vst1.8      {d4,d5},[r6]!
    vst1.8      {d4,d5},[r6]

    vst1.8      {d6,d7},[r7]!               @ row 3: 80 bytes
    vst1.8      {d6,d7},[r7]!
    vst1.8      {d6,d7},[r7]!
    vst1.8      {d6,d7},[r7]!
    vst1.8      {d6,d7},[r7]

    @ total of 4rows*(16*5) = 4 * 80 bytes stored per pass

    bge         loop_start_luma_right       @ at least 4 more rows remain

rem_rows_luma_right:
    adds        r2,r2,#4                    @ r2 = leftover rows (0..3)
    beq         end_luma_right

loop_row_luma_right:
    mov         r4,r0                       @ r4 = start of the pad area of this row
    ldrb        r8,[r0, #-1]                @ right-most pixel of this row
    add         r0,r0,r1
    vdup.u8     q0,r8

    vst1.8      {d0,d1},[r4]!               @ 5 x 16 bytes = 80 bytes
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]

    subs        r2,r2,#1
    bne         loop_row_luma_right

end_luma_right:
    ldmfd       sp!,{r4-r11,pc}             @ restore registers and return
406*c83a76b0SSuyog Pawar
407*c83a76b0SSuyog Pawar
408*c83a76b0SSuyog Pawar
409*c83a76b0SSuyog Pawar
410*c83a76b0SSuyog Pawar
411*c83a76b0SSuyog Pawar@/**
412*c83a76b0SSuyog Pawar@*******************************************************************************
413*c83a76b0SSuyog Pawar@*
414*c83a76b0SSuyog Pawar@* @brief
415*c83a76b0SSuyog Pawar@@* padding (chroma block) at the right of a 2d array
416*c83a76b0SSuyog Pawar@*
417*c83a76b0SSuyog Pawar@* @par description:
418*c83a76b0SSuyog Pawar@* the right column of a 2d array is replicated for pad_size times at the right
419*c83a76b0SSuyog Pawar@*
420*c83a76b0SSuyog Pawar@*
421*c83a76b0SSuyog Pawar@* @param[in] pu1_src
422*c83a76b0SSuyog Pawar@@*  uword8 pointer to the source
423*c83a76b0SSuyog Pawar@*
424*c83a76b0SSuyog Pawar@* @param[in] src_strd
425*c83a76b0SSuyog Pawar@*  integer source stride
426*c83a76b0SSuyog Pawar@*
@* @param[in] ht
@*  integer height of the array (number of rows to pad)
@*
@* @param[in] pad_size
@*  integer padding size in bytes (the implementation always writes 80)
441*c83a76b0SSuyog Pawar@*
442*c83a76b0SSuyog Pawar@* @returns
443*c83a76b0SSuyog Pawar@*
444*c83a76b0SSuyog Pawar@* @remarks
445*c83a76b0SSuyog Pawar@*  none
446*c83a76b0SSuyog Pawar@*
447*c83a76b0SSuyog Pawar@*******************************************************************************
448*c83a76b0SSuyog Pawar@*/
449*c83a76b0SSuyog Pawar@.if pad_right_chroma == c
450*c83a76b0SSuyog Pawar@void ihevc_pad_right_chroma(uword8 *pu1_src,
451*c83a76b0SSuyog Pawar@                        word32 src_strd,
452*c83a76b0SSuyog Pawar@                        word32 ht,
453*c83a76b0SSuyog Pawar@                        word32 pad_size)
454*c83a76b0SSuyog Pawar@   r0 => *pu1_src
455*c83a76b0SSuyog Pawar@   r1 => src_strd
456*c83a76b0SSuyog Pawar@   r2 => ht
457*c83a76b0SSuyog Pawar@   r3 => pad_size
458*c83a76b0SSuyog Pawar
459*c83a76b0SSuyog Pawar
460*c83a76b0SSuyog Pawar
.globl ihevc_pad_right_chroma_a9q

.type ihevc_pad_right_chroma_a9q, %function

@ void ihevc_pad_right_chroma_a9q(uword8 *pu1_src,
@                                 word32 src_strd,
@                                 word32 ht,
@                                 word32 pad_size)
@
@ For each of ht rows, replicates the 2-byte pair at pu1_src[-2..-1] into the
@ 80 bytes that start at pu1_src, then steps pu1_src down by src_strd.
@
@   in   : r0 = pu1_src, r1 = src_strd, r2 = ht, r3 = pad_size (not read)
@   out  : none
@   uses : r4-r11 (saved and restored here), q0-q3, flags
@
@ The store width is fixed at 80 bytes (five 16-byte stores per row)
@ regardless of pad_size.
@ Rows are processed four at a time; a remainder of 1-3 rows is processed one
@ row at a time, and ht <= 0 returns without touching memory.
ihevc_pad_right_chroma_a9q:

    stmfd       sp!, {r4-r11, lr}           @ save callee-saved registers and return address

    cmp         r2,#0
    ble         end_chroma_right            @ nothing to pad for ht <= 0

    subs        r2,r2,#4                    @ r2 = rows left after the next 4-row pass
    blt         rem_rows_chroma_right       @ fewer than 4 rows in total

loop_start_chroma_right:
    @ pad size is assumed to be pad_right = 80
    mov         r4,r0                       @ r4 = start of the pad area of row 0

    ldrh        r8,[r0, #-2]                @ right-most 2-byte pair of row 0
    add         r0,r0,r1
    ldrh        r9,[r0, #-2]                @ right-most 2-byte pair of row 1
    add         r0,r0,r1
    ldrh        r10,[r0, #-2]               @ right-most 2-byte pair of row 2
    add         r0,r0,r1
    ldrh        r11,[r0, #-2]               @ right-most 2-byte pair of row 3
    add         r0,r0,r1                    @ r0 now points at row 4

    vdup.u16    q0,r8                       @ 8 copies of the pair per q register
    vdup.u16    q1,r9
    vdup.u16    q2,r10
    vdup.u16    q3,r11

    add         r5,r4,r1                    @ r5 = pad area of row 1 (taken before r4 is advanced)

    vst1.8      {d0,d1},[r4]!               @ row 0: 5 x 16 bytes = 80 bytes
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]

    add         r6,r5,r1                    @ r6 = pad area of row 2

    vst1.8      {d2,d3},[r5]!               @ row 1: 80 bytes
    vst1.8      {d2,d3},[r5]!
    vst1.8      {d2,d3},[r5]!
    vst1.8      {d2,d3},[r5]!
    vst1.8      {d2,d3},[r5]

    add         r7,r6,r1                    @ r7 = pad area of row 3

    vst1.8      {d4,d5},[r6]!               @ row 2: 80 bytes
    vst1.8      {d4,d5},[r6]!
    vst1.8      {d4,d5},[r6]!
    vst1.8      {d4,d5},[r6]!
    vst1.8      {d4,d5},[r6]

    subs        r2,r2,#4                    @ flags survive the NEON stores below

    vst1.8      {d6,d7},[r7]!               @ row 3: 80 bytes
    vst1.8      {d6,d7},[r7]!
    vst1.8      {d6,d7},[r7]!
    vst1.8      {d6,d7},[r7]!
    vst1.8      {d6,d7},[r7]

    @ total of 4rows*(16*5) = 4 * 80 bytes stored per pass

    bge         loop_start_chroma_right     @ at least 4 more rows remain

rem_rows_chroma_right:
    adds        r2,r2,#4                    @ r2 = leftover rows (0..3)
    beq         end_chroma_right

loop_row_chroma_right:
    mov         r4,r0                       @ r4 = start of the pad area of this row
    ldrh        r8,[r0, #-2]                @ right-most 2-byte pair of this row
    add         r0,r0,r1
    vdup.u16    q0,r8

    vst1.8      {d0,d1},[r4]!               @ 5 x 16 bytes = 80 bytes
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]!
    vst1.8      {d0,d1},[r4]

    subs        r2,r2,#1
    bne         loop_row_chroma_right

end_chroma_right:
    ldmfd       sp!,{r4-r11,pc}             @ restore registers and return
524*c83a76b0SSuyog Pawar
525*c83a76b0SSuyog Pawar
526*c83a76b0SSuyog Pawar
527*c83a76b0SSuyog Pawar
528*c83a76b0SSuyog Pawar
529*c83a76b0SSuyog Pawar
530*c83a76b0SSuyog Pawar
531*c83a76b0SSuyog Pawar
532