xref: /aosp_15_r20/external/libhevc/common/arm64/ihevc_padding.s (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1*c83a76b0SSuyog Pawar///*****************************************************************************
2*c83a76b0SSuyog Pawar//*
3*c83a76b0SSuyog Pawar//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*c83a76b0SSuyog Pawar//*
5*c83a76b0SSuyog Pawar//* Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar//* you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar//* You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar//*
9*c83a76b0SSuyog Pawar//* http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar//*
11*c83a76b0SSuyog Pawar//* Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar//* distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar//* See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar//* limitations under the License.
16*c83a76b0SSuyog Pawar//*
17*c83a76b0SSuyog Pawar//*****************************************************************************/
18*c83a76b0SSuyog Pawar///**
19*c83a76b0SSuyog Pawar// *******************************************************************************
// * //file
// *  ihevc_padding.s
// *
// * //brief
// *  contains function definitions for left and right padding of luma and
// *  chroma planes
25*c83a76b0SSuyog Pawar// *
26*c83a76b0SSuyog Pawar// * //author
27*c83a76b0SSuyog Pawar// *     naveen sr
28*c83a76b0SSuyog Pawar// *
// * //par list of functions:
// *  - ihevc_pad_left_luma_av8()
// *  - ihevc_pad_left_chroma_av8()
// *  - ihevc_pad_right_luma_av8()
// *  - ihevc_pad_right_chroma_av8()
32*c83a76b0SSuyog Pawar// *
33*c83a76b0SSuyog Pawar// * //remarks
34*c83a76b0SSuyog Pawar// *  none
35*c83a76b0SSuyog Pawar// *
36*c83a76b0SSuyog Pawar// *******************************************************************************
37*c83a76b0SSuyog Pawar//*/
38*c83a76b0SSuyog Pawar
///**
//*******************************************************************************
//*
//* //brief
//*   padding (luma block) at the left of a 2d array
//*
//* //par description:
//*   for every row, the byte at pu1_src (the left-most pixel of the row) is
//*   replicated into the 80 bytes starting at pu1_src - pad_size. the store
//*   width is fixed (five 16-byte stores per row), so the routine is only
//*   correct when pad_size == 80. rows are processed four at a time.
//*
//* //param[in] pu1_src
//*  uword8 pointer to the left-most pixel of the first row
//*
//* //param[in] src_strd
//*  integer source stride (byte distance between consecutive rows)
//*
//* //param[in] ht
//*  integer height of the array (number of rows to pad). expected to be a
//*  multiple of 4; ht <= 0 returns immediately and any other positive value
//*  is rounded up to the next multiple of 4
//*
//* //param[in] pad_size
//*  integer padding size in bytes. only used to locate the start of the
//*  padding (pu1_src - pad_size); 80 bytes are always written
//*
//* //returns
//*  none
//*
//* //remarks
//*  modifies x0-x11, v0, v2, v4, v6 and the condition flags
//*
//*******************************************************************************
//*/
//void ihevc_pad_left_luma(uword8 *pu1_src,
//                        word32 src_strd,
//                        word32 ht,
//                        word32 pad_size)
//**************variables vs registers*************************
//    x0 => *pu1_src
//    w1 => src_strd   (sign extended into x1 on entry)
//    w2 => ht
//    w3 => pad_size   (sign extended into x3 on entry)

.text
.align 4

.globl ihevc_pad_left_luma_av8

.type ihevc_pad_left_luma_av8, %function

ihevc_pad_left_luma_av8:

    // the word32 arguments only occupy w1-w3; the upper 32 bits of x1-x3 are
    // unspecified on entry, so widen them before any 64-bit address arithmetic
    sxtw        x1,w1                       // x1 = src_strd
    sxtw        x3,w3                       // x3 = pad_size

    cmp         w2,#0
    ble         end_luma_left               // ht <= 0 : nothing to pad

loop_start_luma_left:
    // pad size is assumed to be pad_left = 80
    sub         x4,x0,x3                    // x4 = row 0 destination (pu1_src - pad_size)

    ldrb        w8,[x0]                     // left-most pixel of row 0
    add         x0,x0,x1
    ldrb        w9,[x0]                     // left-most pixel of row 1
    add         x0,x0,x1
    ldrb        w10,[x0]                    // left-most pixel of row 2
    add         x0,x0,x1
    ldrb        w11,[x0]                    // left-most pixel of row 3
    add         x0,x0,x1                    // x0 = first row of the next group of four

    dup         v0.16b,w8                   // broadcast each pixel to all 16 byte lanes
    dup         v2.16b,w9
    dup         v4.16b,w10
    dup         v6.16b,w11

    add         x5,x4,x1                    // row 1 destination, taken before x4 is post-incremented

    st1         {v0.16b},[x4],#16           // row 0 : 5 x 16 = 80 bytes
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4]

    add         x6,x5,x1                    // row 2 destination

    st1         {v2.16b},[x5],#16           // row 1 : 80 bytes
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5]

    add         x7,x6,x1                    // row 3 destination

    st1         {v4.16b},[x6],#16           // row 2 : 80 bytes
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6]

    subs        w2,w2,#4                    // four rows done; flags survive the stores below

    st1         {v6.16b},[x7],#16           // row 3 : 80 bytes
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7]

    // total of 4rows*(16*5) = 4 * 80 = 4 * pad_left store

    bgt         loop_start_luma_left        // signed test so the loop always terminates

end_luma_left:
    ret
152*c83a76b0SSuyog Pawar
153*c83a76b0SSuyog Pawar
154*c83a76b0SSuyog Pawar
155*c83a76b0SSuyog Pawar
156*c83a76b0SSuyog Pawar
///**
//*******************************************************************************
//*
//* //brief
//*   padding (chroma block) at the left of a 2d array
//*
//* //par description:
//*   for every row, the 2-byte unit at pu1_src (presumably one interleaved
//*   chroma pair - confirm against the caller) is replicated into the 80
//*   bytes starting at pu1_src - pad_size. the store width is fixed (five
//*   16-byte stores per row), so the routine is only correct when
//*   pad_size == 80. rows are processed four at a time.
//*
//* //param[in] pu1_src
//*  uword8 pointer to the left-most 2-byte unit of the first row
//*
//* //param[in] src_strd
//*  integer source stride (byte distance between consecutive rows)
//*
//* //param[in] ht
//*  integer height of the array (number of rows to pad). expected to be a
//*  multiple of 4; ht <= 0 returns immediately and any other positive value
//*  is rounded up to the next multiple of 4
//*
//* //param[in] pad_size
//*  integer padding size in bytes. only used to locate the start of the
//*  padding (pu1_src - pad_size); 80 bytes are always written
//*
//* //returns
//*  none
//*
//* //remarks
//*  modifies x0-x11, v0, v2, v4, v6 and the condition flags
//*
//*******************************************************************************
//*/
//void ihevc_pad_left_chroma(uword8 *pu1_src,
//                            word32 src_strd,
//                            word32 ht,
//                            word32 pad_size)
//**************variables vs registers*************************
//    x0 => *pu1_src
//    w1 => src_strd   (sign extended into x1 on entry)
//    w2 => ht
//    w3 => pad_size   (sign extended into x3 on entry)



.globl ihevc_pad_left_chroma_av8

.type ihevc_pad_left_chroma_av8, %function

ihevc_pad_left_chroma_av8:

    // the word32 arguments only occupy w1-w3; the upper 32 bits of x1-x3 are
    // unspecified on entry, so widen them before any 64-bit address arithmetic
    sxtw        x1,w1                       // x1 = src_strd
    sxtw        x3,w3                       // x3 = pad_size

    cmp         w2,#0
    ble         end_chroma_left             // ht <= 0 : nothing to pad

loop_start_chroma_left:
    // pad size is assumed to be pad_left = 80
    sub         x4,x0,x3                    // x4 = row 0 destination (pu1_src - pad_size)

    ldrh        w8,[x0]                     // left-most 2-byte unit of row 0
    add         x0,x0,x1
    ldrh        w9,[x0]                     // left-most 2-byte unit of row 1
    add         x0,x0,x1
    ldrh        w10,[x0]                    // left-most 2-byte unit of row 2
    add         x0,x0,x1
    ldrh        w11,[x0]                    // left-most 2-byte unit of row 3
    add         x0,x0,x1                    // x0 = first row of the next group of four

    dup         v0.8h,w8                    // broadcast each 16-bit unit to all 8 halfword lanes
    dup         v2.8h,w9
    dup         v4.8h,w10
    dup         v6.8h,w11

    add         x5,x4,x1                    // row 1 destination, taken before x4 is post-incremented

    st1         {v0.16b},[x4],#16           // row 0 : 5 x 16 = 80 bytes
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4]

    add         x6,x5,x1                    // row 2 destination

    st1         {v2.16b},[x5],#16           // row 1 : 80 bytes
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5]

    add         x7,x6,x1                    // row 3 destination

    st1         {v4.16b},[x6],#16           // row 2 : 80 bytes
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6]

    subs        w2,w2,#4                    // four rows done; flags survive the stores below

    st1         {v6.16b},[x7],#16           // row 3 : 80 bytes
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7]

    // total of 4rows*(16*5) = 4 * 80 = 4 * pad_left store

    bgt         loop_start_chroma_left      // signed test so the loop always terminates

end_chroma_left:
    ret
270*c83a76b0SSuyog Pawar
271*c83a76b0SSuyog Pawar
272*c83a76b0SSuyog Pawar
273*c83a76b0SSuyog Pawar
274*c83a76b0SSuyog Pawar
///**
//*******************************************************************************
//*
//* //brief
//*   padding (luma block) at the right of a 2d array
//*
//* //par description:
//*   for every row, the byte at pu1_src - 1 (the right-most pixel of the row)
//*   is replicated into the 80 bytes starting at pu1_src. the store width is
//*   fixed (five 16-byte stores per row), so the routine behaves like the c
//*   reference below only when pad_size == 80. rows are processed four at a
//*   time.
//*
//* //param[in] pu1_src
//*  uword8 pointer to the first byte to the right of the first row, i.e. the
//*  first byte of the padding
//*
//* //param[in] src_strd
//*  integer source stride (byte distance between consecutive rows)
//*
//* //param[in] ht
//*  integer height of the array (number of rows to pad). expected to be a
//*  multiple of 4; ht <= 0 returns immediately and any other positive value
//*  is rounded up to the next multiple of 4
//*
//* //param[in] pad_size
//*  integer padding size in bytes. not read by this routine; 80 bytes are
//*  always written
//*
//* //returns
//*  none
//*
//* //remarks
//*  modifies x0-x2, x4-x11, v0, v2, v4, v6 and the condition flags
//*
//*******************************************************************************
//*/
//void ihevc_pad_right_luma(uword8 *pu1_src,
//                        word32 src_strd,
//                        word32 ht,
//                        word32 pad_size)
//{
//    word32 row//
//
//    for(row = 0// row < ht// row++)
//    {
//        memset(pu1_src, *(pu1_src -1), pad_size)//
//
//        pu1_src += src_strd//
//    }
//}
//
//    x0 => *pu1_src
//    w1 => src_strd   (sign extended into x1 on entry)
//    w2 => ht
//    w3 => pad_size   (unused)



.globl ihevc_pad_right_luma_av8

.type ihevc_pad_right_luma_av8, %function

ihevc_pad_right_luma_av8:

    // src_strd is a word32 held in w1; the upper 32 bits of x1 are unspecified
    // on entry, so widen it before any 64-bit address arithmetic
    sxtw        x1,w1                       // x1 = src_strd

    cmp         w2,#0
    ble         end_luma_right              // ht <= 0 : nothing to pad

loop_start_luma_right:
    // pad size is assumed to be pad_right = 80
    mov         x4,x0                       // x4 = row 0 destination

    ldrb        w8,[x0, #-1]                // right-most pixel of row 0
    add         x0,x0,x1
    ldrb        w9,[x0, #-1]                // right-most pixel of row 1
    add         x0,x0,x1
    ldrb        w10,[x0, #-1]               // right-most pixel of row 2
    add         x0,x0,x1
    ldrb        w11,[x0, #-1]               // right-most pixel of row 3
    add         x0,x0,x1                    // x0 = first row of the next group of four

    add         x5,x4,x1                    // row 1 destination
    add         x6,x5,x1                    // row 2 destination
    add         x7,x6,x1                    // row 3 destination

    dup         v0.16b,w8                   // broadcast each pixel to all 16 byte lanes
    dup         v2.16b,w9
    dup         v4.16b,w10
    dup         v6.16b,w11

    st1         {v0.16b},[x4],#16           // row 0 : 5 x 16 = 80 bytes
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4]


    st1         {v2.16b},[x5],#16           // row 1 : 80 bytes
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5]

    subs        w2,w2,#4                    // four rows done; flags survive the stores below

    st1         {v4.16b},[x6],#16           // row 2 : 80 bytes
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6]

    st1         {v6.16b},[x7],#16           // row 3 : 80 bytes
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7]


    // total of 4rows*(16*5) = 4 * 80 = 4 * pad_right store


    bgt         loop_start_luma_right       // signed test so the loop always terminates

end_luma_right:
    ret
399*c83a76b0SSuyog Pawar
400*c83a76b0SSuyog Pawar
401*c83a76b0SSuyog Pawar
402*c83a76b0SSuyog Pawar
403*c83a76b0SSuyog Pawar
///**
//*******************************************************************************
//*
//* //brief
//*   padding (chroma block) at the right of a 2d array
//*
//* //par description:
//*   for every row, the 2-byte unit at pu1_src - 2 (presumably the right-most
//*   interleaved chroma pair - confirm against the caller) is replicated into
//*   the 80 bytes starting at pu1_src. the store width is fixed (five 16-byte
//*   stores per row), so the routine is only correct when pad_size == 80.
//*   rows are processed four at a time.
//*
//* //param[in] pu1_src
//*  uword8 pointer to the first byte to the right of the first row, i.e. the
//*  first byte of the padding
//*
//* //param[in] src_strd
//*  integer source stride (byte distance between consecutive rows)
//*
//* //param[in] ht
//*  integer height of the array (number of rows to pad). expected to be a
//*  multiple of 4; ht <= 0 returns immediately and any other positive value
//*  is rounded up to the next multiple of 4
//*
//* //param[in] pad_size
//*  integer padding size in bytes. not read by this routine; 80 bytes are
//*  always written
//*
//* //returns
//*  none
//*
//* //remarks
//*  modifies x0-x2, x4-x11, v0, v2, v4, v6 and the condition flags
//*
//*******************************************************************************
//*/
//void ihevc_pad_right_chroma(uword8 *pu1_src,
//                        word32 src_strd,
//                        word32 ht,
//                        word32 pad_size)
//**************variables vs registers*************************
//    x0 => *pu1_src
//    w1 => src_strd   (sign extended into x1 on entry)
//    w2 => ht
//    w3 => pad_size   (unused)



.globl ihevc_pad_right_chroma_av8

.type ihevc_pad_right_chroma_av8, %function

ihevc_pad_right_chroma_av8:

    // src_strd is a word32 held in w1; the upper 32 bits of x1 are unspecified
    // on entry, so widen it before any 64-bit address arithmetic
    sxtw        x1,w1                       // x1 = src_strd

    cmp         w2,#0
    ble         end_chroma_right            // ht <= 0 : nothing to pad

loop_start_chroma_right:
    // pad size is assumed to be pad_right = 80
    mov         x4,x0                       // x4 = row 0 destination

    ldrh        w8,[x0, #-2]                // right-most 2-byte unit of row 0
    add         x0,x0,x1
    ldrh        w9,[x0, #-2]                // right-most 2-byte unit of row 1
    add         x0,x0,x1
    ldrh        w10,[x0, #-2]               // right-most 2-byte unit of row 2
    add         x0,x0,x1
    ldrh        w11,[x0, #-2]               // right-most 2-byte unit of row 3
    add         x0,x0,x1                    // x0 = first row of the next group of four

    dup         v0.8h,w8                    // broadcast each 16-bit unit to all 8 halfword lanes
    dup         v2.8h,w9
    dup         v4.8h,w10
    dup         v6.8h,w11

    add         x5,x4,x1                    // row 1 destination, taken before x4 is post-incremented

    st1         {v0.16b},[x4],#16           // row 0 : 5 x 16 = 80 bytes
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4]

    add         x6,x5,x1                    // row 2 destination

    st1         {v2.16b},[x5],#16           // row 1 : 80 bytes
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5]

    add         x7,x6,x1                    // row 3 destination

    st1         {v4.16b},[x6],#16           // row 2 : 80 bytes
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6]

    subs        w2,w2,#4                    // four rows done; flags survive the stores below

    st1         {v6.16b},[x7],#16           // row 3 : 80 bytes
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7]

    // total of 4rows*(16*5) = 4 * 80 = 4 * pad_right store

    bgt         loop_start_chroma_right     // signed test so the loop always terminates

end_chroma_right:
    ret
516*c83a76b0SSuyog Pawar
517*c83a76b0SSuyog Pawar
518*c83a76b0SSuyog Pawar
519*c83a76b0SSuyog Pawar
520*c83a76b0SSuyog Pawar
521*c83a76b0SSuyog Pawar
522*c83a76b0SSuyog Pawar
523*c83a76b0SSuyog Pawar
524