@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@ *******************************************************************************
@ * @file
@ *  ihevc_padding_neon.s
@ *
@ * @brief
@ *  contains function definitions for padding
@ *
@ * @author
@ *  naveen sr
@ *
@ * @par list of functions:
@ *  - ihevc_pad_left_luma()
@ *  - ihevc_pad_left_chroma()
@ *  - ihevc_pad_right_luma()
@ *  - ihevc_pad_right_chroma()
@ *
@ * @remarks
@ *  none
@ *
@ *******************************************************************************
@*/

@/**
@*******************************************************************************
@*
@* @brief
@*  padding (luma block) at the left of a 2d array
@*
@* @par description:
@*  the left column of a 2d array is replicated for pad_size times at the left
@*
@*
@* @param[in] pu1_src
@*  uword8 pointer to the source
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] ht
@*  integer height of the array
@*
@* @param[in] wd
@*  integer width of the array (not an argument of this routine; see the
@*  prototype below)
@*
@* @param[in] pad_size
@*  integer padding size in bytes; it only positions the start of the pad
@*  area (pu1_src - pad_size) - 80 bytes are always written per row, so
@*  callers must pass 80
@*
@* @returns
@*  none
@*
@* @remarks
@*  rows are padded four at a time; leftover rows (ht not a multiple of 4)
@*  are padded one at a time and ht <= 0 pads nothing
@*
@*******************************************************************************
@*/
@.if pad_left_luma == c
@void ihevc_pad_left_luma(uword8 *pu1_src,
@                         word32 src_strd,
@                         word32 ht,
@                         word32 pad_size)
@**************variables vs registers*************************
@   r0 => *pu1_src
@   r1 => src_strd
@   r2 => ht
@   r3 => pad_size

.text
.align 4

.globl ihevc_pad_left_luma_a9q

.type ihevc_pad_left_luma_a9q, %function

ihevc_pad_left_luma_a9q:

    stmfd       sp!, {r4-r11,lr}        @ save callee-saved registers and the return address

    subs        r2, r2, #4              @ r2 = ht - 4, i.e. rows left after the next 4-row pass
    blt         tail_rows_luma_left     @ fewer than 4 rows: skip the unrolled loop

loop_start_luma_left:
    @ pad size is assumed to be pad_left = 80
    sub         r4, r0, r3              @ r4 = first pad byte of row 0

    ldrb        r8, [r0]                @ r8..r11 = left-most pixel of rows 0..3
    add         r0, r0, r1
    ldrb        r9, [r0]
    add         r0, r0, r1
    ldrb        r10, [r0]
    add         r0, r0, r1
    ldrb        r11, [r0]
    add         r0, r0, r1              @ r0 = source pointer of the next group of rows

    vdup.u8     q0, r8                  @ replicate each pixel into 16 byte lanes
    vdup.u8     q1, r9
    vdup.u8     q2, r10
    vdup.u8     q3, r11

    add         r5, r4, r1              @ r5 = first pad byte of row 1

    vst1.8      {d0,d1}, [r4]!          @ row 0: 5 stores * 16 bytes = 80 bytes
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]

    add         r6, r5, r1              @ r6 = first pad byte of row 2

    vst1.8      {d2,d3}, [r5]!          @ row 1: 80 bytes
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]

    add         r7, r6, r1              @ r7 = first pad byte of row 3

    vst1.8      {d4,d5}, [r6]!          @ row 2: 80 bytes
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]

    subs        r2, r2, #4              @ flags survive the NEON stores below

    vst1.8      {d6,d7}, [r7]!          @ row 3: 80 bytes
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]

    @ total of 4rows*(16*5) = 4 * 80 = 4 * pad_left store

    bge         loop_start_luma_left    @ signed test: at least 4 more rows remain

tail_rows_luma_left:
    adds        r2, r2, #4              @ undo the bias: r2 = rows still to pad (1..3) or <= 0
    ble         end_luma_left

row_loop_luma_left:
    ldrb        r8, [r0]                @ left-most pixel of this row
    sub         r4, r0, r3              @ r4 = first pad byte of this row
    add         r0, r0, r1              @ advance to the next row
    vdup.u8     q0, r8

    vst1.8      {d0,d1}, [r4]!          @ 5 stores * 16 bytes = 80 bytes
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]

    subs        r2, r2, #1
    bgt         row_loop_luma_left

end_luma_left:
    ldmfd       sp!, {r4-r11,pc}        @ restore the saved registers and return





@/**
@*******************************************************************************
@*
@* @brief
@*  padding (chroma block) at the left of a 2d array
@*
@* @par description:
@*  the left-most 2-byte element of each row of a 2d array is replicated at
@*  the left of that row
@*
@*
@* @param[in] pu1_src
@*  uword8 pointer to the source
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] ht
@*  integer height of the array
@*
@* @param[in] pad_size
@*  integer padding size in bytes; it only positions the start of the pad
@*  area (pu1_src - pad_size) - 80 bytes are always written per row
@*
@* @returns
@*  none
@*
@* @remarks
@*  each row receives 80 bytes (five 16-byte stores) of the replicated
@*  16-bit element; rows are padded four at a time, leftover rows (ht not a
@*  multiple of 4) one at a time, and ht <= 0 pads nothing
@*
@*******************************************************************************
@*/
@.if pad_left_chroma == c
@void ihevc_pad_left_chroma(uword8 *pu1_src,
@                           word32 src_strd,
@                           word32 ht,
@                           word32 pad_size)
@**************variables vs registers*************************
@   r0 => *pu1_src
@   r1 => src_strd
@   r2 => ht
@   r3 => pad_size



.globl ihevc_pad_left_chroma_a9q

.type ihevc_pad_left_chroma_a9q, %function

ihevc_pad_left_chroma_a9q:

    stmfd       sp!, {r4-r11,lr}        @ save callee-saved registers and the return address

    subs        r2, r2, #4              @ r2 = ht - 4, i.e. rows left after the next 4-row pass
    blt         tail_rows_chroma_left   @ fewer than 4 rows: skip the unrolled loop

loop_start_chroma_left:
    @ pad size is assumed to be pad_left = 80
    sub         r4, r0, r3              @ r4 = first pad byte of row 0

    ldrh        r8, [r0]                @ r8..r11 = left-most 16-bit element of rows 0..3
    add         r0, r0, r1
    ldrh        r9, [r0]
    add         r0, r0, r1
    ldrh        r10, [r0]
    add         r0, r0, r1
    ldrh        r11, [r0]
    add         r0, r0, r1              @ r0 = source pointer of the next group of rows

    vdup.u16    q0, r8                  @ replicate each element into 8 halfword lanes
    vdup.u16    q1, r9
    vdup.u16    q2, r10
    vdup.u16    q3, r11

    add         r5, r4, r1              @ r5 = first pad byte of row 1

    vst1.8      {d0,d1}, [r4]!          @ row 0: 5 stores * 16 bytes = 80 bytes
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]

    add         r6, r5, r1              @ r6 = first pad byte of row 2

    vst1.8      {d2,d3}, [r5]!          @ row 1: 80 bytes
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]

    add         r7, r6, r1              @ r7 = first pad byte of row 3

    vst1.8      {d4,d5}, [r6]!          @ row 2: 80 bytes
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]

    subs        r2, r2, #4              @ flags survive the NEON stores below

    vst1.8      {d6,d7}, [r7]!          @ row 3: 80 bytes
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]

    @ total of 4rows*(16*5) = 4 * 80 = 4 * pad_left store

    bge         loop_start_chroma_left  @ signed test: at least 4 more rows remain

tail_rows_chroma_left:
    adds        r2, r2, #4              @ undo the bias: r2 = rows still to pad (1..3) or <= 0
    ble         end_chroma_left

row_loop_chroma_left:
    ldrh        r8, [r0]                @ left-most 16-bit element of this row
    sub         r4, r0, r3              @ r4 = first pad byte of this row
    add         r0, r0, r1              @ advance to the next row
    vdup.u16    q0, r8

    vst1.8      {d0,d1}, [r4]!          @ 5 stores * 16 bytes = 80 bytes
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]

    subs        r2, r2, #1
    bgt         row_loop_chroma_left

end_chroma_left:
    ldmfd       sp!, {r4-r11,pc}        @ restore the saved registers and return





@/**
@*******************************************************************************
@*
@* @brief
@*  padding (luma block) at the right of a 2d array
@*
@* @par description:
@*  the right column of a 2d array is replicated for pad_size times at the right
@*
@*
@* @param[in] pu1_src
@*  uword8 pointer to the source (first byte of the pad area; the pixel that
@*  is replicated is read from pu1_src - 1)
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] ht
@*  integer height of the array
@*
@* @param[in] pad_size
@*  integer padding size in bytes; not read by this routine, which always
@*  writes 80 bytes per row
@*
@* @returns
@*  none
@*
@* @remarks
@*  none
@*
@*******************************************************************************
@*/
@.if pad_right_luma == c
@void ihevc_pad_right_luma(uword8 *pu1_src,
@                          word32 src_strd,
@                          word32 ht,
@                          word32 pad_size)
@{
@    word32 row@
@
@    for(row = 0@ row < ht@ row++)
@    {
@        memset(pu1_src, *(pu1_src -1), pad_size)@
@
@        pu1_src += src_strd@
@    }
@}
@
@   r0 => *pu1_src
@   r1 => src_strd
@   r2 => ht
@   r3 => pad_size (not read: 80 bytes are always written per row)
@
@   rows are padded four at a time; leftover rows (ht not a multiple of 4)
@   are padded one at a time and ht <= 0 pads nothing



.globl ihevc_pad_right_luma_a9q

.type ihevc_pad_right_luma_a9q, %function

ihevc_pad_right_luma_a9q:

    stmfd       sp!, {r4-r11,lr}        @ save callee-saved registers and the return address

    subs        r2, r2, #4              @ r2 = ht - 4, i.e. rows left after the next 4-row pass
    blt         tail_rows_luma_right    @ fewer than 4 rows: skip the unrolled loop

loop_start_luma_right:
    @ pad size is assumed to be pad_right = 80
    mov         r4, r0                  @ r4 = first pad byte of row 0

    ldrb        r8, [r0, #-1]           @ r8..r11 = right-most pixel of rows 0..3
    add         r0, r0, r1
    ldrb        r9, [r0, #-1]
    add         r0, r0, r1
    ldrb        r10, [r0, #-1]
    add         r0, r0, r1
    ldrb        r11, [r0, #-1]
    add         r0, r0, r1              @ r0 = pad pointer of the next group of rows

    add         r5, r4, r1              @ r5..r7 = first pad byte of rows 1..3
    add         r6, r5, r1
    add         r7, r6, r1

    vdup.u8     q0, r8                  @ replicate each pixel into 16 byte lanes
    vdup.u8     q1, r9
    vdup.u8     q2, r10
    vdup.u8     q3, r11

    vst1.8      {d0,d1}, [r4]!          @ row 0: 5 stores * 16 bytes = 80 bytes
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]

    vst1.8      {d2,d3}, [r5]!          @ row 1: 80 bytes
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]

    subs        r2, r2, #4              @ flags survive the NEON stores below

    vst1.8      {d4,d5}, [r6]!          @ row 2: 80 bytes
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]

    vst1.8      {d6,d7}, [r7]!          @ row 3: 80 bytes
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]

    @ total of 4rows*(16*5) = 4 * 80 = 4 * pad_right store

    bge         loop_start_luma_right   @ signed test: at least 4 more rows remain

tail_rows_luma_right:
    adds        r2, r2, #4              @ undo the bias: r2 = rows still to pad (1..3) or <= 0
    ble         end_luma_right

row_loop_luma_right:
    ldrb        r8, [r0, #-1]           @ right-most pixel of this row
    mov         r4, r0                  @ r4 = first pad byte of this row
    add         r0, r0, r1              @ advance to the next row
    vdup.u8     q0, r8

    vst1.8      {d0,d1}, [r4]!          @ 5 stores * 16 bytes = 80 bytes
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]

    subs        r2, r2, #1
    bgt         row_loop_luma_right

end_luma_right:
    ldmfd       sp!, {r4-r11,pc}        @ restore the saved registers and return





@/**
@*******************************************************************************
@*
@* @brief
@*  padding (chroma block) at the right of a 2d array
@*
@* @par description:
@*  the right-most 2-byte element of each row of a 2d array is replicated at
@*  the right of that row
@*
@*
@* @param[in] pu1_src
@*  uword8 pointer to the source (first byte of the pad area; the element
@*  that is replicated is read from pu1_src - 2)
@*
@* @param[in] src_strd
@*  integer source stride
@*
@* @param[in] ht
@*  integer height of the array
@*
@* @param[in] pad_size
@*  integer padding size in bytes; not read by this routine, which always
@*  writes 80 bytes per row
@*
@* @returns
@*  none
@*
@* @remarks
@*  each row receives 80 bytes (five 16-byte stores) of the replicated
@*  16-bit element; rows are padded four at a time, leftover rows (ht not a
@*  multiple of 4) one at a time, and ht <= 0 pads nothing
@*
@*******************************************************************************
@*/
@.if pad_right_chroma == c
@void ihevc_pad_right_chroma(uword8 *pu1_src,
@                            word32 src_strd,
@                            word32 ht,
@                            word32 pad_size)
@**************variables vs registers*************************
@   r0 => *pu1_src
@   r1 => src_strd
@   r2 => ht
@   r3 => pad_size (not read: 80 bytes are always written per row)



.globl ihevc_pad_right_chroma_a9q

.type ihevc_pad_right_chroma_a9q, %function

ihevc_pad_right_chroma_a9q:

    stmfd       sp!, {r4-r11,lr}        @ save callee-saved registers and the return address

    subs        r2, r2, #4              @ r2 = ht - 4, i.e. rows left after the next 4-row pass
    blt         tail_rows_chroma_right  @ fewer than 4 rows: skip the unrolled loop

loop_start_chroma_right:
    @ pad size is assumed to be pad_right = 80
    mov         r4, r0                  @ r4 = first pad byte of row 0

    ldrh        r8, [r0, #-2]           @ r8..r11 = right-most 16-bit element of rows 0..3
    add         r0, r0, r1
    ldrh        r9, [r0, #-2]
    add         r0, r0, r1
    ldrh        r10, [r0, #-2]
    add         r0, r0, r1
    ldrh        r11, [r0, #-2]
    add         r0, r0, r1              @ r0 = pad pointer of the next group of rows

    vdup.u16    q0, r8                  @ replicate each element into 8 halfword lanes
    vdup.u16    q1, r9
    vdup.u16    q2, r10
    vdup.u16    q3, r11

    add         r5, r4, r1              @ r5 = first pad byte of row 1

    vst1.8      {d0,d1}, [r4]!          @ row 0: 5 stores * 16 bytes = 80 bytes
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]

    add         r6, r5, r1              @ r6 = first pad byte of row 2

    vst1.8      {d2,d3}, [r5]!          @ row 1: 80 bytes
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]!
    vst1.8      {d2,d3}, [r5]

    add         r7, r6, r1              @ r7 = first pad byte of row 3

    vst1.8      {d4,d5}, [r6]!          @ row 2: 80 bytes
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]!
    vst1.8      {d4,d5}, [r6]

    subs        r2, r2, #4              @ flags survive the NEON stores below

    vst1.8      {d6,d7}, [r7]!          @ row 3: 80 bytes
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]!
    vst1.8      {d6,d7}, [r7]

    @ total of 4rows*(16*5) = 4 * 80 = 4 * pad_right store

    bge         loop_start_chroma_right @ signed test: at least 4 more rows remain

tail_rows_chroma_right:
    adds        r2, r2, #4              @ undo the bias: r2 = rows still to pad (1..3) or <= 0
    ble         end_chroma_right

row_loop_chroma_right:
    ldrh        r8, [r0, #-2]           @ right-most 16-bit element of this row
    mov         r4, r0                  @ r4 = first pad byte of this row
    add         r0, r0, r1              @ advance to the next row
    vdup.u16    q0, r8

    vst1.8      {d0,d1}, [r4]!          @ 5 stores * 16 bytes = 80 bytes
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]!
    vst1.8      {d0,d1}, [r4]

    subs        r2, r2, #1
    bgt         row_loop_chroma_right

end_chroma_right:
    ldmfd       sp!, {r4-r11,pc}        @ restore the saved registers and return
