///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///**
// *******************************************************************************
// * //file
// *  ihevc_padding_neon.s
// *
// * //brief
// *  contains function definitions for padding
// *
// * //author
// *  naveen sr
// *
// * //par list of functions:
// *  - ihevc_pad_left_luma()
// *  - ihevc_pad_left_chroma()
// *  - ihevc_pad_right_luma()
// *  - ihevc_pad_right_chroma()
// *
// * //remarks
// *  none
// *
// *******************************************************************************
//*/

///**
//*******************************************************************************
//*
//* //brief
//*   padding (luma block) at the left of a 2d array
//*
//* //par description:
//*   the left column of a 2d array is replicated for pad_size times at the left
//*
//*
//* //param[in] pu1_src
//*  uword8 pointer to the source
//*
//* //param[in] src_strd
//*  integer source stride
//*
//* //param[in] ht
//*  integer height of the array
//*
//* //param[in] wd
//*  integer width of the array
//*
//* //param[in] pad_size
//*  integer -padding size of the array
//*
//* //param[in] ht
//*  integer height of the array
//*
//* //param[in] wd
//*  integer width of the array
//*
//* //returns
//*
//* //remarks
//*  none
//*
//*******************************************************************************
//*/
//.if pad_left_luma == c
//void ihevc_pad_left_luma(uword8 *pu1_src,
//                         word32 src_strd,
//                         word32 ht,
//                         word32 pad_size)
//**************variables vs registers*************************
//    x0 => *pu1_src
//    x1 => src_strd   (32-bit argument, sign-extended on entry)
//    x2 => ht         (32-bit argument, only w2 is used)
//    x3 => pad_size   (32-bit argument, sign-extended on entry)
//
// Notes on the assembly implementation:
//  - Each row receives exactly 80 bytes (5 x 16-byte stores) starting at
//    pu1_src - pad_size; pad_size only selects the start address.
//  - Rows are processed four at a time, so ht is effectively rounded up
//    to the next multiple of 4. ht <= 0 returns without touching memory.
//  - Scratch registers: x4-x7, w8-w11, v0, v2, v4, v6, flags.

.text
.align 4

.globl ihevc_pad_left_luma_av8

.type ihevc_pad_left_luma_av8, %function

ihevc_pad_left_luma_av8:

    // AAPCS64 leaves bits [63:32] of 32-bit integer arguments unspecified,
    // so widen them before they are used in 64-bit address arithmetic.
    sxtw        x1,w1                       // x1 = (int64) src_strd
    sxtw        x3,w3                       // x3 = (int64) pad_size
    cmp         w2,#0
    ble         end_luma_left               // nothing to pad when ht <= 0

loop_start_luma_left:
    // pad size is assumed to be pad_left = 80
    sub         x4,x0,x3                    // x4 = first pad byte of row 0

    ldrb        w8,[x0]                     // leftmost pixel of row 0
    add         x0,x0,x1
    ldrb        w9,[x0]                     // leftmost pixel of row 1
    add         x0,x0,x1
    ldrb        w10,[x0]                    // leftmost pixel of row 2
    add         x0,x0,x1
    ldrb        w11,[x0]                    // leftmost pixel of row 3
    add         x0,x0,x1                    // x0 -> row 4 (next iteration)

    dup         v0.16b,w8                   // replicate each pixel to 16 lanes
    dup         v2.16b,w9
    dup         v4.16b,w10
    dup         v6.16b,w11

    add         x5,x4,x1                    // x5 = first pad byte of row 1

    st1         {v0.16b},[x4],#16           // row 0: 5 x 16 = 80 bytes
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4]

    add         x6,x5,x1                    // x6 = first pad byte of row 2

    st1         {v2.16b},[x5],#16           // row 1: 80 bytes
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5]

    add         x7,x6,x1                    // x7 = first pad byte of row 3

    st1         {v4.16b},[x6],#16           // row 2: 80 bytes
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6]

    subs        w2,w2,#4                    // 4 rows done; st1 leaves flags intact

    st1         {v6.16b},[x7],#16           // row 3: 80 bytes
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7]

    // total of 4rows*(16*5) = 4 * 80 = 4 * pad_left store

    bgt         loop_start_luma_left        // continue while rows remain

end_luma_left:
    ret





///**
//*******************************************************************************
//*
//* //brief
//*   padding (chroma block) at the left of a 2d array
//*
//* //par description:
//*   the left column of a 2d array is replicated for pad_size times at the left
//*
//*
//* //param[in] pu1_src
//*  uword8 pointer to the source
//*
//* //param[in] src_strd
//*  integer source stride
//*
//* //param[in] ht
//*  integer height of the array
//*
//* //param[in] wd
//*  integer width of the array (each colour component)
//*
//* //param[in] pad_size
//*  integer -padding size of the array
//*
//* //param[in] ht
//*  integer height of the array
//*
//* //param[in] wd
//*  integer width of the array
//*
//* //returns
//*
//* //remarks
//*  none
//*
//*******************************************************************************
//*/
//.if pad_left_chroma == c
//void ihevc_pad_left_chroma(uword8 *pu1_src,
//                           word32 src_strd,
//                           word32 ht,
//                           word32 pad_size)
//{
//    x0 => *pu1_src
//    x1 => src_strd   (32-bit argument, sign-extended on entry)
//    x2 => ht         (32-bit argument, only w2 is used)
//    x3 => pad_size   (32-bit argument, sign-extended on entry)
//
// Notes on the assembly implementation:
//  - The 2 bytes at pu1_src of each row are loaded as one halfword and
//    replicated, so the byte pair repeats across the padding.
//  - Each row receives exactly 80 bytes (5 x 16-byte stores) starting at
//    pu1_src - pad_size; pad_size only selects the start address.
//  - Rows are processed four at a time, so ht is effectively rounded up
//    to the next multiple of 4. ht <= 0 returns without touching memory.
//  - Scratch registers: x4-x7, w8-w11, v0, v2, v4, v6, flags.



.globl ihevc_pad_left_chroma_av8

.type ihevc_pad_left_chroma_av8, %function

ihevc_pad_left_chroma_av8:

    // AAPCS64 leaves bits [63:32] of 32-bit integer arguments unspecified,
    // so widen them before they are used in 64-bit address arithmetic.
    sxtw        x1,w1                       // x1 = (int64) src_strd
    sxtw        x3,w3                       // x3 = (int64) pad_size
    cmp         w2,#0
    ble         end_chroma_left             // nothing to pad when ht <= 0

loop_start_chroma_left:
    // pad size is assumed to be pad_left = 80
    sub         x4,x0,x3                    // x4 = first pad byte of row 0

    ldrh        w8,[x0]                     // leftmost byte pair of row 0
    add         x0,x0,x1
    ldrh        w9,[x0]                     // leftmost byte pair of row 1
    add         x0,x0,x1
    ldrh        w10,[x0]                    // leftmost byte pair of row 2
    add         x0,x0,x1
    ldrh        w11,[x0]                    // leftmost byte pair of row 3
    add         x0,x0,x1                    // x0 -> row 4 (next iteration)

    dup         v0.8h,w8                    // replicate each pair to 8 lanes
    dup         v2.8h,w9
    dup         v4.8h,w10
    dup         v6.8h,w11

    add         x5,x4,x1                    // x5 = first pad byte of row 1

    st1         {v0.16b},[x4],#16           // row 0: 5 x 16 = 80 bytes
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4]

    add         x6,x5,x1                    // x6 = first pad byte of row 2

    st1         {v2.16b},[x5],#16           // row 1: 80 bytes
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5]

    add         x7,x6,x1                    // x7 = first pad byte of row 3

    st1         {v4.16b},[x6],#16           // row 2: 80 bytes
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6]

    subs        w2,w2,#4                    // 4 rows done; st1 leaves flags intact

    st1         {v6.16b},[x7],#16           // row 3: 80 bytes
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7]

    // total of 4rows*(16*5) = 4 * 80 = 4 * pad_left store

    bgt         loop_start_chroma_left      // continue while rows remain

end_chroma_left:
    ret





///**
//*******************************************************************************
//*
//* //brief
//*   padding (luma block) at the right of a 2d array
//*
//* //par description:
//*   the right column of a 2d array is replicated for pad_size times at the right
//*
//*
//* //param[in] pu1_src
//*  uword8 pointer to the source
//*
//* //param[in] src_strd
//*  integer source stride
//*
//* //param[in] ht
//*  integer height of the array
//*
//* //param[in] wd
//*  integer width of the array
//*
//* //param[in] pad_size
//*  integer -padding size of the array
//*
//* //param[in] ht
//*  integer height of the array
//*
//* //param[in] wd
//*  integer width of the array
//*
//* //returns
//*
//* //remarks
//*  none
//*
//*******************************************************************************
//*/
//.if pad_right_luma == c
//void ihevc_pad_right_luma(uword8 *pu1_src,
//                          word32 src_strd,
//                          word32 ht,
//                          word32 pad_size)
//{
//    word32 row;
//
//    for(row = 0; row < ht; row++)
//    {
//        memset(pu1_src, *(pu1_src -1), pad_size);
//
//        pu1_src += src_strd;
//    }
//}
//
//    x0 => *pu1_src
//    x1 => src_strd   (32-bit argument, sign-extended on entry)
//    x2 => ht         (32-bit argument, only w2 is used)
//    x3 => pad_size   (not read by this routine)
//
// Notes on the assembly implementation:
//  - The byte at pu1_src - 1 of each row is replicated into the 80 bytes
//    (5 x 16-byte stores) starting at pu1_src; pad_size is never read.
//  - Rows are processed four at a time, so ht is effectively rounded up
//    to the next multiple of 4. ht <= 0 returns without touching memory.
//  - Scratch registers: x4-x7, w8-w11, v0, v2, v4, v6, flags.



.globl ihevc_pad_right_luma_av8

.type ihevc_pad_right_luma_av8, %function

ihevc_pad_right_luma_av8:

    // AAPCS64 leaves bits [63:32] of 32-bit integer arguments unspecified,
    // so widen the stride before it is used in 64-bit address arithmetic.
    sxtw        x1,w1                       // x1 = (int64) src_strd
    cmp         w2,#0
    ble         end_luma_right              // nothing to pad when ht <= 0

loop_start_luma_right:
    // pad size is assumed to be 80 bytes
    mov         x4,x0                       // x4 = first pad byte of row 0

    ldrb        w8,[x0, #-1]                // last pixel of row 0
    add         x0,x0,x1
    ldrb        w9,[x0, #-1]                // last pixel of row 1
    add         x0,x0,x1
    ldrb        w10,[x0, #-1]               // last pixel of row 2
    add         x0,x0,x1
    ldrb        w11,[x0, #-1]               // last pixel of row 3
    add         x0,x0,x1                    // x0 -> row 4 (next iteration)

    add         x5,x4,x1                    // x5 = first pad byte of row 1
    add         x6,x5,x1                    // x6 = first pad byte of row 2
    add         x7,x6,x1                    // x7 = first pad byte of row 3

    dup         v0.16b,w8                   // replicate each pixel to 16 lanes
    dup         v2.16b,w9
    dup         v4.16b,w10
    dup         v6.16b,w11

    st1         {v0.16b},[x4],#16           // row 0: 5 x 16 = 80 bytes
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4]


    st1         {v2.16b},[x5],#16           // row 1: 80 bytes
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5]

    subs        w2,w2,#4                    // 4 rows done; st1 leaves flags intact

    st1         {v4.16b},[x6],#16           // row 2: 80 bytes
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6]

    st1         {v6.16b},[x7],#16           // row 3: 80 bytes
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7]


    // total of 4rows*(16*5) = 4 * 80 bytes stored


    bgt         loop_start_luma_right       // continue while rows remain

end_luma_right:
    ret





///**
//*******************************************************************************
//*
//* //brief
//*   padding (chroma block) at the right of a 2d array
//*
//* //par description:
//*   the right column of a 2d array is replicated for pad_size times at the right
//*
//*
//* //param[in] pu1_src
//*  uword8 pointer to the source
//*
//* //param[in] src_strd
//*  integer source stride
//*
//* //param[in] ht
//*  integer height of the array
//*
//* //param[in] wd
//*  integer width of the array (each colour component)
//*
//* //param[in] pad_size
//*  integer -padding size of the array
//*
//* //param[in] ht
//*  integer height of the array
//*
//* //param[in] wd
//*  integer width of the array
//*
//* //returns
//*
//* //remarks
//*  none
//*
//*******************************************************************************
//*/
//.if pad_right_chroma == c
//void ihevc_pad_right_chroma(uword8 *pu1_src,
//                            word32 src_strd,
//                            word32 ht,
//                            word32 pad_size)
//    x0 => *pu1_src
//    x1 => src_strd   (32-bit argument, sign-extended on entry)
//    x2 => ht         (32-bit argument, only w2 is used)
//    x3 => pad_size   (not read by this routine)
//
// Notes on the assembly implementation:
//  - The 2 bytes at pu1_src - 2 of each row are loaded as one halfword
//    and replicated into the 80 bytes (5 x 16-byte stores) starting at
//    pu1_src; pad_size is never read.
//  - Rows are processed four at a time, so ht is effectively rounded up
//    to the next multiple of 4. ht <= 0 returns without touching memory.
//  - Scratch registers: x4-x7, w8-w11, v0, v2, v4, v6, flags.



.globl ihevc_pad_right_chroma_av8

.type ihevc_pad_right_chroma_av8, %function

ihevc_pad_right_chroma_av8:

    // AAPCS64 leaves bits [63:32] of 32-bit integer arguments unspecified,
    // so widen the stride before it is used in 64-bit address arithmetic.
    sxtw        x1,w1                       // x1 = (int64) src_strd
    cmp         w2,#0
    ble         end_chroma_right            // nothing to pad when ht <= 0

loop_start_chroma_right:
    // pad size is assumed to be 80 bytes
    mov         x4,x0                       // x4 = first pad byte of row 0

    ldrh        w8,[x0, #-2]                // last byte pair of row 0
    add         x0,x0,x1
    ldrh        w9,[x0, #-2]                // last byte pair of row 1
    add         x0,x0,x1
    ldrh        w10,[x0, #-2]               // last byte pair of row 2
    add         x0,x0,x1
    ldrh        w11,[x0, #-2]               // last byte pair of row 3
    add         x0,x0,x1                    // x0 -> row 4 (next iteration)

    dup         v0.8h,w8                    // replicate each pair to 8 lanes
    dup         v2.8h,w9
    dup         v4.8h,w10
    dup         v6.8h,w11

    add         x5,x4,x1                    // x5 = first pad byte of row 1

    st1         {v0.16b},[x4],#16           // row 0: 5 x 16 = 80 bytes
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4],#16
    st1         {v0.16b},[x4]

    add         x6,x5,x1                    // x6 = first pad byte of row 2

    st1         {v2.16b},[x5],#16           // row 1: 80 bytes
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5],#16
    st1         {v2.16b},[x5]

    add         x7,x6,x1                    // x7 = first pad byte of row 3

    st1         {v4.16b},[x6],#16           // row 2: 80 bytes
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6],#16
    st1         {v4.16b},[x6]

    subs        w2,w2,#4                    // 4 rows done; st1 leaves flags intact

    st1         {v6.16b},[x7],#16           // row 3: 80 bytes
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7],#16
    st1         {v6.16b},[x7]

    // total of 4rows*(16*5) = 4 * 80 bytes stored

    bgt         loop_start_chroma_right     // continue while rows remain

end_chroma_right:
    ret