@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@ *******************************************************************************
@ * @file
@ *  ihevc_mem_fns_neon.s
@ *
@ * @brief
@ *  Contains function definitions for memory manipulation
@ *
@ * @author
@ *  Naveen SR
@ *
@ * @par List of Functions:
@ *  - ihevc_memcpy_mul_8_a9q()
@ *  - ihevc_memcpy_a9q()
@ *  - ihevc_memset_mul_8_a9q()
@ *  - ihevc_memset_a9q()
@ *  - ihevc_memset_16bit_mul_8_a9q()
@ *  - ihevc_memset_16bit_a9q()
@ *
@ * @remarks
@ *  Syntax: GNU as, ARM (A32) with NEON. '@' starts a comment.
@ *  Every routine is a leaf: arguments arrive in r0-r2, nothing is pushed on
@ *  the stack, and only r0-r3, d0 and the condition flags are modified.
@ *
@ *******************************************************************************
@*/

.text

@/**
@*******************************************************************************
@*
@* @brief
@*  memcpy of a 1d array whose length is a multiple of 8
@*
@* @par Description:
@*  Does memcpy of 8bit data from source to destination for 8, 16 or 32
@*  number of bytes. Eight bytes are moved per iteration through d0.
@*
@* @param[in] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[in] num_bytes
@*  number of bytes to copy; must be a non-zero multiple of 8, because the
@*  loop only terminates when the counter reaches exactly zero
@*
@* @returns
@*  None
@*
@* @remarks
@*  Modifies r0, r1, r2, d0 and the condition flags.
@*
@*******************************************************************************
@*/
@void ihevc_memcpy_mul_8(UWORD8 *pu1_dst,
@                        UWORD8 *pu1_src,
@                        UWORD8 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => *pu1_src
@   r2 => num_bytes

.p2align 2

    .global ihevc_memcpy_mul_8_a9q
.type ihevc_memcpy_mul_8_a9q, %function

ihevc_memcpy_mul_8_a9q:

LOOP_NEON_MEMCPY_MUL_8:
    @ Memcpy 8 bytes
    VLD1.8      d0,[r1]!
    VST1.8      d0,[r0]!

    SUBS        r2,r2,#8                @ r2 = bytes still to copy
    BNE         LOOP_NEON_MEMCPY_MUL_8

    BX          LR                      @ same return form as the other routines


@/**
@*******************************************************************************
@*
@* @brief
@*  memcpy of a 1d array of arbitrary length
@*
@* @par Description:
@*  Copies 8 bytes at a time through d0 while at least 8 bytes remain, then
@*  copies the remaining 0..7 bytes one at a time through r3.
@*  A num_bytes of 0 copies nothing.
@*
@* @param[in] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[in] num_bytes
@*  number of bytes to copy
@*
@* @returns
@*  None
@*
@* @remarks
@*  Modifies r0, r1, r2, r3, d0 and the condition flags.
@*
@*******************************************************************************
@*/
@void ihevc_memcpy(UWORD8 *pu1_dst,
@                  UWORD8 *pu1_src,
@                  UWORD8 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => *pu1_src
@   r2 => num_bytes

.p2align 2

    .global ihevc_memcpy_a9q
.type ihevc_memcpy_a9q, %function

ihevc_memcpy_a9q:
    SUBS        r2,r2,#8                @ r2 = num_bytes - 8
    BLT         ARM_MEMCPY              @ fewer than 8 bytes: byte copy only
LOOP_NEON_MEMCPY:
    @ Memcpy 8 bytes
    VLD1.8      d0,[r1]!
    VST1.8      d0,[r0]!

    SUBS        r2,r2,#8
    BGE         LOOP_NEON_MEMCPY        @ loop while a full 8-byte chunk remains

ARM_MEMCPY:
    ADDS        r2,r2,#8                @ undo the bias: r2 = leftover bytes (0..7)
    BXEQ        LR                      @ nothing left; also covers num_bytes == 0

LOOP_ARM_MEMCPY:
    LDRB        r3,[r1],#1
    STRB        r3,[r0],#1
    SUBS        r2,r2,#1
    BNE         LOOP_ARM_MEMCPY
    BX          LR


@/**
@*******************************************************************************
@*
@* @brief
@*  memset of a 1d array whose length is a multiple of 8
@*
@* @par Description:
@*  Replicates the low byte of value into all 8 lanes of d0 and stores d0
@*  once per 8 bytes of destination.
@*
@* @param[in] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] value
@*  UWORD8 value to be written
@*
@* @param[in] num_bytes
@*  number of bytes to set; must be a non-zero multiple of 8, because the
@*  loop only terminates when the counter reaches exactly zero
@*
@* @returns
@*  None
@*
@* @remarks
@*  Modifies r0, r2, d0 and the condition flags.
@*
@*******************************************************************************
@*/
@void ihevc_memset_mul_8(UWORD8 *pu1_dst,
@                        UWORD8 value,
@                        UWORD8 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => value
@   r2 => num_bytes

.p2align 2

    .global ihevc_memset_mul_8_a9q
.type ihevc_memset_mul_8_a9q, %function

ihevc_memset_mul_8_a9q:

@ Assumptions: numbytes is either 8, 16 or 32
    VDUP.8      d0,r1                   @ d0 = value in each of the 8 byte lanes
LOOP_MEMSET_MUL_8:
    @ Memset 8 bytes
    VST1.8      d0,[r0]!

    SUBS        r2,r2,#8                @ r2 = bytes still to set
    BNE         LOOP_MEMSET_MUL_8

    BX          LR


@/**
@*******************************************************************************
@*
@* @brief
@*  memset of a 1d array of arbitrary length
@*
@* @par Description:
@*  Stores 8 bytes at a time from d0 while at least 8 bytes remain, then
@*  stores the remaining 0..7 bytes one at a time from r1.
@*  A num_bytes of 0 writes nothing.
@*
@* @param[in] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] value
@*  UWORD8 value to be written
@*
@* @param[in] num_bytes
@*  number of bytes to set
@*
@* @returns
@*  None
@*
@* @remarks
@*  Modifies r0, r2, d0 and the condition flags.
@*
@*******************************************************************************
@*/
@void ihevc_memset(UWORD8 *pu1_dst,
@                  UWORD8 value,
@                  UWORD8 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => value
@   r2 => num_bytes

.p2align 2

    .global ihevc_memset_a9q
.type ihevc_memset_a9q, %function

ihevc_memset_a9q:
    SUBS        r2,r2,#8                @ r2 = num_bytes - 8
    BLT         ARM_MEMSET              @ fewer than 8 bytes: byte stores only
    VDUP.8      d0,r1                   @ d0 = value in each of the 8 byte lanes
LOOP_NEON_MEMSET:
    @ Memset 8 bytes
    VST1.8      d0,[r0]!

    SUBS        r2,r2,#8
    BGE         LOOP_NEON_MEMSET        @ loop while a full 8-byte chunk remains

ARM_MEMSET:
    ADDS        r2,r2,#8                @ undo the bias: r2 = leftover bytes (0..7)
    BXEQ        LR                      @ nothing left; also covers num_bytes == 0

LOOP_ARM_MEMSET:
    STRB        r1,[r0],#1
    SUBS        r2,r2,#1
    BNE         LOOP_ARM_MEMSET
    BX          LR


@/**
@*******************************************************************************
@*
@* @brief
@*  memset of a 1d array of 16-bit elements whose length is a multiple of 8
@*
@* @par Description:
@*  Replicates the low halfword of value into the 4 lanes of d0 and stores
@*  d0 twice per iteration, i.e. 8 halfwords (16 bytes) per iteration.
@*
@* @param[in] pu2_dst
@*  UWORD16 pointer to the destination
@*
@* @param[in] value
@*  UWORD16 value to be written
@*
@* @param[in] num_words
@*  number of 16-bit elements to set; must be a non-zero multiple of 8,
@*  because the loop only terminates when the counter reaches exactly zero
@*
@* @returns
@*  None
@*
@* @remarks
@*  Modifies r0, r2, d0 and the condition flags.
@*
@*******************************************************************************
@*/
@void ihevc_memset_16bit_mul_8(UWORD16 *pu2_dst,
@                              UWORD16 value,
@                              UWORD8 num_words)
@**************Variables Vs Registers*************************
@   r0 => *pu2_dst
@   r1 => value
@   r2 => num_words

.p2align 2

    .global ihevc_memset_16bit_mul_8_a9q
.type ihevc_memset_16bit_mul_8_a9q, %function

ihevc_memset_16bit_mul_8_a9q:

@ Assumptions: num_words is either 8, 16 or 32

    VDUP.16     d0,r1                   @ d0 = value in each of the 4 halfword lanes
LOOP_MEMSET_16BIT_MUL_8:
    @ Memset 8 words (two stores of 4 halfwords each)
    VST1.16     d0,[r0]!
    VST1.16     d0,[r0]!

    SUBS        r2,r2,#8                @ r2 = elements still to set
    BNE         LOOP_MEMSET_16BIT_MUL_8

    BX          LR


@/**
@*******************************************************************************
@*
@* @brief
@*  memset of a 1d array of 16-bit elements of arbitrary length
@*
@* @par Description:
@*  Stores 8 halfwords at a time (two stores of d0) while at least 8 elements
@*  remain, then stores the remaining 0..7 elements one at a time from r1.
@*  A num_words of 0 writes nothing.
@*
@* @param[in] pu2_dst
@*  UWORD16 pointer to the destination
@*
@* @param[in] value
@*  UWORD16 value to be written
@*
@* @param[in] num_words
@*  number of 16-bit elements to set
@*
@* @returns
@*  None
@*
@* @remarks
@*  Modifies r0, r2, d0 and the condition flags.
@*
@*******************************************************************************
@*/
@void ihevc_memset_16bit(UWORD16 *pu2_dst,
@                        UWORD16 value,
@                        UWORD8 num_words)
@**************Variables Vs Registers*************************
@   r0 => *pu2_dst
@   r1 => value
@   r2 => num_words

.p2align 2

    .global ihevc_memset_16bit_a9q
.type ihevc_memset_16bit_a9q, %function

ihevc_memset_16bit_a9q:
    SUBS        r2,r2,#8                @ r2 = num_words - 8
    BLT         ARM_MEMSET_16BIT        @ fewer than 8 elements: halfword stores only
    VDUP.16     d0,r1                   @ d0 = value in each of the 4 halfword lanes
LOOP_NEON_MEMSET_16BIT:
    @ Memset 8 words (two stores of 4 halfwords each)
    VST1.16     d0,[r0]!
    VST1.16     d0,[r0]!

    SUBS        r2,r2,#8
    BGE         LOOP_NEON_MEMSET_16BIT  @ loop while a full 8-element chunk remains

ARM_MEMSET_16BIT:
    ADDS        r2,r2,#8                @ undo the bias: r2 = leftover elements (0..7)
    BXEQ        LR                      @ nothing left; also covers num_words == 0

LOOP_ARM_MEMSET_16BIT:
    STRH        r1,[r0],#2
    SUBS        r2,r2,#1
    BNE         LOOP_ARM_MEMSET_16BIT
    BX          LR


    .section .note.GNU-stack,"",%progbits