///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///**
// *******************************************************************************
// * @file
// *  ihevc_mem_fns_neon.s
// *
// * @brief
// *  Contains function definitions for memory manipulation
// *
// * @author
// *  Naveen SR
// *
// * @par List of Functions:
// *  - ihevc_memcpy_mul_8_av8()
// *  - ihevc_memcpy_av8()
// *  - ihevc_memset_mul_8_av8()
// *  - ihevc_memset_av8()
// *  - ihevc_memset_16bit_mul_8_av8()
// *  - ihevc_memset_16bit_av8()
// *
// * @remarks
// *  Every routine post-increments x0 (and x1 for the copies), counts x2
// *  down, overwrites v0 and changes the condition flags.  The generic
// *  copy additionally uses w3 as a scratch register.
// *
// *  NOTE(review): the element count is read from the full 64-bit x2 although
// *  the prototype comments below declare it as UWORD8; presumably callers
// *  pass it zero-extended - confirm against the C declarations.
// *
// *******************************************************************************
//*/

///**
//*******************************************************************************
//*
//* @brief
//*   memcpy of a 1d array
//*
//* @par Description:
//*   Does memcpy of 8bit data from source to destination for 8,16 or 32 number
//*   of bytes.  Data is moved 8 bytes per iteration.
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] num_bytes
//*  number of bytes to copy; must be a non-zero multiple of 8, because the
//*  counter is only compared against zero after each 8-byte transfer
//*
//* @returns
//*  None
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//void ihevc_memcpy_mul_8(UWORD8 *pu1_dst,
//                        UWORD8 *pu1_src,
//                        UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//  x0 => *pu1_dst
//  x1 => *pu1_src
//  x2 => num_bytes

.text
.p2align 2


    .global ihevc_memcpy_mul_8_av8
.type ihevc_memcpy_mul_8_av8, %function

ihevc_memcpy_mul_8_av8:

LOOP_NEON_MEMCPY_MUL_8:
    // Copy 8 bytes, advancing both pointers
    ld1         {v0.8b},[x1],#8
    st1         {v0.8b},[x0],#8

    subs        x2,x2,#8                    // 8 fewer bytes remaining
    bne         LOOP_NEON_MEMCPY_MUL_8      // exit only when exactly zero remain
    ret



///**
//*******************************************************************************
//*
//* @brief
//*   memcpy of a 1d array of arbitrary length
//*
//* @par Description:
//*   Copies num_bytes bytes from source to destination.  Whole groups of
//*   8 bytes go through a NEON register; the remaining 1..7 bytes are copied
//*   one at a time.  A count of zero returns immediately without touching
//*   memory.
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] num_bytes
//*  number of bytes to copy
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
//void ihevc_memcpy(UWORD8 *pu1_dst,
//                  UWORD8 *pu1_src,
//                  UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//  x0 => *pu1_dst
//  x1 => *pu1_src
//  x2 => num_bytes



    .global ihevc_memcpy_av8
.type ihevc_memcpy_av8, %function

ihevc_memcpy_av8:
    // Without this guard a zero count reaches the byte loop below with a
    // counter of 0, which is decremented to -1 before the first exit test.
    cbz         x2,MEMCPY_RETURN
    subs        x2,x2,#8                    // x2 = bytes left after one 8-byte group
    blt         ARM_MEMCPY                  // fewer than 8 bytes in total
LOOP_NEON_MEMCPY:
    // Copy 8 bytes, advancing both pointers
    ld1         {v0.8b},[x1],#8
    st1         {v0.8b},[x0],#8

    subs        x2,x2,#8
    bge         LOOP_NEON_MEMCPY            // another full group is available
    cmn         x2,#8                       // x2 == -8  <=>  no leftover bytes
    beq         MEMCPY_RETURN

ARM_MEMCPY:
    add         x2,x2,#8                    // undo the bias: x2 = leftover bytes (1..7)

LOOP_ARM_MEMCPY:
    ldrb        w3,[x1],#1
    strb        w3,[x0],#1
    subs        x2,x2,#1
    bne         LOOP_ARM_MEMCPY
MEMCPY_RETURN:
    ret




///**
//*******************************************************************************
//*
//* @brief
//*   memset of a 1d array, length a multiple of 8
//*
//* @par Description:
//*   Stores the low byte of value into num_bytes consecutive bytes,
//*   8 bytes per iteration.
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] value
//*  byte value to store
//*
//* @param[in] num_bytes
//*  number of bytes to set; must be a non-zero multiple of 8, because the
//*  counter is only compared against zero after each 8-byte store
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
//void ihevc_memset_mul_8(UWORD8 *pu1_dst,
//                        UWORD8 value,
//                        UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//  x0 => *pu1_dst
//  x1 => value
//  x2 => num_bytes

.text
.p2align 2



    .global ihevc_memset_mul_8_av8
.type ihevc_memset_mul_8_av8, %function

ihevc_memset_mul_8_av8:

// Assumptions: numbytes is either 8, 16 or 32
    dup         v0.8b,w1                    // replicate the byte into 8 lanes
LOOP_MEMSET_MUL_8:
    // Memset 8 bytes
    st1         {v0.8b},[x0],#8

    subs        x2,x2,#8
    bne         LOOP_MEMSET_MUL_8           // exit only when exactly zero remain

    ret




///**
//*******************************************************************************
//*
//* @brief
//*   memset of a 1d array of arbitrary length
//*
//* @par Description:
//*   Stores the low byte of value into num_bytes consecutive bytes.  Whole
//*   groups of 8 bytes are written from a NEON register; the remaining 1..7
//*   bytes are written one at a time.  A count of zero returns immediately
//*   without touching memory.
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] value
//*  byte value to store
//*
//* @param[in] num_bytes
//*  number of bytes to set
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
//void ihevc_memset(UWORD8 *pu1_dst,
//                  UWORD8 value,
//                  UWORD8 num_bytes)
//**************Variables Vs Registers*************************
//  x0 => *pu1_dst
//  x1 => value
//  x2 => num_bytes



    .global ihevc_memset_av8
.type ihevc_memset_av8, %function

ihevc_memset_av8:
    // Without this guard a zero count reaches the byte loop below with a
    // counter of 0, which is decremented to -1 before the first exit test.
    cbz         x2,MEMSET_RETURN
    subs        x2,x2,#8                    // x2 = bytes left after one 8-byte group
    blt         ARM_MEMSET                  // fewer than 8 bytes in total
    dup         v0.8b,w1                    // replicate the byte into 8 lanes
LOOP_NEON_MEMSET:
    // Memset 8 bytes
    st1         {v0.8b},[x0],#8

    subs        x2,x2,#8
    bge         LOOP_NEON_MEMSET            // another full group is available
    cmn         x2,#8                       // x2 == -8  <=>  no leftover bytes
    beq         MEMSET_RETURN

ARM_MEMSET:
    add         x2,x2,#8                    // undo the bias: x2 = leftover bytes (1..7)

LOOP_ARM_MEMSET:
    strb        w1,[x0],#1
    subs        x2,x2,#1
    bne         LOOP_ARM_MEMSET

MEMSET_RETURN:
    ret




///**
//*******************************************************************************
//*
//* @brief
//*   memset of a 1d array of 16-bit words, length a multiple of 8
//*
//* @par Description:
//*   Stores the low halfword of value into num_words consecutive 16-bit
//*   words, 8 words (16 bytes) per iteration.
//*
//* @param[out] pu2_dst
//*  UWORD16 pointer to the destination
//*
//* @param[in] value
//*  16-bit value to store
//*
//* @param[in] num_words
//*  number of 16-bit words to set; must be a non-zero multiple of 8, because
//*  the counter is only compared against zero after each 8-word store
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
//void ihevc_memset_16bit_mul_8(UWORD16 *pu2_dst,
//                              UWORD16 value,
//                              UWORD8 num_words)
//**************Variables Vs Registers*************************
//  x0 => *pu2_dst
//  x1 => value
//  x2 => num_words

.text
.p2align 2



    .global ihevc_memset_16bit_mul_8_av8
.type ihevc_memset_16bit_mul_8_av8, %function

ihevc_memset_16bit_mul_8_av8:

// Assumptions: num_words is either 8, 16 or 32

    // Memset 8 words
    dup         v0.8h,w1                    // replicate the halfword into 8 lanes
LOOP_MEMSET_16BIT_MUL_8:
    st1         {v0.8h},[x0],#16

    subs        x2,x2,#8
    bne         LOOP_MEMSET_16BIT_MUL_8     // exit only when exactly zero remain

    ret




///**
//*******************************************************************************
//*
//* @brief
//*   memset of a 1d array of 16-bit words of arbitrary length
//*
//* @par Description:
//*   Stores the low halfword of value into num_words consecutive 16-bit
//*   words.  Whole groups of 8 words are written from a NEON register; the
//*   remaining 1..7 words are written one at a time.  A count of zero returns
//*   immediately without touching memory.
//*
//* @param[out] pu2_dst
//*  UWORD16 pointer to the destination
//*
//* @param[in] value
//*  16-bit value to store
//*
//* @param[in] num_words
//*  number of 16-bit words to set
//*
//* @returns
//*  None
//*
//*******************************************************************************
//*/
//void ihevc_memset_16bit(UWORD16 *pu2_dst,
//                        UWORD16 value,
//                        UWORD8 num_words)
//**************Variables Vs Registers*************************
//  x0 => *pu2_dst
//  x1 => value
//  x2 => num_words



    .global ihevc_memset_16bit_av8
.type ihevc_memset_16bit_av8, %function

ihevc_memset_16bit_av8:
    // Without this guard a zero count reaches the halfword loop below with a
    // counter of 0, which is decremented to -1 before the first exit test.
    cbz         x2,MEMSET_16BIT_RETURN
    subs        x2,x2,#8                    // x2 = words left after one 8-word group
    blt         ARM_MEMSET_16BIT            // fewer than 8 words in total
    dup         v0.8h,w1                    // replicate the halfword into 8 lanes
LOOP_NEON_MEMSET_16BIT:
    // Memset 8 words
    st1         {v0.8h},[x0],#16

    subs        x2,x2,#8
    bge         LOOP_NEON_MEMSET_16BIT      // another full group is available
    cmn         x2,#8                       // x2 == -8  <=>  no leftover words
    beq         MEMSET_16BIT_RETURN

ARM_MEMSET_16BIT:
    add         x2,x2,#8                    // undo the bias: x2 = leftover words (1..7)

LOOP_ARM_MEMSET_16BIT:
    strh        w1,[x0],#2
    subs        x2,x2,#1
    bne         LOOP_ARM_MEMSET_16BIT

MEMSET_16BIT_RETURN:
    ret




    .section .note.GNU-stack,"",%progbits
