xref: /aosp_15_r20/external/libhevc/common/arm64/ihevc_mem_fns.s (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1*c83a76b0SSuyog Pawar///*****************************************************************************
2*c83a76b0SSuyog Pawar//*
3*c83a76b0SSuyog Pawar//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*c83a76b0SSuyog Pawar//*
5*c83a76b0SSuyog Pawar//* Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar//* you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar//* You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar//*
9*c83a76b0SSuyog Pawar//* http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar//*
11*c83a76b0SSuyog Pawar//* Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar//* distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar//* See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar//* limitations under the License.
16*c83a76b0SSuyog Pawar//*
17*c83a76b0SSuyog Pawar//*****************************************************************************/
///**
// *******************************************************************************
// * @file
// *  ihevc_mem_fns.s
// *
// * @brief
// *  Contains function definitions for memory manipulation
// *
// * @author
// *     Naveen SR
// *
// * @par List of Functions:
// *  - ihevc_memcpy_mul_8_av8()
// *  - ihevc_memcpy_av8()
// *  - ihevc_memset_mul_8_av8()
// *  - ihevc_memset_av8()
// *  - ihevc_memset_16bit_mul_8_av8()
// *  - ihevc_memset_16bit_av8()
// *
// * @remarks
// *  None
// *
// *******************************************************************************
//*/
39*c83a76b0SSuyog Pawar
///**
//*******************************************************************************
//*
//* @brief
//*   memcpy of a 1d array
//*
//* @par Description:
//*   Copies 8-bit data from source to destination; the byte count is expected
//*   to be 8, 16 or 32 (a multiple of 8)
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[in] num_bytes
//*  number of bytes to copy
//*
//* @returns
//*  None
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
64*c83a76b0SSuyog Pawar//void ihevc_memcpy_mul_8(UWORD8 *pu1_dst,
65*c83a76b0SSuyog Pawar//                      UWORD8 *pu1_src,
66*c83a76b0SSuyog Pawar//                      UWORD8 num_bytes)
67*c83a76b0SSuyog Pawar//**************Variables Vs Registers*************************
68*c83a76b0SSuyog Pawar//    x0 => *pu1_dst
69*c83a76b0SSuyog Pawar//    x1 => *pu1_src
70*c83a76b0SSuyog Pawar//    x2 => num_bytes
71*c83a76b0SSuyog Pawar
72*c83a76b0SSuyog Pawar.text
73*c83a76b0SSuyog Pawar.p2align 2
74*c83a76b0SSuyog Pawar
75*c83a76b0SSuyog Pawar
//-----------------------------------------------------------------------------
// ihevc_memcpy_mul_8_av8
//   Copies num_bytes bytes from pu1_src to pu1_dst, 8 bytes per iteration.
//
// In:    x0 = pu1_dst   (advanced by 8 per iteration)
//        x1 = pu1_src   (advanced by 8 per iteration)
//        x2 = num_bytes (must be a multiple of 8; 0 is accepted and copies
//                        nothing)
// Out:   none
// Clobb: x0, x1, x2, v0, flags
//
// The loop exits only when the counter reaches exactly zero, so a count that
// is not a multiple of 8 steps past zero and keeps copying; callers must
// honour the multiple-of-8 precondition.
// NOTE(review): the count is consumed as the full 64-bit x2 - confirm the C
// prototype's count type and that callers pass it zero-extended.
//-----------------------------------------------------------------------------
    .global ihevc_memcpy_mul_8_av8
.type ihevc_memcpy_mul_8_av8, %function

ihevc_memcpy_mul_8_av8:
    // Guard: the loop below copies before it tests the count, so a zero
    // count must bail out here instead of copying 8 bytes and wrapping.
    CBZ         x2,MEMCPY_MUL_8_RETURN

LOOP_NEON_MEMCPY_MUL_8:
    // Copy 8 bytes through the low half of v0
    LD1         {v0.8b},[x1],#8
    ST1         {v0.8b},[x0],#8

    SUBS        x2,x2,#8                    // x2 = bytes still to copy
    BNE         LOOP_NEON_MEMCPY_MUL_8

MEMCPY_MUL_8_RETURN:
    ret
89*c83a76b0SSuyog Pawar
90*c83a76b0SSuyog Pawar
91*c83a76b0SSuyog Pawar
92*c83a76b0SSuyog Pawar//*******************************************************************************
93*c83a76b0SSuyog Pawar//*/
94*c83a76b0SSuyog Pawar//void ihevc_memcpy(UWORD8 *pu1_dst,
95*c83a76b0SSuyog Pawar//                  UWORD8 *pu1_src,
96*c83a76b0SSuyog Pawar//                  UWORD8 num_bytes)
97*c83a76b0SSuyog Pawar//**************Variables Vs Registers*************************
98*c83a76b0SSuyog Pawar//    x0 => *pu1_dst
99*c83a76b0SSuyog Pawar//    x1 => *pu1_src
100*c83a76b0SSuyog Pawar//    x2 => num_bytes
101*c83a76b0SSuyog Pawar
102*c83a76b0SSuyog Pawar
103*c83a76b0SSuyog Pawar
//-----------------------------------------------------------------------------
// ihevc_memcpy_av8
//   Copies num_bytes bytes from pu1_src to pu1_dst: 8-byte NEON chunks while
//   at least 8 bytes remain, then a byte-at-a-time tail for the remainder.
//
// In:    x0 = pu1_dst   (post-incremented as bytes are stored)
//        x1 = pu1_src   (post-incremented as bytes are loaded)
//        x2 = num_bytes (any value including 0; compared as a signed number)
// Out:   none
// Clobb: x0, x1, x2, w3, v0, flags
//
// NOTE(review): the count is consumed as the full 64-bit x2 - confirm the C
// prototype's count type and that callers pass it zero-extended.
//-----------------------------------------------------------------------------
    .global ihevc_memcpy_av8
.type ihevc_memcpy_av8, %function

ihevc_memcpy_av8:
    // Guard: with a zero count the byte tail below would copy one byte and
    // then decrement 0 -> -1, so its BNE exit test would never fire.
    CBZ         x2,MEMCPY_RETURN

    SUBS        x2,x2,#8                    // x2 = count - 8 (bias for the NEON loop)
    BLT         ARM_MEMCPY                  // fewer than 8 bytes: tail only
LOOP_NEON_MEMCPY:
    // Copy 8 bytes through the low half of v0
    LD1         {v0.8b},[x1],#8
    ST1         {v0.8b},[x0],#8

    SUBS        x2,x2,#8
    BGE         LOOP_NEON_MEMCPY            // another full 8-byte chunk remains
    CMN         x2,#8                       // x2 == -8  <=>  no remainder left
    BEQ         MEMCPY_RETURN

ARM_MEMCPY:
    ADD         x2,x2,#8                    // undo the bias: x2 = remaining bytes (1..7)

LOOP_ARM_MEMCPY:
    LDRB        w3,[x1],#1
    STRB        w3,[x0],#1
    SUBS        x2,x2,#1
    BNE         LOOP_ARM_MEMCPY
MEMCPY_RETURN:
    ret
130*c83a76b0SSuyog Pawar
131*c83a76b0SSuyog Pawar
132*c83a76b0SSuyog Pawar
133*c83a76b0SSuyog Pawar
134*c83a76b0SSuyog Pawar//void ihevc_memset_mul_8(UWORD8 *pu1_dst,
135*c83a76b0SSuyog Pawar//                       UWORD8 value,
136*c83a76b0SSuyog Pawar//                       UWORD8 num_bytes)
137*c83a76b0SSuyog Pawar//**************Variables Vs Registers*************************
138*c83a76b0SSuyog Pawar//    x0 => *pu1_dst
139*c83a76b0SSuyog Pawar//    x1 => value
140*c83a76b0SSuyog Pawar//    x2 => num_bytes
141*c83a76b0SSuyog Pawar
142*c83a76b0SSuyog Pawar.text
143*c83a76b0SSuyog Pawar.p2align 2
144*c83a76b0SSuyog Pawar
145*c83a76b0SSuyog Pawar
146*c83a76b0SSuyog Pawar
//-----------------------------------------------------------------------------
// ihevc_memset_mul_8_av8
//   Fills num_bytes bytes at pu1_dst with the low byte of value, 8 bytes per
//   iteration.
//
// In:    x0 = pu1_dst   (advanced by 8 per iteration)
//        w1 = value     (low 8 bits are replicated)
//        x2 = num_bytes (must be a multiple of 8; the original note says 8,
//                        16 or 32; 0 is accepted and writes nothing)
// Out:   none
// Clobb: x0, x2, v0, flags
//
// The loop exits only when the counter reaches exactly zero, so a count that
// is not a multiple of 8 steps past zero and keeps storing; callers must
// honour the multiple-of-8 precondition.
// NOTE(review): the count is consumed as the full 64-bit x2 - confirm the C
// prototype's count type and that callers pass it zero-extended.
//-----------------------------------------------------------------------------
    .global ihevc_memset_mul_8_av8
.type ihevc_memset_mul_8_av8, %function

ihevc_memset_mul_8_av8:
    // Guard: the loop below stores before it tests the count, so a zero
    // count must bail out here instead of writing 8 bytes and wrapping.
    CBZ         x2,MEMSET_MUL_8_RETURN

    dup         v0.8b,w1                    // v0 = value in all 8 byte lanes
LOOP_MEMSET_MUL_8:
    // Memset 8 bytes
    ST1         {v0.8b},[x0],#8

    SUBS        x2,x2,#8                    // x2 = bytes still to fill
    BNE         LOOP_MEMSET_MUL_8

MEMSET_MUL_8_RETURN:
    ret
162*c83a76b0SSuyog Pawar
163*c83a76b0SSuyog Pawar
164*c83a76b0SSuyog Pawar
165*c83a76b0SSuyog Pawar
166*c83a76b0SSuyog Pawar//void ihevc_memset(UWORD8 *pu1_dst,
167*c83a76b0SSuyog Pawar//                       UWORD8 value,
168*c83a76b0SSuyog Pawar//                       UWORD8 num_bytes)
169*c83a76b0SSuyog Pawar//**************Variables Vs Registers*************************
170*c83a76b0SSuyog Pawar//    x0 => *pu1_dst
171*c83a76b0SSuyog Pawar//    x1 => value
172*c83a76b0SSuyog Pawar//    x2 => num_bytes
173*c83a76b0SSuyog Pawar
174*c83a76b0SSuyog Pawar
175*c83a76b0SSuyog Pawar
//-----------------------------------------------------------------------------
// ihevc_memset_av8
//   Fills num_bytes bytes at pu1_dst with the low byte of value: 8-byte NEON
//   stores while at least 8 bytes remain, then single-byte stores for the
//   remainder.
//
// In:    x0 = pu1_dst   (post-incremented as bytes are stored)
//        w1 = value     (low 8 bits are used)
//        x2 = num_bytes (any value including 0; compared as a signed number)
// Out:   none
// Clobb: x0, x2, v0, flags
//
// NOTE(review): the count is consumed as the full 64-bit x2 - confirm the C
// prototype's count type and that callers pass it zero-extended.
//-----------------------------------------------------------------------------
    .global ihevc_memset_av8
.type ihevc_memset_av8, %function

ihevc_memset_av8:
    // Guard: with a zero count the byte tail below would store one byte and
    // then decrement 0 -> -1, so its BNE exit test would never fire.
    CBZ         x2,MEMSET_RETURN

    SUBS        x2,x2,#8                    // x2 = count - 8 (bias for the NEON loop)
    BLT         ARM_MEMSET                  // fewer than 8 bytes: tail only
    dup         v0.8b,w1                    // v0 = value in all 8 byte lanes
LOOP_NEON_MEMSET:
    // Memset 8 bytes
    ST1         {v0.8b},[x0],#8

    SUBS        x2,x2,#8
    BGE         LOOP_NEON_MEMSET            // another full 8-byte chunk remains
    CMN         x2,#8                       // x2 == -8  <=>  no remainder left
    BEQ         MEMSET_RETURN

ARM_MEMSET:
    ADD         x2,x2,#8                    // undo the bias: x2 = remaining bytes (1..7)

LOOP_ARM_MEMSET:
    STRB        w1,[x0],#1
    SUBS        x2,x2,#1
    BNE         LOOP_ARM_MEMSET

MEMSET_RETURN:
    ret
202*c83a76b0SSuyog Pawar
203*c83a76b0SSuyog Pawar
204*c83a76b0SSuyog Pawar
205*c83a76b0SSuyog Pawar
206*c83a76b0SSuyog Pawar//void ihevc_memset_16bit_mul_8(UWORD16 *pu2_dst,
207*c83a76b0SSuyog Pawar//                                      UWORD16 value,
208*c83a76b0SSuyog Pawar//                                      UWORD8 num_words)
209*c83a76b0SSuyog Pawar//**************Variables Vs Registers*************************
210*c83a76b0SSuyog Pawar//    x0 => *pu2_dst
211*c83a76b0SSuyog Pawar//    x1 => value
212*c83a76b0SSuyog Pawar//    x2 => num_words
213*c83a76b0SSuyog Pawar
214*c83a76b0SSuyog Pawar.text
215*c83a76b0SSuyog Pawar.p2align 2
216*c83a76b0SSuyog Pawar
217*c83a76b0SSuyog Pawar
218*c83a76b0SSuyog Pawar
//-----------------------------------------------------------------------------
// ihevc_memset_16bit_mul_8_av8
//   Fills num_words 16-bit words at pu2_dst with the low halfword of value,
//   8 words (16 bytes) per iteration.
//
// In:    x0 = pu2_dst   (advanced by 16 bytes per iteration)
//        w1 = value     (low 16 bits are replicated)
//        x2 = num_words (must be a multiple of 8; the original note says 8,
//                        16 or 32; 0 is accepted and writes nothing)
// Out:   none
// Clobb: x0, x2, v0, flags
//
// The loop exits only when the counter reaches exactly zero, so a count that
// is not a multiple of 8 steps past zero and keeps storing; callers must
// honour the multiple-of-8 precondition.
// NOTE(review): the count is consumed as the full 64-bit x2 - confirm the C
// prototype's count type and that callers pass it zero-extended.
//-----------------------------------------------------------------------------
    .global ihevc_memset_16bit_mul_8_av8
.type ihevc_memset_16bit_mul_8_av8, %function

ihevc_memset_16bit_mul_8_av8:
    // Guard: the loop below stores before it tests the count, so a zero
    // count must bail out here instead of writing 16 bytes and wrapping.
    CBZ         x2,MEMSET_16BIT_MUL_8_RETURN

    dup         v0.8h,w1                    // v0 = value in all 8 halfword lanes
LOOP_MEMSET_16BIT_MUL_8:
    // Memset 8 words
    ST1         {v0.8h},[x0],#16

    SUBS        x2,x2,#8                    // x2 = words still to fill
    BNE         LOOP_MEMSET_16BIT_MUL_8

MEMSET_16BIT_MUL_8_RETURN:
    ret
235*c83a76b0SSuyog Pawar
236*c83a76b0SSuyog Pawar
237*c83a76b0SSuyog Pawar
238*c83a76b0SSuyog Pawar
239*c83a76b0SSuyog Pawar//void ihevc_memset_16bit(UWORD16 *pu2_dst,
240*c83a76b0SSuyog Pawar//                       UWORD16 value,
241*c83a76b0SSuyog Pawar//                       UWORD8 num_words)
242*c83a76b0SSuyog Pawar//**************Variables Vs Registers*************************
243*c83a76b0SSuyog Pawar//    x0 => *pu2_dst
244*c83a76b0SSuyog Pawar//    x1 => value
245*c83a76b0SSuyog Pawar//    x2 => num_words
246*c83a76b0SSuyog Pawar
247*c83a76b0SSuyog Pawar
248*c83a76b0SSuyog Pawar
//-----------------------------------------------------------------------------
// ihevc_memset_16bit_av8
//   Fills num_words 16-bit words at pu2_dst with the low halfword of value:
//   8-word NEON stores while at least 8 words remain, then single-halfword
//   stores for the remainder.
//
// In:    x0 = pu2_dst   (post-incremented as words are stored)
//        w1 = value     (low 16 bits are used)
//        x2 = num_words (any value including 0; compared as a signed number)
// Out:   none
// Clobb: x0, x2, v0, flags
//
// NOTE(review): the count is consumed as the full 64-bit x2 - confirm the C
// prototype's count type and that callers pass it zero-extended.
//-----------------------------------------------------------------------------
    .global ihevc_memset_16bit_av8
.type ihevc_memset_16bit_av8, %function

ihevc_memset_16bit_av8:
    // Guard: with a zero count the halfword tail below would store one word
    // and then decrement 0 -> -1, so its BNE exit test would never fire.
    CBZ         x2,MEMSET_16BIT_RETURN

    SUBS        x2,x2,#8                    // x2 = count - 8 (bias for the NEON loop)
    BLT         ARM_MEMSET_16BIT            // fewer than 8 words: tail only
    dup         v0.8h,w1                    // v0 = value in all 8 halfword lanes
LOOP_NEON_MEMSET_16BIT:
    // Memset 8 words
    ST1         {v0.8h},[x0],#16

    SUBS        x2,x2,#8
    BGE         LOOP_NEON_MEMSET_16BIT      // another full 8-word chunk remains
    CMN         x2,#8                       // x2 == -8  <=>  no remainder left
    BEQ         MEMSET_16BIT_RETURN

ARM_MEMSET_16BIT:
    ADD         x2,x2,#8                    // undo the bias: x2 = remaining words (1..7)

LOOP_ARM_MEMSET_16BIT:
    STRH        w1,[x0],#2
    SUBS        x2,x2,#1
    BNE         LOOP_ARM_MEMSET_16BIT

MEMSET_16BIT_RETURN:
    ret
275*c83a76b0SSuyog Pawar
276*c83a76b0SSuyog Pawar
277*c83a76b0SSuyog Pawar
278*c83a76b0SSuyog Pawar
279*c83a76b0SSuyog Pawar    .section .note.GNU-stack,"",%progbits
280*c83a76b0SSuyog Pawar
281