xref: /aosp_15_r20/external/libhevc/common/arm/ihevc_mem_fns.s (revision c83a76b084498d55f252f48b2e3786804cdf24b7)
1*c83a76b0SSuyog Pawar@/*****************************************************************************
2*c83a76b0SSuyog Pawar@*
3*c83a76b0SSuyog Pawar@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4*c83a76b0SSuyog Pawar@*
5*c83a76b0SSuyog Pawar@* Licensed under the Apache License, Version 2.0 (the "License");
6*c83a76b0SSuyog Pawar@* you may not use this file except in compliance with the License.
7*c83a76b0SSuyog Pawar@* You may obtain a copy of the License at:
8*c83a76b0SSuyog Pawar@*
9*c83a76b0SSuyog Pawar@* http://www.apache.org/licenses/LICENSE-2.0
10*c83a76b0SSuyog Pawar@*
11*c83a76b0SSuyog Pawar@* Unless required by applicable law or agreed to in writing, software
12*c83a76b0SSuyog Pawar@* distributed under the License is distributed on an "AS IS" BASIS,
13*c83a76b0SSuyog Pawar@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*c83a76b0SSuyog Pawar@* See the License for the specific language governing permissions and
15*c83a76b0SSuyog Pawar@* limitations under the License.
16*c83a76b0SSuyog Pawar@*
17*c83a76b0SSuyog Pawar@*****************************************************************************/
@/**
@ *******************************************************************************
@ * @file
@ *  ihevc_mem_fns_neon.s
@ *
@ * @brief
@ *  Contains function definitions for memory manipulation
@ *
@ * @author
@ *  Naveen SR
@ *
@ * @par List of Functions:
@ *  - ihevc_memcpy_mul_8()
@ *  - ihevc_memcpy()
@ *  - ihevc_memset_mul_8()
@ *  - ihevc_memset()
@ *  - ihevc_memset_16bit_mul_8()
@ *  - ihevc_memset_16bit()
@ *
@ * @remarks
@ *  None
@ *
@ *******************************************************************************
@*/
39*c83a76b0SSuyog Pawar
@/**
@*******************************************************************************
@*
@* @brief
@*   memcpy of a 1d array whose length is a multiple of 8 bytes
@*
@* @par Description:
@*   Copies 8-bit data from source to destination, 8 bytes per iteration.
@*   Intended for 8, 16 or 32 bytes.
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[in] num_bytes
@*  number of bytes to copy; must be a non-zero multiple of 8
@*
@* @returns
@*  None
@*
@* @remarks
@*  The loop copies 8 bytes before it tests the counter and only stops when
@*  the counter reaches exactly zero, so a count of 0 or a count that is not
@*  a multiple of 8 does not stop at the intended point.
@*  Modifies r0, r1, r2, d0 and the condition flags.
@*
@*******************************************************************************
@*/
@void ihevc_memcpy_mul_8(UWORD8 *pu1_dst,
@                    UWORD8 *pu1_src,
@                   UWORD8 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => *pu1_src
@   r2 => num_bytes

.text
.p2align 2

    .global ihevc_memcpy_mul_8_a9q
.type ihevc_memcpy_mul_8_a9q, %function

ihevc_memcpy_mul_8_a9q:

LOOP_NEON_MEMCPY_MUL_8:
    @ Memcpy 8 bytes
    VLD1.8      {d0},[r1]!              @ d0 = next 8 source bytes, r1 += 8
    VST1.8      {d0},[r0]!              @ store them, r0 += 8

    SUBS        r2,r2,#8                @ 8 fewer bytes remaining
    BNE         LOOP_NEON_MEMCPY_MUL_8  @ repeat until the count hits zero
    BX          LR                      @ same return form as the other routines in this file
91*c83a76b0SSuyog Pawar
92*c83a76b0SSuyog Pawar
93*c83a76b0SSuyog Pawar
@/**
@*******************************************************************************
@*
@* @brief
@*   memcpy of a 1d array of arbitrary length
@*
@* @par Description:
@*   Copies num_bytes bytes from source to destination. Whole groups of
@*   8 bytes are moved through NEON register d0; the remaining 0..7 bytes
@*   are moved one at a time through r3. A count of zero copies nothing.
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[in] num_bytes
@*  number of bytes to copy
@*
@* @returns
@*  None
@*
@* @remarks
@*  The counter is tested with signed conditions (BLT/BGE).
@*  Modifies r0, r1, r2, r3, d0 (only when num_bytes >= 8) and the flags.
@*
@*******************************************************************************
@*/
@void ihevc_memcpy(UWORD8 *pu1_dst,
@                  UWORD8 *pu1_src,
@                  UWORD8 num_bytes)
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => *pu1_src
@   r2 => num_bytes



    .global ihevc_memcpy_a9q
.type ihevc_memcpy_a9q, %function

ihevc_memcpy_a9q:
    SUBS        r2,r2,#8                @ r2 = num_bytes - 8
    BLT         ARM_MEMCPY              @ fewer than 8 bytes: byte tail only
LOOP_NEON_MEMCPY:
    @ Memcpy 8 bytes
    VLD1.8      {d0},[r1]!              @ d0 = next 8 source bytes, r1 += 8
    VST1.8      {d0},[r0]!              @ store them, r0 += 8

    SUBS        r2,r2,#8                @ r2 = bytes left after this group, minus 8
    BGE         LOOP_NEON_MEMCPY        @ another full group of 8 is available

ARM_MEMCPY:
    @ r2 holds (bytes left - 8) on both paths into this label.
    ADDS        r2,r2,#8                @ r2 = bytes left (0..7), Z set when none
    BXEQ        LR                      @ nothing left; also covers num_bytes == 0

LOOP_ARM_MEMCPY:
    LDRB        r3,[r1],#1              @ r3 = *src++
    STRB        r3,[r0],#1              @ *dst++ = r3
    SUBS        r2,r2,#1
    BNE         LOOP_ARM_MEMCPY
    BX          LR
131*c83a76b0SSuyog Pawar
132*c83a76b0SSuyog Pawar
133*c83a76b0SSuyog Pawar
134*c83a76b0SSuyog Pawar
@void ihevc_memset_mul_8(UWORD8 *pu1_dst,
@                       UWORD8 value,
@                       UWORD8 num_bytes)
@
@ Fills num_bytes bytes at pu1_dst with the low byte of value, storing
@ 8 bytes per iteration from d0.
@
@ Assumptions: num_bytes is either 8, 16 or 32. The loop stores before it
@ acts on the counter and stops only when the counter reaches exactly zero.
@
@ Modifies r0, r2, d0 and the condition flags.
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => value
@   r2 => num_bytes

.text
.p2align 2

    .global ihevc_memset_mul_8_a9q
.type ihevc_memset_mul_8_a9q, %function

ihevc_memset_mul_8_a9q:
    vdup.8      d0, r1                      @ d0 = value replicated into all 8 byte lanes

.Lmemset_mul_8_store:
    subs        r2, r2, #8                  @ account for the 8 bytes stored below
    vst1.8      {d0}, [r0]!                 @ store 8 bytes, r0 += 8 (flags untouched)
    bne         .Lmemset_mul_8_store        @ loop until the count reaches zero

    bx          lr
163*c83a76b0SSuyog Pawar
164*c83a76b0SSuyog Pawar
165*c83a76b0SSuyog Pawar
166*c83a76b0SSuyog Pawar
@void ihevc_memset(UWORD8 *pu1_dst,
@                       UWORD8 value,
@                       UWORD8 num_bytes)
@
@ Fills num_bytes bytes at pu1_dst with the low byte of value. Whole groups
@ of 8 bytes are stored from NEON register d0; the remaining 0..7 bytes are
@ stored one at a time from r1. A count of zero stores nothing.
@
@ The counter is tested with signed conditions (BLT/BGE).
@ Modifies r0, r2, d0 (only when num_bytes >= 8) and the condition flags.
@**************Variables Vs Registers*************************
@   r0 => *pu1_dst
@   r1 => value
@   r2 => num_bytes



    .global ihevc_memset_a9q
.type ihevc_memset_a9q, %function

ihevc_memset_a9q:
    SUBS        r2,r2,#8                @ r2 = num_bytes - 8
    BLT         ARM_MEMSET              @ fewer than 8 bytes: byte tail only
    VDUP.8      d0,r1                   @ d0 = value replicated into all 8 byte lanes
LOOP_NEON_MEMSET:
    @ Memset 8 bytes
    VST1.8      {d0},[r0]!              @ store 8 bytes, r0 += 8

    SUBS        r2,r2,#8                @ r2 = bytes left after this group, minus 8
    BGE         LOOP_NEON_MEMSET        @ another full group of 8 is available

ARM_MEMSET:
    @ r2 holds (bytes left - 8) on both paths into this label.
    ADDS        r2,r2,#8                @ r2 = bytes left (0..7), Z set when none
    BXEQ        LR                      @ nothing left; also covers num_bytes == 0

LOOP_ARM_MEMSET:
    STRB        r1,[r0],#1              @ *dst++ = low byte of value
    SUBS        r2,r2,#1
    BNE         LOOP_ARM_MEMSET
    BX          LR
201*c83a76b0SSuyog Pawar
202*c83a76b0SSuyog Pawar
203*c83a76b0SSuyog Pawar
204*c83a76b0SSuyog Pawar
@void ihevc_memset_16bit_mul_8(UWORD16 *pu2_dst,
@                                   UWORD16 value,
@                                   UWORD8 num_words)
@
@ Fills num_words 16-bit elements at pu2_dst with the low halfword of value.
@ Each iteration stores d0 (four halfwords) twice, i.e. 8 elements / 16 bytes.
@
@ Assumptions: num_words is either 8, 16 or 32. The loop stores before it
@ acts on the counter and stops only when the counter reaches exactly zero.
@
@ Modifies r0, r2, d0 and the condition flags.
@**************Variables Vs Registers*************************
@   r0 => *pu2_dst
@   r1 => value
@   r2 => num_words

.text
.p2align 2

    .global ihevc_memset_16bit_mul_8_a9q
.type ihevc_memset_16bit_mul_8_a9q, %function

ihevc_memset_16bit_mul_8_a9q:
    vdup.16     d0, r1                          @ d0 = value replicated into all 4 halfword lanes

.Lmemset_16bit_mul_8_store:
    subs        r2, r2, #8                      @ account for the 8 elements stored below
    vst1.16     {d0}, [r0]!                     @ elements 0..3, r0 += 8 (flags untouched)
    vst1.16     {d0}, [r0]!                     @ elements 4..7, r0 += 8
    bne         .Lmemset_16bit_mul_8_store      @ loop until the count reaches zero

    bx          lr
235*c83a76b0SSuyog Pawar
236*c83a76b0SSuyog Pawar
237*c83a76b0SSuyog Pawar
238*c83a76b0SSuyog Pawar
@void ihevc_memset_16bit(UWORD16 *pu2_dst,
@                       UWORD16 value,
@                       UWORD8 num_words)
@
@ Fills num_words 16-bit elements at pu2_dst with the low halfword of value.
@ Whole groups of 8 elements are stored from NEON register d0 (two stores of
@ four halfwords each); the remaining 0..7 elements are stored one at a time
@ from r1. A count of zero stores nothing.
@
@ The counter is tested with signed conditions (BLT/BGE).
@ Modifies r0, r2, d0 (only when num_words >= 8) and the condition flags.
@**************Variables Vs Registers*************************
@   r0 => *pu2_dst
@   r1 => value
@   r2 => num_words



    .global ihevc_memset_16bit_a9q
.type ihevc_memset_16bit_a9q, %function

ihevc_memset_16bit_a9q:
    SUBS        r2,r2,#8                @ r2 = num_words - 8
    BLT         ARM_MEMSET_16BIT        @ fewer than 8 elements: halfword tail only
    VDUP.16     d0,r1                   @ d0 = value replicated into all 4 halfword lanes
LOOP_NEON_MEMSET_16BIT:
    @ Memset 8 words
    VST1.16     {d0},[r0]!              @ elements 0..3, r0 += 8
    VST1.16     {d0},[r0]!              @ elements 4..7, r0 += 8

    SUBS        r2,r2,#8                @ r2 = elements left after this group, minus 8
    BGE         LOOP_NEON_MEMSET_16BIT  @ another full group of 8 is available

ARM_MEMSET_16BIT:
    @ r2 holds (elements left - 8) on both paths into this label.
    ADDS        r2,r2,#8                @ r2 = elements left (0..7), Z set when none
    BXEQ        LR                      @ nothing left; also covers num_words == 0

LOOP_ARM_MEMSET_16BIT:
    STRH        r1,[r0],#2              @ *dst++ = low halfword of value
    SUBS        r2,r2,#1
    BNE         LOOP_ARM_MEMSET_16BIT
    BX          LR
274*c83a76b0SSuyog Pawar
275*c83a76b0SSuyog Pawar
276*c83a76b0SSuyog Pawar
277*c83a76b0SSuyog Pawar
278*c83a76b0SSuyog Pawar    .section .note.GNU-stack,"",%progbits
279*c83a76b0SSuyog Pawar
280