xref: /aosp_15_r20/external/libvpx/config/arm-neon/vpx_dsp/arm/vpx_convolve_copy_neon_asm.asm.S (revision fb1b10ab9aebc7c7068eedab379b749d7e3900be)
1@ This file was created from a .asm file
2@  using the ads2gas.pl script.
@ Select unified (UAL) ARM/Thumb assembler syntax for the rest of this file.
3.syntax unified
4@
5@  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
6@
7@  Use of this source code is governed by a BSD-style license
8@  that can be found in the LICENSE file in the root of the source
9@  tree. An additional intellectual property rights grant can be found
10@  in the file PATENTS.  All contributing project authors may
11@  be found in the AUTHORS file in the root of the source tree.
12@
13
@ Export the routine's symbol and mark it as a function in the ELF symbol table.
14    .global vpx_convolve_copy_neon
15    .type vpx_convolve_copy_neon, function
@ Assemble the following code as ARM (A32) instructions, not Thumb.
16    .arm
@ EABI build attributes: this object may rely on 8-byte alignment of 8-byte
@ data (tag 24) and preserves that alignment itself (tag 25).
17    .eabi_attribute 24, 1 @Tag_ABI_align_needed
18    .eabi_attribute 25, 1 @Tag_ABI_align_preserved
19
@ Emit into the code section, aligned to 2^2 = 4 bytes.
20    .text
21    .p2align 2
22
@-----------------------------------------------------------------------
@ vpx_convolve_copy_neon
@
@ Copies a rectangular block of bytes, one row at a time, from the buffer
@ at r0 to the buffer at r2. No filtering is performed.
@
@ Register roles as used by this routine:
@   r0 = source address; advanced by r1 bytes per row
@   r1 = source row stride in bytes
@   r2 = destination address; advanced by r3 bytes per row
@   r3 = destination row stride in bytes
@   r4 = block width selector (read from the caller's stack)
@   r5 = row count            (read from the caller's stack)
@ NOTE(review): r4/r5 are presumably the `w` and `h` arguments of the C
@ prototype, and the stack arguments before them are never read here --
@ confirm against the C declaration.
@
@ Width dispatch is by range, not exact match:
@   r4 > 32 -> 64-byte rows, r4 == 32 -> 32, 8 < r4 < 32 -> 16,
@   r4 == 8 -> 8, r4 < 8 -> 4.
@
@ Alignment: the NEON stores carry :128 (64/32/16-wide paths) or :64
@ (8-wide path) alignment qualifiers, so the destination address must
@ satisfy that alignment on every row. The loads carry no qualifier.
@
@ Every loop is bottom-tested, so at least one iteration runs even when
@ r5 <= 0. The 32/16/8-wide loops handle two rows per iteration, so an odd
@ row count copies one extra row.
@
@ Overwrites: r0-r3, r12, lr, q0-q3 and the condition flags.
@ Preserves:  r4-r5 (pushed on entry, popped on return).
@-----------------------------------------------------------------------
23vpx_convolve_copy_neon: @ PROC
      @ Save the two scratch registers and the return address (12 bytes), then
      @ fetch r4/r5 from [sp,#32]/[sp,#36], i.e. entry-sp + 20 and + 24.
24    push                {r4-r5, lr}
25    ldrd                r4, r5, [sp, #32]
26
      @ Choose the copy loop from the width in r4 (signed comparisons).
27    cmp                 r4, #32
28    bgt                 copy64
29    beq                 copy32
30    cmp                 r4, #8
31    bgt                 copy16
32    beq                 copy8
33    b                   copy4
34
      @ 64-byte rows, one row per iteration. Each row is moved as two 32-byte
      @ halves; the first half post-increments the pointer by 32, so the
      @ second half steps by (stride - 32) to land on the start of the next
      @ row. lr holds the adjusted source step, r3 the adjusted destination
      @ step.
35copy64:
36    sub                 lr, r1, #32
37    sub                 r3, r3, #32
38copy64_h:
      @ Prefetch hint for the address two source strides ahead of r0.
39    pld                 [r0, r1, lsl #1]
40    vld1.8              {q0-q1}, [r0]!
41    vld1.8              {q2-q3}, [r0], lr
42    vst1.8              {q0-q1}, [r2,:128]!
43    vst1.8              {q2-q3}, [r2,:128], r3
      @ One row done; loop while the remaining row count is still positive.
44    subs                r5, r5, #1
45    bgt                 copy64_h
      @ Restore r4-r5 and return by popping the saved lr into pc.
46    pop                 {r4-r5, pc}
47
      @ 32-byte rows, two rows per iteration (q0-q1 = first row, q2-q3 = second).
      @ Both loads are issued before both stores.
48copy32:
49    pld                 [r0, r1, lsl #1]
50    vld1.8              {q0-q1}, [r0], r1
51    pld                 [r0, r1, lsl #1]
52    vld1.8              {q2-q3}, [r0], r1
53    vst1.8              {q0-q1}, [r2,:128], r3
54    vst1.8              {q2-q3}, [r2,:128], r3
55    subs                r5, r5, #2
56    bgt                 copy32
57    pop                 {r4-r5, pc}
58
      @ 16-byte rows, two rows per iteration (q0 = first row, q1 = second).
59copy16:
60    pld                 [r0, r1, lsl #1]
61    vld1.8              {q0}, [r0], r1
62    pld                 [r0, r1, lsl #1]
63    vld1.8              {q1}, [r0], r1
64    vst1.8              {q0}, [r2,:128], r3
65    vst1.8              {q1}, [r2,:128], r3
66    subs                r5, r5, #2
67    bgt                 copy16
68    pop                 {r4-r5, pc}
69
      @ 8-byte rows, two rows per iteration (d0 = first row, d2 = second).
      @ Stores require 8-byte destination alignment (:64).
70copy8:
71    pld                 [r0, r1, lsl #1]
72    vld1.8              {d0}, [r0], r1
73    pld                 [r0, r1, lsl #1]
74    vld1.8              {d2}, [r0], r1
75    vst1.8              {d0}, [r2,:64], r3
76    vst1.8              {d2}, [r2,:64], r3
77    subs                r5, r5, #2
78    bgt                 copy8
79    pop                 {r4-r5, pc}
80
      @ 4-byte rows, one row per iteration, moved as a single word through r12
      @ with plain ldr/str (no NEON, no prefetch).
81copy4:
82    ldr                 r12, [r0], r1
83    str                 r12, [r2], r3
84    subs                r5, r5, #1
85    bgt                 copy4
86    pop                 {r4-r5, pc}
@ Record the routine's byte size in the symbol table.
87.size vpx_convolve_copy_neon, .-vpx_convolve_copy_neon    @ ENDP
88
@ Empty .note.GNU-stack section: tells the linker this object does not need
@ an executable stack.
89    .section .note.GNU-stack,"",%progbits
90