/******************************************************************************
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2015 Martin Storsjo
 * Copyright © 2015 Janne Grunau
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *****************************************************************************/

#ifndef DAV1D_SRC_ARM_32_UTIL_S
#define DAV1D_SRC_ARM_32_UTIL_S

#include "config.h"
#include "src/arm/asm.S"
#include "src/arm/arm-arch.h"

.macro v4bx rd
#if __ARM_ARCH >= 5 || defined(__ARM_ARCH_4T__)
        bx              \rd
#else
        mov             pc,  \rd
#endif
.endm

.macro v4blx rd
#if __ARM_ARCH >= 5
        blx             \rd
#else
        mov             lr,  pc
        v4bx            \rd
#endif
.endm
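
// Illustrative use of the above (not assembled here): an ARMv4-compatible
// indirect call through a function pointer in a hypothetical register r2,
// followed by an ARMv4-safe return via lr.
//     v4blx           r2        // call the function pointed to by r2
//     ...
//     v4bx            lr        // return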

.macro movrel_local rd, val, offset=0
#if (__ARM_ARCH >= 7 || defined(__ARM_ARCH_6T2__)) && !defined(PIC)
        movw            \rd, #:lower16:\val+\offset
        movt            \rd, #:upper16:\val+\offset
#else
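        // PC-relative fallback: the literal below stores the distance
        // from the point where pc is read (the add at 90002, plus the
        // architectural pc bias: +8 in ARM mode, +4 in Thumb mode) to
        // \val + \offset.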
        ldr             \rd,  90001f
        b               90002f
90001:
        .word           \val + \offset - (90002f + 8 - 4 * CONFIG_THUMB)
90002:
        add             \rd,  \rd,  pc
#endif
.endm

.macro movrel rd, val, offset=0
#if defined(PIC) && defined(__APPLE__)
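        // Darwin PIC: compute the address of a non-lazy symbol pointer
        // pc-relatively, then load the symbol's address through it.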
        ldr             \rd,  1f
        b               2f
1:
        .word           3f - (2f + 8 - 4 * CONFIG_THUMB)
2:
        ldr             \rd,  [pc, \rd]
.if \offset < 0
        sub             \rd,  \rd,  #-(\offset)
.elseif \offset > 0
        add             \rd,  \rd,  #\offset
.endif
        .non_lazy_symbol_pointer
3:
        .indirect_symbol \val
        .word       0
        .text
#else
        movrel_local    \rd, \val, \offset
#endif
.endm
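
// Illustrative use (hypothetical symbol name): load the address of a
// data symbol in a position-independent way, with an optional byte
// offset.
//     movrel          r0,  some_table          // r0 = &some_table
//     movrel          r1,  some_table,  8      // r1 = &some_table + 8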

// This macro clobbers r7 (and r12 on Windows) and stores data at the
// bottom of the stack; on return, sp points to the start of the
// allocated space, which the caller can use.
.macro sub_sp_align space
#if CONFIG_THUMB
        mov             r7,  sp
        and             r7,  r7,  #15
#else
        and             r7,  sp,  #15
#endif
        sub             sp,  sp,  r7
        // Now that the stack is aligned, store the adjustment amount on
        // the stack, as we don't want to waste a register on a frame
        // pointer.
        str             r7,  [sp, #-16]!
#ifdef _WIN32
.if \space > 8192
        // Here, we'd need to touch two (or more) pages while decrementing
        // the stack pointer.
        .error          "sub_sp_align doesn't support values over 8K at the moment"
.elseif \space > 4096
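        // Probe the page 4 KiB below sp before moving sp past it, so
        // the Windows stack guard page is committed in order.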
        sub             r7,  sp,  #4096
        ldr             r12, [r7]
        sub             r7,  r7,  #(\space - 4096)
        mov             sp,  r7
.else
        sub             sp,  sp,  #\space
.endif
#else
.if \space >= 4096
        sub             sp,  sp,  #(\space)/4096*4096
.endif
.if (\space % 4096) != 0
        sub             sp,  sp,  #(\space)%4096
.endif
#endif
.endm

.macro add_sp_align space
.if \space >= 4096
        add             sp,  sp,  #(\space)/4096*4096
.endif
.if (\space % 4096) != 0
        add             sp,  sp,  #(\space)%4096
.endif
        ldr             r7,  [sp], #16
        // Add back the original stack adjustment
        add             sp,  sp,  r7
.endm
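
// Illustrative pairing (hypothetical size): allocate 16-byte-aligned
// scratch space and release it again. The size is typically a multiple
// of 16, so sp stays aligned, and must match between the two macros.
//     sub_sp_align    384       // sp now points at 384 usable bytes
//     ...
//     add_sp_align    384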

.macro transpose_8x8b q0, q1, q2, q3, r0, r1, r2, r3, r4, r5, r6, r7
        vtrn.32         \q0,  \q2
        vtrn.32         \q1,  \q3

        vtrn.16         \r0,  \r2
        vtrn.16         \r1,  \r3
        vtrn.16         \r4,  \r6
        vtrn.16         \r5,  \r7

        vtrn.8          \r0,  \r1
        vtrn.8          \r2,  \r3
        vtrn.8          \r4,  \r5
        vtrn.8          \r6,  \r7
.endm
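
// The q arguments and the d arguments must name the same registers (a
// .macro can't derive d names from q names). Illustrative call,
// transposing an 8x8 byte matrix held as one row per d register:
//     transpose_8x8b  q0, q1, q2, q3, d0, d1, d2, d3, d4, d5, d6, d7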

.macro transpose_8x8h r0, r1, r2, r3, r4, r5, r6, r7, d0, d1, d2, d3, d4, d5, d6, d7
        vswp            \d0,  \d4
        vswp            \d1,  \d5
        vswp            \d2,  \d6
        vswp            \d3,  \d7

        vtrn.32         \r0,  \r2
        vtrn.32         \r1,  \r3
        vtrn.32         \r4,  \r6
        vtrn.32         \r5,  \r7

        vtrn.16         \r0,  \r1
        vtrn.16         \r2,  \r3
        vtrn.16         \r4,  \r5
        vtrn.16         \r6,  \r7
.endm
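
// Here r0-r7 are q registers holding one 8x16-bit row each, and d0-d7
// must be the high halves of the first four rows followed by the low
// halves of the last four (the vswps stand in for vtrn.64).
// Illustrative call for rows held in q8-q15:
//     transpose_8x8h  q8, q9, q10, q11, q12, q13, q14, q15, d17, d19, d21, d23, d24, d26, d28, d30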

.macro transpose_4x8b q0, q1, r0, r1, r2, r3
        vtrn.16         \q0,  \q1

        vtrn.8          \r0,  \r1
        vtrn.8          \r2,  \r3
.endm

.macro transpose_4x4s q0, q1, q2, q3, r0, r1, r2, r3, r4, r5, r6, r7
        vswp            \r1,  \r4 // vtrn.64 \q0, \q2
        vswp            \r3,  \r6 // vtrn.64 \q1, \q3

        vtrn.32         \q0,  \q1
        vtrn.32         \q2,  \q3
.endm
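
// Illustrative call for a 4x4 transpose of 32-bit elements, one row per
// q register; the d arguments are the halves of those same q registers,
// in order, so the vswps can act as the 64-bit transpose step:
//     transpose_4x4s  q0, q1, q2, q3, d0, d1, d2, d3, d4, d5, d6, d7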

.macro transpose_4x4h q0, q1, r0, r1, r2, r3
        vtrn.32         \q0,  \q1

        vtrn.16         \r0,  \r1
        vtrn.16         \r2,  \r3
.endm

.macro transpose_4x8h r0, r1, r2, r3
        vtrn.32         \r0,  \r2
        vtrn.32         \r1,  \r3

        vtrn.16         \r0,  \r1
        vtrn.16         \r2,  \r3
.endm

#endif /* DAV1D_SRC_ARM_32_UTIL_S */