/******************************************************************************
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2015 Martin Storsjo
 * Copyright © 2015 Janne Grunau
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *****************************************************************************/

#ifndef DAV1D_SRC_ARM_32_UTIL_S
#define DAV1D_SRC_ARM_32_UTIL_S

#include "config.h"
#include "src/arm/asm.S"
#include "src/arm/arm-arch.h"

// Branch to a register, using bx where it exists (ARMv5+ and ARMv4T).
.macro v4bx rd
#if __ARM_ARCH >= 5 || defined(__ARM_ARCH_4T__)
        bx              \rd
#else
        mov             pc,  \rd
#endif
.endm

// Branch-and-link to a register, emulating blx on pre-ARMv5.
.macro v4blx rd
#if __ARM_ARCH >= 5
        blx             \rd
#else
        mov             lr,  pc
        v4bx            \rd
#endif
.endm

// Load the address of a local (same-object) symbol into a register.
.macro movrel_local rd, val, offset=0
#if (__ARM_ARCH >= 7 || defined(__ARM_ARCH_6T2__)) && !defined(PIC)
        movw            \rd, #:lower16:\val+\offset
        movt            \rd, #:upper16:\val+\offset
#else
        ldr             \rd,  90001f
        b               90002f
90001:
        .word           \val + \offset - (90002f + 8 - 4 * CONFIG_THUMB)
90002:
        add             \rd,  \rd,  pc
#endif
.endm

// Load the address of a possibly external symbol into a register; on
// Apple PIC targets this goes through a non-lazy symbol pointer.
.macro movrel rd, val, offset=0
#if defined(PIC) && defined(__APPLE__)
        ldr             \rd,  1f
        b               2f
1:
        .word           3f - (2f + 8 - 4 * CONFIG_THUMB)
2:
        ldr             \rd,  [pc, \rd]
.if \offset < 0
        sub             \rd,  \rd,  #-(\offset)
.elseif \offset > 0
        add             \rd,  \rd,  #\offset
.endif
        .non_lazy_symbol_pointer
3:
        .indirect_symbol \val
        .word           0
        .text
#else
        movrel_local    \rd, \val, \offset
#endif
.endm

// This macro clobbers r7 (and r12 on Windows) and stores data at the
// bottom of the stack; sp is the start of the allocated space that
// the caller can use.
.macro sub_sp_align space
#if CONFIG_THUMB
        mov             r7,  sp
        and             r7,  r7,  #15
#else
        and             r7,  sp,  #15
#endif
        sub             sp,  sp,  r7
        // Now that the stack is aligned, store the amount of adjustment
        // back on the stack, as we don't want to waste a register as
        // frame pointer.
        str             r7,  [sp, #-16]!
#ifdef _WIN32
.if \space > 8192
        // Here, we'd need to touch two (or more) pages while decrementing
        // the stack pointer.
        .error          "sub_sp_align doesn't support values over 8K at the moment"
.elseif \space > 4096
        sub             r7,  sp,  #4096
        ldr             r12, [r7] // touch the next stack page (stack probe)
        sub             r7,  r7,  #(\space - 4096)
        mov             sp,  r7
.else
        sub             sp,  sp,  #\space
.endif
#else
.if \space >= 4096
        sub             sp,  sp,  #(\space)/4096*4096
.endif
.if (\space % 4096) != 0
        sub             sp,  sp,  #(\space)%4096
.endif
#endif
.endm
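
// A minimal usage sketch (illustrative only, not part of the upstream
// file): a hypothetical function that reserves 384 bytes of 16-byte
// aligned scratch space with sub_sp_align and releases it with the
// matching add_sp_align defined below. The function name and the size
// are made up for the example; the same size constant must be passed
// to both macros, r7 (plus r12 on Windows) is clobbered, and
// function/endfunc are the macros from src/arm/asm.S.
//
//     function scratch_example
//         push {r7, lr}
//         sub_sp_align 384
//         ...                      // [sp, sp+384) is usable scratch
//         add_sp_align 384
//         pop  {r7, pc}
//     endfunc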

// Undo a previous sub_sp_align with the same size constant.
.macro add_sp_align space
.if \space >= 4096
        add             sp,  sp,  #(\space)/4096*4096
.endif
.if (\space % 4096) != 0
        add             sp,  sp,  #(\space)%4096
.endif
        ldr             r7,  [sp], #16
        // Add back the original stack adjustment
        add             sp,  sp,  r7
.endm

// Transpose an 8x8 block of 8-bit elements held in \r0-\r7.
// \q0-\q3 must alias the \r register pairs (\q0 = \r0:\r1, etc).
.macro transpose_8x8b q0, q1, q2, q3, r0, r1, r2, r3, r4, r5, r6, r7
        vtrn.32         \q0,  \q2
        vtrn.32         \q1,  \q3

        vtrn.16         \r0,  \r2
        vtrn.16         \r1,  \r3
        vtrn.16         \r4,  \r6
        vtrn.16         \r5,  \r7

        vtrn.8          \r0,  \r1
        vtrn.8          \r2,  \r3
        vtrn.8          \r4,  \r5
        vtrn.8          \r6,  \r7
.endm

// Transpose an 8x8 block of 16-bit elements held in the q registers
// \r0-\r7; \d0-\d3 are the high halves of \r0-\r3 and \d4-\d7 the low
// halves of \r4-\r7.
.macro transpose_8x8h r0, r1, r2, r3, r4, r5, r6, r7, d0, d1, d2, d3, d4, d5, d6, d7
        vswp            \d0,  \d4
        vswp            \d1,  \d5
        vswp            \d2,  \d6
        vswp            \d3,  \d7

        vtrn.32         \r0,  \r2
        vtrn.32         \r1,  \r3
        vtrn.32         \r4,  \r6
        vtrn.32         \r5,  \r7

        vtrn.16         \r0,  \r1
        vtrn.16         \r2,  \r3
        vtrn.16         \r4,  \r5
        vtrn.16         \r6,  \r7
.endm

// Transpose four rows of 8-bit elements in \r0-\r3; \q0/\q1 must alias
// \r0:\r1 and \r2:\r3.
.macro transpose_4x8b q0, q1, r0, r1, r2, r3
        vtrn.16         \q0,  \q1

        vtrn.8          \r0,  \r1
        vtrn.8          \r2,  \r3
.endm

// Transpose a 4x4 block of 32-bit elements held in \q0-\q3; \r0-\r7
// are their d register halves.
.macro transpose_4x4s q0, q1, q2, q3, r0, r1, r2, r3, r4, r5, r6, r7
        vswp            \r1,  \r4 // vtrn.64 \q0, \q2
        vswp            \r3,  \r6 // vtrn.64 \q1, \q3

        vtrn.32         \q0,  \q1
        vtrn.32         \q2,  \q3
.endm

// Transpose a 4x4 block of 16-bit elements held in \r0-\r3; \q0/\q1
// must alias \r0:\r1 and \r2:\r3.
.macro transpose_4x4h q0, q1, r0, r1, r2, r3
        vtrn.32         \q0,  \q1

        vtrn.16         \r0,  \r1
        vtrn.16         \r2,  \r3
.endm

// Transpose the two 4x4 halves of a 4x8 block of 16-bit elements held
// in the q registers \r0-\r3.
.macro transpose_4x8h r0, r1, r2, r3
        vtrn.32         \r0,  \r2
        vtrn.32         \r1,  \r3

        vtrn.16         \r0,  \r1
        vtrn.16         \r2,  \r3
.endm

#endif /* DAV1D_SRC_ARM_32_UTIL_S */
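
// A minimal usage sketch (illustrative only, not part of the upstream
// file): transposing an 8x8 block of bytes whose rows were loaded into
// d16-d23, one row per d register. The q arguments must alias the d
// register pairs (q8 = d16:d17, ..., q11 = d22:d23):
//
//     transpose_8x8b q8, q9, q10, q11, d16, d17, d18, d19, d20, d21, d22, d23
//
// Afterwards d16-d23 hold the columns of the original block.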