/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2020, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/arm/asm.S"
#include "util.S"
#include "cdef_tmpl.S"

// Conventions shared by everything in this file:
//   r0  = write cursor into tmp, pointing at the first real pixel of a row.
//         Each tmp row carries two extra 16-bit pixels (4 bytes) on either
//         side of the w real pixels; rows are "stride" 16-bit elements apart.
//   r7  = edge flags: bit 0 LEFT, bit 1 RIGHT, bit 2 TOP, bit 3 BOTTOM.
//   q3  = 0x8000 in every 16-bit lane; s12 is its lowest 32 bits, i.e. one
//         two-pixel border worth of 0x8000.
//   NOTE(review): 0x8000 is presumably the value the filter code treats as
//   "pixel not available" - confirm against cdef_tmpl.S.

// Shared tail of every edge-flag case in pad_top_bot_16.
//   ret != 0: restore r4-r8 and return from the enclosing function.
//   ret == 0: advance r0 by one tmp row, then branch to the next "3:" label,
//             except in the final case (last != 0), which simply falls
//             through to that label.
.macro pad_top_bot_16_tail stride, ret, last
.if \ret
        pop             {r4-r8,pc}
.else
        add             r0,  r0,  #2*\stride
.if \last == 0
        b               3f
.endif
.endif
.endm

// Store the 0x8000 pattern through r12 with post-increment: 32 bytes for the
// 4-wide variant and 64 bytes for the 8-wide variant, which is two full tmp
// rows for the (w, stride) pairs instantiated below. q3 must already hold the
// pattern; q2 is loaded here. Clobbers q2 and r12.
.macro fill_two_rows_16 w
        vmov.i16        q2,  #0x8000
        vst1.16         {q2,q3}, [r12]!
.if \w == 8
        vst1.16         {q2,q3}, [r12]!
.endif
.endm

// Copy two source rows into two consecutive tmp rows, borders included.
//
// Macro arguments:
//   s1, s2  core registers pointing at the first and second source row
//   w       number of pixels per row
//   stride  tmp row pitch in 16-bit elements
//   r1      NEON register receiving the first row  (d0 for w=4, q0 for w=8)
//   r2      NEON register receiving the second row (d2 for w=4, q1 for w=8)
//   align   alignment in bits asserted on the row loads and stores
//   ret     non-zero: return from the enclosing function when done;
//           zero: leave r0 on the row after the two written ones
//
// For each row the left border is the 4 bytes just before the source pointer
// when CDEF_HAVE_LEFT is set, and the right border is the 4 bytes at byte
// offset 2*w when CDEF_HAVE_RIGHT is set; a missing border is written from
// s12 instead.
//
// Expects r0, r7 and q3 as described at the top of the file.
// Clobbers the two row registers, s8-s11, r0 and the condition flags.
// Defines the local label "3:" at its very end.
.macro pad_top_bot_16 s1, s2, w, stride, r1, r2, align, ret
        tst             r7,  #1 // CDEF_HAVE_LEFT
        beq             2f
        // CDEF_HAVE_LEFT
        tst             r7,  #2 // CDEF_HAVE_RIGHT
        beq             1f

        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
        // Both rows are fetched completely before anything is stored.
        vldr            s8,  [\s1, #-4]
        vld1.16         {\r1}, [\s1, :\align]
        vldr            s9,  [\s1, #2*\w]
        vldr            s10, [\s2, #-4]
        vld1.16         {\r2}, [\s2, :\align]
        vldr            s11, [\s2, #2*\w]
        vstr            s8,  [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s9,  [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        vstr            s10, [r0, #-4]
        vst1.16         {\r2}, [r0, :\align]
        vstr            s11, [r0, #2*\w]
        pad_top_bot_16_tail \stride, \ret, 0

1:
        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        // Right border comes from s12 on both rows.
        vldr            s8,  [\s1, #-4]
        vld1.16         {\r1}, [\s1, :\align]
        vldr            s9,  [\s2, #-4]
        vld1.16         {\r2}, [\s2, :\align]
        vstr            s8,  [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        vstr            s9,  [r0, #-4]
        vst1.16         {\r2}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
        pad_top_bot_16_tail \stride, \ret, 0

2:
        // !CDEF_HAVE_LEFT
        tst             r7,  #2 // CDEF_HAVE_RIGHT
        beq             1f

        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
        // Left border comes from s12 on both rows.
        vld1.16         {\r1}, [\s1, :\align]
        vldr            s8,  [\s1, #2*\w]
        vld1.16         {\r2}, [\s2, :\align]
        vldr            s9,  [\s2, #2*\w]
        vstr            s12, [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s8,  [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        vstr            s12, [r0, #-4]
        vst1.16         {\r2}, [r0, :\align]
        vstr            s9,  [r0, #2*\w]
        pad_top_bot_16_tail \stride, \ret, 0

1:
        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        // Only the row bodies are read; both borders come from s12.
        vld1.16         {\r1}, [\s1, :\align]
        vld1.16         {\r2}, [\s2, :\align]
        vstr            s12, [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        vstr            s12, [r0, #-4]
        vst1.16         {\r2}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
        pad_top_bot_16_tail \stride, \ret, 1
3:
.endm

// void dav1d_cdef_paddingX_16bpc_neon(uint16_t *tmp, const pixel *src,
//                                     ptrdiff_t src_stride, const pixel (*left)[2],
//                                     const pixel *const top,
//                                     const pixel *const bottom, int h,
//                                     enum CdefEdgeFlags edges);
//
// Register use inside the function:
//   r0 = tmp (write cursor)      r1 = src (read cursor)
//   r2 = src_stride, added unscaled to byte addresses
//   r3 = left, advanced by 4 bytes per row
//   r4 = top, r5 = bottom, r6 = h (row counter), r7 = edges
//   r8 = pointer to the second top/bottom row, r12 = fill cursor
//
// The function writes, in order: two rows above tmp, h rows starting at tmp,
// and two rows below. Each of the two-row groups is either copied from
// top/bottom or filled with 0x8000, depending on the TOP/BOTTOM flags.
//
// Macro arguments:
//   w       block width in pixels (also selects the symbol name)
//   stride  tmp row pitch in 16-bit elements
//   r1, r2  NEON row registers: d0/d2 for w=4, q0/q1 for w=8. Note that the
//           macro argument r1 and the core register r1 are different things;
//           the former is always written with a leading backslash.
//   align   alignment in bits asserted on row loads and stores
.macro padding_func_16 w, stride, r1, r2, align
function cdef_padding\w\()_16bpc_neon, export=1
        push            {r4-r8,lr}
        // Six registers were pushed, so the stack arguments start at sp+24.
        ldrd            r4,  r5,  [sp, #24]
        ldrd            r6,  r7,  [sp, #32]
        vmov.i16        q3,  #0x8000
        tst             r7,  #4 // CDEF_HAVE_TOP
        bne             1f
        // !CDEF_HAVE_TOP
        // Start two rows up and two pixels to the left of tmp. r0 itself is
        // left untouched and keeps pointing at the first middle row.
        sub             r12, r0,  #2*(2*\stride+2)
        fill_two_rows_16 \w
        b               3f
1:
        // CDEF_HAVE_TOP
        // Second top row is one src_stride past the first; r0 backs up two
        // rows and is returned to its original value by the macro below.
        add             r8,  r4,  r2
        sub             r0,  r0,  #2*(2*\stride)
        pad_top_bot_16  r4,  r8,  \w, \stride, \r1, \r2, \align, 0

        // Middle section
        // One of four loops runs, selected once by the LEFT/RIGHT flags.
        // Every loop tests the row counter after its body, so at least one
        // row is always processed. The flags set by subs survive until bgt
        // because none of the instructions in between update them.
3:
        tst             r7,  #1 // CDEF_HAVE_LEFT
        beq             2f
        // CDEF_HAVE_LEFT
        tst             r7,  #2 // CDEF_HAVE_RIGHT
        beq             1f
        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
0:
        // The replicating load fills both s4 and s5 with the left pair; s5
        // is then overwritten with the right pair. The right pair has to be
        // read before the row load post-increments the src pointer.
        vld1.32         {d2[]}, [r3, :32]!
        vldr            s5,  [r1, #2*\w]
        vld1.16         {\r1}, [r1, :\align], r2
        subs            r6,  r6,  #1
        vstr            s4,  [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s5,  [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        bgt             0b
        b               3f
1:
        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        vld1.32         {d2[]}, [r3, :32]!
        vld1.16         {\r1}, [r1, :\align], r2
        subs            r6,  r6,  #1
        vstr            s4,  [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        bgt             1b
        b               3f
2:
        tst             r7,  #2 // CDEF_HAVE_RIGHT
        beq             1f
        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
0:
        // The left array is not read at all in the two loops below.
        vldr            s4,  [r1, #2*\w]
        vld1.16         {\r1}, [r1, :\align], r2
        subs            r6,  r6,  #1
        vstr            s12, [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s4,  [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        bgt             0b
        b               3f
1:
        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        vld1.16         {\r1}, [r1, :\align], r2
        subs            r6,  r6,  #1
        vstr            s12, [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        bgt             1b

3:
        tst             r7,  #8 // CDEF_HAVE_BOTTOM
        bne             1f
        // !CDEF_HAVE_BOTTOM
        // r0 now points at the row after the last middle row; start the fill
        // two pixels to its left. q2 is reloaded inside the helper, which
        // matters here because s8-s11 overlap q2 and may have been used as
        // scratch by the top-row copy.
        sub             r12, r0,  #4
        fill_two_rows_16 \w
        pop             {r4-r8,pc}
1:
        // CDEF_HAVE_BOTTOM
        // Every path through the macro returns to the caller (ret = 1).
        add             r8,  r5,  r2
        pad_top_bot_16  r5,  r8,  \w, \stride, \r1, \r2, \align, 1
endfunc
.endm

// 8-wide variant: 16-element tmp rows, full q registers, 128-bit alignment.
padding_func_16 8, 16, q0, q1, 128
// 4-wide variant: 8-element tmp rows, d registers, 64-bit alignment.
padding_func_16 4, 8,  d0, d2, 64

// NOTE(review): the macros below are not defined in this file; presumably
// they come from cdef_tmpl.S - confirm.
tables

filter 8, 16
filter 4, 16

find_dir 16