/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2020, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/arm/asm.S"
#include "util.S"
#include "cdef_tmpl.S"

// pad_top_bot_16 s1, s2, w, stride, reg, ret
//
// Writes two padded rows of 16-bit pixels to the buffer at x0, taking the
// pixels from the two source rows addressed by \s1 and \s2. Each output row
// is laid out as 2 edge pixels, \w body pixels, 2 edge pixels. The flags in
// w7 select, per side, whether the 2 edge pixels are read from the source row
// or replaced by the fill pattern held in s31.
//
// Macro arguments:
//   s1, s2  X registers pointing at the first body pixel of each source row.
//           Both are moved back by 4 bytes (2 pixels) when CDEF_HAVE_LEFT is
//           set, so the left edge is picked up by the same load as the body.
//   w       Row width in pixels (this file instantiates 4 and 8).
//   stride  Distance between two output rows, in 16-bit elements.
//   reg     SIMD register prefix wide enough for \w pixels (this file passes
//           d for w == 4 and q for w == 8).
//   ret     Nonzero: every path ends with "ret".
//           Zero: every path advances x0 past the second row and continues at
//           the local label 3 that closes the macro.
//
// Expects:  x0 = output pointer, w7 = edge flags, s31 = fill pattern (v31 is
//           set up by the code expanding the macro).
// Clobbers: x0, \s1, \s2, v0-v3, condition flags.
//
// NOTE: the numeric labels (1, 2, 3) are position dependent; "3f" inside the
// macro resolves to the "3:" at the end of the same expansion.
.macro pad_top_bot_16 s1, s2, w, stride, reg, ret
        tst             w7,  #1 // CDEF_HAVE_LEFT
        b.eq            2f
        // CDEF_HAVE_LEFT
        // Step back 2 pixels so the row loads start at the left edge.
        sub             \s1,  \s1,  #4
        sub             \s2,  \s2,  #4
        tst             w7,  #2 // CDEF_HAVE_RIGHT
        b.eq            1f
        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
        // \reg covers the 2 left pixels plus the first \w-2 body pixels; the
        // following 8 bytes (d1/d3) hold the last 2 body pixels and the
        // 2 right edge pixels.
        ldr             \reg\()0, [\s1]
        ldr             d1,       [\s1, #2*\w]
        ldr             \reg\()2, [\s2]
        ldr             d3,       [\s2, #2*\w]
        str             \reg\()0, [x0]
        str             d1,       [x0, #2*\w]
        add             x0,  x0,  #2*\stride    // next output row
        str             \reg\()2, [x0]
        str             d3,       [x0, #2*\w]
.if \ret
        ret
.else
        add             x0,  x0,  #2*\stride
        b               3f
.endif

1:
        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        // Same as above, but only the last 2 body pixels (4 bytes) are read
        // past \reg; the 2 right edge pixels are written from s31.
        // Note: "s1"/"s3" below are SIMD registers, "\s1"/"\s2" are the
        // macro's pointer arguments.
        ldr             \reg\()0, [\s1]
        ldr             s1,       [\s1, #2*\w]
        ldr             \reg\()2, [\s2]
        ldr             s3,       [\s2, #2*\w]
        str             \reg\()0, [x0]
        str             s1,       [x0, #2*\w]
        str             s31,      [x0, #2*\w+4]
        add             x0,  x0,  #2*\stride    // next output row
        str             \reg\()2, [x0]
        str             s3,       [x0, #2*\w]
        str             s31,      [x0, #2*\w+4]
.if \ret
        ret
.else
        add             x0,  x0,  #2*\stride
        b               3f
.endif

2:
        // !CDEF_HAVE_LEFT
        tst             w7,  #2 // CDEF_HAVE_RIGHT
        b.eq            1f
        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
        // The pointers were not moved back here, so \reg holds exactly the
        // \w body pixels and s1/s3 the 2 right edge pixels. The left edge is
        // written from s31 and the body lands 4 bytes into the output row
        // (hence the unscaled-offset stur).
        ldr             \reg\()0, [\s1]
        ldr             s1,       [\s1, #2*\w]
        ldr             \reg\()2, [\s2]
        ldr             s3,       [\s2, #2*\w]
        str             s31, [x0]
        stur            \reg\()0, [x0, #4]
        str             s1,       [x0, #4+2*\w]
        add             x0,  x0,  #2*\stride    // next output row
        str             s31, [x0]
        stur            \reg\()2, [x0, #4]
        str             s3,       [x0, #4+2*\w]
.if \ret
        ret
.else
        add             x0,  x0,  #2*\stride
        b               3f
.endif

1:
        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        // Only the body is read; both edges are written from s31.
        ldr             \reg\()0, [\s1]
        ldr             \reg\()1, [\s2]
        str             s31, [x0]
        stur            \reg\()0, [x0, #4]
        str             s31,      [x0, #4+2*\w]
        add             x0,  x0,  #2*\stride    // next output row
        str             s31, [x0]
        stur            \reg\()1, [x0, #4]
        str             s31,      [x0, #4+2*\w]
.if \ret
        ret
.else
        add             x0,  x0,  #2*\stride
.endif
3:
.endm

// load_n_incr_16 dst, src, incr, w
//
// Loads one row of \w 16-bit pixels from [\src] into vector register \dst
// (4 halfwords when w == 4, otherwise 8 halfwords) and post-increments \src
// by the register \incr.
.macro load_n_incr_16 dst, src, incr, w
.if \w == 4
        ld1             {\dst\().4h}, [\src], \incr
.else
        ld1             {\dst\().8h}, [\src], \incr
.endif
.endm

// void dav1d_cdef_paddingX_16bpc_neon(uint16_t *tmp, const pixel *src,
//                                     ptrdiff_t src_stride, const pixel (*left)[2],
//                                     const pixel *const top,
//                                     const pixel *const bottom, int h,
//                                     enum CdefEdgeFlags edges);

// padding_func_16 w, stride, reg
//
// Emits cdef_padding\w\()_16bpc_neon, which copies a block of \w x h 16-bit
// pixels into tmp together with a 2 pixel border on every side. Border pixels
// on a side whose CDEF_HAVE_* flag is clear are written as 0x8000.
//
// Register use, following the prototype above under AAPCS64:
//   x0 = tmp      output pointer; moved back by 2 rows + 2 pixels on entry
//   x1 = src      current source row, advanced by x2 per row
//   x2 = src_stride  added unscaled to byte pointers, i.e. a byte stride
//   x3 = left     2 pixels (4 bytes) consumed per row
//   x4 = top      first of two rows above the block (second is x4 + x2)
//   x5 = bottom   first of two rows below the block (second is x5 + x2)
//   w6 = h        row count of the middle section
//   w7 = edges    bit 0 LEFT, bit 1 RIGHT, bit 2 TOP, bit 3 BOTTOM
//   x9            scratch: pointer to the second top/bottom row
//   v30, v31      fill pattern, 0x8000 in every 16-bit lane
//
// Macro arguments are forwarded to pad_top_bot_16: \w is the block width,
// \stride the tmp row pitch in 16-bit elements, \reg the SIMD register prefix
// holding one row of \w pixels.
//
// NOTE(review): "function"/"endfunc" are macros defined outside this file
// (presumably in src/arm/asm.S) - their exact expansion is not visible here.
// NOTE(review): the middle loops test the counter after the first row, so
// h >= 1 is assumed - confirm against callers.
.macro padding_func_16 w, stride, reg
function cdef_padding\w\()_16bpc_neon, export=1
        movi            v30.8h,  #0x80, lsl #8  // 0x8000 in each lane
        mov             v31.16b, v30.16b
        // Rewind to the top-left corner of the padded area:
        // 2 rows up and 2 pixels left, in bytes.
        sub             x0,  x0,  #2*(2*\stride+2)
        tst             w7,  #4 // CDEF_HAVE_TOP
        b.ne            1f
        // !CDEF_HAVE_TOP
        // Fill both top rows: 32 bytes cover two rows at stride 8, the
        // w == 8 instantiation (stride 16) needs another 32 bytes.
        st1             {v30.8h, v31.8h}, [x0], #32
.if \w == 8
        st1             {v30.8h, v31.8h}, [x0], #32
.endif
        // Lands on the "3:" that ends the pad_top_bot_16 expansion below,
        // which falls straight through to the middle section.
        b               3f
1:
        // CDEF_HAVE_TOP
        add             x9,  x4,  x2            // second top row
        pad_top_bot_16  x4,  x9, \w, \stride, \reg, 0

        // Middle section
3:
        tst             w7,  #1 // CDEF_HAVE_LEFT
        b.eq            2f
        // CDEF_HAVE_LEFT
        tst             w7,  #2 // CDEF_HAVE_RIGHT
        b.eq            1f
        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
0:
        ld1             {v0.s}[0], [x3], #4     // 2 left pixels for this row
        // The right edge must be read before load_n_incr_16 advances x1.
        ldr             s2,       [x1, #2*\w]
        load_n_incr_16  v1,  x1,  x2,  \w
        subs            w6,  w6,  #1
        str             s0,  [x0]
        stur            \reg\()1, [x0, #4]
        str             s2,       [x0, #4+2*\w]
        add             x0,  x0,  #2*\stride
        b.gt            0b
        b               3f
1:
        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        ld1             {v0.s}[0], [x3], #4     // 2 left pixels for this row
        load_n_incr_16  v1,  x1,  x2,  \w
        subs            w6,  w6,  #1
        str             s0,  [x0]
        stur            \reg\()1, [x0, #4]
        str             s31,      [x0, #4+2*\w] // right edge: fill
        add             x0,  x0,  #2*\stride
        b.gt            1b
        b               3f
2:
        tst             w7,  #2 // CDEF_HAVE_RIGHT
        b.eq            1f
        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
0:
        // The right edge must be read before load_n_incr_16 advances x1.
        ldr             s1,       [x1, #2*\w]
        load_n_incr_16  v0,  x1,  x2,  \w
        subs            w6,  w6,  #1
        str             s31, [x0]               // left edge: fill
        stur            \reg\()0, [x0, #4]
        str             s1,       [x0, #4+2*\w]
        add             x0,  x0,  #2*\stride
        b.gt            0b
        b               3f
1:
        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        load_n_incr_16  v0,  x1,  x2,  \w
        subs            w6,  w6,  #1
        str             s31, [x0]               // left edge: fill
        stur            \reg\()0, [x0, #4]
        str             s31,      [x0, #4+2*\w] // right edge: fill
        add             x0,  x0,  #2*\stride
        b.gt            1b

3:
        tst             w7,  #8 // CDEF_HAVE_BOTTOM
        b.ne            1f
        // !CDEF_HAVE_BOTTOM
        // Fill both bottom rows, same sizes as for the missing top rows.
        st1             {v30.8h, v31.8h}, [x0], #32
.if \w == 8
        st1             {v30.8h, v31.8h}, [x0], #32
.endif
        ret
1:
        // CDEF_HAVE_BOTTOM
        add             x9,  x5,  x2            // second bottom row
        // ret=1: every path of the expansion returns from the function.
        pad_top_bot_16  x5,  x9, \w, \stride, \reg, 1
endfunc
.endm

// Instantiate cdef_padding8_16bpc_neon (tmp pitch 16 elements, q registers)
// and cdef_padding4_16bpc_neon (tmp pitch 8 elements, d registers).
padding_func_16 8, 16, q
padding_func_16 4, 8,  d

// The macros below are not defined in this file (presumably they come from
// the included cdef_tmpl.S); their expansions are not visible here.
tables

filter 8, 16
filter 4, 16

find_dir 16