/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2019, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

// Target: AArch64, GNU assembler syntax (run through the C preprocessor).
// The function/endfunc/movrel helpers and the tables/filter/find_dir macros
// used below are not defined in this file; presumably they come from the
// headers included here -- confirm against those files.
#include "src/arm/asm.S"
#include "util.S"
#include "cdef_tmpl.S"

// pad_top_bottom: widen two rows of 8-bit pixels to 16 bit and store them,
// together with their left/right borders, into the buffer at x0.
//
// Parameters:
//   s1, s2   x registers pointing at the first and second source row.
//            Both are decremented by 2 when CDEF_HAVE_LEFT is set.
//   w        block width in pixels.
//   stride   row stride of the destination buffer, in 16-bit elements.
//   rn, rw   register-name prefixes holding \w 8-bit pixels (rn) and
//            \w 16-bit pixels (rw), e.g. d/q for w=8, s/d for w=4.
//   ret      nonzero: emit "ret" after storing the two rows.
//            zero:    advance x0 past the two rows and continue at the
//                     local label 3 that closes the macro.
// In:       w7  = edge flags (bit 0 = CDEF_HAVE_LEFT, bit 1 = CDEF_HAVE_RIGHT)
//           v31 = fill value used for the borders that are not available
// Clobbers: v0-v3, x0, flags (and \s1/\s2 as noted above)
//
// Note: a macro argument is called s1, so "\s1" is the argument while a bare
// "s1" is the 32-bit view of SIMD register v1.
.macro pad_top_bottom s1, s2, w, stride, rn, rw, ret
        tst             w7,  #1 // CDEF_HAVE_LEFT
        b.eq            2f
        // CDEF_HAVE_LEFT
        sub             \s1,  \s1,  #2          // start two pixels to the left
        sub             \s2,  \s2,  #2
        tst             w7,  #2 // CDEF_HAVE_RIGHT
        b.eq            1f
        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
        ldr             \rn\()0, [\s1]          // first \w pixels of row 0
        ldr             s1,      [\s1, #\w]     // remaining 4 pixels of row 0
        ldr             \rn\()2, [\s2]          // first \w pixels of row 1
        ldr             s3,      [\s2, #\w]     // remaining 4 pixels of row 1
        uxtl            v0.8h,   v0.8b
        uxtl            v1.8h,   v1.8b
        uxtl            v2.8h,   v2.8b
        uxtl            v3.8h,   v3.8b
        str             \rw\()0, [x0]
        str             d1,      [x0, #2*\w]
        add             x0,  x0,  #2*\stride
        str             \rw\()2, [x0]
        str             d3,      [x0, #2*\w]
.if \ret
        ret
.else
        add             x0,  x0,  #2*\stride
        b               3f
.endif

1:
        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        ldr             \rn\()0, [\s1]
        ldr             h1,      [\s1, #\w]     // last 2 real pixels of the row
        ldr             \rn\()2, [\s2]
        ldr             h3,      [\s2, #\w]
        uxtl            v0.8h,   v0.8b
        uxtl            v1.8h,   v1.8b
        uxtl            v2.8h,   v2.8b
        uxtl            v3.8h,   v3.8b
        str             \rw\()0, [x0]
        str             s1,      [x0, #2*\w]
        str             s31,     [x0, #2*\w+4]  // right border = fill
        add             x0,  x0,  #2*\stride
        str             \rw\()2, [x0]
        str             s3,      [x0, #2*\w]
        str             s31,     [x0, #2*\w+4]  // right border = fill
.if \ret
        ret
.else
        add             x0,  x0,  #2*\stride
        b               3f
.endif

2:
        // !CDEF_HAVE_LEFT
        tst             w7,  #2 // CDEF_HAVE_RIGHT
        b.eq            1f
        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
        ldr             \rn\()0, [\s1]
        ldr             h1,      [\s1, #\w]     // 2 right border pixels
        ldr             \rn\()2, [\s2]
        ldr             h3,      [\s2, #\w]
        uxtl            v0.8h,   v0.8b
        uxtl            v1.8h,   v1.8b
        uxtl            v2.8h,   v2.8b
        uxtl            v3.8h,   v3.8b
        str             s31,     [x0]           // left border = fill
        stur            \rw\()0, [x0, #4]
        str             s1,      [x0, #4+2*\w]
        add             x0,  x0,  #2*\stride
        str             s31,     [x0]           // left border = fill
        stur            \rw\()2, [x0, #4]
        str             s3,      [x0, #4+2*\w]
.if \ret
        ret
.else
        add             x0,  x0,  #2*\stride
        b               3f
.endif

1:
        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        ldr             \rn\()0, [\s1]
        ldr             \rn\()1, [\s2]
        uxtl            v0.8h,   v0.8b
        uxtl            v1.8h,   v1.8b
        str             s31,     [x0]           // left border = fill
        stur            \rw\()0, [x0, #4]
        str             s31,     [x0, #4+2*\w]  // right border = fill
        add             x0,  x0,  #2*\stride
        str             s31,     [x0]           // left border = fill
        stur            \rw\()1, [x0, #4]
        str             s31,     [x0, #4+2*\w]  // right border = fill
.if \ret
        ret
.else
        add             x0,  x0,  #2*\stride
.endif
3:
.endm

// load_n_incr: load one row of \w 8-bit pixels from [\src] into \dst and
// post-increment \src by \incr.
//   w == 4: 4 bytes into lane s[0] of \dst (the other lanes are left as-is)
//   else:   8 bytes into \dst\().8b
.macro load_n_incr dst, src, incr, w
.if \w == 4
        ld1             {\dst\().s}[0], [\src], \incr
.else
        ld1             {\dst\().8b},   [\src], \incr
.endif
.endm

// void dav1d_cdef_paddingX_8bpc_neon(uint16_t *tmp, const pixel *src,
//                                    ptrdiff_t src_stride, const pixel (*left)[2],
//                                    const pixel *const top,
//                                    const pixel *const bottom, int h,
//                                    enum CdefEdgeFlags edges);
//
// Register use, following the argument order above (AAPCS64):
//   x0 = tmp, x1 = src, x2 = src_stride, x3 = left, x4 = top, x5 = bottom,
//   w6 = h, w7 = edges
// Edge flag bits tested here: 1 = LEFT, 2 = RIGHT, 4 = TOP, 8 = BOTTOM.
//
// Builds a 16-bit copy of the block in tmp, with two rows above/below and two
// columns left/right. Border positions whose edge flag is clear are written
// with 0x8000 (v30/v31). When all four flags are set (edges == 0xf), control
// is passed straight to cdef_padding\w\()_edged_8bpc_neon instead.
//
// Macro parameters: w = block width, stride = tmp row stride in elements,
// rn/rw = register prefixes for \w 8-bit / \w 16-bit pixels.
// Clobbers: x0, x1, x3, x4/x5 (via pad_top_bottom), w6, x9, v0-v3, v30, v31,
// flags.
.macro padding_func w, stride, rn, rw
function cdef_padding\w\()_8bpc_neon, export=1
        cmp             w7,  #0xf // fully edged
        b.eq            cdef_padding\w\()_edged_8bpc_neon
        movi            v30.8h,  #0x80, lsl #8  // 0x8000 in every halfword
        mov             v31.16b, v30.16b
        sub             x0,  x0,  #2*(2*\stride+2) // back up 2 rows + 2 columns
        tst             w7,  #4 // CDEF_HAVE_TOP
        b.ne            1f
        // !CDEF_HAVE_TOP
        // Fill both top rows: 2*\stride halfwords in total.
        st1             {v30.8h, v31.8h}, [x0], #32
.if \w == 8
        st1             {v30.8h, v31.8h}, [x0], #32
.endif
        b               3f
1:
        // CDEF_HAVE_TOP
        add             x9,  x4,  x2            // second top row
        pad_top_bottom  x4,  x9, \w, \stride, \rn, \rw, 0

        // Middle section
3:
        tst             w7,  #1 // CDEF_HAVE_LEFT
        b.eq            2f
        // CDEF_HAVE_LEFT
        tst             w7,  #2 // CDEF_HAVE_RIGHT
        b.eq            1f
        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
0:
        ld1             {v0.h}[0], [x3], #2     // 2 left pixels
        ldr             h2,      [x1, #\w]      // 2 right pixels
        load_n_incr     v1,  x1,  x2,  \w
        subs            w6,  w6,  #1
        uxtl            v0.8h,  v0.8b
        uxtl            v1.8h,  v1.8b
        uxtl            v2.8h,  v2.8b
        str             s0,      [x0]
        stur            \rw\()1, [x0, #4]
        str             s2,      [x0, #4+2*\w]
        add             x0,  x0,  #2*\stride
        b.gt            0b
        b               3f
1:
        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        ld1             {v0.h}[0], [x3], #2     // 2 left pixels
        load_n_incr     v1,  x1,  x2,  \w
        subs            w6,  w6,  #1
        uxtl            v0.8h,  v0.8b
        uxtl            v1.8h,  v1.8b
        str             s0,      [x0]
        stur            \rw\()1, [x0, #4]
        str             s31,     [x0, #4+2*\w]  // right border = fill
        add             x0,  x0,  #2*\stride
        b.gt            1b
        b               3f
2:
        tst             w7,  #2 // CDEF_HAVE_RIGHT
        b.eq            1f
        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
0:
        ldr             h1,      [x1, #\w]      // 2 right pixels
        load_n_incr     v0,  x1,  x2,  \w
        subs            w6,  w6,  #1
        uxtl            v0.8h,  v0.8b
        uxtl            v1.8h,  v1.8b
        str             s31,     [x0]           // left border = fill
        stur            \rw\()0, [x0, #4]
        str             s1,      [x0, #4+2*\w]
        add             x0,  x0,  #2*\stride
        b.gt            0b
        b               3f
1:
        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        load_n_incr     v0,  x1,  x2,  \w
        subs            w6,  w6,  #1
        uxtl            v0.8h,  v0.8b
        str             s31,     [x0]           // left border = fill
        stur            \rw\()0, [x0, #4]
        str             s31,     [x0, #4+2*\w]  // right border = fill
        add             x0,  x0,  #2*\stride
        b.gt            1b

3:
        tst             w7,  #8 // CDEF_HAVE_BOTTOM
        b.ne            1f
        // !CDEF_HAVE_BOTTOM
        // Fill both bottom rows: 2*\stride halfwords in total.
        st1             {v30.8h, v31.8h}, [x0], #32
.if \w == 8
        st1             {v30.8h, v31.8h}, [x0], #32
.endif
        ret
1:
        // CDEF_HAVE_BOTTOM
        add             x9,  x5,  x2            // second bottom row
        pad_top_bottom  x5,  x9, \w, \stride, \rn, \rw, 1 // returns by itself
endfunc
.endm

padding_func 8, 16, d, q
padding_func 4, 8,  s, d

// void cdef_paddingX_edged_8bpc_neon(uint8_t *tmp, const pixel *src,
//                                    ptrdiff_t src_stride, const pixel (*left)[2],
//                                    const pixel *const top,
//                                    const pixel *const bottom, int h,
//                                    enum CdefEdgeFlags edges);
//
// Variant used when every edge is available: pixels are copied as 8-bit
// values, without widening and without any fill value.
//   x0 = tmp, x1 = src, x2 = src_stride, x3 = left, x4 = top, x5 = bottom,
//   w6 = h (w7 = edges is not read here)
// Each tmp row is \stride bytes: 2 left pixels, \w pixels, 2 right pixels.
//
// Macro parameters: w = block width, stride = tmp row stride in bytes,
// reg = register prefix holding \w 8-bit pixels.
// Clobbers: x0, x1, x3, x4, x5, w6, x9, v0-v3, flags.
.macro padding_func_edged w, stride, reg
function cdef_padding\w\()_edged_8bpc_neon, export=1
        sub             x4,  x4,  #2            // top and bottom rows start
        sub             x5,  x5,  #2            // two pixels to the left
        sub             x0,  x0,  #(2*\stride+2) // back up 2 rows + 2 columns

.if \w == 4
        // Two complete 8-byte rows fit one 16-byte store.
        ldr             d0, [x4]
        ldr             d1, [x4, x2]
        st1             {v0.8b, v1.8b}, [x0], #16
.else
        // 12 bytes per row, copied as 8 + 4.
        add             x9,  x4,  x2
        ldr             d0, [x4]
        ldr             s1, [x4, #8]
        ldr             d2, [x9]
        ldr             s3, [x9, #8]
        str             d0, [x0]
        str             s1, [x0, #8]
        str             d2, [x0, #\stride]
        str             s3, [x0, #\stride+8]
        add             x0,  x0,  #2*\stride
.endif

0:
        ld1             {v0.h}[0], [x3], #2     // 2 left pixels
        ldr             h2,      [x1, #\w]      // 2 right pixels
        load_n_incr     v1,  x1,  x2,  \w
        subs            w6,  w6,  #1
        str             h0,      [x0]
        stur            \reg\()1, [x0, #2]
        str             h2,      [x0, #2+\w]
        add             x0,  x0,  #\stride
        b.gt            0b

.if \w == 4
        ldr             d0, [x5]
        ldr             d1, [x5, x2]
        st1             {v0.8b, v1.8b}, [x0], #16
.else
        add             x9,  x5,  x2
        ldr             d0, [x5]
        ldr             s1, [x5, #8]
        ldr             d2, [x9]
        ldr             s3, [x9, #8]
        str             d0, [x0]
        str             s1, [x0, #8]
        str             d2, [x0, #\stride]
        str             s3, [x0, #\stride+8]
.endif
        ret
endfunc
.endm

padding_func_edged 8, 16, d
padding_func_edged 4, 8,  s

// The macros invoked below are not defined in this file (presumably they come
// from cdef_tmpl.S -- confirm there).
tables

filter 8, 8
filter 4, 8

find_dir 8

// load_px_8: load the pixels at "x + off" into \d1 and at "x - off" into \d2
// from the 8-bit tmp buffer.
// In:       x2 = pointer to the current position in tmp
//           w9 = offset; only the low byte is used, sign-extended
// w == 8:   2 rows of 8 bytes each, rows 16 bytes apart
// else:     4 rows of 4 bytes each, rows 8 bytes apart
// Clobbers: x6, x9
.macro load_px_8 d1, d2, w
.if \w == 8
        add             x6,  x2,  w9, sxtb          // x + off
        sub             x9,  x2,  w9, sxtb          // x - off
        ld1             {\d1\().d}[0], [x6]         // p0
        add             x6,  x6,  #16               // += stride
        ld1             {\d2\().d}[0], [x9]         // p1
        add             x9,  x9,  #16               // += stride
        ld1             {\d1\().d}[1], [x6]         // p0
        ld1             {\d2\().d}[1], [x9]         // p1
.else
        add             x6,  x2,  w9, sxtb          // x + off
        sub             x9,  x2,  w9, sxtb          // x - off
        ld1             {\d1\().s}[0], [x6]         // p0
        add             x6,  x6,  #8                // += stride
        ld1             {\d2\().s}[0], [x9]         // p1
        add             x9,  x9,  #8                // += stride
        ld1             {\d1\().s}[1], [x6]         // p0
        add             x6,  x6,  #8                // += stride
        ld1             {\d2\().s}[1], [x9]         // p1
        add             x9,  x9,  #8                // += stride
        ld1             {\d1\().s}[2], [x6]         // p0
        add             x6,  x6,  #8                // += stride
        ld1             {\d2\().s}[2], [x9]         // p1
        add             x9,  x9,  #8                // += stride
        ld1             {\d1\().s}[3], [x6]         // p0
        ld1             {\d2\().s}[3], [x9]         // p1
.endif
.endm

// handle_pixel_8: accumulate taps[k] * constrain(p - px) for one pair of
// neighbour vectors.
// Parameters:
//   s1, s2      vectors holding p0 and p1 (16 bytes each)
//   thresh_vec  threshold replicated across 16 bytes
//   shift       vector of negated shift amounts (ushl by a negative count
//               shifts right)
//   tap         w register holding taps[k]
//   min         nonzero: also update the running min (v3) and max (v4)
// In:       v0 = px
// In/out:   v1 = sum for p0, v2 = sum for p1 (8-bit accumulators),
//           v3/v4 = running min/max when \min is set
// Clobbers: v16-v22
.macro handle_pixel_8 s1, s2, thresh_vec, shift, tap, min
.if \min
        umin            v3.16b,  v3.16b,  \s1\().16b
        umax            v4.16b,  v4.16b,  \s1\().16b
        umin            v3.16b,  v3.16b,  \s2\().16b
        umax            v4.16b,  v4.16b,  \s2\().16b
.endif
        uabd            v16.16b, v0.16b,  \s1\().16b    // abs(diff)
        uabd            v20.16b, v0.16b,  \s2\().16b    // abs(diff)
        ushl            v17.16b, v16.16b, \shift        // abs(diff) >> shift
        ushl            v21.16b, v20.16b, \shift        // abs(diff) >> shift
        uqsub           v17.16b, \thresh_vec, v17.16b   // clip = imax(0, threshold - (abs(diff) >> shift))
        uqsub           v21.16b, \thresh_vec, v21.16b   // clip = imax(0, threshold - (abs(diff) >> shift))
        cmhi            v18.16b, v0.16b,  \s1\().16b    // px > p0
        cmhi            v22.16b, v0.16b,  \s2\().16b    // px > p1
        umin            v17.16b, v17.16b, v16.16b       // imin(abs(diff), clip)
        umin            v21.16b, v21.16b, v20.16b       // imin(abs(diff), clip)
        dup             v19.16b, \tap                   // taps[k]
        neg             v16.16b, v17.16b                // -imin()
        neg             v20.16b, v21.16b                // -imin()
        bsl             v18.16b, v16.16b, v17.16b       // constrain() = apply_sign()
        bsl             v22.16b, v20.16b, v21.16b       // constrain() = apply_sign()
        mla             v1.16b,  v18.16b, v19.16b       // sum += taps[k] * constrain()
        mla             v2.16b,  v22.16b, v19.16b       // sum += taps[k] * constrain()
.endm

// void cdef_filterX_edged_8bpc_neon(pixel *dst, ptrdiff_t dst_stride,
//                                   const uint8_t *tmp, int pri_strength,
//                                   int sec_strength, int dir, int damping,
//                                   int h);
//
// Register use, following the argument order above (AAPCS64):
//   x0 = dst, x1 = dst_stride, x2 = tmp, w3 = pri_strength,
//   w4 = sec_strength, w5 = dir, w6 = damping, w7 = h
//
// Macro parameters:
//   w       block width (8 or 4); each iteration of the outer loop handles
//           16 pixels, i.e. 2 rows for w=8 and 4 rows for w=4
//   pri     nonzero: apply the primary taps
//   sec     nonzero: apply the secondary taps
//   min     nonzero: clamp the result to the min/max of the pixels used
//   suffix  appended to the function name
// Clobbers: x0, x2, x5, x6, w7, x8-x14, v0-v6, v16-v22, v24-v30, flags.
.macro filter_func_8 w, pri, sec, min, suffix
function cdef_filter\w\suffix\()_edged_8bpc_neon
.if \pri
        movrel          x8,  pri_taps
        and             w9,  w3,  #1
        add             x8,  x8,  w9, uxtw #1       // &pri_taps[2*(pri_strength & 1)]
.endif
        movrel          x9,  directions\w
        add             x5,  x9,  w5,  uxtw #1      // &directions[2*dir]
        movi            v30.8b,  #7
        dup             v28.8b,  w6                 // damping

.if \pri
        dup             v25.16b, w3                 // threshold
.endif
.if \sec
        dup             v27.16b, w4                 // threshold
.endif
        // Interleave so that byte 0 carries the primary and byte 1 the
        // secondary threshold; only the lanes read back below are used.
        trn1            v24.8b,  v25.8b, v27.8b
        clz             v24.8b,  v24.8b             // clz(threshold)
        sub             v24.8b,  v30.8b, v24.8b     // ulog2(threshold)
        uqsub           v24.8b,  v28.8b, v24.8b     // shift = imax(0, damping - ulog2(threshold))
        neg             v24.8b,  v24.8b             // -shift
.if \sec
        dup             v26.16b, v24.b[1]
.endif
.if \pri
        dup             v24.16b, v24.b[0]
.endif

1:
.if \w == 8
        add             x12, x2,  #16
        ld1             {v0.d}[0], [x2]             // px
        ld1             {v0.d}[1], [x12]            // px
.else
        add             x12, x2,  #1*8
        add             x13, x2,  #2*8
        add             x14, x2,  #3*8
        ld1             {v0.s}[0], [x2]             // px
        ld1             {v0.s}[1], [x12]            // px
        ld1             {v0.s}[2], [x13]            // px
        ld1             {v0.s}[3], [x14]            // px
.endif

        // We need 9-bits or two 8-bit accumulators to fit the sum.
        // Max of |sum| > 15*2*6(pri) + 4*4*3(sec) = 228.
        // Start sum at -1 instead of 0 to help handle rounding later.
        movi            v1.16b,  #255               // sum
        movi            v2.16b,  #0                 // sum
.if \min
        mov             v3.16b,  v0.16b             // min
        mov             v4.16b,  v0.16b             // max
.endif

        // Instead of loading sec_taps 2, 1 from memory, just set it
        // to 2 initially and decrease for the second round.
        // This is also used as loop counter.
        mov             w11, #2                     // sec_taps[0]

2:
.if \pri
        ldrb            w9,  [x5]                   // off1

        load_px_8       v5,  v6, \w
.endif

.if \sec
        add             x5,  x5,  #4                // +2*2
        ldrb            w9,  [x5]                   // off2
        load_px_8       v28, v29, \w
.endif

.if \pri
        ldrb            w10, [x8]                   // *pri_taps

        handle_pixel_8  v5,  v6,  v25.16b, v24.16b, w10, \min
.endif

.if \sec
        add             x5,  x5,  #8                // +2*4
        ldrb            w9,  [x5]                   // off3
        load_px_8       v5,  v6,  \w

        handle_pixel_8  v28, v29, v27.16b, v26.16b, w11, \min

        handle_pixel_8  v5,  v6,  v27.16b, v26.16b, w11, \min

        sub             x5,  x5,  #11               // x5 -= 2*(2+4); x5 += 1;
.else
        add             x5,  x5,  #1                // x5 += 1
.endif
        subs            w11, w11, #1                // sec_tap-- (value)
.if \pri
        add             x8,  x8,  #1                // pri_taps++ (pointer)
.endif
        b.ne            2b

        // Perform halving adds since the value won't fit otherwise.
        // To handle the offset for negative values, use both halving w/ and w/o rounding.
        srhadd          v5.16b,  v1.16b,  v2.16b    // sum >> 1
        shadd           v6.16b,  v1.16b,  v2.16b    // (sum - 1) >> 1
        cmlt            v1.16b,  v5.16b,  #0        // sum < 0
        bsl             v1.16b,  v6.16b,  v5.16b    // (sum - (sum < 0)) >> 1

        srshr           v1.16b,  v1.16b,  #3        // (8 + sum - (sum < 0)) >> 4

        usqadd          v0.16b,  v1.16b             // px + (8 + sum ...) >> 4
.if \min
        umin            v0.16b,  v0.16b,  v4.16b
        umax            v0.16b,  v0.16b,  v3.16b    // iclip(px + .., min, max)
.endif
.if \w == 8
        st1             {v0.d}[0], [x0], x1
        add             x2,  x2,  #2*16             // tmp += 2*tmp_stride
        subs            w7,  w7,  #2                // h -= 2
        st1             {v0.d}[1], [x0], x1
.else
        st1             {v0.s}[0], [x0], x1
        add             x2,  x2,  #4*8              // tmp += 4*tmp_stride
        st1             {v0.s}[1], [x0], x1
        subs            w7,  w7,  #4                // h -= 4
        st1             {v0.s}[2], [x0], x1
        st1             {v0.s}[3], [x0], x1
.endif

        // Reset pri_taps and directions back to the original point
        // (sub with an immediate does not touch the flags set by subs above)
        sub             x5,  x5,  #2
.if \pri
        sub             x8,  x8,  #2
.endif

        b.gt            1b
        ret
endfunc
.endm

// filter_8: instantiate the three 8-bit filter variants for block width \w:
// primary only, secondary only, and primary+secondary with min/max clamping.
.macro filter_8 w
filter_func_8 \w, pri=1, sec=0, min=0, suffix=_pri
filter_func_8 \w, pri=0, sec=1, min=0, suffix=_sec
filter_func_8 \w, pri=1, sec=1, min=1, suffix=_pri_sec
.endm

filter_8 8
filter_8 4