/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2019, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/arm/asm.S"
#include "util.S"
#include "cdef_tmpl.S"

// pad_top_bottom: copy two rows of \w 8-bit pixels (from \s1 and \s2) into
// the 16-bit buffer at r0, widening each pixel to 16 bits. Output rows are
// 2*\stride bytes apart.
//
// Inputs:
//   r0      destination row start (the left pair is stored at r0-4, the
//           right pair at r0+2*\w)
//   r7      edge flags; bit 0 = CDEF_HAVE_LEFT, bit 1 = CDEF_HAVE_RIGHT
//   s12     fill value for a missing left/right pixel pair. This is the low
//           word of q3, which padding_func sets to 0x8000 in every 16-bit
//           lane before expanding this macro.
//   \s1,\s2 source row pointers
//   \n1,\n2 registers the raw source rows are loaded into. The widening
//           below is hard-coded as "vmovl.u8 q0, d0" / "vmovl.u8 q1, d2",
//           so \n1 must alias d0 and \n2 must alias d2:
//             n1 = s0/d0
//             w1 = d0/q0
//             n2 = s4/d2
//             w2 = d2/q1
//   \ret    nonzero: each path ends with "pop {r4-r8,pc}" (function return).
//           zero: each path advances r0 past both rows and continues at the
//           trailing local label 3.
//
// Clobbers: r12, q0-q2, r0, flags.
.macro pad_top_bottom s1, s2, w, stride, n1, w1, n2, w2, align, ret
        tst             r7,  #1 // CDEF_HAVE_LEFT
        beq             2f
        // CDEF_HAVE_LEFT
        tst             r7,  #2 // CDEF_HAVE_RIGHT
        beq             1f
        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
        // d4 collects four pixel pairs: left/right of row 1, left/right of row 2.
        ldrh            r12, [\s1, #-2]
        vldr            \n1, [\s1]
        vdup.16         d4,  r12
        ldrh            r12, [\s1, #\w]
        vmov.16         d4[1], r12
        ldrh            r12, [\s2, #-2]
        vldr            \n2, [\s2]
        vmov.16         d4[2], r12
        ldrh            r12, [\s2, #\w]
        vmovl.u8        q0,  d0
        vmov.16         d4[3], r12
        vmovl.u8        q1,  d2
        vmovl.u8        q2,  d4
        vstr            s8,  [r0, #-4]
        vst1.16         {\w1}, [r0, :\align]
        vstr            s9,  [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        vstr            s10, [r0, #-4]
        vst1.16         {\w2}, [r0, :\align]
        vstr            s11, [r0, #2*\w]
.if \ret
        pop             {r4-r8,pc}
.else
        add             r0,  r0,  #2*\stride
        b               3f
.endif

1:
        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        ldrh            r12, [\s1, #-2]
        vldr            \n1, [\s1]
        vdup.16         d4,  r12
        ldrh            r12, [\s2, #-2]
        vldr            \n2, [\s2]
        vmovl.u8        q0,  d0
        vmov.16         d4[1], r12
        vmovl.u8        q1,  d2
        vmovl.u8        q2,  d4
        vstr            s8,  [r0, #-4]
        vst1.16         {\w1}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        vstr            s9,  [r0, #-4]
        vst1.16         {\w2}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
.if \ret
        pop             {r4-r8,pc}
.else
        add             r0,  r0,  #2*\stride
        b               3f
.endif

2:
        // !CDEF_HAVE_LEFT
        tst             r7,  #2 // CDEF_HAVE_RIGHT
        beq             1f
        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
        vldr            \n1, [\s1]
        ldrh            r12, [\s1, #\w]
        vldr            \n2, [\s2]
        vdup.16         d4,  r12
        ldrh            r12, [\s2, #\w]
        vmovl.u8        q0,  d0
        vmov.16         d4[1], r12
        vmovl.u8        q1,  d2
        vmovl.u8        q2,  d4
        vstr            s12, [r0, #-4]
        vst1.16         {\w1}, [r0, :\align]
        vstr            s8,  [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        vstr            s12, [r0, #-4]
        vst1.16         {\w2}, [r0, :\align]
        vstr            s9,  [r0, #2*\w]
.if \ret
        pop             {r4-r8,pc}
.else
        add             r0,  r0,  #2*\stride
        b               3f
.endif

1:
        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        vldr            \n1, [\s1]
        vldr            \n2, [\s2]
        vmovl.u8        q0,  d0
        vmovl.u8        q1,  d2
        vstr            s12, [r0, #-4]
        vst1.16         {\w1}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        vstr            s12, [r0, #-4]
        vst1.16         {\w2}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
.if \ret
        pop             {r4-r8,pc}
.else
        add             r0,  r0,  #2*\stride
.endif
3:
.endm
143*c0909341SAndroid Build Coastguard Worker 144*c0909341SAndroid Build Coastguard Worker.macro load_n_incr dst, src, incr, w 145*c0909341SAndroid Build Coastguard Worker.if \w == 4 146*c0909341SAndroid Build Coastguard Worker vld1.32 {\dst\()[0]}, [\src, :32], \incr 147*c0909341SAndroid Build Coastguard Worker.else 148*c0909341SAndroid Build Coastguard Worker vld1.8 {\dst\()}, [\src, :64], \incr 149*c0909341SAndroid Build Coastguard Worker.endif 150*c0909341SAndroid Build Coastguard Worker.endm 151*c0909341SAndroid Build Coastguard Worker 152*c0909341SAndroid Build Coastguard Worker// void dav1d_cdef_paddingX_8bpc_neon(uint16_t *tmp, const pixel *src, 153*c0909341SAndroid Build Coastguard Worker// ptrdiff_t src_stride, const pixel (*left)[2], 154*c0909341SAndroid Build Coastguard Worker// const pixel *const top, 155*c0909341SAndroid Build Coastguard Worker// const pixel *const bottom, int h, 156*c0909341SAndroid Build Coastguard Worker// enum CdefEdgeFlags edges); 157*c0909341SAndroid Build Coastguard Worker 158*c0909341SAndroid Build Coastguard Worker// n1 = s0/d0 159*c0909341SAndroid Build Coastguard Worker// w1 = d0/q0 160*c0909341SAndroid Build Coastguard Worker// n2 = s4/d2 161*c0909341SAndroid Build Coastguard Worker// w2 = d2/q1 162*c0909341SAndroid Build Coastguard Worker.macro padding_func w, stride, n1, w1, n2, w2, align 163*c0909341SAndroid Build Coastguard Workerfunction cdef_padding\w\()_8bpc_neon, export=1 164*c0909341SAndroid Build Coastguard Worker push {r4-r8,lr} 165*c0909341SAndroid Build Coastguard Worker ldrd r4, r5, [sp, #24] 166*c0909341SAndroid Build Coastguard Worker ldrd r6, r7, [sp, #32] 167*c0909341SAndroid Build Coastguard Worker cmp r7, #0xf // fully edged 168*c0909341SAndroid Build Coastguard Worker beq cdef_padding\w\()_edged_8bpc_neon 169*c0909341SAndroid Build Coastguard Worker vmov.i16 q3, #0x8000 170*c0909341SAndroid Build Coastguard Worker tst r7, #4 // CDEF_HAVE_TOP 171*c0909341SAndroid Build Coastguard Worker bne 1f 
172*c0909341SAndroid Build Coastguard Worker // !CDEF_HAVE_TOP 173*c0909341SAndroid Build Coastguard Worker sub r12, r0, #2*(2*\stride+2) 174*c0909341SAndroid Build Coastguard Worker vmov.i16 q2, #0x8000 175*c0909341SAndroid Build Coastguard Worker vst1.16 {q2,q3}, [r12]! 176*c0909341SAndroid Build Coastguard Worker.if \w == 8 177*c0909341SAndroid Build Coastguard Worker vst1.16 {q2,q3}, [r12]! 178*c0909341SAndroid Build Coastguard Worker.endif 179*c0909341SAndroid Build Coastguard Worker b 3f 180*c0909341SAndroid Build Coastguard Worker1: 181*c0909341SAndroid Build Coastguard Worker // CDEF_HAVE_TOP 182*c0909341SAndroid Build Coastguard Worker add r8, r4, r2 183*c0909341SAndroid Build Coastguard Worker sub r0, r0, #2*(2*\stride) 184*c0909341SAndroid Build Coastguard Worker pad_top_bottom r4, r8, \w, \stride, \n1, \w1, \n2, \w2, \align, 0 185*c0909341SAndroid Build Coastguard Worker 186*c0909341SAndroid Build Coastguard Worker // Middle section 187*c0909341SAndroid Build Coastguard Worker3: 188*c0909341SAndroid Build Coastguard Worker tst r7, #1 // CDEF_HAVE_LEFT 189*c0909341SAndroid Build Coastguard Worker beq 2f 190*c0909341SAndroid Build Coastguard Worker // CDEF_HAVE_LEFT 191*c0909341SAndroid Build Coastguard Worker tst r7, #2 // CDEF_HAVE_RIGHT 192*c0909341SAndroid Build Coastguard Worker beq 1f 193*c0909341SAndroid Build Coastguard Worker // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT 194*c0909341SAndroid Build Coastguard Worker0: 195*c0909341SAndroid Build Coastguard Worker vld1.16 {d2[]}, [r3, :16]! 
196*c0909341SAndroid Build Coastguard Worker ldrh r12, [r1, #\w] 197*c0909341SAndroid Build Coastguard Worker load_n_incr d0, r1, r2, \w 198*c0909341SAndroid Build Coastguard Worker subs r6, r6, #1 199*c0909341SAndroid Build Coastguard Worker vmov.16 d2[1], r12 200*c0909341SAndroid Build Coastguard Worker vmovl.u8 q0, d0 201*c0909341SAndroid Build Coastguard Worker vmovl.u8 q1, d2 202*c0909341SAndroid Build Coastguard Worker vstr s4, [r0, #-4] 203*c0909341SAndroid Build Coastguard Worker vst1.16 {\w1}, [r0, :\align] 204*c0909341SAndroid Build Coastguard Worker vstr s5, [r0, #2*\w] 205*c0909341SAndroid Build Coastguard Worker add r0, r0, #2*\stride 206*c0909341SAndroid Build Coastguard Worker bgt 0b 207*c0909341SAndroid Build Coastguard Worker b 3f 208*c0909341SAndroid Build Coastguard Worker1: 209*c0909341SAndroid Build Coastguard Worker // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT 210*c0909341SAndroid Build Coastguard Worker vld1.16 {d2[]}, [r3, :16]! 211*c0909341SAndroid Build Coastguard Worker load_n_incr d0, r1, r2, \w 212*c0909341SAndroid Build Coastguard Worker subs r6, r6, #1 213*c0909341SAndroid Build Coastguard Worker vmovl.u8 q0, d0 214*c0909341SAndroid Build Coastguard Worker vmovl.u8 q1, d2 215*c0909341SAndroid Build Coastguard Worker vstr s4, [r0, #-4] 216*c0909341SAndroid Build Coastguard Worker vst1.16 {\w1}, [r0, :\align] 217*c0909341SAndroid Build Coastguard Worker vstr s12, [r0, #2*\w] 218*c0909341SAndroid Build Coastguard Worker add r0, r0, #2*\stride 219*c0909341SAndroid Build Coastguard Worker bgt 1b 220*c0909341SAndroid Build Coastguard Worker b 3f 221*c0909341SAndroid Build Coastguard Worker2: 222*c0909341SAndroid Build Coastguard Worker tst r7, #2 // CDEF_HAVE_RIGHT 223*c0909341SAndroid Build Coastguard Worker beq 1f 224*c0909341SAndroid Build Coastguard Worker // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT 225*c0909341SAndroid Build Coastguard Worker0: 226*c0909341SAndroid Build Coastguard Worker ldrh r12, [r1, #\w] 227*c0909341SAndroid Build Coastguard Worker 
load_n_incr d0, r1, r2, \w 228*c0909341SAndroid Build Coastguard Worker vdup.16 d2, r12 229*c0909341SAndroid Build Coastguard Worker subs r6, r6, #1 230*c0909341SAndroid Build Coastguard Worker vmovl.u8 q0, d0 231*c0909341SAndroid Build Coastguard Worker vmovl.u8 q1, d2 232*c0909341SAndroid Build Coastguard Worker vstr s12, [r0, #-4] 233*c0909341SAndroid Build Coastguard Worker vst1.16 {\w1}, [r0, :\align] 234*c0909341SAndroid Build Coastguard Worker vstr s4, [r0, #2*\w] 235*c0909341SAndroid Build Coastguard Worker add r0, r0, #2*\stride 236*c0909341SAndroid Build Coastguard Worker bgt 0b 237*c0909341SAndroid Build Coastguard Worker b 3f 238*c0909341SAndroid Build Coastguard Worker1: 239*c0909341SAndroid Build Coastguard Worker // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT 240*c0909341SAndroid Build Coastguard Worker load_n_incr d0, r1, r2, \w 241*c0909341SAndroid Build Coastguard Worker subs r6, r6, #1 242*c0909341SAndroid Build Coastguard Worker vmovl.u8 q0, d0 243*c0909341SAndroid Build Coastguard Worker vstr s12, [r0, #-4] 244*c0909341SAndroid Build Coastguard Worker vst1.16 {\w1}, [r0, :\align] 245*c0909341SAndroid Build Coastguard Worker vstr s12, [r0, #2*\w] 246*c0909341SAndroid Build Coastguard Worker add r0, r0, #2*\stride 247*c0909341SAndroid Build Coastguard Worker bgt 1b 248*c0909341SAndroid Build Coastguard Worker 249*c0909341SAndroid Build Coastguard Worker3: 250*c0909341SAndroid Build Coastguard Worker tst r7, #8 // CDEF_HAVE_BOTTOM 251*c0909341SAndroid Build Coastguard Worker bne 1f 252*c0909341SAndroid Build Coastguard Worker // !CDEF_HAVE_BOTTOM 253*c0909341SAndroid Build Coastguard Worker sub r12, r0, #4 254*c0909341SAndroid Build Coastguard Worker vmov.i16 q2, #0x8000 255*c0909341SAndroid Build Coastguard Worker vst1.16 {q2,q3}, [r12]! 256*c0909341SAndroid Build Coastguard Worker.if \w == 8 257*c0909341SAndroid Build Coastguard Worker vst1.16 {q2,q3}, [r12]! 
258*c0909341SAndroid Build Coastguard Worker.endif 259*c0909341SAndroid Build Coastguard Worker pop {r4-r8,pc} 260*c0909341SAndroid Build Coastguard Worker1: 261*c0909341SAndroid Build Coastguard Worker // CDEF_HAVE_BOTTOM 262*c0909341SAndroid Build Coastguard Worker add r8, r5, r2 263*c0909341SAndroid Build Coastguard Worker pad_top_bottom r5, r8, \w, \stride, \n1, \w1, \n2, \w2, \align, 1 264*c0909341SAndroid Build Coastguard Workerendfunc 265*c0909341SAndroid Build Coastguard Worker.endm 266*c0909341SAndroid Build Coastguard Worker 267*c0909341SAndroid Build Coastguard Workerpadding_func 8, 16, d0, q0, d2, q1, 128 268*c0909341SAndroid Build Coastguard Workerpadding_func 4, 8, s0, d0, s4, d2, 64 269*c0909341SAndroid Build Coastguard Worker 270*c0909341SAndroid Build Coastguard Worker// void cdef_paddingX_edged_8bpc_neon(uint16_t *tmp, const pixel *src, 271*c0909341SAndroid Build Coastguard Worker// ptrdiff_t src_stride, const pixel (*left)[2], 272*c0909341SAndroid Build Coastguard Worker// const pixel *const top, 273*c0909341SAndroid Build Coastguard Worker// const pixel *const bottom, int h, 274*c0909341SAndroid Build Coastguard Worker// enum CdefEdgeFlags edges); 275*c0909341SAndroid Build Coastguard Worker 276*c0909341SAndroid Build Coastguard Worker.macro padding_func_edged w, stride, reg, align 277*c0909341SAndroid Build Coastguard Workerfunction cdef_padding\w\()_edged_8bpc_neon 278*c0909341SAndroid Build Coastguard Worker sub r0, r0, #(2*\stride) 279*c0909341SAndroid Build Coastguard Worker 280*c0909341SAndroid Build Coastguard Worker ldrh r12, [r4, #-2] 281*c0909341SAndroid Build Coastguard Worker vldr \reg, [r4] 282*c0909341SAndroid Build Coastguard Worker add r8, r4, r2 283*c0909341SAndroid Build Coastguard Worker strh r12, [r0, #-2] 284*c0909341SAndroid Build Coastguard Worker ldrh r12, [r4, #\w] 285*c0909341SAndroid Build Coastguard Worker vstr \reg, [r0] 286*c0909341SAndroid Build Coastguard Worker strh r12, [r0, #\w] 287*c0909341SAndroid Build 
Coastguard Worker 288*c0909341SAndroid Build Coastguard Worker ldrh r12, [r8, #-2] 289*c0909341SAndroid Build Coastguard Worker vldr \reg, [r8] 290*c0909341SAndroid Build Coastguard Worker strh r12, [r0, #\stride-2] 291*c0909341SAndroid Build Coastguard Worker ldrh r12, [r8, #\w] 292*c0909341SAndroid Build Coastguard Worker vstr \reg, [r0, #\stride] 293*c0909341SAndroid Build Coastguard Worker strh r12, [r0, #\stride+\w] 294*c0909341SAndroid Build Coastguard Worker add r0, r0, #2*\stride 295*c0909341SAndroid Build Coastguard Worker 296*c0909341SAndroid Build Coastguard Worker0: 297*c0909341SAndroid Build Coastguard Worker ldrh r12, [r3], #2 298*c0909341SAndroid Build Coastguard Worker vldr \reg, [r1] 299*c0909341SAndroid Build Coastguard Worker str r12, [r0, #-2] 300*c0909341SAndroid Build Coastguard Worker ldrh r12, [r1, #\w] 301*c0909341SAndroid Build Coastguard Worker add r1, r1, r2 302*c0909341SAndroid Build Coastguard Worker subs r6, r6, #1 303*c0909341SAndroid Build Coastguard Worker vstr \reg, [r0] 304*c0909341SAndroid Build Coastguard Worker str r12, [r0, #\w] 305*c0909341SAndroid Build Coastguard Worker add r0, r0, #\stride 306*c0909341SAndroid Build Coastguard Worker bgt 0b 307*c0909341SAndroid Build Coastguard Worker 308*c0909341SAndroid Build Coastguard Worker ldrh r12, [r5, #-2] 309*c0909341SAndroid Build Coastguard Worker vldr \reg, [r5] 310*c0909341SAndroid Build Coastguard Worker add r8, r5, r2 311*c0909341SAndroid Build Coastguard Worker strh r12, [r0, #-2] 312*c0909341SAndroid Build Coastguard Worker ldrh r12, [r5, #\w] 313*c0909341SAndroid Build Coastguard Worker vstr \reg, [r0] 314*c0909341SAndroid Build Coastguard Worker strh r12, [r0, #\w] 315*c0909341SAndroid Build Coastguard Worker 316*c0909341SAndroid Build Coastguard Worker ldrh r12, [r8, #-2] 317*c0909341SAndroid Build Coastguard Worker vldr \reg, [r8] 318*c0909341SAndroid Build Coastguard Worker strh r12, [r0, #\stride-2] 319*c0909341SAndroid Build Coastguard Worker ldrh r12, [r8, #\w] 
320*c0909341SAndroid Build Coastguard Worker vstr \reg, [r0, #\stride] 321*c0909341SAndroid Build Coastguard Worker strh r12, [r0, #\stride+\w] 322*c0909341SAndroid Build Coastguard Worker 323*c0909341SAndroid Build Coastguard Worker pop {r4-r8,pc} 324*c0909341SAndroid Build Coastguard Workerendfunc 325*c0909341SAndroid Build Coastguard Worker.endm 326*c0909341SAndroid Build Coastguard Worker 327*c0909341SAndroid Build Coastguard Workerpadding_func_edged 8, 16, d0, 64 328*c0909341SAndroid Build Coastguard Workerpadding_func_edged 4, 8, s0, 32 329*c0909341SAndroid Build Coastguard Worker 330*c0909341SAndroid Build Coastguard Workertables 331*c0909341SAndroid Build Coastguard Worker 332*c0909341SAndroid Build Coastguard Workerfilter 8, 8 333*c0909341SAndroid Build Coastguard Workerfilter 4, 8 334*c0909341SAndroid Build Coastguard Worker 335*c0909341SAndroid Build Coastguard Workerfind_dir 8 336*c0909341SAndroid Build Coastguard Worker 337*c0909341SAndroid Build Coastguard Worker.macro load_px_8 d11, d12, d21, d22, w 338*c0909341SAndroid Build Coastguard Worker.if \w == 8 339*c0909341SAndroid Build Coastguard Worker add r6, r2, r9 // x + off 340*c0909341SAndroid Build Coastguard Worker sub r9, r2, r9 // x - off 341*c0909341SAndroid Build Coastguard Worker vld1.8 {\d11}, [r6] // p0 342*c0909341SAndroid Build Coastguard Worker add r6, r6, #16 // += stride 343*c0909341SAndroid Build Coastguard Worker vld1.8 {\d21}, [r9] // p1 344*c0909341SAndroid Build Coastguard Worker add r9, r9, #16 // += stride 345*c0909341SAndroid Build Coastguard Worker vld1.8 {\d12}, [r6] // p0 346*c0909341SAndroid Build Coastguard Worker vld1.8 {\d22}, [r9] // p1 347*c0909341SAndroid Build Coastguard Worker.else 348*c0909341SAndroid Build Coastguard Worker add r6, r2, r9 // x + off 349*c0909341SAndroid Build Coastguard Worker sub r9, r2, r9 // x - off 350*c0909341SAndroid Build Coastguard Worker vld1.32 {\d11[0]}, [r6] // p0 351*c0909341SAndroid Build Coastguard Worker add r6, r6, #8 // += stride 
352*c0909341SAndroid Build Coastguard Worker vld1.32 {\d21[0]}, [r9] // p1 353*c0909341SAndroid Build Coastguard Worker add r9, r9, #8 // += stride 354*c0909341SAndroid Build Coastguard Worker vld1.32 {\d11[1]}, [r6] // p0 355*c0909341SAndroid Build Coastguard Worker add r6, r6, #8 // += stride 356*c0909341SAndroid Build Coastguard Worker vld1.32 {\d21[1]}, [r9] // p1 357*c0909341SAndroid Build Coastguard Worker add r9, r9, #8 // += stride 358*c0909341SAndroid Build Coastguard Worker vld1.32 {\d12[0]}, [r6] // p0 359*c0909341SAndroid Build Coastguard Worker add r6, r6, #8 // += stride 360*c0909341SAndroid Build Coastguard Worker vld1.32 {\d22[0]}, [r9] // p1 361*c0909341SAndroid Build Coastguard Worker add r9, r9, #8 // += stride 362*c0909341SAndroid Build Coastguard Worker vld1.32 {\d12[1]}, [r6] // p0 363*c0909341SAndroid Build Coastguard Worker vld1.32 {\d22[1]}, [r9] // p1 364*c0909341SAndroid Build Coastguard Worker.endif 365*c0909341SAndroid Build Coastguard Worker.endm 366*c0909341SAndroid Build Coastguard Worker.macro handle_pixel_8 s1, s2, thresh_vec, shift, tap, min 367*c0909341SAndroid Build Coastguard Worker.if \min 368*c0909341SAndroid Build Coastguard Worker vmin.u8 q3, q3, \s1 369*c0909341SAndroid Build Coastguard Worker vmax.u8 q4, q4, \s1 370*c0909341SAndroid Build Coastguard Worker vmin.u8 q3, q3, \s2 371*c0909341SAndroid Build Coastguard Worker vmax.u8 q4, q4, \s2 372*c0909341SAndroid Build Coastguard Worker.endif 373*c0909341SAndroid Build Coastguard Worker vabd.u8 q8, q0, \s1 // abs(diff) 374*c0909341SAndroid Build Coastguard Worker vabd.u8 q11, q0, \s2 // abs(diff) 375*c0909341SAndroid Build Coastguard Worker vshl.u8 q9, q8, \shift // abs(diff) >> shift 376*c0909341SAndroid Build Coastguard Worker vshl.u8 q12, q11, \shift // abs(diff) >> shift 377*c0909341SAndroid Build Coastguard Worker vqsub.u8 q9, \thresh_vec, q9 // clip = imax(0, threshold - (abs(diff) >> shift)) 378*c0909341SAndroid Build Coastguard Worker vqsub.u8 q12, \thresh_vec, q12// 
clip = imax(0, threshold - (abs(diff) >> shift)) 379*c0909341SAndroid Build Coastguard Worker vcgt.u8 q10, q0, \s1 // px > p0 380*c0909341SAndroid Build Coastguard Worker vcgt.u8 q13, q0, \s2 // px > p1 381*c0909341SAndroid Build Coastguard Worker vmin.u8 q9, q9, q8 // imin(abs(diff), clip) 382*c0909341SAndroid Build Coastguard Worker vmin.u8 q12, q12, q11 // imin(abs(diff), clip) 383*c0909341SAndroid Build Coastguard Worker vneg.s8 q8, q9 // -imin() 384*c0909341SAndroid Build Coastguard Worker vneg.s8 q11, q12 // -imin() 385*c0909341SAndroid Build Coastguard Worker vbsl q10, q8, q9 // constrain() = imax(imin(diff, clip), -clip) 386*c0909341SAndroid Build Coastguard Worker vdup.8 d18, \tap // taps[k] 387*c0909341SAndroid Build Coastguard Worker vbsl q13, q11, q12 // constrain() = imax(imin(diff, clip), -clip) 388*c0909341SAndroid Build Coastguard Worker vmlal.s8 q1, d20, d18 // sum += taps[k] * constrain() 389*c0909341SAndroid Build Coastguard Worker vmlal.s8 q1, d26, d18 // sum += taps[k] * constrain() 390*c0909341SAndroid Build Coastguard Worker vmlal.s8 q2, d21, d18 // sum += taps[k] * constrain() 391*c0909341SAndroid Build Coastguard Worker vmlal.s8 q2, d27, d18 // sum += taps[k] * constrain() 392*c0909341SAndroid Build Coastguard Worker.endm 393*c0909341SAndroid Build Coastguard Worker 394*c0909341SAndroid Build Coastguard Worker// void cdef_filterX_edged_neon(pixel *dst, ptrdiff_t dst_stride, 395*c0909341SAndroid Build Coastguard Worker// const uint16_t *tmp, int pri_strength, 396*c0909341SAndroid Build Coastguard Worker// int sec_strength, int dir, int damping, 397*c0909341SAndroid Build Coastguard Worker// int h, size_t edges); 398*c0909341SAndroid Build Coastguard Worker.macro filter_func_8 w, pri, sec, min, suffix 399*c0909341SAndroid Build Coastguard Workerfunction cdef_filter\w\suffix\()_edged_neon 400*c0909341SAndroid Build Coastguard Worker.if \pri 401*c0909341SAndroid Build Coastguard Worker movrel_local r8, pri_taps 402*c0909341SAndroid Build 
Coastguard Worker and r9, r3, #1 403*c0909341SAndroid Build Coastguard Worker add r8, r8, r9, lsl #1 404*c0909341SAndroid Build Coastguard Worker.endif 405*c0909341SAndroid Build Coastguard Worker movrel_local r9, directions\w 406*c0909341SAndroid Build Coastguard Worker add r5, r9, r5, lsl #1 407*c0909341SAndroid Build Coastguard Worker vmov.u8 d17, #7 408*c0909341SAndroid Build Coastguard Worker vdup.8 d16, r6 // damping 409*c0909341SAndroid Build Coastguard Worker 410*c0909341SAndroid Build Coastguard Worker vmov.8 d8[0], r3 411*c0909341SAndroid Build Coastguard Worker vmov.8 d8[1], r4 412*c0909341SAndroid Build Coastguard Worker vclz.i8 d8, d8 // clz(threshold) 413*c0909341SAndroid Build Coastguard Worker vsub.i8 d8, d17, d8 // ulog2(threshold) 414*c0909341SAndroid Build Coastguard Worker vqsub.u8 d8, d16, d8 // shift = imax(0, damping - ulog2(threshold)) 415*c0909341SAndroid Build Coastguard Worker vneg.s8 d8, d8 // -shift 416*c0909341SAndroid Build Coastguard Worker.if \sec 417*c0909341SAndroid Build Coastguard Worker vdup.8 q6, d8[1] 418*c0909341SAndroid Build Coastguard Worker.endif 419*c0909341SAndroid Build Coastguard Worker.if \pri 420*c0909341SAndroid Build Coastguard Worker vdup.8 q5, d8[0] 421*c0909341SAndroid Build Coastguard Worker.endif 422*c0909341SAndroid Build Coastguard Worker 423*c0909341SAndroid Build Coastguard Worker1: 424*c0909341SAndroid Build Coastguard Worker.if \w == 8 425*c0909341SAndroid Build Coastguard Worker add r12, r2, #16 426*c0909341SAndroid Build Coastguard Worker vld1.8 {d0}, [r2, :64] // px 427*c0909341SAndroid Build Coastguard Worker vld1.8 {d1}, [r12, :64] // px 428*c0909341SAndroid Build Coastguard Worker.else 429*c0909341SAndroid Build Coastguard Worker add r12, r2, #8 430*c0909341SAndroid Build Coastguard Worker vld1.32 {d0[0]}, [r2, :32] // px 431*c0909341SAndroid Build Coastguard Worker add r9, r2, #2*8 432*c0909341SAndroid Build Coastguard Worker vld1.32 {d0[1]}, [r12, :32] // px 433*c0909341SAndroid Build 
Coastguard Worker add r12, r12, #2*8 434*c0909341SAndroid Build Coastguard Worker vld1.32 {d1[0]}, [r9, :32] // px 435*c0909341SAndroid Build Coastguard Worker vld1.32 {d1[1]}, [r12, :32] // px 436*c0909341SAndroid Build Coastguard Worker.endif 437*c0909341SAndroid Build Coastguard Worker 438*c0909341SAndroid Build Coastguard Worker vmov.u8 q1, #0 // sum 439*c0909341SAndroid Build Coastguard Worker vmov.u8 q2, #0 // sum 440*c0909341SAndroid Build Coastguard Worker.if \min 441*c0909341SAndroid Build Coastguard Worker vmov.u16 q3, q0 // min 442*c0909341SAndroid Build Coastguard Worker vmov.u16 q4, q0 // max 443*c0909341SAndroid Build Coastguard Worker.endif 444*c0909341SAndroid Build Coastguard Worker 445*c0909341SAndroid Build Coastguard Worker // Instead of loading sec_taps 2, 1 from memory, just set it 446*c0909341SAndroid Build Coastguard Worker // to 2 initially and decrease for the second round. 447*c0909341SAndroid Build Coastguard Worker // This is also used as loop counter. 448*c0909341SAndroid Build Coastguard Worker mov lr, #2 // sec_taps[0] 449*c0909341SAndroid Build Coastguard Worker 450*c0909341SAndroid Build Coastguard Worker2: 451*c0909341SAndroid Build Coastguard Worker.if \pri 452*c0909341SAndroid Build Coastguard Worker ldrsb r9, [r5] // off1 453*c0909341SAndroid Build Coastguard Worker 454*c0909341SAndroid Build Coastguard Worker load_px_8 d28, d29, d30, d31, \w 455*c0909341SAndroid Build Coastguard Worker.endif 456*c0909341SAndroid Build Coastguard Worker 457*c0909341SAndroid Build Coastguard Worker.if \sec 458*c0909341SAndroid Build Coastguard Worker add r5, r5, #4 // +2*2 459*c0909341SAndroid Build Coastguard Worker ldrsb r9, [r5] // off2 460*c0909341SAndroid Build Coastguard Worker.endif 461*c0909341SAndroid Build Coastguard Worker 462*c0909341SAndroid Build Coastguard Worker.if \pri 463*c0909341SAndroid Build Coastguard Worker ldrb r12, [r8] // *pri_taps 464*c0909341SAndroid Build Coastguard Worker vdup.8 q7, r3 // threshold 
465*c0909341SAndroid Build Coastguard Worker 466*c0909341SAndroid Build Coastguard Worker handle_pixel_8 q14, q15, q7, q5, r12, \min 467*c0909341SAndroid Build Coastguard Worker.endif 468*c0909341SAndroid Build Coastguard Worker 469*c0909341SAndroid Build Coastguard Worker.if \sec 470*c0909341SAndroid Build Coastguard Worker load_px_8 d28, d29, d30, d31, \w 471*c0909341SAndroid Build Coastguard Worker 472*c0909341SAndroid Build Coastguard Worker add r5, r5, #8 // +2*4 473*c0909341SAndroid Build Coastguard Worker ldrsb r9, [r5] // off3 474*c0909341SAndroid Build Coastguard Worker 475*c0909341SAndroid Build Coastguard Worker vdup.8 q7, r4 // threshold 476*c0909341SAndroid Build Coastguard Worker 477*c0909341SAndroid Build Coastguard Worker handle_pixel_8 q14, q15, q7, q6, lr, \min 478*c0909341SAndroid Build Coastguard Worker 479*c0909341SAndroid Build Coastguard Worker load_px_8 d28, d29, d30, d31, \w 480*c0909341SAndroid Build Coastguard Worker 481*c0909341SAndroid Build Coastguard Worker handle_pixel_8 q14, q15, q7, q6, lr, \min 482*c0909341SAndroid Build Coastguard Worker 483*c0909341SAndroid Build Coastguard Worker sub r5, r5, #11 // r5 -= 2*(2+4); r5 += 1; 484*c0909341SAndroid Build Coastguard Worker.else 485*c0909341SAndroid Build Coastguard Worker add r5, r5, #1 // r5 += 1 486*c0909341SAndroid Build Coastguard Worker.endif 487*c0909341SAndroid Build Coastguard Worker subs lr, lr, #1 // sec_tap-- (value) 488*c0909341SAndroid Build Coastguard Worker.if \pri 489*c0909341SAndroid Build Coastguard Worker add r8, r8, #1 // pri_taps++ (pointer) 490*c0909341SAndroid Build Coastguard Worker.endif 491*c0909341SAndroid Build Coastguard Worker bne 2b 492*c0909341SAndroid Build Coastguard Worker 493*c0909341SAndroid Build Coastguard Worker vshr.s16 q14, q1, #15 // -(sum < 0) 494*c0909341SAndroid Build Coastguard Worker vshr.s16 q15, q2, #15 // -(sum < 0) 495*c0909341SAndroid Build Coastguard Worker vadd.i16 q1, q1, q14 // sum - (sum < 0) 496*c0909341SAndroid Build 
Coastguard Worker vadd.i16 q2, q2, q15 // sum - (sum < 0) 497*c0909341SAndroid Build Coastguard Worker vrshr.s16 q1, q1, #4 // (8 + sum - (sum < 0)) >> 4 498*c0909341SAndroid Build Coastguard Worker vrshr.s16 q2, q2, #4 // (8 + sum - (sum < 0)) >> 4 499*c0909341SAndroid Build Coastguard Worker vaddw.u8 q1, q1, d0 // px + (8 + sum ...) >> 4 500*c0909341SAndroid Build Coastguard Worker vaddw.u8 q2, q2, d1 // px + (8 + sum ...) >> 4 501*c0909341SAndroid Build Coastguard Worker vqmovun.s16 d0, q1 502*c0909341SAndroid Build Coastguard Worker vqmovun.s16 d1, q2 503*c0909341SAndroid Build Coastguard Worker.if \min 504*c0909341SAndroid Build Coastguard Worker vmin.u8 q0, q0, q4 505*c0909341SAndroid Build Coastguard Worker vmax.u8 q0, q0, q3 // iclip(px + .., min, max) 506*c0909341SAndroid Build Coastguard Worker.endif 507*c0909341SAndroid Build Coastguard Worker.if \w == 8 508*c0909341SAndroid Build Coastguard Worker vst1.8 {d0}, [r0, :64], r1 509*c0909341SAndroid Build Coastguard Worker add r2, r2, #2*16 // tmp += 2*tmp_stride 510*c0909341SAndroid Build Coastguard Worker subs r7, r7, #2 // h -= 2 511*c0909341SAndroid Build Coastguard Worker vst1.8 {d1}, [r0, :64], r1 512*c0909341SAndroid Build Coastguard Worker.else 513*c0909341SAndroid Build Coastguard Worker vst1.32 {d0[0]}, [r0, :32], r1 514*c0909341SAndroid Build Coastguard Worker add r2, r2, #4*8 // tmp += 4*tmp_stride 515*c0909341SAndroid Build Coastguard Worker vst1.32 {d0[1]}, [r0, :32], r1 516*c0909341SAndroid Build Coastguard Worker subs r7, r7, #4 // h -= 4 517*c0909341SAndroid Build Coastguard Worker vst1.32 {d1[0]}, [r0, :32], r1 518*c0909341SAndroid Build Coastguard Worker vst1.32 {d1[1]}, [r0, :32], r1 519*c0909341SAndroid Build Coastguard Worker.endif 520*c0909341SAndroid Build Coastguard Worker 521*c0909341SAndroid Build Coastguard Worker // Reset pri_taps and directions back to the original point 522*c0909341SAndroid Build Coastguard Worker sub r5, r5, #2 523*c0909341SAndroid Build Coastguard Worker.if 
\pri 524*c0909341SAndroid Build Coastguard Worker /* undo the two pri_taps++ steps taken by the two rounds of the inner tap loop */sub r8, r8, #2 525*c0909341SAndroid Build Coastguard Worker.endif 526*c0909341SAndroid Build Coastguard Worker 527*c0909341SAndroid Build Coastguard Worker /* loop while h > 0: the flags still come from the subs on r7 above, since the stores and the plain sub instructions in between do not update them; the 1: label lies outside this view */bgt 1b 528*c0909341SAndroid Build Coastguard Worker /* restore q4-q7 and r4-r9, then return by popping into pc (the matching pushes are outside this view) */vpop {q4-q7} 529*c0909341SAndroid Build Coastguard Worker pop {r4-r9,pc} 530*c0909341SAndroid Build Coastguard Workerendfunc 531*c0909341SAndroid Build Coastguard Worker.endm 532*c0909341SAndroid Build Coastguard Worker 533*c0909341SAndroid Build Coastguard Worker/* filter_8 w: expand filter_func_8 three times for block width w: primary taps only (suffix _pri), secondary taps only (suffix _sec), and both combined (suffix _pri_sec). Only the combined variant passes min=1, which enables the min/max tracking and the final iclip clamp inside filter_func_8. */.macro filter_8 w 534*c0909341SAndroid Build Coastguard Workerfilter_func_8 \w, pri=1, sec=0, min=0, suffix=_pri 535*c0909341SAndroid Build Coastguard Workerfilter_func_8 \w, pri=0, sec=1, min=0, suffix=_sec 536*c0909341SAndroid Build Coastguard Workerfilter_func_8 \w, pri=1, sec=1, min=1, suffix=_pri_sec 537*c0909341SAndroid Build Coastguard Worker.endm 538*c0909341SAndroid Build Coastguard Worker 539*c0909341SAndroid Build Coastguard Worker/* emit the three filter variants for width 8 and then for width 4 */filter_8 8 540*c0909341SAndroid Build Coastguard Workerfilter_8 4 541