/*
 * Copyright © 2021, VideoLAN and dav1d authors
 * Copyright © 2021, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/arm/asm.S"
#include "util.S"
#include "src/arm/asm-offsets.h"

#define GRAIN_WIDTH 82
#define GRAIN_HEIGHT 73

#define SUB_GRAIN_WIDTH 44
#define SUB_GRAIN_HEIGHT 38

// Advance the pseudo-random state held in r2 by "steps" bits at once.
// The feedback bits are r ^ (r >> 3) ^ (r >> 12) ^ (r >> 1), masked to the
// low "steps" bits.
//   shift=1 (default): r2 is shifted right by "steps" and the new bits are
//                      inserted at bits [16 - steps, 15].
//   shift=0:           r2 is left unshifted and the new bits are ORed in from
//                      bit 16 upwards, to be consumed one bit at a time by
//                      read_shift_rand.
// Clobbers r11, r12 and lr, so the caller must have saved lr beforehand.
.macro increment_seed steps, shift=1
        lsr             r11, r2,  #3
        lsr             r12, r2,  #12
        lsr             lr,  r2,  #1
        eor             r11, r2,  r11                     // (r >> 0) ^ (r >> 3)
        eor             r12, r12, lr                      // (r >> 12) ^ (r >> 1)
        eor             r11, r11, r12                     // (r >> 0) ^ (r >> 3) ^ (r >> 12) ^ (r >> 1)
.if \shift
        lsr             r2,  r2,  #\steps
.endif
        and             r11, r11, #((1 << \steps) - 1)    // bit
.if \shift
        orr             r2,  r2,  r11, lsl #(16 - \steps) // *state
.else
        orr             r2,  r2,  r11, lsl #16            // *state
.endif
.endm

// Extract a "bits" wide random value from r2 into dest; the field starts at
// bit (16 - bits - age). As used in this file, a batch produced by one
// increment_seed is read with age counting down, age 0 being the last value.
.macro read_rand dest, bits, age
        ubfx            \dest,  r2,   #16 - \bits - \age, #\bits
.endm

// Extract a "bits" wide random value whose top bit is bit 16 of r2 (the field
// starts at bit 17 - bits), then shift r2 right by one bit.
// Pairs with increment_seed shift=0.
.macro read_shift_rand dest, bits
        ubfx            \dest,  r2,   #17 - \bits, #\bits
        lsr             r2,  r2,   #1
.endm

// special calling convention:
// r2 holds seed
// r3 holds dav1d_gaussian_sequence
// clobbers r11-r12
// returns in d0-d1
//
// Loads eight 16 bit table entries, in two batches of four 11 bit random
// indices; each index is scaled by 2 and added to r3.
// lr is saved on entry because increment_seed uses it as scratch; r5-r6 are
// preserved.
function get_gaussian_neon
        push            {r5-r6,lr}
        increment_seed  4
        read_rand       r5,  11,  3
        read_rand       r6,  11,  2
        add             r5,  r3,  r5,  lsl #1
        add             r6,  r3,  r6,  lsl #1
        vld1.16         {d0[0]}, [r5]
        read_rand       r5,  11,  1
        vld1.16         {d0[1]}, [r6]
        add             r5,  r3,  r5,  lsl #1
        read_rand       r6,  11,  0
        increment_seed  4
        add             r6,  r3,  r6,  lsl #1
        vld1.16         {d0[2]}, [r5]
        read_rand       r5,  11,  3
        vld1.16         {d0[3]}, [r6]
        add             r5,  r3,  r5,  lsl #1
        read_rand       r6,  11,  2
        vld1.16         {d1[0]}, [r5]
        add             r6,  r3,  r6,  lsl #1
        read_rand       r5,  11,  1
        vld1.16         {d1[1]}, [r6]
        read_rand       r6,  11,  0
        add             r5,  r3,  r5,  lsl #1
        add             r6,  r3,  r6,  lsl #1
        vld1.16         {d1[2]}, [r5]
        vld1.16         {d1[3]}, [r6]
        pop             {r5-r6,pc}
endfunc

// Generate one row of GRAIN_WIDTH (82) 8 bit grain values into eleven d
// registers: ten get_gaussian_neon calls give 8 values each, and two more
// values are generated inline into the low two bytes of the last register
// (the remaining bytes of that register are not meaningful).
// Every 16 bit table entry is rounding-shifted by the per-lane amounts in
// q15/d30, which the caller sets up, and then narrowed to 8 bits.
// Expects r2 = seed and r3 = table, as for get_gaussian_neon.
// Clobbers q0, r11, r12 and lr.
// The macro parameters are named r0..r10 but are only referenced as
// \r0..\r10; they name the destination d registers.
.macro get_grain_row r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10
        bl              get_gaussian_neon
        vrshl.s16       q0,  q0,  q15
        vmovn.i16       \r0, q0
        bl              get_gaussian_neon
        vrshl.s16       q0,  q0,  q15
        vmovn.i16       \r1, q0
        bl              get_gaussian_neon
        vrshl.s16       q0,  q0,  q15
        vmovn.i16       \r2, q0
        bl              get_gaussian_neon
        vrshl.s16       q0,  q0,  q15
        vmovn.i16       \r3, q0
        bl              get_gaussian_neon
        vrshl.s16       q0,  q0,  q15
        vmovn.i16       \r4, q0
        bl              get_gaussian_neon
        vrshl.s16       q0,  q0,  q15
        vmovn.i16       \r5, q0
        bl              get_gaussian_neon
        vrshl.s16       q0,  q0,  q15
        vmovn.i16       \r6, q0
        bl              get_gaussian_neon
        vrshl.s16       q0,  q0,  q15
        vmovn.i16       \r7, q0
        bl              get_gaussian_neon
        vrshl.s16       q0,  q0,  q15
        vmovn.i16       \r8, q0
        bl              get_gaussian_neon
        vrshl.s16       q0,  q0,  q15
        vmovn.i16       \r9, q0
        increment_seed  2
        read_rand       r11, 11,  1
        read_rand       r12, 11,  0
        add             r11, r3,  r11, lsl #1
        add             r12, r3,  r12, lsl #1
        vld1.16         {d0[0]}, [r11]
        vld1.16         {d0[1]}, [r12]
        vrshl.s16       d0,  d0,  d30
        vmovn.i16       \r10, q0
.endm

// Store one row produced by get_grain_row to [r0]: 32 + 32 + 16 + 2 = 82
// bytes, advancing r0 past the row.
.macro store_grain_row r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10
        vst1.16         {\r0, \r1, \r2, \r3}, [r0]!
        vst1.16         {\r4, \r5, \r6, \r7}, [r0]!
        vst1.16         {\r8, \r9},  [r0]!
        vst1.16         {\r10[0]},   [r0]!
.endm

// Generate one row of SUB_GRAIN_WIDTH (44) 8 bit grain values into six d
// registers: five get_gaussian_neon calls give 8 values each, and four more
// values are generated inline into the low four bytes of the last register.
// Same register expectations and clobbers as get_grain_row.
.macro get_grain_row_44 r0, r1, r2, r3, r4, r5
        bl              get_gaussian_neon
        vrshl.s16       q0,  q0,  q15
        vmovn.i16       \r0, q0
        bl              get_gaussian_neon
        vrshl.s16       q0,  q0,  q15
        vmovn.i16       \r1, q0
        bl              get_gaussian_neon
        vrshl.s16       q0,  q0,  q15
        vmovn.i16       \r2, q0
        bl              get_gaussian_neon
        vrshl.s16       q0,  q0,  q15
        vmovn.i16       \r3, q0
        bl              get_gaussian_neon
        vrshl.s16       q0,  q0,  q15
        vmovn.i16       \r4, q0
        increment_seed  4
        read_rand       r11, 11,  3
        read_rand       r12, 11,  2
        add             r11, r3,  r11, lsl #1
        add             r12, r3,  r12, lsl #1
        vld1.16         {d0[]},  [r11]            // all-lanes load; lanes 1-3 are overwritten below
        read_rand       r11, 11,  1
        vld1.16         {d0[1]}, [r12]
        add             r11, r3,  r11, lsl #1
        read_rand       r12, 11,  0
        vld1.16         {d0[2]}, [r11]
        add             r12, r3,  r12, lsl #1
        vld1.16         {d0[3]}, [r12]
        vrshl.s16       d0,  d0,  d30
        vmovn.i16       \r5, q0
.endm

// Store one row produced by get_grain_row_44 to [r0]. 48 bytes are written
// (the 44 generated values plus the upper four bytes of the last register),
// and r0 ends up advanced by GRAIN_WIDTH in total: 32 from the post-increment
// of the first store plus GRAIN_WIDTH-32 from the add.
.macro store_grain_row_44 r0, r1, r2, r3, r4, r5
        vst1.16         {\r0, \r1, \r2, \r3}, [r0]!
        vst1.16         {\r4, \r5},  [r0]
        add             r0,  r0,  #GRAIN_WIDTH-32
.endm

// Generate two grain values into the low two bytes of d0.
// Expects r2 = seed, r3 = table and d30 = shift amounts, as above.
// r11 is preserved; r12 is clobbered.
function get_grain_2_neon
        push            {r11,lr}
        increment_seed  2
        read_rand       r11, 11,  1
        read_rand       r12, 11,  0
        add             r11, r3,  r11, lsl #1
        add             r12, r3,  r12, lsl #1
        vld1.16         {d0[0]}, [r11]
        vld1.16         {d0[1]}, [r12]
        vrshl.s16       d0,  d0,  d30
        vmovn.i16       d0,  q0
        pop             {r11,pc}
endfunc

// Call get_grain_2_neon and copy the result to dst, unless dst is d0 itself.
.macro get_grain_2 dst
        bl              get_grain_2_neon
.ifnc \dst, d0
        vmov            \dst, d0
.endif
.endm

// r1 holds the number of entries to produce
// r6, r8 and r10 hold the previous output entries
// q0 holds the vector of produced entries
// q1 holds the input vector of sums from above
//
// Further register usage, as read from the code below:
//   r2  seed, consumed one bit per entry through read_shift_rand
//   r3  table of 16 bit entries, indexed by the 11 bit random value
//   r4  coefficient(s) applied to the previous outputs; for lag3 three signed
//       bytes packed in bits 0-7, 8-15 and 16-23
//   r5  upper clamp bound; the lower clamp bound is the constant -128
//   r7  ar_coeff_shift
//   r9  4 + grain_scale_shift
//   lag1: only r6 is a previous output; r8 is added as the rounding term
//         1 << (ar_coeff_shift - 1), and r10 is added to the table value
//         before the shift by r9 (presumably its rounding term; confirm
//         against the caller)
//   lag2: r8 and r6 are the previous outputs, r4 and r10 their coefficients
//   lag3: r10, r8 and r6 are the previous outputs
//   lag2/lag3: both rounding terms are derived here from r7 and r9 into r0
//         and lr; lr is spilled around the clamp since it also has to hold
//         the -128 bound
// Each iteration rotates q0 by one byte and writes the new entry to d1[7],
// and rotates q1 by one 32 bit lane.
// r0 is preserved; r1, r6, r11, r12, q0 and q1 are modified, as are r8
// (lag2, lag3) and r10 (lag3).
.macro output_lag n
function output_lag\n\()_neon
        push            {r0, lr}
.if \n == 1
        mov             lr,  #-128
.else
        mov             r0,  #1
        mov             lr,  #1
        sub             r7,  r7,  #1
        sub             r9,  r9,  #1
        lsl             r0,  r0,  r7
        lsl             lr,  lr,  r9
        add             r7,  r7,  #1
        add             r9,  r9,  #1
.endif
1:
        read_shift_rand r12, 11
        vmov.32         r11, d2[0]
        lsl             r12, r12, #1
        vext.8          q0,  q0,  q0,  #1
        ldrsh           r12, [r3, r12]
.if \n == 1
        mla             r11, r6,  r4,  r11        // sum (above) + *coeff * prev output
        add             r6,  r11, r8              // 1 << (ar_coeff_shift - 1)
        add             r12, r12, r10
        asr             r6,  r6,  r7              // >> ar_coeff_shift
        asr             r12, r12, r9              // >> (4 + grain_scale_shift)
        add             r6,  r6,  r12
        cmp             r6,  r5
.elseif \n == 2
        mla             r11, r8,  r4,  r11        // sum (above) + *coeff * prev output 1
        mla             r11, r6,  r10, r11        // += *coeff * prev output 2
        mov             r8,  r6
        add             r6,  r11, r0              // 1 << (ar_coeff_shift - 1)
        add             r12, r12, lr              // 1 << (4 + grain_scale_shift - 1)
        asr             r6,  r6,  r7              // >> ar_coeff_shift
        asr             r12, r12, r9              // >> (4 + grain_scale_shift)
        add             r6,  r6,  r12
        push            {lr}
        cmp             r6,  r5
        mov             lr,  #-128
.else
        push            {r1-r3}
        sbfx            r1,  r4,  #0,  #8
        sbfx            r2,  r4,  #8,  #8
        sbfx            r3,  r4,  #16, #8
        mla             r11, r10, r1,  r11        // sum (above) + *coeff * prev output 1
        mla             r11, r8,  r2,  r11        // sum (above) + *coeff * prev output 2
        mla             r11, r6,  r3,  r11        // += *coeff * prev output 3
        pop             {r1-r3}
        mov             r10, r8
        mov             r8,  r6

        add             r6,  r11, r0              // 1 << (ar_coeff_shift - 1)
        add             r12, r12, lr              // 1 << (4 + grain_scale_shift - 1)
        asr             r6,  r6,  r7              // >> ar_coeff_shift
        asr             r12, r12, r9              // >> (4 + grain_scale_shift)
        add             r6,  r6,  r12
        push            {lr}
        cmp             r6,  r5
        mov             lr,  #-128
.endif
        it              gt
        movgt           r6,  r5
        cmp             r6,  lr
        it              lt
        movlt           r6,  lr
.if \n >= 2
        pop             {lr}
.endif
        subs            r1,  r1,  #1
        vext.8          q1,  q1,  q1,  #4
        vmov.8          d1[7], r6
        bgt             1b
        pop             {r0, pc}
endfunc
.endm

output_lag 1
output_lag 2
output_lag 3


// Sum the three lag1 taps taken from the row above, widening to 32 bits.
// In:  q3 = 16 entries of the row above; q0 and q1 = the same row offset by
//      one entry to the left and to the right (as set up by the sum_lag1
//      macro); d27, d28 and d29 = per-lane coefficients for q0, q3 and q1
// Out: q2, q3, q4, q5 = 32 bit sums for entries 0-3, 4-7, 8-11 and 12-15
// Clobbers q0 and q1. Leaf function, returns with bx lr.
function sum_lag1_above_neon
        vmull.s8        q2,  d6,  d28
        vmull.s8        q3,  d7,  d28
        vmull.s8        q4,  d0,  d27
        vmull.s8        q5,  d1,  d27

        vaddl.s16       q0,  d4,  d8
        vaddl.s16       q2,  d5,  d9
        vaddl.s16       q4,  d6,  d10
        vaddl.s16       q5,  d7,  d11

        vmull.s8        q3,  d3,  d29
        vmull.s8        q1,  d2,  d29

        vaddw.s16       q4,  q4,  d6
        vaddw.s16       q5,  q5,  d7
        vaddw.s16       q3,  q2,  d3
        vaddw.s16       q2,  q0,  d2
        bx              lr
endfunc

// Shared body of the sum_<type>_<lag>_<edge>_neon functions.
//   lag        lag1, lag2 or lag3; selects sum_<lag>_above_neon and
//              output_<lag>_neon (lag3 with edge=left uses
//              sum_lag3_left_above_neon)
//   type       y, uv_444, uv_422 or uv_420
//   uv_layout  0 for y, otherwise 444, 422 or 420
//   edge       left, mid or right
//   elems      number of filtered entries: 16, 15 or 9
//   store      nonzero to store q0 to [r0] with post-increment at the end
//   uv_coeff   optional scalar broadcast into d13; when blank, d13 is used
//              as set up by the caller
// Steps:
//   1. Call the above-row summation, leaving 32 bit sums in q2-q5.
//   2. For the uv types, load entries from [r11] (post-incremented;
//      presumably the luma grain - confirm against the caller). uv_420
//      averages 2x2 blocks using a second row GRAIN_WIDTH bytes further on,
//      uv_422 averages horizontal pairs, uv_444 uses the entries as is. The
//      result is multiplied by d13 and accumulated into q2-q5.
//   3. Run the output tail. It is emitted only once per distinct shape:
//      uv variants with 16 elems, and uv_444 with 15, branch to the tail of
//      the matching y variant, and uv_422 with 9 elems branches to the
//      uv_420 tail.
// The tail produces the entries four sums at a time through
// output_<lag>_neon:
//   left   the first three entries are plain table values (no filtering);
//          they seed r10/r8/r6 as previous outputs and only the fourth entry
//          of the first group is filtered
//   right  the last group filters three entries and appends one plain value
//   9      the third group filters one entry and appends three plain values
// The tail saves r11 around its own use of it as scratch, and returns with
// pop {r1, pc}, which balances the push {r1, lr} done by the function that
// expands this macro.
.macro sum_lag_n_body lag, type, uv_layout, edge, elems, store, uv_coeff
.ifc \lag\()_\edge, lag3_left
        bl              sum_lag3_left_above_neon
.else
        bl              sum_\lag\()_above_neon
.endif
.ifc \type, uv_420
        vpush           {q6-q7}
        add             r12, r11, #GRAIN_WIDTH
        vld1.16         {q0, q1}, [r11]!
        vld1.16         {q6, q7}, [r12]!
        vpaddl.s8       q0,  q0
        vpaddl.s8       q1,  q1
        vpaddl.s8       q6,  q6
        vpaddl.s8       q7,  q7
        vadd.i16        q0,  q0,  q6
        vadd.i16        q1,  q1,  q7
        vpop            {q6-q7}
        vrshrn.s16      d0,  q0,  #2
        vrshrn.s16      d1,  q1,  #2
.endif
.ifc \type, uv_422
        vld1.8          {q0, q1}, [r11]!
        vpaddl.s8       q0,  q0
        vpaddl.s8       q1,  q1
        vrshrn.s16      d0,  q0,  #1
        vrshrn.s16      d1,  q1,  #1
.endif
.ifc \type, uv_444
        vld1.8          {q0}, [r11]!
.endif
.if \uv_layout
.ifnb \uv_coeff
        vdup.8          d13, \uv_coeff
.endif
        vmull.s8        q1,  d0,  d13
        vmull.s8        q0,  d1,  d13
        vaddw.s16       q2,  q2,  d2
        vaddw.s16       q3,  q3,  d3
        vaddw.s16       q4,  q4,  d0
        vaddw.s16       q5,  q5,  d1
.endif
.if \uv_layout && \elems == 16
        b               sum_\lag\()_y_\edge\()_start
.elseif \uv_layout == 444 && \elems == 15
        b               sum_\lag\()_y_\edge\()_start
.elseif \uv_layout == 422 && \elems == 9
        b               sum_\lag\()_uv_420_\edge\()_start
.else
sum_\lag\()_\type\()_\edge\()_start:
        push            {r11}
.ifc \edge, left
        increment_seed  4
        read_rand       r11, 11,  3
        read_rand       r12, 11,  2
        add             r11, r3,  r11, lsl #1
        add             r12, r3,  r12, lsl #1
        vld1.16         {d1[1]}, [r11]
        read_rand       r11, 11,  1
        vld1.16         {d1[2]}, [r12]
        add             r11, r3,  r11, lsl #1
        vld1.16         {d1[3]}, [r11]
        lsl             r2,  r2,  #1             // shift back the state as if we'd done increment_seed with shift=0
        vrshl.s16       d1,  d1,  d30
        vmovn.i16       d1,  q0
        vext.8          q2,  q2,  q2,  #12
.ifc \lag, lag3
        vmov.s8         r10, d1[5]
.endif
.ifnc \lag, lag1
        vmov.s8         r8,  d1[6]
.endif
        vmov.s8         r6,  d1[7]

        vmov            q1,  q2
        mov             r1,  #1
        bl              output_\lag\()_neon
.else
        increment_seed  4, shift=0
        vmov            q1,  q2
        mov             r1,  #4
        bl              output_\lag\()_neon
.endif

        increment_seed  4, shift=0
        vmov            q1,  q3
        mov             r1,  #4
        bl              output_\lag\()_neon

        increment_seed  4, shift=0
        vmov            q1,  q4
.if \elems == 9
        mov             r1,  #1
        bl              output_\lag\()_neon
        lsr             r2,  r2,  #3

        read_rand       r11, 11,  2
        read_rand       r12, 11,  1
        add             r11, r3,  r11, lsl #1
        add             r12, r3,  r12, lsl #1
        vld1.16         {d2[0]}, [r11]
        read_rand       r11, 11,  0
        vld1.16         {d2[1]}, [r12]
        add             r11, r3,  r11, lsl #1
        vld1.16         {d2[2]}, [r11]
        vrshl.s16       d2,  d2,  d30
        vmovn.i16       d2,  q1
        vext.8          q0,  q0,  q1,  #7
.else
        mov             r1,  #4
        bl              output_\lag\()_neon

        increment_seed  4, shift=0
        vmov            q1,  q5

.ifc \edge, right
        mov             r1,  #3
        bl              output_\lag\()_neon
        read_shift_rand r11, 11
        add             r11, r3,  r11, lsl #1
        vld1.16         {d2[0]}, [r11]
        vrshl.s16       d2,  d2,  d30
        vext.8          q0,  q0,  q1,  #1
.else
        mov             r1,  #4
        bl              output_\lag\()_neon
.endif
.endif
.if \store
        vst1.8          {q0}, [r0]!
.endif
        pop             {r11}
        pop             {r1, pc}
.endif
.endm

// Define sum_<type>_lag1_<edge>_neon. The push {r1, lr} here is balanced by
// the pop {r1, pc} at the end of the sum_lag_n_body tail, which may live in
// another instantiation that this one branches to.
.macro sum_lag1_func type, uv_layout, edge, elems=16
function sum_\type\()_lag1_\edge\()_neon
        push            {r1, lr}
        sum_lag_n_body  lag1, \type, \uv_layout, \edge, \elems, store=0
endfunc
.endm

sum_lag1_func y,      0,   left
sum_lag1_func y,      0,   mid
sum_lag1_func y,      0,   right, 15
sum_lag1_func uv_444, 444, left
sum_lag1_func uv_444, 444, mid
sum_lag1_func uv_444, 444, right, 15
sum_lag1_func uv_422, 422, left
sum_lag1_func uv_422, 422, mid
sum_lag1_func uv_422, 422, right, 9
sum_lag1_func uv_420, 420, left
sum_lag1_func uv_420, 420, mid
sum_lag1_func uv_420, 420, right, 9

// Produce 16 entries of the current row for lag1.
// left, mid and right are q registers holding three consecutive 16 entry
// groups of the row above. q3 is set to mid, q0 and q1 to mid offset by one
// entry to the left and right, then sum_<type>_lag1_<edge>_neon is called and
// its q0 result is copied to dst.
.macro sum_lag1 type, dst, left, mid, right, edge=mid
        vmov            q3,  \mid
        vext.8          q0,  \left, \mid,   #15
        vext.8          q1,  \mid,  \right, #1
        bl              sum_\type\()_lag1_\edge\()_neon
        vmov            \dst, q0
.endm

// Convenience wrappers around sum_lag1, one per plane type.
.macro sum_y_lag1 dst, left, mid, right, edge=mid
        sum_lag1        y, \dst, \left, \mid, \right, \edge
.endm

.macro sum_uv_444_lag1 dst, left, mid, right, edge=mid
        sum_lag1        uv_444, \dst, \left, \mid, \right, \edge
.endm

.macro sum_uv_422_lag1 dst, left, mid, right, edge=mid
        sum_lag1        uv_422, \dst, \left, \mid, \right, \edge
.endm

.macro sum_uv_420_lag1 dst, left, mid, right, edge=mid
        sum_lag1        uv_420, \dst, \left, \mid, \right, \edge
.endm


function sum_lag2_above_neon
        push            {lr}
501*c0909341SAndroid Build Coastguard Worker sub r12, r0, #2*GRAIN_WIDTH - 16 502*c0909341SAndroid Build Coastguard Worker sub lr, r0, #1*GRAIN_WIDTH - 16 503*c0909341SAndroid Build Coastguard Worker vld1.8 {q10}, [r12] // load top right 504*c0909341SAndroid Build Coastguard Worker vld1.8 {q13}, [lr] 505*c0909341SAndroid Build Coastguard Worker 506*c0909341SAndroid Build Coastguard Worker vext.8 q6, q8, q9, #14 // top left, top mid 507*c0909341SAndroid Build Coastguard Worker vdup.8 d14, d28[0] 508*c0909341SAndroid Build Coastguard Worker vext.8 q8, q8, q9, #15 509*c0909341SAndroid Build Coastguard Worker vdup.8 d15, d28[1] 510*c0909341SAndroid Build Coastguard Worker 511*c0909341SAndroid Build Coastguard Worker vmull.s8 q0, d12, d14 512*c0909341SAndroid Build Coastguard Worker vmull.s8 q1, d13, d14 513*c0909341SAndroid Build Coastguard Worker vmull.s8 q6, d16, d15 514*c0909341SAndroid Build Coastguard Worker vmull.s8 q8, d17, d15 515*c0909341SAndroid Build Coastguard Worker 516*c0909341SAndroid Build Coastguard Worker vaddl.s16 q2, d0, d12 517*c0909341SAndroid Build Coastguard Worker vaddl.s16 q3, d1, d13 518*c0909341SAndroid Build Coastguard Worker vaddl.s16 q4, d2, d16 519*c0909341SAndroid Build Coastguard Worker vaddl.s16 q5, d3, d17 520*c0909341SAndroid Build Coastguard Worker 521*c0909341SAndroid Build Coastguard Worker vext.8 q6, q9, q10, #1 // top mid, top right 522*c0909341SAndroid Build Coastguard Worker vdup.8 d14, d28[3] 523*c0909341SAndroid Build Coastguard Worker vext.8 q8, q9, q10, #2 524*c0909341SAndroid Build Coastguard Worker vdup.8 d15, d28[4] 525*c0909341SAndroid Build Coastguard Worker 526*c0909341SAndroid Build Coastguard Worker vmull.s8 q0, d12, d14 527*c0909341SAndroid Build Coastguard Worker vmull.s8 q1, d13, d14 528*c0909341SAndroid Build Coastguard Worker vmull.s8 q6, d16, d15 529*c0909341SAndroid Build Coastguard Worker vmull.s8 q8, d17, d15 530*c0909341SAndroid Build Coastguard Worker 531*c0909341SAndroid Build Coastguard Worker 
vaddl.s16 q7, d0, d12 532*c0909341SAndroid Build Coastguard Worker vaddl.s16 q0, d1, d13 533*c0909341SAndroid Build Coastguard Worker vaddl.s16 q6, d2, d16 534*c0909341SAndroid Build Coastguard Worker vaddl.s16 q1, d3, d17 535*c0909341SAndroid Build Coastguard Worker 536*c0909341SAndroid Build Coastguard Worker vadd.i32 q2, q2, q7 537*c0909341SAndroid Build Coastguard Worker vadd.i32 q3, q3, q0 538*c0909341SAndroid Build Coastguard Worker vadd.i32 q4, q4, q6 539*c0909341SAndroid Build Coastguard Worker vadd.i32 q5, q5, q1 540*c0909341SAndroid Build Coastguard Worker 541*c0909341SAndroid Build Coastguard Worker vext.8 q6, q11, q12, #14 // top left, top mid 542*c0909341SAndroid Build Coastguard Worker vdup.8 d14, d28[5] 543*c0909341SAndroid Build Coastguard Worker vext.8 q8, q11, q12, #15 544*c0909341SAndroid Build Coastguard Worker vdup.8 d15, d28[6] 545*c0909341SAndroid Build Coastguard Worker 546*c0909341SAndroid Build Coastguard Worker vmull.s8 q0, d12, d14 547*c0909341SAndroid Build Coastguard Worker vmull.s8 q1, d13, d14 548*c0909341SAndroid Build Coastguard Worker vmull.s8 q6, d16, d15 549*c0909341SAndroid Build Coastguard Worker vmull.s8 q8, d17, d15 550*c0909341SAndroid Build Coastguard Worker 551*c0909341SAndroid Build Coastguard Worker vaddl.s16 q7, d0, d12 552*c0909341SAndroid Build Coastguard Worker vaddl.s16 q0, d1, d13 553*c0909341SAndroid Build Coastguard Worker vaddl.s16 q6, d2, d16 554*c0909341SAndroid Build Coastguard Worker vaddl.s16 q1, d3, d17 555*c0909341SAndroid Build Coastguard Worker 556*c0909341SAndroid Build Coastguard Worker vadd.i32 q2, q2, q7 557*c0909341SAndroid Build Coastguard Worker vadd.i32 q3, q3, q0 558*c0909341SAndroid Build Coastguard Worker vadd.i32 q4, q4, q6 559*c0909341SAndroid Build Coastguard Worker vadd.i32 q5, q5, q1 560*c0909341SAndroid Build Coastguard Worker 561*c0909341SAndroid Build Coastguard Worker vext.8 q6, q12, q13, #1 // top mid, top right 562*c0909341SAndroid Build Coastguard Worker vdup.8 d14, d29[0] 
563*c0909341SAndroid Build Coastguard Worker vext.8 q8, q12, q13, #2 564*c0909341SAndroid Build Coastguard Worker vdup.8 d15, d29[1] 565*c0909341SAndroid Build Coastguard Worker 566*c0909341SAndroid Build Coastguard Worker vmull.s8 q0, d12, d14 567*c0909341SAndroid Build Coastguard Worker vmull.s8 q1, d13, d14 568*c0909341SAndroid Build Coastguard Worker vmull.s8 q6, d16, d15 569*c0909341SAndroid Build Coastguard Worker vmull.s8 q8, d17, d15 570*c0909341SAndroid Build Coastguard Worker 571*c0909341SAndroid Build Coastguard Worker vaddl.s16 q7, d0, d12 572*c0909341SAndroid Build Coastguard Worker vaddl.s16 q0, d1, d13 573*c0909341SAndroid Build Coastguard Worker vaddl.s16 q6, d2, d16 574*c0909341SAndroid Build Coastguard Worker vaddl.s16 q1, d3, d17 575*c0909341SAndroid Build Coastguard Worker 576*c0909341SAndroid Build Coastguard Worker vadd.i32 q2, q2, q7 577*c0909341SAndroid Build Coastguard Worker vadd.i32 q3, q3, q0 578*c0909341SAndroid Build Coastguard Worker vadd.i32 q4, q4, q6 579*c0909341SAndroid Build Coastguard Worker vadd.i32 q5, q5, q1 580*c0909341SAndroid Build Coastguard Worker 581*c0909341SAndroid Build Coastguard Worker vdup.8 d14, d28[2] 582*c0909341SAndroid Build Coastguard Worker vdup.8 d15, d28[7] 583*c0909341SAndroid Build Coastguard Worker 584*c0909341SAndroid Build Coastguard Worker vmull.s8 q0, d18, d14 585*c0909341SAndroid Build Coastguard Worker vmull.s8 q1, d19, d14 586*c0909341SAndroid Build Coastguard Worker vmull.s8 q6, d24, d15 587*c0909341SAndroid Build Coastguard Worker vmull.s8 q8, d25, d15 588*c0909341SAndroid Build Coastguard Worker 589*c0909341SAndroid Build Coastguard Worker vaddl.s16 q7, d0, d12 590*c0909341SAndroid Build Coastguard Worker vaddl.s16 q0, d1, d13 591*c0909341SAndroid Build Coastguard Worker vaddl.s16 q6, d2, d16 592*c0909341SAndroid Build Coastguard Worker vaddl.s16 q1, d3, d17 593*c0909341SAndroid Build Coastguard Worker 594*c0909341SAndroid Build Coastguard Worker vmov q8, q9 595*c0909341SAndroid Build 
Coastguard Worker vmov q9, q10 596*c0909341SAndroid Build Coastguard Worker 597*c0909341SAndroid Build Coastguard Worker vadd.i32 q2, q2, q7 598*c0909341SAndroid Build Coastguard Worker vadd.i32 q3, q3, q0 599*c0909341SAndroid Build Coastguard Worker vadd.i32 q4, q4, q6 600*c0909341SAndroid Build Coastguard Worker vadd.i32 q5, q5, q1 601*c0909341SAndroid Build Coastguard Worker 602*c0909341SAndroid Build Coastguard Worker vmov q11, q12 603*c0909341SAndroid Build Coastguard Worker vmov q12, q13 604*c0909341SAndroid Build Coastguard Worker 605*c0909341SAndroid Build Coastguard Worker pop {pc} 606*c0909341SAndroid Build Coastguard Workerendfunc 607*c0909341SAndroid Build Coastguard Worker 608*c0909341SAndroid Build Coastguard Worker.macro sum_lag2_func type, uv_layout, edge, elems=16 609*c0909341SAndroid Build Coastguard Workerfunction sum_\type\()_lag2_\edge\()_neon 610*c0909341SAndroid Build Coastguard Worker push {r1, lr} 611*c0909341SAndroid Build Coastguard Worker.ifc \edge, left 612*c0909341SAndroid Build Coastguard Worker sub r12, r0, #2*GRAIN_WIDTH 613*c0909341SAndroid Build Coastguard Worker sub lr, r0, #1*GRAIN_WIDTH 614*c0909341SAndroid Build Coastguard Worker vld1.8 {q9}, [r12] // load the previous block right above 615*c0909341SAndroid Build Coastguard Worker vld1.8 {q12}, [lr] 616*c0909341SAndroid Build Coastguard Worker.endif 617*c0909341SAndroid Build Coastguard Worker sum_lag_n_body lag2, \type, \uv_layout, \edge, \elems, store=1, uv_coeff=d29[4] 618*c0909341SAndroid Build Coastguard Workerendfunc 619*c0909341SAndroid Build Coastguard Worker.endm 620*c0909341SAndroid Build Coastguard Worker 621*c0909341SAndroid Build Coastguard Workersum_lag2_func y, 0, left 622*c0909341SAndroid Build Coastguard Workersum_lag2_func y, 0, mid 623*c0909341SAndroid Build Coastguard Workersum_lag2_func y, 0, right, 15 624*c0909341SAndroid Build Coastguard Workersum_lag2_func uv_444, 444, left 625*c0909341SAndroid Build Coastguard Workersum_lag2_func uv_444, 444, mid 
sum_lag2_func uv_444, 444, right, 15
sum_lag2_func uv_422, 422, left
sum_lag2_func uv_422, 422, mid
sum_lag2_func uv_422, 422, right, 9
sum_lag2_func uv_420, 420, left
sum_lag2_func uv_420, 420, mid
sum_lag2_func uv_420, 420, right, 9


// sum_lag3_left_above_neon
//
// Left-edge entry point for sum_lag3_above_neon. The row three above is
// loaded from its start instead of three bytes before it, so nothing ahead
// of that row is read. Both vectors are then shifted up by three lanes
// (vext #13) so the lane layout matches the regular path; lanes 0-2 of q11
// end up holding bytes wrapped around from the top of the same vector.
// Continues at sum_lag3_above_start and returns from there.
function sum_lag3_left_above_neon
        sub         r12, r0, #3*GRAIN_WIDTH
        vld1.8      {q11, q12}, [r12]
        vext.8      q12, q11, q12, #13
        vext.8      q11, q11, q11, #13
        b           sum_lag3_above_start
endfunc

// sum_lag3_above_neon
//
// Accumulates the 21 lag-3 filter taps that lie in the three rows above the
// current position, for 16 output lanes at once. Each row is loaded as 32
// bytes starting three bytes left of the column, and the seven taps of a row
// are taken with vext offsets #0-#6.
//
// In:   r0       output pointer; the rows above are read relative to it
//       d26[0-7], d27[0-7], d28[0-4]  signed 8-bit coefficients, used as
//                row -3: d26[0] .. d26[6]
//                row -2: d26[7], d27[0] .. d27[5]
//                row -1: d27[6], d27[7], d28[0] .. d28[4]
// Out:  q2-q5    sixteen 32-bit sums
// Clobbers: q0, q1, q6-q12, r12. Leaf routine: no stack use, returns with
//       bx lr.
function sum_lag3_above_neon
        sub         r12, r0, #(3*GRAIN_WIDTH + 3)
        vld1.8      {q11, q12}, [r12]

sum_lag3_above_start:
        // Row -3, taps 0 and 1.
        vdup.8      d20, d26[0]
        vext.8      q9, q11, q12, #1
        vdup.8      d21, d26[1]

        vmull.s8    q0, d22, d20
        vmull.s8    q1, d23, d20
        vmull.s8    q6, d18, d21
        vmull.s8    q7, d19, d21

        // Row -3, taps 2 and 3: shuffle while the products above settle.
        vext.8      q8, q11, q12, #2
        vdup.8      d20, d26[2]
        vext.8      q9, q11, q12, #3
        vdup.8      d21, d26[3]

        // First pair of taps initialises the 32-bit accumulators.
        vaddl.s16   q2, d0, d12
        vaddl.s16   q3, d1, d13
        vaddl.s16   q4, d2, d14
        vaddl.s16   q5, d3, d15

        vmull.s8    q0, d16, d20
        vmull.s8    q1, d17, d20
        vmull.s8    q6, d18, d21
        vmull.s8    q7, d19, d21

        vaddl.s16   q8, d0, d12
        vaddl.s16   q9, d1, d13
        vaddl.s16   q0, d2, d14
        vaddl.s16   q1, d3, d15

        // Row -3, taps 4 and 5.
        vext.8      q6, q11, q12, #4
        vdup.8      d20, d26[4]
        vext.8      q7, q11, q12, #5
        vdup.8      d21, d26[5]

        vadd.i32    q2, q2, q8
        vadd.i32    q3, q3, q9
        vadd.i32    q4, q4, q0
        vadd.i32    q5, q5, q1

        vmull.s8    q0, d12, d20
        vmull.s8    q1, d13, d20
        vmull.s8    q8, d14, d21
        vmull.s8    q9, d15, d21

        // Address of row -2, again three bytes left of the column.
        sub         r12, r0, #(2*GRAIN_WIDTH + 3)

        vaddl.s16   q6, d0, d16
        vaddl.s16   q7, d1, d17
        vaddl.s16   q0, d2, d18
        vaddl.s16   q1, d3, d19

        // Row -3, tap 6, taken before q11/q12 are reloaded with row -2;
        // it is paired with tap 0 of row -2.
        vext.8      q8, q11, q12, #6
        vld1.8      {q11, q12}, [r12]
        vdup.8      d20, d26[6]
        vdup.8      d21, d26[7]

        vadd.i32    q2, q2, q6
        vadd.i32    q3, q3, q7
        vadd.i32    q4, q4, q0
        vadd.i32    q5, q5, q1

        vmull.s8    q0, d16, d20
        vmull.s8    q1, d17, d20
        vmull.s8    q6, d22, d21
        vmull.s8    q7, d23, d21

        vaddl.s16   q8, d0, d12
        vaddl.s16   q9, d1, d13
        vaddl.s16   q0, d2, d14
        vaddl.s16   q1, d3, d15

        // Row -2, taps 1 and 2.
        vext.8      q6, q11, q12, #1
        vdup.8      d20, d27[0]
        vext.8      q7, q11, q12, #2
        vdup.8      d21, d27[1]

        vadd.i32    q2, q2, q8
        vadd.i32    q3, q3, q9
        vadd.i32    q4, q4, q0
        vadd.i32    q5, q5, q1

        vmull.s8    q0, d12, d20
        vmull.s8    q1, d13, d20
        vmull.s8    q8, d14, d21
        vmull.s8    q9, d15, d21

        vaddl.s16   q6, d0, d16
        vaddl.s16   q7, d1, d17
        vaddl.s16   q0, d2, d18
        vaddl.s16   q1, d3, d19

        // Row -2, taps 3 and 4.
        vext.8      q8, q11, q12, #3
        vdup.8      d20, d27[2]
        vext.8      q9, q11, q12, #4
        vdup.8      d21, d27[3]

        vadd.i32    q2, q2, q6
        vadd.i32    q3, q3, q7
        vadd.i32    q4, q4, q0
        vadd.i32    q5, q5, q1

        vmull.s8    q0, d16, d20
        vmull.s8    q1, d17, d20
        vmull.s8    q6, d18, d21
        vmull.s8    q7, d19, d21

        // Address of row -1, three bytes left of the column.
        sub         r12, r0, #(1*GRAIN_WIDTH + 3)

        vaddl.s16   q8, d0, d12
        vaddl.s16   q9, d1, d13
        vaddl.s16   q0, d2, d14
        vaddl.s16   q1, d3, d15

        // Row -2, taps 5 and 6, taken before the reload below.
        vext.8      q6, q11, q12, #5
        vdup.8      d20, d27[4]
        vext.8      q7, q11, q12, #6
        vdup.8      d21, d27[5]

        vld1.8      {q11, q12}, [r12]       // q11/q12 now hold row -1

        vadd.i32    q2, q2, q8
        vadd.i32    q3, q3, q9
        vadd.i32    q4, q4, q0
        vadd.i32    q5, q5, q1

        vmull.s8    q0, d12, d20
        vmull.s8    q1, d13, d20
        vmull.s8    q8, d14, d21
        vmull.s8    q9, d15, d21

        vaddl.s16   q6, d0, d16
        vaddl.s16   q7, d1, d17
        vaddl.s16   q0, d2, d18
        vaddl.s16   q1, d3, d19

        // Row -1, taps 0 and 1.
        vdup.8      d20, d27[6]
        vext.8      q9, q11, q12, #1
        vdup.8      d21, d27[7]

        vadd.i32    q2, q2, q6
        vadd.i32    q3, q3, q7
        vadd.i32    q4, q4, q0
        vadd.i32    q5, q5, q1

        vmull.s8    q0, d22, d20
        vmull.s8    q1, d23, d20
        vmull.s8    q6, d18, d21
        vmull.s8    q7, d19, d21

        vaddl.s16   q8, d0, d12
        vaddl.s16   q9, d1, d13
        vaddl.s16   q0, d2, d14
        vaddl.s16   q1, d3, d15

        // Row -1, taps 2 and 3.
        vext.8      q6, q11, q12, #2
        vdup.8      d20, d28[0]
        vext.8      q7, q11, q12, #3
        vdup.8      d21, d28[1]

        vadd.i32    q2, q2, q8
        vadd.i32    q3, q3, q9
        vadd.i32    q4, q4, q0
        vadd.i32    q5, q5, q1

        vmull.s8    q0, d12, d20
        vmull.s8    q1, d13, d20
        vmull.s8    q8, d14, d21
        vmull.s8    q9, d15, d21

        vaddl.s16   q6, d0, d16
        vaddl.s16   q7, d1, d17
        vaddl.s16   q0, d2, d18
        vaddl.s16   q1, d3, d19

        // Row -1, taps 4 and 5.
        vext.8      q8, q11, q12, #4
        vdup.8      d20, d28[2]
        vext.8      q9, q11, q12, #5
        vdup.8      d21, d28[3]

        vadd.i32    q2, q2, q6
        vadd.i32    q3, q3, q7
        vadd.i32    q4, q4, q0
        vadd.i32    q5, q5, q1

        vmull.s8    q0, d16, d20
        vmull.s8    q1, d17, d20
        vmull.s8    q6, d18, d21
        vmull.s8    q7, d19, d21

        vaddl.s16   q8, d0, d12
        vaddl.s16   q9, d1, d13
        vaddl.s16   q0, d2, d14
        vaddl.s16   q1, d3, d15

        // Row -1, tap 6: the odd one out, so it has no partner product.
        vext.8      q6, q11, q12, #6
        vdup.8      d20, d28[4]

        vadd.i32    q2, q2, q8
        vadd.i32    q3, q3, q9
        vadd.i32    q4, q4, q0
        vadd.i32    q5, q5, q1

        vmull.s8    q0, d12, d20
        vmull.s8    q1, d13, d20

        // Widen and add the single remaining set of products directly.
        vaddw.s16   q2, q2, d0
        vaddw.s16   q3, q3, d1
        vaddw.s16   q4, q4, d2
        vaddw.s16   q5, q5, d3

        bx          lr
endfunc

// sum_lag3_func type, uv_layout, edge, elems=16
//
// Emits sum_<type>_lag3_<edge>_neon. The function saves r1 and lr; the rest
// of the body, including the return, comes from sum_lag_n_body, expanded
// with store=1 and uv_coeff=d29[0].
.macro sum_lag3_func type, uv_layout, edge, elems=16
function sum_\type\()_lag3_\edge\()_neon
        push        {r1, lr}
        sum_lag_n_body lag3, \type, \uv_layout, \edge, \elems, store=1, uv_coeff=d29[0]
endfunc
.endm

sum_lag3_func y, 0, left
sum_lag3_func y, 0, mid
sum_lag3_func y, 0, right, 15
sum_lag3_func uv_444, 444, left
sum_lag3_func uv_444, 444, mid
sum_lag3_func uv_444, 444, right, 15
sum_lag3_func uv_422, 422, left
sum_lag3_func uv_422, 422, mid
sum_lag3_func uv_422, 422, right, 9
sum_lag3_func uv_420, 420, left
sum_lag3_func uv_420, 420, mid
sum_lag3_func uv_420, 420, right, 9

// generate_grain_rows_neon
//
// Produces r1 rows by expanding get_grain_row followed by store_grain_row
// with d16-d26 as the row registers. The body always runs at least once;
// r1 is counted down to zero.
// The flags set by subs are consumed by bgt only after store_grain_row, so
// that macro is relied upon to leave the condition flags alone.
// r11 and lr are saved on entry and restored on exit.
function generate_grain_rows_neon
        push        {r11, lr}
1:
        get_grain_row d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26
        subs        r1, r1, #1
        store_grain_row d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26
        bgt         1b
        pop         {r11, pc}
endfunc

// generate_grain_rows_44_neon
//
// Same loop as generate_grain_rows_neon, built from get_grain_row_44 and
// store_grain_row_44 with d16-d21 as the row registers. r1 is the row count.
function generate_grain_rows_44_neon
        push        {r11, lr}
1:
        get_grain_row_44 d16, d17, d18, d19, d20, d21
        subs        r1, r1, #1
        store_grain_row_44 d16, d17, d18, d19, d20, d21
        bgt         1b
        pop         {r11, pc}
endfunc

// gen_grain_uv_444_lag0_neon
//
// Produces 16 output bytes at [r0] from 16 fresh random values plus a scaled
// copy of 16 bytes read from [r11].
//
// In:   r0   destination, advanced by 16
//       r11  source bytes, advanced by 16 (the advanced value is preserved
//            across the get_gaussian_neon calls by the push/pop pair)
//       q1   byte mask ANDed onto the source bytes
//       d22  signed 8-bit coefficient in every lane
//       q12  per-lane shift for vrshl applied to the 16-bit products
//       q15  per-lane shift for vrshl applied to each get_gaussian_neon result
// Clobbers: q0, q2, q3, q8, q9, plus whatever get_gaussian_neon clobbers.
function gen_grain_uv_444_lag0_neon
        vld1.8      {q3}, [r11]!
        push        {r11, lr}

        // Two calls give 2 x 8 values in q0; shift each and narrow to bytes.
        bl          get_gaussian_neon
        vrshl.s16   q8, q0, q15
        bl          get_gaussian_neon
        vrshl.s16   q9, q0, q15
        vqmovn.s16  d0, q8
        vqmovn.s16  d1, q9

        // Masked source * coefficient, rounding shift, add the new values,
        // then saturate back to 8 bits.
        vand        q3, q3, q1
        vmull.s8    q2, d6, d22
        vmull.s8    q3, d7, d22
        vrshl.s16   q2, q2, q12
        vrshl.s16   q3, q3, q12
        vaddw.s8    q2, q2, d0
        vaddw.s8    q3, q3, d1
        vqmovn.s16  d4, q2
        vqmovn.s16  d5, q3
        vst1.8      {q2}, [r0]!
        pop         {r11, pc}
endfunc

// get_grain_row_44_neon
//
// Callable wrapper around a single get_grain_row_44 expansion using d16-d21;
// r11 and lr are saved around it.
function get_grain_row_44_neon
        push        {r11, lr}
        get_grain_row_44 d16, d17, d18, d19, d20, d21
        pop         {r11, pc}
endfunc

// add_uv_420_coeff_lag0_neon
//
// Reads 32 bytes from each of [r11] and [r12] (both advanced by 32), sums
// every 2x2 group of bytes and narrows with a rounding shift by 2, giving 16
// bytes in q2. Falls into the shared tail at add_coeff_lag0_start.
// Clobbers q3, q4 and q5 in addition to what the shared tail uses.
function add_uv_420_coeff_lag0_neon
        vld1.16     {q2, q3}, [r11]!
        vld1.16     {q4, q5}, [r12]!
        vpaddl.s8   q2, q2                  // horizontal pairs, first row
        vpaddl.s8   q3, q3
        vpaddl.s8   q4, q4                  // horizontal pairs, second row
        vpaddl.s8   q5, q5
        vadd.i16    q2, q2, q4              // add the two rows
        vadd.i16    q3, q3, q5
        vrshrn.s16  d4, q2, #2
        vrshrn.s16  d5, q3, #2
        b           add_coeff_lag0_start
endfunc

// add_uv_422_coeff_lag0_neon
//
// Reads 32 bytes from [r11] (advanced by 32), sums horizontal byte pairs and
// narrows with a rounding shift by 1, giving 16 bytes in q2, then runs the
// shared tail below.
//
// Shared tail (add_coeff_lag0_start):
// In:   q2   16 source bytes
//       q1   byte mask
//       d22  signed 8-bit coefficient in every lane
//       q12  per-lane shift for vrshl applied to the 16-bit products
//       q0   16 bytes (d0, d1) added after the shift
// Out:  q2   16 saturated result bytes (nothing is stored here)
// Clobbers: q3. Returns with bx lr.
function add_uv_422_coeff_lag0_neon
        vld1.16     {q2, q3}, [r11]!
        vpaddl.s8   q2, q2
        vpaddl.s8   q3, q3
        vrshrn.s16  d4, q2, #1
        vrshrn.s16  d5, q3, #1

add_coeff_lag0_start:
        vand        q3, q2, q1
        vmull.s8    q2, d6, d22
        vmull.s8    q3, d7, d22
        vrshl.s16   q2, q2, q12
        vrshl.s16   q3, q3, q12
        vaddw.s8    q2, q2, d0
        vaddw.s8    q3, q3, d1
        vqmovn.s16  d4, q2
        vqmovn.s16  d5, q3
        bx          lr
endfunc

.macro gen_grain_82 type
967*c0909341SAndroid Build Coastguard Workerfunction generate_grain_\type\()_8bpc_neon, export=1 968*c0909341SAndroid Build Coastguard Worker push {r4-r11,lr} 969*c0909341SAndroid Build Coastguard Worker 970*c0909341SAndroid Build Coastguard Worker.ifc \type, uv_444 971*c0909341SAndroid Build Coastguard Worker mov r12, r3 972*c0909341SAndroid Build Coastguard Worker mov lr, #28 973*c0909341SAndroid Build Coastguard Worker add r11, r1, #3*GRAIN_WIDTH 974*c0909341SAndroid Build Coastguard Worker mov r1, r2 975*c0909341SAndroid Build Coastguard Worker mul r12, r12, lr 976*c0909341SAndroid Build Coastguard Worker.endif 977*c0909341SAndroid Build Coastguard Worker movrel r3, X(gaussian_sequence) 978*c0909341SAndroid Build Coastguard Worker ldr r2, [r1, #FGD_SEED] 979*c0909341SAndroid Build Coastguard Worker ldr r9, [r1, #FGD_GRAIN_SCALE_SHIFT] 980*c0909341SAndroid Build Coastguard Worker.ifc \type, y 981*c0909341SAndroid Build Coastguard Worker add r4, r1, #FGD_AR_COEFFS_Y 982*c0909341SAndroid Build Coastguard Worker.else 983*c0909341SAndroid Build Coastguard Worker add r4, r1, #FGD_AR_COEFFS_UV 984*c0909341SAndroid Build Coastguard Worker.endif 985*c0909341SAndroid Build Coastguard Worker adr r5, L(gen_grain_\type\()_tbl) 986*c0909341SAndroid Build Coastguard Worker ldr r6, [r1, #FGD_AR_COEFF_LAG] 987*c0909341SAndroid Build Coastguard Worker add r9, r9, #4 988*c0909341SAndroid Build Coastguard Worker ldr r6, [r5, r6, lsl #2] 989*c0909341SAndroid Build Coastguard Worker vdup.16 q15, r9 // 4 + data->grain_scale_shift 990*c0909341SAndroid Build Coastguard Worker add r5, r5, r6 991*c0909341SAndroid Build Coastguard Worker vneg.s16 q15, q15 992*c0909341SAndroid Build Coastguard Worker 993*c0909341SAndroid Build Coastguard Worker.ifc \type, uv_444 994*c0909341SAndroid Build Coastguard Worker cmp r12, #0 995*c0909341SAndroid Build Coastguard Worker movw r10, #0x49d8 996*c0909341SAndroid Build Coastguard Worker movw lr, #0xb524 997*c0909341SAndroid Build Coastguard Worker // 
Intentionally using a separate register instead of moveq with an 998*c0909341SAndroid Build Coastguard Worker // immediate constant, to avoid armv8 deprecated it instruction forms. 999*c0909341SAndroid Build Coastguard Worker it eq 1000*c0909341SAndroid Build Coastguard Worker moveq r10, lr 1001*c0909341SAndroid Build Coastguard Worker add r4, r4, r12 // Add offset to ar_coeffs_uv[1] 1002*c0909341SAndroid Build Coastguard Worker eor r2, r2, r10 1003*c0909341SAndroid Build Coastguard Worker.endif 1004*c0909341SAndroid Build Coastguard Worker 1005*c0909341SAndroid Build Coastguard Worker ldr r7, [r1, #FGD_AR_COEFF_SHIFT] 1006*c0909341SAndroid Build Coastguard Worker mov r8, #1 1007*c0909341SAndroid Build Coastguard Worker mov r10, #1 1008*c0909341SAndroid Build Coastguard Worker lsl r8, r8, r7 // 1 << ar_coeff_shift 1009*c0909341SAndroid Build Coastguard Worker lsl r10, r10, r9 // 1 << (4 + data->grain_scale_shift) 1010*c0909341SAndroid Build Coastguard Worker lsr r8, r8, #1 // 1 << (ar_coeff_shift - 1) 1011*c0909341SAndroid Build Coastguard Worker lsr r10, r10, #1 // 1 << (4 + data->grain_scale_shift - 1) 1012*c0909341SAndroid Build Coastguard Worker 1013*c0909341SAndroid Build Coastguard Worker bx r5 1014*c0909341SAndroid Build Coastguard Worker 1015*c0909341SAndroid Build Coastguard Worker .align 2 1016*c0909341SAndroid Build Coastguard WorkerL(gen_grain_\type\()_tbl): 1017*c0909341SAndroid Build Coastguard Worker .word L(generate_grain_\type\()_lag0) - L(gen_grain_\type\()_tbl) + CONFIG_THUMB 1018*c0909341SAndroid Build Coastguard Worker .word L(generate_grain_\type\()_lag1) - L(gen_grain_\type\()_tbl) + CONFIG_THUMB 1019*c0909341SAndroid Build Coastguard Worker .word L(generate_grain_\type\()_lag2) - L(gen_grain_\type\()_tbl) + CONFIG_THUMB 1020*c0909341SAndroid Build Coastguard Worker .word L(generate_grain_\type\()_lag3) - L(gen_grain_\type\()_tbl) + CONFIG_THUMB 1021*c0909341SAndroid Build Coastguard Worker 1022*c0909341SAndroid Build Coastguard 
WorkerL(generate_grain_\type\()_lag0): 1023*c0909341SAndroid Build Coastguard Worker.ifc \type, y 1024*c0909341SAndroid Build Coastguard Worker mov r1, #GRAIN_HEIGHT 1025*c0909341SAndroid Build Coastguard Worker bl generate_grain_rows_neon 1026*c0909341SAndroid Build Coastguard Worker.else 1027*c0909341SAndroid Build Coastguard Worker 1028*c0909341SAndroid Build Coastguard Worker mov r1, #3 1029*c0909341SAndroid Build Coastguard Worker bl generate_grain_rows_neon 1030*c0909341SAndroid Build Coastguard Worker mov r1, #GRAIN_HEIGHT-3 1031*c0909341SAndroid Build Coastguard Worker 1032*c0909341SAndroid Build Coastguard Worker vdup.16 q12, r7 1033*c0909341SAndroid Build Coastguard Worker vld1.8 {d22[]}, [r4] // ar_coeffs_uv[0] 1034*c0909341SAndroid Build Coastguard Worker vmov.i8 q0, #0 1035*c0909341SAndroid Build Coastguard Worker vmov.i8 q1, #255 1036*c0909341SAndroid Build Coastguard Worker vext.8 q13, q0, q1, #13 1037*c0909341SAndroid Build Coastguard Worker vext.8 q14, q1, q0, #1 1038*c0909341SAndroid Build Coastguard Worker vneg.s16 q12, q12 1039*c0909341SAndroid Build Coastguard Worker 1040*c0909341SAndroid Build Coastguard Worker1: 1041*c0909341SAndroid Build Coastguard Worker vmov q1, q13 1042*c0909341SAndroid Build Coastguard Worker bl gen_grain_uv_444_lag0_neon // 16 1043*c0909341SAndroid Build Coastguard Worker vmov.i8 q1, #255 1044*c0909341SAndroid Build Coastguard Worker bl gen_grain_uv_444_lag0_neon // 32 1045*c0909341SAndroid Build Coastguard Worker bl gen_grain_uv_444_lag0_neon // 48 1046*c0909341SAndroid Build Coastguard Worker bl gen_grain_uv_444_lag0_neon // 64 1047*c0909341SAndroid Build Coastguard Worker vmov q1, q14 1048*c0909341SAndroid Build Coastguard Worker bl gen_grain_uv_444_lag0_neon // 80 1049*c0909341SAndroid Build Coastguard Worker get_grain_2 d16 1050*c0909341SAndroid Build Coastguard Worker subs r1, r1, #1 1051*c0909341SAndroid Build Coastguard Worker add r11, r11, #2 1052*c0909341SAndroid Build Coastguard Worker vst1.16 {d16[0]}, 
[r0]! 1053*c0909341SAndroid Build Coastguard Worker bgt 1b 1054*c0909341SAndroid Build Coastguard Worker.endif 1055*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 1056*c0909341SAndroid Build Coastguard Worker 1057*c0909341SAndroid Build Coastguard WorkerL(generate_grain_\type\()_lag1): 1058*c0909341SAndroid Build Coastguard Worker vpush {q4-q7} 1059*c0909341SAndroid Build Coastguard Worker mov r5, #127 1060*c0909341SAndroid Build Coastguard Worker vld1.8 {d27[]}, [r4]! // ar_coeffs_y[0] 1061*c0909341SAndroid Build Coastguard Worker vld1.8 {d28[]}, [r4]! // ar_coeffs_y[1] 1062*c0909341SAndroid Build Coastguard Worker vld1.8 {d29[]}, [r4] // ar_coeffs_y[2] 1063*c0909341SAndroid Build Coastguard Worker.ifc \type, y 1064*c0909341SAndroid Build Coastguard Worker ldrsb r4, [r4, #1] // ar_coeffs_y[3] 1065*c0909341SAndroid Build Coastguard Worker.else 1066*c0909341SAndroid Build Coastguard Worker add r4, r4, #2 1067*c0909341SAndroid Build Coastguard Worker.endif 1068*c0909341SAndroid Build Coastguard Worker 1069*c0909341SAndroid Build Coastguard Worker mov r1, #3 1070*c0909341SAndroid Build Coastguard Worker.ifc \type, uv_444 1071*c0909341SAndroid Build Coastguard Worker vld1.8 {d13[]}, [r4] // ar_coeffs_uv[4] 1072*c0909341SAndroid Build Coastguard Worker ldrsb r4, [r4, #-1] // ar_coeffs_uv[3] 1073*c0909341SAndroid Build Coastguard Worker.endif 1074*c0909341SAndroid Build Coastguard Worker bl generate_grain_rows_neon 1075*c0909341SAndroid Build Coastguard Worker 1076*c0909341SAndroid Build Coastguard Worker mov r1, #GRAIN_HEIGHT - 3 1077*c0909341SAndroid Build Coastguard Worker1: 1078*c0909341SAndroid Build Coastguard Worker sum_\type\()_lag1 q7, q8, q8, q9, left 1079*c0909341SAndroid Build Coastguard Worker sum_\type\()_lag1 q8, q8, q9, q10 1080*c0909341SAndroid Build Coastguard Worker sum_\type\()_lag1 q9, q9, q10, q11 1081*c0909341SAndroid Build Coastguard Worker sum_\type\()_lag1 q10, q10, q11, q12 1082*c0909341SAndroid Build Coastguard Worker 
sum_\type\()_lag1 q12, q11, q12, q13, right 1083*c0909341SAndroid Build Coastguard Worker get_grain_2 d26 1084*c0909341SAndroid Build Coastguard Worker subs r1, r1, #1 1085*c0909341SAndroid Build Coastguard Worker.ifc \type, uv_444 1086*c0909341SAndroid Build Coastguard Worker add r11, r11, #2 1087*c0909341SAndroid Build Coastguard Worker.endif 1088*c0909341SAndroid Build Coastguard Worker store_grain_row d14, d15, d16, d17, d18, d19, d20, d21, d24, d25, d26 1089*c0909341SAndroid Build Coastguard Worker vmov q11, q10 1090*c0909341SAndroid Build Coastguard Worker vmov q10, q9 1091*c0909341SAndroid Build Coastguard Worker vmov q9, q8 1092*c0909341SAndroid Build Coastguard Worker vmov q8, q7 1093*c0909341SAndroid Build Coastguard Worker bgt 1b 1094*c0909341SAndroid Build Coastguard Worker 1095*c0909341SAndroid Build Coastguard Worker vpop {q4-q7} 1096*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 1097*c0909341SAndroid Build Coastguard Worker 1098*c0909341SAndroid Build Coastguard WorkerL(generate_grain_\type\()_lag2): 1099*c0909341SAndroid Build Coastguard Worker vpush {q4-q7} 1100*c0909341SAndroid Build Coastguard Worker mov r5, #127 1101*c0909341SAndroid Build Coastguard Worker vld1.8 {d28,d29}, [r4] // ar_coeffs_y[0-11], ar_coeffs_uv[0-12] 1102*c0909341SAndroid Build Coastguard Worker 1103*c0909341SAndroid Build Coastguard Worker vmov.s8 r4, d29[2] 1104*c0909341SAndroid Build Coastguard Worker vmov.s8 r10, d29[3] 1105*c0909341SAndroid Build Coastguard Worker 1106*c0909341SAndroid Build Coastguard Worker mov r1, #3 1107*c0909341SAndroid Build Coastguard Worker bl generate_grain_rows_neon 1108*c0909341SAndroid Build Coastguard Worker 1109*c0909341SAndroid Build Coastguard Worker mov r1, #GRAIN_HEIGHT - 3 1110*c0909341SAndroid Build Coastguard Worker1: 1111*c0909341SAndroid Build Coastguard Worker bl sum_\type\()_lag2_left_neon 1112*c0909341SAndroid Build Coastguard Worker bl sum_\type\()_lag2_mid_neon 1113*c0909341SAndroid Build Coastguard Worker bl 
sum_\type\()_lag2_mid_neon 1114*c0909341SAndroid Build Coastguard Worker bl sum_\type\()_lag2_mid_neon 1115*c0909341SAndroid Build Coastguard Worker bl sum_\type\()_lag2_right_neon 1116*c0909341SAndroid Build Coastguard Worker get_grain_2 d16 1117*c0909341SAndroid Build Coastguard Worker subs r1, r1, #1 1118*c0909341SAndroid Build Coastguard Worker.ifc \type, uv_444 1119*c0909341SAndroid Build Coastguard Worker add r11, r11, #2 1120*c0909341SAndroid Build Coastguard Worker.endif 1121*c0909341SAndroid Build Coastguard Worker vst1.16 {d16[0]}, [r0]! 1122*c0909341SAndroid Build Coastguard Worker bgt 1b 1123*c0909341SAndroid Build Coastguard Worker 1124*c0909341SAndroid Build Coastguard Worker vpop {q4-q7} 1125*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 1126*c0909341SAndroid Build Coastguard Worker 1127*c0909341SAndroid Build Coastguard WorkerL(generate_grain_\type\()_lag3): 1128*c0909341SAndroid Build Coastguard Worker vpush {q4-q7} 1129*c0909341SAndroid Build Coastguard Worker mov r5, #127 1130*c0909341SAndroid Build Coastguard Worker vld1.8 {q13, q14}, [r4] // ar_coeffs_y[0-23], ar_coeffs_uv[0-24] 1131*c0909341SAndroid Build Coastguard Worker 1132*c0909341SAndroid Build Coastguard Worker vmov.u8 r4, d28[5] 1133*c0909341SAndroid Build Coastguard Worker vmov.u8 r10, d28[6] 1134*c0909341SAndroid Build Coastguard Worker vmov.u8 r12, d28[7] 1135*c0909341SAndroid Build Coastguard Worker 1136*c0909341SAndroid Build Coastguard Worker orr r4, r4, r10, lsl #8 1137*c0909341SAndroid Build Coastguard Worker orr r4, r4, r12, lsl #16 1138*c0909341SAndroid Build Coastguard Worker 1139*c0909341SAndroid Build Coastguard Worker mov r1, #3 1140*c0909341SAndroid Build Coastguard Worker vpush {d26} 1141*c0909341SAndroid Build Coastguard Worker bl generate_grain_rows_neon 1142*c0909341SAndroid Build Coastguard Worker vpop {d26} 1143*c0909341SAndroid Build Coastguard Worker 1144*c0909341SAndroid Build Coastguard Worker mov r1, #GRAIN_HEIGHT - 3 1145*c0909341SAndroid Build 
// Tail of the gen_grain_82 lag3 path: one iteration per remaining grain row.
1:
        bl              sum_\type\()_lag3_left_neon
        bl              sum_\type\()_lag3_mid_neon
        bl              sum_\type\()_lag3_mid_neon
        bl              sum_\type\()_lag3_mid_neon
        bl              sum_\type\()_lag3_right_neon
        get_grain_2     d16
        subs            r1,  r1,  #1
.ifc \type, uv_444
        add             r11, r11, #2
.endif
        vst1.16         {d16[0]}, [r0]!
        bgt             1b

        vpop            {q4-q7}
        pop             {r4-r11,pc}
endfunc
.endm

gen_grain_82 y
gen_grain_82 uv_444

// set_height dst, type
//
// Loads the row count used by the main loops of gen_grain_44, i.e. the rows
// left after the first three have been produced by generate_grain_rows_44_neon:
// SUB_GRAIN_HEIGHT-3 for uv_420, GRAIN_HEIGHT-3 for every other type.
.macro set_height dst, type
.ifc \type, uv_420
        mov             \dst,  #SUB_GRAIN_HEIGHT-3
.else
        mov             \dst,  #GRAIN_HEIGHT-3
.endif
.endm

// increment_y_ptr reg, type
//
// Applies the fixed per-row adjustment to the pointer in reg:
//   uv_420:    reg += 2*GRAIN_WIDTH - 3*32
//   otherwise: reg -= 3*32 - GRAIN_WIDTH
// Uses add/sub without the s suffix, so condition flags set by a preceding
// subs stay valid for the bgt that closes each row loop.
// NOTE(review): the 3*32 term presumably compensates for post-increments
// performed by the row helpers that read through this pointer - confirm.
.macro increment_y_ptr reg, type
.ifc \type, uv_420
        add             \reg, \reg, #2*GRAIN_WIDTH-(3*32)
.else
        sub             \reg, \reg, #3*32-GRAIN_WIDTH
.endif
.endm

// gen_grain_44 type
//
// Emits generate_grain_<type>_8bpc_neon for the chroma types instantiated
// below (uv_420, uv_422).
//
// Inputs as used by the code:
//   r0  output pointer (advanced by the row store helpers / add r0 below)
//   r1  base from which r11 = r1 + 3*GRAIN_WIDTH - 3 is derived; r11 is then
//       stepped once per row by increment_y_ptr
//       NOTE(review): presumably the luma grain buffer - confirm with caller
//   r2  pointer to the film grain data struct (FGD_* fields are read from it)
//   r3  plane selector; scaled by 28 to index ar_coeffs_uv and used to pick
//       the seed XOR constant
//
// State handed to the per-lag code:
//   r2  seed ^ (selector == 0 ? 0xb524 : 0x49d8)
//   r3  X(gaussian_sequence)
//   r4  ar_coeffs_uv + 28 * selector
//   r7  ar_coeff_shift
//   r8  (1 << ar_coeff_shift) >> 1
//   r9  4 + grain_scale_shift
//   r10 (1 << r9) >> 1
//   r11 see above
//   q15 -(4 + grain_scale_shift) in every 16 bit lane
//
// The lag value read from FGD_AR_COEFF_LAG indexes a table of offsets that are
// relative to the table itself (plus CONFIG_THUMB), and the code branches
// there with bx.
.macro gen_grain_44 type
function generate_grain_\type\()_8bpc_neon, export=1
        push            {r4-r11,lr}

        mov             r12, r3
        mov             lr,  #28      // byte step between the two ar_coeffs_uv rows
        add             r11, r1,  #3*GRAIN_WIDTH-3
        mov             r1,  r2       // r1 = data pointer from here on
        mul             r12, r12, lr  // r12 = 28 * selector

        movrel          r3,  X(gaussian_sequence)
        ldr             r2,  [r1, #FGD_SEED]
        ldr             r9,  [r1, #FGD_GRAIN_SCALE_SHIFT]
        add             r4,  r1,  #FGD_AR_COEFFS_UV
        adr             r5,  L(gen_grain_\type\()_tbl)
        ldr             r6,  [r1, #FGD_AR_COEFF_LAG]
        add             r9,  r9,  #4
        ldr             r6,  [r5, r6, lsl #2]  // table entry for this lag
        vdup.16         q15, r9    // 4 + data->grain_scale_shift
        add             r5,  r5,  r6           // absolute branch target
        vneg.s16        q15, q15

        cmp             r12, #0
        movw            r10, #0x49d8
        movw            lr,  #0xb524
        // Intentionally using a separate register instead of moveq with an
        // immediate constant, to avoid armv8 deprecated it instruction forms.
        it              eq
        moveq           r10, lr
        add             r4,  r4,  r12 // Add offset to ar_coeffs_uv[1]
        eor             r2,  r2,  r10

        ldr             r7,  [r1, #FGD_AR_COEFF_SHIFT]
        mov             r8,  #1
        mov             r10, #1
        lsl             r8,  r8,  r7  // 1 << ar_coeff_shift
        lsl             r10, r10, r9  // 1 << (4 + data->grain_scale_shift)
        lsr             r8,  r8,  #1  // 1 << (ar_coeff_shift - 1)
        lsr             r10, r10, #1  // 1 << (4 + data->grain_scale_shift - 1)
        bx              r5

        .align 2
L(gen_grain_\type\()_tbl):
        .word L(generate_grain_\type\()_lag0) - L(gen_grain_\type\()_tbl) + CONFIG_THUMB
        .word L(generate_grain_\type\()_lag1) - L(gen_grain_\type\()_tbl) + CONFIG_THUMB
        .word L(generate_grain_\type\()_lag2) - L(gen_grain_\type\()_tbl) + CONFIG_THUMB
        .word L(generate_grain_\type\()_lag3) - L(gen_grain_\type\()_tbl) + CONFIG_THUMB

L(generate_grain_\type\()_lag0):
.ifc \type, uv_420
        vpush           {q4-q5}
.endif
        mov             r1,  #3
        bl              generate_grain_rows_44_neon
        set_height      r1,  \type

        vdup.16         q12, r7
        vld1.8          {d22[]}, [r4]       // ar_coeffs_uv[0]
        vmov.i8         q0,  #0
        vmov.i8         q1,  #255
        vext.8          q13, q0,  q1,  #13  // 3 zero bytes, then 13 bytes of 0xff
        vext.8          q14, q1,  q0,  #7   // 9 bytes of 0xff, then 7 zero bytes
        vneg.s16        q12, q12            // -ar_coeff_shift

1:
        bl              get_grain_row_44_neon
.ifc \type, uv_420
        add             r12, r11, #GRAIN_WIDTH
.endif
        vmov            q1,  q13            // mask for the first 16 bytes
        vmov            q0,  q8
        bl              add_\type\()_coeff_lag0_neon
        vmov.i8         q1,  #255           // all bytes enabled
        vmov            q0,  q9
        vmov            q8,  q2
        bl              add_\type\()_coeff_lag0_neon
        vmov            q1,  q14            // mask for the last 16 bytes
        vmov            q0,  q10
        vmov            q9,  q2
        bl              add_\type\()_coeff_lag0_neon
        vmov            q10, q2
        subs            r1,  r1,  #1
        increment_y_ptr r11, \type
        store_grain_row_44 d16, d17, d18, d19, d20, d21
        bgt             1b

.ifc \type, uv_420
        vpop            {q4-q5}
.endif
        pop             {r4-r11,pc}

L(generate_grain_\type\()_lag1):
        vpush           {q4-q7}
        mov             r5,  #127           // NOTE(review): presumably the grain clamp limit used by the sum helpers - confirm
        vld1.8          {d27[]}, [r4]!      // ar_coeffs_uv[0]
        vld1.8          {d28[]}, [r4]!      // ar_coeffs_uv[1]
        vld1.8          {d29[]}, [r4]       // ar_coeffs_uv[2]
        add             r4,  r4,  #2        // r4 -> ar_coeffs_uv[4]

        mov             r1,  #3
        vld1.8          {d13[]}, [r4]       // ar_coeffs_uv[4]
        ldrsb           r4,  [r4, #-1]      // ar_coeffs_uv[3]
        bl              generate_grain_rows_44_neon

        set_height      r1,  \type
1:
        sum_\type\()_lag1 q7,  q8,  q8,  q9,  left
        sum_\type\()_lag1 q8,  q8,  q9,  q10
        sum_\type\()_lag1 q10, q9,  q10, q11, right
        subs            r1,  r1,  #1
        increment_y_ptr r11, \type
        store_grain_row_44 d14, d15, d16, d17, d20, d21
        vmov            q9,  q8
        vmov            q8,  q7
        bgt             1b

        vpop            {q4-q7}
        pop             {r4-r11,pc}

L(generate_grain_\type\()_lag2):
        vpush           {q4-q7}
        mov             r5,  #127
        vld1.8          {d28,d29}, [r4]     // ar_coeffs_uv[0-12]

        vmov.s8         r4,  d29[2]         // sign-extended ar_coeffs_uv[10]
        vmov.s8         r10, d29[3]         // sign-extended ar_coeffs_uv[11]

        mov             r1,  #3
        bl              generate_grain_rows_44_neon

        set_height      r1,  \type
1:
        bl              sum_\type\()_lag2_left_neon
        bl              sum_\type\()_lag2_mid_neon
        bl              sum_\type\()_lag2_right_neon
        subs            r1,  r1,  #1
        increment_y_ptr r11, \type
        add             r0,  r0,  #GRAIN_WIDTH-48
        bgt             1b

        vpop            {q4-q7}
        pop             {r4-r11,pc}

L(generate_grain_\type\()_lag3):
        vpush           {q4-q7}
        mov             r5,  #127
        vld1.8          {q13, q14}, [r4]    // ar_coeffs_uv[0-24]

        // Pack coefficient bytes 21, 22 and 23 (zero-extended) into r4.
        vmov.u8         r4,  d28[5]
        vmov.u8         r10, d28[6]
        vmov.u8         r12, d28[7]

        orr             r4,  r4,  r10, lsl #8
        orr             r4,  r4,  r12, lsl #16

        mov             r1,  #3
        bl              generate_grain_rows_44_neon

        set_height      r1,  \type
1:
        bl              sum_\type\()_lag3_left_neon
        bl              sum_\type\()_lag3_mid_neon
        bl              sum_\type\()_lag3_right_neon
        subs            r1,  r1,  #1
        increment_y_ptr r11, \type
        add             r0,  r0,  #GRAIN_WIDTH-48
        bgt             1b

        vpop            {q4-q7}
        pop             {r4-r11,pc}
endfunc
.endm

gen_grain_44 uv_420
gen_grain_44 uv_422

// gather_interleaved dst1, dst2, src1, src2, off
//
// Byte table lookup for every second lane of two D registers.
// For each lane i in {0, 2, 4, 6} + off:
//   dst1[i] = byte at (r3 + src1[i])
//   dst2[i] = byte at (r3 + src2[i])
// The index bytes are zero-extended (vmov.u8). Invoking the macro with off=0
// and off=1 fills all eight lanes of each destination.
//
// In:       r3 = table base address
// Clobbers: r11, r12, lr
//
// Index extraction, address computation and lane loads of neighbouring
// elements are interleaved across the three scratch registers; keep the
// statement order when editing, since each register is reused as soon as its
// load has been issued.
.macro gather_interleaved dst1, dst2, src1, src2, off
        vmov.u8         r11, \src1[0+\off]
        vmov.u8         r12, \src2[0+\off]
        add             r11, r11, r3
        vmov.u8         lr,  \src1[2+\off]
        add             r12, r12, r3
        vld1.8          {\dst1[0+\off]}, [r11]
        vmov.u8         r11, \src2[2+\off]
        add             lr,  lr,  r3
        vld1.8          {\dst2[0+\off]}, [r12]
        vmov.u8         r12, \src1[4+\off]
        add             r11, r11, r3
        vld1.8          {\dst1[2+\off]}, [lr]
        vmov.u8         lr,  \src2[4+\off]
        add             r12, r12, r3
        vld1.8          {\dst2[2+\off]}, [r11]
        vmov.u8         r11, \src1[6+\off]
        add             lr,  lr,  r3
        vld1.8          {\dst1[4+\off]}, [r12]
        vmov.u8         r12, \src2[6+\off]
        add             r11, r11, r3
        vld1.8          {\dst2[4+\off]}, [lr]
        add             r12, r12, r3
        vld1.8          {\dst1[6+\off]}, [r11]
        vld1.8          {\dst2[6+\off]}, [r12]
.endm

// gather dst1, dst2, dst3, dst4, src1, src2, src3, src4
//
// Full byte table lookup for four D registers: every lane of dstN receives
// the byte at (r3 + srcN[lane]). Built from four gather_interleaved
// expansions, pairing registers 1 with 3 and 2 with 4.
// Clobbers r11, r12 and lr (through gather_interleaved).
.macro gather dst1, dst2, dst3, dst4, src1, src2, src3, src4
        gather_interleaved \dst1, \dst3, \src1, \src3, 0
        gather_interleaved \dst1, \dst3, \src1, \src3, 1
        gather_interleaved \dst2, \dst4, \src2, \src4, 0
        gather_interleaved \dst2, \dst4, \src2, \src4, 1
.endm

// gather32_neon: 32 lookups.
// In:  d0-d3 = byte indices, r3 = table base
// Out: d8-d11 = looked-up bytes
// r11, r12 and lr are saved and restored around the lookup.
function gather32_neon
        push            {r11-r12,lr}
        gather          d8,  d9,  d10, d11, d0,  d1,  d2,  d3
        pop             {r11-r12,pc}
endfunc

// gather16_neon: 16 lookups.
// In:  d0-d1 = byte indices, r3 = table base
// Out: d8-d9 = looked-up bytes
// r11, r12 and lr are saved and restored around the lookup.
function gather16_neon
        push            {r11-r12,lr}
        gather_interleaved d8,  d9,  d0,  d1,  0
        gather_interleaved d8,  d9,  d0,  d1,  1
        pop             {r11-r12,pc}
endfunc

// Overlap blending weights. The first 8 bytes are loaded into d24 and the
// second 8 bytes into d25 by fgy_32x32_8bpc_neon (for overlap_coeffs_0).
const overlap_coeffs_0, align=4
        .byte 27, 17, 0,  0,  0,  0,  0,  0
        .byte 17, 27, 32, 32, 32, 32, 32, 32
endconst

const overlap_coeffs_1, align=4
        .byte 23, 0,  0,  0,  0,  0,  0,  0
        .byte 22, 32, 32, 32, 32, 32, 32, 32
endconst

// calc_offset offx, offy, src, sx, sy
//
// Splits a packed random value into two offsets:
//   offy = src & 0xF, doubled when sy == 0
//   offx = src >> 4,  doubled when sx == 0
// Callers pass the same register for offx and src, so offy must be extracted
// before offx overwrites src; do not reorder the first two instructions.
.macro calc_offset offx, offy, src, sx, sy
        and             \offy, \src,  #0xF     // randval & 0xF
        lsr             \offx, \src,  #4       // randval >> 4
.if \sy == 0
        add             \offy, \offy, \offy    // 2 * (randval & 0xF)
.endif
.if \sx == 0
        add             \offx, \offx, \offx    // 2 * (randval >> 4)
.endif
.endm

// add_offset dst, offx, offy, src, stride
//
// dst = src + stride * offy + offx
.macro add_offset dst, offx, offy, src, stride
        mla             \dst, \stride, \offy, \src // grain_lut += grain_stride * offy
        add             \dst, \dst, \offx          // grain_lut += offx
.endm

// void dav1d_fgy_32x32_8bpc_neon(pixel *const dst, const pixel *const src,
//                                const ptrdiff_t stride,
//                                const uint8_t scaling[SCALING_SIZE],
//                                const int scaling_shift,
//                                const entry grain_lut[][GRAIN_WIDTH],
//                                const int offsets[][2],
//                                const int h, const ptrdiff_t clip,
//                                const ptrdiff_t type);
//
// Setup half of the luma film grain routine: loads the stack arguments,
// derives the grain_lut pointers and jumps into fgy_loop_neon through
// L(fgy_loop_tbl), indexed by type.
//
// Stack arguments sit above the 9 core registers (36 bytes) and q4-q7
// (64 bytes) saved on entry, hence the first one is at [sp, #100].
//
// State handed to the loop:
//   r4      grain_lut pointer built from offsets[1][0], plus 32 columns
//   r5      grain_lut pointer built from offsets[0][0]
//   r6      grain_lut pointer built from offsets[0][1], plus 32 rows
//   r8      grain_lut pointer built from offsets[1][1], plus 32 rows and columns
//   r7      h, or 2 when bit 0 of type is set (real h is then kept in r10)
//   r9      GRAIN_WIDTH (grain_lut row stride)
//   q13     -scaling_shift in every 16 bit lane
//   q14/q15 16/235 when clip is nonzero, otherwise 0/255
//   d24/d25 the two rows of overlap_coeffs_0
//   d14/d15 d24[0] / d24[1] broadcast (only set when bit 0 of type is set)
function fgy_32x32_8bpc_neon, export=1
        push            {r4-r11,lr}
        vpush           {q4-q7}
        ldrd            r4,  r5,  [sp, #100]   // scaling_shift, grain_lut
        ldrd            r6,  r7,  [sp, #108]   // offsets, h
        ldr             r8,       [sp, #116]   // clip
        mov             r9,  #GRAIN_WIDTH      // grain_lut stride

        neg             r4,  r4
        vdup.16         q13, r4                // -scaling_shift
        cmp             r8,  #0                // flags consumed by beq below

        // NOTE(review): movrel_local is assumed to leave the flags untouched.
        movrel_local    r12, overlap_coeffs_0

        beq             1f
        // clip
        vmov.i8         q14, #16
        vmov.i8         q15, #235
        b               2f
1:
        // no clip
        vmov.i8         q14, #0
        vmov.i8         q15, #255
2:

        vld1.8          {d24, d25}, [r12, :128] // overlap_coeffs

        add             r5,  r5,  #9           // grain_lut += 9
        add             r5,  r5,  r9,  lsl #3  // grain_lut += 8 * grain_stride
        add             r5,  r5,  r9           // grain_lut += grain_stride

        ldr             r10, [r6, #8]          // offsets[1][0]
        calc_offset     r10, r4,  r10, 0,   0
        add_offset      r4,  r10, r4,  r5,  r9
        ldr             r10, [r6, #4]          // offsets[0][1]
        calc_offset     r10, r11, r10, 0,   0
        add_offset      r11, r10, r11, r5,  r9
        ldr             r10, [r6, #12]         // offsets[1][1]
        calc_offset     r10, r8,  r10, 0,   0
        add_offset      r8,  r10, r8,  r5,  r9
        // offsets[0][0] goes last: it overwrites both r6 (offsets) and r5
        // (the common grain_lut base used by the three lookups above).
        ldr             r6,  [r6]              // offsets[0][0]
        calc_offset     r6,  lr,  r6,  0,   0
        add_offset      r5,  r6,  lr,  r5,  r9

        add             r4,  r4,  #32          // grain_lut += FG_BLOCK_SIZE * bx
        add             r6,  r11, r9,  lsl #5  // grain_lut += grain_stride * FG_BLOCK_SIZE * by

        ldr             r10, [sp, #120]        // type
        adr             r11, L(fgy_loop_tbl)

        // Bit 0 of type selects y overlap. The flags set here survive until
        // the beq below because ldr and the plain add forms do not alter them.
        tst             r10, #1
        ldr             r10, [r11, r10, lsl #2]

        add             r8,  r8,  r9,  lsl #5  // grain_lut += grain_stride * FG_BLOCK_SIZE * by
        add             r8,  r8,  #32          // grain_lut += FG_BLOCK_SIZE * bx

        add             r11, r11, r10          // absolute loop entry address

        beq             1f
        // y overlap
        vdup.8          d14, d24[0]
        vdup.8          d15, d24[1]
        mov             r10, r7                // backup actual h
        mov             r7,  #2
1:
        bx              r11
endfunc

function fgy_loop_neon
L(fgy_loop_tbl):
        .word L(loop_00) - L(fgy_loop_tbl) + CONFIG_THUMB
        .word L(loop_01) - L(fgy_loop_tbl) + CONFIG_THUMB
        .word L(loop_10) - L(fgy_loop_tbl) + CONFIG_THUMB
        .word L(loop_11) - L(fgy_loop_tbl) + CONFIG_THUMB

.macro fgy ox, oy
L(loop_\ox\oy):
1:
.if \ox
        vld1.8          {d8},       [r4],       r9 // grain_lut old
.endif
.if \oy
        vld1.8          {q2, q3},   [r6],       r9 // grain_lut top
.endif
.if \ox && \oy
        vld1.8          {d10},      [r8],       r9 // grain_lut top old
.endif
        vld1.8          {q0, q1},   [r1, :128], r2 // src
        vld1.8          {q10, q11}, [r5],       r9 // grain_lut
1529*c0909341SAndroid Build Coastguard Worker 1530*c0909341SAndroid Build Coastguard Worker.if \ox 1531*c0909341SAndroid Build Coastguard Worker vmull.s8 q4, d8, d24 1532*c0909341SAndroid Build Coastguard Worker vmlal.s8 q4, d20, d25 1533*c0909341SAndroid Build Coastguard Worker.endif 1534*c0909341SAndroid Build Coastguard Worker 1535*c0909341SAndroid Build Coastguard Worker.if \oy 1536*c0909341SAndroid Build Coastguard Worker.if \ox 1537*c0909341SAndroid Build Coastguard Worker vmull.s8 q5, d10, d24 1538*c0909341SAndroid Build Coastguard Worker vmlal.s8 q5, d4, d25 1539*c0909341SAndroid Build Coastguard Worker vqrshrn.s16 d20, q4, #5 1540*c0909341SAndroid Build Coastguard Worker vqrshrn.s16 d4, q5, #5 1541*c0909341SAndroid Build Coastguard Worker.endif 1542*c0909341SAndroid Build Coastguard Worker 1543*c0909341SAndroid Build Coastguard Worker vmull.s8 q4, d20, d15 1544*c0909341SAndroid Build Coastguard Worker vmull.s8 q5, d21, d15 1545*c0909341SAndroid Build Coastguard Worker vmull.s8 q8, d22, d15 1546*c0909341SAndroid Build Coastguard Worker vmull.s8 q9, d23, d15 1547*c0909341SAndroid Build Coastguard Worker vmlal.s8 q4, d4, d14 1548*c0909341SAndroid Build Coastguard Worker vmlal.s8 q5, d5, d14 1549*c0909341SAndroid Build Coastguard Worker vmlal.s8 q8, d6, d14 1550*c0909341SAndroid Build Coastguard Worker vmlal.s8 q9, d7, d14 1551*c0909341SAndroid Build Coastguard Worker vqrshrn.s16 d20, q4, #5 1552*c0909341SAndroid Build Coastguard Worker vqrshrn.s16 d21, q5, #5 1553*c0909341SAndroid Build Coastguard Worker vqrshrn.s16 d22, q8, #5 1554*c0909341SAndroid Build Coastguard Worker vqrshrn.s16 d23, q9, #5 1555*c0909341SAndroid Build Coastguard Worker.elseif \ox 1556*c0909341SAndroid Build Coastguard Worker vqrshrn.s16 d20, q4, #5 1557*c0909341SAndroid Build Coastguard Worker.endif 1558*c0909341SAndroid Build Coastguard Worker 1559*c0909341SAndroid Build Coastguard Worker bl gather32_neon 1560*c0909341SAndroid Build Coastguard Worker 1561*c0909341SAndroid Build 
Coastguard Worker vmovl.s8 q8, d20 // grain 1562*c0909341SAndroid Build Coastguard Worker vmovl.s8 q9, d21 1563*c0909341SAndroid Build Coastguard Worker vmovl.s8 q10, d22 1564*c0909341SAndroid Build Coastguard Worker vmovl.s8 q11, d23 1565*c0909341SAndroid Build Coastguard Worker 1566*c0909341SAndroid Build Coastguard Worker vmovl.u8 q2, d8 // scaling 1567*c0909341SAndroid Build Coastguard Worker vmovl.u8 q3, d9 1568*c0909341SAndroid Build Coastguard Worker vmovl.u8 q4, d10 1569*c0909341SAndroid Build Coastguard Worker vmovl.u8 q5, d11 1570*c0909341SAndroid Build Coastguard Worker 1571*c0909341SAndroid Build Coastguard Worker vmul.i16 q8, q8, q2 // scaling * grain 1572*c0909341SAndroid Build Coastguard Worker vmul.i16 q9, q9, q3 1573*c0909341SAndroid Build Coastguard Worker vmul.i16 q10, q10, q4 1574*c0909341SAndroid Build Coastguard Worker vmul.i16 q11, q11, q5 1575*c0909341SAndroid Build Coastguard Worker 1576*c0909341SAndroid Build Coastguard Worker vrshl.s16 q8, q8, q13 // round2(scaling * grain, scaling_shift) 1577*c0909341SAndroid Build Coastguard Worker vrshl.s16 q9, q9, q13 1578*c0909341SAndroid Build Coastguard Worker vrshl.s16 q10, q10, q13 1579*c0909341SAndroid Build Coastguard Worker vrshl.s16 q11, q11, q13 1580*c0909341SAndroid Build Coastguard Worker 1581*c0909341SAndroid Build Coastguard Worker vaddw.u8 q8, q8, d0 // *src + noise 1582*c0909341SAndroid Build Coastguard Worker vaddw.u8 q9, q9, d1 1583*c0909341SAndroid Build Coastguard Worker vaddw.u8 q10, q10, d2 1584*c0909341SAndroid Build Coastguard Worker vaddw.u8 q11, q11, d3 1585*c0909341SAndroid Build Coastguard Worker 1586*c0909341SAndroid Build Coastguard Worker vqmovun.s16 d0, q8 1587*c0909341SAndroid Build Coastguard Worker vqmovun.s16 d1, q9 1588*c0909341SAndroid Build Coastguard Worker vqmovun.s16 d2, q10 1589*c0909341SAndroid Build Coastguard Worker vqmovun.s16 d3, q11 1590*c0909341SAndroid Build Coastguard Worker 1591*c0909341SAndroid Build Coastguard Worker vmax.u8 q0, q0, q14 
1592*c0909341SAndroid Build Coastguard Worker vmax.u8 q1, q1, q14 1593*c0909341SAndroid Build Coastguard Worker vmin.u8 q0, q0, q15 1594*c0909341SAndroid Build Coastguard Worker vmin.u8 q1, q1, q15 1595*c0909341SAndroid Build Coastguard Worker 1596*c0909341SAndroid Build Coastguard Worker subs r7, r7, #1 1597*c0909341SAndroid Build Coastguard Worker.if \oy 1598*c0909341SAndroid Build Coastguard Worker vdup.8 d14, d25[0] 1599*c0909341SAndroid Build Coastguard Worker vdup.8 d15, d25[1] 1600*c0909341SAndroid Build Coastguard Worker.endif 1601*c0909341SAndroid Build Coastguard Worker vst1.8 {q0, q1}, [r0, :128], r2 // dst 1602*c0909341SAndroid Build Coastguard Worker bgt 1b 1603*c0909341SAndroid Build Coastguard Worker 1604*c0909341SAndroid Build Coastguard Worker.if \oy 1605*c0909341SAndroid Build Coastguard Worker cmp r10, #2 1606*c0909341SAndroid Build Coastguard Worker sub r7, r10, #2 // restore actual remaining h 1607*c0909341SAndroid Build Coastguard Worker bgt L(loop_\ox\()0) 1608*c0909341SAndroid Build Coastguard Worker.endif 1609*c0909341SAndroid Build Coastguard Worker vpop {q4-q7} 1610*c0909341SAndroid Build Coastguard Worker pop {r4-r11,pc} 1611*c0909341SAndroid Build Coastguard Worker.endm 1612*c0909341SAndroid Build Coastguard Worker 1613*c0909341SAndroid Build Coastguard Worker fgy 0, 0 1614*c0909341SAndroid Build Coastguard Worker fgy 0, 1 1615*c0909341SAndroid Build Coastguard Worker fgy 1, 0 1616*c0909341SAndroid Build Coastguard Worker fgy 1, 1 1617*c0909341SAndroid Build Coastguard Workerendfunc 1618*c0909341SAndroid Build Coastguard Worker 1619*c0909341SAndroid Build Coastguard Worker// void dav1d_fguv_32x32_420_8bpc_neon(pixel *const dst, 1620*c0909341SAndroid Build Coastguard Worker// const pixel *const src, 1621*c0909341SAndroid Build Coastguard Worker// const ptrdiff_t stride, 1622*c0909341SAndroid Build Coastguard Worker// const uint8_t scaling[SCALING_SIZE], 1623*c0909341SAndroid Build Coastguard Worker// const Dav1dFilmGrainData *const data, 
//                                     const entry grain_lut[][GRAIN_WIDTH],
//                                     const pixel *const luma_row,
//                                     const ptrdiff_t luma_stride,
//                                     const int offsets[][2],
//                                     const ptrdiff_t h, const ptrdiff_t uv,
//                                     const ptrdiff_t is_id,
//                                     const ptrdiff_t type);
//
// fguv layout, sx, sy: emit the exported entry point for one chroma layout.
// sx/sy are 1 when chroma is subsampled horizontally/vertically. The entry
// point only sets up registers and then tail-jumps (bx r12) into
// fguv_loop_sx0_neon or fguv_loop_sx1_neon, which run the row loop and
// unwind the frame pushed here.
//
// State handed to the loops:
//   r0, r1, r2  dst, src, stride (first three arguments, untouched here)
//   r4          grain_lut "old" pointer
//   r5          grain_lut pointer of the current block
//   r6, r7      luma_row, luma_stride (doubled when sy is set)
//   r8          grain_lut "top" pointer
//   r11         grain_lut "top old" pointer
//   r9          rows in the first pass (h, or 2 >> sy with y overlap)
//   r10         grain_lut row stride (GRAIN_WIDTH)
//   lr          rows left after the overlap pass (y overlap only)
//   d4          [0] uv_luma_mult, [1] uv_mult, [2] uv_offset
//   d6, d7      y overlap weights (top, current) for the first row
//   d24, d25    overlap_coeffs_<sx>, used as x overlap weights
//   q13         -scaling_shift in every 16 bit lane
//   q14, q15    lower/upper clamp for the output pixels
.macro fguv layout, sx, sy
function fguv_32x32_\layout\()_8bpc_neon, export=1
        push            {r4-r11,lr}
        vpush           {q4-q7}
        // 9 core registers + 4 q registers = 36 + 64 = 100 bytes pushed,
        // so the first stack argument is at [sp, #100].
        ldrd            r4,  r5,  [sp, #100]   // data, grain_lut
        ldrd            r6,  r7,  [sp, #108]   // luma_row, luma_stride
        ldrd            r8,  r9,  [sp, #116]   // offsets, h
        ldrd            r10, r11, [sp, #124]   // uv, is_id

        // !csfl: these three values are only consumed by the ".if !\csfl"
        // paths of the loops below.
        add             r10, r4,  r10, lsl #2  // + 4*uv
        add             r12, r10, #FGD_UV_LUMA_MULT
        add             lr,  r10, #FGD_UV_MULT
        add             r10, r10, #FGD_UV_OFFSET
        vld1.16         {d4[]},   [r12]        // uv_luma_mult
        vld1.16         {d4[2]},  [r10]        // uv_offset
        vld1.16         {d4[1]},  [lr]         // uv_mult

        ldr             lr,  [r4, #FGD_SCALING_SHIFT]
        ldr             r12, [r4, #FGD_CLIP_TO_RESTRICTED_RANGE]
        neg             lr,  lr                // -scaling_shift

        cmp             r12, #0
        vdup.16         q13, lr                // -scaling_shift

        beq             1f
        // clip
        cmp             r11, #0
        vmov.i8         q14, #16
        vmov.i8         q15, #240
        beq             2f
        // is_id
        vmov.i8         q15, #235
        b               2f
1:
        // no clip
        vmov.i8         q14, #0
        vmov.i8         q15, #255
2:

        mov             r10, #GRAIN_WIDTH      // grain_lut stride

        add             r5,  r5,  #(3 + (2 >> \sx)*3) // grain_lut += 9 or 6
.if \sy
        add             r5,  r5,  r10, lsl #2  // grain_lut += 4 * grain_stride
        add             r5,  r5,  r10, lsl #1  // grain_lut += 2 * grain_stride
.else
        add             r5,  r5,  r10, lsl #3  // grain_lut += 8 * grain_stride
        add             r5,  r5,  r10          // grain_lut += grain_stride
.endif

        // calc_offset/add_offset are macros defined earlier in this file;
        // each pair below turns one offsets[][] entry into a grain_lut
        // pointer relative to r5.
        ldr             r12, [r8, #8]          // offsets[1][0]
        calc_offset     r12, r4,  r12, \sx, \sy
        add_offset      r4,  r12, r4,  r5,  r10

        ldr             r12, [r8, #4]          // offsets[0][1]
        calc_offset     r12, lr,  r12, \sx, \sy
        add_offset      lr,  r12, lr,  r5,  r10

        ldr             r12, [r8, #12]         // offsets[1][1]
        calc_offset     r12, r11, r12, \sx, \sy
        add_offset      r11, r12, r11, r5,  r10

        ldr             r8,  [r8]              // offsets[0][0]
        calc_offset     r8,  r12, r8,  \sx, \sy
        add_offset      r5,  r8,  r12, r5,  r10

        add             r4,  r4,  #(32 >> \sx) // grain_lut += FG_BLOCK_SIZE * bx
        add             r8,  lr,  r10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
        add             r11, r11, r10, lsl #(5 - \sy) // grain_lut += grain_stride * FG_BLOCK_SIZE * by
        add             r11, r11, #(32 >> \sx) // grain_lut += FG_BLOCK_SIZE * bx

        movrel_local    r12, overlap_coeffs_\sx
        ldr             lr,  [sp, #132]        // type

        vld1.8          {d24, d25}, [r12, :128] // overlap_coeffs

        movrel_local    r12, L(fguv_loop_sx\sx\()_tbl)
#if CONFIG_THUMB
        // This uses movrel_local instead of adr above, because the target
        // can be out of range for adr. But movrel_local leaves the thumb bit
        // set on COFF (but probably wouldn't if building for thumb on ELF),
        // thus try to clear the bit for robustness.
        bic             r12, r12, #1
#endif

        // type indexes the 8-entry jump table: bit 0 = oy, bit 1 = ox,
        // bit 2 = csfl. The flags from tst survive until the beq below.
        tst             lr,  #1
        ldr             lr,  [r12, lr, lsl #2]

        add             r12, r12, lr

        beq             1f
        // y overlap
        sub             lr,  r9,  #(2 >> \sy)  // backup remaining h
        mov             r9,  #(2 >> \sy)

1:

.if \sy
        vmov.i8         d6,  #23
        vmov.i8         d7,  #22
.else
        vmov.i8         d6,  #27
        vmov.i8         d7,  #17
.endif

.if \sy
        add             r7,  r7,  r7           // luma_stride *= 2
.endif

        bx              r12
endfunc
.endm

fguv 420, 1, 1
fguv 422, 1, 0
fguv 444, 0, 0

// Row loops for the layouts without horizontal subsampling (sx == 0),
// 32 output pixels per row. Entered via "bx r12" from the fguv entry point
// with the register state described there; all variants leave through the
// shared epilogue at label 9, which pops the frame pushed by the entry point.
function fguv_loop_sx0_neon
// Offsets are relative to the table itself; "+ CONFIG_THUMB" sets bit 0 of
// the computed address for the bx in Thumb builds.
L(fguv_loop_sx0_tbl):
        .word L(fguv_loop_sx0_csfl0_00) - L(fguv_loop_sx0_tbl) + CONFIG_THUMB
        .word L(fguv_loop_sx0_csfl0_01) - L(fguv_loop_sx0_tbl) + CONFIG_THUMB
        .word L(fguv_loop_sx0_csfl0_10) - L(fguv_loop_sx0_tbl) + CONFIG_THUMB
        .word L(fguv_loop_sx0_csfl0_11) - L(fguv_loop_sx0_tbl) + CONFIG_THUMB
        .word L(fguv_loop_sx0_csfl1_00) - L(fguv_loop_sx0_tbl) + CONFIG_THUMB
        .word L(fguv_loop_sx0_csfl1_01) - L(fguv_loop_sx0_tbl) + CONFIG_THUMB
        .word L(fguv_loop_sx0_csfl1_10) - L(fguv_loop_sx0_tbl) + CONFIG_THUMB
        .word L(fguv_loop_sx0_csfl1_11) - L(fguv_loop_sx0_tbl) + CONFIG_THUMB

// fguv_loop_sx0 csfl, ox, oy: emit one loop variant. With csfl set the luma
// pixels are passed to gather32_neon unchanged; otherwise they are first
// combined with the chroma source using the values in d4. ox/oy enable the
// x/y overlap blending.
.macro fguv_loop_sx0 csfl, ox, oy
L(fguv_loop_sx0_csfl\csfl\()_\ox\oy):
.if \oy
        mov             r12, lr                // remaining h; lr is overwritten by bl below
.endif
1:
.if \ox
        vld1.8          {d8},       [r4],        r10 // grain_lut old
.endif
.if \oy
        vld1.8          {q8,  q9},  [r8],        r10 // grain_lut top
.endif
.if \ox && \oy
        vld1.8          {d10},      [r11],       r10 // grain_lut top old
.endif
        vld1.8          {q0,  q1},  [r6, :128],  r7  // luma
        vld1.8          {q10, q11}, [r5],        r10 // grain_lut

.if \ox
        // x overlap on the current row: old * d24 + cur * d25
        vmull.s8        q4,  d8,  d24
        vmlal.s8        q4,  d20, d25
.endif

.if \oy
.if \ox
        // Same x overlap on the top row, then narrow both with round2(x, 5)
        vmull.s8        q5,  d10, d24
        vmlal.s8        q5,  d16, d25
        vqrshrn.s16     d20, q4,  #5
        vqrshrn.s16     d16, q5,  #5
.endif

        // y overlap: cur * d7 + top * d6, narrowed with round2(x, 5)
        vmull.s8        q4,  d20, d7
        vmull.s8        q5,  d21, d7
        vmull.s8        q6,  d22, d7
        vmull.s8        q7,  d23, d7
        vmlal.s8        q4,  d16, d6
        vmlal.s8        q5,  d17, d6
        vmlal.s8        q6,  d18, d6
        vmlal.s8        q7,  d19, d6
        vqrshrn.s16     d20, q4,  #5
        vqrshrn.s16     d21, q5,  #5
        vqrshrn.s16     d22, q6,  #5
        vqrshrn.s16     d23, q7,  #5
.elseif \ox
        vqrshrn.s16     d20, q4,  #5
.endif
.if !\csfl
        // q0/q1 = sat_u8(((luma * uv_luma_mult + src * uv_mult) >> 6) + uv_offset)
        // r1 is not advanced here; src is loaded again after the gather.
        vld1.8          {q8,  q9},  [r1, :128] // src
        vmovl.u8        q4,  d0
        vmovl.u8        q5,  d1
        vmovl.u8        q6,  d2
        vmovl.u8        q7,  d3
        vmovl.u8        q0,  d16
        vmovl.u8        q1,  d17
        vmovl.u8        q8,  d18
        vmovl.u8        q9,  d19
        vmul.i16        q4,  q4,  d4[0]
        vmul.i16        q5,  q5,  d4[0]
        vmul.i16        q6,  q6,  d4[0]
        vmul.i16        q7,  q7,  d4[0]
        vmul.i16        q0,  q0,  d4[1]
        vmul.i16        q1,  q1,  d4[1]
        vmul.i16        q8,  q8,  d4[1]
        vmul.i16        q9,  q9,  d4[1]
        vqadd.s16       q4,  q4,  q0
        vqadd.s16       q5,  q5,  q1
        vqadd.s16       q6,  q6,  q8
        vqadd.s16       q7,  q7,  q9
        vdup.16         q0,  d4[2]
        vshr.s16        q4,  q4,  #6
        vshr.s16        q5,  q5,  #6
        vshr.s16        q6,  q6,  #6
        vshr.s16        q7,  q7,  #6
        vadd.i16        q4,  q4,  q0
        vadd.i16        q5,  q5,  q0
        vadd.i16        q6,  q6,  q0
        vadd.i16        q7,  q7,  q0
        vqmovun.s16     d0,  q4
        vqmovun.s16     d1,  q5
        vqmovun.s16     d2,  q6
        vqmovun.s16     d3,  q7
.endif

        // gather32_neon is defined elsewhere in this file. Judging by the
        // "scaling" widening below it leaves 32 scaling bytes in d8-d11;
        // its exact inputs and clobbers are not visible here - TODO confirm.
        bl              gather32_neon

        vld1.8          {q0,  q1},  [r1, :128], r2 // src

        vmovl.s8        q8,  d20       // grain
        vmovl.s8        q9,  d21
        vmovl.s8        q10, d22
        vmovl.s8        q11, d23

        vmovl.u8        q6,  d8        // scaling
        vmovl.u8        q7,  d9
        vmovl.u8        q4,  d10
        vmovl.u8        q5,  d11

        vmul.i16        q8,  q8,  q6   // scaling * grain
        vmul.i16        q9,  q9,  q7
        vmul.i16        q10, q10, q4
        vmul.i16        q11, q11, q5

        vrshl.s16       q8,  q8,  q13  // round2(scaling * grain, scaling_shift)
        vrshl.s16       q9,  q9,  q13
        vrshl.s16       q10, q10, q13
        vrshl.s16       q11, q11, q13

        vaddw.u8        q8,  q8,  d0   // *src + noise
        vaddw.u8        q9,  q9,  d1
        vaddw.u8        q10, q10, d2
        vaddw.u8        q11, q11, d3

        // Saturate to unsigned 8 bit, then clamp to [q14, q15]
        vqmovun.s16     d0,  q8
        vqmovun.s16     d1,  q9
        vqmovun.s16     d2,  q10
        vqmovun.s16     d3,  q11

        vmax.u8         q0,  q0,  q14
        vmax.u8         q1,  q1,  q14
        vmin.u8         q0,  q0,  q15
        vmin.u8         q1,  q1,  q15

        subs            r9,  r9,  #1
.if \oy
        // Weights for the second overlap row; NEON ops leave the flags
        // from the subs above intact for the bgt below.
        vdup.8          d6,  d25[0]
        vdup.8          d7,  d25[1]
.endif

        vst1.8          {q0, q1}, [r0, :128], r2 // dst
        bgt             1b

.if \oy
        // Overlap pass done; continue with the variant without y overlap
        // if any rows remain.
        cmp             r12, #0
        mov             r9,  r12               // restore actual remaining h
        bgt             L(fguv_loop_sx0_csfl\csfl\()_\ox\()0)
.endif
        b               9f
.endm
        fguv_loop_sx0   0, 0, 0
        fguv_loop_sx0   0, 0, 1
        fguv_loop_sx0   0, 1, 0
        fguv_loop_sx0   0, 1, 1
        fguv_loop_sx0   1, 0, 0
        fguv_loop_sx0   1, 0, 1
        fguv_loop_sx0   1, 1, 0
        fguv_loop_sx0   1, 1, 1

9:
        vpop            {q4-q7}
        pop             {r4-r11,pc}
endfunc

// Row loops for the horizontally subsampled layouts (sx == 1), 16 output
// pixels per row. Same entry convention and shared epilogue as
// fguv_loop_sx0_neon; 32 luma pixels are read per row and reduced to 16 by
// averaging horizontal pairs.
function fguv_loop_sx1_neon
L(fguv_loop_sx1_tbl):
        .word L(fguv_loop_sx1_csfl0_00) - L(fguv_loop_sx1_tbl) + CONFIG_THUMB
        .word L(fguv_loop_sx1_csfl0_01) - L(fguv_loop_sx1_tbl) + CONFIG_THUMB
        .word L(fguv_loop_sx1_csfl0_10) - L(fguv_loop_sx1_tbl) + CONFIG_THUMB
        .word L(fguv_loop_sx1_csfl0_11) - L(fguv_loop_sx1_tbl) + CONFIG_THUMB
        .word L(fguv_loop_sx1_csfl1_00) - L(fguv_loop_sx1_tbl) + CONFIG_THUMB
        .word L(fguv_loop_sx1_csfl1_01) - L(fguv_loop_sx1_tbl) + CONFIG_THUMB
        .word L(fguv_loop_sx1_csfl1_10) - L(fguv_loop_sx1_tbl) + CONFIG_THUMB
        .word L(fguv_loop_sx1_csfl1_11) - L(fguv_loop_sx1_tbl) + CONFIG_THUMB

// fguv_loop_sx1 csfl, ox, oy: emit one loop variant; parameters have the
// same meaning as for fguv_loop_sx0.
.macro fguv_loop_sx1 csfl, ox, oy
L(fguv_loop_sx1_csfl\csfl\()_\ox\oy):
.if \oy
        mov             r12, lr                // remaining h; lr is overwritten by bl below
.endif
1:
.if \ox
        vld1.8          {d8},       [r4],        r10 // grain_lut old
.endif
.if \oy
        vld1.8          {q8},       [r8],        r10 // grain_lut top
.endif
.if \ox && \oy
        vld1.8          {d10},      [r11],       r10 // grain_lut top old
.endif
        vld1.8          {q0,  q1},  [r6, :128],  r7  // luma
        vld1.8          {q10},      [r5],        r10 // grain_lut
        vld1.8          {q11},      [r1, :128],  r2  // src

.if \ox
        // x overlap on the current row: old * d24 + cur * d25
        vmull.s8        q4,  d8,  d24
        vmlal.s8        q4,  d20, d25
.endif

        // Sum horizontal luma pairs into 16 bit lanes; the rounding shift
        // by 1 further down completes the average.
        vpaddl.u8       q0,  q0
        vpaddl.u8       q1,  q1
.if \oy
.if \ox
        // Same x overlap on the top row, then narrow both with round2(x, 5)
        vmull.s8        q5,  d10, d24
        vmlal.s8        q5,  d16, d25
        vqrshrn.s16     d20, q4,  #5
        vqrshrn.s16     d16, q5,  #5
.endif

        // y overlap: cur * d7 + top * d6, narrowed with round2(x, 5)
        vmull.s8        q4,  d20, d7
        vmull.s8        q5,  d21, d7
        vmlal.s8        q4,  d16, d6
        vmlal.s8        q5,  d17, d6
        vqrshrn.s16     d20, q4,  #5
        vqrshrn.s16     d21, q5,  #5
.elseif \ox
        vqrshrn.s16     d20, q4,  #5
.endif
.if \csfl
        vrshrn.u16      d0,  q0,  #1
        vrshrn.u16      d1,  q1,  #1
.else
        // q0 = sat_u8(((avg_luma * uv_luma_mult + src * uv_mult) >> 6) + uv_offset)
        vrshr.u16       q4,  q0,  #1
        vrshr.u16       q5,  q1,  #1
        vmovl.u8        q0,  d22
        vmovl.u8        q1,  d23
        vmul.i16        q4,  q4,  d4[0]
        vmul.i16        q5,  q5,  d4[0]
        vmul.i16        q0,  q0,  d4[1]
        vmul.i16        q1,  q1,  d4[1]
        vqadd.s16       q4,  q4,  q0
        vqadd.s16       q5,  q5,  q1
        vdup.16         q0,  d4[2]
        vshr.s16        q4,  q4,  #6
        vshr.s16        q5,  q5,  #6
        vadd.i16        q4,  q4,  q0
        vadd.i16        q5,  q5,  q0
        vqmovun.s16     d0,  q4
        vqmovun.s16     d1,  q5
.endif

        // gather16_neon is defined elsewhere in this file. Judging by the
        // "scaling" widening below it leaves 16 scaling bytes in d8/d9;
        // its exact inputs and clobbers are not visible here - TODO confirm.
        bl              gather16_neon

        vmovl.s8        q8,  d20       // grain
        vmovl.s8        q9,  d21

        vmovl.u8        q6,  d8        // scaling
        vmovl.u8        q7,  d9

        vmul.i16        q8,  q8,  q6   // scaling * grain
        vmul.i16        q9,  q9,  q7

        vrshl.s16       q8,  q8,  q13  // round2(scaling * grain, scaling_shift)
        vrshl.s16       q9,  q9,  q13

        vaddw.u8        q8,  q8,  d22  // *src + noise
        vaddw.u8        q9,  q9,  d23

        // Saturate to unsigned 8 bit, then clamp to [q14, q15]
        vqmovun.s16     d0,  q8
        vqmovun.s16     d1,  q9

        vmax.u8         q0,  q0,  q14
        vmin.u8         q0,  q0,  q15

        subs            r9,  r9,  #1
.if \oy
        // Exchange the top/current weights for the next overlap row (if
        // any); NEON ops leave the flags from the subs above intact.
        vswp            d6,  d7
.endif
        vst1.8          {q0}, [r0, :128], r2 // dst
        bgt             1b

.if \oy
        // Overlap pass done; continue with the variant without y overlap
        // if any rows remain.
        cmp             r12, #0
        mov             r9,  r12               // restore actual remaining h
        bgt             L(fguv_loop_sx1_csfl\csfl\()_\ox\()0)
.endif

        b               9f
.endm
        fguv_loop_sx1   0, 0, 0
        fguv_loop_sx1   0, 0, 1
        fguv_loop_sx1   0, 1, 0
        fguv_loop_sx1   0, 1, 1
        fguv_loop_sx1   1, 0, 0
        fguv_loop_sx1   1, 0, 1
        fguv_loop_sx1   1, 1, 0
        fguv_loop_sx1   1, 1, 1

9:
        vpop            {q4-q7}
        pop             {r4-r11,pc}
endfunc