/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Function entry/exit boilerplate: ENTRY places a global, 16-byte aligned
 * function symbol in .text and opens its unwind region; END closes the unwind
 * region and records the symbol size.
 */
#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: .fnstart
#define END(f) .fnend; .size f, .-f;

.eabi_attribute 25,1 @Tag_ABI_align8_preserved
.arm

/* lanepair dst, src, xr0, xr1, yr0, yr1, zr0, zr1
 *
 * Looks up and interpolates two pixels ("lane 0" and "lane 1") from the table.
 *
 *   src       D register holding two 32-bit byte offsets into the table
 *             (lane 0 in the low word, lane 1 in the high word).
 *   dst       D register receiving the two results, four bytes per lane.
 *             It is written last, so it may be the same register as src.
 *   xr0, xr1  16-bit scalars, X fraction for lane 0 / lane 1.
 *   yr0, yr1  8-bit scalars,  Y fraction for lane 0 / lane 1.
 *   zr0, zr1  16-bit scalars, Z fraction for lane 0 / lane 1.
 *
 * Register inputs: r3 = table base, r4 = Y pitch, r5 = Z pitch (both added to
 * byte addresses).
 *
 * Every blend has the form a*256 - a*f + b*f, i.e. a*(256 - f) + b*f.
 * Each 8-byte load picks up the addressed 4-byte entry together with the
 * entry that follows it, which supplies the X+1 neighbour.
 *
 * Clobbers: r6, r7, d6, d7 and q8-q15.  The caller must not keep anything
 * live in q3 across an invocation.
 */
.macro lanepair dst, src, xr0, xr1, yr0, yr1, zr0, zr1

            /* r6/r7 = address of the (x, y, z) entry for lane 0 / lane 1 */
            vmov        r6, r7, \src

            add         r6, r6, r3
            add         r7, r7, r3

            /* front plane: row y, then step to row y+1 */
            vld1.u8     d16, [r6], r4
            vld1.u8     d17, [r7], r4

            /* front plane: row y+1, then step to the rear plane (z+1) */
            vld1.u8     d18, [r6], r5
            vld1.u8     d19, [r7], r5

            vdup.u8     d6, \yr0
            vdup.u8     d7, \yr1
            /* Y interpolate, front, lanes 0 and 1 -> q12 and q13 */
            vshll.u8    q12, d16, #8
            vshll.u8    q13, d17, #8
            vmlsl.u8    q12, d16, d6
            vmlsl.u8    q13, d17, d7
            vmlal.u8    q12, d18, d6
            vmlal.u8    q13, d19, d7

            /* rear plane: row y+1 */
            vld1.u8     d18, [r6]
            vld1.u8     d19, [r7]

            /* step back to row y of the rear plane */
            sub         r6, r6, r4
            sub         r7, r7, r4

            vld1.u8     d16, [r6]
            vld1.u8     d17, [r7]

            /* Y interpolate, rear, lanes 0 and 1 -> q14 and q15 */
            vshll.u8    q14, d16, #8
            vshll.u8    q15, d17, #8
            vmlsl.u8    q14, d16, d6
            vmlsl.u8    q15, d17, d7
            vmlal.u8    q14, d18, d6
            vmlal.u8    q15, d19, d7

            /* Z interpolate, lane 0 q12/q14 -> q10 */
            vshll.u16   q8, d24, #8
            vshll.u16   q9, d25, #8
            vmlsl.u16   q8, d24, \zr0
            vmlsl.u16   q9, d25, \zr0
            vmlal.u16   q8, d28, \zr0
            vmlal.u16   q9, d29, \zr0
            vrshrn.u32  d20, q8, #8
            vrshrn.u32  d21, q9, #8

            /* Z interpolate, lane 1 q13/q15 -> q11 */
            vshll.u16   q8, d26, #8
            vshll.u16   q9, d27, #8
            vmlsl.u16   q8, d26, \zr1
            vmlsl.u16   q9, d27, \zr1
            vmlal.u16   q8, d30, \zr1
            vmlal.u16   q9, d31, \zr1
            vrshrn.u32  d22, q8, #8
            vrshrn.u32  d23, q9, #8

            /* X interpolate, lanes 0 and 1 q10,q11 -> q14
             * (the low half of q10/q11 is entry x, the high half entry x+1;
             * this narrowing truncates, the final pack below rounds)
             */
            vshll.u16   q8, d20, #8
            vshll.u16   q9, d22, #8
            vmlsl.u16   q8, d20, \xr0
            vmlsl.u16   q9, d22, \xr1
            vmlal.u16   q8, d21, \xr0
            vmlal.u16   q9, d23, \xr1
            vshrn.u32   d28, q8, #8
            vshrn.u32   d29, q9, #8

            /* pack lanes 0-1 -> dst, rounding and saturating to 8 bits */
            vqrshrn.u16  \dst, q14, #8
.endm

/* void rsdIntrinsic3DLUT_K(
 *          void *dst,          // r0
 *          void const *in,     // r1
 *          size_t count,       // r2
 *          void const *lut,    // r3
 *          int32_t pitchy,     // [sp]
 *          int32_t pitchz,     // [sp+#4]
 *          int dimx,           // [sp+#8]
 *          int dimy,           // [sp+#12]
 *          int dimz);          // [sp+#16]
 *
 * Maps count 4-byte pixels from in to dst through a table of 4-byte entries.
 * Bytes 0, 1 and 2 of every pixel are scaled by dimx, dimy and dimz to form
 * the X, Y and Z table coordinates; byte 3 is copied to the output unchanged.
 * The integer part of each coordinate selects the entry at byte offset
 * 4*x + pitchy*y + pitchz*z and the low eight bits blend towards the
 * neighbouring entry on that axis.
 *
 * A byte value of 255 scaled by dim gives integer index dim-1 with a non-zero
 * fraction, and the +1 neighbour is always loaded, so every axis needs at
 * least dim+1 entries.  NOTE(review): callers presumably pass (size - 1) for
 * dimx/dimy/dimz -- confirm against the C caller.
 *
 * Pixels are handled eight at a time.  A final group of one to seven pixels
 * is padded with copies of its first pixel so that the unused lanes still
 * produce valid table offsets, and only the requested pixels are stored.
 *
 * r4-r7 and d8-d15 are saved and restored; r12 is used as scratch.
 * r0 is set to 0 on return.
 */
ENTRY(rsdIntrinsic3DLUT_K)
            push        {r4,r5,r6,r7}
            /* the push moved sp down by 16, so stack arguments start at sp+16 */
            ldr         r4, [sp, #16]
            ldr         r5, [sp, #20]
            ldr         r6, [sp, #24]
            ldr         r7, [sp, #28]
            ldr         r12, [sp, #32]
            vpush       {d8-d15}

            /* q4: d8 = {dimx, dimy, dimz, filler} as 16-bit lanes,
             *     d9 = {pitchy, pitchz} as 32-bit lanes
             */
            vmov.u8     d8, #1
            vmov.u16    d8[0], r6
            vmov.u16    d8[1], r7
            vmov.u16    d8[2], r12
            vmov        d9, r4, r5

            /* r2 = count - 8: >= 0 full group available, -8 nothing to do,
             * otherwise one to seven pixels to handle in the partial path
             */
            subs        r2, #8
            bge         2f
            cmp         r2, #-8
            ble         9f
            b           4f

            .align 6
1:          vst4.u8     {d12,d13,d14,d15}, [r0]!
/* r0  = dst
 * r1  = src
 * r2  = count
 * r3  = lut
 * r4  = pitchy
 * r5  = pitchz
 * r6  = offset0
 * r7  = offset1
 */
2:          vld4.u8     {d0,d2,d4,d6}, [r1]!
            /* keep byte 3 of each pixel; d6 is reused as scratch below */
3:          vmov        d10, d6
/* q0,q1,q2,q5 source data
 * q4 dimensions and pitches
 * q3, scratch register for scalar access
 */
            vmov        q3, q4
            vmovl.u8    q0, d0
            vmovl.u8    q1, d2
            vmovl.u8    q2, d4
            vmul.u16    q0, q0, d6[0]
            vmul.u16    q1, q1, d6[1]
            vmul.u16    q2, q2, d6[2]

/* vrsra.u16 below would be more accurate, but this can result in a dim.0 case
 * where we try to read from the limit of the array and the limit +1 to
 * interpolate, even though the fractional component is zero.  Strictly this is
 * correct, except for the illegal access problem.
 */
            vsra.u16    q0, q0, #8
            vsra.u16    q1, q1, #8
            vsra.u16    q2, q2, #8

            vshr.u16    q12, q0, #8
            vshr.u16    q13, q1, #8
            vshr.u16    q14, q2, #8

            /* X and Z fractions stay 16 bits wide, Y is narrowed to bytes */
            vbic.u16    q0, #0xff00
            vmovn.u16   d2, q1
            vbic.u16    q2, #0xff00

/* q0,d2,q2 fractional offset
 * q12,q13,q14 integer offset
 */

            /* offset = x * 4 + y * pitchy + z * pitchz */
            vshll.u16   q6, d24, #2
            vshll.u16   q7, d25, #2
            vmovl.u16   q8, d26
            vmovl.u16   q9, d27
            vmovl.u16   q10, d28
            vmovl.u16   q11, d29
            vmla.s32    q6, q8,  d9[0]
            vmla.s32    q7, q9,  d9[0]
            vmla.s32    q6, q10, d9[1]
            vmla.s32    q7, q11, d9[1]

/* q6,q7 list of table offsets */

        /* lanes 0 and 1 */
            lanepair dst=d12, src=d12, xr0=d0[0], xr1=d0[1], yr0=d2[0], yr1=d2[1], zr0=d4[0], zr1=d4[1]

        /* lanes 2 and 3 */
            lanepair dst=d13, src=d13, xr0=d0[2], xr1=d0[3], yr0=d2[2], yr1=d2[3], zr0=d4[2], zr1=d4[3]

        /* lanes 4 and 5 */
            lanepair dst=d14, src=d14, xr0=d1[0], xr1=d1[1], yr0=d2[4], yr1=d2[5], zr0=d5[0], zr1=d5[1]

        /* lanes 6 and 7 */
            lanepair dst=d15, src=d15, xr0=d1[2], xr1=d1[3], yr0=d2[6], yr1=d2[7], zr0=d5[2], zr1=d5[3]

            /* regroup the eight packed pixels by byte position so that the
             * fourth group can be replaced with the saved input bytes
             */
            vuzp.u8     d12, d13
            vuzp.u8     d14, d15
            vuzp.u8     d12, d14
            vuzp.u8     d13, d15

            subs        r2, r2, #8
            vmov.u8     d15, d10

            /* flags still come from the subs above */
            bge         1b

            /* r2 < -8 means this pass processed a partial group */
            cmp         r2, #-8
            blt         1f

            vst4.u8     {d12,d13,d14,d15}, [r0]!

            /* r2 == -8: count was a multiple of eight, nothing is left */
            beq         9f

        /* fill the vector with a safe value */
4:          vld1.u32    {d0[]}, [r1]
            vmov        d2, d0
            vmov        d4, d0
            vmov        d6, d0
            /* bits 2..0 of r2 hold the number of remaining pixels */
            tst         r2, #4
            beq         2f
            vld1.u32    {d0}, [r1]!
            vld1.u32    {d2}, [r1]!
2:          tst         r2, #2
            beq         2f
            vld1.u32    {d4}, [r1]!
2:          tst         r2, #1
            beq         2f
            vld1.u32    {d6[0]}, [r1]!
            /* same regrouping by byte position that vld4 gives the main loop */
2:          vuzp.8      d0, d2
            vuzp.8      d4, d6
            vuzp.8      d0, d4
            vuzp.8      d2, d6
            b           3b

            /* partial store: back to whole pixels, then write 4, 2 and 1 */
1:          vzip.8      d12, d14
            vzip.8      d13, d15
            vzip.8      d12, d13
            vzip.8      d14, d15
            tst         r2, #4
            beq         2f
            vst1.u32    {d12,d13}, [r0]!
2:          tst         r2, #2
            beq         2f
            vst1.u32    {d14}, [r0]!
2:          tst         r2, #1
            beq         9f
            vst1.u32    {d15[0]}, [r0]!

9:          mov         r0, #0
            vpop        {d8-d15}
            pop         {r4,r5,r6,r7}
            bx          lr
END(rsdIntrinsic3DLUT_K)