/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Target: AArch64, GNU as syntax, run through the C preprocessor. */

#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f:
#define END(f) .size f, .-f;


/* lanepair: trilinear interpolation of two pixels from the 3D LUT.
 *
 * Arguments:
 *   dst        destination vector half.  The .8b form receives the packed
 *              result in the low 64 bits, the .16b forms of v20/v21 receive
 *              it in the high 64 bits (via the "2" variant of the narrow).
 *   src0,src1  32-bit vector lanes holding the byte offset of each pixel's
 *              cell within the LUT (sign-extended with smov).
 *   xr0,xr1    16-bit lanes holding the X fraction of each pixel.
 *   yr0,yr1    8-bit lanes holding the Y fraction of each pixel.
 *   zr0,zr1    16-bit lanes holding the Z fraction of each pixel.
 *
 * Reads:    x3 (lut base), x4 (pitchy), x5 (pitchz), v5 (must be all zero).
 * Clobbers: x6, x7, v8-v19.
 *
 * Each ld1 fetches 8 bytes, i.e. the cell at X and its neighbour at X+1,
 * so the low half of each working vector is the X sample and the high half
 * is the X+1 sample.
 */
.macro lanepair dst, src0, src1, xr0, xr1, yr0, yr1, zr0, zr1

            smov        x6, \src0
            smov        x7, \src1

            add         x6, x6, x3
            add         x7, x7, x3

            /* front plane: row Y into v16/v17, row Y+1 into v18/v19 */
            ld1         {v16.2s}, [x6], x4
            ld1         {v17.2s}, [x7], x4

            ld1         {v18.2s}, [x6], x5
            ld1         {v19.2s}, [x7], x5

            dup         v8.8b, \yr0
            dup         v9.8b, \yr1
            /* Y interpolate, front, lanes 0 and 1 -> v12 and v13 */
            /* zip1 with the zero vector yields (sample << 8) per halfword */
            zip1        v12.16b, v5.16b, v16.16b
            zip1        v13.16b, v5.16b, v17.16b
            umlsl       v12.8h, v16.8b, v8.8b
            umlsl       v13.8h, v17.8b, v9.8b
            umlal       v12.8h, v18.8b, v8.8b
            umlal       v13.8h, v19.8b, v9.8b

            /* rear plane: x6/x7 now point at (Y+1, Z+1); step back to (Y, Z+1) */
            ld1         {v18.2s}, [x6]
            ld1         {v19.2s}, [x7]

            sub         x6, x6, x4
            sub         x7, x7, x4

            ld1         {v16.2s}, [x6]
            ld1         {v17.2s}, [x7]

            /* Y interpolate, rear, lanes 0 and 1 -> v14 and v15 */
            zip1        v14.16b, v5.16b, v16.16b
            zip1        v15.16b, v5.16b, v17.16b
            umlsl       v14.8h, v16.8b, v8.8b
            umlsl       v15.8h, v17.8b, v9.8b
            umlal       v14.8h, v18.8b, v8.8b
            umlal       v15.8h, v19.8b, v9.8b

            /* Z interpolate, lane 0 v12/v14 -> v10 */
            ushll       v8.4s, v12.4h, #8
            ushll2      v9.4s, v12.8h, #8
            umlsl       v8.4s, v12.4h, \zr0
            umlsl2      v9.4s, v12.8h, \zr0
            umlal       v8.4s, v14.4h, \zr0
            umlal2      v9.4s, v14.8h, \zr0
            rshrn       v10.4h, v8.4s, #8
            rshrn2      v10.8h, v9.4s, #8

            /* Z interpolate, lane 1 v13/v15 -> v11 */
            ushll       v8.4s, v13.4h, #8
            ushll2      v9.4s, v13.8h, #8
            umlsl       v8.4s, v13.4h, \zr1
            umlsl2      v9.4s, v13.8h, \zr1
            umlal       v8.4s, v15.4h, \zr1
            umlal2      v9.4s, v15.8h, \zr1
            rshrn       v11.4h, v8.4s, #8
            rshrn2      v11.8h, v9.4s, #8

            /* X interpolate, lanes 0 and 1 v10,v11 -> v14 */
            ushll       v8.4s, v10.4h, #8
            ushll       v9.4s, v11.4h, #8
            umlsl       v8.4s, v10.4h, \xr0
            umlsl       v9.4s, v11.4h, \xr1
            umlal2      v8.4s, v10.8h, \xr0
            umlal2      v9.4s, v11.8h, \xr1
            shrn        v14.4h, v8.4s, #8
            shrn2       v14.8h, v9.4s, #8

            /* pack both lanes from v14 into dst (upper half for the .16b forms) */
.ifc \dst, v20.16b
            uqrshrn2    \dst, v14.8h, #8
.else
.ifc \dst, v21.16b
            uqrshrn2    \dst, v14.8h, #8
.else
            uqrshrn     \dst, v14.8h, #8
.endif
.endif
.endm

/* void rsdIntrinsic3DLUT_K(
 *          void *dst,          // x0
 *          void const *in,     // x1
 *          size_t count,       // x2
 *          void const *lut,    // x3
 *          int32_t pitchy,     // w4
 *          int32_t pitchz,     // w5
 *          int dimx,           // w6
 *          int dimy,           // w7
 *          int dimz);          // [sp]
 *
 * ABI: AAPCS64.
 *
 * Processes count 4-byte pixels from in, eight per iteration, writing the
 * result to dst.  Bytes 0..2 of each input pixel are multiplied by
 * dimx/dimy/dimz respectively; the top byte of each product selects the LUT
 * cell and the low byte is the interpolation fraction.  Byte 3 of each pixel
 * is copied through unchanged.  A count of zero stores nothing.
 *
 * NOTE(review): dimx/dimy/dimz are presumably (LUT size - 1) per axis so that
 * the X+1/Y+1/Z+1 neighbour reads stay inside the table -- confirm against
 * the caller.
 *
 * Clobbers: x2, x6-x8, v0-v7, v16-v23, flags.  d8-d15 are saved on the stack
 * (64 bytes) and restored before returning.
 */
ENTRY(rsdIntrinsic3DLUT_K)
            ldr         w8, [sp]
            /* pitchy/pitchz are 32-bit arguments, so the upper halves of x4
             * and x5 are unspecified on entry.  lanepair uses the full 64-bit
             * registers for address arithmetic, so sign-extend them here
             * (matching the signed treatment the offsets get from smov).
             */
            sxtw        x4, w4
            sxtw        x5, w5
            stp         d8, d9, [sp, #-64]!
            stp         d10, d11, [sp, #16]
            stp         d12, d13, [sp, #32]
            stp         d14, d15, [sp, #48]
            movi        v4.8b, #1
            ins         v4.h[0], w6
            ins         v4.h[1], w7
            ins         v4.h[2], w8
            ins         v4.s[2], w4
            ins         v4.s[3], w5
            movi        v5.16b, #0

            subs        x2, x2, #8
            bge         2f
            cmn         x2, #8    // same as cmp x2, #-8
            ble         9f
            b           4f

            .align 6
1:          st4         {v20.8b,v21.8b,v22.8b,v23.8b}, [x0], #32
/* x0  = dst
 * x1  = src
 * x2  = count
 * x3  = lut
 * x4  = pitchy
 * x5  = pitchz
 * x6  = offset0
 * x7  = offset1
 */
2:          ld4         {v0.8b-v3.8b}, [x1], #32
/* v0,v1,v2,v3 source data
 * v4 dimensions and pitches
 */
3:          uxtl        v0.8h, v0.8b
            uxtl        v1.8h, v1.8b
            uxtl        v2.8h, v2.8b
            mul         v0.8h, v0.8h, v4.h[0]
            mul         v1.8h, v1.8h, v4.h[1]
            mul         v2.8h, v2.8h, v4.h[2]

/* ursra below would be more accurate, but this can result in a dim.0 case
 * where we try to read from the limit of the array and the limit +1 to
 * interpolate, even though the fractional component is zero.  Strictly this is
 * correct, except for the illegal access problem.
 */
            usra        v0.8h, v0.8h, #8
            usra        v1.8h, v1.8h, #8
            usra        v2.8h, v2.8h, #8

            ushr        v12.8h, v0.8h, #8
            ushr        v13.8h, v1.8h, #8
            ushr        v14.8h, v2.8h, #8
            bic         v0.8h, #0xff, LSL #8
            xtn         v1.8b, v1.8h
            bic         v2.8h, #0xff, LSL #8

/* v0.8h,v1.8b,v2.8h fractional offset
 * v12.8h,v13.8h,v14.8h integer offset
 */

            /* offset = x * 4 + y * pitchy + z * pitchz */
            ushll       v6.4s, v12.4h, #2
            ushll2      v7.4s, v12.8h, #2
            uxtl        v8.4s, v13.4h
            uxtl2       v9.4s, v13.8h
            uxtl        v10.4s, v14.4h
            uxtl2       v11.4s, v14.8h
            mla         v6.4s, v8.4s, v4.s[2]
            mla         v7.4s, v9.4s, v4.s[2]
            mla         v6.4s, v10.4s, v4.s[3]
            mla         v7.4s, v11.4s, v4.s[3]

/* v6,v7 list of table offsets */

        /* lanes 0 and 1 */
        lanepair dst=v20.8b,  src0=v6.s[0], src1=v6.s[1], xr0=v0.h[0], xr1=v0.h[1], yr0=v1.b[0], yr1=v1.b[1], zr0=v2.h[0], zr1=v2.h[1]

        /* lanes 2 and 3 */
        lanepair dst=v20.16b, src0=v6.s[2], src1=v6.s[3], xr0=v0.h[2], xr1=v0.h[3], yr0=v1.b[2], yr1=v1.b[3], zr0=v2.h[2], zr1=v2.h[3]

        /* lanes 4 and 5 */
        lanepair dst=v21.8b,  src0=v7.s[0], src1=v7.s[1], xr0=v0.h[4], xr1=v0.h[5], yr0=v1.b[4], yr1=v1.b[5], zr0=v2.h[4], zr1=v2.h[5]

        /* lanes 6 and 7 */
        lanepair dst=v21.16b, src0=v7.s[2], src1=v7.s[3], xr0=v0.h[6], xr1=v0.h[7], yr0=v1.b[6], yr1=v1.b[7], zr0=v2.h[6], zr1=v2.h[7]

            /* de-interleave the eight packed pixels in v20:v21 into
             * per-channel vectors v20, v21, v22 ready for st4
             */
            uzp1        v6.16b, v20.16b, v21.16b
            uzp2        v7.16b, v20.16b, v21.16b
            uzp1        v20.16b, v6.16b, v7.16b
            uzp2        v22.16b, v6.16b, v7.16b
            mov         v21.d[0], v20.d[1]

            subs        x2, x2, #8
            /* fourth channel is passed through from the input unchanged */
            mov         v23.8b, v3.8b

            bge         1b

            cmn         x2, #8    // same as cmp x2, #-8
            blt         1f

            st4         {v20.8b,v21.8b,v22.8b,v23.8b}, [x0], #32
            /* flags still come from the cmn above: equal means nothing left */
            beq         9f

            /* fill the vector with a safe value */
4:          ld4r        {v0.8b-v3.8b}, [x1]
            /* the low three bits of x2 equal the number of pixels remaining */
            tbz         x2, #2, 2f
            ld4         {v0.b-v3.b}[0], [x1], #4
            ld4         {v0.b-v3.b}[1], [x1], #4
            ld4         {v0.b-v3.b}[2], [x1], #4
            ld4         {v0.b-v3.b}[3], [x1], #4
2:          tbz         x2, #1, 2f
            ld4         {v0.b-v3.b}[4], [x1], #4
            ld4         {v0.b-v3.b}[5], [x1], #4
2:          tbz         x2, #0, 2f
            ld4         {v0.b-v3.b}[6], [x1], #4
2:          b           3b

            /* partial store: same lane layout as the partial load above */
1:          tst         x2, #4
            beq         2f
            st4         {v20.b-v23.b}[0], [x0], #4
            st4         {v20.b-v23.b}[1], [x0], #4
            st4         {v20.b-v23.b}[2], [x0], #4
            st4         {v20.b-v23.b}[3], [x0], #4
2:          tst         x2, #2
            beq         2f
            st4         {v20.b-v23.b}[4], [x0], #4
            st4         {v20.b-v23.b}[5], [x0], #4
2:          tst         x2, #1
            beq         9f
            st4         {v20.b-v23.b}[6], [x0], #4

9:          ldp         d14, d15, [sp, #48]
            ldp         d12, d13, [sp, #32]
            ldp         d10, d11, [sp, #16]
            ldp         d8, d9, [sp], #64
            ret
END(rsdIntrinsic3DLUT_K)