/* xref: /aosp_15_r20/frameworks/rs/toolkit/Lut3d_neon.S (revision e1eccf28f96817838ad6867f7f39d2351ec11f56) */
/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * ENTRY(f): open the global function f -- switch to .text, apply .align 4,
 *           export the symbol, mark it as a function, place the label and
 *           start an unwind-table entry with .fnstart.
 * END(f):   close the unwind-table entry with .fnend and record the size
 *           of f.
 * Both are C preprocessor macros, so this file has to be run through the
 * preprocessor before it is assembled.
 */
#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: .fnstart
#define END(f) .fnend; .size f, .-f;

/* Set build attribute 25 (Tag_ABI_align8_preserved) to 1, then select the
 * ARM instruction set for everything that follows.
 */
.eabi_attribute 25,1 @Tag_ABI_align8_preserved
.arm

/*
 * lanepair dst, src, xr0, xr1, yr0, yr1, zr0, zr1
 *
 * Trilinear table lookup for two pixels at a time.
 *
 *   src      D register holding two 32-bit byte offsets, one per pixel,
 *            relative to the table base in r3.
 *   xr0/xr1  16-bit scalars (Dn[i]): x weight of pixel 0 / pixel 1.
 *   yr0/yr1  8-bit scalars  (Dn[i]): y weight of pixel 0 / pixel 1.
 *   zr0/zr1  16-bit scalars (Dn[i]): z weight of pixel 0 / pixel 1.
 *   dst      D register receiving the two results, 4 x u8 each
 *            (pixel 0 in the low word, pixel 1 in the high word).
 *            dst may be the same register as src; src is read first.
 *
 * Register contract:
 *   in        r3 = table base, r4 = y pitch in bytes, r5 = z pitch in bytes
 *   clobbers  r6, r7, d6, d7, q8-q15
 *
 * Every 8-byte load fetches a table entry and its x+1 neighbour, so four
 * loads per pixel cover all eight corners of the cell.  Each interpolation
 * stage computes  a*256 - a*w + b*w  =  a*(256 - w) + b*w  for a weight w
 * in 0..255.
 */
.macro lanepair dst, src, xr0, xr1, yr0, yr1, zr0, zr1

            /* r6/r7 = address of the cell origin for pixel 0 / pixel 1 */
            vmov        r6, r7, \src

            add         r6, r6, r3
            add         r7, r7, r3

            /* front plane: row y (d16/d17), then row y+1 (d18/d19) */
            vld1.u8     d16, [r6], r4
            vld1.u8     d17, [r7], r4

            vld1.u8     d18, [r6], r5
            vld1.u8     d19, [r7], r5

            /* broadcast the 8-bit y weights so they can feed vmlsl/vmlal.u8 */
            vdup.u8     d6, \yr0
            vdup.u8     d7, \yr1
            /* Y interpolate, front, lanes 0 and 1 -> q12 and q13 */
            vshll.u8    q12, d16, #8
            vshll.u8    q13, d17, #8
            vmlsl.u8    q12, d16, d6
            vmlsl.u8    q13, d17, d7
            vmlal.u8    q12, d18, d6
            vmlal.u8    q13, d19, d7

            /* rear plane: r6/r7 now point at row y+1 of plane z+1 ... */
            vld1.u8     d18, [r6]
            vld1.u8     d19, [r7]

            /* ... and stepping back one y pitch reaches row y of plane z+1 */
            sub         r6, r6, r4
            sub         r7, r7, r4

            vld1.u8     d16, [r6]
            vld1.u8     d17, [r7]

            /* Y interpolate, rear, lanes 0 and 1 -> q14 and q15 */
            vshll.u8    q14, d16, #8
            vshll.u8    q15, d17, #8
            vmlsl.u8    q14, d16, d6
            vmlsl.u8    q15, d17, d7
            vmlal.u8    q14, d18, d6
            vmlal.u8    q15, d19, d7

            /* Z interpolate, lane 0 q12/q14 -> q10 */
            vshll.u16   q8, d24, #8
            vshll.u16   q9, d25, #8
            vmlsl.u16   q8, d24, \zr0
            vmlsl.u16   q9, d25, \zr0
            vmlal.u16   q8, d28, \zr0
            vmlal.u16   q9, d29, \zr0
            vrshrn.u32  d20, q8, #8
            vrshrn.u32  d21, q9, #8

            /* Z interpolate, lane 1 q13/q15 -> q11 */
            vshll.u16   q8, d26, #8
            vshll.u16   q9, d27, #8
            vmlsl.u16   q8, d26, \zr1
            vmlsl.u16   q9, d27, \zr1
            vmlal.u16   q8, d30, \zr1
            vmlal.u16   q9, d31, \zr1
            vrshrn.u32  d22, q8, #8
            vrshrn.u32  d23, q9, #8

            /* X interpolate, lanes 0 and 1 q10,q11 -> q14
             * (low half of each Q register is entry x, high half is x+1)
             */
            vshll.u16   q8, d20, #8
            vshll.u16   q9, d22, #8
            vmlsl.u16   q8, d20, \xr0
            vmlsl.u16   q9, d22, \xr1
            vmlal.u16   q8, d21, \xr0
            vmlal.u16   q9, d23, \xr1
            vshrn.u32   d28, q8, #8
            vshrn.u32   d29, q9, #8

            /* round, saturate and pack both pixels -> dst operand */
            vqrshrn.u16  \dst, q14, #8
.endm

/* void rsdIntrinsic3DLUT_K(
 *          void *dst,          // r0
 *          void const *in,     // r1
 *          size_t count,       // r2
 *          void const *lut,    // r3
 *          int32_t pitchy,     // [sp]
 *          int32_t pitchz,     // [sp+#4]
 *          int dimx,           // [sp+#8]
 *          int dimy,           // [sp+#12]
 *          int dimz);          // [sp+#16]
 *
 * Maps count 4-byte pixels from in to dst through a 3D lookup table.
 * Channels 0, 1 and 2 of each pixel are multiplied by dimx, dimy and dimz
 * to form 8.8 fixed-point table coordinates; the integer parts select a
 * table cell (4 bytes per x step, pitchy bytes per y step, pitchz bytes per
 * z step) and the fractional parts drive the trilinear interpolation done
 * by lanepair.  Channel 3 of the input is copied to the output unchanged.
 *
 * Eight pixels are handled per pass.  A remainder of 1..7 pixels is loaded
 * into a vector padded with copies of a real pixel, run through the same
 * code once, and stored partially.  count is tested with signed branches;
 * a value <= 0 returns without touching memory.
 *
 * Saves and restores r4-r7 and d8-d15, uses r12 as scratch, and leaves 0 in
 * r0 on exit.
 */
ENTRY(rsdIntrinsic3DLUT_K)
            push        {r4,r5,r6,r7}
            /* stack arguments sit 16 bytes higher after the push above */
            ldr         r4, [sp, #16]
            ldr         r5, [sp, #20]
            ldr         r6, [sp, #24]
            ldr         r7, [sp, #28]
            ldr         r12, [sp, #32]
            vpush       {d8-d15}

            /* q4: d8 = {dimx, dimy, dimz, unused} as u16, d9 = {pitchy, pitchz} */
            vmov.u8     d8, #1
            vmov.u16    d8[0], r6
            vmov.u16    d8[1], r7
            vmov.u16    d8[2], r12
            vmov        d9, r4, r5

            /* r2 = count - 8 from here on: >= 0 means a full group of eight
             * pixels is available, -7..-1 means only a remainder, and
             * <= -8 means there is nothing to do.
             */
            subs        r2, #8
            bge         2f
            cmp         r2, #-8
            ble         9f
            b           4f

            .align 6
            /* loop back-edge: write out the previous group, then load the next */
1:          vst4.u8     {d12,d13,d14,d15}, [r0]!
/* r0  = dst
 * r1  = src
 * r2  = count
 * r3  = lut
 * r4  = pitchy
 * r5  = pitchz
 * r6 = offset0
 * r7 = offset1
 */
2:          vld4.u8     {d0,d2,d4,d6}, [r1]!
            /* keep channel 3 in d10; d6 is about to be reused as scratch */
3:          vmov        d10, d6
/* q0,q1,q2,q5 source data
 * q4 dimensions and pitches
 * q3, scratch register for scalar access
 */
            /* 16-bit scalar operands must live in d0-d7, and lanepair
             * overwrites d6/d7, so q3 is refreshed from q4 on every pass.
             */
            vmov        q3, q4
            vmovl.u8    q0, d0
            vmovl.u8    q1, d2
            vmovl.u8    q2, d4
            vmul.u16    q0, q0, d6[0]
            vmul.u16    q1, q1, d6[1]
            vmul.u16    q2, q2, d6[2]

/* vrsra.u16 below would be more accurate, but this can result in a dim.0 case
 * where we try to read from the limit of the array and the limit +1 to
 * interpolate, even though the fractional component is zero.  Strictly this is
 * correct, except for the illegal access problem.
 */
            vsra.u16    q0, q0, #8
            vsra.u16    q1, q1, #8
            vsra.u16    q2, q2, #8

            vshr.u16    q12, q0, #8
            vshr.u16    q13, q1, #8
            vshr.u16    q14, q2, #8

            /* x and z fractions stay 16 bits wide (used as u16 scalars);
             * the y fraction is narrowed to bytes because lanepair applies
             * it with 8-bit multiplies.
             */
            vbic.u16    q0, #0xff00
            vmovn.u16   d2, q1
            vbic.u16    q2, #0xff00

/* q0,d2,q2 fractional offset
 * q12,q13,q14 integer offset
 */

            /* byte offset = x * 4 + y * pitchy + z * pitchz */
            vshll.u16   q6, d24, #2
            vshll.u16   q7, d25, #2
            vmovl.u16   q8, d26
            vmovl.u16   q9, d27
            vmovl.u16   q10, d28
            vmovl.u16   q11, d29
            vmla.s32    q6, q8,  d9[0]
            vmla.s32    q7, q9,  d9[0]
            vmla.s32    q6, q10, d9[1]
            vmla.s32    q7, q11, d9[1]

/* q6,q7 list of table offsets */

        /* Each lanepair consumes one D register of offsets and then reuses
         * that same register for its two output pixels.
         */

        /* lanes 0 and 1 */
            lanepair dst=d12, src=d12, xr0=d0[0], xr1=d0[1], yr0=d2[0], yr1=d2[1], zr0=d4[0], zr1=d4[1]

        /* lanes 2 and 3 */
            lanepair dst=d13, src=d13, xr0=d0[2], xr1=d0[3], yr0=d2[2], yr1=d2[3], zr0=d4[2], zr1=d4[3]

        /* lanes 4 and 5 */
            lanepair dst=d14, src=d14, xr0=d1[0], xr1=d1[1], yr0=d2[4], yr1=d2[5], zr0=d5[0], zr1=d5[1]

        /* lanes 6 and 7 */
            lanepair dst=d15, src=d15, xr0=d1[2], xr1=d1[3], yr0=d2[6], yr1=d2[7], zr0=d5[2], zr1=d5[3]

            /* d12-d15 hold eight pixels in memory order; split them into
             * one register per channel, the layout vst4 expects.
             */
            vuzp.u8     d12, d13
            vuzp.u8     d14, d15
            vuzp.u8     d12, d14
            vuzp.u8     d13, d15

            subs        r2, r2, #8
            /* channel 3 comes straight from the input (does not touch flags) */
            vmov.u8     d15, d10

            bge         1b

            /* r2 < -8 only when this pass processed a padded remainder */
            cmp         r2, #-8
            blt         1f

            vst4.u8     {d12,d13,d14,d15}, [r0]!

            /* flags are still those of the cmp: r2 == -8 means no remainder */
            beq         9f

            /* fill the vector with a safe value */
            /* The low three bits of r2 equal the number of pixels left; load
             * 4, 2 and 1 pixels according to those bits.
             */
4:          vld1.u32    {d0[]}, [r1]
            vmov        d2, d0
            vmov        d4, d0
            vmov        d6, d0
            tst         r2, #4
            beq         2f
            vld1.u32    {d0}, [r1]!
            vld1.u32    {d2}, [r1]!
2:          tst         r2, #2
            beq         2f
            vld1.u32    {d4}, [r1]!
2:          tst         r2, #1
            beq         2f
            vld1.u32    {d6[0]}, [r1]!
            /* rearrange into the per-channel layout vld4 would have produced */
2:          vuzp.8      d0, d2
            vuzp.8      d4, d6
            vuzp.8      d0, d4
            vuzp.8      d2, d6
            b           3b

            /* Partial store: back to memory order, then write 4, 2 and 1
             * pixels according to the same low bits of r2 used for the load.
             */
1:          vzip.8      d12, d14
            vzip.8      d13, d15
            vzip.8      d12, d13
            vzip.8      d14, d15
            tst         r2, #4
            beq         2f
            vst1.u32    {d12,d13}, [r0]!
2:          tst         r2, #2
            beq         2f
            vst1.u32    {d14}, [r0]!
2:          tst         r2, #1
            beq         9f
            vst1.u32    {d15[0]}, [r0]!

9:          mov         r0, #0
            vpop        {d8-d15}
            pop         {r4,r5,r6,r7}
            bx lr
END(rsdIntrinsic3DLUT_K)