xref: /aosp_15_r20/frameworks/rs/toolkit/Blend_neon.S (revision e1eccf28f96817838ad6867f7f39d2351ec11f56)
1*e1eccf28SAndroid Build Coastguard Worker/*
2*e1eccf28SAndroid Build Coastguard Worker * Copyright (C) 2013-2014 The Android Open Source Project
3*e1eccf28SAndroid Build Coastguard Worker *
4*e1eccf28SAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License");
5*e1eccf28SAndroid Build Coastguard Worker * you may not use this file except in compliance with the License.
6*e1eccf28SAndroid Build Coastguard Worker * You may obtain a copy of the License at
7*e1eccf28SAndroid Build Coastguard Worker *
8*e1eccf28SAndroid Build Coastguard Worker *      http://www.apache.org/licenses/LICENSE-2.0
9*e1eccf28SAndroid Build Coastguard Worker *
10*e1eccf28SAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software
11*e1eccf28SAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS,
12*e1eccf28SAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*e1eccf28SAndroid Build Coastguard Worker * See the License for the specific language governing permissions and
14*e1eccf28SAndroid Build Coastguard Worker * limitations under the License.
15*e1eccf28SAndroid Build Coastguard Worker */
16*e1eccf28SAndroid Build Coastguard Worker
/* ENTRY(f): switch to .text, align, export f as a function symbol, place the
 * label and open its unwind region.  END(f): close the unwind region and
 * record the size of f.
 */
#define ENTRY(f) \
        .text; \
        .align 4; \
        .globl f; \
        .type f,#function; \
        f: .fnstart
#define END(f) \
        .fnend; \
        .size f, .-f;
19*e1eccf28SAndroid Build Coastguard Worker
/* X-macro table of the supported blend modes.  The macro passed in is invoked
 * once per mode as EMIT(number, NAME); NAME matches the suffix used by the
 * params_NAME / blend_kernel_NAME pairs below.  How the number is consumed is
 * decided by whoever expands this list.
 */
#define BLEND_LIST(EMIT) \
    EMIT(0, CLEAR) \
    EMIT(1, SRC) \
    EMIT(2, DST) \
    EMIT(3, SRC_OVER) \
    EMIT(4, DST_OVER) \
    EMIT(5, SRC_IN) \
    EMIT(6, DST_IN) \
    EMIT(7, SRC_OUT) \
    EMIT(8, DST_OUT) \
    EMIT(9, SRC_ATOP) \
    EMIT(10, DST_ATOP) \
    EMIT(11, XOR) \
    EMIT(14, MULTIPLY) \
    EMIT(21, DIFFERENCE) \
    EMIT(34, ADD) \
    EMIT(35, SUBTRACT)
37*e1eccf28SAndroid Build Coastguard Worker
/* Build attribute 25 is Tag_ABI_align8_preserved; set it to 1.  Then assemble
 * everything that follows as ARM (not Thumb) instructions.
 */
.eabi_attribute 25, 1
.arm
40*e1eccf28SAndroid Build Coastguard Worker
/* For every blend operation supported, define a macro with just the arithmetic
 * component.  The rest can be handled later on.
 *
 * At entry q0-q3 contain the RGBA data from the destination buffer, and q8-q11
 * contain the data from the source buffer.  Both have already been split out
 * into one colour component per register (if necessary).  q3 and q11 contain
 * the alpha components.  The macro must leave its result in q0-q3, which is
 * what the wrapper stores back to the destination buffer.
 *
 * At the same time as defining the assembly macro, define a corresponding
 * preprocessor macro indicating any other requirements.  Anything not listed
 * keeps the wrapper's default (zipped=1, lddst=1, ldsrc=1, nowrap=0).
 *    zipped=0 -- The macro does not require the RGBA components to be
 *                separated.
 *    lddst=0  -- The macro does not require data from the destination buffer.
 *    ldsrc=0  -- The macro does not require data from the source buffer.
 *    nowrap=1 -- The macro requires no wrapper at all, and should simply be
 *                inserted without any surrounding load/store or loop code.
 */
58*e1eccf28SAndroid Build Coastguard Worker
/* CLEAR: the result is all zeroes.  Neither buffer has to be loaded, and the
 * bytes need not be de-interleaved because every byte gets the same value.
 */
#define params_CLEAR zipped=0, lddst=0, ldsrc=0
.macro blend_kernel_CLEAR
        vmov.i8     q0, #0
        vmov.i8     q1, #0
        vmov.i8     q2, #0
        vmov.i8     q3, #0
.endm
66*e1eccf28SAndroid Build Coastguard Worker
/* SRC: the result is the source data unchanged.  The destination is never
 * read and no per-channel split is needed for a straight copy.
 */
#define params_SRC zipped=0, lddst=0
.macro blend_kernel_SRC
        vmov        q0, q8
        vmov        q1, q9
        vmov        q2, q10
        vmov        q3, q11
.endm
74*e1eccf28SAndroid Build Coastguard Worker
/* DST: the destination already holds the result, so the kernel is empty and
 * nowrap=1 asks for it to be inserted without any load/store or loop code.
 */
#define params_DST nowrap=1
.macro blend_kernel_DST
        /* intentionally empty */
.endm
79*e1eccf28SAndroid Build Coastguard Worker
/* SRC_OVER: result = src + dst * (255 - src.alpha) / 255 on every channel,
 * with the final add saturating.  Needs one colour component per register.
 * Scratch registers: q4-q7 and q12-q15.
 */
#define params_SRC_OVER zipped=1
.macro blend_kernel_SRC_OVER
        /* q7 = 255 - src.alpha */
        vmvn        q7,  q11

        /* 16-bit products dst * q7.  The odd d-register halves go to q12-q15,
         * the even halves replace q0-q3 in place (after their odd half has
         * been consumed). */
        vmull.u8    q12, d15, d1
        vmull.u8    q0,  d14, d0
        vmull.u8    q13, d15, d3
        vmull.u8    q1,  d14, d2
        vmull.u8    q14, d15, d5
        vmull.u8    q2,  d14, d4
        vmull.u8    q15, d15, d7
        vmull.u8    q3,  d14, d6

        /* Divide by 255 in two rounding steps: first q4-q7 = round(x / 256) */
        vrshrn.u16  d8,  q0,  #8
        vrshrn.u16  d9,  q12, #8
        vrshrn.u16  d10, q1,  #8
        vrshrn.u16  d11, q13, #8
        vrshrn.u16  d12, q2,  #8
        vrshrn.u16  d13, q14, #8
        vrshrn.u16  d14, q3,  #8
        vrshrn.u16  d15, q15, #8

        /* x += round(x / 256) */
        vaddw.u8    q0,  q0,  d8
        vaddw.u8    q12, q12, d9
        vaddw.u8    q1,  q1,  d10
        vaddw.u8    q13, q13, d11
        vaddw.u8    q2,  q2,  d12
        vaddw.u8    q14, q14, d13
        vaddw.u8    q3,  q3,  d14
        vaddw.u8    q15, q15, d15

        /* Narrow back to bytes in q0-q3: round(x / 256) */
        vrshrn.u16  d0,  q0,  #8
        vrshrn.u16  d1,  q12, #8
        vrshrn.u16  d2,  q1,  #8
        vrshrn.u16  d3,  q13, #8
        vrshrn.u16  d4,  q2,  #8
        vrshrn.u16  d5,  q14, #8
        vrshrn.u16  d6,  q3,  #8
        vrshrn.u16  d7,  q15, #8

        /* Add the source on top, saturating at 255 */
        vqadd.u8    q0,  q0,  q8
        vqadd.u8    q1,  q1,  q9
        vqadd.u8    q2,  q2,  q10
        vqadd.u8    q3,  q3,  q11
.endm
125*e1eccf28SAndroid Build Coastguard Worker
/* DST_OVER: result = dst + src * (255 - dst.alpha) / 255 on every channel,
 * with the final add saturating.  Needs one colour component per register.
 * Scratch registers: q4-q7 and q12-q15; q8-q11 (source) are overwritten with
 * the scaled source.
 */
#define params_DST_OVER zipped=1
.macro blend_kernel_DST_OVER
        /* q7 = 255 - dst.alpha */
        vmvn        q7,  q3

        /* 16-bit products src * q7.  The odd d-register halves go to q12-q15,
         * the even halves replace q8-q11 in place (after their odd half has
         * been consumed). */
        vmull.u8    q12, d15, d17
        vmull.u8    q8,  d14, d16
        vmull.u8    q13, d15, d19
        vmull.u8    q9,  d14, d18
        vmull.u8    q14, d15, d21
        vmull.u8    q10, d14, d20
        vmull.u8    q15, d15, d23
        vmull.u8    q11, d14, d22

        /* Divide by 255 in two rounding steps: first q4-q7 = round(x / 256).
         * The even-half products live in q8-q11 here (not q0-q3, which still
         * hold the untouched destination), so the correction term must be
         * derived from q8-q11. */
        vrshrn.u16  d8,  q8,  #8
        vrshrn.u16  d9,  q12, #8
        vrshrn.u16  d10, q9,  #8
        vrshrn.u16  d11, q13, #8
        vrshrn.u16  d12, q10, #8
        vrshrn.u16  d13, q14, #8
        vrshrn.u16  d14, q11, #8
        vrshrn.u16  d15, q15, #8

        /* x += round(x / 256) */
        vaddw.u8    q8,  q8,  d8
        vaddw.u8    q12, q12, d9
        vaddw.u8    q9,  q9,  d10
        vaddw.u8    q13, q13, d11
        vaddw.u8    q10, q10, d12
        vaddw.u8    q14, q14, d13
        vaddw.u8    q11, q11, d14
        vaddw.u8    q15, q15, d15

        /* Narrow back to bytes in q8-q11: round(x / 256) */
        vrshrn.u16  d16, q8,  #8
        vrshrn.u16  d17, q12, #8
        vrshrn.u16  d18, q9,  #8
        vrshrn.u16  d19, q13, #8
        vrshrn.u16  d20, q10, #8
        vrshrn.u16  d21, q14, #8
        vrshrn.u16  d22, q11, #8
        vrshrn.u16  d23, q15, #8

        /* Add the scaled source to the destination, saturating at 255 */
        vqadd.u8    q0,  q0,  q8
        vqadd.u8    q1,  q1,  q9
        vqadd.u8    q2,  q2,  q10
        vqadd.u8    q3,  q3,  q11
.endm
171*e1eccf28SAndroid Build Coastguard Worker
/* SRC_IN: result = src * dst.alpha / 255 on every channel (alpha included).
 * Needs one colour component per register.  Scratch: q4-q7 and q12-q15.
 */
#define params_SRC_IN zipped=1
.macro blend_kernel_SRC_IN
        /* 16-bit products src * dst.alpha (q3).  The odd d-register halves go
         * to q12-q15, the even halves to q0-q3; q3 itself is replaced last,
         * once both of its halves have been used as multipliers. */
        vmull.u8    q12, d7,  d17
        vmull.u8    q0,  d6,  d16
        vmull.u8    q13, d7,  d19
        vmull.u8    q1,  d6,  d18
        vmull.u8    q14, d7,  d21
        vmull.u8    q2,  d6,  d20
        vmull.u8    q15, d7,  d23
        vmull.u8    q3,  d6,  d22

        /* Divide by 255 in two rounding steps: first q4-q7 = round(x / 256) */
        vrshrn.u16  d8,  q0,  #8
        vrshrn.u16  d9,  q12, #8
        vrshrn.u16  d10, q1,  #8
        vrshrn.u16  d11, q13, #8
        vrshrn.u16  d12, q2,  #8
        vrshrn.u16  d13, q14, #8
        vrshrn.u16  d14, q3,  #8
        vrshrn.u16  d15, q15, #8

        /* x += round(x / 256) */
        vaddw.u8    q0,  q0,  d8
        vaddw.u8    q12, q12, d9
        vaddw.u8    q1,  q1,  d10
        vaddw.u8    q13, q13, d11
        vaddw.u8    q2,  q2,  d12
        vaddw.u8    q14, q14, d13
        vaddw.u8    q3,  q3,  d14
        vaddw.u8    q15, q15, d15

        /* Narrow back to bytes in q0-q3: round(x / 256) */
        vrshrn.u16  d0,  q0,  #8
        vrshrn.u16  d1,  q12, #8
        vrshrn.u16  d2,  q1,  #8
        vrshrn.u16  d3,  q13, #8
        vrshrn.u16  d4,  q2,  #8
        vrshrn.u16  d5,  q14, #8
        vrshrn.u16  d6,  q3,  #8
        vrshrn.u16  d7,  q15, #8
.endm
210*e1eccf28SAndroid Build Coastguard Worker
/* DST_IN: result = dst * src.alpha / 255 on every channel (alpha included).
 * Needs one colour component per register.  Scratch: q4-q7 and q12-q15.
 */
#define params_DST_IN zipped=1
.macro blend_kernel_DST_IN
        /* 16-bit products dst * src.alpha (q11).  The odd d-register halves go
         * to q12-q15, the even halves replace q0-q3 in place. */
        vmull.u8    q12, d1,  d23
        vmull.u8    q0,  d0,  d22
        vmull.u8    q13, d3,  d23
        vmull.u8    q1,  d2,  d22
        vmull.u8    q14, d5,  d23
        vmull.u8    q2,  d4,  d22
        vmull.u8    q15, d7,  d23
        vmull.u8    q3,  d6,  d22

        /* Divide by 255 in two rounding steps: first q4-q7 = round(x / 256) */
        vrshrn.u16  d8,  q0,  #8
        vrshrn.u16  d9,  q12, #8
        vrshrn.u16  d10, q1,  #8
        vrshrn.u16  d11, q13, #8
        vrshrn.u16  d12, q2,  #8
        vrshrn.u16  d13, q14, #8
        vrshrn.u16  d14, q3,  #8
        vrshrn.u16  d15, q15, #8

        /* x += round(x / 256) */
        vaddw.u8    q0,  q0,  d8
        vaddw.u8    q12, q12, d9
        vaddw.u8    q1,  q1,  d10
        vaddw.u8    q13, q13, d11
        vaddw.u8    q2,  q2,  d12
        vaddw.u8    q14, q14, d13
        vaddw.u8    q3,  q3,  d14
        vaddw.u8    q15, q15, d15

        /* Narrow back to bytes in q0-q3: round(x / 256) */
        vrshrn.u16  d0,  q0,  #8
        vrshrn.u16  d1,  q12, #8
        vrshrn.u16  d2,  q1,  #8
        vrshrn.u16  d3,  q13, #8
        vrshrn.u16  d4,  q2,  #8
        vrshrn.u16  d5,  q14, #8
        vrshrn.u16  d6,  q3,  #8
        vrshrn.u16  d7,  q15, #8
.endm
249*e1eccf28SAndroid Build Coastguard Worker
/* SRC_OUT: result = src * (255 - dst.alpha) / 255.  Implemented by inverting
 * the destination alpha in place and reusing the SRC_IN kernel.
 */
#define params_SRC_OUT zipped=1
.macro blend_kernel_SRC_OUT
        /* q3 = 255 - dst.alpha */
        vmvn        q3, q3
        blend_kernel_SRC_IN
.endm
255*e1eccf28SAndroid Build Coastguard Worker
256*e1eccf28SAndroid Build Coastguard Worker
/* DST_OUT: result = dst * (255 - src.alpha) / 255.  Implemented by inverting
 * the source alpha in place and reusing the DST_IN kernel.
 */
#define params_DST_OUT zipped=1
.macro blend_kernel_DST_OUT
        /* q11 = 255 - src.alpha */
        vmvn        q11, q11
        blend_kernel_DST_IN
.endm
262*e1eccf28SAndroid Build Coastguard Worker
/* SRC_ATOP: colour = (dst * (255 - src.alpha) + src * dst.alpha) / 255 for
 * the three colour channels; q3 (destination alpha) is left untouched and so
 * becomes the result alpha.  Needs one colour component per register.
 * Scratch: q4-q6 and q12-q14; q8-q11 are overwritten.
 */
#define params_SRC_ATOP zipped=1
.macro blend_kernel_SRC_ATOP
        /* q11 = 255 - src.alpha */
        vmvn        q11, q11

        /* dst colour * (255 - src.alpha): odd halves to q12-q14, even halves
         * replace q0-q2 in place. */
        vmull.u8    q12, d23, d1
        vmull.u8    q0,  d22, d0
        vmull.u8    q13, d23, d3
        vmull.u8    q1,  d22, d2
        vmull.u8    q14, d23, d5
        vmull.u8    q2,  d22, d4

        /* src colour * dst.alpha: odd halves to q4-q6, even halves replace
         * q8-q10 in place. */
        vmull.u8    q4,  d7,  d17
        vmull.u8    q8,  d6,  d16
        vmull.u8    q5,  d7,  d19
        vmull.u8    q9,  d6,  d18
        vmull.u8    q6,  d7,  d21
        vmull.u8    q10, d6,  d20

        /* Sum the two products; saturating, as the sum can exceed 16 bits */
        vqadd.u16   q12, q12, q4
        vqadd.u16   q0,  q0,  q8
        vqadd.u16   q13, q13, q5
        vqadd.u16   q1,  q1,  q9
        vqadd.u16   q14, q14, q6
        vqadd.u16   q2,  q2,  q10

        /* Divide by 255 in two rounding steps: first round(x / 256) */
        vrshr.u16   q8,  q0,  #8
        vrshr.u16   q4,  q12, #8
        vrshr.u16   q9,  q1,  #8
        vrshr.u16   q5,  q13, #8
        vrshr.u16   q10, q2,  #8
        vrshr.u16   q6,  q14, #8

        /* x += round(x / 256), saturating */
        vqadd.u16   q0,  q0,  q8
        vqadd.u16   q12, q12, q4
        vqadd.u16   q1,  q1,  q9
        vqadd.u16   q13, q13, q5
        vqadd.u16   q2,  q2,  q10
        vqadd.u16   q14, q14, q6

        /* Narrow back to bytes in q0-q2 with rounding and saturation */
        vqrshrn.u16 d0,  q0,  #8
        vqrshrn.u16 d1,  q12, #8
        vqrshrn.u16 d2,  q1,  #8
        vqrshrn.u16 d3,  q13, #8
        vqrshrn.u16 d4,  q2,  #8
        vqrshrn.u16 d5,  q14, #8
.endm
309*e1eccf28SAndroid Build Coastguard Worker
/* DST_ATOP: colour = (dst * src.alpha + src * (255 - dst.alpha)) / 255 for
 * the three colour channels; the result alpha is the source alpha.  Needs one
 * colour component per register.  Scratch: q4-q6 and q12-q14; q8-q10 are
 * overwritten.
 */
#define params_DST_ATOP zipped=1
.macro blend_kernel_DST_ATOP
        /* q3 = 255 - dst.alpha */
        vmvn        q3,  q3

        /* dst colour * src.alpha: odd halves to q12-q14, even halves replace
         * q0-q2 in place. */
        vmull.u8    q12, d23, d1
        vmull.u8    q0,  d22, d0
        vmull.u8    q13, d23, d3
        vmull.u8    q1,  d22, d2
        vmull.u8    q14, d23, d5
        vmull.u8    q2,  d22, d4

        /* src colour * (255 - dst.alpha): odd halves to q4-q6, even halves
         * replace q8-q10 in place. */
        vmull.u8    q4,  d7,  d17
        vmull.u8    q8,  d6,  d16
        vmull.u8    q5,  d7,  d19
        vmull.u8    q9,  d6,  d18
        vmull.u8    q6,  d7,  d21
        vmull.u8    q10, d6,  d20

        /* Sum the two products; saturating, as the sum can exceed 16 bits */
        vqadd.u16   q12, q12, q4
        vqadd.u16   q0,  q0,  q8
        vqadd.u16   q13, q13, q5
        vqadd.u16   q1,  q1,  q9
        vqadd.u16   q14, q14, q6
        vqadd.u16   q2,  q2,  q10

        /* Divide by 255 in two rounding steps: first round(x / 256) */
        vrshr.u16   q8,  q0,  #8
        vrshr.u16   q4,  q12, #8
        vrshr.u16   q9,  q1,  #8
        vrshr.u16   q5,  q13, #8
        vrshr.u16   q10, q2,  #8
        vrshr.u16   q6,  q14, #8

        /* x += round(x / 256), saturating */
        vqadd.u16   q0,  q0,  q8
        vqadd.u16   q12, q12, q4
        vqadd.u16   q1,  q1,  q9
        vqadd.u16   q13, q13, q5
        vqadd.u16   q2,  q2,  q10
        vqadd.u16   q14, q14, q6

        /* Narrow back to bytes in q0-q2 with rounding and saturation */
        vqrshrn.u16 d0,  q0,  #8
        vqrshrn.u16 d1,  q12, #8
        vqrshrn.u16 d2,  q1,  #8
        vqrshrn.u16 d3,  q13, #8
        vqrshrn.u16 d4,  q2,  #8
        vqrshrn.u16 d5,  q14, #8

        /* Result alpha = source alpha (q11 was not modified above) */
        vmov        q3,  q11
.endm
358*e1eccf28SAndroid Build Coastguard Worker
/* MULTIPLY: result = dst * src / 255, byte by byte.  Every byte is treated
 * alike, so the channels do not have to be separated.  Scratch: q4-q7 and
 * q12-q15.
 */
#define params_MULTIPLY zipped=0
.macro blend_kernel_MULTIPLY
        /* 16-bit products dst * src.  The odd d-register halves go to q12-q15,
         * the even halves replace q0-q3 in place. */
        vmull.u8    q12, d1,  d17
        vmull.u8    q0,  d0,  d16
        vmull.u8    q13, d3,  d19
        vmull.u8    q1,  d2,  d18
        vmull.u8    q14, d5,  d21
        vmull.u8    q2,  d4,  d20
        vmull.u8    q15, d7,  d23
        vmull.u8    q3,  d6,  d22

        /* Divide by 255 in two rounding steps: first q4-q7 = round(x / 256) */
        vrshrn.u16  d8,  q0,  #8
        vrshrn.u16  d9,  q12, #8
        vrshrn.u16  d10, q1,  #8
        vrshrn.u16  d11, q13, #8
        vrshrn.u16  d12, q2,  #8
        vrshrn.u16  d13, q14, #8
        vrshrn.u16  d14, q3,  #8
        vrshrn.u16  d15, q15, #8

        /* x += round(x / 256) */
        vaddw.u8    q0,  q0,  d8
        vaddw.u8    q12, q12, d9
        vaddw.u8    q1,  q1,  d10
        vaddw.u8    q13, q13, d11
        vaddw.u8    q2,  q2,  d12
        vaddw.u8    q14, q14, d13
        vaddw.u8    q3,  q3,  d14
        vaddw.u8    q15, q15, d15

        /* Narrow back to bytes in q0-q3: round(x / 256) */
        vrshrn.u16  d0,  q0,  #8
        vrshrn.u16  d1,  q12, #8
        vrshrn.u16  d2,  q1,  #8
        vrshrn.u16  d3,  q13, #8
        vrshrn.u16  d4,  q2,  #8
        vrshrn.u16  d5,  q14, #8
        vrshrn.u16  d6,  q3,  #8
        vrshrn.u16  d7,  q15, #8
.endm
397*e1eccf28SAndroid Build Coastguard Worker
/* ADD: result = dst + src per byte, saturating at 255.  Works on the bytes as
 * loaded; no channel separation required.
 */
#define params_ADD zipped=0
.macro blend_kernel_ADD
        vqadd.u8    q0, q0, q8
        vqadd.u8    q1, q1, q9
        vqadd.u8    q2, q2, q10
        vqadd.u8    q3, q3, q11
.endm
405*e1eccf28SAndroid Build Coastguard Worker
/* SUBTRACT: result = dst - src per byte, saturating at 0.  Works on the bytes
 * as loaded; no channel separation required.
 */
#define params_SUBTRACT zipped=0
.macro blend_kernel_SUBTRACT
        vqsub.u8    q0, q0, q8
        vqsub.u8    q1, q1, q9
        vqsub.u8    q2, q2, q10
        vqsub.u8    q3, q3, q11
.endm
413*e1eccf28SAndroid Build Coastguard Worker
/* DIFFERENCE: result = |dst - src| per byte.  Works on the bytes as loaded;
 * no channel separation required.
 */
#define params_DIFFERENCE zipped=0
.macro blend_kernel_DIFFERENCE
        vabd.u8     q0, q0, q8
        vabd.u8     q1, q1, q9
        vabd.u8     q2, q2, q10
        vabd.u8     q3, q3, q11
.endm
421*e1eccf28SAndroid Build Coastguard Worker
/* XOR: result = dst ^ src, a plain bitwise exclusive-or of the two buffers.
 * Works on the bytes as loaded; no channel separation required.
 */
#define params_XOR zipped=0
.macro blend_kernel_XOR
        veor        q0, q0, q8
        veor        q1, q1, q9
        veor        q2, q2, q10
        veor        q3, q3, q11
.endm
429*e1eccf28SAndroid Build Coastguard Worker
430*e1eccf28SAndroid Build Coastguard Worker
431*e1eccf28SAndroid Build Coastguard Worker/* Define the wrapper code which will load and store the data, iterate the
432*e1eccf28SAndroid Build Coastguard Worker * correct number of times, and safely handle the remainder at the end of the
433*e1eccf28SAndroid Build Coastguard Worker * loop.  Various sections of assembly code are dropped or substituted for
434*e1eccf28SAndroid Build Coastguard Worker * simpler operations if they're not needed.
435*e1eccf28SAndroid Build Coastguard Worker */
/* wrap_line kernel, nowrap=0, zipped=1, lddst=1, ldsrc=1, pld=1
 *
 * Emits the whole body of one blend_line_* function around the named kernel
 * macro, including the final return.
 *
 * Registers on entry (as prepared by rsdIntrinsicBlend_K):
 *      r0 = destination pointer (read when lddst is set; stored to by the
 *           wrapper unless nowrap is set)
 *      r1 = source pointer (read when ldsrc is set)
 *      r2 = number of bytes to process
 * On exit r0 = 0.
 *
 * Macro arguments:
 *      kernel  name of the macro holding the blend arithmetic.  The wrapper
 *              presents destination data in q0-q3 and source data in q8-q11
 *              and stores whatever the kernel leaves in q0-q3.
 *      nowrap  non-zero: emit only the kernel followed by the return, with
 *              no loads, stores or looping.
 *      zipped  non-zero: de-interleave on load (and re-interleave on store)
 *              so that q0/q1/q2/q3 -- and q8/q9/q10/q11 -- each hold one byte
 *              position (0, 1, 2, 3) of sixteen 4-byte elements.
 *              zero: move the 64 bytes linearly.
 *      lddst   zero: do not read the destination before running the kernel.
 *      ldsrc   zero: do not read the source.
 *      pld     zero: omit the prefetch hints in the main loop.
 */
436*e1eccf28SAndroid Build Coastguard Worker.macro wrap_line kernel, nowrap=0, zipped=1, lddst=1, ldsrc=1, pld=1
437*e1eccf28SAndroid Build Coastguard Worker.if \nowrap
        /* Bare kernel: it is responsible for any memory access itself. */
438*e1eccf28SAndroid Build Coastguard Worker        \kernel
439*e1eccf28SAndroid Build Coastguard Worker.else
        /* d8-d15 are callee-saved under the AAPCS.  Nothing in this wrapper
         * touches them, so this is presumably here for kernels that use q4-q7
         * as scratch -- confirm against the kernel macros.
         */
440*e1eccf28SAndroid Build Coastguard Worker        vpush   {d8-d15}
        /* Bias the byte count by one 64-byte chunk and enter the loop at its
         * test (label 2), so the loop body only runs while a whole chunk is
         * still available.
         */
441*e1eccf28SAndroid Build Coastguard Worker        subs    r2, #64
442*e1eccf28SAndroid Build Coastguard Worker        b       2f
        /* On ARM gas .align takes a power-of-two exponent, so this aligns the
         * loop head to 16 bytes.
         */
443*e1eccf28SAndroid Build Coastguard Worker        .align 4
        /* Main loop: one 64-byte chunk per iteration. */
444*e1eccf28SAndroid Build Coastguard Worker1:
445*e1eccf28SAndroid Build Coastguard Worker  .if \lddst
446*e1eccf28SAndroid Build Coastguard Worker    .if \zipped
447*e1eccf28SAndroid Build Coastguard Worker        vld4.8  {d0,d2,d4,d6}, [r0]!
448*e1eccf28SAndroid Build Coastguard Worker        vld4.8  {d1,d3,d5,d7}, [r0]!
449*e1eccf28SAndroid Build Coastguard Worker    .else
450*e1eccf28SAndroid Build Coastguard Worker        vld1.8  {d0-d3}, [r0]!
451*e1eccf28SAndroid Build Coastguard Worker        vld1.8  {d4-d7}, [r0]!
452*e1eccf28SAndroid Build Coastguard Worker    .endif
        /* The loads advanced r0 by 64; step back so the stores below write
         * the same 64 bytes that were just read.
         */
453*e1eccf28SAndroid Build Coastguard Worker        sub     r0, #64
454*e1eccf28SAndroid Build Coastguard Worker  .endif
455*e1eccf28SAndroid Build Coastguard Worker  .if \ldsrc
456*e1eccf28SAndroid Build Coastguard Worker    .if \zipped
457*e1eccf28SAndroid Build Coastguard Worker        vld4.8  {d16,d18,d20,d22}, [r1]!
458*e1eccf28SAndroid Build Coastguard Worker        vld4.8  {d17,d19,d21,d23}, [r1]!
459*e1eccf28SAndroid Build Coastguard Worker    .else
460*e1eccf28SAndroid Build Coastguard Worker        vld1.8  {d16-d19}, [r1]!
461*e1eccf28SAndroid Build Coastguard Worker        vld1.8  {d20-d23}, [r1]!
462*e1eccf28SAndroid Build Coastguard Worker    .endif
463*e1eccf28SAndroid Build Coastguard Worker  .endif
        /* Prefetch hints, emitted only for the streams that are actually
         * read.  Note that r0 has been rewound to the start of the current
         * chunk here, whereas r1 already points just past it.
         */
464*e1eccf28SAndroid Build Coastguard Worker  .if \pld
465*e1eccf28SAndroid Build Coastguard Worker    .if \lddst ; pld [r0, #192] ; .endif
466*e1eccf28SAndroid Build Coastguard Worker    .if \ldsrc ; pld [r1, #192] ; .endif
467*e1eccf28SAndroid Build Coastguard Worker  .endif
468*e1eccf28SAndroid Build Coastguard Worker
469*e1eccf28SAndroid Build Coastguard Worker        \kernel
470*e1eccf28SAndroid Build Coastguard Worker
        /* The flags set by this subs are the ones tested by the bge at label
         * 2; the NEON stores in between do not alter them.
         */
471*e1eccf28SAndroid Build Coastguard Worker        subs    r2, #64
472*e1eccf28SAndroid Build Coastguard Worker  .if \zipped
473*e1eccf28SAndroid Build Coastguard Worker        vst4.8  {d0,d2,d4,d6}, [r0]!
474*e1eccf28SAndroid Build Coastguard Worker        vst4.8  {d1,d3,d5,d7}, [r0]!
475*e1eccf28SAndroid Build Coastguard Worker  .else
476*e1eccf28SAndroid Build Coastguard Worker        vst1.8  {d0-d3}, [r0]!
477*e1eccf28SAndroid Build Coastguard Worker        vst1.8  {d4-d7}, [r0]!
478*e1eccf28SAndroid Build Coastguard Worker  .endif
479*e1eccf28SAndroid Build Coastguard Worker
        /* Loop test (also the entry point): continue while the biased count
         * did not go negative.  Afterwards remove the bias again, leaving the
         * number of bytes still outstanding (0..63 for a non-negative
         * starting count), and skip straight to the epilogue -- the later
         * label 2 -- if nothing is left.
         */
480*e1eccf28SAndroid Build Coastguard Worker2:      bge     1b
481*e1eccf28SAndroid Build Coastguard Worker        adds    r2, #64
482*e1eccf28SAndroid Build Coastguard Worker        beq     2f
483*e1eccf28SAndroid Build Coastguard Worker
484*e1eccf28SAndroid Build Coastguard Worker        /* To handle the tail portion of the data (something less than 64
485*e1eccf28SAndroid Build Coastguard Worker         * bytes) load small power-of-two chunks into working registers.  It
486*e1eccf28SAndroid Build Coastguard Worker         * doesn't matter where they end up in the register; the same process
487*e1eccf28SAndroid Build Coastguard Worker         * will store them back out using the same positions and the operations
488*e1eccf28SAndroid Build Coastguard Worker         * don't require data to interact with its neighbours.
489*e1eccf28SAndroid Build Coastguard Worker         */
        /* Clear all working registers first so that lanes which the partial
         * loads below do not fill contain zero rather than stale data.
         */
490*e1eccf28SAndroid Build Coastguard Worker        vmov.i8 q0, #0
491*e1eccf28SAndroid Build Coastguard Worker        vmov.i8 q1, #0
492*e1eccf28SAndroid Build Coastguard Worker        vmov.i8 q2, #0
493*e1eccf28SAndroid Build Coastguard Worker        vmov.i8 q3, #0
494*e1eccf28SAndroid Build Coastguard Worker
495*e1eccf28SAndroid Build Coastguard Worker        vmov.i8 q8, #0
496*e1eccf28SAndroid Build Coastguard Worker        vmov.i8 q9, #0
497*e1eccf28SAndroid Build Coastguard Worker        vmov.i8 q10, #0
498*e1eccf28SAndroid Build Coastguard Worker        vmov.i8 q11, #0
499*e1eccf28SAndroid Build Coastguard Worker
        /* Each set bit of r2 triggers one load of that many bytes into its
         * own, non-overlapping slot (destination slot / source slot):
         *      32 -> d4-d7        / d20-d23
         *      16 -> d2-d3        / d18-d19
         *       8 -> d1           / d17
         *       4 -> d0 bytes 4-7 / d16 bytes 4-7   (32-bit lane 1)
         *       2 -> d0 bytes 2-3 / d16 bytes 2-3   (16-bit lane 1)
         *       1 -> d0 byte 1    / d16 byte 1      (8-bit lane 1)
         * When reached through rsdIntrinsicBlend_K the count is a multiple of
         * four, so the 2- and 1-byte cases are not expected to trigger.
         */
500*e1eccf28SAndroid Build Coastguard Worker        tst     r2, #32
501*e1eccf28SAndroid Build Coastguard Worker        beq     1f
502*e1eccf28SAndroid Build Coastguard Worker  .if \lddst ; vld1.64 {d4-d7}, [r0]!   ; .endif
503*e1eccf28SAndroid Build Coastguard Worker  .if \ldsrc ; vld1.64 {d20-d23}, [r1]! ; .endif
504*e1eccf28SAndroid Build Coastguard Worker1:      tst     r2, #16
505*e1eccf28SAndroid Build Coastguard Worker        beq     1f
506*e1eccf28SAndroid Build Coastguard Worker  .if \lddst ; vld1.64 {d2-d3}, [r0]!   ; .endif
507*e1eccf28SAndroid Build Coastguard Worker  .if \ldsrc ; vld1.64 {d18-d19}, [r1]! ; .endif
508*e1eccf28SAndroid Build Coastguard Worker1:      tst     r2, #8
509*e1eccf28SAndroid Build Coastguard Worker        beq     1f
510*e1eccf28SAndroid Build Coastguard Worker  .if \lddst ; vld1.64 {d1}, [r0]!      ; .endif
511*e1eccf28SAndroid Build Coastguard Worker  .if \ldsrc ; vld1.64 {d17}, [r1]!     ; .endif
512*e1eccf28SAndroid Build Coastguard Worker1:      tst     r2, #4
513*e1eccf28SAndroid Build Coastguard Worker        beq     1f
514*e1eccf28SAndroid Build Coastguard Worker  .if \lddst ; vld1.32 {d0[1]}, [r0]!   ; .endif
515*e1eccf28SAndroid Build Coastguard Worker  .if \ldsrc ; vld1.32 {d16[1]}, [r1]!  ; .endif
516*e1eccf28SAndroid Build Coastguard Worker1:      tst     r2, #2
517*e1eccf28SAndroid Build Coastguard Worker        beq     1f
518*e1eccf28SAndroid Build Coastguard Worker  .if \lddst ; vld1.16 {d0[1]}, [r0]!   ; .endif
519*e1eccf28SAndroid Build Coastguard Worker  .if \ldsrc ; vld1.16 {d16[1]}, [r1]!  ; .endif
520*e1eccf28SAndroid Build Coastguard Worker1:      tst     r2, #1
521*e1eccf28SAndroid Build Coastguard Worker        beq     1f
522*e1eccf28SAndroid Build Coastguard Worker  .if \lddst ; vld1.8  {d0[1]}, [r0]!   ; .endif
523*e1eccf28SAndroid Build Coastguard Worker  .if \ldsrc ; vld1.8  {d16[1]}, [r1]!  ; .endif
524*e1eccf28SAndroid Build Coastguard Worker1:
        /* The destination loads advanced r0 by r2 bytes in total; rewind it
         * so the tail stores land on the addresses that were read.  Without
         * lddst r0 was never advanced, so no adjustment is needed.
         */
525*e1eccf28SAndroid Build Coastguard Worker  .if \lddst ; sub     r0, r2           ; .endif
526*e1eccf28SAndroid Build Coastguard Worker
527*e1eccf28SAndroid Build Coastguard Worker  .if \zipped
528*e1eccf28SAndroid Build Coastguard Worker        /* One small impediment in the process above is that some of the load
529*e1eccf28SAndroid Build Coastguard Worker         * operations can't perform byte-wise structure deinterleaving at the
530*e1eccf28SAndroid Build Coastguard Worker         * same time as loading only part of a register.  So the data is loaded
531*e1eccf28SAndroid Build Coastguard Worker         * linearly and unpacked manually at this point.
532*e1eccf28SAndroid Build Coastguard Worker         */
        /* Two rounds of byte unzip turn four linearly filled registers into
         * the same per-byte-position layout that vld4.8 produces.
         */
533*e1eccf28SAndroid Build Coastguard Worker        vuzp.8  q0, q1
534*e1eccf28SAndroid Build Coastguard Worker        vuzp.8  q2, q3
535*e1eccf28SAndroid Build Coastguard Worker        vuzp.8  q0, q2
536*e1eccf28SAndroid Build Coastguard Worker        vuzp.8  q1, q3
537*e1eccf28SAndroid Build Coastguard Worker
538*e1eccf28SAndroid Build Coastguard Worker        vuzp.8  q8, q9
539*e1eccf28SAndroid Build Coastguard Worker        vuzp.8  q10, q11
540*e1eccf28SAndroid Build Coastguard Worker        vuzp.8  q8, q10
541*e1eccf28SAndroid Build Coastguard Worker        vuzp.8  q9, q11
542*e1eccf28SAndroid Build Coastguard Worker
543*e1eccf28SAndroid Build Coastguard Worker        \kernel
544*e1eccf28SAndroid Build Coastguard Worker
        /* Inverse of the destination unzip above: restore the linear byte
         * order in q0-q3 ready for the partial stores.
         */
545*e1eccf28SAndroid Build Coastguard Worker        vzip.8  q0, q2
546*e1eccf28SAndroid Build Coastguard Worker        vzip.8  q1, q3
547*e1eccf28SAndroid Build Coastguard Worker        vzip.8  q0, q1
548*e1eccf28SAndroid Build Coastguard Worker        vzip.8  q2, q3
549*e1eccf28SAndroid Build Coastguard Worker  .else
550*e1eccf28SAndroid Build Coastguard Worker        \kernel
551*e1eccf28SAndroid Build Coastguard Worker  .endif
552*e1eccf28SAndroid Build Coastguard Worker
        /* Write the result back, testing the same bits of r2 and using
         * exactly the register slots the tail loads used.
         */
553*e1eccf28SAndroid Build Coastguard Worker        tst     r2, #32
554*e1eccf28SAndroid Build Coastguard Worker        beq     1f
555*e1eccf28SAndroid Build Coastguard Worker        vst1.64 {d4-d7}, [r0]!
556*e1eccf28SAndroid Build Coastguard Worker1:      tst     r2, #16
557*e1eccf28SAndroid Build Coastguard Worker        beq     1f
558*e1eccf28SAndroid Build Coastguard Worker        vst1.64 {d2-d3}, [r0]!
559*e1eccf28SAndroid Build Coastguard Worker1:      tst     r2, #8
560*e1eccf28SAndroid Build Coastguard Worker        beq     1f
561*e1eccf28SAndroid Build Coastguard Worker        vst1.64 {d1}, [r0]!
562*e1eccf28SAndroid Build Coastguard Worker1:      tst     r2, #4
563*e1eccf28SAndroid Build Coastguard Worker        beq     1f
564*e1eccf28SAndroid Build Coastguard Worker        vst1.32 {d0[1]}, [r0]!
565*e1eccf28SAndroid Build Coastguard Worker1:      tst     r2, #2
566*e1eccf28SAndroid Build Coastguard Worker        beq     1f
567*e1eccf28SAndroid Build Coastguard Worker        vst1.16 {d0[1]}, [r0]!
568*e1eccf28SAndroid Build Coastguard Worker1:      tst     r2, #1
569*e1eccf28SAndroid Build Coastguard Worker        beq     2f
570*e1eccf28SAndroid Build Coastguard Worker        vst1.8  {d0[1]}, [r0]!
        /* Epilogue: restore the registers saved on entry. */
571*e1eccf28SAndroid Build Coastguard Worker2:      vpop    {d8-d15}
572*e1eccf28SAndroid Build Coastguard Worker.endif
        /* Common to both the wrapped and nowrap forms: return 0. */
573*e1eccf28SAndroid Build Coastguard Worker        mov     r0, #0
574*e1eccf28SAndroid Build Coastguard Worker        bx      lr
575*e1eccf28SAndroid Build Coastguard Worker.endm
576*e1eccf28SAndroid Build Coastguard Worker
577*e1eccf28SAndroid Build Coastguard Worker
578*e1eccf28SAndroid Build Coastguard Worker/* produce list of blend_line_XX() functions; each function uses the wrap_line
579*e1eccf28SAndroid Build Coastguard Worker * macro, passing it the name of the operation macro it wants along with
580*e1eccf28SAndroid Build Coastguard Worker * optional parameters to remove unnecessary operations.
581*e1eccf28SAndroid Build Coastguard Worker */
/* BLEND_X(d, n) emits one complete function named blend_line_<n>: ENTRY and
 * END supply the symbol boilerplate, and wrap_line generates the body from
 * blend_kernel_<n>, customised by whatever macro arguments params_<n>
 * expands to.  The slot number d is not used in this expansion; it is used
 * when BLEND_X is redefined to build the dispatch table in
 * rsdIntrinsicBlend_K.
 */
582*e1eccf28SAndroid Build Coastguard Worker#define BLEND_X(d, n) ENTRY(blend_line_##n) ; wrap_line blend_kernel_##n, params_##n ; END(blend_line_##n) ;
583*e1eccf28SAndroid Build Coastguard Worker    BLEND_LIST(BLEND_X)
584*e1eccf28SAndroid Build Coastguard Worker#undef BLEND_X
585*e1eccf28SAndroid Build Coastguard Worker
586*e1eccf28SAndroid Build Coastguard Worker
587*e1eccf28SAndroid Build Coastguard Worker/*  int rsdIntrinsicBlend_K(
588*e1eccf28SAndroid Build Coastguard Worker *          uchar4 *out,        // r0
589*e1eccf28SAndroid Build Coastguard Worker *          uchar4 const *in,   // r1
590*e1eccf28SAndroid Build Coastguard Worker *          int slot,           // r2
591*e1eccf28SAndroid Build Coastguard Worker *          size_t xstart,      // r3
592*e1eccf28SAndroid Build Coastguard Worker *          size_t xend);       // [sp]
593*e1eccf28SAndroid Build Coastguard Worker */
/* Dispatcher: selects the blend_line_* routine for the requested slot, turns
 * the (xstart, xend) element range into pointers and a byte count, and
 * tail-jumps to the routine.
 *
 * Returns (in r0) 0 from the selected routine, or -1 if slot is outside the
 * table or names a slot with no routine (a zero table entry).
 *
 * The jump uses bx without link and lr is never modified here, so the
 * selected routine returns directly to this function's caller.
 */
594*e1eccf28SAndroid Build Coastguard WorkerENTRY(rsdIntrinsicBlend_K)
    /* ip = table entry for slot, or 0 if slot is not below the number of
     * entries.  lo/hs are unsigned conditions, so a negative slot is
     * rejected as well.
     */
595*e1eccf28SAndroid Build Coastguard Worker    adr     ip, blend_functions
596*e1eccf28SAndroid Build Coastguard Worker    cmp     r2, #(blend_functions_end - blend_functions) >> 2
597*e1eccf28SAndroid Build Coastguard Worker    ldrlo   ip, [ip, r2, LSL #2]
598*e1eccf28SAndroid Build Coastguard Worker    movhs   ip, #0
    /* slot has been consumed, so r2 can be reused: fetch xend, the fifth
     * argument, from the top of the stack (nothing has been pushed).
     */
599*e1eccf28SAndroid Build Coastguard Worker    ldr     r2, [sp]
    /* Elements are four bytes wide (uchar4), hence the shift by 2:
     *      r0 = out + xstart elements
     *      r1 = in  + xstart elements
     *      r2 = (xend - xstart) * 4 = byte count expected by wrap_line
     */
600*e1eccf28SAndroid Build Coastguard Worker    add     r0, r3, LSL #2
601*e1eccf28SAndroid Build Coastguard Worker    add     r1, r3, LSL #2
602*e1eccf28SAndroid Build Coastguard Worker    sub     r2, r3
603*e1eccf28SAndroid Build Coastguard Worker    mov     r2, r2, LSL #2
    /* Table entries are offsets relative to label 1 below.  In ARM state a
     * read of pc yields the address of the current instruction plus 8, which
     * for the addne is exactly label 1 (two instructions further on), so the
     * addition produces the routine's absolute address.  This depends on the
     * code being assembled as ARM rather than Thumb and on no instruction
     * being inserted between the addne and label 1.
     */
604*e1eccf28SAndroid Build Coastguard Worker    cmp     ip, #0
605*e1eccf28SAndroid Build Coastguard Worker    addne   ip, ip, pc
606*e1eccf28SAndroid Build Coastguard Worker    bxne    ip
    /* Reached only when no routine was found for the slot. */
607*e1eccf28SAndroid Build Coastguard Worker1:  mov     r0, #-1
608*e1eccf28SAndroid Build Coastguard Worker    bx      lr
609*e1eccf28SAndroid Build Coastguard Worker
/* Dispatch table, indexed by slot number, one word per slot.  For each
 * BLEND_LIST entry (d, n) the expansion pads any gap since the previous slot
 * with zero words, emits the offset of blend_line_<n> from label 1 above,
 * and records d+1 as the next expected slot.
 */
610*e1eccf28SAndroid Build Coastguard Workerblend_functions:
611*e1eccf28SAndroid Build Coastguard Worker.set off,0
612*e1eccf28SAndroid Build Coastguard Worker#define BLEND_X(d, n) .rept d-off ; .word 0 ; .endr ; .word blend_line_##n-1b ; .set off, d+1 ;
613*e1eccf28SAndroid Build Coastguard Worker        BLEND_LIST(BLEND_X)
614*e1eccf28SAndroid Build Coastguard Worker#undef BLEND_X
615*e1eccf28SAndroid Build Coastguard Workerblend_functions_end:
616*e1eccf28SAndroid Build Coastguard Worker
617*e1eccf28SAndroid Build Coastguard WorkerEND(rsdIntrinsicBlend_K)
618