xref: /aosp_15_r20/frameworks/rs/toolkit/Blend_advsimd.S (revision e1eccf28f96817838ad6867f7f39d2351ec11f56)
1*e1eccf28SAndroid Build Coastguard Worker/*
2*e1eccf28SAndroid Build Coastguard Worker * Copyright (C) 2013-2014 The Android Open Source Project
3*e1eccf28SAndroid Build Coastguard Worker *
4*e1eccf28SAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License");
5*e1eccf28SAndroid Build Coastguard Worker * you may not use this file except in compliance with the License.
6*e1eccf28SAndroid Build Coastguard Worker * You may obtain a copy of the License at
7*e1eccf28SAndroid Build Coastguard Worker *
8*e1eccf28SAndroid Build Coastguard Worker *      http://www.apache.org/licenses/LICENSE-2.0
9*e1eccf28SAndroid Build Coastguard Worker *
10*e1eccf28SAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software
11*e1eccf28SAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS,
12*e1eccf28SAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*e1eccf28SAndroid Build Coastguard Worker * See the License for the specific language governing permissions and
14*e1eccf28SAndroid Build Coastguard Worker * limitations under the License.
15*e1eccf28SAndroid Build Coastguard Worker */
16*e1eccf28SAndroid Build Coastguard Worker
/* ENTRY(f): switch to .text, align, export the symbol f, mark it as a
 * function and place its label.  In GNU as for AArch64 the argument of .align
 * is a power of two, so `.align 4` requests 16-byte alignment.
 * END(f): record the size of f as the distance from its label to this point.
 */
17*e1eccf28SAndroid Build Coastguard Worker#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f:
18*e1eccf28SAndroid Build Coastguard Worker#define END(f) .size f, .-f;
19*e1eccf28SAndroid Build Coastguard Worker
/* X-macro table of the enabled blend operations.  BLEND_LIST(X) expands
 * X(index, NAME) once per entry, in the order given.  Every NAME listed here
 * has a params_NAME definition and a blend_kernel_NAME assembler macro in this
 * file.  How the numeric index is consumed depends on the X supplied by the
 * user of the list -- presumably a dispatch table (TODO confirm against the
 * code that expands BLEND_LIST).
 */
20*e1eccf28SAndroid Build Coastguard Worker#define BLEND_LIST(X) \
21*e1eccf28SAndroid Build Coastguard Worker    X(0, CLEAR) \
22*e1eccf28SAndroid Build Coastguard Worker    X(1, SRC) \
23*e1eccf28SAndroid Build Coastguard Worker    X(2, DST) \
24*e1eccf28SAndroid Build Coastguard Worker    X(3, SRC_OVER) \
25*e1eccf28SAndroid Build Coastguard Worker    X(4, DST_OVER) \
26*e1eccf28SAndroid Build Coastguard Worker    X(5, SRC_IN) \
27*e1eccf28SAndroid Build Coastguard Worker    X(6, DST_IN) \
28*e1eccf28SAndroid Build Coastguard Worker    X(7, SRC_OUT) \
29*e1eccf28SAndroid Build Coastguard Worker    X(8, DST_OUT) \
30*e1eccf28SAndroid Build Coastguard Worker    X(9, SRC_ATOP) \
31*e1eccf28SAndroid Build Coastguard Worker    X(10, DST_ATOP) \
32*e1eccf28SAndroid Build Coastguard Worker    X(11, XOR) \
33*e1eccf28SAndroid Build Coastguard Worker    X(12, MULTIPLY) \
34*e1eccf28SAndroid Build Coastguard Worker    X(13, ADD) \
35*e1eccf28SAndroid Build Coastguard Worker    X(14, SUBTRACT)
36*e1eccf28SAndroid Build Coastguard Worker
37*e1eccf28SAndroid Build Coastguard Worker/* This operation was not enabled in the original RenderScript. We could
38*e1eccf28SAndroid Build Coastguard Worker * enable it.
39*e1eccf28SAndroid Build Coastguard Worker *
40*e1eccf28SAndroid Build Coastguard Worker *  X(15, DIFFERENCE) \
41*e1eccf28SAndroid Build Coastguard Worker */
42*e1eccf28SAndroid Build Coastguard Worker
43*e1eccf28SAndroid Build Coastguard Worker/* For every blend operation supported, define a macro with just the arithmetic
44*e1eccf28SAndroid Build Coastguard Worker * component.  The rest can be handled later on.
45*e1eccf28SAndroid Build Coastguard Worker *
46*e1eccf28SAndroid Build Coastguard Worker * At entry v0-v3 contain the RGBA data from the destination buffer, and v8-v11
47*e1eccf28SAndroid Build Coastguard Worker * contain the data from the source buffer.  Both have already been split out
48*e1eccf28SAndroid Build Coastguard Worker * into one colour component per register (if necessary).  v3 and v11 contain
49*e1eccf28SAndroid Build Coastguard Worker * the alpha components.
50*e1eccf28SAndroid Build Coastguard Worker *
51*e1eccf28SAndroid Build Coastguard Worker * At the same time as defining the assembly macro, define a corresponding
52*e1eccf28SAndroid Build Coastguard Worker * preprocessor macro indicating any other requirements.
53*e1eccf28SAndroid Build Coastguard Worker *    zipped=0 -- The macro does not require the RGBA components to be
54*e1eccf28SAndroid Build Coastguard Worker *                separated.
55*e1eccf28SAndroid Build Coastguard Worker *    lddst=0  -- The macro does not require data from the destination buffer.
56*e1eccf28SAndroid Build Coastguard Worker *    ldsrc=0  -- The macro does not require data from the source buffer.
57*e1eccf28SAndroid Build Coastguard Worker *    nowrap=1 -- The macro requires no wrapper at all, and should simply be
58*e1eccf28SAndroid Build Coastguard Worker *                inserted without any surrounding load/store or loop code.
59*e1eccf28SAndroid Build Coastguard Worker */
60*e1eccf28SAndroid Build Coastguard Worker
/* CLEAR: every output byte is zero.  Neither buffer has to be loaded
 * (lddst=0, ldsrc=0) and the components need not be separated (zipped=0).
 */
#define params_CLEAR zipped=0, lddst=0, ldsrc=0
.macro blend_kernel_CLEAR
        /* Zero all four result registers.  Full operand names are iterated so
         * that no \() separator is needed inside the enclosing macro. */
    .irp    out, v0.16b, v1.16b, v2.16b, v3.16b
        movi    \out, #0
    .endr
.endm
68*e1eccf28SAndroid Build Coastguard Worker
/* SRC: the result is the source data unchanged.  The destination buffer is
 * not loaded (lddst=0) and the components need not be separated (zipped=0).
 */
#define params_SRC zipped=0, lddst=0
.macro blend_kernel_SRC
        /* Four independent whole-register copies from the source registers
         * (v8-v11) into the result registers (v0-v3). */
        mov         v3.16b, v11.16b
        mov         v2.16b, v10.16b
        mov         v1.16b, v9.16b
        mov         v0.16b, v8.16b
.endm
76*e1eccf28SAndroid Build Coastguard Worker
/* DST: the kernel emits no instructions at all.  nowrap=1 asks for the macro
 * to be inserted without any surrounding load/store or loop code.
 */
77*e1eccf28SAndroid Build Coastguard Worker#define params_DST nowrap=1
78*e1eccf28SAndroid Build Coastguard Worker.macro blend_kernel_DST
79*e1eccf28SAndroid Build Coastguard Worker        /* nop: intentionally empty */
80*e1eccf28SAndroid Build Coastguard Worker.endm
81*e1eccf28SAndroid Build Coastguard Worker
/* SRC_OVER: result = src + dst * (255 - src.alpha) / 255, where the final
 * addition saturates.  Requires one colour component per register (zipped=1).
 *   In:      v0-v3 = destination R,G,B,A   v8-v11 = source R,G,B,A
 *   Out:     v0-v3
 *   Scratch: v4-v7, v12-v15
 */
82*e1eccf28SAndroid Build Coastguard Worker#define params_SRC_OVER zipped=1
83*e1eccf28SAndroid Build Coastguard Worker.macro blend_kernel_SRC_OVER
        /* v7 = 255 - src.alpha in every byte lane */
84*e1eccf28SAndroid Build Coastguard Worker        mvn         v7.16b, v11.16b
85*e1eccf28SAndroid Build Coastguard Worker
        /* 16-bit products dst * v7.  The upper eight lanes go to v12-v15 and
         * the lower eight replace v0-v3, so each umull2 has to read its
         * register before the paired umull overwrites it. */
86*e1eccf28SAndroid Build Coastguard Worker        umull2      v12.8h, v7.16b, v0.16b
87*e1eccf28SAndroid Build Coastguard Worker        umull       v0.8h,  v7.8b,  v0.8b
88*e1eccf28SAndroid Build Coastguard Worker        umull2      v13.8h, v7.16b, v1.16b
89*e1eccf28SAndroid Build Coastguard Worker        umull       v1.8h,  v7.8b,  v1.8b
90*e1eccf28SAndroid Build Coastguard Worker        umull2      v14.8h, v7.16b, v2.16b
91*e1eccf28SAndroid Build Coastguard Worker        umull       v2.8h,  v7.8b,  v2.8b
92*e1eccf28SAndroid Build Coastguard Worker        umull2      v15.8h, v7.16b, v3.16b
93*e1eccf28SAndroid Build Coastguard Worker        umull       v3.8h,  v7.8b,  v3.8b
94*e1eccf28SAndroid Build Coastguard Worker
        /* v4-v7 = (product + 128) >> 8, narrowed to bytes */
95*e1eccf28SAndroid Build Coastguard Worker        rshrn       v4.8b,  v0.8h,  #8
96*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v4.16b, v12.8h, #8
97*e1eccf28SAndroid Build Coastguard Worker        rshrn       v5.8b,  v1.8h,  #8
98*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v5.16b, v13.8h, #8
99*e1eccf28SAndroid Build Coastguard Worker        rshrn       v6.8b,  v2.8h,  #8
100*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v6.16b, v14.8h, #8
101*e1eccf28SAndroid Build Coastguard Worker        rshrn       v7.8b,  v3.8h,  #8
102*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v7.16b, v15.8h, #8
103*e1eccf28SAndroid Build Coastguard Worker
        /* product += that byte (widened to 16 bits) */
104*e1eccf28SAndroid Build Coastguard Worker        uaddw       v0.8h,  v0.8h,  v4.8b
105*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v12.8h, v12.8h, v4.16b
106*e1eccf28SAndroid Build Coastguard Worker        uaddw       v1.8h,  v1.8h,  v5.8b
107*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v13.8h, v13.8h, v5.16b
108*e1eccf28SAndroid Build Coastguard Worker        uaddw       v2.8h,  v2.8h,  v6.8b
109*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v14.8h, v14.8h, v6.16b
110*e1eccf28SAndroid Build Coastguard Worker        uaddw       v3.8h,  v3.8h,  v7.8b
111*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v15.8h, v15.8h, v7.16b
112*e1eccf28SAndroid Build Coastguard Worker
        /* v0-v3 = (sum + 128) >> 8 as bytes.  Together with the two steps
         * above this computes (x + ((x + 128) >> 8) + 128) >> 8, i.e. the
         * product divided by 255 with rounding. */
113*e1eccf28SAndroid Build Coastguard Worker        rshrn       v0.8b,  v0.8h,  #8
114*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v0.16b, v12.8h, #8
115*e1eccf28SAndroid Build Coastguard Worker        rshrn       v1.8b,  v1.8h,  #8
116*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v1.16b, v13.8h, #8
117*e1eccf28SAndroid Build Coastguard Worker        rshrn       v2.8b,  v2.8h,  #8
118*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v2.16b, v14.8h, #8
119*e1eccf28SAndroid Build Coastguard Worker        rshrn       v3.8b,  v3.8h,  #8
120*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v3.16b, v15.8h, #8
121*e1eccf28SAndroid Build Coastguard Worker
        /* add the source channels with unsigned saturation */
122*e1eccf28SAndroid Build Coastguard Worker        uqadd       v0.16b, v0.16b, v8.16b
123*e1eccf28SAndroid Build Coastguard Worker        uqadd       v1.16b, v1.16b, v9.16b
124*e1eccf28SAndroid Build Coastguard Worker        uqadd       v2.16b, v2.16b, v10.16b
125*e1eccf28SAndroid Build Coastguard Worker        uqadd       v3.16b, v3.16b, v11.16b
126*e1eccf28SAndroid Build Coastguard Worker.endm
127*e1eccf28SAndroid Build Coastguard Worker
/* DST_OVER: result = dst + src * (255 - dst.alpha) / 255, where the final
 * addition saturates.  Requires one colour component per register (zipped=1).
 *   In:      v0-v3 = destination R,G,B,A   v8-v11 = source R,G,B,A
 *   Out:     v0-v3
 *   Scratch: v4-v7, v12-v15; the source registers v8-v11 are overwritten too.
 */
128*e1eccf28SAndroid Build Coastguard Worker#define params_DST_OVER zipped=1
129*e1eccf28SAndroid Build Coastguard Worker.macro blend_kernel_DST_OVER
        /* v7 = 255 - dst.alpha in every byte lane */
130*e1eccf28SAndroid Build Coastguard Worker        mvn         v7.16b, v3.16b
131*e1eccf28SAndroid Build Coastguard Worker
        /* 16-bit products src * v7.  The upper eight lanes go to v12-v15 and
         * the lower eight replace v8-v11, so each umull2 has to read its
         * register before the paired umull overwrites it. */
132*e1eccf28SAndroid Build Coastguard Worker        umull2      v12.8h, v7.16b, v8.16b
133*e1eccf28SAndroid Build Coastguard Worker        umull       v8.8h,  v7.8b,  v8.8b
134*e1eccf28SAndroid Build Coastguard Worker        umull2      v13.8h, v7.16b, v9.16b
135*e1eccf28SAndroid Build Coastguard Worker        umull       v9.8h,  v7.8b,  v9.8b
136*e1eccf28SAndroid Build Coastguard Worker        umull2      v14.8h, v7.16b, v10.16b
137*e1eccf28SAndroid Build Coastguard Worker        umull       v10.8h, v7.8b,  v10.8b
138*e1eccf28SAndroid Build Coastguard Worker        umull2      v15.8h, v7.16b, v11.16b
139*e1eccf28SAndroid Build Coastguard Worker        umull       v11.8h, v7.8b,  v11.8b
140*e1eccf28SAndroid Build Coastguard Worker
        /* v4-v7 = (product + 128) >> 8, narrowed to bytes */
141*e1eccf28SAndroid Build Coastguard Worker        rshrn       v4.8b,  v8.8h,  #8
142*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v4.16b, v12.8h, #8
143*e1eccf28SAndroid Build Coastguard Worker        rshrn       v5.8b,  v9.8h,  #8
144*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v5.16b, v13.8h, #8
145*e1eccf28SAndroid Build Coastguard Worker        rshrn       v6.8b,  v10.8h, #8
146*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v6.16b, v14.8h, #8
147*e1eccf28SAndroid Build Coastguard Worker        rshrn       v7.8b,  v11.8h, #8
148*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v7.16b, v15.8h, #8
149*e1eccf28SAndroid Build Coastguard Worker
        /* product += that byte (widened to 16 bits) */
150*e1eccf28SAndroid Build Coastguard Worker        uaddw       v8.8h,  v8.8h,  v4.8b
151*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v12.8h, v12.8h, v4.16b
152*e1eccf28SAndroid Build Coastguard Worker        uaddw       v9.8h,  v9.8h,  v5.8b
153*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v13.8h, v13.8h, v5.16b
154*e1eccf28SAndroid Build Coastguard Worker        uaddw       v10.8h, v10.8h, v6.8b
155*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v14.8h, v14.8h, v6.16b
156*e1eccf28SAndroid Build Coastguard Worker        uaddw       v11.8h, v11.8h, v7.8b
157*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v15.8h, v15.8h, v7.16b
158*e1eccf28SAndroid Build Coastguard Worker
        /* v8-v11 = (sum + 128) >> 8 as bytes, i.e. the product divided by
         * 255 with rounding: (x + ((x + 128) >> 8) + 128) >> 8. */
159*e1eccf28SAndroid Build Coastguard Worker        rshrn       v8.8b,  v8.8h,  #8
160*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v8.16b, v12.8h, #8
161*e1eccf28SAndroid Build Coastguard Worker        rshrn       v9.8b,  v9.8h,  #8
162*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v9.16b, v13.8h, #8
163*e1eccf28SAndroid Build Coastguard Worker        rshrn       v10.8b,  v10.8h, #8
164*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v10.16b, v14.8h, #8
165*e1eccf28SAndroid Build Coastguard Worker        rshrn       v11.8b,  v11.8h, #8
166*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v11.16b, v15.8h, #8
167*e1eccf28SAndroid Build Coastguard Worker
        /* add the scaled source to the destination with unsigned saturation */
168*e1eccf28SAndroid Build Coastguard Worker        uqadd       v0.16b, v0.16b, v8.16b
169*e1eccf28SAndroid Build Coastguard Worker        uqadd       v1.16b, v1.16b, v9.16b
170*e1eccf28SAndroid Build Coastguard Worker        uqadd       v2.16b, v2.16b, v10.16b
171*e1eccf28SAndroid Build Coastguard Worker        uqadd       v3.16b, v3.16b, v11.16b
172*e1eccf28SAndroid Build Coastguard Worker.endm
173*e1eccf28SAndroid Build Coastguard Worker
/* SRC_IN: result = src * dst.alpha / 255 for all four channels.  Requires one
 * colour component per register (zipped=1).
 *   In:      v3 = destination alpha   v8-v11 = source R,G,B,A
 *            (the destination colour registers v0-v2 are overwritten without
 *            being read)
 *   Out:     v0-v3
 *   Scratch: v4-v7, v12-v15
 */
174*e1eccf28SAndroid Build Coastguard Worker#define params_SRC_IN zipped=1
175*e1eccf28SAndroid Build Coastguard Worker.macro blend_kernel_SRC_IN
        /* 16-bit products src * v3: upper eight lanes in v12-v15, lower eight
         * in v0-v3.  v3 is the multiplier for every channel, so it is
         * overwritten only by the very last umull. */
176*e1eccf28SAndroid Build Coastguard Worker        umull2      v12.8h, v3.16b, v8.16b
177*e1eccf28SAndroid Build Coastguard Worker        umull       v0.8h,  v3.8b,  v8.8b
178*e1eccf28SAndroid Build Coastguard Worker        umull2      v13.8h, v3.16b, v9.16b
179*e1eccf28SAndroid Build Coastguard Worker        umull       v1.8h,  v3.8b,  v9.8b
180*e1eccf28SAndroid Build Coastguard Worker        umull2      v14.8h, v3.16b, v10.16b
181*e1eccf28SAndroid Build Coastguard Worker        umull       v2.8h,  v3.8b,  v10.8b
182*e1eccf28SAndroid Build Coastguard Worker        umull2      v15.8h, v3.16b, v11.16b
183*e1eccf28SAndroid Build Coastguard Worker        umull       v3.8h,  v3.8b,  v11.8b
184*e1eccf28SAndroid Build Coastguard Worker
        /* v4-v7 = (product + 128) >> 8, narrowed to bytes */
185*e1eccf28SAndroid Build Coastguard Worker        rshrn       v4.8b,  v0.8h,  #8
186*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v4.16b, v12.8h, #8
187*e1eccf28SAndroid Build Coastguard Worker        rshrn       v5.8b,  v1.8h,  #8
188*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v5.16b, v13.8h, #8
189*e1eccf28SAndroid Build Coastguard Worker        rshrn       v6.8b,  v2.8h,  #8
190*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v6.16b, v14.8h, #8
191*e1eccf28SAndroid Build Coastguard Worker        rshrn       v7.8b,  v3.8h,  #8
192*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v7.16b, v15.8h, #8
193*e1eccf28SAndroid Build Coastguard Worker
        /* product += that byte (widened to 16 bits) */
194*e1eccf28SAndroid Build Coastguard Worker        uaddw       v0.8h,  v0.8h,  v4.8b
195*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v12.8h, v12.8h, v4.16b
196*e1eccf28SAndroid Build Coastguard Worker        uaddw       v1.8h,  v1.8h,  v5.8b
197*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v13.8h, v13.8h, v5.16b
198*e1eccf28SAndroid Build Coastguard Worker        uaddw       v2.8h,  v2.8h,  v6.8b
199*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v14.8h, v14.8h, v6.16b
200*e1eccf28SAndroid Build Coastguard Worker        uaddw       v3.8h,  v3.8h,  v7.8b
201*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v15.8h, v15.8h, v7.16b
202*e1eccf28SAndroid Build Coastguard Worker
        /* v0-v3 = (sum + 128) >> 8 as bytes, i.e. the product divided by
         * 255 with rounding: (x + ((x + 128) >> 8) + 128) >> 8. */
203*e1eccf28SAndroid Build Coastguard Worker        rshrn       v0.8b,  v0.8h,  #8
204*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v0.16b, v12.8h, #8
205*e1eccf28SAndroid Build Coastguard Worker        rshrn       v1.8b,  v1.8h,  #8
206*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v1.16b, v13.8h, #8
207*e1eccf28SAndroid Build Coastguard Worker        rshrn       v2.8b,  v2.8h,  #8
208*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v2.16b, v14.8h, #8
209*e1eccf28SAndroid Build Coastguard Worker        rshrn       v3.8b,  v3.8h,  #8
210*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v3.16b, v15.8h, #8
211*e1eccf28SAndroid Build Coastguard Worker.endm
212*e1eccf28SAndroid Build Coastguard Worker
/* DST_IN: result = dst * src.alpha / 255 for all four channels.  Requires one
 * colour component per register (zipped=1).
 *   In:      v0-v3 = destination R,G,B,A   v11 = source alpha
 *   Out:     v0-v3
 *   Scratch: v4-v7, v12-v15 (v11 is read but not modified)
 */
213*e1eccf28SAndroid Build Coastguard Worker#define params_DST_IN zipped=1
214*e1eccf28SAndroid Build Coastguard Worker.macro blend_kernel_DST_IN
        /* 16-bit products dst * v11.  The upper eight lanes go to v12-v15 and
         * the lower eight replace v0-v3, so each umull2 has to read its
         * register before the paired umull overwrites it. */
215*e1eccf28SAndroid Build Coastguard Worker        umull2      v12.8h, v0.16b, v11.16b
216*e1eccf28SAndroid Build Coastguard Worker        umull       v0.8h,  v0.8b,  v11.8b
217*e1eccf28SAndroid Build Coastguard Worker        umull2      v13.8h, v1.16b, v11.16b
218*e1eccf28SAndroid Build Coastguard Worker        umull       v1.8h,  v1.8b,  v11.8b
219*e1eccf28SAndroid Build Coastguard Worker        umull2      v14.8h, v2.16b, v11.16b
220*e1eccf28SAndroid Build Coastguard Worker        umull       v2.8h,  v2.8b,  v11.8b
221*e1eccf28SAndroid Build Coastguard Worker        umull2      v15.8h, v3.16b, v11.16b
222*e1eccf28SAndroid Build Coastguard Worker        umull       v3.8h,  v3.8b,  v11.8b
223*e1eccf28SAndroid Build Coastguard Worker
        /* v4-v7 = (product + 128) >> 8, narrowed to bytes */
224*e1eccf28SAndroid Build Coastguard Worker        rshrn       v4.8b,  v0.8h,  #8
225*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v4.16b, v12.8h, #8
226*e1eccf28SAndroid Build Coastguard Worker        rshrn       v5.8b,  v1.8h,  #8
227*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v5.16b, v13.8h, #8
228*e1eccf28SAndroid Build Coastguard Worker        rshrn       v6.8b,  v2.8h,  #8
229*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v6.16b, v14.8h, #8
230*e1eccf28SAndroid Build Coastguard Worker        rshrn       v7.8b,  v3.8h,  #8
231*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v7.16b, v15.8h, #8
232*e1eccf28SAndroid Build Coastguard Worker
        /* product += that byte (widened to 16 bits) */
233*e1eccf28SAndroid Build Coastguard Worker        uaddw       v0.8h,  v0.8h,  v4.8b
234*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v12.8h, v12.8h, v4.16b
235*e1eccf28SAndroid Build Coastguard Worker        uaddw       v1.8h,  v1.8h,  v5.8b
236*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v13.8h, v13.8h, v5.16b
237*e1eccf28SAndroid Build Coastguard Worker        uaddw       v2.8h,  v2.8h,  v6.8b
238*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v14.8h, v14.8h, v6.16b
239*e1eccf28SAndroid Build Coastguard Worker        uaddw       v3.8h,  v3.8h,  v7.8b
240*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v15.8h, v15.8h, v7.16b
241*e1eccf28SAndroid Build Coastguard Worker
        /* v0-v3 = (sum + 128) >> 8 as bytes, i.e. the product divided by
         * 255 with rounding: (x + ((x + 128) >> 8) + 128) >> 8. */
242*e1eccf28SAndroid Build Coastguard Worker        rshrn       v0.8b,  v0.8h,  #8
243*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v0.16b, v12.8h, #8
244*e1eccf28SAndroid Build Coastguard Worker        rshrn       v1.8b,  v1.8h,  #8
245*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v1.16b, v13.8h, #8
246*e1eccf28SAndroid Build Coastguard Worker        rshrn       v2.8b,  v2.8h,  #8
247*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v2.16b, v14.8h, #8
248*e1eccf28SAndroid Build Coastguard Worker        rshrn       v3.8b,  v3.8h,  #8
249*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v3.16b, v15.8h, #8
250*e1eccf28SAndroid Build Coastguard Worker.endm
251*e1eccf28SAndroid Build Coastguard Worker
/* SRC_OUT: result = src * (255 - dst.alpha) / 255.  Implemented by inverting
 * the destination alpha in place and then expanding the SRC_IN kernel, so it
 * uses the same registers as blend_kernel_SRC_IN.  Requires one colour
 * component per register (zipped=1).
 */
252*e1eccf28SAndroid Build Coastguard Worker#define params_SRC_OUT zipped=1
253*e1eccf28SAndroid Build Coastguard Worker.macro blend_kernel_SRC_OUT
        /* v3 = 255 - dst.alpha; must happen before SRC_IN reads v3 */
254*e1eccf28SAndroid Build Coastguard Worker        mvn         v3.16b, v3.16b
255*e1eccf28SAndroid Build Coastguard Worker        blend_kernel_SRC_IN
256*e1eccf28SAndroid Build Coastguard Worker.endm
257*e1eccf28SAndroid Build Coastguard Worker
258*e1eccf28SAndroid Build Coastguard Worker
/* DST_OUT: result = dst * (255 - src.alpha) / 255.  Implemented by inverting
 * the source alpha in place and then expanding the DST_IN kernel, so it uses
 * the same registers as blend_kernel_DST_IN and additionally leaves v11
 * inverted.  Requires one colour component per register (zipped=1).
 */
259*e1eccf28SAndroid Build Coastguard Worker#define params_DST_OUT zipped=1
260*e1eccf28SAndroid Build Coastguard Worker.macro blend_kernel_DST_OUT
        /* v11 = 255 - src.alpha; must happen before DST_IN reads v11 */
261*e1eccf28SAndroid Build Coastguard Worker        mvn         v11.16b, v11.16b
262*e1eccf28SAndroid Build Coastguard Worker        blend_kernel_DST_IN
263*e1eccf28SAndroid Build Coastguard Worker.endm
264*e1eccf28SAndroid Build Coastguard Worker
/* SRC_ATOP: for the three colour channels,
 *     result = (dst * (255 - src.alpha) + src * dst.alpha) / 255
 * v3 is never written, so the output alpha is the destination alpha.
 * Requires one colour component per register (zipped=1).
 *   In:      v0-v3 = destination R,G,B,A   v8-v11 = source R,G,B,A
 *   Out:     v0-v2 (v3 unchanged)
 *   Scratch: v4-v6, v12-v14; v8-v10 are overwritten and v11 is left inverted.
 */
265*e1eccf28SAndroid Build Coastguard Worker#define params_SRC_ATOP zipped=1
266*e1eccf28SAndroid Build Coastguard Worker.macro blend_kernel_SRC_ATOP
        /* v11 = 255 - src.alpha in every byte lane */
267*e1eccf28SAndroid Build Coastguard Worker        mvn         v11.16b, v11.16b
268*e1eccf28SAndroid Build Coastguard Worker
        /* 16-bit products dst.rgb * v11: upper eight lanes in v12-v14, lower
         * eight replace v0-v2 (umull2 reads before umull overwrites). */
269*e1eccf28SAndroid Build Coastguard Worker        umull2      v12.8h, v11.16b, v0.16b
270*e1eccf28SAndroid Build Coastguard Worker        umull       v0.8h,  v11.8b,  v0.8b
271*e1eccf28SAndroid Build Coastguard Worker        umull2      v13.8h, v11.16b, v1.16b
272*e1eccf28SAndroid Build Coastguard Worker        umull       v1.8h,  v11.8b,  v1.8b
273*e1eccf28SAndroid Build Coastguard Worker        umull2      v14.8h, v11.16b, v2.16b
274*e1eccf28SAndroid Build Coastguard Worker        umull       v2.8h,  v11.8b,  v2.8b
275*e1eccf28SAndroid Build Coastguard Worker
        /* 16-bit products src.rgb * dst.alpha: upper eight lanes in v4-v6,
         * lower eight replace v8-v10. */
276*e1eccf28SAndroid Build Coastguard Worker        umull2      v4.8h,  v3.16b, v8.16b
277*e1eccf28SAndroid Build Coastguard Worker        umull       v8.8h,  v3.8b,  v8.8b
278*e1eccf28SAndroid Build Coastguard Worker        umull2      v5.8h,  v3.16b, v9.16b
279*e1eccf28SAndroid Build Coastguard Worker        umull       v9.8h,  v3.8b,  v9.8b
280*e1eccf28SAndroid Build Coastguard Worker        umull2      v6.8h,  v3.16b, v10.16b
281*e1eccf28SAndroid Build Coastguard Worker        umull       v10.8h, v3.8b,  v10.8b
282*e1eccf28SAndroid Build Coastguard Worker
        /* sum the two products per lane, saturating at 16 bits */
283*e1eccf28SAndroid Build Coastguard Worker        uqadd       v12.8h, v12.8h, v4.8h
284*e1eccf28SAndroid Build Coastguard Worker        uqadd       v0.8h,  v0.8h,  v8.8h
285*e1eccf28SAndroid Build Coastguard Worker        uqadd       v13.8h, v13.8h, v5.8h
286*e1eccf28SAndroid Build Coastguard Worker        uqadd       v1.8h,  v1.8h,  v9.8h
287*e1eccf28SAndroid Build Coastguard Worker        uqadd       v14.8h, v14.8h, v6.8h
288*e1eccf28SAndroid Build Coastguard Worker        uqadd       v2.8h,  v2.8h,  v10.8h
289*e1eccf28SAndroid Build Coastguard Worker
        /* v8-v10 / v4-v6 = (sum + 128) >> 8, kept as 16-bit lanes */
290*e1eccf28SAndroid Build Coastguard Worker        urshr       v8.8h,  v0.8h,  #8
291*e1eccf28SAndroid Build Coastguard Worker        urshr       v4.8h,  v12.8h, #8
292*e1eccf28SAndroid Build Coastguard Worker        urshr       v9.8h,  v1.8h,  #8
293*e1eccf28SAndroid Build Coastguard Worker        urshr       v5.8h,  v13.8h, #8
294*e1eccf28SAndroid Build Coastguard Worker        urshr       v10.8h, v2.8h,  #8
295*e1eccf28SAndroid Build Coastguard Worker        urshr       v6.8h,  v14.8h, #8
296*e1eccf28SAndroid Build Coastguard Worker
        /* sum += that correction term, again saturating at 16 bits */
297*e1eccf28SAndroid Build Coastguard Worker        uqadd       v0.8h,  v0.8h,  v8.8h
298*e1eccf28SAndroid Build Coastguard Worker        uqadd       v12.8h, v12.8h, v4.8h
299*e1eccf28SAndroid Build Coastguard Worker        uqadd       v1.8h,  v1.8h,  v9.8h
300*e1eccf28SAndroid Build Coastguard Worker        uqadd       v13.8h, v13.8h, v5.8h
301*e1eccf28SAndroid Build Coastguard Worker        uqadd       v2.8h,  v2.8h,  v10.8h
302*e1eccf28SAndroid Build Coastguard Worker        uqadd       v14.8h, v14.8h, v6.8h
303*e1eccf28SAndroid Build Coastguard Worker
        /* rounding shift right by 8 and narrow to bytes with saturation */
304*e1eccf28SAndroid Build Coastguard Worker        uqrshrn     v0.8b,  v0.8h,  #8
305*e1eccf28SAndroid Build Coastguard Worker        uqrshrn2    v0.16b, v12.8h, #8
306*e1eccf28SAndroid Build Coastguard Worker        uqrshrn     v1.8b,  v1.8h,  #8
307*e1eccf28SAndroid Build Coastguard Worker        uqrshrn2    v1.16b, v13.8h, #8
308*e1eccf28SAndroid Build Coastguard Worker        uqrshrn     v2.8b,  v2.8h,  #8
309*e1eccf28SAndroid Build Coastguard Worker        uqrshrn2    v2.16b, v14.8h, #8
310*e1eccf28SAndroid Build Coastguard Worker.endm
311*e1eccf28SAndroid Build Coastguard Worker
/* DST_ATOP: for the three colour channels,
 *     result = (dst * src.alpha + src * (255 - dst.alpha)) / 255
 * and the output alpha is a copy of the source alpha.
 * Requires one colour component per register (zipped=1).
 *   In:      v0-v3 = destination R,G,B,A   v8-v11 = source R,G,B,A
 *   Out:     v0-v3
 *   Scratch: v4-v6, v12-v14; v8-v10 are overwritten (v11 is preserved).
 */
312*e1eccf28SAndroid Build Coastguard Worker#define params_DST_ATOP zipped=1
313*e1eccf28SAndroid Build Coastguard Worker.macro blend_kernel_DST_ATOP
        /* v3 = 255 - dst.alpha in every byte lane */
314*e1eccf28SAndroid Build Coastguard Worker        mvn         v3.16b, v3.16b
315*e1eccf28SAndroid Build Coastguard Worker
        /* 16-bit products dst.rgb * src.alpha: upper eight lanes in v12-v14,
         * lower eight replace v0-v2 (umull2 reads before umull overwrites). */
316*e1eccf28SAndroid Build Coastguard Worker        umull2      v12.8h, v11.16b, v0.16b
317*e1eccf28SAndroid Build Coastguard Worker        umull       v0.8h,  v11.8b,  v0.8b
318*e1eccf28SAndroid Build Coastguard Worker        umull2      v13.8h, v11.16b, v1.16b
319*e1eccf28SAndroid Build Coastguard Worker        umull       v1.8h,  v11.8b,  v1.8b
320*e1eccf28SAndroid Build Coastguard Worker        umull2      v14.8h, v11.16b, v2.16b
321*e1eccf28SAndroid Build Coastguard Worker        umull       v2.8h,  v11.8b,  v2.8b
322*e1eccf28SAndroid Build Coastguard Worker
        /* 16-bit products src.rgb * (255 - dst.alpha): upper eight lanes in
         * v4-v6, lower eight replace v8-v10. */
323*e1eccf28SAndroid Build Coastguard Worker        umull2      v4.8h,  v3.16b, v8.16b
324*e1eccf28SAndroid Build Coastguard Worker        umull       v8.8h,  v3.8b,  v8.8b
325*e1eccf28SAndroid Build Coastguard Worker        umull2      v5.8h,  v3.16b, v9.16b
326*e1eccf28SAndroid Build Coastguard Worker        umull       v9.8h,  v3.8b,  v9.8b
327*e1eccf28SAndroid Build Coastguard Worker        umull2      v6.8h,  v3.16b, v10.16b
328*e1eccf28SAndroid Build Coastguard Worker        umull       v10.8h, v3.8b,  v10.8b
329*e1eccf28SAndroid Build Coastguard Worker
        /* sum the two products per lane, saturating at 16 bits */
330*e1eccf28SAndroid Build Coastguard Worker        uqadd       v12.8h, v12.8h, v4.8h
331*e1eccf28SAndroid Build Coastguard Worker        uqadd       v0.8h,  v0.8h,  v8.8h
332*e1eccf28SAndroid Build Coastguard Worker        uqadd       v13.8h, v13.8h, v5.8h
333*e1eccf28SAndroid Build Coastguard Worker        uqadd       v1.8h,  v1.8h,  v9.8h
334*e1eccf28SAndroid Build Coastguard Worker        uqadd       v14.8h, v14.8h, v6.8h
335*e1eccf28SAndroid Build Coastguard Worker        uqadd       v2.8h,  v2.8h,  v10.8h
336*e1eccf28SAndroid Build Coastguard Worker
        /* v8-v10 / v4-v6 = (sum + 128) >> 8, kept as 16-bit lanes */
337*e1eccf28SAndroid Build Coastguard Worker        urshr       v8.8h,  v0.8h,  #8
338*e1eccf28SAndroid Build Coastguard Worker        urshr       v4.8h,  v12.8h, #8
339*e1eccf28SAndroid Build Coastguard Worker        urshr       v9.8h,  v1.8h,  #8
340*e1eccf28SAndroid Build Coastguard Worker        urshr       v5.8h,  v13.8h, #8
341*e1eccf28SAndroid Build Coastguard Worker        urshr       v10.8h, v2.8h,  #8
342*e1eccf28SAndroid Build Coastguard Worker        urshr       v6.8h,  v14.8h, #8
343*e1eccf28SAndroid Build Coastguard Worker
        /* sum += that correction term, again saturating at 16 bits */
344*e1eccf28SAndroid Build Coastguard Worker        uqadd       v0.8h,  v0.8h,  v8.8h
345*e1eccf28SAndroid Build Coastguard Worker        uqadd       v12.8h, v12.8h, v4.8h
346*e1eccf28SAndroid Build Coastguard Worker        uqadd       v1.8h,  v1.8h,  v9.8h
347*e1eccf28SAndroid Build Coastguard Worker        uqadd       v13.8h, v13.8h, v5.8h
348*e1eccf28SAndroid Build Coastguard Worker        uqadd       v2.8h,  v2.8h,  v10.8h
349*e1eccf28SAndroid Build Coastguard Worker        uqadd       v14.8h, v14.8h, v6.8h
350*e1eccf28SAndroid Build Coastguard Worker
        /* rounding shift right by 8 and narrow to bytes with saturation */
351*e1eccf28SAndroid Build Coastguard Worker        uqrshrn     v0.8b,  v0.8h,  #8
352*e1eccf28SAndroid Build Coastguard Worker        uqrshrn2    v0.16b, v12.8h, #8
353*e1eccf28SAndroid Build Coastguard Worker        uqrshrn     v1.8b,  v1.8h,  #8
354*e1eccf28SAndroid Build Coastguard Worker        uqrshrn2    v1.16b, v13.8h, #8
355*e1eccf28SAndroid Build Coastguard Worker        uqrshrn     v2.8b,  v2.8h,  #8
356*e1eccf28SAndroid Build Coastguard Worker        uqrshrn2    v2.16b, v14.8h, #8
357*e1eccf28SAndroid Build Coastguard Worker
        /* output alpha = source alpha (this also discards the inverted
         * destination alpha held in v3) */
358*e1eccf28SAndroid Build Coastguard Worker        mov         v3.16b, v11.16b
359*e1eccf28SAndroid Build Coastguard Worker.endm
360*e1eccf28SAndroid Build Coastguard Worker
/* MULTIPLY: result = dst * src / 255 for every byte.  Each byte is multiplied
 * by the byte at the same position in the other buffer, so the components do
 * not need to be separated (zipped=0).
 *   In:      v0-v3 = destination data   v8-v11 = source data
 *   Out:     v0-v3
 *   Scratch: v4-v7, v12-v15
 */
361*e1eccf28SAndroid Build Coastguard Worker#define params_MULTIPLY zipped=0
362*e1eccf28SAndroid Build Coastguard Worker.macro blend_kernel_MULTIPLY
        /* 16-bit products dst * src.  The upper eight lanes go to v12-v15 and
         * the lower eight replace v0-v3, so each umull2 has to read its
         * register before the paired umull overwrites it. */
363*e1eccf28SAndroid Build Coastguard Worker        umull2      v12.8h, v0.16b, v8.16b
364*e1eccf28SAndroid Build Coastguard Worker        umull       v0.8h,  v0.8b,  v8.8b
365*e1eccf28SAndroid Build Coastguard Worker        umull2      v13.8h, v1.16b, v9.16b
366*e1eccf28SAndroid Build Coastguard Worker        umull       v1.8h,  v1.8b,  v9.8b
367*e1eccf28SAndroid Build Coastguard Worker        umull2      v14.8h, v2.16b, v10.16b
368*e1eccf28SAndroid Build Coastguard Worker        umull       v2.8h,  v2.8b,  v10.8b
369*e1eccf28SAndroid Build Coastguard Worker        umull2      v15.8h, v3.16b, v11.16b
370*e1eccf28SAndroid Build Coastguard Worker        umull       v3.8h,  v3.8b,  v11.8b
371*e1eccf28SAndroid Build Coastguard Worker
        /* v4-v7 = (product + 128) >> 8, narrowed to bytes */
372*e1eccf28SAndroid Build Coastguard Worker        rshrn       v4.8b,  v0.8h,  #8
373*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v4.16b, v12.8h, #8
374*e1eccf28SAndroid Build Coastguard Worker        rshrn       v5.8b,  v1.8h,  #8
375*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v5.16b, v13.8h, #8
376*e1eccf28SAndroid Build Coastguard Worker        rshrn       v6.8b,  v2.8h,  #8
377*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v6.16b, v14.8h, #8
378*e1eccf28SAndroid Build Coastguard Worker        rshrn       v7.8b,  v3.8h,  #8
379*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v7.16b, v15.8h, #8
380*e1eccf28SAndroid Build Coastguard Worker
        /* product += that byte (widened to 16 bits) */
381*e1eccf28SAndroid Build Coastguard Worker        uaddw       v0.8h,  v0.8h,  v4.8b
382*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v12.8h, v12.8h, v4.16b
383*e1eccf28SAndroid Build Coastguard Worker        uaddw       v1.8h,  v1.8h,  v5.8b
384*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v13.8h, v13.8h, v5.16b
385*e1eccf28SAndroid Build Coastguard Worker        uaddw       v2.8h,  v2.8h,  v6.8b
386*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v14.8h, v14.8h, v6.16b
387*e1eccf28SAndroid Build Coastguard Worker        uaddw       v3.8h,  v3.8h,  v7.8b
388*e1eccf28SAndroid Build Coastguard Worker        uaddw2      v15.8h, v15.8h, v7.16b
389*e1eccf28SAndroid Build Coastguard Worker
        /* v0-v3 = (sum + 128) >> 8 as bytes, i.e. the product divided by
         * 255 with rounding: (x + ((x + 128) >> 8) + 128) >> 8. */
390*e1eccf28SAndroid Build Coastguard Worker        rshrn       v0.8b,  v0.8h,  #8
391*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v0.16b, v12.8h, #8
392*e1eccf28SAndroid Build Coastguard Worker        rshrn       v1.8b,  v1.8h,  #8
393*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v1.16b, v13.8h, #8
394*e1eccf28SAndroid Build Coastguard Worker        rshrn       v2.8b,  v2.8h,  #8
395*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v2.16b, v14.8h, #8
396*e1eccf28SAndroid Build Coastguard Worker        rshrn       v3.8b,  v3.8h,  #8
397*e1eccf28SAndroid Build Coastguard Worker        rshrn2      v3.16b, v15.8h, #8
398*e1eccf28SAndroid Build Coastguard Worker.endm
399*e1eccf28SAndroid Build Coastguard Worker
/* ADD: result = dst + src for every byte, clamped by unsigned saturation.
 * The operation is identical for every byte, so the components need not be
 * separated (zipped=0).
 */
#define params_ADD zipped=0
.macro blend_kernel_ADD
        /* Four independent saturating byte-wise additions: destination
         * registers v0-v3 plus source registers v8-v11. */
        uqadd       v3.16b, v3.16b, v11.16b
        uqadd       v2.16b, v2.16b, v10.16b
        uqadd       v1.16b, v1.16b, v9.16b
        uqadd       v0.16b, v0.16b, v8.16b
.endm
407*e1eccf28SAndroid Build Coastguard Worker
/* SUBTRACT: result = dst - src for every byte, clamped at zero by unsigned
 * saturation.  The operation is identical for every byte, so the components
 * need not be separated (zipped=0).
 */
#define params_SUBTRACT zipped=0
.macro blend_kernel_SUBTRACT
        /* Four independent saturating byte-wise subtractions.  Operand order
         * matters: the source (v8-v11) is taken away from the destination
         * (v0-v3). */
        uqsub       v3.16b, v3.16b, v11.16b
        uqsub       v2.16b, v2.16b, v10.16b
        uqsub       v1.16b, v1.16b, v9.16b
        uqsub       v0.16b, v0.16b, v8.16b
.endm
415*e1eccf28SAndroid Build Coastguard Worker
/* DIFFERENCE works on every byte alike, so wrap_line is asked for plain
 * ld1/st1 transfers (zipped=0) instead of de-interleaving ld4/st4.
 */
#define params_DIFFERENCE zipped=0

/* Helper for blend_kernel_DIFFERENCE: unsigned byte-wise absolute
 * difference of one destination vector and one source vector.
 */
.macro blend_kernel_DIFFERENCE_reg vdst, vsrc
        uabd    \vdst\().16b, \vdst\().16b, \vsrc\().16b
.endm

/* v0-v3 (loaded from x0) = |v0-v3 - v8-v11| per unsigned byte, with v8-v11
 * loaded from x1.
 */
.macro blend_kernel_DIFFERENCE
        blend_kernel_DIFFERENCE_reg v0, v8
        blend_kernel_DIFFERENCE_reg v1, v9
        blend_kernel_DIFFERENCE_reg v2, v10
        blend_kernel_DIFFERENCE_reg v3, v11
.endm
423*e1eccf28SAndroid Build Coastguard Worker
/* XOR works on every bit alike, so wrap_line is asked for plain ld1/st1
 * transfers (zipped=0) instead of de-interleaving ld4/st4.
 */
#define params_XOR zipped=0

/* Helper for blend_kernel_XOR: bitwise exclusive-or of one source vector
 * into one destination vector.
 */
.macro blend_kernel_XOR_reg vdst, vsrc
        eor     \vdst\().16b, \vdst\().16b, \vsrc\().16b
.endm

/* v0-v3 (loaded from x0) ^= v8-v11 (loaded from x1). */
.macro blend_kernel_XOR
        blend_kernel_XOR_reg v0, v8
        blend_kernel_XOR_reg v1, v9
        blend_kernel_XOR_reg v2, v10
        blend_kernel_XOR_reg v3, v11
.endm
431*e1eccf28SAndroid Build Coastguard Worker
432*e1eccf28SAndroid Build Coastguard Worker
433*e1eccf28SAndroid Build Coastguard Worker/* Define the wrapper code which will load and store the data, iterate the
434*e1eccf28SAndroid Build Coastguard Worker * correct number of times, and safely handle the remainder at the end of the
435*e1eccf28SAndroid Build Coastguard Worker * loop.  Various sections of assembly code are dropped or substituted for
436*e1eccf28SAndroid Build Coastguard Worker * simpler operations if they're not needed.
 *
 * Macro parameters:
 *   kernel - macro expanded to do the blend on the loaded registers.
 *   nowrap - non-zero: emit only \kernel and the common return; no register
 *            save, loop or tail handling is generated.
 *   zipped - non-zero: full 64-byte blocks use ld4/st4 and the tail is
 *            de-interleaved by hand; zero: ld1/st1 and no shuffling.
 *   lddst  - non-zero: load from [x0] into v0-v3 before running \kernel.
 *   ldsrc  - non-zero: load from [x1] into v8-v11 before running \kernel.
 *   pld    - gates the prefetch hints, which are currently disabled (#if 0).
 *
 * Register use in the wrapped form:
 *   x0    - address the results are stored to (and loaded from when lddst).
 *   x1    - address read when ldsrc.
 *   x2    - number of bytes left to process.
 *   x3    - scratch, overwritten with a stack address.
 *   v4-v7 - scratch for the tail shuffles when zipped.
 * The low 64 bits of v8-v15 are saved on the stack and restored on exit.
 * Every expansion finishes by setting x0 to 0 and returning.
437*e1eccf28SAndroid Build Coastguard Worker */
438*e1eccf28SAndroid Build Coastguard Worker.macro wrap_line kernel, nowrap=0, zipped=1, lddst=1, ldsrc=1, pld=1
439*e1eccf28SAndroid Build Coastguard Worker.if \nowrap
        /* Unwrapped form: expand the kernel on its own, with no loop
         * around it.
         */
440*e1eccf28SAndroid Build Coastguard Worker        \kernel
441*e1eccf28SAndroid Build Coastguard Worker.else
        /* Reserve 64 bytes of stack and save the low halves of v8-v15:
         * v8-v11 at [sp], v12-v15 at [sp + 32] (x3 = old sp - 32).
         */
442*e1eccf28SAndroid Build Coastguard Worker        sub     x3, sp, #32
443*e1eccf28SAndroid Build Coastguard Worker        sub     sp, sp, #64
444*e1eccf28SAndroid Build Coastguard Worker        st1     {v8.1d - v11.1d}, [sp]
445*e1eccf28SAndroid Build Coastguard Worker        st1     {v12.1d - v15.1d}, [x3]
        /* Bias the byte count by -64 and enter the loop at its condition
         * check, so the body only runs while a full 64-byte block remains.
         */
446*e1eccf28SAndroid Build Coastguard Worker        subs    x2, x2, #64
447*e1eccf28SAndroid Build Coastguard Worker        b       2f
448*e1eccf28SAndroid Build Coastguard Worker.align 4
449*e1eccf28SAndroid Build Coastguard Worker1:
        /* Main loop: one 64-byte block per iteration.  The load from [x0]
         * has no write-back; x0 only advances at the store further down.
         */
450*e1eccf28SAndroid Build Coastguard Worker  .if \lddst
451*e1eccf28SAndroid Build Coastguard Worker    .if \zipped
452*e1eccf28SAndroid Build Coastguard Worker        ld4     {v0.16b - v3.16b}, [x0]
453*e1eccf28SAndroid Build Coastguard Worker    .else
454*e1eccf28SAndroid Build Coastguard Worker        ld1     {v0.16b - v3.16b}, [x0]
455*e1eccf28SAndroid Build Coastguard Worker    .endif
456*e1eccf28SAndroid Build Coastguard Worker  .endif
457*e1eccf28SAndroid Build Coastguard Worker  .if \ldsrc
458*e1eccf28SAndroid Build Coastguard Worker    .if \zipped
459*e1eccf28SAndroid Build Coastguard Worker        ld4     {v8.16b - v11.16b}, [x1], #64
460*e1eccf28SAndroid Build Coastguard Worker    .else
461*e1eccf28SAndroid Build Coastguard Worker        ld1     {v8.16b - v11.16b}, [x1], #64
462*e1eccf28SAndroid Build Coastguard Worker    .endif
463*e1eccf28SAndroid Build Coastguard Worker  .endif
464*e1eccf28SAndroid Build Coastguard Worker  .if \pld
465*e1eccf28SAndroid Build Coastguard Worker#if 0 /* TODO: test this on real hardware */
466*e1eccf28SAndroid Build Coastguard Worker    .if \lddst ; prfm PLDL1STRM, [x0, #192] ; .endif
467*e1eccf28SAndroid Build Coastguard Worker    .if \ldsrc ; prfm PLDL1STRM, [x1, #192] ; .endif
468*e1eccf28SAndroid Build Coastguard Worker#endif
469*e1eccf28SAndroid Build Coastguard Worker  .endif
470*e1eccf28SAndroid Build Coastguard Worker
471*e1eccf28SAndroid Build Coastguard Worker        \kernel
472*e1eccf28SAndroid Build Coastguard Worker
        /* The flags set by this subs are tested by the bge at label 2; the
         * store in between leaves them untouched.
         */
473*e1eccf28SAndroid Build Coastguard Worker        subs    x2, x2, #64
474*e1eccf28SAndroid Build Coastguard Worker  .if \zipped
475*e1eccf28SAndroid Build Coastguard Worker        st4     {v0.16b,v1.16b,v2.16b,v3.16b}, [x0], #64
476*e1eccf28SAndroid Build Coastguard Worker  .else
477*e1eccf28SAndroid Build Coastguard Worker        st1     {v0.16b,v1.16b,v2.16b,v3.16b}, [x0], #64
478*e1eccf28SAndroid Build Coastguard Worker  .endif
479*e1eccf28SAndroid Build Coastguard Worker
480*e1eccf28SAndroid Build Coastguard Worker2:      bge     1b
        /* Remove the bias: x2 is now the leftover byte count.  If nothing
         * is left, skip straight to the register restore.
         */
481*e1eccf28SAndroid Build Coastguard Worker        adds    x2, x2, #64
482*e1eccf28SAndroid Build Coastguard Worker        beq     2f
483*e1eccf28SAndroid Build Coastguard Worker
484*e1eccf28SAndroid Build Coastguard Worker        /* To handle the tail portion of the data (something less than 64
485*e1eccf28SAndroid Build Coastguard Worker         * bytes) load small power-of-two chunks into working registers.  It
486*e1eccf28SAndroid Build Coastguard Worker         * doesn't matter where they end up in the register; the same process
487*e1eccf28SAndroid Build Coastguard Worker         * will store them back out using the same positions and the operations
488*e1eccf28SAndroid Build Coastguard Worker         * don't require data to interact with its neighbours.
489*e1eccf28SAndroid Build Coastguard Worker         */
        /* Zero the working registers first so that lanes the partial loads
         * below do not fill hold zero.
         */
490*e1eccf28SAndroid Build Coastguard Worker        movi    v0.16b, #0
491*e1eccf28SAndroid Build Coastguard Worker        movi    v1.16b, #0
492*e1eccf28SAndroid Build Coastguard Worker        movi    v2.16b, #0
493*e1eccf28SAndroid Build Coastguard Worker        movi    v3.16b, #0
494*e1eccf28SAndroid Build Coastguard Worker
495*e1eccf28SAndroid Build Coastguard Worker        movi    v8.16b, #0
496*e1eccf28SAndroid Build Coastguard Worker        movi    v9.16b, #0
497*e1eccf28SAndroid Build Coastguard Worker        movi    v10.16b, #0
498*e1eccf28SAndroid Build Coastguard Worker        movi    v11.16b, #0
499*e1eccf28SAndroid Build Coastguard Worker
        /* Each set bit of x2 pulls in one chunk: bit 5 -> 32 bytes into
         * v2,v3 (src: v10,v11), bit 4 -> 16 bytes into v1 (src: v9), and
         * bits 3..0 -> 8, 4, 2 and 1 bytes into element 1 of v0 (src: v8)
         * at d, s, h and b width, which are non-overlapping byte ranges of
         * the register.
         */
500*e1eccf28SAndroid Build Coastguard Worker        tbz     x2, #5, 1f
501*e1eccf28SAndroid Build Coastguard Worker  .if \lddst ; ld1     {v2.16b,v3.16b}, [x0], #32   ; .endif
502*e1eccf28SAndroid Build Coastguard Worker  .if \ldsrc ; ld1     {v10.16b,v11.16b}, [x1], #32 ; .endif
503*e1eccf28SAndroid Build Coastguard Worker1:      tbz     x2, #4, 1f
504*e1eccf28SAndroid Build Coastguard Worker  .if \lddst ; ld1     {v1.16b}, [x0], #16  ; .endif
505*e1eccf28SAndroid Build Coastguard Worker  .if \ldsrc ; ld1     {v9.16b}, [x1], #16  ; .endif
506*e1eccf28SAndroid Build Coastguard Worker1:      tbz     x2, #3, 1f
507*e1eccf28SAndroid Build Coastguard Worker  .if \lddst ; ld1     {v0.d}[1], [x0], #8 ; .endif
508*e1eccf28SAndroid Build Coastguard Worker  .if \ldsrc ; ld1     {v8.d}[1], [x1], #8 ; .endif
509*e1eccf28SAndroid Build Coastguard Worker1:      tbz     x2, #2, 1f
510*e1eccf28SAndroid Build Coastguard Worker  .if \lddst ; ld1     {v0.s}[1], [x0], #4 ; .endif
511*e1eccf28SAndroid Build Coastguard Worker  .if \ldsrc ; ld1     {v8.s}[1], [x1], #4 ; .endif
512*e1eccf28SAndroid Build Coastguard Worker1:      tbz     x2, #1, 1f
513*e1eccf28SAndroid Build Coastguard Worker  .if \lddst ; ld1     {v0.h}[1], [x0], #2 ; .endif
514*e1eccf28SAndroid Build Coastguard Worker  .if \ldsrc ; ld1     {v8.h}[1], [x1], #2 ; .endif
515*e1eccf28SAndroid Build Coastguard Worker1:      tbz     x2, #0, 1f
516*e1eccf28SAndroid Build Coastguard Worker  .if \lddst ; ld1     {v0.b}[1], [x0], #1 ; .endif
517*e1eccf28SAndroid Build Coastguard Worker  .if \ldsrc ; ld1     {v8.b}[1], [x1], #1 ; .endif
518*e1eccf28SAndroid Build Coastguard Worker1:
        /* The post-incremented loads from [x0] advanced it by the whole
         * tail; step back so the stores below start at the same address.
         */
519*e1eccf28SAndroid Build Coastguard Worker  .if \lddst ; sub     x0, x0, x2           ; .endif
520*e1eccf28SAndroid Build Coastguard Worker
521*e1eccf28SAndroid Build Coastguard Worker.if \zipped
522*e1eccf28SAndroid Build Coastguard Worker        /* One small impediment in the process above is that some of the load
523*e1eccf28SAndroid Build Coastguard Worker         * operations can't perform byte-wise structure deinterleaving at the
524*e1eccf28SAndroid Build Coastguard Worker         * same time as loading only part of a register.  So the data is loaded
525*e1eccf28SAndroid Build Coastguard Worker         * linearly and unpacked manually at this point.
526*e1eccf28SAndroid Build Coastguard Worker         */
        /* Two rounds of uzp, with v4-v7 as temporaries, leave bytes
         * 0,4,8,... of the 64-byte block in v0, 1,5,9,... in v1,
         * 2,6,10,... in v2 and 3,7,11,... in v3.
         */
527*e1eccf28SAndroid Build Coastguard Worker        uzp1    v4.16b, v0.16b, v1.16b
528*e1eccf28SAndroid Build Coastguard Worker        uzp2    v5.16b, v0.16b, v1.16b
529*e1eccf28SAndroid Build Coastguard Worker        uzp1    v6.16b, v2.16b, v3.16b
530*e1eccf28SAndroid Build Coastguard Worker        uzp2    v7.16b, v2.16b, v3.16b
531*e1eccf28SAndroid Build Coastguard Worker        uzp1    v0.16b, v4.16b, v6.16b
532*e1eccf28SAndroid Build Coastguard Worker        uzp2    v2.16b, v4.16b, v6.16b
533*e1eccf28SAndroid Build Coastguard Worker        uzp1    v1.16b, v5.16b, v7.16b
534*e1eccf28SAndroid Build Coastguard Worker        uzp2    v3.16b, v5.16b, v7.16b
535*e1eccf28SAndroid Build Coastguard Worker
        /* The same de-interleave, applied to v8-v11. */
536*e1eccf28SAndroid Build Coastguard Worker        uzp1    v4.16b, v8.16b, v9.16b
537*e1eccf28SAndroid Build Coastguard Worker        uzp2    v5.16b, v8.16b, v9.16b
538*e1eccf28SAndroid Build Coastguard Worker        uzp1    v6.16b, v10.16b, v11.16b
539*e1eccf28SAndroid Build Coastguard Worker        uzp2    v7.16b, v10.16b, v11.16b
540*e1eccf28SAndroid Build Coastguard Worker        uzp1    v8.16b, v4.16b, v6.16b
541*e1eccf28SAndroid Build Coastguard Worker        uzp2    v10.16b, v4.16b, v6.16b
542*e1eccf28SAndroid Build Coastguard Worker        uzp1    v9.16b, v5.16b, v7.16b
543*e1eccf28SAndroid Build Coastguard Worker        uzp2    v11.16b, v5.16b, v7.16b
544*e1eccf28SAndroid Build Coastguard Worker
545*e1eccf28SAndroid Build Coastguard Worker        \kernel
546*e1eccf28SAndroid Build Coastguard Worker
        /* Inverse shuffle: two rounds of zip put the result bytes of
         * v0-v3 back into linear order.
         */
547*e1eccf28SAndroid Build Coastguard Worker        zip1    v4.16b, v0.16b, v2.16b
548*e1eccf28SAndroid Build Coastguard Worker        zip2    v6.16b, v0.16b, v2.16b
549*e1eccf28SAndroid Build Coastguard Worker        zip1    v5.16b, v1.16b, v3.16b
550*e1eccf28SAndroid Build Coastguard Worker        zip2    v7.16b, v1.16b, v3.16b
551*e1eccf28SAndroid Build Coastguard Worker        zip1    v0.16b, v4.16b, v5.16b
552*e1eccf28SAndroid Build Coastguard Worker        zip2    v1.16b, v4.16b, v5.16b
553*e1eccf28SAndroid Build Coastguard Worker        zip1    v2.16b, v6.16b, v7.16b
554*e1eccf28SAndroid Build Coastguard Worker        zip2    v3.16b, v6.16b, v7.16b
555*e1eccf28SAndroid Build Coastguard Worker  .else
556*e1eccf28SAndroid Build Coastguard Worker        \kernel
557*e1eccf28SAndroid Build Coastguard Worker  .endif
558*e1eccf28SAndroid Build Coastguard Worker
        /* Write the tail back using the same chunk sizes and register
         * positions as the loads above.
         */
559*e1eccf28SAndroid Build Coastguard Worker        tbz     x2, #5, 1f
560*e1eccf28SAndroid Build Coastguard Worker        st1     {v2.16b,v3.16b}, [x0], #32
561*e1eccf28SAndroid Build Coastguard Worker1:      tbz     x2, #4, 1f
562*e1eccf28SAndroid Build Coastguard Worker        st1     {v1.16b}, [x0], #16
563*e1eccf28SAndroid Build Coastguard Worker1:      tbz     x2, #3, 1f
564*e1eccf28SAndroid Build Coastguard Worker        st1     {v0.d}[1], [x0], #8
565*e1eccf28SAndroid Build Coastguard Worker1:      tbz     x2, #2, 1f
566*e1eccf28SAndroid Build Coastguard Worker        st1     {v0.s}[1], [x0], #4
567*e1eccf28SAndroid Build Coastguard Worker1:      tbz     x2, #1, 1f
568*e1eccf28SAndroid Build Coastguard Worker        st1     {v0.h}[1], [x0], #2
569*e1eccf28SAndroid Build Coastguard Worker1:      tbz     x2, #0, 2f
570*e1eccf28SAndroid Build Coastguard Worker        st1     {v0.b}[1], [x0], #1
        /* Restore the saved halves of v8-v15; the two post-increments of
         * 32 give the 64 bytes of stack back.
         */
571*e1eccf28SAndroid Build Coastguard Worker2:      ld1     {v8.1d - v11.1d}, [sp], #32
572*e1eccf28SAndroid Build Coastguard Worker        ld1     {v12.1d - v15.1d}, [sp], #32
573*e1eccf28SAndroid Build Coastguard Worker.endif
        /* Common exit for both forms: return 0. */
574*e1eccf28SAndroid Build Coastguard Worker        mov     x0, #0
575*e1eccf28SAndroid Build Coastguard Worker        ret
576*e1eccf28SAndroid Build Coastguard Worker.endm
577*e1eccf28SAndroid Build Coastguard Worker
578*e1eccf28SAndroid Build Coastguard Worker
579*e1eccf28SAndroid Build Coastguard Worker/* produce list of blend_line_XX() functions; each function uses the wrap_line
580*e1eccf28SAndroid Build Coastguard Worker * macro, passing it the name of the operation macro it wants along with
581*e1eccf28SAndroid Build Coastguard Worker * optional parameters to remove unnecessary operations.
 *
 * For each BLEND_LIST entry (d, n) this expands to a global function
 * blend_line_n whose body is "wrap_line blend_kernel_n, params_n", so every
 * listed mode needs both a blend_kernel_n macro and a params_n definition.
 * The numeric id d is not used by this expansion.
582*e1eccf28SAndroid Build Coastguard Worker */
583*e1eccf28SAndroid Build Coastguard Worker#define BLEND_X(d, n) ENTRY(blend_line_##n) ; wrap_line blend_kernel_##n, params_##n ; END(blend_line_##n) ;
584*e1eccf28SAndroid Build Coastguard Worker    BLEND_LIST(BLEND_X)
585*e1eccf28SAndroid Build Coastguard Worker#undef BLEND_X
586*e1eccf28SAndroid Build Coastguard Worker
/* Every BLEND_LIST entry re-sets tablesize to its id plus one, so after the
 * expansion tablesize holds the last-listed id + 1.  rsdIntrinsicBlend_K
 * compares the requested slot against this value before indexing blendtable.
 */
587*e1eccf28SAndroid Build Coastguard Worker#define BLEND_X(d, n) .set tablesize, d+1 ;
588*e1eccf28SAndroid Build Coastguard Worker    BLEND_LIST(BLEND_X)
589*e1eccf28SAndroid Build Coastguard Worker#undef BLEND_X
590*e1eccf28SAndroid Build Coastguard Worker
591*e1eccf28SAndroid Build Coastguard Worker/*  int rsdIntrinsicBlend_K(
592*e1eccf28SAndroid Build Coastguard Worker *          uchar4 *out,        // x0
593*e1eccf28SAndroid Build Coastguard Worker *          uchar4 const *in,   // x1
594*e1eccf28SAndroid Build Coastguard Worker *          int slot,           // x2
595*e1eccf28SAndroid Build Coastguard Worker *          size_t xstart,      // x3
596*e1eccf28SAndroid Build Coastguard Worker *          size_t xend);       // x4
 *
 * Looks up slot in blendtable and branches to the matching blend_line_XX
 * routine with x0 = out + xstart * 4, x1 = in + xstart * 4 and
 * x2 = (xend - xstart) * 4, i.e. byte addresses and a byte count.
 *
 * Returns -1 when slot is not below tablesize or its table entry is zero.
 * Otherwise control is handed to the selected routine with br, and that
 * routine returns directly to the caller (wrap_line bodies return 0).
 *
 * Clobbers x5 and x6 in addition to the argument registers.
 *
 * NOTE(review): xstart and xend are listed as size_t, but only their low
 * 32 bits (w3, w4) are used and the difference is computed in 32 bits; see
 * the TODO below.  Confirm against the C prototype used by the callers.
597*e1eccf28SAndroid Build Coastguard Worker */
598*e1eccf28SAndroid Build Coastguard WorkerENTRY(rsdIntrinsicBlend_K)
    /* x5 = address of blendtable. */
599*e1eccf28SAndroid Build Coastguard Worker    adrp    x5, blendtable
600*e1eccf28SAndroid Build Coastguard Worker    add     x5, x5, :lo12:blendtable
    /* Unsigned compare, so a negative slot is rejected along with any
     * slot at or beyond tablesize.
     */
601*e1eccf28SAndroid Build Coastguard Worker    cmp     w2, tablesize
602*e1eccf28SAndroid Build Coastguard Worker    bhs     1f
    /* x6 = sign-extended 16-bit table entry for this slot. */
603*e1eccf28SAndroid Build Coastguard Worker    ldrsh   x6, [x5, w2, uxtw #1]
    /* Advance both pointers by xstart elements of 4 bytes each. */
604*e1eccf28SAndroid Build Coastguard Worker    add     x0, x0, w3, uxtw #2
605*e1eccf28SAndroid Build Coastguard Worker    add     x1, x1, w3, uxtw #2
    /* x2 = 32-bit (xend - xstart), zero-extended and multiplied by 4. */
606*e1eccf28SAndroid Build Coastguard Worker    sub     w2, w4, w3
607*e1eccf28SAndroid Build Coastguard Worker    ubfiz   x2, x2, #2, #32 /* TODO: fix */
    /* A zero entry marks a slot with no routine. */
608*e1eccf28SAndroid Build Coastguard Worker    cbz     x6, 1f
    /* Table entries are offsets from label 2 (the br instruction), so the
     * target is the address of label 2 plus the entry.
     */
609*e1eccf28SAndroid Build Coastguard Worker    adr     x5, 2f
610*e1eccf28SAndroid Build Coastguard Worker    add     x6, x5, x6
611*e1eccf28SAndroid Build Coastguard Worker2:  br      x6
    /* Failure path: unknown or unimplemented slot. */
612*e1eccf28SAndroid Build Coastguard Worker1:  mov     x0, #-1
613*e1eccf28SAndroid Build Coastguard Worker    ret
614*e1eccf28SAndroid Build Coastguard Worker
615*e1eccf28SAndroid Build Coastguard WorkerEND(rsdIntrinsicBlend_K)
616*e1eccf28SAndroid Build Coastguard Worker
.rodata
/* blendtable: one signed 16-bit entry per blend id, indexed by the slot
 * argument of rsdIntrinsicBlend_K and read there with ldrsh.
 *
 * An entry is the distance from label 2 in rsdIntrinsicBlend_K (its br
 * instruction) to the blend_line_XX routine for that id.  Ids that do not
 * appear in BLEND_LIST get a zero entry, which the dispatcher treats as
 * "no routine": `off` tracks the next id still to be emitted and the .rept
 * pads the gap up to each listed id with zeros.
 *
 * The table is aligned to 2 bytes so that every halfword entry sits on its
 * natural boundary wherever the linker places this section's contents.
 */
.balign 2
.set off,0
blendtable:
#define BLEND_X(d, n) .rept d-off ; .hword 0 ; .endr ; .hword blend_line_##n - 2b ; .set off, d+1 ;
        BLEND_LIST(BLEND_X)
#undef BLEND_X
622*e1eccf28SAndroid Build Coastguard Worker#undef BLEND_X
623