xref: /aosp_15_r20/frameworks/rs/toolkit/YuvToRgb_neon.S (revision e1eccf28f96817838ad6867f7f39d2351ec11f56)
1*e1eccf28SAndroid Build Coastguard Worker/*
2*e1eccf28SAndroid Build Coastguard Worker * Copyright (C) 2014 The Android Open Source Project
3*e1eccf28SAndroid Build Coastguard Worker *
4*e1eccf28SAndroid Build Coastguard Worker * Licensed under the Apache License, Version 2.0 (the "License");
5*e1eccf28SAndroid Build Coastguard Worker * you may not use this file except in compliance with the License.
6*e1eccf28SAndroid Build Coastguard Worker * You may obtain a copy of the License at
7*e1eccf28SAndroid Build Coastguard Worker *
8*e1eccf28SAndroid Build Coastguard Worker *      http://www.apache.org/licenses/LICENSE-2.0
9*e1eccf28SAndroid Build Coastguard Worker *
10*e1eccf28SAndroid Build Coastguard Worker * Unless required by applicable law or agreed to in writing, software
11*e1eccf28SAndroid Build Coastguard Worker * distributed under the License is distributed on an "AS IS" BASIS,
12*e1eccf28SAndroid Build Coastguard Worker * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*e1eccf28SAndroid Build Coastguard Worker * See the License for the specific language governing permissions and
14*e1eccf28SAndroid Build Coastguard Worker * limitations under the License.
15*e1eccf28SAndroid Build Coastguard Worker */
16*e1eccf28SAndroid Build Coastguard Worker
/* ENTRY(f): open a global function named f.  Switches to .text, emits the
 * alignment directive, exports the symbol, marks it as a function, defines
 * the label and opens an unwind region with .fnstart.  On ARM targets GNU as
 * treats the .align argument as a power of two, so this requests 16-byte
 * alignment.
 */
17*e1eccf28SAndroid Build Coastguard Worker#define ENTRY(f) .text; .align 4; .globl f; .type f,#function; f: .fnstart
/* END(f): close the unwind region opened by ENTRY(f) and record the size of f
 * as the distance from its label to this point.
 */
18*e1eccf28SAndroid Build Coastguard Worker#define END(f) .fnend; .size f, .-f;
19*e1eccf28SAndroid Build Coastguard Worker
20*e1eccf28SAndroid Build Coastguard Worker.eabi_attribute 25,1 @Tag_ABI_align8_preserved
/* Assemble everything that follows as ARM (not Thumb) instructions. */
21*e1eccf28SAndroid Build Coastguard Worker.arm
22*e1eccf28SAndroid Build Coastguard Worker
23*e1eccf28SAndroid Build Coastguard Worker/* Perform the actual YuvToRGB conversion in a macro, from register to
24*e1eccf28SAndroid Build Coastguard Worker * register.  This macro will be called from within several different wrapper
25*e1eccf28SAndroid Build Coastguard Worker * variants for different data layouts.  Y data starts in q8, but with the even
26*e1eccf28SAndroid Build Coastguard Worker * and odd bytes split into d16 and d17 respectively.  U and V are in d20
27*e1eccf28SAndroid Build Coastguard Worker * and d21.  Working constants are pre-loaded into q13-q15, and q3 is
28*e1eccf28SAndroid Build Coastguard Worker * pre-loaded with a constant 0xff alpha channel.
29*e1eccf28SAndroid Build Coastguard Worker *
30*e1eccf28SAndroid Build Coastguard Worker * The complicated arithmetic is the result of refactoring the original
31*e1eccf28SAndroid Build Coastguard Worker * equations to avoid 16-bit overflow without losing any precision.
 *
 * On exit q0, q1 and q2 each hold 16 bytes of R, G and B respectively, in
 * pixel order (the final vzip instructions merge the even-pixel and odd-pixel
 * results back together).  q3 is not touched here.
 *
 * Registers written by this macro: q0-q2, q4-q8, q11 and q12.  d14 and d15
 * (q7) are used as scratch for the 8-bit multipliers.  The Y input in q8 is
 * overwritten as soon as both luma products have been formed.
32*e1eccf28SAndroid Build Coastguard Worker */
33*e1eccf28SAndroid Build Coastguard Worker.macro yuvkern
34*e1eccf28SAndroid Build Coastguard Worker        vmov.i8     d15, #149
35*e1eccf28SAndroid Build Coastguard Worker
36*e1eccf28SAndroid Build Coastguard Worker        vmull.u8    q1, d16, d15        // g0 = y0 * 149
37*e1eccf28SAndroid Build Coastguard Worker        vmull.u8    q5, d17, d15        // g1 = y1 * 149
38*e1eccf28SAndroid Build Coastguard Worker
        /* d16/d17 have been consumed above, so q8 is reused for the chroma
         * contribution to green.
         */
39*e1eccf28SAndroid Build Coastguard Worker        vmov.i8     d14, #50
40*e1eccf28SAndroid Build Coastguard Worker        vmov.i8     d15, #104
41*e1eccf28SAndroid Build Coastguard Worker        vmull.u8    q8, d20, d14        // g2 = u * 50 + v * 104
42*e1eccf28SAndroid Build Coastguard Worker        vmlal.u8    q8, d21, d15
43*e1eccf28SAndroid Build Coastguard Worker
        /* Red and blue start from the same luma products (q1, q5) and are
         * built in separate registers so that q1/q5 remain available as the
         * green accumulators.
         */
44*e1eccf28SAndroid Build Coastguard Worker        vshr.u8     d14, d21, #1
45*e1eccf28SAndroid Build Coastguard Worker        vaddw.u8    q0, q1, d14         // r0 = y0 * 149 + (v >> 1)
46*e1eccf28SAndroid Build Coastguard Worker        vaddw.u8    q4, q5, d14         // r1 = y1 * 149 + (v >> 1)
47*e1eccf28SAndroid Build Coastguard Worker
48*e1eccf28SAndroid Build Coastguard Worker        vshll.u8    q7, d20, #2
49*e1eccf28SAndroid Build Coastguard Worker        vadd.u16    q2, q1, q7          // b0 = y0 * 149 + (u << 2)
50*e1eccf28SAndroid Build Coastguard Worker        vadd.u16    q6, q5, q7          // b1 = y1 * 149 + (u << 2)
51*e1eccf28SAndroid Build Coastguard Worker
52*e1eccf28SAndroid Build Coastguard Worker        vmov.i8     d14, #204
53*e1eccf28SAndroid Build Coastguard Worker        vmov.i8     d15, #254
54*e1eccf28SAndroid Build Coastguard Worker        vmull.u8    q11, d21, d14       // r2 = v * 204
55*e1eccf28SAndroid Build Coastguard Worker        vmull.u8    q12, d20, d15       // b2 = u * 254
56*e1eccf28SAndroid Build Coastguard Worker
        /* The halving adds keep the red and blue sums within 16 bits; from
         * here on r and b carry one fewer fractional bit than g, which is
         * why the final narrowing shifts differ (#6 versus #7).
         */
57*e1eccf28SAndroid Build Coastguard Worker        vhadd.u16   q0, q11             // r0 = (r0 + r2) >> 1
58*e1eccf28SAndroid Build Coastguard Worker        vhadd.u16   q4, q11             // r1 = (r1 + r2) >> 1
59*e1eccf28SAndroid Build Coastguard Worker        vqadd.u16   q1, q14             // g0 = satu16(g0 + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
60*e1eccf28SAndroid Build Coastguard Worker        vqadd.u16   q5, q14             // g1 = satu16(g1 + (-16 * 149 + 128 * 50 + 128 * 104) >> 0)
61*e1eccf28SAndroid Build Coastguard Worker        vhadd.u16   q2, q12             // b0 = (b0 + b2) >> 1
62*e1eccf28SAndroid Build Coastguard Worker        vhadd.u16   q6, q12             // b1 = (b1 + b2) >> 1
63*e1eccf28SAndroid Build Coastguard Worker
        /* Saturating subtracts clamp negative results to zero. */
64*e1eccf28SAndroid Build Coastguard Worker        vqsub.u16   q0, q13             // r0 = satu16(r0 - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
65*e1eccf28SAndroid Build Coastguard Worker        vqsub.u16   q4, q13             // r1 = satu16(r1 - (16 * 149 + (128 >> 1) + 128 * 204) >> 1)
66*e1eccf28SAndroid Build Coastguard Worker        vqsub.u16   q1, q8              // g0 = satu16(g0 - g2)
67*e1eccf28SAndroid Build Coastguard Worker        vqsub.u16   q5, q8              // g1 = satu16(g1 - g2)
68*e1eccf28SAndroid Build Coastguard Worker        vqsub.u16   q2, q15             // b0 = satu16(b0 - (16 * 149 + (128 << 2) + 128 * 254) >> 1)
69*e1eccf28SAndroid Build Coastguard Worker        vqsub.u16   q6, q15             // b1 = satu16(b1 - (16 * 149 + (128 << 2) + 128 * 254) >> 1)
70*e1eccf28SAndroid Build Coastguard Worker
        /* Round, shift and saturate each channel down to 8 bits.  The
         * destination order (r0,g0 in q0; r1,g1 in q1; b0,b1 in q2) is chosen
         * so that the vzip instructions below can restore pixel order.
         */
71*e1eccf28SAndroid Build Coastguard Worker        vqrshrn.u16 d0, q0, #6          // r, even pixels
72*e1eccf28SAndroid Build Coastguard Worker        vqrshrn.u16 d1, q1, #7          // g, even pixels
73*e1eccf28SAndroid Build Coastguard Worker        vqrshrn.u16 d2, q4, #6          // r, odd pixels
74*e1eccf28SAndroid Build Coastguard Worker        vqrshrn.u16 d3, q5, #7          // g, odd pixels
75*e1eccf28SAndroid Build Coastguard Worker        vqrshrn.u16 d4, q2, #6          // b, even pixels
76*e1eccf28SAndroid Build Coastguard Worker        vqrshrn.u16 d5, q6, #6          // b, odd pixels
77*e1eccf28SAndroid Build Coastguard Worker
78*e1eccf28SAndroid Build Coastguard Worker        vzip.u8     q0, q1              // q0 = 16 x R, q1 = 16 x G, in pixel order
79*e1eccf28SAndroid Build Coastguard Worker        vzip.u8     d4, d5              // q2 = 16 x B, in pixel order
80*e1eccf28SAndroid Build Coastguard Worker.endm
81*e1eccf28SAndroid Build Coastguard Worker
82*e1eccf28SAndroid Build Coastguard Worker/* Define the wrapper code which will load and store the data, iterate the
83*e1eccf28SAndroid Build Coastguard Worker * correct number of times, and safely handle the remainder at the end of the
84*e1eccf28SAndroid Build Coastguard Worker * loop.  Some sections of code are switched out depending on the data packing
85*e1eccf28SAndroid Build Coastguard Worker * being handled.
 *
 * Register usage, as seen from the loads, stores and counters in this macro:
 *   r0  output pointer; 4 bytes are written per pixel
 *   r1  Y input pointer; 1 byte is read per pixel
 *   r2  number of pixels still to convert
 *   r3  U pointer (planar) or interleaved chroma pointer
 *   r4  V pointer (only read when interleaved is 0)
 *   r5  scratch, used to build the 16-bit constants
 *
 * Macro arguments:
 *   kernel       name of the conversion macro to expand (register to register)
 *   interleaved  non-zero: chroma is read as byte pairs through r3 only
 *   swapuv       non-zero (with interleaved): exchange d20 and d21 after
 *                de-interleaving, i.e. swap which byte of each pair is U and
 *                which is V
86*e1eccf28SAndroid Build Coastguard Worker */
87*e1eccf28SAndroid Build Coastguard Worker.macro wrap_line kernel, interleaved=0, swapuv=0
88*e1eccf28SAndroid Build Coastguard Worker
        /* Broadcast the three 16-bit bias constants used by the kernel into
         * q13, q14 and q15.
         */
89*e1eccf28SAndroid Build Coastguard Worker        movw        r5, #((16 * 149 + (128 >> 1) + 128 * 204) >> 1)
90*e1eccf28SAndroid Build Coastguard Worker        vdup.i16    q13, r5
91*e1eccf28SAndroid Build Coastguard Worker        movw        r5, #((-16 * 149 + 128 * 50 + 128 * 104) >> 0)
92*e1eccf28SAndroid Build Coastguard Worker        vdup.i16    q14, r5
93*e1eccf28SAndroid Build Coastguard Worker        movw        r5, #((16 * 149 + (128 << 2) + 128 * 254) >> 1)
94*e1eccf28SAndroid Build Coastguard Worker        vdup.i16    q15, r5
95*e1eccf28SAndroid Build Coastguard Worker
        /* Constant alpha bytes; q3 (d6, d7) is the fourth register of each
         * vst4 below.
         */
96*e1eccf28SAndroid Build Coastguard Worker        vmov.i8     q3, #0xff
97*e1eccf28SAndroid Build Coastguard Worker
        /* Enter the 16-pixel loop only if at least 16 pixels are available;
         * otherwise go straight to the tail handling at the first 2: label.
         */
98*e1eccf28SAndroid Build Coastguard Worker        subs        r2, #16
99*e1eccf28SAndroid Build Coastguard Worker        bhs         1f
100*e1eccf28SAndroid Build Coastguard Worker        b           2f
101*e1eccf28SAndroid Build Coastguard Worker
        /* Main loop: 16 pixels per iteration.  vld2 splits the 16 Y bytes
         * into even (d16) and odd (d17) pixels as the kernel requires.
         */
102*e1eccf28SAndroid Build Coastguard Worker        .align 4
103*e1eccf28SAndroid Build Coastguard Worker1:      vld2.u8     {d16,d17}, [r1]!
104*e1eccf28SAndroid Build Coastguard Worker        pld         [r1, #256]
105*e1eccf28SAndroid Build Coastguard Worker  .if \interleaved
        /* 16 chroma bytes: first byte of each pair to d20, second to d21. */
106*e1eccf28SAndroid Build Coastguard Worker        vld2.u8     {d20,d21}, [r3]!
107*e1eccf28SAndroid Build Coastguard Worker    .if \swapuv
108*e1eccf28SAndroid Build Coastguard Worker        vswp        d20, d21
109*e1eccf28SAndroid Build Coastguard Worker    .endif
110*e1eccf28SAndroid Build Coastguard Worker        pld         [r3, #256]
111*e1eccf28SAndroid Build Coastguard Worker  .else
        /* Planar chroma: 8 bytes from each plane cover the 16 pixels. */
112*e1eccf28SAndroid Build Coastguard Worker        vld1.u8     d20, [r3]!
113*e1eccf28SAndroid Build Coastguard Worker        vld1.u8     d21, [r4]!
114*e1eccf28SAndroid Build Coastguard Worker        pld         [r3, #128]
115*e1eccf28SAndroid Build Coastguard Worker        pld         [r4, #128]
116*e1eccf28SAndroid Build Coastguard Worker  .endif
117*e1eccf28SAndroid Build Coastguard Worker
118*e1eccf28SAndroid Build Coastguard Worker        \kernel
119*e1eccf28SAndroid Build Coastguard Worker
        /* The flags set here are consumed by the bhs after the stores; the
         * NEON stores in between do not modify them.
         */
120*e1eccf28SAndroid Build Coastguard Worker        subs        r2, #16
121*e1eccf28SAndroid Build Coastguard Worker
        /* Each vst4 interleaves four registers into 8 four-byte pixels
         * (R, G, B, alpha); two of them write the 16 pixels (64 bytes).
         */
122*e1eccf28SAndroid Build Coastguard Worker        vst4.u8     {d0,d2,d4,d6}, [r0]!
123*e1eccf28SAndroid Build Coastguard Worker        vst4.u8     {d1,d3,d5,d7}, [r0]!
124*e1eccf28SAndroid Build Coastguard Worker
125*e1eccf28SAndroid Build Coastguard Worker        bhs         1b
126*e1eccf28SAndroid Build Coastguard Worker
        /* Undo the last subtraction so that r2 is the number of leftover
         * pixels (0 to 15).  If there are none, branch to the final 2: label
         * at the end of the macro.
         */
127*e1eccf28SAndroid Build Coastguard Worker2:      adds        r2, #16
128*e1eccf28SAndroid Build Coastguard Worker        beq         2f
129*e1eccf28SAndroid Build Coastguard Worker
130*e1eccf28SAndroid Build Coastguard Worker        /* To handle the tail portion of the data (something less than 16
131*e1eccf28SAndroid Build Coastguard Worker         * bytes) load small power-of-two chunks into working registers.  It
132*e1eccf28SAndroid Build Coastguard Worker         * doesn't matter where they end up in the register; the same process
133*e1eccf28SAndroid Build Coastguard Worker         * will store them back out using the same positions and the
134*e1eccf28SAndroid Build Coastguard Worker         * interaction between neighbouring pixels is constrained to odd
135*e1eccf28SAndroid Build Coastguard Worker         * boundaries where the load operations don't interfere.
136*e1eccf28SAndroid Build Coastguard Worker         */
        /* Clear the Y (q8) and chroma (q10) inputs so that lanes which are
         * not loaded below hold zero rather than stale data.
         */
137*e1eccf28SAndroid Build Coastguard Worker        vmov.i8     q8, #0
138*e1eccf28SAndroid Build Coastguard Worker        vmov.i8     q10, #0
139*e1eccf28SAndroid Build Coastguard Worker
        /* Bit 3 of the count: 8 pixels, placed in bytes 8-15 of q8. */
140*e1eccf28SAndroid Build Coastguard Worker        tst         r2, #8
141*e1eccf28SAndroid Build Coastguard Worker        beq         1f
142*e1eccf28SAndroid Build Coastguard Worker        vld1.u8     d17, [r1]!
143*e1eccf28SAndroid Build Coastguard Worker  .if \interleaved
144*e1eccf28SAndroid Build Coastguard Worker        vld1.u8     d21, [r3]!
145*e1eccf28SAndroid Build Coastguard Worker  .else
146*e1eccf28SAndroid Build Coastguard Worker        vld1.u32    d20[1], [r3]!
147*e1eccf28SAndroid Build Coastguard Worker        vld1.u32    d21[1], [r4]!
148*e1eccf28SAndroid Build Coastguard Worker  .endif
149*e1eccf28SAndroid Build Coastguard Worker
        /* Bit 2 of the count: 4 pixels, placed in bytes 4-7 of q8. */
150*e1eccf28SAndroid Build Coastguard Worker1:      tst         r2, #4
151*e1eccf28SAndroid Build Coastguard Worker        beq         1f
152*e1eccf28SAndroid Build Coastguard Worker        vld1.u32    d16[1], [r1]!
153*e1eccf28SAndroid Build Coastguard Worker  .if \interleaved
154*e1eccf28SAndroid Build Coastguard Worker        vld1.u32    d20[1], [r3]!
155*e1eccf28SAndroid Build Coastguard Worker  .else
156*e1eccf28SAndroid Build Coastguard Worker        vld1.u16    d20[1], [r3]!
157*e1eccf28SAndroid Build Coastguard Worker        vld1.u16    d21[1], [r4]!
158*e1eccf28SAndroid Build Coastguard Worker  .endif
        /* Bit 1 of the count: 2 pixels, placed in bytes 2-3 of q8. */
159*e1eccf28SAndroid Build Coastguard Worker1:      tst         r2, #2
160*e1eccf28SAndroid Build Coastguard Worker        beq         1f
161*e1eccf28SAndroid Build Coastguard Worker        vld1.u16    d16[1], [r1]!
162*e1eccf28SAndroid Build Coastguard Worker  .if \interleaved
163*e1eccf28SAndroid Build Coastguard Worker        vld1.u16    d20[1], [r3]!
164*e1eccf28SAndroid Build Coastguard Worker  .else
165*e1eccf28SAndroid Build Coastguard Worker        vld1.u8     d20[1], [r3]!
166*e1eccf28SAndroid Build Coastguard Worker        vld1.u8     d21[1], [r4]!
167*e1eccf28SAndroid Build Coastguard Worker  .endif
        /* Bit 0 of the count: 1 pixel, placed in byte 1 of q8.  Its chroma
         * goes into the lowest chroma position, which is the sample shared
         * by pixel positions 0 and 1.
         */
168*e1eccf28SAndroid Build Coastguard Worker1:      tst         r2, #1
169*e1eccf28SAndroid Build Coastguard Worker        beq         1f
170*e1eccf28SAndroid Build Coastguard Worker        vld1.u8     d16[1], [r1]!
171*e1eccf28SAndroid Build Coastguard Worker  .if \interleaved
172*e1eccf28SAndroid Build Coastguard Worker        vld1.u16    d20[0], [r3]!
173*e1eccf28SAndroid Build Coastguard Worker  .else
174*e1eccf28SAndroid Build Coastguard Worker        vld1.u8     d20[0], [r3]!
175*e1eccf28SAndroid Build Coastguard Worker        vld1.u8     d21[0], [r4]!
176*e1eccf28SAndroid Build Coastguard Worker  .endif
177*e1eccf28SAndroid Build Coastguard Worker
178*e1eccf28SAndroid Build Coastguard Worker        /* One small impediment in the process above is that some of the load
179*e1eccf28SAndroid Build Coastguard Worker         * operations can't perform byte-wise structure deinterleaving at the
180*e1eccf28SAndroid Build Coastguard Worker         * same time as loading only part of a register.  So the data is loaded
181*e1eccf28SAndroid Build Coastguard Worker         * linearly and unpacked manually at this point if necessary.
182*e1eccf28SAndroid Build Coastguard Worker         */
        /* After vuzp, d16 holds the even and d17 the odd Y bytes, matching
         * what vld2 produces in the main loop.  Planar chroma was already
         * loaded directly into d20 and d21, so it needs no unpacking.
         */
183*e1eccf28SAndroid Build Coastguard Worker1:      vuzp.8      d16, d17
184*e1eccf28SAndroid Build Coastguard Worker  .if \interleaved
185*e1eccf28SAndroid Build Coastguard Worker        vuzp.8      d20, d21
186*e1eccf28SAndroid Build Coastguard Worker    .if \swapuv
187*e1eccf28SAndroid Build Coastguard Worker        vswp        d20, d21
188*e1eccf28SAndroid Build Coastguard Worker    .endif
189*e1eccf28SAndroid Build Coastguard Worker  .endif
190*e1eccf28SAndroid Build Coastguard Worker
191*e1eccf28SAndroid Build Coastguard Worker        \kernel
192*e1eccf28SAndroid Build Coastguard Worker
193*e1eccf28SAndroid Build Coastguard Worker        /* As above but with the output; structured stores for partial vectors
194*e1eccf28SAndroid Build Coastguard Worker         * aren't available, so the data is re-packed first and stored linearly.
195*e1eccf28SAndroid Build Coastguard Worker         */
        /* Two rounds of byte zips turn the planar R (q0), G (q1), B (q2) and
         * alpha (q3) vectors into 16 consecutive four-byte pixels:
         * q0 = pixels 0-3, q1 = pixels 4-7, q2 = pixels 8-11, q3 = pixels 12-15.
         */
196*e1eccf28SAndroid Build Coastguard Worker        vzip.8  q0, q2
197*e1eccf28SAndroid Build Coastguard Worker        vzip.8  q1, q3
198*e1eccf28SAndroid Build Coastguard Worker        vzip.8  q0, q1
199*e1eccf28SAndroid Build Coastguard Worker        vzip.8  q2, q3
200*e1eccf28SAndroid Build Coastguard Worker
        /* Store from the same positions that the loads above filled:
         * 8 pixels from d4-d7, 4 pixels from d2-d3, 2 pixels from d1 and
         * 1 pixel from the upper word of d0.
         */
201*e1eccf28SAndroid Build Coastguard Worker1:      tst         r2, #8
202*e1eccf28SAndroid Build Coastguard Worker        beq         1f
203*e1eccf28SAndroid Build Coastguard Worker        vst1.u8     {d4,d5,d6,d7}, [r0]!
204*e1eccf28SAndroid Build Coastguard Worker
205*e1eccf28SAndroid Build Coastguard Worker1:      tst         r2, #4
206*e1eccf28SAndroid Build Coastguard Worker        beq         1f
207*e1eccf28SAndroid Build Coastguard Worker        vst1.u8     {d2,d3}, [r0]!
208*e1eccf28SAndroid Build Coastguard Worker1:      tst         r2, #2
209*e1eccf28SAndroid Build Coastguard Worker        beq         1f
210*e1eccf28SAndroid Build Coastguard Worker        vst1.u8     d1, [r0]!
211*e1eccf28SAndroid Build Coastguard Worker1:      tst         r2, #1
212*e1eccf28SAndroid Build Coastguard Worker        beq         2f
213*e1eccf28SAndroid Build Coastguard Worker        vst1.u32    d0[1], [r0]!
214*e1eccf28SAndroid Build Coastguard Worker2:
215*e1eccf28SAndroid Build Coastguard Worker.endm
216*e1eccf28SAndroid Build Coastguard Worker
217*e1eccf28SAndroid Build Coastguard Worker
218*e1eccf28SAndroid Build Coastguard Worker/*  void rsdIntrinsicYuv2_K(
219*e1eccf28SAndroid Build Coastguard Worker *          void *out,          // r0
220*e1eccf28SAndroid Build Coastguard Worker *          void const *yin,    // r1
221*e1eccf28SAndroid Build Coastguard Worker *          void const *uin,    // r2
222*e1eccf28SAndroid Build Coastguard Worker *          void const *vin,    // r3
223*e1eccf28SAndroid Build Coastguard Worker *          size_t xstart,      // [sp]
224*e1eccf28SAndroid Build Coastguard Worker *          size_t xend);       // [sp+#4]
 *
 * Converts one row of planar Y, U and V data into 4-byte output pixels
 * (R, G, B, 0xff), covering xend - xstart pixels starting at pixel xstart.
 * The output pointer is advanced by 4 * xstart bytes, the Y pointer by xstart
 * bytes and each chroma pointer by xstart >> 1 bytes before conversion.
 *
 * NOTE(review): unlike the interleaved entry points in this file, xstart is
 * not rounded down to an even pixel here, so an odd xstart would pair pixels
 * with chroma samples differently - confirm that callers pass an even xstart.
225*e1eccf28SAndroid Build Coastguard Worker */
226*e1eccf28SAndroid Build Coastguard WorkerENTRY(rsdIntrinsicYuv2_K)
        /* r4 and r5 are used by wrap_line, so preserve them.  After this
         * 8-byte push the stack arguments sit at sp+8 (xstart) and sp+12 (xend).
         */
227*e1eccf28SAndroid Build Coastguard Worker        push        {r4,r5}
228*e1eccf28SAndroid Build Coastguard Worker        ldr         r5, [sp, #8]            // r5 = xstart
        /* Move the arguments into the registers wrap_line reads from:
         * r3 = U pointer, r4 = V pointer, r2 = pixel count.
         */
229*e1eccf28SAndroid Build Coastguard Worker        mov         r4, r3
230*e1eccf28SAndroid Build Coastguard Worker        mov         r3, r2
231*e1eccf28SAndroid Build Coastguard Worker        ldr         r2, [sp, #12]           // r2 = xend
232*e1eccf28SAndroid Build Coastguard Worker
233*e1eccf28SAndroid Build Coastguard Worker        add         r0, r5, LSL #2          // out += xstart * 4
234*e1eccf28SAndroid Build Coastguard Worker        add         r1, r5                  // yin += xstart
235*e1eccf28SAndroid Build Coastguard Worker        add         r3, r5, LSR #1          // uin += xstart / 2
236*e1eccf28SAndroid Build Coastguard Worker        add         r4, r5, LSR #1          // vin += xstart / 2
237*e1eccf28SAndroid Build Coastguard Worker        sub         r2, r5                  // r2 = xend - xstart
238*e1eccf28SAndroid Build Coastguard Worker
        /* yuvkern writes q4-q7 (d8-d15), which are callee-saved under the
         * ARM procedure call standard.
         */
239*e1eccf28SAndroid Build Coastguard Worker        vpush       {d8-d15}
240*e1eccf28SAndroid Build Coastguard Worker
        /* interleaved = 0: U and V come from separate planes (r3 and r4). */
241*e1eccf28SAndroid Build Coastguard Worker        wrap_line yuvkern, 0
242*e1eccf28SAndroid Build Coastguard Worker
243*e1eccf28SAndroid Build Coastguard Worker        vpop        {d8-d15}
244*e1eccf28SAndroid Build Coastguard Worker        pop         {r4,r5}
245*e1eccf28SAndroid Build Coastguard Worker        bx lr
246*e1eccf28SAndroid Build Coastguard WorkerEND(rsdIntrinsicYuv2_K)
247*e1eccf28SAndroid Build Coastguard Worker
248*e1eccf28SAndroid Build Coastguard Worker/*  void rsdIntrinsicYuv_K(
249*e1eccf28SAndroid Build Coastguard Worker *          void *out,          // r0
250*e1eccf28SAndroid Build Coastguard Worker *          void const *yin,    // r1
251*e1eccf28SAndroid Build Coastguard Worker *          void const *uvin,   // r2
252*e1eccf28SAndroid Build Coastguard Worker *          size_t xstart,      // r3
253*e1eccf28SAndroid Build Coastguard Worker *          size_t xend);       // [sp]
 *
 * Converts one row of Y data plus interleaved chroma into 4-byte output
 * pixels (R, G, B, 0xff).  wrap_line is expanded with swapuv set, so the
 * first byte of every chroma pair is used as V and the second as U
 * (presumably the NV21 layout - confirm against the caller).
 *
 * xstart is rounded down to an even pixel so that chroma pairs stay aligned
 * with pixel pairs; conversion therefore starts at (xstart & ~1) and covers
 * xend - (xstart & ~1) pixels.
254*e1eccf28SAndroid Build Coastguard Worker */
255*e1eccf28SAndroid Build Coastguard WorkerENTRY(rsdIntrinsicYuv_K)
        /* r4 is used below and r5 is scratch inside wrap_line; preserve both.
         * After this 8-byte push the stack argument xend sits at sp+8.
         */
256*e1eccf28SAndroid Build Coastguard Worker        push        {r4,r5}
257*e1eccf28SAndroid Build Coastguard Worker        bic         r4, r3, #1              // r4 = xstart rounded down to even
        /* Interleaved chroma has two bytes per pixel pair, so the byte offset
         * into uvin equals the (even) pixel offset.
         */
258*e1eccf28SAndroid Build Coastguard Worker        add         r3, r2, r4              // r3 = uvin + r4
259*e1eccf28SAndroid Build Coastguard Worker        ldr         r2, [sp, #8]            // r2 = xend
260*e1eccf28SAndroid Build Coastguard Worker
261*e1eccf28SAndroid Build Coastguard Worker        add         r0, r4, LSL #2          // out += r4 * 4
262*e1eccf28SAndroid Build Coastguard Worker        add         r1, r4                  // yin += r4
263*e1eccf28SAndroid Build Coastguard Worker        sub         r2, r4                  // r2 = xend - r4 (pixel count)
264*e1eccf28SAndroid Build Coastguard Worker
        /* yuvkern writes q4-q7 (d8-d15), which are callee-saved under the
         * ARM procedure call standard.
         */
265*e1eccf28SAndroid Build Coastguard Worker        vpush       {d8-d15}
266*e1eccf28SAndroid Build Coastguard Worker
        /* interleaved = 1, swapuv = 1. */
267*e1eccf28SAndroid Build Coastguard Worker        wrap_line yuvkern, 1, 1
268*e1eccf28SAndroid Build Coastguard Worker
269*e1eccf28SAndroid Build Coastguard Worker        vpop        {d8-d15}
270*e1eccf28SAndroid Build Coastguard Worker        pop         {r4,r5}
271*e1eccf28SAndroid Build Coastguard Worker        bx lr
272*e1eccf28SAndroid Build Coastguard WorkerEND(rsdIntrinsicYuv_K)
273*e1eccf28SAndroid Build Coastguard Worker
274*e1eccf28SAndroid Build Coastguard Worker/*  void rsdIntrinsicYuvR_K(
275*e1eccf28SAndroid Build Coastguard Worker *          void *out,          // r0
276*e1eccf28SAndroid Build Coastguard Worker *          void const *yin,    // r1
277*e1eccf28SAndroid Build Coastguard Worker *          void const *uvin,   // r2
278*e1eccf28SAndroid Build Coastguard Worker *          size_t xstart,      // r3
279*e1eccf28SAndroid Build Coastguard Worker *          size_t xend);       // [sp]
 *
 * Converts one row of Y data plus interleaved chroma into 4-byte output
 * pixels (R, G, B, 0xff).  wrap_line is expanded without swapuv, so the
 * first byte of every chroma pair is used as U and the second as V
 * (presumably the NV12 layout - confirm against the caller).
 *
 * xstart is rounded down to an even pixel so that chroma pairs stay aligned
 * with pixel pairs; conversion therefore starts at (xstart & ~1) and covers
 * xend - (xstart & ~1) pixels.
280*e1eccf28SAndroid Build Coastguard Worker */
281*e1eccf28SAndroid Build Coastguard WorkerENTRY(rsdIntrinsicYuvR_K)
        /* r4 is used below and r5 is scratch inside wrap_line; preserve both.
         * After this 8-byte push the stack argument xend sits at sp+8.
         */
282*e1eccf28SAndroid Build Coastguard Worker        push        {r4,r5}
283*e1eccf28SAndroid Build Coastguard Worker        bic         r4, r3, #1              // r4 = xstart rounded down to even
        /* Interleaved chroma has two bytes per pixel pair, so the byte offset
         * into uvin equals the (even) pixel offset.
         */
284*e1eccf28SAndroid Build Coastguard Worker        add         r3, r2, r4              // r3 = uvin + r4
285*e1eccf28SAndroid Build Coastguard Worker        ldr         r2, [sp, #8]            // r2 = xend
286*e1eccf28SAndroid Build Coastguard Worker
287*e1eccf28SAndroid Build Coastguard Worker        add         r0, r4, LSL #2          // out += r4 * 4
288*e1eccf28SAndroid Build Coastguard Worker        add         r1, r4                  // yin += r4
289*e1eccf28SAndroid Build Coastguard Worker        sub         r2, r4                  // r2 = xend - r4 (pixel count)
290*e1eccf28SAndroid Build Coastguard Worker
        /* yuvkern writes q4-q7 (d8-d15), which are callee-saved under the
         * ARM procedure call standard.
         */
291*e1eccf28SAndroid Build Coastguard Worker        vpush       {d8-d15}
292*e1eccf28SAndroid Build Coastguard Worker
        /* interleaved = 1, swapuv left at its default of 0. */
293*e1eccf28SAndroid Build Coastguard Worker        wrap_line yuvkern, 1
294*e1eccf28SAndroid Build Coastguard Worker
295*e1eccf28SAndroid Build Coastguard Worker        vpop        {d8-d15}
296*e1eccf28SAndroid Build Coastguard Worker        pop         {r4,r5}
297*e1eccf28SAndroid Build Coastguard Worker        bx lr
298*e1eccf28SAndroid Build Coastguard WorkerEND(rsdIntrinsicYuvR_K)
299