xref: /aosp_15_r20/external/libavc/encoder/arm/ih264e_fmt_conv.s (revision 495ae853bb871d1e5a258cb02c2cc13cde8ddb9a)
1*495ae853SAndroid Build Coastguard Worker@/******************************************************************************
2*495ae853SAndroid Build Coastguard Worker@ *
3*495ae853SAndroid Build Coastguard Worker@ * Copyright (C) 2015 The Android Open Source Project
4*495ae853SAndroid Build Coastguard Worker@ *
5*495ae853SAndroid Build Coastguard Worker@ * Licensed under the Apache License, Version 2.0 (the "License");
6*495ae853SAndroid Build Coastguard Worker@ * you may not use this file except in compliance with the License.
7*495ae853SAndroid Build Coastguard Worker@ * You may obtain a copy of the License at:
8*495ae853SAndroid Build Coastguard Worker@ *
9*495ae853SAndroid Build Coastguard Worker@ * http://www.apache.org/licenses/LICENSE-2.0
10*495ae853SAndroid Build Coastguard Worker@ *
11*495ae853SAndroid Build Coastguard Worker@ * Unless required by applicable law or agreed to in writing, software
12*495ae853SAndroid Build Coastguard Worker@ * distributed under the License is distributed on an "AS IS" BASIS,
13*495ae853SAndroid Build Coastguard Worker@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14*495ae853SAndroid Build Coastguard Worker@ * See the License for the specific language governing permissions and
15*495ae853SAndroid Build Coastguard Worker@ * limitations under the License.
16*495ae853SAndroid Build Coastguard Worker@ *
17*495ae853SAndroid Build Coastguard Worker@ *****************************************************************************
18*495ae853SAndroid Build Coastguard Worker@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19*495ae853SAndroid Build Coastguard Worker@*/
20*495ae853SAndroid Build Coastguard Worker
21*495ae853SAndroid Build Coastguard Worker.text
22*495ae853SAndroid Build Coastguard Worker.p2align 2
23*495ae853SAndroid Build Coastguard Worker
@/*****************************************************************************
@*
@*  Function Name    : ih264e_fmt_conv_420p_to_420sp_a9q
@*
@*  Description      : Converts a planar YUV 4:2:0 image (separate Y, U and V
@*                     planes) to semi-planar YUV 4:2:0 (Y plane plus one
@*                     plane of interleaved U,V bytes).
@*                     The luma plane is copied row by row unless
@*                     convert_uv_only is 1, in which case the luma copy is
@*                     skipped and only the chroma interleave is done.
@*
@*  Arguments        : R0           pu1_y              (source Y)
@*                     R1           pu1_u              (source U)
@*                     R2           pu1_v              (source V)
@*                     R3           pu1_dest_y         (destination Y)
@*                     Stack offsets below are relative to sp after the
@*                     40 byte register push done on entry:
@*                     [R13 #40]    pu1_dest_uv
@*                     [R13 #44]    u2_height
@*                     [R13 #48]    u2_width
@*                     [R13 #52]    u2_stridey
@*                     [R13 #56]    u2_strideu
@*                     [R13 #60]    u2_stridev
@*                     [R13 #64]    u2_dest_stride_y
@*                     [R13 #68]    u2_dest_stride_uv
@*                     [R13 #72]    convert_uv_only
@*
@*  Values Returned  : None
@*
@*  Register Usage   : R0 - R9, D0 - D1
@*
@*  Stack Usage      : 40 Bytes (r4-r12 and lr)
@*
@*  Known Limitations
@*       Image Width : Must be at least 16. Every row is moved with full
@*                     vectors (16 luma bytes, 8 U + 8 V bytes). When the
@*                     width is not a multiple of 16 the last vector of the
@*                     row is re-positioned so that it ends exactly at the
@*                     end of the row (some bytes are copied twice).
@*                     The chroma width is u2_width >> 1, so the width is
@*                     expected to be even.
@*       Image Height: The chroma row count is u2_height >> 1. Both row
@*                     loops test their counter at the bottom, so each loop
@*                     body runs at least once.
@*
@*****************************************************************************/
    .global ih264e_fmt_conv_420p_to_420sp_a9q

ih264e_fmt_conv_420p_to_420sp_a9q:

    @// Save r4-r12 and lr (10 words), so the first stack argument is at [sp, #40]
    stmfd         sp!, {r4-r12, lr}

    ldr           r4, [sp, #72]         @// r4 = convert_uv_only

    cmp           r4, #1
    beq           yuv420sp_uv_chroma    @// Chroma only requested: skip the luma copy

    @/* Luma plane copy */
    ldr           r4, [sp, #44]         @// r4 = u2_height (row counter)
    ldr           r5, [sp, #48]         @// r5 = u2_width
    ldr           r7, [sp, #52]         @// r7 = u2_stridey
    ldr           r8, [sp, #64]         @// r8 = u2_dest_stride_y
    sub           r7, r7, r5            @// Source step from end of row to start of next row
    sub           r8, r8, r5            @// Destination step from end of row to start of next row

yuv420sp_uv_row_loop_y:
    mov           r6, r5                @// r6 = bytes left in this row

yuv420sp_uv_col_loop_y:
    pld           [r0, #128]            @// Preload hint for upcoming source data
    vld1.8        {d0, d1}, [r0]!       @// Copy 16 luma bytes
    vst1.8        {d0, d1}, [r3]!
    sub           r6, r6, #16
    cmp           r6, #15
    bgt           yuv420sp_uv_col_loop_y @// Another full 16 byte vector fits

    cmp           r6, #0
    beq           yuv420sp_uv_row_loop_end_y
    @// 1..15 bytes are left. Step both pointers back by (16 - left) so that one
    @// more 16 byte copy ends exactly at the end of the row.
    @// Ex: width 162 -> the loop above copied 160 bytes and r6 = 2. Step back 14
    @// bytes and copy bytes 146..161; bytes 146..159 are simply copied twice.
    rsb           r6, r6, #16
    sub           r0, r0, r6
    sub           r3, r3, r6

    vld1.8        {d0, d1}, [r0]!
    vst1.8        {d0, d1}, [r3]!

yuv420sp_uv_row_loop_end_y:
    add           r0, r0, r7            @// Next source row
    add           r3, r3, r8            @// Next destination row
    subs          r4, r4, #1
    bgt           yuv420sp_uv_row_loop_y

yuv420sp_uv_chroma:
    @/* Chroma planes: interleave U and V into the destination UV plane */
    ldr           r3, [sp, #40]         @// r3 = pu1_dest_uv
    ldr           r4, [sp, #44]         @// r4 = u2_height
    ldr           r5, [sp, #48]         @// r5 = u2_width
    ldr           r7, [sp, #56]         @// r7 = u2_strideu
    ldr           r9, [sp, #60]         @// r9 = u2_stridev
    ldr           r8, [sp, #68]         @// r8 = u2_dest_stride_uv

    sub           r7, r7, r5, lsr #1    @// U step from end of row to next row = u2_strideu - width/2
    sub           r9, r9, r5, lsr #1    @// V step from end of row to next row = u2_stridev - width/2
    sub           r8, r8, r5            @// Destination step: a UV row holds width/2 pairs = width bytes

    mov           r5, r5, lsr #1        @// r5 = chroma width  (u2_width  >> 1)
    mov           r4, r4, lsr #1        @// r4 = chroma height (u2_height >> 1)

yuv420sp_uv_row_loop_uv:
    mov           r6, r5                @// r6 = chroma samples left in this row

yuv420sp_uv_col_loop_uv:
    pld           [r1, #128]
    pld           [r2, #128]
    vld1.8        d0, [r1]!             @// 8 U bytes
    vld1.8        d1, [r2]!             @// 8 V bytes
    vst2.8        {d0, d1}, [r3]!       @// Store interleaved: U0 V0 U1 V1 ... (16 bytes)
    sub           r6, r6, #8
    cmp           r6, #7
    bgt           yuv420sp_uv_col_loop_uv

    cmp           r6, #0
    beq           yuv420sp_uv_row_loop_end_uv
    @// 1..7 chroma samples are left. Step U and V back by (8 - left) samples and
    @// the destination back by twice that (two bytes per sample pair), then
    @// interleave one more 8 sample vector that ends exactly at the end of the row.
    rsb           r6, r6, #8
    sub           r1, r1, r6
    sub           r2, r2, r6
    sub           r3, r3, r6, lsl #1

    vld1.8        d0, [r1]!
    vld1.8        d1, [r2]!
    vst2.8        {d0, d1}, [r3]!

yuv420sp_uv_row_loop_end_uv:
    add           r1, r1, r7            @// Next U row
    add           r2, r2, r9            @// Next V row (uses the V stride, not the U stride)
    add           r3, r3, r8            @// Next destination UV row
    subs          r4, r4, #1
    bgt           yuv420sp_uv_row_loop_uv

    @// Restore the saved registers and return (lr is popped into pc)
    ldmfd         sp!, {r4-r12, pc}
172*495ae853SAndroid Build Coastguard Worker
173*495ae853SAndroid Build Coastguard Worker
174*495ae853SAndroid Build Coastguard Worker
175*495ae853SAndroid Build Coastguard Worker
176*495ae853SAndroid Build Coastguard Worker
@ /**
@ *******************************************************************************
@ *
@ * @brief ih264e_fmt_conv_422i_to_420sp_a9q
@ *     Converts an interleaved YUV 4:2:2 image to semi-planar YUV 4:2:0
@ *
@ * Inputs             : r0 - pu1_y            -   Destination luma plane.
@ *                      r1 - pu1_u            -   Destination chroma plane; receives
@ *                                                interleaved chroma byte pairs.
@ *                      r2 - pu1_v            -   Not used by this routine.
@ *                      r3 - pu2_yuv422i      -   Source 4:2:2 interleaved image
@ *                                                (2 bytes per pixel).
@ *   Stack (offsets from sp after the 40 byte register push done on entry):
@ *                      40 - u4_width         -   Width in pixels.
@ *                      44 - u4_height        -   Height in rows.
@ *                      48 - u4_stride_y      -   Stride of the luma plane in bytes.
@ *                      52 - u4_stride_u      -   Stride of the chroma plane in bytes.
@ *                      56 - u4_stride_v      -   Not read by this routine.
@ *                      60 - u4_stride_yuv422i-   Stride of the source in pixels
@ *                                                (scaled by 2 to get bytes).
@ *
@ * @par   Description
@ * Two source rows are handled per pass. Each 4 byte source group is split into
@ * its 4 bytes: bytes 1 and 3 are stored as two consecutive luma samples, bytes
@ * 0 and 2 are the chroma samples (presumably U and V, i.e. UYVY ordering -
@ * confirm against the caller). Luma of both rows is stored unchanged; chroma of
@ * the two rows is combined with a rounding average, (a + b + 1) >> 1, and
@ * stored as one row of interleaved (byte 0, byte 2) pairs.
@ *
@ * @returns None
@ *
@ * @remarks
@ * Pixels are always processed 16 at a time, so the width must be at least 16.
@ * If the width is not a multiple of 16, the last 16 pixels of the row are
@ * re-processed so that the final vector ends exactly at the end of the row.
@ * The width is expected to be even so that this step back stays on a 4 byte
@ * source group boundary.
@ * The row pair count is u4_height >> 1 and the counter is tested at the bottom
@ * of the loop, so one row pair is always processed.
@ *
@ *******************************************************************************
@ */

    .global ih264e_fmt_conv_422i_to_420sp_a9q
ih264e_fmt_conv_422i_to_420sp_a9q:
    stmfd         sp!, {r4-r12, lr}     @// Save r4-r12 and lr (40 bytes)

    @// Fetch the stack arguments
    ldr           r7, [sp, #40]         @// r7  = u4_width
    ldr           r11, [sp, #44]        @// r11 = u4_height
    ldr           r4, [sp, #48]         @// r4  = u4_stride_y
    ldr           r9, [sp, #52]         @// r9  = u4_stride_u
    ldr           r5, [sp, #60]         @// r5  = u4_stride_yuv422i

    @// Pointers to the second row of each pair
    add           r6, r0, r4            @// r6 = pu1_y + u4_stride_y
    add           r8, r3, r5, lsl #1    @// r8 = source + 2 * u4_stride_yuv422i bytes

    @// Steps applied at the end of a row pair (pointers are then at end of row)
    rsb           r4, r7, r4, lsl #1    @// luma   : 2 * u4_stride_y - u4_width
    sub           r9, r9, r7            @// chroma : u4_stride_u - u4_width (one chroma row is u4_width bytes)
    rsb           r5, r7, r5, lsl #1    @// source : 2 * u4_stride_yuv422i - u4_width ...
    mov           r5, r5, lsl #1        @// ... times 2 bytes per pixel

    mov           r11, r11, asr #1      @// r11 = number of row pairs (u4_height >> 1)

@// Register roles inside the loops
@// r0  - luma destination, first row of the pair
@// r6  - luma destination, second row of the pair
@// r1  - interleaved chroma destination
@// r3  - source, first row of the pair
@// r8  - source, second row of the pair
@// r4  - luma step to the next row pair
@// r9  - chroma step to the next row
@// r5  - source step to the next row pair
@// r7  - u4_width
@// r11 - row pairs left
@// r12 - pixels left in the current row pair
fmt_conv_422i_row_pair_loop:

    mov           r12, r7               @// Start a row pair: u4_width pixels to do

fmt_conv_422i_col_loop:
    vld4.8        {d0, d1, d2, d3}, [r3]! @// 16 pixels of row 1: d0/d2 = chroma bytes, d1/d3 = luma bytes
    vld4.8        {d4, d5, d6, d7}, [r8]! @// 16 pixels of row 2: d4/d6 = chroma bytes, d5/d7 = luma bytes

    vrhadd.u8     d0, d0, d4            @// Rounding average of byte 0 chroma of both rows
    vrhadd.u8     d2, d2, d6            @// Rounding average of byte 2 chroma of both rows

    vst2.8        {d1, d3}, [r0]!       @// 16 luma bytes of row 1
    vst2.8        {d5, d7}, [r6]!       @// 16 luma bytes of row 2
    vst2.8        {d0, d2}, [r1]!       @// 8 averaged chroma pairs (16 bytes)

    sub           r12, r12, #16
    cmp           r12, #16
    bge           fmt_conv_422i_col_loop @// At least one more full group of 16 pixels
    cmp           r12, #0
    beq           fmt_conv_422i_row_pair_done

    @// 1..15 pixels are left. Step every pointer back by (16 - left) pixels so
    @// that one more pass of the loop body ends exactly at the end of the row.
    @// The source moves 2 bytes per pixel; the luma and chroma destinations move
    @// 1 byte per pixel. r12 is then set to 16 so that the extra pass leaves it
    @// at 0 and control falls out to the row pair epilogue.
    rsb           r12, r12, #16
    sub           r3, r3, r12, lsl #1
    sub           r8, r8, r12, lsl #1
    sub           r0, r0, r12
    sub           r6, r6, r12
    sub           r1, r1, r12
    mov           r12, #16
    b             fmt_conv_422i_col_loop

fmt_conv_422i_row_pair_done:
    @// Move every pointer on to the next pair of rows
    add           r0, r0, r4
    add           r6, r6, r4
    add           r1, r1, r9
    add           r3, r3, r5
    add           r8, r8, r5

    subs          r11, r11, #1
    bgt           fmt_conv_422i_row_pair_loop

    ldmfd         sp!, {r4-r12, pc}     @// Restore registers and return
345*495ae853SAndroid Build Coastguard Worker
346*495ae853SAndroid Build Coastguard Worker
347*495ae853SAndroid Build Coastguard Worker
348