xref: /aosp_15_r20/external/libjpeg-turbo/simd/arm/jdmrgext-neon.c (revision dfc6aa5c1cfd4bc4e2018dc74aa96e29ee49c6da)
/*
 * jdmrgext-neon.c - merged upsampling/color conversion (Arm Neon)
 *
 * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
 * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
23*dfc6aa5cSAndroid Build Coastguard Worker 
/* This file is included by jdmerge-neon.c. */
25*dfc6aa5cSAndroid Build Coastguard Worker 
26*dfc6aa5cSAndroid Build Coastguard Worker 
/* These routines combine simple (non-fancy, i.e. non-smooth) h2v1 or h2v2
 * chroma upsampling and YCbCr -> RGB color conversion into a single function.
 *
 * As with the standalone functions, YCbCr -> RGB conversion is defined by the
 * following equations:
 *    R = Y                        + 1.40200 * (Cr - 128)
 *    G = Y - 0.34414 * (Cb - 128) - 0.71414 * (Cr - 128)
 *    B = Y + 1.77200 * (Cb - 128)
 *
 * Scaled integer constants are used to avoid floating-point arithmetic:
 *    0.3441467 = 11277 * 2^-15
 *    0.7141418 = 23401 * 2^-15
 *    1.4020386 = 22971 * 2^-14
 *    1.7720337 = 29033 * 2^-14
 * These constants are defined in jdmerge-neon.c.
 *
 * To ensure correct results, rounding is used when descaling.
 */
45*dfc6aa5cSAndroid Build Coastguard Worker 
/* Notes on safe memory access for merged upsampling/YCbCr -> RGB conversion
 * routines:
 *
 * Input memory buffers can be safely overread up to the next multiple of
 * ALIGN_SIZE bytes, since they are always allocated by alloc_sarray() in
 * jmemmgr.c.
 *
 * The output buffer cannot safely be written beyond output_width, since
 * output_buf points to a possibly unpadded row in the decompressed image
 * buffer allocated by the calling program.
 */
57*dfc6aa5cSAndroid Build Coastguard Worker 
/* Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
 */
60*dfc6aa5cSAndroid Build Coastguard Worker 
/* Merged h2v1 upsampling and YCbCr -> RGB(A) conversion of one output row.
 *
 * Parameters:
 *   output_width      number of pixels to write to the output row
 *   input_buf         component planes; input_buf[0], input_buf[1], and
 *                     input_buf[2] supply the Y, Cb, and Cr rows respectively
 *   in_row_group_ctr  index of the row to read within each component plane
 *   output_buf        output_buf[0] is the destination row, written as
 *                     RGB_PIXELSIZE bytes per pixel in the channel order
 *                     selected by RGB_RED / RGB_GREEN / RGB_BLUE (and
 *                     RGB_ALPHA, if defined)
 *
 * Each Cb/Cr sample is shared by two horizontally adjacent Y samples.  The
 * row is processed 16 pixels at a time; a final partial group of 1-15 pixels
 * is converted with the same arithmetic but stored pixel by pixel so that
 * nothing is written past output_width.
 */
void jsimd_h2v1_merged_upsample_neon(JDIMENSION output_width,
                                     JSAMPIMAGE input_buf,
                                     JDIMENSION in_row_group_ctr,
                                     JSAMPARRAY output_buf)
{
  JSAMPROW outptr;
  /* Pointers to Y, Cb, and Cr data */
  JSAMPROW inptr0, inptr1, inptr2;

  /* Conversion coefficients, one per lane.  NOTE(review): the table itself is
   * defined outside this file; from the way the lanes are used below, lanes
   * 0/1 are the 2^-15-scaled G coefficients and lanes 2/3 are the 2^-14-scaled
   * R and B coefficients, with lane 0 presumably stored negated -- confirm
   * against the definition.
   */
  const int16x4_t consts = vld1_s16(jsimd_ycc_rgb_convert_neon_consts);
  const int16x8_t neg_128 = vdupq_n_s16(-128);

  inptr0 = input_buf[0][in_row_group_ctr];
  inptr1 = input_buf[1][in_row_group_ctr];
  inptr2 = input_buf[2][in_row_group_ctr];
  outptr = output_buf[0];

  int cols_remaining = output_width;
  for (; cols_remaining >= 16; cols_remaining -= 16) {
    /* De-interleave Y component values into two separate vectors, one
     * containing the component values with even-numbered indices and one
     * containing the component values with odd-numbered indices.
     */
    uint8x8x2_t y = vld2_u8(inptr0);
    uint8x8_t cb = vld1_u8(inptr1);
    uint8x8_t cr = vld1_u8(inptr2);
    /* Subtract 128 from Cb and Cr.  The widening add is performed on the
     * unsigned bit pattern of -128; reinterpreting the 16-bit result as
     * signed yields (sample - 128).
     */
    int16x8_t cr_128 =
      vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(neg_128), cr));
    int16x8_t cb_128 =
      vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(neg_128), cb));
    /* Compute G-Y: - 0.34414 * (Cb - 128) - 0.71414 * (Cr - 128) */
    int32x4_t g_sub_y_l = vmull_lane_s16(vget_low_s16(cb_128), consts, 0);
    int32x4_t g_sub_y_h = vmull_lane_s16(vget_high_s16(cb_128), consts, 0);
    g_sub_y_l = vmlsl_lane_s16(g_sub_y_l, vget_low_s16(cr_128), consts, 1);
    g_sub_y_h = vmlsl_lane_s16(g_sub_y_h, vget_high_s16(cr_128), consts, 1);
    /* Descale G components: shift right 15, round, and narrow to 16-bit. */
    int16x8_t g_sub_y = vcombine_s16(vrshrn_n_s32(g_sub_y_l, 15),
                                     vrshrn_n_s32(g_sub_y_h, 15));
    /* Compute R-Y: 1.40200 * (Cr - 128).  The rounding doubling multiply
     * returns the high half (an effective shift right by 15), so the operand
     * is pre-shifted left by 1 to obtain the 2^-14 descale.
     */
    int16x8_t r_sub_y = vqrdmulhq_lane_s16(vshlq_n_s16(cr_128, 1), consts, 2);
    /* Compute B-Y: 1.77200 * (Cb - 128) */
    int16x8_t b_sub_y = vqrdmulhq_lane_s16(vshlq_n_s16(cb_128, 1), consts, 3);
    /* Add the chroma-derived values (G-Y, R-Y, and B-Y) to both the "even" and
     * "odd" Y component values.  This effectively upsamples the chroma
     * components horizontally.
     */
    int16x8_t g_even =
      vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(g_sub_y),
                                     y.val[0]));
    int16x8_t r_even =
      vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(r_sub_y),
                                     y.val[0]));
    int16x8_t b_even =
      vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(b_sub_y),
                                     y.val[0]));
    int16x8_t g_odd =
      vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(g_sub_y),
                                     y.val[1]));
    int16x8_t r_odd =
      vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(r_sub_y),
                                     y.val[1]));
    int16x8_t b_odd =
      vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(b_sub_y),
                                     y.val[1]));
    /* Convert each component to unsigned and narrow, clamping to [0-255].
     * Re-interleave the "even" and "odd" component values.
     */
    uint8x8x2_t r = vzip_u8(vqmovun_s16(r_even), vqmovun_s16(r_odd));
    uint8x8x2_t g = vzip_u8(vqmovun_s16(g_even), vqmovun_s16(g_odd));
    uint8x8x2_t b = vzip_u8(vqmovun_s16(b_even), vqmovun_s16(b_odd));

#ifdef RGB_ALPHA
    uint8x16x4_t rgba;
    rgba.val[RGB_RED] = vcombine_u8(r.val[0], r.val[1]);
    rgba.val[RGB_GREEN] = vcombine_u8(g.val[0], g.val[1]);
    rgba.val[RGB_BLUE] = vcombine_u8(b.val[0], b.val[1]);
    /* Set alpha channel to opaque (0xFF). */
    rgba.val[RGB_ALPHA] = vdupq_n_u8(0xFF);
    /* Store RGBA pixel data to memory. */
    vst4q_u8(outptr, rgba);
#else
    uint8x16x3_t rgb;
    rgb.val[RGB_RED] = vcombine_u8(r.val[0], r.val[1]);
    rgb.val[RGB_GREEN] = vcombine_u8(g.val[0], g.val[1]);
    rgb.val[RGB_BLUE] = vcombine_u8(b.val[0], b.val[1]);
    /* Store RGB pixel data to memory. */
    vst3q_u8(outptr, rgb);
#endif

    /* Increment pointers. */
    inptr0 += 16;
    inptr1 += 8;
    inptr2 += 8;
    outptr += (RGB_PIXELSIZE * 16);
  }

  if (cols_remaining > 0) {
    /* Fewer than 16 pixels remain.  Full vectors are still loaded and
     * converted; only the stores below are limited to cols_remaining pixels.
     *
     * De-interleave Y component values into two separate vectors, one
     * containing the component values with even-numbered indices and one
     * containing the component values with odd-numbered indices.
     */
    uint8x8x2_t y = vld2_u8(inptr0);
    uint8x8_t cb = vld1_u8(inptr1);
    uint8x8_t cr = vld1_u8(inptr2);
    /* Subtract 128 from Cb and Cr. */
    int16x8_t cr_128 =
      vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(neg_128), cr));
    int16x8_t cb_128 =
      vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(neg_128), cb));
    /* Compute G-Y: - 0.34414 * (Cb - 128) - 0.71414 * (Cr - 128) */
    int32x4_t g_sub_y_l = vmull_lane_s16(vget_low_s16(cb_128), consts, 0);
    int32x4_t g_sub_y_h = vmull_lane_s16(vget_high_s16(cb_128), consts, 0);
    g_sub_y_l = vmlsl_lane_s16(g_sub_y_l, vget_low_s16(cr_128), consts, 1);
    g_sub_y_h = vmlsl_lane_s16(g_sub_y_h, vget_high_s16(cr_128), consts, 1);
    /* Descale G components: shift right 15, round, and narrow to 16-bit. */
    int16x8_t g_sub_y = vcombine_s16(vrshrn_n_s32(g_sub_y_l, 15),
                                     vrshrn_n_s32(g_sub_y_h, 15));
    /* Compute R-Y: 1.40200 * (Cr - 128) */
    int16x8_t r_sub_y = vqrdmulhq_lane_s16(vshlq_n_s16(cr_128, 1), consts, 2);
    /* Compute B-Y: 1.77200 * (Cb - 128) */
    int16x8_t b_sub_y = vqrdmulhq_lane_s16(vshlq_n_s16(cb_128, 1), consts, 3);
    /* Add the chroma-derived values (G-Y, R-Y, and B-Y) to both the "even" and
     * "odd" Y component values.  This effectively upsamples the chroma
     * components horizontally.
     */
    int16x8_t g_even =
      vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(g_sub_y),
                                     y.val[0]));
    int16x8_t r_even =
      vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(r_sub_y),
                                     y.val[0]));
    int16x8_t b_even =
      vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(b_sub_y),
                                     y.val[0]));
    int16x8_t g_odd =
      vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(g_sub_y),
                                     y.val[1]));
    int16x8_t r_odd =
      vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(r_sub_y),
                                     y.val[1]));
    int16x8_t b_odd =
      vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(b_sub_y),
                                     y.val[1]));
    /* Convert each component to unsigned and narrow, clamping to [0-255].
     * Re-interleave the "even" and "odd" component values.  After the zip,
     * val[0] holds pixels 0-7 and val[1] holds pixels 8-15 in output order.
     */
    uint8x8x2_t r = vzip_u8(vqmovun_s16(r_even), vqmovun_s16(r_odd));
    uint8x8x2_t g = vzip_u8(vqmovun_s16(g_even), vqmovun_s16(g_odd));
    uint8x8x2_t b = vzip_u8(vqmovun_s16(b_even), vqmovun_s16(b_odd));

#ifdef RGB_ALPHA
    uint8x8x4_t rgba_h;
    rgba_h.val[RGB_RED] = r.val[1];
    rgba_h.val[RGB_GREEN] = g.val[1];
    rgba_h.val[RGB_BLUE] = b.val[1];
    /* Set alpha channel to opaque (0xFF). */
    rgba_h.val[RGB_ALPHA] = vdup_n_u8(0xFF);
    uint8x8x4_t rgba_l;
    rgba_l.val[RGB_RED] = r.val[0];
    rgba_l.val[RGB_GREEN] = g.val[0];
    rgba_l.val[RGB_BLUE] = b.val[0];
    /* Set alpha channel to opaque (0xFF). */
    rgba_l.val[RGB_ALPHA] = vdup_n_u8(0xFF);
    /* Store RGBA pixel data to memory.  Cases 9-15 store the needed lanes of
     * the high half one pixel at a time and then fall through to case 8,
     * which stores the whole low half; cases 1-7 store only the needed lanes
     * of the low half.
     */
    switch (cols_remaining) {
    case 15:
      vst4_lane_u8(outptr + 14 * RGB_PIXELSIZE, rgba_h, 6);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 14:
      vst4_lane_u8(outptr + 13 * RGB_PIXELSIZE, rgba_h, 5);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 13:
      vst4_lane_u8(outptr + 12 * RGB_PIXELSIZE, rgba_h, 4);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 12:
      vst4_lane_u8(outptr + 11 * RGB_PIXELSIZE, rgba_h, 3);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 11:
      vst4_lane_u8(outptr + 10 * RGB_PIXELSIZE, rgba_h, 2);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 10:
      vst4_lane_u8(outptr + 9 * RGB_PIXELSIZE, rgba_h, 1);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 9:
      vst4_lane_u8(outptr + 8 * RGB_PIXELSIZE, rgba_h, 0);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 8:
      vst4_u8(outptr, rgba_l);
      break;
    case 7:
      vst4_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgba_l, 6);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 6:
      vst4_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgba_l, 5);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 5:
      vst4_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgba_l, 4);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 4:
      vst4_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgba_l, 3);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 3:
      vst4_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgba_l, 2);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 2:
      vst4_lane_u8(outptr + RGB_PIXELSIZE, rgba_l, 1);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 1:
      vst4_lane_u8(outptr, rgba_l, 0);
      FALLTHROUGH               /*FALLTHROUGH*/
    default:
      break;
    }
#else
    uint8x8x3_t rgb_h;
    rgb_h.val[RGB_RED] = r.val[1];
    rgb_h.val[RGB_GREEN] = g.val[1];
    rgb_h.val[RGB_BLUE] = b.val[1];
    uint8x8x3_t rgb_l;
    rgb_l.val[RGB_RED] = r.val[0];
    rgb_l.val[RGB_GREEN] = g.val[0];
    rgb_l.val[RGB_BLUE] = b.val[0];
    /* Store RGB pixel data to memory.  Cases 9-15 store the needed lanes of
     * the high half one pixel at a time and then fall through to case 8,
     * which stores the whole low half; cases 1-7 store only the needed lanes
     * of the low half.
     */
    switch (cols_remaining) {
    case 15:
      vst3_lane_u8(outptr + 14 * RGB_PIXELSIZE, rgb_h, 6);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 14:
      vst3_lane_u8(outptr + 13 * RGB_PIXELSIZE, rgb_h, 5);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 13:
      vst3_lane_u8(outptr + 12 * RGB_PIXELSIZE, rgb_h, 4);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 12:
      vst3_lane_u8(outptr + 11 * RGB_PIXELSIZE, rgb_h, 3);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 11:
      vst3_lane_u8(outptr + 10 * RGB_PIXELSIZE, rgb_h, 2);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 10:
      vst3_lane_u8(outptr + 9 * RGB_PIXELSIZE, rgb_h, 1);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 9:
      vst3_lane_u8(outptr + 8 * RGB_PIXELSIZE, rgb_h, 0);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 8:
      vst3_u8(outptr, rgb_l);
      break;
    case 7:
      vst3_lane_u8(outptr + 6 * RGB_PIXELSIZE, rgb_l, 6);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 6:
      vst3_lane_u8(outptr + 5 * RGB_PIXELSIZE, rgb_l, 5);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 5:
      vst3_lane_u8(outptr + 4 * RGB_PIXELSIZE, rgb_l, 4);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 4:
      vst3_lane_u8(outptr + 3 * RGB_PIXELSIZE, rgb_l, 3);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 3:
      vst3_lane_u8(outptr + 2 * RGB_PIXELSIZE, rgb_l, 2);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 2:
      vst3_lane_u8(outptr + RGB_PIXELSIZE, rgb_l, 1);
      FALLTHROUGH               /*FALLTHROUGH*/
    case 1:
      vst3_lane_u8(outptr, rgb_l, 0);
      FALLTHROUGH               /*FALLTHROUGH*/
    default:
      break;
    }
#endif
  }
}
337*dfc6aa5cSAndroid Build Coastguard Worker 
338*dfc6aa5cSAndroid Build Coastguard Worker 
/* Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
 *
 * See comments above for details regarding color conversion and safe memory
 * access.
 */
344*dfc6aa5cSAndroid Build Coastguard Worker 
jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)345*dfc6aa5cSAndroid Build Coastguard Worker void jsimd_h2v2_merged_upsample_neon(JDIMENSION output_width,
346*dfc6aa5cSAndroid Build Coastguard Worker                                      JSAMPIMAGE input_buf,
347*dfc6aa5cSAndroid Build Coastguard Worker                                      JDIMENSION in_row_group_ctr,
348*dfc6aa5cSAndroid Build Coastguard Worker                                      JSAMPARRAY output_buf)
349*dfc6aa5cSAndroid Build Coastguard Worker {
350*dfc6aa5cSAndroid Build Coastguard Worker   JSAMPROW outptr0, outptr1;
351*dfc6aa5cSAndroid Build Coastguard Worker   /* Pointers to Y (both rows), Cb, and Cr data */
352*dfc6aa5cSAndroid Build Coastguard Worker   JSAMPROW inptr0_0, inptr0_1, inptr1, inptr2;
353*dfc6aa5cSAndroid Build Coastguard Worker 
354*dfc6aa5cSAndroid Build Coastguard Worker   const int16x4_t consts = vld1_s16(jsimd_ycc_rgb_convert_neon_consts);
355*dfc6aa5cSAndroid Build Coastguard Worker   const int16x8_t neg_128 = vdupq_n_s16(-128);
356*dfc6aa5cSAndroid Build Coastguard Worker 
357*dfc6aa5cSAndroid Build Coastguard Worker   inptr0_0 = input_buf[0][in_row_group_ctr * 2];
358*dfc6aa5cSAndroid Build Coastguard Worker   inptr0_1 = input_buf[0][in_row_group_ctr * 2 + 1];
359*dfc6aa5cSAndroid Build Coastguard Worker   inptr1 = input_buf[1][in_row_group_ctr];
360*dfc6aa5cSAndroid Build Coastguard Worker   inptr2 = input_buf[2][in_row_group_ctr];
361*dfc6aa5cSAndroid Build Coastguard Worker   outptr0 = output_buf[0];
362*dfc6aa5cSAndroid Build Coastguard Worker   outptr1 = output_buf[1];
363*dfc6aa5cSAndroid Build Coastguard Worker 
364*dfc6aa5cSAndroid Build Coastguard Worker   int cols_remaining = output_width;
365*dfc6aa5cSAndroid Build Coastguard Worker   for (; cols_remaining >= 16; cols_remaining -= 16) {
366*dfc6aa5cSAndroid Build Coastguard Worker     /* For each row, de-interleave Y component values into two separate
367*dfc6aa5cSAndroid Build Coastguard Worker      * vectors, one containing the component values with even-numbered indices
368*dfc6aa5cSAndroid Build Coastguard Worker      * and one containing the component values with odd-numbered indices.
369*dfc6aa5cSAndroid Build Coastguard Worker      */
370*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x2_t y0 = vld2_u8(inptr0_0);
371*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x2_t y1 = vld2_u8(inptr0_1);
372*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8_t cb = vld1_u8(inptr1);
373*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8_t cr = vld1_u8(inptr2);
374*dfc6aa5cSAndroid Build Coastguard Worker     /* Subtract 128 from Cb and Cr. */
375*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t cr_128 =
376*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(neg_128), cr));
377*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t cb_128 =
378*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(neg_128), cb));
379*dfc6aa5cSAndroid Build Coastguard Worker     /* Compute G-Y: - 0.34414 * (Cb - 128) - 0.71414 * (Cr - 128) */
380*dfc6aa5cSAndroid Build Coastguard Worker     int32x4_t g_sub_y_l = vmull_lane_s16(vget_low_s16(cb_128), consts, 0);
381*dfc6aa5cSAndroid Build Coastguard Worker     int32x4_t g_sub_y_h = vmull_lane_s16(vget_high_s16(cb_128), consts, 0);
382*dfc6aa5cSAndroid Build Coastguard Worker     g_sub_y_l = vmlsl_lane_s16(g_sub_y_l, vget_low_s16(cr_128), consts, 1);
383*dfc6aa5cSAndroid Build Coastguard Worker     g_sub_y_h = vmlsl_lane_s16(g_sub_y_h, vget_high_s16(cr_128), consts, 1);
384*dfc6aa5cSAndroid Build Coastguard Worker     /* Descale G components: shift right 15, round, and narrow to 16-bit. */
385*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t g_sub_y = vcombine_s16(vrshrn_n_s32(g_sub_y_l, 15),
386*dfc6aa5cSAndroid Build Coastguard Worker                                      vrshrn_n_s32(g_sub_y_h, 15));
387*dfc6aa5cSAndroid Build Coastguard Worker     /* Compute R-Y: 1.40200 * (Cr - 128) */
388*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t r_sub_y = vqrdmulhq_lane_s16(vshlq_n_s16(cr_128, 1), consts, 2);
389*dfc6aa5cSAndroid Build Coastguard Worker     /* Compute B-Y: 1.77200 * (Cb - 128) */
390*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t b_sub_y = vqrdmulhq_lane_s16(vshlq_n_s16(cb_128, 1), consts, 3);
391*dfc6aa5cSAndroid Build Coastguard Worker     /* For each row, add the chroma-derived values (G-Y, R-Y, and B-Y) to both
392*dfc6aa5cSAndroid Build Coastguard Worker      * the "even" and "odd" Y component values.  This effectively upsamples the
393*dfc6aa5cSAndroid Build Coastguard Worker      * chroma components both horizontally and vertically.
394*dfc6aa5cSAndroid Build Coastguard Worker      */
395*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t g0_even =
396*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(g_sub_y),
397*dfc6aa5cSAndroid Build Coastguard Worker                                      y0.val[0]));
398*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t r0_even =
399*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(r_sub_y),
400*dfc6aa5cSAndroid Build Coastguard Worker                                      y0.val[0]));
401*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t b0_even =
402*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(b_sub_y),
403*dfc6aa5cSAndroid Build Coastguard Worker                                      y0.val[0]));
404*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t g0_odd =
405*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(g_sub_y),
406*dfc6aa5cSAndroid Build Coastguard Worker                                      y0.val[1]));
407*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t r0_odd =
408*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(r_sub_y),
409*dfc6aa5cSAndroid Build Coastguard Worker                                      y0.val[1]));
410*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t b0_odd =
411*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(b_sub_y),
412*dfc6aa5cSAndroid Build Coastguard Worker                                      y0.val[1]));
413*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t g1_even =
414*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(g_sub_y),
415*dfc6aa5cSAndroid Build Coastguard Worker                                      y1.val[0]));
416*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t r1_even =
417*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(r_sub_y),
418*dfc6aa5cSAndroid Build Coastguard Worker                                      y1.val[0]));
419*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t b1_even =
420*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(b_sub_y),
421*dfc6aa5cSAndroid Build Coastguard Worker                                      y1.val[0]));
422*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t g1_odd =
423*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(g_sub_y),
424*dfc6aa5cSAndroid Build Coastguard Worker                                      y1.val[1]));
425*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t r1_odd =
426*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(r_sub_y),
427*dfc6aa5cSAndroid Build Coastguard Worker                                      y1.val[1]));
428*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t b1_odd =
429*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(b_sub_y),
430*dfc6aa5cSAndroid Build Coastguard Worker                                      y1.val[1]));
431*dfc6aa5cSAndroid Build Coastguard Worker     /* Convert each component to unsigned and narrow, clamping to [0-255].
432*dfc6aa5cSAndroid Build Coastguard Worker      * Re-interleave the "even" and "odd" component values.
433*dfc6aa5cSAndroid Build Coastguard Worker      */
434*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x2_t r0 = vzip_u8(vqmovun_s16(r0_even), vqmovun_s16(r0_odd));
435*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x2_t r1 = vzip_u8(vqmovun_s16(r1_even), vqmovun_s16(r1_odd));
436*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x2_t g0 = vzip_u8(vqmovun_s16(g0_even), vqmovun_s16(g0_odd));
437*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x2_t g1 = vzip_u8(vqmovun_s16(g1_even), vqmovun_s16(g1_odd));
438*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x2_t b0 = vzip_u8(vqmovun_s16(b0_even), vqmovun_s16(b0_odd));
439*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x2_t b1 = vzip_u8(vqmovun_s16(b1_even), vqmovun_s16(b1_odd));
440*dfc6aa5cSAndroid Build Coastguard Worker 
441*dfc6aa5cSAndroid Build Coastguard Worker #ifdef RGB_ALPHA
442*dfc6aa5cSAndroid Build Coastguard Worker     uint8x16x4_t rgba0, rgba1;
443*dfc6aa5cSAndroid Build Coastguard Worker     rgba0.val[RGB_RED] = vcombine_u8(r0.val[0], r0.val[1]);
444*dfc6aa5cSAndroid Build Coastguard Worker     rgba1.val[RGB_RED] = vcombine_u8(r1.val[0], r1.val[1]);
445*dfc6aa5cSAndroid Build Coastguard Worker     rgba0.val[RGB_GREEN] = vcombine_u8(g0.val[0], g0.val[1]);
446*dfc6aa5cSAndroid Build Coastguard Worker     rgba1.val[RGB_GREEN] = vcombine_u8(g1.val[0], g1.val[1]);
447*dfc6aa5cSAndroid Build Coastguard Worker     rgba0.val[RGB_BLUE] = vcombine_u8(b0.val[0], b0.val[1]);
448*dfc6aa5cSAndroid Build Coastguard Worker     rgba1.val[RGB_BLUE] = vcombine_u8(b1.val[0], b1.val[1]);
449*dfc6aa5cSAndroid Build Coastguard Worker     /* Set alpha channel to opaque (0xFF). */
450*dfc6aa5cSAndroid Build Coastguard Worker     rgba0.val[RGB_ALPHA] = vdupq_n_u8(0xFF);
451*dfc6aa5cSAndroid Build Coastguard Worker     rgba1.val[RGB_ALPHA] = vdupq_n_u8(0xFF);
452*dfc6aa5cSAndroid Build Coastguard Worker     /* Store RGBA pixel data to memory. */
453*dfc6aa5cSAndroid Build Coastguard Worker     vst4q_u8(outptr0, rgba0);
454*dfc6aa5cSAndroid Build Coastguard Worker     vst4q_u8(outptr1, rgba1);
455*dfc6aa5cSAndroid Build Coastguard Worker #else
456*dfc6aa5cSAndroid Build Coastguard Worker     uint8x16x3_t rgb0, rgb1;
457*dfc6aa5cSAndroid Build Coastguard Worker     rgb0.val[RGB_RED] = vcombine_u8(r0.val[0], r0.val[1]);
458*dfc6aa5cSAndroid Build Coastguard Worker     rgb1.val[RGB_RED] = vcombine_u8(r1.val[0], r1.val[1]);
459*dfc6aa5cSAndroid Build Coastguard Worker     rgb0.val[RGB_GREEN] = vcombine_u8(g0.val[0], g0.val[1]);
460*dfc6aa5cSAndroid Build Coastguard Worker     rgb1.val[RGB_GREEN] = vcombine_u8(g1.val[0], g1.val[1]);
461*dfc6aa5cSAndroid Build Coastguard Worker     rgb0.val[RGB_BLUE] = vcombine_u8(b0.val[0], b0.val[1]);
462*dfc6aa5cSAndroid Build Coastguard Worker     rgb1.val[RGB_BLUE] = vcombine_u8(b1.val[0], b1.val[1]);
463*dfc6aa5cSAndroid Build Coastguard Worker     /* Store RGB pixel data to memory. */
464*dfc6aa5cSAndroid Build Coastguard Worker     vst3q_u8(outptr0, rgb0);
465*dfc6aa5cSAndroid Build Coastguard Worker     vst3q_u8(outptr1, rgb1);
466*dfc6aa5cSAndroid Build Coastguard Worker #endif
467*dfc6aa5cSAndroid Build Coastguard Worker 
468*dfc6aa5cSAndroid Build Coastguard Worker     /* Increment pointers. */
469*dfc6aa5cSAndroid Build Coastguard Worker     inptr0_0 += 16;
470*dfc6aa5cSAndroid Build Coastguard Worker     inptr0_1 += 16;
471*dfc6aa5cSAndroid Build Coastguard Worker     inptr1 += 8;
472*dfc6aa5cSAndroid Build Coastguard Worker     inptr2 += 8;
473*dfc6aa5cSAndroid Build Coastguard Worker     outptr0 += (RGB_PIXELSIZE * 16);
474*dfc6aa5cSAndroid Build Coastguard Worker     outptr1 += (RGB_PIXELSIZE * 16);
475*dfc6aa5cSAndroid Build Coastguard Worker   }
476*dfc6aa5cSAndroid Build Coastguard Worker 
477*dfc6aa5cSAndroid Build Coastguard Worker   if (cols_remaining > 0) {
478*dfc6aa5cSAndroid Build Coastguard Worker     /* For each row, de-interleave Y component values into two separate
479*dfc6aa5cSAndroid Build Coastguard Worker      * vectors, one containing the component values with even-numbered indices
480*dfc6aa5cSAndroid Build Coastguard Worker      * and one containing the component values with odd-numbered indices.
481*dfc6aa5cSAndroid Build Coastguard Worker      */
482*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x2_t y0 = vld2_u8(inptr0_0);
483*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x2_t y1 = vld2_u8(inptr0_1);
484*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8_t cb = vld1_u8(inptr1);
485*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8_t cr = vld1_u8(inptr2);
486*dfc6aa5cSAndroid Build Coastguard Worker     /* Subtract 128 from Cb and Cr. */
487*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t cr_128 =
488*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(neg_128), cr));
489*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t cb_128 =
490*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(neg_128), cb));
491*dfc6aa5cSAndroid Build Coastguard Worker     /* Compute G-Y: - 0.34414 * (Cb - 128) - 0.71414 * (Cr - 128) */
492*dfc6aa5cSAndroid Build Coastguard Worker     int32x4_t g_sub_y_l = vmull_lane_s16(vget_low_s16(cb_128), consts, 0);
493*dfc6aa5cSAndroid Build Coastguard Worker     int32x4_t g_sub_y_h = vmull_lane_s16(vget_high_s16(cb_128), consts, 0);
494*dfc6aa5cSAndroid Build Coastguard Worker     g_sub_y_l = vmlsl_lane_s16(g_sub_y_l, vget_low_s16(cr_128), consts, 1);
495*dfc6aa5cSAndroid Build Coastguard Worker     g_sub_y_h = vmlsl_lane_s16(g_sub_y_h, vget_high_s16(cr_128), consts, 1);
496*dfc6aa5cSAndroid Build Coastguard Worker     /* Descale G components: shift right 15, round, and narrow to 16-bit. */
497*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t g_sub_y = vcombine_s16(vrshrn_n_s32(g_sub_y_l, 15),
498*dfc6aa5cSAndroid Build Coastguard Worker                                      vrshrn_n_s32(g_sub_y_h, 15));
499*dfc6aa5cSAndroid Build Coastguard Worker     /* Compute R-Y: 1.40200 * (Cr - 128) */
500*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t r_sub_y = vqrdmulhq_lane_s16(vshlq_n_s16(cr_128, 1), consts, 2);
501*dfc6aa5cSAndroid Build Coastguard Worker     /* Compute B-Y: 1.77200 * (Cb - 128) */
502*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t b_sub_y = vqrdmulhq_lane_s16(vshlq_n_s16(cb_128, 1), consts, 3);
503*dfc6aa5cSAndroid Build Coastguard Worker     /* For each row, add the chroma-derived values (G-Y, R-Y, and B-Y) to both
504*dfc6aa5cSAndroid Build Coastguard Worker      * the "even" and "odd" Y component values.  This effectively upsamples the
505*dfc6aa5cSAndroid Build Coastguard Worker      * chroma components both horizontally and vertically.
506*dfc6aa5cSAndroid Build Coastguard Worker      */
507*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t g0_even =
508*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(g_sub_y),
509*dfc6aa5cSAndroid Build Coastguard Worker                                      y0.val[0]));
510*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t r0_even =
511*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(r_sub_y),
512*dfc6aa5cSAndroid Build Coastguard Worker                                      y0.val[0]));
513*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t b0_even =
514*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(b_sub_y),
515*dfc6aa5cSAndroid Build Coastguard Worker                                      y0.val[0]));
516*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t g0_odd =
517*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(g_sub_y),
518*dfc6aa5cSAndroid Build Coastguard Worker                                      y0.val[1]));
519*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t r0_odd =
520*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(r_sub_y),
521*dfc6aa5cSAndroid Build Coastguard Worker                                      y0.val[1]));
522*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t b0_odd =
523*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(b_sub_y),
524*dfc6aa5cSAndroid Build Coastguard Worker                                      y0.val[1]));
525*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t g1_even =
526*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(g_sub_y),
527*dfc6aa5cSAndroid Build Coastguard Worker                                      y1.val[0]));
528*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t r1_even =
529*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(r_sub_y),
530*dfc6aa5cSAndroid Build Coastguard Worker                                      y1.val[0]));
531*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t b1_even =
532*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(b_sub_y),
533*dfc6aa5cSAndroid Build Coastguard Worker                                      y1.val[0]));
534*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t g1_odd =
535*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(g_sub_y),
536*dfc6aa5cSAndroid Build Coastguard Worker                                      y1.val[1]));
537*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t r1_odd =
538*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(r_sub_y),
539*dfc6aa5cSAndroid Build Coastguard Worker                                      y1.val[1]));
540*dfc6aa5cSAndroid Build Coastguard Worker     int16x8_t b1_odd =
541*dfc6aa5cSAndroid Build Coastguard Worker       vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(b_sub_y),
542*dfc6aa5cSAndroid Build Coastguard Worker                                      y1.val[1]));
543*dfc6aa5cSAndroid Build Coastguard Worker     /* Convert each component to unsigned and narrow, clamping to [0-255].
544*dfc6aa5cSAndroid Build Coastguard Worker      * Re-interleave the "even" and "odd" component values.
545*dfc6aa5cSAndroid Build Coastguard Worker      */
546*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x2_t r0 = vzip_u8(vqmovun_s16(r0_even), vqmovun_s16(r0_odd));
547*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x2_t r1 = vzip_u8(vqmovun_s16(r1_even), vqmovun_s16(r1_odd));
548*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x2_t g0 = vzip_u8(vqmovun_s16(g0_even), vqmovun_s16(g0_odd));
549*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x2_t g1 = vzip_u8(vqmovun_s16(g1_even), vqmovun_s16(g1_odd));
550*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x2_t b0 = vzip_u8(vqmovun_s16(b0_even), vqmovun_s16(b0_odd));
551*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x2_t b1 = vzip_u8(vqmovun_s16(b1_even), vqmovun_s16(b1_odd));
552*dfc6aa5cSAndroid Build Coastguard Worker 
553*dfc6aa5cSAndroid Build Coastguard Worker #ifdef RGB_ALPHA
554*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x4_t rgba0_h, rgba1_h;
555*dfc6aa5cSAndroid Build Coastguard Worker     rgba0_h.val[RGB_RED] = r0.val[1];
556*dfc6aa5cSAndroid Build Coastguard Worker     rgba1_h.val[RGB_RED] = r1.val[1];
557*dfc6aa5cSAndroid Build Coastguard Worker     rgba0_h.val[RGB_GREEN] = g0.val[1];
558*dfc6aa5cSAndroid Build Coastguard Worker     rgba1_h.val[RGB_GREEN] = g1.val[1];
559*dfc6aa5cSAndroid Build Coastguard Worker     rgba0_h.val[RGB_BLUE] = b0.val[1];
560*dfc6aa5cSAndroid Build Coastguard Worker     rgba1_h.val[RGB_BLUE] = b1.val[1];
561*dfc6aa5cSAndroid Build Coastguard Worker     /* Set alpha channel to opaque (0xFF). */
562*dfc6aa5cSAndroid Build Coastguard Worker     rgba0_h.val[RGB_ALPHA] = vdup_n_u8(0xFF);
563*dfc6aa5cSAndroid Build Coastguard Worker     rgba1_h.val[RGB_ALPHA] = vdup_n_u8(0xFF);
564*dfc6aa5cSAndroid Build Coastguard Worker 
565*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x4_t rgba0_l, rgba1_l;
566*dfc6aa5cSAndroid Build Coastguard Worker     rgba0_l.val[RGB_RED] = r0.val[0];
567*dfc6aa5cSAndroid Build Coastguard Worker     rgba1_l.val[RGB_RED] = r1.val[0];
568*dfc6aa5cSAndroid Build Coastguard Worker     rgba0_l.val[RGB_GREEN] = g0.val[0];
569*dfc6aa5cSAndroid Build Coastguard Worker     rgba1_l.val[RGB_GREEN] = g1.val[0];
570*dfc6aa5cSAndroid Build Coastguard Worker     rgba0_l.val[RGB_BLUE] = b0.val[0];
571*dfc6aa5cSAndroid Build Coastguard Worker     rgba1_l.val[RGB_BLUE] = b1.val[0];
572*dfc6aa5cSAndroid Build Coastguard Worker     /* Set alpha channel to opaque (0xFF). */
573*dfc6aa5cSAndroid Build Coastguard Worker     rgba0_l.val[RGB_ALPHA] = vdup_n_u8(0xFF);
574*dfc6aa5cSAndroid Build Coastguard Worker     rgba1_l.val[RGB_ALPHA] = vdup_n_u8(0xFF);
575*dfc6aa5cSAndroid Build Coastguard Worker     /* Store RGBA pixel data to memory. */
576*dfc6aa5cSAndroid Build Coastguard Worker     switch (cols_remaining) {
577*dfc6aa5cSAndroid Build Coastguard Worker     case 15:
578*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr0 + 14 * RGB_PIXELSIZE, rgba0_h, 6);
579*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr1 + 14 * RGB_PIXELSIZE, rgba1_h, 6);
580*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
581*dfc6aa5cSAndroid Build Coastguard Worker     case 14:
582*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr0 + 13 * RGB_PIXELSIZE, rgba0_h, 5);
583*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr1 + 13 * RGB_PIXELSIZE, rgba1_h, 5);
584*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
585*dfc6aa5cSAndroid Build Coastguard Worker     case 13:
586*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr0 + 12 * RGB_PIXELSIZE, rgba0_h, 4);
587*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr1 + 12 * RGB_PIXELSIZE, rgba1_h, 4);
588*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
589*dfc6aa5cSAndroid Build Coastguard Worker     case 12:
590*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr0 + 11 * RGB_PIXELSIZE, rgba0_h, 3);
591*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr1 + 11 * RGB_PIXELSIZE, rgba1_h, 3);
592*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
593*dfc6aa5cSAndroid Build Coastguard Worker     case 11:
594*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr0 + 10 * RGB_PIXELSIZE, rgba0_h, 2);
595*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr1 + 10 * RGB_PIXELSIZE, rgba1_h, 2);
596*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
597*dfc6aa5cSAndroid Build Coastguard Worker     case 10:
598*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr0 + 9 * RGB_PIXELSIZE, rgba0_h, 1);
599*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr1 + 9 * RGB_PIXELSIZE, rgba1_h, 1);
600*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
601*dfc6aa5cSAndroid Build Coastguard Worker     case 9:
602*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr0 + 8 * RGB_PIXELSIZE, rgba0_h, 0);
603*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr1 + 8 * RGB_PIXELSIZE, rgba1_h, 0);
604*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
605*dfc6aa5cSAndroid Build Coastguard Worker     case 8:
606*dfc6aa5cSAndroid Build Coastguard Worker       vst4_u8(outptr0, rgba0_l);
607*dfc6aa5cSAndroid Build Coastguard Worker       vst4_u8(outptr1, rgba1_l);
608*dfc6aa5cSAndroid Build Coastguard Worker       break;
609*dfc6aa5cSAndroid Build Coastguard Worker     case 7:
610*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr0 + 6 * RGB_PIXELSIZE, rgba0_l, 6);
611*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr1 + 6 * RGB_PIXELSIZE, rgba1_l, 6);
612*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
613*dfc6aa5cSAndroid Build Coastguard Worker     case 6:
614*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr0 + 5 * RGB_PIXELSIZE, rgba0_l, 5);
615*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr1 + 5 * RGB_PIXELSIZE, rgba1_l, 5);
616*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
617*dfc6aa5cSAndroid Build Coastguard Worker     case 5:
618*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr0 + 4 * RGB_PIXELSIZE, rgba0_l, 4);
619*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr1 + 4 * RGB_PIXELSIZE, rgba1_l, 4);
620*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
621*dfc6aa5cSAndroid Build Coastguard Worker     case 4:
622*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr0 + 3 * RGB_PIXELSIZE, rgba0_l, 3);
623*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr1 + 3 * RGB_PIXELSIZE, rgba1_l, 3);
624*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
625*dfc6aa5cSAndroid Build Coastguard Worker     case 3:
626*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr0 + 2 * RGB_PIXELSIZE, rgba0_l, 2);
627*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr1 + 2 * RGB_PIXELSIZE, rgba1_l, 2);
628*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
629*dfc6aa5cSAndroid Build Coastguard Worker     case 2:
630*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr0 + 1 * RGB_PIXELSIZE, rgba0_l, 1);
631*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr1 + 1 * RGB_PIXELSIZE, rgba1_l, 1);
632*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
633*dfc6aa5cSAndroid Build Coastguard Worker     case 1:
634*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr0, rgba0_l, 0);
635*dfc6aa5cSAndroid Build Coastguard Worker       vst4_lane_u8(outptr1, rgba1_l, 0);
636*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
637*dfc6aa5cSAndroid Build Coastguard Worker     default:
638*dfc6aa5cSAndroid Build Coastguard Worker       break;
639*dfc6aa5cSAndroid Build Coastguard Worker     }
640*dfc6aa5cSAndroid Build Coastguard Worker #else
641*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x3_t rgb0_h, rgb1_h;
642*dfc6aa5cSAndroid Build Coastguard Worker     rgb0_h.val[RGB_RED] = r0.val[1];
643*dfc6aa5cSAndroid Build Coastguard Worker     rgb1_h.val[RGB_RED] = r1.val[1];
644*dfc6aa5cSAndroid Build Coastguard Worker     rgb0_h.val[RGB_GREEN] = g0.val[1];
645*dfc6aa5cSAndroid Build Coastguard Worker     rgb1_h.val[RGB_GREEN] = g1.val[1];
646*dfc6aa5cSAndroid Build Coastguard Worker     rgb0_h.val[RGB_BLUE] = b0.val[1];
647*dfc6aa5cSAndroid Build Coastguard Worker     rgb1_h.val[RGB_BLUE] = b1.val[1];
648*dfc6aa5cSAndroid Build Coastguard Worker 
649*dfc6aa5cSAndroid Build Coastguard Worker     uint8x8x3_t rgb0_l, rgb1_l;
650*dfc6aa5cSAndroid Build Coastguard Worker     rgb0_l.val[RGB_RED] = r0.val[0];
651*dfc6aa5cSAndroid Build Coastguard Worker     rgb1_l.val[RGB_RED] = r1.val[0];
652*dfc6aa5cSAndroid Build Coastguard Worker     rgb0_l.val[RGB_GREEN] = g0.val[0];
653*dfc6aa5cSAndroid Build Coastguard Worker     rgb1_l.val[RGB_GREEN] = g1.val[0];
654*dfc6aa5cSAndroid Build Coastguard Worker     rgb0_l.val[RGB_BLUE] = b0.val[0];
655*dfc6aa5cSAndroid Build Coastguard Worker     rgb1_l.val[RGB_BLUE] = b1.val[0];
656*dfc6aa5cSAndroid Build Coastguard Worker     /* Store RGB pixel data to memory. */
657*dfc6aa5cSAndroid Build Coastguard Worker     switch (cols_remaining) {
658*dfc6aa5cSAndroid Build Coastguard Worker     case 15:
659*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr0 + 14 * RGB_PIXELSIZE, rgb0_h, 6);
660*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr1 + 14 * RGB_PIXELSIZE, rgb1_h, 6);
661*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
662*dfc6aa5cSAndroid Build Coastguard Worker     case 14:
663*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr0 + 13 * RGB_PIXELSIZE, rgb0_h, 5);
664*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr1 + 13 * RGB_PIXELSIZE, rgb1_h, 5);
665*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
666*dfc6aa5cSAndroid Build Coastguard Worker     case 13:
667*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr0 + 12 * RGB_PIXELSIZE, rgb0_h, 4);
668*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr1 + 12 * RGB_PIXELSIZE, rgb1_h, 4);
669*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
670*dfc6aa5cSAndroid Build Coastguard Worker     case 12:
671*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr0 + 11 * RGB_PIXELSIZE, rgb0_h, 3);
672*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr1 + 11 * RGB_PIXELSIZE, rgb1_h, 3);
673*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
674*dfc6aa5cSAndroid Build Coastguard Worker     case 11:
675*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr0 + 10 * RGB_PIXELSIZE, rgb0_h, 2);
676*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr1 + 10 * RGB_PIXELSIZE, rgb1_h, 2);
677*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
678*dfc6aa5cSAndroid Build Coastguard Worker     case 10:
679*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr0 + 9 * RGB_PIXELSIZE, rgb0_h, 1);
680*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr1 + 9 * RGB_PIXELSIZE, rgb1_h, 1);
681*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
682*dfc6aa5cSAndroid Build Coastguard Worker     case 9:
683*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr0 + 8 * RGB_PIXELSIZE, rgb0_h, 0);
684*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr1 + 8 * RGB_PIXELSIZE, rgb1_h, 0);
685*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
686*dfc6aa5cSAndroid Build Coastguard Worker     case 8:
687*dfc6aa5cSAndroid Build Coastguard Worker       vst3_u8(outptr0, rgb0_l);
688*dfc6aa5cSAndroid Build Coastguard Worker       vst3_u8(outptr1, rgb1_l);
689*dfc6aa5cSAndroid Build Coastguard Worker       break;
690*dfc6aa5cSAndroid Build Coastguard Worker     case 7:
691*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr0 + 6 * RGB_PIXELSIZE, rgb0_l, 6);
692*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr1 + 6 * RGB_PIXELSIZE, rgb1_l, 6);
693*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
694*dfc6aa5cSAndroid Build Coastguard Worker     case 6:
695*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr0 + 5 * RGB_PIXELSIZE, rgb0_l, 5);
696*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr1 + 5 * RGB_PIXELSIZE, rgb1_l, 5);
697*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
698*dfc6aa5cSAndroid Build Coastguard Worker     case 5:
699*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr0 + 4 * RGB_PIXELSIZE, rgb0_l, 4);
700*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr1 + 4 * RGB_PIXELSIZE, rgb1_l, 4);
701*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
702*dfc6aa5cSAndroid Build Coastguard Worker     case 4:
703*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr0 + 3 * RGB_PIXELSIZE, rgb0_l, 3);
704*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr1 + 3 * RGB_PIXELSIZE, rgb1_l, 3);
705*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
706*dfc6aa5cSAndroid Build Coastguard Worker     case 3:
707*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr0 + 2 * RGB_PIXELSIZE, rgb0_l, 2);
708*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr1 + 2 * RGB_PIXELSIZE, rgb1_l, 2);
709*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
710*dfc6aa5cSAndroid Build Coastguard Worker     case 2:
711*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr0 + 1 * RGB_PIXELSIZE, rgb0_l, 1);
712*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr1 + 1 * RGB_PIXELSIZE, rgb1_l, 1);
713*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
714*dfc6aa5cSAndroid Build Coastguard Worker     case 1:
715*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr0, rgb0_l, 0);
716*dfc6aa5cSAndroid Build Coastguard Worker       vst3_lane_u8(outptr1, rgb1_l, 0);
717*dfc6aa5cSAndroid Build Coastguard Worker       FALLTHROUGH               /*FALLTHROUGH*/
718*dfc6aa5cSAndroid Build Coastguard Worker     default:
719*dfc6aa5cSAndroid Build Coastguard Worker       break;
720*dfc6aa5cSAndroid Build Coastguard Worker     }
721*dfc6aa5cSAndroid Build Coastguard Worker #endif
722*dfc6aa5cSAndroid Build Coastguard Worker   }
723*dfc6aa5cSAndroid Build Coastguard Worker }
724