xref: /aosp_15_r20/external/libjpeg-turbo/simd/arm/jdcolext-neon.c (revision dfc6aa5c1cfd4bc4e2018dc74aa96e29ee49c6da)
1*dfc6aa5cSAndroid Build Coastguard Worker /*
2*dfc6aa5cSAndroid Build Coastguard Worker  * jdcolext-neon.c - colorspace conversion (Arm Neon)
3*dfc6aa5cSAndroid Build Coastguard Worker  *
4*dfc6aa5cSAndroid Build Coastguard Worker  * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
5*dfc6aa5cSAndroid Build Coastguard Worker  * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
6*dfc6aa5cSAndroid Build Coastguard Worker  *
7*dfc6aa5cSAndroid Build Coastguard Worker  * This software is provided 'as-is', without any express or implied
8*dfc6aa5cSAndroid Build Coastguard Worker  * warranty.  In no event will the authors be held liable for any damages
9*dfc6aa5cSAndroid Build Coastguard Worker  * arising from the use of this software.
10*dfc6aa5cSAndroid Build Coastguard Worker  *
11*dfc6aa5cSAndroid Build Coastguard Worker  * Permission is granted to anyone to use this software for any purpose,
12*dfc6aa5cSAndroid Build Coastguard Worker  * including commercial applications, and to alter it and redistribute it
13*dfc6aa5cSAndroid Build Coastguard Worker  * freely, subject to the following restrictions:
14*dfc6aa5cSAndroid Build Coastguard Worker  *
15*dfc6aa5cSAndroid Build Coastguard Worker  * 1. The origin of this software must not be misrepresented; you must not
16*dfc6aa5cSAndroid Build Coastguard Worker  *    claim that you wrote the original software. If you use this software
17*dfc6aa5cSAndroid Build Coastguard Worker  *    in a product, an acknowledgment in the product documentation would be
18*dfc6aa5cSAndroid Build Coastguard Worker  *    appreciated but is not required.
19*dfc6aa5cSAndroid Build Coastguard Worker  * 2. Altered source versions must be plainly marked as such, and must not be
20*dfc6aa5cSAndroid Build Coastguard Worker  *    misrepresented as being the original software.
21*dfc6aa5cSAndroid Build Coastguard Worker  * 3. This notice may not be removed or altered from any source distribution.
22*dfc6aa5cSAndroid Build Coastguard Worker  */
23*dfc6aa5cSAndroid Build Coastguard Worker 
24*dfc6aa5cSAndroid Build Coastguard Worker /* This file is included by jdcolor-neon.c. */
25*dfc6aa5cSAndroid Build Coastguard Worker 
26*dfc6aa5cSAndroid Build Coastguard Worker 
/* YCbCr -> RGB conversion is defined by the following equations:
 *    R = Y                        + 1.40200 * (Cr - 128)
 *    G = Y - 0.34414 * (Cb - 128) - 0.71414 * (Cr - 128)
 *    B = Y + 1.77200 * (Cb - 128)
 *
 * Scaled integer constants are used to avoid floating-point arithmetic:
 *    0.3441467 = 11277 * 2^-15
 *    0.7141418 = 23401 * 2^-15
 *    1.4020386 = 22971 * 2^-14
 *    1.7720337 = 29033 * 2^-14
 * These constants are defined in jdcolor-neon.c.
 *
 * To ensure correct results, rounding is used when descaling.
 */
41*dfc6aa5cSAndroid Build Coastguard Worker 
/* Notes on safe memory access for YCbCr -> RGB conversion routines:
 *
 * Input memory buffers can be safely overread up to the next multiple of
 * ALIGN_SIZE bytes, since they are always allocated by alloc_sarray() in
 * jmemmgr.c.
 *
 * The output buffer cannot safely be written beyond output_width, since
 * output_buf points to a possibly unpadded row in the decompressed image
 * buffer allocated by the calling program.
 */
52*dfc6aa5cSAndroid Build Coastguard Worker 
/*
 * Convert rows of planar Y/Cb/Cr samples into packed RGB pixels using Neon.
 *
 * output_width  number of pixels to produce in each row
 * input_buf     three sample planes; index 0 is Y, 1 is Cb, 2 is Cr
 * input_row     index of the first row to read in each plane
 * output_buf    array of destination row pointers, one per converted row
 * num_rows      number of rows to convert (nothing is done if <= 0)
 *
 * The pixel layout is chosen at compile time: RGB_PIXELSIZE == 3 or 4 emits
 * interleaved 8-bit channels at the offsets RGB_RED / RGB_GREEN / RGB_BLUE
 * (plus an opaque RGB_ALPHA for 4), any other value emits 16-bit 5:6:5
 * pixels.
 *
 * Each row is processed 16 pixels at a time, then 8 at a time for whatever
 * is left.  The final, partial group still loads 8 samples from every input
 * row (this relies on input rows being padded by their allocator) but only
 * the valid pixels are stored, so the output row is never overrun.
 */
void jsimd_ycc_rgb_convert_neon(JDIMENSION output_width, JSAMPIMAGE input_buf,
                                JDIMENSION input_row, JSAMPARRAY output_buf,
                                int num_rows)
{
  /* Fixed-point coefficients, one per lane: lanes 0 and 1 feed G - Y,
   * lane 2 scales Cr for R - Y, lane 3 scales Cb for B - Y.
   */
  const int16x4_t coeffs = vld1_s16(jsimd_ycc_rgb_convert_neon_consts);
  /* -128 in every lane, viewed as unsigned so that vaddw_u8() can widen a
   * chroma sample and remove its offset in a single step; the sum wraps
   * modulo 2^16 and is reinterpreted as signed afterwards.
   */
  const uint16x8_t chroma_bias = vreinterpretq_u16_s16(vdupq_n_s16(-128));

  for (; num_rows > 0; num_rows--, input_row++, output_buf++) {
    JSAMPROW y_in = input_buf[0][input_row];
    JSAMPROW cb_in = input_buf[1][input_row];
    JSAMPROW cr_in = input_buf[2][input_row];
    JSAMPROW out = *output_buf;
    int cols_left = output_width;

    /* Main loop: 16 pixels per iteration. */
    while (cols_left >= 16) {
      const uint8x16_t y_vec = vld1q_u8(y_in);
      const uint8x16_t cb_vec = vld1q_u8(cb_in);
      const uint8x16_t cr_vec = vld1q_u8(cr_in);
      const uint8x8_t y_lo = vget_low_u8(y_vec);
      const uint8x8_t y_hi = vget_high_u8(y_vec);

      /* Cb - 128 and Cr - 128 as signed 16-bit lanes. */
      const int16x8_t cb_lo =
        vreinterpretq_s16_u16(vaddw_u8(chroma_bias, vget_low_u8(cb_vec)));
      const int16x8_t cb_hi =
        vreinterpretq_s16_u16(vaddw_u8(chroma_bias, vget_high_u8(cb_vec)));
      const int16x8_t cr_lo =
        vreinterpretq_s16_u16(vaddw_u8(chroma_bias, vget_low_u8(cr_vec)));
      const int16x8_t cr_hi =
        vreinterpretq_s16_u16(vaddw_u8(chroma_bias, vget_high_u8(cr_vec)));

      /* G - Y = -0.34414 * (Cb - 128) - 0.71414 * (Cr - 128), accumulated
       * in 32 bits: the Cb product (lane 0) minus the Cr product (lane 1).
       */
      const int32x4_t g_acc0 =
        vmlsl_lane_s16(vmull_lane_s16(vget_low_s16(cb_lo), coeffs, 0),
                       vget_low_s16(cr_lo), coeffs, 1);
      const int32x4_t g_acc1 =
        vmlsl_lane_s16(vmull_lane_s16(vget_high_s16(cb_lo), coeffs, 0),
                       vget_high_s16(cr_lo), coeffs, 1);
      const int32x4_t g_acc2 =
        vmlsl_lane_s16(vmull_lane_s16(vget_low_s16(cb_hi), coeffs, 0),
                       vget_low_s16(cr_hi), coeffs, 1);
      const int32x4_t g_acc3 =
        vmlsl_lane_s16(vmull_lane_s16(vget_high_s16(cb_hi), coeffs, 0),
                       vget_high_s16(cr_hi), coeffs, 1);
      /* Rounding shift right by 15 removes the scale and narrows to 16 bits. */
      const int16x8_t g_diff_lo =
        vcombine_s16(vrshrn_n_s32(g_acc0, 15), vrshrn_n_s32(g_acc1, 15));
      const int16x8_t g_diff_hi =
        vcombine_s16(vrshrn_n_s32(g_acc2, 15), vrshrn_n_s32(g_acc3, 15));

      /* R - Y = 1.40200 * (Cr - 128) and B - Y = 1.77200 * (Cb - 128).
       * The operand is doubled first because these two coefficients are
       * scaled by 2^14, whereas the rounding doubling multiply-high
       * effectively descales by 2^15.
       */
      const int16x8_t r_diff_lo =
        vqrdmulhq_lane_s16(vshlq_n_s16(cr_lo, 1), coeffs, 2);
      const int16x8_t r_diff_hi =
        vqrdmulhq_lane_s16(vshlq_n_s16(cr_hi, 1), coeffs, 2);
      const int16x8_t b_diff_lo =
        vqrdmulhq_lane_s16(vshlq_n_s16(cb_lo, 1), coeffs, 3);
      const int16x8_t b_diff_hi =
        vqrdmulhq_lane_s16(vshlq_n_s16(cb_hi, 1), coeffs, 3);

      /* Add the (widened) luma back onto each colour difference. */
      const int16x8_t red_lo =
        vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(r_diff_lo),
                                       y_lo));
      const int16x8_t red_hi =
        vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(r_diff_hi),
                                       y_hi));
      const int16x8_t blue_lo =
        vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(b_diff_lo),
                                       y_lo));
      const int16x8_t blue_hi =
        vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(b_diff_hi),
                                       y_hi));
      const int16x8_t green_lo =
        vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(g_diff_lo),
                                       y_lo));
      const int16x8_t green_hi =
        vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(g_diff_hi),
                                       y_hi));

#if RGB_PIXELSIZE == 3
      uint8x16x3_t px;
      /* Saturating narrow clamps every channel to [0, 255]. */
      px.val[RGB_RED] = vcombine_u8(vqmovun_s16(red_lo), vqmovun_s16(red_hi));
      px.val[RGB_GREEN] = vcombine_u8(vqmovun_s16(green_lo),
                                      vqmovun_s16(green_hi));
      px.val[RGB_BLUE] = vcombine_u8(vqmovun_s16(blue_lo),
                                     vqmovun_s16(blue_hi));
      /* Interleave and store 16 three-byte pixels. */
      vst3q_u8(out, px);
#elif RGB_PIXELSIZE == 4
      uint8x16x4_t px;
      /* Saturating narrow clamps every channel to [0, 255]. */
      px.val[RGB_RED] = vcombine_u8(vqmovun_s16(red_lo), vqmovun_s16(red_hi));
      px.val[RGB_GREEN] = vcombine_u8(vqmovun_s16(green_lo),
                                      vqmovun_s16(green_hi));
      px.val[RGB_BLUE] = vcombine_u8(vqmovun_s16(blue_lo),
                                     vqmovun_s16(blue_hi));
      /* Fully opaque alpha. */
      px.val[RGB_ALPHA] = vdupq_n_u8(0xFF);
      /* Interleave and store 16 four-byte pixels. */
      vst4q_u8(out, px);
#else
      /* 5:6:5 packing: the saturating shift puts each clamped channel in the
       * top byte of its lane, then shift-right-insert slots green below the
       * top 5 bits and blue below the top 11 bits.
       */
      uint16x8_t packed_lo = vqshluq_n_s16(red_lo, 8);
      uint16x8_t packed_hi = vqshluq_n_s16(red_hi, 8);
      packed_lo = vsriq_n_u16(packed_lo, vqshluq_n_s16(green_lo, 8), 5);
      packed_lo = vsriq_n_u16(packed_lo, vqshluq_n_s16(blue_lo, 8), 11);
      packed_hi = vsriq_n_u16(packed_hi, vqshluq_n_s16(green_hi, 8), 5);
      packed_hi = vsriq_n_u16(packed_hi, vqshluq_n_s16(blue_hi, 8), 11);
      /* Store 16 two-byte pixels. */
      vst1q_u16((uint16_t *)out, packed_lo);
      vst1q_u16((uint16_t *)out + 8, packed_hi);
#endif

      y_in += 16;
      cb_in += 16;
      cr_in += 16;
      out += (RGB_PIXELSIZE * 16);
      cols_left -= 16;
    }

    /* At most 15 columns remain: convert them 8 at a time.  This loop runs
     * at most twice, first for a complete group of 8 (if there is one) and
     * then for a final partial group of 1..7 pixels.
     */
    while (cols_left > 0) {
      const uint8x8_t y_vec = vld1_u8(y_in);
      const uint8x8_t cb_vec = vld1_u8(cb_in);
      const uint8x8_t cr_vec = vld1_u8(cr_in);

      /* Cb - 128 and Cr - 128 as signed 16-bit lanes. */
      const int16x8_t cb_s16 =
        vreinterpretq_s16_u16(vaddw_u8(chroma_bias, cb_vec));
      const int16x8_t cr_s16 =
        vreinterpretq_s16_u16(vaddw_u8(chroma_bias, cr_vec));

      /* G - Y = -0.34414 * (Cb - 128) - 0.71414 * (Cr - 128) */
      const int32x4_t g_acc_lo =
        vmlsl_lane_s16(vmull_lane_s16(vget_low_s16(cb_s16), coeffs, 0),
                       vget_low_s16(cr_s16), coeffs, 1);
      const int32x4_t g_acc_hi =
        vmlsl_lane_s16(vmull_lane_s16(vget_high_s16(cb_s16), coeffs, 0),
                       vget_high_s16(cr_s16), coeffs, 1);
      /* Rounding shift right by 15 removes the scale and narrows to 16 bits. */
      const int16x8_t g_diff =
        vcombine_s16(vrshrn_n_s32(g_acc_lo, 15), vrshrn_n_s32(g_acc_hi, 15));

      /* R - Y = 1.40200 * (Cr - 128) */
      const int16x8_t r_diff =
        vqrdmulhq_lane_s16(vshlq_n_s16(cr_s16, 1), coeffs, 2);
      /* B - Y = 1.77200 * (Cb - 128) */
      const int16x8_t b_diff =
        vqrdmulhq_lane_s16(vshlq_n_s16(cb_s16, 1), coeffs, 3);

      /* Add the (widened) luma back onto each colour difference. */
      const int16x8_t red =
        vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(r_diff), y_vec));
      const int16x8_t blue =
        vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(b_diff), y_vec));
      const int16x8_t green =
        vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(g_diff), y_vec));

#if RGB_PIXELSIZE == 3
      uint8x8x3_t px;
      /* Saturating narrow clamps every channel to [0, 255]. */
      px.val[RGB_RED] = vqmovun_s16(red);
      px.val[RGB_GREEN] = vqmovun_s16(green);
      px.val[RGB_BLUE] = vqmovun_s16(blue);
#elif RGB_PIXELSIZE == 4
      uint8x8x4_t px;
      /* Saturating narrow clamps every channel to [0, 255]. */
      px.val[RGB_RED] = vqmovun_s16(red);
      px.val[RGB_GREEN] = vqmovun_s16(green);
      px.val[RGB_BLUE] = vqmovun_s16(blue);
      /* Fully opaque alpha. */
      px.val[RGB_ALPHA] = vdup_n_u8(0xFF);
#else
      /* 5:6:5 packing, as in the 16-pixel loop. */
      uint16x8_t packed = vqshluq_n_s16(red, 8);
      packed = vsriq_n_u16(packed, vqshluq_n_s16(green, 8), 5);
      packed = vsriq_n_u16(packed, vqshluq_n_s16(blue, 8), 11);
#endif

      if (cols_left >= 8) {
        /* A complete group: store all 8 pixels at once. */
#if RGB_PIXELSIZE == 3
        vst3_u8(out, px);
#elif RGB_PIXELSIZE == 4
        vst4_u8(out, px);
#else
        vst1q_u16((uint16_t *)out, packed);
#endif
        y_in += 8;
        cb_in += 8;
        cr_in += 8;
        out += (RGB_PIXELSIZE * 8);
        cols_left -= 8;
      } else {
        /* A partial group of 1..7 pixels: store lane by lane, highest valid
         * lane first, so that nothing past output_width is written.
         */
#if RGB_PIXELSIZE == 3
        if (cols_left > 6) {
          vst3_lane_u8(out + 6 * RGB_PIXELSIZE, px, 6);
        }
        if (cols_left > 5) {
          vst3_lane_u8(out + 5 * RGB_PIXELSIZE, px, 5);
        }
        if (cols_left > 4) {
          vst3_lane_u8(out + 4 * RGB_PIXELSIZE, px, 4);
        }
        if (cols_left > 3) {
          vst3_lane_u8(out + 3 * RGB_PIXELSIZE, px, 3);
        }
        if (cols_left > 2) {
          vst3_lane_u8(out + 2 * RGB_PIXELSIZE, px, 2);
        }
        if (cols_left > 1) {
          vst3_lane_u8(out + RGB_PIXELSIZE, px, 1);
        }
        vst3_lane_u8(out, px, 0);
#elif RGB_PIXELSIZE == 4
        if (cols_left > 6) {
          vst4_lane_u8(out + 6 * RGB_PIXELSIZE, px, 6);
        }
        if (cols_left > 5) {
          vst4_lane_u8(out + 5 * RGB_PIXELSIZE, px, 5);
        }
        if (cols_left > 4) {
          vst4_lane_u8(out + 4 * RGB_PIXELSIZE, px, 4);
        }
        if (cols_left > 3) {
          vst4_lane_u8(out + 3 * RGB_PIXELSIZE, px, 3);
        }
        if (cols_left > 2) {
          vst4_lane_u8(out + 2 * RGB_PIXELSIZE, px, 2);
        }
        if (cols_left > 1) {
          vst4_lane_u8(out + RGB_PIXELSIZE, px, 1);
        }
        vst4_lane_u8(out, px, 0);
#else
        if (cols_left > 6) {
          vst1q_lane_u16((uint16_t *)(out + 6 * RGB_PIXELSIZE), packed, 6);
        }
        if (cols_left > 5) {
          vst1q_lane_u16((uint16_t *)(out + 5 * RGB_PIXELSIZE), packed, 5);
        }
        if (cols_left > 4) {
          vst1q_lane_u16((uint16_t *)(out + 4 * RGB_PIXELSIZE), packed, 4);
        }
        if (cols_left > 3) {
          vst1q_lane_u16((uint16_t *)(out + 3 * RGB_PIXELSIZE), packed, 3);
        }
        if (cols_left > 2) {
          vst1q_lane_u16((uint16_t *)(out + 2 * RGB_PIXELSIZE), packed, 2);
        }
        if (cols_left > 1) {
          vst1q_lane_u16((uint16_t *)(out + RGB_PIXELSIZE), packed, 1);
        }
        vst1q_lane_u16((uint16_t *)out, packed, 0);
#endif
        /* The row is finished. */
        cols_left = 0;
      }
    }
  }
}
375