1 /*
2 * jsimd_x86_64.c
3 *
4 * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
5 * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2023, D. R. Commander.
6 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
7 *
8 * Based on the x86 SIMD extension for IJG JPEG library,
9 * Copyright (C) 1999-2006, MIYASAKA Masaru.
10 * For conditions of distribution and use, see copyright notice in jsimdext.inc
11 *
12 * This file contains the interface between the "normal" portions
13 * of the library and the SIMD implementations when running on a
14 * 64-bit x86 architecture.
15 */
16
17 #define JPEG_INTERNALS
18 #include "../../jinclude.h"
19 #include "../../jpeglib.h"
20 #include "../../jsimd.h"
21 #include "../../jdct.h"
22 #include "../../jsimddct.h"
23 #include "../jsimd.h"
24
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment.  These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is nonzero when the address ptr is a multiple of
 * 2^order bytes.  Both arguments are parenthesized so that arbitrary
 * expressions (e.g. a conditional expression) can be passed safely, and the
 * mask is built in size_t so that it has the same width as the converted
 * address.
 */
#define IS_ALIGNED(ptr, order) \
  (((size_t)(ptr) & (((size_t)1 << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED((ptr), 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED((ptr), 5)) /* 32 byte alignment */
33
34 static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
35 static THREAD_LOCAL unsigned int simd_huffman = 1;
36
37 /*
38 * Check what SIMD accelerations are supported.
39 */
40 LOCAL(void)
init_simd(void)41 init_simd(void)
42 {
43 #ifndef NO_GETENV
44 char env[2] = { 0 };
45 #endif
46
47 if (simd_support != ~0U)
48 return;
49
50 simd_support = jpeg_simd_cpu_support();
51
52 #ifndef NO_GETENV
53 /* Force different settings through environment variables */
54 if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
55 simd_support &= JSIMD_SSE2;
56 if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
57 simd_support &= JSIMD_AVX2;
58 if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
59 simd_support = 0;
60 if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
61 simd_huffman = 0;
62 #endif
63 }
64
65 GLOBAL(int)
jsimd_can_rgb_ycc(void)66 jsimd_can_rgb_ycc(void)
67 {
68 init_simd();
69
70 /* The code is optimised for these values only */
71 if (BITS_IN_JSAMPLE != 8)
72 return 0;
73 if (sizeof(JDIMENSION) != 4)
74 return 0;
75 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
76 return 0;
77
78 if ((simd_support & JSIMD_AVX2) &&
79 IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
80 return 1;
81 if ((simd_support & JSIMD_SSE2) &&
82 IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
83 return 1;
84
85 return 0;
86 }
87
88 GLOBAL(int)
jsimd_can_rgb_gray(void)89 jsimd_can_rgb_gray(void)
90 {
91 init_simd();
92
93 /* The code is optimised for these values only */
94 if (BITS_IN_JSAMPLE != 8)
95 return 0;
96 if (sizeof(JDIMENSION) != 4)
97 return 0;
98 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
99 return 0;
100
101 if ((simd_support & JSIMD_AVX2) &&
102 IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
103 return 1;
104 if ((simd_support & JSIMD_SSE2) &&
105 IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
106 return 1;
107
108 return 0;
109 }
110
111 GLOBAL(int)
jsimd_can_ycc_rgb(void)112 jsimd_can_ycc_rgb(void)
113 {
114 init_simd();
115
116 /* The code is optimised for these values only */
117 if (BITS_IN_JSAMPLE != 8)
118 return 0;
119 if (sizeof(JDIMENSION) != 4)
120 return 0;
121 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
122 return 0;
123
124 if ((simd_support & JSIMD_AVX2) &&
125 IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
126 return 1;
127 if ((simd_support & JSIMD_SSE2) &&
128 IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
129 return 1;
130
131 return 0;
132 }
133
134 GLOBAL(int)
jsimd_can_ycc_rgb565(void)135 jsimd_can_ycc_rgb565(void)
136 {
137 return 0;
138 }
139
140 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)141 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
142 JSAMPIMAGE output_buf, JDIMENSION output_row,
143 int num_rows)
144 {
145 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
146 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
147
148 if (simd_support == ~0U)
149 init_simd();
150
151 switch (cinfo->in_color_space) {
152 case JCS_EXT_RGB:
153 avx2fct = jsimd_extrgb_ycc_convert_avx2;
154 sse2fct = jsimd_extrgb_ycc_convert_sse2;
155 break;
156 case JCS_EXT_RGBX:
157 case JCS_EXT_RGBA:
158 avx2fct = jsimd_extrgbx_ycc_convert_avx2;
159 sse2fct = jsimd_extrgbx_ycc_convert_sse2;
160 break;
161 case JCS_EXT_BGR:
162 avx2fct = jsimd_extbgr_ycc_convert_avx2;
163 sse2fct = jsimd_extbgr_ycc_convert_sse2;
164 break;
165 case JCS_EXT_BGRX:
166 case JCS_EXT_BGRA:
167 avx2fct = jsimd_extbgrx_ycc_convert_avx2;
168 sse2fct = jsimd_extbgrx_ycc_convert_sse2;
169 break;
170 case JCS_EXT_XBGR:
171 case JCS_EXT_ABGR:
172 avx2fct = jsimd_extxbgr_ycc_convert_avx2;
173 sse2fct = jsimd_extxbgr_ycc_convert_sse2;
174 break;
175 case JCS_EXT_XRGB:
176 case JCS_EXT_ARGB:
177 avx2fct = jsimd_extxrgb_ycc_convert_avx2;
178 sse2fct = jsimd_extxrgb_ycc_convert_sse2;
179 break;
180 default:
181 avx2fct = jsimd_rgb_ycc_convert_avx2;
182 sse2fct = jsimd_rgb_ycc_convert_sse2;
183 break;
184 }
185
186 if (simd_support & JSIMD_AVX2)
187 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
188 else
189 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
190 }
191
192 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)193 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
194 JSAMPIMAGE output_buf, JDIMENSION output_row,
195 int num_rows)
196 {
197 void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
198 void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
199
200 if (simd_support == ~0U)
201 init_simd();
202
203 switch (cinfo->in_color_space) {
204 case JCS_EXT_RGB:
205 avx2fct = jsimd_extrgb_gray_convert_avx2;
206 sse2fct = jsimd_extrgb_gray_convert_sse2;
207 break;
208 case JCS_EXT_RGBX:
209 case JCS_EXT_RGBA:
210 avx2fct = jsimd_extrgbx_gray_convert_avx2;
211 sse2fct = jsimd_extrgbx_gray_convert_sse2;
212 break;
213 case JCS_EXT_BGR:
214 avx2fct = jsimd_extbgr_gray_convert_avx2;
215 sse2fct = jsimd_extbgr_gray_convert_sse2;
216 break;
217 case JCS_EXT_BGRX:
218 case JCS_EXT_BGRA:
219 avx2fct = jsimd_extbgrx_gray_convert_avx2;
220 sse2fct = jsimd_extbgrx_gray_convert_sse2;
221 break;
222 case JCS_EXT_XBGR:
223 case JCS_EXT_ABGR:
224 avx2fct = jsimd_extxbgr_gray_convert_avx2;
225 sse2fct = jsimd_extxbgr_gray_convert_sse2;
226 break;
227 case JCS_EXT_XRGB:
228 case JCS_EXT_ARGB:
229 avx2fct = jsimd_extxrgb_gray_convert_avx2;
230 sse2fct = jsimd_extxrgb_gray_convert_sse2;
231 break;
232 default:
233 avx2fct = jsimd_rgb_gray_convert_avx2;
234 sse2fct = jsimd_rgb_gray_convert_sse2;
235 break;
236 }
237
238 if (simd_support & JSIMD_AVX2)
239 avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
240 else
241 sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
242 }
243
244 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)245 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
246 JDIMENSION input_row, JSAMPARRAY output_buf,
247 int num_rows)
248 {
249 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
250 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
251
252 if (simd_support == ~0U)
253 init_simd();
254
255 switch (cinfo->out_color_space) {
256 case JCS_EXT_RGB:
257 avx2fct = jsimd_ycc_extrgb_convert_avx2;
258 sse2fct = jsimd_ycc_extrgb_convert_sse2;
259 break;
260 case JCS_EXT_RGBX:
261 case JCS_EXT_RGBA:
262 avx2fct = jsimd_ycc_extrgbx_convert_avx2;
263 sse2fct = jsimd_ycc_extrgbx_convert_sse2;
264 break;
265 case JCS_EXT_BGR:
266 avx2fct = jsimd_ycc_extbgr_convert_avx2;
267 sse2fct = jsimd_ycc_extbgr_convert_sse2;
268 break;
269 case JCS_EXT_BGRX:
270 case JCS_EXT_BGRA:
271 avx2fct = jsimd_ycc_extbgrx_convert_avx2;
272 sse2fct = jsimd_ycc_extbgrx_convert_sse2;
273 break;
274 case JCS_EXT_XBGR:
275 case JCS_EXT_ABGR:
276 avx2fct = jsimd_ycc_extxbgr_convert_avx2;
277 sse2fct = jsimd_ycc_extxbgr_convert_sse2;
278 break;
279 case JCS_EXT_XRGB:
280 case JCS_EXT_ARGB:
281 avx2fct = jsimd_ycc_extxrgb_convert_avx2;
282 sse2fct = jsimd_ycc_extxrgb_convert_sse2;
283 break;
284 default:
285 avx2fct = jsimd_ycc_rgb_convert_avx2;
286 sse2fct = jsimd_ycc_rgb_convert_sse2;
287 break;
288 }
289
290 if (simd_support & JSIMD_AVX2)
291 avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
292 else
293 sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
294 }
295
296 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)297 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
298 JDIMENSION input_row, JSAMPARRAY output_buf,
299 int num_rows)
300 {
301 }
302
303 GLOBAL(int)
jsimd_can_h2v2_downsample(void)304 jsimd_can_h2v2_downsample(void)
305 {
306 init_simd();
307
308 /* The code is optimised for these values only */
309 if (BITS_IN_JSAMPLE != 8)
310 return 0;
311 if (sizeof(JDIMENSION) != 4)
312 return 0;
313
314 if (simd_support & JSIMD_AVX2)
315 return 1;
316 if (simd_support & JSIMD_SSE2)
317 return 1;
318
319 return 0;
320 }
321
322 GLOBAL(int)
jsimd_can_h2v1_downsample(void)323 jsimd_can_h2v1_downsample(void)
324 {
325 init_simd();
326
327 /* The code is optimised for these values only */
328 if (BITS_IN_JSAMPLE != 8)
329 return 0;
330 if (sizeof(JDIMENSION) != 4)
331 return 0;
332
333 if (simd_support & JSIMD_AVX2)
334 return 1;
335 if (simd_support & JSIMD_SSE2)
336 return 1;
337
338 return 0;
339 }
340
341 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)342 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
343 JSAMPARRAY input_data, JSAMPARRAY output_data)
344 {
345 if (simd_support == ~0U)
346 init_simd();
347
348 if (simd_support & JSIMD_AVX2)
349 jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
350 compptr->v_samp_factor,
351 compptr->width_in_blocks, input_data,
352 output_data);
353 else
354 jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
355 compptr->v_samp_factor,
356 compptr->width_in_blocks, input_data,
357 output_data);
358 }
359
360 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)361 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
362 JSAMPARRAY input_data, JSAMPARRAY output_data)
363 {
364 if (simd_support == ~0U)
365 init_simd();
366
367 if (simd_support & JSIMD_AVX2)
368 jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
369 compptr->v_samp_factor,
370 compptr->width_in_blocks, input_data,
371 output_data);
372 else
373 jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
374 compptr->v_samp_factor,
375 compptr->width_in_blocks, input_data,
376 output_data);
377 }
378
379 GLOBAL(int)
jsimd_can_h2v2_upsample(void)380 jsimd_can_h2v2_upsample(void)
381 {
382 init_simd();
383
384 /* The code is optimised for these values only */
385 if (BITS_IN_JSAMPLE != 8)
386 return 0;
387 if (sizeof(JDIMENSION) != 4)
388 return 0;
389
390 if (simd_support & JSIMD_AVX2)
391 return 1;
392 if (simd_support & JSIMD_SSE2)
393 return 1;
394
395 return 0;
396 }
397
398 GLOBAL(int)
jsimd_can_h2v1_upsample(void)399 jsimd_can_h2v1_upsample(void)
400 {
401 init_simd();
402
403 /* The code is optimised for these values only */
404 if (BITS_IN_JSAMPLE != 8)
405 return 0;
406 if (sizeof(JDIMENSION) != 4)
407 return 0;
408
409 if (simd_support & JSIMD_AVX2)
410 return 1;
411 if (simd_support & JSIMD_SSE2)
412 return 1;
413
414 return 0;
415 }
416
417 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)418 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
419 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
420 {
421 if (simd_support == ~0U)
422 init_simd();
423
424 if (simd_support & JSIMD_AVX2)
425 jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
426 input_data, output_data_ptr);
427 else
428 jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
429 input_data, output_data_ptr);
430 }
431
432 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)433 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
434 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
435 {
436 if (simd_support == ~0U)
437 init_simd();
438
439 if (simd_support & JSIMD_AVX2)
440 jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
441 input_data, output_data_ptr);
442 else
443 jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
444 input_data, output_data_ptr);
445 }
446
447 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)448 jsimd_can_h2v2_fancy_upsample(void)
449 {
450 init_simd();
451
452 /* The code is optimised for these values only */
453 if (BITS_IN_JSAMPLE != 8)
454 return 0;
455 if (sizeof(JDIMENSION) != 4)
456 return 0;
457
458 if ((simd_support & JSIMD_AVX2) &&
459 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
460 return 1;
461 if ((simd_support & JSIMD_SSE2) &&
462 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
463 return 1;
464
465 return 0;
466 }
467
468 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)469 jsimd_can_h2v1_fancy_upsample(void)
470 {
471 init_simd();
472
473 /* The code is optimised for these values only */
474 if (BITS_IN_JSAMPLE != 8)
475 return 0;
476 if (sizeof(JDIMENSION) != 4)
477 return 0;
478
479 if ((simd_support & JSIMD_AVX2) &&
480 IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
481 return 1;
482 if ((simd_support & JSIMD_SSE2) &&
483 IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
484 return 1;
485
486 return 0;
487 }
488
489 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)490 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
491 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
492 {
493 if (simd_support == ~0U)
494 init_simd();
495
496 if (simd_support & JSIMD_AVX2)
497 jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
498 compptr->downsampled_width, input_data,
499 output_data_ptr);
500 else
501 jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
502 compptr->downsampled_width, input_data,
503 output_data_ptr);
504 }
505
506 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)507 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
508 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
509 {
510 if (simd_support == ~0U)
511 init_simd();
512
513 if (simd_support & JSIMD_AVX2)
514 jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
515 compptr->downsampled_width, input_data,
516 output_data_ptr);
517 else
518 jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
519 compptr->downsampled_width, input_data,
520 output_data_ptr);
521 }
522
523 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)524 jsimd_can_h2v2_merged_upsample(void)
525 {
526 init_simd();
527
528 /* The code is optimised for these values only */
529 if (BITS_IN_JSAMPLE != 8)
530 return 0;
531 if (sizeof(JDIMENSION) != 4)
532 return 0;
533
534 if ((simd_support & JSIMD_AVX2) &&
535 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
536 return 1;
537 if ((simd_support & JSIMD_SSE2) &&
538 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
539 return 1;
540
541 return 0;
542 }
543
544 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)545 jsimd_can_h2v1_merged_upsample(void)
546 {
547 init_simd();
548
549 /* The code is optimised for these values only */
550 if (BITS_IN_JSAMPLE != 8)
551 return 0;
552 if (sizeof(JDIMENSION) != 4)
553 return 0;
554
555 if ((simd_support & JSIMD_AVX2) &&
556 IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
557 return 1;
558 if ((simd_support & JSIMD_SSE2) &&
559 IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
560 return 1;
561
562 return 0;
563 }
564
565 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)566 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
567 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
568 {
569 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
570 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
571
572 if (simd_support == ~0U)
573 init_simd();
574
575 switch (cinfo->out_color_space) {
576 case JCS_EXT_RGB:
577 avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
578 sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
579 break;
580 case JCS_EXT_RGBX:
581 case JCS_EXT_RGBA:
582 avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
583 sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
584 break;
585 case JCS_EXT_BGR:
586 avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
587 sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
588 break;
589 case JCS_EXT_BGRX:
590 case JCS_EXT_BGRA:
591 avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
592 sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
593 break;
594 case JCS_EXT_XBGR:
595 case JCS_EXT_ABGR:
596 avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
597 sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
598 break;
599 case JCS_EXT_XRGB:
600 case JCS_EXT_ARGB:
601 avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
602 sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
603 break;
604 default:
605 avx2fct = jsimd_h2v2_merged_upsample_avx2;
606 sse2fct = jsimd_h2v2_merged_upsample_sse2;
607 break;
608 }
609
610 if (simd_support & JSIMD_AVX2)
611 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
612 else
613 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
614 }
615
616 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)617 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
618 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
619 {
620 void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
621 void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
622
623 if (simd_support == ~0U)
624 init_simd();
625
626 switch (cinfo->out_color_space) {
627 case JCS_EXT_RGB:
628 avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
629 sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
630 break;
631 case JCS_EXT_RGBX:
632 case JCS_EXT_RGBA:
633 avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
634 sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
635 break;
636 case JCS_EXT_BGR:
637 avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
638 sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
639 break;
640 case JCS_EXT_BGRX:
641 case JCS_EXT_BGRA:
642 avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
643 sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
644 break;
645 case JCS_EXT_XBGR:
646 case JCS_EXT_ABGR:
647 avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
648 sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
649 break;
650 case JCS_EXT_XRGB:
651 case JCS_EXT_ARGB:
652 avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
653 sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
654 break;
655 default:
656 avx2fct = jsimd_h2v1_merged_upsample_avx2;
657 sse2fct = jsimd_h2v1_merged_upsample_sse2;
658 break;
659 }
660
661 if (simd_support & JSIMD_AVX2)
662 avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
663 else
664 sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
665 }
666
667 GLOBAL(int)
jsimd_can_convsamp(void)668 jsimd_can_convsamp(void)
669 {
670 init_simd();
671
672 /* The code is optimised for these values only */
673 if (DCTSIZE != 8)
674 return 0;
675 if (BITS_IN_JSAMPLE != 8)
676 return 0;
677 if (sizeof(JDIMENSION) != 4)
678 return 0;
679 if (sizeof(DCTELEM) != 2)
680 return 0;
681
682 if (simd_support & JSIMD_AVX2)
683 return 1;
684 if (simd_support & JSIMD_SSE2)
685 return 1;
686
687 return 0;
688 }
689
690 GLOBAL(int)
jsimd_can_convsamp_float(void)691 jsimd_can_convsamp_float(void)
692 {
693 init_simd();
694
695 /* The code is optimised for these values only */
696 if (DCTSIZE != 8)
697 return 0;
698 if (BITS_IN_JSAMPLE != 8)
699 return 0;
700 if (sizeof(JDIMENSION) != 4)
701 return 0;
702 if (sizeof(FAST_FLOAT) != 4)
703 return 0;
704
705 if (simd_support & JSIMD_SSE2)
706 return 1;
707
708 return 0;
709 }
710
711 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)712 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
713 DCTELEM *workspace)
714 {
715 if (simd_support == ~0U)
716 init_simd();
717
718 if (simd_support & JSIMD_AVX2)
719 jsimd_convsamp_avx2(sample_data, start_col, workspace);
720 else
721 jsimd_convsamp_sse2(sample_data, start_col, workspace);
722 }
723
724 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)725 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
726 FAST_FLOAT *workspace)
727 {
728 jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
729 }
730
731 GLOBAL(int)
jsimd_can_fdct_islow(void)732 jsimd_can_fdct_islow(void)
733 {
734 init_simd();
735
736 /* The code is optimised for these values only */
737 if (DCTSIZE != 8)
738 return 0;
739 if (sizeof(DCTELEM) != 2)
740 return 0;
741
742 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
743 return 1;
744 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
745 return 1;
746
747 return 0;
748 }
749
750 GLOBAL(int)
jsimd_can_fdct_ifast(void)751 jsimd_can_fdct_ifast(void)
752 {
753 init_simd();
754
755 /* The code is optimised for these values only */
756 if (DCTSIZE != 8)
757 return 0;
758 if (sizeof(DCTELEM) != 2)
759 return 0;
760
761 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
762 return 1;
763
764 return 0;
765 }
766
767 GLOBAL(int)
jsimd_can_fdct_float(void)768 jsimd_can_fdct_float(void)
769 {
770 init_simd();
771
772 /* The code is optimised for these values only */
773 if (DCTSIZE != 8)
774 return 0;
775 if (sizeof(FAST_FLOAT) != 4)
776 return 0;
777
778 if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
779 return 1;
780
781 return 0;
782 }
783
784 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)785 jsimd_fdct_islow(DCTELEM *data)
786 {
787 if (simd_support == ~0U)
788 init_simd();
789
790 if (simd_support & JSIMD_AVX2)
791 jsimd_fdct_islow_avx2(data);
792 else
793 jsimd_fdct_islow_sse2(data);
794 }
795
796 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)797 jsimd_fdct_ifast(DCTELEM *data)
798 {
799 jsimd_fdct_ifast_sse2(data);
800 }
801
802 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)803 jsimd_fdct_float(FAST_FLOAT *data)
804 {
805 jsimd_fdct_float_sse(data);
806 }
807
808 GLOBAL(int)
jsimd_can_quantize(void)809 jsimd_can_quantize(void)
810 {
811 init_simd();
812
813 /* The code is optimised for these values only */
814 if (DCTSIZE != 8)
815 return 0;
816 if (sizeof(JCOEF) != 2)
817 return 0;
818 if (sizeof(DCTELEM) != 2)
819 return 0;
820
821 if (simd_support & JSIMD_AVX2)
822 return 1;
823 if (simd_support & JSIMD_SSE2)
824 return 1;
825
826 return 0;
827 }
828
829 GLOBAL(int)
jsimd_can_quantize_float(void)830 jsimd_can_quantize_float(void)
831 {
832 init_simd();
833
834 /* The code is optimised for these values only */
835 if (DCTSIZE != 8)
836 return 0;
837 if (sizeof(JCOEF) != 2)
838 return 0;
839 if (sizeof(FAST_FLOAT) != 4)
840 return 0;
841
842 if (simd_support & JSIMD_SSE2)
843 return 1;
844
845 return 0;
846 }
847
848 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)849 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
850 {
851 if (simd_support == ~0U)
852 init_simd();
853
854 if (simd_support & JSIMD_AVX2)
855 jsimd_quantize_avx2(coef_block, divisors, workspace);
856 else
857 jsimd_quantize_sse2(coef_block, divisors, workspace);
858 }
859
860 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)861 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
862 FAST_FLOAT *workspace)
863 {
864 jsimd_quantize_float_sse2(coef_block, divisors, workspace);
865 }
866
867 GLOBAL(int)
jsimd_can_idct_2x2(void)868 jsimd_can_idct_2x2(void)
869 {
870 init_simd();
871
872 /* The code is optimised for these values only */
873 if (DCTSIZE != 8)
874 return 0;
875 if (sizeof(JCOEF) != 2)
876 return 0;
877 if (BITS_IN_JSAMPLE != 8)
878 return 0;
879 if (sizeof(JDIMENSION) != 4)
880 return 0;
881 if (sizeof(ISLOW_MULT_TYPE) != 2)
882 return 0;
883
884 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
885 return 1;
886
887 return 0;
888 }
889
890 GLOBAL(int)
jsimd_can_idct_4x4(void)891 jsimd_can_idct_4x4(void)
892 {
893 init_simd();
894
895 /* The code is optimised for these values only */
896 if (DCTSIZE != 8)
897 return 0;
898 if (sizeof(JCOEF) != 2)
899 return 0;
900 if (BITS_IN_JSAMPLE != 8)
901 return 0;
902 if (sizeof(JDIMENSION) != 4)
903 return 0;
904 if (sizeof(ISLOW_MULT_TYPE) != 2)
905 return 0;
906
907 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
908 return 1;
909
910 return 0;
911 }
912
913 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)914 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
915 JCOEFPTR coef_block, JSAMPARRAY output_buf,
916 JDIMENSION output_col)
917 {
918 jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
919 }
920
921 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)922 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
923 JCOEFPTR coef_block, JSAMPARRAY output_buf,
924 JDIMENSION output_col)
925 {
926 jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
927 }
928
929 GLOBAL(int)
jsimd_can_idct_islow(void)930 jsimd_can_idct_islow(void)
931 {
932 init_simd();
933
934 /* The code is optimised for these values only */
935 if (DCTSIZE != 8)
936 return 0;
937 if (sizeof(JCOEF) != 2)
938 return 0;
939 if (BITS_IN_JSAMPLE != 8)
940 return 0;
941 if (sizeof(JDIMENSION) != 4)
942 return 0;
943 if (sizeof(ISLOW_MULT_TYPE) != 2)
944 return 0;
945
946 if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
947 return 1;
948 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
949 return 1;
950
951 return 0;
952 }
953
954 GLOBAL(int)
jsimd_can_idct_ifast(void)955 jsimd_can_idct_ifast(void)
956 {
957 init_simd();
958
959 /* The code is optimised for these values only */
960 if (DCTSIZE != 8)
961 return 0;
962 if (sizeof(JCOEF) != 2)
963 return 0;
964 if (BITS_IN_JSAMPLE != 8)
965 return 0;
966 if (sizeof(JDIMENSION) != 4)
967 return 0;
968 if (sizeof(IFAST_MULT_TYPE) != 2)
969 return 0;
970 if (IFAST_SCALE_BITS != 2)
971 return 0;
972
973 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
974 return 1;
975
976 return 0;
977 }
978
979 GLOBAL(int)
jsimd_can_idct_float(void)980 jsimd_can_idct_float(void)
981 {
982 init_simd();
983
984 if (DCTSIZE != 8)
985 return 0;
986 if (sizeof(JCOEF) != 2)
987 return 0;
988 if (BITS_IN_JSAMPLE != 8)
989 return 0;
990 if (sizeof(JDIMENSION) != 4)
991 return 0;
992 if (sizeof(FAST_FLOAT) != 4)
993 return 0;
994 if (sizeof(FLOAT_MULT_TYPE) != 4)
995 return 0;
996
997 if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
998 return 1;
999
1000 return 0;
1001 }
1002
1003 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1004 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1005 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1006 JDIMENSION output_col)
1007 {
1008 if (simd_support == ~0U)
1009 init_simd();
1010
1011 if (simd_support & JSIMD_AVX2)
1012 jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
1013 output_col);
1014 else
1015 jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1016 output_col);
1017 }
1018
1019 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1020 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1021 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1022 JDIMENSION output_col)
1023 {
1024 jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1025 output_col);
1026 }
1027
1028 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1029 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1030 JCOEFPTR coef_block, JSAMPARRAY output_buf,
1031 JDIMENSION output_col)
1032 {
1033 jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1034 output_col);
1035 }
1036
1037 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)1038 jsimd_can_huff_encode_one_block(void)
1039 {
1040 init_simd();
1041
1042 if (DCTSIZE != 8)
1043 return 0;
1044 if (sizeof(JCOEF) != 2)
1045 return 0;
1046
1047 if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1048 IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1049 return 1;
1050
1051 return 0;
1052 }
1053
1054 GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)1055 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
1056 int last_dc_val, c_derived_tbl *dctbl,
1057 c_derived_tbl *actbl)
1058 {
1059 return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
1060 dctbl, actbl);
1061 }
1062
1063 GLOBAL(int)
jsimd_can_encode_mcu_AC_first_prepare(void)1064 jsimd_can_encode_mcu_AC_first_prepare(void)
1065 {
1066 init_simd();
1067
1068 if (DCTSIZE != 8)
1069 return 0;
1070 if (sizeof(JCOEF) != 2)
1071 return 0;
1072 if (simd_support & JSIMD_SSE2)
1073 return 1;
1074
1075 return 0;
1076 }
1077
1078 GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,UJCOEF * values,size_t * zerobits)1079 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1080 const int *jpeg_natural_order_start, int Sl,
1081 int Al, UJCOEF *values, size_t *zerobits)
1082 {
1083 jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1084 Sl, Al, values, zerobits);
1085 }
1086
1087 GLOBAL(int)
jsimd_can_encode_mcu_AC_refine_prepare(void)1088 jsimd_can_encode_mcu_AC_refine_prepare(void)
1089 {
1090 init_simd();
1091
1092 if (DCTSIZE != 8)
1093 return 0;
1094 if (sizeof(JCOEF) != 2)
1095 return 0;
1096 if (simd_support & JSIMD_SSE2)
1097 return 1;
1098
1099 return 0;
1100 }
1101
1102 GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,UJCOEF * absvalues,size_t * bits)1103 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1104 const int *jpeg_natural_order_start, int Sl,
1105 int Al, UJCOEF *absvalues, size_t *bits)
1106 {
1107 return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1108 jpeg_natural_order_start,
1109 Sl, Al, absvalues, bits);
1110 }
1111