xref: /aosp_15_r20/external/libjpeg-turbo/simd/x86_64/jsimd.c (revision dfc6aa5c1cfd4bc4e2018dc74aa96e29ee49c6da)
1 /*
2  * jsimd_x86_64.c
3  *
4  * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
5  * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2023, D. R. Commander.
6  * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
7  *
8  * Based on the x86 SIMD extension for IJG JPEG library,
9  * Copyright (C) 1999-2006, MIYASAKA Masaru.
10  * For conditions of distribution and use, see copyright notice in jsimdext.inc
11  *
12  * This file contains the interface between the "normal" portions
13  * of the library and the SIMD implementations when running on a
14  * 64-bit x86 architecture.
15  */
16 
17 #define JPEG_INTERNALS
18 #include "../../jinclude.h"
19 #include "../../jpeglib.h"
20 #include "../../jsimd.h"
21 #include "../../jdct.h"
22 #include "../../jsimddct.h"
23 #include "../jsimd.h"
24 
/*
 * In the PIC cases, we have no guarantee that constants will keep
 * their alignment.  These macros allow us to verify it at runtime.
 *
 * IS_ALIGNED(ptr, order) is nonzero when the address ptr is a multiple of
 * (1 << order) bytes.  Both arguments are parenthesized inside the expansion
 * so that an expression argument (e.g. "base + 4") is evaluated as a whole
 * before the cast / shift is applied, and the mask is built in size_t so it
 * has the same width as the converted address.
 */
#define IS_ALIGNED(ptr, order) \
  ((((size_t)(ptr)) & ((((size_t)1) << (order)) - 1)) == 0)

#define IS_ALIGNED_SSE(ptr)  (IS_ALIGNED(ptr, 4)) /* 16 byte alignment */
#define IS_ALIGNED_AVX(ptr)  (IS_ALIGNED(ptr, 5)) /* 32 byte alignment */
33 
34 static THREAD_LOCAL unsigned int simd_support = (unsigned int)(~0);
35 static THREAD_LOCAL unsigned int simd_huffman = 1;
36 
37 /*
38  * Check what SIMD accelerations are supported.
39  */
40 LOCAL(void)
init_simd(void)41 init_simd(void)
42 {
43 #ifndef NO_GETENV
44   char env[2] = { 0 };
45 #endif
46 
47   if (simd_support != ~0U)
48     return;
49 
50   simd_support = jpeg_simd_cpu_support();
51 
52 #ifndef NO_GETENV
53   /* Force different settings through environment variables */
54   if (!GETENV_S(env, 2, "JSIMD_FORCESSE2") && !strcmp(env, "1"))
55     simd_support &= JSIMD_SSE2;
56   if (!GETENV_S(env, 2, "JSIMD_FORCEAVX2") && !strcmp(env, "1"))
57     simd_support &= JSIMD_AVX2;
58   if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
59     simd_support = 0;
60   if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
61     simd_huffman = 0;
62 #endif
63 }
64 
65 GLOBAL(int)
jsimd_can_rgb_ycc(void)66 jsimd_can_rgb_ycc(void)
67 {
68   init_simd();
69 
70   /* The code is optimised for these values only */
71   if (BITS_IN_JSAMPLE != 8)
72     return 0;
73   if (sizeof(JDIMENSION) != 4)
74     return 0;
75   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
76     return 0;
77 
78   if ((simd_support & JSIMD_AVX2) &&
79       IS_ALIGNED_AVX(jconst_rgb_ycc_convert_avx2))
80     return 1;
81   if ((simd_support & JSIMD_SSE2) &&
82       IS_ALIGNED_SSE(jconst_rgb_ycc_convert_sse2))
83     return 1;
84 
85   return 0;
86 }
87 
88 GLOBAL(int)
jsimd_can_rgb_gray(void)89 jsimd_can_rgb_gray(void)
90 {
91   init_simd();
92 
93   /* The code is optimised for these values only */
94   if (BITS_IN_JSAMPLE != 8)
95     return 0;
96   if (sizeof(JDIMENSION) != 4)
97     return 0;
98   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
99     return 0;
100 
101   if ((simd_support & JSIMD_AVX2) &&
102       IS_ALIGNED_AVX(jconst_rgb_gray_convert_avx2))
103     return 1;
104   if ((simd_support & JSIMD_SSE2) &&
105       IS_ALIGNED_SSE(jconst_rgb_gray_convert_sse2))
106     return 1;
107 
108   return 0;
109 }
110 
111 GLOBAL(int)
jsimd_can_ycc_rgb(void)112 jsimd_can_ycc_rgb(void)
113 {
114   init_simd();
115 
116   /* The code is optimised for these values only */
117   if (BITS_IN_JSAMPLE != 8)
118     return 0;
119   if (sizeof(JDIMENSION) != 4)
120     return 0;
121   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
122     return 0;
123 
124   if ((simd_support & JSIMD_AVX2) &&
125       IS_ALIGNED_AVX(jconst_ycc_rgb_convert_avx2))
126     return 1;
127   if ((simd_support & JSIMD_SSE2) &&
128       IS_ALIGNED_SSE(jconst_ycc_rgb_convert_sse2))
129     return 1;
130 
131   return 0;
132 }
133 
134 GLOBAL(int)
jsimd_can_ycc_rgb565(void)135 jsimd_can_ycc_rgb565(void)
136 {
137   return 0;
138 }
139 
140 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)141 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
142                       JSAMPIMAGE output_buf, JDIMENSION output_row,
143                       int num_rows)
144 {
145   void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
146   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
147 
148   if (simd_support == ~0U)
149     init_simd();
150 
151   switch (cinfo->in_color_space) {
152   case JCS_EXT_RGB:
153     avx2fct = jsimd_extrgb_ycc_convert_avx2;
154     sse2fct = jsimd_extrgb_ycc_convert_sse2;
155     break;
156   case JCS_EXT_RGBX:
157   case JCS_EXT_RGBA:
158     avx2fct = jsimd_extrgbx_ycc_convert_avx2;
159     sse2fct = jsimd_extrgbx_ycc_convert_sse2;
160     break;
161   case JCS_EXT_BGR:
162     avx2fct = jsimd_extbgr_ycc_convert_avx2;
163     sse2fct = jsimd_extbgr_ycc_convert_sse2;
164     break;
165   case JCS_EXT_BGRX:
166   case JCS_EXT_BGRA:
167     avx2fct = jsimd_extbgrx_ycc_convert_avx2;
168     sse2fct = jsimd_extbgrx_ycc_convert_sse2;
169     break;
170   case JCS_EXT_XBGR:
171   case JCS_EXT_ABGR:
172     avx2fct = jsimd_extxbgr_ycc_convert_avx2;
173     sse2fct = jsimd_extxbgr_ycc_convert_sse2;
174     break;
175   case JCS_EXT_XRGB:
176   case JCS_EXT_ARGB:
177     avx2fct = jsimd_extxrgb_ycc_convert_avx2;
178     sse2fct = jsimd_extxrgb_ycc_convert_sse2;
179     break;
180   default:
181     avx2fct = jsimd_rgb_ycc_convert_avx2;
182     sse2fct = jsimd_rgb_ycc_convert_sse2;
183     break;
184   }
185 
186   if (simd_support & JSIMD_AVX2)
187     avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
188   else
189     sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
190 }
191 
192 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)193 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
194                        JSAMPIMAGE output_buf, JDIMENSION output_row,
195                        int num_rows)
196 {
197   void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
198   void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
199 
200   if (simd_support == ~0U)
201     init_simd();
202 
203   switch (cinfo->in_color_space) {
204   case JCS_EXT_RGB:
205     avx2fct = jsimd_extrgb_gray_convert_avx2;
206     sse2fct = jsimd_extrgb_gray_convert_sse2;
207     break;
208   case JCS_EXT_RGBX:
209   case JCS_EXT_RGBA:
210     avx2fct = jsimd_extrgbx_gray_convert_avx2;
211     sse2fct = jsimd_extrgbx_gray_convert_sse2;
212     break;
213   case JCS_EXT_BGR:
214     avx2fct = jsimd_extbgr_gray_convert_avx2;
215     sse2fct = jsimd_extbgr_gray_convert_sse2;
216     break;
217   case JCS_EXT_BGRX:
218   case JCS_EXT_BGRA:
219     avx2fct = jsimd_extbgrx_gray_convert_avx2;
220     sse2fct = jsimd_extbgrx_gray_convert_sse2;
221     break;
222   case JCS_EXT_XBGR:
223   case JCS_EXT_ABGR:
224     avx2fct = jsimd_extxbgr_gray_convert_avx2;
225     sse2fct = jsimd_extxbgr_gray_convert_sse2;
226     break;
227   case JCS_EXT_XRGB:
228   case JCS_EXT_ARGB:
229     avx2fct = jsimd_extxrgb_gray_convert_avx2;
230     sse2fct = jsimd_extxrgb_gray_convert_sse2;
231     break;
232   default:
233     avx2fct = jsimd_rgb_gray_convert_avx2;
234     sse2fct = jsimd_rgb_gray_convert_sse2;
235     break;
236   }
237 
238   if (simd_support & JSIMD_AVX2)
239     avx2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
240   else
241     sse2fct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
242 }
243 
244 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)245 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
246                       JDIMENSION input_row, JSAMPARRAY output_buf,
247                       int num_rows)
248 {
249   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
250   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
251 
252   if (simd_support == ~0U)
253     init_simd();
254 
255   switch (cinfo->out_color_space) {
256   case JCS_EXT_RGB:
257     avx2fct = jsimd_ycc_extrgb_convert_avx2;
258     sse2fct = jsimd_ycc_extrgb_convert_sse2;
259     break;
260   case JCS_EXT_RGBX:
261   case JCS_EXT_RGBA:
262     avx2fct = jsimd_ycc_extrgbx_convert_avx2;
263     sse2fct = jsimd_ycc_extrgbx_convert_sse2;
264     break;
265   case JCS_EXT_BGR:
266     avx2fct = jsimd_ycc_extbgr_convert_avx2;
267     sse2fct = jsimd_ycc_extbgr_convert_sse2;
268     break;
269   case JCS_EXT_BGRX:
270   case JCS_EXT_BGRA:
271     avx2fct = jsimd_ycc_extbgrx_convert_avx2;
272     sse2fct = jsimd_ycc_extbgrx_convert_sse2;
273     break;
274   case JCS_EXT_XBGR:
275   case JCS_EXT_ABGR:
276     avx2fct = jsimd_ycc_extxbgr_convert_avx2;
277     sse2fct = jsimd_ycc_extxbgr_convert_sse2;
278     break;
279   case JCS_EXT_XRGB:
280   case JCS_EXT_ARGB:
281     avx2fct = jsimd_ycc_extxrgb_convert_avx2;
282     sse2fct = jsimd_ycc_extxrgb_convert_sse2;
283     break;
284   default:
285     avx2fct = jsimd_ycc_rgb_convert_avx2;
286     sse2fct = jsimd_ycc_rgb_convert_sse2;
287     break;
288   }
289 
290   if (simd_support & JSIMD_AVX2)
291     avx2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
292   else
293     sse2fct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
294 }
295 
296 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)297 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
298                          JDIMENSION input_row, JSAMPARRAY output_buf,
299                          int num_rows)
300 {
301 }
302 
303 GLOBAL(int)
jsimd_can_h2v2_downsample(void)304 jsimd_can_h2v2_downsample(void)
305 {
306   init_simd();
307 
308   /* The code is optimised for these values only */
309   if (BITS_IN_JSAMPLE != 8)
310     return 0;
311   if (sizeof(JDIMENSION) != 4)
312     return 0;
313 
314   if (simd_support & JSIMD_AVX2)
315     return 1;
316   if (simd_support & JSIMD_SSE2)
317     return 1;
318 
319   return 0;
320 }
321 
322 GLOBAL(int)
jsimd_can_h2v1_downsample(void)323 jsimd_can_h2v1_downsample(void)
324 {
325   init_simd();
326 
327   /* The code is optimised for these values only */
328   if (BITS_IN_JSAMPLE != 8)
329     return 0;
330   if (sizeof(JDIMENSION) != 4)
331     return 0;
332 
333   if (simd_support & JSIMD_AVX2)
334     return 1;
335   if (simd_support & JSIMD_SSE2)
336     return 1;
337 
338   return 0;
339 }
340 
341 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)342 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
343                       JSAMPARRAY input_data, JSAMPARRAY output_data)
344 {
345   if (simd_support == ~0U)
346     init_simd();
347 
348   if (simd_support & JSIMD_AVX2)
349     jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
350                                compptr->v_samp_factor,
351                                compptr->width_in_blocks, input_data,
352                                output_data);
353   else
354     jsimd_h2v2_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
355                                compptr->v_samp_factor,
356                                compptr->width_in_blocks, input_data,
357                                output_data);
358 }
359 
360 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)361 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
362                       JSAMPARRAY input_data, JSAMPARRAY output_data)
363 {
364   if (simd_support == ~0U)
365     init_simd();
366 
367   if (simd_support & JSIMD_AVX2)
368     jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor,
369                                compptr->v_samp_factor,
370                                compptr->width_in_blocks, input_data,
371                                output_data);
372   else
373     jsimd_h2v1_downsample_sse2(cinfo->image_width, cinfo->max_v_samp_factor,
374                                compptr->v_samp_factor,
375                                compptr->width_in_blocks, input_data,
376                                output_data);
377 }
378 
379 GLOBAL(int)
jsimd_can_h2v2_upsample(void)380 jsimd_can_h2v2_upsample(void)
381 {
382   init_simd();
383 
384   /* The code is optimised for these values only */
385   if (BITS_IN_JSAMPLE != 8)
386     return 0;
387   if (sizeof(JDIMENSION) != 4)
388     return 0;
389 
390   if (simd_support & JSIMD_AVX2)
391     return 1;
392   if (simd_support & JSIMD_SSE2)
393     return 1;
394 
395   return 0;
396 }
397 
398 GLOBAL(int)
jsimd_can_h2v1_upsample(void)399 jsimd_can_h2v1_upsample(void)
400 {
401   init_simd();
402 
403   /* The code is optimised for these values only */
404   if (BITS_IN_JSAMPLE != 8)
405     return 0;
406   if (sizeof(JDIMENSION) != 4)
407     return 0;
408 
409   if (simd_support & JSIMD_AVX2)
410     return 1;
411   if (simd_support & JSIMD_SSE2)
412     return 1;
413 
414   return 0;
415 }
416 
417 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)418 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
419                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
420 {
421   if (simd_support == ~0U)
422     init_simd();
423 
424   if (simd_support & JSIMD_AVX2)
425     jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
426                              input_data, output_data_ptr);
427   else
428     jsimd_h2v2_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
429                              input_data, output_data_ptr);
430 }
431 
432 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)433 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
434                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
435 {
436   if (simd_support == ~0U)
437     init_simd();
438 
439   if (simd_support & JSIMD_AVX2)
440     jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width,
441                              input_data, output_data_ptr);
442   else
443     jsimd_h2v1_upsample_sse2(cinfo->max_v_samp_factor, cinfo->output_width,
444                              input_data, output_data_ptr);
445 }
446 
447 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)448 jsimd_can_h2v2_fancy_upsample(void)
449 {
450   init_simd();
451 
452   /* The code is optimised for these values only */
453   if (BITS_IN_JSAMPLE != 8)
454     return 0;
455   if (sizeof(JDIMENSION) != 4)
456     return 0;
457 
458   if ((simd_support & JSIMD_AVX2) &&
459       IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
460     return 1;
461   if ((simd_support & JSIMD_SSE2) &&
462       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
463     return 1;
464 
465   return 0;
466 }
467 
468 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)469 jsimd_can_h2v1_fancy_upsample(void)
470 {
471   init_simd();
472 
473   /* The code is optimised for these values only */
474   if (BITS_IN_JSAMPLE != 8)
475     return 0;
476   if (sizeof(JDIMENSION) != 4)
477     return 0;
478 
479   if ((simd_support & JSIMD_AVX2) &&
480       IS_ALIGNED_AVX(jconst_fancy_upsample_avx2))
481     return 1;
482   if ((simd_support & JSIMD_SSE2) &&
483       IS_ALIGNED_SSE(jconst_fancy_upsample_sse2))
484     return 1;
485 
486   return 0;
487 }
488 
489 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)490 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
491                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
492 {
493   if (simd_support == ~0U)
494     init_simd();
495 
496   if (simd_support & JSIMD_AVX2)
497     jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor,
498                                    compptr->downsampled_width, input_data,
499                                    output_data_ptr);
500   else
501     jsimd_h2v2_fancy_upsample_sse2(cinfo->max_v_samp_factor,
502                                    compptr->downsampled_width, input_data,
503                                    output_data_ptr);
504 }
505 
506 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)507 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
508                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
509 {
510   if (simd_support == ~0U)
511     init_simd();
512 
513   if (simd_support & JSIMD_AVX2)
514     jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor,
515                                    compptr->downsampled_width, input_data,
516                                    output_data_ptr);
517   else
518     jsimd_h2v1_fancy_upsample_sse2(cinfo->max_v_samp_factor,
519                                    compptr->downsampled_width, input_data,
520                                    output_data_ptr);
521 }
522 
523 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)524 jsimd_can_h2v2_merged_upsample(void)
525 {
526   init_simd();
527 
528   /* The code is optimised for these values only */
529   if (BITS_IN_JSAMPLE != 8)
530     return 0;
531   if (sizeof(JDIMENSION) != 4)
532     return 0;
533 
534   if ((simd_support & JSIMD_AVX2) &&
535       IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
536     return 1;
537   if ((simd_support & JSIMD_SSE2) &&
538       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
539     return 1;
540 
541   return 0;
542 }
543 
544 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)545 jsimd_can_h2v1_merged_upsample(void)
546 {
547   init_simd();
548 
549   /* The code is optimised for these values only */
550   if (BITS_IN_JSAMPLE != 8)
551     return 0;
552   if (sizeof(JDIMENSION) != 4)
553     return 0;
554 
555   if ((simd_support & JSIMD_AVX2) &&
556       IS_ALIGNED_AVX(jconst_merged_upsample_avx2))
557     return 1;
558   if ((simd_support & JSIMD_SSE2) &&
559       IS_ALIGNED_SSE(jconst_merged_upsample_sse2))
560     return 1;
561 
562   return 0;
563 }
564 
565 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)566 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
567                            JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
568 {
569   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
570   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
571 
572   if (simd_support == ~0U)
573     init_simd();
574 
575   switch (cinfo->out_color_space) {
576   case JCS_EXT_RGB:
577     avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2;
578     sse2fct = jsimd_h2v2_extrgb_merged_upsample_sse2;
579     break;
580   case JCS_EXT_RGBX:
581   case JCS_EXT_RGBA:
582     avx2fct = jsimd_h2v2_extrgbx_merged_upsample_avx2;
583     sse2fct = jsimd_h2v2_extrgbx_merged_upsample_sse2;
584     break;
585   case JCS_EXT_BGR:
586     avx2fct = jsimd_h2v2_extbgr_merged_upsample_avx2;
587     sse2fct = jsimd_h2v2_extbgr_merged_upsample_sse2;
588     break;
589   case JCS_EXT_BGRX:
590   case JCS_EXT_BGRA:
591     avx2fct = jsimd_h2v2_extbgrx_merged_upsample_avx2;
592     sse2fct = jsimd_h2v2_extbgrx_merged_upsample_sse2;
593     break;
594   case JCS_EXT_XBGR:
595   case JCS_EXT_ABGR:
596     avx2fct = jsimd_h2v2_extxbgr_merged_upsample_avx2;
597     sse2fct = jsimd_h2v2_extxbgr_merged_upsample_sse2;
598     break;
599   case JCS_EXT_XRGB:
600   case JCS_EXT_ARGB:
601     avx2fct = jsimd_h2v2_extxrgb_merged_upsample_avx2;
602     sse2fct = jsimd_h2v2_extxrgb_merged_upsample_sse2;
603     break;
604   default:
605     avx2fct = jsimd_h2v2_merged_upsample_avx2;
606     sse2fct = jsimd_h2v2_merged_upsample_sse2;
607     break;
608   }
609 
610   if (simd_support & JSIMD_AVX2)
611     avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
612   else
613     sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
614 }
615 
616 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)617 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
618                            JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
619 {
620   void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
621   void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
622 
623   if (simd_support == ~0U)
624     init_simd();
625 
626   switch (cinfo->out_color_space) {
627   case JCS_EXT_RGB:
628     avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2;
629     sse2fct = jsimd_h2v1_extrgb_merged_upsample_sse2;
630     break;
631   case JCS_EXT_RGBX:
632   case JCS_EXT_RGBA:
633     avx2fct = jsimd_h2v1_extrgbx_merged_upsample_avx2;
634     sse2fct = jsimd_h2v1_extrgbx_merged_upsample_sse2;
635     break;
636   case JCS_EXT_BGR:
637     avx2fct = jsimd_h2v1_extbgr_merged_upsample_avx2;
638     sse2fct = jsimd_h2v1_extbgr_merged_upsample_sse2;
639     break;
640   case JCS_EXT_BGRX:
641   case JCS_EXT_BGRA:
642     avx2fct = jsimd_h2v1_extbgrx_merged_upsample_avx2;
643     sse2fct = jsimd_h2v1_extbgrx_merged_upsample_sse2;
644     break;
645   case JCS_EXT_XBGR:
646   case JCS_EXT_ABGR:
647     avx2fct = jsimd_h2v1_extxbgr_merged_upsample_avx2;
648     sse2fct = jsimd_h2v1_extxbgr_merged_upsample_sse2;
649     break;
650   case JCS_EXT_XRGB:
651   case JCS_EXT_ARGB:
652     avx2fct = jsimd_h2v1_extxrgb_merged_upsample_avx2;
653     sse2fct = jsimd_h2v1_extxrgb_merged_upsample_sse2;
654     break;
655   default:
656     avx2fct = jsimd_h2v1_merged_upsample_avx2;
657     sse2fct = jsimd_h2v1_merged_upsample_sse2;
658     break;
659   }
660 
661   if (simd_support & JSIMD_AVX2)
662     avx2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
663   else
664     sse2fct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
665 }
666 
667 GLOBAL(int)
jsimd_can_convsamp(void)668 jsimd_can_convsamp(void)
669 {
670   init_simd();
671 
672   /* The code is optimised for these values only */
673   if (DCTSIZE != 8)
674     return 0;
675   if (BITS_IN_JSAMPLE != 8)
676     return 0;
677   if (sizeof(JDIMENSION) != 4)
678     return 0;
679   if (sizeof(DCTELEM) != 2)
680     return 0;
681 
682   if (simd_support & JSIMD_AVX2)
683     return 1;
684   if (simd_support & JSIMD_SSE2)
685     return 1;
686 
687   return 0;
688 }
689 
690 GLOBAL(int)
jsimd_can_convsamp_float(void)691 jsimd_can_convsamp_float(void)
692 {
693   init_simd();
694 
695   /* The code is optimised for these values only */
696   if (DCTSIZE != 8)
697     return 0;
698   if (BITS_IN_JSAMPLE != 8)
699     return 0;
700   if (sizeof(JDIMENSION) != 4)
701     return 0;
702   if (sizeof(FAST_FLOAT) != 4)
703     return 0;
704 
705   if (simd_support & JSIMD_SSE2)
706     return 1;
707 
708   return 0;
709 }
710 
711 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)712 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
713                DCTELEM *workspace)
714 {
715   if (simd_support == ~0U)
716     init_simd();
717 
718   if (simd_support & JSIMD_AVX2)
719     jsimd_convsamp_avx2(sample_data, start_col, workspace);
720   else
721     jsimd_convsamp_sse2(sample_data, start_col, workspace);
722 }
723 
724 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)725 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
726                      FAST_FLOAT *workspace)
727 {
728   jsimd_convsamp_float_sse2(sample_data, start_col, workspace);
729 }
730 
731 GLOBAL(int)
jsimd_can_fdct_islow(void)732 jsimd_can_fdct_islow(void)
733 {
734   init_simd();
735 
736   /* The code is optimised for these values only */
737   if (DCTSIZE != 8)
738     return 0;
739   if (sizeof(DCTELEM) != 2)
740     return 0;
741 
742   if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_fdct_islow_avx2))
743     return 1;
744   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2))
745     return 1;
746 
747   return 0;
748 }
749 
750 GLOBAL(int)
jsimd_can_fdct_ifast(void)751 jsimd_can_fdct_ifast(void)
752 {
753   init_simd();
754 
755   /* The code is optimised for these values only */
756   if (DCTSIZE != 8)
757     return 0;
758   if (sizeof(DCTELEM) != 2)
759     return 0;
760 
761   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_ifast_sse2))
762     return 1;
763 
764   return 0;
765 }
766 
767 GLOBAL(int)
jsimd_can_fdct_float(void)768 jsimd_can_fdct_float(void)
769 {
770   init_simd();
771 
772   /* The code is optimised for these values only */
773   if (DCTSIZE != 8)
774     return 0;
775   if (sizeof(FAST_FLOAT) != 4)
776     return 0;
777 
778   if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse))
779     return 1;
780 
781   return 0;
782 }
783 
784 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)785 jsimd_fdct_islow(DCTELEM *data)
786 {
787   if (simd_support == ~0U)
788     init_simd();
789 
790   if (simd_support & JSIMD_AVX2)
791     jsimd_fdct_islow_avx2(data);
792   else
793     jsimd_fdct_islow_sse2(data);
794 }
795 
796 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)797 jsimd_fdct_ifast(DCTELEM *data)
798 {
799   jsimd_fdct_ifast_sse2(data);
800 }
801 
802 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)803 jsimd_fdct_float(FAST_FLOAT *data)
804 {
805   jsimd_fdct_float_sse(data);
806 }
807 
808 GLOBAL(int)
jsimd_can_quantize(void)809 jsimd_can_quantize(void)
810 {
811   init_simd();
812 
813   /* The code is optimised for these values only */
814   if (DCTSIZE != 8)
815     return 0;
816   if (sizeof(JCOEF) != 2)
817     return 0;
818   if (sizeof(DCTELEM) != 2)
819     return 0;
820 
821   if (simd_support & JSIMD_AVX2)
822     return 1;
823   if (simd_support & JSIMD_SSE2)
824     return 1;
825 
826   return 0;
827 }
828 
829 GLOBAL(int)
jsimd_can_quantize_float(void)830 jsimd_can_quantize_float(void)
831 {
832   init_simd();
833 
834   /* The code is optimised for these values only */
835   if (DCTSIZE != 8)
836     return 0;
837   if (sizeof(JCOEF) != 2)
838     return 0;
839   if (sizeof(FAST_FLOAT) != 4)
840     return 0;
841 
842   if (simd_support & JSIMD_SSE2)
843     return 1;
844 
845   return 0;
846 }
847 
848 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)849 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
850 {
851   if (simd_support == ~0U)
852     init_simd();
853 
854   if (simd_support & JSIMD_AVX2)
855     jsimd_quantize_avx2(coef_block, divisors, workspace);
856   else
857     jsimd_quantize_sse2(coef_block, divisors, workspace);
858 }
859 
860 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)861 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
862                      FAST_FLOAT *workspace)
863 {
864   jsimd_quantize_float_sse2(coef_block, divisors, workspace);
865 }
866 
867 GLOBAL(int)
jsimd_can_idct_2x2(void)868 jsimd_can_idct_2x2(void)
869 {
870   init_simd();
871 
872   /* The code is optimised for these values only */
873   if (DCTSIZE != 8)
874     return 0;
875   if (sizeof(JCOEF) != 2)
876     return 0;
877   if (BITS_IN_JSAMPLE != 8)
878     return 0;
879   if (sizeof(JDIMENSION) != 4)
880     return 0;
881   if (sizeof(ISLOW_MULT_TYPE) != 2)
882     return 0;
883 
884   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
885     return 1;
886 
887   return 0;
888 }
889 
890 GLOBAL(int)
jsimd_can_idct_4x4(void)891 jsimd_can_idct_4x4(void)
892 {
893   init_simd();
894 
895   /* The code is optimised for these values only */
896   if (DCTSIZE != 8)
897     return 0;
898   if (sizeof(JCOEF) != 2)
899     return 0;
900   if (BITS_IN_JSAMPLE != 8)
901     return 0;
902   if (sizeof(JDIMENSION) != 4)
903     return 0;
904   if (sizeof(ISLOW_MULT_TYPE) != 2)
905     return 0;
906 
907   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2))
908     return 1;
909 
910   return 0;
911 }
912 
913 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)914 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
915                JCOEFPTR coef_block, JSAMPARRAY output_buf,
916                JDIMENSION output_col)
917 {
918   jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, output_col);
919 }
920 
921 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)922 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
923                JCOEFPTR coef_block, JSAMPARRAY output_buf,
924                JDIMENSION output_col)
925 {
926   jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, output_col);
927 }
928 
929 GLOBAL(int)
jsimd_can_idct_islow(void)930 jsimd_can_idct_islow(void)
931 {
932   init_simd();
933 
934   /* The code is optimised for these values only */
935   if (DCTSIZE != 8)
936     return 0;
937   if (sizeof(JCOEF) != 2)
938     return 0;
939   if (BITS_IN_JSAMPLE != 8)
940     return 0;
941   if (sizeof(JDIMENSION) != 4)
942     return 0;
943   if (sizeof(ISLOW_MULT_TYPE) != 2)
944     return 0;
945 
946   if ((simd_support & JSIMD_AVX2) && IS_ALIGNED_AVX(jconst_idct_islow_avx2))
947     return 1;
948   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_islow_sse2))
949     return 1;
950 
951   return 0;
952 }
953 
954 GLOBAL(int)
jsimd_can_idct_ifast(void)955 jsimd_can_idct_ifast(void)
956 {
957   init_simd();
958 
959   /* The code is optimised for these values only */
960   if (DCTSIZE != 8)
961     return 0;
962   if (sizeof(JCOEF) != 2)
963     return 0;
964   if (BITS_IN_JSAMPLE != 8)
965     return 0;
966   if (sizeof(JDIMENSION) != 4)
967     return 0;
968   if (sizeof(IFAST_MULT_TYPE) != 2)
969     return 0;
970   if (IFAST_SCALE_BITS != 2)
971     return 0;
972 
973   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2))
974     return 1;
975 
976   return 0;
977 }
978 
979 GLOBAL(int)
jsimd_can_idct_float(void)980 jsimd_can_idct_float(void)
981 {
982   init_simd();
983 
984   if (DCTSIZE != 8)
985     return 0;
986   if (sizeof(JCOEF) != 2)
987     return 0;
988   if (BITS_IN_JSAMPLE != 8)
989     return 0;
990   if (sizeof(JDIMENSION) != 4)
991     return 0;
992   if (sizeof(FAST_FLOAT) != 4)
993     return 0;
994   if (sizeof(FLOAT_MULT_TYPE) != 4)
995     return 0;
996 
997   if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2))
998     return 1;
999 
1000   return 0;
1001 }
1002 
1003 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1004 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1005                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
1006                  JDIMENSION output_col)
1007 {
1008   if (simd_support == ~0U)
1009     init_simd();
1010 
1011   if (simd_support & JSIMD_AVX2)
1012     jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf,
1013                           output_col);
1014   else
1015     jsimd_idct_islow_sse2(compptr->dct_table, coef_block, output_buf,
1016                           output_col);
1017 }
1018 
1019 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1020 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1021                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
1022                  JDIMENSION output_col)
1023 {
1024   jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf,
1025                         output_col);
1026 }
1027 
1028 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)1029 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
1030                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
1031                  JDIMENSION output_col)
1032 {
1033   jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf,
1034                         output_col);
1035 }
1036 
1037 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)1038 jsimd_can_huff_encode_one_block(void)
1039 {
1040   init_simd();
1041 
1042   if (DCTSIZE != 8)
1043     return 0;
1044   if (sizeof(JCOEF) != 2)
1045     return 0;
1046 
1047   if ((simd_support & JSIMD_SSE2) && simd_huffman &&
1048       IS_ALIGNED_SSE(jconst_huff_encode_one_block))
1049     return 1;
1050 
1051   return 0;
1052 }
1053 
1054 GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)1055 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
1056                             int last_dc_val, c_derived_tbl *dctbl,
1057                             c_derived_tbl *actbl)
1058 {
1059   return jsimd_huff_encode_one_block_sse2(state, buffer, block, last_dc_val,
1060                                           dctbl, actbl);
1061 }
1062 
1063 GLOBAL(int)
jsimd_can_encode_mcu_AC_first_prepare(void)1064 jsimd_can_encode_mcu_AC_first_prepare(void)
1065 {
1066   init_simd();
1067 
1068   if (DCTSIZE != 8)
1069     return 0;
1070   if (sizeof(JCOEF) != 2)
1071     return 0;
1072   if (simd_support & JSIMD_SSE2)
1073     return 1;
1074 
1075   return 0;
1076 }
1077 
1078 GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,UJCOEF * values,size_t * zerobits)1079 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1080                                   const int *jpeg_natural_order_start, int Sl,
1081                                   int Al, UJCOEF *values, size_t *zerobits)
1082 {
1083   jsimd_encode_mcu_AC_first_prepare_sse2(block, jpeg_natural_order_start,
1084                                          Sl, Al, values, zerobits);
1085 }
1086 
1087 GLOBAL(int)
jsimd_can_encode_mcu_AC_refine_prepare(void)1088 jsimd_can_encode_mcu_AC_refine_prepare(void)
1089 {
1090   init_simd();
1091 
1092   if (DCTSIZE != 8)
1093     return 0;
1094   if (sizeof(JCOEF) != 2)
1095     return 0;
1096   if (simd_support & JSIMD_SSE2)
1097     return 1;
1098 
1099   return 0;
1100 }
1101 
1102 GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,UJCOEF * absvalues,size_t * bits)1103 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1104                                    const int *jpeg_natural_order_start, int Sl,
1105                                    int Al, UJCOEF *absvalues, size_t *bits)
1106 {
1107   return jsimd_encode_mcu_AC_refine_prepare_sse2(block,
1108                                                  jpeg_natural_order_start,
1109                                                  Sl, Al, absvalues, bits);
1110 }
1111