xref: /aosp_15_r20/external/libjpeg-turbo/simd/arm/aarch64/jsimd.c (revision dfc6aa5c1cfd4bc4e2018dc74aa96e29ee49c6da)
1 /*
2  * jsimd_arm64.c
3  *
4  * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
5  * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
6  * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander.
7  * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
8  * Copyright (C) 2020, Arm Limited.
9  *
10  * Based on the x86 SIMD extension for IJG JPEG library,
11  * Copyright (C) 1999-2006, MIYASAKA Masaru.
12  * For conditions of distribution and use, see copyright notice in jsimdext.inc
13  *
14  * This file contains the interface between the "normal" portions
15  * of the library and the SIMD implementations when running on a
16  * 64-bit Arm architecture.
17  */
18 
19 #define JPEG_INTERNALS
20 #include "../../../jinclude.h"
21 #include "../../../jpeglib.h"
22 #include "../../../jsimd.h"
23 #include "../../../jdct.h"
24 #include "../../../jsimddct.h"
25 #include "../../jsimd.h"
26 
27 #include <ctype.h>
28 
29 #define JSIMD_FASTLD3  1
30 #define JSIMD_FASTST3  2
31 #define JSIMD_FASTTBL  4
32 
33 static THREAD_LOCAL unsigned int simd_support = ~0;
34 static THREAD_LOCAL unsigned int simd_huffman = 1;
35 static THREAD_LOCAL unsigned int simd_features = JSIMD_FASTLD3 |
36                                                  JSIMD_FASTST3 | JSIMD_FASTTBL;
37 
38 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
39 
40 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT  (1024 * 1024)
41 
/*
 * Test whether one line of /proc/cpuinfo assigns a given value to a field.
 *
 * buffer: NUL-terminated line of text
 * field:  text the line must start with (e.g. "CPU part")
 * value:  token to look for in the remainder of the line
 *
 * Returns 1 if 'buffer' begins with 'field' and 'value' occurs after it as a
 * whitespace-delimited word, 0 otherwise (including when 'value' is empty).
 */
LOCAL(int)
check_cpuinfo(char *buffer, const char *field, char *value)
{
  size_t field_len = strlen(field);
  size_t value_len = strlen(value);
  char *start, *p;

  if (value_len == 0)
    return 0;
  if (strncmp(buffer, field, field_len) != 0)
    return 0;
  buffer += field_len;
  /* <ctype.h> functions require a value representable as unsigned char */
  while (isspace((unsigned char)*buffer))
    buffer++;
  start = buffer;

  /* Check if 'value' is present in the buffer as a separate word.  Word
     boundaries are always judged against 'start', and a rejected candidate
     resumes the search one character past that candidate, so a match glued
     to a preceding non-space character can never be accepted later. */
  for (p = strstr(start, value); p != NULL; p = strstr(p + 1, value)) {
    if (p > start && !isspace((unsigned char)p[-1]))
      continue;
    if (p[value_len] != 0 && !isspace((unsigned char)p[value_len]))
      continue;
    return 1;
  }
  return 0;
}
70 
71 LOCAL(int)
parse_proc_cpuinfo(int bufsize)72 parse_proc_cpuinfo(int bufsize)
73 {
74   char *buffer = (char *)malloc(bufsize);
75   FILE *fd;
76 
77   if (!buffer)
78     return 0;
79 
80   fd = fopen("/proc/cpuinfo", "r");
81   if (fd) {
82     while (fgets(buffer, bufsize, fd)) {
83       if (!strchr(buffer, '\n') && !feof(fd)) {
84         /* "impossible" happened - insufficient size of the buffer! */
85         fclose(fd);
86         free(buffer);
87         return 0;
88       }
89       if (check_cpuinfo(buffer, "CPU part", "0xd03") ||
90           check_cpuinfo(buffer, "CPU part", "0xd07"))
91         /* The Cortex-A53 has a slow tbl implementation.  We can gain a few
92            percent speedup by disabling the use of that instruction.  The
93            speedup on Cortex-A57 is more subtle but still measurable. */
94         simd_features &= ~JSIMD_FASTTBL;
95       else if (check_cpuinfo(buffer, "CPU part", "0x0a1"))
96         /* The SIMD version of Huffman encoding is slower than the C version on
97            Cavium ThunderX.  Also, ld3 and st3 are abyssmally slow on that
98            CPU. */
99         simd_huffman = simd_features = 0;
100     }
101     fclose(fd);
102   }
103   free(buffer);
104   return 1;
105 }
106 
107 #endif
108 
109 /*
110  * Check what SIMD accelerations are supported.
111  */
112 
113 /*
114  * Armv8 architectures support Neon extensions by default.
115  * It is no longer optional as it was with Armv7.
116  */
117 
118 
119 LOCAL(void)
init_simd(void)120 init_simd(void)
121 {
122 #ifndef NO_GETENV
123   char env[2] = { 0 };
124 #endif
125 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
126   int bufsize = 1024; /* an initial guess for the line buffer size limit */
127 #endif
128 
129   if (simd_support != ~0U)
130     return;
131 
132   simd_support = 0;
133 
134   simd_support |= JSIMD_NEON;
135 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
136   while (!parse_proc_cpuinfo(bufsize)) {
137     bufsize *= 2;
138     if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
139       break;
140   }
141 #endif
142 
143 #ifndef NO_GETENV
144   /* Force different settings through environment variables */
145   if (!GETENV_S(env, 2, "JSIMD_FORCENEON") && !strcmp(env, "1"))
146     simd_support = JSIMD_NEON;
147   if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
148     simd_support = 0;
149   if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
150     simd_huffman = 0;
151   if (!GETENV_S(env, 2, "JSIMD_FASTLD3") && !strcmp(env, "1"))
152     simd_features |= JSIMD_FASTLD3;
153   if (!GETENV_S(env, 2, "JSIMD_FASTLD3") && !strcmp(env, "0"))
154     simd_features &= ~JSIMD_FASTLD3;
155   if (!GETENV_S(env, 2, "JSIMD_FASTST3") && !strcmp(env, "1"))
156     simd_features |= JSIMD_FASTST3;
157   if (!GETENV_S(env, 2, "JSIMD_FASTST3") && !strcmp(env, "0"))
158     simd_features &= ~JSIMD_FASTST3;
159 #endif
160 }
161 
162 GLOBAL(int)
jsimd_can_rgb_ycc(void)163 jsimd_can_rgb_ycc(void)
164 {
165   init_simd();
166 
167   /* The code is optimised for these values only */
168   if (BITS_IN_JSAMPLE != 8)
169     return 0;
170   if (sizeof(JDIMENSION) != 4)
171     return 0;
172   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
173     return 0;
174 
175   if (simd_support & JSIMD_NEON)
176     return 1;
177 
178   return 0;
179 }
180 
181 GLOBAL(int)
jsimd_can_rgb_gray(void)182 jsimd_can_rgb_gray(void)
183 {
184   init_simd();
185 
186   /* The code is optimised for these values only */
187   if (BITS_IN_JSAMPLE != 8)
188     return 0;
189   if (sizeof(JDIMENSION) != 4)
190     return 0;
191   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
192     return 0;
193 
194   if (simd_support & JSIMD_NEON)
195     return 1;
196 
197   return 0;
198 }
199 
200 GLOBAL(int)
jsimd_can_ycc_rgb(void)201 jsimd_can_ycc_rgb(void)
202 {
203   init_simd();
204 
205   /* The code is optimised for these values only */
206   if (BITS_IN_JSAMPLE != 8)
207     return 0;
208   if (sizeof(JDIMENSION) != 4)
209     return 0;
210   if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
211     return 0;
212 
213   if (simd_support & JSIMD_NEON)
214     return 1;
215 
216   return 0;
217 }
218 
219 GLOBAL(int)
jsimd_can_ycc_rgb565(void)220 jsimd_can_ycc_rgb565(void)
221 {
222   init_simd();
223 
224   /* The code is optimised for these values only */
225   if (BITS_IN_JSAMPLE != 8)
226     return 0;
227   if (sizeof(JDIMENSION) != 4)
228     return 0;
229 
230   if (simd_support & JSIMD_NEON)
231     return 1;
232 
233   return 0;
234 }
235 
236 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)237 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
238                       JSAMPIMAGE output_buf, JDIMENSION output_row,
239                       int num_rows)
240 {
241   void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
242 
243   switch (cinfo->in_color_space) {
244   case JCS_EXT_RGB:
245 #ifndef NEON_INTRINSICS
246     if (simd_features & JSIMD_FASTLD3)
247 #endif
248       neonfct = jsimd_extrgb_ycc_convert_neon;
249 #ifndef NEON_INTRINSICS
250     else
251       neonfct = jsimd_extrgb_ycc_convert_neon_slowld3;
252 #endif
253     break;
254   case JCS_EXT_RGBX:
255   case JCS_EXT_RGBA:
256     neonfct = jsimd_extrgbx_ycc_convert_neon;
257     break;
258   case JCS_EXT_BGR:
259 #ifndef NEON_INTRINSICS
260     if (simd_features & JSIMD_FASTLD3)
261 #endif
262       neonfct = jsimd_extbgr_ycc_convert_neon;
263 #ifndef NEON_INTRINSICS
264     else
265       neonfct = jsimd_extbgr_ycc_convert_neon_slowld3;
266 #endif
267     break;
268   case JCS_EXT_BGRX:
269   case JCS_EXT_BGRA:
270     neonfct = jsimd_extbgrx_ycc_convert_neon;
271     break;
272   case JCS_EXT_XBGR:
273   case JCS_EXT_ABGR:
274     neonfct = jsimd_extxbgr_ycc_convert_neon;
275     break;
276   case JCS_EXT_XRGB:
277   case JCS_EXT_ARGB:
278     neonfct = jsimd_extxrgb_ycc_convert_neon;
279     break;
280   default:
281 #ifndef NEON_INTRINSICS
282     if (simd_features & JSIMD_FASTLD3)
283 #endif
284       neonfct = jsimd_extrgb_ycc_convert_neon;
285 #ifndef NEON_INTRINSICS
286     else
287       neonfct = jsimd_extrgb_ycc_convert_neon_slowld3;
288 #endif
289     break;
290   }
291 
292   neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
293 }
294 
295 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)296 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
297                        JSAMPIMAGE output_buf, JDIMENSION output_row,
298                        int num_rows)
299 {
300   void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
301 
302   switch (cinfo->in_color_space) {
303   case JCS_EXT_RGB:
304     neonfct = jsimd_extrgb_gray_convert_neon;
305     break;
306   case JCS_EXT_RGBX:
307   case JCS_EXT_RGBA:
308     neonfct = jsimd_extrgbx_gray_convert_neon;
309     break;
310   case JCS_EXT_BGR:
311     neonfct = jsimd_extbgr_gray_convert_neon;
312     break;
313   case JCS_EXT_BGRX:
314   case JCS_EXT_BGRA:
315     neonfct = jsimd_extbgrx_gray_convert_neon;
316     break;
317   case JCS_EXT_XBGR:
318   case JCS_EXT_ABGR:
319     neonfct = jsimd_extxbgr_gray_convert_neon;
320     break;
321   case JCS_EXT_XRGB:
322   case JCS_EXT_ARGB:
323     neonfct = jsimd_extxrgb_gray_convert_neon;
324     break;
325   default:
326     neonfct = jsimd_extrgb_gray_convert_neon;
327     break;
328   }
329 
330   neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
331 }
332 
333 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)334 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
335                       JDIMENSION input_row, JSAMPARRAY output_buf,
336                       int num_rows)
337 {
338   void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
339 
340   switch (cinfo->out_color_space) {
341   case JCS_EXT_RGB:
342 #ifndef NEON_INTRINSICS
343     if (simd_features & JSIMD_FASTST3)
344 #endif
345       neonfct = jsimd_ycc_extrgb_convert_neon;
346 #ifndef NEON_INTRINSICS
347     else
348       neonfct = jsimd_ycc_extrgb_convert_neon_slowst3;
349 #endif
350     break;
351   case JCS_EXT_RGBX:
352   case JCS_EXT_RGBA:
353     neonfct = jsimd_ycc_extrgbx_convert_neon;
354     break;
355   case JCS_EXT_BGR:
356 #ifndef NEON_INTRINSICS
357     if (simd_features & JSIMD_FASTST3)
358 #endif
359       neonfct = jsimd_ycc_extbgr_convert_neon;
360 #ifndef NEON_INTRINSICS
361     else
362       neonfct = jsimd_ycc_extbgr_convert_neon_slowst3;
363 #endif
364     break;
365   case JCS_EXT_BGRX:
366   case JCS_EXT_BGRA:
367     neonfct = jsimd_ycc_extbgrx_convert_neon;
368     break;
369   case JCS_EXT_XBGR:
370   case JCS_EXT_ABGR:
371     neonfct = jsimd_ycc_extxbgr_convert_neon;
372     break;
373   case JCS_EXT_XRGB:
374   case JCS_EXT_ARGB:
375     neonfct = jsimd_ycc_extxrgb_convert_neon;
376     break;
377   default:
378 #ifndef NEON_INTRINSICS
379     if (simd_features & JSIMD_FASTST3)
380 #endif
381       neonfct = jsimd_ycc_extrgb_convert_neon;
382 #ifndef NEON_INTRINSICS
383     else
384       neonfct = jsimd_ycc_extrgb_convert_neon_slowst3;
385 #endif
386     break;
387   }
388 
389   neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
390 }
391 
392 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)393 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
394                          JDIMENSION input_row, JSAMPARRAY output_buf,
395                          int num_rows)
396 {
397   jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
398                                 output_buf, num_rows);
399 }
400 
401 GLOBAL(int)
jsimd_can_h2v2_downsample(void)402 jsimd_can_h2v2_downsample(void)
403 {
404   init_simd();
405 
406   /* The code is optimised for these values only */
407   if (BITS_IN_JSAMPLE != 8)
408     return 0;
409   if (DCTSIZE != 8)
410     return 0;
411   if (sizeof(JDIMENSION) != 4)
412     return 0;
413 
414   if (simd_support & JSIMD_NEON)
415     return 1;
416 
417   return 0;
418 }
419 
420 GLOBAL(int)
jsimd_can_h2v1_downsample(void)421 jsimd_can_h2v1_downsample(void)
422 {
423   init_simd();
424 
425   /* The code is optimised for these values only */
426   if (BITS_IN_JSAMPLE != 8)
427     return 0;
428   if (DCTSIZE != 8)
429     return 0;
430   if (sizeof(JDIMENSION) != 4)
431     return 0;
432 
433   if (simd_support & JSIMD_NEON)
434     return 1;
435 
436   return 0;
437 }
438 
439 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)440 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
441                       JSAMPARRAY input_data, JSAMPARRAY output_data)
442 {
443   jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
444                              compptr->v_samp_factor, compptr->width_in_blocks,
445                              input_data, output_data);
446 }
447 
448 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)449 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
450                       JSAMPARRAY input_data, JSAMPARRAY output_data)
451 {
452   jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
453                              compptr->v_samp_factor, compptr->width_in_blocks,
454                              input_data, output_data);
455 }
456 
457 GLOBAL(int)
jsimd_can_h2v2_upsample(void)458 jsimd_can_h2v2_upsample(void)
459 {
460   init_simd();
461 
462   /* The code is optimised for these values only */
463   if (BITS_IN_JSAMPLE != 8)
464     return 0;
465   if (sizeof(JDIMENSION) != 4)
466     return 0;
467 
468   if (simd_support & JSIMD_NEON)
469     return 1;
470 
471   return 0;
472 }
473 
474 GLOBAL(int)
jsimd_can_h2v1_upsample(void)475 jsimd_can_h2v1_upsample(void)
476 {
477   init_simd();
478 
479   /* The code is optimised for these values only */
480   if (BITS_IN_JSAMPLE != 8)
481     return 0;
482   if (sizeof(JDIMENSION) != 4)
483     return 0;
484   if (simd_support & JSIMD_NEON)
485     return 1;
486 
487   return 0;
488 }
489 
490 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)491 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
492                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
493 {
494   jsimd_h2v2_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
495                            input_data, output_data_ptr);
496 }
497 
498 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)499 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
500                     JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
501 {
502   jsimd_h2v1_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
503                            input_data, output_data_ptr);
504 }
505 
506 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)507 jsimd_can_h2v2_fancy_upsample(void)
508 {
509   init_simd();
510 
511   /* The code is optimised for these values only */
512   if (BITS_IN_JSAMPLE != 8)
513     return 0;
514   if (sizeof(JDIMENSION) != 4)
515     return 0;
516 
517   if (simd_support & JSIMD_NEON)
518     return 1;
519 
520   return 0;
521 }
522 
523 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)524 jsimd_can_h2v1_fancy_upsample(void)
525 {
526   init_simd();
527 
528   /* The code is optimised for these values only */
529   if (BITS_IN_JSAMPLE != 8)
530     return 0;
531   if (sizeof(JDIMENSION) != 4)
532     return 0;
533 
534   if (simd_support & JSIMD_NEON)
535     return 1;
536 
537   return 0;
538 }
539 
540 GLOBAL(int)
jsimd_can_h1v2_fancy_upsample(void)541 jsimd_can_h1v2_fancy_upsample(void)
542 {
543   init_simd();
544 
545   /* The code is optimised for these values only */
546   if (BITS_IN_JSAMPLE != 8)
547     return 0;
548   if (sizeof(JDIMENSION) != 4)
549     return 0;
550 
551   if (simd_support & JSIMD_NEON)
552     return 1;
553 
554   return 0;
555 }
556 
557 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)558 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
559                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
560 {
561   jsimd_h2v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
562                                  compptr->downsampled_width, input_data,
563                                  output_data_ptr);
564 }
565 
566 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)567 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
568                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
569 {
570   jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
571                                  compptr->downsampled_width, input_data,
572                                  output_data_ptr);
573 }
574 
575 GLOBAL(void)
jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)576 jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
577                           JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
578 {
579   jsimd_h1v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
580                                  compptr->downsampled_width, input_data,
581                                  output_data_ptr);
582 }
583 
584 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)585 jsimd_can_h2v2_merged_upsample(void)
586 {
587   init_simd();
588 
589   /* The code is optimised for these values only */
590   if (BITS_IN_JSAMPLE != 8)
591     return 0;
592   if (sizeof(JDIMENSION) != 4)
593     return 0;
594 
595   if (simd_support & JSIMD_NEON)
596     return 1;
597 
598   return 0;
599 }
600 
601 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)602 jsimd_can_h2v1_merged_upsample(void)
603 {
604   init_simd();
605 
606   /* The code is optimised for these values only */
607   if (BITS_IN_JSAMPLE != 8)
608     return 0;
609   if (sizeof(JDIMENSION) != 4)
610     return 0;
611 
612   if (simd_support & JSIMD_NEON)
613     return 1;
614 
615   return 0;
616 }
617 
618 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)619 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
620                            JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
621 {
622   void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
623 
624   switch (cinfo->out_color_space) {
625     case JCS_EXT_RGB:
626       neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
627       break;
628     case JCS_EXT_RGBX:
629     case JCS_EXT_RGBA:
630       neonfct = jsimd_h2v2_extrgbx_merged_upsample_neon;
631       break;
632     case JCS_EXT_BGR:
633       neonfct = jsimd_h2v2_extbgr_merged_upsample_neon;
634       break;
635     case JCS_EXT_BGRX:
636     case JCS_EXT_BGRA:
637       neonfct = jsimd_h2v2_extbgrx_merged_upsample_neon;
638       break;
639     case JCS_EXT_XBGR:
640     case JCS_EXT_ABGR:
641       neonfct = jsimd_h2v2_extxbgr_merged_upsample_neon;
642       break;
643     case JCS_EXT_XRGB:
644     case JCS_EXT_ARGB:
645       neonfct = jsimd_h2v2_extxrgb_merged_upsample_neon;
646       break;
647     default:
648       neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
649       break;
650   }
651 
652   neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
653 }
654 
655 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)656 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
657                            JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
658 {
659   void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
660 
661   switch (cinfo->out_color_space) {
662     case JCS_EXT_RGB:
663       neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
664       break;
665     case JCS_EXT_RGBX:
666     case JCS_EXT_RGBA:
667       neonfct = jsimd_h2v1_extrgbx_merged_upsample_neon;
668       break;
669     case JCS_EXT_BGR:
670       neonfct = jsimd_h2v1_extbgr_merged_upsample_neon;
671       break;
672     case JCS_EXT_BGRX:
673     case JCS_EXT_BGRA:
674       neonfct = jsimd_h2v1_extbgrx_merged_upsample_neon;
675       break;
676     case JCS_EXT_XBGR:
677     case JCS_EXT_ABGR:
678       neonfct = jsimd_h2v1_extxbgr_merged_upsample_neon;
679       break;
680     case JCS_EXT_XRGB:
681     case JCS_EXT_ARGB:
682       neonfct = jsimd_h2v1_extxrgb_merged_upsample_neon;
683       break;
684     default:
685       neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
686       break;
687   }
688 
689   neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
690 }
691 
692 GLOBAL(int)
jsimd_can_convsamp(void)693 jsimd_can_convsamp(void)
694 {
695   init_simd();
696 
697   /* The code is optimised for these values only */
698   if (DCTSIZE != 8)
699     return 0;
700   if (BITS_IN_JSAMPLE != 8)
701     return 0;
702   if (sizeof(JDIMENSION) != 4)
703     return 0;
704   if (sizeof(DCTELEM) != 2)
705     return 0;
706 
707   if (simd_support & JSIMD_NEON)
708     return 1;
709 
710   return 0;
711 }
712 
713 GLOBAL(int)
jsimd_can_convsamp_float(void)714 jsimd_can_convsamp_float(void)
715 {
716   return 0;
717 }
718 
719 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)720 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
721                DCTELEM *workspace)
722 {
723   jsimd_convsamp_neon(sample_data, start_col, workspace);
724 }
725 
726 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)727 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
728                      FAST_FLOAT *workspace)
729 {
730 }
731 
732 GLOBAL(int)
jsimd_can_fdct_islow(void)733 jsimd_can_fdct_islow(void)
734 {
735   init_simd();
736 
737   /* The code is optimised for these values only */
738   if (DCTSIZE != 8)
739     return 0;
740   if (sizeof(DCTELEM) != 2)
741     return 0;
742 
743   if (simd_support & JSIMD_NEON)
744     return 1;
745 
746   return 0;
747 }
748 
749 GLOBAL(int)
jsimd_can_fdct_ifast(void)750 jsimd_can_fdct_ifast(void)
751 {
752   init_simd();
753 
754   /* The code is optimised for these values only */
755   if (DCTSIZE != 8)
756     return 0;
757   if (sizeof(DCTELEM) != 2)
758     return 0;
759 
760   if (simd_support & JSIMD_NEON)
761     return 1;
762 
763   return 0;
764 }
765 
766 GLOBAL(int)
jsimd_can_fdct_float(void)767 jsimd_can_fdct_float(void)
768 {
769   return 0;
770 }
771 
772 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)773 jsimd_fdct_islow(DCTELEM *data)
774 {
775   jsimd_fdct_islow_neon(data);
776 }
777 
778 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)779 jsimd_fdct_ifast(DCTELEM *data)
780 {
781   jsimd_fdct_ifast_neon(data);
782 }
783 
784 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)785 jsimd_fdct_float(FAST_FLOAT *data)
786 {
787 }
788 
789 GLOBAL(int)
jsimd_can_quantize(void)790 jsimd_can_quantize(void)
791 {
792   init_simd();
793 
794   /* The code is optimised for these values only */
795   if (DCTSIZE != 8)
796     return 0;
797   if (sizeof(JCOEF) != 2)
798     return 0;
799   if (sizeof(DCTELEM) != 2)
800     return 0;
801 
802   if (simd_support & JSIMD_NEON)
803     return 1;
804 
805   return 0;
806 }
807 
808 GLOBAL(int)
jsimd_can_quantize_float(void)809 jsimd_can_quantize_float(void)
810 {
811   return 0;
812 }
813 
814 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)815 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
816 {
817   jsimd_quantize_neon(coef_block, divisors, workspace);
818 }
819 
820 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)821 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
822                      FAST_FLOAT *workspace)
823 {
824 }
825 
826 GLOBAL(int)
jsimd_can_idct_2x2(void)827 jsimd_can_idct_2x2(void)
828 {
829   init_simd();
830 
831   /* The code is optimised for these values only */
832   if (DCTSIZE != 8)
833     return 0;
834   if (sizeof(JCOEF) != 2)
835     return 0;
836   if (BITS_IN_JSAMPLE != 8)
837     return 0;
838   if (sizeof(JDIMENSION) != 4)
839     return 0;
840   if (sizeof(ISLOW_MULT_TYPE) != 2)
841     return 0;
842 
843   if (simd_support & JSIMD_NEON)
844     return 1;
845 
846   return 0;
847 }
848 
849 GLOBAL(int)
jsimd_can_idct_4x4(void)850 jsimd_can_idct_4x4(void)
851 {
852   init_simd();
853 
854   /* The code is optimised for these values only */
855   if (DCTSIZE != 8)
856     return 0;
857   if (sizeof(JCOEF) != 2)
858     return 0;
859   if (BITS_IN_JSAMPLE != 8)
860     return 0;
861   if (sizeof(JDIMENSION) != 4)
862     return 0;
863   if (sizeof(ISLOW_MULT_TYPE) != 2)
864     return 0;
865 
866   if (simd_support & JSIMD_NEON)
867     return 1;
868 
869   return 0;
870 }
871 
872 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)873 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
874                JCOEFPTR coef_block, JSAMPARRAY output_buf,
875                JDIMENSION output_col)
876 {
877   jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
878 }
879 
880 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)881 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
882                JCOEFPTR coef_block, JSAMPARRAY output_buf,
883                JDIMENSION output_col)
884 {
885   jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
886 }
887 
888 GLOBAL(int)
jsimd_can_idct_islow(void)889 jsimd_can_idct_islow(void)
890 {
891   init_simd();
892 
893   /* The code is optimised for these values only */
894   if (DCTSIZE != 8)
895     return 0;
896   if (sizeof(JCOEF) != 2)
897     return 0;
898   if (BITS_IN_JSAMPLE != 8)
899     return 0;
900   if (sizeof(JDIMENSION) != 4)
901     return 0;
902   if (sizeof(ISLOW_MULT_TYPE) != 2)
903     return 0;
904 
905   if (simd_support & JSIMD_NEON)
906     return 1;
907 
908   return 0;
909 }
910 
911 GLOBAL(int)
jsimd_can_idct_ifast(void)912 jsimd_can_idct_ifast(void)
913 {
914   init_simd();
915 
916   /* The code is optimised for these values only */
917   if (DCTSIZE != 8)
918     return 0;
919   if (sizeof(JCOEF) != 2)
920     return 0;
921   if (BITS_IN_JSAMPLE != 8)
922     return 0;
923   if (sizeof(JDIMENSION) != 4)
924     return 0;
925   if (sizeof(IFAST_MULT_TYPE) != 2)
926     return 0;
927   if (IFAST_SCALE_BITS != 2)
928     return 0;
929 
930   if (simd_support & JSIMD_NEON)
931     return 1;
932 
933   return 0;
934 }
935 
936 GLOBAL(int)
jsimd_can_idct_float(void)937 jsimd_can_idct_float(void)
938 {
939   return 0;
940 }
941 
942 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)943 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
944                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
945                  JDIMENSION output_col)
946 {
947   jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
948                         output_col);
949 }
950 
951 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)952 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
953                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
954                  JDIMENSION output_col)
955 {
956   jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
957                         output_col);
958 }
959 
960 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)961 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
962                  JCOEFPTR coef_block, JSAMPARRAY output_buf,
963                  JDIMENSION output_col)
964 {
965 }
966 
967 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)968 jsimd_can_huff_encode_one_block(void)
969 {
970   init_simd();
971 
972   if (DCTSIZE != 8)
973     return 0;
974   if (sizeof(JCOEF) != 2)
975     return 0;
976 
977   if (simd_support & JSIMD_NEON && simd_huffman)
978     return 1;
979 
980   return 0;
981 }
982 
983 GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)984 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
985                             int last_dc_val, c_derived_tbl *dctbl,
986                             c_derived_tbl *actbl)
987 {
988 #ifndef NEON_INTRINSICS
989   if (simd_features & JSIMD_FASTTBL)
990 #endif
991     return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
992                                             dctbl, actbl);
993 #ifndef NEON_INTRINSICS
994   else
995     return jsimd_huff_encode_one_block_neon_slowtbl(state, buffer, block,
996                                                     last_dc_val, dctbl, actbl);
997 #endif
998 }
999 
1000 GLOBAL(int)
jsimd_can_encode_mcu_AC_first_prepare(void)1001 jsimd_can_encode_mcu_AC_first_prepare(void)
1002 {
1003   init_simd();
1004 
1005   if (DCTSIZE != 8)
1006     return 0;
1007   if (sizeof(JCOEF) != 2)
1008     return 0;
1009   if (SIZEOF_SIZE_T != 8)
1010     return 0;
1011 
1012   if (simd_support & JSIMD_NEON)
1013     return 1;
1014 
1015   return 0;
1016 }
1017 
1018 GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,UJCOEF * values,size_t * zerobits)1019 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1020                                   const int *jpeg_natural_order_start, int Sl,
1021                                   int Al, UJCOEF *values, size_t *zerobits)
1022 {
1023   jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
1024                                          Sl, Al, values, zerobits);
1025 }
1026 
1027 GLOBAL(int)
jsimd_can_encode_mcu_AC_refine_prepare(void)1028 jsimd_can_encode_mcu_AC_refine_prepare(void)
1029 {
1030   init_simd();
1031 
1032   if (DCTSIZE != 8)
1033     return 0;
1034   if (sizeof(JCOEF) != 2)
1035     return 0;
1036   if (SIZEOF_SIZE_T != 8)
1037     return 0;
1038 
1039   if (simd_support & JSIMD_NEON)
1040     return 1;
1041 
1042   return 0;
1043 }
1044 
1045 GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,UJCOEF * absvalues,size_t * bits)1046 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1047                                    const int *jpeg_natural_order_start, int Sl,
1048                                    int Al, UJCOEF *absvalues, size_t *bits)
1049 {
1050   return jsimd_encode_mcu_AC_refine_prepare_neon(block,
1051                                                  jpeg_natural_order_start,
1052                                                  Sl, Al, absvalues, bits);
1053 }
1054