1 /*
2 * jsimd_arm64.c
3 *
4 * Copyright 2009 Pierre Ossman <[email protected]> for Cendio AB
5 * Copyright (C) 2011, Nokia Corporation and/or its subsidiary(-ies).
6 * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2020, 2022, D. R. Commander.
7 * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
8 * Copyright (C) 2020, Arm Limited.
9 *
10 * Based on the x86 SIMD extension for IJG JPEG library,
11 * Copyright (C) 1999-2006, MIYASAKA Masaru.
12 * For conditions of distribution and use, see copyright notice in jsimdext.inc
13 *
14 * This file contains the interface between the "normal" portions
15 * of the library and the SIMD implementations when running on a
16 * 64-bit Arm architecture.
17 */
18
19 #define JPEG_INTERNALS
20 #include "../../../jinclude.h"
21 #include "../../../jpeglib.h"
22 #include "../../../jsimd.h"
23 #include "../../../jdct.h"
24 #include "../../../jsimddct.h"
25 #include "../../jsimd.h"
26
27 #include <ctype.h>
28
/*
 * Bit flags kept in simd_features.  All three start out set; they are cleared
 * while parsing /proc/cpuinfo or (for the LD3/ST3 flags) through environment
 * variables in init_simd().
 */
/* When clear, the *_slowld3 RGB/BGR-to-YCC routines are selected. */
#define JSIMD_FASTLD3  1
/* When clear, the *_slowst3 YCC-to-RGB/BGR routines are selected. */
#define JSIMD_FASTST3  2
/* When clear, the *_slowtbl Huffman encoder is selected. */
#define JSIMD_FASTTBL  4

/* Set of enabled SIMD extensions; ~0 means init_simd() has not run yet. */
static THREAD_LOCAL unsigned int simd_support = ~0;
/* Non-zero if the SIMD Huffman encoder may be used. */
static THREAD_LOCAL unsigned int simd_huffman = 1;
/* Combination of the JSIMD_FAST* flags above. */
static THREAD_LOCAL unsigned int simd_features = JSIMD_FASTLD3 |
                                                 JSIMD_FASTST3 | JSIMD_FASTTBL;
37
38 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
39
40 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
41
/*
 * Test whether a single line of text assigns a given value to a given field.
 *
 * buffer: NUL-terminated line to examine
 * field:  text the line must start with (e.g. "CPU part")
 * value:  token to look for in the remainder of the line
 *
 * Returns 1 if the line starts with 'field' and 'value' appears after it as a
 * separate word (delimited by whitespace, the start of the searched region, or
 * the end of the string), 0 otherwise.  An empty 'value' never matches.
 */
LOCAL(int)
check_cpuinfo(char *buffer, const char *field, char *value)
{
  size_t field_len, value_len;
  char *start, *p;

  if (*value == 0)
    return 0;
  field_len = strlen(field);
  if (strncmp(buffer, field, field_len) != 0)
    return 0;

  /* Skip the field name and any whitespace that follows it.  The casts to
     unsigned char keep isspace() well-defined for bytes >= 0x80. */
  start = buffer + field_len;
  while (isspace((unsigned char)*start))
    start++;

  /* Check if 'value' is present in the buffer as a separate word.  The left
     boundary is always tested against the fixed start of the region, so a
     match glued to a preceding non-space character (e.g. "a0xd03") is
     rejected no matter how far the search has advanced. */
  value_len = strlen(value);
  for (p = strstr(start, value); p != NULL; p = strstr(p + 1, value)) {
    int left_ok = (p == start) || isspace((unsigned char)p[-1]);
    int right_ok = (p[value_len] == 0) ||
                   isspace((unsigned char)p[value_len]);

    if (left_ok && right_ok)
      return 1;
  }
  return 0;
}
70
71 LOCAL(int)
parse_proc_cpuinfo(int bufsize)72 parse_proc_cpuinfo(int bufsize)
73 {
74 char *buffer = (char *)malloc(bufsize);
75 FILE *fd;
76
77 if (!buffer)
78 return 0;
79
80 fd = fopen("/proc/cpuinfo", "r");
81 if (fd) {
82 while (fgets(buffer, bufsize, fd)) {
83 if (!strchr(buffer, '\n') && !feof(fd)) {
84 /* "impossible" happened - insufficient size of the buffer! */
85 fclose(fd);
86 free(buffer);
87 return 0;
88 }
89 if (check_cpuinfo(buffer, "CPU part", "0xd03") ||
90 check_cpuinfo(buffer, "CPU part", "0xd07"))
91 /* The Cortex-A53 has a slow tbl implementation. We can gain a few
92 percent speedup by disabling the use of that instruction. The
93 speedup on Cortex-A57 is more subtle but still measurable. */
94 simd_features &= ~JSIMD_FASTTBL;
95 else if (check_cpuinfo(buffer, "CPU part", "0x0a1"))
96 /* The SIMD version of Huffman encoding is slower than the C version on
97 Cavium ThunderX. Also, ld3 and st3 are abyssmally slow on that
98 CPU. */
99 simd_huffman = simd_features = 0;
100 }
101 fclose(fd);
102 }
103 free(buffer);
104 return 1;
105 }
106
107 #endif
108
109 /*
110 * Check what SIMD accelerations are supported.
111 */
112
113 /*
114 * Armv8 architectures support Neon extensions by default.
115 * It is no longer optional as it was with Armv7.
116 */
117
118
119 LOCAL(void)
init_simd(void)120 init_simd(void)
121 {
122 #ifndef NO_GETENV
123 char env[2] = { 0 };
124 #endif
125 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
126 int bufsize = 1024; /* an initial guess for the line buffer size limit */
127 #endif
128
129 if (simd_support != ~0U)
130 return;
131
132 simd_support = 0;
133
134 simd_support |= JSIMD_NEON;
135 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
136 while (!parse_proc_cpuinfo(bufsize)) {
137 bufsize *= 2;
138 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
139 break;
140 }
141 #endif
142
143 #ifndef NO_GETENV
144 /* Force different settings through environment variables */
145 if (!GETENV_S(env, 2, "JSIMD_FORCENEON") && !strcmp(env, "1"))
146 simd_support = JSIMD_NEON;
147 if (!GETENV_S(env, 2, "JSIMD_FORCENONE") && !strcmp(env, "1"))
148 simd_support = 0;
149 if (!GETENV_S(env, 2, "JSIMD_NOHUFFENC") && !strcmp(env, "1"))
150 simd_huffman = 0;
151 if (!GETENV_S(env, 2, "JSIMD_FASTLD3") && !strcmp(env, "1"))
152 simd_features |= JSIMD_FASTLD3;
153 if (!GETENV_S(env, 2, "JSIMD_FASTLD3") && !strcmp(env, "0"))
154 simd_features &= ~JSIMD_FASTLD3;
155 if (!GETENV_S(env, 2, "JSIMD_FASTST3") && !strcmp(env, "1"))
156 simd_features |= JSIMD_FASTST3;
157 if (!GETENV_S(env, 2, "JSIMD_FASTST3") && !strcmp(env, "0"))
158 simd_features &= ~JSIMD_FASTST3;
159 #endif
160 }
161
162 GLOBAL(int)
jsimd_can_rgb_ycc(void)163 jsimd_can_rgb_ycc(void)
164 {
165 init_simd();
166
167 /* The code is optimised for these values only */
168 if (BITS_IN_JSAMPLE != 8)
169 return 0;
170 if (sizeof(JDIMENSION) != 4)
171 return 0;
172 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
173 return 0;
174
175 if (simd_support & JSIMD_NEON)
176 return 1;
177
178 return 0;
179 }
180
181 GLOBAL(int)
jsimd_can_rgb_gray(void)182 jsimd_can_rgb_gray(void)
183 {
184 init_simd();
185
186 /* The code is optimised for these values only */
187 if (BITS_IN_JSAMPLE != 8)
188 return 0;
189 if (sizeof(JDIMENSION) != 4)
190 return 0;
191 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
192 return 0;
193
194 if (simd_support & JSIMD_NEON)
195 return 1;
196
197 return 0;
198 }
199
200 GLOBAL(int)
jsimd_can_ycc_rgb(void)201 jsimd_can_ycc_rgb(void)
202 {
203 init_simd();
204
205 /* The code is optimised for these values only */
206 if (BITS_IN_JSAMPLE != 8)
207 return 0;
208 if (sizeof(JDIMENSION) != 4)
209 return 0;
210 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
211 return 0;
212
213 if (simd_support & JSIMD_NEON)
214 return 1;
215
216 return 0;
217 }
218
219 GLOBAL(int)
jsimd_can_ycc_rgb565(void)220 jsimd_can_ycc_rgb565(void)
221 {
222 init_simd();
223
224 /* The code is optimised for these values only */
225 if (BITS_IN_JSAMPLE != 8)
226 return 0;
227 if (sizeof(JDIMENSION) != 4)
228 return 0;
229
230 if (simd_support & JSIMD_NEON)
231 return 1;
232
233 return 0;
234 }
235
236 GLOBAL(void)
jsimd_rgb_ycc_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)237 jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
238 JSAMPIMAGE output_buf, JDIMENSION output_row,
239 int num_rows)
240 {
241 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
242
243 switch (cinfo->in_color_space) {
244 case JCS_EXT_RGB:
245 #ifndef NEON_INTRINSICS
246 if (simd_features & JSIMD_FASTLD3)
247 #endif
248 neonfct = jsimd_extrgb_ycc_convert_neon;
249 #ifndef NEON_INTRINSICS
250 else
251 neonfct = jsimd_extrgb_ycc_convert_neon_slowld3;
252 #endif
253 break;
254 case JCS_EXT_RGBX:
255 case JCS_EXT_RGBA:
256 neonfct = jsimd_extrgbx_ycc_convert_neon;
257 break;
258 case JCS_EXT_BGR:
259 #ifndef NEON_INTRINSICS
260 if (simd_features & JSIMD_FASTLD3)
261 #endif
262 neonfct = jsimd_extbgr_ycc_convert_neon;
263 #ifndef NEON_INTRINSICS
264 else
265 neonfct = jsimd_extbgr_ycc_convert_neon_slowld3;
266 #endif
267 break;
268 case JCS_EXT_BGRX:
269 case JCS_EXT_BGRA:
270 neonfct = jsimd_extbgrx_ycc_convert_neon;
271 break;
272 case JCS_EXT_XBGR:
273 case JCS_EXT_ABGR:
274 neonfct = jsimd_extxbgr_ycc_convert_neon;
275 break;
276 case JCS_EXT_XRGB:
277 case JCS_EXT_ARGB:
278 neonfct = jsimd_extxrgb_ycc_convert_neon;
279 break;
280 default:
281 #ifndef NEON_INTRINSICS
282 if (simd_features & JSIMD_FASTLD3)
283 #endif
284 neonfct = jsimd_extrgb_ycc_convert_neon;
285 #ifndef NEON_INTRINSICS
286 else
287 neonfct = jsimd_extrgb_ycc_convert_neon_slowld3;
288 #endif
289 break;
290 }
291
292 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
293 }
294
295 GLOBAL(void)
jsimd_rgb_gray_convert(j_compress_ptr cinfo,JSAMPARRAY input_buf,JSAMPIMAGE output_buf,JDIMENSION output_row,int num_rows)296 jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
297 JSAMPIMAGE output_buf, JDIMENSION output_row,
298 int num_rows)
299 {
300 void (*neonfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
301
302 switch (cinfo->in_color_space) {
303 case JCS_EXT_RGB:
304 neonfct = jsimd_extrgb_gray_convert_neon;
305 break;
306 case JCS_EXT_RGBX:
307 case JCS_EXT_RGBA:
308 neonfct = jsimd_extrgbx_gray_convert_neon;
309 break;
310 case JCS_EXT_BGR:
311 neonfct = jsimd_extbgr_gray_convert_neon;
312 break;
313 case JCS_EXT_BGRX:
314 case JCS_EXT_BGRA:
315 neonfct = jsimd_extbgrx_gray_convert_neon;
316 break;
317 case JCS_EXT_XBGR:
318 case JCS_EXT_ABGR:
319 neonfct = jsimd_extxbgr_gray_convert_neon;
320 break;
321 case JCS_EXT_XRGB:
322 case JCS_EXT_ARGB:
323 neonfct = jsimd_extxrgb_gray_convert_neon;
324 break;
325 default:
326 neonfct = jsimd_extrgb_gray_convert_neon;
327 break;
328 }
329
330 neonfct(cinfo->image_width, input_buf, output_buf, output_row, num_rows);
331 }
332
333 GLOBAL(void)
jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)334 jsimd_ycc_rgb_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
335 JDIMENSION input_row, JSAMPARRAY output_buf,
336 int num_rows)
337 {
338 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
339
340 switch (cinfo->out_color_space) {
341 case JCS_EXT_RGB:
342 #ifndef NEON_INTRINSICS
343 if (simd_features & JSIMD_FASTST3)
344 #endif
345 neonfct = jsimd_ycc_extrgb_convert_neon;
346 #ifndef NEON_INTRINSICS
347 else
348 neonfct = jsimd_ycc_extrgb_convert_neon_slowst3;
349 #endif
350 break;
351 case JCS_EXT_RGBX:
352 case JCS_EXT_RGBA:
353 neonfct = jsimd_ycc_extrgbx_convert_neon;
354 break;
355 case JCS_EXT_BGR:
356 #ifndef NEON_INTRINSICS
357 if (simd_features & JSIMD_FASTST3)
358 #endif
359 neonfct = jsimd_ycc_extbgr_convert_neon;
360 #ifndef NEON_INTRINSICS
361 else
362 neonfct = jsimd_ycc_extbgr_convert_neon_slowst3;
363 #endif
364 break;
365 case JCS_EXT_BGRX:
366 case JCS_EXT_BGRA:
367 neonfct = jsimd_ycc_extbgrx_convert_neon;
368 break;
369 case JCS_EXT_XBGR:
370 case JCS_EXT_ABGR:
371 neonfct = jsimd_ycc_extxbgr_convert_neon;
372 break;
373 case JCS_EXT_XRGB:
374 case JCS_EXT_ARGB:
375 neonfct = jsimd_ycc_extxrgb_convert_neon;
376 break;
377 default:
378 #ifndef NEON_INTRINSICS
379 if (simd_features & JSIMD_FASTST3)
380 #endif
381 neonfct = jsimd_ycc_extrgb_convert_neon;
382 #ifndef NEON_INTRINSICS
383 else
384 neonfct = jsimd_ycc_extrgb_convert_neon_slowst3;
385 #endif
386 break;
387 }
388
389 neonfct(cinfo->output_width, input_buf, input_row, output_buf, num_rows);
390 }
391
392 GLOBAL(void)
jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION input_row,JSAMPARRAY output_buf,int num_rows)393 jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
394 JDIMENSION input_row, JSAMPARRAY output_buf,
395 int num_rows)
396 {
397 jsimd_ycc_rgb565_convert_neon(cinfo->output_width, input_buf, input_row,
398 output_buf, num_rows);
399 }
400
401 GLOBAL(int)
jsimd_can_h2v2_downsample(void)402 jsimd_can_h2v2_downsample(void)
403 {
404 init_simd();
405
406 /* The code is optimised for these values only */
407 if (BITS_IN_JSAMPLE != 8)
408 return 0;
409 if (DCTSIZE != 8)
410 return 0;
411 if (sizeof(JDIMENSION) != 4)
412 return 0;
413
414 if (simd_support & JSIMD_NEON)
415 return 1;
416
417 return 0;
418 }
419
420 GLOBAL(int)
jsimd_can_h2v1_downsample(void)421 jsimd_can_h2v1_downsample(void)
422 {
423 init_simd();
424
425 /* The code is optimised for these values only */
426 if (BITS_IN_JSAMPLE != 8)
427 return 0;
428 if (DCTSIZE != 8)
429 return 0;
430 if (sizeof(JDIMENSION) != 4)
431 return 0;
432
433 if (simd_support & JSIMD_NEON)
434 return 1;
435
436 return 0;
437 }
438
439 GLOBAL(void)
jsimd_h2v2_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)440 jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
441 JSAMPARRAY input_data, JSAMPARRAY output_data)
442 {
443 jsimd_h2v2_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
444 compptr->v_samp_factor, compptr->width_in_blocks,
445 input_data, output_data);
446 }
447
448 GLOBAL(void)
jsimd_h2v1_downsample(j_compress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY output_data)449 jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
450 JSAMPARRAY input_data, JSAMPARRAY output_data)
451 {
452 jsimd_h2v1_downsample_neon(cinfo->image_width, cinfo->max_v_samp_factor,
453 compptr->v_samp_factor, compptr->width_in_blocks,
454 input_data, output_data);
455 }
456
457 GLOBAL(int)
jsimd_can_h2v2_upsample(void)458 jsimd_can_h2v2_upsample(void)
459 {
460 init_simd();
461
462 /* The code is optimised for these values only */
463 if (BITS_IN_JSAMPLE != 8)
464 return 0;
465 if (sizeof(JDIMENSION) != 4)
466 return 0;
467
468 if (simd_support & JSIMD_NEON)
469 return 1;
470
471 return 0;
472 }
473
474 GLOBAL(int)
jsimd_can_h2v1_upsample(void)475 jsimd_can_h2v1_upsample(void)
476 {
477 init_simd();
478
479 /* The code is optimised for these values only */
480 if (BITS_IN_JSAMPLE != 8)
481 return 0;
482 if (sizeof(JDIMENSION) != 4)
483 return 0;
484 if (simd_support & JSIMD_NEON)
485 return 1;
486
487 return 0;
488 }
489
490 GLOBAL(void)
jsimd_h2v2_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)491 jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
492 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
493 {
494 jsimd_h2v2_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
495 input_data, output_data_ptr);
496 }
497
498 GLOBAL(void)
jsimd_h2v1_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)499 jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
500 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
501 {
502 jsimd_h2v1_upsample_neon(cinfo->max_v_samp_factor, cinfo->output_width,
503 input_data, output_data_ptr);
504 }
505
506 GLOBAL(int)
jsimd_can_h2v2_fancy_upsample(void)507 jsimd_can_h2v2_fancy_upsample(void)
508 {
509 init_simd();
510
511 /* The code is optimised for these values only */
512 if (BITS_IN_JSAMPLE != 8)
513 return 0;
514 if (sizeof(JDIMENSION) != 4)
515 return 0;
516
517 if (simd_support & JSIMD_NEON)
518 return 1;
519
520 return 0;
521 }
522
523 GLOBAL(int)
jsimd_can_h2v1_fancy_upsample(void)524 jsimd_can_h2v1_fancy_upsample(void)
525 {
526 init_simd();
527
528 /* The code is optimised for these values only */
529 if (BITS_IN_JSAMPLE != 8)
530 return 0;
531 if (sizeof(JDIMENSION) != 4)
532 return 0;
533
534 if (simd_support & JSIMD_NEON)
535 return 1;
536
537 return 0;
538 }
539
540 GLOBAL(int)
jsimd_can_h1v2_fancy_upsample(void)541 jsimd_can_h1v2_fancy_upsample(void)
542 {
543 init_simd();
544
545 /* The code is optimised for these values only */
546 if (BITS_IN_JSAMPLE != 8)
547 return 0;
548 if (sizeof(JDIMENSION) != 4)
549 return 0;
550
551 if (simd_support & JSIMD_NEON)
552 return 1;
553
554 return 0;
555 }
556
557 GLOBAL(void)
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)558 jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
559 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
560 {
561 jsimd_h2v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
562 compptr->downsampled_width, input_data,
563 output_data_ptr);
564 }
565
566 GLOBAL(void)
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)567 jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
568 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
569 {
570 jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
571 compptr->downsampled_width, input_data,
572 output_data_ptr);
573 }
574
575 GLOBAL(void)
jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo,jpeg_component_info * compptr,JSAMPARRAY input_data,JSAMPARRAY * output_data_ptr)576 jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
577 JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr)
578 {
579 jsimd_h1v2_fancy_upsample_neon(cinfo->max_v_samp_factor,
580 compptr->downsampled_width, input_data,
581 output_data_ptr);
582 }
583
584 GLOBAL(int)
jsimd_can_h2v2_merged_upsample(void)585 jsimd_can_h2v2_merged_upsample(void)
586 {
587 init_simd();
588
589 /* The code is optimised for these values only */
590 if (BITS_IN_JSAMPLE != 8)
591 return 0;
592 if (sizeof(JDIMENSION) != 4)
593 return 0;
594
595 if (simd_support & JSIMD_NEON)
596 return 1;
597
598 return 0;
599 }
600
601 GLOBAL(int)
jsimd_can_h2v1_merged_upsample(void)602 jsimd_can_h2v1_merged_upsample(void)
603 {
604 init_simd();
605
606 /* The code is optimised for these values only */
607 if (BITS_IN_JSAMPLE != 8)
608 return 0;
609 if (sizeof(JDIMENSION) != 4)
610 return 0;
611
612 if (simd_support & JSIMD_NEON)
613 return 1;
614
615 return 0;
616 }
617
618 GLOBAL(void)
jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)619 jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
620 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
621 {
622 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
623
624 switch (cinfo->out_color_space) {
625 case JCS_EXT_RGB:
626 neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
627 break;
628 case JCS_EXT_RGBX:
629 case JCS_EXT_RGBA:
630 neonfct = jsimd_h2v2_extrgbx_merged_upsample_neon;
631 break;
632 case JCS_EXT_BGR:
633 neonfct = jsimd_h2v2_extbgr_merged_upsample_neon;
634 break;
635 case JCS_EXT_BGRX:
636 case JCS_EXT_BGRA:
637 neonfct = jsimd_h2v2_extbgrx_merged_upsample_neon;
638 break;
639 case JCS_EXT_XBGR:
640 case JCS_EXT_ABGR:
641 neonfct = jsimd_h2v2_extxbgr_merged_upsample_neon;
642 break;
643 case JCS_EXT_XRGB:
644 case JCS_EXT_ARGB:
645 neonfct = jsimd_h2v2_extxrgb_merged_upsample_neon;
646 break;
647 default:
648 neonfct = jsimd_h2v2_extrgb_merged_upsample_neon;
649 break;
650 }
651
652 neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
653 }
654
655 GLOBAL(void)
jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,JSAMPIMAGE input_buf,JDIMENSION in_row_group_ctr,JSAMPARRAY output_buf)656 jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
657 JDIMENSION in_row_group_ctr, JSAMPARRAY output_buf)
658 {
659 void (*neonfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY);
660
661 switch (cinfo->out_color_space) {
662 case JCS_EXT_RGB:
663 neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
664 break;
665 case JCS_EXT_RGBX:
666 case JCS_EXT_RGBA:
667 neonfct = jsimd_h2v1_extrgbx_merged_upsample_neon;
668 break;
669 case JCS_EXT_BGR:
670 neonfct = jsimd_h2v1_extbgr_merged_upsample_neon;
671 break;
672 case JCS_EXT_BGRX:
673 case JCS_EXT_BGRA:
674 neonfct = jsimd_h2v1_extbgrx_merged_upsample_neon;
675 break;
676 case JCS_EXT_XBGR:
677 case JCS_EXT_ABGR:
678 neonfct = jsimd_h2v1_extxbgr_merged_upsample_neon;
679 break;
680 case JCS_EXT_XRGB:
681 case JCS_EXT_ARGB:
682 neonfct = jsimd_h2v1_extxrgb_merged_upsample_neon;
683 break;
684 default:
685 neonfct = jsimd_h2v1_extrgb_merged_upsample_neon;
686 break;
687 }
688
689 neonfct(cinfo->output_width, input_buf, in_row_group_ctr, output_buf);
690 }
691
692 GLOBAL(int)
jsimd_can_convsamp(void)693 jsimd_can_convsamp(void)
694 {
695 init_simd();
696
697 /* The code is optimised for these values only */
698 if (DCTSIZE != 8)
699 return 0;
700 if (BITS_IN_JSAMPLE != 8)
701 return 0;
702 if (sizeof(JDIMENSION) != 4)
703 return 0;
704 if (sizeof(DCTELEM) != 2)
705 return 0;
706
707 if (simd_support & JSIMD_NEON)
708 return 1;
709
710 return 0;
711 }
712
/*
 * Always returns 0: jsimd_convsamp_float() in this file is an empty stub.
 */
GLOBAL(int)
jsimd_can_convsamp_float(void)
{
  return 0;
}
718
719 GLOBAL(void)
jsimd_convsamp(JSAMPARRAY sample_data,JDIMENSION start_col,DCTELEM * workspace)720 jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
721 DCTELEM *workspace)
722 {
723 jsimd_convsamp_neon(sample_data, start_col, workspace);
724 }
725
726 GLOBAL(void)
jsimd_convsamp_float(JSAMPARRAY sample_data,JDIMENSION start_col,FAST_FLOAT * workspace)727 jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
728 FAST_FLOAT *workspace)
729 {
730 }
731
732 GLOBAL(int)
jsimd_can_fdct_islow(void)733 jsimd_can_fdct_islow(void)
734 {
735 init_simd();
736
737 /* The code is optimised for these values only */
738 if (DCTSIZE != 8)
739 return 0;
740 if (sizeof(DCTELEM) != 2)
741 return 0;
742
743 if (simd_support & JSIMD_NEON)
744 return 1;
745
746 return 0;
747 }
748
749 GLOBAL(int)
jsimd_can_fdct_ifast(void)750 jsimd_can_fdct_ifast(void)
751 {
752 init_simd();
753
754 /* The code is optimised for these values only */
755 if (DCTSIZE != 8)
756 return 0;
757 if (sizeof(DCTELEM) != 2)
758 return 0;
759
760 if (simd_support & JSIMD_NEON)
761 return 1;
762
763 return 0;
764 }
765
/*
 * Always returns 0: jsimd_fdct_float() in this file is an empty stub.
 */
GLOBAL(int)
jsimd_can_fdct_float(void)
{
  return 0;
}
771
772 GLOBAL(void)
jsimd_fdct_islow(DCTELEM * data)773 jsimd_fdct_islow(DCTELEM *data)
774 {
775 jsimd_fdct_islow_neon(data);
776 }
777
778 GLOBAL(void)
jsimd_fdct_ifast(DCTELEM * data)779 jsimd_fdct_ifast(DCTELEM *data)
780 {
781 jsimd_fdct_ifast_neon(data);
782 }
783
784 GLOBAL(void)
jsimd_fdct_float(FAST_FLOAT * data)785 jsimd_fdct_float(FAST_FLOAT *data)
786 {
787 }
788
789 GLOBAL(int)
jsimd_can_quantize(void)790 jsimd_can_quantize(void)
791 {
792 init_simd();
793
794 /* The code is optimised for these values only */
795 if (DCTSIZE != 8)
796 return 0;
797 if (sizeof(JCOEF) != 2)
798 return 0;
799 if (sizeof(DCTELEM) != 2)
800 return 0;
801
802 if (simd_support & JSIMD_NEON)
803 return 1;
804
805 return 0;
806 }
807
/*
 * Always returns 0: jsimd_quantize_float() in this file is an empty stub.
 */
GLOBAL(int)
jsimd_can_quantize_float(void)
{
  return 0;
}
813
814 GLOBAL(void)
jsimd_quantize(JCOEFPTR coef_block,DCTELEM * divisors,DCTELEM * workspace)815 jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
816 {
817 jsimd_quantize_neon(coef_block, divisors, workspace);
818 }
819
820 GLOBAL(void)
jsimd_quantize_float(JCOEFPTR coef_block,FAST_FLOAT * divisors,FAST_FLOAT * workspace)821 jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
822 FAST_FLOAT *workspace)
823 {
824 }
825
826 GLOBAL(int)
jsimd_can_idct_2x2(void)827 jsimd_can_idct_2x2(void)
828 {
829 init_simd();
830
831 /* The code is optimised for these values only */
832 if (DCTSIZE != 8)
833 return 0;
834 if (sizeof(JCOEF) != 2)
835 return 0;
836 if (BITS_IN_JSAMPLE != 8)
837 return 0;
838 if (sizeof(JDIMENSION) != 4)
839 return 0;
840 if (sizeof(ISLOW_MULT_TYPE) != 2)
841 return 0;
842
843 if (simd_support & JSIMD_NEON)
844 return 1;
845
846 return 0;
847 }
848
849 GLOBAL(int)
jsimd_can_idct_4x4(void)850 jsimd_can_idct_4x4(void)
851 {
852 init_simd();
853
854 /* The code is optimised for these values only */
855 if (DCTSIZE != 8)
856 return 0;
857 if (sizeof(JCOEF) != 2)
858 return 0;
859 if (BITS_IN_JSAMPLE != 8)
860 return 0;
861 if (sizeof(JDIMENSION) != 4)
862 return 0;
863 if (sizeof(ISLOW_MULT_TYPE) != 2)
864 return 0;
865
866 if (simd_support & JSIMD_NEON)
867 return 1;
868
869 return 0;
870 }
871
872 GLOBAL(void)
jsimd_idct_2x2(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)873 jsimd_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
874 JCOEFPTR coef_block, JSAMPARRAY output_buf,
875 JDIMENSION output_col)
876 {
877 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
878 }
879
880 GLOBAL(void)
jsimd_idct_4x4(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)881 jsimd_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
882 JCOEFPTR coef_block, JSAMPARRAY output_buf,
883 JDIMENSION output_col)
884 {
885 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
886 }
887
888 GLOBAL(int)
jsimd_can_idct_islow(void)889 jsimd_can_idct_islow(void)
890 {
891 init_simd();
892
893 /* The code is optimised for these values only */
894 if (DCTSIZE != 8)
895 return 0;
896 if (sizeof(JCOEF) != 2)
897 return 0;
898 if (BITS_IN_JSAMPLE != 8)
899 return 0;
900 if (sizeof(JDIMENSION) != 4)
901 return 0;
902 if (sizeof(ISLOW_MULT_TYPE) != 2)
903 return 0;
904
905 if (simd_support & JSIMD_NEON)
906 return 1;
907
908 return 0;
909 }
910
911 GLOBAL(int)
jsimd_can_idct_ifast(void)912 jsimd_can_idct_ifast(void)
913 {
914 init_simd();
915
916 /* The code is optimised for these values only */
917 if (DCTSIZE != 8)
918 return 0;
919 if (sizeof(JCOEF) != 2)
920 return 0;
921 if (BITS_IN_JSAMPLE != 8)
922 return 0;
923 if (sizeof(JDIMENSION) != 4)
924 return 0;
925 if (sizeof(IFAST_MULT_TYPE) != 2)
926 return 0;
927 if (IFAST_SCALE_BITS != 2)
928 return 0;
929
930 if (simd_support & JSIMD_NEON)
931 return 1;
932
933 return 0;
934 }
935
/*
 * Always returns 0: jsimd_idct_float() in this file is an empty stub.
 */
GLOBAL(int)
jsimd_can_idct_float(void)
{
  return 0;
}
941
942 GLOBAL(void)
jsimd_idct_islow(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)943 jsimd_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
944 JCOEFPTR coef_block, JSAMPARRAY output_buf,
945 JDIMENSION output_col)
946 {
947 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf,
948 output_col);
949 }
950
951 GLOBAL(void)
jsimd_idct_ifast(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)952 jsimd_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
953 JCOEFPTR coef_block, JSAMPARRAY output_buf,
954 JDIMENSION output_col)
955 {
956 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf,
957 output_col);
958 }
959
960 GLOBAL(void)
jsimd_idct_float(j_decompress_ptr cinfo,jpeg_component_info * compptr,JCOEFPTR coef_block,JSAMPARRAY output_buf,JDIMENSION output_col)961 jsimd_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
962 JCOEFPTR coef_block, JSAMPARRAY output_buf,
963 JDIMENSION output_col)
964 {
965 }
966
967 GLOBAL(int)
jsimd_can_huff_encode_one_block(void)968 jsimd_can_huff_encode_one_block(void)
969 {
970 init_simd();
971
972 if (DCTSIZE != 8)
973 return 0;
974 if (sizeof(JCOEF) != 2)
975 return 0;
976
977 if (simd_support & JSIMD_NEON && simd_huffman)
978 return 1;
979
980 return 0;
981 }
982
983 GLOBAL(JOCTET *)
jsimd_huff_encode_one_block(void * state,JOCTET * buffer,JCOEFPTR block,int last_dc_val,c_derived_tbl * dctbl,c_derived_tbl * actbl)984 jsimd_huff_encode_one_block(void *state, JOCTET *buffer, JCOEFPTR block,
985 int last_dc_val, c_derived_tbl *dctbl,
986 c_derived_tbl *actbl)
987 {
988 #ifndef NEON_INTRINSICS
989 if (simd_features & JSIMD_FASTTBL)
990 #endif
991 return jsimd_huff_encode_one_block_neon(state, buffer, block, last_dc_val,
992 dctbl, actbl);
993 #ifndef NEON_INTRINSICS
994 else
995 return jsimd_huff_encode_one_block_neon_slowtbl(state, buffer, block,
996 last_dc_val, dctbl, actbl);
997 #endif
998 }
999
1000 GLOBAL(int)
jsimd_can_encode_mcu_AC_first_prepare(void)1001 jsimd_can_encode_mcu_AC_first_prepare(void)
1002 {
1003 init_simd();
1004
1005 if (DCTSIZE != 8)
1006 return 0;
1007 if (sizeof(JCOEF) != 2)
1008 return 0;
1009 if (SIZEOF_SIZE_T != 8)
1010 return 0;
1011
1012 if (simd_support & JSIMD_NEON)
1013 return 1;
1014
1015 return 0;
1016 }
1017
1018 GLOBAL(void)
jsimd_encode_mcu_AC_first_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,UJCOEF * values,size_t * zerobits)1019 jsimd_encode_mcu_AC_first_prepare(const JCOEF *block,
1020 const int *jpeg_natural_order_start, int Sl,
1021 int Al, UJCOEF *values, size_t *zerobits)
1022 {
1023 jsimd_encode_mcu_AC_first_prepare_neon(block, jpeg_natural_order_start,
1024 Sl, Al, values, zerobits);
1025 }
1026
1027 GLOBAL(int)
jsimd_can_encode_mcu_AC_refine_prepare(void)1028 jsimd_can_encode_mcu_AC_refine_prepare(void)
1029 {
1030 init_simd();
1031
1032 if (DCTSIZE != 8)
1033 return 0;
1034 if (sizeof(JCOEF) != 2)
1035 return 0;
1036 if (SIZEOF_SIZE_T != 8)
1037 return 0;
1038
1039 if (simd_support & JSIMD_NEON)
1040 return 1;
1041
1042 return 0;
1043 }
1044
1045 GLOBAL(int)
jsimd_encode_mcu_AC_refine_prepare(const JCOEF * block,const int * jpeg_natural_order_start,int Sl,int Al,UJCOEF * absvalues,size_t * bits)1046 jsimd_encode_mcu_AC_refine_prepare(const JCOEF *block,
1047 const int *jpeg_natural_order_start, int Sl,
1048 int Al, UJCOEF *absvalues, size_t *bits)
1049 {
1050 return jsimd_encode_mcu_AC_refine_prepare_neon(block,
1051 jpeg_natural_order_start,
1052 Sl, Al, absvalues, bits);
1053 }
1054