xref: /aosp_15_r20/external/libjpeg-turbo/simd/arm/jchuff.h (revision dfc6aa5c1cfd4bc4e2018dc74aa96e29ee49c6da)
1 /*
2  * jchuff.h
3  *
4  * This file was part of the Independent JPEG Group's software:
5  * Copyright (C) 1991-1997, Thomas G. Lane.
6  * libjpeg-turbo Modifications:
7  * Copyright (C) 2009, 2018, 2021, D. R. Commander.
8  * Copyright (C) 2018, Matthias Räncker.
9  * Copyright (C) 2020-2021, Arm Limited.
10  * For conditions of distribution and use, see the accompanying README.ijg
11  * file.
12  */
13 
14 /* Expanded entropy encoder object for Huffman encoding.
15  *
16  * The savable_state subrecord contains fields that change within an MCU,
17  * but must not be updated permanently until we complete the MCU.
18  */
19 
/* Width, in bits, of the bit accumulation buffer: 64 when building for
 * AArch64 (__aarch64__ or _M_ARM64 is defined), 32 for every other target.
 */
#if !defined(__aarch64__) && !defined(_M_ARM64)
#define BIT_BUF_SIZE  32
#else
#define BIT_BUF_SIZE  64
#endif
25 
26 typedef struct {
27   size_t put_buffer;                    /* current bit accumulation buffer */
28   int free_bits;                        /* # of bits available in it */
29   int last_dc_val[MAX_COMPS_IN_SCAN];   /* last DC coef for each component */
30 } savable_state;
31 
32 typedef struct {
33   JOCTET *next_output_byte;     /* => next byte to write in buffer */
34   size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
35   savable_state cur;            /* Current bit buffer & DC state */
36   j_compress_ptr cinfo;         /* dump_buffer needs access to this */
37   int simd;
38 } working_state;
39 
40 /* Outputting bits to the file */
41 
/* Output byte b and, speculatively, an additional 0 byte.  0xFF must be
 * encoded as 0xFF 0x00, so the output buffer pointer is advanced by 2 if the
 * byte is 0xFF.  Otherwise, the output buffer pointer is advanced by 1, and
 * the speculative 0 byte will be overwritten by the next byte.
 *
 * Expects a JOCTET pointer named "buffer" in the enclosing scope and always
 * writes two bytes through it.  b is evaluated exactly once, so an argument
 * with side effects is safe.  The expansion is a braced block; FLUSH() invokes
 * it without a trailing semicolon.
 */
#define EMIT_BYTE(b) { \
  const JOCTET emit_byte_value = (JOCTET)(b); \
  \
  buffer[0] = emit_byte_value; \
  buffer[1] = 0; \
  /* Subtracting (-2 + 1) advances by 1; subtracting (-2 + 0) advances by 2. */ \
  buffer -= -2 + (emit_byte_value < 0xFF); \
}
52 
53 /* Output the entire bit buffer.  If there are no 0xFF bytes in it, then write
54  * directly to the output buffer.  Otherwise, use the EMIT_BYTE() macro to
55  * encode 0xFF as 0xFF 0x00.
56  */
57 #if defined(__aarch64__) || defined(_M_ARM64)
58 
/* 64-bit bit buffer variant: writes all eight bytes of put_buffer through
 * buffer and advances buffer past them.
 */
#define FLUSH() { \
  if (!(put_buffer & 0x8080808080808080 & \
        ~(put_buffer + 0x0101010101010101))) { \
    /* No 0xFF byte in put_buffer: one byte-swapped 64-bit store suffices. */ \
    *((uint64_t *)buffer) = BUILTIN_BSWAP64(put_buffer); \
    buffer += 8; \
  } else { \
    /* A 0xFF byte is present: emit byte by byte, most significant first, so \
     * that EMIT_BYTE() can encode each 0xFF as 0xFF 0x00. \
     */ \
    EMIT_BYTE(put_buffer >> 56) \
    EMIT_BYTE(put_buffer >> 48) \
    EMIT_BYTE(put_buffer >> 40) \
    EMIT_BYTE(put_buffer >> 32) \
    EMIT_BYTE(put_buffer >> 24) \
    EMIT_BYTE(put_buffer >> 16) \
    EMIT_BYTE(put_buffer >>  8) \
    EMIT_BYTE(put_buffer      ) \
  } \
}
74 
75 #else
76 
/* Write the four bytes of the 32-bit put_buffer through buffer and advance
 * buffer by 4.
 */
#if defined(_MSC_VER) && !defined(__clang__)
/* MSVC (non-clang) variant: plain byte stores, most significant byte first.
 * put_buffer is left unchanged.
 */
#define SPLAT() { \
  buffer[0] = (JOCTET)(put_buffer >> 24); \
  buffer[1] = (JOCTET)(put_buffer >> 16); \
  buffer[2] = (JOCTET)(put_buffer >>  8); \
  buffer[3] = (JOCTET)(put_buffer      ); \
  buffer += 4; \
}
#else
/* Inline-assembly variant: byte-swaps put_buffer in place (put_buffer is left
 * byte-swapped) and stores it with a single post-indexed STR, which also
 * advances buffer by 4.  The store goes through buffer, which the operand
 * list cannot express as a memory output, so the "memory" clobber tells the
 * compiler that memory is modified.  Without it, the compiler would be free to
 * reorder earlier stores to the same bytes (such as the speculative 0 byte
 * written by EMIT_BYTE()) past this store.
 */
#define SPLAT() { \
  put_buffer = __builtin_bswap32(put_buffer); \
  __asm__("str %1, [%0], #4" \
          : "+r" (buffer) \
          : "r" (put_buffer) \
          : "memory"); \
}
#endif
91 
/* 32-bit bit buffer variant: writes all four bytes of put_buffer through
 * buffer and advances buffer past them.
 */
#define FLUSH() { \
  if (!(put_buffer & 0x80808080 & ~(put_buffer + 0x01010101))) { \
    /* No 0xFF byte in put_buffer: store the whole word at once. */ \
    SPLAT(); \
  } else { \
    /* A 0xFF byte is present: emit byte by byte, most significant first, so \
     * that EMIT_BYTE() can encode each 0xFF as 0xFF 0x00. \
     */ \
    EMIT_BYTE(put_buffer >> 24) \
    EMIT_BYTE(put_buffer >> 16) \
    EMIT_BYTE(put_buffer >>  8) \
    EMIT_BYTE(put_buffer      ) \
  } \
}
102 
103 #endif
104 
/* Fill the bit buffer to capacity with the leading bits from code, then output
 * the bit buffer and put the remaining bits from code into the bit buffer.
 *
 * Operates on the variables "put_buffer" and "free_bits" from the enclosing
 * scope.  free_bits must already have been reduced by size and be negative
 * (PUT_BITS() arranges this), so that (size + free_bits) is the shift that
 * makes room for the leading bits and -free_bits is the shift that discards
 * the bits of code that do not fit.  After FLUSH(), free_bits is increased by
 * BIT_BUF_SIZE and put_buffer is set to code.
 *
 * Both parameters are parenthesized in the expansion, so any expression may
 * be passed.  code is evaluated twice, so it must be free of side effects.
 */
#define PUT_AND_FLUSH(code, size) { \
  put_buffer = (put_buffer << ((size) + free_bits)) | \
               ((code) >> -free_bits); \
  FLUSH() \
  free_bits += BIT_BUF_SIZE; \
  put_buffer = (code); \
}
114 
/* Insert code into the bit buffer and output the bit buffer if needed.
 * NOTE: We can't flush with free_bits == 0, since the left shift in
 * PUT_AND_FLUSH() would have undefined behavior.
 *
 * Operates on the variables "put_buffer" and "free_bits" from the enclosing
 * scope.  free_bits is first reduced by size; if it becomes negative, the code
 * does not fit and PUT_AND_FLUSH() takes over.  Otherwise code is appended
 * below the bits already in put_buffer.
 *
 * Both parameters are parenthesized in the expansion, so any expression may
 * be passed.  They are evaluated more than once, so they must be free of side
 * effects.
 */
#define PUT_BITS(code, size) { \
  free_bits -= (size); \
  if (free_bits < 0) { \
    PUT_AND_FLUSH((code), (size)) \
  } else { \
    put_buffer = (put_buffer << (size)) | (code); \
  } \
}
126 
/* OR code, shifted left by nbits, into diff, add size to nbits, and then pass
 * the combined value and the combined bit count to PUT_BITS().
 *
 * Operates on the variable "nbits" from the enclosing scope (in addition to
 * the variables that PUT_BITS() uses).  diff must be a modifiable lvalue; it
 * is updated in place.  code and size are parenthesized in the expansion, so
 * any expression may be passed for them.
 */
#define PUT_CODE(code, size, diff) { \
  (diff) |= (code) << nbits; \
  nbits += (size); \
  PUT_BITS((diff), nbits) \
}
132