xref: /aosp_15_r20/external/lzma/C/XzCrc64Opt.c (revision f6dc9357d832569d4d1f5d24eacdb3935a1ae8e6)
1 /* XzCrc64Opt.c -- CRC64 calculation (optimized functions)
2 2023-12-08 : Igor Pavlov : Public domain */
3 
4 #include "Precomp.h"
5 
6 #include "CpuArch.h"
7 
8 #if !defined(Z7_CRC64_NUM_TABLES) || Z7_CRC64_NUM_TABLES > 1
9 
/*
  Debug aid: when Z7_CRC64_DEBUG_BE is defined, the little-endian marker is
  removed and the big-endian marker is set, so the big-endian code below can
  be exercised on a little-endian cpu.
*/
// #define Z7_CRC64_DEBUG_BE
#if defined(Z7_CRC64_DEBUG_BE)
  #undef MY_CPU_LE
  #define MY_CPU_BE
#endif

/* Z7_CRC64_USE_64BIT selects the code paths that work on 64-bit words. */
#ifdef MY_CPU_64BIT
  #define Z7_CRC64_USE_64BIT
#endif

/*
  Z7_CRC64_NUM_TABLES_USE is the number of lookup tables used per iteration.
  It must be defined to the same value as in XzCrc64.c.
  The default is 12 when Z7_CRC64_NUM_TABLES is not supplied.
*/
#ifndef Z7_CRC64_NUM_TABLES
  #define Z7_CRC64_NUM_TABLES_USE  12
#else
  #define Z7_CRC64_NUM_TABLES_USE  Z7_CRC64_NUM_TABLES
#endif

/* Only multiples of 4 in the range [4, 16] are accepted. */
#if (Z7_CRC64_NUM_TABLES_USE < 4) \
    || (Z7_CRC64_NUM_TABLES_USE > 4 * 4) \
    || (Z7_CRC64_NUM_TABLES_USE % 4 != 0)
  #error Stop_Compiling_Bad_CRC64_NUM_TABLES
#endif
33 
34 
35 #ifndef MY_CPU_BE
36 
/*
  One-byte step for the little-endian state layout: the low byte of (crc)
  xored with the input byte (b) selects an entry of the first table, and that
  entry is xored with (crc) shifted right by 8 bits.
  Uses the identifier `table` from the scope where the macro is expanded.
*/
#define CRC64_UPDATE_BYTE_2(crc, b) \
    (((crc) >> 8) ^ table[((b) ^ (crc)) & 0xFF])

#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0)

/*
  Q64LE(n, d): xor of 8 table entries, one per byte of the 64-bit value (d).
  Group (n) covers tables n*8 .. n*8+7; the lowest byte of (d) indexes the
  highest-numbered table of the group and the highest byte indexes the
  lowest-numbered one. Each table holds 0x100 entries.
*/
#define Q64LE(n, d) \
    ( table[0x100 * ((n) * 8 + 7) + ( (d)        & 0xFF)] \
    ^ table[0x100 * ((n) * 8 + 6) + (((d) >>  8) & 0xFF)] \
    ^ table[0x100 * ((n) * 8 + 5) + (((d) >> 16) & 0xFF)] \
    ^ table[0x100 * ((n) * 8 + 4) + (((d) >> 24) & 0xFF)] \
    ^ table[0x100 * ((n) * 8 + 3) + (((d) >> 32) & 0xFF)] \
    ^ table[0x100 * ((n) * 8 + 2) + (((d) >> 40) & 0xFF)] \
    ^ table[0x100 * ((n) * 8 + 1) + (((d) >> 48) & 0xFF)] \
    ^ table[0x100 * ((n) * 8 + 0) + ( (d) >> 56        )] )

/* R64(a): the (a)-th 64-bit word starting at `p` (p is read from the expansion scope). */
#define R64(a)  (((const UInt64 *)(const void *)p)[(a)])

#else

/*
  Q32LE(n, d): xor of 4 table entries, one per byte of the 32-bit value (d).
  Group (n) covers tables n*4 .. n*4+3; the lowest byte of (d) indexes the
  highest-numbered table of the group.
*/
#define Q32LE(n, d) \
    ( table[0x100 * ((n) * 4 + 3) + ( (d)        & 0xFF)] \
    ^ table[0x100 * ((n) * 4 + 2) + (((d) >>  8) & 0xFF)] \
    ^ table[0x100 * ((n) * 4 + 1) + (((d) >> 16) & 0xFF)] \
    ^ table[0x100 * ((n) * 4 + 0) + ( (d) >> 24        )] )

/* R32(a): the (a)-th 32-bit word starting at `p` (p is read from the expansion scope). */
#define R32(a)  (((const UInt32 *)(const void *)p)[(a)])

#endif


/* Function header for XzCrc64UpdateT<step>; `step` is pasted into the name. */
#define CRC64_FUNC_PRE_LE2(step) \
UInt64 Z7_FASTCALL XzCrc64UpdateT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table)

/*
  Emits a prototype followed by the header of the definition.
  Going through this outer macro lets an argument such as
  Z7_CRC64_NUM_TABLES_USE expand to its value before it is pasted
  into the function name by CRC64_FUNC_PRE_LE2.
*/
#define CRC64_FUNC_PRE_LE(step) \
    CRC64_FUNC_PRE_LE2(step);   \
    CRC64_FUNC_PRE_LE2(step)
72 
CRC64_FUNC_PRE_LE(Z7_CRC64_NUM_TABLES_USE)73 CRC64_FUNC_PRE_LE(Z7_CRC64_NUM_TABLES_USE)
74 {
75   const Byte *p = (const Byte *)data;
76   const Byte *lim;
77   for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++)
78     v = CRC64_UPDATE_BYTE_2(v, *p);
79   lim = p + size;
80   if (size >= Z7_CRC64_NUM_TABLES_USE)
81   {
82     lim -= Z7_CRC64_NUM_TABLES_USE;
83     do
84     {
85 #if Z7_CRC64_NUM_TABLES_USE == 4
86       const UInt32 d = (UInt32)v ^ R32(0);
87       v = (v >> 32) ^ Q32LE(0, d);
88 #elif Z7_CRC64_NUM_TABLES_USE == 8
89 #ifdef Z7_CRC64_USE_64BIT
90       v ^= R64(0);
91       v = Q64LE(0, v);
92 #else
93       UInt32 v0, v1;
94       v0 = (UInt32)v         ^ R32(0);
95       v1 = (UInt32)(v >> 32) ^ R32(1);
96       v = Q32LE(1, v0) ^ Q32LE(0, v1);
97 #endif
98 #elif Z7_CRC64_NUM_TABLES_USE == 12
99       UInt32 w;
100       UInt32 v0, v1;
101       v0 = (UInt32)v         ^ R32(0);
102       v1 = (UInt32)(v >> 32) ^ R32(1);
103       w = R32(2);
104       v = Q32LE(0, w);
105       v ^= Q32LE(2, v0) ^ Q32LE(1, v1);
106 #elif Z7_CRC64_NUM_TABLES_USE == 16
107 #ifdef Z7_CRC64_USE_64BIT
108       UInt64 w;
109       UInt64 x;
110       w  = R64(1);      x = Q64LE(0, w);
111       v ^= R64(0);  v = x ^ Q64LE(1, v);
112 #else
113       UInt32 v0, v1;
114       UInt32 r0, r1;
115       v0 = (UInt32)v         ^ R32(0);
116       v1 = (UInt32)(v >> 32) ^ R32(1);
117       r0 =                     R32(2);
118       r1 =                     R32(3);
119       v  = Q32LE(1, r0) ^ Q32LE(0, r1);
120       v ^= Q32LE(3, v0) ^ Q32LE(2, v1);
121 #endif
122 #else
123 #error Stop_Compiling_Bad_CRC64_NUM_TABLES
124 #endif
125       p += Z7_CRC64_NUM_TABLES_USE;
126     }
127     while (p <= lim);
128     lim += Z7_CRC64_NUM_TABLES_USE;
129   }
130   for (; p < lim; p++)
131     v = CRC64_UPDATE_BYTE_2(v, *p);
132   return v;
133 }
134 
135 #undef CRC64_UPDATE_BYTE_2
136 #undef R32
137 #undef R64
138 #undef Q32LE
139 #undef Q64LE
140 #undef CRC64_FUNC_PRE_LE
141 #undef CRC64_FUNC_PRE_LE2
142 
143 #endif
144 
145 
146 
147 
148 #ifndef MY_CPU_LE
149 
/*
  One-byte step for the big-endian state layout: the top byte of (crc)
  xored with the input byte (b) selects an entry of the first table, and that
  entry is xored with (crc) shifted left by 8 bits.
  Uses the identifier `table` from the scope where the macro is expanded.
*/
#define CRC64_UPDATE_BYTE_2_BE(crc, b) \
    (((crc) << 8) ^ table[((crc) >> 56) ^ (b)])

#if defined(Z7_CRC64_USE_64BIT) && (Z7_CRC64_NUM_TABLES_USE % 8 == 0)

/*
  Q64BE(n, d): xor of 8 table entries, one per byte of the 64-bit value (d).
  Group (n) covers tables n*8 .. n*8+7; the lowest byte of (d) indexes the
  lowest-numbered table of the group. Each table holds 0x100 entries.
*/
#define Q64BE(n, d) \
    ( table[0x100 * ((n) * 8 + 0) + ( (d)        & 0xFF)] \
    ^ table[0x100 * ((n) * 8 + 1) + (((d) >>  8) & 0xFF)] \
    ^ table[0x100 * ((n) * 8 + 2) + (((d) >> 16) & 0xFF)] \
    ^ table[0x100 * ((n) * 8 + 3) + (((d) >> 24) & 0xFF)] \
    ^ table[0x100 * ((n) * 8 + 4) + (((d) >> 32) & 0xFF)] \
    ^ table[0x100 * ((n) * 8 + 5) + (((d) >> 40) & 0xFF)] \
    ^ table[0x100 * ((n) * 8 + 6) + (((d) >> 48) & 0xFF)] \
    ^ table[0x100 * ((n) * 8 + 7) + ( (d) >> 56        )] )

/*
  R64BE(a): the (a)-th 64-bit word starting at `p`.
  In the Z7_CRC64_DEBUG_BE build the word is fetched with GetBe64a
  instead of a plain load.
*/
#ifndef Z7_CRC64_DEBUG_BE
  #define R64BE(a)  (((const UInt64 *)(const void *)p)[(a)])
#else
  #define R64BE(a)  GetBe64a((const UInt64 *)(const void *)p + (a))
#endif

#else

/*
  Q32BE(n, d): xor of 4 table entries, one per byte of the 32-bit value (d).
  Group (n) covers tables n*4 .. n*4+3; the lowest byte of (d) indexes the
  lowest-numbered table of the group.
*/
#define Q32BE(n, d) \
    ( table[0x100 * ((n) * 4 + 0) + ( (d)        & 0xFF)] \
    ^ table[0x100 * ((n) * 4 + 1) + (((d) >>  8) & 0xFF)] \
    ^ table[0x100 * ((n) * 4 + 2) + (((d) >> 16) & 0xFF)] \
    ^ table[0x100 * ((n) * 4 + 3) + ( (d) >> 24        )] )

/*
  R32BE(a): the (a)-th 32-bit word starting at `p`.
  In the Z7_CRC64_DEBUG_BE build the word is fetched with GetBe32a
  instead of a plain load.
*/
#ifndef Z7_CRC64_DEBUG_BE
  #define R32BE(a)  (((const UInt32 *)(const void *)p)[(a)])
#else
  #define R32BE(a)  GetBe32a((const UInt32 *)(const void *)p + (a))
#endif

#endif

/* Function header for XzCrc64UpdateBeT<step>; `step` is pasted into the name. */
#define CRC64_FUNC_PRE_BE2(step) \
UInt64 Z7_FASTCALL XzCrc64UpdateBeT ## step (UInt64 v, const void *data, size_t size, const UInt64 *table)

/*
  Emits a prototype followed by the header of the definition.
  Going through this outer macro lets an argument such as
  Z7_CRC64_NUM_TABLES_USE expand to its value before it is pasted
  into the function name by CRC64_FUNC_PRE_BE2.
*/
#define CRC64_FUNC_PRE_BE(step) \
    CRC64_FUNC_PRE_BE2(step);   \
    CRC64_FUNC_PRE_BE2(step)
192 
CRC64_FUNC_PRE_BE(Z7_CRC64_NUM_TABLES_USE)193 CRC64_FUNC_PRE_BE(Z7_CRC64_NUM_TABLES_USE)
194 {
195   const Byte *p = (const Byte *)data;
196   const Byte *lim;
197   v = Z7_BSWAP64(v);
198   for (; size && ((unsigned)(ptrdiff_t)p & (7 - (Z7_CRC64_NUM_TABLES_USE & 4))) != 0; size--, p++)
199     v = CRC64_UPDATE_BYTE_2_BE(v, *p);
200   lim = p + size;
201   if (size >= Z7_CRC64_NUM_TABLES_USE)
202   {
203     lim -= Z7_CRC64_NUM_TABLES_USE;
204     do
205     {
206 #if   Z7_CRC64_NUM_TABLES_USE == 4
207       const UInt32 d = (UInt32)(v >> 32) ^ R32BE(0);
208       v = (v << 32) ^ Q32BE(0, d);
209 #elif Z7_CRC64_NUM_TABLES_USE == 12
210       const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
211       const UInt32 d0 = (UInt32)(v      ) ^ R32BE(1);
212       const UInt32 w =                      R32BE(2);
213       v  = Q32BE(0, w);
214       v ^= Q32BE(2, d1) ^ Q32BE(1, d0);
215 
216 #elif Z7_CRC64_NUM_TABLES_USE == 8
217   #ifdef Z7_CRC64_USE_64BIT
218       v ^= R64BE(0);
219       v  = Q64BE(0, v);
220   #else
221       const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
222       const UInt32 d0 = (UInt32)(v      ) ^ R32BE(1);
223       v = Q32BE(1, d1) ^ Q32BE(0, d0);
224   #endif
225 #elif Z7_CRC64_NUM_TABLES_USE == 16
226   #ifdef Z7_CRC64_USE_64BIT
227       const UInt64 w = R64BE(1);
228       v ^= R64BE(0);
229       v  = Q64BE(0, w) ^ Q64BE(1, v);
230   #else
231       const UInt32 d1 = (UInt32)(v >> 32) ^ R32BE(0);
232       const UInt32 d0 = (UInt32)(v      ) ^ R32BE(1);
233       const UInt32 w1 =                     R32BE(2);
234       const UInt32 w0 =                     R32BE(3);
235       v  = Q32BE(1, w1) ^ Q32BE(0, w0);
236       v ^= Q32BE(3, d1) ^ Q32BE(2, d0);
237   #endif
238 #elif
239 #error Stop_Compiling_Bad_CRC64_NUM_TABLES
240 #endif
241       p += Z7_CRC64_NUM_TABLES_USE;
242     }
243     while (p <= lim);
244     lim += Z7_CRC64_NUM_TABLES_USE;
245   }
246   for (; p < lim; p++)
247     v = CRC64_UPDATE_BYTE_2_BE(v, *p);
248   return Z7_BSWAP64(v);
249 }
250 
251 #undef CRC64_UPDATE_BYTE_2_BE
252 #undef R32BE
253 #undef R64BE
254 #undef Q32BE
255 #undef Q64BE
256 #undef CRC64_FUNC_PRE_BE
257 #undef CRC64_FUNC_PRE_BE2
258 
259 #endif
260 #undef Z7_CRC64_NUM_TABLES_USE
261 #endif
262