1 /* Copyright (c) 2017-2022 Hans-Kristian Arntzen
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining
4 * a copy of this software and associated documentation files (the
5 * "Software"), to deal in the Software without restriction, including
6 * without limitation the rights to use, copy, modify, merge, publish,
7 * distribute, sublicense, and/or sell copies of the Software, and to
8 * permit persons to whom the Software is furnished to do so, subject to
9 * the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be
12 * included in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include <assert.h>
24 #include <cstdint>
25 #include <cstring>
26 #include <mutex>
27 #include <unordered_map>
28 #include <vector>
29
30 #include "texcompress_astc_luts.h"
31
32 namespace Granite
33 {
build_astc_unquant_weight_lut(uint8_t * lut,size_t range,const ASTCQuantizationMode & mode)34 static void build_astc_unquant_weight_lut(uint8_t *lut, size_t range, const ASTCQuantizationMode &mode)
35 {
36 for (size_t i = 0; i < range; i++)
37 {
38 auto &v = lut[i];
39
40 if (!mode.quints && !mode.trits)
41 {
42 switch (mode.bits)
43 {
44 case 1:
45 v = i * 63;
46 break;
47
48 case 2:
49 v = i * 0x15;
50 break;
51
52 case 3:
53 v = i * 9;
54 break;
55
56 case 4:
57 v = (i << 2) | (i >> 2);
58 break;
59
60 case 5:
61 v = (i << 1) | (i >> 4);
62 break;
63
64 default:
65 v = 0;
66 break;
67 }
68 }
69 else if (mode.bits == 0)
70 {
71 if (mode.trits)
72 v = 32 * i;
73 else
74 v = 16 * i;
75 }
76 else
77 {
78 unsigned b = (i >> 1) & 1;
79 unsigned c = (i >> 2) & 1;
80 unsigned A, B, C, D;
81
82 A = 0x7f * (i & 1);
83 D = i >> mode.bits;
84 B = 0;
85
86 if (mode.trits)
87 {
88 static const unsigned Cs[3] = { 50, 23, 11 };
89 C = Cs[mode.bits - 1];
90 if (mode.bits == 2)
91 B = 0x45 * b;
92 else if (mode.bits == 3)
93 B = 0x21 * b + 0x42 * c;
94 }
95 else
96 {
97 static const unsigned Cs[2] = { 28, 13 };
98 C = Cs[mode.bits - 1];
99 if (mode.bits == 2)
100 B = 0x42 * b;
101 }
102
103 unsigned unq = D * C + B;
104 unq ^= A;
105 unq = (A & 0x20) | (unq >> 2);
106 v = unq;
107 }
108
109 // Expand [0, 63] to [0, 64].
110 if (mode.bits != 0 && v > 32)
111 v++;
112 }
113 }
114
build_astc_unquant_endpoint_lut(uint8_t * lut,size_t range,const ASTCQuantizationMode & mode)115 static void build_astc_unquant_endpoint_lut(uint8_t *lut, size_t range, const ASTCQuantizationMode &mode)
116 {
117 for (size_t i = 0; i < range; i++)
118 {
119 auto &v = lut[i];
120
121 if (!mode.quints && !mode.trits)
122 {
123 // Bit-replication.
124 switch (mode.bits)
125 {
126 case 1:
127 v = i * 0xff;
128 break;
129
130 case 2:
131 v = i * 0x55;
132 break;
133
134 case 3:
135 v = (i << 5) | (i << 2) | (i >> 1);
136 break;
137
138 case 4:
139 v = i * 0x11;
140 break;
141
142 case 5:
143 v = (i << 3) | (i >> 2);
144 break;
145
146 case 6:
147 v = (i << 2) | (i >> 4);
148 break;
149
150 case 7:
151 v = (i << 1) | (i >> 6);
152 break;
153
154 default:
155 v = i;
156 break;
157 }
158 }
159 else
160 {
161 unsigned A, B, C, D;
162 unsigned b = (i >> 1) & 1;
163 unsigned c = (i >> 2) & 1;
164 unsigned d = (i >> 3) & 1;
165 unsigned e = (i >> 4) & 1;
166 unsigned f = (i >> 5) & 1;
167
168 B = 0;
169 D = i >> mode.bits;
170 A = (i & 1) * 0x1ff;
171
172 if (mode.trits)
173 {
174 static const unsigned Cs[6] = { 204, 93, 44, 22, 11, 5 };
175 C = Cs[mode.bits - 1];
176
177 switch (mode.bits)
178 {
179 case 2:
180 B = b * 0x116;
181 break;
182
183 case 3:
184 B = b * 0x85 + c * 0x10a;
185 break;
186
187 case 4:
188 B = b * 0x41 + c * 0x82 + d * 0x104;
189 break;
190
191 case 5:
192 B = b * 0x20 + c * 0x40 + d * 0x81 + e * 0x102;
193 break;
194
195 case 6:
196 B = b * 0x10 + c * 0x20 + d * 0x40 + e * 0x80 + f * 0x101;
197 break;
198 }
199 }
200 else
201 {
202 static const unsigned Cs[5] = { 113, 54, 26, 13, 6 };
203 C = Cs[mode.bits - 1];
204
205 switch (mode.bits)
206 {
207 case 2:
208 B = b * 0x10c;
209 break;
210
211 case 3:
212 B = b * 0x82 + c * 0x105;
213 break;
214
215 case 4:
216 B = b * 0x40 + c * 0x81 + d * 0x102;
217 break;
218
219 case 5:
220 B = b * 0x20 + c * 0x40 + d * 0x80 + e * 0x101;
221 break;
222 }
223 }
224
225 unsigned unq = D * C + B;
226 unq ^= A;
227 unq = (A & 0x80) | (unq >> 2);
228 v = uint8_t(unq);
229 }
230 }
231 }
232
astc_value_range(const ASTCQuantizationMode & mode)233 static unsigned astc_value_range(const ASTCQuantizationMode &mode)
234 {
235 unsigned value_range = 1u << mode.bits;
236 if (mode.trits)
237 value_range *= 3;
238 if (mode.quints)
239 value_range *= 5;
240
241 if (value_range == 1)
242 value_range = 0;
243 return value_range;
244 }
245
// 32-bit integer mixing function used to derive the partition seeds.
// All arithmetic wraps modulo 2^32.
static uint32_t astc_hash52(uint32_t p)
{
	p = p ^ (p >> 15);
	p = p - (p << 17);
	p = p + (p << 7);
	p = p + (p << 4);
	p = p ^ (p >> 5);
	p = p + (p << 16);
	p = p ^ (p >> 7);
	p = p ^ (p >> 3);
	p = p ^ (p << 6);
	return p ^ (p >> 17);
}
253
254 // Copy-paste from spec.
astc_select_partition(int seed,int x,int y,int z,int partitioncount,bool small_block)255 static int astc_select_partition(int seed, int x, int y, int z, int partitioncount, bool small_block)
256 {
257 if (small_block)
258 {
259 x <<= 1;
260 y <<= 1;
261 z <<= 1;
262 }
263
264 seed += (partitioncount - 1) * 1024;
265 uint32_t rnum = astc_hash52(seed);
266 uint8_t seed1 = rnum & 0xF;
267 uint8_t seed2 = (rnum >> 4) & 0xF;
268 uint8_t seed3 = (rnum >> 8) & 0xF;
269 uint8_t seed4 = (rnum >> 12) & 0xF;
270 uint8_t seed5 = (rnum >> 16) & 0xF;
271 uint8_t seed6 = (rnum >> 20) & 0xF;
272 uint8_t seed7 = (rnum >> 24) & 0xF;
273 uint8_t seed8 = (rnum >> 28) & 0xF;
274 uint8_t seed9 = (rnum >> 18) & 0xF;
275 uint8_t seed10 = (rnum >> 22) & 0xF;
276 uint8_t seed11 = (rnum >> 26) & 0xF;
277 uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
278
279 seed1 *= seed1; seed2 *= seed2; seed3 *= seed3; seed4 *= seed4;
280 seed5 *= seed5; seed6 *= seed6; seed7 *= seed7; seed8 *= seed8;
281 seed9 *= seed9; seed10 *= seed10; seed11 *= seed11; seed12 *= seed12;
282
283 int sh1, sh2, sh3;
284 if (seed & 1)
285 {
286 sh1 = seed & 2 ? 4 : 5;
287 sh2 = partitioncount == 3 ? 6 : 5;
288 }
289 else
290 {
291 sh1 = partitioncount == 3 ? 6 : 5;
292 sh2 = seed & 2 ? 4 : 5;
293 }
294 sh3 = (seed & 0x10) ? sh1 : sh2;
295
296 seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2;
297 seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2;
298 seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3;
299
300 int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
301 int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
302 int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
303 int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
304
305 a &= 0x3f; b &= 0x3f; c &= 0x3f; d &= 0x3f;
306
307 if (partitioncount < 4)
308 d = 0;
309 if (partitioncount < 3)
310 c = 0;
311
312 if (a >= b && a >= c && a >= d)
313 return 0;
314 else if (b >= c && b >= d)
315 return 1;
316 else if (c >= d)
317 return 2;
318 else
319 return 3;
320 }
321
PartitionTable(unsigned block_width,unsigned block_height)322 ASTCLutHolder::PartitionTable::PartitionTable(unsigned block_width, unsigned block_height)
323 {
324 bool small_block = (block_width * block_height) < 31;
325
326 lut_width = block_width * 32;
327 lut_height = block_height * 32;
328 lut_buffer.resize(lut_width * lut_height);
329
330 for (unsigned seed_y = 0; seed_y < 32; seed_y++)
331 {
332 for (unsigned seed_x = 0; seed_x < 32; seed_x++)
333 {
334 unsigned seed = seed_y * 32 + seed_x;
335 for (unsigned block_y = 0; block_y < block_height; block_y++)
336 {
337 for (unsigned block_x = 0; block_x < block_width; block_x++)
338 {
339 int part2 = astc_select_partition(seed, block_x, block_y, 0, 2, small_block);
340 int part3 = astc_select_partition(seed, block_x, block_y, 0, 3, small_block);
341 int part4 = astc_select_partition(seed, block_x, block_y, 0, 4, small_block);
342 lut_buffer[(seed_y * block_height + block_y) * lut_width + (seed_x * block_width + block_x)] =
343 (part2 << 0) | (part3 << 2) | (part4 << 4);
344 }
345 }
346 }
347 }
348 }
349
get_partition_table(unsigned width,unsigned height)350 ASTCLutHolder::PartitionTable &ASTCLutHolder::get_partition_table(unsigned width, unsigned height)
351 {
352 std::lock_guard<std::mutex> holder{table_lock};
353 auto itr = tables.find(width * 16 + height);
354 if (itr != tables.end())
355 {
356 return itr->second;
357 }
358 else
359 {
360 auto &t = tables[width * 16 + height];
361 t = { width, height };
362 return t;
363 }
364 }
365
get_astc_luts()366 ASTCLutHolder &get_astc_luts()
367 {
368 static ASTCLutHolder holder;
369 return holder;
370 }
371
ASTCLutHolder()372 ASTCLutHolder::ASTCLutHolder()
373 {
374 init_color_endpoint();
375 init_weight_luts();
376 init_trits_quints();
377 }
378
init_color_endpoint()379 void ASTCLutHolder::init_color_endpoint()
380 {
381 auto &unquant_lut = color_endpoint.unquant_lut;
382
383 for (size_t i = 0; i < astc_num_quantization_modes; i++)
384 {
385 auto value_range = astc_value_range(astc_quantization_modes[i]);
386 color_endpoint.unquant_lut_offsets[i] = color_endpoint.unquant_offset;
387 build_astc_unquant_endpoint_lut(unquant_lut + color_endpoint.unquant_offset, value_range, astc_quantization_modes[i]);
388 color_endpoint.unquant_offset += value_range;
389 }
390
391 auto &lut = color_endpoint.lut;
392
393 // We can have a maximum of 9 endpoint pairs, i.e. 18 endpoint values in total.
394 for (unsigned pairs_minus_1 = 0; pairs_minus_1 < 9; pairs_minus_1++)
395 {
396 for (unsigned remaining = 0; remaining < 128; remaining++)
397 {
398 bool found_mode = false;
399 for (auto &mode : astc_quantization_modes)
400 {
401 unsigned num_values = (pairs_minus_1 + 1) * 2;
402 unsigned total_bits = mode.bits * num_values +
403 (mode.quints * 7 * num_values + 2) / 3 +
404 (mode.trits * 8 * num_values + 4) / 5;
405
406 if (total_bits <= remaining)
407 {
408 found_mode = true;
409 lut[pairs_minus_1][remaining][0] = mode.bits;
410 lut[pairs_minus_1][remaining][1] = mode.trits;
411 lut[pairs_minus_1][remaining][2] = mode.quints;
412 lut[pairs_minus_1][remaining][3] = color_endpoint.unquant_lut_offsets[&mode - astc_quantization_modes];
413 break;
414 }
415 }
416
417 if (!found_mode)
418 memset(lut[pairs_minus_1][remaining], 0, sizeof(lut[pairs_minus_1][remaining]));
419 }
420 }
421 }
422
init_weight_luts()423 void ASTCLutHolder::init_weight_luts()
424 {
425 auto &lut = weights.lut;
426 auto &unquant_lut = weights.unquant_lut;
427 auto &unquant_offset = weights.unquant_offset;
428
429 for (size_t i = 0; i < astc_num_weight_modes; i++)
430 {
431 auto value_range = astc_value_range(astc_weight_modes[i]);
432 lut[i][0] = astc_weight_modes[i].bits;
433 lut[i][1] = astc_weight_modes[i].trits;
434 lut[i][2] = astc_weight_modes[i].quints;
435 lut[i][3] = unquant_offset;
436 build_astc_unquant_weight_lut(unquant_lut + unquant_offset, value_range, astc_weight_modes[i]);
437 unquant_offset += value_range;
438 }
439
440 assert(unquant_offset <= 256);
441 }
442
init_trits_quints()443 void ASTCLutHolder::init_trits_quints()
444 {
445 // From specification.
446 auto &trits_quints = integer.trits_quints;
447
448 for (unsigned T = 0; T < 256; T++)
449 {
450 unsigned C;
451 uint8_t t0, t1, t2, t3, t4;
452
453 if (((T >> 2) & 7) == 7)
454 {
455 C = (((T >> 5) & 7) << 2) | (T & 3);
456 t4 = t3 = 2;
457 }
458 else
459 {
460 C = T & 0x1f;
461 if (((T >> 5) & 3) == 3)
462 {
463 t4 = 2;
464 t3 = (T >> 7) & 1;
465 }
466 else
467 {
468 t4 = (T >> 7) & 1;
469 t3 = (T >> 5) & 3;
470 }
471 }
472
473 if ((C & 3) == 3)
474 {
475 t2 = 2;
476 t1 = (C >> 4) & 1;
477 t0 = (((C >> 3) & 1) << 1) | (((C >> 2) & 1) & ~(((C >> 3) & 1)));
478 }
479 else if (((C >> 2) & 3) == 3)
480 {
481 t2 = 2;
482 t1 = 2;
483 t0 = C & 3;
484 }
485 else
486 {
487 t2 = (C >> 4) & 1;
488 t1 = (C >> 2) & 3;
489 t0 = (((C >> 1) & 1) << 1) | ((C & 1) & ~(((C >> 1) & 1)));
490 }
491
492 trits_quints[T] = t0 | (t1 << 3) | (t2 << 6) | (t3 << 9) | (t4 << 12);
493 }
494
495 for (unsigned Q = 0; Q < 128; Q++)
496 {
497 unsigned C;
498 uint8_t q0, q1, q2;
499 if (((Q >> 1) & 3) == 3 && ((Q >> 5) & 3) == 0)
500 {
501 q2 = ((Q & 1) << 2) | ((((Q >> 4) & 1) & ~(Q & 1)) << 1) | (((Q >> 3) & 1) & ~(Q & 1));
502 q1 = q0 = 4;
503 }
504 else
505 {
506 if (((Q >> 1) & 3) == 3)
507 {
508 q2 = 4;
509 C = (((Q >> 3) & 3) << 3) | ((~(Q >> 5) & 3) << 1) | (Q & 1);
510 }
511 else
512 {
513 q2 = (Q >> 5) & 3;
514 C = Q & 0x1f;
515 }
516
517 if ((C & 7) == 5)
518 {
519 q1 = 4;
520 q0 = (C >> 3) & 3;
521 }
522 else
523 {
524 q1 = (C >> 3) & 3;
525 q0 = C & 7;
526 }
527 }
528
529 trits_quints[256 + Q] = q0 | (q1 << 3) | (q2 << 6);
530 }
531 }
532 }
533