xref: /aosp_15_r20/external/mesa3d/src/util/texcompress_astc_luts.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /* Copyright (c) 2017-2022 Hans-Kristian Arntzen
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining
4  * a copy of this software and associated documentation files (the
5  * "Software"), to deal in the Software without restriction, including
6  * without limitation the rights to use, copy, modify, merge, publish,
7  * distribute, sublicense, and/or sell copies of the Software, and to
8  * permit persons to whom the Software is furnished to do so, subject to
9  * the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be
12  * included in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include <assert.h>
24 #include <cstdint>
25 #include <cstring>
26 #include <mutex>
27 #include <unordered_map>
28 #include <vector>
29 
30 #include "texcompress_astc_luts.h"
31 
32 namespace Granite
33 {
build_astc_unquant_weight_lut(uint8_t * lut,size_t range,const ASTCQuantizationMode & mode)34 static void build_astc_unquant_weight_lut(uint8_t *lut, size_t range, const ASTCQuantizationMode &mode)
35 {
36 	for (size_t i = 0; i < range; i++)
37 	{
38 		auto &v = lut[i];
39 
40 		if (!mode.quints && !mode.trits)
41 		{
42 			switch (mode.bits)
43 			{
44 			case 1:
45 				v = i * 63;
46 				break;
47 
48 			case 2:
49 				v = i * 0x15;
50 				break;
51 
52 			case 3:
53 				v = i * 9;
54 				break;
55 
56 			case 4:
57 				v = (i << 2) | (i >> 2);
58 				break;
59 
60 			case 5:
61 				v = (i << 1) | (i >> 4);
62 				break;
63 
64 			default:
65 				v = 0;
66 				break;
67 			}
68 		}
69 		else if (mode.bits == 0)
70 		{
71 			if (mode.trits)
72 				v = 32 * i;
73 			else
74 				v = 16 * i;
75 		}
76 		else
77 		{
78 			unsigned b = (i >> 1) & 1;
79 			unsigned c = (i >> 2) & 1;
80 			unsigned A, B, C, D;
81 
82 			A = 0x7f * (i & 1);
83 			D = i >> mode.bits;
84 			B = 0;
85 
86 			if (mode.trits)
87 			{
88 				static const unsigned Cs[3] = { 50, 23, 11 };
89 				C = Cs[mode.bits - 1];
90 				if (mode.bits == 2)
91 					B = 0x45 * b;
92 				else if (mode.bits == 3)
93 					B = 0x21 * b + 0x42 * c;
94 			}
95 			else
96 			{
97 				static const unsigned Cs[2] = { 28, 13 };
98 				C = Cs[mode.bits - 1];
99 				if (mode.bits == 2)
100 					B = 0x42 * b;
101 			}
102 
103 			unsigned unq = D * C + B;
104 			unq ^= A;
105 			unq = (A & 0x20) | (unq >> 2);
106 			v = unq;
107 		}
108 
109 		// Expand [0, 63] to [0, 64].
110 		if (mode.bits != 0 && v > 32)
111 			v++;
112 	}
113 }
114 
build_astc_unquant_endpoint_lut(uint8_t * lut,size_t range,const ASTCQuantizationMode & mode)115 static void build_astc_unquant_endpoint_lut(uint8_t *lut, size_t range, const ASTCQuantizationMode &mode)
116 {
117 	for (size_t i = 0; i < range; i++)
118 	{
119 		auto &v = lut[i];
120 
121 		if (!mode.quints && !mode.trits)
122 		{
123 			// Bit-replication.
124 			switch (mode.bits)
125 			{
126 			case 1:
127 				v = i * 0xff;
128 				break;
129 
130 			case 2:
131 				v = i * 0x55;
132 				break;
133 
134 			case 3:
135 				v = (i << 5) | (i << 2) | (i >> 1);
136 				break;
137 
138 			case 4:
139 				v = i * 0x11;
140 				break;
141 
142 			case 5:
143 				v = (i << 3) | (i >> 2);
144 				break;
145 
146 			case 6:
147 				v = (i << 2) | (i >> 4);
148 				break;
149 
150 			case 7:
151 				v = (i << 1) | (i >> 6);
152 				break;
153 
154 			default:
155 				v = i;
156 				break;
157 			}
158 		}
159 		else
160 		{
161 			unsigned A, B, C, D;
162 			unsigned b = (i >> 1) & 1;
163 			unsigned c = (i >> 2) & 1;
164 			unsigned d = (i >> 3) & 1;
165 			unsigned e = (i >> 4) & 1;
166 			unsigned f = (i >> 5) & 1;
167 
168 			B = 0;
169 			D = i >> mode.bits;
170 			A = (i & 1) * 0x1ff;
171 
172 			if (mode.trits)
173 			{
174 				static const unsigned Cs[6] = { 204, 93, 44, 22, 11, 5 };
175 				C = Cs[mode.bits - 1];
176 
177 				switch (mode.bits)
178 				{
179 				case 2:
180 					B = b * 0x116;
181 					break;
182 
183 				case 3:
184 					B = b * 0x85 + c * 0x10a;
185 					break;
186 
187 				case 4:
188 					B = b * 0x41 + c * 0x82 + d * 0x104;
189 					break;
190 
191 				case 5:
192 					B = b * 0x20 + c * 0x40 + d * 0x81 + e * 0x102;
193 					break;
194 
195 				case 6:
196 					B = b * 0x10 + c * 0x20 + d * 0x40 + e * 0x80 + f * 0x101;
197 					break;
198 				}
199 			}
200 			else
201 			{
202 				static const unsigned Cs[5] = { 113, 54, 26, 13, 6 };
203 				C = Cs[mode.bits - 1];
204 
205 				switch (mode.bits)
206 				{
207 				case 2:
208 					B = b * 0x10c;
209 					break;
210 
211 				case 3:
212 					B = b * 0x82 + c * 0x105;
213 					break;
214 
215 				case 4:
216 					B = b * 0x40 + c * 0x81 + d * 0x102;
217 					break;
218 
219 				case 5:
220 					B = b * 0x20 + c * 0x40 + d * 0x80 + e * 0x101;
221 					break;
222 				}
223 			}
224 
225 			unsigned unq = D * C + B;
226 			unq ^= A;
227 			unq = (A & 0x80) | (unq >> 2);
228 			v = uint8_t(unq);
229 		}
230 	}
231 }
232 
astc_value_range(const ASTCQuantizationMode & mode)233 static unsigned astc_value_range(const ASTCQuantizationMode &mode)
234 {
235 	unsigned value_range = 1u << mode.bits;
236 	if (mode.trits)
237 		value_range *= 3;
238 	if (mode.quints)
239 		value_range *= 5;
240 
241 	if (value_range == 1)
242 		value_range = 0;
243 	return value_range;
244 }
245 
// 32-bit integer mixing function; astc_select_partition() feeds it the
// adjusted partition seed and slices the result into per-axis seeds.
// All arithmetic is unsigned and wraps modulo 2^32.
static uint32_t astc_hash52(uint32_t p)
{
	p ^= p >> 15;
	p -= p << 17;
	p += p << 7;
	p += p << 4;
	p ^= p >> 5;
	p += p << 16;
	p ^= p >> 7;
	p ^= p >> 3;
	p ^= p << 6;
	p ^= p >> 17;
	return p;
}
253 
254 // Copy-paste from spec.
astc_select_partition(int seed,int x,int y,int z,int partitioncount,bool small_block)255 static int astc_select_partition(int seed, int x, int y, int z, int partitioncount, bool small_block)
256 {
257 	if (small_block)
258 	{
259 		x <<= 1;
260 		y <<= 1;
261 		z <<= 1;
262 	}
263 
264 	seed += (partitioncount - 1) * 1024;
265 	uint32_t rnum = astc_hash52(seed);
266 	uint8_t seed1 = rnum & 0xF;
267 	uint8_t seed2 = (rnum >> 4) & 0xF;
268 	uint8_t seed3 = (rnum >> 8) & 0xF;
269 	uint8_t seed4 = (rnum >> 12) & 0xF;
270 	uint8_t seed5 = (rnum >> 16) & 0xF;
271 	uint8_t seed6 = (rnum >> 20) & 0xF;
272 	uint8_t seed7 = (rnum >> 24) & 0xF;
273 	uint8_t seed8 = (rnum >> 28) & 0xF;
274 	uint8_t seed9 = (rnum >> 18) & 0xF;
275 	uint8_t seed10 = (rnum >> 22) & 0xF;
276 	uint8_t seed11 = (rnum >> 26) & 0xF;
277 	uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
278 
279 	seed1 *= seed1; seed2 *= seed2; seed3 *= seed3; seed4 *= seed4;
280 	seed5 *= seed5; seed6 *= seed6; seed7 *= seed7; seed8 *= seed8;
281 	seed9 *= seed9; seed10 *= seed10; seed11 *= seed11; seed12 *= seed12;
282 
283 	int sh1, sh2, sh3;
284 	if (seed & 1)
285 	{
286 		sh1 = seed & 2 ? 4 : 5;
287 		sh2 = partitioncount == 3 ? 6 : 5;
288 	}
289 	else
290 	{
291 		sh1 = partitioncount == 3 ? 6 : 5;
292 		sh2 = seed & 2 ? 4 : 5;
293 	}
294 	sh3 = (seed & 0x10) ? sh1 : sh2;
295 
296 	seed1 >>= sh1; seed2 >>= sh2; seed3 >>= sh1; seed4 >>= sh2;
297 	seed5 >>= sh1; seed6 >>= sh2; seed7 >>= sh1; seed8 >>= sh2;
298 	seed9 >>= sh3; seed10 >>= sh3; seed11 >>= sh3; seed12 >>= sh3;
299 
300 	int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
301 	int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
302 	int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
303 	int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
304 
305 	a &= 0x3f; b &= 0x3f; c &= 0x3f; d &= 0x3f;
306 
307 	if (partitioncount < 4)
308 		d = 0;
309 	if (partitioncount < 3)
310 		c = 0;
311 
312 	if (a >= b && a >= c && a >= d)
313 		return 0;
314 	else if (b >= c && b >= d)
315 		return 1;
316 	else if (c >= d)
317 		return 2;
318 	else
319 		return 3;
320 }
321 
PartitionTable(unsigned block_width,unsigned block_height)322 ASTCLutHolder::PartitionTable::PartitionTable(unsigned block_width, unsigned block_height)
323 {
324 	bool small_block = (block_width * block_height) < 31;
325 
326 	lut_width = block_width * 32;
327 	lut_height = block_height * 32;
328 	lut_buffer.resize(lut_width * lut_height);
329 
330 	for (unsigned seed_y = 0; seed_y < 32; seed_y++)
331 	{
332 		for (unsigned seed_x = 0; seed_x < 32; seed_x++)
333 		{
334 			unsigned seed = seed_y * 32 + seed_x;
335 			for (unsigned block_y = 0; block_y < block_height; block_y++)
336 			{
337 				for (unsigned block_x = 0; block_x < block_width; block_x++)
338 				{
339 					int part2 = astc_select_partition(seed, block_x, block_y, 0, 2, small_block);
340 					int part3 = astc_select_partition(seed, block_x, block_y, 0, 3, small_block);
341 					int part4 = astc_select_partition(seed, block_x, block_y, 0, 4, small_block);
342 					lut_buffer[(seed_y * block_height + block_y) * lut_width + (seed_x * block_width + block_x)] =
343 							(part2 << 0) | (part3 << 2) | (part4 << 4);
344 				}
345 			}
346 		}
347 	}
348 }
349 
get_partition_table(unsigned width,unsigned height)350 ASTCLutHolder::PartitionTable &ASTCLutHolder::get_partition_table(unsigned width, unsigned height)
351 {
352 	std::lock_guard<std::mutex> holder{table_lock};
353 	auto itr = tables.find(width * 16 + height);
354 	if (itr != tables.end())
355 	{
356 		return itr->second;
357 	}
358 	else
359 	{
360 		auto &t = tables[width * 16 + height];
361 		t = { width, height };
362 		return t;
363 	}
364 }
365 
get_astc_luts()366 ASTCLutHolder &get_astc_luts()
367 {
368 	static ASTCLutHolder holder;
369 	return holder;
370 }
371 
ASTCLutHolder()372 ASTCLutHolder::ASTCLutHolder()
373 {
374 	init_color_endpoint();
375 	init_weight_luts();
376 	init_trits_quints();
377 }
378 
init_color_endpoint()379 void ASTCLutHolder::init_color_endpoint()
380 {
381 	auto &unquant_lut = color_endpoint.unquant_lut;
382 
383 	for (size_t i = 0; i < astc_num_quantization_modes; i++)
384 	{
385 		auto value_range = astc_value_range(astc_quantization_modes[i]);
386 		color_endpoint.unquant_lut_offsets[i] = color_endpoint.unquant_offset;
387 		build_astc_unquant_endpoint_lut(unquant_lut + color_endpoint.unquant_offset, value_range, astc_quantization_modes[i]);
388 		color_endpoint.unquant_offset += value_range;
389 	}
390 
391 	auto &lut = color_endpoint.lut;
392 
393 	// We can have a maximum of 9 endpoint pairs, i.e. 18 endpoint values in total.
394 	for (unsigned pairs_minus_1 = 0; pairs_minus_1 < 9; pairs_minus_1++)
395 	{
396 		for (unsigned remaining = 0; remaining < 128; remaining++)
397 		{
398 			bool found_mode = false;
399 			for (auto &mode : astc_quantization_modes)
400 			{
401 				unsigned num_values = (pairs_minus_1 + 1) * 2;
402 				unsigned total_bits = mode.bits * num_values +
403 				                      (mode.quints * 7 * num_values + 2) / 3 +
404 				                      (mode.trits * 8 * num_values + 4) / 5;
405 
406 				if (total_bits <= remaining)
407 				{
408 					found_mode = true;
409 					lut[pairs_minus_1][remaining][0] = mode.bits;
410 					lut[pairs_minus_1][remaining][1] = mode.trits;
411 					lut[pairs_minus_1][remaining][2] = mode.quints;
412 					lut[pairs_minus_1][remaining][3] = color_endpoint.unquant_lut_offsets[&mode - astc_quantization_modes];
413 					break;
414 				}
415 			}
416 
417 			if (!found_mode)
418 				memset(lut[pairs_minus_1][remaining], 0, sizeof(lut[pairs_minus_1][remaining]));
419 		}
420 	}
421 }
422 
init_weight_luts()423 void ASTCLutHolder::init_weight_luts()
424 {
425 	auto &lut = weights.lut;
426 	auto &unquant_lut = weights.unquant_lut;
427 	auto &unquant_offset = weights.unquant_offset;
428 
429 	for (size_t i = 0; i < astc_num_weight_modes; i++)
430 	{
431 		auto value_range = astc_value_range(astc_weight_modes[i]);
432 		lut[i][0] = astc_weight_modes[i].bits;
433 		lut[i][1] = astc_weight_modes[i].trits;
434 		lut[i][2] = astc_weight_modes[i].quints;
435 		lut[i][3] = unquant_offset;
436 		build_astc_unquant_weight_lut(unquant_lut + unquant_offset, value_range, astc_weight_modes[i]);
437 		unquant_offset += value_range;
438 	}
439 
440 	assert(unquant_offset <= 256);
441 }
442 
init_trits_quints()443 void ASTCLutHolder::init_trits_quints()
444 {
445 	// From specification.
446 	auto &trits_quints = integer.trits_quints;
447 
448 	for (unsigned T = 0; T < 256; T++)
449 	{
450 		unsigned C;
451 		uint8_t t0, t1, t2, t3, t4;
452 
453 		if (((T >> 2) & 7) == 7)
454 		{
455 			C = (((T >> 5) & 7) << 2) | (T & 3);
456 			t4 = t3 = 2;
457 		}
458 		else
459 		{
460 			C = T & 0x1f;
461 			if (((T >> 5) & 3) == 3)
462 			{
463 				t4 = 2;
464 				t3 = (T >> 7) & 1;
465 			}
466 			else
467 			{
468 				t4 = (T >> 7) & 1;
469 				t3 = (T >> 5) & 3;
470 			}
471 		}
472 
473 		if ((C & 3) == 3)
474 		{
475 			t2 = 2;
476 			t1 = (C >> 4) & 1;
477 			t0 = (((C >> 3) & 1) << 1) | (((C >> 2) & 1) & ~(((C >> 3) & 1)));
478 		}
479 		else if (((C >> 2) & 3) == 3)
480 		{
481 			t2 = 2;
482 			t1 = 2;
483 			t0 = C & 3;
484 		}
485 		else
486 		{
487 			t2 = (C >> 4) & 1;
488 			t1 = (C >> 2) & 3;
489 			t0 = (((C >> 1) & 1) << 1) | ((C & 1) & ~(((C >> 1) & 1)));
490 		}
491 
492 		trits_quints[T] = t0 | (t1 << 3) | (t2 << 6) | (t3 << 9) | (t4 << 12);
493 	}
494 
495 	for (unsigned Q = 0; Q < 128; Q++)
496 	{
497 		unsigned C;
498 		uint8_t q0, q1, q2;
499 		if (((Q >> 1) & 3) == 3 && ((Q >> 5) & 3) == 0)
500 		{
501 			q2 = ((Q & 1) << 2) | ((((Q >> 4) & 1) & ~(Q & 1)) << 1) | (((Q >> 3) & 1) & ~(Q & 1));
502 			q1 = q0 = 4;
503 		}
504 		else
505 		{
506 			if (((Q >> 1) & 3) == 3)
507 			{
508 				q2 = 4;
509 				C = (((Q >> 3) & 3) << 3) | ((~(Q >> 5) & 3) << 1) | (Q & 1);
510 			}
511 			else
512 			{
513 				q2 = (Q >> 5) & 3;
514 				C = Q & 0x1f;
515 			}
516 
517 			if ((C & 7) == 5)
518 			{
519 				q1 = 4;
520 				q0 = (C >> 3) & 3;
521 			}
522 			else
523 			{
524 				q1 = (C >> 3) & 3;
525 				q0 = C & 7;
526 			}
527 		}
528 
529 		trits_quints[256 + Q] = q0 | (q1 << 3) | (q2 << 6);
530 	}
531 }
532 }
533