// xref: /aosp_15_r20/external/swiftshader/third_party/astc-encoder/Source/astc_block_sizes2.cpp (revision 03ce13f70fcc45d86ee91b7ee4cab1936a95046e)
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2011-2020 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
17 
/**
 * @brief Functions to generate block size descriptor and decimation tables.
 */
21 
22 #include "astc_codec_internals.h"
23 #include <memory>
24 
25 struct TexelWeight
26 {
27 	int weightcount_of_texel[MAX_TEXELS_PER_BLOCK];
28 	int grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4];
29 	int weights_of_texel[MAX_TEXELS_PER_BLOCK][4];
30 
31 	int texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK];
32 	int texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
33 	int texelweights_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
34 };
35 
36 // return 0 on invalid mode, 1 on valid mode.
decode_block_mode_2d(int blockmode,int * Nval,int * Mval,int * dual_weight_plane,int * quant_mode)37 static int decode_block_mode_2d(
38 	int blockmode,
39 	int* Nval,
40 	int* Mval,
41 	int* dual_weight_plane,
42 	int* quant_mode
43 ) {
44 	int base_quant_mode = (blockmode >> 4) & 1;
45 	int H = (blockmode >> 9) & 1;
46 	int D = (blockmode >> 10) & 1;
47 
48 	int A = (blockmode >> 5) & 0x3;
49 
50 	int N = 0, M = 0;
51 
52 	if ((blockmode & 3) != 0)
53 	{
54 		base_quant_mode |= (blockmode & 3) << 1;
55 		int B = (blockmode >> 7) & 3;
56 		switch ((blockmode >> 2) & 3)
57 		{
58 		case 0:
59 			N = B + 4;
60 			M = A + 2;
61 			break;
62 		case 1:
63 			N = B + 8;
64 			M = A + 2;
65 			break;
66 		case 2:
67 			N = A + 2;
68 			M = B + 8;
69 			break;
70 		case 3:
71 			B &= 1;
72 			if (blockmode & 0x100)
73 			{
74 				N = B + 2;
75 				M = A + 2;
76 			}
77 			else
78 			{
79 				N = A + 2;
80 				M = B + 6;
81 			}
82 			break;
83 		}
84 	}
85 	else
86 	{
87 		base_quant_mode |= ((blockmode >> 2) & 3) << 1;
88 		if (((blockmode >> 2) & 3) == 0)
89 			return 0;
90 		int B = (blockmode >> 9) & 3;
91 		switch ((blockmode >> 7) & 3)
92 		{
93 		case 0:
94 			N = 12;
95 			M = A + 2;
96 			break;
97 		case 1:
98 			N = A + 2;
99 			M = 12;
100 			break;
101 		case 2:
102 			N = A + 6;
103 			M = B + 6;
104 			D = 0;
105 			H = 0;
106 			break;
107 		case 3:
108 			switch ((blockmode >> 5) & 3)
109 			{
110 			case 0:
111 				N = 6;
112 				M = 10;
113 				break;
114 			case 1:
115 				N = 10;
116 				M = 6;
117 				break;
118 			case 2:
119 			case 3:
120 				return 0;
121 			}
122 			break;
123 		}
124 	}
125 
126 	int weight_count = N * M * (D + 1);
127 	int qmode = (base_quant_mode - 2) + 6 * H;
128 
129 	int weightbits = compute_ise_bitcount(weight_count, (quantization_method) qmode);
130 	if (weight_count > MAX_WEIGHTS_PER_BLOCK || weightbits < MIN_WEIGHT_BITS_PER_BLOCK || weightbits > MAX_WEIGHT_BITS_PER_BLOCK)
131 		return 0;
132 
133 	*Nval = N;
134 	*Mval = M;
135 	*dual_weight_plane = D;
136 	*quant_mode = qmode;
137 	return 1;
138 }
139 
decode_block_mode_3d(int blockmode,int * Nval,int * Mval,int * Qval,int * dual_weight_plane,int * quant_mode)140 static int decode_block_mode_3d(
141 	int blockmode,
142 	int* Nval,
143 	int* Mval,
144 	int* Qval,
145 	int* dual_weight_plane,
146 	int* quant_mode
147 ) {
148 	int base_quant_mode = (blockmode >> 4) & 1;
149 	int H = (blockmode >> 9) & 1;
150 	int D = (blockmode >> 10) & 1;
151 
152 	int A = (blockmode >> 5) & 0x3;
153 
154 	int N = 0, M = 0, Q = 0;
155 
156 	if ((blockmode & 3) != 0)
157 	{
158 		base_quant_mode |= (blockmode & 3) << 1;
159 		int B = (blockmode >> 7) & 3;
160 		int C = (blockmode >> 2) & 0x3;
161 		N = A + 2;
162 		M = B + 2;
163 		Q = C + 2;
164 	}
165 	else
166 	{
167 		base_quant_mode |= ((blockmode >> 2) & 3) << 1;
168 		if (((blockmode >> 2) & 3) == 0)
169 			return 0;
170 		int B = (blockmode >> 9) & 3;
171 		if (((blockmode >> 7) & 3) != 3)
172 		{
173 			D = 0;
174 			H = 0;
175 		}
176 		switch ((blockmode >> 7) & 3)
177 		{
178 		case 0:
179 			N = 6;
180 			M = B + 2;
181 			Q = A + 2;
182 			break;
183 		case 1:
184 			N = A + 2;
185 			M = 6;
186 			Q = B + 2;
187 			break;
188 		case 2:
189 			N = A + 2;
190 			M = B + 2;
191 			Q = 6;
192 			break;
193 		case 3:
194 			N = 2;
195 			M = 2;
196 			Q = 2;
197 			switch ((blockmode >> 5) & 3)
198 			{
199 			case 0:
200 				N = 6;
201 				break;
202 			case 1:
203 				M = 6;
204 				break;
205 			case 2:
206 				Q = 6;
207 				break;
208 			case 3:
209 				return 0;
210 			}
211 			break;
212 		}
213 	}
214 
215 	int weight_count = N * M * Q * (D + 1);
216 	int qmode = (base_quant_mode - 2) + 6 * H;
217 
218 	int weightbits = compute_ise_bitcount(weight_count, (quantization_method) qmode);
219 	if (weight_count > MAX_WEIGHTS_PER_BLOCK ||
220 	    weightbits < MIN_WEIGHT_BITS_PER_BLOCK ||
221 	    weightbits > MAX_WEIGHT_BITS_PER_BLOCK)
222 		return 0;
223 
224 	*Nval = N;
225 	*Mval = M;
226 	*Qval = Q;
227 	*dual_weight_plane = D;
228 	*quant_mode = qmode;
229 	return 1;
230 }
231 
initialize_decimation_table_2d(int xdim,int ydim,int x_weights,int y_weights,decimation_table * dt)232 static void initialize_decimation_table_2d(
233 	int xdim,
234 	int ydim,
235 	int x_weights,
236 	int y_weights,
237 	decimation_table* dt
238 ) {
239 	int i, j;
240 	int x, y;
241 
242 	int texels_per_block = xdim * ydim;
243 	int weights_per_block = x_weights * y_weights;
244 
245 	std::unique_ptr<TexelWeight> tw(new TexelWeight);
246 
247 	for (i = 0; i < weights_per_block; i++)
248 		tw->texelcount_of_weight[i] = 0;
249 	for (i = 0; i < texels_per_block; i++)
250 		tw->weightcount_of_texel[i] = 0;
251 
252 	for (y = 0; y < ydim; y++)
253 		for (x = 0; x < xdim; x++)
254 		{
255 			int texel = y * xdim + x;
256 
257 			int x_weight = (((1024 + xdim / 2) / (xdim - 1)) * x * (x_weights - 1) + 32) >> 6;
258 			int y_weight = (((1024 + ydim / 2) / (ydim - 1)) * y * (y_weights - 1) + 32) >> 6;
259 
260 			int x_weight_frac = x_weight & 0xF;
261 			int y_weight_frac = y_weight & 0xF;
262 			int x_weight_int = x_weight >> 4;
263 			int y_weight_int = y_weight >> 4;
264 			int qweight[4];
265 			int weight[4];
266 			qweight[0] = x_weight_int + y_weight_int * x_weights;
267 			qweight[1] = qweight[0] + 1;
268 			qweight[2] = qweight[0] + x_weights;
269 			qweight[3] = qweight[2] + 1;
270 
271 			// truncated-precision bilinear interpolation.
272 			int prod = x_weight_frac * y_weight_frac;
273 
274 			weight[3] = (prod + 8) >> 4;
275 			weight[1] = x_weight_frac - weight[3];
276 			weight[2] = y_weight_frac - weight[3];
277 			weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3];
278 
279 			for (i = 0; i < 4; i++)
280 				if (weight[i] != 0)
281 				{
282 					tw->grid_weights_of_texel[texel][tw->weightcount_of_texel[texel]] = qweight[i];
283 					tw->weights_of_texel[texel][tw->weightcount_of_texel[texel]] = weight[i];
284 					tw->weightcount_of_texel[texel]++;
285 					tw->texels_of_weight[qweight[i]][tw->texelcount_of_weight[qweight[i]]] = texel;
286 					tw->texelweights_of_weight[qweight[i]][tw->texelcount_of_weight[qweight[i]]] = weight[i];
287 					tw->texelcount_of_weight[qweight[i]]++;
288 				}
289 		}
290 
291 	for (i = 0; i < texels_per_block; i++)
292 	{
293 		dt->texel_num_weights[i] = tw->weightcount_of_texel[i];
294 
295 		// ensure that all 4 entries are actually initialized.
296 		// This allows a branch-free implementation of compute_value_of_texel_flt()
297 		for (j = 0; j < 4; j++)
298 		{
299 			dt->texel_weights_int[i][j] = 0;
300 			dt->texel_weights[i][j] = 0;
301 		}
302 
303 		for (j = 0; j < tw->weightcount_of_texel[i]; j++)
304 		{
305 			dt->texel_weights_int[i][j] = (uint8_t)(tw->weights_of_texel[i][j]);
306 			dt->texel_weights[i][j] = (uint8_t)(tw->grid_weights_of_texel[i][j]);
307 		}
308 	}
309 
310 	dt->num_weights = weights_per_block;
311 }
312 
initialize_decimation_table_3d(int xdim,int ydim,int zdim,int x_weights,int y_weights,int z_weights,decimation_table * dt)313 static void initialize_decimation_table_3d(
314 	int xdim,
315 	int ydim,
316 	int zdim,
317 	int x_weights,
318 	int y_weights,
319 	int z_weights,
320 	decimation_table* dt
321 ) {
322 	int i, j;
323 	int x, y, z;
324 
325 	int texels_per_block = xdim * ydim * zdim;
326 	int weights_per_block = x_weights * y_weights * z_weights;
327 
328 	std::unique_ptr<TexelWeight> tw(new TexelWeight);
329 
330 	for (i = 0; i < weights_per_block; i++)
331 		tw->texelcount_of_weight[i] = 0;
332 	for (i = 0; i < texels_per_block; i++)
333 		tw->weightcount_of_texel[i] = 0;
334 
335 	for (z = 0; z < zdim; z++)
336 	{
337 		for (y = 0; y < ydim; y++)
338 		{
339 			for (x = 0; x < xdim; x++)
340 			{
341 				int texel = (z * ydim + y) * xdim + x;
342 
343 				int x_weight = (((1024 + xdim / 2) / (xdim - 1)) * x * (x_weights - 1) + 32) >> 6;
344 				int y_weight = (((1024 + ydim / 2) / (ydim - 1)) * y * (y_weights - 1) + 32) >> 6;
345 				int z_weight = (((1024 + zdim / 2) / (zdim - 1)) * z * (z_weights - 1) + 32) >> 6;
346 
347 				int x_weight_frac = x_weight & 0xF;
348 				int y_weight_frac = y_weight & 0xF;
349 				int z_weight_frac = z_weight & 0xF;
350 				int x_weight_int = x_weight >> 4;
351 				int y_weight_int = y_weight >> 4;
352 				int z_weight_int = z_weight >> 4;
353 				int qweight[4];
354 				int weight[4];
355 				qweight[0] = (z_weight_int * y_weights + y_weight_int) * x_weights + x_weight_int;
356 				qweight[3] = ((z_weight_int + 1) * y_weights + (y_weight_int + 1)) * x_weights + (x_weight_int + 1);
357 
358 				// simplex interpolation
359 				int fs = x_weight_frac;
360 				int ft = y_weight_frac;
361 				int fp = z_weight_frac;
362 
363 				int cas = ((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp));
364 				int N = x_weights;
365 				int NM = x_weights * y_weights;
366 
367 				int s1, s2, w0, w1, w2, w3;
368 				switch (cas)
369 				{
370 				case 7:
371 					s1 = 1;
372 					s2 = N;
373 					w0 = 16 - fs;
374 					w1 = fs - ft;
375 					w2 = ft - fp;
376 					w3 = fp;
377 					break;
378 				case 3:
379 					s1 = N;
380 					s2 = 1;
381 					w0 = 16 - ft;
382 					w1 = ft - fs;
383 					w2 = fs - fp;
384 					w3 = fp;
385 					break;
386 				case 5:
387 					s1 = 1;
388 					s2 = NM;
389 					w0 = 16 - fs;
390 					w1 = fs - fp;
391 					w2 = fp - ft;
392 					w3 = ft;
393 					break;
394 				case 4:
395 					s1 = NM;
396 					s2 = 1;
397 					w0 = 16 - fp;
398 					w1 = fp - fs;
399 					w2 = fs - ft;
400 					w3 = ft;
401 					break;
402 				case 2:
403 					s1 = N;
404 					s2 = NM;
405 					w0 = 16 - ft;
406 					w1 = ft - fp;
407 					w2 = fp - fs;
408 					w3 = fs;
409 					break;
410 				case 0:
411 					s1 = NM;
412 					s2 = N;
413 					w0 = 16 - fp;
414 					w1 = fp - ft;
415 					w2 = ft - fs;
416 					w3 = fs;
417 					break;
418 
419 				default:
420 					s1 = NM;
421 					s2 = N;
422 					w0 = 16 - fp;
423 					w1 = fp - ft;
424 					w2 = ft - fs;
425 					w3 = fs;
426 					break;
427 				}
428 
429 				qweight[1] = qweight[0] + s1;
430 				qweight[2] = qweight[1] + s2;
431 				weight[0] = w0;
432 				weight[1] = w1;
433 				weight[2] = w2;
434 				weight[3] = w3;
435 
436 				for (i = 0; i < 4; i++)
437 				{
438 					if (weight[i] != 0)
439 					{
440 						tw->grid_weights_of_texel[texel][tw->weightcount_of_texel[texel]] = qweight[i];
441 						tw->weights_of_texel[texel][tw->weightcount_of_texel[texel]] = weight[i];
442 						tw->weightcount_of_texel[texel]++;
443 						tw->texels_of_weight[qweight[i]][tw->texelcount_of_weight[qweight[i]]] = texel;
444 						tw->texelweights_of_weight[qweight[i]][tw->texelcount_of_weight[qweight[i]]] = weight[i];
445 						tw->texelcount_of_weight[qweight[i]]++;
446 					}
447 				}
448 			}
449 		}
450 	}
451 
452 	for (i = 0; i < texels_per_block; i++)
453 	{
454 		dt->texel_num_weights[i] = tw->weightcount_of_texel[i];
455 
456 		// ensure that all 4 entries are actually initialized.
457 		// This allows a branch-free implementation of compute_value_of_texel_flt()
458 		for (j = 0; j < 4; j++)
459 		{
460 			dt->texel_weights_int[i][j] = 0;
461 			dt->texel_weights[i][j] = 0;
462 		}
463 
464 		for (j = 0; j < tw->weightcount_of_texel[i]; j++)
465 		{
466 			dt->texel_weights_int[i][j] = (uint8_t)(tw->weights_of_texel[i][j]);
467 			dt->texel_weights[i][j] = (uint8_t)(tw->grid_weights_of_texel[i][j]);
468 		}
469 	}
470 
471 	dt->num_weights = weights_per_block;
472 }
473 
construct_block_size_descriptor_2d(int xdim,int ydim,block_size_descriptor * bsd)474 static void construct_block_size_descriptor_2d(
475 	int xdim,
476 	int ydim,
477 	block_size_descriptor* bsd
478 ) {
479 	int decimation_mode_index[256];	// for each of the 256 entries in the decim_table_array, its index
480 	int decimation_mode_count = 0;
481 
482 	bsd->xdim = xdim;
483 	bsd->ydim = ydim;
484 	bsd->zdim = 1;
485 	bsd->texel_count = xdim * ydim;
486 
487 	for (int i = 0; i < 256; i++)
488 	{
489 		decimation_mode_index[i] = -1;
490 	}
491 
492 	// gather all the infill-modes that can be used with the current block size
493 	for (int x_weights = 2; x_weights <= 12; x_weights++)
494 	{
495 		for (int y_weights = 2; y_weights <= 12; y_weights++)
496 		{
497 			if (x_weights * y_weights > MAX_WEIGHTS_PER_BLOCK)
498 			{
499 				continue;
500 			}
501 
502 			decimation_table *dt = new decimation_table;
503 			decimation_mode_index[y_weights * 16 + x_weights] = decimation_mode_count;
504 			initialize_decimation_table_2d(xdim, ydim, x_weights, y_weights, dt);
505 
506 			bsd->decimation_tables[decimation_mode_count] = dt;
507 
508 			decimation_mode_count++;
509 		}
510 	}
511 
512 	bsd->decimation_mode_count = decimation_mode_count;
513 
514 	// then construct the list of block formats
515 	for (int i = 0; i < 2048; i++)
516 	{
517 		int x_weights, y_weights;
518 		int is_dual_plane;
519 		int quantization_mode;
520 		int fail = 0;
521 		int permit_decode = 1;
522 
523 		if (decode_block_mode_2d(i, &x_weights, &y_weights, &is_dual_plane, &quantization_mode))
524 		{
525 			if (x_weights > xdim || y_weights > ydim)
526 				permit_decode = 0;
527 		}
528 		else
529 		{
530 			fail = 1;
531 			permit_decode = 0;
532 		}
533 
534 		if (fail)
535 		{
536 			bsd->block_modes[i].decimation_mode = -1;
537 			bsd->block_modes[i].quantization_mode = -1;
538 			bsd->block_modes[i].is_dual_plane = -1;
539 			bsd->block_modes[i].permit_decode = 0;
540 		}
541 		else
542 		{
543 			int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights];
544 			bsd->block_modes[i].decimation_mode = decimation_mode;
545 			bsd->block_modes[i].quantization_mode = quantization_mode;
546 			bsd->block_modes[i].is_dual_plane = is_dual_plane;
547 			bsd->block_modes[i].permit_decode = permit_decode;	// disallow decode of grid size larger than block size.
548 		}
549 	}
550 }
551 
construct_block_size_descriptor_3d(int xdim,int ydim,int zdim,block_size_descriptor * bsd)552 static void construct_block_size_descriptor_3d(
553 	int xdim,
554 	int ydim,
555 	int zdim,
556 	block_size_descriptor * bsd
557 ) {
558 	int decimation_mode_index[512];	// for each of the 512 entries in the decim_table_array, its index
559 	int decimation_mode_count = 0;
560 
561 	bsd->xdim = xdim;
562 	bsd->ydim = ydim;
563 	bsd->zdim = zdim;
564 	bsd->texel_count = xdim * ydim * zdim;
565 
566 	for (int i = 0; i < 512; i++)
567 	{
568 		decimation_mode_index[i] = -1;
569 	}
570 
571 	// gather all the infill-modes that can be used with the current block size
572 	for (int x_weights = 2; x_weights <= 6; x_weights++)
573 	{
574 		for (int y_weights = 2; y_weights <= 6; y_weights++)
575 		{
576 			for (int z_weights = 2; z_weights <= 6; z_weights++)
577 			{
578 				if ((x_weights * y_weights * z_weights) > MAX_WEIGHTS_PER_BLOCK)
579 					continue;
580 				decimation_table *dt = new decimation_table;
581 				decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count;
582 				initialize_decimation_table_3d(xdim, ydim, zdim, x_weights, y_weights, z_weights, dt);
583 
584 				bsd->decimation_tables[decimation_mode_count] = dt;
585 
586 				decimation_mode_count++;
587 			}
588 		}
589 	}
590 
591 	bsd->decimation_mode_count = decimation_mode_count;
592 
593 	// then construct the list of block formats
594 	for (int i = 0; i < 2048; i++)
595 	{
596 		int x_weights, y_weights, z_weights;
597 		int is_dual_plane;
598 		int quantization_mode;
599 		int fail = 0;
600 		int permit_decode = 1;
601 
602 		if (decode_block_mode_3d(i, &x_weights, &y_weights, &z_weights, &is_dual_plane, &quantization_mode))
603 		{
604 			if (x_weights > xdim || y_weights > ydim || z_weights > zdim)
605 				permit_decode = 0;
606 		}
607 		else
608 		{
609 			fail = 1;
610 			permit_decode = 0;
611 		}
612 		if (fail)
613 		{
614 			bsd->block_modes[i].decimation_mode = -1;
615 			bsd->block_modes[i].quantization_mode = -1;
616 			bsd->block_modes[i].is_dual_plane = -1;
617 			bsd->block_modes[i].permit_decode = 0;
618 		}
619 		else
620 		{
621 			int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights];
622 			bsd->block_modes[i].decimation_mode = decimation_mode;
623 			bsd->block_modes[i].quantization_mode = quantization_mode;
624 			bsd->block_modes[i].is_dual_plane = is_dual_plane;
625 			bsd->block_modes[i].permit_decode = permit_decode;
626 		}
627 	}
628 }
629 
630 /* Public function, see header file for detailed documentation */
init_block_size_descriptor(int xdim,int ydim,int zdim,block_size_descriptor * bsd)631 void init_block_size_descriptor(
632 	int xdim,
633 	int ydim,
634 	int zdim,
635 	block_size_descriptor* bsd
636 ) {
637 	if (zdim > 1)
638 		construct_block_size_descriptor_3d(xdim, ydim, zdim, bsd);
639 	else
640 		construct_block_size_descriptor_2d(xdim, ydim, bsd);
641 
642 	init_partition_tables(bsd);
643 }
644 
term_block_size_descriptor(block_size_descriptor * bsd)645 void term_block_size_descriptor(
646 	block_size_descriptor* bsd)
647 {
648 	for(int i = 0; i < bsd->decimation_mode_count; i++)
649 	{
650 		delete bsd->decimation_tables[i];
651 	}
652 }
653