1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2020 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17
18 /**
19 * @brief Functions to generate block size descriptor and decimation tables.
20 */
21
22 #include "astc_codec_internals.h"
23 #include <memory>
24
/**
 * @brief Scratch storage used while building a decimation table.
 *
 * Holds the texel <-> weight relationship in both directions: for each texel
 * the grid weights that contribute to it, and for each grid weight the texels
 * it contributes to.
 */
struct TexelWeight
{
    // Number of entries recorded for each texel (only non-zero contributions
    // are recorded by the table builders in this file).
    int weightcount_of_texel[MAX_TEXELS_PER_BLOCK];
    // For each texel, the weight-grid indices of its contributing weights.
    int grid_weights_of_texel[MAX_TEXELS_PER_BLOCK][4];
    // For each texel, the contribution factor of each recorded weight.
    int weights_of_texel[MAX_TEXELS_PER_BLOCK][4];

    // Number of texels recorded against each grid weight.
    int texelcount_of_weight[MAX_WEIGHTS_PER_BLOCK];
    // For each grid weight, the indices of the texels it contributes to.
    int texels_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
    // For each grid weight, its contribution factor to each recorded texel.
    int texelweights_of_weight[MAX_WEIGHTS_PER_BLOCK][MAX_TEXELS_PER_BLOCK];
};
35
36 // return 0 on invalid mode, 1 on valid mode.
decode_block_mode_2d(int blockmode,int * Nval,int * Mval,int * dual_weight_plane,int * quant_mode)37 static int decode_block_mode_2d(
38 int blockmode,
39 int* Nval,
40 int* Mval,
41 int* dual_weight_plane,
42 int* quant_mode
43 ) {
44 int base_quant_mode = (blockmode >> 4) & 1;
45 int H = (blockmode >> 9) & 1;
46 int D = (blockmode >> 10) & 1;
47
48 int A = (blockmode >> 5) & 0x3;
49
50 int N = 0, M = 0;
51
52 if ((blockmode & 3) != 0)
53 {
54 base_quant_mode |= (blockmode & 3) << 1;
55 int B = (blockmode >> 7) & 3;
56 switch ((blockmode >> 2) & 3)
57 {
58 case 0:
59 N = B + 4;
60 M = A + 2;
61 break;
62 case 1:
63 N = B + 8;
64 M = A + 2;
65 break;
66 case 2:
67 N = A + 2;
68 M = B + 8;
69 break;
70 case 3:
71 B &= 1;
72 if (blockmode & 0x100)
73 {
74 N = B + 2;
75 M = A + 2;
76 }
77 else
78 {
79 N = A + 2;
80 M = B + 6;
81 }
82 break;
83 }
84 }
85 else
86 {
87 base_quant_mode |= ((blockmode >> 2) & 3) << 1;
88 if (((blockmode >> 2) & 3) == 0)
89 return 0;
90 int B = (blockmode >> 9) & 3;
91 switch ((blockmode >> 7) & 3)
92 {
93 case 0:
94 N = 12;
95 M = A + 2;
96 break;
97 case 1:
98 N = A + 2;
99 M = 12;
100 break;
101 case 2:
102 N = A + 6;
103 M = B + 6;
104 D = 0;
105 H = 0;
106 break;
107 case 3:
108 switch ((blockmode >> 5) & 3)
109 {
110 case 0:
111 N = 6;
112 M = 10;
113 break;
114 case 1:
115 N = 10;
116 M = 6;
117 break;
118 case 2:
119 case 3:
120 return 0;
121 }
122 break;
123 }
124 }
125
126 int weight_count = N * M * (D + 1);
127 int qmode = (base_quant_mode - 2) + 6 * H;
128
129 int weightbits = compute_ise_bitcount(weight_count, (quantization_method) qmode);
130 if (weight_count > MAX_WEIGHTS_PER_BLOCK || weightbits < MIN_WEIGHT_BITS_PER_BLOCK || weightbits > MAX_WEIGHT_BITS_PER_BLOCK)
131 return 0;
132
133 *Nval = N;
134 *Mval = M;
135 *dual_weight_plane = D;
136 *quant_mode = qmode;
137 return 1;
138 }
139
decode_block_mode_3d(int blockmode,int * Nval,int * Mval,int * Qval,int * dual_weight_plane,int * quant_mode)140 static int decode_block_mode_3d(
141 int blockmode,
142 int* Nval,
143 int* Mval,
144 int* Qval,
145 int* dual_weight_plane,
146 int* quant_mode
147 ) {
148 int base_quant_mode = (blockmode >> 4) & 1;
149 int H = (blockmode >> 9) & 1;
150 int D = (blockmode >> 10) & 1;
151
152 int A = (blockmode >> 5) & 0x3;
153
154 int N = 0, M = 0, Q = 0;
155
156 if ((blockmode & 3) != 0)
157 {
158 base_quant_mode |= (blockmode & 3) << 1;
159 int B = (blockmode >> 7) & 3;
160 int C = (blockmode >> 2) & 0x3;
161 N = A + 2;
162 M = B + 2;
163 Q = C + 2;
164 }
165 else
166 {
167 base_quant_mode |= ((blockmode >> 2) & 3) << 1;
168 if (((blockmode >> 2) & 3) == 0)
169 return 0;
170 int B = (blockmode >> 9) & 3;
171 if (((blockmode >> 7) & 3) != 3)
172 {
173 D = 0;
174 H = 0;
175 }
176 switch ((blockmode >> 7) & 3)
177 {
178 case 0:
179 N = 6;
180 M = B + 2;
181 Q = A + 2;
182 break;
183 case 1:
184 N = A + 2;
185 M = 6;
186 Q = B + 2;
187 break;
188 case 2:
189 N = A + 2;
190 M = B + 2;
191 Q = 6;
192 break;
193 case 3:
194 N = 2;
195 M = 2;
196 Q = 2;
197 switch ((blockmode >> 5) & 3)
198 {
199 case 0:
200 N = 6;
201 break;
202 case 1:
203 M = 6;
204 break;
205 case 2:
206 Q = 6;
207 break;
208 case 3:
209 return 0;
210 }
211 break;
212 }
213 }
214
215 int weight_count = N * M * Q * (D + 1);
216 int qmode = (base_quant_mode - 2) + 6 * H;
217
218 int weightbits = compute_ise_bitcount(weight_count, (quantization_method) qmode);
219 if (weight_count > MAX_WEIGHTS_PER_BLOCK ||
220 weightbits < MIN_WEIGHT_BITS_PER_BLOCK ||
221 weightbits > MAX_WEIGHT_BITS_PER_BLOCK)
222 return 0;
223
224 *Nval = N;
225 *Mval = M;
226 *Qval = Q;
227 *dual_weight_plane = D;
228 *quant_mode = qmode;
229 return 1;
230 }
231
initialize_decimation_table_2d(int xdim,int ydim,int x_weights,int y_weights,decimation_table * dt)232 static void initialize_decimation_table_2d(
233 int xdim,
234 int ydim,
235 int x_weights,
236 int y_weights,
237 decimation_table* dt
238 ) {
239 int i, j;
240 int x, y;
241
242 int texels_per_block = xdim * ydim;
243 int weights_per_block = x_weights * y_weights;
244
245 std::unique_ptr<TexelWeight> tw(new TexelWeight);
246
247 for (i = 0; i < weights_per_block; i++)
248 tw->texelcount_of_weight[i] = 0;
249 for (i = 0; i < texels_per_block; i++)
250 tw->weightcount_of_texel[i] = 0;
251
252 for (y = 0; y < ydim; y++)
253 for (x = 0; x < xdim; x++)
254 {
255 int texel = y * xdim + x;
256
257 int x_weight = (((1024 + xdim / 2) / (xdim - 1)) * x * (x_weights - 1) + 32) >> 6;
258 int y_weight = (((1024 + ydim / 2) / (ydim - 1)) * y * (y_weights - 1) + 32) >> 6;
259
260 int x_weight_frac = x_weight & 0xF;
261 int y_weight_frac = y_weight & 0xF;
262 int x_weight_int = x_weight >> 4;
263 int y_weight_int = y_weight >> 4;
264 int qweight[4];
265 int weight[4];
266 qweight[0] = x_weight_int + y_weight_int * x_weights;
267 qweight[1] = qweight[0] + 1;
268 qweight[2] = qweight[0] + x_weights;
269 qweight[3] = qweight[2] + 1;
270
271 // truncated-precision bilinear interpolation.
272 int prod = x_weight_frac * y_weight_frac;
273
274 weight[3] = (prod + 8) >> 4;
275 weight[1] = x_weight_frac - weight[3];
276 weight[2] = y_weight_frac - weight[3];
277 weight[0] = 16 - x_weight_frac - y_weight_frac + weight[3];
278
279 for (i = 0; i < 4; i++)
280 if (weight[i] != 0)
281 {
282 tw->grid_weights_of_texel[texel][tw->weightcount_of_texel[texel]] = qweight[i];
283 tw->weights_of_texel[texel][tw->weightcount_of_texel[texel]] = weight[i];
284 tw->weightcount_of_texel[texel]++;
285 tw->texels_of_weight[qweight[i]][tw->texelcount_of_weight[qweight[i]]] = texel;
286 tw->texelweights_of_weight[qweight[i]][tw->texelcount_of_weight[qweight[i]]] = weight[i];
287 tw->texelcount_of_weight[qweight[i]]++;
288 }
289 }
290
291 for (i = 0; i < texels_per_block; i++)
292 {
293 dt->texel_num_weights[i] = tw->weightcount_of_texel[i];
294
295 // ensure that all 4 entries are actually initialized.
296 // This allows a branch-free implementation of compute_value_of_texel_flt()
297 for (j = 0; j < 4; j++)
298 {
299 dt->texel_weights_int[i][j] = 0;
300 dt->texel_weights[i][j] = 0;
301 }
302
303 for (j = 0; j < tw->weightcount_of_texel[i]; j++)
304 {
305 dt->texel_weights_int[i][j] = (uint8_t)(tw->weights_of_texel[i][j]);
306 dt->texel_weights[i][j] = (uint8_t)(tw->grid_weights_of_texel[i][j]);
307 }
308 }
309
310 dt->num_weights = weights_per_block;
311 }
312
initialize_decimation_table_3d(int xdim,int ydim,int zdim,int x_weights,int y_weights,int z_weights,decimation_table * dt)313 static void initialize_decimation_table_3d(
314 int xdim,
315 int ydim,
316 int zdim,
317 int x_weights,
318 int y_weights,
319 int z_weights,
320 decimation_table* dt
321 ) {
322 int i, j;
323 int x, y, z;
324
325 int texels_per_block = xdim * ydim * zdim;
326 int weights_per_block = x_weights * y_weights * z_weights;
327
328 std::unique_ptr<TexelWeight> tw(new TexelWeight);
329
330 for (i = 0; i < weights_per_block; i++)
331 tw->texelcount_of_weight[i] = 0;
332 for (i = 0; i < texels_per_block; i++)
333 tw->weightcount_of_texel[i] = 0;
334
335 for (z = 0; z < zdim; z++)
336 {
337 for (y = 0; y < ydim; y++)
338 {
339 for (x = 0; x < xdim; x++)
340 {
341 int texel = (z * ydim + y) * xdim + x;
342
343 int x_weight = (((1024 + xdim / 2) / (xdim - 1)) * x * (x_weights - 1) + 32) >> 6;
344 int y_weight = (((1024 + ydim / 2) / (ydim - 1)) * y * (y_weights - 1) + 32) >> 6;
345 int z_weight = (((1024 + zdim / 2) / (zdim - 1)) * z * (z_weights - 1) + 32) >> 6;
346
347 int x_weight_frac = x_weight & 0xF;
348 int y_weight_frac = y_weight & 0xF;
349 int z_weight_frac = z_weight & 0xF;
350 int x_weight_int = x_weight >> 4;
351 int y_weight_int = y_weight >> 4;
352 int z_weight_int = z_weight >> 4;
353 int qweight[4];
354 int weight[4];
355 qweight[0] = (z_weight_int * y_weights + y_weight_int) * x_weights + x_weight_int;
356 qweight[3] = ((z_weight_int + 1) * y_weights + (y_weight_int + 1)) * x_weights + (x_weight_int + 1);
357
358 // simplex interpolation
359 int fs = x_weight_frac;
360 int ft = y_weight_frac;
361 int fp = z_weight_frac;
362
363 int cas = ((fs > ft) << 2) + ((ft > fp) << 1) + ((fs > fp));
364 int N = x_weights;
365 int NM = x_weights * y_weights;
366
367 int s1, s2, w0, w1, w2, w3;
368 switch (cas)
369 {
370 case 7:
371 s1 = 1;
372 s2 = N;
373 w0 = 16 - fs;
374 w1 = fs - ft;
375 w2 = ft - fp;
376 w3 = fp;
377 break;
378 case 3:
379 s1 = N;
380 s2 = 1;
381 w0 = 16 - ft;
382 w1 = ft - fs;
383 w2 = fs - fp;
384 w3 = fp;
385 break;
386 case 5:
387 s1 = 1;
388 s2 = NM;
389 w0 = 16 - fs;
390 w1 = fs - fp;
391 w2 = fp - ft;
392 w3 = ft;
393 break;
394 case 4:
395 s1 = NM;
396 s2 = 1;
397 w0 = 16 - fp;
398 w1 = fp - fs;
399 w2 = fs - ft;
400 w3 = ft;
401 break;
402 case 2:
403 s1 = N;
404 s2 = NM;
405 w0 = 16 - ft;
406 w1 = ft - fp;
407 w2 = fp - fs;
408 w3 = fs;
409 break;
410 case 0:
411 s1 = NM;
412 s2 = N;
413 w0 = 16 - fp;
414 w1 = fp - ft;
415 w2 = ft - fs;
416 w3 = fs;
417 break;
418
419 default:
420 s1 = NM;
421 s2 = N;
422 w0 = 16 - fp;
423 w1 = fp - ft;
424 w2 = ft - fs;
425 w3 = fs;
426 break;
427 }
428
429 qweight[1] = qweight[0] + s1;
430 qweight[2] = qweight[1] + s2;
431 weight[0] = w0;
432 weight[1] = w1;
433 weight[2] = w2;
434 weight[3] = w3;
435
436 for (i = 0; i < 4; i++)
437 {
438 if (weight[i] != 0)
439 {
440 tw->grid_weights_of_texel[texel][tw->weightcount_of_texel[texel]] = qweight[i];
441 tw->weights_of_texel[texel][tw->weightcount_of_texel[texel]] = weight[i];
442 tw->weightcount_of_texel[texel]++;
443 tw->texels_of_weight[qweight[i]][tw->texelcount_of_weight[qweight[i]]] = texel;
444 tw->texelweights_of_weight[qweight[i]][tw->texelcount_of_weight[qweight[i]]] = weight[i];
445 tw->texelcount_of_weight[qweight[i]]++;
446 }
447 }
448 }
449 }
450 }
451
452 for (i = 0; i < texels_per_block; i++)
453 {
454 dt->texel_num_weights[i] = tw->weightcount_of_texel[i];
455
456 // ensure that all 4 entries are actually initialized.
457 // This allows a branch-free implementation of compute_value_of_texel_flt()
458 for (j = 0; j < 4; j++)
459 {
460 dt->texel_weights_int[i][j] = 0;
461 dt->texel_weights[i][j] = 0;
462 }
463
464 for (j = 0; j < tw->weightcount_of_texel[i]; j++)
465 {
466 dt->texel_weights_int[i][j] = (uint8_t)(tw->weights_of_texel[i][j]);
467 dt->texel_weights[i][j] = (uint8_t)(tw->grid_weights_of_texel[i][j]);
468 }
469 }
470
471 dt->num_weights = weights_per_block;
472 }
473
construct_block_size_descriptor_2d(int xdim,int ydim,block_size_descriptor * bsd)474 static void construct_block_size_descriptor_2d(
475 int xdim,
476 int ydim,
477 block_size_descriptor* bsd
478 ) {
479 int decimation_mode_index[256]; // for each of the 256 entries in the decim_table_array, its index
480 int decimation_mode_count = 0;
481
482 bsd->xdim = xdim;
483 bsd->ydim = ydim;
484 bsd->zdim = 1;
485 bsd->texel_count = xdim * ydim;
486
487 for (int i = 0; i < 256; i++)
488 {
489 decimation_mode_index[i] = -1;
490 }
491
492 // gather all the infill-modes that can be used with the current block size
493 for (int x_weights = 2; x_weights <= 12; x_weights++)
494 {
495 for (int y_weights = 2; y_weights <= 12; y_weights++)
496 {
497 if (x_weights * y_weights > MAX_WEIGHTS_PER_BLOCK)
498 {
499 continue;
500 }
501
502 decimation_table *dt = new decimation_table;
503 decimation_mode_index[y_weights * 16 + x_weights] = decimation_mode_count;
504 initialize_decimation_table_2d(xdim, ydim, x_weights, y_weights, dt);
505
506 bsd->decimation_tables[decimation_mode_count] = dt;
507
508 decimation_mode_count++;
509 }
510 }
511
512 bsd->decimation_mode_count = decimation_mode_count;
513
514 // then construct the list of block formats
515 for (int i = 0; i < 2048; i++)
516 {
517 int x_weights, y_weights;
518 int is_dual_plane;
519 int quantization_mode;
520 int fail = 0;
521 int permit_decode = 1;
522
523 if (decode_block_mode_2d(i, &x_weights, &y_weights, &is_dual_plane, &quantization_mode))
524 {
525 if (x_weights > xdim || y_weights > ydim)
526 permit_decode = 0;
527 }
528 else
529 {
530 fail = 1;
531 permit_decode = 0;
532 }
533
534 if (fail)
535 {
536 bsd->block_modes[i].decimation_mode = -1;
537 bsd->block_modes[i].quantization_mode = -1;
538 bsd->block_modes[i].is_dual_plane = -1;
539 bsd->block_modes[i].permit_decode = 0;
540 }
541 else
542 {
543 int decimation_mode = decimation_mode_index[y_weights * 16 + x_weights];
544 bsd->block_modes[i].decimation_mode = decimation_mode;
545 bsd->block_modes[i].quantization_mode = quantization_mode;
546 bsd->block_modes[i].is_dual_plane = is_dual_plane;
547 bsd->block_modes[i].permit_decode = permit_decode; // disallow decode of grid size larger than block size.
548 }
549 }
550 }
551
construct_block_size_descriptor_3d(int xdim,int ydim,int zdim,block_size_descriptor * bsd)552 static void construct_block_size_descriptor_3d(
553 int xdim,
554 int ydim,
555 int zdim,
556 block_size_descriptor * bsd
557 ) {
558 int decimation_mode_index[512]; // for each of the 512 entries in the decim_table_array, its index
559 int decimation_mode_count = 0;
560
561 bsd->xdim = xdim;
562 bsd->ydim = ydim;
563 bsd->zdim = zdim;
564 bsd->texel_count = xdim * ydim * zdim;
565
566 for (int i = 0; i < 512; i++)
567 {
568 decimation_mode_index[i] = -1;
569 }
570
571 // gather all the infill-modes that can be used with the current block size
572 for (int x_weights = 2; x_weights <= 6; x_weights++)
573 {
574 for (int y_weights = 2; y_weights <= 6; y_weights++)
575 {
576 for (int z_weights = 2; z_weights <= 6; z_weights++)
577 {
578 if ((x_weights * y_weights * z_weights) > MAX_WEIGHTS_PER_BLOCK)
579 continue;
580 decimation_table *dt = new decimation_table;
581 decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights] = decimation_mode_count;
582 initialize_decimation_table_3d(xdim, ydim, zdim, x_weights, y_weights, z_weights, dt);
583
584 bsd->decimation_tables[decimation_mode_count] = dt;
585
586 decimation_mode_count++;
587 }
588 }
589 }
590
591 bsd->decimation_mode_count = decimation_mode_count;
592
593 // then construct the list of block formats
594 for (int i = 0; i < 2048; i++)
595 {
596 int x_weights, y_weights, z_weights;
597 int is_dual_plane;
598 int quantization_mode;
599 int fail = 0;
600 int permit_decode = 1;
601
602 if (decode_block_mode_3d(i, &x_weights, &y_weights, &z_weights, &is_dual_plane, &quantization_mode))
603 {
604 if (x_weights > xdim || y_weights > ydim || z_weights > zdim)
605 permit_decode = 0;
606 }
607 else
608 {
609 fail = 1;
610 permit_decode = 0;
611 }
612 if (fail)
613 {
614 bsd->block_modes[i].decimation_mode = -1;
615 bsd->block_modes[i].quantization_mode = -1;
616 bsd->block_modes[i].is_dual_plane = -1;
617 bsd->block_modes[i].permit_decode = 0;
618 }
619 else
620 {
621 int decimation_mode = decimation_mode_index[z_weights * 64 + y_weights * 8 + x_weights];
622 bsd->block_modes[i].decimation_mode = decimation_mode;
623 bsd->block_modes[i].quantization_mode = quantization_mode;
624 bsd->block_modes[i].is_dual_plane = is_dual_plane;
625 bsd->block_modes[i].permit_decode = permit_decode;
626 }
627 }
628 }
629
630 /* Public function, see header file for detailed documentation */
init_block_size_descriptor(int xdim,int ydim,int zdim,block_size_descriptor * bsd)631 void init_block_size_descriptor(
632 int xdim,
633 int ydim,
634 int zdim,
635 block_size_descriptor* bsd
636 ) {
637 if (zdim > 1)
638 construct_block_size_descriptor_3d(xdim, ydim, zdim, bsd);
639 else
640 construct_block_size_descriptor_2d(xdim, ydim, bsd);
641
642 init_partition_tables(bsd);
643 }
644
term_block_size_descriptor(block_size_descriptor * bsd)645 void term_block_size_descriptor(
646 block_size_descriptor* bsd)
647 {
648 for(int i = 0; i < bsd->decimation_mode_count; i++)
649 {
650 delete bsd->decimation_tables[i];
651 }
652 }
653