1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2011-2022 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 //     http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17 
18 /**
19  * @brief Functions for generating partition tables on demand.
20  */
21 
22 #include "astcenc_internal.h"
23 
24 /**
25  * @brief Generate a canonical representation of a partition pattern.
26  *
27  * The returned value stores two bits per texel, for up to 6x6x6 texels, where the two bits store
28  * the remapped texel index. Remapping ensures that we only match on the partition pattern,
29  * independent of the partition order generated by the hash.
30  *
31  * @param      texel_count           The number of texels in the block.
32  * @param      partition_of_texel    The partition assignments, in hash order.
33  * @param[out] bit_pattern           The output bit pattern representation.
34  */
generate_canonical_partitioning(unsigned int texel_count,const uint8_t * partition_of_texel,uint64_t bit_pattern[7])35 static void generate_canonical_partitioning(
36 	unsigned int texel_count,
37 	const uint8_t* partition_of_texel,
38 	uint64_t bit_pattern[7]
39 ) {
40 	// Clear the pattern
41 	for (unsigned int i = 0; i < 7; i++)
42 	{
43 		bit_pattern[i] = 0;
44 	}
45 
46 	// Store a mapping to reorder the raw partitions so that the the partitions are ordered such
47 	// that the lowest texel index in partition N is smaller than the lowest texel index in
48 	// partition N + 1.
49 	int mapped_index[BLOCK_MAX_PARTITIONS];
50 	int map_weight_count = 0;
51 
52 	for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
53 	{
54 		mapped_index[i] = -1;
55 	}
56 
57 	for (unsigned int i = 0; i < texel_count; i++)
58 	{
59 		int index = partition_of_texel[i];
60 		if (mapped_index[index] < 0)
61 		{
62 			mapped_index[index] = map_weight_count++;
63 		}
64 
65 		uint64_t xlat_index = mapped_index[index];
66 		bit_pattern[i >> 5] |= xlat_index << (2 * (i & 0x1F));
67 	}
68 }
69 
70 /**
71  * @brief Compare two canonical patterns to see if they are the same.
72  *
73  * @param part1   The first canonical bit pattern to check.
74  * @param part2   The second canonical bit pattern to check.
75  *
76  * @return @c true if the patterns are the same, @c false otherwise.
77  */
compare_canonical_partitionings(const uint64_t part1[7],const uint64_t part2[7])78 static bool compare_canonical_partitionings(
79 	const uint64_t part1[7],
80 	const uint64_t part2[7]
81 ) {
82 	return (part1[0] == part2[0]) && (part1[1] == part2[1]) &&
83 	       (part1[2] == part2[2]) && (part1[3] == part2[3]) &&
84 	       (part1[4] == part2[4]) && (part1[5] == part2[5]) &&
85 	       (part1[6] == part2[6]);
86 }
87 
88 /**
89  * @brief Hash function used for procedural partition assignment.
90  *
91  * @param inp The hash seed.
92  *
93  * @return The hashed value.
94  */
hash52(uint32_t inp)95 static uint32_t hash52(
96 	uint32_t inp
97 ) {
98 	inp ^= inp >> 15;
99 
100 	// (2^4 + 1) * (2^7 + 1) * (2^17 - 1)
101 	inp *= 0xEEDE0891;
102 	inp ^= inp >> 5;
103 	inp += inp << 16;
104 	inp ^= inp >> 7;
105 	inp ^= inp >> 3;
106 	inp ^= inp << 6;
107 	inp ^= inp >> 17;
108 	return inp;
109 }
110 
111 /**
112  * @brief Select texel assignment for a single coordinate.
113  *
114  * @param seed              The seed - the partition index from the block.
115  * @param x                 The texel X coordinate in the block.
116  * @param y                 The texel Y coordinate in the block.
117  * @param z                 The texel Z coordinate in the block.
118  * @param partition_count   The total partition count of this encoding.
119  * @param small_block       @c true if the blockhas fewer than 32 texels.
120  *
121  * @return The assigned partition index for this texel.
122  */
select_partition(int seed,int x,int y,int z,int partition_count,bool small_block)123 static uint8_t select_partition(
124 	int seed,
125 	int x,
126 	int y,
127 	int z,
128 	int partition_count,
129 	bool small_block
130 ) {
131 	// For small blocks bias the coordinates to get better distribution
132 	if (small_block)
133 	{
134 		x <<= 1;
135 		y <<= 1;
136 		z <<= 1;
137 	}
138 
139 	seed += (partition_count - 1) * 1024;
140 
141 	uint32_t rnum = hash52(seed);
142 
143 	uint8_t seed1 = rnum & 0xF;
144 	uint8_t seed2 = (rnum >> 4) & 0xF;
145 	uint8_t seed3 = (rnum >> 8) & 0xF;
146 	uint8_t seed4 = (rnum >> 12) & 0xF;
147 	uint8_t seed5 = (rnum >> 16) & 0xF;
148 	uint8_t seed6 = (rnum >> 20) & 0xF;
149 	uint8_t seed7 = (rnum >> 24) & 0xF;
150 	uint8_t seed8 = (rnum >> 28) & 0xF;
151 	uint8_t seed9 = (rnum >> 18) & 0xF;
152 	uint8_t seed10 = (rnum >> 22) & 0xF;
153 	uint8_t seed11 = (rnum >> 26) & 0xF;
154 	uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF;
155 
156 	// Squaring all the seeds in order to bias their distribution towards lower values.
157 	seed1 *= seed1;
158 	seed2 *= seed2;
159 	seed3 *= seed3;
160 	seed4 *= seed4;
161 	seed5 *= seed5;
162 	seed6 *= seed6;
163 	seed7 *= seed7;
164 	seed8 *= seed8;
165 	seed9 *= seed9;
166 	seed10 *= seed10;
167 	seed11 *= seed11;
168 	seed12 *= seed12;
169 
170 	int sh1, sh2;
171 	if (seed & 1)
172 	{
173 		sh1 = (seed & 2 ? 4 : 5);
174 		sh2 = (partition_count == 3 ? 6 : 5);
175 	}
176 	else
177 	{
178 		sh1 = (partition_count == 3 ? 6 : 5);
179 		sh2 = (seed & 2 ? 4 : 5);
180 	}
181 
182 	int sh3 = (seed & 0x10) ? sh1 : sh2;
183 
184 	seed1 >>= sh1;
185 	seed2 >>= sh2;
186 	seed3 >>= sh1;
187 	seed4 >>= sh2;
188 	seed5 >>= sh1;
189 	seed6 >>= sh2;
190 	seed7 >>= sh1;
191 	seed8 >>= sh2;
192 
193 	seed9 >>= sh3;
194 	seed10 >>= sh3;
195 	seed11 >>= sh3;
196 	seed12 >>= sh3;
197 
198 	int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
199 	int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
200 	int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6);
201 	int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2);
202 
203 	// Apply the saw
204 	a &= 0x3F;
205 	b &= 0x3F;
206 	c &= 0x3F;
207 	d &= 0x3F;
208 
209 	// Remove some of the components if we are to output < 4 partitions.
210 	if (partition_count <= 3)
211 	{
212 		d = 0;
213 	}
214 
215 	if (partition_count <= 2)
216 	{
217 		c = 0;
218 	}
219 
220 	if (partition_count <= 1)
221 	{
222 		b = 0;
223 	}
224 
225 	uint8_t partition;
226 	if (a >= b && a >= c && a >= d)
227 	{
228 		partition = 0;
229 	}
230 	else if (b >= c && b >= d)
231 	{
232 		partition = 1;
233 	}
234 	else if (c >= d)
235 	{
236 		partition = 2;
237 	}
238 	else
239 	{
240 		partition = 3;
241 	}
242 
243 	return partition;
244 }
245 
246 /**
247  * @brief Generate a single partition info structure.
248  *
249  * @param[out] bsd                     The block size information.
250  * @param      partition_count         The partition count of this partitioning.
251  * @param      partition_index         The partition index / seed of this partitioning.
252  * @param      partition_remap_index   The remapped partition index of this partitioning.
253  * @param[out] pi                      The partition info structure to populate.
254  *
255  * @return True if this is a useful partition index, False if we can skip it.
256  */
generate_one_partition_info_entry(block_size_descriptor & bsd,unsigned int partition_count,unsigned int partition_index,unsigned int partition_remap_index,partition_info & pi)257 static bool generate_one_partition_info_entry(
258 	block_size_descriptor& bsd,
259 	unsigned int partition_count,
260 	unsigned int partition_index,
261 	unsigned int partition_remap_index,
262 	partition_info& pi
263 ) {
264 	int texels_per_block = bsd.texel_count;
265 	bool small_block = texels_per_block < 32;
266 
267 	uint8_t *partition_of_texel = pi.partition_of_texel;
268 
269 	// Assign texels to partitions
270 	int texel_idx = 0;
271 	int counts[BLOCK_MAX_PARTITIONS] { 0 };
272 	for (unsigned int z = 0; z < bsd.zdim; z++)
273 	{
274 		for (unsigned int y = 0; y <  bsd.ydim; y++)
275 		{
276 			for (unsigned int x = 0; x <  bsd.xdim; x++)
277 			{
278 				uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block);
279 				pi.texels_of_partition[part][counts[part]++] = static_cast<uint8_t>(texel_idx++);
280 				*partition_of_texel++ = part;
281 			}
282 		}
283 	}
284 
285 	// Fill loop tail so we can overfetch later
286 	for (unsigned int i = 0; i < partition_count; i++)
287 	{
288 		int ptex_count = counts[i];
289 		int ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count);
290 		for (int j = ptex_count; j < ptex_count_simd; j++)
291 		{
292 			pi.texels_of_partition[i][j] = pi.texels_of_partition[i][ptex_count - 1];
293 		}
294 	}
295 
296 	// Populate the actual procedural partition count
297 	if (counts[0] == 0)
298 	{
299 		pi.partition_count = 0;
300 	}
301 	else if (counts[1] == 0)
302 	{
303 		pi.partition_count = 1;
304 	}
305 	else if (counts[2] == 0)
306 	{
307 		pi.partition_count = 2;
308 	}
309 	else if (counts[3] == 0)
310 	{
311 		pi.partition_count = 3;
312 	}
313 	else
314 	{
315 		pi.partition_count = 4;
316 	}
317 
318 	// Populate the partition index
319 	pi.partition_index = static_cast<uint16_t>(partition_index);
320 
321 	// Populate the coverage bitmaps for 2/3/4 partitions
322 	uint64_t* bitmaps { nullptr };
323 	if (partition_count == 2)
324 	{
325 		bitmaps = bsd.coverage_bitmaps_2[partition_remap_index];
326 	}
327 	else if (partition_count == 3)
328 	{
329 		bitmaps = bsd.coverage_bitmaps_3[partition_remap_index];
330 	}
331 	else if (partition_count == 4)
332 	{
333 		bitmaps = bsd.coverage_bitmaps_4[partition_remap_index];
334 	}
335 
336 	for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++)
337 	{
338 		pi.partition_texel_count[i] = static_cast<uint8_t>(counts[i]);
339 	}
340 
341 	// Valid partitionings have texels in all of the requested partitions
342 	bool valid = pi.partition_count == partition_count;
343 
344 	if (bitmaps)
345 	{
346 		// Populate the partition coverage bitmap
347 		for (unsigned int i = 0; i < partition_count; i++)
348 		{
349 			bitmaps[i] = 0ULL;
350 		}
351 
352 		unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS);
353 		for (unsigned int i = 0; i < texels_to_process; i++)
354 		{
355 			unsigned int idx = bsd.kmeans_texels[i];
356 			bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i;
357 		}
358 	}
359 
360 	return valid;
361 }
362 
build_partition_table_for_one_partition_count(block_size_descriptor & bsd,bool can_omit_partitionings,unsigned int partition_count_cutoff,unsigned int partition_count,partition_info * ptab,uint64_t * canonical_patterns)363 static void build_partition_table_for_one_partition_count(
364 	block_size_descriptor& bsd,
365 	bool can_omit_partitionings,
366 	unsigned int partition_count_cutoff,
367 	unsigned int partition_count,
368 	partition_info* ptab,
369 	uint64_t* canonical_patterns
370 ) {
371 	unsigned int next_index = 0;
372 	bsd.partitioning_count_selected[partition_count - 1] = 0;
373 	bsd.partitioning_count_all[partition_count - 1] = 0;
374 
375 	// Skip tables larger than config max partition count if we can omit modes
376 	if (can_omit_partitionings && (partition_count > partition_count_cutoff))
377 	{
378 		return;
379 	}
380 
381 	// Iterate through twice
382 	//   - Pass 0: Keep selected partitionings
383 	//   - Pass 1: Keep non-selected partitionings (skip if in omit mode)
384 	unsigned int max_iter = can_omit_partitionings ? 1 : 2;
385 
386 	// Tracker for things we built in the first iteration
387 	uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 };
388 	for (unsigned int x = 0; x < max_iter; x++)
389 	{
390 		for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++)
391 		{
392 			// Don't include things we built in the first pass
393 			if ((x == 1) && build[i])
394 			{
395 				continue;
396 			}
397 
398 			bool keep_useful = generate_one_partition_info_entry(bsd, partition_count, i, next_index, ptab[next_index]);
399 			if ((x == 0) && !keep_useful)
400 			{
401 				continue;
402 			}
403 
404 			generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * 7);
405 			bool keep_canonical = true;
406 			for (unsigned int j = 0; j < next_index; j++)
407 			{
408 				bool match = compare_canonical_partitionings(canonical_patterns + 7 * next_index, canonical_patterns + 7 * j);
409 				if (match)
410 				{
411 					keep_canonical = false;
412 					break;
413 				}
414 			}
415 
416 			if (keep_useful && keep_canonical)
417 			{
418 				if (x == 0)
419 				{
420 					bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
421 					bsd.partitioning_count_selected[partition_count - 1]++;
422 					bsd.partitioning_count_all[partition_count - 1]++;
423 					build[i] = 1;
424 					next_index++;
425 				}
426 			}
427 			else
428 			{
429 				if (x == 1)
430 				{
431 					bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index);
432 					bsd.partitioning_count_all[partition_count - 1]++;
433 					next_index++;
434 				}
435 			}
436 		}
437 	}
438 }
439 
440 /* See header for documentation. */
init_partition_tables(block_size_descriptor & bsd,bool can_omit_partitionings,unsigned int partition_count_cutoff)441 void init_partition_tables(
442 	block_size_descriptor& bsd,
443 	bool can_omit_partitionings,
444 	unsigned int partition_count_cutoff
445 ) {
446 	partition_info* par_tab2 = bsd.partitionings;
447 	partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS;
448 	partition_info* par_tab4 = par_tab3 + BLOCK_MAX_PARTITIONINGS;
449 	partition_info* par_tab1 = par_tab4 + BLOCK_MAX_PARTITIONINGS;
450 
451 	generate_one_partition_info_entry(bsd, 1, 0, 0, *par_tab1);
452 	bsd.partitioning_count_selected[0] = 1;
453 	bsd.partitioning_count_all[0] = 1;
454 
455 	uint64_t* canonical_patterns = new uint64_t[BLOCK_MAX_PARTITIONINGS * 7];
456 	build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 2, par_tab2, canonical_patterns);
457 	build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 3, par_tab3, canonical_patterns);
458 	build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 4, par_tab4, canonical_patterns);
459 
460 	delete[] canonical_patterns;
461 }
462