/*
 * Copyright 2023 Google LLC
 * SPDX-License-Identifier: MIT
 */

#include "anv_private.h"

#include "compiler/nir/nir_builder.h"
10 static void
astc_emu_init_image_view(struct anv_cmd_buffer * cmd_buffer,struct anv_image_view * iview,struct anv_image * image,VkFormat format,VkImageUsageFlags usage,uint32_t level,uint32_t layer)11 astc_emu_init_image_view(struct anv_cmd_buffer *cmd_buffer,
12 struct anv_image_view *iview,
13 struct anv_image *image,
14 VkFormat format,
15 VkImageUsageFlags usage,
16 uint32_t level, uint32_t layer)
17 {
18 struct anv_device *device = cmd_buffer->device;
19
20 const VkImageViewCreateInfo create_info = {
21 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
22 .pNext = &(VkImageViewUsageCreateInfo){
23 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
24 .usage = usage,
25 },
26 .image = anv_image_to_handle(image),
27 /* XXX we only need 2D but the shader expects 2D_ARRAY */
28 .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
29 .format = format,
30 .subresourceRange = {
31 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
32 .baseMipLevel = level,
33 .levelCount = 1,
34 .baseArrayLayer = layer,
35 .layerCount = 1,
36 },
37 };
38
39 memset(iview, 0, sizeof(*iview));
40 anv_image_view_init(device, iview, &create_info,
41 &cmd_buffer->surface_state_stream);
42 }
43
44 static void
astc_emu_init_push_descriptor_set(struct anv_cmd_buffer * cmd_buffer,struct anv_push_descriptor_set * push_set,VkDescriptorSetLayout _layout,uint32_t write_count,const VkWriteDescriptorSet * writes)45 astc_emu_init_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
46 struct anv_push_descriptor_set *push_set,
47 VkDescriptorSetLayout _layout,
48 uint32_t write_count,
49 const VkWriteDescriptorSet *writes)
50 {
51 struct anv_device *device = cmd_buffer->device;
52 struct anv_descriptor_set_layout *layout =
53 anv_descriptor_set_layout_from_handle(_layout);
54
55 memset(push_set, 0, sizeof(*push_set));
56 anv_push_descriptor_set_init(cmd_buffer, push_set, layout);
57
58 anv_descriptor_set_write(device, &push_set->set, write_count, writes);
59 }
60
61 static void
astc_emu_init_flush_denorm_shader(nir_builder * b)62 astc_emu_init_flush_denorm_shader(nir_builder *b)
63 {
64 b->shader->info.workgroup_size[0] = 8;
65 b->shader->info.workgroup_size[1] = 8;
66
67 const struct glsl_type *src_type =
68 glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, GLSL_TYPE_UINT);
69 nir_variable *src_var =
70 nir_variable_create(b->shader, nir_var_uniform, src_type, "src");
71 src_var->data.descriptor_set = 0;
72 src_var->data.binding = 0;
73
74 const struct glsl_type *dst_type =
75 glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_UINT);
76 nir_variable *dst_var =
77 nir_variable_create(b->shader, nir_var_uniform, dst_type, "dst");
78 dst_var->data.descriptor_set = 0;
79 dst_var->data.binding = 1;
80
81 nir_def *zero = nir_imm_int(b, 0);
82 nir_def *consts = nir_load_push_constant(b, 4, 32, zero, .range = 16);
83 nir_def *offset = nir_channels(b, consts, 0x3);
84 nir_def *extent = nir_channels(b, consts, 0x3 << 2);
85
86 nir_def *coord = nir_load_global_invocation_id(b, 32);
87 coord = nir_iadd(b, nir_channels(b, coord, 0x3), offset);
88
89 nir_def *cond = nir_ilt(b, coord, extent);
90 cond = nir_iand(b, nir_channel(b, cond, 0), nir_channel(b, cond, 1));
91 nir_push_if(b, cond);
92 {
93 const struct glsl_type *val_type = glsl_vector_type(GLSL_TYPE_UINT, 4);
94 nir_variable *val_var =
95 nir_variable_create(b->shader, nir_var_shader_temp, val_type, "val");
96
97 coord = nir_vec3(b, nir_channel(b, coord, 0), nir_channel(b, coord, 1),
98 zero);
99 nir_def *val =
100 nir_txf_deref(b, nir_build_deref_var(b, src_var), coord, zero);
101 nir_store_var(b, val_var, val, 0xf);
102
103 /* A void-extent block has this layout
104 *
105 * struct astc_void_extent_block {
106 * uint16_t header;
107 * uint16_t dontcare0;
108 * uint16_t dontcare1;
109 * uint16_t dontcare2;
110 * uint16_t R;
111 * uint16_t G;
112 * uint16_t B;
113 * uint16_t A;
114 * };
115 *
116 * where the lower 12 bits are 0xdfc for 2D LDR.
117 */
118 nir_def *block_mode = nir_iand_imm(b, nir_channel(b, val, 0), 0xfff);
119 nir_push_if(b, nir_ieq_imm(b, block_mode, 0xdfc));
120 {
121 nir_def *color = nir_channels(b, val, 0x3 << 2);
122 nir_def *comps = nir_unpack_64_4x16(b, nir_pack_64_2x32(b, color));
123
124 /* flush denorms */
125 comps = nir_bcsel(b, nir_ult_imm(b, comps, 4),
126 nir_imm_intN_t(b, 0, 16), comps);
127
128 color = nir_unpack_64_2x32(b, nir_pack_64_4x16(b, comps));
129 val = nir_vec4(b, nir_channel(b, val, 0), nir_channel(b, val, 1),
130 nir_channel(b, color, 0), nir_channel(b, color, 1));
131 nir_store_var(b, val_var, val, 0x3 << 2);
132 }
133 nir_pop_if(b, NULL);
134
135 nir_def *dst = &nir_build_deref_var(b, dst_var)->def;
136 coord = nir_pad_vector(b, coord, 4);
137 val = nir_load_var(b, val_var);
138 nir_image_deref_store(b, dst, coord, nir_undef(b, 1, 32), val, zero,
139 .image_dim = GLSL_SAMPLER_DIM_2D,
140 .image_array = true);
141 }
142 nir_pop_if(b, NULL);
143 }
144
145 static VkResult
astc_emu_init_flush_denorm_pipeline_locked(struct anv_device * device)146 astc_emu_init_flush_denorm_pipeline_locked(struct anv_device *device)
147 {
148 struct anv_device_astc_emu *astc_emu = &device->astc_emu;
149 VkDevice _device = anv_device_to_handle(device);
150 VkResult result = VK_SUCCESS;
151
152 if (astc_emu->ds_layout == VK_NULL_HANDLE) {
153 const VkDescriptorSetLayoutCreateInfo ds_layout_create_info = {
154 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
155 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
156 .bindingCount = 2,
157 .pBindings = (VkDescriptorSetLayoutBinding[]){
158 {
159 .binding = 0,
160 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
161 .descriptorCount = 1,
162 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
163 },
164 {
165 .binding = 1,
166 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
167 .descriptorCount = 1,
168 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
169 },
170 },
171 };
172 result = anv_CreateDescriptorSetLayout(_device, &ds_layout_create_info,
173 NULL, &astc_emu->ds_layout);
174 if (result != VK_SUCCESS)
175 goto out;
176 }
177
178 if (astc_emu->pipeline_layout == VK_NULL_HANDLE) {
179 const VkPipelineLayoutCreateInfo pipeline_layout_create_info = {
180 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
181 .setLayoutCount = 1,
182 .pSetLayouts = &astc_emu->ds_layout,
183 .pushConstantRangeCount = 1,
184 .pPushConstantRanges = &(VkPushConstantRange){
185 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
186 .size = sizeof(uint32_t) * 4,
187 },
188 };
189 result = anv_CreatePipelineLayout(_device, &pipeline_layout_create_info,
190 NULL, &astc_emu->pipeline_layout);
191 if (result != VK_SUCCESS)
192 goto out;
193 }
194
195 if (astc_emu->pipeline == VK_NULL_HANDLE) {
196 const struct nir_shader_compiler_options *options =
197 device->physical->compiler->nir_options[MESA_SHADER_COMPUTE];
198 nir_builder b = nir_builder_init_simple_shader(
199 MESA_SHADER_COMPUTE, options, "astc_emu_flush_denorm");
200 astc_emu_init_flush_denorm_shader(&b);
201
202 const VkComputePipelineCreateInfo pipeline_create_info = {
203 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
204 .stage =
205 (VkPipelineShaderStageCreateInfo){
206 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
207 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
208 .module = vk_shader_module_handle_from_nir(b.shader),
209 .pName = "main",
210 },
211 .layout = astc_emu->pipeline_layout,
212 };
213 result = anv_CreateComputePipelines(_device, VK_NULL_HANDLE, 1,
214 &pipeline_create_info, NULL,
215 &astc_emu->pipeline);
216 ralloc_free(b.shader);
217
218 if (result != VK_SUCCESS)
219 goto out;
220 }
221
222 out:
223 return result;
224 }
225
226 static VkResult
astc_emu_init_flush_denorm_pipeline(struct anv_device * device)227 astc_emu_init_flush_denorm_pipeline(struct anv_device *device)
228 {
229 struct anv_device_astc_emu *astc_emu = &device->astc_emu;
230 VkResult result = VK_SUCCESS;
231
232 simple_mtx_lock(&astc_emu->mutex);
233 if (!astc_emu->pipeline)
234 result = astc_emu_init_flush_denorm_pipeline_locked(device);
235 simple_mtx_unlock(&astc_emu->mutex);
236
237 return result;
238 }
239
240 static void
astc_emu_flush_denorm_slice(struct anv_cmd_buffer * cmd_buffer,VkFormat astc_format,VkImageLayout layout,VkImageView src_view,VkImageView dst_view,VkRect2D rect)241 astc_emu_flush_denorm_slice(struct anv_cmd_buffer *cmd_buffer,
242 VkFormat astc_format,
243 VkImageLayout layout,
244 VkImageView src_view,
245 VkImageView dst_view,
246 VkRect2D rect)
247 {
248 struct anv_device *device = cmd_buffer->device;
249 struct anv_device_astc_emu *astc_emu = &device->astc_emu;
250 VkCommandBuffer cmd_buffer_ = anv_cmd_buffer_to_handle(cmd_buffer);
251
252 VkResult result = astc_emu_init_flush_denorm_pipeline(device);
253 if (result != VK_SUCCESS) {
254 anv_batch_set_error(&cmd_buffer->batch, result);
255 return;
256 }
257
258 const uint32_t push_const[] = {
259 rect.offset.x,
260 rect.offset.y,
261 rect.offset.x + rect.extent.width,
262 rect.offset.y + rect.extent.height,
263 };
264
265 const VkWriteDescriptorSet set_writes[] = {
266 {
267 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
268 .dstBinding = 0,
269 .descriptorCount = 1,
270 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
271 .pImageInfo = &(VkDescriptorImageInfo){
272 .imageView = src_view,
273 .imageLayout = layout,
274 },
275 },
276 {
277 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
278 .dstBinding = 1,
279 .descriptorCount = 1,
280 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
281 .pImageInfo = &(VkDescriptorImageInfo){
282 .imageView = dst_view,
283 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
284 },
285 },
286 };
287 struct anv_push_descriptor_set push_set;
288 astc_emu_init_push_descriptor_set(cmd_buffer,
289 &push_set,
290 astc_emu->ds_layout,
291 ARRAY_SIZE(set_writes),
292 set_writes);
293 VkDescriptorSet set = anv_descriptor_set_to_handle(&push_set.set);
294
295 anv_CmdBindPipeline(cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE,
296 astc_emu->pipeline);
297
298 VkPushConstantsInfoKHR push_info = {
299 .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
300 .layout = astc_emu->pipeline_layout,
301 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
302 .offset = 0,
303 .size = sizeof(push_const),
304 .pValues = push_const,
305 };
306 anv_CmdPushConstants2KHR(cmd_buffer_, &push_info);
307
308 VkBindDescriptorSetsInfoKHR bind_info = {
309 .sType = VK_STRUCTURE_TYPE_BIND_DESCRIPTOR_SETS_INFO_KHR,
310 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
311 .layout = astc_emu->pipeline_layout,
312 .firstSet = 0,
313 .descriptorSetCount = 1,
314 .pDescriptorSets = &set,
315 .dynamicOffsetCount = 0,
316 .pDynamicOffsets = NULL,
317 };
318 anv_CmdBindDescriptorSets2KHR(cmd_buffer_, &bind_info);
319
320 /* each workgroup processes 8x8 texel blocks */
321 rect.extent.width = DIV_ROUND_UP(rect.extent.width, 8);
322 rect.extent.height = DIV_ROUND_UP(rect.extent.height, 8);
323
324 anv_genX(device->info, CmdDispatchBase)(cmd_buffer_, 0, 0, 0,
325 rect.extent.width,
326 rect.extent.height,
327 1);
328
329 anv_push_descriptor_set_finish(&push_set);
330 }
331
332 static void
astc_emu_decompress_slice(struct anv_cmd_buffer * cmd_buffer,VkFormat astc_format,VkImageLayout layout,VkImageView src_view,VkImageView dst_view,VkRect2D rect)333 astc_emu_decompress_slice(struct anv_cmd_buffer *cmd_buffer,
334 VkFormat astc_format,
335 VkImageLayout layout,
336 VkImageView src_view,
337 VkImageView dst_view,
338 VkRect2D rect)
339 {
340 struct anv_device *device = cmd_buffer->device;
341 struct anv_device_astc_emu *astc_emu = &device->astc_emu;
342 VkCommandBuffer cmd_buffer_ = anv_cmd_buffer_to_handle(cmd_buffer);
343
344 VkPipeline pipeline =
345 vk_texcompress_astc_get_decode_pipeline(&device->vk, &device->vk.alloc,
346 astc_emu->texcompress,
347 VK_NULL_HANDLE, astc_format);
348 if (pipeline == VK_NULL_HANDLE) {
349 anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_UNKNOWN);
350 return;
351 }
352
353 anv_CmdBindPipeline(cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
354
355 struct vk_texcompress_astc_write_descriptor_set writes;
356 vk_texcompress_astc_fill_write_descriptor_sets(astc_emu->texcompress,
357 &writes, src_view, layout,
358 dst_view, astc_format);
359
360 struct anv_push_descriptor_set push_set;
361 astc_emu_init_push_descriptor_set(cmd_buffer, &push_set,
362 astc_emu->texcompress->ds_layout,
363 ARRAY_SIZE(writes.descriptor_set),
364 writes.descriptor_set);
365
366 VkDescriptorSet set = anv_descriptor_set_to_handle(&push_set.set);
367
368 VkBindDescriptorSetsInfoKHR bind_info = {
369 .sType = VK_STRUCTURE_TYPE_BIND_DESCRIPTOR_SETS_INFO_KHR,
370 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
371 .layout = astc_emu->texcompress->p_layout,
372 .firstSet = 0,
373 .descriptorSetCount = 1,
374 .pDescriptorSets = &set,
375 .dynamicOffsetCount = 0,
376 .pDynamicOffsets = NULL,
377 };
378 anv_CmdBindDescriptorSets2KHR(cmd_buffer_, &bind_info);
379
380 const uint32_t push_const[] = {
381 rect.offset.x,
382 rect.offset.y,
383 (rect.offset.x + rect.extent.width) *
384 vk_format_get_blockwidth(astc_format),
385 (rect.offset.y + rect.extent.height) *
386 vk_format_get_blockheight(astc_format),
387 false, /* we don't use VK_IMAGE_VIEW_TYPE_3D */
388 };
389 VkPushConstantsInfoKHR push_info = {
390 .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
391 .layout = astc_emu->texcompress->p_layout,
392 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
393 .offset = 0,
394 .size = sizeof(push_const),
395 .pValues = push_const,
396 };
397 anv_CmdPushConstants2KHR(cmd_buffer_, &push_info);
398
399 /* each workgroup processes 2x2 texel blocks */
400 rect.extent.width = DIV_ROUND_UP(rect.extent.width, 2);
401 rect.extent.height = DIV_ROUND_UP(rect.extent.height, 2);
402
403 anv_genX(device->info, CmdDispatchBase)(cmd_buffer_, 0, 0, 0,
404 rect.extent.width,
405 rect.extent.height,
406 1);
407
408 anv_push_descriptor_set_finish(&push_set);
409 }
410
411 void
anv_astc_emu_process(struct anv_cmd_buffer * cmd_buffer,struct anv_image * image,VkImageLayout layout,const VkImageSubresourceLayers * subresource,VkOffset3D block_offset,VkExtent3D block_extent)412 anv_astc_emu_process(struct anv_cmd_buffer *cmd_buffer,
413 struct anv_image *image,
414 VkImageLayout layout,
415 const VkImageSubresourceLayers *subresource,
416 VkOffset3D block_offset,
417 VkExtent3D block_extent)
418 {
419 const bool flush_denorms =
420 cmd_buffer->device->physical->flush_astc_ldr_void_extent_denorms;
421
422 assert(image->emu_plane_format != VK_FORMAT_UNDEFINED);
423
424 const VkRect2D rect = {
425 .offset = {
426 .x = block_offset.x,
427 .y = block_offset.y,
428 },
429 .extent = {
430 .width = block_extent.width,
431 .height = block_extent.height,
432 },
433 };
434
435 /* process one layer at a time because anv_image_fill_surface_state
436 * requires an uncompressed view of a compressed image to be single layer
437 */
438 const bool is_3d = image->vk.image_type == VK_IMAGE_TYPE_3D;
439 const uint32_t slice_base = is_3d ?
440 block_offset.z : subresource->baseArrayLayer;
441 const uint32_t slice_count = is_3d ?
442 block_extent.depth : subresource->layerCount;
443
444 struct anv_cmd_saved_state saved;
445 anv_cmd_buffer_save_state(cmd_buffer,
446 ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
447 ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0 |
448 ANV_CMD_SAVED_STATE_PUSH_CONSTANTS,
449 &saved);
450
451 for (uint32_t i = 0; i < slice_count; i++) {
452 struct anv_image_view src_view;
453 struct anv_image_view dst_view;
454 astc_emu_init_image_view(cmd_buffer, &src_view, image,
455 VK_FORMAT_R32G32B32A32_UINT,
456 VK_IMAGE_USAGE_SAMPLED_BIT,
457 subresource->mipLevel, slice_base + i);
458 astc_emu_init_image_view(cmd_buffer, &dst_view, image,
459 flush_denorms ? VK_FORMAT_R32G32B32A32_UINT
460 : VK_FORMAT_R8G8B8A8_UINT,
461 VK_IMAGE_USAGE_STORAGE_BIT,
462 subresource->mipLevel, slice_base + i);
463
464 if (flush_denorms) {
465 astc_emu_flush_denorm_slice(cmd_buffer, image->vk.format, layout,
466 anv_image_view_to_handle(&src_view),
467 anv_image_view_to_handle(&dst_view),
468 rect);
469 } else {
470 astc_emu_decompress_slice(cmd_buffer, image->vk.format, layout,
471 anv_image_view_to_handle(&src_view),
472 anv_image_view_to_handle(&dst_view),
473 rect);
474 }
475 }
476
477 anv_cmd_buffer_restore_state(cmd_buffer, &saved);
478 }
479
480 VkResult
anv_device_init_astc_emu(struct anv_device * device)481 anv_device_init_astc_emu(struct anv_device *device)
482 {
483 struct anv_device_astc_emu *astc_emu = &device->astc_emu;
484 VkResult result = VK_SUCCESS;
485
486 if (device->physical->flush_astc_ldr_void_extent_denorms)
487 simple_mtx_init(&astc_emu->mutex, mtx_plain);
488
489 if (device->physical->emu_astc_ldr) {
490 result = vk_texcompress_astc_init(&device->vk, &device->vk.alloc,
491 VK_NULL_HANDLE,
492 &astc_emu->texcompress);
493 }
494
495 return result;
496 }
497
498 void
anv_device_finish_astc_emu(struct anv_device * device)499 anv_device_finish_astc_emu(struct anv_device *device)
500 {
501 struct anv_device_astc_emu *astc_emu = &device->astc_emu;
502
503 if (device->physical->flush_astc_ldr_void_extent_denorms) {
504 VkDevice _device = anv_device_to_handle(device);
505
506 anv_DestroyPipeline(_device, astc_emu->pipeline, NULL);
507 anv_DestroyPipelineLayout(_device, astc_emu->pipeline_layout, NULL);
508 anv_DestroyDescriptorSetLayout(_device, astc_emu->ds_layout, NULL);
509 simple_mtx_destroy(&astc_emu->mutex);
510 }
511
512 if (astc_emu->texcompress) {
513 vk_texcompress_astc_finish(&device->vk, &device->vk.alloc,
514 astc_emu->texcompress);
515 }
516 }
517