xref: /aosp_15_r20/external/mesa3d/src/intel/vulkan/anv_astc_emu.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2023 Google LLC
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "anv_private.h"
7 
8 #include "compiler/nir/nir_builder.h"
9 
10 static void
astc_emu_init_image_view(struct anv_cmd_buffer * cmd_buffer,struct anv_image_view * iview,struct anv_image * image,VkFormat format,VkImageUsageFlags usage,uint32_t level,uint32_t layer)11 astc_emu_init_image_view(struct anv_cmd_buffer *cmd_buffer,
12                          struct anv_image_view *iview,
13                          struct anv_image *image,
14                          VkFormat format,
15                          VkImageUsageFlags usage,
16                          uint32_t level, uint32_t layer)
17 {
18    struct anv_device *device = cmd_buffer->device;
19 
20    const VkImageViewCreateInfo create_info = {
21       .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
22       .pNext = &(VkImageViewUsageCreateInfo){
23          .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
24          .usage = usage,
25       },
26       .image = anv_image_to_handle(image),
27       /* XXX we only need 2D but the shader expects 2D_ARRAY */
28       .viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY,
29       .format = format,
30       .subresourceRange = {
31          .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
32          .baseMipLevel = level,
33          .levelCount = 1,
34          .baseArrayLayer = layer,
35          .layerCount = 1,
36       },
37    };
38 
39    memset(iview, 0, sizeof(*iview));
40    anv_image_view_init(device, iview, &create_info,
41                        &cmd_buffer->surface_state_stream);
42 }
43 
44 static void
astc_emu_init_push_descriptor_set(struct anv_cmd_buffer * cmd_buffer,struct anv_push_descriptor_set * push_set,VkDescriptorSetLayout _layout,uint32_t write_count,const VkWriteDescriptorSet * writes)45 astc_emu_init_push_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
46                                   struct anv_push_descriptor_set *push_set,
47                                   VkDescriptorSetLayout _layout,
48                                   uint32_t write_count,
49                                   const VkWriteDescriptorSet *writes)
50 {
51    struct anv_device *device = cmd_buffer->device;
52    struct anv_descriptor_set_layout *layout =
53       anv_descriptor_set_layout_from_handle(_layout);
54 
55    memset(push_set, 0, sizeof(*push_set));
56    anv_push_descriptor_set_init(cmd_buffer, push_set, layout);
57 
58    anv_descriptor_set_write(device, &push_set->set, write_count, writes);
59 }
60 
61 static void
astc_emu_init_flush_denorm_shader(nir_builder * b)62 astc_emu_init_flush_denorm_shader(nir_builder *b)
63 {
64    b->shader->info.workgroup_size[0] = 8;
65    b->shader->info.workgroup_size[1] = 8;
66 
67    const struct glsl_type *src_type =
68       glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, GLSL_TYPE_UINT);
69    nir_variable *src_var =
70       nir_variable_create(b->shader, nir_var_uniform, src_type, "src");
71    src_var->data.descriptor_set = 0;
72    src_var->data.binding = 0;
73 
74    const struct glsl_type *dst_type =
75       glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_UINT);
76    nir_variable *dst_var =
77       nir_variable_create(b->shader, nir_var_uniform, dst_type, "dst");
78    dst_var->data.descriptor_set = 0;
79    dst_var->data.binding = 1;
80 
81    nir_def *zero = nir_imm_int(b, 0);
82    nir_def *consts = nir_load_push_constant(b, 4, 32, zero, .range = 16);
83    nir_def *offset = nir_channels(b, consts, 0x3);
84    nir_def *extent = nir_channels(b, consts, 0x3 << 2);
85 
86    nir_def *coord = nir_load_global_invocation_id(b, 32);
87    coord = nir_iadd(b, nir_channels(b, coord, 0x3), offset);
88 
89    nir_def *cond = nir_ilt(b, coord, extent);
90    cond = nir_iand(b, nir_channel(b, cond, 0), nir_channel(b, cond, 1));
91    nir_push_if(b, cond);
92    {
93       const struct glsl_type *val_type = glsl_vector_type(GLSL_TYPE_UINT, 4);
94       nir_variable *val_var =
95          nir_variable_create(b->shader, nir_var_shader_temp, val_type, "val");
96 
97       coord = nir_vec3(b, nir_channel(b, coord, 0), nir_channel(b, coord, 1),
98                        zero);
99       nir_def *val =
100          nir_txf_deref(b, nir_build_deref_var(b, src_var), coord, zero);
101       nir_store_var(b, val_var, val, 0xf);
102 
103       /* A void-extent block has this layout
104        *
105        *   struct astc_void_extent_block {
106        *      uint16_t header;
107        *      uint16_t dontcare0;
108        *      uint16_t dontcare1;
109        *      uint16_t dontcare2;
110        *      uint16_t R;
111        *      uint16_t G;
112        *      uint16_t B;
113        *      uint16_t A;
114        *   };
115        *
116        * where the lower 12 bits are 0xdfc for 2D LDR.
117        */
118       nir_def *block_mode = nir_iand_imm(b, nir_channel(b, val, 0), 0xfff);
119       nir_push_if(b, nir_ieq_imm(b, block_mode, 0xdfc));
120       {
121          nir_def *color = nir_channels(b, val, 0x3 << 2);
122          nir_def *comps = nir_unpack_64_4x16(b, nir_pack_64_2x32(b, color));
123 
124          /* flush denorms */
125          comps = nir_bcsel(b, nir_ult_imm(b, comps, 4),
126                            nir_imm_intN_t(b, 0, 16), comps);
127 
128          color = nir_unpack_64_2x32(b, nir_pack_64_4x16(b, comps));
129          val = nir_vec4(b, nir_channel(b, val, 0), nir_channel(b, val, 1),
130                         nir_channel(b, color, 0), nir_channel(b, color, 1));
131          nir_store_var(b, val_var, val, 0x3 << 2);
132       }
133       nir_pop_if(b, NULL);
134 
135       nir_def *dst = &nir_build_deref_var(b, dst_var)->def;
136       coord = nir_pad_vector(b, coord, 4);
137       val = nir_load_var(b, val_var);
138       nir_image_deref_store(b, dst, coord, nir_undef(b, 1, 32), val, zero,
139                             .image_dim = GLSL_SAMPLER_DIM_2D,
140                             .image_array = true);
141    }
142    nir_pop_if(b, NULL);
143 }
144 
145 static VkResult
astc_emu_init_flush_denorm_pipeline_locked(struct anv_device * device)146 astc_emu_init_flush_denorm_pipeline_locked(struct anv_device *device)
147 {
148    struct anv_device_astc_emu *astc_emu = &device->astc_emu;
149    VkDevice _device = anv_device_to_handle(device);
150    VkResult result = VK_SUCCESS;
151 
152    if (astc_emu->ds_layout == VK_NULL_HANDLE) {
153       const VkDescriptorSetLayoutCreateInfo ds_layout_create_info = {
154          .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
155          .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
156          .bindingCount = 2,
157          .pBindings = (VkDescriptorSetLayoutBinding[]){
158             {
159                .binding = 0,
160                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
161                .descriptorCount = 1,
162                .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
163             },
164             {
165                .binding = 1,
166                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
167                .descriptorCount = 1,
168                .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
169             },
170          },
171       };
172       result = anv_CreateDescriptorSetLayout(_device, &ds_layout_create_info,
173                                              NULL, &astc_emu->ds_layout);
174       if (result != VK_SUCCESS)
175          goto out;
176    }
177 
178    if (astc_emu->pipeline_layout == VK_NULL_HANDLE) {
179       const VkPipelineLayoutCreateInfo pipeline_layout_create_info = {
180          .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
181          .setLayoutCount = 1,
182          .pSetLayouts = &astc_emu->ds_layout,
183          .pushConstantRangeCount = 1,
184          .pPushConstantRanges = &(VkPushConstantRange){
185             .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
186             .size = sizeof(uint32_t) * 4,
187          },
188       };
189       result = anv_CreatePipelineLayout(_device, &pipeline_layout_create_info,
190                                         NULL, &astc_emu->pipeline_layout);
191       if (result != VK_SUCCESS)
192          goto out;
193    }
194 
195    if (astc_emu->pipeline == VK_NULL_HANDLE) {
196       const struct nir_shader_compiler_options *options =
197          device->physical->compiler->nir_options[MESA_SHADER_COMPUTE];
198       nir_builder b = nir_builder_init_simple_shader(
199             MESA_SHADER_COMPUTE, options, "astc_emu_flush_denorm");
200       astc_emu_init_flush_denorm_shader(&b);
201 
202       const VkComputePipelineCreateInfo pipeline_create_info = {
203          .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
204          .stage =
205             (VkPipelineShaderStageCreateInfo){
206                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
207                .stage = VK_SHADER_STAGE_COMPUTE_BIT,
208                .module = vk_shader_module_handle_from_nir(b.shader),
209                .pName = "main",
210             },
211          .layout = astc_emu->pipeline_layout,
212       };
213       result = anv_CreateComputePipelines(_device, VK_NULL_HANDLE, 1,
214                                           &pipeline_create_info, NULL,
215                                           &astc_emu->pipeline);
216       ralloc_free(b.shader);
217 
218       if (result != VK_SUCCESS)
219          goto out;
220    }
221 
222 out:
223    return result;
224 }
225 
226 static VkResult
astc_emu_init_flush_denorm_pipeline(struct anv_device * device)227 astc_emu_init_flush_denorm_pipeline(struct anv_device *device)
228 {
229    struct anv_device_astc_emu *astc_emu = &device->astc_emu;
230    VkResult result = VK_SUCCESS;
231 
232    simple_mtx_lock(&astc_emu->mutex);
233    if (!astc_emu->pipeline)
234       result = astc_emu_init_flush_denorm_pipeline_locked(device);
235    simple_mtx_unlock(&astc_emu->mutex);
236 
237    return result;
238 }
239 
240 static void
astc_emu_flush_denorm_slice(struct anv_cmd_buffer * cmd_buffer,VkFormat astc_format,VkImageLayout layout,VkImageView src_view,VkImageView dst_view,VkRect2D rect)241 astc_emu_flush_denorm_slice(struct anv_cmd_buffer *cmd_buffer,
242                             VkFormat astc_format,
243                             VkImageLayout layout,
244                             VkImageView src_view,
245                             VkImageView dst_view,
246                             VkRect2D rect)
247 {
248    struct anv_device *device = cmd_buffer->device;
249    struct anv_device_astc_emu *astc_emu = &device->astc_emu;
250    VkCommandBuffer cmd_buffer_ = anv_cmd_buffer_to_handle(cmd_buffer);
251 
252    VkResult result = astc_emu_init_flush_denorm_pipeline(device);
253    if (result != VK_SUCCESS) {
254       anv_batch_set_error(&cmd_buffer->batch, result);
255       return;
256    }
257 
258    const uint32_t push_const[] = {
259       rect.offset.x,
260       rect.offset.y,
261       rect.offset.x + rect.extent.width,
262       rect.offset.y + rect.extent.height,
263    };
264 
265    const VkWriteDescriptorSet set_writes[] = {
266       {
267          .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
268          .dstBinding = 0,
269          .descriptorCount = 1,
270          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
271          .pImageInfo = &(VkDescriptorImageInfo){
272             .imageView = src_view,
273             .imageLayout = layout,
274          },
275       },
276       {
277          .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
278          .dstBinding = 1,
279          .descriptorCount = 1,
280          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
281          .pImageInfo = &(VkDescriptorImageInfo){
282             .imageView = dst_view,
283             .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
284          },
285       },
286    };
287    struct anv_push_descriptor_set push_set;
288    astc_emu_init_push_descriptor_set(cmd_buffer,
289                                      &push_set,
290                                      astc_emu->ds_layout,
291                                      ARRAY_SIZE(set_writes),
292                                      set_writes);
293    VkDescriptorSet set = anv_descriptor_set_to_handle(&push_set.set);
294 
295    anv_CmdBindPipeline(cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE,
296                        astc_emu->pipeline);
297 
298    VkPushConstantsInfoKHR push_info = {
299       .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
300       .layout = astc_emu->pipeline_layout,
301       .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
302       .offset = 0,
303       .size = sizeof(push_const),
304       .pValues = push_const,
305    };
306    anv_CmdPushConstants2KHR(cmd_buffer_, &push_info);
307 
308    VkBindDescriptorSetsInfoKHR bind_info = {
309       .sType = VK_STRUCTURE_TYPE_BIND_DESCRIPTOR_SETS_INFO_KHR,
310       .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
311       .layout = astc_emu->pipeline_layout,
312       .firstSet = 0,
313       .descriptorSetCount = 1,
314       .pDescriptorSets = &set,
315       .dynamicOffsetCount = 0,
316       .pDynamicOffsets = NULL,
317    };
318    anv_CmdBindDescriptorSets2KHR(cmd_buffer_, &bind_info);
319 
320    /* each workgroup processes 8x8 texel blocks */
321    rect.extent.width = DIV_ROUND_UP(rect.extent.width, 8);
322    rect.extent.height = DIV_ROUND_UP(rect.extent.height, 8);
323 
324    anv_genX(device->info, CmdDispatchBase)(cmd_buffer_, 0, 0, 0,
325                                            rect.extent.width,
326                                            rect.extent.height,
327                                            1);
328 
329    anv_push_descriptor_set_finish(&push_set);
330 }
331 
332 static void
astc_emu_decompress_slice(struct anv_cmd_buffer * cmd_buffer,VkFormat astc_format,VkImageLayout layout,VkImageView src_view,VkImageView dst_view,VkRect2D rect)333 astc_emu_decompress_slice(struct anv_cmd_buffer *cmd_buffer,
334                           VkFormat astc_format,
335                           VkImageLayout layout,
336                           VkImageView src_view,
337                           VkImageView dst_view,
338                           VkRect2D rect)
339 {
340    struct anv_device *device = cmd_buffer->device;
341    struct anv_device_astc_emu *astc_emu = &device->astc_emu;
342    VkCommandBuffer cmd_buffer_ = anv_cmd_buffer_to_handle(cmd_buffer);
343 
344    VkPipeline pipeline =
345       vk_texcompress_astc_get_decode_pipeline(&device->vk, &device->vk.alloc,
346                                               astc_emu->texcompress,
347                                               VK_NULL_HANDLE, astc_format);
348    if (pipeline == VK_NULL_HANDLE) {
349       anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_UNKNOWN);
350       return;
351    }
352 
353    anv_CmdBindPipeline(cmd_buffer_, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
354 
355    struct vk_texcompress_astc_write_descriptor_set writes;
356    vk_texcompress_astc_fill_write_descriptor_sets(astc_emu->texcompress,
357                                                   &writes, src_view, layout,
358                                                   dst_view, astc_format);
359 
360    struct anv_push_descriptor_set push_set;
361    astc_emu_init_push_descriptor_set(cmd_buffer, &push_set,
362                                      astc_emu->texcompress->ds_layout,
363                                      ARRAY_SIZE(writes.descriptor_set),
364                                      writes.descriptor_set);
365 
366    VkDescriptorSet set = anv_descriptor_set_to_handle(&push_set.set);
367 
368    VkBindDescriptorSetsInfoKHR bind_info = {
369       .sType = VK_STRUCTURE_TYPE_BIND_DESCRIPTOR_SETS_INFO_KHR,
370       .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
371       .layout = astc_emu->texcompress->p_layout,
372       .firstSet = 0,
373       .descriptorSetCount = 1,
374       .pDescriptorSets = &set,
375       .dynamicOffsetCount = 0,
376       .pDynamicOffsets = NULL,
377    };
378    anv_CmdBindDescriptorSets2KHR(cmd_buffer_, &bind_info);
379 
380    const uint32_t push_const[] = {
381       rect.offset.x,
382       rect.offset.y,
383       (rect.offset.x + rect.extent.width) *
384          vk_format_get_blockwidth(astc_format),
385       (rect.offset.y + rect.extent.height) *
386          vk_format_get_blockheight(astc_format),
387       false, /* we don't use VK_IMAGE_VIEW_TYPE_3D */
388    };
389    VkPushConstantsInfoKHR push_info = {
390       .sType = VK_STRUCTURE_TYPE_PUSH_CONSTANTS_INFO_KHR,
391       .layout = astc_emu->texcompress->p_layout,
392       .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
393       .offset = 0,
394       .size = sizeof(push_const),
395       .pValues = push_const,
396    };
397    anv_CmdPushConstants2KHR(cmd_buffer_, &push_info);
398 
399    /* each workgroup processes 2x2 texel blocks */
400    rect.extent.width = DIV_ROUND_UP(rect.extent.width, 2);
401    rect.extent.height = DIV_ROUND_UP(rect.extent.height, 2);
402 
403    anv_genX(device->info, CmdDispatchBase)(cmd_buffer_, 0, 0, 0,
404                                            rect.extent.width,
405                                            rect.extent.height,
406                                            1);
407 
408    anv_push_descriptor_set_finish(&push_set);
409 }
410 
411 void
anv_astc_emu_process(struct anv_cmd_buffer * cmd_buffer,struct anv_image * image,VkImageLayout layout,const VkImageSubresourceLayers * subresource,VkOffset3D block_offset,VkExtent3D block_extent)412 anv_astc_emu_process(struct anv_cmd_buffer *cmd_buffer,
413                      struct anv_image *image,
414                      VkImageLayout layout,
415                      const VkImageSubresourceLayers *subresource,
416                      VkOffset3D block_offset,
417                      VkExtent3D block_extent)
418 {
419    const bool flush_denorms =
420       cmd_buffer->device->physical->flush_astc_ldr_void_extent_denorms;
421 
422    assert(image->emu_plane_format != VK_FORMAT_UNDEFINED);
423 
424    const VkRect2D rect = {
425       .offset = {
426          .x = block_offset.x,
427          .y = block_offset.y,
428       },
429       .extent = {
430          .width = block_extent.width,
431          .height = block_extent.height,
432       },
433    };
434 
435    /* process one layer at a time because anv_image_fill_surface_state
436     * requires an uncompressed view of a compressed image to be single layer
437     */
438    const bool is_3d = image->vk.image_type == VK_IMAGE_TYPE_3D;
439    const uint32_t slice_base = is_3d ?
440       block_offset.z : subresource->baseArrayLayer;
441    const uint32_t slice_count = is_3d ?
442       block_extent.depth : subresource->layerCount;
443 
444    struct anv_cmd_saved_state saved;
445    anv_cmd_buffer_save_state(cmd_buffer,
446                              ANV_CMD_SAVED_STATE_COMPUTE_PIPELINE |
447                              ANV_CMD_SAVED_STATE_DESCRIPTOR_SET_0 |
448                              ANV_CMD_SAVED_STATE_PUSH_CONSTANTS,
449                              &saved);
450 
451    for (uint32_t i = 0; i < slice_count; i++) {
452       struct anv_image_view src_view;
453       struct anv_image_view dst_view;
454       astc_emu_init_image_view(cmd_buffer, &src_view, image,
455                                VK_FORMAT_R32G32B32A32_UINT,
456                                VK_IMAGE_USAGE_SAMPLED_BIT,
457                                subresource->mipLevel, slice_base + i);
458       astc_emu_init_image_view(cmd_buffer, &dst_view, image,
459                                flush_denorms ? VK_FORMAT_R32G32B32A32_UINT
460                                              : VK_FORMAT_R8G8B8A8_UINT,
461                                VK_IMAGE_USAGE_STORAGE_BIT,
462                                subresource->mipLevel, slice_base + i);
463 
464       if (flush_denorms) {
465          astc_emu_flush_denorm_slice(cmd_buffer, image->vk.format, layout,
466                                      anv_image_view_to_handle(&src_view),
467                                      anv_image_view_to_handle(&dst_view),
468                                      rect);
469       } else {
470          astc_emu_decompress_slice(cmd_buffer, image->vk.format, layout,
471                                    anv_image_view_to_handle(&src_view),
472                                    anv_image_view_to_handle(&dst_view),
473                                    rect);
474       }
475    }
476 
477    anv_cmd_buffer_restore_state(cmd_buffer, &saved);
478 }
479 
480 VkResult
anv_device_init_astc_emu(struct anv_device * device)481 anv_device_init_astc_emu(struct anv_device *device)
482 {
483    struct anv_device_astc_emu *astc_emu = &device->astc_emu;
484    VkResult result = VK_SUCCESS;
485 
486    if (device->physical->flush_astc_ldr_void_extent_denorms)
487       simple_mtx_init(&astc_emu->mutex, mtx_plain);
488 
489    if (device->physical->emu_astc_ldr) {
490       result = vk_texcompress_astc_init(&device->vk, &device->vk.alloc,
491                                         VK_NULL_HANDLE,
492                                         &astc_emu->texcompress);
493    }
494 
495    return result;
496 }
497 
498 void
anv_device_finish_astc_emu(struct anv_device * device)499 anv_device_finish_astc_emu(struct anv_device *device)
500 {
501    struct anv_device_astc_emu *astc_emu = &device->astc_emu;
502 
503    if (device->physical->flush_astc_ldr_void_extent_denorms) {
504       VkDevice _device = anv_device_to_handle(device);
505 
506       anv_DestroyPipeline(_device, astc_emu->pipeline, NULL);
507       anv_DestroyPipelineLayout(_device, astc_emu->pipeline_layout, NULL);
508       anv_DestroyDescriptorSetLayout(_device, astc_emu->ds_layout, NULL);
509       simple_mtx_destroy(&astc_emu->mutex);
510    }
511 
512    if (astc_emu->texcompress) {
513       vk_texcompress_astc_finish(&device->vk, &device->vk.alloc,
514                                  astc_emu->texcompress);
515    }
516 }
517