/*
 * Copyright © 2021 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */

#define AC_SURFACE_INCLUDE_NIR
#include "ac_surface.h"

#include "radv_meta.h"
#include "vk_common_entrypoints.h"
#include "vk_format.h"

void
radv_device_finish_meta_copy_vrs_htile_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipeline(radv_device_to_handle(device), state->copy_vrs_htile_pipeline, &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->copy_vrs_htile_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->copy_vrs_htile_ds_layout,
                                                        &state->alloc);
}

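/* Build a compute shader that copies the VRS rates from a 2D image into the VRS bits of the
 * HTILE words covering the same 8x8 pixel blocks.
 */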
static nir_shader *
build_copy_vrs_htile_shader(struct radv_device *device, struct radeon_surf *surf)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   nir_builder b = radv_meta_init_shader(device, MESA_SHADER_COMPUTE, "meta_copy_vrs_htile");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   /* Get coordinates. */
   nir_def *global_id = get_global_ids(&b, 2);

   nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);

   /* Multiply the coordinates by the HTILE block size (8x8) and add the rect offset. */
   nir_def *coord = nir_iadd(&b, nir_imul_imm(&b, global_id, 8), offset);

   /* Load constants. */
   nir_def *constants = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 8), .range = 20);
   nir_def *htile_pitch = nir_channel(&b, constants, 0);
   nir_def *htile_slice_size = nir_channel(&b, constants, 1);
   nir_def *read_htile_value = nir_channel(&b, constants, 2);

   /* Get the HTILE addr from coordinates. */
   nir_def *zero = nir_imm_int(&b, 0);
   nir_def *htile_addr =
      ac_nir_htile_addr_from_coord(&b, &pdev->info, &surf->u.gfx9.zs.htile_equation, htile_pitch, htile_slice_size,
                                   nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero);

   /* Set up the input VRS image descriptor. */
   const struct glsl_type *vrs_sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, GLSL_TYPE_FLOAT);
   nir_variable *input_vrs_img = nir_variable_create(b.shader, nir_var_uniform, vrs_sampler_type, "input_vrs_image");
   input_vrs_img->data.descriptor_set = 0;
   input_vrs_img->data.binding = 0;

   /* Load the VRS rates from the 2D image. */
   nir_def *value = nir_txf_deref(&b, nir_build_deref_var(&b, input_vrs_img), global_id, NULL);

   /* Extract the X/Y rates and clamp them because the maximum supported VRS rate is 2x2 (1x1 in
    * hardware).
    *
    * VRS rate X = min(value >> 2, 1)
    * VRS rate Y = min(value & 3, 1)
    */
   nir_def *x_rate = nir_ushr_imm(&b, nir_channel(&b, value, 0), 2);
   x_rate = nir_umin(&b, x_rate, nir_imm_int(&b, 1));

   nir_def *y_rate = nir_iand_imm(&b, nir_channel(&b, value, 0), 3);
   y_rate = nir_umin(&b, y_rate, nir_imm_int(&b, 1));

   /* Compute the final VRS rate: the X rate goes to bits 6-7 and the Y rate to bits 10-11 of the
    * HTILE value.
    */
   nir_def *vrs_rates = nir_ior(&b, nir_ishl_imm(&b, y_rate, 10), nir_ishl_imm(&b, x_rate, 6));

   /* Load the HTILE buffer descriptor. */
   nir_def *htile_buf = radv_meta_load_descriptor(&b, 0, 1);

   /* Load the HTILE value if requested, otherwise use the default value. */
   nir_variable *htile_value = nir_local_variable_create(b.impl, glsl_int_type(), "htile_value");

   nir_push_if(&b, nir_ieq_imm(&b, read_htile_value, 1));
   {
      /* Load the existing 32-bit HTILE value for this 8x8 pixel area. */
      nir_def *input_value = nir_load_ssbo(&b, 1, 32, htile_buf, htile_addr);

      /* Clear the 4-bit VRS rates. */
      nir_store_var(&b, htile_value, nir_iand_imm(&b, input_value, 0xfffff33f), 0x1);
   }
   nir_push_else(&b, NULL);
   {
      nir_store_var(&b, htile_value, nir_imm_int(&b, 0xfffff33f), 0x1);
   }
   nir_pop_if(&b, NULL);

   /* Set the VRS rates loaded from the image. */
   nir_def *output_value = nir_ior(&b, nir_load_var(&b, htile_value), vrs_rates);

   /* Store the updated 32-bit HTILE value which contains the VRS rates. */
   nir_store_ssbo(&b, output_value, htile_buf, htile_addr, .access = ACCESS_NON_READABLE);

   return b.shader;
}

static VkResult
create_pipeline(struct radv_device *device, struct radeon_surf *surf)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult result;

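   /* Binding 0: source VRS rate image (sampled), binding 1: destination HTILE buffer (storage),
    * matching the shader built above.
    */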
   const VkDescriptorSetLayoutBinding bindings[] = {
      {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      },
      {
         .binding = 1,
         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      },
   };

   result = radv_meta_create_descriptor_set_layout(device, 2, bindings, &state->copy_vrs_htile_ds_layout);
   if (result != VK_SUCCESS)
      return result;

   const VkPushConstantRange pc_range = {
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .size = 20,
   };

   result = radv_meta_create_pipeline_layout(device, &state->copy_vrs_htile_ds_layout, 1, &pc_range,
                                             &state->copy_vrs_htile_p_layout);
   if (result != VK_SUCCESS)
      return result;

   nir_shader *cs = build_copy_vrs_htile_shader(device, surf);

   result =
      radv_meta_create_compute_pipeline(device, cs, state->copy_vrs_htile_p_layout, &state->copy_vrs_htile_pipeline);

   ralloc_free(cs);
   return result;
}

static VkResult
get_pipeline(struct radv_device *device, struct radv_image *image, VkPipeline *pipeline_out)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult result = VK_SUCCESS;

   mtx_lock(&state->mtx);
   if (!state->copy_vrs_htile_pipeline) {
      result = create_pipeline(device, &image->planes[0].surface);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pipeline_out = state->copy_vrs_htile_pipeline;

fail:
   mtx_unlock(&state->mtx);
   return result;
}

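/* Copy the VRS rates from the given image view into the HTILE metadata of dst_image for the
 * specified rect. htile_buffer is the destination HTILE buffer.
 */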
void
radv_copy_vrs_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *vrs_iview, const VkRect2D *rect,
                    struct radv_image *dst_image, struct radv_buffer *htile_buffer, bool read_htile_value)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_meta_state *state = &device->meta_state;
   struct radv_meta_saved_state saved_state;
   VkPipeline pipeline;
   VkResult result;

   assert(radv_image_has_htile(dst_image));

   result = get_pipeline(device, dst_image, &pipeline);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

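   /* Flush previous depth/stencil attachment writes and invalidate caches so the compute shader
    * sees up-to-date HTILE data.
    */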
   cmd_buffer->state.flush_bits |=
      radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                            VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
      radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT, NULL);

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->copy_vrs_htile_p_layout, 0, 2,
      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 0,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                .pImageInfo =
                                   (VkDescriptorImageInfo[]){
                                      {
                                         .sampler = VK_NULL_HANDLE,
                                         .imageView = radv_image_view_to_handle(vrs_iview),
                                         .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                      },
                                   }},
                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 1,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(htile_buffer),
                                                                         .offset = 0,
                                                                         .range = htile_buffer->vk.size}}});

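   /* Push constants consumed by the shader: rect offset in pixels (bytes 0-7), then the HTILE
    * pitch, HTILE slice size and read_htile_value flag (bytes 8-19).
    */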
   const unsigned constants[5] = {
      rect->offset.x,
      rect->offset.y,
      dst_image->planes[0].surface.meta_pitch,
      dst_image->planes[0].surface.meta_slice_size,
      read_htile_value,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), state->copy_vrs_htile_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, 20, constants);

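   /* Dispatch one compute thread per 8x8 pixel HTILE block. */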
   uint32_t width = DIV_ROUND_UP(rect->extent.width, 8);
   uint32_t height = DIV_ROUND_UP(rect->extent.height, 8);

   radv_unaligned_dispatch(cmd_buffer, width, height, 1);

   radv_meta_restore(&saved_state, cmd_buffer);

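   /* Wait for the compute shader and make its HTILE writes visible to subsequent operations. */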
   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
      radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
}