/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Based in part on v3d driver which is:
 *
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"

/* Our Vulkan resource indices represent indices in descriptor maps which
 * include all shader stages, so we need to size the arrays below
 * accordingly. For now we only support a maximum of 3 stages: VS, GS, FS.
 */
#define MAX_STAGES 3

#define MAX_TOTAL_TEXTURE_SAMPLERS (V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
struct texture_bo_list {
   struct v3dv_bo *tex[MAX_TOTAL_TEXTURE_SAMPLERS];
};

/* This tracks state BOs for both textures and samplers, so we
 * multiply by 2.
 */
#define MAX_TOTAL_STATES (2 * V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
struct state_bo_list {
   uint32_t count;
   struct v3dv_bo *states[MAX_TOTAL_STATES];
};

#define MAX_TOTAL_UNIFORM_BUFFERS ((MAX_UNIFORM_BUFFERS + \
                                    MAX_INLINE_UNIFORM_BUFFERS) * MAX_STAGES)
#define MAX_TOTAL_STORAGE_BUFFERS (MAX_STORAGE_BUFFERS * MAX_STAGES)
struct buffer_bo_list {
   struct v3dv_bo *ubo[MAX_TOTAL_UNIFORM_BUFFERS];
   struct v3dv_bo *ssbo[MAX_TOTAL_STORAGE_BUFFERS];
};

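/* Returns true if the given state BO is already tracked in the list. */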
static bool
state_bo_in_list(struct state_bo_list *list, struct v3dv_bo *bo)
{
   for (int i = 0; i < list->count; i++) {
      if (list->states[i] == bo)
         return true;
   }
   return false;
}

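/* Destroy callback used to release a retired push constants BO that was
 * handed over to the command buffer's private object list.
 */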
static void
push_constants_bo_free(VkDevice _device,
                       uint64_t bo_ptr,
                       VkAllocationCallbacks *alloc)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   v3dv_bo_free(device, (struct v3dv_bo *)(uintptr_t) bo_ptr);
}

/*
 * Checks whether the UBO used for push constants needs to be (re)allocated
 * or updated and, if so, takes care of it.
 *
 * The push constants UBO is only used for push constants accessed with a
 * non-constant index.
 */
static void
check_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_pipeline *pipeline)
{
   if (!(cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO) ||
       pipeline->layout->push_constant_size == 0)
      return;

   if (cmd_buffer->push_constants_resource.bo == NULL) {
      cmd_buffer->push_constants_resource.bo =
         v3dv_bo_alloc(cmd_buffer->device, 4096, "push constants", true);

      v3dv_job_add_bo(cmd_buffer->state.job,
                      cmd_buffer->push_constants_resource.bo);

      if (!cmd_buffer->push_constants_resource.bo) {
         fprintf(stderr, "Failed to allocate memory for push constants\n");
         abort();
      }

      bool ok = v3dv_bo_map(cmd_buffer->device,
                            cmd_buffer->push_constants_resource.bo,
                            cmd_buffer->push_constants_resource.bo->size);
      if (!ok) {
         fprintf(stderr, "failed to map push constants buffer\n");
         abort();
      }
   } else {
      if (cmd_buffer->push_constants_resource.offset +
          cmd_buffer->state.push_constants_size <=
          cmd_buffer->push_constants_resource.bo->size) {
         cmd_buffer->push_constants_resource.offset +=
            cmd_buffer->state.push_constants_size;
      } else {
         /* We ran out of space, so we'll have to allocate a new buffer. We
          * need to keep the old one alive until the end of the command
          * buffer's lifetime and make sure it is eventually freed. We use
          * the private object machinery in the command buffer for this.
          */
         v3dv_cmd_buffer_add_private_obj(
            cmd_buffer, (uintptr_t) cmd_buffer->push_constants_resource.bo,
            (v3dv_cmd_buffer_private_obj_destroy_cb) push_constants_bo_free);

         /* Now recurse so we allocate a new BO */
         cmd_buffer->push_constants_resource.bo = NULL;
         check_push_constants_ubo(cmd_buffer, pipeline);
         return;
      }
   }

   assert(cmd_buffer->state.push_constants_size <= MAX_PUSH_CONSTANTS_SIZE);
   memcpy(cmd_buffer->push_constants_resource.bo->map +
          cmd_buffer->push_constants_resource.offset,
          cmd_buffer->state.push_constants_data,
          cmd_buffer->state.push_constants_size);

   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO;
}

/** V3D 4.x TMU configuration parameter 0 (texture) */
static void
write_tmu_p0(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             enum broadcom_shader_stage stage,
             struct v3dv_cl_out **uniforms,
             uint32_t data,
             struct texture_bo_list *tex_bos,
             struct state_bo_list *state_bos)
{
   uint32_t texture_idx = v3d_unit_data_get_unit(data);

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   /* We need to ensure that the texture bo is added to the job */
   struct v3dv_bo *texture_bo =
      v3dv_descriptor_map_get_texture_bo(descriptor_state,
                                         &pipeline->shared_data->maps[stage]->texture_map,
                                         pipeline->layout, texture_idx);
   assert(texture_bo);
   assert(texture_idx < V3D_MAX_TEXTURE_SAMPLERS);
   tex_bos->tex[texture_idx] = texture_bo;

   struct v3dv_cl_reloc state_reloc =
      v3dv_descriptor_map_get_texture_shader_state(cmd_buffer->device, descriptor_state,
                                                   &pipeline->shared_data->maps[stage]->texture_map,
                                                   pipeline->layout,
                                                   texture_idx);

   cl_aligned_u32(uniforms, state_reloc.bo->offset +
                            state_reloc.offset +
                            v3d_unit_data_get_offset(data));

   /* Texture and Sampler states are typically suballocated, so they are
    * usually the same BO: only flag them once to avoid trying to add them
    * multiple times to the job later.
    */
   if (!state_bo_in_list(state_bos, state_reloc.bo)) {
      assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
      state_bos->states[state_bos->count++] = state_reloc.bo;
   }
}

/** V3D 4.x TMU configuration parameter 1 (sampler) */
static void
write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             enum broadcom_shader_stage stage,
             struct v3dv_cl_out **uniforms,
             uint32_t data,
             struct state_bo_list *state_bos)
{
   uint32_t sampler_idx = v3d_unit_data_get_unit(data);
   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   assert(sampler_idx != V3DV_NO_SAMPLER_16BIT_IDX &&
          sampler_idx != V3DV_NO_SAMPLER_32BIT_IDX);

   struct v3dv_cl_reloc sampler_state_reloc =
      v3dv_descriptor_map_get_sampler_state(cmd_buffer->device, descriptor_state,
                                            &pipeline->shared_data->maps[stage]->sampler_map,
                                            pipeline->layout, sampler_idx);

   const struct v3dv_sampler *sampler =
      v3dv_descriptor_map_get_sampler(descriptor_state,
                                      &pipeline->shared_data->maps[stage]->sampler_map,
                                      pipeline->layout, sampler_idx);
   assert(sampler);

   /* Set unnormalized coordinates flag from sampler object */
   uint32_t p1_packed = v3d_unit_data_get_offset(data);
   if (sampler->unnormalized_coordinates) {
      v3d_pack_unnormalized_coordinates(&cmd_buffer->device->devinfo, &p1_packed,
                                        sampler->unnormalized_coordinates);
   }

   cl_aligned_u32(uniforms, sampler_state_reloc.bo->offset +
                            sampler_state_reloc.offset +
                            p1_packed);

   /* Texture and Sampler states are typically suballocated, so they are
    * usually the same BO: only flag them once to avoid trying to add them
    * multiple times to the job later.
    */
   if (!state_bo_in_list(state_bos, sampler_state_reloc.bo)) {
      assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
      state_bos->states[state_bos->count++] = sampler_state_reloc.bo;
   }
}

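/* Writes the address (or size, for the *_SIZE contents) of the UBO or SSBO
 * referenced by the uniform. UBO index 0 maps to the push constants buffer,
 * which is handled separately from regular descriptors. Buffer BOs are
 * recorded in buffer_bos so the caller can add them to the job.
 */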
static void
write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                        struct v3dv_pipeline *pipeline,
                        enum broadcom_shader_stage stage,
                        struct v3dv_cl_out **uniforms,
                        enum quniform_contents content,
                        uint32_t data,
                        struct buffer_bo_list *buffer_bos)
{
   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor_map *map =
      content == QUNIFORM_UBO_ADDR || content == QUNIFORM_GET_UBO_SIZE ?
      &pipeline->shared_data->maps[stage]->ubo_map :
      &pipeline->shared_data->maps[stage]->ssbo_map;

   uint32_t offset =
      content == QUNIFORM_UBO_ADDR ?
      v3d_unit_data_get_offset(data) :
      0;

   uint32_t dynamic_offset = 0;

   /* For UBOs, the index is shifted, as 0 is reserved for push constants
    * and 1..MAX_INLINE_UNIFORM_BUFFERS are reserved for inline uniform
    * buffers.
    */
   uint32_t index = v3d_unit_data_get_unit(data);
   if (content == QUNIFORM_UBO_ADDR && index == 0) {
      /* Ensure the push constants UBO is created and updated. This also
       * adds the BO to the job so we don't need to track it in buffer_bos.
       */
      check_push_constants_ubo(cmd_buffer, pipeline);

      struct v3dv_cl_reloc *resource =
         &cmd_buffer->push_constants_resource;
      assert(resource->bo);

      cl_aligned_u32(uniforms, resource->bo->offset +
                               resource->offset +
                               offset + dynamic_offset);
   } else {
      if (content == QUNIFORM_UBO_ADDR) {
         /* We reserve UBO index 0 for push constants in Vulkan (and for the
          * constant buffer in GL), so the compiler always adds one to all UBO
          * indices. Fix it up before we access the descriptor map, since
          * indices start from 0 there.
          */
         assert(index > 0);
         index--;
      } else {
         index = data;
      }

      struct v3dv_descriptor *descriptor =
         v3dv_descriptor_map_get_descriptor(descriptor_state, map,
                                            pipeline->layout,
                                            index, &dynamic_offset);

      assert(descriptor);

      if (content == QUNIFORM_GET_SSBO_SIZE ||
          content == QUNIFORM_GET_UBO_SIZE) {
         cl_aligned_u32(uniforms, descriptor->range);
      } else {
         /* Inline uniform buffers store their contents in pool memory instead
          * of an external buffer.
          */
         struct v3dv_bo *bo;
         uint32_t addr;
         if (descriptor->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
            assert(dynamic_offset == 0);
            struct v3dv_cl_reloc reloc =
               v3dv_descriptor_map_get_descriptor_bo(cmd_buffer->device,
                                                     descriptor_state, map,
                                                     pipeline->layout, index,
                                                     NULL);
            bo = reloc.bo;
            addr = reloc.bo->offset + reloc.offset + offset;
         } else {
            assert(descriptor->buffer);
            assert(descriptor->buffer->mem);
            assert(descriptor->buffer->mem->bo);

            bo = descriptor->buffer->mem->bo;
            addr = bo->offset +
                   descriptor->buffer->mem_offset +
                   descriptor->offset +
                   offset + dynamic_offset;
         }

         cl_aligned_u32(uniforms, addr);

         if (content == QUNIFORM_UBO_ADDR) {
            assert(index < MAX_TOTAL_UNIFORM_BUFFERS);
            buffer_bos->ubo[index] = bo;
         } else {
            assert(index < MAX_TOTAL_STORAGE_BUFFERS);
            buffer_bos->ssbo[index] = bo;
         }
      }
   }
}

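/* Writes a single 32-bit word sourced from an inline uniform buffer. The
 * offset into the inline UBO identified by 'index' comes in 32-bit units.
 */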
static void
write_inline_uniform(struct v3dv_cl_out **uniforms,
                     uint32_t index,
                     uint32_t offset,
                     struct v3dv_cmd_buffer *cmd_buffer,
                     struct v3dv_pipeline *pipeline,
                     enum broadcom_shader_stage stage)
{
   assert(index < MAX_INLINE_UNIFORM_BUFFERS);

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor_map *map =
      &pipeline->shared_data->maps[stage]->ubo_map;

   struct v3dv_cl_reloc reloc =
      v3dv_descriptor_map_get_descriptor_bo(cmd_buffer->device,
                                            descriptor_state, map,
                                            pipeline->layout, index,
                                            NULL);

   /* Offset comes in 32-bit units */
   uint32_t *addr = reloc.bo->map + reloc.offset + 4 * offset;
   cl_aligned_u32(uniforms, *addr);
}

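/* Returns the requested texture/image size field from an image view. */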
static uint32_t
get_texture_size_from_image_view(struct v3dv_image_view *image_view,
                                 enum quniform_contents contents,
                                 uint32_t data)
{
   switch(contents) {
   case QUNIFORM_IMAGE_WIDTH:
   case QUNIFORM_TEXTURE_WIDTH:
      /* We don't u_minify the values, as we are using the image_view
       * extents
       */
      return image_view->vk.extent.width;
   case QUNIFORM_IMAGE_HEIGHT:
   case QUNIFORM_TEXTURE_HEIGHT:
      return image_view->vk.extent.height;
   case QUNIFORM_IMAGE_DEPTH:
   case QUNIFORM_TEXTURE_DEPTH:
      return image_view->vk.extent.depth;
   case QUNIFORM_IMAGE_ARRAY_SIZE:
   case QUNIFORM_TEXTURE_ARRAY_SIZE:
      if (image_view->vk.view_type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
         return image_view->vk.layer_count;
      } else {
         assert(image_view->vk.layer_count % 6 == 0);
         return image_view->vk.layer_count / 6;
      }
   case QUNIFORM_TEXTURE_LEVELS:
      return image_view->vk.level_count;
   case QUNIFORM_TEXTURE_SAMPLES:
      assert(image_view->vk.image);
      return image_view->vk.image->samples;
   default:
      unreachable("Bad texture size field");
   }
}


static uint32_t
get_texture_size_from_buffer_view(struct v3dv_buffer_view *buffer_view,
                                  enum quniform_contents contents,
                                  uint32_t data)
{
   switch(contents) {
   case QUNIFORM_IMAGE_WIDTH:
   case QUNIFORM_TEXTURE_WIDTH:
      return buffer_view->num_elements;
   /* Only size can be queried for texel buffers */
   default:
      unreachable("Bad texture size field for texel buffers");
   }
}

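/* Resolves a texture/image size query against the descriptor currently bound
 * at the given texture index, which can be an image view or a buffer view.
 */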
static uint32_t
get_texture_size(struct v3dv_cmd_buffer *cmd_buffer,
                 struct v3dv_pipeline *pipeline,
                 enum broadcom_shader_stage stage,
                 enum quniform_contents contents,
                 uint32_t data)
{
   uint32_t texture_idx = data;

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor *descriptor =
      v3dv_descriptor_map_get_descriptor(descriptor_state,
                                         &pipeline->shared_data->maps[stage]->texture_map,
                                         pipeline->layout,
                                         texture_idx, NULL);

   assert(descriptor);

   switch (descriptor->type) {
   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      return get_texture_size_from_image_view(descriptor->image_view,
                                              contents, data);
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      return get_texture_size_from_buffer_view(descriptor->buffer_view,
                                               contents, data);
   default:
      unreachable("Wrong descriptor for getting texture size");
   }
}

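/* Emits the uniform stream for the given shader variant into the job's
 * indirect CL and adds all the BOs it references to the job. Returns the
 * address of the emitted uniform stream. If wg_count_offsets is not NULL,
 * it is filled with pointers to the slots holding the workgroup counts so
 * they can be patched later if needed.
 */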
struct v3dv_cl_reloc
v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_pipeline *pipeline,
                               struct v3dv_shader_variant *variant,
                               uint32_t **wg_count_offsets)
{
   struct v3d_uniform_list *uinfo =
      &variant->prog_data.base->uniforms;
   struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;

   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);
   assert(job->cmd_buffer == cmd_buffer);
   struct v3d_device_info *devinfo = &cmd_buffer->device->devinfo;

   struct texture_bo_list tex_bos = { 0 };
   struct state_bo_list state_bos = { 0 };
   struct buffer_bo_list buffer_bos = { 0 };

   /* The hardware always pre-fetches the next uniform (also when there
    * aren't any), so we always allocate space for an extra slot. This
    * fixes MMU exceptions reported since Linux kernel 5.4 when the
    * uniforms fill up the tail bytes of a page in the indirect
    * BO. In that scenario, when the hardware pre-fetches after reading
    * the last uniform it will read beyond the end of the page and trigger
    * the MMU exception.
    */
   v3dv_cl_ensure_space(&job->indirect, (uinfo->count + 1) * 4, 4);

   struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect);

   struct v3dv_cl_out *uniforms = cl_start(&job->indirect);
   for (int i = 0; i < uinfo->count; i++) {
      uint32_t data = uinfo->data[i];

      switch (uinfo->contents[i]) {
      case QUNIFORM_CONSTANT:
         cl_aligned_u32(&uniforms, data);
         break;

      case QUNIFORM_UNIFORM:
         cl_aligned_u32(&uniforms, cmd_buffer->state.push_constants_data[data]);
         break;

      case QUNIFORM_INLINE_UBO_0:
      case QUNIFORM_INLINE_UBO_1:
      case QUNIFORM_INLINE_UBO_2:
      case QUNIFORM_INLINE_UBO_3:
         write_inline_uniform(&uniforms,
                              uinfo->contents[i] - QUNIFORM_INLINE_UBO_0, data,
                              cmd_buffer, pipeline, variant->stage);
         break;

      case QUNIFORM_VIEWPORT_X_SCALE: {
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] *
                                 devinfo->clipper_xy_granularity);
         break;
      }

      case QUNIFORM_VIEWPORT_Y_SCALE: {
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] *
                                 devinfo->clipper_xy_granularity);
         break;
      }

      case QUNIFORM_VIEWPORT_Z_OFFSET: {
         float translate_z;
         v3dv_cmd_buffer_state_get_viewport_z_xform(cmd_buffer, 0,
                                                    &translate_z, NULL);
         cl_aligned_f(&uniforms, translate_z);
         break;
      }

      case QUNIFORM_VIEWPORT_Z_SCALE: {
         float scale_z;
         v3dv_cmd_buffer_state_get_viewport_z_xform(cmd_buffer, 0,
                                                    NULL, &scale_z);
         cl_aligned_f(&uniforms, scale_z);
         break;
      }

      case QUNIFORM_SSBO_OFFSET:
      case QUNIFORM_UBO_ADDR:
      case QUNIFORM_GET_SSBO_SIZE:
      case QUNIFORM_GET_UBO_SIZE:
         write_ubo_ssbo_uniforms(cmd_buffer, pipeline, variant->stage, &uniforms,
                                 uinfo->contents[i], data, &buffer_bos);

         break;

      case QUNIFORM_IMAGE_TMU_CONFIG_P0:
      case QUNIFORM_TMU_CONFIG_P0:
         write_tmu_p0(cmd_buffer, pipeline, variant->stage,
                      &uniforms, data, &tex_bos, &state_bos);
         break;

      case QUNIFORM_TMU_CONFIG_P1:
         write_tmu_p1(cmd_buffer, pipeline, variant->stage,
                      &uniforms, data, &state_bos);
         break;

      case QUNIFORM_IMAGE_WIDTH:
      case QUNIFORM_IMAGE_HEIGHT:
      case QUNIFORM_IMAGE_DEPTH:
      case QUNIFORM_IMAGE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_WIDTH:
      case QUNIFORM_TEXTURE_HEIGHT:
      case QUNIFORM_TEXTURE_DEPTH:
      case QUNIFORM_TEXTURE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_LEVELS:
      case QUNIFORM_TEXTURE_SAMPLES:
         cl_aligned_u32(&uniforms,
                        get_texture_size(cmd_buffer,
                                         pipeline,
                                         variant->stage,
                                         uinfo->contents[i],
                                         data));
         break;

      /* We generate this from geometry shaders to cap the generated gl_Layer
       * to be within the number of layers of the framebuffer so we prevent
       * the binner from trying to access tile state memory out of bounds (for
       * layers that don't exist).
       *
       * Unfortunately, for secondary command buffers we may not know the
       * number of layers in the framebuffer at this stage. Since we are
       * only using this to sanitize the shader and it should not have any
       * impact on correct shaders that emit valid values for gl_Layer,
       * we just work around it by using the largest number of layers we
       * support.
       *
       * FIXME: we could do better than this by recording in the job that
       * the value at this uniform offset is not correct, and patching it when
       * we execute the secondary command buffer into a primary, since we do
       * have the correct number of layers at that point. But again, since
       * this is only for sanitizing the shader and it only affects the
       * specific case of secondary command buffers without framebuffer info
       * available, it might not be worth the trouble.
       *
       * With multiview the number of layers is dictated by the view mask
       * and not by the framebuffer layers. We do set the job's frame tiling
       * information correctly from the view mask in that case; however,
       * secondary command buffers may not have valid frame tiling data,
       * so when multiview is enabled, we always set the number of layers
       * from the subpass view mask.
       */
      case QUNIFORM_FB_LAYERS: {
         const struct v3dv_cmd_buffer_state *state = &job->cmd_buffer->state;
         const uint32_t view_mask =
            state->pass->subpasses[state->subpass_idx].view_mask;

         uint32_t num_layers;
         if (view_mask != 0) {
            num_layers = util_last_bit(view_mask);
         } else if (job->frame_tiling.layers != 0) {
            num_layers = job->frame_tiling.layers;
         } else if (cmd_buffer->state.framebuffer) {
            num_layers = cmd_buffer->state.framebuffer->layers;
         } else {
            assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
            num_layers = 2048;
#if MESA_DEBUG
            fprintf(stderr, "Skipping gl_LayerID shader sanity check for "
                            "secondary command buffer\n");
#endif
         }
         cl_aligned_u32(&uniforms, num_layers);
         break;
      }

      case QUNIFORM_VIEW_INDEX:
         cl_aligned_u32(&uniforms, job->cmd_buffer->state.view_index);
         break;

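      /* Record where the workgroup count uniforms live in the stream so the
       * caller can patch them later, e.g. for indirect dispatches.
       */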
      case QUNIFORM_NUM_WORK_GROUPS:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.wg_count[data] > 0);
         if (wg_count_offsets)
            wg_count_offsets[data] = (uint32_t *) uniforms;
         cl_aligned_u32(&uniforms, job->csd.wg_count[data]);
         break;

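      /* Base workgroup IDs recorded in the job (non-zero with
       * vkCmdDispatchBase).
       */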
      case QUNIFORM_WORK_GROUP_BASE:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         cl_aligned_u32(&uniforms, job->csd.wg_base[data]);
         break;

      case QUNIFORM_SHARED_OFFSET:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.shared_memory);
         cl_aligned_u32(&uniforms, job->csd.shared_memory->offset);
         break;

      case QUNIFORM_SPILL_OFFSET:
         assert(pipeline->spill.bo);
         cl_aligned_u32(&uniforms, pipeline->spill.bo->offset);
         break;

      case QUNIFORM_SPILL_SIZE_PER_THREAD:
         assert(pipeline->spill.size_per_thread > 0);
         cl_aligned_u32(&uniforms, pipeline->spill.size_per_thread);
         break;

      case QUNIFORM_DRAW_ID:
         cl_aligned_u32(&uniforms, job->cmd_buffer->state.draw_id);
         break;

      case QUNIFORM_LINE_WIDTH:
         cl_aligned_u32(&uniforms,
                        job->cmd_buffer->vk.dynamic_graphics_state.rs.line.width);
         break;

      case QUNIFORM_AA_LINE_WIDTH:
         cl_aligned_u32(&uniforms,
                        v3dv_get_aa_line_width(pipeline, job->cmd_buffer));
         break;

      default:
         unreachable("unsupported quniform_contents uniform type\n");
      }
   }

   cl_end(&job->indirect, uniforms);

   for (int i = 0; i < MAX_TOTAL_TEXTURE_SAMPLERS; i++) {
      if (tex_bos.tex[i])
         v3dv_job_add_bo(job, tex_bos.tex[i]);
   }

   for (int i = 0; i < state_bos.count; i++)
      v3dv_job_add_bo(job, state_bos.states[i]);

   for (int i = 0; i < MAX_TOTAL_UNIFORM_BUFFERS; i++) {
      if (buffer_bos.ubo[i])
         v3dv_job_add_bo(job, buffer_bos.ubo[i]);
   }

   for (int i = 0; i < MAX_TOTAL_STORAGE_BUFFERS; i++) {
      if (buffer_bos.ssbo[i])
         v3dv_job_add_bo(job, buffer_bos.ssbo[i]);
   }

   if (job->csd.shared_memory)
      v3dv_job_add_bo(job, job->csd.shared_memory);

   if (pipeline->spill.bo)
      v3dv_job_add_bo(job, pipeline->spill.bo);

   return uniform_stream;
}

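/* Same as v3dv_write_uniforms_wg_offsets() but without recording the
 * offsets of the workgroup count uniforms.
 */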
struct v3dv_cl_reloc
v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                    struct v3dv_pipeline *pipeline,
                    struct v3dv_shader_variant *variant)
{
   return v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline, variant, NULL);
}