/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Based in part on v3d driver which is:
 *
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"

/* Our Vulkan resource indices represent indices in descriptor maps which
 * include all shader stages, so we need to size the arrays below
 * accordingly. For now we only support a maximum of 3 stages: VS, GS, FS.
 */
#define MAX_STAGES 3

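/* Scratch lists used while emitting a uniform stream to collect the BOs it
 * references (texture BOs, texture/sampler shader state BOs, and UBO/SSBO
 * BOs); they are added to the job once the whole stream has been written.
 */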
#define MAX_TOTAL_TEXTURE_SAMPLERS (V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
struct texture_bo_list {
   struct v3dv_bo *tex[MAX_TOTAL_TEXTURE_SAMPLERS];
};

/* This tracks state BOs for both textures and samplers, so we
 * multiply by 2.
 */
#define MAX_TOTAL_STATES (2 * V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
struct state_bo_list {
   uint32_t count;
   struct v3dv_bo *states[MAX_TOTAL_STATES];
};

#define MAX_TOTAL_UNIFORM_BUFFERS ((MAX_UNIFORM_BUFFERS + \
                                    MAX_INLINE_UNIFORM_BUFFERS) * MAX_STAGES)
#define MAX_TOTAL_STORAGE_BUFFERS (MAX_STORAGE_BUFFERS * MAX_STAGES)
struct buffer_bo_list {
   struct v3dv_bo *ubo[MAX_TOTAL_UNIFORM_BUFFERS];
   struct v3dv_bo *ssbo[MAX_TOTAL_STORAGE_BUFFERS];
};

static bool
state_bo_in_list(struct state_bo_list *list, struct v3dv_bo *bo)
{
   for (int i = 0; i < list->count; i++) {
      if (list->states[i] == bo)
         return true;
   }
   return false;
}

static void
push_constants_bo_free(VkDevice _device,
                       uint64_t bo_ptr,
                       VkAllocationCallbacks *alloc)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   v3dv_bo_free(device, (struct v3dv_bo *)(uintptr_t) bo_ptr);
}

/*
 * This method checks whether the UBO used for push constants needs to be
 * updated.
 *
 * The push constants UBO is only used for push constants that are accessed
 * with a non-constant index.
 */
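/* Each update is sub-allocated from the current push constants BO: the
 * offset advances past the previously written data, and a new BO is
 * allocated when the current one runs out of space.
 */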
static void
check_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_pipeline *pipeline)
{
   if (!(cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO) ||
       pipeline->layout->push_constant_size == 0)
      return;

   if (cmd_buffer->push_constants_resource.bo == NULL) {
      cmd_buffer->push_constants_resource.bo =
         v3dv_bo_alloc(cmd_buffer->device, 4096, "push constants", true);

      if (!cmd_buffer->push_constants_resource.bo) {
         fprintf(stderr, "Failed to allocate memory for push constants\n");
         abort();
      }

      v3dv_job_add_bo(cmd_buffer->state.job,
                      cmd_buffer->push_constants_resource.bo);

      bool ok = v3dv_bo_map(cmd_buffer->device,
                            cmd_buffer->push_constants_resource.bo,
                            cmd_buffer->push_constants_resource.bo->size);
      if (!ok) {
         fprintf(stderr, "Failed to map push constants buffer\n");
         abort();
      }
   } else {
      if (cmd_buffer->push_constants_resource.offset +
          cmd_buffer->state.push_constants_size <=
          cmd_buffer->push_constants_resource.bo->size) {
         cmd_buffer->push_constants_resource.offset +=
            cmd_buffer->state.push_constants_size;
      } else {
         /* We ran out of space, so we'll have to allocate a new buffer, but
          * we need to keep the old one alive for the lifetime of the command
          * buffer and make sure it is eventually freed. We use the private
          * object machinery in the command buffer for this.
          */
         v3dv_cmd_buffer_add_private_obj(
            cmd_buffer, (uintptr_t) cmd_buffer->push_constants_resource.bo,
            (v3dv_cmd_buffer_private_obj_destroy_cb) push_constants_bo_free);

         /* Now call back so we create a new BO */
         cmd_buffer->push_constants_resource.bo = NULL;
         check_push_constants_ubo(cmd_buffer, pipeline);
         return;
      }
   }

   assert(cmd_buffer->state.push_constants_size <= MAX_PUSH_CONSTANTS_SIZE);
   memcpy(cmd_buffer->push_constants_resource.bo->map +
          cmd_buffer->push_constants_resource.offset,
          cmd_buffer->state.push_constants_data,
          cmd_buffer->state.push_constants_size);

   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO;
}

/** V3D 4.x TMU configuration parameter 0 (texture) */
static void
write_tmu_p0(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             enum broadcom_shader_stage stage,
             struct v3dv_cl_out **uniforms,
             uint32_t data,
             struct texture_bo_list *tex_bos,
             struct state_bo_list *state_bos)
{
   uint32_t texture_idx = v3d_unit_data_get_unit(data);

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   /* We need to ensure that the texture bo is added to the job */
   struct v3dv_bo *texture_bo =
      v3dv_descriptor_map_get_texture_bo(descriptor_state,
                                         &pipeline->shared_data->maps[stage]->texture_map,
                                         pipeline->layout, texture_idx);
   assert(texture_bo);
   assert(texture_idx < V3D_MAX_TEXTURE_SAMPLERS);
   tex_bos->tex[texture_idx] = texture_bo;

   struct v3dv_cl_reloc state_reloc =
      v3dv_descriptor_map_get_texture_shader_state(cmd_buffer->device, descriptor_state,
                                                   &pipeline->shared_data->maps[stage]->texture_map,
                                                   pipeline->layout,
                                                   texture_idx);

   cl_aligned_u32(uniforms, state_reloc.bo->offset +
                            state_reloc.offset +
                            v3d_unit_data_get_offset(data));

   /* Texture and Sampler states are typically suballocated, so they are
    * usually the same BO: only flag them once to avoid trying to add them
    * multiple times to the job later.
    */
   if (!state_bo_in_list(state_bos, state_reloc.bo)) {
      assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
      state_bos->states[state_bos->count++] = state_reloc.bo;
   }
}

/** V3D 4.x TMU configuration parameter 1 (sampler) */
static void
write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             enum broadcom_shader_stage stage,
             struct v3dv_cl_out **uniforms,
             uint32_t data,
             struct state_bo_list *state_bos)
{
   uint32_t sampler_idx = v3d_unit_data_get_unit(data);
   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   assert(sampler_idx != V3DV_NO_SAMPLER_16BIT_IDX &&
          sampler_idx != V3DV_NO_SAMPLER_32BIT_IDX);

   struct v3dv_cl_reloc sampler_state_reloc =
      v3dv_descriptor_map_get_sampler_state(cmd_buffer->device, descriptor_state,
                                            &pipeline->shared_data->maps[stage]->sampler_map,
                                            pipeline->layout, sampler_idx);

   const struct v3dv_sampler *sampler =
      v3dv_descriptor_map_get_sampler(descriptor_state,
                                      &pipeline->shared_data->maps[stage]->sampler_map,
                                      pipeline->layout, sampler_idx);
   assert(sampler);

   /* Set unnormalized coordinates flag from sampler object */
   uint32_t p1_packed = v3d_unit_data_get_offset(data);
   if (sampler->unnormalized_coordinates) {
      v3d_pack_unnormalized_coordinates(&cmd_buffer->device->devinfo, &p1_packed,
                                        sampler->unnormalized_coordinates);
   }

   cl_aligned_u32(uniforms, sampler_state_reloc.bo->offset +
                            sampler_state_reloc.offset +
                            p1_packed);

   /* Texture and Sampler states are typically suballocated, so they are
    * usually the same BO: only flag them once to avoid trying to add them
    * multiple times to the job later.
    */
   if (!state_bo_in_list(state_bos, sampler_state_reloc.bo)) {
      assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
      state_bos->states[state_bos->count++] = sampler_state_reloc.bo;
   }
}

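/* Emits the uniform for a UBO/SSBO address or buffer size query. UBO index 0
 * resolves to the push constants UBO, inline uniform block descriptors
 * resolve to descriptor pool memory, and everything else resolves to the
 * descriptor's buffer BO, which is recorded in buffer_bos so it gets added
 * to the job once the stream is complete.
 */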
static void
write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                        struct v3dv_pipeline *pipeline,
                        enum broadcom_shader_stage stage,
                        struct v3dv_cl_out **uniforms,
                        enum quniform_contents content,
                        uint32_t data,
                        struct buffer_bo_list *buffer_bos)
{
   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor_map *map =
      content == QUNIFORM_UBO_ADDR || content == QUNIFORM_GET_UBO_SIZE ?
      &pipeline->shared_data->maps[stage]->ubo_map :
      &pipeline->shared_data->maps[stage]->ssbo_map;

   uint32_t offset =
      content == QUNIFORM_UBO_ADDR ?
      v3d_unit_data_get_offset(data) :
      0;

   uint32_t dynamic_offset = 0;

   /* For ubos, index is shifted, as 0 is reserved for push constants
    * and 1..MAX_INLINE_UNIFORM_BUFFERS are reserved for inline uniform
    * buffers.
    */
   uint32_t index = v3d_unit_data_get_unit(data);
   if (content == QUNIFORM_UBO_ADDR && index == 0) {
      /* Ensure the push constants UBO is created and updated. This also
       * adds the BO to the job so we don't need to track it in buffer_bos.
       */
      check_push_constants_ubo(cmd_buffer, pipeline);

      struct v3dv_cl_reloc *resource =
         &cmd_buffer->push_constants_resource;
      assert(resource->bo);

      cl_aligned_u32(uniforms, resource->bo->offset +
                               resource->offset +
                               offset + dynamic_offset);
   } else {
      if (content == QUNIFORM_UBO_ADDR) {
         /* We reserve UBO index 0 for push constants in Vulkan (and for the
          * constant buffer in GL) so the compiler always adds one to all UBO
          * indices, fix it up before we access the descriptor map, since
          * indices start from 0 there.
          */
         assert(index > 0);
         index--;
      } else {
         index = data;
      }

      struct v3dv_descriptor *descriptor =
         v3dv_descriptor_map_get_descriptor(descriptor_state, map,
                                            pipeline->layout,
                                            index, &dynamic_offset);

      /* Inline UBO descriptors store UBO data in descriptor pool memory,
       * instead of an external buffer.
       */
      assert(descriptor);

      if (content == QUNIFORM_GET_SSBO_SIZE ||
          content == QUNIFORM_GET_UBO_SIZE) {
         cl_aligned_u32(uniforms, descriptor->range);
      } else {
         /* Inline uniform buffers store their contents in pool memory instead
          * of an external buffer.
          */
         struct v3dv_bo *bo;
         uint32_t addr;
         if (descriptor->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
            assert(dynamic_offset == 0);
            struct v3dv_cl_reloc reloc =
               v3dv_descriptor_map_get_descriptor_bo(cmd_buffer->device,
                                                     descriptor_state, map,
                                                     pipeline->layout, index,
                                                     NULL);
            bo = reloc.bo;
            addr = reloc.bo->offset + reloc.offset + offset;
         } else {
            assert(descriptor->buffer);
            assert(descriptor->buffer->mem);
            assert(descriptor->buffer->mem->bo);

            bo = descriptor->buffer->mem->bo;
            addr = bo->offset +
                   descriptor->buffer->mem_offset +
                   descriptor->offset +
                   offset + dynamic_offset;
         }

         cl_aligned_u32(uniforms, addr);

         if (content == QUNIFORM_UBO_ADDR) {
            assert(index < MAX_TOTAL_UNIFORM_BUFFERS);
            buffer_bos->ubo[index] = bo;
         } else {
            assert(index < MAX_TOTAL_STORAGE_BUFFERS);
            buffer_bos->ssbo[index] = bo;
         }
      }
   }
}

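/* Emits one 32-bit word of an inline uniform buffer: the value is read on
 * the CPU straight from the descriptor pool memory backing the inline UBO
 * descriptor and baked into the uniform stream.
 */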
static void
write_inline_uniform(struct v3dv_cl_out **uniforms,
                     uint32_t index,
                     uint32_t offset,
                     struct v3dv_cmd_buffer *cmd_buffer,
                     struct v3dv_pipeline *pipeline,
                     enum broadcom_shader_stage stage)
{
   assert(index < MAX_INLINE_UNIFORM_BUFFERS);

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor_map *map =
      &pipeline->shared_data->maps[stage]->ubo_map;

   struct v3dv_cl_reloc reloc =
      v3dv_descriptor_map_get_descriptor_bo(cmd_buffer->device,
                                            descriptor_state, map,
                                            pipeline->layout, index,
                                            NULL);

   /* Offset comes in 32-bit units */
   uint32_t *addr = reloc.bo->map + reloc.offset + 4 * offset;
   cl_aligned_u32(uniforms, *addr);
}

static uint32_t
get_texture_size_from_image_view(struct v3dv_image_view *image_view,
                                 enum quniform_contents contents,
                                 uint32_t data)
{
   switch(contents) {
   case QUNIFORM_IMAGE_WIDTH:
   case QUNIFORM_TEXTURE_WIDTH:
      /* We don't u_minify the values, as we are using the image_view
       * extents.
       */
      return image_view->vk.extent.width;
   case QUNIFORM_IMAGE_HEIGHT:
   case QUNIFORM_TEXTURE_HEIGHT:
      return image_view->vk.extent.height;
   case QUNIFORM_IMAGE_DEPTH:
   case QUNIFORM_TEXTURE_DEPTH:
      return image_view->vk.extent.depth;
   case QUNIFORM_IMAGE_ARRAY_SIZE:
   case QUNIFORM_TEXTURE_ARRAY_SIZE:
      if (image_view->vk.view_type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
         return image_view->vk.layer_count;
      } else {
         assert(image_view->vk.layer_count % 6 == 0);
         return image_view->vk.layer_count / 6;
      }
   case QUNIFORM_TEXTURE_LEVELS:
      return image_view->vk.level_count;
   case QUNIFORM_TEXTURE_SAMPLES:
      assert(image_view->vk.image);
      return image_view->vk.image->samples;
   default:
      unreachable("Bad texture size field");
   }
}


static uint32_t
get_texture_size_from_buffer_view(struct v3dv_buffer_view *buffer_view,
                                  enum quniform_contents contents,
                                  uint32_t data)
{
   switch(contents) {
   case QUNIFORM_IMAGE_WIDTH:
   case QUNIFORM_TEXTURE_WIDTH:
      return buffer_view->num_elements;
   /* Only size can be queried for texel buffers */
   default:
      unreachable("Bad texture size field for texel buffers");
   }
}

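/* Returns the texture/image size field requested by @contents for the
 * resource at index @data in the stage's texture descriptor map, taking it
 * from the image view, or from the buffer view for texel buffers.
 */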
static uint32_t
get_texture_size(struct v3dv_cmd_buffer *cmd_buffer,
                 struct v3dv_pipeline *pipeline,
                 enum broadcom_shader_stage stage,
                 enum quniform_contents contents,
                 uint32_t data)
{
   uint32_t texture_idx = data;

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor *descriptor =
      v3dv_descriptor_map_get_descriptor(descriptor_state,
                                         &pipeline->shared_data->maps[stage]->texture_map,
                                         pipeline->layout,
                                         texture_idx, NULL);

   assert(descriptor);

   switch (descriptor->type) {
   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      return get_texture_size_from_image_view(descriptor->image_view,
                                              contents, data);
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      return get_texture_size_from_buffer_view(descriptor->buffer_view,
                                               contents, data);
   default:
      unreachable("Wrong descriptor for getting texture size");
   }
}

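/* Writes the uniform stream for @variant into the job's indirect CL and
 * returns its address. Descriptor-based uniforms are resolved through the
 * pipeline's descriptor maps, and every BO referenced by the stream is added
 * to the job so it stays resident while the job runs. If @wg_count_offsets
 * is not NULL, it is filled with pointers to the locations in the stream
 * where the workgroup counts were written, so the caller can patch them
 * later (e.g. for indirect compute dispatches).
 */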
struct v3dv_cl_reloc
v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_pipeline *pipeline,
                               struct v3dv_shader_variant *variant,
                               uint32_t **wg_count_offsets)
{
   struct v3d_uniform_list *uinfo =
      &variant->prog_data.base->uniforms;
   struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;

   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);
   assert(job->cmd_buffer == cmd_buffer);
   struct v3d_device_info *devinfo = &cmd_buffer->device->devinfo;

   struct texture_bo_list tex_bos = { 0 };
   struct state_bo_list state_bos = { 0 };
   struct buffer_bo_list buffer_bos = { 0 };

   /* The hardware always pre-fetches the next uniform (also when there
    * aren't any), so we always allocate space for an extra slot. This
    * fixes MMU exceptions reported since Linux kernel 5.4 when the
    * uniforms fill up the tail bytes of a page in the indirect
    * BO. In that scenario, when the hardware pre-fetches after reading
    * the last uniform it will read beyond the end of the page and trigger
    * the MMU exception.
    */
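   /* One 32-bit word per uniform, plus the extra pre-fetch slot. */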
   v3dv_cl_ensure_space(&job->indirect, (uinfo->count + 1) * 4, 4);

   struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect);

   struct v3dv_cl_out *uniforms = cl_start(&job->indirect);
   for (int i = 0; i < uinfo->count; i++) {
      uint32_t data = uinfo->data[i];

      switch (uinfo->contents[i]) {
      case QUNIFORM_CONSTANT:
         cl_aligned_u32(&uniforms, data);
         break;

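      /* Push constant accessed with a constant offset: data is the dword
       * index into the CPU-side push constant data, which is baked directly
       * into the uniform stream.
       */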
      case QUNIFORM_UNIFORM:
         cl_aligned_u32(&uniforms, cmd_buffer->state.push_constants_data[data]);
         break;

      case QUNIFORM_INLINE_UBO_0:
      case QUNIFORM_INLINE_UBO_1:
      case QUNIFORM_INLINE_UBO_2:
      case QUNIFORM_INLINE_UBO_3:
         write_inline_uniform(&uniforms,
                              uinfo->contents[i] - QUNIFORM_INLINE_UBO_0, data,
                              cmd_buffer, pipeline, variant->stage);
         break;

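      /* The hardware expects the viewport X/Y scale in the clipper's
       * sub-pixel units, so the pixel-space scale is multiplied by
       * clipper_xy_granularity.
       */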
      case QUNIFORM_VIEWPORT_X_SCALE: {
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] *
                                 devinfo->clipper_xy_granularity);
         break;
      }

      case QUNIFORM_VIEWPORT_Y_SCALE: {
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] *
                                 devinfo->clipper_xy_granularity);
         break;
      }

      case QUNIFORM_VIEWPORT_Z_OFFSET: {
         float translate_z;
         v3dv_cmd_buffer_state_get_viewport_z_xform(cmd_buffer, 0,
                                                    &translate_z, NULL);
         cl_aligned_f(&uniforms, translate_z);
         break;
      }

      case QUNIFORM_VIEWPORT_Z_SCALE: {
         float scale_z;
         v3dv_cmd_buffer_state_get_viewport_z_xform(cmd_buffer, 0,
                                                    NULL, &scale_z);
         cl_aligned_f(&uniforms, scale_z);
         break;
      }

      case QUNIFORM_SSBO_OFFSET:
      case QUNIFORM_UBO_ADDR:
      case QUNIFORM_GET_SSBO_SIZE:
      case QUNIFORM_GET_UBO_SIZE:
         write_ubo_ssbo_uniforms(cmd_buffer, pipeline, variant->stage, &uniforms,
                                 uinfo->contents[i], data, &buffer_bos);
         break;

      case QUNIFORM_IMAGE_TMU_CONFIG_P0:
      case QUNIFORM_TMU_CONFIG_P0:
         write_tmu_p0(cmd_buffer, pipeline, variant->stage,
                      &uniforms, data, &tex_bos, &state_bos);
         break;

      case QUNIFORM_TMU_CONFIG_P1:
         write_tmu_p1(cmd_buffer, pipeline, variant->stage,
                      &uniforms, data, &state_bos);
         break;

      case QUNIFORM_IMAGE_WIDTH:
      case QUNIFORM_IMAGE_HEIGHT:
      case QUNIFORM_IMAGE_DEPTH:
      case QUNIFORM_IMAGE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_WIDTH:
      case QUNIFORM_TEXTURE_HEIGHT:
      case QUNIFORM_TEXTURE_DEPTH:
      case QUNIFORM_TEXTURE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_LEVELS:
      case QUNIFORM_TEXTURE_SAMPLES:
         cl_aligned_u32(&uniforms,
                        get_texture_size(cmd_buffer,
                                         pipeline,
                                         variant->stage,
                                         uinfo->contents[i],
                                         data));
         break;

      /* We generate this from geometry shaders to cap the generated gl_Layer
       * to be within the number of layers of the framebuffer so we prevent the
       * binner from trying to access tile state memory out of bounds (for
       * layers that don't exist).
       *
       * Unfortunately, for secondary command buffers we may not know the
       * number of layers in the framebuffer at this stage. Since we are
       * only using this to sanitize the shader and it should not have any
       * impact on correct shaders that emit valid values for gl_Layer,
       * we just work around it by using the largest number of layers we
       * support.
       *
       * FIXME: we could do better than this by recording in the job that
       * the value at this uniform offset is not correct, and patch it when
       * we execute the secondary command buffer into a primary, since we do
       * have the correct number of layers at that point, but again, since this
       * is only for sanitizing the shader and it only affects the specific case
       * of secondary command buffers without framebuffer info available it
       * might not be worth the trouble.
       *
       * With multiview the number of layers is dictated by the view mask
       * and not by the framebuffer layers. We do set the job's frame tiling
       * information correctly from the view mask in that case, however,
       * secondary command buffers may not have valid frame tiling data,
       * so when multiview is enabled, we always set the number of layers
       * from the subpass view mask.
       */
      case QUNIFORM_FB_LAYERS: {
         const struct v3dv_cmd_buffer_state *state = &job->cmd_buffer->state;
         const uint32_t view_mask =
            state->pass->subpasses[state->subpass_idx].view_mask;

         uint32_t num_layers;
         if (view_mask != 0) {
            num_layers = util_last_bit(view_mask);
         } else if (job->frame_tiling.layers != 0) {
            num_layers = job->frame_tiling.layers;
         } else if (cmd_buffer->state.framebuffer) {
            num_layers = cmd_buffer->state.framebuffer->layers;
         } else {
            assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
            num_layers = 2048;
#if MESA_DEBUG
            fprintf(stderr, "Skipping gl_LayerID shader sanity check for "
                            "secondary command buffer\n");
#endif
         }
         cl_aligned_u32(&uniforms, num_layers);
         break;
      }

      case QUNIFORM_VIEW_INDEX:
         cl_aligned_u32(&uniforms, job->cmd_buffer->state.view_index);
         break;

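      /* Record where each workgroup count is written in the stream so the
       * caller can patch the values after the fact (e.g. for indirect
       * dispatches, where the counts are not known when the stream is
       * emitted).
       */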
      case QUNIFORM_NUM_WORK_GROUPS:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.wg_count[data] > 0);
         if (wg_count_offsets)
            wg_count_offsets[data] = (uint32_t *) uniforms;
         cl_aligned_u32(&uniforms, job->csd.wg_count[data]);
         break;

      case QUNIFORM_WORK_GROUP_BASE:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         cl_aligned_u32(&uniforms, job->csd.wg_base[data]);
         break;

      case QUNIFORM_SHARED_OFFSET:
         assert(job->type == V3DV_JOB_TYPE_GPU_CSD);
         assert(job->csd.shared_memory);
         cl_aligned_u32(&uniforms, job->csd.shared_memory->offset);
         break;

      case QUNIFORM_SPILL_OFFSET:
         assert(pipeline->spill.bo);
         cl_aligned_u32(&uniforms, pipeline->spill.bo->offset);
         break;

      case QUNIFORM_SPILL_SIZE_PER_THREAD:
         assert(pipeline->spill.size_per_thread > 0);
         cl_aligned_u32(&uniforms, pipeline->spill.size_per_thread);
         break;

      case QUNIFORM_DRAW_ID:
         cl_aligned_u32(&uniforms, job->cmd_buffer->state.draw_id);
         break;

      case QUNIFORM_LINE_WIDTH:
         cl_aligned_u32(&uniforms,
                        job->cmd_buffer->vk.dynamic_graphics_state.rs.line.width);
         break;

      case QUNIFORM_AA_LINE_WIDTH:
         cl_aligned_u32(&uniforms,
                        v3dv_get_aa_line_width(pipeline, job->cmd_buffer));
         break;

      default:
         unreachable("unsupported quniform_contents uniform type\n");
      }
   }

   cl_end(&job->indirect, uniforms);

   for (int i = 0; i < MAX_TOTAL_TEXTURE_SAMPLERS; i++) {
      if (tex_bos.tex[i])
         v3dv_job_add_bo(job, tex_bos.tex[i]);
   }

   for (int i = 0; i < state_bos.count; i++)
      v3dv_job_add_bo(job, state_bos.states[i]);

   for (int i = 0; i < MAX_TOTAL_UNIFORM_BUFFERS; i++) {
      if (buffer_bos.ubo[i])
         v3dv_job_add_bo(job, buffer_bos.ubo[i]);
   }

   for (int i = 0; i < MAX_TOTAL_STORAGE_BUFFERS; i++) {
      if (buffer_bos.ssbo[i])
         v3dv_job_add_bo(job, buffer_bos.ssbo[i]);
   }

   if (job->csd.shared_memory)
      v3dv_job_add_bo(job, job->csd.shared_memory);

   if (pipeline->spill.bo)
      v3dv_job_add_bo(job, pipeline->spill.bo);

   return uniform_stream;
}

struct v3dv_cl_reloc
v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                    struct v3dv_pipeline *pipeline,
                    struct v3dv_shader_variant *variant)
{
   return v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline, variant, NULL);
}