xref: /aosp_15_r20/external/mesa3d/src/imagination/vulkan/pvr_job_compute.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdint.h>
27 #include <vulkan/vulkan.h>
28 
29 #include "pvr_csb.h"
30 #include "pvr_debug.h"
31 #include "pvr_job_common.h"
32 #include "pvr_job_context.h"
33 #include "pvr_job_compute.h"
34 #include "pvr_private.h"
35 #include "pvr_types.h"
36 #include "pvr_winsys.h"
37 #include "util/macros.h"
38 
39 static void
pvr_submit_info_stream_init(struct pvr_compute_ctx * ctx,struct pvr_sub_cmd_compute * sub_cmd,struct pvr_winsys_compute_submit_info * submit_info)40 pvr_submit_info_stream_init(struct pvr_compute_ctx *ctx,
41                             struct pvr_sub_cmd_compute *sub_cmd,
42                             struct pvr_winsys_compute_submit_info *submit_info)
43 {
44    const struct pvr_device *const device = ctx->device;
45    const struct pvr_physical_device *const pdevice = device->pdevice;
46    const struct pvr_device_runtime_info *const dev_runtime_info =
47       &pdevice->dev_runtime_info;
48    const struct pvr_device_info *const dev_info = &pdevice->dev_info;
49    const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;
50 
51    uint32_t *stream_ptr = (uint32_t *)submit_info->fw_stream;
52    uint32_t *stream_len_ptr = stream_ptr;
53 
54    /* Leave space for stream header. */
55    stream_ptr += pvr_cmd_length(KMD_STREAM_HDR);
56 
57    pvr_csb_pack ((uint64_t *)stream_ptr,
58                  CR_TPU_BORDER_COLOUR_TABLE_CDM,
59                  value) {
60       value.border_colour_table_address =
61          device->border_color_table.table->vma->dev_addr;
62    }
63    stream_ptr += pvr_cmd_length(CR_TPU_BORDER_COLOUR_TABLE_CDM);
64 
65    pvr_csb_pack ((uint64_t *)stream_ptr, CR_CDM_CTRL_STREAM_BASE, value) {
66       value.addr = pvr_csb_get_start_address(&sub_cmd->control_stream);
67    }
68    stream_ptr += pvr_cmd_length(CR_CDM_CTRL_STREAM_BASE);
69 
70    pvr_csb_pack ((uint64_t *)stream_ptr, CR_CDM_CONTEXT_STATE_BASE, state) {
71       state.addr = ctx_switch->compute_state_bo->vma->dev_addr;
72    }
73    stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_STATE_BASE);
74 
75    pvr_csb_pack (stream_ptr, CR_CDM_CONTEXT_PDS1, state) {
76       const uint32_t load_program_data_size =
77          PVR_DW_TO_BYTES(ctx_switch->sr[0].pds.load_program.data_size);
78 
79       state.pds_seq_dep = false;
80       state.usc_seq_dep = false;
81       state.target = false;
82       state.unified_size = ctx_switch->sr[0].usc.unified_size;
83       state.common_shared = true;
84       state.common_size =
85          DIV_ROUND_UP(sub_cmd->num_shared_regs << 2,
86                       PVRX(CR_CDM_CONTEXT_PDS1_COMMON_SIZE_UNIT_SIZE));
87       state.temp_size = 0;
88 
89       assert(load_program_data_size %
90                 PVRX(CR_CDM_CONTEXT_PDS1_DATA_SIZE_UNIT_SIZE) ==
91              0);
92       state.data_size =
93          load_program_data_size / PVRX(CR_CDM_CONTEXT_PDS1_DATA_SIZE_UNIT_SIZE);
94       state.fence = false;
95    }
96    stream_ptr += pvr_cmd_length(CR_CDM_CONTEXT_PDS1);
97 
98    if (PVR_HAS_FEATURE(dev_info, compute_morton_capable)) {
99       pvr_csb_pack (stream_ptr, CR_CDM_ITEM, value) {
100          value.mode = 0;
101       }
102       stream_ptr += pvr_cmd_length(CR_CDM_ITEM);
103    }
104 
105    if (PVR_HAS_FEATURE(dev_info, cluster_grouping)) {
106       pvr_csb_pack (stream_ptr, CR_COMPUTE_CLUSTER, value) {
107          if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
108              dev_runtime_info->num_phantoms > 1 && sub_cmd->uses_atomic_ops) {
109             /* Each phantom has its own MCU, so atomicity can only be
110              * guaranteed when all work items are processed on the same
111              * phantom. This means we need to disable all USCs other than
112              * those of the first phantom, which has 4 clusters.
113              */
114             value.mask = 0xFU;
115          } else {
116             value.mask = 0U;
117          }
118       }
119       stream_ptr += pvr_cmd_length(CR_COMPUTE_CLUSTER);
120    }
121 
122    if (PVR_HAS_FEATURE(dev_info, tpu_dm_global_registers)) {
123       pvr_csb_pack (stream_ptr, CR_TPU_TAG_CDM_CTRL, value) {
124       }
125       stream_ptr += pvr_cmd_length(CR_TPU_TAG_CDM_CTRL);
126    }
127 
128    if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
129       pvr_finishme(
130          "Emit execute_count when feature gpu_multicore_support is present");
131       *stream_ptr = 0;
132       stream_ptr++;
133    }
134 
135    submit_info->fw_stream_len =
136       (uint8_t *)stream_ptr - (uint8_t *)submit_info->fw_stream;
137    assert(submit_info->fw_stream_len <= ARRAY_SIZE(submit_info->fw_stream));
138 
139    pvr_csb_pack ((uint64_t *)stream_len_ptr, KMD_STREAM_HDR, value) {
140       value.length = submit_info->fw_stream_len;
141    }
142 }
143 
pvr_submit_info_ext_stream_init(struct pvr_compute_ctx * ctx,struct pvr_winsys_compute_submit_info * submit_info)144 static void pvr_submit_info_ext_stream_init(
145    struct pvr_compute_ctx *ctx,
146    struct pvr_winsys_compute_submit_info *submit_info)
147 {
148    const struct pvr_device_info *const dev_info =
149       &ctx->device->pdevice->dev_info;
150 
151    uint32_t *stream_ptr = (uint32_t *)submit_info->fw_stream;
152    uint32_t main_stream_len =
153       pvr_csb_unpack((uint64_t *)stream_ptr, KMD_STREAM_HDR).length;
154    uint32_t *ext_stream_ptr =
155       (uint32_t *)((uint8_t *)stream_ptr + main_stream_len);
156    uint32_t *header0_ptr;
157 
158    header0_ptr = ext_stream_ptr;
159    ext_stream_ptr += pvr_cmd_length(KMD_STREAM_EXTHDR_COMPUTE0);
160 
161    pvr_csb_pack (header0_ptr, KMD_STREAM_EXTHDR_COMPUTE0, header0) {
162       if (PVR_HAS_QUIRK(dev_info, 49927)) {
163          header0.has_brn49927 = true;
164 
165          pvr_csb_pack (ext_stream_ptr, CR_TPU, value) {
166             value.tag_cem_4k_face_packing = true;
167          }
168          ext_stream_ptr += pvr_cmd_length(CR_TPU);
169       }
170    }
171 
172    if ((*header0_ptr & PVRX(KMD_STREAM_EXTHDR_DATA_MASK)) != 0) {
173       submit_info->fw_stream_len =
174          (uint8_t *)ext_stream_ptr - (uint8_t *)submit_info->fw_stream;
175       assert(submit_info->fw_stream_len <= ARRAY_SIZE(submit_info->fw_stream));
176    }
177 }
178 
179 static void
pvr_submit_info_flags_init(const struct pvr_device_info * const dev_info,const struct pvr_sub_cmd_compute * const sub_cmd,struct pvr_winsys_compute_submit_flags * flags)180 pvr_submit_info_flags_init(const struct pvr_device_info *const dev_info,
181                            const struct pvr_sub_cmd_compute *const sub_cmd,
182                            struct pvr_winsys_compute_submit_flags *flags)
183 {
184    *flags = (struct pvr_winsys_compute_submit_flags){
185       .prevent_all_overlap = sub_cmd->uses_barrier,
186       .use_single_core = PVR_HAS_FEATURE(dev_info, gpu_multicore_support) &&
187                          sub_cmd->uses_atomic_ops,
188    };
189 }
190 
pvr_compute_job_ws_submit_info_init(struct pvr_compute_ctx * ctx,struct pvr_sub_cmd_compute * sub_cmd,struct vk_sync * wait,struct pvr_winsys_compute_submit_info * submit_info)191 static void pvr_compute_job_ws_submit_info_init(
192    struct pvr_compute_ctx *ctx,
193    struct pvr_sub_cmd_compute *sub_cmd,
194    struct vk_sync *wait,
195    struct pvr_winsys_compute_submit_info *submit_info)
196 {
197    const struct pvr_device *const device = ctx->device;
198    const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;
199 
200    memset(submit_info, 0, sizeof(*submit_info));
201 
202    submit_info->frame_num = device->global_queue_present_count;
203    submit_info->job_num = device->global_cmd_buffer_submit_count;
204 
205    submit_info->wait = wait;
206 
207    pvr_submit_info_stream_init(ctx, sub_cmd, submit_info);
208    pvr_submit_info_ext_stream_init(ctx, submit_info);
209    pvr_submit_info_flags_init(dev_info, sub_cmd, &submit_info->flags);
210 }
211 
pvr_compute_job_submit(struct pvr_compute_ctx * ctx,struct pvr_sub_cmd_compute * sub_cmd,struct vk_sync * wait,struct vk_sync * signal_sync)212 VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx,
213                                 struct pvr_sub_cmd_compute *sub_cmd,
214                                 struct vk_sync *wait,
215                                 struct vk_sync *signal_sync)
216 {
217    struct pvr_winsys_compute_submit_info submit_info;
218    struct pvr_device *device = ctx->device;
219 
220    pvr_compute_job_ws_submit_info_init(ctx, sub_cmd, wait, &submit_info);
221 
222    if (PVR_IS_DEBUG_SET(DUMP_CONTROL_STREAM)) {
223       pvr_csb_dump(&sub_cmd->control_stream,
224                    submit_info.frame_num,
225                    submit_info.job_num);
226    }
227 
228    return device->ws->ops->compute_submit(ctx->ws_ctx,
229                                           &submit_info,
230                                           &device->pdevice->dev_info,
231                                           signal_sync);
232 }
233