/*
 * Copyright © 2022 Google, Inc.
 * SPDX-License-Identifier: MIT
 */

#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "util/libsync.h"
#include "util/os_file.h"

#include "drm/freedreno_ringbuffer_sp.h"
#include "virtio_priv.h"

static void
retire_execute(void *job, void *gdata, int thread_index)
{
   struct fd_submit_sp *fd_submit = job;

   MESA_TRACE_FUNC();

   fd_fence_wait(fd_submit->out_fence);
}

static void
retire_cleanup(void *job, void *gdata, int thread_index)
{
   struct fd_submit_sp *fd_submit = job;
   fd_submit_del(&fd_submit->base);
}

static int
flush_submit_list(struct list_head *submit_list)
{
   struct fd_submit_sp *fd_submit = to_fd_submit_sp(last_submit(submit_list));
   struct virtio_pipe *virtio_pipe = to_virtio_pipe(fd_submit->base.pipe);
   struct fd_pipe *pipe = &virtio_pipe->base;
   struct fd_device *dev = pipe->dev;

   unsigned nr_cmds = 0;

   MESA_TRACE_FUNC();

   /* Determine the number of extra cmds from deferred submits that we
    * will be merging in:
    */
   foreach_submit (submit, submit_list) {
      assert(submit->pipe == &virtio_pipe->base);
      nr_cmds += to_fd_ringbuffer_sp(submit->primary)->u.nr_cmds;
   }

   /* TODO we can get rid of the extra copy into the req by just
    * assuming the max amount that nr_bos will grow is by the
    * nr_cmds, and just over-allocate a bit.
    */

   struct drm_msm_gem_submit_cmd cmds[nr_cmds];

   unsigned cmd_idx = 0;

   /* Build up the table of cmds, and for all but the last submit in the
    * list, merge their bo tables into the last submit.
    */
   foreach_submit_safe (submit, submit_list) {
      struct fd_ringbuffer_sp *deferred_primary =
         to_fd_ringbuffer_sp(submit->primary);

      for (unsigned i = 0; i < deferred_primary->u.nr_cmds; i++) {
         struct fd_bo *ring_bo = deferred_primary->u.cmds[i].ring_bo;
         cmds[cmd_idx].type = MSM_SUBMIT_CMD_BUF;
         cmds[cmd_idx].submit_idx = fd_submit_append_bo(fd_submit, ring_bo);
         cmds[cmd_idx].submit_offset =
            submit_offset(ring_bo, deferred_primary->offset);
         cmds[cmd_idx].size = deferred_primary->u.cmds[i].size;
         cmds[cmd_idx].pad = 0;
         cmds[cmd_idx].nr_relocs = 0;

         cmd_idx++;
      }

      /* We are merging all the submits in the list into the last submit,
       * so the remainder of the loop body doesn't apply to the last submit:
       */
      if (submit == last_submit(submit_list)) {
         DEBUG_MSG("merged %u submits", cmd_idx);
         break;
      }

      struct fd_submit_sp *fd_deferred_submit = to_fd_submit_sp(submit);
      for (unsigned i = 0; i < fd_deferred_submit->nr_bos; i++) {
         /* Note: if bo is used in both the current submit and the deferred
          * submit being merged, we expect to hit the fast-path as we add it
          * to the current submit:
          */
         fd_submit_append_bo(fd_submit, fd_deferred_submit->bos[i]);
      }

      /* Now that the cmds/bos have been transferred over to the current
       * submit, we can remove the deferred submit from the list and drop
       * its reference:
       */
      list_del(&submit->node);
      fd_submit_del(submit);
   }

   /* Needs to be after get_cmd() as that could create bos/cmds table:
    *
    * NOTE allocate on-stack in the common case, but with an upper-
    * bound to limit on-stack allocation to 4k:
    */
   const unsigned bo_limit = 4096 / sizeof(struct drm_msm_gem_submit_bo);
   bool bos_on_stack = fd_submit->nr_bos < bo_limit;
   struct drm_msm_gem_submit_bo _submit_bos[bos_on_stack ? fd_submit->nr_bos : 0];
   struct drm_msm_gem_submit_bo *submit_bos;
   uint32_t _guest_handles[bos_on_stack ? fd_submit->nr_bos : 0];
   uint32_t *guest_handles;

   if (bos_on_stack) {
      submit_bos = _submit_bos;
      guest_handles = _guest_handles;
   } else {
      submit_bos = malloc(fd_submit->nr_bos * sizeof(submit_bos[0]));
      guest_handles = malloc(fd_submit->nr_bos * sizeof(guest_handles[0]));
   }
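
   /* Build the submit_bos table.  Note that it references host resource ids
    * (virtio_bo->res_id) rather than guest GEM handles; guest handles are
    * gathered separately, and only for potentially shared (FD_BO_SHARED)
    * buffers, so the host can deal with implicit sync for those:
    */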
   uint32_t nr_guest_handles = 0;
   for (unsigned i = 0; i < fd_submit->nr_bos; i++) {
      struct virtio_bo *virtio_bo = to_virtio_bo(fd_submit->bos[i]);

      if (virtio_bo->base.alloc_flags & FD_BO_SHARED)
         guest_handles[nr_guest_handles++] = virtio_bo->base.handle;

      submit_bos[i].flags = fd_submit->bos[i]->reloc_flags;
      submit_bos[i].handle = virtio_bo->res_id;
      submit_bos[i].presumed = 0;
   }

   if (virtio_pipe->next_submit_fence <= 0)
      virtio_pipe->next_submit_fence = 1;

   uint32_t kfence = virtio_pipe->next_submit_fence++;

   /* TODO avoid extra memcpy, and populate bos and cmds directly
    * into the req msg:
    */
   unsigned bos_len = fd_submit->nr_bos * sizeof(struct drm_msm_gem_submit_bo);
   unsigned cmd_len = nr_cmds * sizeof(struct drm_msm_gem_submit_cmd);
   unsigned req_len = sizeof(struct msm_ccmd_gem_submit_req) + bos_len + cmd_len;
   struct msm_ccmd_gem_submit_req *req = malloc(req_len);

   req->hdr = MSM_CCMD(GEM_SUBMIT, req_len);
   req->flags = virtio_pipe->pipe;
   req->queue_id = virtio_pipe->queue_id;
   req->nr_bos = fd_submit->nr_bos;
   req->nr_cmds = nr_cmds;
   req->fence = kfence;

   memcpy(req->payload, submit_bos, bos_len);
   memcpy(req->payload + bos_len, cmds, cmd_len);

   struct fd_fence *out_fence = fd_submit->out_fence;

   out_fence->kfence = kfence;

   /* Even if the gallium driver hasn't requested a fence-fd, request one.
    * This way, if we have to block waiting for the fence, we can do it in
    * the guest, rather than in the single-threaded host.
    */
   out_fence->use_fence_fd = true;

   if (pipe->no_implicit_sync) {
      req->flags |= MSM_SUBMIT_NO_IMPLICIT;
      nr_guest_handles = 0;
   }

   struct vdrm_execbuf_params p = {
      .req = &req->hdr,
      .handles = guest_handles,
      .num_handles = nr_guest_handles,
      .has_in_fence_fd = !!(fd_submit->in_fence_fd != -1),
      .needs_out_fence_fd = true,
      .fence_fd = fd_submit->in_fence_fd,
      .ring_idx = virtio_pipe->ring_idx,
   };
   vdrm_execbuf(to_virtio_device(dev)->vdrm, &p);

   out_fence->fence_fd = p.fence_fd;

   free(req);

   if (!bos_on_stack) {
      free(submit_bos);
      free(guest_handles);
   }

   if (fd_submit->in_fence_fd != -1)
      close(fd_submit->in_fence_fd);

   fd_submit_ref(&fd_submit->base);

   util_queue_fence_init(&fd_submit->retire_fence);
   util_queue_add_job(&virtio_pipe->retire_queue,
                      fd_submit, &fd_submit->retire_fence,
                      retire_execute,
                      retire_cleanup, 0);

   return 0;
}

struct fd_submit *
virtio_submit_new(struct fd_pipe *pipe)
{
   /* We don't do any translation from internal FD_RELOC flags to MSM flags. */
   STATIC_ASSERT(FD_RELOC_READ == MSM_SUBMIT_BO_READ);
   STATIC_ASSERT(FD_RELOC_WRITE == MSM_SUBMIT_BO_WRITE);
   STATIC_ASSERT(FD_RELOC_DUMP == MSM_SUBMIT_BO_DUMP);

   return fd_submit_sp_new(pipe, flush_submit_list);
}