/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"

/* The packets used in this file are not expected to differ between hardware
 * versions, so V3D_VERSION is pinned here and v3dx_pack is included directly.
 */
#define V3D_VERSION 42
#include "broadcom/common/v3d_macros.h"
#include "broadcom/cle/v3dx_pack.h"

/**
 * Puts \p cl into its empty state: no current BO, NULL base and write
 * pointers, zero usable size and an empty BO list. The CL is associated with
 * \p job, which may be NULL (see v3dv_cl_destroy()).
 */
void
v3dv_cl_init(struct v3dv_job *job, struct v3dv_cl *cl)
{
   cl->job = job;
   cl->bo = NULL;
   cl->size = 0;
   cl->base = NULL;
   cl->next = NULL;
   list_inithead(&cl->bo_list);
}

/**
 * Unlinks and frees every BO on the CL's BO list, then re-initializes the CL
 * with a NULL job.
 */
void
v3dv_cl_destroy(struct v3dv_cl *cl)
{
   list_for_each_entry_safe(struct v3dv_bo, entry, &cl->bo_list, list_link) {
      /* The device used to free the BO is reached through the job. */
      assert(cl->job);
      list_del(&entry->list_link);
      v3dv_bo_free(cl->job->device, entry);
   }

   /* Resetting the CL helps catch any use after destroy. */
   v3dv_cl_init(NULL, cl);
}

/* How the current BO gets linked to a newly allocated one. */
enum v3dv_cl_chain_type {
   /* No packet is emitted in the old BO. */
   V3D_CL_BO_CHAIN_NONE = 0,
   /* The old BO ends with a BRANCH packet that targets the new BO. */
   V3D_CL_BO_CHAIN_WITH_BRANCH,
   /* The old BO ends with a RETURN_FROM_SUB_LIST packet. */
   V3D_CL_BO_CHAIN_WITH_RETURN_FROM_SUB_LIST,
};

/**
 * Number of bytes at the tail of a BO that regular packets must not use for
 * the given chaining mode.
 *
 * The last bytes of a CLE buffer are unusable because of readahead prefetch.
 * On top of that, room is kept for the BRANCH / RETURN_FROM_SUB_LIST packet
 * so that the closing packet can always be emitted into the BO. Nothing is
 * reserved when no chaining is requested.
 */
static uint32_t
cl_chain_reserved_space(enum v3dv_cl_chain_type chain_type,
                        uint32_t cle_readahead)
{
   switch (chain_type) {
   case V3D_CL_BO_CHAIN_WITH_BRANCH:
      return cle_readahead + cl_packet_length(BRANCH);
   case V3D_CL_BO_CHAIN_WITH_RETURN_FROM_SUB_LIST:
      return cle_readahead + cl_packet_length(RETURN_FROM_SUB_LIST);
   case V3D_CL_BO_CHAIN_NONE:
      break;
   }

   return 0;
}

/**
 * Allocates and maps a new BO able to hold at least \p space bytes and makes
 * it the CL's current BO, optionally terminating the previous BO with the
 * packet selected by \p chain_type.
 *
 * On allocation or mapping failure a message is printed to stderr, OOM is
 * flagged on the job and false is returned, leaving the CL's current BO,
 * base, size and write pointer untouched. Returns true on success.
 *
 * The reserved tail space is excluded from cl->size; cl->size has to be
 * bumped by the packet length before the reserved area can be written, which
 * is what the chaining code below does for the old BO.
 */
static bool
cl_alloc_bo(struct v3dv_cl *cl, uint32_t space,
            enum v3dv_cl_chain_type chain_type)
{
   const struct v3d_device_info *info = &cl->job->device->devinfo;
   const uint32_t readahead = info->cle_readahead;
   const uint32_t min_size = info->cle_buffer_min_size;
   const uint32_t reserved = cl_chain_reserved_space(chain_type, readahead);

   /* When growing an existing CL, at least double the previous BO size so
    * large command buffers need fewer allocations. This has a very
    * significant impact on the draw calls per second reported by vkoverhead.
    */
   uint32_t alloc_size = align(space + reserved, min_size);
   if (cl->bo)
      alloc_size = MAX2(cl->bo->size * 2, alloc_size);

   struct v3dv_bo *new_bo =
      v3dv_bo_alloc(cl->job->device, alloc_size, "CL", true);
   if (new_bo == NULL) {
      fprintf(stderr, "failed to allocate memory for command list\n");
      v3dv_flag_oom(NULL, cl->job);
      return false;
   }

   /* Track the BO in the CL before mapping it, so it is released by
    * v3dv_cl_destroy() even if the mapping below fails.
    */
   list_addtail(&new_bo->list_link, &cl->bo_list);

   if (!v3dv_bo_map(cl->job->device, new_bo, new_bo->size)) {
      fprintf(stderr, "failed to map command list buffer\n");
      v3dv_flag_oom(NULL, cl->job);
      return false;
   }

   if (cl->bo == NULL) {
      /* First BO of this CL: there is nothing to chain from. */
      v3dv_job_add_bo_unchecked(cl->job, new_bo);
   } else {
      /* Link the previous BO to the new one as requested. */
      switch (chain_type) {
      case V3D_CL_BO_CHAIN_WITH_BRANCH:
         cl->bo->cl_branch_offset = v3dv_cl_offset(cl);
         /* Open up the space reserved for the BRANCH packet. */
         cl->size += cl_packet_length(BRANCH);
         assert(cl->size + readahead <= cl->bo->size);
         /* NOTE(review): the new BO is not explicitly added to the job in
          * this case; presumably emitting the branch address takes care of
          * it -- confirm against the packet reloc path.
          */
         cl_emit(cl, BRANCH, pkt) {
            pkt.address = v3dv_cl_address(new_bo, 0);
         }
         break;
      case V3D_CL_BO_CHAIN_WITH_RETURN_FROM_SUB_LIST:
         /* Secondary command lists must not branch by themselves: a primary
          * jumps into them with 'branch to sub list' commands and expects
          * each linked secondary to finish with 'return from sub list'.
          */
         cl->size += cl_packet_length(RETURN_FROM_SUB_LIST);
         assert(cl->size + readahead <= cl->bo->size);
         cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
         FALLTHROUGH;
      case V3D_CL_BO_CHAIN_NONE:
         v3dv_job_add_bo_unchecked(cl->job, new_bo);
         break;
      }
   }

   /* Switch the CL over to the new BO. Only the usable part of the BO is
    * exposed through cl->size, so the bytes affected by the CLE readahead
    * are never written.
    */
   cl->bo = new_bo;
   cl->base = new_bo->map;
   cl->next = cl->base;
   cl->size = new_bo->size - reserved;

   return true;
}

/**
 * Makes sure \p space bytes are available at an offset aligned to
 * \p alignment.
 *
 * If the request fits in the current BO, the write pointer is moved to the
 * aligned offset and that offset is returned. Otherwise a new BO is requested
 * without any chaining packet and 0 is returned; the outcome of that
 * allocation is not checked here.
 */
uint32_t
v3dv_cl_ensure_space(struct v3dv_cl *cl, uint32_t space, uint32_t alignment)
{
   const uint32_t aligned_offset = align(v3dv_cl_offset(cl), alignment);

   if (aligned_offset + space > cl->size) {
      cl_alloc_bo(cl, space, V3D_CL_BO_CHAIN_NONE);
      return 0;
   }

   cl->next = cl->base + aligned_offset;
   return aligned_offset;
}

/**
 * Makes sure \p space bytes are available at the current offset, requesting
 * a new chained BO when they are not.
 *
 * Jobs of type V3DV_JOB_TYPE_GPU_CL_INCOMPLETE close the old BO with
 * RETURN_FROM_SUB_LIST; every other job type closes it with a BRANCH to the
 * new BO. The outcome of the allocation is not checked here.
 */
void
v3dv_cl_ensure_space_with_branch(struct v3dv_cl *cl, uint32_t space)
{
   const bool fits = v3dv_cl_offset(cl) + space <= cl->size;

   if (!fits) {
      const enum v3dv_cl_chain_type chain_type =
         cl->job->type == V3DV_JOB_TYPE_GPU_CL_INCOMPLETE ?
            V3D_CL_BO_CHAIN_WITH_RETURN_FROM_SUB_LIST :
            V3D_CL_BO_CHAIN_WITH_BRANCH;

      cl_alloc_bo(cl, space, chain_type);
   }
}