/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Sparse buffer tests
 *//*--------------------------------------------------------------------*/

#include "vktSparseResourcesBufferTests.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktTestGroupUtil.hpp"
#include "vktSparseResourcesTestsUtil.hpp"
#include "vktSparseResourcesBase.hpp"
#include "vktSparseResourcesBufferSparseBinding.hpp"
#include "vktSparseResourcesBufferSparseResidency.hpp"
#include "vktSparseResourcesBufferMemoryAliasing.hpp"

#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"

#include "tcuTestLog.hpp"

#include "deUniquePtr.hpp"
#include "deSharedPtr.hpp"
#include "deMath.h"

#include <string>
#include <vector>
#include <map>

using namespace vk;
using de::MovePtr;
using de::SharedPtr;
using de::UniquePtr;
using tcu::IVec2;
using tcu::IVec4;
using tcu::Vec4;

namespace vkt
{
namespace sparse
{
namespace
{

typedef SharedPtr<UniquePtr<Allocation>> AllocationSp;

enum
{
    RENDER_SIZE = 128,           //!< framebuffer size in pixels
    GRID_SIZE = RENDER_SIZE / 8, //!< number of grid tiles in a row
};

enum TestFlagBits
{
    // sparseBinding is implied
    TEST_FLAG_ALIASED = 1u << 0,              //!< sparseResidencyAliased
    TEST_FLAG_RESIDENCY = 1u << 1,            //!< sparseResidencyBuffer
    TEST_FLAG_NON_RESIDENT_STRICT = 1u << 2,  //!< residencyNonResidentStrict
    TEST_FLAG_ENABLE_DEVICE_GROUPS = 1u << 3, //!< device groups are enabled
};
typedef uint32_t TestFlags;
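
// The flag bits combine freely; e.g. a case exercising aliased, partially resident
// buffers would be parameterized with (illustrative):
//
//   const TestFlags flags = TEST_FLAG_RESIDENCY | TEST_FLAG_ALIASED;
//
// Each bit maps to the device feature named in its comment above, which is
// verified in checkSupport() below.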

//! SparseAllocationBuilder output. Owns the allocated memory.
struct SparseAllocation
{
    uint32_t numResourceChunks;
    VkDeviceSize resourceSize;                   //!< buffer size in bytes
    std::vector<AllocationSp> allocations;       //!< actual allocated memory
    std::vector<VkSparseMemoryBind> memoryBinds; //!< memory binds backing the resource
    uint32_t memoryType;                         //!< memory type (same for all allocations)
    uint32_t heapIndex;                          //!< memory heap index
};

//! Utility to lay out memory allocations for a sparse buffer, including holes and aliased regions.
//! Will allocate memory upon building.
class SparseAllocationBuilder
{
public:
    SparseAllocationBuilder(void);

    // \note "chunk" is the smallest (due to alignment) bindable amount of memory

    SparseAllocationBuilder &addMemoryHole(const uint32_t numChunks = 1u);
    SparseAllocationBuilder &addResourceHole(const uint32_t numChunks = 1u);
    SparseAllocationBuilder &addMemoryBind(const uint32_t numChunks = 1u);
    SparseAllocationBuilder &addAliasedMemoryBind(const uint32_t allocationNdx, const uint32_t chunkOffset,
                                                  const uint32_t numChunks = 1u);
    SparseAllocationBuilder &addMemoryAllocation(void);

    MovePtr<SparseAllocation> build(
        const InstanceInterface &instanceInterface, const VkPhysicalDevice physicalDevice, const DeviceInterface &vk,
        const VkDevice device, Allocator &allocator,
        VkBufferCreateInfo referenceCreateInfo,        //!< buffer size is ignored in this info
        const VkDeviceSize minChunkSize = 0ull) const; //!< make sure chunks are at least this big

private:
    struct MemoryBind
    {
        uint32_t allocationNdx;
        uint32_t resourceChunkNdx;
        uint32_t memoryChunkNdx;
        uint32_t numChunks;
    };

    uint32_t m_allocationNdx;
    uint32_t m_resourceChunkNdx;
    uint32_t m_memoryChunkNdx;
    std::vector<MemoryBind> m_memoryBinds;
    std::vector<uint32_t> m_chunksPerAllocation;
};
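
// Example (illustrative): describe a buffer whose first chunk is backed by
// allocation 0, whose second chunk is an unbound resource hole, whose third
// chunk is backed by allocation 1 (after skipping one memory chunk), and whose
// fourth chunk aliases the start of allocation 0. This mirrors the layout the
// tests below build when residency/aliasing is enabled:
//
//   SparseAllocationBuilder builder;
//   builder.addMemoryBind();
//   builder.addResourceHole();
//   builder.addMemoryAllocation().addMemoryHole().addMemoryBind();
//   builder.addAliasedMemoryBind(0u, 0u);
//   MovePtr<SparseAllocation> allocation =
//       builder.build(vki, physDevice, vk, device, allocator, createInfo, minChunkSize);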

SparseAllocationBuilder::SparseAllocationBuilder(void) : m_allocationNdx(0), m_resourceChunkNdx(0), m_memoryChunkNdx(0)
{
    m_chunksPerAllocation.push_back(0);
}

SparseAllocationBuilder &SparseAllocationBuilder::addMemoryHole(const uint32_t numChunks)
{
    m_memoryChunkNdx += numChunks;
    m_chunksPerAllocation[m_allocationNdx] += numChunks;

    return *this;
}

SparseAllocationBuilder &SparseAllocationBuilder::addResourceHole(const uint32_t numChunks)
{
    m_resourceChunkNdx += numChunks;

    return *this;
}

SparseAllocationBuilder &SparseAllocationBuilder::addMemoryAllocation(void)
{
    DE_ASSERT(m_memoryChunkNdx != 0); // doesn't make sense to have an empty allocation

    m_allocationNdx += 1;
    m_memoryChunkNdx = 0;
    m_chunksPerAllocation.push_back(0);

    return *this;
}

SparseAllocationBuilder &SparseAllocationBuilder::addMemoryBind(const uint32_t numChunks)
{
    const MemoryBind memoryBind = {m_allocationNdx, m_resourceChunkNdx, m_memoryChunkNdx, numChunks};
    m_memoryBinds.push_back(memoryBind);

    m_resourceChunkNdx += numChunks;
    m_memoryChunkNdx += numChunks;
    m_chunksPerAllocation[m_allocationNdx] += numChunks;

    return *this;
}

SparseAllocationBuilder &SparseAllocationBuilder::addAliasedMemoryBind(const uint32_t allocationNdx,
                                                                       const uint32_t chunkOffset,
                                                                       const uint32_t numChunks)
{
    DE_ASSERT(allocationNdx <= m_allocationNdx);

    const MemoryBind memoryBind = {allocationNdx, m_resourceChunkNdx, chunkOffset, numChunks};
    m_memoryBinds.push_back(memoryBind);

    m_resourceChunkNdx += numChunks;

    return *this;
}

MovePtr<SparseAllocation> SparseAllocationBuilder::build(const InstanceInterface &instanceInterface,
                                                         const VkPhysicalDevice physicalDevice,
                                                         const DeviceInterface &vk, const VkDevice device,
                                                         Allocator &allocator, VkBufferCreateInfo referenceCreateInfo,
                                                         const VkDeviceSize minChunkSize) const
{

    MovePtr<SparseAllocation> sparseAllocation(new SparseAllocation());

    referenceCreateInfo.size = sizeof(uint32_t);
    const Unique<VkBuffer> refBuffer(createBuffer(vk, device, &referenceCreateInfo));
    const VkMemoryRequirements memoryRequirements = getBufferMemoryRequirements(vk, device, *refBuffer);
    const VkDeviceSize chunkSize = std::max(
        memoryRequirements.alignment, static_cast<VkDeviceSize>(deAlign64(minChunkSize, memoryRequirements.alignment)));
    const uint32_t memoryTypeNdx =
        findMatchingMemoryType(instanceInterface, physicalDevice, memoryRequirements, MemoryRequirement::Any);
    VkMemoryAllocateInfo allocInfo = {
        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType;
        DE_NULL,                                // const void* pNext;
        memoryRequirements.size,                // VkDeviceSize allocationSize;
        memoryTypeNdx,                          // uint32_t memoryTypeIndex;
    };

    for (std::vector<uint32_t>::const_iterator numChunksIter = m_chunksPerAllocation.begin();
         numChunksIter != m_chunksPerAllocation.end(); ++numChunksIter)
    {
        allocInfo.allocationSize = *numChunksIter * chunkSize;
        sparseAllocation->allocations.push_back(makeDeSharedPtr(allocator.allocate(allocInfo, (VkDeviceSize)0)));
    }

    for (std::vector<MemoryBind>::const_iterator memBindIter = m_memoryBinds.begin();
         memBindIter != m_memoryBinds.end(); ++memBindIter)
    {
        const Allocation &alloc = **sparseAllocation->allocations[memBindIter->allocationNdx];
        const VkSparseMemoryBind bind = {
            memBindIter->resourceChunkNdx * chunkSize,                   // VkDeviceSize resourceOffset;
            memBindIter->numChunks * chunkSize,                          // VkDeviceSize size;
            alloc.getMemory(),                                           // VkDeviceMemory memory;
            alloc.getOffset() + memBindIter->memoryChunkNdx * chunkSize, // VkDeviceSize memoryOffset;
            (VkSparseMemoryBindFlags)0,                                  // VkSparseMemoryBindFlags flags;
        };
        sparseAllocation->memoryBinds.push_back(bind);
        referenceCreateInfo.size = std::max(referenceCreateInfo.size, bind.resourceOffset + bind.size);
    }

    sparseAllocation->resourceSize = referenceCreateInfo.size;
    sparseAllocation->numResourceChunks = m_resourceChunkNdx;
    sparseAllocation->memoryType = memoryTypeNdx;
    sparseAllocation->heapIndex = getHeapIndexForMemoryType(instanceInterface, physicalDevice, memoryTypeNdx);

    return sparseAllocation;
}

VkImageCreateInfo makeImageCreateInfo(const VkFormat format, const IVec2 &size, const VkImageUsageFlags usage)
{
    const VkImageCreateInfo imageParams = {
        VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                             // const void* pNext;
        (VkImageCreateFlags)0,               // VkImageCreateFlags flags;
        VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
        format,                              // VkFormat format;
        makeExtent3D(size.x(), size.y(), 1), // VkExtent3D extent;
        1u,                                  // uint32_t mipLevels;
        1u,                                  // uint32_t arrayLayers;
        VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
        VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
        usage,                               // VkImageUsageFlags usage;
        VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
        0u,                                  // uint32_t queueFamilyIndexCount;
        DE_NULL,                             // const uint32_t* pQueueFamilyIndices;
        VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
    };
    return imageParams;
}

Move<VkPipeline> makeGraphicsPipeline(const DeviceInterface &vk, const VkDevice device,
                                      const VkPipelineLayout pipelineLayout, const VkRenderPass renderPass,
                                      const IVec2 renderSize, const VkPrimitiveTopology topology,
                                      const uint32_t stageCount, const VkPipelineShaderStageCreateInfo *pStages)
{
    const VkVertexInputBindingDescription vertexInputBindingDescription = {
        0u,                          // uint32_t binding;
        sizeof(Vec4),                // uint32_t stride;
        VK_VERTEX_INPUT_RATE_VERTEX, // VkVertexInputRate inputRate;
    };

    const VkVertexInputAttributeDescription vertexInputAttributeDescription = {
        0u,                            // uint32_t location;
        0u,                            // uint32_t binding;
        VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format;
        0u,                            // uint32_t offset;
    };

    const VkPipelineVertexInputStateCreateInfo vertexInputStateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                                                   // const void* pNext;
        (VkPipelineVertexInputStateCreateFlags)0,                  // VkPipelineVertexInputStateCreateFlags flags;
        1u,                               // uint32_t vertexBindingDescriptionCount;
        &vertexInputBindingDescription,   // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
        1u,                               // uint32_t vertexAttributeDescriptionCount;
        &vertexInputAttributeDescription, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
    };

    const VkPipelineInputAssemblyStateCreateInfo pipelineInputAssemblyStateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                                                     // const void* pNext;
        (VkPipelineInputAssemblyStateCreateFlags)0,                  // VkPipelineInputAssemblyStateCreateFlags flags;
        topology,                                                    // VkPrimitiveTopology topology;
        VK_FALSE,                                                    // VkBool32 primitiveRestartEnable;
    };

    const VkViewport viewport = makeViewport(renderSize);
    const VkRect2D scissor = makeRect2D(renderSize);

    const VkPipelineViewportStateCreateInfo pipelineViewportStateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                                               // const void* pNext;
        (VkPipelineViewportStateCreateFlags)0,                 // VkPipelineViewportStateCreateFlags flags;
        1u,                                                    // uint32_t viewportCount;
        &viewport,                                             // const VkViewport* pViewports;
        1u,                                                    // uint32_t scissorCount;
        &scissor,                                              // const VkRect2D* pScissors;
    };

    const VkPipelineRasterizationStateCreateInfo pipelineRasterizationStateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                                                    // const void* pNext;
        (VkPipelineRasterizationStateCreateFlags)0,                 // VkPipelineRasterizationStateCreateFlags flags;
        VK_FALSE,                                                   // VkBool32 depthClampEnable;
        VK_FALSE,                                                   // VkBool32 rasterizerDiscardEnable;
        VK_POLYGON_MODE_FILL,                                       // VkPolygonMode polygonMode;
        VK_CULL_MODE_NONE,                                          // VkCullModeFlags cullMode;
        VK_FRONT_FACE_COUNTER_CLOCKWISE,                            // VkFrontFace frontFace;
        VK_FALSE,                                                   // VkBool32 depthBiasEnable;
        0.0f,                                                       // float depthBiasConstantFactor;
        0.0f,                                                       // float depthBiasClamp;
        0.0f,                                                       // float depthBiasSlopeFactor;
        1.0f,                                                       // float lineWidth;
    };

    const VkPipelineMultisampleStateCreateInfo pipelineMultisampleStateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                                                  // const void* pNext;
        (VkPipelineMultisampleStateCreateFlags)0,                 // VkPipelineMultisampleStateCreateFlags flags;
        VK_SAMPLE_COUNT_1_BIT,                                    // VkSampleCountFlagBits rasterizationSamples;
        VK_FALSE,                                                 // VkBool32 sampleShadingEnable;
        0.0f,                                                     // float minSampleShading;
        DE_NULL,                                                  // const VkSampleMask* pSampleMask;
        VK_FALSE,                                                 // VkBool32 alphaToCoverageEnable;
        VK_FALSE                                                  // VkBool32 alphaToOneEnable;
    };

    const VkStencilOpState stencilOpState = makeStencilOpState(VK_STENCIL_OP_KEEP,   // stencil fail
                                                               VK_STENCIL_OP_KEEP,   // depth & stencil pass
                                                               VK_STENCIL_OP_KEEP,   // depth only fail
                                                               VK_COMPARE_OP_ALWAYS, // compare op
                                                               0u,                   // compare mask
                                                               0u,                   // write mask
                                                               0u);                  // reference

    VkPipelineDepthStencilStateCreateInfo pipelineDepthStencilStateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                                                    // const void* pNext;
        (VkPipelineDepthStencilStateCreateFlags)0,                  // VkPipelineDepthStencilStateCreateFlags flags;
        VK_FALSE,                                                   // VkBool32 depthTestEnable;
        VK_FALSE,                                                   // VkBool32 depthWriteEnable;
        VK_COMPARE_OP_LESS,                                         // VkCompareOp depthCompareOp;
        VK_FALSE,                                                   // VkBool32 depthBoundsTestEnable;
        VK_FALSE,                                                   // VkBool32 stencilTestEnable;
        stencilOpState,                                             // VkStencilOpState front;
        stencilOpState,                                             // VkStencilOpState back;
        0.0f,                                                       // float minDepthBounds;
        1.0f,                                                       // float maxDepthBounds;
    };

    const VkColorComponentFlags colorComponentsAll =
        VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
    const VkPipelineColorBlendAttachmentState pipelineColorBlendAttachmentState = {
        VK_FALSE,             // VkBool32 blendEnable;
        VK_BLEND_FACTOR_ONE,  // VkBlendFactor srcColorBlendFactor;
        VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor;
        VK_BLEND_OP_ADD,      // VkBlendOp colorBlendOp;
        VK_BLEND_FACTOR_ONE,  // VkBlendFactor srcAlphaBlendFactor;
        VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstAlphaBlendFactor;
        VK_BLEND_OP_ADD,      // VkBlendOp alphaBlendOp;
        colorComponentsAll,   // VkColorComponentFlags colorWriteMask;
    };

    const VkPipelineColorBlendStateCreateInfo pipelineColorBlendStateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                                                  // const void* pNext;
        (VkPipelineColorBlendStateCreateFlags)0,                  // VkPipelineColorBlendStateCreateFlags flags;
        VK_FALSE,                                                 // VkBool32 logicOpEnable;
        VK_LOGIC_OP_COPY,                                         // VkLogicOp logicOp;
        1u,                                                       // uint32_t attachmentCount;
        &pipelineColorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState* pAttachments;
        {0.0f, 0.0f, 0.0f, 0.0f},           // float blendConstants[4];
    };

    const VkGraphicsPipelineCreateInfo graphicsPipelineInfo = {
        VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                                         // const void* pNext;
        (VkPipelineCreateFlags)0,                        // VkPipelineCreateFlags flags;
        stageCount,                                      // uint32_t stageCount;
        pStages,                                         // const VkPipelineShaderStageCreateInfo* pStages;
        &vertexInputStateInfo,           // const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
        &pipelineInputAssemblyStateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
        DE_NULL,                         // const VkPipelineTessellationStateCreateInfo* pTessellationState;
        &pipelineViewportStateInfo,      // const VkPipelineViewportStateCreateInfo* pViewportState;
        &pipelineRasterizationStateInfo, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
        &pipelineMultisampleStateInfo,   // const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
        &pipelineDepthStencilStateInfo,  // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
        &pipelineColorBlendStateInfo,    // const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
        DE_NULL,                         // const VkPipelineDynamicStateCreateInfo* pDynamicState;
        pipelineLayout,                  // VkPipelineLayout layout;
        renderPass,                      // VkRenderPass renderPass;
        0u,                              // uint32_t subpass;
        DE_NULL,                         // VkPipeline basePipelineHandle;
        0,                               // int32_t basePipelineIndex;
    };

    return createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineInfo);
}

//! Return true if there are any red (or all zero) pixels in the image
bool imageHasErrorPixels(const tcu::ConstPixelBufferAccess image)
{
    const Vec4 errorColor = Vec4(1.0f, 0.0f, 0.0f, 1.0f);
    const Vec4 blankColor = Vec4();

    for (int y = 0; y < image.getHeight(); ++y)
        for (int x = 0; x < image.getWidth(); ++x)
        {
            const Vec4 color = image.getPixel(x, y);
            if (color == errorColor || color == blankColor)
                return true;
        }

    return false;
}

class Renderer
{
public:
    typedef std::map<VkShaderStageFlagBits, const VkSpecializationInfo *> SpecializationMap;

    //! Use the delegate to bind descriptor sets, vertex buffers, etc. and make a draw call
    struct Delegate
    {
        virtual ~Delegate(void)
        {
        }
        virtual void rendererDraw(const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const = 0;
    };

    Renderer(const DeviceInterface &vk, const VkDevice device, Allocator &allocator, const uint32_t queueFamilyIndex,
             const VkDescriptorSetLayout descriptorSetLayout, //!< may be NULL, if no descriptors are used
             BinaryCollection &binaryCollection, const std::string &vertexName, const std::string &fragmentName,
             const VkBuffer colorBuffer, const IVec2 &renderSize, const VkFormat colorFormat, const Vec4 &clearColor,
             const VkPrimitiveTopology topology, SpecializationMap specMap = SpecializationMap())
        : m_colorBuffer(colorBuffer)
        , m_renderSize(renderSize)
        , m_colorFormat(colorFormat)
        , m_colorSubresourceRange(makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u))
        , m_clearColor(clearColor)
        , m_topology(topology)
        , m_descriptorSetLayout(descriptorSetLayout)
    {
        m_colorImage =
            makeImage(vk, device,
                      makeImageCreateInfo(m_colorFormat, m_renderSize,
                                          VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT));
        m_colorImageAlloc = bindImage(vk, device, allocator, *m_colorImage, MemoryRequirement::Any);
        m_colorAttachment =
            makeImageView(vk, device, *m_colorImage, VK_IMAGE_VIEW_TYPE_2D, m_colorFormat, m_colorSubresourceRange);

        m_vertexModule = createShaderModule(vk, device, binaryCollection.get(vertexName), 0u);
        m_fragmentModule = createShaderModule(vk, device, binaryCollection.get(fragmentName), 0u);

        const VkPipelineShaderStageCreateInfo pShaderStages[] = {
            {
                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
                DE_NULL,                                             // const void* pNext;
                (VkPipelineShaderStageCreateFlags)0,                 // VkPipelineShaderStageCreateFlags flags;
                VK_SHADER_STAGE_VERTEX_BIT,                          // VkShaderStageFlagBits stage;
                *m_vertexModule,                                     // VkShaderModule module;
                "main",                                              // const char* pName;
                specMap[VK_SHADER_STAGE_VERTEX_BIT],                 // const VkSpecializationInfo* pSpecializationInfo;
            },
            {
                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
                DE_NULL,                                             // const void* pNext;
                (VkPipelineShaderStageCreateFlags)0,                 // VkPipelineShaderStageCreateFlags flags;
                VK_SHADER_STAGE_FRAGMENT_BIT,                        // VkShaderStageFlagBits stage;
                *m_fragmentModule,                                   // VkShaderModule module;
                "main",                                              // const char* pName;
                specMap[VK_SHADER_STAGE_FRAGMENT_BIT],               // const VkSpecializationInfo* pSpecializationInfo;
            }};

        m_renderPass = makeRenderPass(vk, device, m_colorFormat);
        m_framebuffer =
            makeFramebuffer(vk, device, *m_renderPass, m_colorAttachment.get(), static_cast<uint32_t>(m_renderSize.x()),
                            static_cast<uint32_t>(m_renderSize.y()));
        m_pipelineLayout = makePipelineLayout(vk, device, m_descriptorSetLayout);
        m_pipeline = makeGraphicsPipeline(vk, device, *m_pipelineLayout, *m_renderPass, m_renderSize, m_topology,
                                          DE_LENGTH_OF_ARRAY(pShaderStages), pShaderStages);
        m_cmdPool = makeCommandPool(vk, device, queueFamilyIndex);
        m_cmdBuffer = allocateCommandBuffer(vk, device, *m_cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
    }

    void draw(const DeviceInterface &vk, const VkDevice device, const VkQueue queue, const Delegate &drawDelegate,
              const bool useDeviceGroups, const uint32_t deviceID) const
    {
        beginCommandBuffer(vk, *m_cmdBuffer);

        beginRenderPass(vk, *m_cmdBuffer, *m_renderPass, *m_framebuffer,
                        makeRect2D(0, 0, m_renderSize.x(), m_renderSize.y()), m_clearColor);

        vk.cmdBindPipeline(*m_cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
        drawDelegate.rendererDraw(*m_pipelineLayout, *m_cmdBuffer);

        endRenderPass(vk, *m_cmdBuffer);

        copyImageToBuffer(vk, *m_cmdBuffer, *m_colorImage, m_colorBuffer, m_renderSize);

        endCommandBuffer(vk, *m_cmdBuffer);
        submitCommandsAndWait(vk, device, queue, *m_cmdBuffer, 0U, DE_NULL, DE_NULL, 0U, DE_NULL, useDeviceGroups,
                              deviceID);
    }

private:
    const VkBuffer m_colorBuffer;
    const IVec2 m_renderSize;
    const VkFormat m_colorFormat;
    const VkImageSubresourceRange m_colorSubresourceRange;
    const Vec4 m_clearColor;
    const VkPrimitiveTopology m_topology;
    const VkDescriptorSetLayout m_descriptorSetLayout;

    Move<VkImage> m_colorImage;
    MovePtr<Allocation> m_colorImageAlloc;
    Move<VkImageView> m_colorAttachment;
    Move<VkShaderModule> m_vertexModule;
    Move<VkShaderModule> m_fragmentModule;
    Move<VkRenderPass> m_renderPass;
    Move<VkFramebuffer> m_framebuffer;
    Move<VkPipelineLayout> m_pipelineLayout;
    Move<VkPipeline> m_pipeline;
    Move<VkCommandPool> m_cmdPool;
    Move<VkCommandBuffer> m_cmdBuffer;

    // "deleted"
    Renderer(const Renderer &);
    Renderer &operator=(const Renderer &);
};

void bindSparseBuffer(const DeviceInterface &vk, const VkDevice device, const VkQueue sparseQueue,
                      const VkBuffer buffer, const SparseAllocation &sparseAllocation, const bool useDeviceGroups,
                      uint32_t resourceDevId, uint32_t memoryDeviceId)
{
    const VkSparseBufferMemoryBindInfo sparseBufferMemoryBindInfo = {
        buffer,                                                     // VkBuffer buffer;
        static_cast<uint32_t>(sparseAllocation.memoryBinds.size()), // uint32_t bindCount;
        &sparseAllocation.memoryBinds[0],                           // const VkSparseMemoryBind* pBinds;
    };

    const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo = {
        VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO, // VkStructureType sType;
        DE_NULL,                                         // const void* pNext;
        resourceDevId,                                   // uint32_t resourceDeviceIndex;
        memoryDeviceId,                                  // uint32_t memoryDeviceIndex;
    };

    const VkBindSparseInfo bindInfo = {
        VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,                  // VkStructureType sType;
        useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL, // const void* pNext;
        0u,                                                  // uint32_t waitSemaphoreCount;
        DE_NULL,                                             // const VkSemaphore* pWaitSemaphores;
        1u,                                                  // uint32_t bufferBindCount;
        &sparseBufferMemoryBindInfo,                         // const VkSparseBufferMemoryBindInfo* pBufferBinds;
        0u,                                                  // uint32_t imageOpaqueBindCount;
        DE_NULL,                                             // const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds;
        0u,                                                  // uint32_t imageBindCount;
        DE_NULL,                                             // const VkSparseImageMemoryBindInfo* pImageBinds;
        0u,                                                  // uint32_t signalSemaphoreCount;
        DE_NULL,                                             // const VkSemaphore* pSignalSemaphores;
    };

    const Unique<VkFence> fence(createFence(vk, device));

    VK_CHECK(vk.queueBindSparse(sparseQueue, 1u, &bindInfo, *fence));
    VK_CHECK(vk.waitForFences(device, 1u, &fence.get(), VK_TRUE, ~0ull));
}

class SparseBufferTestInstance : public SparseResourcesBaseInstance, Renderer::Delegate
{
public:
    SparseBufferTestInstance(Context &context, const TestFlags flags)
        : SparseResourcesBaseInstance(context, (flags & TEST_FLAG_ENABLE_DEVICE_GROUPS) != 0)
        , m_aliased((flags & TEST_FLAG_ALIASED) != 0)
        , m_residency((flags & TEST_FLAG_RESIDENCY) != 0)
        , m_nonResidentStrict((flags & TEST_FLAG_NON_RESIDENT_STRICT) != 0)
        , m_renderSize(RENDER_SIZE, RENDER_SIZE)
        , m_colorFormat(VK_FORMAT_R8G8B8A8_UNORM)
        , m_colorBufferSize(m_renderSize.x() * m_renderSize.y() * tcu::getPixelSize(mapVkFormat(m_colorFormat)))
    {
        {
            QueueRequirementsVec requirements;
            requirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
            requirements.push_back(QueueRequirements(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 1u));

            createDeviceSupportingQueues(requirements);
        }

        const DeviceInterface &vk = getDeviceInterface();

        m_sparseQueue = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0u);
        m_universalQueue = getQueue(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 0u);

        m_sharedQueueFamilyIndices[0] = m_sparseQueue.queueFamilyIndex;
        m_sharedQueueFamilyIndices[1] = m_universalQueue.queueFamilyIndex;

        m_colorBuffer = makeBuffer(vk, getDevice(), m_colorBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
        m_colorBufferAlloc =
            bindBuffer(vk, getDevice(), getAllocator(), *m_colorBuffer, MemoryRequirement::HostVisible);

        deMemset(m_colorBufferAlloc->getHostPtr(), 0, static_cast<std::size_t>(m_colorBufferSize));
        flushAlloc(vk, getDevice(), *m_colorBufferAlloc);
    }

protected:
    VkBufferCreateInfo getSparseBufferCreateInfo(const VkBufferUsageFlags usage) const
    {
        VkBufferCreateFlags flags = VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
        if (m_residency)
            flags |= VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT;
        if (m_aliased)
            flags |= VK_BUFFER_CREATE_SPARSE_ALIASED_BIT;

        VkBufferCreateInfo referenceBufferCreateInfo = {
            VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,     // VkStructureType sType;
            DE_NULL,                                  // const void* pNext;
            flags,                                    // VkBufferCreateFlags flags;
            0u, // override later                     // VkDeviceSize size;
            VK_BUFFER_USAGE_TRANSFER_DST_BIT | usage, // VkBufferUsageFlags usage;
            VK_SHARING_MODE_EXCLUSIVE,                // VkSharingMode sharingMode;
            0u,                                       // uint32_t queueFamilyIndexCount;
            DE_NULL,                                  // const uint32_t* pQueueFamilyIndices;
        };

        if (m_sparseQueue.queueFamilyIndex != m_universalQueue.queueFamilyIndex)
        {
            referenceBufferCreateInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
            referenceBufferCreateInfo.queueFamilyIndexCount = DE_LENGTH_OF_ARRAY(m_sharedQueueFamilyIndices);
            referenceBufferCreateInfo.pQueueFamilyIndices = m_sharedQueueFamilyIndices;
        }

        return referenceBufferCreateInfo;
    }

    void draw(const VkPrimitiveTopology topology, const VkDescriptorSetLayout descriptorSetLayout = DE_NULL,
              Renderer::SpecializationMap specMap = Renderer::SpecializationMap(), bool useDeviceGroups = false,
              uint32_t deviceID = 0)
    {
        const UniquePtr<Renderer> renderer(
            new Renderer(getDeviceInterface(), getDevice(), getAllocator(), m_universalQueue.queueFamilyIndex,
                         descriptorSetLayout, m_context.getBinaryCollection(), "vert", "frag", *m_colorBuffer,
                         m_renderSize, m_colorFormat, Vec4(1.0f, 0.0f, 0.0f, 1.0f), topology, specMap));

        renderer->draw(getDeviceInterface(), getDevice(), m_universalQueue.queueHandle, *this, useDeviceGroups,
                       deviceID);
    }

    bool isResultImageCorrect(void) const
    {
        invalidateAlloc(getDeviceInterface(), getDevice(), *m_colorBufferAlloc);

        const tcu::ConstPixelBufferAccess resultImage(mapVkFormat(m_colorFormat), m_renderSize.x(), m_renderSize.y(),
                                                      1u, m_colorBufferAlloc->getHostPtr());

        m_context.getTestContext().getLog() << tcu::LogImageSet("Result", "Result")
                                            << tcu::LogImage("color0", "", resultImage) << tcu::TestLog::EndImageSet;

        return !imageHasErrorPixels(resultImage);
    }

    const bool m_aliased;
    const bool m_residency;
    const bool m_nonResidentStrict;

    Queue m_sparseQueue;
    Queue m_universalQueue;

private:
    const IVec2 m_renderSize;
    const VkFormat m_colorFormat;
    const VkDeviceSize m_colorBufferSize;

    Move<VkBuffer> m_colorBuffer;
    MovePtr<Allocation> m_colorBufferAlloc;

    uint32_t m_sharedQueueFamilyIndices[2];
};

void initProgramsDrawWithUBO(vk::SourceCollections &programCollection, const TestFlags flags)
{
    // Vertex shader
    {
        std::ostringstream src;
        src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
            << "\n"
            << "layout(location = 0) in vec4 in_position;\n"
            << "\n"
            << "out gl_PerVertex {\n"
            << "    vec4 gl_Position;\n"
            << "};\n"
            << "\n"
            << "void main(void)\n"
            << "{\n"
            << "    gl_Position = in_position;\n"
            << "}\n";

        programCollection.glslSources.add("vert") << glu::VertexSource(src.str());
    }

    // Fragment shader
    {
        const bool aliased = (flags & TEST_FLAG_ALIASED) != 0;
        const bool residency = (flags & TEST_FLAG_RESIDENCY) != 0;
        const bool nonResidentStrict = (flags & TEST_FLAG_NON_RESIDENT_STRICT) != 0;
        const std::string valueExpr =
            (aliased ? "ivec4(3*(ndx % nonAliasedSize) ^ 127, 0, 0, 0)" : "ivec4(3*ndx ^ 127, 0, 0, 0)");

        std::ostringstream src;
        src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
            << "\n"
            << "layout(location = 0) out vec4 o_color;\n"
            << "\n"
            << "layout(constant_id = 1) const int dataSize = 1;\n"
            << "layout(constant_id = 2) const int chunkSize = 1;\n"
            << "\n"
            << "layout(set = 0, binding = 0, std140) uniform SparseBuffer {\n"
            << "    ivec4 data[dataSize];\n"
            << "} ubo;\n"
            << "\n"
            << "void main(void)\n"
            << "{\n"
            << "    const int fragNdx = int(gl_FragCoord.x) + " << RENDER_SIZE << " * int(gl_FragCoord.y);\n"
            << "    const int pageSize = " << RENDER_SIZE << " * " << RENDER_SIZE << ";\n"
            << "    const int numChunks = dataSize / chunkSize;\n";

        if (aliased)
            src << "    const int nonAliasedSize = (numChunks > 1 ? dataSize - chunkSize : dataSize);\n";

        src << "    bool ok = true;\n"
            << "\n"
            << "    for (int ndx = fragNdx; ndx < dataSize; ndx += pageSize)\n"
            << "    {\n";

        if (residency && nonResidentStrict)
        {
            src << "        if (ndx >= chunkSize && ndx < 2*chunkSize)\n"
                << "            ok = ok && (ubo.data[ndx] == ivec4(0));\n"
                << "        else\n"
                << "            ok = ok && (ubo.data[ndx] == " + valueExpr + ");\n";
        }
        else if (residency)
        {
            src << "        if (ndx >= chunkSize && ndx < 2*chunkSize)\n"
                << "            continue;\n"
                << "        ok = ok && (ubo.data[ndx] == " << valueExpr << ");\n";
        }
        else
            src << "        ok = ok && (ubo.data[ndx] == " << valueExpr << ");\n";

        src << "    }\n"
            << "\n"
            << "    if (ok)\n"
            << "        o_color = vec4(0.0, 1.0, 0.0, 1.0);\n"
            << "    else\n"
            << "        o_color = vec4(1.0, 0.0, 0.0, 1.0);\n"
            << "}\n";

        programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
    }
}

//! Sparse buffer backing a UBO
class UBOTestInstance : public SparseBufferTestInstance
{
public:
    UBOTestInstance(Context &context, const TestFlags flags) : SparseBufferTestInstance(context, flags)
    {
    }

    void rendererDraw(const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
    {
        const DeviceInterface &vk = getDeviceInterface();
        const VkDeviceSize vertexOffset = 0ull;

        vk.cmdBindVertexBuffers(cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
        vk.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0u, 1u,
                                 &m_descriptorSet.get(), 0u, DE_NULL);
        vk.cmdDraw(cmdBuffer, 4u, 1u, 0u, 0u);
    }

    tcu::TestStatus iterate(void)
    {
        const InstanceInterface &instance = m_context.getInstanceInterface();
        const DeviceInterface &vk = getDeviceInterface();
        MovePtr<SparseAllocation> sparseAllocation;
        Move<VkBuffer> sparseBuffer;
        Move<VkBuffer> sparseBufferAliased;
        bool setupDescriptors = true;

        // Go through all physical devices
        for (uint32_t physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
        {
            const uint32_t firstDeviceID = physDevID;
            const uint32_t secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices;

            // Set up the sparse buffer
            {
                VkBufferCreateInfo referenceBufferCreateInfo =
                    getSparseBufferCreateInfo(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
                const VkDeviceSize minChunkSize = 512u; // make sure the smallest allocation is at least this big
                uint32_t numMaxChunks = 0u;

                // Check how many chunks we can allocate given the alignment and size requirements of UBOs
                {
                    const UniquePtr<SparseAllocation> minAllocation(SparseAllocationBuilder().addMemoryBind().build(
                        instance, getPhysicalDevice(secondDeviceID), vk, getDevice(), getAllocator(),
                        referenceBufferCreateInfo, minChunkSize));

                    numMaxChunks =
                        deMaxu32(static_cast<uint32_t>(m_context.getDeviceProperties().limits.maxUniformBufferRange /
                                                       minAllocation->resourceSize),
                                 1u);
                }

                if (numMaxChunks < 4)
                {
                    sparseAllocation = SparseAllocationBuilder().addMemoryBind().build(
                        instance, getPhysicalDevice(secondDeviceID), vk, getDevice(), getAllocator(),
                        referenceBufferCreateInfo, minChunkSize);
                }
                else
                {
                    // Try to use a non-trivial memory allocation scheme to make it different from a non-sparse binding
                    SparseAllocationBuilder builder;
                    builder.addMemoryBind();

                    if (m_residency)
                        builder.addResourceHole();

                    builder.addMemoryAllocation().addMemoryHole().addMemoryBind();

                    if (m_aliased)
                        builder.addAliasedMemoryBind(0u, 0u);

                    sparseAllocation = builder.build(instance, getPhysicalDevice(secondDeviceID), vk, getDevice(),
                                                     getAllocator(), referenceBufferCreateInfo, minChunkSize);
                    DE_ASSERT(sparseAllocation->resourceSize <=
                              m_context.getDeviceProperties().limits.maxUniformBufferRange);
                }

                if (firstDeviceID != secondDeviceID)
                {
                    VkPeerMemoryFeatureFlags peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
                    vk.getDeviceGroupPeerMemoryFeatures(getDevice(), sparseAllocation->heapIndex, firstDeviceID,
                                                        secondDeviceID, &peerMemoryFeatureFlags);

                    if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_DST_BIT) == 0) ||
                        ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT) == 0))
                    {
                        TCU_THROW(NotSupportedError, "Peer memory does not support COPY_DST and GENERIC_SRC");
                    }
                }

                // Create the buffer
                referenceBufferCreateInfo.size = sparseAllocation->resourceSize;
                sparseBuffer = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
                bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *sparseBuffer, *sparseAllocation,
                                 usingDeviceGroups(), firstDeviceID, secondDeviceID);

                if (m_aliased)
                {
                    sparseBufferAliased = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
                    bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *sparseBufferAliased,
                                     *sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);
                }
            }

            // Set uniform data
            {
                const bool hasAliasedChunk = (m_aliased && sparseAllocation->memoryBinds.size() > 1u);
                const VkDeviceSize chunkSize = sparseAllocation->resourceSize / sparseAllocation->numResourceChunks;
                const VkDeviceSize stagingBufferSize =
                    sparseAllocation->resourceSize - (hasAliasedChunk ? chunkSize : 0);
                const uint32_t numBufferEntries = static_cast<uint32_t>(stagingBufferSize / sizeof(IVec4));

                const Unique<VkBuffer> stagingBuffer(
                    makeBuffer(vk, getDevice(), stagingBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT));
                const UniquePtr<Allocation> stagingBufferAlloc(
                    bindBuffer(vk, getDevice(), getAllocator(), *stagingBuffer, MemoryRequirement::HostVisible));

                {
                    // If aliased chunk is used, the staging buffer is smaller than the sparse buffer and we don't overwrite the last chunk
                    IVec4 *const pData = static_cast<IVec4 *>(stagingBufferAlloc->getHostPtr());
                    for (uint32_t i = 0; i < numBufferEntries; ++i)
                        pData[i] = IVec4(3 * i ^ 127, 0, 0, 0);

                    flushAlloc(vk, getDevice(), *stagingBufferAlloc);

                    const VkBufferCopy copyRegion = {
                        0ull,              // VkDeviceSize srcOffset;
                        0ull,              // VkDeviceSize dstOffset;
                        stagingBufferSize, // VkDeviceSize size;
                    };

                    const Unique<VkCommandPool> cmdPool(
                        makeCommandPool(vk, getDevice(), m_universalQueue.queueFamilyIndex));
                    const Unique<VkCommandBuffer> cmdBuffer(
                        allocateCommandBuffer(vk, getDevice(), *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

                    beginCommandBuffer(vk, *cmdBuffer);
                    vk.cmdCopyBuffer(*cmdBuffer, *stagingBuffer, *sparseBuffer, 1u, &copyRegion);
                    endCommandBuffer(vk, *cmdBuffer);

                    submitCommandsAndWait(vk, getDevice(), m_universalQueue.queueHandle, *cmdBuffer, 0u, DE_NULL,
                                          DE_NULL, 0, DE_NULL, usingDeviceGroups(), firstDeviceID);
                    // Once the fence is signaled, the write is also available to the aliasing buffer.
                }
            }

            // Make sure that we don't try to access a larger range than is allowed. This only applies to a single chunk case.
            const uint32_t maxBufferRange = deMinu32(static_cast<uint32_t>(sparseAllocation->resourceSize),
                                                     m_context.getDeviceProperties().limits.maxUniformBufferRange);

            // Descriptor sets
            {
                // Setup only once
                if (setupDescriptors)
                {
                    m_descriptorSetLayout =
                        DescriptorSetLayoutBuilder()
                            .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT)
                            .build(vk, getDevice());

                    m_descriptorPool =
                        DescriptorPoolBuilder()
                            .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
                            .build(vk, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

                    m_descriptorSet = makeDescriptorSet(vk, getDevice(), *m_descriptorPool, *m_descriptorSetLayout);
                    setupDescriptors = false;
                }

                const VkBuffer buffer = (m_aliased ? *sparseBufferAliased : *sparseBuffer);
                const VkDescriptorBufferInfo sparseBufferInfo = makeDescriptorBufferInfo(buffer, 0ull, maxBufferRange);

                DescriptorSetUpdateBuilder()
                    .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                                 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &sparseBufferInfo)
                    .update(vk, getDevice());
            }

            // Vertex data
            {
                const Vec4 vertexData[] = {
                    Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
                    Vec4(-1.0f, 1.0f, 0.0f, 1.0f),
                    Vec4(1.0f, -1.0f, 0.0f, 1.0f),
                    Vec4(1.0f, 1.0f, 0.0f, 1.0f),
                };

                const VkDeviceSize vertexBufferSize = sizeof(vertexData);

                m_vertexBuffer = makeBuffer(vk, getDevice(), vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
                m_vertexBufferAlloc =
                    bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);

                deMemcpy(m_vertexBufferAlloc->getHostPtr(), &vertexData[0], vertexBufferSize);
                flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
            }

            // Draw
            {
                std::vector<int32_t> specializationData;
                {
                    const uint32_t numBufferEntries = maxBufferRange / static_cast<uint32_t>(sizeof(IVec4));
                    const uint32_t numEntriesPerChunk = numBufferEntries / sparseAllocation->numResourceChunks;

                    specializationData.push_back(numBufferEntries);
                    specializationData.push_back(numEntriesPerChunk);
                }

                const VkSpecializationMapEntry specMapEntries[] = {
                    {
                        1u,              // uint32_t constantID;
                        0u,              // uint32_t offset;
                        sizeof(int32_t), // size_t size;
                    },
                    {
                        2u,              // uint32_t constantID;
                        sizeof(int32_t), // uint32_t offset;
                        sizeof(int32_t), // size_t size;
                    },
                };

                const VkSpecializationInfo specInfo = {
                    DE_LENGTH_OF_ARRAY(specMapEntries),   // uint32_t mapEntryCount;
                    specMapEntries,                       // const VkSpecializationMapEntry* pMapEntries;
                    sizeInBytes(specializationData),      // size_t dataSize;
                    getDataOrNullptr(specializationData), // const void* pData;
                };

                Renderer::SpecializationMap specMap;
                specMap[VK_SHADER_STAGE_FRAGMENT_BIT] = &specInfo;

                draw(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, *m_descriptorSetLayout, specMap, usingDeviceGroups(),
                     firstDeviceID);
            }

            if (!isResultImageCorrect())
                return tcu::TestStatus::fail("Some buffer values were incorrect");
        }
        return tcu::TestStatus::pass("Pass");
    }

private:
    Move<VkBuffer> m_vertexBuffer;
    MovePtr<Allocation> m_vertexBufferAlloc;

    Move<VkDescriptorSetLayout> m_descriptorSetLayout;
    Move<VkDescriptorPool> m_descriptorPool;
    Move<VkDescriptorSet> m_descriptorSet;
};

void initProgramsDrawGrid(vk::SourceCollections &programCollection, const TestFlags flags)
{
    DE_UNREF(flags);

    // Vertex shader
    {
        std::ostringstream src;
        src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
            << "\n"
            << "layout(location = 0) in vec4 in_position;\n"
            << "layout(location = 0) out int out_ndx;\n"
            << "\n"
            << "out gl_PerVertex {\n"
            << "    vec4 gl_Position;\n"
            << "};\n"
            << "\n"
            << "void main(void)\n"
            << "{\n"
            << "    gl_Position = in_position;\n"
            << "    out_ndx = gl_VertexIndex;\n"
            << "}\n";

        programCollection.glslSources.add("vert") << glu::VertexSource(src.str());
    }

    // Fragment shader
    {
        std::ostringstream src;
        src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
            << "\n"
            << "layout(location = 0) flat in int in_ndx;\n"
            << "layout(location = 0) out vec4 o_color;\n"
            << "\n"
            << "void main(void)\n"
            << "{\n"
            << "    if (in_ndx % 2 == 0)\n"
            << "        o_color = vec4(vec3(1.0), 1.0);\n"
            << "    else\n"
            << "        o_color = vec4(vec3(0.75), 1.0);\n"
            << "}\n";

        programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
    }
}

//! Generate vertex positions for a grid of tiles composed of two triangles each (6 vertices)
void generateGrid(void *pRawData, const float step, const float ox, const float oy, const uint32_t numX,
                  const uint32_t numY, const float z = 0.0f)
{
    typedef Vec4 (*TilePtr)[6];

    TilePtr const pData = static_cast<TilePtr>(pRawData);
    {
        for (uint32_t iy = 0; iy < numY; ++iy)
            for (uint32_t ix = 0; ix < numX; ++ix)
            {
                const uint32_t ndx = ix + numX * iy;
                const float x = ox + step * static_cast<float>(ix);
                const float y = oy + step * static_cast<float>(iy);

                pData[ndx][0] = Vec4(x + step, y, z, 1.0f);
                pData[ndx][1] = Vec4(x, y, z, 1.0f);
                pData[ndx][2] = Vec4(x, y + step, z, 1.0f);

                pData[ndx][3] = Vec4(x, y + step, z, 1.0f);
                pData[ndx][4] = Vec4(x + step, y + step, z, 1.0f);
                pData[ndx][5] = Vec4(x + step, y, z, 1.0f);
            }
    }
}
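
// For instance (illustrative), the vertex-buffer test below fills a full-viewport
// grid with:
//
//   generateGrid(hostPtr, 2.0f / GRID_SIZE, -1.0f, -1.0f, GRID_SIZE, GRID_SIZE);
//
// i.e. tiles of side 2/GRID_SIZE in clip space, starting at the corner (-1, -1).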

//! Base test for a sparse buffer backing a vertex/index buffer
class DrawGridTestInstance : public SparseBufferTestInstance
{
public:
    DrawGridTestInstance(Context &context, const TestFlags flags, const VkBufferUsageFlags usage,
                         const VkDeviceSize minChunkSize)
        : SparseBufferTestInstance(context, flags)
        , m_bufferUsage(usage)
        , m_minChunkSize(minChunkSize)
        , m_perDrawBufferOffset(0)
        , m_stagingBufferSize(0)
    {
    }

    void createResources(uint32_t memoryDeviceIndex)
    {
        const InstanceInterface &instance = m_context.getInstanceInterface();
        const DeviceInterface &vk = getDeviceInterface();
        VkBufferCreateInfo referenceBufferCreateInfo = getSparseBufferCreateInfo(m_bufferUsage);

        {
            // Allocate two chunks, each covering half of the viewport
            SparseAllocationBuilder builder;
            builder.addMemoryBind();

            if (m_residency)
                builder.addResourceHole();

            builder.addMemoryAllocation().addMemoryHole().addMemoryBind();

            if (m_aliased)
                builder.addAliasedMemoryBind(0u, 0u);

            m_sparseAllocation = builder.build(instance, getPhysicalDevice(memoryDeviceIndex), vk, getDevice(),
                                               getAllocator(), referenceBufferCreateInfo, m_minChunkSize);
        }

        // Create the buffer
        referenceBufferCreateInfo.size = m_sparseAllocation->resourceSize;
        m_sparseBuffer = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);

        m_perDrawBufferOffset = m_sparseAllocation->resourceSize / m_sparseAllocation->numResourceChunks;
        m_stagingBufferSize = 2 * m_perDrawBufferOffset;
        m_stagingBuffer = makeBuffer(vk, getDevice(), m_stagingBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
        m_stagingBufferAlloc =
            bindBuffer(vk, getDevice(), getAllocator(), *m_stagingBuffer, MemoryRequirement::HostVisible);
    }

    tcu::TestStatus iterate(void)
    {
        const DeviceInterface &vk = getDeviceInterface();

        for (uint32_t physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
        {
            const uint32_t firstDeviceID = physDevID;
            const uint32_t secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices;

            createResources(secondDeviceID);

            if (firstDeviceID != secondDeviceID)
            {
                VkPeerMemoryFeatureFlags peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
                vk.getDeviceGroupPeerMemoryFeatures(getDevice(), m_sparseAllocation->heapIndex, firstDeviceID,
                                                    secondDeviceID, &peerMemoryFeatureFlags);

                if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_DST_BIT) == 0) ||
                    ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT) == 0))
                {
                    TCU_THROW(NotSupportedError, "Peer memory does not support COPY_DST and GENERIC_SRC");
                }
            }

            // Bind the memory
            bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *m_sparseBuffer, *m_sparseAllocation,
                             usingDeviceGroups(), firstDeviceID, secondDeviceID);

            initializeBuffers();

            // Upload to the sparse buffer
            {
                flushAlloc(vk, getDevice(), *m_stagingBufferAlloc);

                VkDeviceSize firstChunkOffset = 0ull;
                VkDeviceSize secondChunkOffset = m_perDrawBufferOffset;

                if (m_residency)
                    secondChunkOffset += m_perDrawBufferOffset;

                if (m_aliased)
                    firstChunkOffset = secondChunkOffset + m_perDrawBufferOffset;

                const VkBufferCopy copyRegions[] = {
                    {
                        0ull,                  // VkDeviceSize srcOffset;
                        firstChunkOffset,      // VkDeviceSize dstOffset;
                        m_perDrawBufferOffset, // VkDeviceSize size;
                    },
                    {
                        m_perDrawBufferOffset, // VkDeviceSize srcOffset;
                        secondChunkOffset,     // VkDeviceSize dstOffset;
                        m_perDrawBufferOffset, // VkDeviceSize size;
                    },
                };

                const Unique<VkCommandPool> cmdPool(
                    makeCommandPool(vk, getDevice(), m_universalQueue.queueFamilyIndex));
                const Unique<VkCommandBuffer> cmdBuffer(
                    allocateCommandBuffer(vk, getDevice(), *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

                beginCommandBuffer(vk, *cmdBuffer);
                vk.cmdCopyBuffer(*cmdBuffer, *m_stagingBuffer, *m_sparseBuffer, DE_LENGTH_OF_ARRAY(copyRegions),
                                 copyRegions);
                endCommandBuffer(vk, *cmdBuffer);

                submitCommandsAndWait(vk, getDevice(), m_universalQueue.queueHandle, *cmdBuffer, 0u, DE_NULL, DE_NULL,
                                      0, DE_NULL, usingDeviceGroups(), firstDeviceID);
            }

            Renderer::SpecializationMap specMap;
            draw(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, DE_NULL, specMap, usingDeviceGroups(), firstDeviceID);

            if (!isResultImageCorrect())
                return tcu::TestStatus::fail("Some buffer values were incorrect");
        }
        return tcu::TestStatus::pass("Pass");
    }

protected:
    virtual void initializeBuffers(void) = 0;

    const VkBufferUsageFlags m_bufferUsage;
    const VkDeviceSize m_minChunkSize;

    VkDeviceSize m_perDrawBufferOffset;

    VkDeviceSize m_stagingBufferSize;
    Move<VkBuffer> m_stagingBuffer;
    MovePtr<Allocation> m_stagingBufferAlloc;

    MovePtr<SparseAllocation> m_sparseAllocation;
    Move<VkBuffer> m_sparseBuffer;
};

//! Sparse buffer backing a vertex input buffer
class VertexBufferTestInstance : public DrawGridTestInstance
{
public:
    VertexBufferTestInstance(Context &context, const TestFlags flags)
        : DrawGridTestInstance(context, flags, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
                               GRID_SIZE * GRID_SIZE * 6 * sizeof(Vec4))
    {
    }

    void rendererDraw(const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
    {
        DE_UNREF(pipelineLayout);

        m_context.getTestContext().getLog()
            << tcu::TestLog::Message
            << "Drawing a grid of triangles backed by a sparse vertex buffer. There should be no red pixels visible."
            << tcu::TestLog::EndMessage;

        const DeviceInterface &vk = getDeviceInterface();
        const uint32_t vertexCount = 6 * (GRID_SIZE * GRID_SIZE) / 2;
        VkDeviceSize vertexOffset = 0ull;

        vk.cmdBindVertexBuffers(cmdBuffer, 0u, 1u, &m_sparseBuffer.get(), &vertexOffset);
        vk.cmdDraw(cmdBuffer, vertexCount, 1u, 0u, 0u);

        vertexOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);

        vk.cmdBindVertexBuffers(cmdBuffer, 0u, 1u, &m_sparseBuffer.get(), &vertexOffset);
        vk.cmdDraw(cmdBuffer, vertexCount, 1u, 0u, 0u);
    }

    void initializeBuffers(void)
    {
        uint8_t *pData = static_cast<uint8_t *>(m_stagingBufferAlloc->getHostPtr());
        const float step = 2.0f / static_cast<float>(GRID_SIZE);

        // Prepare data for two draw calls
        generateGrid(pData, step, -1.0f, -1.0f, GRID_SIZE, GRID_SIZE / 2);
        generateGrid(pData + m_perDrawBufferOffset, step, -1.0f, 0.0f, GRID_SIZE, GRID_SIZE / 2);
    }
};

//! Sparse buffer backing an index buffer
class IndexBufferTestInstance : public DrawGridTestInstance
{
public:
    IndexBufferTestInstance(Context &context, const TestFlags flags)
        : DrawGridTestInstance(context, flags, VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
                               GRID_SIZE * GRID_SIZE * 6 * sizeof(uint32_t))
        , m_halfVertexCount(6 * (GRID_SIZE * GRID_SIZE) / 2)
    {
    }

    void rendererDraw(const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
    {
        DE_UNREF(pipelineLayout);

        m_context.getTestContext().getLog()
            << tcu::TestLog::Message
            << "Drawing a grid of triangles from a sparse index buffer. There should be no red pixels visible."
            << tcu::TestLog::EndMessage;

        const DeviceInterface &vk = getDeviceInterface();
        const VkDeviceSize vertexOffset = 0ull;
        VkDeviceSize indexOffset = 0ull;

        vk.cmdBindVertexBuffers(cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);

        vk.cmdBindIndexBuffer(cmdBuffer, *m_sparseBuffer, indexOffset, VK_INDEX_TYPE_UINT32);
        vk.cmdDrawIndexed(cmdBuffer, m_halfVertexCount, 1u, 0u, 0, 0u);

        indexOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);

        vk.cmdBindIndexBuffer(cmdBuffer, *m_sparseBuffer, indexOffset, VK_INDEX_TYPE_UINT32);
        vk.cmdDrawIndexed(cmdBuffer, m_halfVertexCount, 1u, 0u, 0, 0u);
    }

    void initializeBuffers(void)
    {
        // Vertex buffer
        const DeviceInterface &vk = getDeviceInterface();
        const VkDeviceSize vertexBufferSize = 2 * m_halfVertexCount * sizeof(Vec4);
        m_vertexBuffer = makeBuffer(vk, getDevice(), vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
        m_vertexBufferAlloc =
            bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);

        {
            const float step = 2.0f / static_cast<float>(GRID_SIZE);

            generateGrid(m_vertexBufferAlloc->getHostPtr(), step, -1.0f, -1.0f, GRID_SIZE, GRID_SIZE);

            flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
        }

        // Sparse index buffer
        for (uint32_t chunkNdx = 0u; chunkNdx < 2; ++chunkNdx)
        {
            uint8_t *const pData =
                static_cast<uint8_t *>(m_stagingBufferAlloc->getHostPtr()) + chunkNdx * m_perDrawBufferOffset;
            uint32_t *const pIndexData = reinterpret_cast<uint32_t *>(pData);
            const uint32_t ndxBase = chunkNdx * m_halfVertexCount;

            for (uint32_t i = 0u; i < m_halfVertexCount; ++i)
                pIndexData[i] = ndxBase + i;
        }
    }

private:
    const uint32_t m_halfVertexCount;
    Move<VkBuffer> m_vertexBuffer;
    MovePtr<Allocation> m_vertexBufferAlloc;
};
1359
1360 //! Draw from a sparse indirect buffer
class IndirectBufferTestInstance : public DrawGridTestInstance
{
public:
    IndirectBufferTestInstance(Context &context, const TestFlags flags)
        : DrawGridTestInstance(context, flags, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, sizeof(VkDrawIndirectCommand))
    {
    }

    void rendererDraw(const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
    {
        DE_UNREF(pipelineLayout);

        m_context.getTestContext().getLog()
            << tcu::TestLog::Message
            << "Drawing two triangles covering the whole viewport. There should be no red pixels visible."
            << tcu::TestLog::EndMessage;

        const DeviceInterface &vk       = getDeviceInterface();
        const VkDeviceSize vertexOffset = 0ull;
        VkDeviceSize indirectOffset     = 0ull;

        vk.cmdBindVertexBuffers(cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
        vk.cmdDrawIndirect(cmdBuffer, *m_sparseBuffer, indirectOffset, 1u, 0u);

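        // As in the index buffer variant, sparse residency presumably places the second indirect command
        // one chunk further in, with an unbound chunk in between, hence the doubled offset.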
        indirectOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);

        vk.cmdDrawIndirect(cmdBuffer, *m_sparseBuffer, indirectOffset, 1u, 0u);
    }

    void initializeBuffers(void)
    {
        // Vertex buffer
        const DeviceInterface &vk           = getDeviceInterface();
        const VkDeviceSize vertexBufferSize = 2 * 3 * sizeof(Vec4);
        m_vertexBuffer      = makeBuffer(vk, getDevice(), vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
        m_vertexBufferAlloc =
            bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);

        {
            generateGrid(m_vertexBufferAlloc->getHostPtr(), 2.0f, -1.0f, -1.0f, 1, 1);
            flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
        }

        // Indirect buffer
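        // Each chunk receives one VkDrawIndirectCommand; the two commands differ only in firstVertex,
        // selecting the first or second triangle of the fullscreen quad in the vertex buffer.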
        for (uint32_t chunkNdx = 0u; chunkNdx < 2u; ++chunkNdx)
        {
            uint8_t *const pData =
                static_cast<uint8_t *>(m_stagingBufferAlloc->getHostPtr()) + chunkNdx * m_perDrawBufferOffset;
            VkDrawIndirectCommand *const pCmdData = reinterpret_cast<VkDrawIndirectCommand *>(pData);

            pCmdData->firstVertex   = 3u * chunkNdx;
            pCmdData->firstInstance = 0u;
            pCmdData->vertexCount   = 3u;
            pCmdData->instanceCount = 1u;
        }
    }

private:
    Move<VkBuffer> m_vertexBuffer;
    MovePtr<Allocation> m_vertexBufferAlloc;
};

//! Similar to the class in vktTestCaseUtil.hpp, but uses Arg0 directly rather than through an InstanceFunction1
template <typename Arg0>
class FunctionProgramsSimple1
{
public:
    typedef void (*Function)(vk::SourceCollections &dst, Arg0 arg0);

    FunctionProgramsSimple1(Function func) : m_func(func)
    {
    }

    void init(vk::SourceCollections &dst, const Arg0 &arg0) const
    {
        m_func(dst, arg0);
    }

private:
    const Function m_func;
};
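//
// Typical usage (as wired up in createTestInstanceWithPrograms() below) is to wrap a freestanding
// initPrograms-style function and forward the per-test argument to it; variable names here are
// illustrative only:
//
//   FunctionProgramsSimple1<TestFlags> programs(initProgramsDrawGrid);
//   programs.init(sourceCollections, flags); // calls initProgramsDrawGrid(sourceCollections, flags)
//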

void checkSupport(Context &context, const TestFlags flags)
{
    context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);

    if (flags & TEST_FLAG_RESIDENCY)
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_BUFFER);

    if (flags & TEST_FLAG_ALIASED)
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_ALIASED);

    if ((flags & TEST_FLAG_NON_RESIDENT_STRICT) &&
        !context.getDeviceProperties().sparseProperties.residencyNonResidentStrict)
        TCU_THROW(NotSupportedError, "Missing sparse property: residencyNonResidentStrict");
}

//! Convenience function to create a TestCase based on a freestanding initPrograms and a TestInstance implementation
template <typename TestInstanceT, typename Arg0>
TestCase *createTestInstanceWithPrograms(tcu::TestContext &testCtx, const std::string &name,
                                         typename FunctionProgramsSimple1<Arg0>::Function initPrograms, Arg0 arg0)
{
    return new InstanceFactory1WithSupport<TestInstanceT, Arg0, FunctionSupport1<Arg0>, FunctionProgramsSimple1<Arg0>>(
        testCtx, name, FunctionProgramsSimple1<Arg0>(initPrograms), arg0,
        typename FunctionSupport1<Arg0>::Args(checkSupport, arg0));
}

void populateTestGroup(tcu::TestCaseGroup *parentGroup)
{
    const struct
    {
        std::string name;
        TestFlags flags;
    } groups[] = {
        {"sparse_binding", 0u},
        {"sparse_binding_aliased", TEST_FLAG_ALIASED},
        {"sparse_residency", TEST_FLAG_RESIDENCY},
        {"sparse_residency_aliased", TEST_FLAG_RESIDENCY | TEST_FLAG_ALIASED},
        {"sparse_residency_non_resident_strict", TEST_FLAG_RESIDENCY | TEST_FLAG_NON_RESIDENT_STRICT},
    };

    const int numGroupsIncludingNonResidentStrict = DE_LENGTH_OF_ARRAY(groups);
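    // Only the UBO tests below use the full list; the vertex/index/indirect buffer groups use the default
    // list, which drops the non_resident_strict variant.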
    const int numGroupsDefaultList   = numGroupsIncludingNonResidentStrict - 1;
    const std::string devGroupPrefix = "device_group_";
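
    // Resulting hierarchy under "buffer": transfer and ssbo reuse the pre-built sparse binding, residency
    // and aliasing tests, while ubo, vertex_buffer, index_buffer and indirect_buffer instantiate the
    // draw-grid test variants above, each in a single-device and a device_group flavor.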

    // Transfer
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "transfer"));
        {
            MovePtr<tcu::TestCaseGroup> subGroup(
                new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_binding"));
            addBufferSparseBindingTests(subGroup.get(), false);
            group->addChild(subGroup.release());

            MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(
                new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_binding"));
            addBufferSparseBindingTests(subGroupDeviceGroups.get(), true);
            group->addChild(subGroupDeviceGroups.release());
        }
        parentGroup->addChild(group.release());
    }

    // SSBO
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "ssbo"));
        {
            MovePtr<tcu::TestCaseGroup> subGroup(
                new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_binding_aliased"));
            addBufferSparseMemoryAliasingTests(subGroup.get(), false);
            group->addChild(subGroup.release());

            MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(
                new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_binding_aliased"));
            addBufferSparseMemoryAliasingTests(subGroupDeviceGroups.get(), true);
            group->addChild(subGroupDeviceGroups.release());
        }
        {
            MovePtr<tcu::TestCaseGroup> subGroup(
                new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_residency"));
            addBufferSparseResidencyTests(subGroup.get(), false);
            group->addChild(subGroup.release());

            MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(
                new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_residency"));
            addBufferSparseResidencyTests(subGroupDeviceGroups.get(), true);
            group->addChild(subGroupDeviceGroups.release());
        }
        parentGroup->addChild(group.release());
    }

    // UBO
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "ubo"));

        for (int groupNdx = 0; groupNdx < numGroupsIncludingNonResidentStrict; ++groupNdx)
        {
            group->addChild(
                createTestInstanceWithPrograms<UBOTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(),
                                                                initProgramsDrawWithUBO, groups[groupNdx].flags));
        }
        for (int groupNdx = 0; groupNdx < numGroupsIncludingNonResidentStrict; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<UBOTestInstance>(
                group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), initProgramsDrawWithUBO,
                groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
        }
        parentGroup->addChild(group.release());
    }

    // Vertex buffer
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "vertex_buffer"));

        for (int groupNdx = 0; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<VertexBufferTestInstance>(
                group->getTestContext(), groups[groupNdx].name.c_str(), initProgramsDrawGrid, groups[groupNdx].flags));
        }
        for (int groupNdx = 0; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<VertexBufferTestInstance>(
                group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), initProgramsDrawGrid,
                groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
        }

        parentGroup->addChild(group.release());
    }

    // Index buffer
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "index_buffer"));

        for (int groupNdx = 0; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<IndexBufferTestInstance>(
                group->getTestContext(), groups[groupNdx].name.c_str(), initProgramsDrawGrid, groups[groupNdx].flags));
        }
        for (int groupNdx = 0; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<IndexBufferTestInstance>(
                group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), initProgramsDrawGrid,
                groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
        }

        parentGroup->addChild(group.release());
    }

    // Indirect buffer
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "indirect_buffer"));

        for (int groupNdx = 0; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<IndirectBufferTestInstance>(
                group->getTestContext(), groups[groupNdx].name.c_str(), initProgramsDrawGrid, groups[groupNdx].flags));
        }
        for (int groupNdx = 0; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<IndirectBufferTestInstance>(
                group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), initProgramsDrawGrid,
                groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
        }

        parentGroup->addChild(group.release());
    }
}

} // namespace

tcu::TestCaseGroup *createSparseBufferTests(tcu::TestContext &testCtx)
{
    return createTestGroup(testCtx, "buffer", populateTestGroup);
}

} // namespace sparse
} // namespace vkt