/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Sparse buffer tests
 *//*--------------------------------------------------------------------*/

#include "vktSparseResourcesBufferTests.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktTestGroupUtil.hpp"
#include "vktSparseResourcesTestsUtil.hpp"
#include "vktSparseResourcesBase.hpp"
#include "vktSparseResourcesBufferSparseBinding.hpp"
#include "vktSparseResourcesBufferSparseResidency.hpp"
#include "vktSparseResourcesBufferMemoryAliasing.hpp"

#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"

#include "tcuTestLog.hpp"

#include "deUniquePtr.hpp"
#include "deSharedPtr.hpp"
#include "deMath.h"

#include <string>
#include <vector>
#include <map>

using namespace vk;
using de::MovePtr;
using de::SharedPtr;
using de::UniquePtr;
using tcu::IVec2;
using tcu::IVec4;
using tcu::Vec4;

namespace vkt
{
namespace sparse
{
namespace
{

typedef SharedPtr<UniquePtr<Allocation>> AllocationSp;

enum
{
    RENDER_SIZE = 128,             //!< framebuffer size in pixels
    GRID_SIZE   = RENDER_SIZE / 8, //!< number of grid tiles in a row
};

enum TestFlagBits
{
    //   sparseBinding is implied
    TEST_FLAG_ALIASED              = 1u << 0, //!< sparseResidencyAliased
    TEST_FLAG_RESIDENCY            = 1u << 1, //!< sparseResidencyBuffer
    TEST_FLAG_NON_RESIDENT_STRICT  = 1u << 2, //!< residencyNonResidentStrict
    TEST_FLAG_ENABLE_DEVICE_GROUPS = 1u << 3, //!< device groups are enabled
};
typedef uint32_t TestFlags;
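// Flags may be combined; e.g. (TEST_FLAG_RESIDENCY | TEST_FLAG_NON_RESIDENT_STRICT)
// exercises reads from unbound buffer ranges, which must return zero when the
// device reports residencyNonResidentStrict.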

//! SparseAllocationBuilder output. Owns the allocated memory.
struct SparseAllocation
{
    uint32_t numResourceChunks;
    VkDeviceSize resourceSize;                   //!< buffer size in bytes
    std::vector<AllocationSp> allocations;       //!< actual allocated memory
    std::vector<VkSparseMemoryBind> memoryBinds; //!< memory binds backing the resource
    uint32_t memoryType;                         //!< memory type (same for all allocations)
    uint32_t heapIndex;                          //!< memory heap index
};

//! Utility to lay out memory allocations for a sparse buffer, including holes and aliased regions.
//! Will allocate memory upon building.
class SparseAllocationBuilder
{
public:
    SparseAllocationBuilder(void);

    // \note "chunk" is the smallest (due to alignment) bindable amount of memory

    SparseAllocationBuilder &addMemoryHole(const uint32_t numChunks = 1u);
    SparseAllocationBuilder &addResourceHole(const uint32_t numChunks = 1u);
    SparseAllocationBuilder &addMemoryBind(const uint32_t numChunks = 1u);
    SparseAllocationBuilder &addAliasedMemoryBind(const uint32_t allocationNdx, const uint32_t chunkOffset,
                                                  const uint32_t numChunks = 1u);
    SparseAllocationBuilder &addMemoryAllocation(void);

    MovePtr<SparseAllocation> build(
        const InstanceInterface &instanceInterface, const VkPhysicalDevice physicalDevice, const DeviceInterface &vk,
        const VkDevice device, Allocator &allocator,
        VkBufferCreateInfo referenceCreateInfo,        //!< buffer size is ignored in this info
        const VkDeviceSize minChunkSize = 0ull) const; //!< make sure chunks are at least this big

private:
    struct MemoryBind
    {
        uint32_t allocationNdx;
        uint32_t resourceChunkNdx;
        uint32_t memoryChunkNdx;
        uint32_t numChunks;
    };

    uint32_t m_allocationNdx;
    uint32_t m_resourceChunkNdx;
    uint32_t m_memoryChunkNdx;
    std::vector<MemoryBind> m_memoryBinds;
    std::vector<uint32_t> m_chunksPerAllocation;
};
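
// Illustrative use (this mirrors the layout built by the UBO test below):
//
//     SparseAllocationBuilder builder;
//     builder.addMemoryBind();              // resource chunk 0 <- allocation 0
//     builder.addResourceHole();            // resource chunk 1 left unbound
//     builder.addMemoryAllocation()         // start allocation 1
//            .addMemoryHole()               // skip one chunk of allocation 1's memory
//            .addMemoryBind();              // resource chunk 2 <- allocation 1
//     builder.addAliasedMemoryBind(0u, 0u); // resource chunk 3 aliases allocation 0, chunk 0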

SparseAllocationBuilder::SparseAllocationBuilder(void) : m_allocationNdx(0), m_resourceChunkNdx(0), m_memoryChunkNdx(0)
{
    m_chunksPerAllocation.push_back(0);
}

SparseAllocationBuilder &SparseAllocationBuilder::addMemoryHole(const uint32_t numChunks)
{
    m_memoryChunkNdx += numChunks;
    m_chunksPerAllocation[m_allocationNdx] += numChunks;

    return *this;
}

SparseAllocationBuilder &SparseAllocationBuilder::addResourceHole(const uint32_t numChunks)
{
    m_resourceChunkNdx += numChunks;

    return *this;
}

SparseAllocationBuilder &SparseAllocationBuilder::addMemoryAllocation(void)
{
    DE_ASSERT(m_memoryChunkNdx != 0); // doesn't make sense to have an empty allocation

    m_allocationNdx += 1;
    m_memoryChunkNdx = 0;
    m_chunksPerAllocation.push_back(0);

    return *this;
}

SparseAllocationBuilder &SparseAllocationBuilder::addMemoryBind(const uint32_t numChunks)
{
    const MemoryBind memoryBind = {m_allocationNdx, m_resourceChunkNdx, m_memoryChunkNdx, numChunks};
    m_memoryBinds.push_back(memoryBind);

    m_resourceChunkNdx += numChunks;
    m_memoryChunkNdx += numChunks;
    m_chunksPerAllocation[m_allocationNdx] += numChunks;

    return *this;
}

SparseAllocationBuilder &SparseAllocationBuilder::addAliasedMemoryBind(const uint32_t allocationNdx,
                                                                       const uint32_t chunkOffset,
                                                                       const uint32_t numChunks)
{
    DE_ASSERT(allocationNdx <= m_allocationNdx);

    const MemoryBind memoryBind = {allocationNdx, m_resourceChunkNdx, chunkOffset, numChunks};
    m_memoryBinds.push_back(memoryBind);

    m_resourceChunkNdx += numChunks;

    return *this;
}

MovePtr<SparseAllocation> SparseAllocationBuilder::build(const InstanceInterface &instanceInterface,
                                                         const VkPhysicalDevice physicalDevice,
                                                         const DeviceInterface &vk, const VkDevice device,
                                                         Allocator &allocator, VkBufferCreateInfo referenceCreateInfo,
                                                         const VkDeviceSize minChunkSize) const
{

    MovePtr<SparseAllocation> sparseAllocation(new SparseAllocation());

    referenceCreateInfo.size = sizeof(uint32_t);
    const Unique<VkBuffer> refBuffer(createBuffer(vk, device, &referenceCreateInfo));
    const VkMemoryRequirements memoryRequirements = getBufferMemoryRequirements(vk, device, *refBuffer);
    const VkDeviceSize chunkSize                  = std::max(
        memoryRequirements.alignment, static_cast<VkDeviceSize>(deAlign64(minChunkSize, memoryRequirements.alignment)));
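    // e.g. with minChunkSize = 512: an alignment of 256 yields chunkSize = 512,
    // while an alignment of 1024 rounds the chunk up to 1024.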
    const uint32_t memoryTypeNdx =
        findMatchingMemoryType(instanceInterface, physicalDevice, memoryRequirements, MemoryRequirement::Any);
    VkMemoryAllocateInfo allocInfo = {
        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType;
        DE_NULL,                                // const void* pNext;
        memoryRequirements.size,                // VkDeviceSize allocationSize;
        memoryTypeNdx,                          // uint32_t memoryTypeIndex;
    };

    for (std::vector<uint32_t>::const_iterator numChunksIter = m_chunksPerAllocation.begin();
         numChunksIter != m_chunksPerAllocation.end(); ++numChunksIter)
    {
        allocInfo.allocationSize = *numChunksIter * chunkSize;
        sparseAllocation->allocations.push_back(makeDeSharedPtr(allocator.allocate(allocInfo, (VkDeviceSize)0)));
    }

    for (std::vector<MemoryBind>::const_iterator memBindIter = m_memoryBinds.begin();
         memBindIter != m_memoryBinds.end(); ++memBindIter)
    {
        const Allocation &alloc       = **sparseAllocation->allocations[memBindIter->allocationNdx];
        const VkSparseMemoryBind bind = {
            memBindIter->resourceChunkNdx * chunkSize,                   // VkDeviceSize               resourceOffset;
            memBindIter->numChunks * chunkSize,                          // VkDeviceSize               size;
            alloc.getMemory(),                                           // VkDeviceMemory             memory;
            alloc.getOffset() + memBindIter->memoryChunkNdx * chunkSize, // VkDeviceSize               memoryOffset;
            (VkSparseMemoryBindFlags)0,                                  // VkSparseMemoryBindFlags    flags;
        };
        sparseAllocation->memoryBinds.push_back(bind);
        referenceCreateInfo.size = std::max(referenceCreateInfo.size, bind.resourceOffset + bind.size);
    }

    sparseAllocation->resourceSize      = referenceCreateInfo.size;
    sparseAllocation->numResourceChunks = m_resourceChunkNdx;
    sparseAllocation->memoryType        = memoryTypeNdx;
    sparseAllocation->heapIndex         = getHeapIndexForMemoryType(instanceInterface, physicalDevice, memoryTypeNdx);

    return sparseAllocation;
}

VkImageCreateInfo makeImageCreateInfo(const VkFormat format, const IVec2 &size, const VkImageUsageFlags usage)
{
    const VkImageCreateInfo imageParams = {
        VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                             // const void* pNext;
        (VkImageCreateFlags)0,               // VkImageCreateFlags flags;
        VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
        format,                              // VkFormat format;
        makeExtent3D(size.x(), size.y(), 1), // VkExtent3D extent;
        1u,                                  // uint32_t mipLevels;
        1u,                                  // uint32_t arrayLayers;
        VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
        VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
        usage,                               // VkImageUsageFlags usage;
        VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
        0u,                                  // uint32_t queueFamilyIndexCount;
        DE_NULL,                             // const uint32_t* pQueueFamilyIndices;
        VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
    };
    return imageParams;
}

Move<VkPipeline> makeGraphicsPipeline(const DeviceInterface &vk, const VkDevice device,
                                      const VkPipelineLayout pipelineLayout, const VkRenderPass renderPass,
                                      const IVec2 renderSize, const VkPrimitiveTopology topology,
                                      const uint32_t stageCount, const VkPipelineShaderStageCreateInfo *pStages)
{
    const VkVertexInputBindingDescription vertexInputBindingDescription = {
        0u,                          // uint32_t binding;
        sizeof(Vec4),                // uint32_t stride;
        VK_VERTEX_INPUT_RATE_VERTEX, // VkVertexInputRate inputRate;
    };

    const VkVertexInputAttributeDescription vertexInputAttributeDescription = {
        0u,                            // uint32_t location;
        0u,                            // uint32_t binding;
        VK_FORMAT_R32G32B32A32_SFLOAT, // VkFormat format;
        0u,                            // uint32_t offset;
    };

    const VkPipelineVertexInputStateCreateInfo vertexInputStateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType                             sType;
        DE_NULL,                                                   // const void*                                 pNext;
        (VkPipelineVertexInputStateCreateFlags)0,                  // VkPipelineVertexInputStateCreateFlags       flags;
        1u,                             // uint32_t                                    vertexBindingDescriptionCount;
        &vertexInputBindingDescription, // const VkVertexInputBindingDescription*      pVertexBindingDescriptions;
        1u,                             // uint32_t                                    vertexAttributeDescriptionCount;
        &vertexInputAttributeDescription, // const VkVertexInputAttributeDescription*    pVertexAttributeDescriptions;
    };

    const VkPipelineInputAssemblyStateCreateInfo pipelineInputAssemblyStateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, // VkStructureType                             sType;
        DE_NULL,                                    // const void*                                 pNext;
        (VkPipelineInputAssemblyStateCreateFlags)0, // VkPipelineInputAssemblyStateCreateFlags     flags;
        topology,                                   // VkPrimitiveTopology                         topology;
        VK_FALSE, // VkBool32                                    primitiveRestartEnable;
    };

    const VkViewport viewport = makeViewport(renderSize);
    const VkRect2D scissor    = makeRect2D(renderSize);

    const VkPipelineViewportStateCreateInfo pipelineViewportStateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, // VkStructureType                             sType;
        DE_NULL,                                               // const void*                                 pNext;
        (VkPipelineViewportStateCreateFlags)0,                 // VkPipelineViewportStateCreateFlags          flags;
        1u,        // uint32_t                                    viewportCount;
        &viewport, // const VkViewport*                           pViewports;
        1u,        // uint32_t                                    scissorCount;
        &scissor,  // const VkRect2D*                             pScissors;
    };

    const VkPipelineRasterizationStateCreateInfo pipelineRasterizationStateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType                          sType;
        DE_NULL,                                                    // const void*                              pNext;
        (VkPipelineRasterizationStateCreateFlags)0,                 // VkPipelineRasterizationStateCreateFlags  flags;
        VK_FALSE,                        // VkBool32                                 depthClampEnable;
        VK_FALSE,                        // VkBool32                                 rasterizerDiscardEnable;
        VK_POLYGON_MODE_FILL,            // VkPolygonMode polygonMode;
        VK_CULL_MODE_NONE,               // VkCullModeFlags cullMode;
        VK_FRONT_FACE_COUNTER_CLOCKWISE, // VkFrontFace frontFace;
        VK_FALSE,                        // VkBool32 depthBiasEnable;
        0.0f,                            // float depthBiasConstantFactor;
        0.0f,                            // float depthBiasClamp;
        0.0f,                            // float depthBiasSlopeFactor;
        1.0f,                            // float lineWidth;
    };

    const VkPipelineMultisampleStateCreateInfo pipelineMultisampleStateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                                                  // const void* pNext;
        (VkPipelineMultisampleStateCreateFlags)0,                 // VkPipelineMultisampleStateCreateFlags flags;
        VK_SAMPLE_COUNT_1_BIT,                                    // VkSampleCountFlagBits rasterizationSamples;
        VK_FALSE,                                                 // VkBool32 sampleShadingEnable;
        0.0f,                                                     // float minSampleShading;
        DE_NULL,                                                  // const VkSampleMask* pSampleMask;
        VK_FALSE,                                                 // VkBool32 alphaToCoverageEnable;
        VK_FALSE                                                  // VkBool32 alphaToOneEnable;
    };

    const VkStencilOpState stencilOpState = makeStencilOpState(VK_STENCIL_OP_KEEP,   // stencil fail
                                                               VK_STENCIL_OP_KEEP,   // depth & stencil pass
                                                               VK_STENCIL_OP_KEEP,   // depth only fail
                                                               VK_COMPARE_OP_ALWAYS, // compare op
                                                               0u,                   // compare mask
                                                               0u,                   // write mask
                                                               0u);                  // reference

    VkPipelineDepthStencilStateCreateInfo pipelineDepthStencilStateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                                                    // const void* pNext;
        (VkPipelineDepthStencilStateCreateFlags)0,                  // VkPipelineDepthStencilStateCreateFlags flags;
        VK_FALSE,                                                   // VkBool32 depthTestEnable;
        VK_FALSE,                                                   // VkBool32 depthWriteEnable;
        VK_COMPARE_OP_LESS,                                         // VkCompareOp depthCompareOp;
        VK_FALSE,                                                   // VkBool32 depthBoundsTestEnable;
        VK_FALSE,                                                   // VkBool32 stencilTestEnable;
        stencilOpState,                                             // VkStencilOpState front;
        stencilOpState,                                             // VkStencilOpState back;
        0.0f,                                                       // float minDepthBounds;
        1.0f,                                                       // float maxDepthBounds;
    };

    const VkColorComponentFlags colorComponentsAll =
        VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
    const VkPipelineColorBlendAttachmentState pipelineColorBlendAttachmentState = {
        VK_FALSE,             // VkBool32 blendEnable;
        VK_BLEND_FACTOR_ONE,  // VkBlendFactor srcColorBlendFactor;
        VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor;
        VK_BLEND_OP_ADD,      // VkBlendOp colorBlendOp;
        VK_BLEND_FACTOR_ONE,  // VkBlendFactor srcAlphaBlendFactor;
        VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstAlphaBlendFactor;
        VK_BLEND_OP_ADD,      // VkBlendOp alphaBlendOp;
        colorComponentsAll,   // VkColorComponentFlags colorWriteMask;
    };

    const VkPipelineColorBlendStateCreateInfo pipelineColorBlendStateInfo = {
        VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                                                  // const void* pNext;
        (VkPipelineColorBlendStateCreateFlags)0,                  // VkPipelineColorBlendStateCreateFlags flags;
        VK_FALSE,                                                 // VkBool32 logicOpEnable;
        VK_LOGIC_OP_COPY,                                         // VkLogicOp logicOp;
        1u,                                                       // uint32_t attachmentCount;
        &pipelineColorBlendAttachmentState, // const VkPipelineColorBlendAttachmentState* pAttachments;
        {0.0f, 0.0f, 0.0f, 0.0f},           // float blendConstants[4];
    };

    const VkGraphicsPipelineCreateInfo graphicsPipelineInfo = {
        VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, // VkStructureType sType;
        DE_NULL,                                         // const void* pNext;
        (VkPipelineCreateFlags)0,                        // VkPipelineCreateFlags flags;
        stageCount,                                      // uint32_t stageCount;
        pStages,                                         // const VkPipelineShaderStageCreateInfo* pStages;
        &vertexInputStateInfo,           // const VkPipelineVertexInputStateCreateInfo* pVertexInputState;
        &pipelineInputAssemblyStateInfo, // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
        DE_NULL,                         // const VkPipelineTessellationStateCreateInfo* pTessellationState;
        &pipelineViewportStateInfo,      // const VkPipelineViewportStateCreateInfo* pViewportState;
        &pipelineRasterizationStateInfo, // const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
        &pipelineMultisampleStateInfo,   // const VkPipelineMultisampleStateCreateInfo* pMultisampleState;
        &pipelineDepthStencilStateInfo,  // const VkPipelineDepthStencilStateCreateInfo* pDepthStencilState;
        &pipelineColorBlendStateInfo,    // const VkPipelineColorBlendStateCreateInfo* pColorBlendState;
        DE_NULL,                         // const VkPipelineDynamicStateCreateInfo* pDynamicState;
        pipelineLayout,                  // VkPipelineLayout layout;
        renderPass,                      // VkRenderPass renderPass;
        0u,                              // uint32_t subpass;
        DE_NULL,                         // VkPipeline basePipelineHandle;
        0,                               // int32_t basePipelineIndex;
    };

    return createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineInfo);
}

//! Return true if there are any red (or all zero) pixels in the image
bool imageHasErrorPixels(const tcu::ConstPixelBufferAccess image)
{
    const Vec4 errorColor = Vec4(1.0f, 0.0f, 0.0f, 1.0f);
    const Vec4 blankColor = Vec4();

    for (int y = 0; y < image.getHeight(); ++y)
        for (int x = 0; x < image.getWidth(); ++x)
        {
            const Vec4 color = image.getPixel(x, y);
            if (color == errorColor || color == blankColor)
                return true;
        }

    return false;
}
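
// Note: the Renderer below clears the framebuffer to red and the test fragment
// shaders write red on a failed check, so an unwritten pixel and a failed check
// are flagged alike.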

class Renderer
{
public:
    typedef std::map<VkShaderStageFlagBits, const VkSpecializationInfo *> SpecializationMap;

    //! Use the delegate to bind descriptor sets, vertex buffers, etc. and make a draw call
    struct Delegate
    {
        virtual ~Delegate(void)
        {
        }
        virtual void rendererDraw(const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const = 0;
    };
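
    // A minimal Delegate sketch (UBOTestInstance::rendererDraw below is a
    // complete implementation):
    //
    //     void rendererDraw (const VkPipelineLayout layout, const VkCommandBuffer cmdBuffer) const
    //     {
    //         vk.cmdBindVertexBuffers(cmdBuffer, 0u, 1u, &vertexBuffer, &offset);
    //         vk.cmdDraw             (cmdBuffer, numVertices, 1u, 0u, 0u);
    //     }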

    Renderer(const DeviceInterface &vk, const VkDevice device, Allocator &allocator, const uint32_t queueFamilyIndex,
             const VkDescriptorSetLayout descriptorSetLayout, //!< may be NULL, if no descriptors are used
             BinaryCollection &binaryCollection, const std::string &vertexName, const std::string &fragmentName,
             const VkBuffer colorBuffer, const IVec2 &renderSize, const VkFormat colorFormat, const Vec4 &clearColor,
             const VkPrimitiveTopology topology, SpecializationMap specMap = SpecializationMap())
        : m_colorBuffer(colorBuffer)
        , m_renderSize(renderSize)
        , m_colorFormat(colorFormat)
        , m_colorSubresourceRange(makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u))
        , m_clearColor(clearColor)
        , m_topology(topology)
        , m_descriptorSetLayout(descriptorSetLayout)
    {
        m_colorImage =
            makeImage(vk, device,
                      makeImageCreateInfo(m_colorFormat, m_renderSize,
                                          VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT));
        m_colorImageAlloc = bindImage(vk, device, allocator, *m_colorImage, MemoryRequirement::Any);
        m_colorAttachment =
            makeImageView(vk, device, *m_colorImage, VK_IMAGE_VIEW_TYPE_2D, m_colorFormat, m_colorSubresourceRange);

        m_vertexModule   = createShaderModule(vk, device, binaryCollection.get(vertexName), 0u);
        m_fragmentModule = createShaderModule(vk, device, binaryCollection.get(fragmentName), 0u);

        const VkPipelineShaderStageCreateInfo pShaderStages[] = {
            {
                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
                DE_NULL,                                             // const void* pNext;
                (VkPipelineShaderStageCreateFlags)0,                 // VkPipelineShaderStageCreateFlags flags;
                VK_SHADER_STAGE_VERTEX_BIT,                          // VkShaderStageFlagBits stage;
                *m_vertexModule,                                     // VkShaderModule module;
                "main",                                              // const char* pName;
                specMap[VK_SHADER_STAGE_VERTEX_BIT],                 // const VkSpecializationInfo* pSpecializationInfo;
            },
            {
                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
                DE_NULL,                                             // const void* pNext;
                (VkPipelineShaderStageCreateFlags)0,                 // VkPipelineShaderStageCreateFlags flags;
                VK_SHADER_STAGE_FRAGMENT_BIT,                        // VkShaderStageFlagBits stage;
                *m_fragmentModule,                                   // VkShaderModule module;
                "main",                                              // const char* pName;
                specMap[VK_SHADER_STAGE_FRAGMENT_BIT],               // const VkSpecializationInfo* pSpecializationInfo;
            }};

        m_renderPass = makeRenderPass(vk, device, m_colorFormat);
        m_framebuffer =
            makeFramebuffer(vk, device, *m_renderPass, m_colorAttachment.get(), static_cast<uint32_t>(m_renderSize.x()),
                            static_cast<uint32_t>(m_renderSize.y()));
        m_pipelineLayout = makePipelineLayout(vk, device, m_descriptorSetLayout);
        m_pipeline       = makeGraphicsPipeline(vk, device, *m_pipelineLayout, *m_renderPass, m_renderSize, m_topology,
                                                DE_LENGTH_OF_ARRAY(pShaderStages), pShaderStages);
        m_cmdPool        = makeCommandPool(vk, device, queueFamilyIndex);
        m_cmdBuffer      = allocateCommandBuffer(vk, device, *m_cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
    }

    void draw(const DeviceInterface &vk, const VkDevice device, const VkQueue queue, const Delegate &drawDelegate,
              const bool useDeviceGroups, const uint32_t deviceID) const
    {
        beginCommandBuffer(vk, *m_cmdBuffer);

        beginRenderPass(vk, *m_cmdBuffer, *m_renderPass, *m_framebuffer,
                        makeRect2D(0, 0, m_renderSize.x(), m_renderSize.y()), m_clearColor);

        vk.cmdBindPipeline(*m_cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
        drawDelegate.rendererDraw(*m_pipelineLayout, *m_cmdBuffer);

        endRenderPass(vk, *m_cmdBuffer);

        copyImageToBuffer(vk, *m_cmdBuffer, *m_colorImage, m_colorBuffer, m_renderSize);

        endCommandBuffer(vk, *m_cmdBuffer);
        submitCommandsAndWait(vk, device, queue, *m_cmdBuffer, 0U, DE_NULL, DE_NULL, 0U, DE_NULL, useDeviceGroups,
                              deviceID);
    }

private:
    const VkBuffer m_colorBuffer;
    const IVec2 m_renderSize;
    const VkFormat m_colorFormat;
    const VkImageSubresourceRange m_colorSubresourceRange;
    const Vec4 m_clearColor;
    const VkPrimitiveTopology m_topology;
    const VkDescriptorSetLayout m_descriptorSetLayout;

    Move<VkImage> m_colorImage;
    MovePtr<Allocation> m_colorImageAlloc;
    Move<VkImageView> m_colorAttachment;
    Move<VkShaderModule> m_vertexModule;
    Move<VkShaderModule> m_fragmentModule;
    Move<VkRenderPass> m_renderPass;
    Move<VkFramebuffer> m_framebuffer;
    Move<VkPipelineLayout> m_pipelineLayout;
    Move<VkPipeline> m_pipeline;
    Move<VkCommandPool> m_cmdPool;
    Move<VkCommandBuffer> m_cmdBuffer;

    // "deleted"
    Renderer(const Renderer &);
    Renderer &operator=(const Renderer &);
};

void bindSparseBuffer(const DeviceInterface &vk, const VkDevice device, const VkQueue sparseQueue,
                      const VkBuffer buffer, const SparseAllocation &sparseAllocation, const bool useDeviceGroups,
                      uint32_t resourceDevId, uint32_t memoryDeviceId)
{
    const VkSparseBufferMemoryBindInfo sparseBufferMemoryBindInfo = {
        buffer,                                                     // VkBuffer                     buffer;
        static_cast<uint32_t>(sparseAllocation.memoryBinds.size()), // uint32_t                     bindCount;
        &sparseAllocation.memoryBinds[0],                           // const VkSparseMemoryBind*    pBinds;
    };

    const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo = {
        VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO, //VkStructureType sType;
        DE_NULL,                                         //const void* pNext;
        resourceDevId,                                   //uint32_t resourceDeviceIndex;
        memoryDeviceId,                                  //uint32_t memoryDeviceIndex;
    };

    const VkBindSparseInfo bindInfo = {
        VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,                  // VkStructureType                             sType;
        useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL, // const void*                                 pNext;
        0u,                          // uint32_t                                    waitSemaphoreCount;
        DE_NULL,                     // const VkSemaphore*                          pWaitSemaphores;
        1u,                          // uint32_t                                    bufferBindCount;
        &sparseBufferMemoryBindInfo, // const VkSparseBufferMemoryBindInfo*         pBufferBinds;
        0u,                          // uint32_t                                    imageOpaqueBindCount;
        DE_NULL,                     // const VkSparseImageOpaqueMemoryBindInfo*    pImageOpaqueBinds;
        0u,                          // uint32_t                                    imageBindCount;
        DE_NULL,                     // const VkSparseImageMemoryBindInfo*          pImageBinds;
        0u,                          // uint32_t                                    signalSemaphoreCount;
        DE_NULL,                     // const VkSemaphore*                          pSignalSemaphores;
    };

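    // Submit the bind operation and block until it completes; no semaphores are
    // used, so work submitted afterwards may assume the buffer is fully bound.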
    const Unique<VkFence> fence(createFence(vk, device));

    VK_CHECK(vk.queueBindSparse(sparseQueue, 1u, &bindInfo, *fence));
    VK_CHECK(vk.waitForFences(device, 1u, &fence.get(), VK_TRUE, ~0ull));
}

class SparseBufferTestInstance : public SparseResourcesBaseInstance, Renderer::Delegate
{
public:
    SparseBufferTestInstance(Context &context, const TestFlags flags)
        : SparseResourcesBaseInstance(context, (flags & TEST_FLAG_ENABLE_DEVICE_GROUPS) != 0)
        , m_aliased((flags & TEST_FLAG_ALIASED) != 0)
        , m_residency((flags & TEST_FLAG_RESIDENCY) != 0)
        , m_nonResidentStrict((flags & TEST_FLAG_NON_RESIDENT_STRICT) != 0)
        , m_renderSize(RENDER_SIZE, RENDER_SIZE)
        , m_colorFormat(VK_FORMAT_R8G8B8A8_UNORM)
        , m_colorBufferSize(m_renderSize.x() * m_renderSize.y() * tcu::getPixelSize(mapVkFormat(m_colorFormat)))
    {
        {
            QueueRequirementsVec requirements;
            requirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
            requirements.push_back(QueueRequirements(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 1u));

            createDeviceSupportingQueues(requirements);
        }

        const DeviceInterface &vk = getDeviceInterface();

        m_sparseQueue    = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0u);
        m_universalQueue = getQueue(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 0u);

        m_sharedQueueFamilyIndices[0] = m_sparseQueue.queueFamilyIndex;
        m_sharedQueueFamilyIndices[1] = m_universalQueue.queueFamilyIndex;

        m_colorBuffer = makeBuffer(vk, getDevice(), m_colorBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
        m_colorBufferAlloc =
            bindBuffer(vk, getDevice(), getAllocator(), *m_colorBuffer, MemoryRequirement::HostVisible);

        deMemset(m_colorBufferAlloc->getHostPtr(), 0, static_cast<std::size_t>(m_colorBufferSize));
        flushAlloc(vk, getDevice(), *m_colorBufferAlloc);
    }

protected:
    VkBufferCreateInfo getSparseBufferCreateInfo(const VkBufferUsageFlags usage) const
    {
        VkBufferCreateFlags flags = VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
        if (m_residency)
            flags |= VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT;
        if (m_aliased)
            flags |= VK_BUFFER_CREATE_SPARSE_ALIASED_BIT;

        VkBufferCreateInfo referenceBufferCreateInfo = {
            VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,     // VkStructureType        sType;
            DE_NULL,                                  // const void*            pNext;
            flags,                                    // VkBufferCreateFlags    flags;
            0u,                                       // override later // VkDeviceSize           size;
            VK_BUFFER_USAGE_TRANSFER_DST_BIT | usage, // VkBufferUsageFlags     usage;
            VK_SHARING_MODE_EXCLUSIVE,                // VkSharingMode          sharingMode;
            0u,                                       // uint32_t               queueFamilyIndexCount;
            DE_NULL,                                  // const uint32_t*        pQueueFamilyIndices;
        };

        if (m_sparseQueue.queueFamilyIndex != m_universalQueue.queueFamilyIndex)
        {
            referenceBufferCreateInfo.sharingMode           = VK_SHARING_MODE_CONCURRENT;
            referenceBufferCreateInfo.queueFamilyIndexCount = DE_LENGTH_OF_ARRAY(m_sharedQueueFamilyIndices);
            referenceBufferCreateInfo.pQueueFamilyIndices   = m_sharedQueueFamilyIndices;
        }

        return referenceBufferCreateInfo;
    }

    void draw(const VkPrimitiveTopology topology, const VkDescriptorSetLayout descriptorSetLayout = DE_NULL,
              Renderer::SpecializationMap specMap = Renderer::SpecializationMap(), bool useDeviceGroups = false,
              uint32_t deviceID = 0)
    {
        const UniquePtr<Renderer> renderer(
            new Renderer(getDeviceInterface(), getDevice(), getAllocator(), m_universalQueue.queueFamilyIndex,
                         descriptorSetLayout, m_context.getBinaryCollection(), "vert", "frag", *m_colorBuffer,
                         m_renderSize, m_colorFormat, Vec4(1.0f, 0.0f, 0.0f, 1.0f), topology, specMap));

        renderer->draw(getDeviceInterface(), getDevice(), m_universalQueue.queueHandle, *this, useDeviceGroups,
                       deviceID);
    }

    bool isResultImageCorrect(void) const
    {
        invalidateAlloc(getDeviceInterface(), getDevice(), *m_colorBufferAlloc);

        const tcu::ConstPixelBufferAccess resultImage(mapVkFormat(m_colorFormat), m_renderSize.x(), m_renderSize.y(),
                                                      1u, m_colorBufferAlloc->getHostPtr());

        m_context.getTestContext().getLog() << tcu::LogImageSet("Result", "Result")
                                            << tcu::LogImage("color0", "", resultImage) << tcu::TestLog::EndImageSet;

        return !imageHasErrorPixels(resultImage);
    }

    const bool m_aliased;
    const bool m_residency;
    const bool m_nonResidentStrict;

    Queue m_sparseQueue;
    Queue m_universalQueue;

private:
    const IVec2 m_renderSize;
    const VkFormat m_colorFormat;
    const VkDeviceSize m_colorBufferSize;

    Move<VkBuffer> m_colorBuffer;
    MovePtr<Allocation> m_colorBufferAlloc;

    uint32_t m_sharedQueueFamilyIndices[2];
};

void initProgramsDrawWithUBO(vk::SourceCollections &programCollection, const TestFlags flags)
{
    // Vertex shader
    {
        std::ostringstream src;
        src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
            << "\n"
            << "layout(location = 0) in vec4 in_position;\n"
            << "\n"
            << "out gl_PerVertex {\n"
            << "    vec4 gl_Position;\n"
            << "};\n"
            << "\n"
            << "void main(void)\n"
            << "{\n"
            << "    gl_Position = in_position;\n"
            << "}\n";

        programCollection.glslSources.add("vert") << glu::VertexSource(src.str());
    }

    // Fragment shader
    {
        const bool aliased           = (flags & TEST_FLAG_ALIASED) != 0;
        const bool residency         = (flags & TEST_FLAG_RESIDENCY) != 0;
        const bool nonResidentStrict = (flags & TEST_FLAG_NON_RESIDENT_STRICT) != 0;
        const std::string valueExpr =
            (aliased ? "ivec4(3*(ndx % nonAliasedSize) ^ 127, 0, 0, 0)" : "ivec4(3*ndx ^ 127, 0, 0, 0)");

        std::ostringstream src;
        src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
            << "\n"
            << "layout(location = 0) out vec4 o_color;\n"
            << "\n"
            << "layout(constant_id = 1) const int dataSize  = 1;\n"
            << "layout(constant_id = 2) const int chunkSize = 1;\n"
            << "\n"
            << "layout(set = 0, binding = 0, std140) uniform SparseBuffer {\n"
            << "    ivec4 data[dataSize];\n"
            << "} ubo;\n"
            << "\n"
            << "void main(void)\n"
            << "{\n"
            << "    const int fragNdx        = int(gl_FragCoord.x) + " << RENDER_SIZE << " * int(gl_FragCoord.y);\n"
            << "    const int pageSize       = " << RENDER_SIZE << " * " << RENDER_SIZE << ";\n"
            << "    const int numChunks      = dataSize / chunkSize;\n";

        if (aliased)
            src << "    const int nonAliasedSize = (numChunks > 1 ? dataSize - chunkSize : dataSize);\n";

        src << "    bool      ok             = true;\n"
            << "\n"
            << "    for (int ndx = fragNdx; ndx < dataSize; ndx += pageSize)\n"
            << "    {\n";

        if (residency && nonResidentStrict)
        {
            src << "        if (ndx >= chunkSize && ndx < 2*chunkSize)\n"
                << "            ok = ok && (ubo.data[ndx] == ivec4(0));\n"
                << "        else\n"
                << "            ok = ok && (ubo.data[ndx] == " + valueExpr + ");\n";
        }
        else if (residency)
        {
            src << "        if (ndx >= chunkSize && ndx < 2*chunkSize)\n"
                << "            continue;\n"
                << "        ok = ok && (ubo.data[ndx] == " << valueExpr << ");\n";
        }
        else
            src << "        ok = ok && (ubo.data[ndx] == " << valueExpr << ");\n";

        src << "    }\n"
            << "\n"
            << "    if (ok)\n"
            << "        o_color = vec4(0.0, 1.0, 0.0, 1.0);\n"
            << "    else\n"
            << "        o_color = vec4(1.0, 0.0, 0.0, 1.0);\n"
            << "}\n";

        programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
    }
}

//! Sparse buffer backing a UBO
class UBOTestInstance : public SparseBufferTestInstance
{
public:
    UBOTestInstance(Context &context, const TestFlags flags) : SparseBufferTestInstance(context, flags)
    {
    }

    void rendererDraw(const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
    {
        const DeviceInterface &vk       = getDeviceInterface();
        const VkDeviceSize vertexOffset = 0ull;

        vk.cmdBindVertexBuffers(cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
        vk.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0u, 1u,
                                 &m_descriptorSet.get(), 0u, DE_NULL);
        vk.cmdDraw(cmdBuffer, 4u, 1u, 0u, 0u);
    }

    tcu::TestStatus iterate(void)
    {
        const InstanceInterface &instance = m_context.getInstanceInterface();
        const DeviceInterface &vk         = getDeviceInterface();
        MovePtr<SparseAllocation> sparseAllocation;
        Move<VkBuffer> sparseBuffer;
        Move<VkBuffer> sparseBufferAliased;
        bool setupDescriptors = true;

        // Go through all physical devices
        for (uint32_t physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
        {
            const uint32_t firstDeviceID  = physDevID;
            const uint32_t secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices;

            // Set up the sparse buffer
            {
                VkBufferCreateInfo referenceBufferCreateInfo =
                    getSparseBufferCreateInfo(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
                const VkDeviceSize minChunkSize = 512u; // make sure the smallest allocation is at least this big
                uint32_t numMaxChunks           = 0u;

                // Check how many chunks we can allocate given the alignment and size requirements of UBOs
                {
                    const UniquePtr<SparseAllocation> minAllocation(SparseAllocationBuilder().addMemoryBind().build(
                        instance, getPhysicalDevice(secondDeviceID), vk, getDevice(), getAllocator(),
                        referenceBufferCreateInfo, minChunkSize));

                    numMaxChunks =
                        deMaxu32(static_cast<uint32_t>(m_context.getDeviceProperties().limits.maxUniformBufferRange /
                                                       minAllocation->resourceSize),
                                 1u);
                }

                if (numMaxChunks < 4)
                {
                    sparseAllocation = SparseAllocationBuilder().addMemoryBind().build(
                        instance, getPhysicalDevice(secondDeviceID), vk, getDevice(), getAllocator(),
                        referenceBufferCreateInfo, minChunkSize);
                }
                else
                {
                    // Try to use a non-trivial memory allocation scheme to make it different from a non-sparse binding
                    SparseAllocationBuilder builder;
                    builder.addMemoryBind();

                    if (m_residency)
                        builder.addResourceHole();

                    builder.addMemoryAllocation().addMemoryHole().addMemoryBind();

                    if (m_aliased)
                        builder.addAliasedMemoryBind(0u, 0u);

                    sparseAllocation = builder.build(instance, getPhysicalDevice(secondDeviceID), vk, getDevice(),
                                                     getAllocator(), referenceBufferCreateInfo, minChunkSize);
                    DE_ASSERT(sparseAllocation->resourceSize <=
                              m_context.getDeviceProperties().limits.maxUniformBufferRange);
                }

                if (firstDeviceID != secondDeviceID)
                {
                    VkPeerMemoryFeatureFlags peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
                    vk.getDeviceGroupPeerMemoryFeatures(getDevice(), sparseAllocation->heapIndex, firstDeviceID,
                                                        secondDeviceID, &peerMemoryFeatureFlags);

                    if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_DST_BIT) == 0) ||
                        ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT) == 0))
                    {
                        TCU_THROW(NotSupportedError, "Peer memory does not support COPY_DST and GENERIC_SRC");
                    }
                }

                // Create the buffer
                referenceBufferCreateInfo.size = sparseAllocation->resourceSize;
                sparseBuffer                   = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
                bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *sparseBuffer, *sparseAllocation,
                                 usingDeviceGroups(), firstDeviceID, secondDeviceID);

                if (m_aliased)
                {
                    sparseBufferAliased = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
                    bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *sparseBufferAliased,
                                     *sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);
                }
            }

            // Set uniform data
            {
                const bool hasAliasedChunk   = (m_aliased && sparseAllocation->memoryBinds.size() > 1u);
                const VkDeviceSize chunkSize = sparseAllocation->resourceSize / sparseAllocation->numResourceChunks;
                const VkDeviceSize stagingBufferSize =
                    sparseAllocation->resourceSize - (hasAliasedChunk ? chunkSize : 0);
                const uint32_t numBufferEntries = static_cast<uint32_t>(stagingBufferSize / sizeof(IVec4));

                const Unique<VkBuffer> stagingBuffer(
                    makeBuffer(vk, getDevice(), stagingBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT));
                const UniquePtr<Allocation> stagingBufferAlloc(
                    bindBuffer(vk, getDevice(), getAllocator(), *stagingBuffer, MemoryRequirement::HostVisible));

                {
                    // If aliased chunk is used, the staging buffer is smaller than the sparse buffer and we don't overwrite the last chunk
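                    // The pattern below must match valueExpr in the fragment shader:
                    // data[i] = ivec4(3*i ^ 127, 0, 0, 0).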
901                     IVec4 *const pData = static_cast<IVec4 *>(stagingBufferAlloc->getHostPtr());
902                     for (uint32_t i = 0; i < numBufferEntries; ++i)
903                         pData[i] = IVec4(3 * i ^ 127, 0, 0, 0);
904 
905                     flushAlloc(vk, getDevice(), *stagingBufferAlloc);
906 
907                     const VkBufferCopy copyRegion = {
908                         0ull,              // VkDeviceSize    srcOffset;
909                         0ull,              // VkDeviceSize    dstOffset;
910                         stagingBufferSize, // VkDeviceSize    size;
911                     };
912 
913                     const Unique<VkCommandPool> cmdPool(
914                         makeCommandPool(vk, getDevice(), m_universalQueue.queueFamilyIndex));
915                     const Unique<VkCommandBuffer> cmdBuffer(
916                         allocateCommandBuffer(vk, getDevice(), *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
917 
918                     beginCommandBuffer(vk, *cmdBuffer);
919                     vk.cmdCopyBuffer(*cmdBuffer, *stagingBuffer, *sparseBuffer, 1u, &copyRegion);
920                     endCommandBuffer(vk, *cmdBuffer);
921 
922                     submitCommandsAndWait(vk, getDevice(), m_universalQueue.queueHandle, *cmdBuffer, 0u, DE_NULL,
923                                           DE_NULL, 0, DE_NULL, usingDeviceGroups(), firstDeviceID);
924                     // Once the fence is signaled, the write is also available to the aliasing buffer.
925                 }
926             }
927 
928             // Make sure that we don't try to access a larger range than is allowed. This only applies to a single chunk case.
929             const uint32_t maxBufferRange = deMinu32(static_cast<uint32_t>(sparseAllocation->resourceSize),
930                                                      m_context.getDeviceProperties().limits.maxUniformBufferRange);
931 
932             // Descriptor sets
933             {
934                 // Setup only once
935                 if (setupDescriptors)
936                 {
937                     m_descriptorSetLayout =
938                         DescriptorSetLayoutBuilder()
939                             .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT)
940                             .build(vk, getDevice());
941 
942                     m_descriptorPool =
943                         DescriptorPoolBuilder()
944                             .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
945                             .build(vk, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
946 
947                     m_descriptorSet  = makeDescriptorSet(vk, getDevice(), *m_descriptorPool, *m_descriptorSetLayout);
948                     setupDescriptors = false;
949                 }
950 
951                 const VkBuffer buffer                         = (m_aliased ? *sparseBufferAliased : *sparseBuffer);
952                 const VkDescriptorBufferInfo sparseBufferInfo = makeDescriptorBufferInfo(buffer, 0ull, maxBufferRange);
953 
954                 DescriptorSetUpdateBuilder()
955                     .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
956                                  VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &sparseBufferInfo)
957                     .update(vk, getDevice());
958             }
959 
960             // Vertex data
961             {
962                 const Vec4 vertexData[] = {
963                     Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
964                     Vec4(-1.0f, 1.0f, 0.0f, 1.0f),
965                     Vec4(1.0f, -1.0f, 0.0f, 1.0f),
966                     Vec4(1.0f, 1.0f, 0.0f, 1.0f),
967                 };
968 
969                 const VkDeviceSize vertexBufferSize = sizeof(vertexData);
970 
971                 m_vertexBuffer = makeBuffer(vk, getDevice(), vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
972                 m_vertexBufferAlloc =
973                     bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);
974 
975                 deMemcpy(m_vertexBufferAlloc->getHostPtr(), &vertexData[0], vertexBufferSize);
976                 flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
977             }
978 
979             // Draw
980             {
981                 std::vector<int32_t> specializationData;
982                 {
983                     const uint32_t numBufferEntries   = maxBufferRange / static_cast<uint32_t>(sizeof(IVec4));
984                     const uint32_t numEntriesPerChunk = numBufferEntries / sparseAllocation->numResourceChunks;
985 
986                     specializationData.push_back(numBufferEntries);
987                     specializationData.push_back(numEntriesPerChunk);
988                 }
989 
                const VkSpecializationMapEntry specMapEntries[] = {
                    {
                        1u,              // uint32_t    constantID;
                        0u,              // uint32_t    offset;
                        sizeof(int32_t), // size_t      size;
                    },
                    {
                        2u,              // uint32_t    constantID;
                        sizeof(int32_t), // uint32_t    offset;
                        sizeof(int32_t), // size_t      size;
                    },
                };

                const VkSpecializationInfo specInfo = {
                    DE_LENGTH_OF_ARRAY(specMapEntries),   // uint32_t                           mapEntryCount;
                    specMapEntries,                       // const VkSpecializationMapEntry*    pMapEntries;
                    sizeInBytes(specializationData),      // size_t                             dataSize;
                    getDataOrNullptr(specializationData), // const void*                        pData;
                };

                Renderer::SpecializationMap specMap;
                specMap[VK_SHADER_STAGE_FRAGMENT_BIT] = &specInfo;

                draw(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, *m_descriptorSetLayout, specMap, usingDeviceGroups(),
                     firstDeviceID);
            }

            if (!isResultImageCorrect())
                return tcu::TestStatus::fail("Some buffer values were incorrect");
        }
        return tcu::TestStatus::pass("Pass");
    }

private:
    Move<VkBuffer> m_vertexBuffer;
    MovePtr<Allocation> m_vertexBufferAlloc;

    Move<VkDescriptorSetLayout> m_descriptorSetLayout;
    Move<VkDescriptorPool> m_descriptorPool;
    Move<VkDescriptorSet> m_descriptorSet;
};

void initProgramsDrawGrid(vk::SourceCollections &programCollection, const TestFlags flags)
{
    DE_UNREF(flags);

    // Vertex shader
    {
        std::ostringstream src;
        src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
            << "\n"
            << "layout(location = 0) in  vec4 in_position;\n"
            << "layout(location = 0) out int  out_ndx;\n"
            << "\n"
            << "out gl_PerVertex {\n"
            << "    vec4 gl_Position;\n"
            << "};\n"
            << "\n"
            << "void main(void)\n"
            << "{\n"
            << "    gl_Position = in_position;\n"
            << "    out_ndx     = gl_VertexIndex;\n"
            << "}\n";

        programCollection.glslSources.add("vert") << glu::VertexSource(src.str());
    }

    // Fragment shader
    {
        std::ostringstream src;
        src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
            << "\n"
            << "layout(location = 0) flat in  int  in_ndx;\n"
            << "layout(location = 0)      out vec4 o_color;\n"
            << "\n"
            << "void main(void)\n"
            << "{\n"
            << "    if (in_ndx % 2 == 0)\n"
            << "        o_color = vec4(vec3(1.0), 1.0);\n"
            << "    else\n"
            << "        o_color = vec4(vec3(0.75), 1.0);\n"
            << "}\n";

        programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
    }
}

//! Generate vertex positions for a grid of tiles composed of two triangles each (6 vertices)
void generateGrid(void *pRawData, const float step, const float ox, const float oy, const uint32_t numX,
                  const uint32_t numY, const float z = 0.0f)
{
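    // A tile is an array of 6 vertices; pData[ndx] addresses one whole tile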
    typedef Vec4 (*TilePtr)[6];

    TilePtr const pData = static_cast<TilePtr>(pRawData);
    {
        for (uint32_t iy = 0; iy < numY; ++iy)
            for (uint32_t ix = 0; ix < numX; ++ix)
            {
                const uint32_t ndx = ix + numX * iy;
                const float x      = ox + step * static_cast<float>(ix);
                const float y      = oy + step * static_cast<float>(iy);

                pData[ndx][0] = Vec4(x + step, y, z, 1.0f);
                pData[ndx][1] = Vec4(x, y, z, 1.0f);
                pData[ndx][2] = Vec4(x, y + step, z, 1.0f);

                pData[ndx][3] = Vec4(x, y + step, z, 1.0f);
                pData[ndx][4] = Vec4(x + step, y + step, z, 1.0f);
                pData[ndx][5] = Vec4(x + step, y, z, 1.0f);
            }
    }
}

//! Base test for a sparse buffer backing a vertex, index, or indirect buffer
class DrawGridTestInstance : public SparseBufferTestInstance
{
public:
    DrawGridTestInstance(Context &context, const TestFlags flags, const VkBufferUsageFlags usage,
                         const VkDeviceSize minChunkSize)
        : SparseBufferTestInstance(context, flags)
        , m_bufferUsage(usage)
        , m_minChunkSize(minChunkSize)
        , m_perDrawBufferOffset(0)
        , m_stagingBufferSize(0)
    {
    }

    void createResources(uint32_t memoryDeviceIndex)
    {
        const InstanceInterface &instance            = m_context.getInstanceInterface();
        const DeviceInterface &vk                    = getDeviceInterface();
        VkBufferCreateInfo referenceBufferCreateInfo = getSparseBufferCreateInfo(m_bufferUsage);

        {
            // Allocate two chunks, each covering half of the viewport
            SparseAllocationBuilder builder;
            builder.addMemoryBind();

            if (m_residency)
                builder.addResourceHole();

            builder.addMemoryAllocation().addMemoryHole().addMemoryBind();

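            // Re-bind the first allocation's memory to one more resource chunk, creating an aliased range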
            if (m_aliased)
                builder.addAliasedMemoryBind(0u, 0u);

            m_sparseAllocation = builder.build(instance, getPhysicalDevice(memoryDeviceIndex), vk, getDevice(),
                                               getAllocator(), referenceBufferCreateInfo, m_minChunkSize);
        }

        // Create the buffer
        referenceBufferCreateInfo.size = m_sparseAllocation->resourceSize;
        m_sparseBuffer                 = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);

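        // Each draw call reads one chunk, so the staging buffer holds data for exactly two chunks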
        m_perDrawBufferOffset = m_sparseAllocation->resourceSize / m_sparseAllocation->numResourceChunks;
        m_stagingBufferSize   = 2 * m_perDrawBufferOffset;
        m_stagingBuffer       = makeBuffer(vk, getDevice(), m_stagingBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
        m_stagingBufferAlloc =
            bindBuffer(vk, getDevice(), getAllocator(), *m_stagingBuffer, MemoryRequirement::HostVisible);
    }

    tcu::TestStatus iterate(void)
    {
        const DeviceInterface &vk = getDeviceInterface();

        for (uint32_t physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
        {
            const uint32_t firstDeviceID  = physDevID;
            const uint32_t secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices;

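            // In device-group mode, the backing memory lives on the second device while the first device renders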
            createResources(secondDeviceID);

            if (firstDeviceID != secondDeviceID)
            {
                VkPeerMemoryFeatureFlags peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
                vk.getDeviceGroupPeerMemoryFeatures(getDevice(), m_sparseAllocation->heapIndex, firstDeviceID,
                                                    secondDeviceID, &peerMemoryFeatureFlags);

                if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_DST_BIT) == 0) ||
                    ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT) == 0))
                {
                    TCU_THROW(NotSupportedError, "Peer memory does not support COPY_DST and GENERIC_SRC");
                }
            }

            // Bind the memory
            bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *m_sparseBuffer, *m_sparseAllocation,
                             usingDeviceGroups(), firstDeviceID, secondDeviceID);

            initializeBuffers();

            // Upload to the sparse buffer
            {
                flushAlloc(vk, getDevice(), *m_stagingBufferAlloc);

                VkDeviceSize firstChunkOffset  = 0ull;
                VkDeviceSize secondChunkOffset = m_perDrawBufferOffset;

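                // With residency, the second populated chunk sits past the unbound hole chunk.
                // With aliasing, the first chunk's data is written through the aliased bind at the end of the buffer.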
                if (m_residency)
                    secondChunkOffset += m_perDrawBufferOffset;

                if (m_aliased)
                    firstChunkOffset = secondChunkOffset + m_perDrawBufferOffset;

                const VkBufferCopy copyRegions[] = {
                    {
                        0ull,                  // VkDeviceSize    srcOffset;
                        firstChunkOffset,      // VkDeviceSize    dstOffset;
                        m_perDrawBufferOffset, // VkDeviceSize    size;
                    },
                    {
                        m_perDrawBufferOffset, // VkDeviceSize    srcOffset;
                        secondChunkOffset,     // VkDeviceSize    dstOffset;
                        m_perDrawBufferOffset, // VkDeviceSize    size;
                    },
                };

                const Unique<VkCommandPool> cmdPool(
                    makeCommandPool(vk, getDevice(), m_universalQueue.queueFamilyIndex));
                const Unique<VkCommandBuffer> cmdBuffer(
                    allocateCommandBuffer(vk, getDevice(), *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

                beginCommandBuffer(vk, *cmdBuffer);
                vk.cmdCopyBuffer(*cmdBuffer, *m_stagingBuffer, *m_sparseBuffer, DE_LENGTH_OF_ARRAY(copyRegions),
                                 copyRegions);
                endCommandBuffer(vk, *cmdBuffer);

                submitCommandsAndWait(vk, getDevice(), m_universalQueue.queueHandle, *cmdBuffer, 0u, DE_NULL, DE_NULL,
                                      0, DE_NULL, usingDeviceGroups(), firstDeviceID);
            }

            Renderer::SpecializationMap specMap;
            draw(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, DE_NULL, specMap, usingDeviceGroups(), firstDeviceID);

            if (!isResultImageCorrect())
                return tcu::TestStatus::fail("Some buffer values were incorrect");
        }
        return tcu::TestStatus::pass("Pass");
    }

protected:
    virtual void initializeBuffers(void) = 0;

    const VkBufferUsageFlags m_bufferUsage;
    const VkDeviceSize m_minChunkSize;

    VkDeviceSize m_perDrawBufferOffset;

    VkDeviceSize m_stagingBufferSize;
    Move<VkBuffer> m_stagingBuffer;
    MovePtr<Allocation> m_stagingBufferAlloc;

    MovePtr<SparseAllocation> m_sparseAllocation;
    Move<VkBuffer> m_sparseBuffer;
};

//! Sparse buffer backing a vertex input buffer
class VertexBufferTestInstance : public DrawGridTestInstance
{
public:
    VertexBufferTestInstance(Context &context, const TestFlags flags)
        : DrawGridTestInstance(context, flags, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
                               GRID_SIZE * GRID_SIZE * 6 * sizeof(Vec4))
    {
    }

    void rendererDraw(const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
    {
        DE_UNREF(pipelineLayout);

        m_context.getTestContext().getLog()
            << tcu::TestLog::Message
            << "Drawing a grid of triangles backed by a sparse vertex buffer. There should be no red pixels visible."
            << tcu::TestLog::EndMessage;

        const DeviceInterface &vk  = getDeviceInterface();
        const uint32_t vertexCount = 6 * (GRID_SIZE * GRID_SIZE) / 2;
        VkDeviceSize vertexOffset  = 0ull;

        vk.cmdBindVertexBuffers(cmdBuffer, 0u, 1u, &m_sparseBuffer.get(), &vertexOffset);
        vk.cmdDraw(cmdBuffer, vertexCount, 1u, 0u, 0u);

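        // With residency, skip over the unbound hole chunk to reach the second populated chunk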
        vertexOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);

        vk.cmdBindVertexBuffers(cmdBuffer, 0u, 1u, &m_sparseBuffer.get(), &vertexOffset);
        vk.cmdDraw(cmdBuffer, vertexCount, 1u, 0u, 0u);
    }

    void initializeBuffers(void)
    {
        uint8_t *pData   = static_cast<uint8_t *>(m_stagingBufferAlloc->getHostPtr());
        const float step = 2.0f / static_cast<float>(GRID_SIZE);

        // Prepare data for two draw calls
        generateGrid(pData, step, -1.0f, -1.0f, GRID_SIZE, GRID_SIZE / 2);
        generateGrid(pData + m_perDrawBufferOffset, step, -1.0f, 0.0f, GRID_SIZE, GRID_SIZE / 2);
    }
};

//! Sparse buffer backing an index buffer
class IndexBufferTestInstance : public DrawGridTestInstance
{
public:
    IndexBufferTestInstance(Context &context, const TestFlags flags)
        : DrawGridTestInstance(context, flags, VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
                               GRID_SIZE * GRID_SIZE * 6 * sizeof(uint32_t))
        , m_halfVertexCount(6 * (GRID_SIZE * GRID_SIZE) / 2)
    {
    }

    void rendererDraw(const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
    {
        DE_UNREF(pipelineLayout);

        m_context.getTestContext().getLog()
            << tcu::TestLog::Message
            << "Drawing a grid of triangles from a sparse index buffer. There should be no red pixels visible."
            << tcu::TestLog::EndMessage;

        const DeviceInterface &vk       = getDeviceInterface();
        const VkDeviceSize vertexOffset = 0ull;
        VkDeviceSize indexOffset        = 0ull;

        vk.cmdBindVertexBuffers(cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);

        vk.cmdBindIndexBuffer(cmdBuffer, *m_sparseBuffer, indexOffset, VK_INDEX_TYPE_UINT32);
        vk.cmdDrawIndexed(cmdBuffer, m_halfVertexCount, 1u, 0u, 0, 0u);

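        // With residency, skip over the unbound hole chunk to reach the second half of the indices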
        indexOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);

        vk.cmdBindIndexBuffer(cmdBuffer, *m_sparseBuffer, indexOffset, VK_INDEX_TYPE_UINT32);
        vk.cmdDrawIndexed(cmdBuffer, m_halfVertexCount, 1u, 0u, 0, 0u);
    }

    void initializeBuffers(void)
    {
        // Vertex buffer
        const DeviceInterface &vk           = getDeviceInterface();
        const VkDeviceSize vertexBufferSize = 2 * m_halfVertexCount * sizeof(Vec4);
        m_vertexBuffer = makeBuffer(vk, getDevice(), vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
        m_vertexBufferAlloc =
            bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);

        {
            const float step = 2.0f / static_cast<float>(GRID_SIZE);

            generateGrid(m_vertexBufferAlloc->getHostPtr(), step, -1.0f, -1.0f, GRID_SIZE, GRID_SIZE);

            flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
        }

        // Sparse index buffer
        for (uint32_t chunkNdx = 0u; chunkNdx < 2; ++chunkNdx)
        {
            uint8_t *const pData =
                static_cast<uint8_t *>(m_stagingBufferAlloc->getHostPtr()) + chunkNdx * m_perDrawBufferOffset;
            uint32_t *const pIndexData = reinterpret_cast<uint32_t *>(pData);
            const uint32_t ndxBase     = chunkNdx * m_halfVertexCount;

            for (uint32_t i = 0u; i < m_halfVertexCount; ++i)
                pIndexData[i] = ndxBase + i;
        }
    }

private:
    const uint32_t m_halfVertexCount;
    Move<VkBuffer> m_vertexBuffer;
    MovePtr<Allocation> m_vertexBufferAlloc;
};

//! Draw from a sparse indirect buffer
class IndirectBufferTestInstance : public DrawGridTestInstance
{
public:
    IndirectBufferTestInstance(Context &context, const TestFlags flags)
        : DrawGridTestInstance(context, flags, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, sizeof(VkDrawIndirectCommand))
    {
    }

    void rendererDraw(const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
    {
        DE_UNREF(pipelineLayout);

        m_context.getTestContext().getLog()
            << tcu::TestLog::Message
            << "Drawing two triangles covering the whole viewport. There should be no red pixels visible."
            << tcu::TestLog::EndMessage;

        const DeviceInterface &vk       = getDeviceInterface();
        const VkDeviceSize vertexOffset = 0ull;
        VkDeviceSize indirectOffset     = 0ull;

        vk.cmdBindVertexBuffers(cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
        vk.cmdDrawIndirect(cmdBuffer, *m_sparseBuffer, indirectOffset, 1u, 0u);

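        // With residency, skip over the unbound hole chunk to reach the second indirect command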
        indirectOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);

        vk.cmdDrawIndirect(cmdBuffer, *m_sparseBuffer, indirectOffset, 1u, 0u);
    }

    void initializeBuffers(void)
    {
        // Vertex buffer
        const DeviceInterface &vk           = getDeviceInterface();
        const VkDeviceSize vertexBufferSize = 2 * 3 * sizeof(Vec4);
        m_vertexBuffer = makeBuffer(vk, getDevice(), vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
        m_vertexBufferAlloc =
            bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);

        {
            generateGrid(m_vertexBufferAlloc->getHostPtr(), 2.0f, -1.0f, -1.0f, 1, 1);
            flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
        }

        // Indirect buffer
        for (uint32_t chunkNdx = 0u; chunkNdx < 2; ++chunkNdx)
        {
            uint8_t *const pData =
                static_cast<uint8_t *>(m_stagingBufferAlloc->getHostPtr()) + chunkNdx * m_perDrawBufferOffset;
            VkDrawIndirectCommand *const pCmdData = reinterpret_cast<VkDrawIndirectCommand *>(pData);

            pCmdData->firstVertex   = 3u * chunkNdx;
            pCmdData->firstInstance = 0u;
            pCmdData->vertexCount   = 3u;
            pCmdData->instanceCount = 1u;
        }
    }

private:
    Move<VkBuffer> m_vertexBuffer;
    MovePtr<Allocation> m_vertexBufferAlloc;
};

//! Similar to the class in vktTestCaseUtil.hpp, but uses Arg0 directly rather than through an InstanceFunction1
template <typename Arg0>
class FunctionProgramsSimple1
{
public:
    typedef void (*Function)(vk::SourceCollections &dst, Arg0 arg0);
    FunctionProgramsSimple1(Function func) : m_func(func)
    {
    }
    void init(vk::SourceCollections &dst, const Arg0 &arg0) const
    {
        m_func(dst, arg0);
    }

private:
    const Function m_func;
};

void checkSupport(Context &context, const TestFlags flags)
{
    context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);

    if (flags & TEST_FLAG_RESIDENCY)
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_BUFFER);

    if (flags & TEST_FLAG_ALIASED)
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_ALIASED);

    if ((flags & TEST_FLAG_NON_RESIDENT_STRICT) &&
        !context.getDeviceProperties().sparseProperties.residencyNonResidentStrict)
        TCU_THROW(NotSupportedError, "Missing sparse property: residencyNonResidentStrict");
}

//! Convenience function to create a TestCase based on a freestanding initPrograms and a TestInstance implementation
template <typename TestInstanceT, typename Arg0>
TestCase *createTestInstanceWithPrograms(tcu::TestContext &testCtx, const std::string &name,
                                         typename FunctionProgramsSimple1<Arg0>::Function initPrograms, Arg0 arg0)
{
    return new InstanceFactory1WithSupport<TestInstanceT, Arg0, FunctionSupport1<Arg0>, FunctionProgramsSimple1<Arg0>>(
        testCtx, name, FunctionProgramsSimple1<Arg0>(initPrograms), arg0,
        typename FunctionSupport1<Arg0>::Args(checkSupport, arg0));
}

void populateTestGroup(tcu::TestCaseGroup *parentGroup)
{
    const struct
    {
        std::string name;
        TestFlags flags;
    } groups[] = {
        {
            "sparse_binding",
            0u,
        },
        {
            "sparse_binding_aliased",
            TEST_FLAG_ALIASED,
        },
        {
            "sparse_residency",
            TEST_FLAG_RESIDENCY,
        },
        {
            "sparse_residency_aliased",
            TEST_FLAG_RESIDENCY | TEST_FLAG_ALIASED,
        },
        {
            "sparse_residency_non_resident_strict",
            TEST_FLAG_RESIDENCY | TEST_FLAG_NON_RESIDENT_STRICT,
        },
    };

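    // The non_resident_strict variant is only exercised by the UBO tests, which read unbound regions;
    // the remaining groups use the shorter default list that excludes it.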
    const int numGroupsIncludingNonResidentStrict = DE_LENGTH_OF_ARRAY(groups);
    const int numGroupsDefaultList                = numGroupsIncludingNonResidentStrict - 1;
    std::string devGroupPrefix                    = "device_group_";

    // Transfer
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "transfer"));
        {
            MovePtr<tcu::TestCaseGroup> subGroup(
                new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_binding"));
            addBufferSparseBindingTests(subGroup.get(), false);
            group->addChild(subGroup.release());

            MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(
                new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_binding"));
            addBufferSparseBindingTests(subGroupDeviceGroups.get(), true);
            group->addChild(subGroupDeviceGroups.release());
        }
        parentGroup->addChild(group.release());
    }

    // SSBO
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "ssbo"));
        {
            MovePtr<tcu::TestCaseGroup> subGroup(
                new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_binding_aliased"));
            addBufferSparseMemoryAliasingTests(subGroup.get(), false);
            group->addChild(subGroup.release());

            MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(
                new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_binding_aliased"));
            addBufferSparseMemoryAliasingTests(subGroupDeviceGroups.get(), true);
            group->addChild(subGroupDeviceGroups.release());
        }
        {
            MovePtr<tcu::TestCaseGroup> subGroup(
                new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_residency"));
            addBufferSparseResidencyTests(subGroup.get(), false);
            group->addChild(subGroup.release());

            MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(
                new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_residency"));
            addBufferSparseResidencyTests(subGroupDeviceGroups.get(), true);
            group->addChild(subGroupDeviceGroups.release());
        }
        parentGroup->addChild(group.release());
    }

    // UBO
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "ubo"));

        for (int groupNdx = 0u; groupNdx < numGroupsIncludingNonResidentStrict; ++groupNdx)
        {
            group->addChild(
                createTestInstanceWithPrograms<UBOTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(),
                                                                initProgramsDrawWithUBO, groups[groupNdx].flags));
        }
        for (int groupNdx = 0u; groupNdx < numGroupsIncludingNonResidentStrict; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<UBOTestInstance>(
                group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), initProgramsDrawWithUBO,
                groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
        }
        parentGroup->addChild(group.release());
    }

    // Vertex buffer
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "vertex_buffer"));

        for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<VertexBufferTestInstance>(
                group->getTestContext(), groups[groupNdx].name.c_str(), initProgramsDrawGrid, groups[groupNdx].flags));
        }
        for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<VertexBufferTestInstance>(
                group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), initProgramsDrawGrid,
                groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
        }

        parentGroup->addChild(group.release());
    }

    // Index buffer
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "index_buffer"));

        for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<IndexBufferTestInstance>(
                group->getTestContext(), groups[groupNdx].name.c_str(), initProgramsDrawGrid, groups[groupNdx].flags));
        }
        for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<IndexBufferTestInstance>(
                group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), initProgramsDrawGrid,
                groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
        }

        parentGroup->addChild(group.release());
    }

    // Indirect buffer
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "indirect_buffer"));

        for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<IndirectBufferTestInstance>(
                group->getTestContext(), groups[groupNdx].name.c_str(), initProgramsDrawGrid, groups[groupNdx].flags));
        }
        for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<IndirectBufferTestInstance>(
                group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), initProgramsDrawGrid,
                groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
        }

        parentGroup->addChild(group.release());
    }
}

} // namespace

tcu::TestCaseGroup *createSparseBufferTests(tcu::TestContext &testCtx)
{
    return createTestGroup(testCtx, "buffer", populateTestGroup);
}

} // namespace sparse
} // namespace vkt