/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file  vktSparseResourcesBufferMemoryAliasing.cpp
 * \brief Sparse buffer memory aliasing tests
 *//*--------------------------------------------------------------------*/

#include "vktSparseResourcesBufferMemoryAliasing.hpp"
#include "vktSparseResourcesTestsUtil.hpp"
#include "vktSparseResourcesBase.hpp"
#include "vktTestCaseUtil.hpp"

#include "vkDefs.hpp"
#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBarrierUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"

#include "deStringUtil.hpp"
#include "deUniquePtr.hpp"

#include <sstream>
#include <string>
#include <vector>

using namespace vk;

namespace vkt
{
namespace sparse
{
namespace
{

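// Constants shared between the host code and the generated compute shader:
// each output element is a 32-bit uint (SIZE_OF_UINT_IN_SHADER bytes) and the
// written values repeat with period MODULO_DIVISOR.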
enum ShaderParameters
{
    SIZE_OF_UINT_IN_SHADER = 4u,
    MODULO_DIVISOR         = 1024u
};

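// Greedily factor the invocation count into an (x, y, z) work group size, each
// axis clamped to the minimum limits guaranteed by the Vulkan spec (128x128x64,
// at most 128 invocations per group). For the buffer sizes used by these tests
// the x axis saturates first, e.g. 32768 invocations yield (128, 1, 1); the
// remaining invocations are covered by the work group counts computed in
// iterate().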
tcu::UVec3 computeWorkGroupSize(const uint32_t numInvocations)
{
    const uint32_t maxComputeWorkGroupInvocations = 128u;
    const tcu::UVec3 maxComputeWorkGroupSize      = tcu::UVec3(128u, 128u, 64u);
    uint32_t numInvocationsLeft                   = numInvocations;

    const uint32_t xWorkGroupSize =
        std::min(std::min(numInvocationsLeft, maxComputeWorkGroupSize.x()), maxComputeWorkGroupInvocations);
    numInvocationsLeft = numInvocationsLeft / xWorkGroupSize + ((numInvocationsLeft % xWorkGroupSize) ? 1u : 0u);

    const uint32_t yWorkGroupSize = std::min(std::min(numInvocationsLeft, maxComputeWorkGroupSize.y()),
                                             maxComputeWorkGroupInvocations / xWorkGroupSize);
    numInvocationsLeft = numInvocationsLeft / yWorkGroupSize + ((numInvocationsLeft % yWorkGroupSize) ? 1u : 0u);

    const uint32_t zWorkGroupSize = std::min(std::min(numInvocationsLeft, maxComputeWorkGroupSize.z()),
                                             maxComputeWorkGroupInvocations / (xWorkGroupSize * yWorkGroupSize));
    numInvocationsLeft = numInvocationsLeft / zWorkGroupSize + ((numInvocationsLeft % zWorkGroupSize) ? 1u : 0u);

    return tcu::UVec3(xWorkGroupSize, yWorkGroupSize, zWorkGroupSize);
}

class BufferSparseMemoryAliasingCase : public TestCase
{
public:
    BufferSparseMemoryAliasingCase(tcu::TestContext &testCtx, const std::string &name, const uint32_t bufferSize,
                                   const glu::GLSLVersion glslVersion, const bool useDeviceGroups);

    void initPrograms(SourceCollections &sourceCollections) const;
    TestInstance *createInstance(Context &context) const;
    virtual void checkSupport(Context &context) const;

private:
    const uint32_t m_bufferSizeInBytes;
    const glu::GLSLVersion m_glslVersion;
    const bool m_useDeviceGroups;
};

BufferSparseMemoryAliasingCase::BufferSparseMemoryAliasingCase(tcu::TestContext &testCtx, const std::string &name,
                                                               const uint32_t bufferSize,
                                                               const glu::GLSLVersion glslVersion,
                                                               const bool useDeviceGroups)
    : TestCase(testCtx, name)
    , m_bufferSizeInBytes(bufferSize)
    , m_glslVersion(glslVersion)
    , m_useDeviceGroups(useDeviceGroups)
{
}

void BufferSparseMemoryAliasingCase::checkSupport(Context &context) const
{
    context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);
    context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_ALIASED);
}

void BufferSparseMemoryAliasingCase::initPrograms(SourceCollections &sourceCollections) const
{
    // Create compute program
    const char *const versionDecl  = glu::getGLSLVersionDeclaration(m_glslVersion);
    const uint32_t numInvocations  = m_bufferSizeInBytes / SIZE_OF_UINT_IN_SHADER;
    const tcu::UVec3 workGroupSize = computeWorkGroupSize(numInvocations);

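    // The shader flattens gl_GlobalInvocationID into a linear index and writes
    // index % MODULO_DIVISOR to the SSBO. The bound check makes over-dispatch
    // harmless: excess invocations write nothing.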
    std::ostringstream src;
    src << versionDecl << "\n"
        << "layout (local_size_x = " << workGroupSize.x() << ", local_size_y = " << workGroupSize.y()
        << ", local_size_z = " << workGroupSize.z() << ") in;\n"
        << "layout(set = 0, binding = 0, std430) writeonly buffer Output\n"
        << "{\n"
        << "    uint result[];\n"
        << "} sb_out;\n"
        << "\n"
        << "void main (void)\n"
        << "{\n"
        << "    uint index = gl_GlobalInvocationID.x + (gl_GlobalInvocationID.y + "
           "gl_GlobalInvocationID.z*gl_NumWorkGroups.y*gl_WorkGroupSize.y)*gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
        << "    if ( index < " << m_bufferSizeInBytes / SIZE_OF_UINT_IN_SHADER << "u )\n"
        << "    {\n"
        << "        sb_out.result[index] = index % " << MODULO_DIVISOR << "u;\n"
        << "    }\n"
        << "}\n";

    sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
}

class BufferSparseMemoryAliasingInstance : public SparseResourcesBaseInstance
{
public:
    BufferSparseMemoryAliasingInstance(Context &context, const uint32_t bufferSize, const bool useDeviceGroups);

    tcu::TestStatus iterate(void);

private:
    const uint32_t m_bufferSizeInBytes;
    const bool m_useDeviceGroups;
};

BufferSparseMemoryAliasingInstance::BufferSparseMemoryAliasingInstance(Context &context, const uint32_t bufferSize,
                                                                       const bool useDeviceGroups)
    : SparseResourcesBaseInstance(context, useDeviceGroups)
    , m_bufferSizeInBytes(bufferSize)
    , m_useDeviceGroups(useDeviceGroups)
{
}

tcu::TestStatus BufferSparseMemoryAliasingInstance::iterate(void)
{
    const InstanceInterface &instance = m_context.getInstanceInterface();
    {
        // Create logical device supporting both sparse and compute operations
        QueueRequirementsVec queueRequirements;
        queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
        queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));

        createDeviceSupportingQueues(queueRequirements);
    }
    const vk::VkPhysicalDevice &physicalDevice = getPhysicalDevice();
    const DeviceInterface &deviceInterface     = getDeviceInterface();
    const Queue &sparseQueue                   = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
    const Queue &computeQueue                  = getQueue(VK_QUEUE_COMPUTE_BIT, 0);

    // Go through all physical devices
    for (uint32_t physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
    {
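        // Each iteration treats physDevID as the resource-owning device and the
        // next device in the group as the memory-providing device; with a single
        // physical device both IDs are 0 and no peer access is involved.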
        const uint32_t firstDeviceID  = physDevID;
        const uint32_t secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices;

        VkBufferCreateInfo bufferCreateInfo = {
            VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,                                      // VkStructureType sType;
            DE_NULL,                                                                   // const void* pNext;
            VK_BUFFER_CREATE_SPARSE_BINDING_BIT | VK_BUFFER_CREATE_SPARSE_ALIASED_BIT, // VkBufferCreateFlags flags;
            m_bufferSizeInBytes,                                                       // VkDeviceSize size;
            VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,     // VkBufferUsageFlags usage;
            VK_SHARING_MODE_EXCLUSIVE,                                                 // VkSharingMode sharingMode;
            0u,     // uint32_t queueFamilyIndexCount;
            DE_NULL // const uint32_t* pQueueFamilyIndices;
        };

        const uint32_t queueFamilyIndices[] = {sparseQueue.queueFamilyIndex, computeQueue.queueFamilyIndex};

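        // If sparse binding and compute are served by different queue families,
        // the buffers must be shared concurrently between the two families.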
        if (sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex)
        {
            bufferCreateInfo.sharingMode           = VK_SHARING_MODE_CONCURRENT;
            bufferCreateInfo.queueFamilyIndexCount = 2u;
            bufferCreateInfo.pQueueFamilyIndices   = queueFamilyIndices;
        }

        // Create sparse buffers
        const Unique<VkBuffer> sparseBufferWrite(createBuffer(deviceInterface, getDevice(), &bufferCreateInfo));
        const Unique<VkBuffer> sparseBufferRead(createBuffer(deviceInterface, getDevice(), &bufferCreateInfo));

        // Create sparse buffers memory bind semaphore
        const Unique<VkSemaphore> bufferMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));

        const VkMemoryRequirements bufferMemRequirements =
            getBufferMemoryRequirements(deviceInterface, getDevice(), *sparseBufferWrite);

        if (bufferMemRequirements.size >
            getPhysicalDeviceProperties(instance, physicalDevice).limits.sparseAddressSpaceSize)
            TCU_THROW(NotSupportedError, "Required memory size for sparse resources exceeds device limits");

        DE_ASSERT((bufferMemRequirements.size % bufferMemRequirements.alignment) == 0);

        const uint32_t memoryType = findMatchingMemoryType(instance, getPhysicalDevice(secondDeviceID),
                                                           bufferMemRequirements, MemoryRequirement::Any);

        if (memoryType == NO_MATCH_FOUND)
            return tcu::TestStatus::fail("No matching memory type found");

        if (firstDeviceID != secondDeviceID)
        {
            VkPeerMemoryFeatureFlags peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
            const uint32_t heapIndex =
                getHeapIndexForMemoryType(instance, getPhysicalDevice(secondDeviceID), memoryType);
            deviceInterface.getDeviceGroupPeerMemoryFeatures(getDevice(), heapIndex, firstDeviceID, secondDeviceID,
                                                             &peerMemoryFeatureFlags);

            if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT) == 0) ||
                ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT) == 0))
            {
                TCU_THROW(NotSupportedError, "Peer memory does not support COPY_SRC and GENERIC_DST");
            }
        }

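        // Core of the aliasing test: bind the very same device memory range to
        // both buffers, so data written through sparseBufferWrite must become
        // observable through sparseBufferRead.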
        const VkSparseMemoryBind sparseMemoryBind =
            makeSparseMemoryBind(deviceInterface, getDevice(), bufferMemRequirements.size, memoryType, 0u);

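        // Take ownership of the memory handle referenced by sparseMemoryBind so
        // that it is freed automatically at the end of this scope.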
        Move<VkDeviceMemory> deviceMemoryPtr(check<VkDeviceMemory>(sparseMemoryBind.memory),
                                             Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL));

        {
            const VkSparseBufferMemoryBindInfo sparseBufferMemoryBindInfo[2] = {
                makeSparseBufferMemoryBindInfo(*sparseBufferWrite, //VkBuffer buffer;
                                               1u,                 //uint32_t bindCount;
                                               &sparseMemoryBind   //const VkSparseMemoryBind* pBinds;
                                               ),

                makeSparseBufferMemoryBindInfo(*sparseBufferRead, //VkBuffer buffer;
                                               1u,                //uint32_t bindCount;
                                               &sparseMemoryBind  //const VkSparseMemoryBind* pBinds;
                                               )};

            const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo = {
                VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO, //VkStructureType sType;
                DE_NULL,                                         //const void* pNext;
                firstDeviceID,                                   //uint32_t resourceDeviceIndex;
                secondDeviceID,                                  //uint32_t memoryDeviceIndex;
            };

            const VkBindSparseInfo bindSparseInfo = {
                VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,                    //VkStructureType sType;
                m_useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL, //const void* pNext;
                0u,                                                    //uint32_t waitSemaphoreCount;
                DE_NULL,                                               //const VkSemaphore* pWaitSemaphores;
                2u,                                                    //uint32_t bufferBindCount;
                sparseBufferMemoryBindInfo,      //const VkSparseBufferMemoryBindInfo* pBufferBinds;
                0u,                              //uint32_t imageOpaqueBindCount;
                DE_NULL,                         //const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds;
                0u,                              //uint32_t imageBindCount;
                DE_NULL,                         //const VkSparseImageMemoryBindInfo* pImageBinds;
                1u,                              //uint32_t signalSemaphoreCount;
                &bufferMemoryBindSemaphore.get() //const VkSemaphore* pSignalSemaphores;
            };

            // Submit sparse bind commands for execution
            VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
        }

        // Create output buffer
        const VkBufferCreateInfo outputBufferCreateInfo =
            makeBufferCreateInfo(m_bufferSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
        const Unique<VkBuffer> outputBuffer(createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
        const de::UniquePtr<Allocation> outputBufferAlloc(
            bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));

        // Create command buffer for compute and data transfer operations
        const Unique<VkCommandPool> commandPool(
            makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
        const Unique<VkCommandBuffer> commandBuffer(
            allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

        // Start recording commands
        beginCommandBuffer(deviceInterface, *commandBuffer);

        // Create descriptor set layout
        const Unique<VkDescriptorSetLayout> descriptorSetLayout(
            DescriptorSetLayoutBuilder()
                .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
                .build(deviceInterface, getDevice()));

        // Create compute pipeline
        const Unique<VkShaderModule> shaderModule(
            createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
        const Unique<VkPipelineLayout> pipelineLayout(
            makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
        const Unique<VkPipeline> computePipeline(
            makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));

        deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);

        // Create descriptor set
        const Unique<VkDescriptorPool> descriptorPool(
            DescriptorPoolBuilder()
                .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
                .build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

        const Unique<VkDescriptorSet> descriptorSet(
            makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));

        {
            const VkDescriptorBufferInfo sparseBufferInfo =
                makeDescriptorBufferInfo(*sparseBufferWrite, 0u, m_bufferSizeInBytes);

            DescriptorSetUpdateBuilder()
                .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u),
                             VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &sparseBufferInfo)
                .update(deviceInterface, getDevice());
        }

        deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
                                              &descriptorSet.get(), 0u, DE_NULL);

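        // Derive work group counts from the invocation count. Each count is
        // rounded up, so the dispatch may launch more invocations than there are
        // buffer elements; the shader's bound check discards the excess. E.g. a
        // 1 MiB buffer (262144 uints) with local size (128, 1, 1) is dispatched
        // as (65535, 4, 1) work groups.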
        {
            uint32_t numInvocationsLeft               = m_bufferSizeInBytes / SIZE_OF_UINT_IN_SHADER;
            const tcu::UVec3 workGroupSize            = computeWorkGroupSize(numInvocationsLeft);
            const tcu::UVec3 maxComputeWorkGroupCount = tcu::UVec3(65535u, 65535u, 65535u);

            numInvocationsLeft -= workGroupSize.x() * workGroupSize.y() * workGroupSize.z();

            const uint32_t xWorkGroupCount = std::min(numInvocationsLeft, maxComputeWorkGroupCount.x());
            numInvocationsLeft =
                numInvocationsLeft / xWorkGroupCount + ((numInvocationsLeft % xWorkGroupCount) ? 1u : 0u);
            const uint32_t yWorkGroupCount = std::min(numInvocationsLeft, maxComputeWorkGroupCount.y());
            numInvocationsLeft =
                numInvocationsLeft / yWorkGroupCount + ((numInvocationsLeft % yWorkGroupCount) ? 1u : 0u);
            const uint32_t zWorkGroupCount = std::min(numInvocationsLeft, maxComputeWorkGroupCount.z());
            numInvocationsLeft =
                numInvocationsLeft / zWorkGroupCount + ((numInvocationsLeft % zWorkGroupCount) ? 1u : 0u);

            if (numInvocationsLeft != 1u)
                TCU_THROW(NotSupportedError, "Buffer size is not supported");

            deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
        }

        {
            const VkBufferMemoryBarrier sparseBufferWriteBarrier = makeBufferMemoryBarrier(
                VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, *sparseBufferWrite, 0ull, m_bufferSizeInBytes);

            deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
                                               VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u,
                                               &sparseBufferWriteBarrier, 0u, DE_NULL);
        }

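        // Read back through the alias: the copy sources sparseBufferRead, which
        // is bound to the same memory the shader wrote via sparseBufferWrite.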
        {
            const VkBufferCopy bufferCopy = makeBufferCopy(0u, 0u, m_bufferSizeInBytes);

            deviceInterface.cmdCopyBuffer(*commandBuffer, *sparseBufferRead, *outputBuffer, 1u, &bufferCopy);
        }

        {
            const VkBufferMemoryBarrier outputBufferHostBarrier = makeBufferMemoryBarrier(
                VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, *outputBuffer, 0ull, m_bufferSizeInBytes);

            deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT,
                                               VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u,
                                               &outputBufferHostBarrier, 0u, DE_NULL);
        }

        // End recording commands
        endCommandBuffer(deviceInterface, *commandBuffer);

        // The stage at which execution will wait for the sparse binding operations to finish
        const VkPipelineStageFlags waitStageBits[] = {VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT};

        // Submit commands for execution and wait for completion
        // In case of device groups, submit on the physical device with the resource
        submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u,
                              &bufferMemoryBindSemaphore.get(), waitStageBits, 0, DE_NULL, m_useDeviceGroups,
                              firstDeviceID);

        // Retrieve data from output buffer to host memory
        invalidateAlloc(deviceInterface, getDevice(), *outputBufferAlloc);

        const uint8_t *outputData = static_cast<const uint8_t *>(outputBufferAlloc->getHostPtr());

        // Wait for sparse queue to become idle
        deviceInterface.queueWaitIdle(sparseQueue.queueHandle);

        // Prepare reference data
        std::vector<uint8_t> referenceData;
        referenceData.resize(m_bufferSizeInBytes);

        std::vector<uint32_t> referenceDataBlock;
        referenceDataBlock.resize(MODULO_DIVISOR);

        for (uint32_t valueNdx = 0; valueNdx < MODULO_DIVISOR; ++valueNdx)
        {
            referenceDataBlock[valueNdx] = valueNdx % MODULO_DIVISOR;
        }

        const uint32_t fullBlockSizeInBytes = MODULO_DIVISOR * SIZE_OF_UINT_IN_SHADER;
        const uint32_t lastBlockSizeInBytes = m_bufferSizeInBytes % fullBlockSizeInBytes;
        const uint32_t numberOfBlocks = m_bufferSizeInBytes / fullBlockSizeInBytes + (lastBlockSizeInBytes ? 1u : 0u);

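        // Tile the 4 KiB pattern block (MODULO_DIVISOR uints) across the
        // reference buffer, truncating the last block if the buffer size is not
        // a multiple of the block size; e.g. a 1 KiB buffer copies only the
        // first 1024 bytes (values 0..255) of a single block.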
        for (uint32_t blockNdx = 0; blockNdx < numberOfBlocks; ++blockNdx)
        {
            const uint32_t offset = blockNdx * fullBlockSizeInBytes;
            deMemcpy(&referenceData[0] + offset, &referenceDataBlock[0],
                     ((offset + fullBlockSizeInBytes) <= m_bufferSizeInBytes) ? fullBlockSizeInBytes :
                                                                                lastBlockSizeInBytes);
        }

        // Compare reference data with output data
        if (deMemCmp(&referenceData[0], outputData, m_bufferSizeInBytes) != 0)
            return tcu::TestStatus::fail("Failed");
    }
    return tcu::TestStatus::pass("Passed");
}

TestInstance *BufferSparseMemoryAliasingCase::createInstance(Context &context) const
{
    return new BufferSparseMemoryAliasingInstance(context, m_bufferSizeInBytes, m_useDeviceGroups);
}

} // namespace

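// Register aliasing tests over buffer sizes from 2^10 to 2^24 bytes; the test
// name suffix encodes the exponent.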
void addBufferSparseMemoryAliasingTests(tcu::TestCaseGroup *group, const bool useDeviceGroups)
{
    group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_10", 1 << 10,
                                                       glu::GLSL_VERSION_440, useDeviceGroups));
    group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_12", 1 << 12,
                                                       glu::GLSL_VERSION_440, useDeviceGroups));
    group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_16", 1 << 16,
                                                       glu::GLSL_VERSION_440, useDeviceGroups));
    group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_17", 1 << 17,
                                                       glu::GLSL_VERSION_440, useDeviceGroups));
    group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_20", 1 << 20,
                                                       glu::GLSL_VERSION_440, useDeviceGroups));
    group->addChild(new BufferSparseMemoryAliasingCase(group->getTestContext(), "buffer_size_2_24", 1 << 24,
                                                       glu::GLSL_VERSION_440, useDeviceGroups));
}

} // namespace sparse
} // namespace vkt