1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2020 The Khronos Group Inc.
6  * Copyright (c) 2020 Intel Corporation
7  * Copyright (c) 2023 LunarG, Inc.
8  * Copyright (c) 2023 Nintendo
9  *
10  * Licensed under the Apache License, Version 2.0 (the "License");
11  * you may not use this file except in compliance with the License.
12  * You may obtain a copy of the License at
13  *
14  *      http://www.apache.org/licenses/LICENSE-2.0
15  *
16  * Unless required by applicable law or agreed to in writing, software
17  * distributed under the License is distributed on an "AS IS" BASIS,
18  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19  * See the License for the specific language governing permissions and
20  * limitations under the License.
21  *
22  *//*!
23  * \file
24  * \brief VK_KHR_workgroup_memory_explicit_layout tests
25  *//*--------------------------------------------------------------------*/
26 
27 #include "vktComputeWorkgroupMemoryExplicitLayoutTests.hpp"
28 #include "vktAmberTestCase.hpp"
29 #include "vktTestCase.hpp"
30 #include "vktTestCaseUtil.hpp"
31 #include "vktTestGroupUtil.hpp"
32 
33 #include "vkBufferWithMemory.hpp"
34 #include "vkImageWithMemory.hpp"
35 #include "vkQueryUtil.hpp"
36 #include "vkBuilderUtil.hpp"
37 #include "vkCmdUtil.hpp"
38 #include "vkTypeUtil.hpp"
39 #include "vkObjUtil.hpp"
40 #include "vkDefs.hpp"
41 #include "vkRef.hpp"
42 
43 #include "tcuCommandLine.hpp"
44 #include "tcuTestLog.hpp"
45 
46 #include "deRandom.hpp"
47 #include "deStringUtil.hpp"
48 #include "deUniquePtr.hpp"
49 
50 #include <algorithm>
51 #include <vector>
52 #include <cassert>
53 
54 using namespace vk;
55 
56 namespace vkt
57 {
58 namespace compute
59 {
60 namespace
61 {
62 
63 struct CheckSupportParams
64 {
65     bool needsScalar;
66     bool needsInt8;
67     bool needsInt16;
68     bool needsInt64;
69     bool needsFloat16;
70     bool needsFloat64;
71     vk::ComputePipelineConstructionType computePipelineConstructionType;
72 
useTypevkt::compute::__anona2244def0111::CheckSupportParams73     void useType(glu::DataType dt)
74     {
75         using namespace glu;
76 
77         needsInt8 |= isDataTypeIntOrIVec8Bit(dt) || isDataTypeUintOrUVec8Bit(dt);
78         needsInt16 |= isDataTypeIntOrIVec16Bit(dt) || isDataTypeUintOrUVec16Bit(dt);
79         needsFloat16 |= isDataTypeFloat16OrVec(dt);
80         needsFloat64 |= isDataTypeDoubleOrDVec(dt);
81     }
82 };
83 
checkSupportWithParams(Context & context,const CheckSupportParams & params)84 void checkSupportWithParams(Context &context, const CheckSupportParams &params)
85 {
86     context.requireDeviceFunctionality("VK_KHR_workgroup_memory_explicit_layout");
87     context.requireDeviceFunctionality("VK_KHR_spirv_1_4");
88     checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
89                                   params.computePipelineConstructionType);
90 
91     VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR layout_features;
92     deMemset(&layout_features, 0, sizeof(layout_features));
93     layout_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR;
94     layout_features.pNext = DE_NULL;
95 
96     VkPhysicalDeviceShaderFloat16Int8Features f16_i8_features;
97     deMemset(&f16_i8_features, 0, sizeof(f16_i8_features));
98     f16_i8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES;
99     f16_i8_features.pNext = &layout_features;
100 
101     VkPhysicalDeviceFeatures2 features2;
102     deMemset(&features2, 0, sizeof(features2));
103     features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
104     features2.pNext = &f16_i8_features;
105     context.getInstanceInterface().getPhysicalDeviceFeatures2(context.getPhysicalDevice(), &features2);
106 
107     if (params.needsScalar)
108     {
109         if (layout_features.workgroupMemoryExplicitLayoutScalarBlockLayout != VK_TRUE)
110             TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayoutScalarBlockLayout not supported");
111     }
112 
113     if (params.needsInt8)
114     {
115         if (f16_i8_features.shaderInt8 != VK_TRUE)
116             TCU_THROW(NotSupportedError, "shaderInt8 not supported");
117         if (layout_features.workgroupMemoryExplicitLayout8BitAccess != VK_TRUE)
118             TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayout8BitAccess not supported");
119     }
120 
121     if (params.needsInt16)
122     {
123         if (features2.features.shaderInt16 != VK_TRUE)
124             TCU_THROW(NotSupportedError, "shaderInt16 not supported");
125         if (layout_features.workgroupMemoryExplicitLayout16BitAccess != VK_TRUE)
126             TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayout16BitAccess not supported");
127     }
128 
129     if (params.needsInt64)
130     {
131         if (features2.features.shaderInt64 != VK_TRUE)
132             TCU_THROW(NotSupportedError, "shaderInt64 not supported");
133     }
134 
135     if (params.needsFloat16)
136     {
137         if (f16_i8_features.shaderFloat16 != VK_TRUE)
138             TCU_THROW(NotSupportedError, "shaderFloat16 not supported");
139         if (layout_features.workgroupMemoryExplicitLayout16BitAccess != VK_TRUE)
140             TCU_THROW(NotSupportedError, "workgroupMemoryExplicitLayout16BitAccess not supported");
141     }
142 
143     if (params.needsFloat64)
144     {
145         if (features2.features.shaderFloat64 != VK_TRUE)
146             TCU_THROW(NotSupportedError, "shaderFloat64 not supported");
147     }
148 }
149 
runCompute(Context & context,uint32_t workgroupSize,const vk::ComputePipelineConstructionType computePipelineConstructionType)150 tcu::TestStatus runCompute(Context &context, uint32_t workgroupSize,
151                            const vk::ComputePipelineConstructionType computePipelineConstructionType)
152 {
153     const DeviceInterface &vk = context.getDeviceInterface();
154     const VkDevice device     = context.getDevice();
155     Allocator &allocator      = context.getDefaultAllocator();
156     tcu::TestLog &log         = context.getTestContext().getLog();
157 
158     de::MovePtr<BufferWithMemory> buffer;
159     VkDescriptorBufferInfo bufferDescriptor;
160 
161     VkDeviceSize size = sizeof(uint32_t) * workgroupSize;
162 
163     buffer           = de::MovePtr<BufferWithMemory>(new BufferWithMemory(
164         vk, device, allocator,
165         makeBufferCreateInfo(size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
166                                                  VK_BUFFER_USAGE_TRANSFER_SRC_BIT),
167         MemoryRequirement::HostVisible | MemoryRequirement::Cached));
168     bufferDescriptor = makeDescriptorBufferInfo(**buffer, 0, size);
169 
170     uint32_t *ptr = (uint32_t *)buffer->getAllocation().getHostPtr();
171 
172     deMemset(ptr, 0xFF, static_cast<std::size_t>(size));
173 
174     DescriptorSetLayoutBuilder layoutBuilder;
175     layoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
176 
177     Unique<VkDescriptorSetLayout> descriptorSetLayout(layoutBuilder.build(vk, device));
178     Unique<VkDescriptorPool> descriptorPool(
179         DescriptorPoolBuilder()
180             .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 1u)
181             .build(vk, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
182     Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(vk, device, *descriptorPool, *descriptorSetLayout));
183 
184     VkPipelineBindPoint bindPoint = VK_PIPELINE_BIND_POINT_COMPUTE;
185     flushAlloc(vk, device, buffer->getAllocation());
186 
187     ComputePipelineWrapper pipeline(vk, device, computePipelineConstructionType,
188                                     context.getBinaryCollection().get("comp"));
189     pipeline.setDescriptorSetLayout(descriptorSetLayout.get());
190     pipeline.buildPipeline();
191 
192     const VkQueue queue             = context.getUniversalQueue();
193     Move<VkCommandPool> cmdPool     = createCommandPool(vk, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
194                                                         context.getUniversalQueueFamilyIndex());
195     Move<VkCommandBuffer> cmdBuffer = allocateCommandBuffer(vk, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
196 
197     DescriptorSetUpdateBuilder setUpdateBuilder;
198     setUpdateBuilder.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0),
199                                  VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &bufferDescriptor);
200     setUpdateBuilder.update(vk, device);
201 
202     beginCommandBuffer(vk, *cmdBuffer, 0);
203 
204     vk.cmdBindDescriptorSets(*cmdBuffer, bindPoint, pipeline.getPipelineLayout(), 0u, 1, &*descriptorSet, 0u, DE_NULL);
205     pipeline.bind(*cmdBuffer);
206 
207     vk.cmdDispatch(*cmdBuffer, 1, 1, 1);
208 
209     endCommandBuffer(vk, *cmdBuffer);
210 
211     submitCommandsAndWait(vk, device, queue, cmdBuffer.get());
212 
213     invalidateAlloc(vk, device, buffer->getAllocation());
214     for (uint32_t i = 0; i < workgroupSize; ++i)
215     {
216         uint32_t expected = i;
217         if (ptr[i] != expected)
218         {
219             log << tcu::TestLog::Message << "failure at index " << i << ": expected " << expected << ", got: " << ptr[i]
220                 << tcu::TestLog::EndMessage;
221             return tcu::TestStatus::fail("compute failed");
222         }
223     }
224 
225     return tcu::TestStatus::pass("compute succeeded");
226 }
227 
228 class AliasTest : public vkt::TestCase
229 {
230 public:
231     enum Requirements
232     {
233         RequirementNone    = 0,
234         RequirementFloat16 = 1 << 0,
235         RequirementFloat64 = 1 << 1,
236         RequirementInt8    = 1 << 2,
237         RequirementInt16   = 1 << 3,
238         RequirementInt64   = 1 << 4,
239     };
240 
241     enum Flags
242     {
243         FlagNone         = 0,
244         FlagLayoutStd430 = 1 << 0,
245         FlagLayoutStd140 = 1 << 1,
246         FlagLayoutScalar = 1 << 2,
247         FlagFunction     = 1 << 3,
248         FlagBarrier      = 1 << 4,
249     };
250 
251     enum LayoutFlags
252     {
253         LayoutNone = 0,
254 
255         LayoutDefault = 1 << 0,
256         LayoutStd140  = 1 << 1,
257         LayoutStd430  = 1 << 2,
258         LayoutScalar  = 1 << 3,
259         LayoutAll     = LayoutDefault | LayoutStd140 | LayoutStd430 | LayoutScalar,
260 
261         LayoutCount = 4,
262     };
263 
264     enum Function
265     {
266         FunctionNone = 0,
267         FunctionRead,
268         FunctionWrite,
269         FunctionReadWrite,
270         FunctionCount,
271     };
272 
273     enum Synchronization
274     {
275         SynchronizationNone = 0,
276         SynchronizationBarrier,
277         SynchronizationCount,
278     };
279 
280     struct CaseDef
281     {
282         std::string extraTypes;
283 
284         std::string writeDesc;
285         std::string writeType;
286         std::string writeValue;
287 
288         std::string readDesc;
289         std::string readType;
290         std::string readValue;
291 
292         LayoutFlags layout;
293         Function func;
294         Synchronization sync;
295         Requirements requirements;
296 
CaseDefvkt::compute::__anona2244def0111::AliasTest::CaseDef297         CaseDef(const std::string &extraTypes_, const std::string &writeDesc_, const std::string &writeType_,
298                 const std::string &writeValue_, const std::string &readDesc_, const std::string &readType_,
299                 const std::string &readValue_, LayoutFlags layout_, Function func_, Synchronization sync_,
300                 Requirements requirements_)
301             : extraTypes(extraTypes_)
302             , writeDesc(writeDesc_)
303             , writeType(writeType_)
304             , writeValue(writeValue_)
305             , readDesc(readDesc_)
306             , readType(readType_)
307             , readValue(readValue_)
308             , layout(layout_)
309             , func(func_)
310             , sync(sync_)
311             , requirements(requirements_)
312         {
313         }
314 
testNamevkt::compute::__anona2244def0111::AliasTest::CaseDef315         std::string testName() const
316         {
317             std::string name = writeDesc + "_to_" + readDesc;
318 
319             // In a valid test case, only one flag will be set.
320             switch (layout)
321             {
322             case LayoutDefault:
323                 name += "_default";
324                 break;
325             case LayoutStd140:
326                 name += "_std140";
327                 break;
328             case LayoutStd430:
329                 name += "_std430";
330                 break;
331             case LayoutScalar:
332                 name += "_scalar";
333                 break;
334             default:
335                 DE_ASSERT(0);
336                 break;
337             }
338 
339             switch (func)
340             {
341             case FunctionNone:
342                 break;
343             case FunctionRead:
344                 name += "_func_read";
345                 break;
346             case FunctionWrite:
347                 name += "_func_write";
348                 break;
349             case FunctionReadWrite:
350                 name += "_func_read_write";
351                 break;
352             default:
353                 DE_ASSERT(0);
354                 break;
355             }
356 
357             switch (sync)
358             {
359             case SynchronizationNone:
360                 break;
361             case SynchronizationBarrier:
362                 name += "_barrier";
363                 break;
364             default:
365                 DE_ASSERT(0);
366                 break;
367             }
368 
369             return name;
370         }
371     };
372 
AliasTest(tcu::TestContext & testCtx,const CaseDef & caseDef,const vk::ComputePipelineConstructionType computePipelineConstructionType)373     AliasTest(tcu::TestContext &testCtx, const CaseDef &caseDef,
374               const vk::ComputePipelineConstructionType computePipelineConstructionType)
375         : TestCase(testCtx, caseDef.testName())
376         , m_caseDef(caseDef)
377         , m_computePipelineConstructionType(computePipelineConstructionType)
378     {
379     }
380 
381     virtual void checkSupport(Context &context) const;
382     void initPrograms(SourceCollections &sourceCollections) const;
383 
384     class Instance : public vkt::TestInstance
385     {
386     public:
Instance(Context & context,const CaseDef & caseDef,const vk::ComputePipelineConstructionType computePipelineConstructionType)387         Instance(Context &context, const CaseDef &caseDef,
388                  const vk::ComputePipelineConstructionType computePipelineConstructionType)
389             : TestInstance(context)
390             , m_caseDef(caseDef)
391             , m_computePipelineConstructionType(computePipelineConstructionType)
392         {
393         }
394 
iterate(void)395         tcu::TestStatus iterate(void)
396         {
397             return runCompute(m_context, 1u, m_computePipelineConstructionType);
398         }
399 
400     private:
401         CaseDef m_caseDef;
402         vk::ComputePipelineConstructionType m_computePipelineConstructionType;
403     };
404 
createInstance(Context & context) const405     TestInstance *createInstance(Context &context) const
406     {
407         return new Instance(context, m_caseDef, m_computePipelineConstructionType);
408     }
409 
410 private:
411     CaseDef m_caseDef;
412     vk::ComputePipelineConstructionType m_computePipelineConstructionType;
413 };
414 
checkSupport(Context & context) const415 void AliasTest::checkSupport(Context &context) const
416 {
417     CheckSupportParams p;
418     deMemset(&p, 0, sizeof(p));
419 
420     p.needsScalar                     = m_caseDef.layout == LayoutScalar;
421     p.needsInt8                       = m_caseDef.requirements & RequirementInt8;
422     p.needsInt16                      = m_caseDef.requirements & RequirementInt16;
423     p.needsInt64                      = m_caseDef.requirements & RequirementInt64;
424     p.needsFloat16                    = m_caseDef.requirements & RequirementFloat16;
425     p.needsFloat64                    = m_caseDef.requirements & RequirementFloat64;
426     p.computePipelineConstructionType = m_computePipelineConstructionType;
427 
428     checkSupportWithParams(context, p);
429 }
430 
initPrograms(SourceCollections & sourceCollections) const431 void AliasTest::initPrograms(SourceCollections &sourceCollections) const
432 {
433     std::string layout;
434     switch (m_caseDef.layout)
435     {
436     case LayoutStd140:
437         layout = "layout(std140)";
438         break;
439     case LayoutStd430:
440         layout = "layout(std430)";
441         break;
442     case LayoutScalar:
443         layout = "layout(scalar)";
444         break;
445     default:
446         // No layout specified.
447         break;
448     }
449 
450     std::ostringstream src;
451 
452     src << "#version 450\n";
453     src << "#extension GL_EXT_shared_memory_block : enable\n";
454     src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
455 
456     if (m_caseDef.layout == LayoutScalar)
457         src << "#extension GL_EXT_scalar_block_layout : enable\n";
458 
459     src << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
460 
461     if (!m_caseDef.extraTypes.empty())
462         src << m_caseDef.extraTypes << ";\n";
463 
464     src << layout << "shared A { " << m_caseDef.writeType << "; } a;\n";
465     src << layout << "shared B { " << m_caseDef.readType << "; } b;\n";
466     src << "layout(set = 0, binding = 0) buffer Result { uint result; };\n";
467 
468     if (m_caseDef.func == FunctionRead || m_caseDef.func == FunctionReadWrite)
469     {
470         src << "void read(int index) {\n";
471         src << "  if (b.v == " << m_caseDef.readValue << ")\n";
472         src << "    result = index;\n";
473         src << "}\n";
474     }
475 
476     if (m_caseDef.func == FunctionWrite || m_caseDef.func == FunctionReadWrite)
477     {
478         src << "void write(int index) {\n";
479         src << "  if (index == 0)\n";
480         src << "    a.v = " << m_caseDef.writeValue << ";\n";
481         src << "}\n";
482     }
483 
484     src << "void main() {\n";
485     src << "  int index = int(gl_LocalInvocationIndex);\n";
486 
487     if (m_caseDef.func == FunctionWrite)
488         src << "  write(index);\n";
489     else
490         src << "  a.v = " << m_caseDef.writeValue << ";\n";
491 
492     if (m_caseDef.sync == SynchronizationBarrier)
493         src << "  barrier();\n";
494 
495     if (m_caseDef.func == FunctionRead || m_caseDef.func == FunctionReadWrite)
496     {
497         src << "  read(index);\n";
498     }
499     else
500     {
501         src << "  if (b.v == " << m_caseDef.readValue << ")\n";
502         src << "    result = index;\n";
503     }
504     src << "}\n";
505 
506     uint32_t buildFlags = m_caseDef.layout == LayoutScalar ? ShaderBuildOptions::FLAG_ALLOW_WORKGROUP_SCALAR_OFFSETS :
507                                                              ShaderBuildOptions::Flags(0u);
508 
509     sourceCollections.glslSources.add("comp")
510         << glu::ComputeSource(src.str())
511         << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4, buildFlags, true);
512 }
513 
// Formats values as a GLSL array constructor of the given element type,
// e.g. "T[](T(1), T(2))". Every element is printed in decimal and wrapped in
// its own constructor call; an empty input yields "T[]()".
std::string makeArray(const std::string &type, const std::vector<uint64_t> &values)
{
    std::ostringstream out;
    out << type << "[](";

    // Emit the separator in front of every element except the first.
    const char *separator = "";
    for (const uint64_t value : values)
    {
        out << separator << type << "(" << std::to_string(value) << ")";
        separator = ", ";
    }

    out << ")";
    return out.str();
}
527 
makeU8Array(const std::vector<uint64_t> & values)528 std::string makeU8Array(const std::vector<uint64_t> &values)
529 {
530     return makeArray("uint8_t", values);
531 }
532 
makeU16Array(const std::vector<uint64_t> & values)533 std::string makeU16Array(const std::vector<uint64_t> &values)
534 {
535     return makeArray("uint16_t", values);
536 }
537 
makeU32Array(const std::vector<uint64_t> & values)538 std::string makeU32Array(const std::vector<uint64_t> &values)
539 {
540     return makeArray("uint32_t", values);
541 }
542 
// Populates group with every AliasTest variant.
//
// A table of base cases is expanded over all synchronization modes, function
// modes and individual layouts; a base case only produces a test for the
// layouts contained in its layout mask. Children are added in the order
// case -> sync -> function -> layout.
void AddAliasTests(tcu::TestCaseGroup *group, vk::ComputePipelineConstructionType computePipelineConstructionType)
{
    // Short aliases for the layout bits, kept as int so they can be OR-ed in the table below.
    const int DEFAULT = AliasTest::LayoutDefault;
    const int STD140  = AliasTest::LayoutStd140;
    const int STD430  = AliasTest::LayoutStd430;
    const int SCALAR  = AliasTest::LayoutScalar;
    const int ALL     = DEFAULT | STD140 | STD430 | SCALAR;

    // Short aliases for the requirement bits.
    const int FLOAT16 = AliasTest::RequirementFloat16;
    const int FLOAT64 = AliasTest::RequirementFloat64;
    const int INT8    = AliasTest::RequirementInt8;
    const int INT16   = AliasTest::RequirementInt16;
    const int INT64   = AliasTest::RequirementInt64;

    // CASE_EXTRA arguments: L = layout mask, R = requirement mask, E = extra
    // type declarations, D1/T1/V1 = description, member declaration and value
    // of the written block, D2/T2/V2 = the same for the block read back.
    // Function and synchronization are placeholders that the loops below overwrite.
#define CASE_EXTRA(L, R, E, D1, T1, V1, D2, T2, V2)                                                                    \
    {                                                                                                                  \
        E, D1, T1, V1, D2, T2, V2, AliasTest::LayoutFlags(L), AliasTest::FunctionNone, AliasTest::SynchronizationNone, \
            AliasTest::Requirements(R)                                                                                 \
    }

    // Emits the case twice: as given, and with the write and read sides swapped.
#define CASE_EXTRA_WITH_REVERSE(L, R, E, D1, T1, V1, D2, T2, V2) \
    CASE_EXTRA(L, R, E, D1, T1, V1, D2, T2, V2), CASE_EXTRA(L, R, E, D2, T2, V2, D1, T1, V1)

    // Variants without extra type declarations.
#define CASE_WITH_REVERSE(L, R, D1, T1, V1, D2, T2, V2) CASE_EXTRA_WITH_REVERSE(L, R, "", D1, T1, V1, D2, T2, V2)
#define CASE(L, R, D1, T1, V1, D2, T2, V2) CASE_EXTRA(L, R, "", D1, T1, V1, D2, T2, V2)

    const std::vector<AliasTest::CaseDef> cases{
        // Same-size scalar reinterpretation: signed <-> unsigned and float <-> unsigned bit patterns.
        CASE_WITH_REVERSE(ALL, INT8, "i8", "int8_t v", "int8_t(-2)", "u8", "uint8_t v", "uint8_t(0xFE)"),
        CASE_WITH_REVERSE(ALL, INT16, "i16", "int16_t v", "int16_t(-2)", "u16", "uint16_t v", "uint16_t(0xFFFE)"),
        CASE_WITH_REVERSE(ALL, 0, "i32", "int32_t v", "int32_t(-2)", "u32", "uint32_t v", "uint32_t(0xFFFFFFFE)"),
        CASE_WITH_REVERSE(ALL, INT64, "i64", "int64_t v", "int64_t(-2UL)", "u64", "uint64_t v",
                          "uint64_t(0xFFFFFFFFFFFFFFFEUL)"),
        CASE_WITH_REVERSE(ALL, FLOAT16 | INT16, "f16", "float16_t v", "float16_t(1.0)", "u16", "uint16_t v",
                          "uint16_t(0x3C00)"),
        CASE_WITH_REVERSE(ALL, 0, "f32", "float32_t v", "float32_t(1.0)", "u32", "uint32_t v", "uint32_t(0x3F800000)"),
        CASE_WITH_REVERSE(ALL, FLOAT64 | INT64, "f64", "float64_t v", "float64_t(1.0)", "u64", "uint64_t v",
                          "uint64_t(0x3FF0000000000000UL)"),

        // Scalar <-> array of narrower unsigned integers; array elements are listed least significant first.
        // These cases are not instantiated for std140.
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16 | INT8, "u16", "uint16_t v", "uint16_t(0x1234)", "u8_array",
                          "uint8_t v[2]", makeU8Array({0x34, 0x12})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8, "u32", "uint32_t v", "uint32_t(0x12345678)", "u8_array",
                          "uint8_t v[4]", makeU8Array({0x78, 0x56, 0x34, 0x12})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16, "u32", "uint32_t v", "uint32_t(0x12345678)", "u16_array",
                          "uint16_t v[2]", makeU16Array({0x5678, 0x1234})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT8, "u64", "uint64_t v",
                          "uint64_t(0x1234567890ABCDEFUL)", "u8_array", "uint8_t v[8]",
                          makeU8Array({0xEF, 0xCD, 0xAB, 0x90, 0x78, 0x56, 0x34, 0x12})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT16, "u64", "uint64_t v",
                          "uint64_t(0x1234567890ABCDEFUL)", "u16_array", "uint16_t v[4]",
                          makeU16Array({0xCDEF, 0x90AB, 0x5678, 0x1234})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64, "u64", "uint64_t v", "uint64_t(0x1234567890ABCDEFUL)",
                          "u32_array", "uint32_t v[2]", makeU32Array({0x90ABCDEF, 0x12345678})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16 | INT8, "i16", "int16_t v", "int16_t(-2)", "u8_array",
                          "uint8_t v[2]", makeU8Array({0xFE, 0xFF})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8, "i32", "int32_t v", "int32_t(-2)", "u8_array",
                          "uint8_t v[4]", makeU8Array({0xFE, 0xFF, 0xFF, 0xFF})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16, "i32", "int32_t v", "int32_t(-2)", "u16_array",
                          "uint16_t v[2]", makeU16Array({0xFFFE, 0xFFFF})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT8, "i64", "int64_t v", "int64_t(-2UL)", "u8_array",
                          "uint8_t v[8]", makeU8Array({0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64 | INT16, "i64", "int64_t v", "int64_t(-2UL)", "u16_array",
                          "uint16_t v[4]", makeU16Array({0xFFFE, 0xFFFF, 0xFFFF, 0xFFFF})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT64, "i64", "int64_t v", "int64_t(-2UL)", "u32_array",
                          "uint32_t v[2]", makeU32Array({0xFFFFFFFE, 0xFFFFFFFF})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT16 | INT8, "f16", "float16_t v", "float16_t(1.0)", "u8_array",
                          "uint8_t v[2]", makeU8Array({0x00, 0x3C})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT8, "f32", "float32_t v", "float32_t(1.0)", "u8_array",
                          "uint8_t v[4]", makeU8Array({0x00, 0x00, 0x80, 0x3F})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, INT16, "f32", "float32_t v", "float32_t(1.0)", "u16_array",
                          "uint16_t v[2]", makeU16Array({0x0000, 0x3F80})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT64 | INT8, "f64", "float64_t v", "float64_t(1.0)", "u8_array",
                          "uint8_t v[8]", makeU8Array({0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF0, 0x3F})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT64 | INT16, "f64", "float64_t v", "float64_t(1.0)",
                          "u16_array", "uint16_t v[4]", makeU16Array({0x0000, 0x0000, 0x0000, 0x3FF0})),
        CASE_WITH_REVERSE(DEFAULT | STD430 | SCALAR, FLOAT64, "f64", "float64_t v", "float64_t(1.0)", "u32_array",
                          "uint32_t v[2]", makeU32Array({0x00000000, 0x3FF00000})),

        // vec4 array written, vec2 array read: separate entries per layout because the read-side
        // array length and the written values differ between them.
        CASE(DEFAULT | STD430, 0, "vec4_array", "vec4 v[3]",
             "vec4[](vec4(1, 1, 2, 2), vec4(3, 3, 4, 4), vec4(5, 5, 6, 6))", "vec2_array", "vec2 v[6]",
             "vec2[](vec2(1), vec2(2), vec2(3), vec2(4), vec2(5), vec2(6))"),
        CASE(STD140, 0, "vec4_array", "vec4 v[3]",
             "vec4[](vec4(1, 1, 999, 999), vec4(2, 2, 999, 999), vec4(3, 3, 999, 999))", "vec2_array", "vec2 v[3]",
             "vec2[](vec2(1), vec2(2), vec2(3))"),
        CASE(SCALAR, 0, "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 2, 2), vec4(3, 3, 4, 4), vec4(5, 5, 6, 6))",
             "vec2_array", "vec2 v[6]", "vec2[](vec2(1), vec2(2), vec2(3), vec2(4), vec2(5), vec2(6))"),

        // vec4 array written, vec3 array read; only the scalar variant reads four tightly packed vec3 values.
        CASE(DEFAULT | STD430, 0, "vec4_array", "vec4 v[3]",
             "vec4[](vec4(1, 1, 1, 999), vec4(2, 2, 2, 999), vec4(3, 3, 3, 999))", "vec3_array", "vec3 v[3]",
             "vec3[](vec3(1), vec3(2), vec3(3))"),
        CASE(STD140, 0, "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 1, 999), vec4(2, 2, 2, 999), vec4(3, 3, 3, 999))",
             "vec3_array", "vec3 v[3]", "vec3[](vec3(1), vec3(2), vec3(3))"),
        CASE(SCALAR, 0, "vec4_array", "vec4 v[3]", "vec4[](vec4(1, 1, 1, 2), vec4(2, 2, 3, 3), vec4(3, 4, 4, 4))",
             "vec3_array", "vec3 v[4]", "vec3[](vec3(1), vec3(2), vec3(3), vec3(4))"),

        // Cases reading back through a user-defined struct declared via the extra types string.
        CASE_EXTRA(DEFAULT | STD430 | SCALAR, INT8, "struct s { int a; int b; }", "u8_array", "uint8_t v[8]",
                   makeU8Array({2, 0, 0, 0, 0xFE, 0xFF, 0xFF, 0xFF}), "struct_int_int", "s v", "s(2, -2)"),
        CASE_EXTRA(ALL, 0, "struct s { int a; int b; }", "uvec2", "uvec2 v", "uvec2(2, 0xFFFFFFFE)", "struct_int_int",
                   "s v", "s(2, -2)"),
    };

#undef CASE_EXTRA
#undef CASE_EXTRA_WITH_REVERSE
#undef CASE_WITH_REVERSE
#undef CASE

    // Expand every base case over synchronization x function x single layout.
    for (uint32_t i = 0; i < cases.size(); i++)
    {
        for (int syncIndex = 0; syncIndex < AliasTest::SynchronizationCount; syncIndex++)
        {
            const AliasTest::Synchronization sync = AliasTest::Synchronization(syncIndex);

            for (int funcIndex = 0; funcIndex < AliasTest::FunctionCount; funcIndex++)
            {
                const AliasTest::Function func = AliasTest::Function(funcIndex);

                for (int layoutIndex = 0; layoutIndex < AliasTest::LayoutCount; layoutIndex++)
                {
                    // Layout flags are single bits, so bit layoutIndex selects exactly one layout.
                    const AliasTest::LayoutFlags layout = AliasTest::LayoutFlags(1 << layoutIndex);

                    AliasTest::CaseDef c = cases[i];

                    // The test name is built from both descriptions, so they have to differ.
                    assert(c.writeDesc != c.readDesc);

                    // Skip layouts this base case was not declared for.
                    if ((c.layout & layout) == 0)
                        continue;

                    // Narrow the copied case down to this one concrete variant.
                    c.layout = layout;
                    c.func   = func;
                    c.sync   = sync;

                    group->addChild(new AliasTest(group->getTestContext(), c, computePipelineConstructionType));
                }
            }
        }
    }
}
679 
680 class ZeroTest : public vkt::TestCase
681 {
682 public:
683     struct CaseDef
684     {
685         glu::DataType zeroElementType;
686         glu::DataType fieldType[2];
687         uint32_t elements;
688 
testNamevkt::compute::__anona2244def0111::ZeroTest::CaseDef689         std::string testName() const
690         {
691             std::string name = glu::getDataTypeName(zeroElementType);
692             name += "_array_to";
693 
694             for (uint32_t i = 0; i < DE_LENGTH_OF_ARRAY(fieldType); ++i)
695             {
696                 if (fieldType[i] == glu::TYPE_INVALID)
697                     break;
698                 name += "_";
699                 name += glu::getDataTypeName(fieldType[i]);
700             }
701             name += "_array_" + de::toString(elements);
702             return name;
703         }
704     };
705 
ZeroTest(tcu::TestContext & testCtx,const CaseDef & caseDef,const vk::ComputePipelineConstructionType computePipelineConstructionType)706     ZeroTest(tcu::TestContext &testCtx, const CaseDef &caseDef,
707              const vk::ComputePipelineConstructionType computePipelineConstructionType)
708         : TestCase(testCtx, caseDef.testName())
709         , m_caseDef(caseDef)
710         , m_computePipelineConstructionType(computePipelineConstructionType)
711     {
712     }
713 
714     virtual void checkSupport(Context &context) const;
715     void initPrograms(SourceCollections &sourceCollections) const;
716 
717     class Instance : public vkt::TestInstance
718     {
719     public:
Instance(Context & context,const vk::ComputePipelineConstructionType computePipelineConstructionType)720         Instance(Context &context, const vk::ComputePipelineConstructionType computePipelineConstructionType)
721             : TestInstance(context)
722             , m_computePipelineConstructionType(computePipelineConstructionType)
723         {
724         }
725 
iterate(void)726         tcu::TestStatus iterate(void)
727         {
728             return runCompute(m_context, 1u, m_computePipelineConstructionType);
729         }
730 
731     private:
732         vk::ComputePipelineConstructionType m_computePipelineConstructionType;
733     };
734 
createInstance(Context & context) const735     TestInstance *createInstance(Context &context) const
736     {
737         return new Instance(context, m_computePipelineConstructionType);
738     }
739 
740 private:
741     CaseDef m_caseDef;
742     vk::ComputePipelineConstructionType m_computePipelineConstructionType;
743 };
744 
checkSupport(Context & context) const745 void ZeroTest::checkSupport(Context &context) const
746 {
747     CheckSupportParams p;
748     deMemset(&p, 0, sizeof(p));
749 
750     DE_ASSERT(!glu::isDataTypeFloat16OrVec(m_caseDef.zeroElementType));
751 
752     p.useType(m_caseDef.zeroElementType);
753     p.useType(m_caseDef.fieldType[0]);
754     p.useType(m_caseDef.fieldType[1]);
755     p.computePipelineConstructionType = m_computePipelineConstructionType;
756 
757     checkSupportWithParams(context, p);
758 }
759 
getDataTypeLiteral(glu::DataType dt,std::string baseValue)760 std::string getDataTypeLiteral(glu::DataType dt, std::string baseValue)
761 {
762     using namespace glu;
763 
764     if (isDataTypeVector(dt))
765     {
766         std::string elemValue = getDataTypeLiteral(getDataTypeScalarType(dt), baseValue);
767 
768         std::ostringstream result;
769         result << getDataTypeName(dt) << "(";
770         for (int i = 0; i < getDataTypeScalarSize(dt); ++i)
771         {
772             if (i > 0)
773                 result << ", ";
774             result << elemValue;
775         }
776         result << ")";
777         return result.str();
778     }
779     else if (isDataTypeScalar(dt))
780     {
781         return getDataTypeName(dt) + std::string("(") + baseValue + std::string(")");
782     }
783     else
784     {
785         DE_ASSERT(0);
786         return std::string();
787     }
788 }
789 
initPrograms(SourceCollections & sourceCollections) const790 void ZeroTest::initPrograms(SourceCollections &sourceCollections) const
791 {
792     using namespace glu;
793 
794     std::ostringstream src;
795 
796     src << "#version 450\n"
797         << "#extension GL_EXT_shared_memory_block : enable\n"
798         << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n"
799         << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
800 
801     // Large enough to cover the largest B block even if just 8-bit elements.
802     // Small enough to fit in the minimum shared memory size limit even if with uvec4.
803     src << "shared A { " << getDataTypeName(m_caseDef.zeroElementType) << " arr[256]; } zero;\n";
804 
805     src << "struct st {\n"
806         << "    " << getDataTypeName(m_caseDef.fieldType[0]) << " x;\n";
807     if (m_caseDef.fieldType[1])
808         src << "    " << getDataTypeName(m_caseDef.fieldType[1]) << " y;\n";
809     src << "};\n";
810 
811     src << "shared B { st arr[4]; };\n"
812         << "layout(set = 0, binding = 0) buffer Result { uint result; };\n"
813         << "void main() {\n"
814         << "for (int i = 0; i < zero.arr.length(); i++) {\n"
815         << "    zero.arr[i] = " << getDataTypeLiteral(m_caseDef.zeroElementType, "1") << ";\n"
816         << "  }\n"
817         << "  for (int i = 0; i < zero.arr.length(); i++) {\n"
818         << "    zero.arr[i] = " << getDataTypeLiteral(m_caseDef.zeroElementType, "0") << ";\n"
819         << "  }\n"
820         << "  result = (\n";
821 
822     for (uint32_t i = 0; i < 4; i++)
823     {
824         src << "    ";
825         if (i > 0)
826             src << "&& ";
827         src << "(arr[" << de::toString(i) << "].x == " << getDataTypeLiteral(m_caseDef.fieldType[0], "0") << ")\n";
828         if (m_caseDef.fieldType[1])
829             src << "    && (arr[" << de::toString(i) << "].y == " << getDataTypeLiteral(m_caseDef.fieldType[1], "0")
830                 << ")\n";
831     }
832 
833     src << "  ) ? 0 : 0xFF;\n"
834         << "}\n";
835 
836     sourceCollections.glslSources.add("comp")
837         << ComputeSource(src.str())
838         << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
839                                   vk::ShaderBuildOptions::Flags(0u), true);
840 }
841 
isTestedZeroElementType(glu::DataType dt)842 bool isTestedZeroElementType(glu::DataType dt)
843 {
844     using namespace glu;
845 
846     // Select only a few interesting types.
847     switch (dt)
848     {
849     case TYPE_UINT:
850     case TYPE_UINT_VEC4:
851     case TYPE_UINT8:
852     case TYPE_UINT8_VEC4:
853     case TYPE_UINT16:
854         return true;
855     default:
856         return false;
857     }
858 }
859 
isTestedFieldType(glu::DataType dt)860 bool isTestedFieldType(glu::DataType dt)
861 {
862     using namespace glu;
863 
864     // Select only a few interesting types.
865     switch (dt)
866     {
867     case TYPE_UINT:
868     case TYPE_UINT_VEC3:
869     case TYPE_UINT8:
870     case TYPE_UINT16:
871     case TYPE_FLOAT:
872     case TYPE_FLOAT_VEC4:
873     case TYPE_FLOAT16:
874     case TYPE_DOUBLE:
875     case TYPE_DOUBLE_VEC4:
876     case TYPE_BOOL:
877         return true;
878 
879     default:
880         return false;
881     }
882 }
883 
AddZeroTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType computePipelineConstructionType)884 void AddZeroTests(tcu::TestCaseGroup *group, vk::ComputePipelineConstructionType computePipelineConstructionType)
885 {
886     using namespace glu;
887 
888     ZeroTest::CaseDef c;
889 
890     for (uint32_t i = 0; i < TYPE_LAST; ++i)
891     {
892         c.zeroElementType = DataType(i);
893 
894         if (isTestedZeroElementType(c.zeroElementType))
895         {
896             uint32_t idx[2] = {0, 0};
897 
898             while (idx[1] < TYPE_LAST && idx[0] < TYPE_LAST)
899             {
900                 c.fieldType[0] = DataType(idx[0]);
901                 c.fieldType[1] = DataType(idx[1]);
902 
903                 if (isTestedFieldType(c.fieldType[0]) &&
904                     (c.fieldType[1] == TYPE_INVALID || isTestedFieldType(c.fieldType[1])))
905                 {
906                     for (uint32_t elements = 1; elements <= 4; ++elements)
907                     {
908                         c.elements = elements;
909                         group->addChild(new ZeroTest(group->getTestContext(), c, computePipelineConstructionType));
910                     }
911                 }
912 
913                 idx[0]++;
914                 if (idx[0] >= TYPE_LAST)
915                 {
916                     idx[1]++;
917                     idx[0] = 0;
918                 }
919             }
920         }
921     }
922 }
923 
924 class PaddingTest : public vkt::TestCase
925 {
926 public:
927     struct CaseDef
928     {
929         std::vector<glu::DataType> types;
930         std::vector<uint32_t> offsets;
931         std::vector<std::string> values;
932         uint32_t expected[32];
933 
testNamevkt::compute::__anona2244def0111::PaddingTest::CaseDef934         std::string testName() const
935         {
936             DE_ASSERT(types.size() > 0);
937             DE_ASSERT(types.size() == offsets.size());
938             DE_ASSERT(types.size() == values.size());
939 
940             std::string name;
941             for (uint32_t i = 0; i < types.size(); ++i)
942             {
943                 if (i > 0)
944                     name += "_";
945                 name += glu::getDataTypeName(types[i]);
946                 name += "_" + de::toString(offsets[i]);
947             }
948             return name;
949         }
950 
addvkt::compute::__anona2244def0111::PaddingTest::CaseDef951         void add(glu::DataType dt, uint32_t offset, const std::string &v)
952         {
953             types.push_back(dt);
954             offsets.push_back(offset);
955             values.push_back(v);
956         }
957 
needsScalarvkt::compute::__anona2244def0111::PaddingTest::CaseDef958         bool needsScalar() const
959         {
960             for (uint32_t i = 0; i < offsets.size(); ++i)
961             {
962                 if (offsets[i] % 4 != 0)
963                     return true;
964             }
965             return false;
966         }
967     };
968 
PaddingTest(tcu::TestContext & testCtx,const CaseDef & caseDef,const vk::ComputePipelineConstructionType computePipelineConstructionType)969     PaddingTest(tcu::TestContext &testCtx, const CaseDef &caseDef,
970                 const vk::ComputePipelineConstructionType computePipelineConstructionType)
971         : TestCase(testCtx, caseDef.testName())
972         , m_caseDef(caseDef)
973         , m_computePipelineConstructionType(computePipelineConstructionType)
974     {
975     }
976 
977     virtual void checkSupport(Context &context) const;
978     void initPrograms(SourceCollections &sourceCollections) const;
979 
980     class Instance : public vkt::TestInstance
981     {
982     public:
Instance(Context & context,const CaseDef & caseDef,const vk::ComputePipelineConstructionType computePipelineConstructionType)983         Instance(Context &context, const CaseDef &caseDef,
984                  const vk::ComputePipelineConstructionType computePipelineConstructionType)
985             : TestInstance(context)
986             , m_caseDef(caseDef)
987             , m_computePipelineConstructionType(computePipelineConstructionType)
988         {
989         }
990 
iterate(void)991         tcu::TestStatus iterate(void)
992         {
993             return runCompute(m_context, 1u, m_computePipelineConstructionType);
994         }
995 
996     private:
997         CaseDef m_caseDef;
998         vk::ComputePipelineConstructionType m_computePipelineConstructionType;
999     };
1000 
createInstance(Context & context) const1001     TestInstance *createInstance(Context &context) const
1002     {
1003         return new Instance(context, m_caseDef, m_computePipelineConstructionType);
1004     }
1005 
1006 private:
1007     CaseDef m_caseDef;
1008     vk::ComputePipelineConstructionType m_computePipelineConstructionType;
1009 };
1010 
checkSupport(Context & context) const1011 void PaddingTest::checkSupport(Context &context) const
1012 {
1013     CheckSupportParams p;
1014     deMemset(&p, 0, sizeof(p));
1015 
1016     for (uint32_t i = 0; i < m_caseDef.types.size(); ++i)
1017         p.useType(m_caseDef.types[i]);
1018 
1019     p.needsScalar                     = m_caseDef.needsScalar();
1020     p.computePipelineConstructionType = m_computePipelineConstructionType;
1021 
1022     checkSupportWithParams(context, p);
1023 }
1024 
initPrograms(SourceCollections & sourceCollections) const1025 void PaddingTest::initPrograms(SourceCollections &sourceCollections) const
1026 {
1027     using namespace glu;
1028 
1029     std::ostringstream src;
1030 
1031     src << "#version 450\n"
1032         << "#extension GL_EXT_shared_memory_block : enable\n"
1033         << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n"
1034         << "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n";
1035 
1036     src << "shared A { uint32_t words[32]; };\n";
1037 
1038     if (m_caseDef.needsScalar())
1039     {
1040         src << "#extension GL_EXT_scalar_block_layout : enable\n"
1041             << "layout (scalar) ";
1042     }
1043 
1044     src << "shared B {\n";
1045 
1046     for (uint32_t i = 0; i < m_caseDef.types.size(); ++i)
1047     {
1048         src << "  layout(offset = " << m_caseDef.offsets[i] << ") " << glu::getDataTypeName(m_caseDef.types[i]) << " x"
1049             << i << ";\n";
1050     }
1051 
1052     src << "};\n"
1053         << "layout(set = 0, binding = 0) buffer Result { uint result; };\n";
1054 
1055     src << "void main() {\n"
1056         << "for (int i = 0; i < 32; i++) words[i] = 0;\n";
1057 
1058     for (uint32_t i = 0; i < m_caseDef.values.size(); ++i)
1059         src << "x" << i << " = " << m_caseDef.values[i] << ";\n";
1060 
1061     src << "result = 32;\n";
1062     for (uint32_t i = 0; i < 32; ++i)
1063     {
1064         src << "if (words[" << std::dec << i << "] == 0x" << std::uppercase << std::hex << m_caseDef.expected[i]
1065             << ") result--;\n";
1066     }
1067 
1068     src << "}\n";
1069 
1070     sourceCollections.glslSources.add("comp")
1071         << ComputeSource(src.str())
1072         << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
1073                                   vk::ShaderBuildOptions::Flags(0u), true);
1074 }
1075 
AddPaddingTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType computePipelineConstructionType)1076 void AddPaddingTests(tcu::TestCaseGroup *group, vk::ComputePipelineConstructionType computePipelineConstructionType)
1077 {
1078     using namespace glu;
1079 
1080     for (uint32_t i = 0; i < 31; ++i)
1081     {
1082         for (uint32_t j = i + 1; j < 32; j += 4)
1083         {
1084             PaddingTest::CaseDef c;
1085             deMemset(&c, 0, sizeof(c));
1086 
1087             c.add(TYPE_UINT, 4 * i, "0x1234");
1088             c.expected[i] = 0x1234;
1089 
1090             c.add(TYPE_UINT, 4 * j, "0x5678");
1091             c.expected[j] = 0x5678;
1092 
1093             group->addChild(new PaddingTest(group->getTestContext(), c, computePipelineConstructionType));
1094         }
1095     }
1096 
1097     for (uint32_t i = 0; i < 127; ++i)
1098     {
1099         for (uint32_t j = i + 1; j < 32; j += 16)
1100         {
1101             PaddingTest::CaseDef c;
1102             deMemset(&c, 0, sizeof(c));
1103 
1104             uint8_t *expected = reinterpret_cast<uint8_t *>(c.expected);
1105 
1106             c.add(TYPE_UINT8, i, "uint8_t(0xAA)");
1107             expected[i] = 0xAA;
1108 
1109             c.add(TYPE_UINT8, j, "uint8_t(0xBB)");
1110             expected[j] = 0xBB;
1111 
1112             group->addChild(new PaddingTest(group->getTestContext(), c, computePipelineConstructionType));
1113         }
1114     }
1115 }
1116 
1117 class SizeTest : public vkt::TestCase
1118 {
1119 public:
SizeTest(tcu::TestContext & testCtx,uint32_t size,const vk::ComputePipelineConstructionType computePipelineConstructionType)1120     SizeTest(tcu::TestContext &testCtx, uint32_t size,
1121              const vk::ComputePipelineConstructionType computePipelineConstructionType)
1122         : TestCase(testCtx, de::toString(size))
1123         , m_size(size)
1124         , m_computePipelineConstructionType(computePipelineConstructionType)
1125     {
1126         DE_ASSERT(size % 8 == 0);
1127     }
1128 
1129     virtual void checkSupport(Context &context) const;
1130     void initPrograms(SourceCollections &sourceCollections) const;
1131 
1132     class Instance : public vkt::TestInstance
1133     {
1134     public:
Instance(Context & context,const vk::ComputePipelineConstructionType computePipelineConstructionType)1135         Instance(Context &context, const vk::ComputePipelineConstructionType computePipelineConstructionType)
1136             : TestInstance(context)
1137             , m_computePipelineConstructionType(computePipelineConstructionType)
1138         {
1139         }
1140 
iterate(void)1141         tcu::TestStatus iterate(void)
1142         {
1143             return runCompute(m_context, 1u, m_computePipelineConstructionType);
1144         }
1145 
1146     private:
1147         vk::ComputePipelineConstructionType m_computePipelineConstructionType;
1148     };
1149 
createInstance(Context & context) const1150     TestInstance *createInstance(Context &context) const
1151     {
1152         return new Instance(context, m_computePipelineConstructionType);
1153     }
1154 
1155 private:
1156     uint32_t m_size;
1157     vk::ComputePipelineConstructionType m_computePipelineConstructionType;
1158 };
1159 
checkSupport(Context & context) const1160 void SizeTest::checkSupport(Context &context) const
1161 {
1162     context.requireDeviceFunctionality("VK_KHR_workgroup_memory_explicit_layout");
1163     context.requireDeviceFunctionality("VK_KHR_spirv_1_4");
1164 
1165     if (context.getDeviceProperties().limits.maxComputeSharedMemorySize < m_size)
1166         TCU_THROW(NotSupportedError, "Not enough shared memory supported.");
1167 
1168     checkShaderObjectRequirements(context.getInstanceInterface(), context.getPhysicalDevice(),
1169                                   m_computePipelineConstructionType);
1170 }
1171 
initPrograms(SourceCollections & sourceCollections) const1172 void SizeTest::initPrograms(SourceCollections &sourceCollections) const
1173 {
1174     using namespace glu;
1175 
1176     std::ostringstream src;
1177 
1178     src << "#version 450\n";
1179     src << "#extension GL_EXT_shared_memory_block : enable\n";
1180     src << "#extension GL_EXT_shader_explicit_arithmetic_types : enable\n";
1181     src << "layout(local_size_x = 8, local_size_y = 1, local_size_z = 1) in;\n";
1182 
1183     for (uint32_t i = 0; i < 8; ++i)
1184         src << "shared B" << i << " { uint32_t words[" << (m_size / 4) << "]; } b" << i << ";\n";
1185 
1186     src << "layout(set = 0, binding = 0) buffer Result { uint result; };\n";
1187 
1188     src << "void main() {\n";
1189     src << "  int index = int(gl_LocalInvocationIndex);\n";
1190     src << "  int size = " << (m_size / 4) << ";\n";
1191 
1192     src << "  if (index == 0) for (int x = 0; x < size; x++) b0.words[x] = 0xFFFF;\n";
1193     src << "  barrier();\n";
1194 
1195     src << "  for (int x = 0; x < size; x++) {\n";
1196     src << "    if (x % 8 != index) continue;\n";
1197     for (uint32_t i = 0; i < 8; ++i)
1198         src << "    if (index == " << i << ") b" << i << ".words[x] = (x << 3) | " << i << ";\n";
1199     src << "  }\n";
1200 
1201     src << "  barrier();\n";
1202     src << "  if (index != 0) return;\n";
1203 
1204     src << "  int r = size;\n";
1205     src << "  for (int x = 0; x < size; x++) {\n";
1206     src << "    int expected = (x << 3) | (x % 8);\n";
1207     src << "    if (b0.words[x] == expected) r--;\n";
1208     src << "  }\n";
1209     src << "  result = r;\n";
1210     src << "}\n";
1211 
1212     sourceCollections.glslSources.add("comp")
1213         << ComputeSource(src.str())
1214         << vk::ShaderBuildOptions(sourceCollections.usedVulkanVersion, vk::SPIRV_VERSION_1_4,
1215                                   vk::ShaderBuildOptions::Flags(0u), true);
1216 }
1217 
AddSizeTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType computePipelineConstructionType)1218 void AddSizeTests(tcu::TestCaseGroup *group, vk::ComputePipelineConstructionType computePipelineConstructionType)
1219 {
1220     uint32_t sizes[] = {
1221         8u,
1222         64u,
1223         4096u,
1224 
1225         // Dynamic generation of shaders based on properties reported
1226         // by devices is not allowed in the CTS, so let's create a few
1227         // variants based on common known maximum sizes.
1228         16384u,
1229         32768u,
1230         49152u,
1231         65536u,
1232     };
1233 
1234     for (uint32_t i = 0; i < DE_LENGTH_OF_ARRAY(sizes); ++i)
1235         group->addChild(new SizeTest(group->getTestContext(), sizes[i], computePipelineConstructionType));
1236 }
1237 
CreateAmberTestCase(tcu::TestContext & testCtx,const char * name,const std::string & filename,const std::vector<std::string> & requirements=std::vector<std::string> (),bool zeroinit=false,bool shaderObjects=false)1238 cts_amber::AmberTestCase *CreateAmberTestCase(tcu::TestContext &testCtx, const char *name, const std::string &filename,
1239                                               const std::vector<std::string> &requirements = std::vector<std::string>(),
1240                                               bool zeroinit = false, bool shaderObjects = false)
1241 {
1242     vk::SpirVAsmBuildOptions asm_options(VK_MAKE_API_VERSION(0, 1, 1, 0), vk::SPIRV_VERSION_1_4);
1243     asm_options.supports_VK_KHR_spirv_1_4 = true;
1244 
1245     const std::string test_filename = shaderObjects ? "shader_object_" + std::string(filename) : filename;
1246 
1247     cts_amber::AmberTestCase *t = cts_amber::createAmberTestCase(
1248         testCtx, name, "compute/workgroup_memory_explicit_layout", test_filename.c_str(), requirements);
1249     t->setSpirVAsmBuildOptions(asm_options);
1250     t->addRequirement("VK_KHR_workgroup_memory_explicit_layout");
1251     t->addRequirement("VK_KHR_spirv_1_4");
1252     if (zeroinit)
1253     {
1254         t->addRequirement("VK_KHR_zero_initialize_workgroup_memory");
1255     }
1256     if (shaderObjects)
1257     {
1258         t->addRequirement("VK_EXT_shader_object");
1259     }
1260     return t;
1261 }
1262 
AddCopyMemoryTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType pipelineConstructionType)1263 void AddCopyMemoryTests(tcu::TestCaseGroup *group, vk::ComputePipelineConstructionType pipelineConstructionType)
1264 {
1265     tcu::TestContext &testCtx = group->getTestContext();
1266 
1267     bool shaderObject = (pipelineConstructionType == COMPUTE_PIPELINE_CONSTRUCTION_TYPE_SHADER_OBJECT_SPIRV) ||
1268                         (pipelineConstructionType == COMPUTE_PIPELINE_CONSTRUCTION_TYPE_SHADER_OBJECT_BINARY);
1269 
1270     group->addChild(CreateAmberTestCase(testCtx, "basic", "copy_memory_basic.amber", {}, false, shaderObject));
1271     group->addChild(
1272         CreateAmberTestCase(testCtx, "two_invocations", "copy_memory_two_invocations.amber", {}, false, shaderObject));
1273     group->addChild(CreateAmberTestCase(testCtx, "variable_pointers", "copy_memory_variable_pointers.amber",
1274                                         {"VariablePointerFeatures.variablePointers"}, false, shaderObject));
1275 }
1276 
AddZeroInitializeExtensionTests(tcu::TestCaseGroup * group,vk::ComputePipelineConstructionType pipelineConstructionType)1277 void AddZeroInitializeExtensionTests(tcu::TestCaseGroup *group,
1278                                      vk::ComputePipelineConstructionType pipelineConstructionType)
1279 {
1280     tcu::TestContext &testCtx = group->getTestContext();
1281 
1282     bool shaderObject = (pipelineConstructionType == COMPUTE_PIPELINE_CONSTRUCTION_TYPE_SHADER_OBJECT_SPIRV) ||
1283                         (pipelineConstructionType == COMPUTE_PIPELINE_CONSTRUCTION_TYPE_SHADER_OBJECT_BINARY);
1284 
1285     group->addChild(
1286         CreateAmberTestCase(testCtx, "block", "zero_ext_block.amber", std::vector<std::string>(), true, shaderObject));
1287     group->addChild(CreateAmberTestCase(testCtx, "other_block", "zero_ext_other_block.amber",
1288                                         std::vector<std::string>(), true, shaderObject));
1289     group->addChild(CreateAmberTestCase(testCtx, "block_with_offset", "zero_ext_block_with_offset.amber",
1290                                         std::vector<std::string>(), true, shaderObject));
1291 }
1292 
1293 } // namespace
1294 
createWorkgroupMemoryExplicitLayoutTests(tcu::TestContext & testCtx,vk::ComputePipelineConstructionType computePipelineConstructionType)1295 tcu::TestCaseGroup *createWorkgroupMemoryExplicitLayoutTests(
1296     tcu::TestContext &testCtx, vk::ComputePipelineConstructionType computePipelineConstructionType)
1297 {
1298     de::MovePtr<tcu::TestCaseGroup> tests(new tcu::TestCaseGroup(testCtx, "workgroup_memory_explicit_layout"));
1299 
1300     // Aliasing between different blocks and types
1301     tcu::TestCaseGroup *alias = new tcu::TestCaseGroup(testCtx, "alias");
1302     AddAliasTests(alias, computePipelineConstructionType);
1303     tests->addChild(alias);
1304 
1305     // Manually zero initialize a block and read from another
1306     tcu::TestCaseGroup *zero = new tcu::TestCaseGroup(testCtx, "zero");
1307     AddZeroTests(zero, computePipelineConstructionType);
1308     tests->addChild(zero);
1309 
1310     tcu::TestCaseGroup *padding = new tcu::TestCaseGroup(testCtx, "padding");
1311     AddPaddingTests(padding, computePipelineConstructionType);
1312     tests->addChild(padding);
1313 
1314     tcu::TestCaseGroup *size = new tcu::TestCaseGroup(testCtx, "size");
1315     AddSizeTests(size, computePipelineConstructionType);
1316     tests->addChild(size);
1317 
1318     tcu::TestCaseGroup *copy_memory = new tcu::TestCaseGroup(testCtx, "copy_memory");
1319     AddCopyMemoryTests(copy_memory, computePipelineConstructionType);
1320     tests->addChild(copy_memory);
1321 
1322     tcu::TestCaseGroup *zero_ext = new tcu::TestCaseGroup(testCtx, "zero_ext");
1323     AddZeroInitializeExtensionTests(zero_ext, computePipelineConstructionType);
1324     tests->addChild(zero_ext);
1325 
1326     return tests.release();
1327 }
1328 
1329 } // namespace compute
1330 } // namespace vkt
1331