xref: /aosp_15_r20/external/deqp/external/vulkancts/modules/vulkan/draw/vktDrawMultiExtTests.cpp (revision 35238bce31c2a825756842865a792f8cf7f89930)
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2021 The Khronos Group Inc.
6  * Copyright (c) 2021 Valve Corporation.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief Test for VK_EXT_multi_draw
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktDrawMultiExtTests.hpp"
26 
27 #include "vkTypeUtil.hpp"
28 #include "vkImageWithMemory.hpp"
29 #include "vkObjUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkCmdUtil.hpp"
32 #include "vkBufferWithMemory.hpp"
33 #include "vkImageUtil.hpp"
34 #include "vkBarrierUtil.hpp"
35 
36 #include "tcuTexture.hpp"
37 #include "tcuMaybe.hpp"
38 #include "tcuImageCompare.hpp"
39 
40 #include "deUniquePtr.hpp"
41 #include "deMath.h"
42 #include "deRandom.hpp"
43 
44 #include <vector>
45 #include <sstream>
46 #include <algorithm>
47 #include <iterator>
48 #include <limits>
49 
50 using namespace vk;
51 
52 namespace vkt
53 {
54 namespace Draw
55 {
56 
57 namespace
58 {
59 
// Selects between normal (non-indexed) and indexed multi-draw commands.
enum class DrawType
{
    NORMAL  = 0, // Normal draws.
    INDEXED = 1, // Indexed draws.
};
66 
// Strategy used to provide the vertex offset in indexed draws.
enum class VertexOffsetType
{
    // pVertexOffset is not used; offsets come from the per-draw structs and values are mixed.
    MIXED = 0,

    // pVertexOffset holds a constant value and the offset members in the structs are filled with random values.
    CONSTANT_RANDOM = 1,

    // pVertexOffset holds a constant value and the stride is chosen so the offset member is removed from the structs.
    CONSTANT_PACK = 2,
};
74 
// Layout of the triangle mesh submitted by the test.
enum class MeshType
{
    MOSAIC      = 0, // Triangles form a mosaic (see TriangleMosaicGenerator).
    OVERLAPPING = 1, // Triangles overlap each other (see TriangleOverlapGenerator).
};
81 
82 // Vertex offset parameters.
83 struct VertexOffsetParams
84 {
85     VertexOffsetType offsetType;
86     uint32_t offset;
87 };
88 
89 // Test parameters.
90 struct TestParams
91 {
92     MeshType meshType;
93     DrawType drawType;
94     uint32_t drawCount;
95     uint32_t instanceCount;
96     uint32_t firstInstance;
97     uint32_t stride;
98     tcu::Maybe<VertexOffsetParams> vertexOffset; // Only used for indexed draws.
99     uint32_t seed;
100     bool useTessellation;
101     bool useGeometry;
102     bool multiview;
103     bool drawId;
104     const SharedGroupParams groupParams;
105 
maxInstanceIndexvkt::Draw::__anon9e5b82040111::TestParams106     uint32_t maxInstanceIndex() const
107     {
108         if (instanceCount == 0u)
109             return 0u;
110         return (firstInstance + instanceCount - 1u);
111     }
112 };
113 
114 // For the color attachment. Must match what the fragment shader expects.
getColorFormat()115 VkFormat getColorFormat()
116 {
117     return VK_FORMAT_R8G8B8A8_UINT;
118 }
119 
120 // Compatible with getColorFormat() but better when used with the image logging facilities.
getVerificationFormat()121 VkFormat getVerificationFormat()
122 {
123     return VK_FORMAT_R8G8B8A8_UNORM;
124 }
125 
126 // Find a suitable format for the depth/stencil buffer.
chooseDepthStencilFormat(const InstanceInterface & vki,VkPhysicalDevice physDev)127 VkFormat chooseDepthStencilFormat(const InstanceInterface &vki, VkPhysicalDevice physDev)
128 {
129     // The spec mandates support for one of these two formats.
130     const VkFormat candidates[] = {VK_FORMAT_D32_SFLOAT_S8_UINT, VK_FORMAT_D24_UNORM_S8_UINT};
131 
132     for (const auto &format : candidates)
133     {
134         const auto properties = getPhysicalDeviceFormatProperties(vki, physDev, format);
135         if ((properties.optimalTilingFeatures & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT) != 0u)
136             return format;
137     }
138 
139     TCU_FAIL("No suitable depth/stencil format found");
140     return VK_FORMAT_UNDEFINED; // Unreachable.
141 }
142 
143 // Format used when verifying the stencil aspect.
getStencilVerificationFormat()144 VkFormat getStencilVerificationFormat()
145 {
146     return VK_FORMAT_S8_UINT;
147 }
148 
// Total number of triangles used by the tests.
uint32_t getTriangleCount()
{
    // This matches the minimum allowed limit for maxMultiDrawCount, so we can submit a single triangle per draw
    // call.
    const uint32_t triangleCount = 1024u;
    return triangleCount;
}
153 
// Number of vertices that make up one triangle.
uint32_t getVerticesPerTriangle()
{
    const uint32_t verticesPerTriangle = 3u;
    return verticesPerTriangle;
}
158 
159 // Base class for creating triangles.
160 class TriangleGenerator
161 {
162 public:
~TriangleGenerator()163     virtual ~TriangleGenerator()
164     {
165     }
166 
167     // Append a new triangle for ID (x, y).
168     virtual void appendTriangle(uint32_t x, uint32_t y, std::vector<tcu::Vec4> &vertices) = 0;
169 };
170 
171 // Class that helps creating triangle vertices for each framebuffer pixel, forming a mosaic of triangles.
172 class TriangleMosaicGenerator : public TriangleGenerator
173 {
174 private:
175     // Normalized width and height taking into account the framebuffer's width and height are two units (from -1 to 1).
176     float m_pixelWidth;
177     float m_pixelHeight;
178 
179     float m_deltaX;
180     float m_deltaY;
181 
182 public:
TriangleMosaicGenerator(uint32_t width,uint32_t height)183     TriangleMosaicGenerator(uint32_t width, uint32_t height)
184         : m_pixelWidth(2.0f / static_cast<float>(width))
185         , m_pixelHeight(2.0f / static_cast<float>(height))
186         , m_deltaX(m_pixelWidth * 0.25f)
187         , m_deltaY(m_pixelHeight * 0.25f)
188     {
189     }
190 
191     // Creates a triangle for framebuffer pixel (x, y) around its center. Appends the triangle vertices to the given list.
appendTriangle(uint32_t x,uint32_t y,std::vector<tcu::Vec4> & vertices)192     void appendTriangle(uint32_t x, uint32_t y, std::vector<tcu::Vec4> &vertices) override
193     {
194         // Pixel center.
195         const float coordX = (static_cast<float>(x) + 0.5f) * m_pixelWidth - 1.0f;
196         const float coordY = (static_cast<float>(y) + 0.5f) * m_pixelHeight - 1.0f;
197 
198         // Triangle around it.
199         const float topY    = coordY - m_deltaY;
200         const float bottomY = coordY + m_deltaY;
201 
202         const float leftX  = coordX - m_deltaX;
203         const float rightX = coordX + m_deltaX;
204 
205         // Note: clockwise.
206         vertices.emplace_back(leftX, bottomY, 0.0f, 1.0f);
207         vertices.emplace_back(coordX, topY, 0.0f, 1.0f);
208         vertices.emplace_back(rightX, bottomY, 0.0f, 1.0f);
209     }
210 };
211 
212 // Class that helps create full-screen triangles that overlap each other.
213 // This generator will generate width*height full-screen triangles with decreasing depth from 0.75 to 0.25.
214 class TriangleOverlapGenerator : public TriangleGenerator
215 {
216 private:
217     uint32_t m_width;
218     uint32_t m_totalPixels;
219     float m_depthStep;
220 
221     static constexpr float kMinDepth   = 0.25f;
222     static constexpr float kMaxDepth   = 0.75f;
223     static constexpr float kDepthRange = kMaxDepth - kMinDepth;
224 
225 public:
TriangleOverlapGenerator(uint32_t width,uint32_t height)226     TriangleOverlapGenerator(uint32_t width, uint32_t height)
227         : m_width(width)
228         , m_totalPixels(width * height)
229         , m_depthStep(kDepthRange / static_cast<float>(m_totalPixels))
230     {
231     }
232 
233     // Creates full-screen triangle with 2D id (x, y) and decreasing depth with increasing ids.
appendTriangle(uint32_t x,uint32_t y,std::vector<tcu::Vec4> & vertices)234     void appendTriangle(uint32_t x, uint32_t y, std::vector<tcu::Vec4> &vertices) override
235     {
236         const auto pixelId = static_cast<float>(y * m_width + x);
237         const auto depth   = kMaxDepth - m_depthStep * pixelId;
238 
239         // Note: clockwise.
240         vertices.emplace_back(-1.0f, -1.0f, depth, 1.0f);
241         vertices.emplace_back(4.0f, -1.0f, depth, 1.0f);
242         vertices.emplace_back(-1.0f, 4.0f, depth, 1.0f);
243     }
244 };
245 
246 // Class that helps creating a suitable draw info vector.
247 class DrawInfoPacker
248 {
249 private:
250     DrawType m_drawType;
251     tcu::Maybe<VertexOffsetType> m_offsetType; // Offset type when m_drawType is DrawType::INDEXED.
252     uint32_t m_stride;     // Desired stride. Must be zero or at least as big as the needed VkMultiDraw*InfoExt.
253     uint32_t m_extraBytes; // Used to match the desired stride.
254     de::Random m_random;   // Used to generate random offsets.
255     uint32_t m_infoCount;  // How many infos have we appended so far?
256     std::vector<uint8_t> m_dataVec; // Data vector in generic form.
257     bool m_finalized;               // Finished appending data.
258 
259     // Are draws indexed and using the offset member of VkMultiDrawIndexedInfoEXT?
indexedWithOffset(DrawType drawType,const tcu::Maybe<VertexOffsetType> & offsetType)260     static bool indexedWithOffset(DrawType drawType, const tcu::Maybe<VertexOffsetType> &offsetType)
261     {
262         return (drawType == DrawType::INDEXED && *offsetType != VertexOffsetType::CONSTANT_PACK);
263     }
264 
265     // Are draws indexed and packed?
indexedPacked(DrawType drawType,const tcu::Maybe<VertexOffsetType> & offsetType)266     static bool indexedPacked(DrawType drawType, const tcu::Maybe<VertexOffsetType> &offsetType)
267     {
268         return (drawType == DrawType::INDEXED && *offsetType == VertexOffsetType::CONSTANT_PACK);
269     }
270 
271     // Size in bytes for the base structure used with the given draw type.
baseSize(DrawType drawType,const tcu::Maybe<VertexOffsetType> & offsetType)272     static uint32_t baseSize(DrawType drawType, const tcu::Maybe<VertexOffsetType> &offsetType)
273     {
274         return static_cast<uint32_t>(indexedWithOffset(drawType, offsetType) ? sizeof(VkMultiDrawIndexedInfoEXT) :
275                                                                                sizeof(VkMultiDrawInfoEXT));
276     }
277 
278     // Number of extra bytes per entry according to the given stride.
calcExtraBytes(DrawType drawType,const tcu::Maybe<VertexOffsetType> & offsetType,uint32_t stride)279     static uint32_t calcExtraBytes(DrawType drawType, const tcu::Maybe<VertexOffsetType> &offsetType, uint32_t stride)
280     {
281         // Stride 0 is a special allowed case.
282         if (stride == 0u)
283             return 0u;
284 
285         const auto minStride = baseSize(drawType, offsetType);
286         DE_ASSERT(stride >= minStride);
287         return (stride - minStride);
288     }
289 
290     // Entry size in bytes taking into account the number of extra bytes due to stride.
entrySize() const291     uint32_t entrySize() const
292     {
293         return baseSize(m_drawType, m_offsetType) + m_extraBytes;
294     }
295 
296 public:
DrawInfoPacker(DrawType drawType,const tcu::Maybe<VertexOffsetType> & offsetType,uint32_t stride,uint32_t estimatedInfoCount,uint32_t seed)297     DrawInfoPacker(DrawType drawType, const tcu::Maybe<VertexOffsetType> &offsetType, uint32_t stride,
298                    uint32_t estimatedInfoCount, uint32_t seed)
299         : m_drawType(drawType)
300         , m_offsetType(offsetType)
301         , m_stride(stride)
302         , m_extraBytes(calcExtraBytes(drawType, offsetType, stride))
303         , m_random(seed)
304         , m_infoCount(0u)
305         , m_dataVec()
306         , m_finalized(false)
307     {
308         // estimatedInfoCount is used to avoid excessive reallocation.
309         m_dataVec.reserve((estimatedInfoCount + 1u) * entrySize());
310     }
311 
addDrawInfo(uint32_t first,uint32_t count,int32_t offset)312     void addDrawInfo(uint32_t first, uint32_t count, int32_t offset)
313     {
314         DE_ASSERT(!m_finalized);
315 
316         std::vector<uint8_t> entry(entrySize(), 0);
317 
318         if (indexedWithOffset(m_drawType, m_offsetType))
319         {
320             const auto usedOffset =
321                 ((*m_offsetType == VertexOffsetType::CONSTANT_RANDOM) ? m_random.getInt32() : offset);
322             const VkMultiDrawIndexedInfoEXT info = {first, count, usedOffset};
323             deMemcpy(entry.data(), &info, sizeof(info));
324         }
325         else
326         {
327             const VkMultiDrawInfoEXT info = {first, count};
328             deMemcpy(entry.data(), &info, sizeof(info));
329         }
330 
331         std::copy(begin(entry), end(entry), std::back_inserter(m_dataVec));
332         ++m_infoCount;
333     }
334 
finalize()335     void finalize()
336     {
337         if (indexedPacked(m_drawType, m_offsetType) && m_infoCount > 0u)
338         {
339             // VUID-vkCmdDrawMultiIndexedEXT-drawCount-04940 says:
340             // If drawCount is greater than zero, pIndexInfo must be a valid pointer to memory containing one or more
341             // valid instances of VkMultiDrawIndexedInfoEXT structures
342             //
343             // This means if infoCount is greater than zero, we need to have enough bytes in the buffer so that reading
344             // a VkMultiDrawIndexedInfoEXT structure (12 bytes) at the last offset does not produce an OOB read. As
345             // we've been packing data in the buffer using smaller VkMultiDrawInfoEXT structures, we need 4 extra bytes
346             // at the end to make these tests legal.
347             std::vector<uint8_t> extraData(sizeof(int32_t), 0);
348             std::copy(begin(extraData), end(extraData), std::back_inserter(m_dataVec));
349         }
350 
351         m_finalized = true;
352     }
353 
drawInfoCount() const354     uint32_t drawInfoCount() const
355     {
356         DE_ASSERT(m_finalized);
357         return m_infoCount;
358     }
359 
drawInfoData() const360     const void *drawInfoData() const
361     {
362         DE_ASSERT(m_finalized);
363         return de::dataOrNull(m_dataVec);
364     }
365 
stride() const366     uint32_t stride() const
367     {
368         return m_stride;
369     }
370 };
371 
372 class MultiDrawTest : public vkt::TestCase
373 {
374 public:
375     MultiDrawTest(tcu::TestContext &testCtx, const std::string &name, const TestParams &params);
~MultiDrawTest(void)376     virtual ~MultiDrawTest(void)
377     {
378     }
379 
380     void initPrograms(vk::SourceCollections &programCollection) const override;
381     TestInstance *createInstance(Context &context) const override;
382     void checkSupport(Context &context) const override;
383 
384 private:
385     TestParams m_params;
386 };
387 
388 class MultiDrawInstance : public vkt::TestInstance
389 {
390 public:
391     MultiDrawInstance(Context &context, const TestParams &params);
~MultiDrawInstance(void)392     virtual ~MultiDrawInstance(void)
393     {
394     }
395 
396     tcu::TestStatus iterate(void) override;
397 
398 protected:
399     void beginSecondaryCmdBuffer(const DeviceInterface &vkd, VkCommandBuffer cmdBuffer, VkFormat colorFormat,
400                                  VkFormat depthStencilFormat, VkRenderingFlagsKHR renderingFlags,
401                                  uint32_t viewMask) const;
402     void preRenderingCommands(const DeviceInterface &vkd, VkCommandBuffer cmdBuffer, VkImage colorImage,
403                               const VkImageSubresourceRange colorSubresourceRange, VkImage dsImage,
404                               const VkImageSubresourceRange dsSubresourceRange) const;
405     void drawCommands(const DeviceInterface &vkd, VkCommandBuffer cmdBuffer, VkPipeline pipeline, VkBuffer vertexBuffer,
406                       VkDeviceSize vertexBufferOffset, int32_t vertexOffset, VkBuffer indexBuffer,
407                       VkDeviceSize indexBufferOffset, bool isMixedMode, const DrawInfoPacker &drawInfos) const;
408 
409 private:
410     TestParams m_params;
411 };
412 
MultiDrawTest(tcu::TestContext & testCtx,const std::string & name,const TestParams & params)413 MultiDrawTest::MultiDrawTest(tcu::TestContext &testCtx, const std::string &name, const TestParams &params)
414     : vkt::TestCase(testCtx, name)
415     , m_params(params)
416 {
417 }
418 
createInstance(Context & context) const419 TestInstance *MultiDrawTest::createInstance(Context &context) const
420 {
421     return new MultiDrawInstance(context, m_params);
422 }
423 
checkSupport(Context & context) const424 void MultiDrawTest::checkSupport(Context &context) const
425 {
426     context.requireDeviceFunctionality("VK_EXT_multi_draw");
427 
428     if (m_params.drawId)
429         context.requireDeviceFunctionality("VK_KHR_shader_draw_parameters");
430 
431     if (m_params.useTessellation)
432         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_TESSELLATION_SHADER);
433 
434     if (m_params.useGeometry)
435         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_GEOMETRY_SHADER);
436 
437     if (m_params.multiview)
438     {
439         const auto &multiviewFeatures = context.getMultiviewFeatures();
440 
441         if (!multiviewFeatures.multiview)
442             TCU_THROW(NotSupportedError, "Multiview not supported");
443 
444         if (m_params.useTessellation && !multiviewFeatures.multiviewTessellationShader)
445             TCU_THROW(NotSupportedError, "Multiview not supported with tesellation shaders");
446 
447         if (m_params.useGeometry && !multiviewFeatures.multiviewGeometryShader)
448             TCU_THROW(NotSupportedError, "Multiview not supported with geometry shaders");
449     }
450 
451     if (m_params.groupParams->useDynamicRendering)
452         context.requireDeviceFunctionality("VK_KHR_dynamic_rendering");
453 }
454 
initPrograms(vk::SourceCollections & programCollection) const455 void MultiDrawTest::initPrograms(vk::SourceCollections &programCollection) const
456 {
457     // The general idea behind these tests is to have a 32x32 framebuffer with 1024 pixels and 1024 triangles to draw.
458     //
459     // When using a mosaic mesh, the tests will generally draw a single triangle around the center of each of these pixels. When
460     // using an overlapping mesh, each single triangle will cover the whole framebuffer using a different depth value, and the depth
461     // test will be enabled.
462     //
463     // The color of each triangle will depend on the instance index, the draw index and, when using multiview, the view index. This
464     // way, it's possible to draw those 1024 triangles with a single draw call or to draw each triangle with a separate draw call,
465     // with up to 1024 draw calls. Combinations in between are possible.
466     //
467     // With overlapping meshes, the resulting color buffer will be uniform in color. With mosaic meshes, it depends on the submitted
468     // draw count. In some cases, all pixels will be slightly different in color.
469     //
470     // The color buffer will be cleared to transparent black when beginning the render pass, and in some special cases some or all
471     // pixels will preserve that clear color because they will not be drawn into. This happens, for example, if the instance count
472     // or draw count is zero and in some cases of meshed geometry with stride zero.
473     //
474     // The output color for each pixel will:
475     // - Have the draw index or primitive index split into the R and G components.
476     // - Have the instance index I stored into the B component as 255-I.
477     // - Have the layer index L stored into the A component as 255-L.
478     //
479     // In addition, the tests will use a depth/stencil buffer. The stencil buffer will be cleared to zero and the depth buffer to an
480     // appropriate initial value (0.0 or 1.0, depending on triangle order). The stencil component will be increased with each draw
481     // on each pixel. This will allow us to verify that not only the last draw for the last instance has set the proper color, but
482     // that all draw operations have taken place.
483 
484     // Make sure the blue channel can be calculated without issues.
485     DE_ASSERT(m_params.maxInstanceIndex() <= 255u);
486 
487     std::ostringstream vert;
488     vert << "#version 460\n"
489          << (m_params.multiview ? "#extension GL_EXT_multiview : enable\n" : "") << "\n"
490          << "out gl_PerVertex\n"
491          << "{\n"
492          << "    vec4 gl_Position;\n"
493          << "};\n"
494          << "\n"
495          << "layout (location=0) in vec4 inPos;\n"
496          << "layout (location=0) out uvec4 outColor;\n"
497          << "\n"
498          << "void main()\n"
499          << "{\n"
500          << "    gl_Position = inPos;\n"
501          << "    const uint storedIndex = uint(" << (m_params.drawId ? "gl_DrawID" : "gl_VertexIndex / 3") << ");\n"
502          << "    outColor.r = ((storedIndex >> 8u) & 0xFFu);\n"
503          << "    outColor.g = ((storedIndex      ) & 0xFFu);\n"
504          << "    outColor.b = 255u - uint(gl_InstanceIndex);\n"
505          << "    outColor.a = 255u" << (m_params.multiview ? " - uint(gl_ViewIndex)" : "") << ";\n"
506          << "}\n";
507     programCollection.glslSources.add("vert") << glu::VertexSource(vert.str());
508 
509     std::ostringstream frag;
510     frag << "#version 460\n"
511          << "\n"
512          << "layout (location=0) flat in uvec4 inColor;\n"
513          << "layout (location=0) out uvec4 outColor;\n"
514          << "\n"
515          << "void main ()\n"
516          << "{\n"
517          << "    outColor = inColor;\n"
518          << "}\n";
519     programCollection.glslSources.add("frag") << glu::FragmentSource(frag.str());
520 
521     if (m_params.useTessellation)
522     {
523         std::ostringstream tesc;
524         tesc << "#version 460\n"
525              << "\n"
526              << "layout (vertices=3) out;\n"
527              << "in gl_PerVertex\n"
528              << "{\n"
529              << "    vec4 gl_Position;\n"
530              << "} gl_in[gl_MaxPatchVertices];\n"
531              << "out gl_PerVertex\n"
532              << "{\n"
533              << "    vec4 gl_Position;\n"
534              << "} gl_out[];\n"
535              << "\n"
536              << "layout (location=0) in uvec4 inColor[gl_MaxPatchVertices];\n"
537              << "layout (location=0) out uvec4 outColor[];\n"
538              << "\n"
539              << "void main (void)\n"
540              << "{\n"
541              << "    gl_TessLevelInner[0] = 1.0;\n"
542              << "    gl_TessLevelInner[1] = 1.0;\n"
543              << "    gl_TessLevelOuter[0] = 1.0;\n"
544              << "    gl_TessLevelOuter[1] = 1.0;\n"
545              << "    gl_TessLevelOuter[2] = 1.0;\n"
546              << "    gl_TessLevelOuter[3] = 1.0;\n"
547              << "    gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
548              << "    outColor[gl_InvocationID] = inColor[gl_InvocationID];\n"
549              << "}\n";
550         programCollection.glslSources.add("tesc") << glu::TessellationControlSource(tesc.str());
551 
552         std::ostringstream tese;
553         tese << "#version 460\n"
554              << "\n"
555              << "layout (triangles, fractional_odd_spacing, cw) in;\n"
556              << "in gl_PerVertex\n"
557              << "{\n"
558              << "    vec4 gl_Position;\n"
559              << "} gl_in[gl_MaxPatchVertices];\n"
560              << "out gl_PerVertex\n"
561              << "{\n"
562              << "    vec4 gl_Position;\n"
563              << "};\n"
564              << "\n"
565              << "layout (location=0) in uvec4 inColor[gl_MaxPatchVertices];\n"
566              << "layout (location=0) out uvec4 outColor;\n"
567              << "\n"
568              << "void main (void)\n"
569              << "{\n"
570              << "    gl_Position = (gl_TessCoord.x * gl_in[0].gl_Position) +\n"
571              << "                  (gl_TessCoord.y * gl_in[1].gl_Position) +\n"
572              << "                  (gl_TessCoord.z * gl_in[2].gl_Position);\n"
573              << "    outColor = inColor[0];\n"
574              << "}\n";
575         programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(tese.str());
576     }
577 
578     if (m_params.useGeometry)
579     {
580         std::ostringstream geom;
581         geom << "#version 460\n"
582              << "\n"
583              << "layout (triangles) in;\n"
584              << "layout (triangle_strip, max_vertices=3) out;\n"
585              << "in gl_PerVertex\n"
586              << "{\n"
587              << "    vec4 gl_Position;\n"
588              << "} gl_in[3];\n"
589              << "out gl_PerVertex\n"
590              << "{\n"
591              << "    vec4 gl_Position;\n"
592              << "};\n"
593              << "\n"
594              << "layout (location=0) in uvec4 inColor[3];\n"
595              << "layout (location=0) out uvec4 outColor;\n"
596              << "\n"
597              << "void main ()\n"
598              << "{\n"
599              << "    gl_Position = gl_in[0].gl_Position; outColor = inColor[0]; EmitVertex();\n"
600              << "    gl_Position = gl_in[1].gl_Position; outColor = inColor[1]; EmitVertex();\n"
601              << "    gl_Position = gl_in[2].gl_Position; outColor = inColor[2]; EmitVertex();\n"
602              << "}\n";
603         programCollection.glslSources.add("geom") << glu::GeometrySource(geom.str());
604     }
605 }
606 
MultiDrawInstance(Context & context,const TestParams & params)607 MultiDrawInstance::MultiDrawInstance(Context &context, const TestParams &params)
608     : vkt::TestInstance(context)
609     , m_params(params)
610 {
611 }
612 
appendPaddingVertices(std::vector<tcu::Vec4> & vertices,uint32_t count)613 void appendPaddingVertices(std::vector<tcu::Vec4> &vertices, uint32_t count)
614 {
615     for (uint32_t i = 0u; i < count; ++i)
616         vertices.emplace_back(0.0f, 0.0f, 0.0f, 1.0f);
617 }
618 
619 // Creates a render pass with multiple subpasses, one per layer.
makeMultidrawRenderPass(const DeviceInterface & vk,VkDevice device,VkFormat colorFormat,VkFormat depthStencilFormat,uint32_t layerCount)620 Move<VkRenderPass> makeMultidrawRenderPass(const DeviceInterface &vk, VkDevice device, VkFormat colorFormat,
621                                            VkFormat depthStencilFormat, uint32_t layerCount)
622 {
623     const VkAttachmentDescription colorAttachmentDescription = {
624         0u,                                       // VkAttachmentDescriptionFlags    flags
625         colorFormat,                              // VkFormat                        format
626         VK_SAMPLE_COUNT_1_BIT,                    // VkSampleCountFlagBits           samples
627         VK_ATTACHMENT_LOAD_OP_CLEAR,              // VkAttachmentLoadOp              loadOp
628         VK_ATTACHMENT_STORE_OP_STORE,             // VkAttachmentStoreOp             storeOp
629         VK_ATTACHMENT_LOAD_OP_DONT_CARE,          // VkAttachmentLoadOp              stencilLoadOp
630         VK_ATTACHMENT_STORE_OP_DONT_CARE,         // VkAttachmentStoreOp             stencilStoreOp
631         VK_IMAGE_LAYOUT_UNDEFINED,                // VkImageLayout                   initialLayout
632         VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout                   finalLayout
633     };
634 
635     const VkAttachmentDescription depthStencilAttachmentDescription = {
636         0u,                                               // VkAttachmentDescriptionFlags    flags
637         depthStencilFormat,                               // VkFormat                        format
638         VK_SAMPLE_COUNT_1_BIT,                            // VkSampleCountFlagBits           samples
639         VK_ATTACHMENT_LOAD_OP_CLEAR,                      // VkAttachmentLoadOp              loadOp
640         VK_ATTACHMENT_STORE_OP_STORE,                     // VkAttachmentStoreOp             storeOp
641         VK_ATTACHMENT_LOAD_OP_CLEAR,                      // VkAttachmentLoadOp              stencilLoadOp
642         VK_ATTACHMENT_STORE_OP_STORE,                     // VkAttachmentStoreOp             stencilStoreOp
643         VK_IMAGE_LAYOUT_UNDEFINED,                        // VkImageLayout                   initialLayout
644         VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, // VkImageLayout                   finalLayout
645     };
646 
647     const std::vector<VkAttachmentDescription> attachmentDescriptions = {colorAttachmentDescription,
648                                                                          depthStencilAttachmentDescription};
649     const VkAttachmentReference colorAttachmentRef =
650         makeAttachmentReference(0u, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
651     const VkAttachmentReference depthStencilAttachmentRef =
652         makeAttachmentReference(1u, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
653 
654     const VkSubpassDescription subpassDescription = {
655         0u,                              // VkSubpassDescriptionFlags       flags
656         VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint             pipelineBindPoint
657         0u,                              // uint32_t                        inputAttachmentCount
658         nullptr,                         // const VkAttachmentReference*    pInputAttachments
659         1u,                              // uint32_t                        colorAttachmentCount
660         &colorAttachmentRef,             // const VkAttachmentReference*    pColorAttachments
661         nullptr,                         // const VkAttachmentReference*    pResolveAttachments
662         &depthStencilAttachmentRef,      // const VkAttachmentReference*    pDepthStencilAttachment
663         0u,                              // uint32_t                        preserveAttachmentCount
664         nullptr                          // const uint32_t*                 pPreserveAttachments
665     };
666 
667     std::vector<VkSubpassDescription> subpassDescriptions;
668 
669     subpassDescriptions.reserve(layerCount);
670     for (uint32_t subpassIdx = 0u; subpassIdx < layerCount; ++subpassIdx)
671         subpassDescriptions.push_back(subpassDescription);
672 
673     using MultiviewInfoPtr = de::MovePtr<VkRenderPassMultiviewCreateInfo>;
674 
675     MultiviewInfoPtr multiviewCreateInfo;
676     std::vector<uint32_t> viewMasks;
677 
678     if (layerCount > 1u)
679     {
680         multiviewCreateInfo  = MultiviewInfoPtr(new VkRenderPassMultiviewCreateInfo);
681         *multiviewCreateInfo = initVulkanStructure();
682 
683         viewMasks.resize(subpassDescriptions.size());
684         for (uint32_t subpassIdx = 0u; subpassIdx < static_cast<uint32_t>(viewMasks.size()); ++subpassIdx)
685             viewMasks[subpassIdx] = (1u << subpassIdx);
686 
687         multiviewCreateInfo->subpassCount = static_cast<uint32_t>(viewMasks.size());
688         multiviewCreateInfo->pViewMasks   = de::dataOrNull(viewMasks);
689     }
690 
691     // Dependencies between subpasses for color and depth/stencil read/writes.
692     std::vector<VkSubpassDependency> dependencies;
693     if (layerCount > 1u)
694         dependencies.reserve((layerCount - 1u) * 2u);
695 
696     const auto fragmentTestStages =
697         (VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT);
698     const auto dsWrites = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
699     const auto dsReadWrites =
700         (VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT);
701     const auto colorStage      = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
702     const auto colorWrites     = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
703     const auto colorReadWrites = (VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT);
704 
705     for (uint32_t subpassIdx = 1u; subpassIdx < layerCount; ++subpassIdx)
706     {
707         const auto prev = subpassIdx - 1u;
708 
709         const VkSubpassDependency dsDep = {
710             prev,                        // uint32_t srcSubpass;
711             subpassIdx,                  // uint32_t dstSubpass;
712             fragmentTestStages,          // VkPipelineStageFlags srcStageMask;
713             fragmentTestStages,          // VkPipelineStageFlags dstStageMask;
714             dsWrites,                    // VkAccessFlags srcAccessMask;
715             dsReadWrites,                // VkAccessFlags dstAccessMask;
716             VK_DEPENDENCY_BY_REGION_BIT, // VkDependencyFlags dependencyFlags;
717         };
718         dependencies.push_back(dsDep);
719 
720         const VkSubpassDependency colorDep = {
721             prev,                        // uint32_t srcSubpass;
722             subpassIdx,                  // uint32_t dstSubpass;
723             colorStage,                  // VkPipelineStageFlags srcStageMask;
724             colorStage,                  // VkPipelineStageFlags dstStageMask;
725             colorWrites,                 // VkAccessFlags srcAccessMask;
726             colorReadWrites,             // VkAccessFlags dstAccessMask;
727             VK_DEPENDENCY_BY_REGION_BIT, // VkDependencyFlags dependencyFlags;
728         };
729         dependencies.push_back(colorDep);
730     }
731 
732     const VkRenderPassCreateInfo renderPassInfo = {
733         VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,            // VkStructureType                   sType
734         multiviewCreateInfo.get(),                            // const void*                       pNext
735         0u,                                                   // VkRenderPassCreateFlags           flags
736         static_cast<uint32_t>(attachmentDescriptions.size()), // uint32_t                          attachmentCount
737         de::dataOrNull(attachmentDescriptions),               // const VkAttachmentDescription*    pAttachments
738         static_cast<uint32_t>(subpassDescriptions.size()),    // uint32_t                          subpassCount
739         de::dataOrNull(subpassDescriptions),                  // const VkSubpassDescription*       pSubpasses
740         static_cast<uint32_t>(dependencies.size()),           // uint32_t                          dependencyCount
741         de::dataOrNull(dependencies),                         // const VkSubpassDependency*        pDependencies
742     };
743 
744     return createRenderPass(vk, device, &renderPassInfo, nullptr);
745 }
746 
beginSecondaryCmdBuffer(const DeviceInterface & vk,VkCommandBuffer cmdBuffer,VkFormat colorFormat,VkFormat depthStencilFormat,VkRenderingFlagsKHR renderingFlags,uint32_t viewMask) const747 void MultiDrawInstance::beginSecondaryCmdBuffer(const DeviceInterface &vk, VkCommandBuffer cmdBuffer,
748                                                 VkFormat colorFormat, VkFormat depthStencilFormat,
749                                                 VkRenderingFlagsKHR renderingFlags, uint32_t viewMask) const
750 {
751     VkCommandBufferInheritanceRenderingInfoKHR inheritanceRenderingInfo{
752         VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_RENDERING_INFO_KHR, // VkStructureType sType;
753         DE_NULL,                                                         // const void* pNext;
754         renderingFlags,                                                  // VkRenderingFlagsKHR flags;
755         viewMask,                                                        // uint32_t viewMask;
756         1u,                                                              // uint32_t colorAttachmentCount;
757         &colorFormat,                                                    // const VkFormat* pColorAttachmentFormats;
758         depthStencilFormat,                                              // VkFormat depthAttachmentFormat;
759         depthStencilFormat,                                              // VkFormat stencilAttachmentFormat;
760         VK_SAMPLE_COUNT_1_BIT,                                           // VkSampleCountFlagBits rasterizationSamples;
761     };
762 
763     const VkCommandBufferInheritanceInfo bufferInheritanceInfo = initVulkanStructure(&inheritanceRenderingInfo);
764 
765     VkCommandBufferUsageFlags usageFlags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
766     if (!m_params.groupParams->secondaryCmdBufferCompletelyContainsDynamicRenderpass)
767         usageFlags |= VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
768 
769     const VkCommandBufferBeginInfo commandBufBeginParams{
770         VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, // VkStructureType sType;
771         DE_NULL,                                     // const void* pNext;
772         usageFlags,                                  // VkCommandBufferUsageFlags flags;
773         &bufferInheritanceInfo};
774 
775     VK_CHECK(vk.beginCommandBuffer(cmdBuffer, &commandBufBeginParams));
776 }
777 
preRenderingCommands(const DeviceInterface & vk,VkCommandBuffer cmdBuffer,VkImage colorImage,const VkImageSubresourceRange colorSubresourceRange,VkImage dsImage,const VkImageSubresourceRange dsSubresourceRange) const778 void MultiDrawInstance::preRenderingCommands(const DeviceInterface &vk, VkCommandBuffer cmdBuffer, VkImage colorImage,
779                                              const VkImageSubresourceRange colorSubresourceRange, VkImage dsImage,
780                                              const VkImageSubresourceRange dsSubresourceRange) const
781 {
782     // Transition color and depth stencil attachment to the proper initial layout for dynamic rendering
783     const auto colorPreBarrier =
784         makeImageMemoryBarrier(0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
785                                VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, colorImage, colorSubresourceRange);
786 
787     vk.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
788                           0u, 0u, nullptr, 0u, nullptr, 1u, &colorPreBarrier);
789 
790     const auto dsPreBarrier = makeImageMemoryBarrier(
791         0u, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
792         VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, dsImage, dsSubresourceRange);
793 
794     vk.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
795                           (VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT), 0u,
796                           0u, nullptr, 0u, nullptr, 1u, &dsPreBarrier);
797 }
798 
drawCommands(const DeviceInterface & vk,VkCommandBuffer cmdBuffer,VkPipeline pipeline,VkBuffer vertexBuffer,VkDeviceSize vertexBufferOffset,int32_t vertexOffset,VkBuffer indexBuffer,VkDeviceSize indexBufferOffset,bool isMixedMode,const DrawInfoPacker & drawInfos) const799 void MultiDrawInstance::drawCommands(const DeviceInterface &vk, VkCommandBuffer cmdBuffer, VkPipeline pipeline,
800                                      VkBuffer vertexBuffer, VkDeviceSize vertexBufferOffset, int32_t vertexOffset,
801                                      VkBuffer indexBuffer, VkDeviceSize indexBufferOffset, bool isMixedMode,
802                                      const DrawInfoPacker &drawInfos) const
803 {
804     vk.cmdBindPipeline(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
805     vk.cmdBindVertexBuffers(cmdBuffer, 0u, 1u, &vertexBuffer, &vertexBufferOffset);
806 
807     if (indexBuffer == VK_NULL_HANDLE)
808     {
809         const auto drawInfoPtr = reinterpret_cast<const VkMultiDrawInfoEXT *>(drawInfos.drawInfoData());
810         vk.cmdDrawMultiEXT(cmdBuffer, drawInfos.drawInfoCount(), drawInfoPtr, m_params.instanceCount,
811                            m_params.firstInstance, drawInfos.stride());
812     }
813     else
814     {
815         vk.cmdBindIndexBuffer(cmdBuffer, indexBuffer, indexBufferOffset, VK_INDEX_TYPE_UINT32);
816 
817         const auto drawInfoPtr = reinterpret_cast<const VkMultiDrawIndexedInfoEXT *>(drawInfos.drawInfoData());
818         const auto offsetPtr   = (isMixedMode ? nullptr : &vertexOffset);
819         vk.cmdDrawMultiIndexedEXT(cmdBuffer, drawInfos.drawInfoCount(), drawInfoPtr, m_params.instanceCount,
820                                   m_params.firstInstance, drawInfos.stride(), offsetPtr);
821     }
822 }
823 
iterate(void)824 tcu::TestStatus MultiDrawInstance::iterate(void)
825 {
826     const auto &vki    = m_context.getInstanceInterface();
827     const auto physDev = m_context.getPhysicalDevice();
828     const auto &vkd    = m_context.getDeviceInterface();
829     const auto device  = m_context.getDevice();
830     auto &alloc        = m_context.getDefaultAllocator();
831     const auto queue   = m_context.getUniversalQueue();
832     const auto qIndex  = m_context.getUniversalQueueFamilyIndex();
833 
834     const auto colorFormat    = getColorFormat();
835     const auto dsFormat       = chooseDepthStencilFormat(vki, physDev);
836     const auto tcuColorFormat = mapVkFormat(colorFormat);
837     const auto triangleCount  = getTriangleCount();
838     const auto imageDim       = static_cast<uint32_t>(deSqrt(static_cast<double>(triangleCount)));
839     const auto imageExtent    = makeExtent3D(imageDim, imageDim, 1u);
840     const auto imageLayers    = (m_params.multiview ? 2u : 1u);
841     const auto imageViewType  = ((imageLayers > 1u) ? VK_IMAGE_VIEW_TYPE_2D_ARRAY : VK_IMAGE_VIEW_TYPE_2D);
842     const auto colorUsage     = (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
843     const auto dsUsage        = (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
844     const auto pixelCount     = imageExtent.width * imageExtent.height;
845     const auto triVertexCount = getVerticesPerTriangle();
846     const auto vertexCount    = pixelCount * triVertexCount; // Triangle list.
847     const auto isIndexed      = (m_params.drawType == DrawType::INDEXED);
848     const auto isMixedMode =
849         (isIndexed && m_params.vertexOffset && m_params.vertexOffset->offsetType == VertexOffsetType::MIXED);
850     const auto extraVertices  = (m_params.vertexOffset ? m_params.vertexOffset->offset : 0u);
851     const auto extraTriangles = extraVertices / triVertexCount;
852     const auto isMosaic       = (m_params.meshType == MeshType::MOSAIC);
853 
854     // Make sure we're providing a vertex offset for indexed cases.
855     DE_ASSERT(!isIndexed || static_cast<bool>(m_params.vertexOffset));
856 
857     // Make sure overlapping draws use a single instance.
858     DE_ASSERT(isMosaic || m_params.instanceCount <= 1u);
859 
860     // Color buffer.
861     const VkImageCreateInfo imageCreateInfo = {
862         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
863         nullptr,                             // const void* pNext;
864         0u,                                  // VkImageCreateFlags flags;
865         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
866         colorFormat,                         // VkFormat format;
867         imageExtent,                         // VkExtent3D extent;
868         1u,                                  // uint32_t mipLevels;
869         imageLayers,                         // uint32_t arrayLayers;
870         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
871         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
872         colorUsage,                          // VkImageUsageFlags usage;
873         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
874         0u,                                  // uint32_t queueFamilyIndexCount;
875         nullptr,                             // const uint32_t* pQueueFamilyIndices;
876         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
877     };
878 
879     ImageWithMemory colorBuffer(vkd, device, alloc, imageCreateInfo, MemoryRequirement::Any);
880     const auto colorSubresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, imageLayers);
881     const auto colorBufferView =
882         makeImageView(vkd, device, colorBuffer.get(), imageViewType, colorFormat, colorSubresourceRange);
883 
884     // Depth/stencil buffer.
885     const VkImageCreateInfo dsCreateInfo = {
886         VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
887         nullptr,                             // const void* pNext;
888         0u,                                  // VkImageCreateFlags flags;
889         VK_IMAGE_TYPE_2D,                    // VkImageType imageType;
890         dsFormat,                            // VkFormat format;
891         imageExtent,                         // VkExtent3D extent;
892         1u,                                  // uint32_t mipLevels;
893         imageLayers,                         // uint32_t arrayLayers;
894         VK_SAMPLE_COUNT_1_BIT,               // VkSampleCountFlagBits samples;
895         VK_IMAGE_TILING_OPTIMAL,             // VkImageTiling tiling;
896         dsUsage,                             // VkImageUsageFlags usage;
897         VK_SHARING_MODE_EXCLUSIVE,           // VkSharingMode sharingMode;
898         0u,                                  // uint32_t queueFamilyIndexCount;
899         nullptr,                             // const uint32_t* pQueueFamilyIndices;
900         VK_IMAGE_LAYOUT_UNDEFINED,           // VkImageLayout initialLayout;
901     };
902 
903     ImageWithMemory dsBuffer(vkd, device, alloc, dsCreateInfo, MemoryRequirement::Any);
904     const auto dsSubresourceRange =
905         makeImageSubresourceRange((VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT), 0u, 1u, 0u, imageLayers);
906     const auto dsBufferView = makeImageView(vkd, device, dsBuffer.get(), imageViewType, dsFormat, dsSubresourceRange);
907 
908     // Output buffers to verify attachments.
909     using BufferWithMemoryPtr = de::MovePtr<BufferWithMemory>;
910 
911     // Buffers to read color attachment.
912     const auto outputBufferSize = pixelCount * static_cast<VkDeviceSize>(tcu::getPixelSize(tcuColorFormat));
913     const auto bufferCreateInfo = makeBufferCreateInfo(outputBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
914 
915     std::vector<BufferWithMemoryPtr> outputBuffers;
916     for (uint32_t i = 0u; i < imageLayers; ++i)
917         outputBuffers.push_back(BufferWithMemoryPtr(
918             new BufferWithMemory(vkd, device, alloc, bufferCreateInfo, MemoryRequirement::HostVisible)));
919 
920     // Buffer to read depth/stencil attachment. Note: this supposes we'll only copy the stencil aspect. See below.
921     const auto tcuStencilFmt        = mapVkFormat(getStencilVerificationFormat());
922     const auto stencilOutBufferSize = pixelCount * static_cast<VkDeviceSize>(tcu::getPixelSize(tcuStencilFmt));
923     const auto stencilOutCreateInfo = makeBufferCreateInfo(stencilOutBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
924 
925     std::vector<BufferWithMemoryPtr> stencilOutBuffers;
926     for (uint32_t i = 0u; i < imageLayers; ++i)
927         stencilOutBuffers.push_back(BufferWithMemoryPtr(
928             new BufferWithMemory(vkd, device, alloc, stencilOutCreateInfo, MemoryRequirement::HostVisible)));
929 
930     // Shaders.
931     const auto vertModule = createShaderModule(vkd, device, m_context.getBinaryCollection().get("vert"), 0u);
932     const auto fragModule = createShaderModule(vkd, device, m_context.getBinaryCollection().get("frag"), 0u);
933     Move<VkShaderModule> tescModule;
934     Move<VkShaderModule> teseModule;
935     Move<VkShaderModule> geomModule;
936 
937     if (m_params.useGeometry)
938         geomModule = createShaderModule(vkd, device, m_context.getBinaryCollection().get("geom"), 0u);
939 
940     if (m_params.useTessellation)
941     {
942         tescModule = createShaderModule(vkd, device, m_context.getBinaryCollection().get("tesc"), 0u);
943         teseModule = createShaderModule(vkd, device, m_context.getBinaryCollection().get("tese"), 0u);
944     }
945 
946     DescriptorSetLayoutBuilder layoutBuilder;
947     const auto descriptorSetLayout = layoutBuilder.build(vkd, device);
948     const auto pipelineLayout      = makePipelineLayout(vkd, device, descriptorSetLayout.get());
949 
950     Move<VkRenderPass> renderPass;
951     Move<VkFramebuffer> framebuffer;
952 
953     // Render pass and Framebuffer (note layers is always 1 as required by the spec).
954     if (!m_params.groupParams->useDynamicRendering)
955     {
956         renderPass = makeMultidrawRenderPass(vkd, device, colorFormat, dsFormat, imageLayers);
957         const std::vector<VkImageView> attachments{colorBufferView.get(), dsBufferView.get()};
958         framebuffer = makeFramebuffer(vkd, device, renderPass.get(), static_cast<uint32_t>(attachments.size()),
959                                       de::dataOrNull(attachments), imageExtent.width, imageExtent.height, 1u);
960     }
961 
962     // Viewports and scissors.
963     const auto viewport = makeViewport(imageExtent);
964     const std::vector<VkViewport> viewports(1u, viewport);
965     const auto scissor = makeRect2D(imageExtent);
966     const std::vector<VkRect2D> scissors(1u, scissor);
967 
968     // Indexed draws will have triangle vertices in reverse order. See index buffer creation below.
969     const auto frontFace = (isIndexed ? VK_FRONT_FACE_COUNTER_CLOCKWISE : VK_FRONT_FACE_CLOCKWISE);
970     const VkPipelineRasterizationStateCreateInfo rasterizationInfo = {
971         VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, // VkStructureType sType;
972         nullptr,                                                    // const void* pNext;
973         0u,                                                         // VkPipelineRasterizationStateCreateFlags flags;
974         VK_FALSE,                                                   // VkBool32 depthClampEnable;
975         VK_FALSE,                                                   // VkBool32 rasterizerDiscardEnable;
976         VK_POLYGON_MODE_FILL,                                       // VkPolygonMode polygonMode;
977         VK_CULL_MODE_BACK_BIT,                                      // VkCullModeFlags cullMode;
978         frontFace,                                                  // VkFrontFace frontFace;
979         VK_FALSE,                                                   // VkBool32 depthBiasEnable;
980         0.0f,                                                       // float depthBiasConstantFactor;
981         0.0f,                                                       // float depthBiasClamp;
982         0.0f,                                                       // float depthBiasSlopeFactor;
983         1.0f,                                                       // float lineWidth;
984     };
985 
986     const auto frontStencilState = makeStencilOpState(VK_STENCIL_OP_KEEP, VK_STENCIL_OP_INCREMENT_AND_WRAP,
987                                                       VK_STENCIL_OP_KEEP, VK_COMPARE_OP_ALWAYS, 0xFFu, 0xFFu, 0u);
988     const auto backStencilState  = makeStencilOpState(VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP,
989                                                       VK_COMPARE_OP_NEVER, 0xFFu, 0xFFu, 0u);
990     const auto depthTestEnable   = (isMosaic ? VK_FALSE : VK_TRUE);
991     const auto depthWriteEnable  = depthTestEnable;
992     const auto depthCompareOp =
993         (isMosaic ? VK_COMPARE_OP_ALWAYS : (isIndexed ? VK_COMPARE_OP_GREATER : VK_COMPARE_OP_LESS));
994 
995     const VkPipelineDepthStencilStateCreateInfo depthStencilInfo = {
996         VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, // VkStructureType sType;
997         nullptr,                                                    // const void* pNext;
998         0u,                                                         // VkPipelineDepthStencilStateCreateFlags flags;
999         depthTestEnable,                                            // VkBool32 depthTestEnable;
1000         depthWriteEnable,                                           // VkBool32 depthWriteEnable;
1001         depthCompareOp,                                             // VkCompareOp depthCompareOp;
1002         VK_FALSE,                                                   // VkBool32 depthBoundsTestEnable;
1003         VK_TRUE,                                                    // VkBool32 stencilTestEnable;
1004         frontStencilState,                                          // VkStencilOpState front;
1005         backStencilState,                                           // VkStencilOpState back;
1006         0.0f,                                                       // float minDepthBounds;
1007         1.0f,                                                       // float maxDepthBounds;
1008     };
1009 
1010     vk::VkPipelineRenderingCreateInfoKHR renderingCreateInfo{
1011         vk::VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO_KHR, DE_NULL, 0u, 1u, &colorFormat, dsFormat, dsFormat};
1012 
1013     vk::VkPipelineRenderingCreateInfoKHR *nextPtr = nullptr;
1014     if (m_params.groupParams->useDynamicRendering)
1015         nextPtr = &renderingCreateInfo;
1016 
1017     const auto primitiveTopology =
1018         (m_params.useTessellation ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST);
1019     const auto patchControlPoints = (m_params.useTessellation ? triVertexCount : 0u);
1020 
1021     // Pipelines.
1022     std::vector<Move<VkPipeline>> pipelines;
1023     pipelines.reserve(imageLayers);
1024     for (uint32_t subpassIdx = 0u; subpassIdx < imageLayers; ++subpassIdx)
1025     {
1026         renderingCreateInfo.viewMask = m_params.multiview ? (1u << subpassIdx) : 0u;
1027         pipelines.emplace_back(makeGraphicsPipeline(
1028             vkd, device, pipelineLayout.get(), vertModule.get(), tescModule.get(), teseModule.get(), geomModule.get(),
1029             fragModule.get(), renderPass.get(), viewports, scissors, primitiveTopology,
1030             m_params.groupParams->useDynamicRendering ? 0u : subpassIdx, patchControlPoints,
1031             nullptr /*vertexInputStateCreateInfo*/, &rasterizationInfo, nullptr /*multisampleStateCreateInfo*/,
1032             &depthStencilInfo, nullptr /*colorBlendStateCreateInfo*/, nullptr /*dynamicStateCreateInfo*/, nextPtr));
1033     }
1034 
1035     // Command pool and buffer.
1036     const auto cmdPool = makeCommandPool(vkd, device, qIndex);
1037     Move<VkCommandBuffer> cmdBufferPtr =
1038         allocateCommandBuffer(vkd, device, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1039     VkCommandBuffer cmdBuffer = cmdBufferPtr.get();
1040     std::vector<Move<VkCommandBuffer>> secCmdBuffers;
1041 
1042     // Create vertex buffer.
1043     std::vector<tcu::Vec4> triangleVertices;
1044     triangleVertices.reserve(vertexCount + extraVertices);
1045 
1046     // Vertex count per draw call.
1047     const bool atLeastOneDraw   = (m_params.drawCount > 0u);
1048     const bool moreThanOneDraw  = (m_params.drawCount > 1u);
1049     const auto trianglesPerDraw = (atLeastOneDraw ? pixelCount / m_params.drawCount : 0u);
1050     const auto verticesPerDraw  = trianglesPerDraw * triVertexCount;
1051 
1052     if (atLeastOneDraw)
1053         DE_ASSERT(pixelCount % m_params.drawCount == 0u);
1054 
1055     {
1056         using TriangleGeneratorPtr = de::MovePtr<TriangleGenerator>;
1057         TriangleGeneratorPtr triangleGen;
1058 
1059         if (m_params.meshType == MeshType::MOSAIC)
1060             triangleGen = TriangleGeneratorPtr(new TriangleMosaicGenerator(imageExtent.width, imageExtent.height));
1061         else if (m_params.meshType == MeshType::OVERLAPPING)
1062             triangleGen = TriangleGeneratorPtr(new TriangleOverlapGenerator(imageExtent.width, imageExtent.height));
1063         else
1064             DE_ASSERT(false);
1065 
1066         // When applying a vertex offset in nonmixed modes, there will be a few extra vertices at the start of the vertex buffer.
1067         if (isIndexed && !isMixedMode)
1068             appendPaddingVertices(triangleVertices, extraVertices);
1069 
1070         for (uint32_t y = 0u; y < imageExtent.height; ++y)
1071             for (uint32_t x = 0u; x < imageExtent.width; ++x)
1072             {
1073                 // When applying a vertex offset in mixed mode, there will be some extra padding between the triangles for the first
1074                 // block and the rest, so that the vertex offset will not be constant in all draw info structures. This way, the first
1075                 // triangles will always have offset zero, and the number of them depends on the given draw count.
1076                 const auto pixelIndex = y * imageExtent.width + x;
1077                 if (isIndexed && isMixedMode && moreThanOneDraw && pixelIndex == trianglesPerDraw)
1078                     appendPaddingVertices(triangleVertices, extraVertices);
1079 
1080                 triangleGen->appendTriangle(x, y, triangleVertices);
1081             }
1082     }
1083 
1084     const auto vertexBufferSize = static_cast<VkDeviceSize>(de::dataSize(triangleVertices));
1085     const auto vertexBufferInfo = makeBufferCreateInfo(vertexBufferSize, (VK_BUFFER_USAGE_VERTEX_BUFFER_BIT));
1086     BufferWithMemory vertexBuffer(vkd, device, alloc, vertexBufferInfo, MemoryRequirement::HostVisible);
1087     auto &vertexBufferAlloc       = vertexBuffer.getAllocation();
1088     const auto vertexBufferOffset = vertexBufferAlloc.getOffset();
1089     void *vertexBufferData        = vertexBufferAlloc.getHostPtr();
1090 
1091     deMemcpy(vertexBufferData, triangleVertices.data(), de::dataSize(triangleVertices));
1092     flushAlloc(vkd, device, vertexBufferAlloc);
1093 
1094     // Index buffer if needed.
1095     de::MovePtr<BufferWithMemory> indexBuffer;
1096     VkDeviceSize indexBufferOffset = 0ull;
1097     VkBuffer indexBufferHandle     = DE_NULL;
1098 
1099     if (isIndexed)
1100     {
1101         // Indices will be given in reverse order, so they effectively also make the triangles have reverse winding order.
1102         std::vector<uint32_t> indices;
1103         indices.reserve(vertexCount);
1104         for (uint32_t i = 0u; i < vertexCount; ++i)
1105             indices.push_back(vertexCount - i - 1u);
1106 
1107         const auto indexBufferSize = static_cast<VkDeviceSize>(de::dataSize(indices));
1108         const auto indexBufferInfo = makeBufferCreateInfo(indexBufferSize, VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
1109         indexBuffer                = de::MovePtr<BufferWithMemory>(
1110             new BufferWithMemory(vkd, device, alloc, indexBufferInfo, MemoryRequirement::HostVisible));
1111         auto &indexBufferAlloc = indexBuffer->getAllocation();
1112         indexBufferOffset      = indexBufferAlloc.getOffset();
1113         void *indexBufferData  = indexBufferAlloc.getHostPtr();
1114 
1115         deMemcpy(indexBufferData, indices.data(), de::dataSize(indices));
1116         flushAlloc(vkd, device, indexBufferAlloc);
1117         indexBufferHandle = indexBuffer->get();
1118     }
1119 
1120     // Prepare draw information.
1121     const auto offsetType   = (m_params.vertexOffset ? tcu::just(m_params.vertexOffset->offsetType) : tcu::Nothing);
1122     const auto vertexOffset = static_cast<int32_t>(extraVertices);
1123 
1124     DrawInfoPacker drawInfos(m_params.drawType, offsetType, m_params.stride, m_params.drawCount, m_params.seed);
1125 
1126     if (atLeastOneDraw)
1127     {
1128         uint32_t vertexIndex = 0u;
1129         for (uint32_t drawIdx = 0u; drawIdx < m_params.drawCount; ++drawIdx)
1130         {
1131             // For indexed draws in mixed offset mode, taking into account vertex indices have been stored in reverse
1132             // order and there may be a padding in the vertex buffer after the first verticesPerDraw vertices, we need
1133             // to use offset 0 in the last draw call. That draw will contain the indices for the first verticesPerDraw
1134             // vertices, which are stored without any offset, while other draw calls will use indices which are off by
1135             // extraVertices vertices. This will make sure not every draw call will use the same offset and the
1136             // implementation handles that.
1137             const auto drawOffset =
1138                 ((isIndexed && (!isMixedMode || (moreThanOneDraw && drawIdx < m_params.drawCount - 1u))) ?
1139                      vertexOffset :
1140                      0);
1141             drawInfos.addDrawInfo(vertexIndex, verticesPerDraw, drawOffset);
1142             vertexIndex += verticesPerDraw;
1143         }
1144     }
1145     drawInfos.finalize();
1146 
1147     std::vector<VkClearValue> clearValues;
1148     clearValues.reserve(2u);
1149     clearValues.push_back(makeClearValueColorU32(0u, 0u, 0u, 0u));
1150     clearValues.push_back(makeClearValueDepthStencil(((isMosaic || isIndexed) ? 0.0f : 1.0f), 0u));
1151 
1152     if (m_params.groupParams->useSecondaryCmdBuffer)
1153     {
1154         secCmdBuffers.resize(imageLayers);
1155         for (uint32_t layerIdx = 0u; layerIdx < imageLayers; ++layerIdx)
1156         {
1157             secCmdBuffers[layerIdx] = allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_SECONDARY);
1158             VkCommandBuffer secCmdBuffer = *secCmdBuffers[layerIdx];
1159             const uint32_t viewMask      = m_params.multiview ? (1u << layerIdx) : 0u;
1160 
1161             // record secondary command buffer
1162             if (m_params.groupParams->secondaryCmdBufferCompletelyContainsDynamicRenderpass)
1163             {
1164                 beginSecondaryCmdBuffer(vkd, secCmdBuffer, colorFormat, dsFormat,
1165                                         VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT, viewMask);
1166                 beginRendering(vkd, secCmdBuffer, *colorBufferView, *dsBufferView, true, scissor, clearValues[0],
1167                                clearValues[1], vk::VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1168                                vk::VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_ATTACHMENT_LOAD_OP_CLEAR, 0,
1169                                imageLayers, viewMask);
1170             }
1171             else
1172                 beginSecondaryCmdBuffer(vkd, secCmdBuffer, colorFormat, dsFormat, 0u, viewMask);
1173 
1174             drawCommands(vkd, secCmdBuffer, pipelines[layerIdx].get(), vertexBuffer.get(), vertexBufferOffset,
1175                          vertexOffset, indexBufferHandle, indexBufferOffset, isMixedMode, drawInfos);
1176 
1177             if (m_params.groupParams->secondaryCmdBufferCompletelyContainsDynamicRenderpass)
1178                 endRendering(vkd, secCmdBuffer);
1179 
1180             endCommandBuffer(vkd, secCmdBuffer);
1181         }
1182 
1183         // record primary command buffer
1184         beginCommandBuffer(vkd, cmdBuffer, 0u);
1185         preRenderingCommands(vkd, cmdBuffer, *colorBuffer, colorSubresourceRange, *dsBuffer, dsSubresourceRange);
1186 
1187         for (uint32_t layerIdx = 0u; layerIdx < imageLayers; ++layerIdx)
1188         {
1189             if (!m_params.groupParams->secondaryCmdBufferCompletelyContainsDynamicRenderpass)
1190             {
1191                 beginRendering(vkd, cmdBuffer, *colorBufferView, *dsBufferView, true, scissor, clearValues[0],
1192                                clearValues[1], vk::VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1193                                vk::VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_ATTACHMENT_LOAD_OP_CLEAR,
1194                                VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT, imageLayers,
1195                                m_params.multiview ? (1u << layerIdx) : 0u);
1196             }
1197 
1198             vkd.cmdExecuteCommands(cmdBuffer, 1u, &*secCmdBuffers[layerIdx]);
1199 
1200             if (!m_params.groupParams->secondaryCmdBufferCompletelyContainsDynamicRenderpass)
1201                 endRendering(vkd, cmdBuffer);
1202         }
1203     }
1204     else
1205     {
1206         beginCommandBuffer(vkd, cmdBuffer);
1207 
1208         if (m_params.groupParams->useDynamicRendering)
1209             preRenderingCommands(vkd, cmdBuffer, *colorBuffer, colorSubresourceRange, *dsBuffer, dsSubresourceRange);
1210         else
1211             beginRenderPass(vkd, cmdBuffer, renderPass.get(), framebuffer.get(), scissor,
1212                             static_cast<uint32_t>(clearValues.size()), de::dataOrNull(clearValues));
1213 
1214         for (uint32_t layerIdx = 0u; layerIdx < imageLayers; ++layerIdx)
1215         {
1216             if (m_params.groupParams->useDynamicRendering)
1217                 beginRendering(vkd, cmdBuffer, *colorBufferView, *dsBufferView, true, scissor, clearValues[0],
1218                                clearValues[1], vk::VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1219                                vk::VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_ATTACHMENT_LOAD_OP_CLEAR, 0,
1220                                imageLayers, m_params.multiview ? (1u << layerIdx) : 0u);
1221             else if (layerIdx > 0u)
1222                 vkd.cmdNextSubpass(cmdBuffer, VK_SUBPASS_CONTENTS_INLINE);
1223 
1224             drawCommands(vkd, cmdBuffer, pipelines[layerIdx].get(), vertexBuffer.get(), vertexBufferOffset,
1225                          vertexOffset, indexBufferHandle, indexBufferOffset, isMixedMode, drawInfos);
1226 
1227             if (m_params.groupParams->useDynamicRendering)
1228                 endRendering(vkd, cmdBuffer);
1229         }
1230 
1231         if (!m_params.groupParams->useDynamicRendering)
1232             endRenderPass(vkd, cmdBuffer);
1233     }
1234 
1235     // Prepare images for copying.
1236     const auto colorBufferBarrier = makeImageMemoryBarrier(
1237         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1238         VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, colorBuffer.get(), colorSubresourceRange);
1239     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u,
1240                            0u, nullptr, 0u, nullptr, 1u, &colorBufferBarrier);
1241 
1242     const auto dsBufferBarrier =
1243         makeImageMemoryBarrier(VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
1244                                VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1245                                dsBuffer.get(), dsSubresourceRange);
1246     vkd.cmdPipelineBarrier(cmdBuffer,
1247                            (VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT),
1248                            VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, nullptr, 0u, nullptr, 1u, &dsBufferBarrier);
1249 
1250     // Copy images to output buffers.
1251     for (uint32_t layerIdx = 0u; layerIdx < imageLayers; ++layerIdx)
1252     {
1253         const auto colorSubresourceLayers = makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, layerIdx, 1u);
1254         const auto colorCopyRegion        = makeBufferImageCopy(imageExtent, colorSubresourceLayers);
1255         vkd.cmdCopyImageToBuffer(cmdBuffer, colorBuffer.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1256                                  outputBuffers[layerIdx]->get(), 1u, &colorCopyRegion);
1257     }
1258 
1259     // Note: this only copies the stencil aspect. See stencilOutBuffer creation.
1260     for (uint32_t layerIdx = 0u; layerIdx < imageLayers; ++layerIdx)
1261     {
1262         const auto stencilSubresourceLayers = makeImageSubresourceLayers(VK_IMAGE_ASPECT_STENCIL_BIT, 0u, layerIdx, 1u);
1263         const auto stencilCopyRegion        = makeBufferImageCopy(imageExtent, stencilSubresourceLayers);
1264         vkd.cmdCopyImageToBuffer(cmdBuffer, dsBuffer.get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1265                                  stencilOutBuffers[layerIdx]->get(), 1u, &stencilCopyRegion);
1266     }
1267 
1268     // Prepare buffers for host reading.
1269     const auto outputBufferBarrier = makeMemoryBarrier(VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT);
1270     vkd.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 1u,
1271                            &outputBufferBarrier, 0u, nullptr, 0u, nullptr);
1272 
1273     endCommandBuffer(vkd, cmdBuffer);
1274     submitCommandsAndWait(vkd, device, queue, cmdBuffer);
1275 
1276     // Read output buffers and verify their contents.
1277 
1278     // With stride zero, mosaic meshes increment the stencil buffer as many times as draw operations for affected pixels and
1279     // overlapping meshes increment the stencil buffer only in the first draw operation (the rest fail the depth test) as many times
1280     // as triangles per draw.
1281     //
1282     // With nonzero stride, mosaic meshes increment the stencil buffer once per pixel. Overlapping meshes increment it once per
1283     // triangle.
1284     const auto stencilIncrements =
1285         ((m_params.stride == 0u) ? (isMosaic ? drawInfos.drawInfoCount() : trianglesPerDraw) :
1286                                    (isMosaic ? 1u : triangleCount));
1287     const auto maxInstanceIndex        = m_params.maxInstanceIndex();
1288     const auto colorVerificationFormat = mapVkFormat(getVerificationFormat());
1289     const auto iWidth                  = static_cast<int>(imageExtent.width);
1290     const auto iHeight                 = static_cast<int>(imageExtent.height);
1291     auto &log                          = m_context.getTestContext().getLog();
1292     const auto logMode                 = tcu::CompareLogMode::COMPARE_LOG_ON_ERROR;
1293 
1294     for (uint32_t layerIdx = 0u; layerIdx < imageLayers; ++layerIdx)
1295     {
1296         auto &outputBufferAlloc = outputBuffers[layerIdx]->getAllocation();
1297         invalidateAlloc(vkd, device, outputBufferAlloc);
1298         const void *outputBufferData = outputBufferAlloc.getHostPtr();
1299 
1300         auto &stencilOutBufferAlloc = stencilOutBuffers[layerIdx]->getAllocation();
1301         invalidateAlloc(vkd, device, stencilOutBufferAlloc);
1302         const void *stencilOutBufferData = stencilOutBufferAlloc.getHostPtr();
1303 
1304         tcu::ConstPixelBufferAccess colorAccess(colorVerificationFormat, iWidth, iHeight, 1, outputBufferData);
1305         tcu::ConstPixelBufferAccess stencilAccess(tcuStencilFmt, iWidth, iHeight, 1, stencilOutBufferData);
1306 
1307         // Generate reference images.
1308         tcu::TextureLevel refColorLevel(colorVerificationFormat, iWidth, iHeight);
1309         tcu::PixelBufferAccess refColorAccess = refColorLevel.getAccess();
1310         tcu::TextureLevel refStencilLevel(tcuStencilFmt, iWidth, iHeight);
1311         tcu::PixelBufferAccess refStencilAccess = refStencilLevel.getAccess();
1312         tcu::IVec4 referenceColor;
1313         int referenceStencil;
1314 
1315         for (int y = 0; y < iHeight; ++y)
1316             for (int x = 0; x < iWidth; ++x)
1317             {
1318                 const auto pixelNumber = static_cast<uint32_t>(y * iWidth + x);
1319                 const auto triangleIndex =
1320                     (isIndexed ? (pixelCount - 1u - pixelNumber) : pixelNumber); // Reverse order for indexed draws.
1321 
1322                 if (m_params.instanceCount == 0u || drawInfos.drawInfoCount() == 0u ||
1323                     (m_params.stride == 0u && triangleIndex >= trianglesPerDraw && isMosaic))
1324                 {
1325                     // Some pixels may not be drawn into when there are no instances or draws, or when the stride is zero in mosaic mode.
1326                     referenceColor   = tcu::IVec4(0, 0, 0, 0);
1327                     referenceStencil = 0;
1328                 }
1329                 else
1330                 {
1331                     // This must match the vertex shader.
1332                     auto storedVal = std::numeric_limits<uint32_t>::max();
1333                     if (m_params.drawId)
1334                     {
1335                         // With stride zero, the same block is drawn over and over again in each draw call. This affects both the draw index and
1336                         // the values in the depth/stencil buffer and, with overlapping meshes, only the first draw passes the depth test.
1337                         //
1338                         // With nonzero stride, the draw index depends on the triangle index and the number of triangles per draw and, for
1339                         // overlapping meshes, the draw index is always the last one.
1340                         storedVal = (m_params.stride == 0u ? (isMosaic ? (drawInfos.drawInfoCount() - 1u) : 0u) :
1341                                                              (isMosaic ? (triangleIndex / trianglesPerDraw) :
1342                                                                          (drawInfos.drawInfoCount() - 1u)));
1343                     }
1344                     else
1345                     {
1346                         if (isMosaic)
1347                         {
1348                             const auto triangleId = (isIndexed ? (triangleCount - triangleIndex - 1u) : triangleIndex);
1349                             const auto primOffset =
1350                                 ((isIndexed &&
1351                                   (!isMixedMode || (moreThanOneDraw && !(triangleId < trianglesPerDraw)))) ?
1352                                      extraTriangles :
1353                                      0u);
1354                             const auto primitiveId = triangleId + primOffset;
1355 
1356                             storedVal = primitiveId;
1357                         }
1358                         else
1359                         {
1360                             if (m_params.stride == 0u && moreThanOneDraw)
1361                                 storedVal = (isIndexed ? (pixelCount - trianglesPerDraw + extraTriangles) :
1362                                                          (trianglesPerDraw - 1u));
1363                             else
1364                                 storedVal = (isIndexed ? (isMixedMode ? 0u : extraTriangles) : (pixelCount - 1u));
1365                         }
1366                     }
1367 
1368                     referenceColor =
1369                         tcu::IVec4(static_cast<int>((storedVal >> 8) & 0xFFu), static_cast<int>((storedVal)&0xFFu),
1370                                    static_cast<int>(255u - maxInstanceIndex), static_cast<int>(255u - layerIdx));
1371 
1372                     referenceStencil = static_cast<int>((m_params.instanceCount * stencilIncrements) %
1373                                                         256u); // VK_STENCIL_OP_INCREMENT_AND_WRAP.
1374                 }
1375 
1376                 refColorAccess.setPixel(referenceColor, x, y);
1377                 refStencilAccess.setPixStencil(referenceStencil, x, y);
1378             }
1379 
1380         const auto layerIdxStr    = de::toString(layerIdx);
1381         const auto colorSetName   = "ColorTestResultLayer" + layerIdxStr;
1382         const auto stencilSetName = "StencilTestResultLayer" + layerIdxStr;
1383 
1384         if (!tcu::intThresholdCompare(log, colorSetName.c_str(), "", refColorAccess, colorAccess,
1385                                       tcu::UVec4(0u, 0u, 0u, 0u), logMode))
1386             return tcu::TestStatus::fail("Color image comparison failed; check log for more details");
1387 
1388         if (!tcu::dsThresholdCompare(log, stencilSetName.c_str(), "", refStencilAccess, stencilAccess, 0.0f, logMode))
1389             return tcu::TestStatus::fail("Stencil image comparison failed; check log for more details");
1390     }
1391 
1392     return tcu::TestStatus::pass("Pass");
1393 }
1394 
1395 } // namespace
1396 
createDrawMultiExtTests(tcu::TestContext & testCtx,const SharedGroupParams groupParams)1397 tcu::TestCaseGroup *createDrawMultiExtTests(tcu::TestContext &testCtx, const SharedGroupParams groupParams)
1398 {
1399     using GroupPtr = de::MovePtr<tcu::TestCaseGroup>;
1400 
1401     GroupPtr drawMultiGroup(new tcu::TestCaseGroup(testCtx, "multi_draw"));
1402 
1403     const struct
1404     {
1405         MeshType meshType;
1406         const char *name;
1407     } meshTypeCases[] = {
1408         {MeshType::MOSAIC, "mosaic"},
1409         {MeshType::OVERLAPPING, "overlapping"},
1410     };
1411 
1412     const struct
1413     {
1414         DrawType drawType;
1415         const char *name;
1416     } drawTypeCases[] = {
1417         {DrawType::NORMAL, "normal"},
1418         {DrawType::INDEXED, "indexed"},
1419     };
1420 
1421     const struct
1422     {
1423         tcu::Maybe<VertexOffsetType> vertexOffsetType;
1424         const char *name;
1425     } offsetTypeCases[] = {
1426         {tcu::Nothing, ""},
1427         {VertexOffsetType::MIXED, "mixed"},
1428         {VertexOffsetType::CONSTANT_RANDOM, "random"},
1429         {VertexOffsetType::CONSTANT_PACK, "packed"},
1430     };
1431 
1432     const struct
1433     {
1434         uint32_t drawCount;
1435         const char *name;
1436     } drawCountCases[] = {
1437         {0u, "no_draws"},
1438         {1u, "one_draw"},
1439         {16u, "16_draws"},
1440         {getTriangleCount(), "max_draws"},
1441     };
1442 
1443     const struct
1444     {
1445         int extraBytes;
1446         const char *name;
1447     } strideCases[] = {
1448         {-1, "stride_zero"},
1449         {0, "standard_stride"},
1450         {4, "stride_extra_4"},
1451         {12, "stride_extra_12"},
1452     };
1453 
1454     const struct
1455     {
1456         uint32_t firstInstance;
1457         uint32_t instanceCount;
1458         const char *name;
1459     } instanceCases[] = {
1460         {0u, 0u, "no_instances"},
1461         {0u, 1u, "1_instance"},
1462         {0u, 10u, "10_instances"},
1463         {3u, 2u, "2_instances_base_3"},
1464     };
1465 
1466     const struct
1467     {
1468         bool useTessellation;
1469         bool useGeometry;
1470         const char *name;
1471     } shaderCases[] = {
1472         {false, false, "vert_only"},
1473         {false, true, "with_geom"},
1474         {true, false, "with_tess"},
1475         {true, true, "tess_geom"},
1476     };
1477 
1478     const struct
1479     {
1480         bool drawId;
1481         const char *suffix;
1482     } drawIdCases[] = {
1483         {true, ""},
1484         {false, "_no_draw_id"},
1485     };
1486 
1487     const struct
1488     {
1489         bool multiview;
1490         const char *name;
1491     } multiviewCases[] = {
1492         {false, "single_view"},
1493         {true, "multiview"},
1494     };
1495 
1496     constexpr uint32_t kSeed = 1621260419u;
1497 
1498     for (const auto &meshTypeCase : meshTypeCases)
1499     {
1500         // reduce number of tests for dynamic rendering cases where secondary command buffer is used
1501         if (groupParams->useSecondaryCmdBuffer && (meshTypeCase.meshType != MeshType::MOSAIC))
1502             continue;
1503 
1504         GroupPtr meshTypeGroup(new tcu::TestCaseGroup(testCtx, meshTypeCase.name));
1505 
1506         for (const auto &drawTypeCase : drawTypeCases)
1507         {
1508             for (const auto &offsetTypeCase : offsetTypeCases)
1509             {
1510                 // reduce number of tests for dynamic rendering cases where secondary command buffer is used
1511                 if (groupParams->useSecondaryCmdBuffer && offsetTypeCase.vertexOffsetType &&
1512                     (*offsetTypeCase.vertexOffsetType != VertexOffsetType::CONSTANT_RANDOM))
1513                     continue;
1514 
1515                 const auto hasOffsetType = static_cast<bool>(offsetTypeCase.vertexOffsetType);
1516                 if ((drawTypeCase.drawType == DrawType::NORMAL && hasOffsetType) ||
1517                     (drawTypeCase.drawType == DrawType::INDEXED && !hasOffsetType))
1518                 {
1519                     continue;
1520                 }
1521 
1522                 std::string drawGroupName = drawTypeCase.name;
1523                 if (hasOffsetType)
1524                     drawGroupName += std::string("_") + offsetTypeCase.name;
1525 
1526                 GroupPtr drawTypeGroup(new tcu::TestCaseGroup(testCtx, drawGroupName.c_str()));
1527 
1528                 for (const auto &drawCountCase : drawCountCases)
1529                 {
1530                     // reduce number of tests for dynamic rendering cases where secondary command buffer is used
1531                     if (groupParams->useSecondaryCmdBuffer && (drawCountCase.drawCount != 1u))
1532                         continue;
1533 
1534                     GroupPtr drawCountGroup(new tcu::TestCaseGroup(testCtx, drawCountCase.name));
1535 
1536                     for (const auto &strideCase : strideCases)
1537                     {
1538                         GroupPtr strideGroup(new tcu::TestCaseGroup(testCtx, strideCase.name));
1539 
1540                         for (const auto &instanceCase : instanceCases)
1541                         {
1542                             GroupPtr instanceGroup(new tcu::TestCaseGroup(testCtx, instanceCase.name));
1543 
1544                             for (const auto &shaderCase : shaderCases)
1545                             {
1546                                 GroupPtr shaderGroup(new tcu::TestCaseGroup(testCtx, shaderCase.name));
1547 
1548                                 for (const auto &multiviewCase : multiviewCases)
1549                                 {
1550                                     GroupPtr multiviewGroup(new tcu::TestCaseGroup(testCtx, multiviewCase.name));
1551 
1552                                     for (const auto &drawIdCase : drawIdCases)
1553                                     {
1554                                         const auto isIndexed = (drawTypeCase.drawType == DrawType::INDEXED);
1555                                         const auto isPacked =
1556                                             (offsetTypeCase.vertexOffsetType &&
1557                                              *offsetTypeCase.vertexOffsetType == VertexOffsetType::CONSTANT_PACK);
1558                                         const auto baseStride =
1559                                             ((isIndexed && !isPacked) ? sizeof(VkMultiDrawIndexedInfoEXT) :
1560                                                                         sizeof(VkMultiDrawInfoEXT));
1561                                         const auto &extraBytes = strideCase.extraBytes;
1562                                         const auto testOffset =
1563                                             (isIndexed ?
1564                                                  tcu::just(VertexOffsetParams{*offsetTypeCase.vertexOffsetType, 0u}) :
1565                                                  tcu::Nothing);
1566                                         uint32_t testStride = 0u;
1567 
1568                                         if (extraBytes >= 0)
1569                                             testStride =
1570                                                 static_cast<uint32_t>(baseStride) + static_cast<uint32_t>(extraBytes);
1571 
1572                                         if (drawCountCase.drawCount > 1u)
1573                                         {
1574                                             // VUID-vkCmdDrawMultiEXT-drawCount-09628
1575                                             // VUID-vkCmdDrawMultiIndexedEXT-drawCount-09629
1576                                             const auto minStride =
1577                                                 static_cast<uint32_t>(isIndexed ? sizeof(VkMultiDrawIndexedInfoEXT) :
1578                                                                                   sizeof(VkMultiDrawInfoEXT));
1579                                             if (testStride < minStride || testStride % 4u != 0u)
1580                                                 continue;
1581                                         }
1582 
1583                                         // For overlapping triangles we will skip instanced drawing.
1584                                         if (instanceCase.instanceCount > 1u &&
1585                                             meshTypeCase.meshType == MeshType::OVERLAPPING)
1586                                             continue;
1587 
1588                                         TestParams params{
1589                                             meshTypeCase.meshType,      // MeshType meshType;
1590                                             drawTypeCase.drawType,      // DrawType drawType;
1591                                             drawCountCase.drawCount,    // uint32_t drawCount;
1592                                             instanceCase.instanceCount, // uint32_t instanceCount;
1593                                             instanceCase.firstInstance, // uint32_t firstInstance;
1594                                             testStride,                 // uint32_t stride;
1595                                             testOffset, //    tcu::Maybe<VertexOffsetParams>>    vertexOffset;    // Only used for indexed draws.
1596                                             kSeed,                      // uint32_t seed;
1597                                             shaderCase.useTessellation, // bool useTessellation;
1598                                             shaderCase.useGeometry,     // bool useGeometry;
1599                                             multiviewCase.multiview,    // bool multiview;
1600                                             drawIdCase.drawId,          // bool drawId;
1601                                             groupParams,                // SharedGroupParams groupParams;
1602                                         };
1603 
1604                                         multiviewGroup->addChild(new MultiDrawTest(
1605                                             testCtx, std::string("no_offset") + drawIdCase.suffix, params));
1606 
1607                                         if (isIndexed)
1608                                         {
1609                                             params.vertexOffset->offset = 6u;
1610                                             multiviewGroup->addChild(new MultiDrawTest(
1611                                                 testCtx, std::string("offset_6") + drawIdCase.suffix, params));
1612                                         }
1613                                     }
1614 
1615                                     shaderGroup->addChild(multiviewGroup.release());
1616                                 }
1617 
1618                                 instanceGroup->addChild(shaderGroup.release());
1619                             }
1620 
1621                             strideGroup->addChild(instanceGroup.release());
1622                         }
1623 
1624                         drawCountGroup->addChild(strideGroup.release());
1625                     }
1626 
1627                     drawTypeGroup->addChild(drawCountGroup.release());
1628                 }
1629 
1630                 meshTypeGroup->addChild(drawTypeGroup.release());
1631             }
1632         }
1633 
1634         drawMultiGroup->addChild(meshTypeGroup.release());
1635     }
1636 
1637     return drawMultiGroup.release();
1638 }
1639 
1640 } // namespace Draw
1641 } // namespace vkt
1642