1 /*------------------------------------------------------------------------
2 * OpenGL Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2017-2019 The Khronos Group Inc.
6 * Copyright (c) 2017 Codeplay Software Ltd.
7 * Copyright (c) 2018-2019 NVIDIA Corporation
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 */ /*!
22 * \file
23 * \brief Subgroups Tests
24 */ /*--------------------------------------------------------------------*/
25
26 #include "glcSubgroupsPartitionedTests.hpp"
27 #include "glcSubgroupsTestsUtils.hpp"
28
29 #include <string>
30 #include <vector>
31
32 using namespace tcu;
33 using namespace std;
34
35 namespace glc
36 {
37 namespace subgroups
38 {
39 namespace
40 {
// Subgroup arithmetic operations exercised by this file.
//
// The enumerators form three consecutive families of seven operations each
// (reduction, inclusive scan, exclusive scan), always in the order
// ADD, MUL, MIN, MAX, AND, OR, XOR. Code in this file iterates over
// [0, OPTYPE_LAST) and range-checks the EXCLUSIVE family, so the ordering
// must not be changed.
enum OpType
{
    // Reductions.
    OPTYPE_ADD = 0,
    OPTYPE_MUL,
    OPTYPE_MIN,
    OPTYPE_MAX,
    OPTYPE_AND,
    OPTYPE_OR,
    OPTYPE_XOR,

    // Inclusive scans.
    OPTYPE_INCLUSIVE_ADD,
    OPTYPE_INCLUSIVE_MUL,
    OPTYPE_INCLUSIVE_MIN,
    OPTYPE_INCLUSIVE_MAX,
    OPTYPE_INCLUSIVE_AND,
    OPTYPE_INCLUSIVE_OR,
    OPTYPE_INCLUSIVE_XOR,

    // Exclusive scans.
    OPTYPE_EXCLUSIVE_ADD,
    OPTYPE_EXCLUSIVE_MUL,
    OPTYPE_EXCLUSIVE_MIN,
    OPTYPE_EXCLUSIVE_MAX,
    OPTYPE_EXCLUSIVE_AND,
    OPTYPE_EXCLUSIVE_OR,
    OPTYPE_EXCLUSIVE_XOR,

    // Number of operations; not a valid operation itself.
    OPTYPE_LAST
};
66
checkVertexPipelineStages(std::vector<const void * > datas,uint32_t width,uint32_t)67 static bool checkVertexPipelineStages(std::vector<const void *> datas, uint32_t width, uint32_t)
68 {
69 return glc::subgroups::check(datas, width, 0xFFFFFF);
70 }
71
checkComputeStage(std::vector<const void * > datas,const uint32_t numWorkgroups[3],const uint32_t localSize[3],uint32_t)72 static bool checkComputeStage(std::vector<const void *> datas, const uint32_t numWorkgroups[3],
73 const uint32_t localSize[3], uint32_t)
74 {
75 return glc::subgroups::checkCompute(datas, numWorkgroups, localSize, 0xFFFFFF);
76 }
77
getOpTypeName(int opType)78 std::string getOpTypeName(int opType)
79 {
80 switch (opType)
81 {
82 default:
83 DE_FATAL("Unsupported op type");
84 return "";
85 case OPTYPE_ADD:
86 return "subgroupAdd";
87 case OPTYPE_MUL:
88 return "subgroupMul";
89 case OPTYPE_MIN:
90 return "subgroupMin";
91 case OPTYPE_MAX:
92 return "subgroupMax";
93 case OPTYPE_AND:
94 return "subgroupAnd";
95 case OPTYPE_OR:
96 return "subgroupOr";
97 case OPTYPE_XOR:
98 return "subgroupXor";
99 case OPTYPE_INCLUSIVE_ADD:
100 return "subgroupInclusiveAdd";
101 case OPTYPE_INCLUSIVE_MUL:
102 return "subgroupInclusiveMul";
103 case OPTYPE_INCLUSIVE_MIN:
104 return "subgroupInclusiveMin";
105 case OPTYPE_INCLUSIVE_MAX:
106 return "subgroupInclusiveMax";
107 case OPTYPE_INCLUSIVE_AND:
108 return "subgroupInclusiveAnd";
109 case OPTYPE_INCLUSIVE_OR:
110 return "subgroupInclusiveOr";
111 case OPTYPE_INCLUSIVE_XOR:
112 return "subgroupInclusiveXor";
113 case OPTYPE_EXCLUSIVE_ADD:
114 return "subgroupExclusiveAdd";
115 case OPTYPE_EXCLUSIVE_MUL:
116 return "subgroupExclusiveMul";
117 case OPTYPE_EXCLUSIVE_MIN:
118 return "subgroupExclusiveMin";
119 case OPTYPE_EXCLUSIVE_MAX:
120 return "subgroupExclusiveMax";
121 case OPTYPE_EXCLUSIVE_AND:
122 return "subgroupExclusiveAnd";
123 case OPTYPE_EXCLUSIVE_OR:
124 return "subgroupExclusiveOr";
125 case OPTYPE_EXCLUSIVE_XOR:
126 return "subgroupExclusiveXor";
127 }
128 }
129
getOpTypeNamePartitioned(int opType)130 std::string getOpTypeNamePartitioned(int opType)
131 {
132 switch (opType)
133 {
134 default:
135 DE_FATAL("Unsupported op type");
136 return "";
137 case OPTYPE_ADD:
138 return "subgroupPartitionedAddNV";
139 case OPTYPE_MUL:
140 return "subgroupPartitionedMulNV";
141 case OPTYPE_MIN:
142 return "subgroupPartitionedMinNV";
143 case OPTYPE_MAX:
144 return "subgroupPartitionedMaxNV";
145 case OPTYPE_AND:
146 return "subgroupPartitionedAndNV";
147 case OPTYPE_OR:
148 return "subgroupPartitionedOrNV";
149 case OPTYPE_XOR:
150 return "subgroupPartitionedXorNV";
151 case OPTYPE_INCLUSIVE_ADD:
152 return "subgroupPartitionedInclusiveAddNV";
153 case OPTYPE_INCLUSIVE_MUL:
154 return "subgroupPartitionedInclusiveMulNV";
155 case OPTYPE_INCLUSIVE_MIN:
156 return "subgroupPartitionedInclusiveMinNV";
157 case OPTYPE_INCLUSIVE_MAX:
158 return "subgroupPartitionedInclusiveMaxNV";
159 case OPTYPE_INCLUSIVE_AND:
160 return "subgroupPartitionedInclusiveAndNV";
161 case OPTYPE_INCLUSIVE_OR:
162 return "subgroupPartitionedInclusiveOrNV";
163 case OPTYPE_INCLUSIVE_XOR:
164 return "subgroupPartitionedInclusiveXorNV";
165 case OPTYPE_EXCLUSIVE_ADD:
166 return "subgroupPartitionedExclusiveAddNV";
167 case OPTYPE_EXCLUSIVE_MUL:
168 return "subgroupPartitionedExclusiveMulNV";
169 case OPTYPE_EXCLUSIVE_MIN:
170 return "subgroupPartitionedExclusiveMinNV";
171 case OPTYPE_EXCLUSIVE_MAX:
172 return "subgroupPartitionedExclusiveMaxNV";
173 case OPTYPE_EXCLUSIVE_AND:
174 return "subgroupPartitionedExclusiveAndNV";
175 case OPTYPE_EXCLUSIVE_OR:
176 return "subgroupPartitionedExclusiveOrNV";
177 case OPTYPE_EXCLUSIVE_XOR:
178 return "subgroupPartitionedExclusiveXorNV";
179 }
180 }
181
getIdentity(int opType,Format format)182 std::string getIdentity(int opType, Format format)
183 {
184 bool isFloat = false;
185 bool isInt = false;
186 bool isUnsigned = false;
187
188 switch (format)
189 {
190 default:
191 DE_FATAL("Unhandled format!");
192 return "";
193 case FORMAT_R32_SINT:
194 case FORMAT_R32G32_SINT:
195 case FORMAT_R32G32B32_SINT:
196 case FORMAT_R32G32B32A32_SINT:
197 isInt = true;
198 break;
199 case FORMAT_R32_UINT:
200 case FORMAT_R32G32_UINT:
201 case FORMAT_R32G32B32_UINT:
202 case FORMAT_R32G32B32A32_UINT:
203 isUnsigned = true;
204 break;
205 case FORMAT_R32_SFLOAT:
206 case FORMAT_R32G32_SFLOAT:
207 case FORMAT_R32G32B32_SFLOAT:
208 case FORMAT_R32G32B32A32_SFLOAT:
209 case FORMAT_R64_SFLOAT:
210 case FORMAT_R64G64_SFLOAT:
211 case FORMAT_R64G64B64_SFLOAT:
212 case FORMAT_R64G64B64A64_SFLOAT:
213 isFloat = true;
214 break;
215 case FORMAT_R32_BOOL:
216 case FORMAT_R32G32_BOOL:
217 case FORMAT_R32G32B32_BOOL:
218 case FORMAT_R32G32B32A32_BOOL:
219 break; // bool types are not anything
220 }
221
222 switch (opType)
223 {
224 default:
225 DE_FATAL("Unsupported op type");
226 return "";
227 case OPTYPE_ADD:
228 case OPTYPE_INCLUSIVE_ADD:
229 case OPTYPE_EXCLUSIVE_ADD:
230 return subgroups::getFormatNameForGLSL(format) + "(0)";
231 case OPTYPE_MUL:
232 case OPTYPE_INCLUSIVE_MUL:
233 case OPTYPE_EXCLUSIVE_MUL:
234 return subgroups::getFormatNameForGLSL(format) + "(1)";
235 case OPTYPE_MIN:
236 case OPTYPE_INCLUSIVE_MIN:
237 case OPTYPE_EXCLUSIVE_MIN:
238 if (isFloat)
239 {
240 return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0x7f800000))";
241 }
242 else if (isInt)
243 {
244 return subgroups::getFormatNameForGLSL(format) + "(0x7fffffff)";
245 }
246 else if (isUnsigned)
247 {
248 return subgroups::getFormatNameForGLSL(format) + "(0xffffffffu)";
249 }
250 else
251 {
252 DE_FATAL("Unhandled case");
253 return "";
254 }
255 case OPTYPE_MAX:
256 case OPTYPE_INCLUSIVE_MAX:
257 case OPTYPE_EXCLUSIVE_MAX:
258 if (isFloat)
259 {
260 return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0xff800000))";
261 }
262 else if (isInt)
263 {
264 return subgroups::getFormatNameForGLSL(format) + "(0x80000000)";
265 }
266 else if (isUnsigned)
267 {
268 return subgroups::getFormatNameForGLSL(format) + "(0u)";
269 }
270 else
271 {
272 DE_FATAL("Unhandled case");
273 return "";
274 }
275 case OPTYPE_AND:
276 case OPTYPE_INCLUSIVE_AND:
277 case OPTYPE_EXCLUSIVE_AND:
278 return subgroups::getFormatNameForGLSL(format) + "(~0)";
279 case OPTYPE_OR:
280 case OPTYPE_INCLUSIVE_OR:
281 case OPTYPE_EXCLUSIVE_OR:
282 return subgroups::getFormatNameForGLSL(format) + "(0)";
283 case OPTYPE_XOR:
284 case OPTYPE_INCLUSIVE_XOR:
285 case OPTYPE_EXCLUSIVE_XOR:
286 return subgroups::getFormatNameForGLSL(format) + "(0)";
287 }
288 }
289
getCompare(int opType,Format format,std::string lhs,std::string rhs)290 std::string getCompare(int opType, Format format, std::string lhs, std::string rhs)
291 {
292 std::string formatName = subgroups::getFormatNameForGLSL(format);
293 switch (format)
294 {
295 default:
296 return "all(equal(" + lhs + ", " + rhs + "))";
297 case FORMAT_R32_BOOL:
298 case FORMAT_R32_UINT:
299 case FORMAT_R32_SINT:
300 return "(" + lhs + " == " + rhs + ")";
301 case FORMAT_R32_SFLOAT:
302 case FORMAT_R64_SFLOAT:
303 switch (opType)
304 {
305 default:
306 return "(abs(" + lhs + " - " + rhs + ") < 0.00001)";
307 case OPTYPE_MIN:
308 case OPTYPE_INCLUSIVE_MIN:
309 case OPTYPE_EXCLUSIVE_MIN:
310 case OPTYPE_MAX:
311 case OPTYPE_INCLUSIVE_MAX:
312 case OPTYPE_EXCLUSIVE_MAX:
313 return "(" + lhs + " == " + rhs + ")";
314 }
315 case FORMAT_R32G32_SFLOAT:
316 case FORMAT_R32G32B32_SFLOAT:
317 case FORMAT_R32G32B32A32_SFLOAT:
318 case FORMAT_R64G64_SFLOAT:
319 case FORMAT_R64G64B64_SFLOAT:
320 case FORMAT_R64G64B64A64_SFLOAT:
321 switch (opType)
322 {
323 default:
324 return "all(lessThan(abs(" + lhs + " - " + rhs + "), " + formatName + "(0.00001)))";
325 case OPTYPE_MIN:
326 case OPTYPE_INCLUSIVE_MIN:
327 case OPTYPE_EXCLUSIVE_MIN:
328 case OPTYPE_MAX:
329 case OPTYPE_INCLUSIVE_MAX:
330 case OPTYPE_EXCLUSIVE_MAX:
331 return "all(equal(" + lhs + ", " + rhs + "))";
332 }
333 }
334 }
335
336 struct CaseDefinition
337 {
338 int opType;
339 ShaderStageFlags shaderStage;
340 Format format;
341 };
342
getTestString(const CaseDefinition & caseDef)343 string getTestString(const CaseDefinition &caseDef)
344 {
345 // NOTE: tempResult can't have anything in bits 31:24 to avoid int->float
346 // conversion overflow in framebuffer tests.
347 string fmt = subgroups::getFormatNameForGLSL(caseDef.format);
348 string bdy = " uint tempResult = 0u;\n"
349 " uint id = gl_SubgroupInvocationID;\n";
350
351 // Test the case where the partition has a single subset with all invocations in it.
352 // This should generate the same result as the non-partitioned function.
353 bdy += " uvec4 allBallot = mask;\n"
354 " " +
355 fmt + " allResult = " + getOpTypeNamePartitioned(caseDef.opType) +
356 "(data[gl_SubgroupInvocationID], allBallot);\n"
357 " " +
358 fmt + " refResult = " + getOpTypeName(caseDef.opType) +
359 "(data[gl_SubgroupInvocationID]);\n"
360 " if (" +
361 getCompare(caseDef.opType, caseDef.format, "allResult", "refResult") +
362 ") {\n"
363 " tempResult |= 0x1u;\n"
364 " }\n";
365
366 // The definition of a partition doesn't forbid bits corresponding to inactive
367 // invocations being in the subset with active invocations. In other words, test that
368 // bits corresponding to inactive invocations are ignored.
369 bdy += " if (0u == (gl_SubgroupInvocationID % 2u)) {\n"
370 " " +
371 fmt + " allResult = " + getOpTypeNamePartitioned(caseDef.opType) +
372 "(data[gl_SubgroupInvocationID], allBallot);\n"
373 " " +
374 fmt + " refResult = " + getOpTypeName(caseDef.opType) +
375 "(data[gl_SubgroupInvocationID]);\n"
376 " if (" +
377 getCompare(caseDef.opType, caseDef.format, "allResult", "refResult") +
378 ") {\n"
379 " tempResult |= 0x2u;\n"
380 " }\n"
381 " } else {\n"
382 " tempResult |= 0x2u;\n"
383 " }\n";
384
385 // Test the case where the partition has each invocation in a unique subset. For
386 // exclusive ops, the result is identity. For reduce/inclusive, it's the original value.
387 string expectedSelfResult = "data[gl_SubgroupInvocationID]";
388 if (caseDef.opType >= OPTYPE_EXCLUSIVE_ADD && caseDef.opType <= OPTYPE_EXCLUSIVE_XOR)
389 {
390 expectedSelfResult = getIdentity(caseDef.opType, caseDef.format);
391 }
392
393 bdy += " uvec4 selfBallot = subgroupPartitionNV(gl_SubgroupInvocationID);\n"
394 " " +
395 fmt + " selfResult = " + getOpTypeNamePartitioned(caseDef.opType) +
396 "(data[gl_SubgroupInvocationID], selfBallot);\n"
397 " if (" +
398 getCompare(caseDef.opType, caseDef.format, "selfResult", expectedSelfResult) +
399 ") {\n"
400 " tempResult |= 0x4u;\n"
401 " }\n";
402
403 // Test "random" partitions based on a hash of the invocation id.
404 // This "hash" function produces interesting/randomish partitions.
405 static const char *idhash = "((id%N)+(id%(N+1u))-(id%2u)+(id/2u))%((N+1u)/2u)";
406
407 bdy += " for (uint N = 1u; N < 16u; ++N) {\n"
408 " " +
409 fmt + " idhashFmt = " + fmt + "(" + idhash +
410 ");\n"
411 " uvec4 partitionBallot = subgroupPartitionNV(idhashFmt) & mask;\n"
412 " " +
413 fmt + " partitionedResult = " + getOpTypeNamePartitioned(caseDef.opType) +
414 "(data[gl_SubgroupInvocationID], partitionBallot);\n"
415 " for (uint i = 0u; i < N; ++i) {\n"
416 " " +
417 fmt + " iFmt = " + fmt +
418 "(i);\n"
419 " if (" +
420 getCompare(caseDef.opType, caseDef.format, "idhashFmt", "iFmt") +
421 ") {\n"
422 " " +
423 fmt + " subsetResult = " + getOpTypeName(caseDef.opType) +
424 "(data[gl_SubgroupInvocationID]);\n"
425 " tempResult |= " +
426 getCompare(caseDef.opType, caseDef.format, "partitionedResult", "subsetResult") +
427 " ? (0x4u << N) : 0u;\n"
428 " }\n"
429 " }\n"
430 " }\n"
431 // tests in flow control:
432 " if (1u == (gl_SubgroupInvocationID % 2u)) {\n"
433 " for (uint N = 1u; N < 7u; ++N) {\n"
434 " " +
435 fmt + " idhashFmt = " + fmt + "(" + idhash +
436 ");\n"
437 " uvec4 partitionBallot = subgroupPartitionNV(idhashFmt) & mask;\n"
438 " " +
439 fmt + " partitionedResult = " + getOpTypeNamePartitioned(caseDef.opType) +
440 "(data[gl_SubgroupInvocationID], partitionBallot);\n"
441 " for (uint i = 0u; i < N; ++i) {\n"
442 " " +
443 fmt + " iFmt = " + fmt +
444 "(i);\n"
445 " if (" +
446 getCompare(caseDef.opType, caseDef.format, "idhashFmt", "iFmt") +
447 ") {\n"
448 " " +
449 fmt + " subsetResult = " + getOpTypeName(caseDef.opType) +
450 "(data[gl_SubgroupInvocationID]);\n"
451 " tempResult |= " +
452 getCompare(caseDef.opType, caseDef.format, "partitionedResult", "subsetResult") +
453 " ? (0x20000u << N) : 0u;\n"
454 " }\n"
455 " }\n"
456 " }\n"
457 " } else {\n"
458 " tempResult |= 0xFC0000u;\n"
459 " }\n";
460
461 return bdy;
462 }
463
initFrameBufferPrograms(SourceCollections & programCollection,CaseDefinition caseDef)464 void initFrameBufferPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
465 {
466 std::ostringstream bdy;
467
468 subgroups::setFragmentShaderFrameBuffer(programCollection);
469
470 if (SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
471 subgroups::setVertexShaderFrameBuffer(programCollection);
472
473 bdy << getTestString(caseDef);
474
475 if (SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
476 {
477 std::ostringstream vertexSrc;
478 vertexSrc << "${VERSION_DECL}\n"
479 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
480 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
481 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
482 << "layout(location = 0) in highp vec4 in_position;\n"
483 << "layout(location = 0) out float out_color;\n"
484 << "layout(binding = 0, std140) uniform Buffer0\n"
485 << "{\n"
486 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data["
487 << subgroups::maxSupportedSubgroupSize() << "];\n"
488 << "};\n"
489 << "\n"
490 << "void main (void)\n"
491 << "{\n"
492 << " uvec4 mask = subgroupBallot(true);\n"
493 << bdy.str() << " out_color = float(tempResult);\n"
494 << " gl_Position = in_position;\n"
495 << " gl_PointSize = 1.0f;\n"
496 << "}\n";
497 programCollection.add("vert") << glu::VertexSource(vertexSrc.str());
498 }
499 else if (SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
500 {
501 std::ostringstream geometry;
502
503 geometry << "${VERSION_DECL}\n"
504 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
505 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
506 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
507 << "layout(points) in;\n"
508 << "layout(points, max_vertices = 1) out;\n"
509 << "layout(location = 0) out float out_color;\n"
510 << "layout(binding = 0, std140) uniform Buffer0\n"
511 << "{\n"
512 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data["
513 << subgroups::maxSupportedSubgroupSize() << "];\n"
514 << "};\n"
515 << "\n"
516 << "void main (void)\n"
517 << "{\n"
518 << " uvec4 mask = subgroupBallot(true);\n"
519 << bdy.str() << " out_color = float(tempResult);\n"
520 << " gl_Position = gl_in[0].gl_Position;\n"
521 << " EmitVertex();\n"
522 << " EndPrimitive();\n"
523 << "}\n";
524
525 programCollection.add("geometry") << glu::GeometrySource(geometry.str());
526 }
527 else if (SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)
528 {
529 std::ostringstream controlSource;
530 controlSource << "${VERSION_DECL}\n"
531 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
532 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
533 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
534 << "layout(vertices = 2) out;\n"
535 << "layout(location = 0) out float out_color[];\n"
536 << "layout(binding = 0, std140) uniform Buffer0\n"
537 << "{\n"
538 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data["
539 << subgroups::maxSupportedSubgroupSize() << "];\n"
540 << "};\n"
541 << "\n"
542 << "void main (void)\n"
543 << "{\n"
544 << " if (gl_InvocationID == 0)\n"
545 << " {\n"
546 << " gl_TessLevelOuter[0] = 1.0f;\n"
547 << " gl_TessLevelOuter[1] = 1.0f;\n"
548 << " }\n"
549 << " uvec4 mask = subgroupBallot(true);\n"
550 << bdy.str() << " out_color[gl_InvocationID] = float(tempResult);"
551 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
552 << "}\n";
553
554 programCollection.add("tesc") << glu::TessellationControlSource(controlSource.str());
555 subgroups::setTesEvalShaderFrameBuffer(programCollection);
556 }
557 else if (SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage)
558 {
559
560 std::ostringstream evaluationSource;
561 evaluationSource << "${VERSION_DECL}\n"
562 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
563 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
564 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
565 << "layout(isolines, equal_spacing, ccw ) in;\n"
566 << "layout(location = 0) out float out_color;\n"
567 << "layout(binding = 0, std140) uniform Buffer0\n"
568 << "{\n"
569 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data["
570 << subgroups::maxSupportedSubgroupSize() << "];\n"
571 << "};\n"
572 << "\n"
573 << "void main (void)\n"
574 << "{\n"
575 << " uvec4 mask = subgroupBallot(true);\n"
576 << bdy.str() << " out_color = float(tempResult);\n"
577 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
578 << "}\n";
579
580 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
581 programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str());
582 }
583 else
584 {
585 DE_FATAL("Unsupported shader stage");
586 }
587 }
588
initPrograms(SourceCollections & programCollection,CaseDefinition caseDef)589 void initPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
590 {
591 const string bdy = getTestString(caseDef);
592
593 if (SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
594 {
595 std::ostringstream src;
596
597 src << "${VERSION_DECL}\n"
598 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
599 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
600 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
601 << "layout (${LOCAL_SIZE_X}, ${LOCAL_SIZE_Y}, ${LOCAL_SIZE_Z}) in;\n"
602 << "layout(binding = 0, std430) buffer Buffer0\n"
603 << "{\n"
604 << " uint result[];\n"
605 << "};\n"
606 << "layout(binding = 1, std430) buffer Buffer1\n"
607 << "{\n"
608 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[];\n"
609 << "};\n"
610 << "\n"
611 << "void main (void)\n"
612 << "{\n"
613 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
614 << " highp uint offset = globalSize.x * ((globalSize.y * "
615 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
616 "gl_GlobalInvocationID.x;\n"
617 << " uvec4 mask = subgroupBallot(true);\n"
618 << bdy << " result[offset] = tempResult;\n"
619 << "}\n";
620
621 programCollection.add("comp") << glu::ComputeSource(src.str());
622 }
623 else
624 {
625 {
626 const std::string vertex =
627 "${VERSION_DECL}\n"
628 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
629 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
630 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
631 "layout(binding = 0, std430) buffer Buffer0\n"
632 "{\n"
633 " uint result[];\n"
634 "} b0;\n"
635 "layout(binding = 4, std430) readonly buffer Buffer4\n"
636 "{\n"
637 " " +
638 subgroups::getFormatNameForGLSL(caseDef.format) +
639 " data[];\n"
640 "};\n"
641 "\n"
642 "void main (void)\n"
643 "{\n"
644 " uvec4 mask = subgroupBallot(true);\n" +
645 bdy +
646 " b0.result[gl_VertexID] = tempResult;\n"
647 " float pixelSize = 2.0f/1024.0f;\n"
648 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
649 " gl_Position = vec4(float(gl_VertexID) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
650 " gl_PointSize = 1.0f;\n"
651 "}\n";
652 programCollection.add("vert") << glu::VertexSource(vertex);
653 }
654
655 {
656 const std::string tesc = "${VERSION_DECL}\n"
657 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
658 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
659 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
660 "layout(vertices=1) out;\n"
661 "layout(binding = 1, std430) buffer Buffer1\n"
662 "{\n"
663 " uint result[];\n"
664 "} b1;\n"
665 "layout(binding = 4, std430) readonly buffer Buffer4\n"
666 "{\n"
667 " " +
668 subgroups::getFormatNameForGLSL(caseDef.format) +
669 " data[];\n"
670 "};\n"
671 "\n"
672 "void main (void)\n"
673 "{\n"
674 " uvec4 mask = subgroupBallot(true);\n" +
675 bdy +
676 " b1.result[gl_PrimitiveID] = tempResult;\n"
677 " if (gl_InvocationID == 0)\n"
678 " {\n"
679 " gl_TessLevelOuter[0] = 1.0f;\n"
680 " gl_TessLevelOuter[1] = 1.0f;\n"
681 " }\n"
682 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
683 "}\n";
684 programCollection.add("tesc") << glu::TessellationControlSource(tesc);
685 }
686
687 {
688 const std::string tese = "${VERSION_DECL}\n"
689 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
690 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
691 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
692 "layout(isolines) in;\n"
693 "layout(binding = 2, std430) buffer Buffer2\n"
694 "{\n"
695 " uint result[];\n"
696 "} b2;\n"
697 "layout(binding = 4, std430) readonly buffer Buffer4\n"
698 "{\n"
699 " " +
700 subgroups::getFormatNameForGLSL(caseDef.format) +
701 " data[];\n"
702 "};\n"
703 "\n"
704 "void main (void)\n"
705 "{\n"
706 " uvec4 mask = subgroupBallot(true);\n" +
707 bdy +
708 " b2.result[gl_PrimitiveID * 2 + int(gl_TessCoord.x + 0.5)] = tempResult;\n"
709 " float pixelSize = 2.0f/1024.0f;\n"
710 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
711 "}\n";
712 programCollection.add("tese") << glu::TessellationEvaluationSource(tese);
713 }
714
715 {
716 const std::string geometry =
717 // version added by addGeometryShadersFromTemplate
718 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
719 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
720 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
721 "layout(${TOPOLOGY}) in;\n"
722 "layout(points, max_vertices = 1) out;\n"
723 "layout(binding = 3, std430) buffer Buffer3\n"
724 "{\n"
725 " uint result[];\n"
726 "} b3;\n"
727 "layout(binding = 4, std430) readonly buffer Buffer4\n"
728 "{\n"
729 " " +
730 subgroups::getFormatNameForGLSL(caseDef.format) +
731 " data[];\n"
732 "};\n"
733 "\n"
734 "void main (void)\n"
735 "{\n"
736 " uvec4 mask = subgroupBallot(true);\n" +
737 bdy +
738 " b3.result[gl_PrimitiveIDIn] = tempResult;\n"
739 " gl_Position = gl_in[0].gl_Position;\n"
740 " EmitVertex();\n"
741 " EndPrimitive();\n"
742 "}\n";
743 subgroups::addGeometryShadersFromTemplate(geometry, programCollection);
744 }
745
746 {
747 const std::string fragment = "${VERSION_DECL}\n"
748 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
749 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
750 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
751 "precision highp int;\n"
752 "precision highp float;\n"
753 "layout(location = 0) out uint result;\n"
754 "layout(binding = 4, std430) readonly buffer Buffer4\n"
755 "{\n"
756 " " +
757 subgroups::getFormatNameForGLSL(caseDef.format) +
758 " data[];\n"
759 "};\n"
760 "void main (void)\n"
761 "{\n"
762 " uvec4 mask = subgroupBallot(true);\n" +
763 bdy +
764 " result = tempResult;\n"
765 "}\n";
766 programCollection.add("fragment") << glu::FragmentSource(fragment);
767 }
768 subgroups::addNoSubgroupShader(programCollection);
769 }
770 }
771
supportedCheck(Context & context,CaseDefinition caseDef)772 void supportedCheck(Context &context, CaseDefinition caseDef)
773 {
774 if (!subgroups::isSubgroupSupported(context))
775 TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
776
777 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, SUBGROUP_FEATURE_PARTITIONED_BIT_NV))
778 {
779 TCU_THROW(NotSupportedError, "Device does not support subgroup partitioned operations");
780 }
781
782 if (subgroups::isDoubleFormat(caseDef.format) && !subgroups::isDoubleSupportedForDevice(context))
783 {
784 TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
785 }
786 }
787
noSSBOtest(Context & context,const CaseDefinition caseDef)788 tcu::TestStatus noSSBOtest(Context &context, const CaseDefinition caseDef)
789 {
790 if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
791 {
792 if (subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
793 {
794 return tcu::TestStatus::fail("Shader stage " + subgroups::getShaderStageName(caseDef.shaderStage) +
795 " is required to support subgroup operations!");
796 }
797 else
798 {
799 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
800 }
801 }
802
803 subgroups::SSBOData inputData;
804 inputData.format = caseDef.format;
805 inputData.layout = subgroups::SSBOData::LayoutStd140;
806 inputData.numElements = subgroups::maxSupportedSubgroupSize();
807 inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
808 inputData.binding = 0u;
809
810 if (SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
811 return subgroups::makeVertexFrameBufferTest(context, FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
812 else if (SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
813 return subgroups::makeGeometryFrameBufferTest(context, FORMAT_R32_UINT, &inputData, 1,
814 checkVertexPipelineStages);
815 else if (SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)
816 return subgroups::makeTessellationEvaluationFrameBufferTest(
817 context, FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, SHADER_STAGE_TESS_CONTROL_BIT);
818 else if (SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage)
819 return subgroups::makeTessellationEvaluationFrameBufferTest(
820 context, FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, SHADER_STAGE_TESS_EVALUATION_BIT);
821 else
822 TCU_THROW(InternalError, "Unhandled shader stage");
823 }
824
checkShaderStages(Context & context,const CaseDefinition & caseDef)825 bool checkShaderStages(Context &context, const CaseDefinition &caseDef)
826 {
827 if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
828 {
829 if (subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
830 {
831 return false;
832 }
833 else
834 {
835 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
836 }
837 }
838 return true;
839 }
840
test(Context & context,const CaseDefinition caseDef)841 tcu::TestStatus test(Context &context, const CaseDefinition caseDef)
842 {
843 if (SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
844 {
845 if (!checkShaderStages(context, caseDef))
846 {
847 return tcu::TestStatus::fail("Shader stage " + subgroups::getShaderStageName(caseDef.shaderStage) +
848 " is required to support subgroup operations!");
849 }
850 subgroups::SSBOData inputData;
851 inputData.format = caseDef.format;
852 inputData.layout = subgroups::SSBOData::LayoutStd430;
853 inputData.numElements = subgroups::maxSupportedSubgroupSize();
854 inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
855 inputData.binding = 1u;
856
857 return subgroups::makeComputeTest(context, FORMAT_R32_UINT, &inputData, 1, checkComputeStage);
858 }
859 else
860 {
861 int supportedStages = context.getDeqpContext().getContextInfo().getInt(GL_SUBGROUP_SUPPORTED_STAGES_KHR);
862
863 ShaderStageFlags stages = (ShaderStageFlags)(caseDef.shaderStage & supportedStages);
864
865 if (SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
866 {
867 if ((stages & SHADER_STAGE_FRAGMENT_BIT) == 0)
868 TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
869 else
870 stages = SHADER_STAGE_FRAGMENT_BIT;
871 }
872
873 if ((ShaderStageFlags)0u == stages)
874 TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
875
876 subgroups::SSBOData inputData;
877 inputData.format = caseDef.format;
878 inputData.layout = subgroups::SSBOData::LayoutStd430;
879 inputData.numElements = subgroups::maxSupportedSubgroupSize();
880 inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
881 inputData.binding = 4u;
882 inputData.stages = stages;
883
884 return subgroups::allStages(context, FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, stages);
885 }
886 }
887 } // namespace
888
createSubgroupsPartitionedTests(deqp::Context & testCtx)889 deqp::TestCaseGroup *createSubgroupsPartitionedTests(deqp::Context &testCtx)
890 {
891 de::MovePtr<deqp::TestCaseGroup> graphicGroup(
892 new deqp::TestCaseGroup(testCtx, "graphics", "Subgroup partitioned category tests: graphics"));
893 de::MovePtr<deqp::TestCaseGroup> computeGroup(
894 new deqp::TestCaseGroup(testCtx, "compute", "Subgroup partitioned category tests: compute"));
895 de::MovePtr<deqp::TestCaseGroup> framebufferGroup(
896 new deqp::TestCaseGroup(testCtx, "framebuffer", "Subgroup partitioned category tests: framebuffer"));
897
898 const ShaderStageFlags stages[] = {
899 SHADER_STAGE_VERTEX_BIT,
900 SHADER_STAGE_TESS_EVALUATION_BIT,
901 SHADER_STAGE_TESS_CONTROL_BIT,
902 SHADER_STAGE_GEOMETRY_BIT,
903 };
904
905 const Format formats[] = {
906 FORMAT_R32_SINT, FORMAT_R32G32_SINT, FORMAT_R32G32B32_SINT, FORMAT_R32G32B32A32_SINT,
907 FORMAT_R32_UINT, FORMAT_R32G32_UINT, FORMAT_R32G32B32_UINT, FORMAT_R32G32B32A32_UINT,
908 FORMAT_R32_SFLOAT, FORMAT_R32G32_SFLOAT, FORMAT_R32G32B32_SFLOAT, FORMAT_R32G32B32A32_SFLOAT,
909 FORMAT_R64_SFLOAT, FORMAT_R64G64_SFLOAT, FORMAT_R64G64B64_SFLOAT, FORMAT_R64G64B64A64_SFLOAT,
910 FORMAT_R32_BOOL, FORMAT_R32G32_BOOL, FORMAT_R32G32B32_BOOL, FORMAT_R32G32B32A32_BOOL,
911 };
912
913 for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
914 {
915 const Format format = formats[formatIndex];
916
917 for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
918 {
919 bool isBool = false;
920 bool isFloat = false;
921
922 switch (format)
923 {
924 default:
925 break;
926 case FORMAT_R32_SFLOAT:
927 case FORMAT_R32G32_SFLOAT:
928 case FORMAT_R32G32B32_SFLOAT:
929 case FORMAT_R32G32B32A32_SFLOAT:
930 case FORMAT_R64_SFLOAT:
931 case FORMAT_R64G64_SFLOAT:
932 case FORMAT_R64G64B64_SFLOAT:
933 case FORMAT_R64G64B64A64_SFLOAT:
934 isFloat = true;
935 break;
936 case FORMAT_R32_BOOL:
937 case FORMAT_R32G32_BOOL:
938 case FORMAT_R32G32B32_BOOL:
939 case FORMAT_R32G32B32A32_BOOL:
940 isBool = true;
941 break;
942 }
943
944 bool isBitwiseOp = false;
945
946 switch (opTypeIndex)
947 {
948 default:
949 break;
950 case OPTYPE_AND:
951 case OPTYPE_INCLUSIVE_AND:
952 case OPTYPE_EXCLUSIVE_AND:
953 case OPTYPE_OR:
954 case OPTYPE_INCLUSIVE_OR:
955 case OPTYPE_EXCLUSIVE_OR:
956 case OPTYPE_XOR:
957 case OPTYPE_INCLUSIVE_XOR:
958 case OPTYPE_EXCLUSIVE_XOR:
959 isBitwiseOp = true;
960 break;
961 }
962
963 if (isFloat && isBitwiseOp)
964 {
965 // Skip float with bitwise category.
966 continue;
967 }
968
969 if (isBool && !isBitwiseOp)
970 {
971 // Skip bool when its not the bitwise category.
972 continue;
973 }
974 const std::string name =
975 de::toLower(getOpTypeName(opTypeIndex)) + "_" + subgroups::getFormatNameForGLSL(format);
976
977 {
978 const CaseDefinition caseDef = {opTypeIndex, SHADER_STAGE_COMPUTE_BIT, format};
979 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(
980 computeGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
981 }
982
983 {
984 const CaseDefinition caseDef = {opTypeIndex, SHADER_STAGE_ALL_GRAPHICS, format};
985 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(
986 graphicGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
987 }
988
989 for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
990 {
991 const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
992 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(
993 framebufferGroup.get(), name + "_" + getShaderStageName(caseDef.shaderStage), "", supportedCheck,
994 initFrameBufferPrograms, noSSBOtest, caseDef);
995 }
996 }
997 }
998 de::MovePtr<deqp::TestCaseGroup> group(
999 new deqp::TestCaseGroup(testCtx, "partitioned", "NV_shader_subgroup_partitioned category tests"));
1000
1001 group->addChild(graphicGroup.release());
1002 group->addChild(computeGroup.release());
1003 group->addChild(framebufferGroup.release());
1004
1005 return group.release();
1006 }
1007
1008 } // namespace subgroups
1009 } // namespace glc
1010