1 /*------------------------------------------------------------------------
2 * OpenGL Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2017-2019 The Khronos Group Inc.
6 * Copyright (c) 2017 Codeplay Software Ltd.
7 * Copyright (c) 2019 NVIDIA Corporation.
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 */ /*!
22 * \file
23 * \brief Subgroups Tests
24 */ /*--------------------------------------------------------------------*/
25
26 #include "glcSubgroupsBasicTests.hpp"
27 #include "glcSubgroupsTestsUtils.hpp"
28
29 #include <string>
30 #include <vector>
31 #include "tcuStringTemplate.hpp"
32
33 using namespace tcu;
34 using namespace std;
35
36 namespace glc
37 {
38 namespace subgroups
39 {
40
41 namespace
42 {
// Marker written by the invocation for which subgroupElect() returned true.
const uint32_t ELECTED_VALUE = 42u;
// Marker written by every other invocation.
const uint32_t UNELECTED_VALUE = 13u;
// Byte size used to dimension the shaders' tempBuffer arrays (declared with SHADER_BUFFER_SIZE / 4 uints).
const uint64_t SHADER_BUFFER_SIZE = 4096ull;
46
// Validates a width x height grid of four-float texels read from datas[0].
// For each texel: when component 2 equals 1.0f, component 0 must equal component 1;
// otherwise component 0 must equal component 3. Returns false on the first mismatch.
// NOTE(review): presumably the texels are the RGBA output of the fragment barrier
// shader built by initFrameBufferPrograms - confirm against the caller.
static bool checkFragmentSubgroupBarriersNoSSBO(std::vector<const void *> datas, uint32_t width, uint32_t height,
                                                uint32_t)
{
    const float *const texels = static_cast<const float *>(datas[0]);

    for (uint32_t col = 0u; col < width; ++col)
    {
        for (uint32_t row = 0u; row < height; ++row)
        {
            const float *const texel = texels + (col * height + row) * 4u;
            // Component 2 selects which of the two reference components applies.
            const float expected     = (1.0f == texel[2]) ? texel[1] : texel[3];

            if (texel[0] != expected)
                return false;
        }
    }

    return true;
}
73
checkVertexPipelineStagesSubgroupElectNoSSBO(std::vector<const void * > datas,uint32_t width,uint32_t)74 static bool checkVertexPipelineStagesSubgroupElectNoSSBO(std::vector<const void *> datas, uint32_t width, uint32_t)
75 {
76 const float *const resultData = reinterpret_cast<const float *>(datas[0]);
77 float poisonValuesFound = 0.0f;
78 float numSubgroupsUsed = 0.0f;
79
80 for (uint32_t x = 0; x < width; ++x)
81 {
82 uint32_t val = static_cast<uint32_t>(resultData[x * 2]);
83 numSubgroupsUsed += resultData[x * 2 + 1];
84
85 switch (val)
86 {
87 default:
88 // some garbage value was found!
89 return false;
90 case UNELECTED_VALUE:
91 break;
92 case ELECTED_VALUE:
93 poisonValuesFound += 1.0f;
94 break;
95 }
96 }
97 return numSubgroupsUsed == poisonValuesFound;
98 }
99
checkVertexPipelineStagesSubgroupElect(std::vector<const void * > datas,uint32_t width,uint32_t)100 static bool checkVertexPipelineStagesSubgroupElect(std::vector<const void *> datas, uint32_t width, uint32_t)
101 {
102 const uint32_t *const resultData = reinterpret_cast<const uint32_t *>(datas[0]);
103 uint32_t poisonValuesFound = 0;
104
105 for (uint32_t x = 0; x < width; ++x)
106 {
107 uint32_t val = resultData[x];
108
109 switch (val)
110 {
111 default:
112 // some garbage value was found!
113 return false;
114 case UNELECTED_VALUE:
115 break;
116 case ELECTED_VALUE:
117 poisonValuesFound++;
118 break;
119 }
120 }
121
122 // we used an atomicly incremented counter to note how many subgroups we used for the vertex shader
123 const uint32_t numSubgroupsUsed = *reinterpret_cast<const uint32_t *>(datas[1]);
124
125 return numSubgroupsUsed == poisonValuesFound;
126 }
127
// Returns true when each of the `width` uint32 results in datas[0] equals the
// reference value stored in the first uint32 of datas[1].
static bool checkVertexPipelineStagesSubgroupBarriers(std::vector<const void *> datas, uint32_t width, uint32_t)
{
    const uint32_t *const results = static_cast<const uint32_t *>(datas[0]);

    // This buffer supplied the unique value the shader was expected to propagate.
    const uint32_t expected = *static_cast<const uint32_t *>(datas[1]);

    const uint32_t *const end = results + width;
    for (const uint32_t *it = results; it != end; ++it)
    {
        if (*it != expected)
            return false;
    }

    return true;
}
145
// Validates `width` four-float entries read from datas[0].
// For each entry: when component 2 equals 1.0f, component 0 must equal component 1;
// otherwise component 0 must equal component 3. Returns false on the first mismatch.
static bool checkVertexPipelineStagesSubgroupBarriersNoSSBO(std::vector<const void *> datas, uint32_t width, uint32_t)
{
    const float *const entries = static_cast<const float *>(datas[0]);

    for (uint32_t i = 0u; i < width; ++i)
    {
        const float *const entry = entries + i * 4u;
        // Component 2 selects which of the two reference components applies.
        const float expected     = (1.0f == entry[2]) ? entry[1] : entry[3];

        if (entry[0] != expected)
            return false;
    }

    return true;
}
165
// Validates `width` four-float entries read from datas[0].
// Only entries whose component 2 equals 0.0f are checked: for those, component 0
// must equal component 3. Entries with any other component 2 are accepted as-is.
static bool checkTessellationEvaluationSubgroupBarriersNoSSBO(std::vector<const void *> datas, uint32_t width, uint32_t)
{
    const float *const entries = static_cast<const float *>(datas[0]);

    for (uint32_t i = 0u; i < width; ++i)
    {
        const float *const entry = entries + i * 4u;

        if (0.0f != entry[2])
            continue; // nothing is verified for this entry

        if (entry[0] != entry[3])
            return false;
    }

    return true;
}
180
checkComputeSubgroupElect(std::vector<const void * > datas,const uint32_t numWorkgroups[3],const uint32_t localSize[3],uint32_t)181 static bool checkComputeSubgroupElect(std::vector<const void *> datas, const uint32_t numWorkgroups[3],
182 const uint32_t localSize[3], uint32_t)
183 {
184 return glc::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
185 }
186
checkComputeSubgroupBarriers(std::vector<const void * > datas,const uint32_t numWorkgroups[3],const uint32_t localSize[3],uint32_t)187 static bool checkComputeSubgroupBarriers(std::vector<const void *> datas, const uint32_t numWorkgroups[3],
188 const uint32_t localSize[3], uint32_t)
189 {
190 // We used this SSBO to generate our unique value!
191 const uint32_t ref = *reinterpret_cast<const uint32_t *>(datas[1]);
192 return glc::subgroups::checkCompute(datas, numWorkgroups, localSize, ref);
193 }
194
// Subgroup operations covered by these tests. getOpTypeName() maps each
// enumerator (except OPTYPE_LAST) to the GLSL built-in of the same name.
enum OpType
{
    OPTYPE_ELECT                          = 0, // subgroupElect
    OPTYPE_SUBGROUP_BARRIER               = 1, // subgroupBarrier
    OPTYPE_SUBGROUP_MEMORY_BARRIER        = 2, // subgroupMemoryBarrier
    OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER = 3, // subgroupMemoryBarrierBuffer
    OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED = 4, // subgroupMemoryBarrierShared
    OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE  = 5, // subgroupMemoryBarrierImage
    OPTYPE_LAST                                // one past the last operation
};
205
getOpTypeName(int opType)206 std::string getOpTypeName(int opType)
207 {
208 switch (opType)
209 {
210 default:
211 DE_FATAL("Unsupported op type");
212 return "";
213 case OPTYPE_ELECT:
214 return "subgroupElect";
215 case OPTYPE_SUBGROUP_BARRIER:
216 return "subgroupBarrier";
217 case OPTYPE_SUBGROUP_MEMORY_BARRIER:
218 return "subgroupMemoryBarrier";
219 case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
220 return "subgroupMemoryBarrierBuffer";
221 case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED:
222 return "subgroupMemoryBarrierShared";
223 case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
224 return "subgroupMemoryBarrierImage";
225 }
226 }
227
// Parameters of a single test case: which subgroup operation is exercised and
// in which shader stage.
struct CaseDefinition
{
    // Operation under test; compared against the OpType enumerators.
    int opType;
    // Stage that runs the operation; compared against the subgroups::SHADER_STAGE_*_BIT values.
    subgroups::ShaderStageFlags shaderStage;
};
233
initFrameBufferPrograms(SourceCollections & programCollection,CaseDefinition caseDef)234 void initFrameBufferPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
235 {
236 if (subgroups::SHADER_STAGE_FRAGMENT_BIT != caseDef.shaderStage)
237 {
238 const string fragmentGLSL = "${VERSION_DECL}\n"
239 "layout(location = 0) in highp vec4 in_color;\n"
240 "layout(location = 0) out highp vec4 out_color;\n"
241 "void main()\n"
242 "{\n"
243 " out_color = in_color;\n"
244 "}\n";
245
246 programCollection.add("fragment") << glu::FragmentSource(fragmentGLSL);
247 }
248 if (subgroups::SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
249 {
250 const string vertexGLSL = "${VERSION_DECL}\n"
251 "void main (void)\n"
252 "{\n"
253 " vec2 uv = vec2((gl_VertexID << 1) & 2, gl_VertexID & 2);\n"
254 " gl_Position = vec4(uv * 2.0f + -1.0f, 0.0f, 1.0f);\n"
255 " gl_PointSize = 1.0f;\n"
256 "}\n";
257
258 programCollection.add("vert") << glu::VertexSource(vertexGLSL);
259 }
260 else if (subgroups::SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
261 subgroups::setVertexShaderFrameBuffer(programCollection);
262
263 if (OPTYPE_ELECT == caseDef.opType)
264 {
265 std::ostringstream electedValue;
266 std::ostringstream unelectedValue;
267 electedValue << ELECTED_VALUE;
268 unelectedValue << UNELECTED_VALUE;
269
270 if (subgroups::SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
271 {
272 const string vertexGLSL = "${VERSION_DECL}\n"
273 "#extension GL_KHR_shader_subgroup_basic: enable\n"
274 "layout(location = 0) out vec4 out_color;\n"
275 "layout(location = 0) in highp vec4 in_position;\n"
276 "\n"
277 "void main (void)\n"
278 "{\n"
279 " if (subgroupElect())\n"
280 " {\n"
281 " out_color.r = " +
282 electedValue.str() +
283 ".0f;\n"
284 " out_color.g = 1.0f;\n"
285 " }\n"
286 " else\n"
287 " {\n"
288 " out_color.r = " +
289 unelectedValue.str() +
290 ".0f;\n"
291 " out_color.g = 0.0f;\n"
292 " }\n"
293 " gl_Position = in_position;\n"
294 " gl_PointSize = 1.0f;\n"
295 "}\n";
296
297 programCollection.add("vert") << glu::VertexSource(vertexGLSL);
298 }
299 else if (subgroups::SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
300 {
301 const string geometryGLSL = "${VERSION_DECL}\n"
302 "#extension GL_KHR_shader_subgroup_basic: enable\n"
303 "layout(points) in;\n"
304 "layout(points, max_vertices = 1) out;\n"
305 "layout(location = 0) out vec4 out_color;\n"
306 "void main (void)\n"
307 "{\n"
308 " if (subgroupElect())\n"
309 " {\n"
310 " out_color.r = " +
311 electedValue.str() +
312 ".0f;\n"
313 " out_color.g = 1.0f;\n"
314 " }\n"
315 " else\n"
316 " {\n"
317 " out_color.r = " +
318 unelectedValue.str() +
319 ".0f;\n"
320 " out_color.g = 0.0f;\n"
321 " }\n"
322 " gl_Position = gl_in[0].gl_Position;\n"
323 " EmitVertex();\n"
324 " EndPrimitive();\n"
325 "}\n";
326
327 programCollection.add("geometry") << glu::GeometrySource(geometryGLSL);
328 }
329 else if (subgroups::SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage)
330 {
331 const string controlSourceGLSL =
332 "${VERSION_DECL}\n"
333 "#extension GL_KHR_shader_subgroup_basic: enable\n"
334 "${TESS_EXTENSION}\n"
335 "layout(vertices = 2) out;\n"
336 "void main (void)\n"
337 "{\n"
338 " if (gl_InvocationID == 0)\n"
339 " {\n"
340 " gl_TessLevelOuter[0] = 1.0f;\n"
341 " gl_TessLevelOuter[1] = 1.0f;\n"
342 " }\n"
343 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
344 "}\n";
345
346 programCollection.add("tesc") << glu::TessellationControlSource(controlSourceGLSL);
347
348 const string evaluationSourceGLSL =
349 "${VERSION_DECL}\n"
350 "#extension GL_KHR_shader_subgroup_basic: enable\n"
351 "${TESS_EXTENSION}\n"
352 "layout(isolines, equal_spacing, ccw ) in;\n"
353 "layout(location = 0) out vec4 out_color;\n"
354 "\n"
355 "void main (void)\n"
356 "{\n"
357 " if (subgroupElect())\n"
358 " {\n"
359 " out_color.r = 2.0f * " +
360 electedValue.str() + ".0f - " + unelectedValue.str() +
361 ".0f;\n"
362 " out_color.g = 2.0f;\n"
363 " }\n"
364 " else\n"
365 " {\n"
366 " out_color.r = " +
367 unelectedValue.str() +
368 ".0f;\n"
369 " out_color.g = 0.0f;\n"
370 " }\n"
371 " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
372 "}\n";
373
374 programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSourceGLSL);
375 }
376 else if (subgroups::SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)
377 {
378 const string controlSourceGLSL =
379 "${VERSION_DECL}\n"
380 "#extension GL_KHR_shader_subgroup_basic: enable\n"
381 "${TESS_EXTENSION}\n"
382 "layout(vertices = 2) out;\n"
383 "layout(location = 0) out vec4 out_color[];\n"
384 "void main (void)\n"
385 "{\n"
386 " if (gl_InvocationID == 0)\n"
387 " {\n"
388 " gl_TessLevelOuter[0] = 1.0f;\n"
389 " gl_TessLevelOuter[1] = 1.0f;\n"
390 " }\n"
391 " if (subgroupElect())\n"
392 " {\n"
393 " out_color[gl_InvocationID].r = " +
394 electedValue.str() +
395 ".0f;\n"
396 " out_color[gl_InvocationID].g = 1.0f;\n"
397 " }\n"
398 " else\n"
399 " {\n"
400 " out_color[gl_InvocationID].r = " +
401 unelectedValue.str() +
402 ".0f;\n"
403 " out_color[gl_InvocationID].g = 0.0f;\n"
404 " }\n"
405 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
406 "}\n";
407
408 programCollection.add("tesc") << glu::TessellationControlSource(controlSourceGLSL);
409
410 const string evaluationSourceGLSL =
411 "${VERSION_DECL}\n"
412 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
413 "${TESS_EXTENSION}\n"
414 "layout(isolines, equal_spacing, ccw ) in;\n"
415 "layout(location = 0) in vec4 in_color[];\n"
416 "layout(location = 0) out vec4 out_color;\n"
417 "\n"
418 "void main (void)\n"
419 "{\n"
420 " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
421 " out_color = in_color[0];\n"
422 "}\n";
423
424 programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSourceGLSL);
425 }
426 else
427 {
428 DE_FATAL("Unsupported shader stage");
429 }
430 }
431 else
432 {
433 std::ostringstream bdy;
434 string color = (subgroups::SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage) ?
435 "out_color[gl_InvocationID].b = 1.0f;\n" :
436 "out_color.b = 1.0f;\n";
437 switch (caseDef.opType)
438 {
439 default:
440 DE_FATAL("Unhandled op type!");
441 break;
442 case OPTYPE_SUBGROUP_BARRIER:
443 case OPTYPE_SUBGROUP_MEMORY_BARRIER:
444 case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
445 {
446 bdy << " tempResult2 = tempBuffer[id];\n"
447 << " if (subgroupElect())\n"
448 << " {\n"
449 << " tempResult = value;\n"
450 << " " << color << " }\n"
451 << " else\n"
452 << " {\n"
453 << " tempResult = tempBuffer[id];\n"
454 << " }\n"
455 << " " << getOpTypeName(caseDef.opType) << "();\n";
456 break;
457 }
458 case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
459 bdy << " tempResult2 = imageLoad(tempImage, ivec2(id, 0)).x;\n"
460 << " if (subgroupElect())\n"
461 << " {\n"
462 << " tempResult = value;\n"
463 << " " << color << " }\n"
464 << " else\n"
465 << " {\n"
466 << " tempResult = imageLoad(tempImage, ivec2(id, 0)).x;\n"
467 << " }\n"
468 << " subgroupMemoryBarrierImage();\n";
469
470 break;
471 }
472
473 if (subgroups::SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
474 {
475 std::ostringstream fragment;
476 fragment << "${VERSION_DECL}\n"
477 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
478 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
479 << "precision highp int;\n"
480 << "layout(location = 0) out highp vec4 out_color;\n"
481 << "\n"
482 << "layout(binding = 0, std140) uniform Buffer1\n"
483 << "{\n"
484 << " uint tempBuffer[" << SHADER_BUFFER_SIZE / 4ull << "];\n"
485 << "};\n"
486 << "\n"
487 << "layout(binding = 1, std140) uniform Buffer2\n"
488 << "{\n"
489 << " uint value;\n"
490 << "};\n"
491 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ?
492 "layout(binding = 0, r32ui) readonly uniform highp uimage2D tempImage;\n" :
493 "\n")
494 << "void main (void)\n"
495 << "{\n"
496 << " if (gl_HelperInvocation) return;\n"
497 << " uint id = 0u;\n"
498 << " if (subgroupElect())\n"
499 << " {\n"
500 << " id = uint(gl_FragCoord.x);\n"
501 << " }\n"
502 << " id = subgroupBroadcastFirst(id);\n"
503 << " uint localId = id;\n"
504 << " uint tempResult = 0u;\n"
505 << " uint tempResult2 = 0u;\n"
506 << " out_color.b = 0.0f;\n"
507 << bdy.str() << " out_color.r = float(tempResult);\n"
508 << " out_color.g = float(value);\n"
509 << " out_color.a = float(tempResult2);\n"
510 << "}\n";
511 programCollection.add("fragment") << glu::FragmentSource(fragment.str());
512 }
513 else if (subgroups::SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
514 {
515 std::ostringstream vertex;
516 vertex << "${VERSION_DECL}\n"
517 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
518 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
519 << "\n"
520 << "layout(location = 0) out vec4 out_color;\n"
521 << "layout(location = 0) in highp vec4 in_position;\n"
522 << "\n"
523 << "layout(binding = 0, std140) uniform Buffer1\n"
524 << "{\n"
525 << " uint tempBuffer[" << SHADER_BUFFER_SIZE / 4ull << "];\n"
526 << "};\n"
527 << "\n"
528 << "layout(binding = 1, std140) uniform Buffer2\n"
529 << "{\n"
530 << " uint value;\n"
531 << "};\n"
532 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ?
533 "layout(binding = 0, r32ui) readonly uniform highp uimage2D tempImage;\n" :
534 "\n")
535 << "void main (void)\n"
536 << "{\n"
537 << " uint id = 0u;\n"
538 << " if (subgroupElect())\n"
539 << " {\n"
540 << " id = uint(gl_VertexID);\n"
541 << " }\n"
542 << " id = subgroupBroadcastFirst(id);\n"
543 << " uint tempResult = 0u;\n"
544 << " uint tempResult2 = 0u;\n"
545 << " out_color.b = 0.0f;\n"
546 << bdy.str() << " out_color.r = float(tempResult);\n"
547 << " out_color.g = float(value);\n"
548 << " out_color.a = float(tempResult2);\n"
549 << " gl_Position = in_position;\n"
550 << " gl_PointSize = 1.0f;\n"
551 << "}\n";
552 programCollection.add("vert") << glu::VertexSource(vertex.str());
553 }
554 else if (subgroups::SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
555 {
556 std::ostringstream geometry;
557
558 geometry << "${VERSION_DECL}\n"
559 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
560 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
561 << "layout(points) in;\n"
562 << "layout(points, max_vertices = 1) out;\n"
563 << "layout(location = 0) out vec4 out_color;\n"
564 << "layout(binding = 0, std140) uniform Buffer1\n"
565 << "{\n"
566 << " uint tempBuffer[" << SHADER_BUFFER_SIZE / 4ull << "];\n"
567 << "};\n"
568 << "\n"
569 << "layout(binding = 1, std140) uniform Buffer2\n"
570 << "{\n"
571 << " uint value;\n"
572 << "};\n"
573 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ?
574 "layout(binding = 0, r32ui) readonly uniform highp uimage2D tempImage;\n" :
575 "\n")
576 << "void main (void)\n"
577 << "{\n"
578 << " uint id = 0u;\n"
579 << " if (subgroupElect())\n"
580 << " {\n"
581 << " id = uint(gl_InvocationID);\n"
582 << " }\n"
583 << " id = subgroupBroadcastFirst(id);\n"
584 << " uint tempResult = 0u;\n"
585 << " uint tempResult2 = 0u;\n"
586 << " out_color.b = 0.0f;\n"
587 << bdy.str() << " out_color.r = float(tempResult);\n"
588 << " out_color.g = float(value);\n"
589 << " out_color.a = float(tempResult2);\n"
590 << " gl_Position = gl_in[0].gl_Position;\n"
591 << " EmitVertex();\n"
592 << " EndPrimitive();\n"
593 << "}\n";
594
595 programCollection.add("geometry") << glu::GeometrySource(geometry.str());
596 }
597 else if (subgroups::SHADER_STAGE_TESS_EVALUATION_BIT == caseDef.shaderStage)
598 {
599 std::ostringstream controlSource;
600 std::ostringstream evaluationSource;
601
602 controlSource << "${VERSION_DECL}\n"
603 << "${TESS_EXTENSION}\n"
604 << "layout(vertices = 2) out;\n"
605 << "void main (void)\n"
606 << "{\n"
607 << " if (gl_InvocationID == 0)\n"
608 << " {\n"
609 << " gl_TessLevelOuter[0] = 1.0f;\n"
610 << " gl_TessLevelOuter[1] = 1.0f;\n"
611 << " }\n"
612 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
613 << "}\n";
614
615 evaluationSource << "${VERSION_DECL}\n"
616 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
617 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
618 << "${TESS_EXTENSION}\n"
619 << "layout(isolines, equal_spacing, ccw ) in;\n"
620 << "layout(location = 0) out vec4 out_color;\n"
621 << "layout(binding = 0, std140) uniform Buffer1\n"
622 << "{\n"
623 << " uint tempBuffer[" << SHADER_BUFFER_SIZE / 4ull << "];\n"
624 << "};\n"
625 << "\n"
626 << "layout(binding = 1, std140) uniform Buffer2\n"
627 << "{\n"
628 << " uint value;\n"
629 << "};\n"
630 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ?
631 "layout(binding = 0, r32ui) readonly uniform highp uimage2D tempImage;\n" :
632 "\n")
633 << "void main (void)\n"
634 << "{\n"
635 << " uint id = 0u;\n"
636 << " if (subgroupElect())\n"
637 << " {\n"
638 << " id = uint(gl_PrimitiveID);\n"
639 << " }\n"
640 << " id = subgroupBroadcastFirst(id);\n"
641 << " uint tempResult = 0u;\n"
642 << " uint tempResult2 = 0u;\n"
643 << " out_color.b = 0.0f;\n"
644 << bdy.str() << " out_color.r = float(tempResult);\n"
645 << " out_color.g = float(value);\n"
646 << " out_color.a = float(tempResult2);\n"
647 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
648 << "}\n";
649
650 programCollection.add("tesc") << glu::TessellationControlSource(controlSource.str());
651 programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str());
652 }
653 else if (subgroups::SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage)
654 {
655 std::ostringstream controlSource;
656 std::ostringstream evaluationSource;
657
658 controlSource << "${VERSION_DECL}\n"
659 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
660 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
661 << "${TESS_EXTENSION}\n"
662 << "layout(vertices = 2) out;\n"
663 << "layout(location = 0) out vec4 out_color[];\n"
664 << "layout(binding = 0, std140) uniform Buffer1\n"
665 << "{\n"
666 << " uint tempBuffer[" << SHADER_BUFFER_SIZE / 4ull << "];\n"
667 << "};\n"
668 << "\n"
669 << "layout(binding = 1, std140) uniform Buffer2\n"
670 << "{\n"
671 << " uint value;\n"
672 << "};\n"
673 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ?
674 "layout(binding = 0, r32ui) readonly uniform highp uimage2D tempImage;\n" :
675 "\n")
676 << "void main (void)\n"
677 << "{\n"
678 << " uint id = 0u;\n"
679 << " if (gl_InvocationID == 0)\n"
680 << " {\n"
681 << " gl_TessLevelOuter[0] = 1.0f;\n"
682 << " gl_TessLevelOuter[1] = 1.0f;\n"
683 << " }\n"
684 << " if (subgroupElect())\n"
685 << " {\n"
686 << " id = uint(gl_InvocationID);\n"
687 << " }\n"
688 << " id = subgroupBroadcastFirst(id);\n"
689 << " uint tempResult = 0u;\n"
690 << " uint tempResult2 = 0u;\n"
691 << " out_color[gl_InvocationID].b = 0.0f;\n"
692 << bdy.str() << " out_color[gl_InvocationID].r = float(tempResult);\n"
693 << " out_color[gl_InvocationID].g = float(value);\n"
694 << " out_color[gl_InvocationID].a = float(tempResult2);\n"
695 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
696 << "}\n";
697
698 evaluationSource << "${VERSION_DECL}\n"
699 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
700 << "${TESS_EXTENSION}\n"
701 << "layout(isolines, equal_spacing, ccw ) in;\n"
702 << "layout(location = 0) in vec4 in_color[];\n"
703 << "layout(location = 0) out vec4 out_color;\n"
704 << "\n"
705 << "void main (void)\n"
706 << "{\n"
707 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
708 << " out_color = in_color[0];\n"
709 << "}\n";
710
711 programCollection.add("tesc") << glu::TessellationControlSource(controlSource.str());
712 programCollection.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str());
713 }
714 else
715 {
716 DE_FATAL("Unsupported shader stage");
717 }
718 }
719 }
720
initPrograms(SourceCollections & programCollection,CaseDefinition caseDef)721 void initPrograms(SourceCollections &programCollection, CaseDefinition caseDef)
722 {
723 if (OPTYPE_ELECT == caseDef.opType)
724 {
725 if (subgroups::SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
726 {
727 std::ostringstream src;
728
729 src << "${VERSION_DECL}\n"
730 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
731 << "layout (${LOCAL_SIZE_X}, ${LOCAL_SIZE_Y}, ${LOCAL_SIZE_Z}) in;\n"
732 << "layout(binding = 0, std430) buffer Buffer1\n"
733 << "{\n"
734 << " uint result[];\n"
735 << "};\n"
736 << "\n"
737 << subgroups::getSharedMemoryBallotHelper() << "void main (void)\n"
738 << "{\n"
739 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
740 << " highp uint offset = globalSize.x * ((globalSize.y * "
741 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
742 "gl_GlobalInvocationID.x;\n"
743 << " uint value = " << UNELECTED_VALUE << "u;\n"
744 << " if (subgroupElect())\n"
745 << " {\n"
746 << " value = " << ELECTED_VALUE << "u;\n"
747 << " }\n"
748 << " uvec4 bits = uvec4(bitCount(sharedMemoryBallot(value == " << ELECTED_VALUE << "u)));\n"
749 << " result[offset] = bits.x + bits.y + bits.z + bits.w;\n"
750 << "}\n";
751
752 programCollection.add("comp") << glu::ComputeSource(src.str());
753 }
754 else
755 {
756 {
757 std::ostringstream vertex;
758 vertex << "${VERSION_DECL}\n"
759 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
760 << "layout(binding = 0, std430) buffer Buffer0\n"
761 << "{\n"
762 << " uint result[];\n"
763 << "} b0;\n"
764 << "layout(binding = 4, std430) buffer Buffer4\n"
765 << "{\n"
766 << " uint numSubgroupsExecuted;\n"
767 << "} b4;\n"
768 << "\n"
769 << "void main (void)\n"
770 << "{\n"
771 << " if (subgroupElect())\n"
772 << " {\n"
773 << " b0.result[gl_VertexID] = " << ELECTED_VALUE << "u;\n"
774 << " atomicAdd(b4.numSubgroupsExecuted, 1u);\n"
775 << " }\n"
776 << " else\n"
777 << " {\n"
778 << " b0.result[gl_VertexID] = " << UNELECTED_VALUE << "u;\n"
779 << " }\n"
780 << " float pixelSize = 2.0f/1024.0f;\n"
781 << " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
782 << " gl_Position = vec4(float(gl_VertexID) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
783 << " gl_PointSize = 1.0f;\n"
784 << "}\n";
785 programCollection.add("vert") << glu::VertexSource(vertex.str());
786 }
787
788 {
789 std::ostringstream tesc;
790 tesc << "${VERSION_DECL}\n"
791 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
792 << "layout(vertices=1) out;\n"
793 << "layout(binding = 1, std430) buffer Buffer1\n"
794 << "{\n"
795 << " uint result[];\n"
796 << "} b1;\n"
797 << "layout(binding = 5, std430) buffer Buffer5\n"
798 << "{\n"
799 << " uint numSubgroupsExecuted;\n"
800 << "} b5;\n"
801 << "\n"
802 << "void main (void)\n"
803 << "{\n"
804 << " if (subgroupElect())\n"
805 << " {\n"
806 << " b1.result[gl_PrimitiveID] = " << ELECTED_VALUE << "u;\n"
807 << " atomicAdd(b5.numSubgroupsExecuted, 1u);\n"
808 << " }\n"
809 << " else\n"
810 << " {\n"
811 << " b1.result[gl_PrimitiveID] = " << UNELECTED_VALUE << "u;\n"
812 << " }\n"
813 << " if (gl_InvocationID == 0)\n"
814 << " {\n"
815 << " gl_TessLevelOuter[0] = 1.0f;\n"
816 << " gl_TessLevelOuter[1] = 1.0f;\n"
817 << " }\n"
818 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
819 << "}\n";
820 programCollection.add("tesc") << glu::TessellationControlSource(tesc.str());
821 }
822
823 {
824 std::ostringstream tese;
825 tese << "${VERSION_DECL}\n"
826 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
827 << "layout(isolines) in;\n"
828 << "layout(binding = 2, std430) buffer Buffer2\n"
829 << "{\n"
830 << " uint result[];\n"
831 << "} b2;\n"
832 << "layout(binding = 6, std430) buffer Buffer6\n"
833 << "{\n"
834 << " uint numSubgroupsExecuted;\n"
835 << "} b6;\n"
836 << "\n"
837 << "void main (void)\n"
838 << "{\n"
839 << " if (subgroupElect())\n"
840 << " {\n"
841 << " b2.result[gl_PrimitiveID * 2 + int(gl_TessCoord.x + 0.5)] = " << ELECTED_VALUE << "u;\n"
842 << " atomicAdd(b6.numSubgroupsExecuted, 1u);\n"
843 << " }\n"
844 << " else\n"
845 << " {\n"
846 << " b2.result[gl_PrimitiveID * 2 + int(gl_TessCoord.x + 0.5)] = " << UNELECTED_VALUE << "u;\n"
847 << " }\n"
848 << " float pixelSize = 2.0f/1024.0f;\n"
849 << " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
850 << "}\n";
851 programCollection.add("tese") << glu::TessellationEvaluationSource(tese.str());
852 }
853 {
854 std::ostringstream geometry;
855 geometry << "#extension GL_KHR_shader_subgroup_basic: enable\n"
856 << "layout(${TOPOLOGY}) in;\n"
857 << "layout(points, max_vertices = 1) out;\n"
858 << "layout(binding = 3, std430) buffer Buffer3\n"
859 << "{\n"
860 << " uint result[];\n"
861 << "} b3;\n"
862 << "layout(binding = 7, std430) buffer Buffer7\n"
863 << "{\n"
864 << " uint numSubgroupsExecuted;\n"
865 << "} b7;\n"
866 << "\n"
867 << "void main (void)\n"
868 << "{\n"
869 << " if (subgroupElect())\n"
870 << " {\n"
871 << " b3.result[gl_PrimitiveIDIn] = " << ELECTED_VALUE << "u;\n"
872 << " atomicAdd(b7.numSubgroupsExecuted, 1u);\n"
873 << " }\n"
874 << " else\n"
875 << " {\n"
876 << " b3.result[gl_PrimitiveIDIn] = " << UNELECTED_VALUE << "u;\n"
877 << " }\n"
878 << " gl_Position = gl_in[0].gl_Position;\n"
879 << " EmitVertex();\n"
880 << " EndPrimitive();\n"
881 << "}\n";
882 subgroups::addGeometryShadersFromTemplate(geometry.str(), programCollection);
883 }
884
885 {
886 std::ostringstream fragment;
887 fragment << "${VERSION_DECL}\n"
888 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
889 << "precision highp int;\n"
890 << "layout(location = 0) out uint data;\n"
891 << "layout(binding = 8, std430) buffer Buffer8\n"
892 << "{\n"
893 << " uint numSubgroupsExecuted;\n"
894 << "} b8;\n"
895 << "void main (void)\n"
896 << "{\n"
897 << " if (gl_HelperInvocation) return;\n"
898 << " if (subgroupElect())\n"
899 << " {\n"
900 << " data = " << ELECTED_VALUE << "u;\n"
901 << " atomicAdd(b8.numSubgroupsExecuted, 1u);\n"
902 << " }\n"
903 << " else\n"
904 << " {\n"
905 << " data = " << UNELECTED_VALUE << "u;\n"
906 << " }\n"
907 << "}\n";
908 programCollection.add("fragment") << glu::FragmentSource(fragment.str());
909 }
910 subgroups::addNoSubgroupShader(programCollection);
911 }
912 }
913 else
914 {
915 std::ostringstream bdy;
916
917 switch (caseDef.opType)
918 {
919 default:
920 DE_FATAL("Unhandled op type!");
921 break;
922 case OPTYPE_SUBGROUP_BARRIER:
923 case OPTYPE_SUBGROUP_MEMORY_BARRIER:
924 case OPTYPE_SUBGROUP_MEMORY_BARRIER_BUFFER:
925 bdy << " if (subgroupElect())\n"
926 << " {\n"
927 << " b${SSBO1}.tempBuffer[id] = b${SSBO1}.value;\n"
928 << " }\n"
929 << " " << getOpTypeName(caseDef.opType) << "();\n"
930 << " tempResult = b${SSBO1}.tempBuffer[id];\n";
931 break;
932 case OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED:
933 bdy << " if (subgroupElect())\n"
934 << " {\n"
935 << " tempShared[localId] = b${SSBO1}.value;\n"
936 << " }\n"
937 << " subgroupMemoryBarrierShared();\n"
938 << " tempResult = tempShared[localId];\n";
939 break;
940 case OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE:
941 bdy << " if (subgroupElect())\n"
942 << " {\n"
943 << " imageStore(tempImage${IMG1}, ivec2(id, 0), uvec4(b${SSBO1}.value));\n"
944 << " }\n"
945 << " subgroupMemoryBarrierImage();\n"
946 << " tempResult = imageLoad(tempImage${IMG1}, ivec2(id, 0)).x;\n";
947 break;
948 }
949
950 tcu::StringTemplate bdyTemplate(bdy.str());
951
952 if (subgroups::SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
953 {
954 std::ostringstream src;
955 map<string, string> bufferNameMapping;
956 bufferNameMapping.insert(pair<string, string>("SSBO1", "1"));
957 bufferNameMapping.insert(pair<string, string>("IMG1", "0"));
958
959 src << "${VERSION_DECL}\n"
960 << "#extension GL_KHR_shader_subgroup_basic: enable\n"
961 << "layout (${LOCAL_SIZE_X}, ${LOCAL_SIZE_Y}, ${LOCAL_SIZE_Z}) in;\n"
962 << "layout(binding = 0, std430) buffer Buffer0\n"
963 << "{\n"
964 << " uint result[];\n"
965 << "} b0;\n"
966 << "layout(binding = 1, std430) buffer Buffer1\n"
967 << "{\n"
968 << " uint value;\n"
969 << " uint tempBuffer[];\n"
970 << "} b1;\n"
971 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ?
972 "layout(binding = 0, r32ui) uniform highp uimage2D tempImage0;\n" :
973 "\n")
974 << "shared uint tempShared[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
975 << "\n"
976 << "void main (void)\n"
977 << "{\n"
978 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
979 << " highp uint offset = globalSize.x * ((globalSize.y * "
980 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
981 "gl_GlobalInvocationID.x;\n"
982 << " uint localId = gl_SubgroupID;\n"
983 << " uint id = globalSize.x * ((globalSize.y * "
984 "gl_WorkGroupID.z) + gl_WorkGroupID.y) + "
985 "gl_WorkGroupID.x + localId;\n"
986 << " uint tempResult = 0u;\n"
987 << bdyTemplate.specialize(bufferNameMapping) << " b0.result[offset] = tempResult;\n"
988 << "}\n";
989
990 programCollection.add("comp") << glu::ComputeSource(src.str());
991 }
992 else
993 {
994 {
995 map<string, string> bufferNameMapping;
996 bufferNameMapping.insert(pair<string, string>("SSBO1", "4"));
997 bufferNameMapping.insert(pair<string, string>("IMG1", "0"));
998
999 std::ostringstream vertex;
1000 vertex
1001 << "${VERSION_DECL}\n"
1002 "#extension GL_KHR_shader_subgroup_basic: enable\n"
1003 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1004 "layout(binding = 0, std430) buffer Buffer0\n"
1005 "{\n"
1006 " uint result[];\n"
1007 "} b0;\n"
1008 "layout(binding = 4, std430) buffer Buffer4\n"
1009 "{\n"
1010 " uint value;\n"
1011 " uint tempBuffer[];\n"
1012 "} b4;\n"
1013 "layout(binding = 5, std430) buffer Buffer5\n"
1014 "{\n"
1015 " uint subgroupID;\n"
1016 "} b5;\n"
1017 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ?
1018 "layout(binding = 0, r32ui) uniform highp uimage2D tempImage0;\n" :
1019 "")
1020 << "void main (void)\n"
1021 "{\n"
1022 " uint id = 0u;\n"
1023 " if (subgroupElect())\n"
1024 " {\n"
1025 " id = atomicAdd(b5.subgroupID, 1u);\n"
1026 " }\n"
1027 " id = subgroupBroadcastFirst(id);\n"
1028 " uint localId = id;\n"
1029 " uint tempResult = 0u;\n" +
1030 bdyTemplate.specialize(bufferNameMapping) +
1031 " b0.result[gl_VertexID] = tempResult;\n"
1032 " float pixelSize = 2.0f/1024.0f;\n"
1033 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
1034 " gl_Position = vec4(float(gl_VertexID) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
1035 " gl_PointSize = 1.0f;\n"
1036 "}\n";
1037 programCollection.add("vert") << glu::VertexSource(vertex.str());
1038 }
1039
1040 {
1041 map<string, string> bufferNameMapping;
1042 bufferNameMapping.insert(pair<string, string>("SSBO1", "6"));
1043 bufferNameMapping.insert(pair<string, string>("IMG1", "1"));
1044
1045 std::ostringstream tesc;
1046 tesc << "${VERSION_DECL}\n"
1047 "#extension GL_KHR_shader_subgroup_basic: enable\n"
1048 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1049 "layout(vertices=1) out;\n"
1050 "layout(binding = 1, std430) buffer Buffer1\n"
1051 "{\n"
1052 " uint result[];\n"
1053 "} b1;\n"
1054 "layout(binding = 6, std430) buffer Buffer6\n"
1055 "{\n"
1056 " uint value;\n"
1057 " uint tempBuffer[];\n"
1058 "} b6;\n"
1059 "layout(binding = 7, std430) buffer Buffer7\n"
1060 "{\n"
1061 " uint subgroupID;\n"
1062 "} b7;\n"
1063 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ?
1064 "layout(binding = 1, r32ui) uniform highp uimage2D tempImage1;\n" :
1065 "")
1066 << "void main (void)\n"
1067 "{\n"
1068 " uint id = 0u;\n"
1069 " if (subgroupElect())\n"
1070 " {\n"
1071 " id = atomicAdd(b7.subgroupID, 1u);\n"
1072 " }\n"
1073 " id = subgroupBroadcastFirst(id);\n"
1074 " uint localId = id;\n"
1075 " uint tempResult = 0u;\n" +
1076 bdyTemplate.specialize(bufferNameMapping) +
1077 " b1.result[gl_PrimitiveID] = tempResult;\n"
1078 " if (gl_InvocationID == 0)\n"
1079 " {\n"
1080 " gl_TessLevelOuter[0] = 1.0f;\n"
1081 " gl_TessLevelOuter[1] = 1.0f;\n"
1082 " }\n"
1083 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1084 "}\n";
1085 programCollection.add("tesc") << glu::TessellationControlSource(tesc.str());
1086 }
1087
1088 {
1089 map<string, string> bufferNameMapping;
1090 bufferNameMapping.insert(pair<string, string>("SSBO1", "8"));
1091 bufferNameMapping.insert(pair<string, string>("IMG1", "2"));
1092
1093 std::ostringstream tese;
1094 tese << "${VERSION_DECL}\n"
1095 "#extension GL_KHR_shader_subgroup_basic: enable\n"
1096 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1097 "layout(isolines) in;\n"
1098 "layout(binding = 2, std430) buffer Buffer2\n"
1099 "{\n"
1100 " uint result[];\n"
1101 "} b2;\n"
1102 "layout(binding = 8, std430) buffer Buffer8\n"
1103 "{\n"
1104 " uint value;\n"
1105 " uint tempBuffer[];\n"
1106 "} b8;\n"
1107 "layout(binding = 9, std430) buffer Buffer9\n"
1108 "{\n"
1109 " uint subgroupID;\n"
1110 "} b9;\n"
1111 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ?
1112 "layout(binding = 2, r32ui) uniform highp uimage2D tempImage2;\n" :
1113 "")
1114 << "void main (void)\n"
1115 "{\n"
1116 " uint id = 0u;\n"
1117 " if (subgroupElect())\n"
1118 " {\n"
1119 " id = atomicAdd(b9.subgroupID, 1u);\n"
1120 " }\n"
1121 " id = subgroupBroadcastFirst(id);\n"
1122 " uint localId = id;\n"
1123 " uint tempResult = 0u;\n" +
1124 bdyTemplate.specialize(bufferNameMapping) +
1125 " b2.result[gl_PrimitiveID * 2 + int(gl_TessCoord.x + 0.5)] = tempResult;\n"
1126 " float pixelSize = 2.0f/1024.0f;\n"
1127 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
1128 "}\n";
1129 programCollection.add("tese") << glu::TessellationEvaluationSource(tese.str());
1130 }
1131 {
1132 map<string, string> bufferNameMapping;
1133 bufferNameMapping.insert(pair<string, string>("SSBO1", "10"));
1134 bufferNameMapping.insert(pair<string, string>("IMG1", "3"));
1135
1136 std::ostringstream geometry;
1137 geometry << "#extension GL_KHR_shader_subgroup_basic: enable\n"
1138 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1139 "layout(${TOPOLOGY}) in;\n"
1140 "layout(points, max_vertices = 1) out;\n"
1141 "layout(binding = 3, std430) buffer Buffer3\n"
1142 "{\n"
1143 " uint result[];\n"
1144 "} b3;\n"
1145 "layout(binding = 10, std430) buffer Buffer10\n"
1146 "{\n"
1147 " uint value;\n"
1148 " uint tempBuffer[];\n"
1149 "} b10;\n"
1150 "layout(binding = 11, std430) buffer Buffer11\n"
1151 "{\n"
1152 " uint subgroupID;\n"
1153 "} b11;\n"
1154 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ?
1155 "layout(binding = 3, r32ui) uniform highp uimage2D tempImage3;\n" :
1156 "")
1157 << "void main (void)\n"
1158 "{\n"
1159 " uint id = 0u;\n"
1160 " if (subgroupElect())\n"
1161 " {\n"
1162 " id = atomicAdd(b11.subgroupID, 1u);\n"
1163 " }\n"
1164 " id = subgroupBroadcastFirst(id);\n"
1165 " uint localId = id;\n"
1166 " uint tempResult = 0u;\n" +
1167 bdyTemplate.specialize(bufferNameMapping) +
1168 " b3.result[gl_PrimitiveIDIn] = tempResult;\n"
1169 " gl_Position = gl_in[0].gl_Position;\n"
1170 " EmitVertex();\n"
1171 " EndPrimitive();\n"
1172 "}\n";
1173 subgroups::addGeometryShadersFromTemplate(geometry.str(), programCollection);
1174 }
1175
1176 {
1177 map<string, string> bufferNameMapping;
1178 bufferNameMapping.insert(pair<string, string>("SSBO1", "12"));
1179 bufferNameMapping.insert(pair<string, string>("IMG1", "4"));
1180
1181 std::ostringstream fragment;
1182 fragment << "${VERSION_DECL}\n"
1183 "#extension GL_KHR_shader_subgroup_basic: enable\n"
1184 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1185 "precision highp int;\n"
1186 "layout(location = 0) out uint result;\n"
1187 "layout(binding = 12, std430) buffer Buffer12\n"
1188 "{\n"
1189 " uint value;\n"
1190 " uint tempBuffer[];\n"
1191 "} b12;\n"
1192 "layout(binding = 13, std430) buffer Buffer13\n"
1193 "{\n"
1194 " uint subgroupID;\n"
1195 "} b13;\n"
1196 << (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ?
1197 "layout(binding = 4, r32ui) uniform highp uimage2D tempImage4;\n" :
1198 "")
1199 << "void main (void)\n"
1200 "{\n"
1201 " if (gl_HelperInvocation) return;\n"
1202 " uint id = 0u;\n"
1203 " if (subgroupElect())\n"
1204 " {\n"
1205 " id = atomicAdd(b13.subgroupID, 1u);\n"
1206 " }\n"
1207 " id = subgroupBroadcastFirst(id);\n"
1208 " uint localId = id;\n"
1209 " uint tempResult = 0u;\n" +
1210 bdyTemplate.specialize(bufferNameMapping) +
1211 " result = tempResult;\n"
1212 "}\n";
1213 programCollection.add("fragment") << glu::FragmentSource(fragment.str());
1214 }
1215
1216 subgroups::addNoSubgroupShader(programCollection);
1217 }
1218 }
1219 }
1220
supportedCheck(Context & context,CaseDefinition caseDef)1221 void supportedCheck(Context &context, CaseDefinition caseDef)
1222 {
1223 DE_UNREF(caseDef);
1224 if (!subgroups::isSubgroupSupported(context))
1225 TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
1226 }
1227
noSSBOtest(Context & context,const CaseDefinition caseDef)1228 tcu::TestStatus noSSBOtest(Context &context, const CaseDefinition caseDef)
1229 {
1230 if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
1231 {
1232 if (subgroups::areSubgroupOperationsRequiredForStage(caseDef.shaderStage))
1233 {
1234 return tcu::TestStatus::fail("Shader stage " + subgroups::getShaderStageName(caseDef.shaderStage) +
1235 " is required to support subgroup operations!");
1236 }
1237 else
1238 {
1239 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
1240 }
1241 }
1242
1243 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, subgroups::SUBGROUP_FEATURE_BASIC_BIT))
1244 {
1245 return tcu::TestStatus::fail("Subgroup feature " +
1246 subgroups::getSubgroupFeatureName(subgroups::SUBGROUP_FEATURE_BASIC_BIT) +
1247 " is a required capability!");
1248 }
1249
1250 if (OPTYPE_ELECT != caseDef.opType && subgroups::SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
1251 {
1252 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, subgroups::SUBGROUP_FEATURE_BALLOT_BIT))
1253 {
1254 TCU_THROW(NotSupportedError,
1255 "Subgroup basic operation non-compute stage test required that ballot operations are supported!");
1256 }
1257 }
1258
1259 if (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType)
1260 {
1261 if (!subgroups::isImageSupportedForStageOnDevice(context, caseDef.shaderStage))
1262 {
1263 TCU_THROW(NotSupportedError, "Subgroup basic memory barrier image test for " +
1264 subgroups::getShaderStageName(caseDef.shaderStage) +
1265 " stage requires that image uniforms be supported on this stage");
1266 }
1267 }
1268
1269 const uint32_t inputDatasCount = OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType ? 3u : 2u;
1270 std::vector<subgroups::SSBOData> inputDatas(inputDatasCount);
1271
1272 inputDatas[0].format = subgroups::FORMAT_R32_UINT;
1273 inputDatas[0].layout = subgroups::SSBOData::LayoutStd140;
1274 inputDatas[0].numElements = SHADER_BUFFER_SIZE / 4ull;
1275 inputDatas[0].initializeType = subgroups::SSBOData::InitializeNonZero;
1276 inputDatas[0].binding = 0u;
1277
1278 inputDatas[1].format = subgroups::FORMAT_R32_UINT;
1279 inputDatas[1].layout = subgroups::SSBOData::LayoutStd140;
1280 inputDatas[1].numElements = 1ull;
1281 inputDatas[1].initializeType = subgroups::SSBOData::InitializeNonZero;
1282 inputDatas[1].binding = 1u;
1283
1284 if (OPTYPE_SUBGROUP_MEMORY_BARRIER_IMAGE == caseDef.opType)
1285 {
1286 inputDatas[2].format = subgroups::FORMAT_R32_UINT;
1287 inputDatas[2].layout = subgroups::SSBOData::LayoutPacked;
1288 inputDatas[2].numElements = SHADER_BUFFER_SIZE;
1289 inputDatas[2].initializeType = subgroups::SSBOData::InitializeNone;
1290 inputDatas[2].isImage = true;
1291 inputDatas[2].binding = 0u;
1292 }
1293
1294 if (subgroups::SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
1295 {
1296 if (OPTYPE_ELECT == caseDef.opType)
1297 return subgroups::makeVertexFrameBufferTest(context, subgroups::FORMAT_R32G32_SFLOAT, DE_NULL, 0u,
1298 checkVertexPipelineStagesSubgroupElectNoSSBO);
1299 else
1300 return subgroups::makeVertexFrameBufferTest(context, subgroups::FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0],
1301 inputDatasCount,
1302 checkVertexPipelineStagesSubgroupBarriersNoSSBO);
1303 }
1304 else if (subgroups::SHADER_STAGE_FRAGMENT_BIT == caseDef.shaderStage)
1305 {
1306 return subgroups::makeFragmentFrameBufferTest(context, subgroups::FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0],
1307 inputDatasCount, checkFragmentSubgroupBarriersNoSSBO);
1308 }
1309 else if (subgroups::SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
1310 {
1311 if (OPTYPE_ELECT == caseDef.opType)
1312 return subgroups::makeGeometryFrameBufferTest(context, subgroups::FORMAT_R32G32_SFLOAT, DE_NULL, 0u,
1313 checkVertexPipelineStagesSubgroupElectNoSSBO);
1314 else
1315 return subgroups::makeGeometryFrameBufferTest(context, subgroups::FORMAT_R32G32B32A32_SFLOAT,
1316 &inputDatas[0], inputDatasCount,
1317 checkVertexPipelineStagesSubgroupBarriersNoSSBO);
1318 }
1319
1320 if (OPTYPE_ELECT == caseDef.opType)
1321 return subgroups::makeTessellationEvaluationFrameBufferTest(context, subgroups::FORMAT_R32G32_SFLOAT, DE_NULL,
1322 0u, checkVertexPipelineStagesSubgroupElectNoSSBO,
1323 caseDef.shaderStage);
1324
1325 return subgroups::makeTessellationEvaluationFrameBufferTest(
1326 context, subgroups::FORMAT_R32G32B32A32_SFLOAT, &inputDatas[0], inputDatasCount,
1327 (subgroups::SHADER_STAGE_TESS_CONTROL_BIT == caseDef.shaderStage) ?
1328 checkVertexPipelineStagesSubgroupBarriersNoSSBO :
1329 checkTessellationEvaluationSubgroupBarriersNoSSBO,
1330 caseDef.shaderStage);
1331 }
1332
test(Context & context,const CaseDefinition caseDef)1333 tcu::TestStatus test(Context &context, const CaseDefinition caseDef)
1334 {
1335 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, subgroups::SUBGROUP_FEATURE_BASIC_BIT))
1336 {
1337 return tcu::TestStatus::fail("Subgroup feature " +
1338 subgroups::getSubgroupFeatureName(subgroups::SUBGROUP_FEATURE_BASIC_BIT) +
1339 " is a required capability!");
1340 }
1341
1342 if (OPTYPE_ELECT != caseDef.opType && subgroups::SHADER_STAGE_COMPUTE_BIT != caseDef.shaderStage)
1343 {
1344 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, subgroups::SUBGROUP_FEATURE_BALLOT_BIT))
1345 {
1346 TCU_THROW(NotSupportedError,
1347 "Subgroup basic operation non-compute stage test required that ballot operations are supported!");
1348 }
1349 }
1350
1351 if (subgroups::SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
1352 {
1353 if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
1354 {
1355 return tcu::TestStatus::fail("Shader stage " + subgroups::getShaderStageName(caseDef.shaderStage) +
1356 " is required to support subgroup operations!");
1357 }
1358
1359 if (OPTYPE_ELECT == caseDef.opType)
1360 {
1361 return subgroups::makeComputeTest(context, subgroups::FORMAT_R32_UINT, DE_NULL, 0,
1362 checkComputeSubgroupElect);
1363 }
1364 else
1365 {
1366 const uint32_t inputDatasCount = 2;
1367 subgroups::SSBOData inputDatas[inputDatasCount];
1368 inputDatas[0].format = subgroups::FORMAT_R32_UINT;
1369 inputDatas[0].layout = subgroups::SSBOData::LayoutStd430;
1370 inputDatas[0].numElements = 1 + SHADER_BUFFER_SIZE;
1371 inputDatas[0].initializeType = subgroups::SSBOData::InitializeNonZero;
1372 inputDatas[0].binding = 1u;
1373
1374 inputDatas[1].format = subgroups::FORMAT_R32_UINT;
1375 inputDatas[1].layout = subgroups::SSBOData::LayoutPacked;
1376 inputDatas[1].numElements = SHADER_BUFFER_SIZE;
1377 inputDatas[1].initializeType = subgroups::SSBOData::InitializeNone;
1378 inputDatas[1].isImage = true;
1379 inputDatas[1].binding = 0u;
1380
1381 return subgroups::makeComputeTest(context, subgroups::FORMAT_R32_UINT, inputDatas, inputDatasCount,
1382 checkComputeSubgroupBarriers);
1383 }
1384 }
1385 else
1386 {
1387 if (!subgroups::isFragmentSSBOSupportedForDevice(context))
1388 {
1389 TCU_THROW(NotSupportedError,
1390 "Subgroup basic operation require that the fragment stage be able to write to SSBOs!");
1391 }
1392
1393 int supportedStages = context.getDeqpContext().getContextInfo().getInt(GL_SUBGROUP_SUPPORTED_STAGES_KHR);
1394 int combinedSSBOs = context.getDeqpContext().getContextInfo().getInt(GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS);
1395
1396 subgroups::ShaderStageFlags stages = (subgroups::ShaderStageFlags)(caseDef.shaderStage & supportedStages);
1397
1398 if (subgroups::SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
1399 {
1400 if ((stages & subgroups::SHADER_STAGE_FRAGMENT_BIT) == 0)
1401 TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
1402 else
1403 stages = subgroups::SHADER_STAGE_FRAGMENT_BIT;
1404 }
1405
1406 if ((subgroups::ShaderStageFlags)0u == stages)
1407 TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
1408
1409 // with sufficient effort we could dynamically assign the binding points
1410 // based on the number of stages actually supported, etc, but we already
1411 // have the framebuffer tests which cover those cases, so there doesn't seem
1412 // to be much benefit in doing that right now.
1413 if (combinedSSBOs < 14)
1414 TCU_THROW(NotSupportedError, "Device does not support enough combined SSBOs for this test (14)");
1415
1416 if (OPTYPE_ELECT == caseDef.opType)
1417 {
1418 const uint32_t inputCount = 5u;
1419 subgroups::SSBOData inputData[inputCount];
1420
1421 inputData[0].format = subgroups::FORMAT_R32_UINT;
1422 inputData[0].layout = subgroups::SSBOData::LayoutStd430;
1423 inputData[0].numElements = 1;
1424 inputData[0].initializeType = subgroups::SSBOData::InitializeZero;
1425 inputData[0].binding = 4u;
1426 inputData[0].stages = subgroups::SHADER_STAGE_VERTEX_BIT;
1427
1428 inputData[1].format = subgroups::FORMAT_R32_UINT;
1429 inputData[1].layout = subgroups::SSBOData::LayoutStd430;
1430 inputData[1].numElements = 1;
1431 inputData[1].initializeType = subgroups::SSBOData::InitializeZero;
1432 inputData[1].binding = 5u;
1433 inputData[1].stages = subgroups::SHADER_STAGE_TESS_CONTROL_BIT;
1434
1435 inputData[2].format = subgroups::FORMAT_R32_UINT;
1436 inputData[2].layout = subgroups::SSBOData::LayoutStd430;
1437 inputData[2].numElements = 1;
1438 inputData[2].initializeType = subgroups::SSBOData::InitializeZero;
1439 inputData[2].binding = 6u;
1440 inputData[2].stages = subgroups::SHADER_STAGE_TESS_EVALUATION_BIT;
1441
1442 inputData[3].format = subgroups::FORMAT_R32_UINT;
1443 inputData[3].layout = subgroups::SSBOData::LayoutStd430;
1444 inputData[3].numElements = 1;
1445 inputData[3].initializeType = subgroups::SSBOData::InitializeZero;
1446 inputData[3].binding = 7u;
1447 inputData[3].stages = subgroups::SHADER_STAGE_GEOMETRY_BIT;
1448
1449 inputData[4].format = subgroups::FORMAT_R32_UINT;
1450 inputData[4].layout = subgroups::SSBOData::LayoutStd430;
1451 inputData[4].numElements = 1;
1452 inputData[4].initializeType = subgroups::SSBOData::InitializeZero;
1453 inputData[4].binding = 8u;
1454 inputData[4].stages = subgroups::SHADER_STAGE_FRAGMENT_BIT;
1455
1456 return subgroups::allStages(context, subgroups::FORMAT_R32_UINT, inputData, inputCount,
1457 checkVertexPipelineStagesSubgroupElect, stages);
1458 }
1459 else
1460 {
1461 const subgroups::ShaderStageFlags stagesBits[] = {
1462 subgroups::SHADER_STAGE_VERTEX_BIT, subgroups::SHADER_STAGE_TESS_CONTROL_BIT,
1463 subgroups::SHADER_STAGE_TESS_EVALUATION_BIT, subgroups::SHADER_STAGE_GEOMETRY_BIT,
1464 subgroups::SHADER_STAGE_FRAGMENT_BIT,
1465 };
1466
1467 const uint32_t inputDatasCount = DE_LENGTH_OF_ARRAY(stagesBits) * 3u;
1468 subgroups::SSBOData inputDatas[inputDatasCount];
1469
1470 for (int ndx = 0; ndx < DE_LENGTH_OF_ARRAY(stagesBits); ++ndx)
1471 {
1472 const uint32_t index = ndx * 3;
1473 const uint32_t ssboIndex = ndx * 2;
1474 const uint32_t imgIndex = ndx;
1475 inputDatas[index].format = subgroups::FORMAT_R32_UINT;
1476 inputDatas[index].layout = subgroups::SSBOData::LayoutStd430;
1477 inputDatas[index].numElements = 1 + SHADER_BUFFER_SIZE;
1478 inputDatas[index].initializeType = subgroups::SSBOData::InitializeNonZero;
1479 inputDatas[index].binding = ssboIndex + 4u;
1480 inputDatas[index].stages = stagesBits[ndx];
1481
1482 inputDatas[index + 1].format = subgroups::FORMAT_R32_UINT;
1483 inputDatas[index + 1].layout = subgroups::SSBOData::LayoutStd430;
1484 inputDatas[index + 1].numElements = 1;
1485 inputDatas[index + 1].initializeType = subgroups::SSBOData::InitializeZero;
1486 inputDatas[index + 1].binding = ssboIndex + 5u;
1487 inputDatas[index + 1].stages = stagesBits[ndx];
1488
1489 inputDatas[index + 2].format = subgroups::FORMAT_R32_UINT;
1490 inputDatas[index + 2].layout = subgroups::SSBOData::LayoutPacked;
1491 inputDatas[index + 2].numElements = SHADER_BUFFER_SIZE;
1492 inputDatas[index + 2].initializeType = subgroups::SSBOData::InitializeNone;
1493 inputDatas[index + 2].isImage = true;
1494 inputDatas[index + 2].binding = imgIndex;
1495 inputDatas[index + 2].stages = stagesBits[ndx];
1496 }
1497
1498 return subgroups::allStages(context, subgroups::FORMAT_R32_UINT, inputDatas, inputDatasCount,
1499 checkVertexPipelineStagesSubgroupBarriers, stages);
1500 }
1501 }
1502 }
1503 } // namespace
1504
createSubgroupsBasicTests(deqp::Context & testCtx)1505 deqp::TestCaseGroup *createSubgroupsBasicTests(deqp::Context &testCtx)
1506 {
1507 de::MovePtr<deqp::TestCaseGroup> graphicGroup(
1508 new deqp::TestCaseGroup(testCtx, "graphics", "Subgroup basic category tests: graphics"));
1509 de::MovePtr<deqp::TestCaseGroup> computeGroup(
1510 new deqp::TestCaseGroup(testCtx, "compute", "Subgroup basic category tests: compute"));
1511 de::MovePtr<deqp::TestCaseGroup> framebufferGroup(
1512 new deqp::TestCaseGroup(testCtx, "framebuffer", "Subgroup basic category tests: framebuffer"));
1513
1514 const subgroups::ShaderStageFlags stages[] = {
1515 SHADER_STAGE_FRAGMENT_BIT, SHADER_STAGE_VERTEX_BIT, SHADER_STAGE_TESS_EVALUATION_BIT,
1516 SHADER_STAGE_TESS_CONTROL_BIT, SHADER_STAGE_GEOMETRY_BIT,
1517 };
1518
1519 for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
1520 {
1521 const std::string op = de::toLower(getOpTypeName(opTypeIndex));
1522
1523 {
1524 const CaseDefinition caseDef = {opTypeIndex, SHADER_STAGE_COMPUTE_BIT};
1525 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(computeGroup.get(), op, "", supportedCheck,
1526 initPrograms, test, caseDef);
1527 }
1528
1529 if (OPTYPE_SUBGROUP_MEMORY_BARRIER_SHARED == opTypeIndex)
1530 {
1531 // Shared isn't available in non compute shaders.
1532 continue;
1533 }
1534
1535 {
1536 const CaseDefinition caseDef = {opTypeIndex, SHADER_STAGE_ALL_GRAPHICS};
1537 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(graphicGroup.get(), op, "", supportedCheck,
1538 initPrograms, test, caseDef);
1539 }
1540
1541 for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
1542 {
1543 if (opTypeIndex == OPTYPE_ELECT && stageIndex == 0)
1544 continue; // This is not tested. I don't know why.
1545
1546 const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex]};
1547 SubgroupFactory<CaseDefinition>::addFunctionCaseWithPrograms(
1548 framebufferGroup.get(), op + "_" + getShaderStageName(caseDef.shaderStage), "", supportedCheck,
1549 initFrameBufferPrograms, noSSBOtest, caseDef);
1550 }
1551 }
1552
1553 de::MovePtr<deqp::TestCaseGroup> group(new deqp::TestCaseGroup(testCtx, "basic", "Subgroup basic category tests"));
1554
1555 group->addChild(graphicGroup.release());
1556 group->addChild(computeGroup.release());
1557 group->addChild(framebufferGroup.release());
1558
1559 return group.release();
1560 }
1561
1562 } // namespace subgroups
1563 } // namespace glc
1564