/*------------------------------------------------------------------------- * drawElements Quality Program OpenGL ES 3.1 Module * ------------------------------------------------- * * Copyright 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * *//*! * \file * \brief Basic Compute Shader Tests. *//*--------------------------------------------------------------------*/ #include "es31fBasicComputeShaderTests.hpp" #include "gluShaderProgram.hpp" #include "gluObjectWrapper.hpp" #include "gluRenderContext.hpp" #include "gluProgramInterfaceQuery.hpp" #include "gluContextInfo.hpp" #include "glwFunctions.hpp" #include "glwEnums.hpp" #include "tcuTestLog.hpp" #include "deRandom.hpp" #include "deStringUtil.hpp" #include "deMemory.h" namespace deqp { namespace gles31 { namespace Functional { using std::string; using std::vector; using tcu::TestLog; using namespace glu; //! Utility for mapping buffers. 
class BufferMemMap { public: BufferMemMap(const glw::Functions &gl, uint32_t target, int offset, int size, uint32_t access) : m_gl(gl) , m_target(target) , m_ptr(DE_NULL) { m_ptr = gl.mapBufferRange(target, offset, size, access); GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()"); TCU_CHECK(m_ptr); } ~BufferMemMap(void) { m_gl.unmapBuffer(m_target); } void *getPtr(void) const { return m_ptr; } void *operator*(void) const { return m_ptr; } private: BufferMemMap(const BufferMemMap &other); BufferMemMap &operator=(const BufferMemMap &other); const glw::Functions &m_gl; const uint32_t m_target; void *m_ptr; }; namespace { class EmptyComputeShaderCase : public TestCase { public: EmptyComputeShaderCase(Context &context) : TestCase(context, "empty", "Empty shader") { } IterateResult iterate(void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = 1) in;\n" "void main (void) {}\n"; const ShaderProgram program(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); const glw::Functions &gl = m_context.getRenderContext().getFunctions(); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); gl.useProgram(program.getProgram()); gl.dispatchCompute(1, 1, 1); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } }; class UBOToSSBOInvertCase : public TestCase { public: UBOToSSBOInvertCase(Context &context, const char *name, const char *description, int numValues, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize) : TestCase(context, name, description) , m_numValues(numValues) , m_localSize(localSize) , m_workSize(workSize) { DE_ASSERT(m_numValues % (m_workSize[0] * m_workSize[1] * m_workSize[2] * m_localSize[0] * m_localSize[1] * m_localSize[2]) == 0); } IterateResult 
iterate(void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" << "uniform Input {\n" << " uint values[" << m_numValues << "];\n" << "} ub_in;\n" << "layout(binding = 1) buffer Output {\n" << " uint values[" << m_numValues << "];\n" << "} sb_out;\n" << "void main (void) {\n" << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n" << " uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n" << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + " "gl_GlobalInvocationID.x;\n" << " uint offset = numValuesPerInv*groupNdx;\n" << "\n" << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" << " sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n" << "}\n"; const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const ShaderProgram program(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); const Buffer inputBuffer(m_context.getRenderContext()); const Buffer outputBuffer(m_context.getRenderContext()); std::vector inputValues(m_numValues); // Compute input values. 
{ de::Random rnd(0x111223f); for (int ndx = 0; ndx < (int)inputValues.size(); ndx++) inputValues[ndx] = rnd.getUint32(); } m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Input buffer setup { const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM_BLOCK, "Input"); const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_UNIFORM_BLOCK, blockIndex); const uint32_t valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "Input.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_UNIFORM, valueIndex); gl.bindBuffer(GL_UNIFORM_BUFFER, *inputBuffer); gl.bufferData(GL_UNIFORM_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW); { const BufferMemMap bufMap(gl, GL_UNIFORM_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT); for (uint32_t ndx = 0; ndx < de::min(valueInfo.arraySize, (uint32_t)inputValues.size()); ndx++) *(uint32_t *)((uint8_t *)bufMap.getPtr() + valueInfo.offset + ndx * valueInfo.arrayStride) = inputValues[ndx]; } gl.uniformBlockBinding(program.getProgram(), blockIndex, 0); gl.bindBufferBase(GL_UNIFORM_BUFFER, 0, *inputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed"); } // Output buffer setup { const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); } // Dispatch 
compute workload gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); const uint32_t valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); TCU_CHECK(valueInfo.arraySize == (uint32_t)inputValues.size()); for (uint32_t ndx = 0; ndx < valueInfo.arraySize; ndx++) { const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride * ndx)); const uint32_t ref = ~inputValues[ndx]; if (res != ref) throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]"); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const int m_numValues; const tcu::IVec3 m_localSize; const tcu::IVec3 m_workSize; }; class CopyInvertSSBOCase : public TestCase { public: CopyInvertSSBOCase(Context &context, const char *name, const char *description, int numValues, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize) : TestCase(context, name, description) , m_numValues(numValues) , m_localSize(localSize) , m_workSize(workSize) { DE_ASSERT(m_numValues % (m_workSize[0] * m_workSize[1] * m_workSize[2] * m_localSize[0] * m_localSize[1] * m_localSize[2]) == 0); } IterateResult iterate(void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout 
(local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" << "layout(binding = 0) buffer Input {\n" << " uint values[" << m_numValues << "];\n" << "} sb_in;\n" << "layout (binding = 1) buffer Output {\n" << " uint values[" << m_numValues << "];\n" << "} sb_out;\n" << "void main (void) {\n" << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n" << " uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n" << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + " "gl_GlobalInvocationID.x;\n" << " uint offset = numValuesPerInv*groupNdx;\n" << "\n" << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" << " sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n" << "}\n"; const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const ShaderProgram program(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); const Buffer inputBuffer(m_context.getRenderContext()); const Buffer outputBuffer(m_context.getRenderContext()); std::vector inputValues(m_numValues); // Compute input values. 
{ de::Random rnd(0x124fef); for (int ndx = 0; ndx < (int)inputValues.size(); ndx++) inputValues[ndx] = rnd.getUint32(); } m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Input buffer setup { const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input"); const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex); const uint32_t valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW); TCU_CHECK(valueInfo.arraySize == (uint32_t)inputValues.size()); { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT); for (uint32_t ndx = 0; ndx < (uint32_t)inputValues.size(); ndx++) *(uint32_t *)((uint8_t *)bufMap.getPtr() + valueInfo.offset + ndx * valueInfo.arrayStride) = inputValues[ndx]; } gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed"); } // Output buffer setup { const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockInfo.dataSize, DE_NULL, GL_STREAM_READ); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *outputBuffer); 
GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); } // Dispatch compute workload gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); const uint32_t valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); TCU_CHECK(valueInfo.arraySize == (uint32_t)inputValues.size()); for (uint32_t ndx = 0; ndx < valueInfo.arraySize; ndx++) { const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride * ndx)); const uint32_t ref = ~inputValues[ndx]; if (res != ref) throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]"); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const int m_numValues; const tcu::IVec3 m_localSize; const tcu::IVec3 m_workSize; }; class InvertSSBOInPlaceCase : public TestCase { public: InvertSSBOInPlaceCase(Context &context, const char *name, const char *description, int numValues, bool isSized, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize) : TestCase(context, name, description) , m_numValues(numValues) , m_isSized(isSized) , m_localSize(localSize) , m_workSize(workSize) { DE_ASSERT(m_numValues % (m_workSize[0] * m_workSize[1] * m_workSize[2] * m_localSize[0] * m_localSize[1] * m_localSize[2]) == 0); } IterateResult iterate(void) { const GLSLVersion glslVersion = 
glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" << "layout(binding = 0) buffer InOut {\n" << " uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n" << "} sb_inout;\n" << "void main (void) {\n" << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n" << " uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n" << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + " "gl_GlobalInvocationID.x;\n" << " uint offset = numValuesPerInv*groupNdx;\n" << "\n" << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" << " sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n" << "}\n"; const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const ShaderProgram program(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); const Buffer outputBuffer(m_context.getRenderContext()); const uint32_t valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "InOut.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const uint32_t blockSize = valueInfo.arrayStride * (uint32_t)m_numValues; std::vector inputValues(m_numValues); // Compute input values. { de::Random rnd(0x82ce7f); for (int ndx = 0; ndx < (int)inputValues.size(); ndx++) inputValues[ndx] = rnd.getUint32(); } TCU_CHECK(valueInfo.arraySize == (uint32_t)(m_isSized ? 
m_numValues : 0)); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Output buffer setup { gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_DRAW); { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockSize, GL_MAP_WRITE_BIT); for (uint32_t ndx = 0; ndx < (uint32_t)inputValues.size(); ndx++) *(uint32_t *)((uint8_t *)bufMap.getPtr() + valueInfo.offset + ndx * valueInfo.arrayStride) = inputValues[ndx]; } gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed"); } // Dispatch compute workload gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); for (uint32_t ndx = 0; ndx < (uint32_t)inputValues.size(); ndx++) { const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride * ndx)); const uint32_t ref = ~inputValues[ndx]; if (res != ref) throw tcu::TestError(string("Comparison failed for InOut.values[") + de::toString(ndx) + "]"); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const int m_numValues; const bool m_isSized; const tcu::IVec3 m_localSize; const tcu::IVec3 m_workSize; }; class WriteToMultipleSSBOCase : public TestCase { public: WriteToMultipleSSBOCase(Context &context, const char *name, const char *description, int numValues, bool isSized, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize) : TestCase(context, name, description) , m_numValues(numValues) , m_isSized(isSized) , m_localSize(localSize) , m_workSize(workSize) { DE_ASSERT(m_numValues % (m_workSize[0] * m_workSize[1] * m_workSize[2] * m_localSize[0] * m_localSize[1] * m_localSize[2]) 
== 0); } IterateResult iterate(void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" << "layout(binding = 0) buffer Out0 {\n" << " uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n" << "} sb_out0;\n" << "layout(binding = 1) buffer Out1 {\n" << " uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n" << "} sb_out1;\n" << "void main (void) {\n" << " uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n" << " uint groupNdx = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + " "gl_GlobalInvocationID.x;\n" << "\n" << " {\n" << " uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n" << " uint offset = numValuesPerInv*groupNdx;\n" << "\n" << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" << " sb_out0.values[offset + ndx] = offset + ndx;\n" << " }\n" << " {\n" << " uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n" << " uint offset = numValuesPerInv*groupNdx;\n" << "\n" << " for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n" << " sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n" << " }\n" << "}\n"; const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const ShaderProgram program(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); const Buffer outputBuffer0(m_context.getRenderContext()); const uint32_t value0Index = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out0.values"); const InterfaceVariableInfo value0Info = getProgramInterfaceVariableInfo(gl, 
program.getProgram(), GL_BUFFER_VARIABLE, value0Index); const uint32_t block0Size = value0Info.arrayStride * (uint32_t)m_numValues; const Buffer outputBuffer1(m_context.getRenderContext()); const uint32_t value1Index = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out1.values"); const InterfaceVariableInfo value1Info = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value1Index); const uint32_t block1Size = value1Info.arrayStride * (uint32_t)m_numValues; TCU_CHECK(value0Info.arraySize == (uint32_t)(m_isSized ? m_numValues : 0)); TCU_CHECK(value1Info.arraySize == (uint32_t)(m_isSized ? m_numValues : 0)); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Output buffer setup { gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0); gl.bufferData(GL_SHADER_STORAGE_BUFFER, block0Size, DE_NULL, GL_STREAM_DRAW); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer0); GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed"); } { gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1); gl.bufferData(GL_SHADER_STORAGE_BUFFER, block1Size, DE_NULL, GL_STREAM_DRAW); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer1); GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed"); } // Dispatch compute workload gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0); { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block0Size, GL_MAP_READ_BIT); for (uint32_t ndx = 0; ndx < (uint32_t)m_numValues; ndx++) { const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + value0Info.offset + value0Info.arrayStride * ndx)); const uint32_t ref = ndx; if (res != ref) throw tcu::TestError(string("Comparison failed for Out0.values[") + de::toString(ndx) + "] res=" 
+ de::toString(res) + " ref=" + de::toString(ref)); } } gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1); { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block1Size, GL_MAP_READ_BIT); for (uint32_t ndx = 0; ndx < (uint32_t)m_numValues; ndx++) { const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + value1Info.offset + value1Info.arrayStride * ndx)); const uint32_t ref = m_numValues - ndx; if (res != ref) throw tcu::TestError(string("Comparison failed for Out1.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref)); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const int m_numValues; const bool m_isSized; const tcu::IVec3 m_localSize; const tcu::IVec3 m_workSize; }; class SSBOLocalBarrierCase : public TestCase { public: SSBOLocalBarrierCase(Context &context, const char *name, const char *description, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize) : TestCase(context, name, description) , m_localSize(localSize) , m_workSize(workSize) { } IterateResult iterate(void) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const Buffer outputBuffer(m_context.getRenderContext()); const int workGroupSize = m_localSize[0] * m_localSize[1] * m_localSize[2]; const int workGroupCount = m_workSize[0] * m_workSize[1] * m_workSize[2]; const int numValues = workGroupSize * workGroupCount; const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" << "layout(binding = 0) buffer Output {\n" << " coherent uint values[" << numValues << "];\n" << "} sb_out;\n\n" << "shared uint offsets[" << workGroupSize << "];\n\n" << "void main (void) {\n" << " uint localSize = 
gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n" << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + " "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" << " uint globalOffs = localSize*globalNdx;\n" << " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + " "gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n" << "\n" << " sb_out.values[globalOffs + localOffs] = globalOffs;\n" << " memoryBarrierBuffer();\n" << " barrier();\n" << " sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n" << " memoryBarrierBuffer();\n" << " barrier();\n" << " sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n" << "}\n"; const ShaderProgram program(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str())); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Output buffer setup { const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); } // Dispatch compute workload gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, 
GL_BUFFER_DATA_SIZE); const uint32_t valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++) { for (int localOffs = 0; localOffs < workGroupSize; localOffs++) { const int globalOffs = groupNdx * workGroupSize; const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride * (globalOffs + localOffs))); const int offs0 = localOffs - 1 < 0 ? ((localOffs + workGroupSize - 1) % workGroupSize) : ((localOffs - 1) % workGroupSize); const int offs1 = localOffs - 2 < 0 ? ((localOffs + workGroupSize - 2) % workGroupSize) : ((localOffs - 2) % workGroupSize); const uint32_t ref = (uint32_t)(globalOffs + offs0 + offs1); if (res != ref) throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]"); } } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const tcu::IVec3 m_localSize; const tcu::IVec3 m_workSize; }; class SSBOBarrierCase : public TestCase { public: SSBOBarrierCase(Context &context, const char *name, const char *description, const tcu::IVec3 &workSize) : TestCase(context, name, description) , m_workSize(workSize) { } IterateResult iterate(void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); const char *const glslVersionDeclaration = getGLSLVersionDeclaration(glslVersion); std::ostringstream src0; src0 << glslVersionDeclaration << "\n" << "layout (local_size_x = 1) in;\n" "uniform uint u_baseVal;\n" "layout(binding = 1) buffer Output {\n" " uint values[];\n" "};\n" "void main (void) {\n" " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z 
+ " "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" " values[offset] = u_baseVal+offset;\n" "}\n"; std::ostringstream src1; src1 << glslVersionDeclaration << "\n" << "layout (local_size_x = 1) in;\n" "uniform uint u_baseVal;\n" "layout(binding = 1) buffer Input {\n" " uint values[];\n" "};\n" "layout(binding = 0) buffer Output {\n" " coherent uint sum;\n" "};\n" "void main (void) {\n" " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + " "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" " uint value = values[offset];\n" " atomicAdd(sum, value);\n" "}\n"; const ShaderProgram program0(m_context.getRenderContext(), ProgramSources() << ComputeSource(src0.str())); const ShaderProgram program1(m_context.getRenderContext(), ProgramSources() << ComputeSource(src1.str())); const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const Buffer tempBuffer(m_context.getRenderContext()); const Buffer outputBuffer(m_context.getRenderContext()); const uint32_t baseValue = 127; m_testCtx.getLog() << program0 << program1; if (!program0.isOk() || !program1.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; // Temp buffer setup { const uint32_t valueIndex = gl.getProgramResourceIndex(program0.getProgram(), GL_BUFFER_VARIABLE, "values[0]"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program0.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const uint32_t bufferSize = valueInfo.arrayStride * m_workSize[0] * m_workSize[1] * m_workSize[2]; gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *tempBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)bufferSize, DE_NULL, GL_STATIC_DRAW); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *tempBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Temp buffer setup failed"); } // Output buffer setup { const uint32_t blockIndex = gl.getProgramResourceIndex(program1.getProgram(), 
GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT); deMemset(bufMap.getPtr(), 0, blockSize); } gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); } // Dispatch compute workload gl.useProgram(program0.getProgram()); gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue); gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); gl.useProgram(program1.getProgram()); gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands"); // Read back and compare { const uint32_t blockIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); const uint32_t valueIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset)); uint32_t ref = 0; for (int ndx = 0; ndx < m_workSize[0] * m_workSize[1] * m_workSize[2]; ndx++) ref += baseValue + (uint32_t)ndx; if (res != ref) { m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage; throw 
tcu::TestError("Comparison failed"); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const tcu::IVec3 m_workSize; }; class BasicSharedVarCase : public TestCase { public: BasicSharedVarCase(Context &context, const char *name, const char *description, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize) : TestCase(context, name, description) , m_localSize(localSize) , m_workSize(workSize) { } IterateResult iterate(void) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const Buffer outputBuffer(m_context.getRenderContext()); const int workGroupSize = m_localSize[0] * m_localSize[1] * m_localSize[2]; const int workGroupCount = m_workSize[0] * m_workSize[1] * m_workSize[2]; const int numValues = workGroupSize * workGroupCount; const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" << "layout(binding = 0) buffer Output {\n" << " uint values[" << numValues << "];\n" << "} sb_out;\n\n" << "shared uint offsets[" << workGroupSize << "];\n\n" << "void main (void) {\n" << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n" << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + " "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" << " uint globalOffs = localSize*globalNdx;\n" << " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + " "gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n" << "\n" << " offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n" << " barrier();\n" << " sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n" << "}\n"; const ShaderProgram program(m_context.getRenderContext(), ProgramSources() << 
ShaderSource(SHADERTYPE_COMPUTE, src.str())); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Output buffer setup { const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); } // Dispatch compute workload gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); const uint32_t valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++) { for (int localOffs = 0; localOffs < workGroupSize; localOffs++) { const int globalOffs = groupNdx * workGroupSize; const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride * (globalOffs + localOffs))); const uint32_t ref = (uint32_t)(globalOffs + (workGroupSize - localOffs - 1) * (workGroupSize - localOffs - 1)); if (res != 
ref) throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]"); } } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const tcu::IVec3 m_localSize; const tcu::IVec3 m_workSize; }; class SharedVarAtomicOpCase : public TestCase { public: SharedVarAtomicOpCase(Context &context, const char *name, const char *description, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize) : TestCase(context, name, description) , m_localSize(localSize) , m_workSize(workSize) { } IterateResult iterate(void) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const Buffer outputBuffer(m_context.getRenderContext()); const int workGroupSize = m_localSize[0] * m_localSize[1] * m_localSize[2]; const int workGroupCount = m_workSize[0] * m_workSize[1] * m_workSize[2]; const int numValues = workGroupSize * workGroupCount; const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" << "layout(binding = 0) buffer Output {\n" << " uint values[" << numValues << "];\n" << "} sb_out;\n\n" << "shared uint count;\n\n" << "void main (void) {\n" << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n" << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + " "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" << " uint globalOffs = localSize*globalNdx;\n" << "\n" << " count = 0u;\n" << " barrier();\n" << " uint oldVal = atomicAdd(count, 1u);\n" << " sb_out.values[globalOffs+oldVal] = oldVal+1u;\n" << "}\n"; const ShaderProgram program(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); m_testCtx.getLog() << program; if (!program.isOk()) 
TCU_FAIL("Compile failed");

        m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;

        gl.useProgram(program.getProgram());

        // Output buffer setup
        // The buffer is sized from the GL_BUFFER_DATA_SIZE reported for the "Output" block and bound to binding 0.
        {
            const uint32_t blockIndex =
                gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
            const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK,
                                                        blockIndex, GL_BUFFER_DATA_SIZE);

            gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
            gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
            gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
            GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
        }

        // Dispatch compute workload
        gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
        GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");

        // Read back and compare
        {
            const uint32_t blockIndex =
                gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
            const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK,
                                                        blockIndex, GL_BUFFER_DATA_SIZE);
            // Offset and array stride of Output.values are queried rather than assumed.
            const uint32_t valueIndex =
                gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
            const InterfaceVariableInfo valueInfo =
                getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
            const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);

            for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
            {
                for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
                {
                    const int globalOffs = groupNdx * workGroupSize;
                    const uint32_t res   = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset +
                                                              valueInfo.arrayStride * (globalOffs + localOffs)));
                    // The shader writes oldVal+1 into slot oldVal of its group, so slot N must hold N+1.
                    const uint32_t ref = (uint32_t)(localOffs + 1);

                    if (res != ref)
                        throw tcu::TestError(string("Comparison failed for Output.values[") +
                                             de::toString(globalOffs + localOffs) + "]");
                }
            }
        }
m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const tcu::IVec3 m_localSize; const tcu::IVec3 m_workSize; }; class CopyImageToSSBOCase : public TestCase { public: CopyImageToSSBOCase(Context &context, const char *name, const char *description, const tcu::IVec2 &localSize, const tcu::IVec2 &imageSize) : TestCase(context, name, description) , m_localSize(localSize) , m_imageSize(imageSize) { DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0); DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0); } IterateResult iterate(void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n" << "layout(r32ui, binding = 1) readonly uniform highp uimage2D u_srcImg;\n" << "layout(binding = 0) buffer Output {\n" << " uint values[" << (m_imageSize[0] * m_imageSize[1]) << "];\n" << "} sb_out;\n\n" << "void main (void) {\n" << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n" << " uint value = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n" << " sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n" << "}\n"; const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const Buffer outputBuffer(m_context.getRenderContext()); const Texture inputTexture(m_context.getRenderContext()); const ShaderProgram program(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); const tcu::IVec2 workSize = m_imageSize / m_localSize; de::Random rnd(0xab2c7); vector inputValues(m_imageSize[0] * m_imageSize[1]); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Input values for (vector::iterator i 
= inputValues.begin(); i != inputValues.end(); ++i) *i = rnd.getUint32(); // Input image setup gl.bindTexture(GL_TEXTURE_2D, *inputTexture); gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]); gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_imageSize[0], m_imageSize[1], GL_RED_INTEGER, GL_UNSIGNED_INT, &inputValues[0]); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed"); // Bind to unit 1 gl.bindImageTexture(1, *inputTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI); GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed"); // Output buffer setup { const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); } // Dispatch compute workload gl.dispatchCompute(workSize[0], workSize[1], 1); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); const uint32_t valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); 
TCU_CHECK(valueInfo.arraySize == (uint32_t)inputValues.size()); for (uint32_t ndx = 0; ndx < valueInfo.arraySize; ndx++) { const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride * ndx)); const uint32_t ref = inputValues[ndx]; if (res != ref) throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]"); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const tcu::IVec2 m_localSize; const tcu::IVec2 m_imageSize; }; class CopySSBOToImageCase : public TestCase { public: CopySSBOToImageCase(Context &context, const char *name, const char *description, const tcu::IVec2 &localSize, const tcu::IVec2 &imageSize) : TestCase(context, name, description) , m_localSize(localSize) , m_imageSize(imageSize) { DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0); DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0); } IterateResult iterate(void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n" << "layout(r32ui, binding = 1) writeonly uniform highp uimage2D u_dstImg;\n" << "buffer Input {\n" << " uint values[" << (m_imageSize[0] * m_imageSize[1]) << "];\n" << "} sb_in;\n\n" << "void main (void) {\n" << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n" << " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n" << " imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n" << "}\n"; const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const Buffer inputBuffer(m_context.getRenderContext()); const Texture outputTexture(m_context.getRenderContext()); const ShaderProgram program(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, 
src.str())); const tcu::IVec2 workSize = m_imageSize / m_localSize; de::Random rnd(0x77238ac2); vector inputValues(m_imageSize[0] * m_imageSize[1]); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Input values for (vector::iterator i = inputValues.begin(); i != inputValues.end(); ++i) *i = rnd.getUint32(); // Input buffer setup { const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input"); const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex); const uint32_t valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW); TCU_CHECK(valueInfo.arraySize == (uint32_t)inputValues.size()); { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT); for (uint32_t ndx = 0; ndx < (uint32_t)inputValues.size(); ndx++) *(uint32_t *)((uint8_t *)bufMap.getPtr() + valueInfo.offset + ndx * valueInfo.arrayStride) = inputValues[ndx]; } gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed"); } // Output image setup gl.bindTexture(GL_TEXTURE_2D, *outputTexture); gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed"); // Bind 
to unit 1 gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI); GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed"); // Dispatch compute workload gl.dispatchCompute(workSize[0], workSize[1], 1); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { Framebuffer fbo(m_context.getRenderContext()); vector pixels(inputValues.size() * 4); gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo); gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0); TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); // \note In ES3 we have to use GL_RGBA_INTEGER gl.readBuffer(GL_COLOR_ATTACHMENT0); gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]); GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed"); for (uint32_t ndx = 0; ndx < (uint32_t)inputValues.size(); ndx++) { const uint32_t res = pixels[ndx * 4]; const uint32_t ref = inputValues[ndx]; if (res != ref) throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(ndx)); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const tcu::IVec2 m_localSize; const tcu::IVec2 m_imageSize; }; class ImageAtomicOpCase : public TestCase { public: ImageAtomicOpCase(Context &context, const char *name, const char *description, int localSize, const tcu::IVec2 &imageSize) : TestCase(context, name, description) , m_localSize(localSize) , m_imageSize(imageSize) { } void init(void) { auto contextType = m_context.getRenderContext().getType(); if (!glu::contextSupports(contextType, glu::ApiType::es(3, 2)) && !glu::contextSupports(contextType, glu::ApiType::core(4, 5)) && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic")) TCU_THROW(NotSupportedError, "Test requires OES_shader_image_atomic extension"); } IterateResult iterate(void) { glu::ContextType contextType = m_context.getRenderContext().getType(); const 
GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(contextType); const bool supportsES32orGL45 = glu::contextSupports(contextType, glu::ApiType::es(3, 2)) || glu::contextSupports(contextType, glu::ApiType::core(4, 5)); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << (supportsES32orGL45 ? "\n" : "#extension GL_OES_shader_image_atomic : require\n") << "layout (local_size_x = " << m_localSize << ") in;\n" << "layout(r32ui, binding = 1) uniform highp uimage2D u_dstImg;\n" << "buffer Input {\n" << " uint values[" << (m_imageSize[0] * m_imageSize[1] * m_localSize) << "];\n" << "} sb_in;\n\n" << "void main (void) {\n" << " uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n" << " uint value = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n" << "\n" << " if (gl_LocalInvocationIndex == 0u)\n" << " imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n" << " barrier();\n" << " imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n" << "}\n"; const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const Buffer inputBuffer(m_context.getRenderContext()); const Texture outputTexture(m_context.getRenderContext()); const ShaderProgram program(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str())); de::Random rnd(0x77238ac2); vector inputValues(m_imageSize[0] * m_imageSize[1] * m_localSize); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_imageSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Input values for (vector::iterator i = inputValues.begin(); i != inputValues.end(); ++i) *i = rnd.getUint32(); // Input buffer setup { const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input"); const InterfaceBlockInfo blockInfo = getProgramInterfaceBlockInfo(gl, program.getProgram(), 
GL_SHADER_STORAGE_BLOCK, blockIndex); const uint32_t valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW); TCU_CHECK(valueInfo.arraySize == (uint32_t)inputValues.size()); { const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT); for (uint32_t ndx = 0; ndx < (uint32_t)inputValues.size(); ndx++) *(uint32_t *)((uint8_t *)bufMap.getPtr() + valueInfo.offset + ndx * valueInfo.arrayStride) = inputValues[ndx]; } gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed"); } // Output image setup gl.bindTexture(GL_TEXTURE_2D, *outputTexture); gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed"); // Bind to unit 1 gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI); GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed"); // Dispatch compute workload gl.dispatchCompute(m_imageSize[0], m_imageSize[1], 1); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare { Framebuffer fbo(m_context.getRenderContext()); vector pixels(m_imageSize[0] * m_imageSize[1] * 4); gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo); gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0); TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); // \note In ES3 we have to use GL_RGBA_INTEGER 
gl.readBuffer(GL_COLOR_ATTACHMENT0); gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]); GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed"); for (int pixelNdx = 0; pixelNdx < (int)inputValues.size() / m_localSize; pixelNdx++) { const uint32_t res = pixels[pixelNdx * 4]; uint32_t ref = 0; for (int offs = 0; offs < m_localSize; offs++) ref += inputValues[pixelNdx * m_localSize + offs]; if (res != ref) throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(pixelNdx)); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const int m_localSize; const tcu::IVec2 m_imageSize; }; class ImageBarrierCase : public TestCase { public: ImageBarrierCase(Context &context, const char *name, const char *description, const tcu::IVec2 &workSize) : TestCase(context, name, description) , m_workSize(workSize) { } IterateResult iterate(void) { const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); const char *const glslVersionDeclaration = getGLSLVersionDeclaration(glslVersion); std::ostringstream src0; src0 << glslVersionDeclaration << "\n" << "layout (local_size_x = 1) in;\n" "uniform uint u_baseVal;\n" "layout(r32ui, binding = 2) writeonly uniform highp uimage2D u_img;\n" "void main (void) {\n" " uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + " "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" " imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset+u_baseVal, 0, 0, 0));\n" "}\n"; std::ostringstream src1; src1 << glslVersionDeclaration << "\n" << "layout (local_size_x = 1) in;\n" "layout(r32ui, binding = 2) readonly uniform highp uimage2D u_img;\n" "layout(binding = 0) buffer Output {\n" " coherent uint sum;\n" "};\n" "void main (void) {\n" " uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n" " atomicAdd(sum, value);\n" "}\n"; const ShaderProgram program0(m_context.getRenderContext(), 
ProgramSources() << ComputeSource(src0.str()));
        const ShaderProgram program1(m_context.getRenderContext(), ProgramSources() << ComputeSource(src1.str()));

        const glw::Functions &gl = m_context.getRenderContext().getFunctions();
        const Texture tempTexture(m_context.getRenderContext());
        const Buffer outputBuffer(m_context.getRenderContext());
        // Value passed to the first program through the u_baseVal uniform.
        const uint32_t baseValue = 127;

        m_testCtx.getLog() << program0 << program1;
        if (!program0.isOk() || !program1.isOk())
            TCU_FAIL("Compile failed");

        m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;

        // Temp texture setup
        // The texture has one texel per dispatched workgroup (m_workSize[0] x m_workSize[1]).
        gl.bindTexture(GL_TEXTURE_2D, *tempTexture);
        gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize[0], m_workSize[1]);
        gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
        gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
        GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");

        // Bind to unit 2
        // Bound read-write because one program writes the image and the other reads it.
        gl.bindImageTexture(2, *tempTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
        GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");

        // Output buffer setup
        {
            const uint32_t blockIndex =
                gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
            const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK,
                                                        blockIndex, GL_BUFFER_DATA_SIZE);

            gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
            gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);

            {
                // Zero the whole block so the accumulated sum starts from 0.
                const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT);
                deMemset(bufMap.getPtr(), 0, blockSize);
            }

            gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
            GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
        }

        // Dispatch compute workload
        // First pass: program0 fills the image.
        gl.useProgram(program0.getProgram());
        gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue);
        gl.dispatchCompute(m_workSize[0], m_workSize[1], 1);
gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); gl.useProgram(program1.getProgram()); gl.dispatchCompute(m_workSize[0], m_workSize[1], 1); GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands"); // Read back and compare { const uint32_t blockIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); const uint32_t valueIndex = gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset)); uint32_t ref = 0; for (int ndx = 0; ndx < m_workSize[0] * m_workSize[1]; ndx++) ref += baseValue + (uint32_t)ndx; if (res != ref) { m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage; throw tcu::TestError("Comparison failed"); } } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const tcu::IVec2 m_workSize; }; class AtomicCounterCase : public TestCase { public: AtomicCounterCase(Context &context, const char *name, const char *description, const tcu::IVec3 &localSize, const tcu::IVec3 &workSize) : TestCase(context, name, description) , m_localSize(localSize) , m_workSize(workSize) { } IterateResult iterate(void) { const glw::Functions &gl = m_context.getRenderContext().getFunctions(); const Buffer outputBuffer(m_context.getRenderContext()); const Buffer counterBuffer(m_context.getRenderContext()); const int workGroupSize = m_localSize[0] * m_localSize[1] * m_localSize[2]; const int workGroupCount = m_workSize[0] * m_workSize[1] * m_workSize[2]; const int numValues = 
workGroupSize * workGroupCount; const GLSLVersion glslVersion = glu::getContextTypeGLSLVersion(m_context.getRenderContext().getType()); std::ostringstream src; src << getGLSLVersionDeclaration(glslVersion) << "\n" << "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n" << "layout(binding = 0) buffer Output {\n" << " uint values[" << numValues << "];\n" << "} sb_out;\n\n" << "layout(binding = 0, offset = 0) uniform atomic_uint u_count;\n\n" << "void main (void) {\n" << " uint localSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n" << " uint globalNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + " "gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n" << " uint globalOffs = localSize*globalNdx;\n" << " uint localOffs = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + " "gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n" << "\n" << " uint oldVal = atomicCounterIncrement(u_count);\n" << " sb_out.values[globalOffs+localOffs] = oldVal;\n" << "}\n"; const ShaderProgram program(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str())); m_testCtx.getLog() << program; if (!program.isOk()) TCU_FAIL("Compile failed"); m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage; gl.useProgram(program.getProgram()); // Atomic counter buffer setup { const uint32_t uniformIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count"); const uint32_t bufferIndex = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX); const uint32_t bufferSize = getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE); gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, *counterBuffer); gl.bufferData(GL_ATOMIC_COUNTER_BUFFER, bufferSize, DE_NULL, GL_STREAM_READ); { const BufferMemMap 
memMap(gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_WRITE_BIT); deMemset(memMap.getPtr(), 0, (int)bufferSize); } gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, *counterBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Atomic counter buffer setup failed"); } // Output buffer setup { const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE); gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer); gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ); gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer); GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed"); } // Dispatch compute workload gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]); GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()"); // Read back and compare atomic counter { const uint32_t uniformIndex = gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count"); const uint32_t uniformOffset = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_OFFSET); const uint32_t bufferIndex = getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX); const uint32_t bufferSize = getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE); const BufferMemMap bufMap(gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_READ_BIT); const uint32_t resVal = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + uniformOffset)); if (resVal != (uint32_t)numValues) throw tcu::TestError("Invalid atomic counter value"); } // Read back and compare SSBO { const uint32_t blockIndex = gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output"); const int blockSize = getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, 
blockIndex, GL_BUFFER_DATA_SIZE); const uint32_t valueIndex = gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values"); const InterfaceVariableInfo valueInfo = getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex); const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT); uint32_t valSum = 0; uint32_t refSum = 0; for (int valNdx = 0; valNdx < numValues; valNdx++) { const uint32_t res = *((const uint32_t *)((const uint8_t *)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride * valNdx)); valSum += res; refSum += (uint32_t)valNdx; if (!de::inBounds(res, 0, (uint32_t)numValues)) throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(valNdx) + "]"); } if (valSum != refSum) throw tcu::TestError("Total sum of values in Output.values doesn't match"); } m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass"); return STOP; } private: const tcu::IVec3 m_localSize; const tcu::IVec3 m_workSize; }; } // namespace BasicComputeShaderTests::BasicComputeShaderTests(Context &context) : TestCaseGroup(context, "basic", "Basic Compute Shader Tests") { } BasicComputeShaderTests::~BasicComputeShaderTests(void) { } void BasicComputeShaderTests::init(void) { addChild(new EmptyComputeShaderCase(m_context)); addChild(new UBOToSSBOInvertCase(m_context, "ubo_to_ssbo_single_invocation", "Copy from UBO to SSBO, inverting bits", 256, tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1))); addChild(new UBOToSSBOInvertCase(m_context, "ubo_to_ssbo_single_group", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(2, 1, 4), tcu::IVec3(1, 1, 1))); addChild(new UBOToSSBOInvertCase(m_context, "ubo_to_ssbo_multiple_invocations", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(1, 1, 1), tcu::IVec3(2, 4, 1))); addChild(new UBOToSSBOInvertCase(m_context, "ubo_to_ssbo_multiple_groups", "Copy from UBO to SSBO, inverting bits", 1024, tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4))); 
addChild(new CopyInvertSSBOCase(m_context, "copy_ssbo_single_invocation", "Copy between SSBOs, inverting bits",
                                    256, tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1)));
    addChild(new CopyInvertSSBOCase(m_context, "copy_ssbo_multiple_invocations",
                                    "Copy between SSBOs, inverting bits", 1024, tcu::IVec3(1, 1, 1),
                                    tcu::IVec3(2, 4, 1)));
    addChild(new CopyInvertSSBOCase(m_context, "copy_ssbo_multiple_groups", "Copy between SSBOs, inverting bits",
                                    1024, tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4)));

    // In-place SSBO read/write cases.
    addChild(new InvertSSBOInPlaceCase(m_context, "ssbo_rw_single_invocation", "Read and write same SSBO", 256, true,
                                       tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1)));
    addChild(new InvertSSBOInPlaceCase(m_context, "ssbo_rw_multiple_groups", "Read and write same SSBO", 1024, true,
                                       tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4)));
    addChild(new InvertSSBOInPlaceCase(m_context, "ssbo_unsized_arr_single_invocation", "Read and write same SSBO",
                                       256, false, tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1)));
    addChild(new InvertSSBOInPlaceCase(m_context, "ssbo_unsized_arr_multiple_groups", "Read and write same SSBO",
                                       1024, false, tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4)));

    // Cases writing to multiple SSBOs.
    addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_single_invocation",
                                         "Write to multiple SSBOs", 256, true, tcu::IVec3(1, 1, 1),
                                         tcu::IVec3(1, 1, 1)));
    addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_multiple_groups", "Write to multiple SSBOs",
                                         1024, true, tcu::IVec3(1, 4, 2), tcu::IVec3(2, 2, 4)));
    addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_single_invocation",
                                         "Write to multiple SSBOs", 256, false, tcu::IVec3(1, 1, 1),
                                         tcu::IVec3(1, 1, 1)));
    addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_multiple_groups",
                                         "Write to multiple SSBOs", 1024, false, tcu::IVec3(1, 4, 2),
                                         tcu::IVec3(2, 2, 4)));

    // SSBO barrier cases.
    addChild(new SSBOLocalBarrierCase(m_context, "ssbo_local_barrier_single_invocation", "SSBO local barrier usage",
                                      tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1)));
    addChild(new SSBOLocalBarrierCase(m_context, "ssbo_local_barrier_single_group", "SSBO local barrier usage",
                                      tcu::IVec3(3, 2, 5), tcu::IVec3(1, 1, 1)));
    addChild(new SSBOLocalBarrierCase(m_context, "ssbo_local_barrier_multiple_groups", "SSBO local barrier usage",
                                      tcu::IVec3(3, 4, 1), tcu::IVec3(2, 7, 3)));
    addChild(
        new SSBOBarrierCase(m_context, "ssbo_cmd_barrier_single", "SSBO memory barrier usage", tcu::IVec3(1, 1, 1)));
    addChild(
        new SSBOBarrierCase(m_context, "ssbo_cmd_barrier_multiple", "SSBO memory barrier usage", tcu::IVec3(11, 5, 7)));

    // Shared variable cases.
    addChild(new BasicSharedVarCase(m_context, "shared_var_single_invocation", "Basic shared variable usage",
                                    tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1)));
    addChild(new BasicSharedVarCase(m_context, "shared_var_single_group", "Basic shared variable usage",
                                    tcu::IVec3(3, 2, 5), tcu::IVec3(1, 1, 1)));
    addChild(new BasicSharedVarCase(m_context, "shared_var_multiple_invocations", "Basic shared variable usage",
                                    tcu::IVec3(1, 1, 1), tcu::IVec3(2, 5, 4)));
    addChild(new BasicSharedVarCase(m_context, "shared_var_multiple_groups", "Basic shared variable usage",
                                    tcu::IVec3(3, 4, 1), tcu::IVec3(2, 7, 3)));

    // Shared variable atomic cases.
    addChild(new SharedVarAtomicOpCase(m_context, "shared_atomic_op_single_invocation",
                                       "Atomic operation with shared var", tcu::IVec3(1, 1, 1),
                                       tcu::IVec3(1, 1, 1)));
    addChild(new SharedVarAtomicOpCase(m_context, "shared_atomic_op_single_group",
                                       "Atomic operation with shared var", tcu::IVec3(3, 2, 5),
                                       tcu::IVec3(1, 1, 1)));
    addChild(new SharedVarAtomicOpCase(m_context, "shared_atomic_op_multiple_invocations",
                                       "Atomic operation with shared var", tcu::IVec3(1, 1, 1),
                                       tcu::IVec3(2, 5, 4)));
    addChild(new SharedVarAtomicOpCase(m_context, "shared_atomic_op_multiple_groups",
                                       "Atomic operation with shared var", tcu::IVec3(3, 4, 1),
                                       tcu::IVec3(2, 7, 3)));

    // Image <-> SSBO copy cases (arguments: local size, image size).
    addChild(new CopyImageToSSBOCase(m_context, "copy_image_to_ssbo_small", "Image to SSBO copy", tcu::IVec2(1, 1),
                                     tcu::IVec2(64, 64)));
    addChild(new CopyImageToSSBOCase(m_context, "copy_image_to_ssbo_large", "Image to SSBO copy", tcu::IVec2(2, 4),
                                     tcu::IVec2(512, 512)));
    addChild(new CopySSBOToImageCase(m_context, "copy_ssbo_to_image_small", "SSBO to image copy", tcu::IVec2(1, 1),
                                     tcu::IVec2(64, 64)));
    addChild(new CopySSBOToImageCase(m_context, "copy_ssbo_to_image_large", "SSBO to image copy", tcu::IVec2(2, 4),
                                     tcu::IVec2(512, 512)));

    // Image atomic cases (arguments: local size, image size).
    addChild(new ImageAtomicOpCase(m_context, "image_atomic_op_local_size_1", "Atomic operation with image", 1,
                                   tcu::IVec2(64, 64)));
    addChild(new ImageAtomicOpCase(m_context, "image_atomic_op_local_size_8", "Atomic operation with image", 8,
                                   tcu::IVec2(64, 64)));

    // Image barrier cases (argument: work size).
    addChild(new ImageBarrierCase(m_context, "image_barrier_single", "Image barrier", tcu::IVec2(1, 1)));
    addChild(new ImageBarrierCase(m_context, "image_barrier_multiple", "Image barrier", tcu::IVec2(64, 64)));

    // Atomic counter cases (arguments: local size, work size).
    addChild(new AtomicCounterCase(m_context, "atomic_counter_single_invocation", "Basic atomic counter test",
                                   tcu::IVec3(1, 1, 1), tcu::IVec3(1, 1, 1)));
    addChild(new AtomicCounterCase(m_context, "atomic_counter_single_group", "Basic atomic counter test",
                                   tcu::IVec3(3, 2, 5), tcu::IVec3(1, 1, 1)));
    addChild(new AtomicCounterCase(m_context, "atomic_counter_multiple_invocations", "Basic atomic counter test",
                                   tcu::IVec3(1, 1, 1), tcu::IVec3(2, 5, 4)));
    addChild(new AtomicCounterCase(m_context, "atomic_counter_multiple_groups", "Basic atomic counter test",
                                   tcu::IVec3(3, 4, 1), tcu::IVec3(2, 7, 3)));
}

} // namespace Functional
} // namespace gles31
} // namespace deqp