/*------------------------------------------------------------------------- * drawElements Quality Program OpenGL ES 3.0 Module * ------------------------------------------------- * * Copyright 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * *//*! * \file * \brief Depth buffer performance tests. *//*--------------------------------------------------------------------*/ #include "es3pDepthTests.hpp" #include "glsCalibration.hpp" #include "gluShaderProgram.hpp" #include "gluObjectWrapper.hpp" #include "gluPixelTransfer.hpp" #include "glwFunctions.hpp" #include "glwEnums.hpp" #include "tcuTestLog.hpp" #include "tcuStringTemplate.hpp" #include "tcuCPUWarmup.hpp" #include "tcuCommandLine.hpp" #include "tcuResultCollector.hpp" #include "deClock.h" #include "deString.h" #include "deMath.h" #include "deStringUtil.hpp" #include "deRandom.hpp" #include "deUniquePtr.hpp" #include #include namespace deqp { namespace gles3 { namespace Performance { namespace { using namespace glw; using de::MovePtr; using glu::ProgramSources; using glu::RenderContext; using glu::ShaderSource; using std::map; using std::string; using std::vector; using tcu::TestContext; using tcu::TestLog; using tcu::Vec2; using tcu::Vec3; using tcu::Vec4; struct Sample { int64_t nullTime; int64_t baseTime; int64_t testTime; int order; int workload; }; struct SampleParams { int step; int measurement; SampleParams(int step_, int measurement_) : step(step_), measurement(measurement_) { } }; typedef vector Geometry; struct ObjectData { ProgramSources shader; Geometry geometry; ObjectData(const ProgramSources &shader_, const Geometry &geometry_) : shader(shader_), geometry(geometry_) { } }; class RenderData { public: RenderData(const ObjectData &object, const glu::RenderContext &renderCtx, TestLog &log); ~RenderData(void) { } const glu::ShaderProgram m_program; const glu::VertexArray m_vao; const glu::Buffer m_vbo; const int m_numVertices; }; RenderData::RenderData(const ObjectData &object, const glu::RenderContext &renderCtx, TestLog &log) : m_program(renderCtx, object.shader) , m_vao(renderCtx.getFunctions()) , m_vbo(renderCtx.getFunctions()) , m_numVertices(int(object.geometry.size()) / 4) { const glw::Functions &gl = renderCtx.getFunctions(); if (!m_program.isOk()) log << m_program; gl.bindBuffer(GL_ARRAY_BUFFER, *m_vbo); gl.bufferData(GL_ARRAY_BUFFER, object.geometry.size() * sizeof(float), &object.geometry[0], GL_STATIC_DRAW); gl.bindAttribLocation(m_program.getProgram(), 0, "a_position"); gl.bindVertexArray(*m_vao); gl.vertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL); gl.enableVertexAttribArray(0); gl.bindVertexArray(0); } namespace Utils { vector getFullscreenQuad(float depth) { const float data[] = { +1.0f, +1.0f, depth, 0.0f, // .w is gl_VertexId%3 since Nexus 4&5 can't handle that on their own +1.0f, -1.0f, depth, 1.0f, -1.0f, -1.0f, depth, 2.0f, -1.0f, -1.0f, depth, 0.0f, -1.0f, +1.0f, depth, 1.0f, +1.0f, +1.0f, depth, 2.0f, }; return vector(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data)); } vector getFullscreenQuadWithGradient(float depth0, float depth1) { const float data[] = { +1.0f, +1.0f, depth0, 0.0f, +1.0f, -1.0f, depth0, 1.0f, -1.0f, -1.0f, depth1, 2.0f, -1.0f, -1.0f, depth1, 0.0f, -1.0f, +1.0f, depth1, 1.0f, +1.0f, +1.0f, depth0, 2.0f, }; return vector(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data)); } vector getPartScreenQuad(float coverage, float depth) { const float xMax = -1.0f + 2.0f * coverage; const float data[] = { xMax, +1.0f, depth, 0.0f, xMax, -1.0f, depth, 1.0f, -1.0f, -1.0f, depth, 2.0f, -1.0f, -1.0f, depth, 0.0f, -1.0f, +1.0f, depth, 1.0f, xMax, +1.0f, depth, 2.0f, }; return vector(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data)); } // Axis aligned grid. Depth of vertices is baseDepth +/- depthNoise vector getFullScreenGrid(int resolution, uint32_t seed, float baseDepth, float depthNoise, float xyNoise) { const int gridsize = resolution + 1; vector vertices(gridsize * gridsize); vector retval; de::Random rng(seed); for (int y = 0; y < gridsize; y++) for (int x = 0; x < gridsize; x++) { const bool isEdge = x == 0 || y == 0 || x == resolution || y == resolution; const float x_ = float(x) / float(resolution) * 2.0f - 1.0f + (isEdge ? 0.0f : rng.getFloat(-xyNoise, +xyNoise)); const float y_ = float(y) / float(resolution) * 2.0f - 1.0f + (isEdge ? 0.0f : rng.getFloat(-xyNoise, +xyNoise)); const float z_ = baseDepth + rng.getFloat(-depthNoise, +depthNoise); vertices[y * gridsize + x] = Vec3(x_, y_, z_); } retval.reserve(resolution * resolution * 6); for (int y = 0; y < resolution; y++) for (int x = 0; x < resolution; x++) { const Vec3 &p0 = vertices[(y + 0) * gridsize + (x + 0)]; const Vec3 &p1 = vertices[(y + 0) * gridsize + (x + 1)]; const Vec3 &p2 = vertices[(y + 1) * gridsize + (x + 0)]; const Vec3 &p3 = vertices[(y + 1) * gridsize + (x + 1)]; const float temp[6 * 4] = { p0.x(), p0.y(), p0.z(), 0.0f, p2.x(), p2.y(), p2.z(), 1.0f, p1.x(), p1.y(), p1.z(), 2.0f, p3.x(), p3.y(), p3.z(), 0.0f, p1.x(), p1.y(), p1.z(), 1.0f, p2.x(), p2.y(), p2.z(), 2.0f, }; retval.insert(retval.end(), DE_ARRAY_BEGIN(temp), DE_ARRAY_END(temp)); } return retval; } // Outputs barycentric coordinates as v_bcoords. Otherwise a passthrough shader string getBaseVertexShader(void) { return "#version 300 es\n" "in highp vec4 a_position;\n" "out mediump vec3 v_bcoords;\n" "void main()\n" "{\n" " v_bcoords = vec3(0, 0, 0);\n" " v_bcoords[int(a_position.w)] = 1.0;\n" " gl_Position = vec4(a_position.xyz, 1.0);\n" "}\n"; } // Adds noise to coordinates based on InstanceID Outputs barycentric coordinates as v_bcoords string getInstanceNoiseVertexShader(void) { return "#version 300 es\n" "in highp vec4 a_position;\n" "out mediump vec3 v_bcoords;\n" "void main()\n" "{\n" " v_bcoords = vec3(0, 0, 0);\n" " v_bcoords[int(a_position.w)] = 1.0;\n" " vec3 noise = vec3(sin(float(gl_InstanceID)*1.05), sin(float(gl_InstanceID)*1.23), " "sin(float(gl_InstanceID)*1.71));\n" " gl_Position = vec4(a_position.xyz + noise * 0.005, 1.0);\n" "}\n"; } // Renders green triangles with edges highlighted. Exact shade depends on depth. string getDepthAsGreenFragmentShader(void) { return "#version 300 es\n" "in mediump vec3 v_bcoords;\n" "out mediump vec4 fragColor;\n" "void main()\n" "{\n" " mediump float d = gl_FragCoord.z;\n" " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n" " fragColor = vec4(d,1,d,1);\n" " else\n" " fragColor = vec4(0,d,0,1);\n" "}\n"; } // Renders green triangles with edges highlighted. Exact shade depends on depth. string getDepthAsRedFragmentShader(void) { return "#version 300 es\n" "in mediump vec3 v_bcoords;\n" "out mediump vec4 fragColor;\n" "void main()\n" "{\n" " mediump float d = gl_FragCoord.z;\n" " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n" " fragColor = vec4(1,d,d,1);\n" " else\n" " fragColor = vec4(d,0,0,1);\n" "}\n"; } // Basic time waster. Renders red triangles with edges highlighted. Exact shade depends on depth. string getArithmeticWorkloadFragmentShader(void) { return "#version 300 es\n" "in mediump vec3 v_bcoords;\n" "out mediump vec4 fragColor;\n" "uniform mediump int u_iterations;\n" "void main()\n" "{\n" " mediump float d = gl_FragCoord.z;\n" " for (int i = 0; i params; params["GRIDRENDER_SIZE"] = de::toString(gridsize); return tcu::StringTemplate(fragSrc).specialize(params); } // A static increment to frag depth string getStaticFragDepthFragmentShader(void) { return "#version 300 es\n" "in mediump vec3 v_bcoords;\n" "out mediump vec4 fragColor;\n" "void main()\n" "{\n" " mediump float d = gl_FragCoord.z;\n" " gl_FragDepth = gl_FragCoord.z + 0.1;\n" " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n" " fragColor = vec4(d,1,d,1);\n" " else\n" " fragColor = vec4(0,d,0,1);\n" "}\n"; } // A trivial dynamic change to frag depth string getDynamicFragDepthFragmentShader(void) { return "#version 300 es\n" "in mediump vec3 v_bcoords;\n" "out mediump vec4 fragColor;\n" "void main()\n" "{\n" " mediump float d = gl_FragCoord.z;\n" " gl_FragDepth = gl_FragCoord.z + (v_bcoords.x + v_bcoords.y + v_bcoords.z)*0.05;\n" // Sum of v_bcoords components is allways 1 " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n" " fragColor = vec4(d,1,d,1);\n" " else\n" " fragColor = vec4(0,d,0,1);\n" "}\n"; } // A static increment to frag depth string getStaticFragDepthArithmeticWorkloadFragmentShader(void) { return "#version 300 es\n" "in mediump vec3 v_bcoords;\n" "out mediump vec4 fragColor;\n" "uniform mediump int u_iterations;\n" "void main()\n" "{\n" " mediump float d = gl_FragCoord.z;\n" " gl_FragDepth = gl_FragCoord.z + 0.1;\n" " for (int i = 0; i &samples, const string &name, const string &desc); void logGeometry(const tcu::ConstPixelBufferAccess &sample, const glu::ShaderProgram &occluderProg, const glu::ShaderProgram &occludedProg); virtual void logAnalysis(const vector &samples) = 0; virtual void logDescription(void) = 0; virtual ObjectData genOccluderGeometry(void) const = 0; virtual ObjectData genOccludedGeometry(void) const = 0; virtual int calibrate(void) const = 0; virtual Sample renderSample(const RenderData &occluder, const RenderData &occluded, int workload) const = 0; void render(const RenderData &data) const; void render(const RenderData &data, int instances) const; const RenderContext &m_renderCtx; tcu::ResultCollector m_results; enum { ITERATION_STEPS = 10, ITERATION_SAMPLES = 16 }; }; BaseCase::BaseCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : TestCase(testCtx, tcu::NODETYPE_PERFORMANCE, name, desc) , m_renderCtx(renderCtx) { } BaseCase::IterateResult BaseCase::iterate(void) { typedef de::MovePtr RenderDataP; const glw::Functions &gl = m_renderCtx.getFunctions(); TestLog &log = m_testCtx.getLog(); const glu::Framebuffer framebuffer(gl); const glu::Renderbuffer renderbuffer(gl); const glu::Renderbuffer depthbuffer(gl); vector results; vector params; RenderDataP occluderData; RenderDataP occludedData; tcu::TextureLevel resultTex(tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8), RENDER_SIZE, RENDER_SIZE); int maxWorkload = 0; de::Random rng(deInt32Hash(deStringHash(getName())) ^ m_testCtx.getCommandLine().getBaseSeed()); logDescription(); gl.bindRenderbuffer(GL_RENDERBUFFER, *renderbuffer); gl.renderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, RENDER_SIZE, RENDER_SIZE); gl.bindRenderbuffer(GL_RENDERBUFFER, *depthbuffer); gl.renderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, RENDER_SIZE, RENDER_SIZE); gl.bindFramebuffer(GL_FRAMEBUFFER, *framebuffer); gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, *renderbuffer); gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, *depthbuffer); gl.viewport(0, 0, RENDER_SIZE, RENDER_SIZE); gl.clearColor(0.125f, 0.25f, 0.5f, 1.0f); maxWorkload = calibrate(); // Setup data occluderData = RenderDataP(new RenderData(genOccluderGeometry(), m_renderCtx, log)); occludedData = RenderDataP(new RenderData(genOccludedGeometry(), m_renderCtx, log)); TCU_CHECK(occluderData->m_program.isOk()); TCU_CHECK(occludedData->m_program.isOk()); // Force initialization of GPU resources gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.enable(GL_DEPTH_TEST); render(*occluderData); render(*occludedData); glu::readPixels(m_renderCtx, 0, 0, resultTex.getAccess()); logGeometry(resultTex.getAccess(), occluderData->m_program, occludedData->m_program); params.reserve(ITERATION_STEPS * ITERATION_SAMPLES); // Setup parameters for (int step = 0; step < ITERATION_STEPS; step++) { const int workload = maxWorkload * step / ITERATION_STEPS; for (int count = 0; count < ITERATION_SAMPLES; count++) params.push_back(workload); } rng.shuffle(params.begin(), params.end()); // Render samples for (size_t ndx = 0; ndx < params.size(); ndx++) { const int workload = params[ndx]; Sample sample = renderSample(*occluderData, *occludedData, workload); sample.workload = workload; sample.order = int(ndx); results.push_back(sample); } logSamples(results, "Samples", "Samples"); logAnalysis(results); m_results.setTestContextResult(m_testCtx); return STOP; } void BaseCase::logSamples(const vector &samples, const string &name, const string &desc) { TestLog &log = m_testCtx.getLog(); bool testOnly = true; for (size_t ndx = 0; ndx < samples.size(); ndx++) { if (samples[ndx].baseTime != 0 || samples[ndx].nullTime != 0) { testOnly = false; break; } } log << TestLog::SampleList(name, desc); if (testOnly) { log << TestLog::SampleInfo << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) << TestLog::ValueInfo("Order", "Order of sample", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) << TestLog::ValueInfo("TestTime", "Test render time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << TestLog::EndSampleInfo; for (size_t sampleNdx = 0; sampleNdx < samples.size(); sampleNdx++) { const Sample &sample = samples[sampleNdx]; log << TestLog::Sample << sample.workload << sample.order << sample.testTime << TestLog::EndSample; } } else { log << TestLog::SampleInfo << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) << TestLog::ValueInfo("Order", "Order of sample", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) << TestLog::ValueInfo("TestTime", "Test render time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << TestLog::ValueInfo("NullTime", "Read pixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << TestLog::ValueInfo("BaseTime", "Base render time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << TestLog::EndSampleInfo; for (size_t sampleNdx = 0; sampleNdx < samples.size(); sampleNdx++) { const Sample &sample = samples[sampleNdx]; log << TestLog::Sample << sample.workload << sample.order << sample.testTime << sample.nullTime << sample.baseTime << TestLog::EndSample; } } log << TestLog::EndSampleList; } void BaseCase::logGeometry(const tcu::ConstPixelBufferAccess &sample, const glu::ShaderProgram &occluderProg, const glu::ShaderProgram &occludedProg) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Geometry", "Geometry"); log << TestLog::Message << "Occluding geometry is green with shade dependent on depth (rgb == 0, depth, 0)" << TestLog::EndMessage; log << TestLog::Message << "Occluded geometry is red with shade dependent on depth (rgb == depth, 0, 0)" << TestLog::EndMessage; log << TestLog::Message << "Primitive edges are a lighter shade of red/green" << TestLog::EndMessage; log << TestLog::Image("Test Geometry", "Test Geometry", sample); log << TestLog::EndSection; log << TestLog::Section("Occluder", "Occluder"); log << occluderProg; log << TestLog::EndSection; log << TestLog::Section("Occluded", "Occluded"); log << occludedProg; log << TestLog::EndSection; } void BaseCase::render(const RenderData &data) const { const glw::Functions &gl = m_renderCtx.getFunctions(); gl.useProgram(data.m_program.getProgram()); gl.bindVertexArray(*data.m_vao); gl.drawArrays(GL_TRIANGLES, 0, data.m_numVertices); gl.bindVertexArray(0); } void BaseCase::render(const RenderData &data, int instances) const { const glw::Functions &gl = m_renderCtx.getFunctions(); gl.useProgram(data.m_program.getProgram()); gl.bindVertexArray(*data.m_vao); gl.drawArraysInstanced(GL_TRIANGLES, 0, data.m_numVertices, instances); gl.bindVertexArray(0); } // Render occluder once, then repeatedly render occluded geometry. Sample with multiple repetition counts & establish time per call with linear regression class RenderCountCase : public BaseCase { public: RenderCountCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc); ~RenderCountCase(void) { } protected: virtual void logAnalysis(const vector &samples); private: virtual int calibrate(void) const; virtual Sample renderSample(const RenderData &occluder, const RenderData &occluded, int callcount) const; }; RenderCountCase::RenderCountCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : BaseCase(testCtx, renderCtx, name, desc) { } void RenderCountCase::logAnalysis(const vector &samples) { using namespace gls; TestLog &log = m_testCtx.getLog(); int maxWorkload = 0; vector testSamples(samples.size()); for (size_t ndx = 0; ndx < samples.size(); ndx++) { const Sample &sample = samples[ndx]; testSamples[ndx] = Vec2((float)sample.workload, (float)sample.testTime); maxWorkload = de::max(maxWorkload, sample.workload); } { const float confidence = 0.60f; const LineParametersWithConfidence testParam = theilSenSiegelLinearRegression(testSamples, confidence); const float usPerCall = testParam.coefficient; const float pxPerCall = RENDER_SIZE * RENDER_SIZE; const float pxPerUs = pxPerCall / usPerCall; const float mpxPerS = pxPerUs; log << TestLog::Section("Linear Regression", "Linear Regression"); log << TestLog::Message << "Offset & coefficient presented as [confidence interval min, estimate, confidence interval max]. " "Reported confidence interval for this test is " << confidence << TestLog::EndMessage; log << TestLog::Message << "Render time for scene with depth test was\n\t" << "[" << testParam.offsetConfidenceLower << ", " << testParam.offset << ", " << testParam.offsetConfidenceUpper << "]us +" << "[" << testParam.coefficientConfidenceLower << ", " << testParam.coefficient << ", " << testParam.coefficientConfidenceUpper << "]" << "us/workload" << TestLog::EndMessage; log << TestLog::EndSection; log << TestLog::Section("Result", "Result"); if (testParam.coefficientConfidenceLower < 0.0f) { log << TestLog::Message << "Coefficient confidence bounds include values below 0.0, the operation likely has neglible " "per-pixel cost" << TestLog::EndMessage; m_results.addResult(QP_TEST_RESULT_PASS, "Pass"); } else if (testParam.coefficientConfidenceLower < testParam.coefficientConfidenceUpper * 0.25) { log << TestLog::Message << "Coefficient confidence range is extremely large, cannot give reliable result" << TestLog::EndMessage; m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low"); } else { log << TestLog::Message << "Culled hidden pixels @ " << mpxPerS << "Mpx/s" << TestLog::EndMessage; m_results.addResult(QP_TEST_RESULT_PASS, de::floatToString(mpxPerS, 2)); } log << TestLog::EndSection; } } Sample RenderCountCase::renderSample(const RenderData &occluder, const RenderData &occluded, int callcount) const { const glw::Functions &gl = m_renderCtx.getFunctions(); Sample sample; uint64_t now = 0; uint64_t prev = 0; uint8_t buffer[4]; // Stabilize { gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.enable(GL_DEPTH_TEST); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); } prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.enable(GL_DEPTH_TEST); render(occluder); render(occluded, callcount); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); now = deGetMicroseconds(); sample.testTime = now - prev; sample.baseTime = 0; sample.nullTime = 0; sample.workload = callcount; return sample; } int RenderCountCase::calibrate(void) const { using namespace gls; const glw::Functions &gl = m_renderCtx.getFunctions(); TestLog &log = m_testCtx.getLog(); const RenderData occluderGeometry(genOccluderGeometry(), m_renderCtx, log); const RenderData occludedGeometry(genOccludedGeometry(), m_renderCtx, log); TheilSenCalibrator calibrator(CalibratorParameters(20, // Initial workload 10, // Max iteration frames 20.0f, // Iteration shortcut threshold ms 20, // Max iterations 33.0f, // Target frame time 40.0f, // Frame time cap 1000.0f // Target measurement duration )); while (true) { switch (calibrator.getState()) { case TheilSenCalibrator::STATE_FINISHED: logCalibrationInfo(m_testCtx.getLog(), calibrator); return calibrator.getCallCount(); case TheilSenCalibrator::STATE_MEASURE: { uint8_t buffer[4]; int64_t now; int64_t prev; prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.disable(GL_DEPTH_TEST); render(occluderGeometry); render(occludedGeometry, calibrator.getCallCount()); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); now = deGetMicroseconds(); calibrator.recordIteration(now - prev); break; } case TheilSenCalibrator::STATE_RECOMPUTE_PARAMS: calibrator.recomputeParameters(); break; default: DE_ASSERT(false); return 1; } } } // Compares time/workload gradients of same geometry with and without depth testing class RelativeChangeCase : public BaseCase { public: RelativeChangeCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc); virtual ~RelativeChangeCase(void) { } protected: Sample renderSample(const RenderData &occluder, const RenderData &occluded, int workload) const; virtual void logAnalysis(const vector &samples); private: int calibrate(void) const; }; RelativeChangeCase::RelativeChangeCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : BaseCase(testCtx, renderCtx, name, desc) { } int RelativeChangeCase::calibrate(void) const { using namespace gls; const glw::Functions &gl = m_renderCtx.getFunctions(); TestLog &log = m_testCtx.getLog(); const RenderData geom(genOccludedGeometry(), m_renderCtx, log); TheilSenCalibrator calibrator(CalibratorParameters(20, // Initial workload 10, // Max iteration frames 20.0f, // Iteration shortcut threshold ms 20, // Max iterations 33.0f, // Target frame time 40.0f, // Frame time cap 1000.0f // Target measurement duration )); while (true) { switch (calibrator.getState()) { case TheilSenCalibrator::STATE_FINISHED: logCalibrationInfo(m_testCtx.getLog(), calibrator); return calibrator.getCallCount(); case TheilSenCalibrator::STATE_MEASURE: { uint8_t buffer[4]; const GLuint program = geom.m_program.getProgram(); gl.useProgram(program); gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), calibrator.getCallCount()); const int64_t prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.disable(GL_DEPTH_TEST); render(geom); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); const int64_t now = deGetMicroseconds(); calibrator.recordIteration(now - prev); break; } case TheilSenCalibrator::STATE_RECOMPUTE_PARAMS: calibrator.recomputeParameters(); break; default: DE_ASSERT(false); return 1; } } } Sample RelativeChangeCase::renderSample(const RenderData &occluder, const RenderData &occluded, int workload) const { const glw::Functions &gl = m_renderCtx.getFunctions(); const GLuint program = occluded.m_program.getProgram(); Sample sample; uint64_t now = 0; uint64_t prev = 0; uint8_t buffer[4]; gl.useProgram(program); gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), workload); // Warmup (this workload seems to reduce variation in following workloads) { gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.disable(GL_DEPTH_TEST); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); } // Null time { prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.disable(GL_DEPTH_TEST); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); now = deGetMicroseconds(); sample.nullTime = now - prev; } // Test time { prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.enable(GL_DEPTH_TEST); render(occluder); render(occluded); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); now = deGetMicroseconds(); sample.testTime = now - prev; } // Base time { prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.disable(GL_DEPTH_TEST); render(occluder); render(occluded); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); now = deGetMicroseconds(); sample.baseTime = now - prev; } sample.workload = 0; return sample; } void RelativeChangeCase::logAnalysis(const vector &samples) { using namespace gls; TestLog &log = m_testCtx.getLog(); int maxWorkload = 0; vector nullSamples(samples.size()); vector baseSamples(samples.size()); vector testSamples(samples.size()); for (size_t ndx = 0; ndx < samples.size(); ndx++) { const Sample &sample = samples[ndx]; nullSamples[ndx] = Vec2((float)sample.workload, (float)sample.nullTime); baseSamples[ndx] = Vec2((float)sample.workload, (float)sample.baseTime); testSamples[ndx] = Vec2((float)sample.workload, (float)sample.testTime); maxWorkload = de::max(maxWorkload, sample.workload); } { const float confidence = 0.60f; const LineParametersWithConfidence nullParam = theilSenSiegelLinearRegression(nullSamples, confidence); const LineParametersWithConfidence baseParam = theilSenSiegelLinearRegression(baseSamples, confidence); const LineParametersWithConfidence testParam = theilSenSiegelLinearRegression(testSamples, confidence); if (!de::inRange(0.0f, nullParam.coefficientConfidenceLower, nullParam.coefficientConfidenceUpper)) { m_results.addResult(QP_TEST_RESULT_FAIL, "Constant operation sequence duration not constant"); log << TestLog::Message << "Constant operation sequence timing may vary as a function of workload. Result quality extremely low" << TestLog::EndMessage; } if (de::inRange(0.0f, baseParam.coefficientConfidenceLower, baseParam.coefficientConfidenceUpper)) { m_results.addResult(QP_TEST_RESULT_FAIL, "Workload has no effect on duration"); log << TestLog::Message << "Workload factor has no effect on duration of sample (smart optimizer?)" << TestLog::EndMessage; } log << TestLog::Section("Linear Regression", "Linear Regression"); log << TestLog::Message << "Offset & coefficient presented as [confidence interval min, estimate, confidence interval max]. " "Reported confidence interval for this test is " << confidence << TestLog::EndMessage; log << TestLog::Message << "Render time for empty scene was\n\t" << "[" << nullParam.offsetConfidenceLower << ", " << nullParam.offset << ", " << nullParam.offsetConfidenceUpper << "]us +" << "[" << nullParam.coefficientConfidenceLower << ", " << nullParam.coefficient << ", " << nullParam.coefficientConfidenceUpper << "]" << "us/workload" << TestLog::EndMessage; log << TestLog::Message << "Render time for scene without depth test was\n\t" << "[" << baseParam.offsetConfidenceLower << ", " << baseParam.offset << ", " << baseParam.offsetConfidenceUpper << "]us +" << "[" << baseParam.coefficientConfidenceLower << ", " << baseParam.coefficient << ", " << baseParam.coefficientConfidenceUpper << "]" << "us/workload" << TestLog::EndMessage; log << TestLog::Message << "Render time for scene with depth test was\n\t" << "[" << testParam.offsetConfidenceLower << ", " << testParam.offset << ", " << testParam.offsetConfidenceUpper << "]us +" << "[" << testParam.coefficientConfidenceLower << ", " << testParam.coefficient << ", " << testParam.coefficientConfidenceUpper << "]" << "us/workload" << TestLog::EndMessage; log << TestLog::EndSection; if (de::inRange(0.0f, testParam.coefficientConfidenceLower, testParam.coefficientConfidenceUpper)) { log << TestLog::Message << "Test duration not dependent on culled workload" << TestLog::EndMessage; m_results.addResult(QP_TEST_RESULT_PASS, "0.0"); } else if (testParam.coefficientConfidenceLower < testParam.coefficientConfidenceUpper * 0.25) { log << TestLog::Message << "Coefficient confidence range is extremely large, cannot give reliable result" << TestLog::EndMessage; m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low"); } else if (baseParam.coefficientConfidenceLower < baseParam.coefficientConfidenceUpper * 0.25) { log << TestLog::Message << "Coefficient confidence range for base render time is extremely large, cannot give reliable result" << TestLog::EndMessage; m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low"); } else { log << TestLog::Message << "Test duration is dependent on culled workload" << TestLog::EndMessage; m_results.addResult(QP_TEST_RESULT_PASS, de::floatToString(de::abs(testParam.coefficient) / de::abs(baseParam.coefficient), 2)); } } } // Speed of trivial culling class BaseCostCase : public RenderCountCase { public: BaseCostCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : RenderCountCase(testCtx, renderCtx, name, desc) { } ~BaseCostCase(void) { } private: virtual ObjectData genOccluderGeometry(void) const { return Utils::fastQuad(0.2f); } virtual ObjectData genOccludedGeometry(void) const { return Utils::variableQuad(0.8f); } virtual void logDescription(void) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second " "(occluded) is rendered repeatedly" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of " "workload" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Gradient class GradientCostCase : public RenderCountCase { public: GradientCostCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc, float gradientDistance) : RenderCountCase(testCtx, renderCtx, name, desc) , m_gradientDistance(gradientDistance) { } ~GradientCostCase(void) { } private: virtual ObjectData genOccluderGeometry(void) const { return Utils::fastQuadWithGradient(0.0f, 1.0f - m_gradientDistance); } virtual ObjectData genOccludedGeometry(void) const { return ObjectData( glu::makeVtxFragSources(Utils::getInstanceNoiseVertexShader(), Utils::getDepthAsRedFragmentShader()), Utils::getFullscreenQuadWithGradient(m_gradientDistance, 1.0f)); } virtual void logDescription(void) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second " "(occluded) is rendered repeatedly" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; log << TestLog::Message << "The quads are tilted so that the left edge of the occluded quad has a depth of 1.0 and the right edge " "of the occluding quad has a depth of 0.0." << TestLog::EndMessage; log << TestLog::Message << "The quads are spaced to have a depth difference of " << m_gradientDistance << " at all points." << TestLog::EndMessage; log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of " "workload" << TestLog::EndMessage; log << TestLog::EndSection; } const float m_gradientDistance; }; // Constant offset to frag depth in occluder class OccluderStaticFragDepthCostCase : public RenderCountCase { public: OccluderStaticFragDepthCostCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : RenderCountCase(testCtx, renderCtx, name, desc) { } ~OccluderStaticFragDepthCostCase(void) { } private: virtual ObjectData genOccluderGeometry(void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); } virtual ObjectData genOccludedGeometry(void) const { return Utils::fastQuad(0.8f); } virtual void logDescription(void) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second " "(occluded) is rendered repeatedly" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; log << TestLog::Message << "The occluder quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage; log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of " "workload" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Dynamic offset to frag depth in occluder class OccluderDynamicFragDepthCostCase : public RenderCountCase { public: OccluderDynamicFragDepthCostCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : RenderCountCase(testCtx, renderCtx, name, desc) { } ~OccluderDynamicFragDepthCostCase(void) { } private: virtual ObjectData genOccluderGeometry(void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); } virtual ObjectData genOccludedGeometry(void) const { return Utils::fastQuad(0.8f); } virtual void logDescription(void) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second " "(occluded) is rendered repeatedly" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; log << TestLog::Message << "The occluder quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage; log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of " "workload" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Constant offset to frag depth in occluder class OccludedStaticFragDepthCostCase : public RenderCountCase { public: OccludedStaticFragDepthCostCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : RenderCountCase(testCtx, renderCtx, name, desc) { } ~OccludedStaticFragDepthCostCase(void) { } private: virtual ObjectData genOccluderGeometry(void) const { return Utils::fastQuad(0.2f); } virtual ObjectData genOccludedGeometry(void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); } virtual void logDescription(void) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second " "(occluded) is rendered repeatedly" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; log << TestLog::Message << "The occluded quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage; log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of " "workload" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Dynamic offset to frag depth in occluder class OccludedDynamicFragDepthCostCase : public RenderCountCase { public: OccludedDynamicFragDepthCostCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : RenderCountCase(testCtx, renderCtx, name, desc) { } ~OccludedDynamicFragDepthCostCase(void) { } private: virtual ObjectData genOccluderGeometry(void) const { return Utils::fastQuad(0.2f); } virtual ObjectData genOccludedGeometry(void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); } virtual void logDescription(void) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second " "(occluded) is rendered repeatedly" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; log << TestLog::Message << "The occluded quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage; log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of " "workload" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Culling speed with slightly less trivial geometry class OccludingGeometryComplexityCostCase : public RenderCountCase { public: OccludingGeometryComplexityCostCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc, int resolution, float xyNoise, float zNoise) : RenderCountCase(testCtx, renderCtx, name, desc) , m_resolution(resolution) , m_xyNoise(xyNoise) , m_zNoise(zNoise) { } ~OccludingGeometryComplexityCostCase(void) { } private: virtual ObjectData genOccluderGeometry(void) const { return ObjectData(Utils::getBaseShader(), Utils::getFullScreenGrid(m_resolution, deInt32Hash(deStringHash(getName())) ^ m_testCtx.getCommandLine().getBaseSeed(), 0.2f, m_zNoise, m_xyNoise)); } virtual ObjectData genOccludedGeometry(void) const { return Utils::variableQuad(0.8f); } virtual void logDescription(void) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of an occluding grid and an occluded fullsceen quad. The occluding geometry is " "rendered once, the occluded one is rendered repeatedly" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of " "workload" << TestLog::EndMessage; log << TestLog::EndSection; } const int m_resolution; const float m_xyNoise; const float m_zNoise; }; // Cases with varying workloads in the fragment shader class FragmentWorkloadCullCase : public RelativeChangeCase { public: FragmentWorkloadCullCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc); virtual ~FragmentWorkloadCullCase(void) { } private: virtual ObjectData genOccluderGeometry(void) const { return Utils::fastQuad(0.2f); } virtual void logDescription(void); }; FragmentWorkloadCullCase::FragmentWorkloadCullCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : RelativeChangeCase(testCtx, renderCtx, name, desc) { } void FragmentWorkloadCullCase::logDescription(void) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of culled fragment workload on render time" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) quad uses a trivial shader," "the second (occluded) contains significant fragment shader work" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of unused work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; log << TestLog::EndSection; } // Additional workload consists of texture lookups class FragmentTextureWorkloadCullCase : public FragmentWorkloadCullCase { public: FragmentTextureWorkloadCullCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc); virtual ~FragmentTextureWorkloadCullCase(void) { } virtual void init(void); virtual void deinit(void); private: typedef MovePtr TexPtr; virtual ObjectData genOccludedGeometry(void) const { return ObjectData(Utils::getTextureWorkloadShader(), Utils::getFullscreenQuad(0.8f)); } TexPtr m_texture; }; FragmentTextureWorkloadCullCase::FragmentTextureWorkloadCullCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : FragmentWorkloadCullCase(testCtx, renderCtx, name, desc) { } void FragmentTextureWorkloadCullCase::init(void) { const glw::Functions &gl = m_renderCtx.getFunctions(); const int size = 128; const vector data(size * size * 4, 255); m_texture = MovePtr(new glu::Texture(gl)); gl.bindTexture(GL_TEXTURE_2D, m_texture); gl.texImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, size, size, 0, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); } void FragmentTextureWorkloadCullCase::deinit(void) { m_texture.clear(); } // Additional workload consists of arithmetic class FragmentArithmeticWorkloadCullCase : public FragmentWorkloadCullCase { public: FragmentArithmeticWorkloadCullCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : FragmentWorkloadCullCase(testCtx, renderCtx, name, desc) { } virtual ~FragmentArithmeticWorkloadCullCase(void) { } private: virtual ObjectData genOccludedGeometry(void) const { return ObjectData(Utils::getArithmeticWorkloadShader(), Utils::getFullscreenQuad(0.8f)); } }; // Contains dynamicly unused discard after a series of calculations class FragmentDiscardArithmeticWorkloadCullCase : public FragmentWorkloadCullCase { public: FragmentDiscardArithmeticWorkloadCullCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : FragmentWorkloadCullCase(testCtx, renderCtx, name, desc) { } virtual ~FragmentDiscardArithmeticWorkloadCullCase(void) { } private: virtual ObjectData genOccludedGeometry(void) const { return ObjectData(Utils::getArithmeticWorkloadDiscardShader(), Utils::getFullscreenQuad(0.8f)); } virtual void logDescription(void) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of culled fragment workload on render time" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) quad uses a trivial shader," "the second (occluded) contains significant fragment shader work and a discard that is never triggers " "but has a dynamic condition" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of unused work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Discards fragments from the occluder in a grid pattern class PartialOccluderDiscardCullCase : public RelativeChangeCase { public: PartialOccluderDiscardCullCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc, int gridsize) : RelativeChangeCase(testCtx, renderCtx, name, desc) , m_gridsize(gridsize) { } virtual ~PartialOccluderDiscardCullCase(void) { } private: virtual ObjectData genOccluderGeometry(void) const { return Utils::quadWith(Utils::getGridDiscardShader(m_gridsize), 0.2f); } virtual ObjectData genOccludedGeometry(void) const { return Utils::slowQuad(0.8f); } virtual void logDescription(void) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of partially discarded occluder on rendering time" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) quad discards half the " "fragments in a grid pattern, the second (partially occluded) contains significant fragment shader work" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of unused work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in depth testing halving the render time" << TestLog::EndMessage; log << TestLog::EndSection; } const int m_gridsize; }; // Trivial occluder covering part of screen class PartialOccluderCullCase : public RelativeChangeCase { public: PartialOccluderCullCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc, float coverage) : RelativeChangeCase(testCtx, renderCtx, name, desc) , m_coverage(coverage) { } ~PartialOccluderCullCase(void) { } private: virtual ObjectData genOccluderGeometry(void) const { return ObjectData(Utils::getBaseShader(), Utils::getPartScreenQuad(m_coverage, 0.2f)); } virtual ObjectData genOccludedGeometry(void) const { return Utils::slowQuad(0.8f); } virtual void logDescription(void) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of partial occluder on rendering time" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two quads. The first (occluding) quad covers " << m_coverage * 100.0f << "% of the screen, while the second (partially occluded, fullscreen) contains significant fragment " "shader work" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of unused work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in render time increasing proportionally with unoccluded area" << TestLog::EndMessage; log << TestLog::EndSection; } const float m_coverage; }; // Constant offset to frag depth in occluder class StaticOccluderFragDepthCullCase : public RelativeChangeCase { public: StaticOccluderFragDepthCullCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : RelativeChangeCase(testCtx, renderCtx, name, desc) { } ~StaticOccluderFragDepthCullCase(void) { } private: virtual ObjectData genOccluderGeometry(void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); } virtual ObjectData genOccludedGeometry(void) const { return Utils::slowQuad(0.8f); } virtual void logDescription(void) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of non-default frag depth on culling efficiency" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second " "(occluded) contains significant fragment shader work" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of unused work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The occluder quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Dynamic offset to frag depth in occluder class DynamicOccluderFragDepthCullCase : public RelativeChangeCase { public: DynamicOccluderFragDepthCullCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : RelativeChangeCase(testCtx, renderCtx, name, desc) { } ~DynamicOccluderFragDepthCullCase(void) { } private: virtual ObjectData genOccluderGeometry(void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); } virtual ObjectData genOccludedGeometry(void) const { return Utils::slowQuad(0.8f); } virtual void logDescription(void) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of non-default frag depth on culling efficiency" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second " "(occluded) contains significant fragment shader work" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of unused work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The occluder quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Constant offset to frag depth in occluded class StaticOccludedFragDepthCullCase : public RelativeChangeCase { public: StaticOccludedFragDepthCullCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : RelativeChangeCase(testCtx, renderCtx, name, desc) { } ~StaticOccludedFragDepthCullCase(void) { } private: virtual ObjectData genOccluderGeometry(void) const { return Utils::fastQuad(0.2f); } virtual ObjectData genOccludedGeometry(void) const { return Utils::quadWith(Utils::getStaticFragDepthArithmeticWorkloadFragmentShader(), 0.2f); } virtual void logDescription(void) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of non-default frag depth on rendering time" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second " "(occluded) contains significant fragment shader work" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of unused work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The occluded quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Dynamic offset to frag depth in occluded class DynamicOccludedFragDepthCullCase : public RelativeChangeCase { public: DynamicOccludedFragDepthCullCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : RelativeChangeCase(testCtx, renderCtx, name, desc) { } ~DynamicOccludedFragDepthCullCase(void) { } private: virtual ObjectData genOccluderGeometry(void) const { return Utils::fastQuad(0.2f); } virtual ObjectData genOccludedGeometry(void) const { return Utils::quadWith(Utils::getDynamicFragDepthArithmeticWorkloadFragmentShader(), 0.2f); } virtual void logDescription(void) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of non-default frag depth on rendering time" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second " "(occluded) contains significant fragment shader work" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of unused work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The occluded quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Dynamic offset to frag depth in occluded class ReversedDepthOrderCullCase : public RelativeChangeCase { public: ReversedDepthOrderCullCase(TestContext &testCtx, const RenderContext &renderCtx, const char *name, const char *desc) : RelativeChangeCase(testCtx, renderCtx, name, desc) { } ~ReversedDepthOrderCullCase(void) { } private: virtual ObjectData genOccluderGeometry(void) const { return Utils::fastQuad(0.2f); } virtual ObjectData genOccludedGeometry(void) const { return Utils::slowQuad(0.8f); } virtual void logDescription(void) { TestLog &log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of of back first rendering order on culling efficiency" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullscreen quads. The second (occluding) quad is trivial, while the first " "(occluded) contains significant fragment shader work" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of unused work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; log << TestLog::EndSection; } // Rendering order of occluder & occluded is reversed, otherwise identical to parent version Sample renderSample(const RenderData &occluder, const RenderData &occluded, int workload) const { const glw::Functions &gl = m_renderCtx.getFunctions(); const GLuint program = occluded.m_program.getProgram(); Sample sample; uint64_t now = 0; uint64_t prev = 0; uint8_t buffer[4]; gl.useProgram(program); gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), workload); // Warmup (this workload seems to reduce variation in following workloads) { gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.disable(GL_DEPTH_TEST); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); } // Null time { prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.disable(GL_DEPTH_TEST); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); now = deGetMicroseconds(); sample.nullTime = now - prev; } // Test time { prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.enable(GL_DEPTH_TEST); render(occluded); render(occluder); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); now = deGetMicroseconds(); sample.testTime = now - prev; } // Base time { prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.disable(GL_DEPTH_TEST); render(occluded); render(occluder); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); now = deGetMicroseconds(); sample.baseTime = now - prev; } sample.workload = 0; return sample; } }; } // namespace DepthTests::DepthTests(Context &context) : TestCaseGroup(context, "depth", "Depth culling performance") { } void DepthTests::init(void) { TestContext &testCtx = m_context.getTestContext(); const RenderContext &renderCtx = m_context.getRenderContext(); { tcu::TestCaseGroup *const cullEfficiencyGroup = new tcu::TestCaseGroup(m_testCtx, "cull_efficiency", "Fragment cull efficiency"); addChild(cullEfficiencyGroup); { tcu::TestCaseGroup *const group = new tcu::TestCaseGroup(m_testCtx, "workload", "Workload"); cullEfficiencyGroup->addChild(group); group->addChild(new FragmentTextureWorkloadCullCase(testCtx, renderCtx, "workload_texture", "Fragment shader with texture lookup workload")); group->addChild(new FragmentArithmeticWorkloadCullCase(testCtx, renderCtx, "workload_arithmetic", "Fragment shader with arithmetic workload")); group->addChild(new FragmentDiscardArithmeticWorkloadCullCase( testCtx, renderCtx, "workload_arithmetic_discard", "Fragment shader that may discard with arithmetic workload")); } { tcu::TestCaseGroup *const group = new tcu::TestCaseGroup(m_testCtx, "occluder_discard", "Discard"); cullEfficiencyGroup->addChild(group); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_256", "Parts of occluder geometry discarded", 256)); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_128", "Parts of occluder geometry discarded", 128)); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_64", "Parts of occluder geometry discarded", 64)); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_32", "Parts of occluder geometry discarded", 32)); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_16", "Parts of occluder geometry discarded", 16)); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_8", "Parts of occluder geometry discarded", 8)); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_4", "Parts of occluder geometry discarded", 4)); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_2", "Parts of occluder geometry discarded", 2)); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_1", "Parts of occluder geometry discarded", 1)); } { tcu::TestCaseGroup *const group = new tcu::TestCaseGroup(m_testCtx, "partial_coverage", "Partial Coverage"); cullEfficiencyGroup->addChild(group); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "100", "Occluder covering only part of occluded geometry", 1.00f)); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "099", "Occluder covering only part of occluded geometry", 0.99f)); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "095", "Occluder covering only part of occluded geometry", 0.95f)); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "090", "Occluder covering only part of occluded geometry", 0.90f)); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "080", "Occluder covering only part of occluded geometry", 0.80f)); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "070", "Occluder covering only part of occluded geometry", 0.70f)); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "050", "Occluder covering only part of occluded geometry", 0.50f)); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "025", "Occluder covering only part of occluded geometry", 0.25f)); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "010", "Occluder covering only part of occluded geometry", 0.10f)); } { tcu::TestCaseGroup *const group = new tcu::TestCaseGroup(m_testCtx, "frag_depth", "Partial Coverage"); cullEfficiencyGroup->addChild(group); group->addChild(new StaticOccluderFragDepthCullCase(testCtx, renderCtx, "occluder_static", "")); group->addChild(new DynamicOccluderFragDepthCullCase(testCtx, renderCtx, "occluder_dynamic", "")); group->addChild(new StaticOccludedFragDepthCullCase(testCtx, renderCtx, "occluded_static", "")); group->addChild(new DynamicOccludedFragDepthCullCase(testCtx, renderCtx, "occluded_dynamic", "")); } { tcu::TestCaseGroup *const group = new tcu::TestCaseGroup(m_testCtx, "order", "Rendering order"); cullEfficiencyGroup->addChild(group); group->addChild( new ReversedDepthOrderCullCase(testCtx, renderCtx, "reversed", "Back to front rendering order")); } } { tcu::TestCaseGroup *const testCostGroup = new tcu::TestCaseGroup(m_testCtx, "culled_pixel_cost", "Fragment cull efficiency"); addChild(testCostGroup); { tcu::TestCaseGroup *const group = new tcu::TestCaseGroup(m_testCtx, "gradient", "Gradients with small depth differences"); testCostGroup->addChild(group); group->addChild(new BaseCostCase(testCtx, renderCtx, "flat", "")); group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_050", "", 0.50f)); group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_010", "", 0.10f)); group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_005", "", 0.05f)); group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_002", "", 0.02f)); group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_001", "", 0.01f)); } { tcu::TestCaseGroup *const group = new tcu::TestCaseGroup(m_testCtx, "occluder_geometry", "Occluders with varying geometry complexity"); testCostGroup->addChild(group); group->addChild( new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_5", "", 5, 0.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_15", "", 15, 0.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_25", "", 25, 0.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_50", "", 50, 0.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_100", "", 100, 0.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_5", "", 5, 1.0f / 5.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_15", "", 15, 1.0f / 15.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_25", "", 25, 1.0f / 25.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_50", "", 50, 1.0f / 50.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_100", "", 100, 1.0f / 100.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_5", "", 5, 0.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_15", "", 15, 0.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_25", "", 25, 0.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_50", "", 50, 0.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_100", "", 100, 0.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_5", "", 5, 1.0f / 5.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_15", "", 15, 1.0f / 15.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_25", "", 25, 1.0f / 25.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_50", "", 50, 1.0f / 50.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_100", "", 100, 1.0f / 100.0f, 0.2f)); } { tcu::TestCaseGroup *const group = new tcu::TestCaseGroup(m_testCtx, "frag_depth", "Modifying gl_FragDepth"); testCostGroup->addChild(group); group->addChild(new OccluderStaticFragDepthCostCase(testCtx, renderCtx, "occluder_static", "")); group->addChild(new OccluderDynamicFragDepthCostCase(testCtx, renderCtx, "occluder_dynamic", "")); group->addChild(new OccludedStaticFragDepthCostCase(testCtx, renderCtx, "occluded_static", "")); group->addChild(new OccludedDynamicFragDepthCostCase(testCtx, renderCtx, "occluded_dynamic", "")); } } } } // namespace Performance } // namespace gles3 } // namespace deqp