// Copyright 2016 The SwiftShader Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "Renderer.hpp" #include "Clipper.hpp" #include "Polygon.hpp" #include "Primitive.hpp" #include "Vertex.hpp" #include "Pipeline/Constants.hpp" #include "Pipeline/SpirvShader.hpp" #include "Reactor/Reactor.hpp" #include "System/Debug.hpp" #include "System/Half.hpp" #include "System/Math.hpp" #include "System/Memory.hpp" #include "System/Timer.hpp" #include "Vulkan/VkConfig.hpp" #include "Vulkan/VkDescriptorSet.hpp" #include "Vulkan/VkDevice.hpp" #include "Vulkan/VkFence.hpp" #include "Vulkan/VkImageView.hpp" #include "Vulkan/VkPipelineLayout.hpp" #include "Vulkan/VkQueryPool.hpp" #include "marl/containers.h" #include "marl/defer.h" #include "marl/trace.h" #undef max #ifndef NDEBUG unsigned int minPrimitives = 1; unsigned int maxPrimitives = 1 << 21; #endif namespace sw { template inline bool setBatchIndices(unsigned int batch[128][3], VkPrimitiveTopology topology, VkProvokingVertexModeEXT provokingVertexMode, T indices, unsigned int start, unsigned int triangleCount) { bool provokeFirst = (provokingVertexMode == VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT); switch(topology) { case VK_PRIMITIVE_TOPOLOGY_POINT_LIST: { auto index = start; auto pointBatch = &(batch[0][0]); for(unsigned int i = 0; i < triangleCount; i++) { *pointBatch++ = indices[index++]; } // Repeat the last index to allow for SIMD width overrun. index--; for(unsigned int i = 0; i < 3; i++) { *pointBatch++ = indices[index]; } } break; case VK_PRIMITIVE_TOPOLOGY_LINE_LIST: { auto index = 2 * start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = indices[index + (provokeFirst ? 0 : 1)]; batch[i][1] = indices[index + (provokeFirst ? 1 : 0)]; batch[i][2] = indices[index + 1]; index += 2; } } break; case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP: { auto index = start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = indices[index + (provokeFirst ? 0 : 1)]; batch[i][1] = indices[index + (provokeFirst ? 1 : 0)]; batch[i][2] = indices[index + 1]; index += 1; } } break; case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST: { auto index = 3 * start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = indices[index + (provokeFirst ? 0 : 2)]; batch[i][1] = indices[index + (provokeFirst ? 1 : 0)]; batch[i][2] = indices[index + (provokeFirst ? 2 : 1)]; index += 3; } } break; case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP: { auto index = start; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][0] = indices[index + (provokeFirst ? 0 : 2)]; batch[i][1] = indices[index + ((start + i) & 1) + (provokeFirst ? 1 : 0)]; batch[i][2] = indices[index + (~(start + i) & 1) + (provokeFirst ? 1 : 0)]; index += 1; } } break; case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN: { auto index = start + 1; for(unsigned int i = 0; i < triangleCount; i++) { batch[i][provokeFirst ? 0 : 2] = indices[index + 0]; batch[i][provokeFirst ? 1 : 0] = indices[index + 1]; batch[i][provokeFirst ? 2 : 1] = indices[0]; index += 1; } } break; default: ASSERT(false); return false; } return true; } DrawCall::DrawCall() { // TODO(b/140991626): Use allocateUninitialized() instead of allocateZeroOrPoison() to improve startup peformance. data = (DrawData *)sw::allocateZeroOrPoison(sizeof(DrawData)); } DrawCall::~DrawCall() { sw::freeMemory(data); } Renderer::Renderer(vk::Device *device) : device(device) { vertexProcessor.setRoutineCacheSize(1024); pixelProcessor.setRoutineCacheSize(1024); setupProcessor.setRoutineCacheSize(1024); } Renderer::~Renderer() { drawTickets.take().wait(); } // Renderer objects have to be mem aligned to the alignment provided in the class declaration void *Renderer::operator new(size_t size) { ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class return vk::allocateHostMemory(sizeof(Renderer), alignof(Renderer), vk::NULL_ALLOCATION_CALLBACKS, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); } void Renderer::operator delete(void *mem) { vk::freeHostMemory(mem, vk::NULL_ALLOCATION_CALLBACKS); } void Renderer::draw(const vk::GraphicsPipeline *pipeline, const vk::DynamicState &dynamicState, unsigned int count, int baseVertex, CountedEvent *events, int instanceID, int layer, void *indexBuffer, const VkRect2D &renderArea, const vk::Pipeline::PushConstantStorage &pushConstants, bool update) { if(count == 0) { return; } auto id = nextDrawID++; MARL_SCOPED_EVENT("draw %d", id); marl::Pool::Loan draw; { MARL_SCOPED_EVENT("drawCallPool.borrow()"); draw = drawCallPool.borrow(); } draw->id = id; const vk::GraphicsState &pipelineState = pipeline->getCombinedState(dynamicState); // A graphics pipeline must always be "complete" before it can be used for drawing. A // complete graphics pipeline always includes the vertex input interface and // pre-rasterization subsets, but only includes fragment and fragment output interface // subsets if rasterizer discard is not enabled. // // Note that in the following, the setupPrimitives, setupRoutine and pixelRoutine functions // are only called when rasterizer discard is not enabled. If rasterizer discard is // enabled, these functions and state for the latter two states are not set. const vk::VertexInputInterfaceState &vertexInputInterfaceState = pipelineState.getVertexInputInterfaceState(); const vk::PreRasterizationState &preRasterizationState = pipelineState.getPreRasterizationState(); const vk::FragmentState *fragmentState = nullptr; const vk::FragmentOutputInterfaceState *fragmentOutputInterfaceState = nullptr; const bool hasRasterizerDiscard = preRasterizationState.hasRasterizerDiscard(); if(!hasRasterizerDiscard) { fragmentState = &pipelineState.getFragmentState(); fragmentOutputInterfaceState = &pipelineState.getFragmentOutputInterfaceState(); pixelProcessor.setBlendConstant(fragmentOutputInterfaceState->getBlendConstants()); } const vk::Inputs &inputs = pipeline->getInputs(); if(update) { MARL_SCOPED_EVENT("update"); const sw::SpirvShader *fragmentShader = pipeline->getShader(VK_SHADER_STAGE_FRAGMENT_BIT).get(); const sw::SpirvShader *vertexShader = pipeline->getShader(VK_SHADER_STAGE_VERTEX_BIT).get(); const vk::Attachments attachments = pipeline->getAttachments(); vertexState = vertexProcessor.update(pipelineState, vertexShader, inputs); vertexRoutine = vertexProcessor.routine(vertexState, preRasterizationState.getPipelineLayout(), vertexShader, inputs.getDescriptorSets()); if(!hasRasterizerDiscard) { setupState = setupProcessor.update(pipelineState, fragmentShader, vertexShader, attachments); setupRoutine = setupProcessor.routine(setupState); pixelState = pixelProcessor.update(pipelineState, fragmentShader, vertexShader, attachments, hasOcclusionQuery()); pixelRoutine = pixelProcessor.routine(pixelState, fragmentState->getPipelineLayout(), fragmentShader, attachments, inputs.getDescriptorSets()); } } draw->preRasterizationContainsImageWrite = pipeline->preRasterizationContainsImageWrite(); draw->fragmentContainsImageWrite = pipeline->fragmentContainsImageWrite(); // The sample count affects the batch size even if rasterization is disabled. // TODO(b/147812380): Eliminate the dependency between multisampling and batch size. int ms = hasRasterizerDiscard ? 1 : fragmentOutputInterfaceState->getSampleCount(); ASSERT(ms > 0); unsigned int numPrimitivesPerBatch = MaxBatchSize / ms; DrawData *data = draw->data; draw->occlusionQuery = occlusionQuery; draw->batchDataPool = &batchDataPool; draw->numPrimitives = count; draw->numPrimitivesPerBatch = numPrimitivesPerBatch; draw->numBatches = (count + draw->numPrimitivesPerBatch - 1) / draw->numPrimitivesPerBatch; draw->topology = vertexInputInterfaceState.getTopology(); draw->provokingVertexMode = preRasterizationState.getProvokingVertexMode(); draw->lineRasterizationMode = preRasterizationState.getLineRasterizationMode(); draw->descriptorSetObjects = inputs.getDescriptorSetObjects(); draw->preRasterizationPipelineLayout = preRasterizationState.getPipelineLayout(); draw->depthClipEnable = preRasterizationState.getDepthClipEnable(); draw->depthClipNegativeOneToOne = preRasterizationState.getDepthClipNegativeOneToOne(); data->lineWidth = preRasterizationState.getLineWidth(); data->rasterizerDiscard = hasRasterizerDiscard; data->descriptorSets = inputs.getDescriptorSets(); data->descriptorDynamicOffsets = inputs.getDescriptorDynamicOffsets(); for(int i = 0; i < MAX_INTERFACE_COMPONENTS / 4; i++) { const sw::Stream &stream = inputs.getStream(i); data->input[i] = stream.buffer; data->robustnessSize[i] = stream.robustnessSize; data->stride[i] = inputs.getVertexStride(i); } data->indices = indexBuffer; data->layer = layer; data->instanceID = instanceID; data->baseVertex = baseVertex; draw->indexType = indexBuffer ? pipeline->getIndexBuffer().getIndexType() : VK_INDEX_TYPE_UINT16; draw->vertexRoutine = vertexRoutine; vk::DescriptorSet::PrepareForSampling(draw->descriptorSetObjects, draw->preRasterizationPipelineLayout, device); // Viewport { const VkViewport &viewport = preRasterizationState.getViewport(); float W = 0.5f * viewport.width; float H = 0.5f * viewport.height; float X0 = viewport.x + W; float Y0 = viewport.y + H; float N = viewport.minDepth; float F = viewport.maxDepth; float Z = F - N; constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR; data->WxF = W * subPixF; data->HxF = H * subPixF; data->X0xF = X0 * subPixF - subPixF / 2; data->Y0xF = Y0 * subPixF - subPixF / 2; data->halfPixelX = 0.5f / W; data->halfPixelY = 0.5f / H; data->depthRange = Z; data->depthNear = N; data->constantDepthBias = preRasterizationState.getConstantDepthBias(); data->slopeDepthBias = preRasterizationState.getSlopeDepthBias(); data->depthBiasClamp = preRasterizationState.getDepthBiasClamp(); // Adjust viewport transform based on the negativeOneToOne state. if(preRasterizationState.getDepthClipNegativeOneToOne()) { data->depthRange = Z * 0.5f; data->depthNear = (F + N) * 0.5f; } } // Scissor { const VkRect2D &scissor = preRasterizationState.getScissor(); int x0 = renderArea.offset.x; int y0 = renderArea.offset.y; int x1 = x0 + renderArea.extent.width; int y1 = y0 + renderArea.extent.height; data->scissorX0 = clamp(scissor.offset.x, x0, x1); data->scissorX1 = clamp(scissor.offset.x + scissor.extent.width, x0, x1); data->scissorY0 = clamp(scissor.offset.y, y0, y1); data->scissorY1 = clamp(scissor.offset.y + scissor.extent.height, y0, y1); } if(!hasRasterizerDiscard) { const VkPolygonMode polygonMode = preRasterizationState.getPolygonMode(); DrawCall::SetupFunction setupPrimitives = nullptr; if(vertexInputInterfaceState.isDrawTriangle(false, polygonMode)) { switch(preRasterizationState.getPolygonMode()) { case VK_POLYGON_MODE_FILL: setupPrimitives = &DrawCall::setupSolidTriangles; break; case VK_POLYGON_MODE_LINE: setupPrimitives = &DrawCall::setupWireframeTriangles; numPrimitivesPerBatch /= 3; break; case VK_POLYGON_MODE_POINT: setupPrimitives = &DrawCall::setupPointTriangles; numPrimitivesPerBatch /= 3; break; default: UNSUPPORTED("polygon mode: %d", int(preRasterizationState.getPolygonMode())); return; } } else if(vertexInputInterfaceState.isDrawLine(false, polygonMode)) { setupPrimitives = &DrawCall::setupLines; } else // Point primitive topology { setupPrimitives = &DrawCall::setupPoints; } draw->setupState = setupState; draw->setupRoutine = setupRoutine; draw->pixelRoutine = pixelRoutine; draw->setupPrimitives = setupPrimitives; draw->fragmentPipelineLayout = fragmentState->getPipelineLayout(); if(pixelState.stencilActive) { data->stencil[0].set(fragmentState->getFrontStencil().reference, fragmentState->getFrontStencil().compareMask, fragmentState->getFrontStencil().writeMask); data->stencil[1].set(fragmentState->getBackStencil().reference, fragmentState->getBackStencil().compareMask, fragmentState->getBackStencil().writeMask); } data->factor = pixelProcessor.factor; if(pixelState.alphaToCoverage) { if(ms == 4) { data->a2c0 = 0.2f; data->a2c1 = 0.4f; data->a2c2 = 0.6f; data->a2c3 = 0.8f; } else if(ms == 2) { data->a2c0 = 0.25f; data->a2c1 = 0.75f; } else if(ms == 1) { data->a2c0 = 0.5f; } else ASSERT(false); } if(pixelState.occlusionEnabled) { for(int cluster = 0; cluster < MaxClusterCount; cluster++) { data->occlusion[cluster] = 0; } } // Viewport { const vk::Attachments attachments = pipeline->getAttachments(); if(attachments.depthBuffer) { switch(attachments.depthBuffer->getFormat(VK_IMAGE_ASPECT_DEPTH_BIT)) { case VK_FORMAT_D16_UNORM: // Minimum is 1 unit, but account for potential floating-point rounding errors data->minimumResolvableDepthDifference = 1.01f / 0xFFFF; break; case VK_FORMAT_D32_SFLOAT: // The minimum resolvable depth difference is determined per-polygon for floating-point depth // buffers. DrawData::minimumResolvableDepthDifference is unused. break; default: UNSUPPORTED("Depth format: %d", int(attachments.depthBuffer->getFormat(VK_IMAGE_ASPECT_DEPTH_BIT))); } } } // Target { const vk::Attachments attachments = pipeline->getAttachments(); for(int index = 0; index < MAX_COLOR_BUFFERS; index++) { draw->colorBuffer[index] = attachments.colorBuffer[index]; if(draw->colorBuffer[index]) { data->colorBuffer[index] = (unsigned int *)attachments.colorBuffer[index]->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_COLOR_BIT, 0, data->layer); data->colorPitchB[index] = attachments.colorBuffer[index]->rowPitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0); data->colorSliceB[index] = attachments.colorBuffer[index]->slicePitchBytes(VK_IMAGE_ASPECT_COLOR_BIT, 0); } } draw->depthBuffer = attachments.depthBuffer; draw->stencilBuffer = attachments.stencilBuffer; if(draw->depthBuffer) { data->depthBuffer = (float *)attachments.depthBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_DEPTH_BIT, 0, data->layer); data->depthPitchB = attachments.depthBuffer->rowPitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0); data->depthSliceB = attachments.depthBuffer->slicePitchBytes(VK_IMAGE_ASPECT_DEPTH_BIT, 0); } if(draw->stencilBuffer) { data->stencilBuffer = (unsigned char *)attachments.stencilBuffer->getOffsetPointer({ 0, 0, 0 }, VK_IMAGE_ASPECT_STENCIL_BIT, 0, data->layer); data->stencilPitchB = attachments.stencilBuffer->rowPitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0); data->stencilSliceB = attachments.stencilBuffer->slicePitchBytes(VK_IMAGE_ASPECT_STENCIL_BIT, 0); } } if(draw->fragmentPipelineLayout != draw->preRasterizationPipelineLayout) { vk::DescriptorSet::PrepareForSampling(draw->descriptorSetObjects, draw->fragmentPipelineLayout, device); } } // Push constants { data->pushConstants = pushConstants; } draw->events = events; DrawCall::run(device, draw, &drawTickets, clusterQueues); } void DrawCall::setup() { if(occlusionQuery != nullptr) { occlusionQuery->start(); } if(events) { events->add(); } } void DrawCall::teardown(vk::Device *device) { if(events) { events->done(); events = nullptr; } vertexRoutine = {}; setupRoutine = {}; pixelRoutine = {}; if(preRasterizationContainsImageWrite) { vk::DescriptorSet::ContentsChanged(descriptorSetObjects, preRasterizationPipelineLayout, device); } if(!data->rasterizerDiscard) { if(occlusionQuery != nullptr) { for(int cluster = 0; cluster < MaxClusterCount; cluster++) { occlusionQuery->add(data->occlusion[cluster]); } occlusionQuery->finish(); } for(auto *target : colorBuffer) { if(target) { target->contentsChanged(vk::Image::DIRECT_MEMORY_ACCESS); } } // If pre-rasterization and fragment use the same pipeline, and pre-rasterization // also contains image writes, don't double-notify the descriptor set. const bool descSetAlreadyNotified = preRasterizationContainsImageWrite && fragmentPipelineLayout == preRasterizationPipelineLayout; if(fragmentContainsImageWrite && !descSetAlreadyNotified) { vk::DescriptorSet::ContentsChanged(descriptorSetObjects, fragmentPipelineLayout, device); } } } void DrawCall::run(vk::Device *device, const marl::Loan &draw, marl::Ticket::Queue *tickets, marl::Ticket::Queue clusterQueues[MaxClusterCount]) { draw->setup(); const auto numPrimitives = draw->numPrimitives; const auto numPrimitivesPerBatch = draw->numPrimitivesPerBatch; const auto numBatches = draw->numBatches; auto ticket = tickets->take(); auto finally = marl::make_shared_finally([device, draw, ticket] { MARL_SCOPED_EVENT("FINISH draw %d", draw->id); draw->teardown(device); ticket.done(); }); for(unsigned int batchId = 0; batchId < numBatches; batchId++) { auto batch = draw->batchDataPool->borrow(); batch->id = batchId; batch->firstPrimitive = batch->id * numPrimitivesPerBatch; batch->numPrimitives = std::min(batch->firstPrimitive + numPrimitivesPerBatch, numPrimitives) - batch->firstPrimitive; for(int cluster = 0; cluster < MaxClusterCount; cluster++) { batch->clusterTickets[cluster] = std::move(clusterQueues[cluster].take()); } marl::schedule([device, draw, batch, finally] { processVertices(device, draw.get(), batch.get()); if(!draw->data->rasterizerDiscard) { processPrimitives(device, draw.get(), batch.get()); if(batch->numVisible > 0) { processPixels(device, draw, batch, finally); return; } } for(int cluster = 0; cluster < MaxClusterCount; cluster++) { batch->clusterTickets[cluster].done(); } }); } } void DrawCall::processVertices(vk::Device *device, DrawCall *draw, BatchData *batch) { MARL_SCOPED_EVENT("VERTEX draw %d, batch %d", draw->id, batch->id); unsigned int triangleIndices[MaxBatchSize + 1][3]; // One extra for SIMD width overrun. TODO: Adjust to dynamic batch size. { MARL_SCOPED_EVENT("processPrimitiveVertices"); processPrimitiveVertices( triangleIndices, draw->data->indices, draw->indexType, batch->firstPrimitive, batch->numPrimitives, draw->topology, draw->provokingVertexMode); } auto &vertexTask = batch->vertexTask; vertexTask.primitiveStart = batch->firstPrimitive; // We're only using batch compaction for points, not lines vertexTask.vertexCount = batch->numPrimitives * ((draw->topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST) ? 1 : 3); if(vertexTask.vertexCache.drawCall != draw->id) { vertexTask.vertexCache.clear(); vertexTask.vertexCache.drawCall = draw->id; } draw->vertexRoutine(device, &batch->triangles.front().v0, &triangleIndices[0][0], &vertexTask, draw->data); } void DrawCall::processPrimitives(vk::Device *device, DrawCall *draw, BatchData *batch) { MARL_SCOPED_EVENT("PRIMITIVES draw %d batch %d", draw->id, batch->id); auto triangles = &batch->triangles[0]; auto primitives = &batch->primitives[0]; batch->numVisible = draw->setupPrimitives(device, triangles, primitives, draw, batch->numPrimitives); } void DrawCall::processPixels(vk::Device *device, const marl::Loan &draw, const marl::Loan &batch, const std::shared_ptr &finally) { struct Data { Data(const marl::Loan &draw, const marl::Loan &batch, const std::shared_ptr &finally) : draw(draw) , batch(batch) , finally(finally) {} marl::Loan draw; marl::Loan batch; std::shared_ptr finally; }; auto data = std::make_shared(draw, batch, finally); for(int cluster = 0; cluster < MaxClusterCount; cluster++) { batch->clusterTickets[cluster].onCall([device, data, cluster] { auto &draw = data->draw; auto &batch = data->batch; MARL_SCOPED_EVENT("PIXEL draw %d, batch %d, cluster %d", draw->id, batch->id, cluster); draw->pixelRoutine(device, &batch->primitives.front(), batch->numVisible, cluster, MaxClusterCount, draw->data); batch->clusterTickets[cluster].done(); }); } } void Renderer::synchronize() { MARL_SCOPED_EVENT("synchronize"); auto ticket = drawTickets.take(); ticket.wait(); device->updateSamplingRoutineSnapshotCache(); ticket.done(); } void DrawCall::processPrimitiveVertices( unsigned int triangleIndicesOut[MaxBatchSize + 1][3], const void *primitiveIndices, VkIndexType indexType, unsigned int start, unsigned int triangleCount, VkPrimitiveTopology topology, VkProvokingVertexModeEXT provokingVertexMode) { if(!primitiveIndices) { struct LinearIndex { unsigned int operator[](unsigned int i) { return i; } }; if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, LinearIndex(), start, triangleCount)) { return; } } else { switch(indexType) { case VK_INDEX_TYPE_UINT16: if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast(primitiveIndices), start, triangleCount)) { return; } break; case VK_INDEX_TYPE_UINT32: if(!setBatchIndices(triangleIndicesOut, topology, provokingVertexMode, static_cast(primitiveIndices), start, triangleCount)) { return; } break; break; default: ASSERT(false); return; } } // setBatchIndices() takes care of the point case, since it's different due to the compaction if(topology != VK_PRIMITIVE_TOPOLOGY_POINT_LIST) { // Repeat the last index to allow for SIMD width overrun. triangleIndicesOut[triangleCount][0] = triangleIndicesOut[triangleCount - 1][2]; triangleIndicesOut[triangleCount][1] = triangleIndicesOut[triangleCount - 1][2]; triangleIndicesOut[triangleCount][2] = triangleIndicesOut[triangleCount - 1][2]; } } int DrawCall::setupSolidTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count) { auto &state = drawCall->setupState; int ms = state.multiSampleCount; const DrawData *data = drawCall->data; int visible = 0; for(int i = 0; i < count; i++, triangles++) { Vertex &v0 = triangles->v0; Vertex &v1 = triangles->v1; Vertex &v2 = triangles->v2; Polygon polygon(&v0.position, &v1.position, &v2.position); if((v0.cullMask | v1.cullMask | v2.cullMask) == 0) { continue; } if((v0.clipFlags & v1.clipFlags & v2.clipFlags) != Clipper::CLIP_FINITE) { continue; } int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags; if(clipFlagsOr != Clipper::CLIP_FINITE) { if(!Clipper::Clip(polygon, clipFlagsOr, *drawCall)) { continue; } } if(drawCall->setupRoutine(device, primitives, triangles, &polygon, data)) { primitives += ms; visible++; } } return visible; } int DrawCall::setupWireframeTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count) { auto &state = drawCall->setupState; int ms = state.multiSampleCount; int visible = 0; for(int i = 0; i < count; i++) { const Vertex &v0 = triangles[i].v0; const Vertex &v1 = triangles[i].v1; const Vertex &v2 = triangles[i].v2; float A = ((float)v0.projected.y - (float)v2.projected.y) * (float)v1.projected.x + ((float)v2.projected.y - (float)v1.projected.y) * (float)v0.projected.x + ((float)v1.projected.y - (float)v0.projected.y) * (float)v2.projected.x; // Area int w0w1w2 = bit_cast(v0.w) ^ bit_cast(v1.w) ^ bit_cast(v2.w); A = w0w1w2 < 0 ? -A : A; bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (A >= 0.0f) : (A <= 0.0f); if(state.cullMode & VK_CULL_MODE_FRONT_BIT) { if(frontFacing) continue; } if(state.cullMode & VK_CULL_MODE_BACK_BIT) { if(!frontFacing) continue; } Triangle lines[3]; lines[0].v0 = v0; lines[0].v1 = v1; lines[1].v0 = v1; lines[1].v1 = v2; lines[2].v0 = v2; lines[2].v1 = v0; for(int i = 0; i < 3; i++) { if(setupLine(device, *primitives, lines[i], *drawCall)) { primitives += ms; visible++; } } } return visible; } int DrawCall::setupPointTriangles(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count) { auto &state = drawCall->setupState; int ms = state.multiSampleCount; int visible = 0; for(int i = 0; i < count; i++) { const Vertex &v0 = triangles[i].v0; const Vertex &v1 = triangles[i].v1; const Vertex &v2 = triangles[i].v2; float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w; bool frontFacing = (state.frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE) ? (d > 0) : (d < 0); if(state.cullMode & VK_CULL_MODE_FRONT_BIT) { if(frontFacing) continue; } if(state.cullMode & VK_CULL_MODE_BACK_BIT) { if(!frontFacing) continue; } Triangle points[3]; points[0].v0 = v0; points[1].v0 = v1; points[2].v0 = v2; for(int i = 0; i < 3; i++) { if(setupPoint(device, *primitives, points[i], *drawCall)) { primitives += ms; visible++; } } } return visible; } int DrawCall::setupLines(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count) { auto &state = drawCall->setupState; int visible = 0; int ms = state.multiSampleCount; for(int i = 0; i < count; i++) { if(setupLine(device, *primitives, *triangles, *drawCall)) { primitives += ms; visible++; } triangles++; } return visible; } int DrawCall::setupPoints(vk::Device *device, Triangle *triangles, Primitive *primitives, const DrawCall *drawCall, int count) { auto &state = drawCall->setupState; int visible = 0; int ms = state.multiSampleCount; for(int i = 0; i < count; i++) { if(setupPoint(device, *primitives, *triangles, *drawCall)) { primitives += ms; visible++; } triangles++; } return visible; } bool DrawCall::setupLine(vk::Device *device, Primitive &primitive, Triangle &triangle, const DrawCall &draw) { const Vertex &v0 = triangle.v0; const Vertex &v1 = triangle.v1; if((v0.cullMask | v1.cullMask) == 0) { return false; } const float4 &P0 = v0.position; const float4 &P1 = v1.position; if(P0.w <= 0 && P1.w <= 0) { return false; } const DrawData &data = *draw.data; const float lineWidth = data.lineWidth; const int clipFlags = draw.depthClipEnable ? Clipper::CLIP_FRUSTUM : Clipper::CLIP_SIDES; constexpr float subPixF = vk::SUBPIXEL_PRECISION_FACTOR; const float W = data.WxF * (1.0f / subPixF); const float H = data.HxF * (1.0f / subPixF); float dx = W * (P1.x / P1.w - P0.x / P0.w); float dy = H * (P1.y / P1.w - P0.y / P0.w); if(dx == 0 && dy == 0) { return false; } if(draw.lineRasterizationMode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT) { // Rectangle centered on the line segment float4 P[4]; P[0] = P0; P[1] = P1; P[2] = P1; P[3] = P0; float scale = lineWidth * 0.5f / sqrt(dx * dx + dy * dy); dx *= scale; dy *= scale; float dx0h = dx * P0.w / H; float dy0w = dy * P0.w / W; float dx1h = dx * P1.w / H; float dy1w = dy * P1.w / W; P[0].x += -dy0w; P[0].y += +dx0h; P[1].x += -dy1w; P[1].y += +dx1h; P[2].x += +dy1w; P[2].y += -dx1h; P[3].x += +dy0w; P[3].y += -dx0h; Polygon polygon(P, 4); if(!Clipper::Clip(polygon, clipFlags, draw)) { return false; } return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data); } else if(false) // TODO(b/80135519): Deprecate { // Connecting diamonds polygon // This shape satisfies the diamond test convention, except for the exit rule part. // Line segments with overlapping endpoints have duplicate fragments. // The ideal algorithm requires half-open line rasterization (b/80135519). float4 P[8]; P[0] = P0; P[1] = P0; P[2] = P0; P[3] = P0; P[4] = P1; P[5] = P1; P[6] = P1; P[7] = P1; float dx0 = lineWidth * 0.5f * P0.w / W; float dy0 = lineWidth * 0.5f * P0.w / H; float dx1 = lineWidth * 0.5f * P1.w / W; float dy1 = lineWidth * 0.5f * P1.w / H; P[0].x += -dx0; P[1].y += +dy0; P[2].x += +dx0; P[3].y += -dy0; P[4].x += -dx1; P[5].y += +dy1; P[6].x += +dx1; P[7].y += -dy1; float4 L[6]; if(dx > -dy) { if(dx > dy) // Right { L[0] = P[0]; L[1] = P[1]; L[2] = P[5]; L[3] = P[6]; L[4] = P[7]; L[5] = P[3]; } else // Down { L[0] = P[0]; L[1] = P[4]; L[2] = P[5]; L[3] = P[6]; L[4] = P[2]; L[5] = P[3]; } } else { if(dx > dy) // Up { L[0] = P[0]; L[1] = P[1]; L[2] = P[2]; L[3] = P[6]; L[4] = P[7]; L[5] = P[4]; } else // Left { L[0] = P[1]; L[1] = P[2]; L[2] = P[3]; L[3] = P[7]; L[4] = P[4]; L[5] = P[5]; } } Polygon polygon(L, 6); if(!Clipper::Clip(polygon, clipFlags, draw)) { return false; } return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data); } else { // Parallelogram approximating Bresenham line // This algorithm does not satisfy the ideal diamond-exit rule, but does avoid the // duplicate fragment rasterization problem and satisfies all of Vulkan's minimum // requirements for Bresenham line segment rasterization. float4 P[8]; P[0] = P0; P[1] = P0; P[2] = P0; P[3] = P0; P[4] = P1; P[5] = P1; P[6] = P1; P[7] = P1; float dx0 = lineWidth * 0.5f * P0.w / W; float dy0 = lineWidth * 0.5f * P0.w / H; float dx1 = lineWidth * 0.5f * P1.w / W; float dy1 = lineWidth * 0.5f * P1.w / H; P[0].x += -dx0; P[1].y += +dy0; P[2].x += +dx0; P[3].y += -dy0; P[4].x += -dx1; P[5].y += +dy1; P[6].x += +dx1; P[7].y += -dy1; float4 L[4]; if(dx > -dy) { if(dx > dy) // Right { L[0] = P[1]; L[1] = P[5]; L[2] = P[7]; L[3] = P[3]; } else // Down { L[0] = P[0]; L[1] = P[4]; L[2] = P[6]; L[3] = P[2]; } } else { if(dx > dy) // Up { L[0] = P[0]; L[1] = P[2]; L[2] = P[6]; L[3] = P[4]; } else // Left { L[0] = P[1]; L[1] = P[3]; L[2] = P[7]; L[3] = P[5]; } } Polygon polygon(L, 4); if(!Clipper::Clip(polygon, clipFlags, draw)) { return false; } return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data); } return false; } bool DrawCall::setupPoint(vk::Device *device, Primitive &primitive, Triangle &triangle, const DrawCall &draw) { const Vertex &v = triangle.v0; if(v.cullMask == 0) { return false; } const DrawData &data = *draw.data; const int clipFlags = draw.depthClipEnable ? Clipper::CLIP_FRUSTUM : Clipper::CLIP_SIDES; const float pSize = clamp(v.pointSize, 1.0f, static_cast(vk::MAX_POINT_SIZE)); const float X = pSize * v.position.w * data.halfPixelX; const float Y = pSize * v.position.w * data.halfPixelY; float4 P[4]; P[0] = v.position; P[0].x -= X; P[0].y += Y; P[1] = v.position; P[1].x += X; P[1].y += Y; P[2] = v.position; P[2].x += X; P[2].y -= Y; P[3] = v.position; P[3].x -= X; P[3].y -= Y; Polygon polygon(P, 4); if(!Clipper::Clip(polygon, clipFlags, draw)) { return false; } primitive.pointSizeInv = 1.0f / pSize; return draw.setupRoutine(device, &primitive, &triangle, &polygon, &data); } void Renderer::addQuery(vk::Query *query) { ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION); ASSERT(!occlusionQuery); occlusionQuery = query; } void Renderer::removeQuery(vk::Query *query) { ASSERT(query->getType() == VK_QUERY_TYPE_OCCLUSION); ASSERT(occlusionQuery == query); occlusionQuery = nullptr; } } // namespace sw