//
// Copyright 2016 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// VertexArrayVk.cpp:
//    Implements the class methods for VertexArrayVk.
//

#include "libANGLE/renderer/vulkan/VertexArrayVk.h"

#include "common/debug.h"
#include "common/utilities.h"
#include "libANGLE/Context.h"
#include "libANGLE/renderer/vulkan/BufferVk.h"
#include "libANGLE/renderer/vulkan/ContextVk.h"
#include "libANGLE/renderer/vulkan/FramebufferVk.h"
#include "libANGLE/renderer/vulkan/vk_format_utils.h"
#include "libANGLE/renderer/vulkan/vk_renderer.h"
#include "libANGLE/renderer/vulkan/vk_resource.h"

namespace rx
{
namespace
{
constexpr int kStreamIndexBufferCachedIndexCount = 6;
constexpr int kMaxCachedStreamIndexBuffers       = 4;
constexpr size_t kDefaultValueSize = sizeof(gl::VertexAttribCurrentValueData::Values);

ANGLE_INLINE bool BindingIsAligned(const angle::Format &angleFormat,
                                   VkDeviceSize offset,
                                   GLuint stride)
{
    ASSERT(stride != 0);
    GLuint mask = angleFormat.componentAlignmentMask;
    if (mask != std::numeric_limits<GLuint>::max())
    {
        return ((offset & mask) == 0 && (stride & mask) == 0);
    }
    else
    {
        // To perform the GPU conversion for formats with components that aren't byte-aligned
        // (for example, A2BGR10 or RGB10A2), one element has to be placed in 4 bytes for the
        // compute shader. So the binding offset and stride have to be aligned to formatSize.
        unsigned int formatSize = angleFormat.pixelBytes;
        return (offset % formatSize == 0) && (stride % formatSize == 0);
    }
}

ANGLE_INLINE bool ClientBindingAligned(const gl::VertexAttribute &attrib,
                                       GLuint stride,
                                       size_t alignment)
{
    return reinterpret_cast<uintptr_t>(attrib.pointer) % alignment == 0 &&
           stride % alignment == 0;
}

bool ShouldCombineAttributes(vk::Renderer *renderer,
                             const gl::VertexAttribute &attrib,
                             const gl::VertexBinding &binding)
{
    if (!renderer->getFeatures().enableMergeClientAttribBuffer.enabled)
    {
        return false;
    }
    const vk::Format &vertexFormat = renderer->getFormat(attrib.format->id);
    return !vertexFormat.getVertexLoadRequiresConversion(false) && binding.getDivisor() == 0 &&
           ClientBindingAligned(attrib, binding.getStride(),
                                vertexFormat.getVertexInputAlignment(false));
}

void WarnOnVertexFormatConversion(ContextVk *contextVk,
                                  const vk::Format &vertexFormat,
                                  bool compressed,
                                  bool insertEventMarker)
{
    if (!vertexFormat.getVertexLoadRequiresConversion(compressed))
    {
        return;
    }

    ANGLE_VK_PERF_WARNING(
        contextVk, GL_DEBUG_SEVERITY_LOW,
        "The Vulkan driver does not support vertex attribute format 0x%04X, emulating with 0x%04X",
        vertexFormat.getIntendedFormat().glInternalFormat,
        vertexFormat.getActualBufferFormat(compressed).glInternalFormat);
}

angle::Result StreamVertexData(ContextVk *contextVk,
                               vk::BufferHelper *dstBufferHelper,
                               const uint8_t *srcData,
                               size_t bytesToCopy,
                               size_t dstOffset,
                               size_t vertexCount,
                               size_t srcStride,
                               VertexCopyFunction vertexLoadFunction)
{
    vk::Renderer *renderer = contextVk->getRenderer();
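
    // Illustrative note: vertexLoadFunction, when non-null, repacks the data vertex by
    // vertex (e.g. expanding a tightly packed 3-component source into a 4-component
    // destination); when it is null the source layout already matches the destination and
    // a single memcpy of bytesToCopy is enough.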
    // If the source pointer is null, it should not be accessed.
    if (srcData == nullptr)
    {
        return angle::Result::Continue;
    }

    uint8_t *dst = dstBufferHelper->getMappedMemory() + dstOffset;
    if (vertexLoadFunction != nullptr)
    {
        vertexLoadFunction(srcData, srcStride, vertexCount, dst);
    }
    else
    {
        memcpy(dst, srcData, bytesToCopy);
    }

    ANGLE_TRY(dstBufferHelper->flush(renderer));

    return angle::Result::Continue;
}

angle::Result StreamVertexDataWithDivisor(ContextVk *contextVk,
                                          vk::BufferHelper *dstBufferHelper,
                                          const uint8_t *srcData,
                                          size_t bytesToAllocate,
                                          size_t srcStride,
                                          size_t dstStride,
                                          VertexCopyFunction vertexLoadFunction,
                                          uint32_t divisor,
                                          size_t numSrcVertices)
{
    vk::Renderer *renderer = contextVk->getRenderer();

    uint8_t *dst = dstBufferHelper->getMappedMemory();

    // Each source vertex is used `divisor` times before advancing. Clamp to avoid OOB reads.
    size_t clampedSize = std::min(numSrcVertices * dstStride * divisor, bytesToAllocate);

    ASSERT(clampedSize % dstStride == 0);
    ASSERT(divisor > 0);
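
    // Example (illustrative): with divisor = 3 and two source vertices {A, B}, the loop
    // below emits the expanded stream
    //     dst: A A A B B B
    // so that the attribute can be bound with an effective divisor of 1.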
    uint32_t srcVertexUseCount = 0;
    for (size_t dataCopied = 0; dataCopied < clampedSize; dataCopied += dstStride)
    {
        vertexLoadFunction(srcData, srcStride, 1, dst);
        srcVertexUseCount++;
        if (srcVertexUseCount == divisor)
        {
            srcData += srcStride;
            srcVertexUseCount = 0;
        }
        dst += dstStride;
    }

    // Satisfy robustness constraints (only if the extension is enabled).
    if (contextVk->getExtensions().robustnessAny())
    {
        if (clampedSize < bytesToAllocate)
        {
            memset(dst, 0, bytesToAllocate - clampedSize);
        }
    }

    ANGLE_TRY(dstBufferHelper->flush(renderer));

    return angle::Result::Continue;
}

size_t GetVertexCountForRange(GLint64 srcBufferBytes,
                              uint32_t srcFormatSize,
                              uint32_t srcVertexStride)
{
    ASSERT(srcVertexStride != 0);
    ASSERT(srcFormatSize != 0);

    if (srcBufferBytes < srcFormatSize)
    {
        return 0;
    }

    size_t numVertices =
        static_cast<size_t>(srcBufferBytes + srcVertexStride - 1) / srcVertexStride;
    return numVertices;
}

size_t GetVertexCount(BufferVk *srcBuffer, const gl::VertexBinding &binding, uint32_t srcFormatSize)
{
    // Bytes usable for vertex data.
    GLint64 bytes = srcBuffer->getSize() - binding.getOffset();

    GLuint stride = binding.getStride();
    if (stride == 0)
    {
        stride = srcFormatSize;
    }

    return GetVertexCountForRange(bytes, srcFormatSize, stride);
}

angle::Result CalculateMaxVertexCountForConversion(ContextVk *contextVk,
                                                   BufferVk *srcBuffer,
                                                   VertexConversionBuffer *conversion,
                                                   const angle::Format &srcFormat,
                                                   const angle::Format &dstFormat,
                                                   size_t *maxNumVerticesOut)
{
    // Initialize the output vertex count to 0.
    *maxNumVerticesOut = 0;

    unsigned srcFormatSize = srcFormat.pixelBytes;
    unsigned dstFormatSize = dstFormat.pixelBytes;
    uint32_t srcStride     = conversion->getCacheKey().stride;
    uint32_t dstStride     = dstFormatSize;
    ASSERT(srcStride != 0);
    ASSERT(conversion->dirty());

    // Consider the range from the conversion's offset to the end of the buffer.
    size_t srcOffset  = conversion->getCacheKey().offset;
    GLint64 srcLength = srcBuffer->getSize() - srcOffset;

    // The max number of vertices from the binding to the end of the buffer.
    size_t maxNumVertices = GetVertexCountForRange(srcLength, srcFormatSize, srcStride);
    if (maxNumVertices == 0)
    {
        return angle::Result::Continue;
    }

    // Allocate buffer for results.
    vk::MemoryHostVisibility hostVisible = conversion->getCacheKey().hostVisible
                                               ? vk::MemoryHostVisibility::Visible
                                               : vk::MemoryHostVisibility::NonVisible;
    ANGLE_TRY(contextVk->initBufferForVertexConversion(conversion, maxNumVertices * dstStride,
                                                       hostVisible));

    // Calculate the number of vertices to convert.
    *maxNumVerticesOut = GetVertexCountForRange(srcLength, srcFormatSize, srcStride);

    return angle::Result::Continue;
}

void CalculateOffsetAndVertexCountForDirtyRange(BufferVk *bufferVk,
                                                VertexConversionBuffer *conversion,
                                                const angle::Format &srcFormat,
                                                const angle::Format &dstFormat,
                                                const RangeDeviceSize &dirtyRange,
                                                uint32_t *srcOffsetOut,
                                                uint32_t *dstOffsetOut,
                                                uint32_t *numVerticesOut)
{
    ASSERT(!dirtyRange.empty());

    unsigned srcFormatSize = srcFormat.pixelBytes;
    unsigned dstFormatSize = dstFormat.pixelBytes;
    uint32_t srcStride     = conversion->getCacheKey().stride;
    uint32_t dstStride     = dstFormatSize;
    ASSERT(srcStride != 0);
    ASSERT(conversion->dirty());

    // Start with the range from the beginning of the buffer to the end of the buffer, then
    // scissor it with the dirty range.
    size_t srcOffset  = conversion->getCacheKey().offset;
    size_t dstOffset  = 0;
    GLint64 srcLength = bufferVk->getSize() - srcOffset;

    // Adjust the offset to the beginning of the dirty range.
    if (dirtyRange.low() > srcOffset)
    {
        size_t vertexCountToSkip =
            (static_cast<size_t>(dirtyRange.low()) - srcOffset) / srcStride;
        size_t srcBytesToSkip = vertexCountToSkip * srcStride;
        size_t dstBytesToSkip = vertexCountToSkip * dstStride;
        srcOffset += srcBytesToSkip;
        srcLength -= srcBytesToSkip;
        dstOffset += dstBytesToSkip;
    }

    // Adjust dstOffset to align to 4 bytes. The GPU conversion path always writes whole
    // uint32_t values and must be 4-byte aligned. We could support unaligned uint32_t
    // stores, but the performance would be worse than just converting a few extra vertices.
    while ((dstOffset % 4) != 0)
    {
        dstOffset -= dstStride;
        srcOffset -= srcStride;
        srcLength += srcStride;
    }
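
    // Example (illustrative): with dstStride = 6 (say, an R16G16B16 destination), a
    // computed dstOffset of 18 is walked back one vertex to 12, which is 4-byte aligned;
    // srcOffset and srcLength are adjusted by the same vertex count.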
    // Adjust the length.
    if (dirtyRange.high() < static_cast<VkDeviceSize>(bufferVk->getSize()))
    {
        srcLength = dirtyRange.high() - srcOffset;
    }

    // Calculate the number of vertices to convert.
    size_t numVertices = GetVertexCountForRange(srcLength, srcFormatSize, srcStride);

    *numVerticesOut = static_cast<uint32_t>(numVertices);
    *srcOffsetOut   = static_cast<uint32_t>(srcOffset);
    *dstOffsetOut   = static_cast<uint32_t>(dstOffset);
}
}  // anonymous namespace

VertexArrayVk::VertexArrayVk(ContextVk *contextVk, const gl::VertexArrayState &state)
    : VertexArrayImpl(state),
      mCurrentArrayBufferHandles{},
      mCurrentArrayBufferOffsets{},
      mCurrentArrayBufferRelativeOffsets{},
      mCurrentArrayBuffers{},
      mCurrentArrayBufferStrides{},
      mCurrentArrayBufferDivisors{},
      mCurrentElementArrayBuffer(nullptr),
      mLineLoopHelper(contextVk->getRenderer()),
      mDirtyLineLoopTranslation(true)
{
    vk::BufferHelper &emptyBuffer = contextVk->getEmptyBuffer();

    mCurrentArrayBufferHandles.fill(emptyBuffer.getBuffer().getHandle());
    mCurrentArrayBufferOffsets.fill(0);
    mCurrentArrayBufferRelativeOffsets.fill(0);
    mCurrentArrayBuffers.fill(&emptyBuffer);
    mCurrentArrayBufferStrides.fill(0);
    mCurrentArrayBufferDivisors.fill(0);

    mBindingDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_BINDING_DIVISOR);
    if (!contextVk->getFeatures().useVertexInputBindingStrideDynamicState.enabled)
    {
        mBindingDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_BINDING_STRIDE);
    }

    // All but DIRTY_ATTRIB_POINTER_BUFFER require a graphics pipeline update.
    mAttribDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_ATTRIB_ENABLED);
    mAttribDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_ATTRIB_POINTER);
    mAttribDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_ATTRIB_FORMAT);
    mAttribDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_ATTRIB_BINDING);
}

VertexArrayVk::~VertexArrayVk() {}

void VertexArrayVk::destroy(const gl::Context *context)
{
    ContextVk *contextVk   = vk::GetImpl(context);
    vk::Renderer *renderer = contextVk->getRenderer();

    for (std::unique_ptr<vk::BufferHelper> &buffer : mCachedStreamIndexBuffers)
    {
        buffer->release(renderer);
    }

    mStreamedIndexData.release(renderer);
    mTranslatedByteIndexData.release(renderer);
    mTranslatedByteIndirectData.release(renderer);
    mLineLoopHelper.release(contextVk);
}

angle::Result VertexArrayVk::convertIndexBufferGPU(ContextVk *contextVk,
                                                   BufferVk *bufferVk,
                                                   const void *indices)
{
    intptr_t offsetIntoSrcData = reinterpret_cast<intptr_t>(indices);
    size_t srcDataSize = static_cast<size_t>(bufferVk->getSize()) - offsetIntoSrcData;

    // Allocate buffer for results.
    ANGLE_TRY(contextVk->initBufferForVertexConversion(&mTranslatedByteIndexData,
                                                       sizeof(GLushort) * srcDataSize,
                                                       vk::MemoryHostVisibility::NonVisible));
    mCurrentElementArrayBuffer = mTranslatedByteIndexData.getBuffer();

    vk::BufferHelper *dst = mTranslatedByteIndexData.getBuffer();
    vk::BufferHelper *src = &bufferVk->getBuffer();

    // Copy the relevant section of the source into the destination at the allocated offset.
    // Note that the offset returned by allocate() above is in bytes, as is the indices
    // offset pointer.
    UtilsVk::ConvertIndexParameters params = {};
    params.srcOffset = static_cast<uint32_t>(offsetIntoSrcData);
    params.dstOffset = 0;
    params.maxIndex  = static_cast<uint32_t>(bufferVk->getSize());

    ANGLE_TRY(contextVk->getUtils().convertIndexBuffer(contextVk, dst, src, params));
    mTranslatedByteIndexData.clearDirty();

    return angle::Result::Continue;
}

angle::Result VertexArrayVk::convertIndexBufferIndirectGPU(ContextVk *contextVk,
                                                           vk::BufferHelper *srcIndirectBuf,
                                                           VkDeviceSize srcIndirectBufOffset,
                                                           vk::BufferHelper **indirectBufferVkOut)
{
    size_t srcDataSize = static_cast<size_t>(mCurrentElementArrayBuffer->getSize());
    ASSERT(mCurrentElementArrayBuffer ==
           &vk::GetImpl(getState().getElementArrayBuffer())->getBuffer());

    vk::BufferHelper *srcIndexBuf = mCurrentElementArrayBuffer;

    // Allocate buffer for results.
    ANGLE_TRY(contextVk->initBufferForVertexConversion(&mTranslatedByteIndexData,
                                                       sizeof(GLushort) * srcDataSize,
                                                       vk::MemoryHostVisibility::NonVisible));
    vk::BufferHelper *dstIndexBuf = mTranslatedByteIndexData.getBuffer();

    ANGLE_TRY(contextVk->initBufferForVertexConversion(&mTranslatedByteIndirectData,
                                                       sizeof(VkDrawIndexedIndirectCommand),
                                                       vk::MemoryHostVisibility::NonVisible));
    vk::BufferHelper *dstIndirectBuf = mTranslatedByteIndirectData.getBuffer();

    // Save the new element array buffer.
    mCurrentElementArrayBuffer = dstIndexBuf;
    // Tell the caller what the new indirect buffer is.
    *indirectBufferVkOut = dstIndirectBuf;

    // Copy the relevant section of the source into the destination at the allocated offset.
    // Note that the offset returned by allocate() above is in bytes, as is the indices
    // offset pointer.
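    // Note (illustrative): besides expanding the 8-bit indices to 16 bits, the dispatch
    // below also rewrites the indirect draw parameters into dstIndirectBuf, since index
    // offsets and counts change with the wider index type.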
    UtilsVk::ConvertIndexIndirectParameters params = {};
    params.srcIndirectBufOffset = static_cast<uint32_t>(srcIndirectBufOffset);
    params.srcIndexBufOffset    = 0;
    params.dstIndexBufOffset    = 0;
    params.maxIndex             = static_cast<uint32_t>(srcDataSize);
    params.dstIndirectBufOffset = 0;

    ANGLE_TRY(contextVk->getUtils().convertIndexIndirectBuffer(
        contextVk, srcIndirectBuf, srcIndexBuf, dstIndirectBuf, dstIndexBuf, params));

    mTranslatedByteIndexData.clearDirty();
    mTranslatedByteIndirectData.clearDirty();

    return angle::Result::Continue;
}

angle::Result VertexArrayVk::handleLineLoopIndexIndirect(ContextVk *contextVk,
                                                         gl::DrawElementsType glIndexType,
                                                         vk::BufferHelper *srcIndexBuffer,
                                                         vk::BufferHelper *srcIndirectBuffer,
                                                         VkDeviceSize indirectBufferOffset,
                                                         vk::BufferHelper **indexBufferOut,
                                                         vk::BufferHelper **indirectBufferOut)
{
    return mLineLoopHelper.streamIndicesIndirect(contextVk, glIndexType, srcIndexBuffer,
                                                 srcIndirectBuffer, indirectBufferOffset,
                                                 indexBufferOut, indirectBufferOut);
}

angle::Result VertexArrayVk::handleLineLoopIndirectDraw(const gl::Context *context,
                                                        vk::BufferHelper *indirectBufferVk,
                                                        VkDeviceSize indirectBufferOffset,
                                                        vk::BufferHelper **indexBufferOut,
                                                        vk::BufferHelper **indirectBufferOut)
{
    size_t maxVertexCount = 0;
    ContextVk *contextVk  = vk::GetImpl(context);

    const gl::AttributesMask activeAttribs =
        context->getStateCache().getActiveBufferedAttribsMask();
    const auto &attribs  = mState.getVertexAttributes();
    const auto &bindings = mState.getVertexBindings();

    for (size_t attribIndex : activeAttribs)
    {
        const gl::VertexAttribute &attrib = attribs[attribIndex];
        ASSERT(attrib.enabled);
        VkDeviceSize bufSize             = getCurrentArrayBuffers()[attribIndex]->getSize();
        const gl::VertexBinding &binding = bindings[attrib.bindingIndex];
        size_t stride                    = binding.getStride();
        size_t vertexCount               = static_cast<size_t>(bufSize / stride);
        if (vertexCount > maxVertexCount)
        {
            maxVertexCount = vertexCount;
        }
    }

    ANGLE_TRY(mLineLoopHelper.streamArrayIndirect(contextVk, maxVertexCount + 1, indirectBufferVk,
                                                  indirectBufferOffset, indexBufferOut,
                                                  indirectBufferOut));

    return angle::Result::Continue;
}

angle::Result VertexArrayVk::convertIndexBufferCPU(ContextVk *contextVk,
                                                   gl::DrawElementsType indexType,
                                                   size_t indexCount,
                                                   const void *sourcePointer,
                                                   BufferBindingDirty *bindingDirty)
{
    ASSERT(!mState.getElementArrayBuffer() || indexType == gl::DrawElementsType::UnsignedByte);
    vk::Renderer *renderer = contextVk->getRenderer();
    size_t elementSize     = contextVk->getVkIndexTypeSize(indexType);
    const size_t amount    = elementSize * indexCount;

    // Applications often draw a quad as two triangles. Try to serve the commonly used
    // element array buffers from pre-created BufferHelper objects to improve performance.
    if (indexCount == kStreamIndexBufferCachedIndexCount &&
        indexType == gl::DrawElementsType::UnsignedShort)
    {
        for (std::unique_ptr<vk::BufferHelper> &buffer : mCachedStreamIndexBuffers)
        {
            void *ptr = buffer->getMappedMemory();
            if (memcmp(sourcePointer, ptr, amount) == 0)
            {
                // Found a matching cached buffer, use the cached internal index buffer.
                *bindingDirty              = mCurrentElementArrayBuffer == buffer.get()
                                                 ? BufferBindingDirty::No
                                                 : BufferBindingDirty::Yes;
                mCurrentElementArrayBuffer = buffer.get();
                return angle::Result::Continue;
            }
        }
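
        // Example (illustrative): a quad drawn as two triangles with
        //     GLushort indices[6] = {0, 1, 2, 2, 1, 3};
        // will hit the cache above on every later draw that streams the same six indices.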
        // If we still have capacity, cache this index buffer for future use.
        if (mCachedStreamIndexBuffers.size() < kMaxCachedStreamIndexBuffers)
        {
            std::unique_ptr<vk::BufferHelper> buffer = std::make_unique<vk::BufferHelper>();
            ANGLE_TRY(contextVk->initBufferAllocation(
                buffer.get(),
                renderer->getVertexConversionBufferMemoryTypeIndex(
                    vk::MemoryHostVisibility::Visible),
                amount, renderer->getVertexConversionBufferAlignment(), BufferUsageType::Static));
            memcpy(buffer->getMappedMemory(), sourcePointer, amount);
            ANGLE_TRY(buffer->flush(renderer));

            mCachedStreamIndexBuffers.push_back(std::move(buffer));

            *bindingDirty              = BufferBindingDirty::Yes;
            mCurrentElementArrayBuffer = mCachedStreamIndexBuffers.back().get();
            return angle::Result::Continue;
        }
    }

    ANGLE_TRY(contextVk->initBufferForVertexConversion(&mStreamedIndexData, amount,
                                                       vk::MemoryHostVisibility::Visible));
    mCurrentElementArrayBuffer = mStreamedIndexData.getBuffer();
    GLubyte *dst               = mCurrentElementArrayBuffer->getMappedMemory();
    *bindingDirty              = BufferBindingDirty::Yes;

    if (contextVk->shouldConvertUint8VkIndexType(indexType))
    {
        // Unsigned bytes don't have direct support in Vulkan so we have to expand the
        // memory to GLushort.
        const GLubyte *in     = static_cast<const GLubyte *>(sourcePointer);
        GLushort *expandedDst = reinterpret_cast<GLushort *>(dst);
        bool primitiveRestart = contextVk->getState().isPrimitiveRestartEnabled();

        constexpr GLubyte kUnsignedByteRestartValue   = 0xFF;
        constexpr GLushort kUnsignedShortRestartValue = 0xFFFF;

        if (primitiveRestart)
        {
            for (size_t index = 0; index < indexCount; index++)
            {
                GLushort value = static_cast<GLushort>(in[index]);
                if (in[index] == kUnsignedByteRestartValue)
                {
                    // Convert from the 8-bit restart value to the 16-bit restart value.
                    value = kUnsignedShortRestartValue;
                }
                expandedDst[index] = value;
            }
        }
        else
        {
            // Fast path for the common case.
            for (size_t index = 0; index < indexCount; index++)
            {
                expandedDst[index] = static_cast<GLushort>(in[index]);
            }
        }
    }
    else
    {
        // The primitive restart value is the same for OpenGL and Vulkan,
        // so there's no need to perform any conversion.
        memcpy(dst, sourcePointer, amount);
    }

    mStreamedIndexData.clearDirty();

    return mCurrentElementArrayBuffer->flush(contextVk->getRenderer());
}

// We assume the buffer is completely full of the same kind of data and convert
// and/or align it as we copy it to a buffer. The assumption could be wrong
// but the alternative of copying it piecemeal on each draw would have a lot more
// overhead.
angle::Result VertexArrayVk::convertVertexBufferGPU(ContextVk *contextVk,
                                                    BufferVk *srcBuffer,
                                                    VertexConversionBuffer *conversion,
                                                    const angle::Format &srcFormat,
                                                    const angle::Format &dstFormat)
{
    uint32_t srcStride = conversion->getCacheKey().stride;
    ASSERT(srcStride % (srcFormat.pixelBytes / srcFormat.channelCount) == 0);

    size_t maxNumVertices;
    ANGLE_TRY(CalculateMaxVertexCountForConversion(contextVk, srcBuffer, conversion, srcFormat,
                                                   dstFormat, &maxNumVertices));
    if (maxNumVertices == 0)
    {
        return angle::Result::Continue;
    }

    vk::BufferHelper *srcBufferHelper = &srcBuffer->getBuffer();
    vk::BufferHelper *dstBuffer       = conversion->getBuffer();

    UtilsVk::OffsetAndVertexCounts additionalOffsetVertexCounts;
    UtilsVk::ConvertVertexParameters params;
    params.srcFormat   = &srcFormat;
    params.dstFormat   = &dstFormat;
    params.srcStride   = srcStride;
    params.vertexCount = 0;

    if (conversion->isEntireBufferDirty())
    {
        params.vertexCount = static_cast<uint32_t>(maxNumVertices);
        params.srcOffset   = static_cast<uint32_t>(conversion->getCacheKey().offset);
        params.dstOffset   = 0;
    }
    else
    {
        // dirtyRanges may overlap with each other. Try to do a quick merge to reduce the
        // number of dispatch calls as well as to avoid redundant conversion in the
        // overlapped areas.
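        // Example (illustrative): dirty ranges [0, 100) and [60, 200) are consolidated
        // into [0, 200) below, turning two conversion dispatches into one.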
        conversion->consolidateDirtyRanges();
        const std::vector<RangeDeviceSize> &dirtyRanges = conversion->getDirtyBufferRanges();
        additionalOffsetVertexCounts.reserve(dirtyRanges.size());

        for (const RangeDeviceSize &dirtyRange : dirtyRanges)
        {
            if (dirtyRange.empty())
            {
                // consolidateDirtyRanges may end up with an invalid range if it gets merged.
                continue;
            }

            uint32_t srcOffset, dstOffset, numVertices;
            CalculateOffsetAndVertexCountForDirtyRange(srcBuffer, conversion, srcFormat, dstFormat,
                                                       dirtyRange, &srcOffset, &dstOffset,
                                                       &numVertices);

            if (params.vertexCount == 0)
            {
                params.vertexCount = numVertices;
                params.srcOffset   = srcOffset;
                params.dstOffset   = dstOffset;
            }
            else
            {
                additionalOffsetVertexCounts.emplace_back();
                additionalOffsetVertexCounts.back().srcOffset   = srcOffset;
                additionalOffsetVertexCounts.back().dstOffset   = dstOffset;
                additionalOffsetVertexCounts.back().vertexCount = numVertices;
            }
        }
    }

    ANGLE_TRY(contextVk->getUtils().convertVertexBuffer(contextVk, dstBuffer, srcBufferHelper,
                                                        params, additionalOffsetVertexCounts));
    conversion->clearDirty();

    return angle::Result::Continue;
}

angle::Result VertexArrayVk::convertVertexBufferCPU(ContextVk *contextVk,
                                                    BufferVk *srcBuffer,
                                                    VertexConversionBuffer *conversion,
                                                    const angle::Format &srcFormat,
                                                    const angle::Format &dstFormat,
                                                    const VertexCopyFunction vertexLoadFunction)
{
    ANGLE_TRACE_EVENT0("gpu.angle", "VertexArrayVk::convertVertexBufferCpu");

    size_t maxNumVertices;
    ANGLE_TRY(CalculateMaxVertexCountForConversion(contextVk, srcBuffer, conversion, srcFormat,
                                                   dstFormat, &maxNumVertices));
    if (maxNumVertices == 0)
    {
        return angle::Result::Continue;
    }

    uint8_t *src = nullptr;
    ANGLE_TRY(srcBuffer->mapImpl(contextVk, GL_MAP_READ_BIT, reinterpret_cast<void **>(&src)));

    uint32_t srcStride = conversion->getCacheKey().stride;
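
    // Note: the converted buffer is tightly packed, so the number of bytes written below is
    // maxNumVertices * dstFormat.pixelBytes regardless of the source stride.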
    if (conversion->isEntireBufferDirty())
    {
        size_t srcOffset        = conversion->getCacheKey().offset;
        size_t dstOffset        = 0;
        const uint8_t *srcBytes = src + srcOffset;
        size_t bytesToCopy      = maxNumVertices * dstFormat.pixelBytes;
        ANGLE_TRY(StreamVertexData(contextVk, conversion->getBuffer(), srcBytes, bytesToCopy,
                                   dstOffset, maxNumVertices, srcStride, vertexLoadFunction));
    }
    else
    {
        // dirtyRanges may overlap with each other. Try to do a quick merge to avoid
        // redundant conversion in the overlapped areas.
        conversion->consolidateDirtyRanges();
        const std::vector<RangeDeviceSize> &dirtyRanges = conversion->getDirtyBufferRanges();
        for (const RangeDeviceSize &dirtyRange : dirtyRanges)
        {
            if (dirtyRange.empty())
            {
                // consolidateDirtyRanges may end up with an invalid range if it gets merged.
                continue;
            }

            uint32_t srcOffset, dstOffset, numVertices;
            CalculateOffsetAndVertexCountForDirtyRange(srcBuffer, conversion, srcFormat, dstFormat,
                                                       dirtyRange, &srcOffset, &dstOffset,
                                                       &numVertices);
            if (numVertices > 0)
            {
                const uint8_t *srcBytes = src + srcOffset;
                size_t bytesToCopy      = maxNumVertices * dstFormat.pixelBytes;
                ANGLE_TRY(StreamVertexData(contextVk, conversion->getBuffer(), srcBytes,
                                           bytesToCopy, dstOffset, maxNumVertices, srcStride,
                                           vertexLoadFunction));
            }
        }
    }
    conversion->clearDirty();

    ANGLE_TRY(srcBuffer->unmapImpl(contextVk));

    return angle::Result::Continue;
}

void VertexArrayVk::updateCurrentElementArrayBuffer()
{
    ASSERT(mState.getElementArrayBuffer() != nullptr);
    ASSERT(mState.getElementArrayBuffer()->getSize() > 0);

    BufferVk *bufferVk         = vk::GetImpl(mState.getElementArrayBuffer());
    mCurrentElementArrayBuffer = &bufferVk->getBuffer();
}

angle::Result VertexArrayVk::syncState(const gl::Context *context,
                                       const gl::VertexArray::DirtyBits &dirtyBits,
                                       gl::VertexArray::DirtyAttribBitsArray *attribBits,
                                       gl::VertexArray::DirtyBindingBitsArray *bindingBits)
{
    ASSERT(dirtyBits.any());

    ContextVk *contextVk = vk::GetImpl(context);
    contextVk->getPerfCounters().vertexArraySyncStateCalls++;

    const std::vector<gl::VertexAttribute> &attribs = mState.getVertexAttributes();
    const std::vector<gl::VertexBinding> &bindings  = mState.getVertexBindings();

    for (auto iter = dirtyBits.begin(), endIter = dirtyBits.end(); iter != endIter; ++iter)
    {
        size_t dirtyBit = *iter;
        switch (dirtyBit)
        {
            case gl::VertexArray::DIRTY_BIT_LOST_OBSERVATION:
            {
                // If the vertex array was not observing while unbound, we need to check the
                // buffer's internal storage and take action if the buffer storage has changed
                // while not observing.
                if (contextVk->getFeatures().compressVertexData.enabled ||
                    mContentsObservers->any())
                {
                    // We may have missed a buffer content change while it was non-current. In
                    // that case we always assume the buffer has changed. If
                    // compressVertexData.enabled is true, it also depends on the buffer usage,
                    // which may have changed.
                    iter.setLaterBits(
                        gl::VertexArray::DirtyBits(mState.getBufferBindingMask().to_ulong()
                                                   << gl::VertexArray::DIRTY_BIT_BINDING_0));
                }
                else
                {
                    for (size_t bindingIndex : mState.getBufferBindingMask())
                    {
                        const gl::Buffer *bufferGL = bindings[bindingIndex].getBuffer().get();
                        vk::BufferSerial bufferSerial =
                            vk::GetImpl(bufferGL)->getBufferSerial();
                        for (size_t attribIndex :
                             bindings[bindingIndex].getBoundAttributesMask())
                        {
                            if (attribs[attribIndex].enabled &&
                                (!bufferSerial.valid() ||
                                 bufferSerial != mCurrentArrayBufferSerial[attribIndex]))
                            {
                                iter.setLaterBit(gl::VertexArray::DIRTY_BIT_BINDING_0 +
                                                 bindingIndex);
                                break;
                            }
                        }
                    }
                }
                break;
            }

            case gl::VertexArray::DIRTY_BIT_ELEMENT_ARRAY_BUFFER:
            case gl::VertexArray::DIRTY_BIT_ELEMENT_ARRAY_BUFFER_DATA:
            {
                gl::Buffer *bufferGL = mState.getElementArrayBuffer();
                if (bufferGL && bufferGL->getSize() > 0)
                {
                    // Note that just updating buffer data may still result in a new
                    // vk::BufferHelper allocation.
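                    // (Illustrative rationale: BufferVk may move to new backing storage when
                    // data is uploaded while the previous storage is still in use by the GPU,
                    // so the cached vk::BufferHelper pointer has to be refreshed here.)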
                    updateCurrentElementArrayBuffer();
                }
                else
                {
                    mCurrentElementArrayBuffer = nullptr;
                }

                mLineLoopBufferFirstIndex.reset();
                mLineLoopBufferLastIndex.reset();
                ANGLE_TRY(contextVk->onIndexBufferChange(mCurrentElementArrayBuffer));
                mDirtyLineLoopTranslation = true;
                break;
            }

#define ANGLE_VERTEX_DIRTY_ATTRIB_FUNC(INDEX)                                                 \
    case gl::VertexArray::DIRTY_BIT_ATTRIB_0 + INDEX:                                         \
    {                                                                                         \
        gl::VertexArray::DirtyAttribBits dirtyAttribBitsRequiresPipelineUpdate =              \
            (*attribBits)[INDEX] & mAttribDirtyBitsRequiresPipelineUpdate;                    \
        const bool bufferOnly = dirtyAttribBitsRequiresPipelineUpdate.none();                 \
        ANGLE_TRY(syncDirtyAttrib(contextVk, attribs[INDEX],                                  \
                                  bindings[attribs[INDEX].bindingIndex], INDEX, bufferOnly)); \
        (*attribBits)[INDEX].reset();                                                         \
        break;                                                                                \
    }

                ANGLE_VERTEX_INDEX_CASES(ANGLE_VERTEX_DIRTY_ATTRIB_FUNC)

// Since BINDING already implies a DATA and ATTRIB change, we remove those bits here to
// avoid redundant processing.
#define ANGLE_VERTEX_DIRTY_BINDING_FUNC(INDEX)                                           \
    case gl::VertexArray::DIRTY_BIT_BINDING_0 + INDEX:                                   \
    {                                                                                    \
        gl::VertexArray::DirtyBindingBits dirtyBindingBitsRequirePipelineUpdate =        \
            (*bindingBits)[INDEX] & mBindingDirtyBitsRequiresPipelineUpdate;             \
                                                                                         \
        for (size_t attribIndex : bindings[INDEX].getBoundAttributesMask())              \
        {                                                                                \
            gl::VertexArray::DirtyAttribBits dirtyAttribBitsRequiresPipelineUpdate =     \
                (*attribBits)[attribIndex] & mAttribDirtyBitsRequiresPipelineUpdate;     \
            const bool bufferOnly = dirtyBindingBitsRequirePipelineUpdate.none() &&      \
                                    dirtyAttribBitsRequiresPipelineUpdate.none();        \
            ANGLE_TRY(syncDirtyAttrib(contextVk, attribs[attribIndex], bindings[INDEX],  \
                                      attribIndex, bufferOnly));                         \
            iter.resetLaterBit(gl::VertexArray::DIRTY_BIT_BUFFER_DATA_0 + attribIndex);  \
            iter.resetLaterBit(gl::VertexArray::DIRTY_BIT_ATTRIB_0 + attribIndex);       \
            (*attribBits)[attribIndex].reset();                                          \
        }                                                                                \
        (*bindingBits)[INDEX].reset();                                                   \
        break;                                                                           \
    }

                ANGLE_VERTEX_INDEX_CASES(ANGLE_VERTEX_DIRTY_BINDING_FUNC)

#define ANGLE_VERTEX_DIRTY_BUFFER_DATA_FUNC(INDEX)                                      \
    case gl::VertexArray::DIRTY_BIT_BUFFER_DATA_0 + INDEX:                              \
        ANGLE_TRY(syncDirtyAttrib(contextVk, attribs[INDEX],                            \
                                  bindings[attribs[INDEX].bindingIndex], INDEX, false)); \
        iter.resetLaterBit(gl::VertexArray::DIRTY_BIT_ATTRIB_0 + INDEX);                \
        (*attribBits)[INDEX].reset();                                                   \
        break;

                ANGLE_VERTEX_INDEX_CASES(ANGLE_VERTEX_DIRTY_BUFFER_DATA_FUNC)

            default:
                UNREACHABLE();
                break;
        }
    }

    return angle::Result::Continue;
}

#undef ANGLE_VERTEX_DIRTY_ATTRIB_FUNC
#undef ANGLE_VERTEX_DIRTY_BINDING_FUNC
#undef ANGLE_VERTEX_DIRTY_BUFFER_DATA_FUNC
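
// For reference (illustrative): ANGLE_VERTEX_DIRTY_ATTRIB_FUNC(0) above expands to a
// `case gl::VertexArray::DIRTY_BIT_ATTRIB_0 + 0:` block that syncs attribute 0 via
// syncDirtyAttrib and clears its accumulated dirty attrib bits; ANGLE_VERTEX_INDEX_CASES
// stamps out one such case per supported attribute index.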

ANGLE_INLINE angle::Result VertexArrayVk::setDefaultPackedInput(ContextVk *contextVk,
                                                                size_t attribIndex,
                                                                angle::FormatID *formatOut)
{
    const gl::State &glState = contextVk->getState();
    const gl::VertexAttribCurrentValueData &defaultValue =
        glState.getVertexAttribCurrentValues()[attribIndex];

    *formatOut = GetCurrentValueFormatID(defaultValue.Type);

    return contextVk->onVertexAttributeChange(attribIndex, 0, 0, *formatOut, false, 0, nullptr);
}

angle::Result VertexArrayVk::updateActiveAttribInfo(ContextVk *contextVk)
{
    const std::vector<gl::VertexAttribute> &attribs = mState.getVertexAttributes();
    const std::vector<gl::VertexBinding> &bindings  = mState.getVertexBindings();

    // Update the pipeline cache with the current active attribute info.
    for (size_t attribIndex : mState.getEnabledAttributesMask())
    {
        const gl::VertexAttribute &attrib = attribs[attribIndex];
        const gl::VertexBinding &binding  = bindings[attribs[attribIndex].bindingIndex];
        const angle::FormatID format      = attrib.format->id;

        ANGLE_TRY(contextVk->onVertexAttributeChange(
            attribIndex, mCurrentArrayBufferStrides[attribIndex], binding.getDivisor(), format,
            mCurrentArrayBufferCompressed.test(attribIndex),
            mCurrentArrayBufferRelativeOffsets[attribIndex], mCurrentArrayBuffers[attribIndex]));

        mCurrentArrayBufferFormats[attribIndex] = format;
    }

    return angle::Result::Continue;
}

angle::Result VertexArrayVk::syncDirtyAttrib(ContextVk *contextVk,
                                             const gl::VertexAttribute &attrib,
                                             const gl::VertexBinding &binding,
                                             size_t attribIndex,
                                             bool bufferOnly)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    if (attrib.enabled)
    {
        const vk::Format &vertexFormat = renderer->getFormat(attrib.format->id);

        // Init the attribute offset to the front-end value.
        mCurrentArrayBufferRelativeOffsets[attribIndex] = attrib.relativeOffset;
        gl::Buffer *bufferGL                            = binding.getBuffer().get();
        // Emulated and/or client-side attribs will be streamed.
        bool isStreamingVertexAttrib =
            (binding.getDivisor() > renderer->getMaxVertexAttribDivisor()) ||
            (bufferGL == nullptr);
        // If we are switching between streaming and buffer mode, set bufferOnly to false
        // since we are actually changing the buffer.
        if (bufferOnly &&
            isStreamingVertexAttrib != mStreamingVertexAttribsMask.test(attribIndex))
        {
            bufferOnly = false;
        }
        mStreamingVertexAttribsMask.set(attribIndex, isStreamingVertexAttrib);
        bool compressed = false;

        if (bufferGL)
        {
            mContentsObservers->disableForBuffer(bufferGL, static_cast<uint32_t>(attribIndex));
        }

        if (!isStreamingVertexAttrib && bufferGL->getSize() > 0)
        {
            BufferVk *bufferVk             = vk::GetImpl(bufferGL);
            const angle::Format &srcFormat = vertexFormat.getIntendedFormat();
            unsigned srcFormatSize         = srcFormat.pixelBytes;
            uint32_t srcStride = binding.getStride() == 0 ? srcFormatSize : binding.getStride();
            size_t numVertices = GetVertexCount(bufferVk, binding, srcFormatSize);
            bool bindingIsAligned = BindingIsAligned(
                srcFormat, binding.getOffset() + attrib.relativeOffset, srcStride);

            if (renderer->getFeatures().compressVertexData.enabled &&
                gl::IsStaticBufferUsage(bufferGL->getUsage()) &&
                vertexFormat.canCompressBufferData())
            {
                compressed = true;
            }

            bool needsConversion =
                numVertices > 0 &&
                (vertexFormat.getVertexLoadRequiresConversion(compressed) || !bindingIsAligned);

            if (needsConversion)
            {
                const angle::Format &dstFormat = vertexFormat.getActualBufferFormat(compressed);
                // The converted buffer is tightly packed.
                uint32_t dstStride = dstFormat.pixelBytes;

                ASSERT(vertexFormat.getVertexInputAlignment(compressed) <=
                       vk::kVertexBufferAlignment);

                mContentsObservers->enableForBuffer(bufferGL,
                                                    static_cast<uint32_t>(attribIndex));

                WarnOnVertexFormatConversion(contextVk, vertexFormat, compressed, true);
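
                // Example (illustrative): GL_UNSIGNED_INT_2_10_10_10_REV data bound at
                // offset 2 with stride 10 fails BindingIsAligned (pixelBytes is 4), so the
                // conversion below is keyed as unaligned and must take the CPU path.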
                const VertexConversionBuffer::CacheKey cacheKey{
                    srcFormat.id, srcStride,
                    static_cast<size_t>(binding.getOffset()) + attrib.relativeOffset,
                    !bindingIsAligned, false};

                VertexConversionBuffer *conversion =
                    bufferVk->getVertexConversionBuffer(renderer, cacheKey);

                // Converted attribs are packed in their own VK buffer, so the offset is
                // relative to the binding's and conversion's offset. The conversion buffer
                // tries to reuse the existing buffer as much as possible to reduce the amount
                // of data that has to be converted. When the binding's offset changes, it
                // checks whether the new offset and the existing buffer's offset are a
                // multiple of the stride apart. If yes, it reuses the buffer. If the new
                // offset is larger, all existing data is still valid. If the new offset is
                // smaller, it marks the newly exposed range dirty and then relies on
                // ContextVk::initBufferForVertexConversion to decide whether the buffer is
                // big enough, reallocating (and marking the entire buffer dirty) if needed.
                //
                // bufferVk: ---------------------------------------------------------------
                //           |         |
                //           |         binding.offset + attrib.relativeOffset
                //           conversion->getCacheKey().offset
                //
                // conversion.buffer: ------------------------------------------------------
                //                    |
                //                    dstRelativeOffset
                size_t srcRelativeOffset = binding.getOffset() + attrib.relativeOffset -
                                           conversion->getCacheKey().offset;
                size_t numberOfVerticesToSkip = srcRelativeOffset / srcStride;
                size_t dstRelativeOffset      = numberOfVerticesToSkip * dstStride;
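
                // Example (illustrative): with cacheKey.offset = 4, binding.offset +
                // relativeOffset = 24, srcStride = 10 and dstStride = 8:
                // numberOfVerticesToSkip = (24 - 4) / 10 = 2, so dstRelativeOffset = 16.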
                if (conversion->dirty())
                {
                    if (compressed)
                    {
                        INFO() << "Compressing vertex data in buffer " << bufferGL->id().value
                               << " from " << ToUnderlying(srcFormat.id) << " to "
                               << ToUnderlying(dstFormat.id) << ".";
                    }

                    if (bindingIsAligned)
                    {
                        ANGLE_TRY(convertVertexBufferGPU(contextVk, bufferVk, conversion,
                                                         srcFormat, dstFormat));
                    }
                    else
                    {
                        ANGLE_VK_PERF_WARNING(
                            contextVk, GL_DEBUG_SEVERITY_HIGH,
                            "GPU stall due to vertex format conversion of unaligned data");

                        ANGLE_TRY(convertVertexBufferCPU(
                            contextVk, bufferVk, conversion, srcFormat, dstFormat,
                            vertexFormat.getVertexLoadFunction(compressed)));
                    }

                    // If a conversion happens, the destination buffer stride may change,
                    // therefore an attribute change needs to be signaled. Note that this may
                    // trigger an unnecessary Vulkan PSO update when the destination buffer
                    // stride does not change, but for simplicity we keep it conservative.
                    bufferOnly = false;
                }

                vk::BufferHelper *bufferHelper         = conversion->getBuffer();
                mCurrentArrayBuffers[attribIndex]      = bufferHelper;
                mCurrentArrayBufferSerial[attribIndex] = bufferHelper->getBufferSerial();
                VkDeviceSize bufferOffset;
                mCurrentArrayBufferHandles[attribIndex] =
                    bufferHelper
                        ->getBufferForVertexArray(contextVk, bufferHelper->getSize(),
                                                  &bufferOffset)
                        .getHandle();
                ASSERT(BindingIsAligned(dstFormat, bufferOffset + dstRelativeOffset, dstStride));
                mCurrentArrayBufferOffsets[attribIndex] = bufferOffset + dstRelativeOffset;
                mCurrentArrayBufferRelativeOffsets[attribIndex] = 0;
                mCurrentArrayBufferStrides[attribIndex]         = dstStride;
            }
            else
            {
                if (numVertices == 0)
                {
                    vk::BufferHelper &emptyBuffer = contextVk->getEmptyBuffer();

                    mCurrentArrayBuffers[attribIndex]       = &emptyBuffer;
                    mCurrentArrayBufferSerial[attribIndex]  = emptyBuffer.getBufferSerial();
                    mCurrentArrayBufferHandles[attribIndex] =
                        emptyBuffer.getBuffer().getHandle();
                    mCurrentArrayBufferOffsets[attribIndex] = emptyBuffer.getOffset();
                    mCurrentArrayBufferStrides[attribIndex] = 0;
                }
                else
                {
                    vk::BufferHelper &bufferHelper         = bufferVk->getBuffer();
                    mCurrentArrayBuffers[attribIndex]      = &bufferHelper;
                    mCurrentArrayBufferSerial[attribIndex] = bufferHelper.getBufferSerial();
                    VkDeviceSize bufferOffset;
                    mCurrentArrayBufferHandles[attribIndex] =
                        bufferHelper
                            .getBufferForVertexArray(contextVk, bufferVk->getSize(),
                                                     &bufferOffset)
                            .getHandle();

                    // Vulkan requires the offset to be within the buffer. We use robust access
                    // behaviour to reset the offset if it starts outside the buffer.
                    mCurrentArrayBufferOffsets[attribIndex] =
                        binding.getOffset() < static_cast<GLint64>(bufferVk->getSize())
                            ? binding.getOffset() + bufferOffset
                            : bufferOffset;

                    mCurrentArrayBufferStrides[attribIndex] = binding.getStride();
                }
            }
        }
        else
        {
            vk::BufferHelper &emptyBuffer = contextVk->getEmptyBuffer();

            mCurrentArrayBuffers[attribIndex]       = &emptyBuffer;
            mCurrentArrayBufferSerial[attribIndex]  = emptyBuffer.getBufferSerial();
            mCurrentArrayBufferHandles[attribIndex] = emptyBuffer.getBuffer().getHandle();
            mCurrentArrayBufferOffsets[attribIndex] = emptyBuffer.getOffset();
            bool combined = ShouldCombineAttributes(renderer, attrib, binding);
            mCurrentArrayBufferStrides[attribIndex] =
                combined ? binding.getStride()
                         : vertexFormat.getActualBufferFormat(compressed).pixelBytes;
        }

        if (bufferOnly)
        {
            ANGLE_TRY(contextVk->onVertexBufferChange(mCurrentArrayBuffers[attribIndex]));
        }
        else
        {
            const angle::FormatID format = attrib.format->id;
            ANGLE_TRY(contextVk->onVertexAttributeChange(
                attribIndex, mCurrentArrayBufferStrides[attribIndex], binding.getDivisor(),
                format, compressed, mCurrentArrayBufferRelativeOffsets[attribIndex],
                mCurrentArrayBuffers[attribIndex]));

            mCurrentArrayBufferFormats[attribIndex]    = format;
            mCurrentArrayBufferCompressed[attribIndex] = compressed;
            mCurrentArrayBufferDivisors[attribIndex]   = binding.getDivisor();
        }
    }
    else
    {
        contextVk->invalidateDefaultAttribute(attribIndex);

        // These will be filled out by the ContextVk.
        vk::BufferHelper &emptyBuffer = contextVk->getEmptyBuffer();

        mCurrentArrayBuffers[attribIndex]               = &emptyBuffer;
        mCurrentArrayBufferSerial[attribIndex]          = emptyBuffer.getBufferSerial();
        mCurrentArrayBufferHandles[attribIndex]         = emptyBuffer.getBuffer().getHandle();
        mCurrentArrayBufferOffsets[attribIndex]         = emptyBuffer.getOffset();
        mCurrentArrayBufferStrides[attribIndex]         = 0;
        mCurrentArrayBufferDivisors[attribIndex]        = 0;
        mCurrentArrayBufferCompressed[attribIndex]      = false;
        mCurrentArrayBufferRelativeOffsets[attribIndex] = 0;

        ANGLE_TRY(setDefaultPackedInput(contextVk, attribIndex,
                                        &mCurrentArrayBufferFormats[attribIndex]));
    }

    return angle::Result::Continue;
}

gl::AttributesMask VertexArrayVk::mergeClientAttribsRange(
    vk::Renderer *renderer,
    const gl::AttributesMask activeStreamedAttribs,
    size_t startVertex,
    size_t endVertex,
    std::array<AttributeRange, gl::MAX_VERTEX_ATTRIBS> &mergeRangesOut,
    std::array<size_t, gl::MAX_VERTEX_ATTRIBS> &mergedIndexesOut) const
{
    const std::vector<gl::VertexAttribute> &attribs = mState.getVertexAttributes();
    const std::vector<gl::VertexBinding> &bindings  = mState.getVertexBindings();
    gl::AttributesMask attributeMaskCanCombine;
    angle::FixedVector<size_t, gl::MAX_VERTEX_ATTRIBS> combinedIndexes;
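
    // Example (illustrative): two client attribs interleaved in one allocation, say
    // position and uv sharing a stride of 20 bytes, produce overlapping
    // [startAddr, endAddr) ranges below and end up merged into a single streamed copy.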
    for (size_t attribIndex : activeStreamedAttribs)
    {
        const gl::VertexAttribute &attrib = attribs[attribIndex];
        ASSERT(attrib.enabled);
        const gl::VertexBinding &binding = bindings[attrib.bindingIndex];
        const vk::Format &vertexFormat   = renderer->getFormat(attrib.format->id);
        bool combined                    = ShouldCombineAttributes(renderer, attrib, binding);
        attributeMaskCanCombine.set(attribIndex, combined);
        if (combined)
        {
            combinedIndexes.push_back(attribIndex);
        }
        GLuint pixelBytes      = vertexFormat.getActualBufferFormat(false).pixelBytes;
        size_t destStride      = combined ? binding.getStride() : pixelBytes;
        uintptr_t startAddress = reinterpret_cast<uintptr_t>(attrib.pointer);

        mergeRangesOut[attribIndex].startAddr = startAddress;
        mergeRangesOut[attribIndex].endAddr =
            startAddress + (endVertex - 1) * destStride + pixelBytes;
        mergeRangesOut[attribIndex].copyStartAddr =
            startAddress + startVertex * binding.getStride();
        mergedIndexesOut[attribIndex] = attribIndex;
    }
    if (attributeMaskCanCombine.none())
    {
        return attributeMaskCanCombine;
    }
    auto comp = [&mergeRangesOut](size_t a, size_t b) -> bool {
        return mergeRangesOut[a] < mergeRangesOut[b];
    };
    // Only sort the combined range indexes.
    std::sort(combinedIndexes.begin(), combinedIndexes.end(), comp);
    // Merge the spans of combined ranges.
    auto next = combinedIndexes.begin();
    auto cur  = next++;
    while (next != combinedIndexes.end() || (cur != next))
    {
        // cur and next overlap: merge next into cur and advance next.
        if (next != combinedIndexes.end() &&
            mergeRangesOut[*cur].endAddr >= mergeRangesOut[*next].startAddr)
        {
            mergeRangesOut[*cur].endAddr =
                std::max(mergeRangesOut[*cur].endAddr, mergeRangesOut[*next].endAddr);
            mergeRangesOut[*cur].copyStartAddr =
                std::min(mergeRangesOut[*cur].copyStartAddr, mergeRangesOut[*next].copyStartAddr);
            mergedIndexesOut[*next] = mergedIndexesOut[*cur];
            ++next;
        }
        else
        {
            ++cur;
            if (cur != next)
            {
                mergeRangesOut[*cur] = mergeRangesOut[*(cur - 1)];
            }
            else if (next != combinedIndexes.end())
            {
                ++next;
            }
        }
    }
    return attributeMaskCanCombine;
}
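
// Example (illustrative): after sorting, combined ranges A = [0, 100), B = [80, 160) and
// C = [400, 500) are merged so that A absorbs B (giving [0, 160)) while C stays separate;
// mergedIndexesOut then maps B's attribute index to A's.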

// Handle copying client attribs and/or expanding the attrib buffer in the case where the
// attribute divisor has to be emulated.
angle::Result VertexArrayVk::updateStreamedAttribs(const gl::Context *context,
                                                   GLint firstVertex,
                                                   GLsizei vertexOrIndexCount,
                                                   GLsizei instanceCount,
                                                   gl::DrawElementsType indexTypeOrInvalid,
                                                   const void *indices)
{
    ContextVk *contextVk   = vk::GetImpl(context);
    vk::Renderer *renderer = contextVk->getRenderer();

    const gl::AttributesMask activeAttribs =
        context->getStateCache().getActiveClientAttribsMask() |
        context->getStateCache().getActiveBufferedAttribsMask();
    const gl::AttributesMask activeStreamedAttribs = mStreamingVertexAttribsMask & activeAttribs;

    // Early return for the corner case where no emulated/streamed attribs are active.
    if (!activeStreamedAttribs.any())
    {
        return angle::Result::Continue;
    }

    GLint startVertex;
    size_t vertexCount;
    ANGLE_TRY(GetVertexRangeInfo(context, firstVertex, vertexOrIndexCount, indexTypeOrInvalid,
                                 indices, 0, &startVertex, &vertexCount));
    ASSERT(vertexCount > 0);

    const auto &attribs  = mState.getVertexAttributes();
    const auto &bindings = mState.getVertexBindings();

    std::array<size_t, gl::MAX_VERTEX_ATTRIBS> mergedIndexes;
    std::array<AttributeRange, gl::MAX_VERTEX_ATTRIBS> mergeRanges;
    std::array<vk::BufferHelper *, gl::MAX_VERTEX_ATTRIBS> attribBufferHelper = {};
    auto mergeAttribMask = mergeClientAttribsRange(renderer, activeStreamedAttribs, startVertex,
                                                   startVertex + vertexCount, mergeRanges,
                                                   mergedIndexes);

    for (size_t attribIndex : activeStreamedAttribs)
    {
        const gl::VertexAttribute &attrib = attribs[attribIndex];
        ASSERT(attrib.enabled);
        const gl::VertexBinding &binding = bindings[attrib.bindingIndex];

        const vk::Format &vertexFormat = renderer->getFormat(attrib.format->id);
        const angle::Format &dstFormat = vertexFormat.getActualBufferFormat(false);
        GLuint pixelBytes              = dstFormat.pixelBytes;

        const bool compressed = false;
        ASSERT(vertexFormat.getVertexInputAlignment(false) <= vk::kVertexBufferAlignment);

        vk::BufferHelper *vertexDataBuffer = nullptr;
        const uint8_t *src                 = static_cast<const uint8_t *>(attrib.pointer);
        const uint32_t divisor             = binding.getDivisor();
        bool combined                      = mergeAttribMask.test(attribIndex);
        GLuint stride                      = combined ? binding.getStride() : pixelBytes;
        VkDeviceSize startOffset           = 0;
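
        // Note: divisors above renderer->getMaxVertexAttribDivisor() cannot be expressed
        // directly in Vulkan; such attributes are expanded below to one element per
        // instance and bound with an effective divisor of 1.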
        if (divisor > 0)
        {
            // Instanced attrib.
            if (divisor > renderer->getMaxVertexAttribDivisor())
            {
                // The divisor will be set to 1, so update the buffer to hold one attribute
                // per instance.
                size_t bytesToAllocate = instanceCount * stride;

                // Allocate buffer for results.
                ANGLE_TRY(contextVk->allocateStreamedVertexBuffer(attribIndex, bytesToAllocate,
                                                                  &vertexDataBuffer));

                gl::Buffer *bufferGL = binding.getBuffer().get();
                if (bufferGL != nullptr)
                {
                    // Only do the data copy if the source buffer is valid.
                    if (bufferGL->getSize() > 0)
                    {
                        // Map the buffer to expand attribs for divisor emulation.
                        BufferVk *bufferVk = vk::GetImpl(binding.getBuffer().get());
                        void *buffSrc      = nullptr;
                        ANGLE_TRY(bufferVk->mapImpl(contextVk, GL_MAP_READ_BIT, &buffSrc));
                        src = reinterpret_cast<const uint8_t *>(buffSrc) + binding.getOffset();

                        uint32_t srcAttributeSize =
                            static_cast<uint32_t>(ComputeVertexAttributeTypeSize(attrib));

                        size_t numVertices = GetVertexCount(bufferVk, binding, srcAttributeSize);

                        ANGLE_TRY(StreamVertexDataWithDivisor(
                            contextVk, vertexDataBuffer, src, bytesToAllocate,
                            binding.getStride(), stride,
                            vertexFormat.getVertexLoadFunction(compressed), divisor,
                            numVertices));

                        ANGLE_TRY(bufferVk->unmapImpl(contextVk));
                    }
                    else if (contextVk->getExtensions().robustnessAny())
                    {
                        // Satisfy robustness constraints (only if the extension is enabled).
                        uint8_t *dst = vertexDataBuffer->getMappedMemory();
                        memset(dst, 0, bytesToAllocate);
                    }
                }
                else
                {
                    size_t numVertices = instanceCount;
                    ANGLE_TRY(StreamVertexDataWithDivisor(
                        contextVk, vertexDataBuffer, src, bytesToAllocate, binding.getStride(),
                        stride, vertexFormat.getVertexLoadFunction(compressed), divisor,
                        numVertices));
                }
            }
            else
            {
                ASSERT(binding.getBuffer().get() == nullptr);
                size_t count           = UnsignedCeilDivide(instanceCount, divisor);
                size_t bytesToAllocate = count * stride;

                // Allocate buffer for results.
                ANGLE_TRY(contextVk->allocateStreamedVertexBuffer(attribIndex, bytesToAllocate,
                                                                  &vertexDataBuffer));

                ANGLE_TRY(StreamVertexData(contextVk, vertexDataBuffer, src, bytesToAllocate, 0,
                                           count, binding.getStride(),
                                           vertexFormat.getVertexLoadFunction(compressed)));
            }
        }
        else
        {
            ASSERT(binding.getBuffer().get() == nullptr);
            size_t mergedAttribIdx      = mergedIndexes[attribIndex];
            const AttributeRange &range = mergeRanges[attribIndex];
            if (attribBufferHelper[mergedAttribIdx] == nullptr)
            {
                size_t destOffset =
                    combined ? range.copyStartAddr - range.startAddr : startVertex * stride;
                size_t bytesToAllocate = range.endAddr - range.startAddr;
                ANGLE_TRY(contextVk->allocateStreamedVertexBuffer(
                    mergedAttribIdx, bytesToAllocate, &attribBufferHelper[mergedAttribIdx]));
                ANGLE_TRY(StreamVertexData(
                    contextVk, attribBufferHelper[mergedAttribIdx],
                    (const uint8_t *)range.copyStartAddr, bytesToAllocate - destOffset,
                    destOffset, vertexCount, binding.getStride(),
                    combined ? nullptr : vertexFormat.getVertexLoadFunction(compressed)));
            }
            vertexDataBuffer = attribBufferHelper[mergedAttribIdx];
            startOffset      = combined ? (uintptr_t)attrib.pointer - range.startAddr : 0;
        }

        ASSERT(vertexDataBuffer != nullptr);
        mCurrentArrayBuffers[attribIndex]      = vertexDataBuffer;
        mCurrentArrayBufferSerial[attribIndex] = vertexDataBuffer->getBufferSerial();
        VkDeviceSize bufferOffset;
        mCurrentArrayBufferHandles[attribIndex] =
            vertexDataBuffer
                ->getBufferForVertexArray(contextVk, vertexDataBuffer->getSize(), &bufferOffset)
                .getHandle();
        mCurrentArrayBufferOffsets[attribIndex]  = bufferOffset + startOffset;
        mCurrentArrayBufferStrides[attribIndex]  = stride;
        mCurrentArrayBufferDivisors[attribIndex] = divisor;

        ASSERT(BindingIsAligned(dstFormat, mCurrentArrayBufferOffsets[attribIndex],
                                mCurrentArrayBufferStrides[attribIndex]));
    }

    return angle::Result::Continue;
}

angle::Result VertexArrayVk::handleLineLoop(ContextVk *contextVk,
                                            GLint firstVertex,
                                            GLsizei vertexOrIndexCount,
                                            gl::DrawElementsType indexTypeOrInvalid,
                                            const void *indices,
                                            vk::BufferHelper **indexBufferOut,
                                            uint32_t *indexCountOut)
{
    if (indexTypeOrInvalid != gl::DrawElementsType::InvalidEnum)
    {
        // Handle GL_LINE_LOOP drawElements.
        if (mDirtyLineLoopTranslation)
        {
            gl::Buffer *elementArrayBuffer = mState.getElementArrayBuffer();

            if (!elementArrayBuffer)
            {
                ANGLE_TRY(mLineLoopHelper.streamIndices(
                    contextVk, indexTypeOrInvalid, vertexOrIndexCount,
                    reinterpret_cast<const uint8_t *>(indices), indexBufferOut, indexCountOut));
            }
            else
            {
                // When using an element array buffer, 'indices' is an offset to the first
                // element.
                intptr_t offset                = reinterpret_cast<intptr_t>(indices);
                BufferVk *elementArrayBufferVk = vk::GetImpl(elementArrayBuffer);
                ANGLE_TRY(mLineLoopHelper.getIndexBufferForElementArrayBuffer(
                    contextVk, elementArrayBufferVk, indexTypeOrInvalid, vertexOrIndexCount,
                    offset, indexBufferOut, indexCountOut));
            }
        }

        // If we've had a drawArrays call with a line loop before, we want to make sure this
        // is invalidated the next time drawArrays is called since we use the same index
        // buffer for both calls.
        mLineLoopBufferFirstIndex.reset();
        mLineLoopBufferLastIndex.reset();
        return angle::Result::Continue;
    }

    // Note: Vertex indexes can be arbitrarily large.
    uint32_t clampedVertexCount = gl::clampCast<uint32_t>(vertexOrIndexCount);

    // Handle GL_LINE_LOOP drawArrays.
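    // Example (illustrative): drawArrays(GL_LINE_LOOP, 4, 5) needs the index stream
    // {4, 5, 6, 7, 8, 4}; the extra closing index is why *indexCountOut below is
    // vertexOrIndexCount + 1.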
    size_t lastVertex = static_cast<size_t>(firstVertex + clampedVertexCount);
    if (!mLineLoopBufferFirstIndex.valid() || !mLineLoopBufferLastIndex.valid() ||
        mLineLoopBufferFirstIndex != firstVertex || mLineLoopBufferLastIndex != lastVertex)
    {
        ANGLE_TRY(mLineLoopHelper.getIndexBufferForDrawArrays(contextVk, clampedVertexCount,
                                                              firstVertex, indexBufferOut));

        mLineLoopBufferFirstIndex = firstVertex;
        mLineLoopBufferLastIndex  = lastVertex;
    }
    else
    {
        *indexBufferOut = mLineLoopHelper.getCurrentIndexBuffer();
    }
    *indexCountOut = vertexOrIndexCount + 1;

    return angle::Result::Continue;
}

angle::Result VertexArrayVk::updateDefaultAttrib(ContextVk *contextVk, size_t attribIndex)
{
    if (!mState.getEnabledAttributesMask().test(attribIndex))
    {
        vk::BufferHelper *bufferHelper;
        ANGLE_TRY(contextVk->allocateStreamedVertexBuffer(attribIndex, kDefaultValueSize,
                                                          &bufferHelper));

        const gl::VertexAttribCurrentValueData &defaultValue =
            contextVk->getState().getVertexAttribCurrentValues()[attribIndex];
        uint8_t *ptr = bufferHelper->getMappedMemory();
        memcpy(ptr, &defaultValue.Values, kDefaultValueSize);
        ANGLE_TRY(bufferHelper->flush(contextVk->getRenderer()));

        VkDeviceSize bufferOffset;
        mCurrentArrayBufferHandles[attribIndex] =
            bufferHelper->getBufferForVertexArray(contextVk, kDefaultValueSize, &bufferOffset)
                .getHandle();
        mCurrentArrayBufferOffsets[attribIndex]  = bufferOffset;
        mCurrentArrayBuffers[attribIndex]        = bufferHelper;
        mCurrentArrayBufferSerial[attribIndex]   = bufferHelper->getBufferSerial();
        mCurrentArrayBufferStrides[attribIndex]  = 0;
        mCurrentArrayBufferDivisors[attribIndex] = 0;

        ANGLE_TRY(setDefaultPackedInput(contextVk, attribIndex,
                                        &mCurrentArrayBufferFormats[attribIndex]));
    }

    return angle::Result::Continue;
}
}  // namespace rx