/*
 * Copyright 2023 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#ifndef skgpu_graphite_compute_DispatchGroup_DEFINED
#define skgpu_graphite_compute_DispatchGroup_DEFINED

#include "include/core/SkRefCnt.h"
#include "include/private/base/SkTArray.h"
#include "src/gpu/graphite/ComputePipelineDesc.h"
#include "src/gpu/graphite/ComputeTypes.h"
#include "src/gpu/graphite/ResourceTypes.h"
#include "src/gpu/graphite/compute/ComputeStep.h"

#include <cstddef>
#include <cstdint>
#include <memory>
#include <optional>
#include <variant>

namespace skgpu::graphite {

class CommandBuffer;
class ComputePipeline;
class Recorder;
class ResourceProvider;
class Sampler;
class Task;
class Texture;
class TextureProxy;

using BindingIndex = uint32_t;
struct TextureIndex { uint32_t fValue; };
struct SamplerIndex { uint32_t fValue; };

using DispatchResource = std::variant<BindBufferInfo, TextureIndex, SamplerIndex>;
using DispatchResourceOptional =
        std::variant<std::monostate, BindBufferInfo, TextureIndex, SamplerIndex>;

struct ResourceBinding {
    BindingIndex fIndex;
    DispatchResource fResource;
};

/**
 * DispatchGroup groups a series of compute pipeline dispatches that need to execute sequentially
 * (i.e. with a barrier between them). Dispatches are stored in the order that they will be
 * encoded in the eventual command buffer.
 *
 * A DispatchGroup can be constructed from a series of ComputeSteps using a Builder. The Builder
 * verifies that the data flow specification between successive ComputeSteps is compatible. The
 * resources required by a ComputeStep (such as Buffers and TextureProxies) are created by the
 * Builder as the steps get added.
 *
 * Once a DispatchGroup is finalized, it is immutable. It contains the complete ResourceBinding
 * list for each dispatch. A list of finalized DispatchGroups can be submitted to the command
 * buffer in a ComputeTask.
 */
class DispatchGroup final {
public:
    class Builder;

    struct Dispatch {
        WorkgroupSize fLocalSize;
        std::variant<WorkgroupSize, BindBufferInfo> fGlobalSizeOrIndirect;

        std::optional<WorkgroupSize> fGlobalDispatchSize;
        skia_private::TArray<ResourceBinding> fBindings;
        skia_private::TArray<ComputeStep::WorkgroupBufferDesc> fWorkgroupBuffers;
        int fPipelineIndex = 0;
    };

    ~DispatchGroup();

    const skia_private::TArray<Dispatch>& dispatches() const { return fDispatchList; }

    const ComputePipeline* getPipeline(size_t index) const { return fPipelines[index].get(); }
    const Texture* getTexture(size_t index) const;
    const Sampler* getSampler(size_t index) const;

    bool prepareResources(ResourceProvider*);
    void addResourceRefs(CommandBuffer*) const;

    // Returns a single task that must execute before this DispatchGroup, or nullptr if the group
    // has no task dependencies.
    sk_sp<Task> snapChildTask();

private:
    friend class DispatchGroupBuilder;

    DispatchGroup() = default;

    // Disallow copy and move.
    DispatchGroup(const DispatchGroup&) = delete;
    DispatchGroup(DispatchGroup&&) = delete;

    skia_private::TArray<Dispatch> fDispatchList;

    // The list of all buffers that must be cleared before the dispatches execute.
    skia_private::TArray<BindBufferInfo> fClearList;

    // Pipelines are referenced by index by each Dispatch in `fDispatchList`. They are stored as
    // pipeline descriptions until instantiated in `prepareResources()`.
    skia_private::TArray<ComputePipelineDesc> fPipelineDescs;
    skia_private::TArray<SamplerDesc> fSamplerDescs;

    // Resources instantiated by `prepareResources()`.
    skia_private::TArray<sk_sp<ComputePipeline>> fPipelines;
    skia_private::TArray<sk_sp<TextureProxy>> fTextures;
    skia_private::TArray<sk_sp<Sampler>> fSamplers;
};
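/**
 * Minimal usage sketch of the API declared in this header, for orientation only (not part of the
 * API). The names `recorder`, `resourceProvider`, and `commandBuffer` stand in for caller-owned
 * Recorder*, ResourceProvider*, and CommandBuffer* objects, and `MyStep` is an assumed
 * ComputeStep subclass:
 *
 *     MyStep myStep;
 *     DispatchGroup::Builder builder(recorder);
 *     if (!builder.appendStep(&myStep)) {
 *         return;  // resource allocation for the step failed
 *     }
 *     std::unique_ptr<DispatchGroup> group = builder.finalize();
 *
 *     // Later, when the owning ComputeTask gets recorded:
 *     group->prepareResources(resourceProvider);
 *     group->addResourceRefs(commandBuffer);
 */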
class DispatchGroup::Builder final {
public:
    // Contains the resource handles assigned to the outputs of the most recently inserted
    // ComputeStep.
    struct OutputTable {
        // Contains the std::monostate variant if the slot is uninitialized.
        DispatchResourceOptional fSharedSlots[kMaxComputeDataFlowSlots];

        OutputTable() = default;

        void reset() { *this = {}; }
    };

    explicit Builder(Recorder*);

    const OutputTable& outputTable() const { return fOutputTable; }

    // Add a new compute step to the dispatch group and initialize its required resources if
    // necessary.
    //
    // If the global dispatch size (i.e. workgroup count) is known ahead of time, it can optionally
    // be provided here while appending a step. If provided, the ComputeStep will not receive a
    // call to `calculateGlobalDispatchSize`.
    bool appendStep(const ComputeStep*, std::optional<WorkgroupSize> globalSize = std::nullopt);

    // Add a new compute step to the dispatch group with an indirectly specified global dispatch
    // size, and initialize its required resources if necessary.
    //
    // The global dispatch size is determined by the GPU by reading the entries in
    // `indirectBuffer`. The contents of this buffer must conform to the layout of the
    // `IndirectDispatchArgs` structure declared in ComputeTypes.h.
    //
    // The ComputeStep will not receive a call to `calculateGlobalDispatchSize`.
    bool appendStepIndirect(const ComputeStep*, BindBufferInfo indirectBuffer);
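    // Illustrative sketch (not part of the API): `builder` and `step` are hypothetical, and
    // `indirectArgs` is assumed to be a BindBufferInfo whose contents follow the
    // IndirectDispatchArgs layout; WorkgroupSize is assumed constructible from explicit
    // (x, y, z) counts. A CPU-known workgroup count is passed directly, while a GPU-driven
    // dispatch reads its size from the indirect buffer instead:
    //
    //     builder.appendStep(&step, WorkgroupSize(64, 1, 1));  // explicit global size
    //     builder.appendStepIndirect(&step, indirectArgs);     // size read on the GPU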
    // Directly assign a buffer range to a shared slot. ComputeSteps that are appended after this
    // call will use this resource if they reference the given `slot` index. The Builder will not
    // allocate the resource internally and ComputeSteps will not receive calls to
    // `calculateBufferSize`.
    //
    // If the slot is already assigned a buffer, it will be overwritten. Calling this method does
    // not have any effect on previously appended ComputeSteps that were already bound to that
    // resource.
    //
    // If `cleared` is kYes, the contents of the given buffer range will be cleared to 0 before
    // the current DispatchGroup gets submitted.
    void assignSharedBuffer(BindBufferInfo buffer,
                            unsigned int slot,
                            ClearBuffer cleared = ClearBuffer::kNo);

    // Directly assign a texture to a shared slot. ComputeSteps that are appended after this call
    // will use this resource if they reference the given `slot` index. The Builder will not
    // allocate the resource internally and ComputeSteps will not receive calls to
    // `calculateTextureParameters`.
    //
    // If the slot is already assigned a texture, it will be overwritten. Calling this method does
    // not have any effect on previously appended ComputeSteps that were already bound to that
    // resource.
    void assignSharedTexture(sk_sp<TextureProxy> texture, unsigned int slot);

    // Finalize and return the constructed DispatchGroup.
    //
    // The Builder can be used to construct a new DispatchGroup by calling "reset()" after this
    // method returns.
    std::unique_ptr<DispatchGroup> finalize();

#if defined(GPU_TEST_UTILS)
    // Clear old state and start a new DispatchGroup.
    void reset();
#endif

    // Returns the buffer resource assigned to the shared slot with the given index, if any.
    BindBufferInfo getSharedBufferResource(unsigned int slot) const;

    // Returns the texture resource assigned to the shared slot with the given index, if any.
    sk_sp<TextureProxy> getSharedTextureResource(unsigned int slot) const;

private:
    bool appendStepInternal(const ComputeStep*,
                            const std::variant<WorkgroupSize, BindBufferInfo>&);

    // Allocate a resource that can be assigned to the shared or private data flow slots. Returns
    // a std::monostate if allocation fails.
    DispatchResourceOptional allocateResource(const ComputeStep* step,
                                              const ComputeStep::ResourceDesc& resource,
                                              int resourceIdx);

    // The object under construction.
    std::unique_ptr<DispatchGroup> fObj;

    Recorder* fRecorder;
    OutputTable fOutputTable;
};

}  // namespace skgpu::graphite

#endif  // skgpu_graphite_compute_DispatchGroup_DEFINED