xref: /aosp_15_r20/external/skia/src/gpu/graphite/compute/VelloComputeSteps.h (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1*c8dee2aaSAndroid Build Coastguard Worker /*
2*c8dee2aaSAndroid Build Coastguard Worker  * Copyright 2023 Google LLC
3*c8dee2aaSAndroid Build Coastguard Worker  *
4*c8dee2aaSAndroid Build Coastguard Worker  * Use of this source code is governed by a BSD-style license that can be
5*c8dee2aaSAndroid Build Coastguard Worker  * found in the LICENSE file.
6*c8dee2aaSAndroid Build Coastguard Worker  */
7*c8dee2aaSAndroid Build Coastguard Worker 
8*c8dee2aaSAndroid Build Coastguard Worker #ifndef skgpu_graphite_compute_VelloComputeSteps_DEFINED
9*c8dee2aaSAndroid Build Coastguard Worker #define skgpu_graphite_compute_VelloComputeSteps_DEFINED
10*c8dee2aaSAndroid Build Coastguard Worker 
11*c8dee2aaSAndroid Build Coastguard Worker #include "include/core/SkColorType.h"
12*c8dee2aaSAndroid Build Coastguard Worker #include "include/core/SkSize.h"
13*c8dee2aaSAndroid Build Coastguard Worker #include "include/core/SkSpan.h"
14*c8dee2aaSAndroid Build Coastguard Worker #include "include/private/base/SkTArray.h"
15*c8dee2aaSAndroid Build Coastguard Worker #include "src/gpu/graphite/ComputeTypes.h"
16*c8dee2aaSAndroid Build Coastguard Worker #include "src/gpu/graphite/compute/ComputeStep.h"
17*c8dee2aaSAndroid Build Coastguard Worker 
18*c8dee2aaSAndroid Build Coastguard Worker #include "third_party/vello/cpp/vello.h"
19*c8dee2aaSAndroid Build Coastguard Worker 
20*c8dee2aaSAndroid Build Coastguard Worker #include <string_view>
21*c8dee2aaSAndroid Build Coastguard Worker 
22*c8dee2aaSAndroid Build Coastguard Worker namespace skgpu::graphite {
23*c8dee2aaSAndroid Build Coastguard Worker 
24*c8dee2aaSAndroid Build Coastguard Worker // This file defines ComputeSteps for all Vello compute stages and their permutations. The
25*c8dee2aaSAndroid Build Coastguard Worker // declaration of each ComputeStep subclass mirrors the name of the pipeline stage as defined in the
26*c8dee2aaSAndroid Build Coastguard Worker // shader metadata.
27*c8dee2aaSAndroid Build Coastguard Worker //
28*c8dee2aaSAndroid Build Coastguard Worker // The compute stages all operate over a shared set of buffer and image resources. The
29*c8dee2aaSAndroid Build Coastguard Worker // `kVelloSlot_*` constant definitions below each uniquely identify a shared resource that must be
30*c8dee2aaSAndroid Build Coastguard Worker // instantiated when assembling the ComputeSteps into a DispatchGroup.
31*c8dee2aaSAndroid Build Coastguard Worker //
32*c8dee2aaSAndroid Build Coastguard Worker // === Monoids and Prefix Sums ===
33*c8dee2aaSAndroid Build Coastguard Worker //
34*c8dee2aaSAndroid Build Coastguard Worker // Vello's GPU algorithms make repeated use of parallel prefix sum techniques. These occur
35*c8dee2aaSAndroid Build Coastguard Worker // frequently in path rasterization (e.g. winding number accumulation across a scanline can be
36*c8dee2aaSAndroid Build Coastguard Worker // thought of as per-pixel prefix sums) but Vello also uses them to calculate buffer offsets for
37*c8dee2aaSAndroid Build Coastguard Worker // associated entries across its variable length encoding streams.
38*c8dee2aaSAndroid Build Coastguard Worker //
39*c8dee2aaSAndroid Build Coastguard Worker // For instance, given a scene that contains Bézier paths, each path gets encoded as a transform,
40*c8dee2aaSAndroid Build Coastguard Worker // a sequence of path tags (verbs), and zero or more 2-D points associated with each
41*c8dee2aaSAndroid Build Coastguard Worker // tag. N paths will often map to N transforms, N + M tags, and N + M + L points (where N > 0, M >
42*c8dee2aaSAndroid Build Coastguard Worker // 0, L >= 0). These entries are stored in separate parallel transform, path tag, and path data
43*c8dee2aaSAndroid Build Coastguard Worker // streams. The correspondence between entries of these independent streams is implicit. To keep
44*c8dee2aaSAndroid Build Coastguard Worker // CPU encoding of these streams fast, the offsets into each buffer for a given "path object" are
45*c8dee2aaSAndroid Build Coastguard Worker // computed dynamically and in parallel on the GPU. Since the offsets for each object build
46*c8dee2aaSAndroid Build Coastguard Worker // additively on offsets that appear before it in the stream, parallel computation of
47*c8dee2aaSAndroid Build Coastguard Worker // offsets can be treated as a dynamic programming problem that maps well to parallel prefix sums
48*c8dee2aaSAndroid Build Coastguard Worker // where each object is a "monoid" (https://en.wikipedia.org/wiki/Monoid) that supports algebraic
49*c8dee2aaSAndroid Build Coastguard Worker // addition/subtraction over data encoded in the path tags themselves.
50*c8dee2aaSAndroid Build Coastguard Worker //
51*c8dee2aaSAndroid Build Coastguard Worker // Once computed, a monoid contains the offsets into the input (and sometimes output) buffers for a
52*c8dee2aaSAndroid Build Coastguard Worker // given object. The parallel prefix sums operation is defined as a monoidal reduce + pre-scan pair.
53*c8dee2aaSAndroid Build Coastguard Worker // (Prefix Sums and Their Applications, Blelloch, G., https://www.cs.cmu.edu/~guyb/papers/Ble93.pdf)
54*c8dee2aaSAndroid Build Coastguard Worker //
55*c8dee2aaSAndroid Build Coastguard Worker // While these concepts are an implementation detail they are core to the Vello algorithm and are
56*c8dee2aaSAndroid Build Coastguard Worker // reflected in the pipeline names and data slot definitions.
57*c8dee2aaSAndroid Build Coastguard Worker //
58*c8dee2aaSAndroid Build Coastguard Worker // === Full Pipeline ===
59*c8dee2aaSAndroid Build Coastguard Worker //
60*c8dee2aaSAndroid Build Coastguard Worker // The full Vello pipeline stages are as follows and should be dispatched in the following order:
61*c8dee2aaSAndroid Build Coastguard Worker //
62*c8dee2aaSAndroid Build Coastguard Worker // I. Build the path monoid stream:
63*c8dee2aaSAndroid Build Coastguard Worker //   If the input fits within the workgroup size:
64*c8dee2aaSAndroid Build Coastguard Worker //     pathtag_reduce, pathtag_scan_small
65*c8dee2aaSAndroid Build Coastguard Worker //   else
66*c8dee2aaSAndroid Build Coastguard Worker //     pathtag_reduce, pathtag_reduce2, pathtag_scan1, pathtag_scan_large
67*c8dee2aaSAndroid Build Coastguard Worker //
68*c8dee2aaSAndroid Build Coastguard Worker // II. Compute path bounding boxes and flatten path elements into an unordered line soup:
69*c8dee2aaSAndroid Build Coastguard Worker //   bbox_clear, flatten
70*c8dee2aaSAndroid Build Coastguard Worker //
71*c8dee2aaSAndroid Build Coastguard Worker // III. Process the draw object stream to build the draw monoids and inputs to the clip stage:
72*c8dee2aaSAndroid Build Coastguard Worker //   draw_reduce, draw_leaf
73*c8dee2aaSAndroid Build Coastguard Worker //
74*c8dee2aaSAndroid Build Coastguard Worker // IV. Compute the bounding boxes for the clip stack from the input stream, if the scene contains
75*c8dee2aaSAndroid Build Coastguard Worker // clips:
76*c8dee2aaSAndroid Build Coastguard Worker //   clip_reduce, clip_leaf
77*c8dee2aaSAndroid Build Coastguard Worker //
78*c8dee2aaSAndroid Build Coastguard Worker // V. Allocate tile and segment buffers for the individual bins and prepare for coarse rasterization
79*c8dee2aaSAndroid Build Coastguard Worker //   binning, tile_alloc, path_coarse
80*c8dee2aaSAndroid Build Coastguard Worker //
81*c8dee2aaSAndroid Build Coastguard Worker // VI. Coarse rasterization
82*c8dee2aaSAndroid Build Coastguard Worker //   backdrop_dyn, coarse
83*c8dee2aaSAndroid Build Coastguard Worker //
84*c8dee2aaSAndroid Build Coastguard Worker // VII. Fine rasterization
85*c8dee2aaSAndroid Build Coastguard Worker //   fine
86*c8dee2aaSAndroid Build Coastguard Worker //
87*c8dee2aaSAndroid Build Coastguard Worker // TODO: Document the coverage mask pipeline once it has been re-implemented.
88*c8dee2aaSAndroid Build Coastguard Worker 
// ***
// Shared buffers that are accessed by various stages.
//
// NOTE: The slot constants are declared `inline constexpr` so that every translation unit that
// includes this header refers to a single entity per slot instead of a private internal-linkage
// copy.
//
// The render configuration uniform buffer.
inline constexpr int kVelloSlot_ConfigUniform = 0;

// The scene encoding buffer.
inline constexpr int kVelloSlot_Scene = 1;

// ***
// Buffers used during the element processing stage. This stage converts the stream of variable
// length path tags, transforms, brushes into a "path monoid" stream containing buffer offsets for
// the subsequent stages that associate the input streams with individual draw elements. This stage
// performs a parallel prefix sum (reduce + scan) which can be performed in two dispatches if the
// entire input can be processed by a single workgroup per dispatch. Otherwise, the algorithm
// requires two additional dispatches to continue the traversal (this is due to a lack of primitives
// to synchronize execution across workgroups in MSL and WGSL).
//
// Single pass variant pipelines: pathtag_reduce, pathtag_scan_small
// Multi-pass variant pipelines: pathtag_reduce, pathtag_reduce2, pathtag_scan1, pathtag_scan_large
inline constexpr int kVelloSlot_TagMonoid = 2;

// Single pass variant slots:
inline constexpr int kVelloSlot_PathtagReduceOutput = 3;

// Multi pass variant slots. The first pass output intentionally aliases the single pass reduce
// output slot.
inline constexpr int kVelloSlot_LargePathtagReduceFirstPassOutput = kVelloSlot_PathtagReduceOutput;
inline constexpr int kVelloSlot_LargePathtagReduceSecondPassOutput = 4;
inline constexpr int kVelloSlot_LargePathtagScanFirstPassOutput = 5;

// ***
// The second part of element processing flattens path elements (moveTo, lineTo, quadTo, etc) into
// an unordered line soup buffer and computes their bounding boxes. This stage is where strokes get
// expanded to fills and stroke styles get applied. The output is an unordered "line soup" buffer
// and the tight device-space bounding box of each path.
//
// Pipelines: bbox_clear, flatten
inline constexpr int kVelloSlot_PathBBoxes = 6;
inline constexpr int kVelloSlot_Lines = 7;

// ***
// The next part prepares the draw object stream (entries in the per-tile command list aka PTCL)
// and additional metadata for the subsequent clipping and binning stages.
//
// Pipelines: draw_reduce, draw_leaf
inline constexpr int kVelloSlot_DrawReduceOutput = 8;
inline constexpr int kVelloSlot_DrawMonoid = 9;
inline constexpr int kVelloSlot_InfoBinData = 10;
inline constexpr int kVelloSlot_ClipInput = 11;

// ***
// Clipping. The outputs of this stage are the finalized draw monoid and the clip bounding-boxes.
// Clipping involves evaluating the stack monoid: refer to the following paper for the meaning of
// these buffers: https://arxiv.org/pdf/2205.11659.pdf,
// https://en.wikipedia.org/wiki/Bicyclic_semigroup
//
// Pipelines: clip_reduce, clip_leaf
inline constexpr int kVelloSlot_ClipBicyclic = 12;
inline constexpr int kVelloSlot_ClipElement = 13;
inline constexpr int kVelloSlot_ClipBBoxes = 14;

// ***
// Buffers containing bump allocated data, the inputs and outputs to the binning, coarse raster, and
// per-tile segment assembly stages.
//
// Pipelines: binning, tile_alloc, path_count, backdrop_dyn, coarse, path_tiling
inline constexpr int kVelloSlot_DrawBBoxes = 15;
inline constexpr int kVelloSlot_BumpAlloc = 16;
inline constexpr int kVelloSlot_BinHeader = 17;

inline constexpr int kVelloSlot_Path = 18;
inline constexpr int kVelloSlot_Tile = 19;
inline constexpr int kVelloSlot_SegmentCounts = 20;
inline constexpr int kVelloSlot_Segments = 21;
inline constexpr int kVelloSlot_PTCL = 22;

// ***
// Texture resources used by the fine rasterization stage. The gradient image needs to get populated
// on the CPU with pre-computed gradient ramps. The image atlas is intended to hold pre-uploaded
// images that are composited into the scene.
//
// The output image contains the final render.
inline constexpr int kVelloSlot_OutputImage = 23;
inline constexpr int kVelloSlot_GradientImage = 24;
inline constexpr int kVelloSlot_ImageAtlas = 25;

// ***
// The indirect count buffer is used to issue an indirect dispatch of the path count and path tiling
// stages.
inline constexpr int kVelloSlot_IndirectCount = 26;

// ***
// The sample mask lookup table used in MSAA modes of the fine rasterization stage.
inline constexpr int kVelloSlot_MaskLUT = 27;
183*c8dee2aaSAndroid Build Coastguard Worker 
184*c8dee2aaSAndroid Build Coastguard Worker std::string_view VelloStageName(vello_cpp::ShaderStage);
185*c8dee2aaSAndroid Build Coastguard Worker WorkgroupSize VelloStageLocalSize(vello_cpp::ShaderStage);
186*c8dee2aaSAndroid Build Coastguard Worker skia_private::TArray<ComputeStep::WorkgroupBufferDesc> VelloWorkgroupBuffers(
187*c8dee2aaSAndroid Build Coastguard Worker         vello_cpp::ShaderStage);
188*c8dee2aaSAndroid Build Coastguard Worker ComputeStep::NativeShaderSource VelloNativeShaderSource(vello_cpp::ShaderStage,
189*c8dee2aaSAndroid Build Coastguard Worker                                                         ComputeStep::NativeShaderFormat);
190*c8dee2aaSAndroid Build Coastguard Worker 
191*c8dee2aaSAndroid Build Coastguard Worker template <vello_cpp::ShaderStage S>
192*c8dee2aaSAndroid Build Coastguard Worker class VelloStep : public ComputeStep {
193*c8dee2aaSAndroid Build Coastguard Worker public:
194*c8dee2aaSAndroid Build Coastguard Worker     ~VelloStep() override = default;
195*c8dee2aaSAndroid Build Coastguard Worker 
nativeShaderSource(NativeShaderFormat format)196*c8dee2aaSAndroid Build Coastguard Worker     NativeShaderSource nativeShaderSource(NativeShaderFormat format) const override {
197*c8dee2aaSAndroid Build Coastguard Worker         return VelloNativeShaderSource(S, format);
198*c8dee2aaSAndroid Build Coastguard Worker     }
199*c8dee2aaSAndroid Build Coastguard Worker 
200*c8dee2aaSAndroid Build Coastguard Worker protected:
VelloStep(SkSpan<const ResourceDesc> resources)201*c8dee2aaSAndroid Build Coastguard Worker     explicit VelloStep(SkSpan<const ResourceDesc> resources)
202*c8dee2aaSAndroid Build Coastguard Worker             : ComputeStep(VelloStageName(S),
203*c8dee2aaSAndroid Build Coastguard Worker                           VelloStageLocalSize(S),
204*c8dee2aaSAndroid Build Coastguard Worker                           resources,
205*c8dee2aaSAndroid Build Coastguard Worker                           AsSpan<ComputeStep::WorkgroupBufferDesc>(VelloWorkgroupBuffers(S)),
206*c8dee2aaSAndroid Build Coastguard Worker                           Flags::kSupportsNativeShader) {}
207*c8dee2aaSAndroid Build Coastguard Worker 
208*c8dee2aaSAndroid Build Coastguard Worker private:
209*c8dee2aaSAndroid Build Coastguard Worker     // Helper that creates a SkSpan from a universal reference to a container. Generally, creating a
210*c8dee2aaSAndroid Build Coastguard Worker     // SkSpan from an rvalue reference is not safe since the pointer stored in the SkSpan will
211*c8dee2aaSAndroid Build Coastguard Worker     // dangle beyond the constructor expression. In our usage in the constructor above,
212*c8dee2aaSAndroid Build Coastguard Worker     // the lifetime of the temporary TArray should match that of the SkSpan, both of which should
213*c8dee2aaSAndroid Build Coastguard Worker     // live through the constructor call expression.
214*c8dee2aaSAndroid Build Coastguard Worker     //
215*c8dee2aaSAndroid Build Coastguard Worker     // From https://en.cppreference.com/w/cpp/language/reference_initialization#Lifetime_of_a_temporary:
216*c8dee2aaSAndroid Build Coastguard Worker     //
217*c8dee2aaSAndroid Build Coastguard Worker     //     a temporary bound to a reference parameter in a function call exists until the end of the
218*c8dee2aaSAndroid Build Coastguard Worker     //     full expression containing that function call
219*c8dee2aaSAndroid Build Coastguard Worker     //
220*c8dee2aaSAndroid Build Coastguard Worker     template <typename T, typename C>
AsSpan(C && container)221*c8dee2aaSAndroid Build Coastguard Worker     static SkSpan<const T> AsSpan(C&& container) {
222*c8dee2aaSAndroid Build Coastguard Worker         return SkSpan(std::data(container), std::size(container));
223*c8dee2aaSAndroid Build Coastguard Worker     }
224*c8dee2aaSAndroid Build Coastguard Worker };
225*c8dee2aaSAndroid Build Coastguard Worker 
226*c8dee2aaSAndroid Build Coastguard Worker #define VELLO_COMPUTE_STEP(stage)                                                      \
227*c8dee2aaSAndroid Build Coastguard Worker     class Vello##stage##Step final : public VelloStep<vello_cpp::ShaderStage::stage> { \
228*c8dee2aaSAndroid Build Coastguard Worker     public:                                                                            \
229*c8dee2aaSAndroid Build Coastguard Worker         Vello##stage##Step();                                                          \
230*c8dee2aaSAndroid Build Coastguard Worker     };
231*c8dee2aaSAndroid Build Coastguard Worker 
232*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(BackdropDyn);
233*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(BboxClear);
234*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(Binning);
235*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(ClipLeaf);
236*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(ClipReduce);
237*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(Coarse);
238*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(Flatten);
239*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(DrawLeaf);
240*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(DrawReduce);
241*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(PathCount);
242*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(PathCountSetup);
243*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(PathTiling);
244*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(PathTilingSetup);
245*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(PathtagReduce);
246*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(PathtagReduce2);
247*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(PathtagScan1);
248*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(PathtagScanLarge);
249*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(PathtagScanSmall);
250*c8dee2aaSAndroid Build Coastguard Worker VELLO_COMPUTE_STEP(TileAlloc);
251*c8dee2aaSAndroid Build Coastguard Worker 
252*c8dee2aaSAndroid Build Coastguard Worker #undef VELLO_COMPUTE_STEP
253*c8dee2aaSAndroid Build Coastguard Worker 
254*c8dee2aaSAndroid Build Coastguard Worker template <vello_cpp::ShaderStage S, SkColorType T> class VelloFineStepBase : public VelloStep<S> {
255*c8dee2aaSAndroid Build Coastguard Worker public:
256*c8dee2aaSAndroid Build Coastguard Worker     // We need to return a texture format for the bound textures.
calculateTextureParameters(int index,const ComputeStep::ResourceDesc &)257*c8dee2aaSAndroid Build Coastguard Worker     std::tuple<SkISize, SkColorType> calculateTextureParameters(
258*c8dee2aaSAndroid Build Coastguard Worker             int index, const ComputeStep::ResourceDesc&) const override {
259*c8dee2aaSAndroid Build Coastguard Worker         SkASSERT(index == 4);
260*c8dee2aaSAndroid Build Coastguard Worker         // TODO: The texture dimensions are unknown here so this method returns 0 for the texture
261*c8dee2aaSAndroid Build Coastguard Worker         // size. In this case this field is unused since VelloRenderer assigns texture resources
262*c8dee2aaSAndroid Build Coastguard Worker         // directly to the DispatchGroupBuilder. The format must still be queried to describe the
263*c8dee2aaSAndroid Build Coastguard Worker         // ComputeStep's binding layout. This method could be improved to enable conditional
264*c8dee2aaSAndroid Build Coastguard Worker         // querying of optional/dynamic parameters.
265*c8dee2aaSAndroid Build Coastguard Worker         return {{}, T};
266*c8dee2aaSAndroid Build Coastguard Worker     }
267*c8dee2aaSAndroid Build Coastguard Worker 
268*c8dee2aaSAndroid Build Coastguard Worker protected:
VelloFineStepBase(SkSpan<const ComputeStep::ResourceDesc> resources)269*c8dee2aaSAndroid Build Coastguard Worker     explicit VelloFineStepBase(SkSpan<const ComputeStep::ResourceDesc> resources)
270*c8dee2aaSAndroid Build Coastguard Worker             : VelloStep<S>(resources) {}
271*c8dee2aaSAndroid Build Coastguard Worker };
272*c8dee2aaSAndroid Build Coastguard Worker 
273*c8dee2aaSAndroid Build Coastguard Worker template <vello_cpp::ShaderStage S, SkColorType T, ::rust::Vec<uint8_t> (*MaskLutBuilder)()>
274*c8dee2aaSAndroid Build Coastguard Worker class VelloFineMsaaStepBase : public VelloFineStepBase<S, T> {
275*c8dee2aaSAndroid Build Coastguard Worker public:
calculateBufferSize(int resourceIndex,const ComputeStep::ResourceDesc &)276*c8dee2aaSAndroid Build Coastguard Worker     size_t calculateBufferSize(int resourceIndex, const ComputeStep::ResourceDesc&) const override {
277*c8dee2aaSAndroid Build Coastguard Worker         SkASSERT(resourceIndex == 5);
278*c8dee2aaSAndroid Build Coastguard Worker         return fMaskLut.size();
279*c8dee2aaSAndroid Build Coastguard Worker     }
280*c8dee2aaSAndroid Build Coastguard Worker 
prepareStorageBuffer(int resourceIndex,const ComputeStep::ResourceDesc &,void * buffer,size_t bufferSize)281*c8dee2aaSAndroid Build Coastguard Worker     void prepareStorageBuffer(int resourceIndex,
282*c8dee2aaSAndroid Build Coastguard Worker                               const ComputeStep::ResourceDesc&,
283*c8dee2aaSAndroid Build Coastguard Worker                               void* buffer,
284*c8dee2aaSAndroid Build Coastguard Worker                               size_t bufferSize) const override {
285*c8dee2aaSAndroid Build Coastguard Worker         SkASSERT(resourceIndex == 5);
286*c8dee2aaSAndroid Build Coastguard Worker         SkASSERT(fMaskLut.size() == bufferSize);
287*c8dee2aaSAndroid Build Coastguard Worker         memcpy(buffer, fMaskLut.data(), fMaskLut.size());
288*c8dee2aaSAndroid Build Coastguard Worker     }
289*c8dee2aaSAndroid Build Coastguard Worker 
290*c8dee2aaSAndroid Build Coastguard Worker protected:
VelloFineMsaaStepBase(SkSpan<const ComputeStep::ResourceDesc> resources)291*c8dee2aaSAndroid Build Coastguard Worker     explicit VelloFineMsaaStepBase(SkSpan<const ComputeStep::ResourceDesc> resources)
292*c8dee2aaSAndroid Build Coastguard Worker             : VelloFineStepBase<S, T>(resources), fMaskLut(MaskLutBuilder()) {}
293*c8dee2aaSAndroid Build Coastguard Worker 
294*c8dee2aaSAndroid Build Coastguard Worker private:
295*c8dee2aaSAndroid Build Coastguard Worker     ::rust::Vec<uint8_t> fMaskLut;
296*c8dee2aaSAndroid Build Coastguard Worker };
297*c8dee2aaSAndroid Build Coastguard Worker 
298*c8dee2aaSAndroid Build Coastguard Worker class VelloFineAreaStep final
299*c8dee2aaSAndroid Build Coastguard Worker         : public VelloFineStepBase<vello_cpp::ShaderStage::FineArea, kRGBA_8888_SkColorType> {
300*c8dee2aaSAndroid Build Coastguard Worker public:
301*c8dee2aaSAndroid Build Coastguard Worker     VelloFineAreaStep();
302*c8dee2aaSAndroid Build Coastguard Worker };
303*c8dee2aaSAndroid Build Coastguard Worker 
304*c8dee2aaSAndroid Build Coastguard Worker class VelloFineAreaAlpha8Step final
305*c8dee2aaSAndroid Build Coastguard Worker         : public VelloFineStepBase<vello_cpp::ShaderStage::FineAreaR8, kAlpha_8_SkColorType> {
306*c8dee2aaSAndroid Build Coastguard Worker public:
307*c8dee2aaSAndroid Build Coastguard Worker     VelloFineAreaAlpha8Step();
308*c8dee2aaSAndroid Build Coastguard Worker };
309*c8dee2aaSAndroid Build Coastguard Worker 
310*c8dee2aaSAndroid Build Coastguard Worker class VelloFineMsaa16Step final : public VelloFineMsaaStepBase<vello_cpp::ShaderStage::FineMsaa16,
311*c8dee2aaSAndroid Build Coastguard Worker                                                                kRGBA_8888_SkColorType,
312*c8dee2aaSAndroid Build Coastguard Worker                                                                vello_cpp::build_mask_lut_16> {
313*c8dee2aaSAndroid Build Coastguard Worker public:
314*c8dee2aaSAndroid Build Coastguard Worker     VelloFineMsaa16Step();
315*c8dee2aaSAndroid Build Coastguard Worker };
316*c8dee2aaSAndroid Build Coastguard Worker 
317*c8dee2aaSAndroid Build Coastguard Worker class VelloFineMsaa16Alpha8Step final
318*c8dee2aaSAndroid Build Coastguard Worker         : public VelloFineMsaaStepBase<vello_cpp::ShaderStage::FineMsaa16R8,
319*c8dee2aaSAndroid Build Coastguard Worker                                        kAlpha_8_SkColorType,
320*c8dee2aaSAndroid Build Coastguard Worker                                        vello_cpp::build_mask_lut_16> {
321*c8dee2aaSAndroid Build Coastguard Worker public:
322*c8dee2aaSAndroid Build Coastguard Worker     VelloFineMsaa16Alpha8Step();
323*c8dee2aaSAndroid Build Coastguard Worker };
324*c8dee2aaSAndroid Build Coastguard Worker 
325*c8dee2aaSAndroid Build Coastguard Worker class VelloFineMsaa8Step final : public VelloFineMsaaStepBase<vello_cpp::ShaderStage::FineMsaa8,
326*c8dee2aaSAndroid Build Coastguard Worker                                                               kRGBA_8888_SkColorType,
327*c8dee2aaSAndroid Build Coastguard Worker                                                               vello_cpp::build_mask_lut_8> {
328*c8dee2aaSAndroid Build Coastguard Worker public:
329*c8dee2aaSAndroid Build Coastguard Worker     VelloFineMsaa8Step();
330*c8dee2aaSAndroid Build Coastguard Worker };
331*c8dee2aaSAndroid Build Coastguard Worker 
332*c8dee2aaSAndroid Build Coastguard Worker class VelloFineMsaa8Alpha8Step final
333*c8dee2aaSAndroid Build Coastguard Worker         : public VelloFineMsaaStepBase<vello_cpp::ShaderStage::FineMsaa8R8,
334*c8dee2aaSAndroid Build Coastguard Worker                                        kAlpha_8_SkColorType,
335*c8dee2aaSAndroid Build Coastguard Worker                                        vello_cpp::build_mask_lut_8> {
336*c8dee2aaSAndroid Build Coastguard Worker public:
337*c8dee2aaSAndroid Build Coastguard Worker     VelloFineMsaa8Alpha8Step();
338*c8dee2aaSAndroid Build Coastguard Worker };
339*c8dee2aaSAndroid Build Coastguard Worker 
340*c8dee2aaSAndroid Build Coastguard Worker }  // namespace skgpu::graphite
341*c8dee2aaSAndroid Build Coastguard Worker 
342*c8dee2aaSAndroid Build Coastguard Worker #endif  // skgpu_graphite_compute_VelloComputeSteps_DEFINED
343