xref: /aosp_15_r20/external/skia/src/gpu/ganesh/tessellate/GrStrokeTessellationShader.cpp (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1 /*
2  * Copyright 2020 Google LLC.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 #include "src/gpu/ganesh/tessellate/GrStrokeTessellationShader.h"
8 
9 #include "include/core/SkMatrix.h"
10 #include "include/core/SkPaint.h"
11 #include "include/core/SkString.h"
12 #include "include/private/base/SkAssert.h"
13 #include "include/private/base/SkMacros.h"
14 #include "include/private/base/SkPoint_impl.h"
15 #include "include/private/gpu/ganesh/GrTypesPriv.h"
16 #include "src/core/SkSLTypeShared.h"
17 #include "src/gpu/KeyBuilder.h"
18 #include "src/gpu/ganesh/GrGeometryProcessor.h"
19 #include "src/gpu/ganesh/GrShaderCaps.h"
20 #include "src/gpu/ganesh/GrShaderVar.h"
21 #include "src/gpu/ganesh/glsl/GrGLSLFragmentShaderBuilder.h"
22 #include "src/gpu/ganesh/glsl/GrGLSLProgramDataManager.h"
23 #include "src/gpu/ganesh/glsl/GrGLSLUniformHandler.h"
24 #include "src/gpu/ganesh/glsl/GrGLSLVarying.h"
25 #include "src/gpu/ganesh/glsl/GrGLSLVertexGeoBuilder.h"
26 #include "src/gpu/tessellate/FixedCountBufferUtils.h"
27 
28 #include <cmath>
29 #include <cstdint>
30 
31 namespace {
32 
// SkSL source for:
//
//     float2 robust_normalize_diff(float2 a, float2 b)
//
// Computes normalize(a - b). The difference is pre-scaled by the reciprocal of its largest
// absolute component before it is normalized, which keeps the math well behaved when 'a' and/or
// 'b' have large coordinates. When a == b the function returns float2(0) instead of normalizing
// a zero vector.
static const char* kRobustNormalizeDiffFn =
        "float2 robust_normalize_diff(float2 a, float2 b) {"
            "float2 diff = a - b;"
            // Degenerate case: identical points have no direction.
            "if (diff == float2(0.0)) {return float2(0.0);}"
            " else {"
                // Bring the largest component to magnitude 1 before calling normalize().
                "float invMag = 1.0 / max(abs(diff.x), abs(diff.y));"
                "return normalize(invMag * diff);"
            "}"
        "}";
47 
// SkSL source for:
//
//     float cosine_between_unit_vectors(float2 a, float2 b)
//
// Gives the cosine of the angle between 'a' and 'b', both of which must already be unit length.
// For unit vectors the cosine is simply dot(a, b); the dot product is clamped so the returned
// value always lies within [-1, 1].
static const char* kCosineBetweenUnitVectorsFn =
        "float cosine_between_unit_vectors(float2 a, float2 b) {"
            "return clamp(dot(a, b), -1.0, 1.0);}";
59 
60 
// SkSL source for:
//
//     float miter_extent(float cosTheta, float miterLimit)
//
// Returns the factor by which the middle radius of a join is scaled. With x = (cosTheta + 1) / 2,
// the result is inversesqrt(x) (reaching the miter point) when x * miterLimit^2 >= 1, and sqrt(x)
// (reaching only the bevel edge) when the miter limit is surpassed and the join has to revert to
// a bevel.
static const char* kMiterExtentFn =
        "float miter_extent(float cosTheta, float miterLimit) {"
            "float x = fma(cosTheta, .5, .5);"
            "return (x * miterLimit * miterLimit >= 1.0) "
                    "? inversesqrt(x) : sqrt(x);"
        "}";
71 
// SkSL source for:
//
//     float num_radial_segments_per_radian(float approxDevStrokeRadius)
//
// Returns how many radial segments are needed per radian of rotation for the curve to appear
// "smooth" at the given approximate device-space stroke radius. The SkSL refers to a constant
// named PRECISION, which has to be defined in the shader this function is inserted into.
static const char* kNumRadialSegmentsPerRadianFn =
        "float num_radial_segments_per_radian(float approxDevStrokeRadius) {"
            "return .5 / acos("
                // The max() keeps the argument of acos() from dropping below -1.
                "max(1.0 - (1.0 / PRECISION) / approxDevStrokeRadius, -1.0)"
            ");"
        "}";
80 
// SkSL source for three overloads of unchecked_mix(), each evaluating fma(b - a, T, a):
//
//     float  unchecked_mix(float  a, float  b, float  T)
//     float2 unchecked_mix(float2 a, float2 b, float  T)   // scalar T, splatted to float2
//     float4 unchecked_mix(float4 a, float4 b, float4 T)
//
// Unlike mix(), the result at T == 1 is not guaranteed to be b. In exchange it seems to give
// better precision than "a*(1 - T) + b*T" for things like chopping cubics on exact cusp points.
// Callers override the result when T == 1 anyway, so this shouldn't be a problem.
static const char* kUncheckedMixFn =
        "float unchecked_mix(float a, float b, float T) {return fma(b - a, T, a);}"
        "float2 unchecked_mix(float2 a, float2 b, float T) {return fma(b - a, float2(T), a);}"
        "float4 unchecked_mix(float4 a, float4 b, float4 T) {return fma(b - a, T, a);}";
97 
98 using skgpu::tess::FixedCountStrokes;
99 
100 } // anonymous namespace
101 
GrStrokeTessellationShader(const GrShaderCaps & shaderCaps,PatchAttribs attribs,const SkMatrix & viewMatrix,const SkStrokeRec & stroke,SkPMColor4f color)102 GrStrokeTessellationShader::GrStrokeTessellationShader(const GrShaderCaps& shaderCaps,
103                                                        PatchAttribs attribs,
104                                                        const SkMatrix& viewMatrix,
105                                                        const SkStrokeRec& stroke,
106                                                        SkPMColor4f color)
107         : GrTessellationShader(kTessellate_GrStrokeTessellationShader_ClassID,
108                                GrPrimitiveType::kTriangleStrip, viewMatrix, color)
109         , fPatchAttribs(attribs | PatchAttribs::kJoinControlPoint)
110         , fStroke(stroke) {
111     // We should use explicit curve type when, and only when, there isn't infinity support.
112     // Otherwise the GPU can infer curve type based on infinity.
113     SkASSERT(shaderCaps.fInfinitySupport != (attribs & PatchAttribs::kExplicitCurveType));
114     // pts 0..3 define the stroke as a cubic bezier. If p3.y is infinity, then it's a conic
115     // with w=p3.x.
116     //
117     // An empty stroke (p0==p1==p2==p3) is a special case that denotes a circle, or
118     // 180-degree point stroke.
119     fAttribs.emplace_back("pts01Attr", kFloat4_GrVertexAttribType, SkSLType::kFloat4);
120     fAttribs.emplace_back("pts23Attr", kFloat4_GrVertexAttribType, SkSLType::kFloat4);
121 
122     // argsAttr contains the lastControlPoint for setting up the join.
123     fAttribs.emplace_back("argsAttr", kFloat2_GrVertexAttribType, SkSLType::kFloat2);
124 
125     if (fPatchAttribs & PatchAttribs::kStrokeParams) {
126         fAttribs.emplace_back("dynamicStrokeAttr", kFloat2_GrVertexAttribType,
127                               SkSLType::kFloat2);
128     }
129     if (fPatchAttribs & PatchAttribs::kColor) {
130         fAttribs.emplace_back("dynamicColorAttr",
131                               (fPatchAttribs & PatchAttribs::kWideColorIfEnabled)
132                                       ? kFloat4_GrVertexAttribType
133                                       : kUByte4_norm_GrVertexAttribType,
134                               SkSLType::kHalf4);
135     }
136     if (fPatchAttribs & PatchAttribs::kExplicitCurveType) {
137         // A conic curve is written out with p3=[w,Infinity], but GPUs that don't support
138         // infinity can't detect this. On these platforms we write out an extra float with each
139         // patch that explicitly tells the shader what type of curve it is.
140         fAttribs.emplace_back("curveTypeAttr", kFloat_GrVertexAttribType, SkSLType::kFloat);
141     }
142 
143     this->setInstanceAttributesWithImplicitOffsets(fAttribs.data(), fAttribs.size());
144     SkASSERT(this->instanceStride() == sizeof(SkPoint) * 4 + PatchAttribsStride(fPatchAttribs));
145     if (!shaderCaps.fVertexIDSupport) {
146         constexpr static Attribute kVertexAttrib("edgeID", kFloat_GrVertexAttribType,
147                                                     SkSLType::kFloat);
148         this->setVertexAttributesWithImplicitOffsets(&kVertexAttrib, 1);
149     }
150     SkASSERT(fAttribs.size() <= kMaxAttribCount);
151 }
152 
153 // This base class emits shader code for our parametric/radial stroke tessellation algorithm
154 // described above. The subclass emits its own specific setup code before calling into
155 // emitTessellationCode and emitFragment code.
156 class GrStrokeTessellationShader::Impl : public ProgramImpl {
157     void onEmitCode(EmitArgs&, GrGPArgs*) override;
158 
159     // Emits code that calculates the vertex position and any other inputs to the fragment shader.
160     // The onEmitCode() is responsible to define the following symbols before calling this method:
161     //
162     //     // Functions.
163     //     float2 unchecked_mix(float2, float2, float);
164     //     float unchecked_mix(float, float, float);
165     //
166     //     // Values provided by either uniforms or attribs.
167     //     float2 p0, p1, p2, p3;
168     //     float w;
169     //     float STROKE_RADIUS;
170     //     float 2x2 AFFINE_MATRIX;
171     //     float2 TRANSLATE;
172     //
173     //     // Values calculated by the specific subclass.
174     //     float combinedEdgeID;
175     //     bool isFinalEdge;
176     //     float numParametricSegments;
177     //     float radsPerSegment;
178     //     float2 tan0; // Must be pre-normalized
179     //     float2 tan1; // Must be pre-normalized
180     //     float strokeOutset;
181     //
182     void emitTessellationCode(const GrStrokeTessellationShader& shader, SkString* code,
183                               GrGPArgs* gpArgs, const GrShaderCaps& shaderCaps) const;
184 
185     // Emits all necessary fragment code. If using dynamic color, the impl is responsible to set up
186     // a half4 varying for color and provide its name in 'fDynamicColorName'.
187     void emitFragmentCode(const GrStrokeTessellationShader&, const EmitArgs&);
188 
189     void setData(const GrGLSLProgramDataManager& pdman, const GrShaderCaps&,
190                  const GrGeometryProcessor&) final;
191 
192     GrGLSLUniformHandler::UniformHandle fTessControlArgsUniform;
193     GrGLSLUniformHandler::UniformHandle fTranslateUniform;
194     GrGLSLUniformHandler::UniformHandle fAffineMatrixUniform;
195     GrGLSLUniformHandler::UniformHandle fColorUniform;
196     SkString fDynamicColorName;
197 };
198 
onEmitCode(EmitArgs & args,GrGPArgs * gpArgs)199 void GrStrokeTessellationShader::Impl::onEmitCode(EmitArgs& args, GrGPArgs* gpArgs) {
200     const auto& shader = args.fGeomProc.cast<GrStrokeTessellationShader>();
201     SkPaint::Join joinType = shader.stroke().getJoin();
202     args.fVaryingHandler->emitAttributes(shader);
203 
204     args.fVertBuilder->defineConstant("float", "PI", "3.141592653589793238");
205     args.fVertBuilder->defineConstant("PRECISION", skgpu::tess::kPrecision);
206     // There is an artificial maximum number of edges (compared to the max limit calculated based on
207     // the number of radial segments per radian, Wang's formula, and join type). When there is
208     // vertex ID support, the limit is what can be represented in a uint16; otherwise the limit is
209     // the size of the fallback vertex buffer.
210     float maxEdges = args.fShaderCaps->fVertexIDSupport ? FixedCountStrokes::kMaxEdges
211                                                         : FixedCountStrokes::kMaxEdgesNoVertexIDs;
212     args.fVertBuilder->defineConstant("NUM_TOTAL_EDGES", maxEdges);
213 
214     // Helper functions.
215     if (shader.hasDynamicStroke()) {
216         args.fVertBuilder->insertFunction(kNumRadialSegmentsPerRadianFn);
217     }
218     args.fVertBuilder->insertFunction(kRobustNormalizeDiffFn);
219     args.fVertBuilder->insertFunction(kCosineBetweenUnitVectorsFn);
220     args.fVertBuilder->insertFunction(kMiterExtentFn);
221     args.fVertBuilder->insertFunction(kUncheckedMixFn);
222     args.fVertBuilder->insertFunction(GrTessellationShader::WangsFormulaSkSL());
223 
224     // Tessellation control uniforms and/or dynamic attributes.
225     if (!shader.hasDynamicStroke()) {
226         // [NUM_RADIAL_SEGMENTS_PER_RADIAN, JOIN_TYPE, STROKE_RADIUS]
227         const char* tessArgsName;
228         fTessControlArgsUniform = args.fUniformHandler->addUniform(
229                 nullptr, kVertex_GrShaderFlag, SkSLType::kFloat3, "tessControlArgs",
230                 &tessArgsName);
231         args.fVertBuilder->codeAppendf(
232         "float NUM_RADIAL_SEGMENTS_PER_RADIAN = %s.x;"
233         "float JOIN_TYPE = %s.y;"
234         "float STROKE_RADIUS = %s.z;", tessArgsName, tessArgsName, tessArgsName);
235     } else {
236         // The shader does not currently support dynamic hairlines, so this case only needs to
237         // configure NUM_RADIAL_SEGMENTS_PER_RADIAN based on the fixed maxScale and per-instance
238         // stroke radius attribute that's defined in local space.
239         SkASSERT(!shader.stroke().isHairlineStyle());
240         const char* maxScaleName;
241         fTessControlArgsUniform = args.fUniformHandler->addUniform(
242                 nullptr, kVertex_GrShaderFlag, SkSLType::kFloat, "maxScale",
243                 &maxScaleName);
244         args.fVertBuilder->codeAppendf(
245         "float STROKE_RADIUS = dynamicStrokeAttr.x;"
246         "float JOIN_TYPE = dynamicStrokeAttr.y;"
247         "float NUM_RADIAL_SEGMENTS_PER_RADIAN = num_radial_segments_per_radian("
248                 "%s * STROKE_RADIUS);", maxScaleName);
249 
250     }
251 
252     if (shader.hasDynamicColor()) {
253         // Create a varying for color to get passed in through.
254         GrGLSLVarying dynamicColor{SkSLType::kHalf4};
255         args.fVaryingHandler->addVarying("dynamicColor", &dynamicColor);
256         args.fVertBuilder->codeAppendf("%s = dynamicColorAttr;", dynamicColor.vsOut());
257         fDynamicColorName = dynamicColor.fsIn();
258     }
259 
260     // View matrix uniforms.
261     const char* translateName, *affineMatrixName;
262     fAffineMatrixUniform = args.fUniformHandler->addUniform(nullptr, kVertex_GrShaderFlag,
263                                                             SkSLType::kFloat4, "affineMatrix",
264                                                             &affineMatrixName);
265     fTranslateUniform = args.fUniformHandler->addUniform(nullptr, kVertex_GrShaderFlag,
266                                                          SkSLType::kFloat2, "translate",
267                                                          &translateName);
268     args.fVertBuilder->codeAppendf("float2x2 AFFINE_MATRIX = float2x2(%s.xy, %s.zw);\n",
269                                    affineMatrixName, affineMatrixName);
270     args.fVertBuilder->codeAppendf("float2 TRANSLATE = %s;\n", translateName);
271 
272     if (shader.hasExplicitCurveType()) {
273         args.fVertBuilder->insertFunction(SkStringPrintf(
274         "bool is_conic_curve() { return curveTypeAttr != %g; }",
275             skgpu::tess::kCubicCurveType).c_str());
276     } else {
277         args.fVertBuilder->insertFunction(
278         "bool is_conic_curve() { return isinf(pts23Attr.w); }");
279     }
280 
281     // Tessellation code.
282     args.fVertBuilder->codeAppend(
283     "float2 p0=pts01Attr.xy, p1=pts01Attr.zw, p2=pts23Attr.xy, p3=pts23Attr.zw;"
284     "float2 lastControlPoint = argsAttr.xy;"
285     "float w = -1;"  // w<0 means the curve is an integral cubic.
286     "if (is_conic_curve()) {"
287         // Conics are 3 points, with the weight in p3.
288         "w = p3.x;"
289         "p3 = p2;"  // Setting p3 equal to p2 works for the remaining rotational logic.
290     "}"
291     );
292 
293     // Emit code to call Wang's formula to determine parametric segments. We do this before
294     // transform points for hairlines so that it is consistent with how the CPU tested the control
295     // points for chopping.
296     args.fVertBuilder->codeAppend(
297     // Find how many parametric segments this stroke requires.
298     "float numParametricSegments;"
299     "if (w < 0) {"
300         "if (p0 == p1 && p2 == p3) {"
301             "numParametricSegments = 1;" // a line
302         "} else {"
303             "numParametricSegments = wangs_formula_cubic(PRECISION, p0, p1, p2, p3, AFFINE_MATRIX);"
304         "}"
305     "} else {"
306         "numParametricSegments = wangs_formula_conic(PRECISION,"
307                                                     "AFFINE_MATRIX * p0,"
308                                                     "AFFINE_MATRIX * p1,"
309                                                     "AFFINE_MATRIX * p2, w);"
310     "}"
311     );
312 
313     if (shader.stroke().isHairlineStyle()) {
314         // Hairline case. Transform the points before tessellation. We can still hold off on the
315         // translate until the end; we just need to perform the scale and skew right now.
316         args.fVertBuilder->codeAppend(
317         "p0 = AFFINE_MATRIX * p0;"
318         "p1 = AFFINE_MATRIX * p1;"
319         "p2 = AFFINE_MATRIX * p2;"
320         "p3 = AFFINE_MATRIX * p3;"
321         "lastControlPoint = AFFINE_MATRIX * lastControlPoint;"
322         );
323     }
324 
325     args.fVertBuilder->codeAppend(
326     // Find the starting and ending tangents.
327     "float2 tan0 = robust_normalize_diff((p0 == p1) ? ((p1 == p2) ? p3 : p2) : p1, p0);"
328     "float2 tan1 = robust_normalize_diff(p3, (p3 == p2) ? ((p2 == p1) ? p0 : p1) : p2);"
329     "if (tan0 == float2(0)) {"
330         // The stroke is a point. This special case tells us to draw a stroke-width circle as a
331         // 180 degree point stroke instead.
332         "tan0 = float2(1,0);"
333         "tan1 = float2(-1,0);"
334     "}"
335     );
336 
337     if (args.fShaderCaps->fVertexIDSupport) {
338         // If we don't have sk_VertexID support then "edgeID" already came in as a vertex attrib.
339         args.fVertBuilder->codeAppend(
340         "float edgeID = float(sk_VertexID >> 1);"
341         "if ((sk_VertexID & 1) != 0) {"
342             "edgeID = -edgeID;"
343         "}"
344         );
345     }
346 
347     // Potential optimization: (shader.hasDynamicStroke() && shader.hasRoundJoins())?
348     if (shader.stroke().getJoin() == SkPaint::kRound_Join || shader.hasDynamicStroke()) {
349         args.fVertBuilder->codeAppend(
350         // Determine how many edges to give to the round join. We emit the first and final edges
351         // of the join twice: once full width and once restricted to half width. This guarantees
352         // perfect seaming by matching the vertices from the join as well as from the strokes on
353         // either side.
354         "float2 prevTan = robust_normalize_diff(p0, lastControlPoint);"
355         "float joinRads = acos(cosine_between_unit_vectors(prevTan, tan0));"
356         "float numRadialSegmentsInJoin = max(ceil(joinRads * NUM_RADIAL_SEGMENTS_PER_RADIAN), 1);"
357         // +2 because we emit the beginning and ending edges twice (see above comment).
358         "float numEdgesInJoin = numRadialSegmentsInJoin + 2;"
359         // The stroke section needs at least two edges. Don't assign more to the join than
360         // "NUM_TOTAL_EDGES - 2". (This is only relevant when the ideal max edge count calculated
361         // on the CPU had to be limited to NUM_TOTAL_EDGES in the draw call).
362         "numEdgesInJoin = min(numEdgesInJoin, NUM_TOTAL_EDGES - 2);");
363         if (shader.hasDynamicStroke()) {
364             args.fVertBuilder->codeAppend(
365             "if (JOIN_TYPE >= 0) {" // Is the join not a round type?
366                 // Bevel and miter joins get 1 and 2 segments respectively.
367                 // +2 because we emit the beginning and ending edges twice (see above comments).
368                 "numEdgesInJoin = sign(JOIN_TYPE) + 1 + 2;"
369             "}");
370         }
371     } else {
372         args.fVertBuilder->codeAppendf("float numEdgesInJoin = %i;",
373         skgpu::tess::NumFixedEdgesInJoin(joinType));
374     }
375 
376     args.fVertBuilder->codeAppend(
377     // Find which direction the curve turns.
378     // NOTE: Since the curve is not allowed to inflect, we can just check F'(.5) x F''(.5).
379     // NOTE: F'(.5) x F''(.5) has the same sign as (P2 - P0) x (P3 - P1)
380     "float turn = cross_length_2d(p2 - p0, p3 - p1);"
381     "float combinedEdgeID = abs(edgeID) - numEdgesInJoin;"
382     "if (combinedEdgeID < 0) {"
383         "tan1 = tan0;"
384         // Don't let tan0 become zero. The code as-is isn't built to handle that case. tan0=0
385         // means the join is disabled, and to disable it with the existing code we can leave
386         // tan0 equal to tan1.
387         "if (lastControlPoint != p0) {"
388             "tan0 = robust_normalize_diff(p0, lastControlPoint);"
389         "}"
390         "turn = cross_length_2d(tan0, tan1);"
391     "}"
392 
393     // Calculate the curve's starting angle and rotation.
394     "float cosTheta = cosine_between_unit_vectors(tan0, tan1);"
395     "float rotation = acos(cosTheta);"
396     "if (turn < 0) {"
397         // Adjust sign of rotation to match the direction the curve turns.
398         "rotation = -rotation;"
399     "}"
400 
401     "float numRadialSegments;"
402     "float strokeOutset = sign(edgeID);"
403     "if (combinedEdgeID < 0) {"
404         // We belong to the preceding join. The first and final edges get duplicated, so we only
405         // have "numEdgesInJoin - 2" segments.
406         "numRadialSegments = numEdgesInJoin - 2;"
407         "numParametricSegments = 1;"  // Joins don't have parametric segments.
408         "p3 = p2 = p1 = p0;"  // Colocate all points on the junction point.
409         // Shift combinedEdgeID to the range [-1, numRadialSegments]. This duplicates the first
410         // edge and lands one edge at the very end of the join. (The duplicated final edge will
411         // actually come from the section of our strip that belongs to the stroke.)
412         "combinedEdgeID += numRadialSegments + 1;"
413         // We normally restrict the join on one side of the junction, but if the tangents are
414         // nearly equivalent this could theoretically result in bad seaming and/or cracks on the
415         // side we don't put it on. If the tangents are nearly equivalent then we leave the join
416         // double-sided.
417        " float sinEpsilon = 1e-2;"  // ~= sin(180deg / 3000)
418         "bool tangentsNearlyParallel ="
419                 "(abs(turn) * inversesqrt(dot(tan0, tan0) * dot(tan1, tan1))) < sinEpsilon;"
420         "if (!tangentsNearlyParallel || dot(tan0, tan1) < 0) {"
421             // There are two edges colocated at the beginning. Leave the first one double sided
422             // for seaming with the previous stroke. (The double sided edge at the end will
423             // actually come from the section of our strip that belongs to the stroke.)
424             "if (combinedEdgeID >= 0) {"
425                 "strokeOutset = (turn < 0) ? min(strokeOutset, 0) : max(strokeOutset, 0);"
426             "}"
427         "}"
428         "combinedEdgeID = max(combinedEdgeID, 0);"
429     "} else {"
430         // We belong to the stroke. Unless NUM_RADIAL_SEGMENTS_PER_RADIAN is incredibly high,
431         // clamping to maxCombinedSegments will be a no-op because the draw call was invoked with
432         // sufficient vertices to cover the worst case scenario of 180 degree rotation.
433         "float maxCombinedSegments = NUM_TOTAL_EDGES - numEdgesInJoin - 1;"
434         "numRadialSegments = max(ceil(abs(rotation) * NUM_RADIAL_SEGMENTS_PER_RADIAN), 1);"
435         "numRadialSegments = min(numRadialSegments, maxCombinedSegments);"
436         "numParametricSegments = min(numParametricSegments,"
437                                     "maxCombinedSegments - numRadialSegments + 1);"
438     "}"
439 
440     // Additional parameters for emitTessellationCode().
441     "float radsPerSegment = rotation / numRadialSegments;"
442     "float numCombinedSegments = numParametricSegments + numRadialSegments - 1;"
443     "bool isFinalEdge = (combinedEdgeID >= numCombinedSegments);"
444     "if (combinedEdgeID > numCombinedSegments) {"
445         "strokeOutset = 0;"  // The strip has more edges than we need. Drop this one.
446     "}");
447 
448     if (joinType == SkPaint::kMiter_Join || shader.hasDynamicStroke()) {
449         args.fVertBuilder->codeAppendf(
450         // Edge #2 extends to the miter point.
451         "if (abs(edgeID) == 2 && %s) {"
452             "strokeOutset *= miter_extent(cosTheta, JOIN_TYPE);" // miterLimit
453         "}", shader.hasDynamicStroke() ? "JOIN_TYPE > 0" /*Is the join a miter type?*/ : "true");
454     }
455 
456     this->emitTessellationCode(shader, &args.fVertBuilder->code(), gpArgs, *args.fShaderCaps);
457 
458     this->emitFragmentCode(shader, args);
459 }
460 
emitTessellationCode(const GrStrokeTessellationShader & shader,SkString * code,GrGPArgs * gpArgs,const GrShaderCaps & shaderCaps) const461 void GrStrokeTessellationShader::Impl::emitTessellationCode(
462         const GrStrokeTessellationShader& shader, SkString* code, GrGPArgs* gpArgs,
463         const GrShaderCaps& shaderCaps) const {
464     // The subclass is responsible to define the following symbols before calling this method:
465     //
466     //     // Functions.
467     //     float2 unchecked_mix(float2, float2, float);
468     //     float unchecked_mix(float, float, float);
469     //
470     //     // Values provided by either uniforms or attribs.
471     //     float2 p0, p1, p2, p3;
472     //     float w;
473     //     float STROKE_RADIUS;
474     //     float 2x2 AFFINE_MATRIX;
475     //     float2 TRANSLATE;
476     //
477     //     // Values calculated by the specific subclass.
478     //     float combinedEdgeID;
479     //     bool isFinalEdge;
480     //     float numParametricSegments;
481     //     float radsPerSegment;
482     //     float2 tan0; // Must be pre-normalized
483     //     float2 tan1; // Must be pre-normalized
484     //     float strokeOutset;
485     //
486     code->appendf(
487     "float2 tangent, strokeCoord;"
488     "if (combinedEdgeID != 0 && !isFinalEdge) {"
489         // Compute the location and tangent direction of the stroke edge with the integral id
490         // "combinedEdgeID", where combinedEdgeID is the sorted-order index of parametric and radial
491         // edges. Start by finding the tangent function's power basis coefficients. These define a
492         // tangent direction (scaled by some uniform value) as:
493         //                                                 |T^2|
494         //     Tangent_Direction(T) = dx,dy = |A  2B  C| * |T  |
495         //                                    |.   .  .|   |1  |
496         "float2 A, B, C = p1 - p0;"
497         "float2 D = p3 - p0;"
498         "if (w >= 0.0) {"
499             // P0..P2 represent a conic and P3==P2. The derivative of a conic has a cumbersome
500             // order-4 denominator. However, this isn't necessary if we are only interested in a
501             // vector in the same *direction* as a given tangent line. Since the denominator scales
502             // dx and dy uniformly, we can throw it out completely after evaluating the derivative
503             // with the standard quotient rule. This leaves us with a simpler quadratic function
504             // that we use to find a tangent.
505             "C *= w;"
506             "B = .5*D - C;"
507             "A = (w - 1.0) * D;"
508             "p1 *= w;"
509         "} else {"
510             "float2 E = p2 - p1;"
511             "B = E - C;"
512             "A = fma(float2(-3), E, D);"
513         "}"
514         // FIXME(crbug.com/800804,skbug.com/11268): Consider normalizing the exponents in A,B,C at
515         // this point in order to prevent fp32 overflow.
516 
517         // Now find the coefficients that give a tangent direction from a parametric edge ID:
518         //
519         //                                                                 |parametricEdgeID^2|
520         //     Tangent_Direction(parametricEdgeID) = dx,dy = |A  B_  C_| * |parametricEdgeID  |
521         //                                                   |.   .   .|   |1                 |
522         //
523         "float2 B_ = B * (numParametricSegments * 2.0);"
524         "float2 C_ = C * (numParametricSegments * numParametricSegments);"
525 
526         // Run a binary search to determine the highest parametric edge that is located on or before
527         // the combinedEdgeID. A combined ID is determined by the sum of complete parametric and
528         // radial segments behind it. i.e., find the highest parametric edge where:
529         //
530         //    parametricEdgeID + floor(numRadialSegmentsAtParametricT) <= combinedEdgeID
531         //
532         "float lastParametricEdgeID = 0.0;"
533         "float maxParametricEdgeID = min(numParametricSegments - 1.0, combinedEdgeID);"
534         "float negAbsRadsPerSegment = -abs(radsPerSegment);"
535         "float maxRotation0 = (1.0 + combinedEdgeID) * abs(radsPerSegment);"
536         "for (int exp = %i - 1; exp >= 0; --exp) {"
537             // Test the parametric edge at lastParametricEdgeID + 2^exp.
538             "float testParametricID = lastParametricEdgeID + exp2(float(exp));"
539             "if (testParametricID <= maxParametricEdgeID) {"
540                 "float2 testTan = fma(float2(testParametricID), A, B_);"
541                 "testTan = fma(float2(testParametricID), testTan, C_);"
542                 "float cosRotation = dot(normalize(testTan), tan0);"
543                 "float maxRotation = fma(testParametricID, negAbsRadsPerSegment, maxRotation0);"
544                 "maxRotation = min(maxRotation, PI);"
545                 // Is rotation <= maxRotation? (i.e., is the number of complete radial segments
546                 // behind testT, + testParametricID <= combinedEdgeID?)
547                 "if (cosRotation >= cos(maxRotation)) {"
548                     // testParametricID is on or before the combinedEdgeID. Keep it!
549                     "lastParametricEdgeID = testParametricID;"
550                 "}"
551             "}"
552         "}"
553 
554         // Find the T value of the parametric edge at lastParametricEdgeID.
555         "float parametricT = lastParametricEdgeID / numParametricSegments;"
556 
557         // Now that we've identified the highest parametric edge on or before the
558         // combinedEdgeID, the highest radial edge is easy:
559         "float lastRadialEdgeID = combinedEdgeID - lastParametricEdgeID;"
560 
561         // Find the angle of tan0, i.e. the angle between tan0 and the positive x axis.
562         "float angle0 = acos(clamp(tan0.x, -1.0, 1.0));"
563         "angle0 = tan0.y >= 0.0 ? angle0 : -angle0;"
564 
565         // Find the tangent vector on the edge at lastRadialEdgeID. By construction it is already
566         // normalized.
567         "float radialAngle = fma(lastRadialEdgeID, radsPerSegment, angle0);"
568         "tangent = float2(cos(radialAngle), sin(radialAngle));"
569         "float2 norm = float2(-tangent.y, tangent.x);"
570 
571         // Find the T value where the tangent is orthogonal to norm. This is a quadratic:
572         //
573         //     dot(norm, Tangent_Direction(T)) == 0
574         //
575         //                         |T^2|
576         //     norm * |A  2B  C| * |T  | == 0
577         //            |.   .  .|   |1  |
578         //
579         "float a=dot(norm,A), b_over_2=dot(norm,B), c=dot(norm,C);"
580         "float discr_over_4 = max(b_over_2*b_over_2 - a*c, 0.0);"
581         "float q = sqrt(discr_over_4);"
582         "if (b_over_2 > 0.0) {"
583             "q = -q;"
584         "}"
585         "q -= b_over_2;"
586 
587         // Roots are q/a and c/q. Since each curve section does not inflect or rotate more than 180
588         // degrees, there can only be one tangent orthogonal to "norm" inside 0..1. Pick the root
589         // nearest .5.
590         "float _5qa = -.5*q*a;"
591         "float2 root = (abs(fma(q,q,_5qa)) < abs(fma(a,c,_5qa))) ? float2(q,a) : float2(c,q);"
592         "float radialT = (root.t != 0.0) ? root.s / root.t : 0.0;"
593         "radialT = clamp(radialT, 0.0, 1.0);"
594 
595         "if (lastRadialEdgeID == 0.0) {"
596             // The root finder above can become unstable when lastRadialEdgeID == 0 (e.g., if
597             // there are roots at exactly 0 and 1 both). radialT should always == 0 in this case.
598             "radialT = 0.0;"
599         "}"
600 
601         // Now that we've identified the T values of the last parametric and radial edges, our final
602         // T value for combinedEdgeID is whichever is larger.
603         "float T = max(parametricT, radialT);"
604 
605         // Evaluate the cubic at T. Use De Casteljau's for its accuracy and stability.
606         "float2 ab = unchecked_mix(p0, p1, T);"
607         "float2 bc = unchecked_mix(p1, p2, T);"
608         "float2 cd = unchecked_mix(p2, p3, T);"
609         "float2 abc = unchecked_mix(ab, bc, T);"
610         "float2 bcd = unchecked_mix(bc, cd, T);"
611         "float2 abcd = unchecked_mix(abc, bcd, T);"
612 
613         // Evaluate the conic weight at T.
614         "float u = unchecked_mix(1.0, w, T);"
615         "float v = w + 1 - u;"  // == mix(w, 1, T)
616         "float uv = unchecked_mix(u, v, T);"
617 
618         // If we went with T=parametricT, then update the tangent. Otherwise leave it at the radial
619         // tangent found previously. (In the event that parametricT == radialT, we keep the radial
620         // tangent.)
621         "if (T != radialT) {"
622             // We must re-normalize here because the tangent is determined by the curve coefficients
623             "tangent = w >= 0.0 ? robust_normalize_diff(bc*u, ab*v)"
624                                ": robust_normalize_diff(bcd, abc);"
625         "}"
626 
627         "strokeCoord = (w >= 0.0) ? abc/uv : abcd;"
628     "} else {"
629         // Edges at the beginning and end of the strip use exact endpoints and tangents. This
630         // ensures crack-free seaming between instances.
631         "tangent = (combinedEdgeID == 0) ? tan0 : tan1;"
632         "strokeCoord = (combinedEdgeID == 0) ? p0 : p3;"
633     "}", skgpu::tess::kMaxResolveLevel /* Parametric/radial sort loop count. */);
634 
635     code->append(
636     // At this point 'tangent' is normalized, so the orthogonal vector is also normalized.
637     "float2 ortho = float2(tangent.y, -tangent.x);"
638     "strokeCoord += ortho * (STROKE_RADIUS * strokeOutset);");
639 
640     if (!shader.stroke().isHairlineStyle()) {
641         // Normal case. Do the transform after tessellation.
642         code->append("float2 devCoord = AFFINE_MATRIX * strokeCoord + TRANSLATE;");
643         gpArgs->fPositionVar.set(SkSLType::kFloat2, "devCoord");
644         gpArgs->fLocalCoordVar.set(SkSLType::kFloat2, "strokeCoord");
645     } else {
646         // Hairline case. The scale and skew already happened before tessellation.
647         code->append(
648         "float2 devCoord = strokeCoord + TRANSLATE;"
649         "float2 localCoord = inverse(AFFINE_MATRIX) * strokeCoord;");
650         gpArgs->fPositionVar.set(SkSLType::kFloat2, "devCoord");
651         gpArgs->fLocalCoordVar.set(SkSLType::kFloat2, "localCoord");
652     }
653 }
654 
emitFragmentCode(const GrStrokeTessellationShader & shader,const EmitArgs & args)655 void GrStrokeTessellationShader::Impl::emitFragmentCode(const GrStrokeTessellationShader& shader,
656                                                         const EmitArgs& args) {
657     if (!shader.hasDynamicColor()) {
658         // The fragment shader just outputs a uniform color.
659         const char* colorUniformName;
660         fColorUniform = args.fUniformHandler->addUniform(nullptr, kFragment_GrShaderFlag,
661                                                          SkSLType::kHalf4, "color",
662                                                          &colorUniformName);
663         args.fFragBuilder->codeAppendf("half4 %s = %s;", args.fOutputColor, colorUniformName);
664     } else {
665         args.fFragBuilder->codeAppendf("half4 %s = %s;", args.fOutputColor,
666                                        fDynamicColorName.c_str());
667     }
668     args.fFragBuilder->codeAppendf("const half4 %s = half4(1);", args.fOutputCoverage);
669 }
670 
setData(const GrGLSLProgramDataManager & pdman,const GrShaderCaps &,const GrGeometryProcessor & geomProc)671 void GrStrokeTessellationShader::Impl::setData(const GrGLSLProgramDataManager& pdman,
672                                                const GrShaderCaps&,
673                                                const GrGeometryProcessor& geomProc) {
674     const auto& shader = geomProc.cast<GrStrokeTessellationShader>();
675     const auto& stroke = shader.stroke();
676 
677     // getMaxScale() returns -1 if it can't compute a scale factor (e.g. perspective), taking the
678     // absolute value automatically converts that to an identity scale factor for our purposes.
679     const float maxScale = std::abs(shader.viewMatrix().getMaxScale());
680     if (!shader.hasDynamicStroke()) {
681         // Set up the tessellation control uniforms. In the hairline case we transform prior to
682         // tessellation, so it will be defined in device space units instead of local units.
683         const float strokeRadius = 0.5f * (stroke.isHairlineStyle() ? 1.f : stroke.getWidth());
684         float numRadialSegmentsPerRadian = skgpu::tess::CalcNumRadialSegmentsPerRadian(
685                 (stroke.isHairlineStyle() ? 1.f : maxScale) * strokeRadius);
686 
687         pdman.set3f(fTessControlArgsUniform,
688                     numRadialSegmentsPerRadian,  // NUM_RADIAL_SEGMENTS_PER_RADIAN
689                     skgpu::tess::GetJoinType(stroke),  // JOIN_TYPE
690                     strokeRadius);  // STROKE_RADIUS
691     } else {
692         SkASSERT(!stroke.isHairlineStyle());
693         pdman.set1f(fTessControlArgsUniform, maxScale);
694     }
695 
696     // Set up the view matrix, if any.
697     const SkMatrix& m = shader.viewMatrix();
698     pdman.set2f(fTranslateUniform, m.getTranslateX(), m.getTranslateY());
699     pdman.set4f(fAffineMatrixUniform, m.getScaleX(), m.getSkewY(), m.getSkewX(),
700                 m.getScaleY());
701 
702     if (!shader.hasDynamicColor()) {
703         pdman.set4fv(fColorUniform, 1, shader.color().vec());
704     }
705 }
706 
addToKey(const GrShaderCaps &,skgpu::KeyBuilder * b) const707 void GrStrokeTessellationShader::addToKey(const GrShaderCaps&, skgpu::KeyBuilder* b) const {
708     bool keyNeedsJoin = !(fPatchAttribs & PatchAttribs::kStrokeParams);
709     SkASSERT(fStroke.getJoin() >> 2 == 0);
710     // Attribs get worked into the key automatically during GrGeometryProcessor::getAttributeKey().
711     // When color is in a uniform, it's always wide. kWideColor doesn't need to be considered here.
712     uint32_t key = (uint32_t)(fPatchAttribs & ~PatchAttribs::kColor);
713     key = (key << 2) | ((keyNeedsJoin) ? fStroke.getJoin() : 0);
714     key = (key << 1) | (uint32_t)fStroke.isHairlineStyle();
715     b->add32(key);
716 }
717 
makeProgramImpl(const GrShaderCaps &) const718 std::unique_ptr<GrGeometryProcessor::ProgramImpl> GrStrokeTessellationShader::makeProgramImpl(
719         const GrShaderCaps&) const {
720     return std::make_unique<Impl>();
721 }
722