/*
 * Copyright 2023 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/graphite/render/PerEdgeAAQuadRenderStep.h"

#include "include/core/SkM44.h"
#include "include/private/base/SkAssert.h"
#include "include/private/base/SkDebug.h"
#include "include/private/base/SkFloatingPoint.h"
#include "src/base/SkEnumBitMask.h"
#include "src/base/SkVx.h"
#include "src/core/SkSLTypeShared.h"
#include "src/gpu/BufferWriter.h"
#include "src/gpu/graphite/Attribute.h"
#include "src/gpu/graphite/BufferManager.h"
#include "src/gpu/graphite/DrawOrder.h"
#include "src/gpu/graphite/DrawParams.h"
#include "src/gpu/graphite/DrawTypes.h"
#include "src/gpu/graphite/DrawWriter.h"
#include "src/gpu/graphite/geom/EdgeAAQuad.h"
#include "src/gpu/graphite/geom/Geometry.h"
#include "src/gpu/graphite/geom/Rect.h"
#include "src/gpu/graphite/geom/Transform_graphite.h"
#include "src/gpu/graphite/render/CommonDepthStencilSettings.h"

#include <cstdint>
#include <string_view>

// This RenderStep is specialized to draw filled rectangles with per-edge AA.
//
// Each of these "primitives" is represented by a single instance. The instance attributes are
// flexible enough to describe per-edge AA quads without relying on uniforms to define its
// operation. The attributes encode shape as follows:
//
// float4 edgeFlags - per-edge AA defined by each component: aa != 0.
// float4 quadXs - these values provide the X coordinates of the quadrilateral in top-left CW order.
// float4 quadYs - these values provide the Y coordinates of the quadrilateral.
//
// From the other direction, per-edge AA quads produce instance values like:
//  - [aa(t,r,b,l) ? 255 : 0]   [xs(tl,tr,br,bl)]     [ys(tl,tr,br,bl)]
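//
// For example (illustrative values, not taken from an actual draw): an axis-aligned rectangle
// with TL=(10,20) and BR=(50,60) and all four edges AA would be encoded as
//  - [255,255,255,255]         [10,50,50,10]         [20,20,60,60]
// following the top-left clockwise ordering described above.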
//
// From this encoding, data can be unpacked for each corner; the corners are equivalent under
// rotational symmetry. Per-edge quads are always mitered and fill the interior, but the
// vertices are placed such that the edge coverage ramps can collapse to 0 area on non-AA edges.
//
// The vertices that describe each corner are placed so that edges and miters calculate
// coverage by interpolating a varying and then clamping in the fragment shader. Triangles that
// cover the inner and outer curves calculate distance to the curve within the fragment shader.
//
// See https://docs.google.com/presentation/d/1MCPstNsSlDBhR8CrsJo0r-cZNbu-sEJEvU9W94GOJoY/edit?usp=sharing
// for diagrams and an explanation of how the geometry is defined.
//
// PerEdgeAAQuadRenderStep uses the common technique of approximating distance to the level set by
// a first-order Taylor expansion of the level set's equation. Given a level set function
// C(x,y), this amounts to calculating C(px,py)/|∇C(px,py)|. For the straight edges the level set
// is linear, calculated in the vertex shader, and then interpolated exactly over the rectangle.
// This provides distances to all four exterior edges within the fragment shader and allows it to
// reconstruct a relative position per elliptical corner. Unfortunately this requires the fragment
// shader to calculate the length of the gradient for straight edges instead of interpolating
// exact device-space distance.
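//
// For intuition about that approximation: if C is already linear, C(x,y) = a*x + b*y + c, then
// ∇C = (a,b) is constant and C(px,py)/|∇C| = (a*px + b*py + c)/sqrt(a^2 + b^2) is the exact
// signed distance to the zero set, so the first-order approximation only loses accuracy when
// the level set is curved.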
//
// Unlike AnalyticRRectRenderStep, for per-edge AA quads it's valid to have each pixel calculate a
// single corner's coverage that's controlled via the vertex shader. Any bias is a constant 1/2,
// so this is also added in the vertex shader.
//
// Analytic derivatives are used so that a single pipeline can be used regardless of HW derivative
// support or for geometry that would prove difficult for forward differencing. The device-space
// gradient for ellipses is calculated per-pixel by transforming a per-pixel local gradient vector
// with the Jacobian of the inverse local-to-device transform:
//
// (px,py) is the projected point of (u,v) transformed by a 3x3 matrix, M:
//                [x(u,v) / w(u,v)]       [x]   [m00 m01 m02] [u]
//      (px,py) = [y(u,v) / w(u,v)] where [y] = [m10 m11 m12]X[v] = M*(u,v,1)
//                                        [w]   [m20 m21 m22] [1]
//
// C(px,py) can be defined in terms of a local Cl(u,v) as C(px,py) = Cl(p^-1(px,py)), where p^-1 =
//
//               [x'(px,py) / w'(px,py)]       [x']   [m00' m01' m02'] [px]
//      (u,v) =  [y'(px,py) / w'(px,py)] where [y'] = [m10' m11' m12']X[py] = M^-1*(px,py,1)
//                                             [w']   [m20' m21' m22'] [ 1]
//
// Note that if the 3x3 M was arrived at by dropping the 3rd row and column from a 4x4 (since we
// assume a local 3rd coordinate of 0), M^-1 is not equal to the 4x4 inverse with its 3rd row and
// column dropped.
//
// Using the chain rule, then ∇C(px,py)
//   =  ∇Cl(u,v)X[1/w'(px,py)     0       -x'(px,py)/w'(px,py)^2] [m00' m01']
//               [    0       1/w'(px,py) -y'(px,py)/w'(px,py)^2]X[m10' m11']
//                                                                [m20' m21']
//
//   = 1/w'(px,py)*∇Cl(u,v)X[1 0 -x'(px,py)/w'(px,py)] [m00' m01']
//                          [0 1 -y'(px,py)/w'(px,py)]X[m10' m11']
//                                                     [m20' m21']
//
//   = w(u,v)*∇Cl(u,v)X[1 0 -u] [m00' m01']
//                     [0 1 -v]X[m10' m11']
//                              [m20' m21']
//
//   = w(u,v)*∇Cl(u,v)X[m00'-m20'u m01'-m21'u]
//                     [m10'-m20'v m11'-m21'v]
//
// The vertex shader calculates the rightmost 2x2 matrix and interpolates it across the shape since
// each component is linear in (u,v). ∇Cl(u,v) is evaluated per pixel in the fragment shader and
// depends on which corner and edge are being evaluated. w(u,v) is the device-space W coordinate,
// so its reciprocal is provided in sk_FragCoord.w.
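//
// Putting these pieces together (the assembled formula, not a literal line from the shader), the
// per-pixel approximate device-space distance becomes
//
//      C(px,py)/|∇C(px,py)| = Cl(u,v) * sk_FragCoord.w / length(∇Cl(u,v) X [2x2 matrix above])
//
// since C(px,py) = Cl(u,v) by construction and sk_FragCoord.w supplies 1/w(u,v).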
namespace skgpu::graphite {

using AAFlags = EdgeAAQuad::Flags;

static bool is_clockwise(const EdgeAAQuad& quad) {
    if (quad.isRect()) {
        return true; // by construction, these are always locally clockwise
    }

    // This assumes that each corner has a consistent winding, which holds for convex inputs (an
    // assumption of the per-edge AA API). Check the sign of the cross product between the first
    // two edges.
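    // As a quick sanity check with made-up coordinates: the unit square TL=(0,0), TR=(1,0),
    // BR=(1,1), BL=(0,1) (with y growing downward in device space) gives
    // winding = (0-0)*(0-0) - (0-1)*(1-0) = 1 > 0, i.e. clockwise.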
    const skvx::float4& xs = quad.xs();
    const skvx::float4& ys = quad.ys();

    float winding = (xs[0] - xs[3])*(ys[1] - ys[0]) - (ys[0] - ys[3])*(xs[1] - xs[0]);
    if (winding == 0.f) {
        // The input possibly forms a triangle with duplicate vertices, so check the opposite corner
        winding = (xs[2] - xs[1])*(ys[3] - ys[2]) - (ys[2] - ys[1])*(xs[3] - xs[2]);
    }

    // At this point if winding is < 0, the quad's vertices are CCW. If it's still 0, the vertices
    // form a line, in which case the vertex shader constructs a correct CW winding. Otherwise,
    // the quad or triangle vertices produce a positive winding and are CW.
    return winding >= 0.f;
}

// Represents the per-vertex attributes used in each instance.
struct Vertex {
    SkV2 fNormal;
};

// Allowed values for the center weight instance value are selected at record time based on style
// and transform, and are defined such that when (instance-weight > vertex-weight) is true, the
// vertex should be snapped to the center instead of its regular calculation.
static constexpr int kCornerVertexCount = 4; // sk_VertexID is divided by this in SkSL
static constexpr int kVertexCount = 4 * kCornerVertexCount;
static constexpr int kIndexCount = 29;

static void write_index_buffer(VertexWriter writer) {
    static constexpr uint16_t kTL = 0 * kCornerVertexCount;
    static constexpr uint16_t kTR = 1 * kCornerVertexCount;
    static constexpr uint16_t kBR = 2 * kCornerVertexCount;
    static constexpr uint16_t kBL = 3 * kCornerVertexCount;

    static const uint16_t kIndices[kIndexCount] = {
        // Exterior AA ramp outset
        kTL+1,kTL+2,kTL+3,kTR+0,kTR+3,kTR+1,
        kTR+1,kTR+2,kTR+3,kBR+0,kBR+3,kBR+1,
        kBR+1,kBR+2,kBR+3,kBL+0,kBL+3,kBL+1,
        kBL+1,kBL+2,kBL+3,kTL+0,kTL+3,kTL+1,
        kTL+3,
        // Fill triangles
        kTL+3,kTR+3,kBL+3,kBR+3
    };
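    // The lone kTL+3 between the two labeled sections duplicates the first fill index, producing
    // degenerate (zero-area) triangles at the seam so that the exterior AA ramp and the interior
    // fill can be drawn as one triangle strip (PrimitiveType::kTriangleStrip in the constructor
    // below) in a single indexed draw.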

    if (writer) {
        writer << kIndices;
    } // otherwise static buffer creation failed, so do nothing; Context initialization will fail.
}

static void write_vertex_buffer(VertexWriter writer) {
    static constexpr float kHR2 = 0.5f * SK_FloatSqrt2; // "half root 2"

    // This template is repeated 4 times in the vertex buffer, once for each of the four corners.
    // The vertex ID is used to look up per-corner instance properties such as positions,
    // but otherwise this vertex data produces a consistent clockwise mesh from
    // TL -> TR -> BR -> BL.
    static constexpr Vertex kCornerTemplate[kCornerVertexCount] = {
        // Normals for device-space AA outsets from outer curve
        { {1.0f, 0.0f} },
        { {kHR2, kHR2} },
        { {0.0f, 1.0f} },

        // Normal for outer anchor (zero length to signal no local or device-space normal outset)
        { {0.0f, 0.0f} },
    };

    if (writer) {
        writer << kCornerTemplate  // TL
               << kCornerTemplate  // TR
               << kCornerTemplate  // BR
               << kCornerTemplate; // BL
    } // otherwise static buffer creation failed, so do nothing; Context initialization will fail.
}

PerEdgeAAQuadRenderStep::PerEdgeAAQuadRenderStep(StaticBufferManager* bufferManager)
        : RenderStep("PerEdgeAAQuadRenderStep",
                     "",
                     Flags::kPerformsShading | Flags::kEmitsCoverage | Flags::kOutsetBoundsForAA |
                     Flags::kUseNonAAInnerFill,
                     /*uniforms=*/{},
                     PrimitiveType::kTriangleStrip,
                     kDirectDepthGreaterPass,
                     /*vertexAttrs=*/{
                            {"normal", VertexAttribType::kFloat2, SkSLType::kFloat2},
                     },
                     /*instanceAttrs=*/
                            {{"edgeFlags", VertexAttribType::kUByte4_norm, SkSLType::kFloat4},
                             {"quadXs", VertexAttribType::kFloat4, SkSLType::kFloat4},
                             {"quadYs", VertexAttribType::kFloat4, SkSLType::kFloat4},

                             // TODO: pack depth and ssbo index into one 32-bit attribute, if we can
                             // go without needing both render step and paint ssbo index attributes.
                             {"depth", VertexAttribType::kFloat, SkSLType::kFloat},
                             {"ssboIndices", VertexAttribType::kUInt2, SkSLType::kUInt2},

                             {"mat0", VertexAttribType::kFloat3, SkSLType::kFloat3},
                             {"mat1", VertexAttribType::kFloat3, SkSLType::kFloat3},
                             {"mat2", VertexAttribType::kFloat3, SkSLType::kFloat3}},
                     /*varyings=*/{
                             // Device-space distance to LTRB edges of quad.
                             {"edgeDistances", SkSLType::kFloat4}, // distance to LTRB edges
                     }) {
    // Initialize the static buffers we'll use when recording draw calls.
    // NOTE: Each instance of this RenderStep gets its own copy of the data. Since there should only
    // ever be one PerEdgeAAQuadRenderStep at a time, this shouldn't be an issue.
    write_vertex_buffer(bufferManager->getVertexWriter(sizeof(Vertex) * kVertexCount,
                                                       &fVertexBuffer));
    write_index_buffer(bufferManager->getIndexWriter(sizeof(uint16_t) * kIndexCount,
                                                     &fIndexBuffer));
}

PerEdgeAAQuadRenderStep::~PerEdgeAAQuadRenderStep() {}

std::string PerEdgeAAQuadRenderStep::vertexSkSL() const {
    // Returns the body of a vertex function, which must define a float4 devPosition variable and
    // must write to an already-defined float2 stepLocalCoords variable.
    return "float4 devPosition = per_edge_aa_quad_vertex_fn("
                   // Vertex Attributes
                   "normal, "
                   // Instance Attributes
                   "edgeFlags, quadXs, quadYs, depth, "
                   "float3x3(mat0, mat1, mat2), "
                   // Varyings
                   "edgeDistances, "
                   // Render Step
                   "stepLocalCoords);\n";
}

const char* PerEdgeAAQuadRenderStep::fragmentCoverageSkSL() const {
    // The returned SkSL must write its coverage into a 'half4 outputCoverage' variable (defined in
    // the calling code) with the actual coverage splatted out into all four channels.
    return "outputCoverage = per_edge_aa_quad_coverage_fn(sk_FragCoord, edgeDistances);";
}

void PerEdgeAAQuadRenderStep::writeVertices(DrawWriter* writer,
                                            const DrawParams& params,
                                            skvx::uint2 ssboIndices) const {
    SkASSERT(params.geometry().isEdgeAAQuad());
    const EdgeAAQuad& quad = params.geometry().edgeAAQuad();

    DrawWriter::Instances instance{*writer, fVertexBuffer, fIndexBuffer, kIndexCount};
    auto vw = instance.append(1);

    // Empty fills should not have been recorded at all.
    SkDEBUGCODE(Rect bounds = params.geometry().bounds());
    SkASSERT(!bounds.isEmptyNegativeOrNaN());

    constexpr uint8_t kAAOn = 255;
    constexpr uint8_t kAAOff = 0;
    auto edgeSigns = skvx::byte4{quad.edgeFlags() & AAFlags::kLeft   ? kAAOn : kAAOff,
                                 quad.edgeFlags() & AAFlags::kTop    ? kAAOn : kAAOff,
                                 quad.edgeFlags() & AAFlags::kRight  ? kAAOn : kAAOff,
                                 quad.edgeFlags() & AAFlags::kBottom ? kAAOn : kAAOff};
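    // Note: edgeFlags is declared as kUByte4_norm, so kAAOn (255) and kAAOff (0) reach the vertex
    // shader as the normalized floats 1.0 and 0.0, matching the "aa != 0" convention in the
    // header comment.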

    // The vertex shader expects points to be in clockwise order. EdgeAAQuad is the only
    // shape that *might* have counter-clockwise input.
    if (is_clockwise(quad)) {
        vw << edgeSigns << quad.xs() << quad.ys();
    } else {
        vw << skvx::shuffle<2,1,0,3>(edgeSigns)  // swap left and right AA bits
           << skvx::shuffle<1,0,3,2>(quad.xs())  // swap TL with TR, and BL with BR
           << skvx::shuffle<1,0,3,2>(quad.ys()); //   ""
    }

    // All instance types share the remaining instance attribute definitions
    const SkM44& m = params.transform().matrix();

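    // The 3x3 "M" described in the header comment is built by keeping rows/columns 0, 1, and 3 of
    // the 4x4 local-to-device matrix (i.e. dropping the Z row and column), since the local
    // geometry has an implicit Z of 0.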
    vw << params.order().depthAsFloat()
       << ssboIndices
       << m.rc(0,0) << m.rc(1,0) << m.rc(3,0)  // mat0
       << m.rc(0,1) << m.rc(1,1) << m.rc(3,1)  // mat1
       << m.rc(0,3) << m.rc(1,3) << m.rc(3,3); // mat2
}

void PerEdgeAAQuadRenderStep::writeUniformsAndTextures(const DrawParams&,
                                                       PipelineDataGatherer*) const {
    // All data is uploaded as instance attributes, so no uniforms are needed.
}

}  // namespace skgpu::graphite