xref: /aosp_15_r20/external/skia/src/gpu/tessellate/PatchWriter.h (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1 /*
2  * Copyright 2021 Google LLC.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #ifndef skgpu_tessellate_PatchWriter_DEFINED
9 #define skgpu_tessellate_PatchWriter_DEFINED
10 
11 #include "include/core/SkAlphaType.h"
12 #include "include/core/SkScalar.h"
13 #include "include/core/SkTypes.h"
14 #include "include/private/SkColorData.h"
15 #include "include/private/base/SkDebug.h"
16 #include "include/private/base/SkFloatingPoint.h"
17 #include "include/private/base/SkPoint_impl.h"
18 #include "include/private/base/SkTemplates.h"
19 #include "src/base/SkUtils.h"
20 #include "src/base/SkVx.h"
21 #include "src/gpu/BufferWriter.h"
22 #include "src/gpu/tessellate/LinearTolerances.h"
23 #include "src/gpu/tessellate/MiddleOutPolygonTriangulator.h"
24 #include "src/gpu/tessellate/Tessellation.h"
25 #include "src/gpu/tessellate/WangsFormula.h"
26 
27 #include <algorithm>
28 #include <cstdint>
29 #include <cstring>
30 #include <math.h>
31 #include <tuple>
32 #include <type_traits>
33 #include <utility>
34 #include <variant>
35 
36 namespace skgpu::tess {
37 
38 /**
39  * PatchWriter writes out tessellation patches, formatted with their specific attribs, to a GPU
40  * buffer.
41  *
42  * PatchWriter is a template class that takes traits to configure both its compile-time and runtime
43  * behavior for the different tessellation rendering algorithms and GPU backends. The complexity of
44  * this system is worthwhile because the attribute writing operations and math already require
45  * heavy inlining for performance, and the algorithmic variations tend to only differ slightly, but
46  * do so in the inner most loops. Additionally, Graphite and Ganesh use the same fundamental
47  * algorithms, but Graphite's architecture and higher required hardware level mean that its
48  * attribute configurations can be determined entirely at compile time.
49  *
50  * Traits are specified in PatchWriter's single var-args template pack. Traits come in two main
51  * categories: PatchAttribs configuration and feature/processing configuration. A given PatchAttrib
52  * can be always enabled, enabled at runtime, or always disabled. A feature can be either enabled
53  * or disabled and are coupled more closely with the control points of the curve. Across the two
54  * GPU backends and different path rendering strategies, a "patch" has the following structure:
55  *
56  *   - 4 control points (8 floats total) defining the curve's geometry
57  *      - quadratic curves are converted to equivalent cubics on the CPU during writing
58  *      - conic curves store {w, inf} in their last control point
59  *      - triangles store {inf, inf} in their last control point
60  *      - everything else is presumed to be a cubic defined by all 4 control points
61  *   - Enabled PatchAttrib values, constant for the entire instance
62  *      - layout is identical to PatchAttrib's definition, skipping disabled attribs
63  *      - attribs can be enabled/disabled at runtime by building a mask of attrib values
64  *
65  * Currently PatchWriter supports the following traits:
66  *   - Required<PatchAttrib>
67  *   - Optional<PatchAttrib>
68  *   - TrackJoinControlPoints
69  *   - AddTrianglesWhenChopping
 *   - DiscardFlatCurves
 *   - ReplicateLineEndPoints
71  *
72  * In addition to variable traits, PatchWriter's first template argument defines the type used for
73  * allocating the GPU instance data. The templated "PatchAllocator" can be any type that provides:
74  *    // A GPU-backed vertex writer for a single instance worth of data. The provided
75  *    // LinearTolerances value represents the tolerances for the curve that will be written to the
76  *    // returned vertex space.
77  *    skgpu::VertexWriter append(const LinearTolerances&);
78  *
79  * Additionally, it must have a constructor that takes the stride as its first argument.
80  * PatchWriter forwards any additional constructor args from its ctor to the allocator after
81  * computing the necessary stride for its PatchAttribs configuration.
82  */
83 
84 // *** TRAITS ***
85 
// Marks a PatchAttrib as enabled at compile time, i.e. it must always be set and will always be
87 // written to each patch's instance data. If present, will assert if the runtime attribs do not fit.
88 template <PatchAttribs A> struct Required {};
89 // Marks a PatchAttrib as supported, i.e. it can be enabled or disabled at runtime. Optional<A> is
90 // overridden by Required<A>. If neither Required<A> nor Optional<A> are in a PatchWriter's trait
91 // list, then the attrib is disabled at compile time and it will assert if the runtime attribs
92 // attempt to enable it.
93 template <PatchAttribs A> struct Optional {};
94 
95 // Enables tracking of the kJoinControlPointAttrib based on control points of the previously
96 // written patch (automatically taking into account curve chopping). When a patch is first written
97 // (and there is no prior patch to define the join control point), the PatchWriter automatically
98 // records the patch to a temporary buffer--sans join--until writeDeferredStrokePatch() is called,
99 // filling in the now-defined join control point.
100 //
101 // This feature must be paired with Required<PatchAttribs::kJoinControlPoint>
struct TrackJoinControlPoints {
    // Tag type only: its presence in PatchWriter's trait pack turns the feature on.
};
103 
104 // Write additional triangular patches to fill the resulting empty area when a curve is chopped.
105 // Normally, the patch geometry covers the curve defined by its control points, up to the implicitly
106 // closing edge between its first and last control points. When a curve is chopped to fit within
107 // the maximum segment count, the resulting space between the original closing edge and new closing
108 // edges is not filled, unless some mechanism of the shader makes it so (e.g. a fan point or
109 // stroking).
110 //
111 // This feature enables automatically writing triangular patches to fill this empty space when a
112 // curve is chopped.
struct AddTrianglesWhenChopping {
    // Tag type only: its presence in PatchWriter's trait pack turns the feature on.
};
114 
115 // If a curve requires at most 1 segment to render accurately, it's effectively a straight line.
116 // This feature turns on automatically ignoring those curves, with the assumption that some other
117 // render pass will produce equivalent geometry (e.g. middle-out or inner triangulations).
struct DiscardFlatCurves {
    // Tag type only: its presence in PatchWriter's trait pack turns the feature on.
};
119 
120 // Upload lines as a cubic with {a, a, b, b} for control points, instead of the truly linear cubic
// of {a, 2/3a + 1/3b, 1/3a + 2/3b, b}. Wang's formula will not return a tight lower bound on the
122 // number of segments in this case, but it's convenient to detect in the vertex shader and assume
123 // only a single segment is required. This bypasses numerical stability issues in Wang's formula
124 // when evaluated on the ideal linear cubic for very large control point coordinates. Other curve
125 // types with large coordinates do not need this treatment since they would be pre-chopped and
126 // culled to lines.
struct ReplicateLineEndPoints {
    // Tag type only: its presence in PatchWriter's trait pack turns the feature on.
};
128 
129 // *** PatchWriter internals ***
130 
131 // AttribValue exposes a consistent store and write interface for a PatchAttrib's value while
132 // abstracting over compile-time enabled, conditionally-enabled, or compile-time disabled attribs.
133 template <PatchAttribs A, typename T, bool Required, bool Optional>
134 struct AttribValue {
135     using DataType = std::conditional_t<Required, T,
136                      std::conditional_t<Optional, std::pair<T, bool>,
137                                        /* else */ std::monostate>>;
138 
139     static constexpr bool kEnabled = Required || Optional;
140 
AttribValueAttribValue141     explicit AttribValue(PatchAttribs attribs) : AttribValue(attribs, {}) {}
AttribValueAttribValue142     AttribValue(PatchAttribs attribs, const T& t) {
143         (void) attribs; // may be unused on release builds
144         if constexpr (Required) {
145             SkASSERT(attribs & A);
146         } else if constexpr (Optional) {
147             std::get<1>(fV) = attribs & A;
148         } else {
149             SkASSERT(!(attribs & A));
150         }
151         *this = t;
152     }
153 
154     AttribValue& operator=(const T& v) {
155         if constexpr (Required) {
156             fV = v;
157         } else if constexpr (Optional) {
158             // for simplicity, store even if disabled and won't be written out to VertexWriter
159             std::get<0>(fV) = v;
160         } // else ignore for disabled values
161         return *this;
162     }
163 
164     DataType fV;
165 };
166 
167 template <PatchAttribs A, typename T, bool Required, bool Optional>
168 VertexWriter& operator<<(VertexWriter& w, const AttribValue<A, T, Required, Optional>& v) {
169     if constexpr (Required) {
170         w << v.fV; // always write
171     } else if constexpr (Optional) {
172         if (std::get<1>(v.fV)) {
173             w << std::get<0>(v.fV); // write if enabled
174         }
175     } // else never write
176     return w;
177 }
178 
179 // Stores state and deferred patch data when TrackJoinControlPoints is used for a PatchWriter.
template <size_t Stride>
struct PatchStorage {
    // Parametric segment value to restore on LinearTolerances; negative means "nothing pending".
    float fN_p4 = -1.f;
    // When true, the next patch must be recorded here instead of being written directly.
    bool fMustDefer = true;

    // Raw bytes of one whole patch whose join control point is not yet defined.
    char fData[Stride];

    // A patch is pending once a non-negative segment value has been recorded.
    bool hasPending() const {
        return 0.f <= fN_p4;
    }

    // Returns to the initial state: nothing pending, and the next patch must be deferred.
    void reset() {
        fMustDefer = true;
        fN_p4 = -1.f;
    }
};
196 
197 // An empty object that has the same constructor signature as MiddleOutPolygonTriangulator, used
198 // as a stand-in when AddTrianglesWhenChopping is not a defined trait.
199 struct NullTriangulator {
NullTriangulatorNullTriangulator200     NullTriangulator(int, SkPoint) {}
201 };
202 
203 #define AI SK_ALWAYS_INLINE
204 #define ENABLE_IF(cond) template <typename Void=void> std::enable_if_t<cond, Void>
205 
206 // *** PatchWriter ***
207 template <typename PatchAllocator, typename... Traits>
208 class PatchWriter {
209     // Helpers to extract specifics from the template traits pack.
210     template <typename F>     struct has_trait  : std::disjunction<std::is_same<F, Traits>...> {};
211     template <PatchAttribs A> using  req_attrib = has_trait<Required<A>>;
212     template <PatchAttribs A> using  opt_attrib = has_trait<Optional<A>>;
213 
214     // Enabled features and attribute configuration
215     static constexpr bool kTrackJoinControlPoints   = has_trait<TrackJoinControlPoints>::value;
216     static constexpr bool kAddTrianglesWhenChopping = has_trait<AddTrianglesWhenChopping>::value;
217     static constexpr bool kDiscardFlatCurves        = has_trait<DiscardFlatCurves>::value;
218     static constexpr bool kReplicateLineEndPoints   = has_trait<ReplicateLineEndPoints>::value;
219 
220     // NOTE: MSVC 19.24 cannot compile constexpr fold expressions referenced in templates, so
221     // extract everything into constexpr bool's instead of using `req_attrib` directly, etc. :(
222     template <PatchAttribs A, typename T, bool Req/*=req_attrib<A>*/, bool Opt/*=opt_attrib<A>*/>
223     using attrib_t = AttribValue<A, T, Req, Opt>;
224 
225     // TODO: Remove when MSVC compiler is fixed, in favor of `using Name = attrib_t<>` directly.
226 #define DEF_ATTRIB_TYPE(name, A, T) \
227     static constexpr bool kRequire##name = req_attrib<A>::value; \
228     static constexpr bool kOptional##name = opt_attrib<A>::value; \
229     using name = attrib_t<A, T, kRequire##name, kOptional##name>
230 
231     DEF_ATTRIB_TYPE(JoinAttrib,      PatchAttribs::kJoinControlPoint,  SkPoint);
232     DEF_ATTRIB_TYPE(FanPointAttrib,  PatchAttribs::kFanPoint,          SkPoint);
233     DEF_ATTRIB_TYPE(StrokeAttrib,    PatchAttribs::kStrokeParams,      StrokeParams);
234 
235     // kWideColorIfEnabled does not define an attribute, but changes the type of the kColor attrib.
236     static constexpr bool kRequireWideColor  = req_attrib<PatchAttribs::kWideColorIfEnabled>::value;
237     static constexpr bool kOptionalWideColor = opt_attrib<PatchAttribs::kWideColorIfEnabled>::value;
238     using Color = std::conditional_t<kRequireWideColor,  SkPMColor4f,
239                   std::conditional_t<kOptionalWideColor, VertexColor,
240                                               /* else */ uint32_t>>;
241 
242     DEF_ATTRIB_TYPE(ColorAttrib,     PatchAttribs::kColor,             Color);
243     DEF_ATTRIB_TYPE(DepthAttrib,     PatchAttribs::kPaintDepth,        float);
244     DEF_ATTRIB_TYPE(CurveTypeAttrib, PatchAttribs::kExplicitCurveType, float);
245     DEF_ATTRIB_TYPE(SsboIndexAttrib, PatchAttribs::kSsboIndex,         skvx::uint2);
246 #undef DEF_ATTRIB_TYPE
247 
248     static constexpr size_t kMaxStride = 4 * sizeof(SkPoint) + // control points
249             (JoinAttrib::kEnabled      ? sizeof(SkPoint)                              : 0) +
250             (FanPointAttrib::kEnabled  ? sizeof(SkPoint)                              : 0) +
251             (StrokeAttrib::kEnabled    ? sizeof(StrokeParams)                         : 0) +
252             (ColorAttrib::kEnabled     ? std::min(sizeof(Color), sizeof(SkPMColor4f)) : 0) +
253             (DepthAttrib::kEnabled     ? sizeof(float)                                : 0) +
254             (CurveTypeAttrib::kEnabled ? sizeof(float)                                : 0) +
255             (SsboIndexAttrib::kEnabled ? 2 * sizeof(uint32_t)                         : 0);
256 
257     // Types that vary depending on the activated features, but do not define the patch data.
258     using DeferredPatch = std::conditional_t<kTrackJoinControlPoints,
259             PatchStorage<kMaxStride>, std::monostate>;
260     using InnerTriangulator = std::conditional_t<kAddTrianglesWhenChopping,
261             MiddleOutPolygonTriangulator, NullTriangulator>;
262 
263     using float2 = skvx::float2;
264     using float4 = skvx::float4;
265 
266     static_assert(!kTrackJoinControlPoints || req_attrib<PatchAttribs::kJoinControlPoint>::value,
267                   "Deferred patches and auto-updating joins requires kJoinControlPoint attrib");
268 public:
269     template <typename... Args> // forwarded to PatchAllocator
PatchWriter(PatchAttribs attribs,Args &&...allocArgs)270     PatchWriter(PatchAttribs attribs,
271                 Args&&... allocArgs)
272             : fAttribs(attribs)
273             , fPatchAllocator(PatchStride(attribs), std::forward<Args>(allocArgs)...)
274             , fJoin(attribs)
275             , fFanPoint(attribs)
276             , fStrokeParams(attribs)
277             , fColor(attribs)
278             , fDepth(attribs)
279             , fSsboIndex(attribs) {
280         // Explicit curve types are provided on the writePatch signature, and not a field of
281         // PatchWriter, so initialize one in the ctor to validate the provided runtime attribs.
282         SkDEBUGCODE((void) CurveTypeAttrib(attribs);)
283         // Validate the kWideColorIfEnabled attribute variant flag as well
284         if constexpr (req_attrib<PatchAttribs::kWideColorIfEnabled>::value) {
285             SkASSERT(attribs & PatchAttribs::kWideColorIfEnabled);    // required
286         } else if constexpr (!opt_attrib<PatchAttribs::kWideColorIfEnabled>::value) {
287             SkASSERT(!(attribs & PatchAttribs::kWideColorIfEnabled)); // disabled
288         }
289     }
290 
~PatchWriter()291     ~PatchWriter() {
292         if constexpr (kTrackJoinControlPoints) {
293             // flush any pending patch
294             this->writeDeferredStrokePatch();
295         }
296     }
297 
attribs()298     PatchAttribs attribs() const { return fAttribs; }
299 
300     // The max scale factor should be derived from the same matrix that 'xform' was. It's only used
301     // in stroking calculations, so can be ignored for path filling.
302     void setShaderTransform(const wangs_formula::VectorXform& xform,
303                             float maxScale = 1.f) {
304         fApproxTransform = xform;
305         fMaxScale = maxScale;
306     }
307 
308     // Completes a closed contour of a stroke by rewriting a deferred patch with now-available
309     // join control point information. Automatically resets the join control point attribute.
writeDeferredStrokePatch()310     ENABLE_IF(kTrackJoinControlPoints) writeDeferredStrokePatch() {
311         if (fDeferredPatch.hasPending()) {
312             SkASSERT(!fDeferredPatch.fMustDefer);
313             // Overwrite join control point with updated value, which is the first attribute
314             // after the 4 control points.
315             memcpy(SkTAddOffset<void>(fDeferredPatch.fData, 4 * sizeof(SkPoint)),
316                    &fJoin, sizeof(SkPoint));
317             // Assuming that the stroke parameters aren't changing within a contour, we only have
318             // to set the parametric segments in order to recover the LinearTolerances state at the
319             // time the deferred patch was recorded.
320             fTolerances.setParametricSegments(fDeferredPatch.fN_p4);
321             if (VertexWriter vw = fPatchAllocator.append(fTolerances)) {
322                 vw << VertexWriter::Array<char>(fDeferredPatch.fData, PatchStride(fAttribs));
323             }
324         }
325 
326         fDeferredPatch.reset();
327     }
328 
329     // Updates the stroke's join control point that will be written out with each patch. This is
330     // automatically adjusted when appending various geometries (e.g. Conic/Cubic), but sometimes
331     // must be set explicitly.
updateJoinControlPointAttrib(SkPoint lastControlPoint)332     ENABLE_IF(JoinAttrib::kEnabled) updateJoinControlPointAttrib(SkPoint lastControlPoint) {
333         SkASSERT(fAttribs & PatchAttribs::kJoinControlPoint); // must be runtime enabled as well
334         fJoin = lastControlPoint;
335         if constexpr (kTrackJoinControlPoints) {
336             fDeferredPatch.fMustDefer = false;
337         }
338     }
339 
340     // Updates the fan point that will be written out with each patch (i.e., the point that wedges
341     // fan around).
updateFanPointAttrib(SkPoint fanPoint)342     ENABLE_IF(FanPointAttrib::kEnabled) updateFanPointAttrib(SkPoint fanPoint) {
343         SkASSERT(fAttribs & PatchAttribs::kFanPoint);
344         fFanPoint = fanPoint;
345     }
346 
347     // Updates the stroke params that are written out with each patch.
updateStrokeParamsAttrib(StrokeParams strokeParams)348     ENABLE_IF(StrokeAttrib::kEnabled) updateStrokeParamsAttrib(StrokeParams strokeParams) {
349         SkASSERT(fAttribs & PatchAttribs::kStrokeParams);
350         fStrokeParams = strokeParams;
351         fTolerances.setStroke(strokeParams, fMaxScale);
352     }
353     // Updates tolerances to account for stroke params that are stored as uniforms instead of
354     // dynamic instance attributes.
updateUniformStrokeParams(StrokeParams strokeParams)355     ENABLE_IF(StrokeAttrib::kEnabled) updateUniformStrokeParams(StrokeParams strokeParams) {
356         SkASSERT(!(fAttribs & PatchAttribs::kStrokeParams));
357         fTolerances.setStroke(strokeParams, fMaxScale);
358     }
359 
360     // Updates the color that will be written out with each patch.
updateColorAttrib(const SkPMColor4f & color)361     ENABLE_IF(ColorAttrib::kEnabled) updateColorAttrib(const SkPMColor4f& color) {
362         SkASSERT(fAttribs & PatchAttribs::kColor);
363         // Converts SkPMColor4f to the selected 'Color' attrib type. The always-wide and never-wide
364         // branches match what VertexColor does based on the runtime check.
365         if constexpr (req_attrib<PatchAttribs::kWideColorIfEnabled>::value) {
366             fColor = color;
367         } else if constexpr (opt_attrib<PatchAttribs::kWideColorIfEnabled>::value) {
368             fColor = VertexColor(color, fAttribs & PatchAttribs::kWideColorIfEnabled);
369         } else {
370             fColor = color.toBytes_RGBA();
371         }
372     }
373 
374     // Updates the paint depth written out with each patch.
updatePaintDepthAttrib(float depth)375     ENABLE_IF(DepthAttrib::kEnabled) updatePaintDepthAttrib(float depth) {
376         SkASSERT(fAttribs & PatchAttribs::kPaintDepth);
377         fDepth = depth;
378     }
379 
380     // Updates the storage buffer index used to access uniforms.
381     ENABLE_IF(SsboIndexAttrib::kEnabled)
updateSsboIndexAttrib(skvx::uint2 ssboIndex)382     updateSsboIndexAttrib(skvx::uint2 ssboIndex) {
383         SkASSERT(fAttribs & PatchAttribs::kSsboIndex);
384         fSsboIndex = ssboIndex;
385     }
386 
387     /**
388      * writeX functions for supported patch geometry types. Every geometric type is converted to an
389      * equivalent cubic or conic, so this will always write at minimum 8 floats for the four control
390      * points (cubic) or three control points and {w, inf} (conics). The PatchWriter additionally
391      * writes the current values of all attributes enabled in its PatchAttribs flags.
392      */
393 
394     // Write a cubic curve with its four control points.
writeCubic(float2 p0,float2 p1,float2 p2,float2 p3)395     AI void writeCubic(float2 p0, float2 p1, float2 p2, float2 p3) {
396         float n4 = wangs_formula::cubic_p4(kPrecision, p0, p1, p2, p3, fApproxTransform);
397         if constexpr (kDiscardFlatCurves) {
398             if (n4 <= 1.f) {
399                 // This cubic only needs one segment (e.g. a line) but we're not filling space with
400                 // fans or stroking, so nothing actually needs to be drawn.
401                 return;
402             }
403         }
404         if (int numPatches = this->accountForCurve(n4)) {
405             this->chopAndWriteCubics(p0, p1, p2, p3, numPatches);
406         } else {
407             this->writeCubicPatch(p0, p1, p2, p3);
408         }
409     }
writeCubic(const SkPoint pts[4])410     AI void writeCubic(const SkPoint pts[4]) {
411         float4 p0p1 = float4::Load(pts);
412         float4 p2p3 = float4::Load(pts + 2);
413         this->writeCubic(p0p1.lo, p0p1.hi, p2p3.lo, p2p3.hi);
414     }
415 
416     // Write a conic curve with three control points and 'w', with the last coord of the last
417     // control point signaling a conic by being set to infinity.
writeConic(float2 p0,float2 p1,float2 p2,float w)418     AI void writeConic(float2 p0, float2 p1, float2 p2, float w) {
419         float n2 = wangs_formula::conic_p2(kPrecision, p0, p1, p2, w, fApproxTransform);
420         if constexpr (kDiscardFlatCurves) {
421             if (n2 <= 1.f) {
422                 // This conic only needs one segment (e.g. a line) but we're not filling space with
423                 // fans or stroking, so nothing actually needs to be drawn.
424                 return;
425             }
426         }
427         if (int numPatches = this->accountForCurve(n2 * n2)) {
428             this->chopAndWriteConics(p0, p1, p2, w, numPatches);
429         } else {
430             this->writeConicPatch(p0, p1, p2, w);
431         }
432     }
writeConic(const SkPoint pts[3],float w)433     AI void writeConic(const SkPoint pts[3], float w) {
434         this->writeConic(sk_bit_cast<float2>(pts[0]),
435                          sk_bit_cast<float2>(pts[1]),
436                          sk_bit_cast<float2>(pts[2]),
437                          w);
438     }
439 
440     // Write a quadratic curve that automatically converts its three control points into an
441     // equivalent cubic.
writeQuadratic(float2 p0,float2 p1,float2 p2)442     AI void writeQuadratic(float2 p0, float2 p1, float2 p2) {
443         float n4 = wangs_formula::quadratic_p4(kPrecision, p0, p1, p2, fApproxTransform);
444         if constexpr (kDiscardFlatCurves) {
445             if (n4 <= 1.f) {
446                 // This quad only needs one segment (e.g. a line) but we're not filling space with
447                 // fans or stroking, so nothing actually needs to be drawn.
448                 return;
449             }
450         }
451         if (int numPatches = this->accountForCurve(n4)) {
452             this->chopAndWriteQuads(p0, p1, p2, numPatches);
453         } else {
454             this->writeQuadPatch(p0, p1, p2);
455         }
456     }
writeQuadratic(const SkPoint pts[3])457     AI void writeQuadratic(const SkPoint pts[3]) {
458         this->writeQuadratic(sk_bit_cast<float2>(pts[0]),
459                              sk_bit_cast<float2>(pts[1]),
460                              sk_bit_cast<float2>(pts[2]));
461     }
462 
463     // Write a line that is automatically converted into an equivalent cubic.
writeLine(float4 p0p1)464     AI void writeLine(float4 p0p1) {
465         // No chopping needed, a line only ever requires one segment (the minimum required already).
466         fTolerances.setParametricSegments(1.f);
467         if constexpr (kReplicateLineEndPoints) {
468             // Visually this cubic is still a line, but 't' does not move linearly over the line,
469             // so Wang's formula is more pessimistic. Shaders should avoid evaluating Wang's
470             // formula when a patch has control points in this arrangement.
471             this->writeCubicPatch(p0p1.lo, p0p1.lo, p0p1.hi, p0p1.hi);
472         } else {
473             // In exact math, this cubic structure should have Wang's formula return 0. Due to
474             // floating point math, this isn't always the case, so shaders need some way to restrict
475             // the number of parametric segments if Wang's formula numerically blows up.
476             this->writeCubicPatch(p0p1.lo, (p0p1.zwxy() - p0p1) * (1/3.f) + p0p1, p0p1.hi);
477         }
478     }
writeLine(float2 p0,float2 p1)479     AI void writeLine(float2 p0, float2 p1) { this->writeLine({p0, p1}); }
writeLine(SkPoint p0,SkPoint p1)480     AI void writeLine(SkPoint p0, SkPoint p1) {
481         this->writeLine(sk_bit_cast<float2>(p0), sk_bit_cast<float2>(p1));
482     }
483 
484     // Write a triangle by setting it to a conic with w=Inf, and using a distinct
485     // explicit curve type for when inf isn't supported in shaders.
writeTriangle(float2 p0,float2 p1,float2 p2)486     AI void writeTriangle(float2 p0, float2 p1, float2 p2) {
487         // No chopping needed, the max supported segment count should always support 2 lines
488         // (which form a triangle when implicitly closed).
489         static constexpr float kTriangleSegments_p4 = 2.f * 2.f * 2.f * 2.f;
490         fTolerances.setParametricSegments(kTriangleSegments_p4);
491         this->writePatch(p0, p1, p2, {SK_FloatInfinity, SK_FloatInfinity},
492                          kTriangularConicCurveType);
493     }
writeTriangle(SkPoint p0,SkPoint p1,SkPoint p2)494     AI void writeTriangle(SkPoint p0, SkPoint p1, SkPoint p2) {
495         this->writeTriangle(sk_bit_cast<float2>(p0),
496                             sk_bit_cast<float2>(p1),
497                             sk_bit_cast<float2>(p2));
498     }
499 
500     // Writes a circle used for round caps and joins in stroking, encoded as a cubic with
501     // identical control points and an empty join.
writeCircle(SkPoint p)502     AI void writeCircle(SkPoint p) {
503         // This does not use writePatch() because it uses its own location as the join attribute
504         // value instead of fJoin and never defers.
505         fTolerances.setParametricSegments(0.f);
506         if (VertexWriter vw = fPatchAllocator.append(fTolerances)) {
507             vw << VertexWriter::Repeat<4>(p); // p0,p1,p2,p3 = p -> 4 copies
508             this->emitPatchAttribs(std::move(vw), {fAttribs, p}, kCubicCurveType);
509         }
510     }
511 
512 private:
emitPatchAttribs(VertexWriter vertexWriter,const JoinAttrib & join,float explicitCurveType)513     AI void emitPatchAttribs(VertexWriter vertexWriter,
514                              const JoinAttrib& join,
515                              float explicitCurveType) {
516         // NOTE: operator<< overrides automatically handle optional and disabled attribs.
517         vertexWriter << join << fFanPoint << fStrokeParams << fColor << fDepth
518                      << CurveTypeAttrib{fAttribs, explicitCurveType} << fSsboIndex;
519     }
520 
appendPatch()521     AI VertexWriter appendPatch() {
522         if constexpr (kTrackJoinControlPoints) {
523             if (fDeferredPatch.fMustDefer) {
524                 SkASSERT(!fDeferredPatch.hasPending());
525                 SkASSERT(PatchStride(fAttribs) <= kMaxStride);
526                 // Save the computed parametric segment tolerance value so that we can pass that to
527                 // the PatchAllocator when flushing the deferred patch.
528                 fDeferredPatch.fN_p4 = fTolerances.numParametricSegments_p4();
529                 return {fDeferredPatch.fData, PatchStride(fAttribs)};
530             }
531         }
532         return fPatchAllocator.append(fTolerances);
533     }
534 
writePatch(float2 p0,float2 p1,float2 p2,float2 p3,float explicitCurveType)535     AI void writePatch(float2 p0, float2 p1, float2 p2, float2 p3, float explicitCurveType) {
536         if (VertexWriter vw = this->appendPatch()) {
537             // NOTE: fJoin will be undefined if we're writing to a deferred patch. If that's the
538             // case, correct data will overwrite it when the contour is closed (this is fine since a
539             // deferred patch writes to CPU memory instead of directly to the GPU buffer).
540             vw << p0 << p1 << p2 << p3;
541             this->emitPatchAttribs(std::move(vw), fJoin, explicitCurveType);
542 
543             // Automatically update join control point for next patch.
544             if constexpr (kTrackJoinControlPoints) {
545                 if (explicitCurveType == kCubicCurveType && any(p3 != p2)) {
546                     // p2 is control point defining the tangent vector into the next patch.
547                     p2.store(&fJoin);
548                 } else if (any(p2 != p1)) {
549                     // p1 is the control point defining the tangent vector.
550                     p1.store(&fJoin);
551                 } else {
552                     // p0 is the control point defining the tangent vector.
553                     p0.store(&fJoin);
554                 }
555                 fDeferredPatch.fMustDefer = false;
556             }
557         }
558     }
559 
560     // Helpers that normalize curves to a generic patch, but do no other work.
writeCubicPatch(float2 p0,float2 p1,float2 p2,float2 p3)561     AI void writeCubicPatch(float2 p0, float2 p1, float2 p2, float2 p3) {
562         this->writePatch(p0, p1, p2, p3, kCubicCurveType);
563     }
writeCubicPatch(float2 p0,float4 p1p2,float2 p3)564     AI void writeCubicPatch(float2 p0, float4 p1p2, float2 p3) {
565         this->writeCubicPatch(p0, p1p2.lo, p1p2.hi, p3);
566     }
writeQuadPatch(float2 p0,float2 p1,float2 p2)567     AI void writeQuadPatch(float2 p0, float2 p1, float2 p2) {
568         this->writeCubicPatch(p0, mix(float4(p0, p2), p1.xyxy(), 2/3.f), p2);
569     }
writeConicPatch(float2 p0,float2 p1,float2 p2,float w)570     AI void writeConicPatch(float2 p0, float2 p1, float2 p2, float w) {
571         this->writePatch(p0, p1, p2, {w, SK_FloatInfinity}, kConicCurveType);
572     }
573 
accountForCurve(float n4)574     int accountForCurve(float n4) {
575         if (n4 <= kMaxParametricSegments_p4) {
576             // Record n^4 and return 0 to signal no chopping
577             fTolerances.setParametricSegments(n4);
578             return 0;
579         } else {
580             // Clamp to max allowed segmentation for a patch and return required number of chops
581             // to achieve visual correctness.
582             fTolerances.setParametricSegments(kMaxParametricSegments_p4);
583             return SkScalarCeilToInt(wangs_formula::root4(std::min(n4, kMaxSegmentsPerCurve_p4) /
584                                                           kMaxParametricSegments_p4));
585         }
586     }
587 
588     // This does not return b when t==1, but it otherwise seems to get better precision than
589     // "a*(1 - t) + b*t" for things like chopping cubics on exact cusp points.
590     // The responsibility falls on the caller to check that t != 1 before calling.
mix(float4 a,float4 b,float4 T)591     static AI float4 mix(float4 a, float4 b, float4 T) {
592         SkASSERT(all((0 <= T) & (T < 1)));
593         return (b - a)*T + a;
594     }
595 
596     // Helpers that chop the curve type into 'numPatches' parametrically uniform curves. It is
597     // assumed that 'numPatches' is calculated such that the resulting curves require the maximum
598     // number of segments to draw appropriately (since the original presumably needed even more).
chopAndWriteQuads(float2 p0,float2 p1,float2 p2,int numPatches)599     void chopAndWriteQuads(float2 p0, float2 p1, float2 p2, int numPatches) {
600         InnerTriangulator triangulator(numPatches, sk_bit_cast<SkPoint>(p0));
601         for (; numPatches >= 3; numPatches -= 2) {
602             // Chop into 3 quads.
603             float4 T = float4(1,1,2,2) / numPatches;
604             float4 ab = mix(p0.xyxy(), p1.xyxy(), T);
605             float4 bc = mix(p1.xyxy(), p2.xyxy(), T);
606             float4 abc = mix(ab, bc, T);
607             // p1 & p2 of the cubic representation of the middle quad.
608             float4 middle = mix(ab, bc, mix(T, T.zwxy(), 2/3.f));
609 
610             this->writeQuadPatch(p0, ab.lo, abc.lo);  // Write the 1st quad.
611             if constexpr (kAddTrianglesWhenChopping) {
612                 this->writeTriangle(p0, abc.lo, abc.hi);
613             }
614             this->writeCubicPatch(abc.lo, middle, abc.hi);  // Write the 2nd quad (already a cubic)
615             if constexpr (kAddTrianglesWhenChopping) {
616                 this->writeTriangleStack(triangulator.pushVertex(sk_bit_cast<SkPoint>(abc.hi)));
617             }
618             std::tie(p0, p1) = {abc.hi, bc.hi};  // Save the 3rd quad.
619         }
620         if (numPatches == 2) {
621             // Chop into 2 quads.
622             float2 ab = (p0 + p1) * .5f;
623             float2 bc = (p1 + p2) * .5f;
624             float2 abc = (ab + bc) * .5f;
625 
626             this->writeQuadPatch(p0, ab, abc);  // Write the 1st quad.
627             if constexpr (kAddTrianglesWhenChopping) {
628                 this->writeTriangle(p0, abc, p2);
629             }
630             this->writeQuadPatch(abc, bc, p2);  // Write the 2nd quad.
631         } else {
632             SkASSERT(numPatches == 1);
633             this->writeQuadPatch(p0, p1, p2);  // Write the single remaining quad.
634         }
635         if constexpr (kAddTrianglesWhenChopping) {
636             this->writeTriangleStack(triangulator.pushVertex(sk_bit_cast<SkPoint>(p2)));
637             this->writeTriangleStack(triangulator.close());
638         }
639     }
640 
chopAndWriteConics(float2 p0,float2 p1,float2 p2,float w,int numPatches)641     void chopAndWriteConics(float2 p0, float2 p1, float2 p2, float w, int numPatches) {
642         InnerTriangulator triangulator(numPatches, sk_bit_cast<SkPoint>(p0));
643         // Load the conic in 3d homogeneous (unprojected) space.
644         float4 h0 = float4(p0,1,1);
645         float4 h1 = float4(p1,1,1) * w;
646         float4 h2 = float4(p2,1,1);
647         for (; numPatches >= 2; --numPatches) {
648             // Chop in homogeneous space.
649             float T = 1.f/numPatches;
650             float4 ab = mix(h0, h1, T);
651             float4 bc = mix(h1, h2, T);
652             float4 abc = mix(ab, bc, T);
653 
654             // Project and write the 1st conic.
655             float2 midpoint = abc.xy() / abc.w();
656             this->writeConicPatch(h0.xy() / h0.w(),
657                                   ab.xy() / ab.w(),
658                                   midpoint,
659                                   ab.w() / sqrtf(h0.w() * abc.w()));
660             if constexpr (kAddTrianglesWhenChopping) {
661                 this->writeTriangleStack(triangulator.pushVertex(sk_bit_cast<SkPoint>(midpoint)));
662             }
663             std::tie(h0, h1) = {abc, bc};  // Save the 2nd conic (in homogeneous space).
664         }
665         // Project and write the remaining conic.
666         SkASSERT(numPatches == 1);
667         this->writeConicPatch(h0.xy() / h0.w(),
668                               h1.xy() / h1.w(),
669                               h2.xy(), // h2.w == 1
670                               h1.w() / sqrtf(h0.w()));
671         if constexpr (kAddTrianglesWhenChopping) {
672             this->writeTriangleStack(triangulator.pushVertex(sk_bit_cast<SkPoint>(h2.xy())));
673             this->writeTriangleStack(triangulator.close());
674         }
675     }
676 
chopAndWriteCubics(float2 p0,float2 p1,float2 p2,float2 p3,int numPatches)677     void chopAndWriteCubics(float2 p0, float2 p1, float2 p2, float2 p3, int numPatches) {
678         InnerTriangulator triangulator(numPatches, sk_bit_cast<SkPoint>(p0));
679         for (; numPatches >= 3; numPatches -= 2) {
680             // Chop into 3 cubics.
681             float4 T = float4(1,1,2,2) / numPatches;
682             float4 ab = mix(p0.xyxy(), p1.xyxy(), T);
683             float4 bc = mix(p1.xyxy(), p2.xyxy(), T);
684             float4 cd = mix(p2.xyxy(), p3.xyxy(), T);
685             float4 abc = mix(ab, bc, T);
686             float4 bcd = mix(bc, cd, T);
687             float4 abcd = mix(abc, bcd, T);
688             float4 middle = mix(abc, bcd, T.zwxy());  // p1 & p2 of the middle cubic.
689 
690             this->writeCubicPatch(p0, ab.lo, abc.lo, abcd.lo);  // Write the 1st cubic.
691             if constexpr (kAddTrianglesWhenChopping) {
692                 this->writeTriangle(p0, abcd.lo, abcd.hi);
693             }
694             this->writeCubicPatch(abcd.lo, middle, abcd.hi);  // Write the 2nd cubic.
695             if constexpr (kAddTrianglesWhenChopping) {
696                 this->writeTriangleStack(triangulator.pushVertex(sk_bit_cast<SkPoint>(abcd.hi)));
697             }
698             std::tie(p0, p1, p2) = {abcd.hi, bcd.hi, cd.hi};  // Save the 3rd cubic.
699         }
700         if (numPatches == 2) {
701             // Chop into 2 cubics.
702             float2 ab = (p0 + p1) * .5f;
703             float2 bc = (p1 + p2) * .5f;
704             float2 cd = (p2 + p3) * .5f;
705             float2 abc = (ab + bc) * .5f;
706             float2 bcd = (bc + cd) * .5f;
707             float2 abcd = (abc + bcd) * .5f;
708 
709             this->writeCubicPatch(p0, ab, abc, abcd);  // Write the 1st cubic.
710             if constexpr (kAddTrianglesWhenChopping) {
711                 this->writeTriangle(p0, abcd, p3);
712             }
713             this->writeCubicPatch(abcd, bcd, cd, p3);  // Write the 2nd cubic.
714         } else {
715             SkASSERT(numPatches == 1);
716             this->writeCubicPatch(p0, p1, p2, p3);  // Write the single remaining cubic.
717         }
718         if constexpr (kAddTrianglesWhenChopping) {
719             this->writeTriangleStack(triangulator.pushVertex(sk_bit_cast<SkPoint>(p3)));
720             this->writeTriangleStack(triangulator.close());
721         }
722     }
723 
724     ENABLE_IF(kAddTrianglesWhenChopping)
writeTriangleStack(MiddleOutPolygonTriangulator::PoppedTriangleStack && stack)725     writeTriangleStack(MiddleOutPolygonTriangulator::PoppedTriangleStack&& stack) {
726         for (auto [p0, p1, p2] : stack) {
727             this->writeTriangle(p0, p1, p2);
728         }
729     }
730 
731     // Runtime configuration, will always contain required attribs but may not have all optional
732     // attribs enabled (e.g. depending on caps or batching).
733     const PatchAttribs fAttribs;
734 
735     // The 2x2 approximation of the local-to-device transform that will affect subsequently
736     // recorded curves (when fully transformed in the vertex shader).
737     wangs_formula::VectorXform fApproxTransform = {};
738     // A maximum scale factor extracted from the current approximate transform.
739     float fMaxScale = 1.0f;
740     // Tracks the linear tolerances for the most recently written patches.
741     LinearTolerances fTolerances;
742 
743     PatchAllocator fPatchAllocator;
744     DeferredPatch  fDeferredPatch; // only usable if kTrackJoinControlPoints is true
745 
746     // Instance attribute state written after the 4 control points of a patch
747     JoinAttrib     fJoin;
748     FanPointAttrib fFanPoint;
749     StrokeAttrib   fStrokeParams;
750     ColorAttrib    fColor;
751     DepthAttrib    fDepth;
752 
753     // Index into a shared storage buffer containing this PatchWriter's patches' corresponding
754     // uniforms. Written out as an attribute with every patch, to read the appropriate uniform
755     // values from the storage buffer on draw.
756     SsboIndexAttrib fSsboIndex;
757 };
758 
759 }  // namespace skgpu::tess
760 
761 #undef ENABLE_IF
762 #undef AI
763 
764 #endif  // skgpu_tessellate_PatchWriter_DEFINED
765