/*
 * Copyright 2024 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/graphite/ComputePathAtlas.h"

#include "include/gpu/graphite/Recorder.h"
#include "src/core/SkTraceEvent.h"
#include "src/gpu/graphite/AtlasProvider.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/Log.h"
#include "src/gpu/graphite/RasterPathUtils.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/RendererProvider.h"
#include "src/gpu/graphite/TextureProxy.h"
#include "src/gpu/graphite/TextureUtils.h"
#include "src/gpu/graphite/geom/Transform_graphite.h"

#ifdef SK_ENABLE_VELLO_SHADERS
#include "src/gpu/graphite/compute/DispatchGroup.h"
#endif

namespace skgpu::graphite {
namespace {

// TODO: This is the maximum target dimension that vello can handle today.
constexpr uint16_t kComputeAtlasDim = 4096;

// TODO: Currently we reject shapes that are larger than a subset of a given atlas page to avoid
// creating too many flushes in a Recording containing many large path draws. These shapes often
// don't make efficient use of the available atlas texture space and the cost of sequential
// dispatches to render multiple atlas pages can be prohibitive.
constexpr size_t kBboxAreaThreshold = 1024 * 512;

// Coordinate size that is too large for vello to handle efficiently. See the discussion on
// https://github.com/linebender/vello/pull/542.
constexpr float kCoordinateThreshold = 1e10;

}  // namespace

ComputePathAtlas::ComputePathAtlas(Recorder* recorder)
    : PathAtlas(recorder, kComputeAtlasDim, kComputeAtlasDim)
    , fRectanizer(this->width(), this->height()) {}

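// Lazily fetches a storage-capable atlas texture from the AtlasProvider the first time it is
// needed; returns false if the texture could not be created.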
bool ComputePathAtlas::initializeTextureIfNeeded() {
    if (!fTexture) {
        SkColorType targetCT = ComputeShaderCoverageMaskTargetFormat(fRecorder->priv().caps());
        fTexture = fRecorder->priv().atlasProvider()->getAtlasTexture(fRecorder,
                                                                      this->width(),
                                                                      this->height(),
                                                                      targetCT,
                                                                      /*identifier=*/0,
                                                                      /*requireStorageUsage=*/true);
    }
    return fTexture != nullptr;
}

bool ComputePathAtlas::isSuitableForAtlasing(const Rect& transformedShapeBounds,
                                             const Rect& clipBounds) const {
    Rect shapeBounds = transformedShapeBounds.makeRoundOut();
    Rect maskBounds = shapeBounds.makeIntersect(clipBounds);
    skvx::float2 maskSize = maskBounds.size();
    float width = maskSize.x(), height = maskSize.y();

    if (width > this->width() || height > this->height()) {
        return false;
    }

    // For now we're allowing paths that are smaller than 1/32nd of the full 4096x4096 atlas size
    // to prevent the atlas texture from filling up too often. There are several approaches we
    // should explore to alleviate the cost of atlasing large paths.
    if (width * height > kBboxAreaThreshold) {
        return false;
    }

    // Reject pathological shapes that vello can't handle efficiently yet.
    skvx::float2 unclippedSize = shapeBounds.size();
    if (std::fabs(unclippedSize.x()) > kCoordinateThreshold ||
        std::fabs(unclippedSize.y()) > kCoordinateThreshold) {
        return false;
    }

    return true;
}

const TextureProxy* ComputePathAtlas::addRect(skvx::half2 maskSize,
                                              SkIPoint16* outPos) {
    if (!this->initializeTextureIfNeeded()) {
        SKGPU_LOG_E("Failed to instantiate an atlas texture");
        return nullptr;
    }

    // An empty mask always fits, so just return the texture.
    // TODO: This may not be needed if we can handle clipped out bounds with inverse fills
    // another way. See PathAtlas::addShape().
    if (!all(maskSize)) {
        *outPos = {0, 0};
        return fTexture.get();
    }

    if (!fRectanizer.addPaddedRect(maskSize.x(), maskSize.y(), kEntryPadding, outPos)) {
        return nullptr;
    }

    return fTexture.get();
}

void ComputePathAtlas::reset() {
    fRectanizer.reset();

    this->onReset();
}

#ifdef SK_ENABLE_VELLO_SHADERS

/**
 * ComputePathAtlas that uses a VelloRenderer.
 */
class VelloComputePathAtlas final : public ComputePathAtlas {
public:
    explicit VelloComputePathAtlas(Recorder* recorder)
        : ComputePathAtlas(recorder)
        , fCachedAtlasMgr(fWidth, fHeight, recorder->priv().caps()) {}

    // Record the compute dispatches that will draw the atlas contents.
    bool recordDispatches(Recorder*, ComputeTask::DispatchGroupList*) const override;

private:
    const TextureProxy* onAddShape(const Shape&,
                                   const Transform&,
                                   const SkStrokeRec&,
                                   skvx::half2 maskSize,
                                   skvx::half2* outPos) override;

    void onReset() override {
        fCachedAtlasMgr.onReset();

        fUncachedScene.reset();
        fUncachedOccupiedArea = { 0, 0 };
    }

    class VelloAtlasMgr : public PathAtlas::DrawAtlasMgr {
    public:
        VelloAtlasMgr(size_t width, size_t height, const Caps* caps)
            : PathAtlas::DrawAtlasMgr(width, height, width, height,
                                      DrawAtlas::UseStorageTextures::kYes,
                                      /*label=*/"VelloPathAtlas", caps) {}

        bool recordDispatches(Recorder* recorder, ComputeTask::DispatchGroupList* dispatches) const;

        void onReset() {
            fDrawAtlas->markUsedPlotsAsFull();
            for (int i = 0; i < PlotLocator::kMaxMultitexturePages; ++i) {
                fScenes[i].reset();
                fOccupiedAreas[i] = {0, 0};
            }
        }

    protected:
        bool onAddToAtlas(const Shape&,
                          const Transform& transform,
                          const SkStrokeRec&,
                          SkIRect shapeBounds,
                          const AtlasLocator&) override;

    private:
        VelloScene fScenes[PlotLocator::kMaxMultitexturePages];
        SkISize fOccupiedAreas[PlotLocator::kMaxMultitexturePages] = {
            {0, 0}, {0, 0}, {0, 0}, {0, 0}
        };
    };

    VelloAtlasMgr fCachedAtlasMgr;

    // Contains the encoded scene buffer data that serves as the input to a vello compute pass.
    // For the uncached atlas.
    VelloScene fUncachedScene;

    // Occupied bounds of the uncached atlas
    SkISize fUncachedOccupiedArea = { 0, 0 };
};

static VelloAaConfig get_vello_aa_config(Recorder* recorder) {
    // Use the analytic area AA mode unless caps say otherwise.
    VelloAaConfig config = VelloAaConfig::kAnalyticArea;
#if defined(GPU_TEST_UTILS)
    PathRendererStrategy strategy = recorder->priv().caps()->requestedPathRendererStrategy();
    if (strategy == PathRendererStrategy::kComputeMSAA16) {
        config = VelloAaConfig::kMSAA16;
    } else if (strategy == PathRendererStrategy::kComputeMSAA8) {
        config = VelloAaConfig::kMSAA8;
    }
#endif

    return config;
}

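// Builds the vello DispatchGroup that renders `scene` into `texture`, restricted to the occupied
// area of the atlas.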
static std::unique_ptr<DispatchGroup> render_vello_scene(Recorder* recorder,
                                                         sk_sp<TextureProxy> texture,
                                                         const VelloScene& scene,
                                                         SkISize occupiedArea,
                                                         VelloAaConfig config) {
    return recorder->priv().rendererProvider()->velloRenderer()->renderScene(
                {(uint32_t)occupiedArea.width(),
                 (uint32_t)occupiedArea.height(),
                 SkColors::kBlack,
                 config},
                scene,
                std::move(texture),
                recorder);
}

static void add_shape_to_scene(const Shape& shape,
                               const Transform& transform,
                               const SkStrokeRec& style,
                               Rect atlasBounds,
                               VelloScene* scene,
                               SkISize* occupiedArea) {
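    // Grow the tracked occupied region to cover this entry plus its padding so that the compute
    // pass renders (and clears) only the portion of the atlas that is actually in use.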
    occupiedArea->fWidth = std::max(occupiedArea->fWidth,
                                    (int)atlasBounds.right() + PathAtlas::kEntryPadding);
    occupiedArea->fHeight = std::max(occupiedArea->fHeight,
                                     (int)atlasBounds.bot() + PathAtlas::kEntryPadding);

    // TODO(b/283876964): Apply clips here. Initially we'll need to encode the clip stack repeatedly
    // for each shape since the full vello renderer treats clips and their affected draws as a
    // single shape hierarchy in the same scene coordinate space. For coverage masks we want each
    // mask to be transformed to its atlas allocation coordinates and for the clip to be applied
    // with a translation relative to the atlas slot.
    //
    // Repeatedly encoding the clip stack should be relatively cheap (depending on how deep the
    // clips get) however it is wasteful both in terms of time and memory. If this proves to hurt
    // performance, future work will explore building an atlas-oriented element processing stage
    // that applies the atlas-relative translation while evaluating the stack monoid on the GPU.

    // Clip the mask to the bounds of the atlas slot, which are already inset by 1px relative to
    // the bounds that the Rectanizer assigned.
    SkPath clipRect = SkPath::Rect(atlasBounds.asSkRect());
    scene->pushClipLayer(clipRect, Transform::Identity());

    // The atlas transform of the shape is the linear-components (scale, rotation, skew) of
    // `localToDevice` translated by the top-left offset of `atlasBounds`.
    Transform atlasTransform = transform.postTranslate(atlasBounds.x(), atlasBounds.y());
    SkPath devicePath = shape.asPath();

    // For stroke-and-fill, draw two masks into the same atlas slot: one for the stroke and one for
    // the fill.
    SkStrokeRec::Style styleType = style.getStyle();
    if (styleType == SkStrokeRec::kStroke_Style ||
        styleType == SkStrokeRec::kHairline_Style ||
        styleType == SkStrokeRec::kStrokeAndFill_Style) {
        // We need to special-case hairline strokes and strokes with sub-pixel width as Vello
        // draws these with aliasing and the results are barely visible. Draw the stroke with a
        // device-space width of 1 pixel and scale down the alpha by the true width to approximate
        // the sampled area.
        float width = style.getWidth();
        float deviceWidth = width * atlasTransform.maxScaleFactor();
        if (style.isHairlineStyle() || deviceWidth <= 1.0) {
            // Both strokes get 1/2 weight scaled by the theoretical area (1 for hairlines,
            // `deviceWidth` otherwise).
            SkColor4f color = SkColors::kRed;
            color.fR *= style.isHairlineStyle() ? 1.0 : deviceWidth;

            // Transform the stroke's width to its local coordinate space since it'll get drawn with
            // `atlasTransform`.
            float transformedWidth = 1.0f / atlasTransform.maxScaleFactor();
            SkStrokeRec adjustedStyle(style);
            adjustedStyle.setStrokeStyle(transformedWidth);
            scene->solidStroke(devicePath, color, adjustedStyle, atlasTransform);
        } else {
            scene->solidStroke(devicePath, SkColors::kRed, style, atlasTransform);
        }
    }
    if (styleType == SkStrokeRec::kFill_Style || styleType == SkStrokeRec::kStrokeAndFill_Style) {
        scene->solidFill(devicePath, SkColors::kRed, shape.fillType(), atlasTransform);
    }

    scene->popClipLayer();
}

bool VelloComputePathAtlas::recordDispatches(Recorder* recorder,
                                             ComputeTask::DispatchGroupList* dispatches) const {
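    // Record dispatches for the cached (DrawAtlas-backed) pages first, then add one more for the
    // uncached atlas texture if it has any content.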
    bool addedDispatches = fCachedAtlasMgr.recordDispatches(recorder, dispatches);

    if (this->texture() && !fUncachedOccupiedArea.isEmpty()) {
        SkASSERT(recorder && recorder == fRecorder);

        VelloAaConfig config = get_vello_aa_config(recorder);
        std::unique_ptr<DispatchGroup> dispatchGroup =
                render_vello_scene(recorder,
                                   sk_ref_sp(this->texture()),
                                   fUncachedScene,
                                   fUncachedOccupiedArea,
                                   config);
        if (dispatchGroup) {
            TRACE_EVENT_INSTANT1("skia.gpu", TRACE_FUNC, TRACE_EVENT_SCOPE_THREAD,
                                 "# dispatches", dispatchGroup->dispatches().size());
            dispatches->emplace_back(std::move(dispatchGroup));
            return true;
        } else {
            SKGPU_LOG_E("VelloComputePathAtlas:: Failed to create dispatch group.");
        }
    }

    return addedDispatches;
}

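// Adds a shape either to the cached, multi-page DrawAtlas (when the shape has a stable key) or to
// the single uncached atlas texture managed by the Rectanizer.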
const TextureProxy* VelloComputePathAtlas::onAddShape(
        const Shape& shape,
        const Transform& transform,
        const SkStrokeRec& style,
        skvx::half2 maskSize,
        skvx::half2* outPos) {

    skgpu::UniqueKey maskKey;
    bool hasKey = shape.hasKey();
    if (hasKey) {
        // Try to locate or add to cached DrawAtlas
        const TextureProxy* proxy = fCachedAtlasMgr.findOrCreateEntry(fRecorder,
                                                                      shape,
                                                                      transform,
                                                                      style,
                                                                      maskSize,
                                                                      outPos);
        if (proxy) {
            return proxy;
        }
    }

    // Try to add to uncached texture
    SkIPoint16 iPos;
    const TextureProxy* texProxy = this->addRect(maskSize, &iPos);
    if (!texProxy) {
        return nullptr;
    }
    *outPos = skvx::half2(iPos.x(), iPos.y());
    // If the mask is empty, just return.
    // TODO: This may not be needed if we can handle clipped out bounds with inverse fills
    // another way. See PathAtlas::addShape().
    if (!all(maskSize)) {
        return texProxy;
    }

    // TODO: The compute renderer doesn't support perspective yet. We assume that the path has been
    // appropriately transformed in that case.
    SkASSERT(transform.type() != Transform::Type::kPerspective);

    // Restrict the render to the occupied area of the atlas, including entry padding so that the
    // padded row/column is cleared when Vello renders.
    Rect atlasBounds = Rect::XYWH(skvx::float2(iPos.x(), iPos.y()), skvx::cast<float>(maskSize));

    add_shape_to_scene(shape, transform, style, atlasBounds,
                       &fUncachedScene, &fUncachedOccupiedArea);

    return texProxy;
}

/////////////////////////////////////////////////////////////////////////////////////////

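// Encodes a shape into the vello scene associated with the DrawAtlas page it was assigned to.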
bool VelloComputePathAtlas::VelloAtlasMgr::onAddToAtlas(const Shape& shape,
                                                        const Transform& transform,
                                                        const SkStrokeRec& style,
                                                        SkIRect shapeBounds,
                                                        const AtlasLocator& locator) {
    uint32_t index = locator.pageIndex();
    const TextureProxy* texProxy = fDrawAtlas->getProxies()[index].get();
    if (!texProxy) {
        return false;
    }

    // TODO: The compute renderer doesn't support perspective yet. We assume that the path has been
    // appropriately transformed in that case.
    SkASSERT(transform.type() != Transform::Type::kPerspective);

    // Restrict the render to the occupied area of the atlas, including entry padding so that the
    // padded row/column is cleared when Vello renders.
    SkIPoint iPos = locator.topLeft();
    Rect atlasBounds = Rect::XYWH(skvx::float2(iPos.x() + kEntryPadding, iPos.y() + kEntryPadding),
                                  skvx::float2(shapeBounds.width(), shapeBounds.height()));

    add_shape_to_scene(shape, transform, style, atlasBounds,
                       &fScenes[index], &fOccupiedAreas[index]);

    return true;
}

bool VelloComputePathAtlas::VelloAtlasMgr::recordDispatches(
        Recorder* recorder, ComputeTask::DispatchGroupList* dispatches) const {
    SkASSERT(recorder);
    VelloAaConfig config = get_vello_aa_config(recorder);

    bool addedDispatches = false;
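    // Emit one dispatch group per atlas page that has content to render.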
    for (int i = 0; i < PlotLocator::kMaxMultitexturePages; ++i) {
        if (!fOccupiedAreas[i].isEmpty()) {
            std::unique_ptr<DispatchGroup> dispatchGroup =
                    render_vello_scene(recorder,
                                       fDrawAtlas->getProxies()[i],
                                       fScenes[i],
                                       fOccupiedAreas[i],
                                       config);
            if (dispatchGroup) {
                TRACE_EVENT_INSTANT1("skia.gpu", TRACE_FUNC, TRACE_EVENT_SCOPE_THREAD,
                                     "# dispatches", dispatchGroup->dispatches().size());
                dispatches->emplace_back(std::move(dispatchGroup));
                addedDispatches = true;
            } else {
                SKGPU_LOG_E("VelloComputePathAtlas:: Failed to create dispatch group.");
            }
        }
    }
    return addedDispatches;
}

#endif  // SK_ENABLE_VELLO_SHADERS

std::unique_ptr<ComputePathAtlas> ComputePathAtlas::CreateDefault(Recorder* recorder) {
#ifdef SK_ENABLE_VELLO_SHADERS
    return std::make_unique<VelloComputePathAtlas>(recorder);
#else
    return nullptr;
#endif
}

}  // namespace skgpu::graphite