/*
 * Copyright 2021 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/graphite/DrawPass.h"

#include "include/gpu/graphite/GraphiteTypes.h"
#include "include/gpu/graphite/Recorder.h"
#include "include/private/base/SkAlign.h"
#include "src/core/SkTraceEvent.h"
#include "src/gpu/graphite/Buffer.h"
#include "src/gpu/graphite/BufferManager.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/ContextPriv.h"
#include "src/gpu/graphite/ContextUtils.h"
#include "src/gpu/graphite/DrawContext.h"
#include "src/gpu/graphite/DrawList.h"
#include "src/gpu/graphite/DrawWriter.h"
#include "src/gpu/graphite/GlobalCache.h"
#include "src/gpu/graphite/GraphicsPipeline.h"
#include "src/gpu/graphite/GraphicsPipelineDesc.h"
#include "src/gpu/graphite/Log.h"
#include "src/gpu/graphite/PaintParamsKey.h"
#include "src/gpu/graphite/PipelineData.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/Renderer.h"
#include "src/gpu/graphite/ResourceProvider.h"
#include "src/gpu/graphite/Sampler.h"
#include "src/gpu/graphite/Texture.h"
#include "src/gpu/graphite/UniformManager.h"
#include "src/gpu/graphite/geom/BoundsManager.h"

#include "src/base/SkMathPriv.h"
#include "src/base/SkTBlockList.h"

#include <algorithm>

using namespace skia_private;

namespace skgpu::graphite {

namespace {

// Helper to manage packed fields within a uint64_t
template <uint64_t Bits, uint64_t Offset>
struct Bitfield {
    static constexpr uint64_t kMask = ((uint64_t) 1 << Bits) - 1;
    static constexpr uint64_t kOffset = Offset;
    static constexpr uint64_t kBits = Bits;

    static uint32_t get(uint64_t v) { return static_cast<uint32_t>((v >> kOffset) & kMask); }
    static uint64_t set(uint32_t v) { return (v & kMask) << kOffset; }
};
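
// For example (illustrative only): with Field = Bitfield<16, 48>, Field::set(0xABCD) produces
// 0xABCD'0000'0000'0000 and Field::get() of that value returns 0xABCD. Note that set() masks its
// argument to the low 16 bits before shifting, so out-of-range values are silently truncated.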

// This class maps objects to a dense index which can then be used to look them up later
template <typename T, typename V = T, typename C = V>
class DenseBiMap {
public:
    using Index = uint32_t;

    // See note below in GeometryUniformField. This value can be round-tripped within the SortKey
    // packing for all fields but will not be produced when recording actual draw data.
    static constexpr Index kInvalidIndex{1 << SkNextLog2_portable(DrawList::kMaxRenderSteps)};

    bool empty() const { return fIndexToData.empty(); }
    size_t size() const { return fIndexToData.size(); }

    Index insert(const T& data) {
        Index* index = fDataToIndex.find(data);
        if (!index) {
            SkASSERT(SkToU32(fIndexToData.size()) < kInvalidIndex);
            index = fDataToIndex.set(data, (Index) fIndexToData.size());
            fIndexToData.push_back(C{data});
        }
        return *index;
    }

    const V& lookup(Index index) {
        SkASSERT(index < kInvalidIndex);
        return fIndexToData[index];
    }

    SkSpan<V> data() { return {fIndexToData.data(), fIndexToData.size()}; }

    TArray<V>&& detach() { return std::move(fIndexToData); }

private:
    THashMap<T, Index> fDataToIndex;
    TArray<V> fIndexToData;
};
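
// For example (illustrative only): inserting equal keys returns the same dense index, so the
// detached array holds exactly one value per distinct key, in first-insertion order:
//   DenseBiMap<SamplerDesc> cache;
//   auto i0 = cache.insert(descA);   // new index 0
//   auto i1 = cache.insert(descA);   // still 0
//   const SamplerDesc& desc = cache.lookup(i0);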

// NOTE: TextureBinding's use as a key type in DenseBiMap relies on the fact that the underlying
// data has been de-duplicated by a PipelineDataCache earlier, so that the bit identity of the data
// blocks (e.g. address+size) is equivalent to the content equality of the texture lists.

// Tracks the combination of textures from the paint and from the RenderStep to describe the full
// binding that needs to be in the command list.
struct TextureBinding {
    TextureDataBlock fPaintTextures;
    TextureDataBlock fStepTextures;

    bool operator==(const TextureBinding& other) const {
        return fPaintTextures == other.fPaintTextures &&
               fStepTextures == other.fStepTextures;
    }
    bool operator!=(const TextureBinding& other) const { return !(*this == other); }

    int numTextures() const {
        return (fPaintTextures ? fPaintTextures.numTextures() : 0) +
               (fStepTextures ? fStepTextures.numTextures() : 0);
    }
};

using TextureBindingCache = DenseBiMap<TextureBinding>;
using GraphicsPipelineCache = DenseBiMap<GraphicsPipelineDesc>;

// Writes uniform data either to uniform buffers or to shared storage buffers, and tracks when
// bindings need to change between draws.
class UniformTracker {
public:
    UniformTracker(bool useStorageBuffers) : fUseStorageBuffers(useStorageBuffers) {}

    bool writeUniforms(UniformDataCache& uniformCache,
                       DrawBufferManager* bufferMgr,
                       UniformDataCache::Index index) {
        if (index >= UniformDataCache::kInvalidIndex) {
            return false;
        }

        if (index == fLastIndex) {
            return false;
        }
        fLastIndex = index;

        UniformDataCache::Entry& uniformData = uniformCache.lookup(index);
        const size_t uniformDataSize = uniformData.fCpuData.size();

        // Upload the uniform data if we haven't already.
        // Alternatively, re-upload the uniform data to avoid a rebind if we're using storage
        // buffers. This will result in more data uploaded, but the tradeoff seems worthwhile.
        if (!uniformData.fBufferBinding.fBuffer ||
            (fUseStorageBuffers && uniformData.fBufferBinding.fBuffer != fLastBinding.fBuffer)) {
            UniformWriter writer;
            std::tie(writer, uniformData.fBufferBinding) =
                    fUseStorageBuffers ? bufferMgr->getAlignedSsboWriter(1, uniformDataSize)
                                       : bufferMgr->getUniformWriter(1, uniformDataSize);

            // Early out if buffer mapping failed.
            if (!writer) {
                return false;
            }

            writer.write(uniformData.fCpuData.data(), uniformDataSize);

            if (fUseStorageBuffers) {
                // When using storage buffers, store the SSBO index in the binding's offset field
                // and always use the entire buffer's size in the size field.
                SkASSERT(uniformData.fBufferBinding.fOffset % uniformDataSize == 0);
                uniformData.fBufferBinding.fOffset /= uniformDataSize;
                uniformData.fBufferBinding.fSize = uniformData.fBufferBinding.fBuffer->size();
            }
        }

        const bool needsRebind =
                uniformData.fBufferBinding.fBuffer != fLastBinding.fBuffer ||
                (!fUseStorageBuffers && uniformData.fBufferBinding.fOffset != fLastBinding.fOffset);

        fLastBinding = uniformData.fBufferBinding;

        return needsRebind;
    }

    void bindUniforms(UniformSlot slot, DrawPassCommands::List* commandList) {
        BindBufferInfo binding = fLastBinding;
        if (fUseStorageBuffers) {
            // Track the SSBO index in fLastBinding, but set offset = 0 in the actual used binding.
            binding.fOffset = 0;
        }
        commandList->bindUniformBuffer(binding, slot);
    }

    uint32_t ssboIndex() const {
        // The SSBO index for the last-bound storage buffer is stored in the binding's offset field.
        return fLastBinding.fOffset;
    }

private:
    // Internally track the last binding returned, so that we know whether new uploads or rebindings
    // are necessary. If we're using SSBOs, this is treated specially -- the fOffset field holds the
    // index in the storage buffer of the last-written uniforms, and the offsets used for actual
    // bindings are always zero.
    BindBufferInfo fLastBinding;

    // This keeps track of the last index used for writing uniforms from a provided uniform cache.
    // If a provided index matches the last index, the uniforms are assumed to already be written
    // and no additional uploading is performed. This assumes a UniformTracker will always be
    // provided with the same uniform cache.
    UniformDataCache::Index fLastIndex = UniformDataCache::kInvalidIndex;

    const bool fUseStorageBuffers;
};
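
// Typical per-draw usage, mirroring the loop in DrawPass::Make() below: writeUniforms() uploads
// data if needed and reports whether a new bind command is required before one is recorded.
//   if (tracker.writeUniforms(cache, bufferMgr, index)) {
//       tracker.bindUniforms(slot, commandList);
//   }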

// Automatically merges and manages texture bindings and uniform bindings sourced from either the
// paint or the RenderStep. Tracks the bound state based on last-provided unique index to write
// Bind commands to a CommandList when necessary.
class TextureBindingTracker {
public:
    TextureBindingCache::Index trackTextures(TextureDataBlock paintTextures,
                                             TextureDataBlock stepTextures) {
        if (!paintTextures && !stepTextures) {
            return TextureBindingCache::kInvalidIndex;
        }
        return fBindingCache.insert({paintTextures, stepTextures});
    }

    bool setCurrentTextureBindings(TextureBindingCache::Index bindingIndex) {
        if (bindingIndex < TextureBindingCache::kInvalidIndex && fLastIndex != bindingIndex) {
            fLastIndex = bindingIndex;
            return true;
        }
        // No binding change
        return false;
    }

    void bindTextures(DrawPassCommands::List* commandList) {
        SkASSERT(fLastIndex < TextureBindingCache::kInvalidIndex);
        const TextureBinding& binding = fBindingCache.lookup(fLastIndex);

        auto [texIndices, samplerIndices] =
                commandList->bindDeferredTexturesAndSamplers(binding.numTextures());

        if (binding.fPaintTextures) {
            for (int i = 0; i < binding.fPaintTextures.numTextures(); ++i) {
                auto [tex, sampler] = binding.fPaintTextures.texture(i);
                *texIndices++     = fProxyCache.insert(tex.get());
                *samplerIndices++ = fSamplerCache.insert(sampler);
            }
        }
        if (binding.fStepTextures) {
            for (int i = 0; i < binding.fStepTextures.numTextures(); ++i) {
                auto [tex, sampler] = binding.fStepTextures.texture(i);
                *texIndices++     = fProxyCache.insert(tex.get());
                *samplerIndices++ = fSamplerCache.insert(sampler);
            }
        }
    }

    TArray<sk_sp<TextureProxy>>&& detachTextures() { return fProxyCache.detach(); }
    TArray<SamplerDesc>&& detachSamplers() { return fSamplerCache.detach(); }

private:
    struct ProxyRef {
        const TextureProxy* fProxy;
        operator sk_sp<TextureProxy>() const { return sk_ref_sp(fProxy); }
    };
    using TextureProxyCache = DenseBiMap<const TextureProxy*, sk_sp<TextureProxy>, ProxyRef>;
    using SamplerDescCache = DenseBiMap<SamplerDesc>;

    TextureBindingCache fBindingCache;

    TextureProxyCache fProxyCache;
    SamplerDescCache fSamplerCache;

    TextureBindingCache::Index fLastIndex = TextureBindingCache::kInvalidIndex;
};

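// Writes the pass's accumulated gradient data into a single storage buffer and remembers the
// binding so the buffer can be bound once for the entire pass (see its use in Make() below).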
class GradientBufferTracker {
public:
    bool writeData(SkSpan<const float> gradData, DrawBufferManager* bufferMgr) {
        if (gradData.empty()) {
            return true;
        }

        auto [writer, bufferInfo] = bufferMgr->getSsboWriter(gradData.size(), sizeof(float));

        if (!writer) {
            return false;
        }

        writer.write(gradData.data(), gradData.size_bytes());
        fBufferInfo = bufferInfo;
        fHasData = true;

        return true;
    }

    void bindIfNeeded(DrawPassCommands::List* commandList) const {
        if (fHasData) {
            commandList->bindUniformBuffer(fBufferInfo, UniformSlot::kGradient);
        }
    }

private:
    BindBufferInfo fBufferInfo;
    bool fHasData = false;
};

} // namespace

///////////////////////////////////////////////////////////////////////////////////////////////////

/**
 * Each Draw in a DrawList might be processed by multiple RenderSteps (determined by the Draw's
 * Renderer), which can be sorted independently. Each (step, draw) pair produces its own SortKey.
 *
 * The goal of sorting draws for the DrawPass is to minimize pipeline transitions and dynamic binds
 * within a pipeline, while still respecting the overall painter's order. This decreases the number
 * of low-level draw commands in a command buffer and increases the size of those commands,
 * allowing the GPU to operate more efficiently and have fewer bubbles within its own instruction
 * stream.
 *
 * The Draw's CompressedPaintersOrder and DisjointStencilIndex represent the most significant bits
 * of the key, and are shared by all SortKeys produced by the same draw. Next, the pipeline
 * description is encoded in two steps:
 *  1. The index of the RenderStep packed in the high bits to ensure each step for a draw is
 *     ordered correctly.
 *  2. An index into a cache of pipeline descriptions is used to encode the identity of the
 *     pipeline (SortKeys that differ in the bits from #1 necessarily would have different
 *     descriptions, but then the specific ordering of the RenderSteps isn't enforced).
 * Last, the SortKey encodes an index into the set of uniform bindings accumulated for a DrawPass.
 * This allows the SortKey to cluster draw steps that have both a compatible pipeline and do not
 * require rebinding uniform data or other state (e.g. scissor). Since the uniform data index and
 * the pipeline description index are packed into indices and not actual pointers, a given SortKey
 * is only valid for a specific DrawList->DrawPass conversion.
 */
class DrawPass::SortKey {
public:
    SortKey(const DrawList::Draw* draw,
            int renderStep,
            GraphicsPipelineCache::Index pipelineIndex,
            UniformDataCache::Index geomUniformIndex,
            UniformDataCache::Index shadingUniformIndex,
            TextureBindingCache::Index textureBindingIndex)
        : fPipelineKey(ColorDepthOrderField::set(draw->fDrawParams.order().paintOrder().bits()) |
                       StencilIndexField::set(draw->fDrawParams.order().stencilIndex().bits())  |
                       RenderStepField::set(static_cast<uint32_t>(renderStep))                  |
                       PipelineField::set(pipelineIndex))
        , fUniformKey(GeometryUniformField::set(geomUniformIndex)   |
                      ShadingUniformField::set(shadingUniformIndex) |
                      TextureBindingsField::set(textureBindingIndex))
        , fDraw(draw) {
        SkASSERT(pipelineIndex < GraphicsPipelineCache::kInvalidIndex);
        SkASSERT(renderStep <= draw->fRenderer->numRenderSteps());
    }

    bool operator<(const SortKey& k) const {
        return fPipelineKey < k.fPipelineKey ||
               (fPipelineKey == k.fPipelineKey && fUniformKey < k.fUniformKey);
    }

    const RenderStep& renderStep() const {
        return fDraw->fRenderer->step(RenderStepField::get(fPipelineKey));
    }

    const DrawList::Draw& draw() const { return *fDraw; }

    GraphicsPipelineCache::Index pipelineIndex() const {
        return PipelineField::get(fPipelineKey);
    }
    UniformDataCache::Index geometryUniformIndex() const {
        return GeometryUniformField::get(fUniformKey);
    }
    UniformDataCache::Index shadingUniformIndex() const {
        return ShadingUniformField::get(fUniformKey);
    }
    TextureBindingCache::Index textureBindingIndex() const {
        return TextureBindingsField::get(fUniformKey);
    }

private:
    // Fields are ordered from most-significant to least when sorting by 128-bit value.
    // NOTE: We don't use C++ bit fields because field ordering is implementation defined and we
    // need to sort consistently.
    using ColorDepthOrderField = Bitfield<16, 48>; // sizeof(CompressedPaintersOrder)
    using StencilIndexField    = Bitfield<16, 32>; // sizeof(DisjointStencilIndex)
    using RenderStepField      = Bitfield<2,  30>; // bits >= log2(Renderer::kMaxRenderSteps)
    using PipelineField        = Bitfield<30, 0>;  // bits >= log2(max total steps in draw list)
    uint64_t fPipelineKey;

    // The uniform/texture index fields need 1 extra bit to encode "no-data". Values that are
    // greater than or equal to 2^(bits-1) represent "no-data", while values between
    // [0, 2^(bits-1)-1] can access data arrays without extra logic.
    using GeometryUniformField = Bitfield<17, 47>; // bits >= 1+log2(max total steps)
    using ShadingUniformField  = Bitfield<17, 30>; // bits >= 1+log2(max total steps)
    using TextureBindingsField = Bitfield<30, 0>;  // bits >= 1+log2(max total steps)
    uint64_t fUniformKey;

    // Backpointer to the draw that produced the sort key
    const DrawList::Draw* fDraw;

    static_assert(ColorDepthOrderField::kBits >= sizeof(CompressedPaintersOrder));
    static_assert(StencilIndexField::kBits    >= sizeof(DisjointStencilIndex));
    static_assert(RenderStepField::kBits      >= SkNextLog2_portable(Renderer::kMaxRenderSteps));
    static_assert(PipelineField::kBits        >= SkNextLog2_portable(DrawList::kMaxRenderSteps));
    static_assert(GeometryUniformField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
    static_assert(ShadingUniformField::kBits  >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
    static_assert(TextureBindingsField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
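
    // Resulting key layout, from most- to least-significant bit:
    //   fPipelineKey: [ ColorDepthOrder:16 | StencilIndex:16 | RenderStep:2 | Pipeline:30 ]
    //   fUniformKey:  [ GeometryUniform:17 | ShadingUniform:17 | TextureBindings:30 ]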
};

///////////////////////////////////////////////////////////////////////////////////////////////////

DrawPass::DrawPass(sk_sp<TextureProxy> target,
                   std::pair<LoadOp, StoreOp> ops,
                   std::array<float, 4> clearColor)
        : fTarget(std::move(target))
        , fBounds(SkIRect::MakeEmpty())
        , fOps(ops)
        , fClearColor(clearColor) {}

DrawPass::~DrawPass() = default;

std::unique_ptr<DrawPass> DrawPass::Make(Recorder* recorder,
                                         std::unique_ptr<DrawList> draws,
                                         sk_sp<TextureProxy> target,
                                         const SkImageInfo& targetInfo,
                                         std::pair<LoadOp, StoreOp> ops,
                                         std::array<float, 4> clearColor) {
    // NOTE: This assert is here to ensure SortKey is as tightly packed as possible. Any change to
    // its size should be done with care and good reason. The performance of sorting the keys is
    // heavily tied to the total size.
    //
    // At 24 bytes (current), sorting is about 30% slower than if SortKey could be packed into just
    // 16 bytes. There are several ways this could be done if necessary:
    //  - Restricting the max draw count to 16k (14-bits) and only using a single index to refer to
    //    the uniform data => 8 bytes of key, 8 bytes of pointer.
    //  - Restrict the max draw count to 32k (15-bits), use a single uniform index, and steal the
    //    4 low bits from the Draw* pointer since it's 16-byte aligned.
    //  - Compact the Draw* to an index into the original collection, although that has extra
    //    indirection and does not work as well with SkTBlockList.
    // In pseudo tests, manipulating the pointer or having to mask out indices was about 15% slower
    // than an 8-byte key and unmodified pointer.
    static_assert(sizeof(DrawPass::SortKey) ==
                  SkAlignTo(16 + sizeof(void*), alignof(DrawPass::SortKey)));

    TRACE_EVENT1("skia.gpu", TRACE_FUNC, "draw count", draws->fDraws.count());

    // The DrawList is converted directly into the DrawPass' data structures, but once the DrawPass
    // is returned from Make(), it is considered immutable.
    std::unique_ptr<DrawPass> drawPass(new DrawPass(target, ops, clearColor));

    Rect passBounds = Rect::InfiniteInverted();

    UniformDataCache geometryUniformDataCache;
    UniformDataCache shadingUniformDataCache;
    TextureDataCache* textureDataCache = recorder->priv().textureDataCache();
    DrawBufferManager* bufferMgr = recorder->priv().drawBufferManager();
    if (bufferMgr->hasMappingFailed()) {
        SKGPU_LOG_W("Buffer mapping has already failed; dropping draw pass!");
        return nullptr;
    }

    GraphicsPipelineCache pipelineCache;

    // Uniform data is written to storage buffers when the backend supports them, and to uniform
    // buffers otherwise.
    const bool useStorageBuffers = recorder->priv().caps()->storageBufferSupport();
    const ResourceBindingRequirements& bindingReqs =
            recorder->priv().caps()->resourceBindingRequirements();
    Layout uniformLayout =
            useStorageBuffers ? bindingReqs.fStorageBufferLayout : bindingReqs.fUniformBufferLayout;

    TextureBindingTracker textureBindingTracker;
    GradientBufferTracker gradientBufferTracker;

    ShaderCodeDictionary* dict = recorder->priv().shaderCodeDictionary();
    PaintParamsKeyBuilder builder(dict);

    // The initial layout we pass here is not important as it will be re-assigned when writing
    // shading and geometry uniforms below.
    PipelineDataGatherer gatherer(uniformLayout);

    std::vector<SortKey> keys;
    keys.reserve(draws->renderStepCount());

    for (const DrawList::Draw& draw : draws->fDraws.items()) {
        // If we have two different descriptors, such that the uniforms from the PaintParams can be
        // bound independently of those used by the rest of the RenderStep, then we can upload now
        // and remember the location for re-use on any RenderStep that does shading.
        UniquePaintParamsID shaderID;
        UniformDataCache::Index shadingUniformIndex = UniformDataCache::kInvalidIndex;
        TextureDataBlock paintTextures;

        if (draw.fPaintParams.has_value()) {
            shaderID = ExtractPaintData(recorder,
                                        &gatherer,
                                        &builder,
                                        uniformLayout,
                                        draw.fDrawParams.transform(),
                                        draw.fPaintParams.value(),
                                        draw.fDrawParams.geometry(),
                                        targetInfo.colorInfo());

            if (shaderID.isValid()) {
                if (gatherer.hasUniforms()) {
                    shadingUniformIndex =
                            shadingUniformDataCache.insert(gatherer.finishUniformDataBlock());
                }
                if (gatherer.hasTextures()) {
                    paintTextures = textureDataCache->insert(gatherer.textureDataBlock());
                }
            }
        } // else depth-only

        // Create a sort key for every render step in this draw, extracting out any
        // RenderStep-specific data.
        for (int stepIndex = 0; stepIndex < draw.fRenderer->numRenderSteps(); ++stepIndex) {
            const RenderStep* const step = draw.fRenderer->steps()[stepIndex];
            const bool performsShading = draw.fPaintParams.has_value() && step->performsShading();

            GraphicsPipelineCache::Index pipelineIndex = pipelineCache.insert(
                    {step, performsShading ? shaderID : UniquePaintParamsID::InvalidID()});

            gatherer.resetWithNewLayout(uniformLayout);
            step->writeUniformsAndTextures(draw.fDrawParams, &gatherer);

            UniformDataCache::Index geomUniformIndex =
                    gatherer.hasUniforms()
                            ? geometryUniformDataCache.insert(gatherer.finishUniformDataBlock())
                            : UniformDataCache::kInvalidIndex;

            TextureDataBlock stepTextures =
                    gatherer.hasTextures() ? textureDataCache->insert(gatherer.textureDataBlock())
                                           : TextureDataBlock();
            TextureBindingCache::Index textureIndex = textureBindingTracker.trackTextures(
                    performsShading ? paintTextures : TextureDataBlock(), stepTextures);

            keys.push_back({&draw, stepIndex, pipelineIndex,
                            geomUniformIndex, shadingUniformIndex, textureIndex});
        }

        passBounds.join(draw.fDrawParams.clip().drawBounds());
        drawPass->fDepthStencilFlags |= draw.fRenderer->depthStencilFlags();
        drawPass->fRequiresMSAA |= draw.fRenderer->requiresMSAA();
    }

    if (!gradientBufferTracker.writeData(gatherer.gradientBufferData(), bufferMgr)) {
        // The necessary uniform data couldn't be written to the GPU, so the DrawPass is invalid.
        // Early out now since the next Recording snap will fail.
        return nullptr;
    }

    // TODO: Explore sorting algorithms; in all likelihood this will be mostly sorted already, so
    // algorithms that approach O(n) in that condition may be favorable. Alternatively, could
    // explore radix sort that is always O(n). Brief testing suggested std::sort was faster than
    // std::stable_sort and SkTQSort on [ml]'s Windows desktop. Also worth considering in-place
    // vs. algorithms that require an extra O(n) storage.
    // TODO: It's not strictly necessary, but would a stable sort be useful or just end up hiding
    // bugs in the DrawOrder determination code?
    std::sort(keys.begin(), keys.end());

    // Used to record vertex/instance data, buffer binds, and draw calls
    DrawWriter drawWriter(&drawPass->fCommandList, bufferMgr);
    GraphicsPipelineCache::Index lastPipeline = GraphicsPipelineCache::kInvalidIndex;
    SkIRect lastScissor = SkIRect::MakeSize(targetInfo.dimensions());

    SkASSERT(drawPass->fTarget->isFullyLazy() ||
             SkIRect::MakeSize(drawPass->fTarget->dimensions()).contains(lastScissor));
    drawPass->fCommandList.setScissor(lastScissor);

    // All large gradients pack their data into a single buffer throughout the draw pass,
    // therefore the gradient buffer only needs to be bound once.
    gradientBufferTracker.bindIfNeeded(&drawPass->fCommandList);

    UniformTracker geometryUniformTracker(useStorageBuffers);
    UniformTracker shadingUniformTracker(useStorageBuffers);

    // TODO(b/372953722): Remove this forced binding command behavior once dst copies are always
    // bound separately from the rest of the textures.
    const bool rebindTexturesOnPipelineChange =
            recorder->priv().caps()->getDstReadRequirement() == DstReadRequirement::kTextureCopy;

    for (const SortKey& key : keys) {
        const DrawList::Draw& draw = key.draw();
        const RenderStep& renderStep = key.renderStep();

        const bool pipelineChange = key.pipelineIndex() != lastPipeline;

        const bool geomBindingChange = geometryUniformTracker.writeUniforms(
                geometryUniformDataCache, bufferMgr, key.geometryUniformIndex());
        const bool shadingBindingChange = shadingUniformTracker.writeUniforms(
                shadingUniformDataCache, bufferMgr, key.shadingUniformIndex());

        // TODO(b/372953722): The Dawn and Vulkan CommandBuffer implementations currently append any
        // dst copy to the texture bind group/descriptor set automatically when processing a
        // BindTexturesAndSamplers call because they use a single group to contain all textures.
        // However, from the DrawPass POV, we can run into the scenario where two pipelines have the
        // same textures+samplers except one requires a dst-copy and the other does not. In this
        // case we wouldn't necessarily insert a new command when the pipeline changed, and we'd
        // then end up with layout validation errors.
        const bool textureBindingsChange = textureBindingTracker.setCurrentTextureBindings(
                key.textureBindingIndex()) ||
                (rebindTexturesOnPipelineChange && pipelineChange &&
                 key.textureBindingIndex() != TextureBindingCache::kInvalidIndex);
        const SkIRect* newScissor        = draw.fDrawParams.clip().scissor() != lastScissor ?
                &draw.fDrawParams.clip().scissor() : nullptr;

        const bool stateChange = geomBindingChange ||
                                 shadingBindingChange ||
                                 textureBindingsChange ||
                                 SkToBool(newScissor);

        // Update DrawWriter *before* we actually change any state so that accumulated draws from
        // the previous state use the proper state.
        if (pipelineChange) {
            drawWriter.newPipelineState(renderStep.primitiveType(),
                                        renderStep.vertexStride(),
                                        renderStep.instanceStride());
        } else if (stateChange) {
            drawWriter.newDynamicState();
        }

        // Make state changes before accumulating new draw data
        if (pipelineChange) {
            drawPass->fCommandList.bindGraphicsPipeline(key.pipelineIndex());
            lastPipeline = key.pipelineIndex();
        }
        if (stateChange) {
            if (geomBindingChange) {
                geometryUniformTracker.bindUniforms(UniformSlot::kRenderStep,
                                                    &drawPass->fCommandList);
            }
            if (shadingBindingChange) {
                shadingUniformTracker.bindUniforms(UniformSlot::kPaint, &drawPass->fCommandList);
            }
            if (textureBindingsChange) {
                textureBindingTracker.bindTextures(&drawPass->fCommandList);
            }
            if (newScissor) {
                drawPass->fCommandList.setScissor(*newScissor);
                lastScissor = *newScissor;
            }
        }

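        // When uniforms live in shared storage buffers, pass this draw's indices into those
        // buffers along to the RenderStep so its shaders can select the correct data blocks;
        // with dedicated uniform buffers the offsets were already baked into the binds above.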
        uint32_t geometrySsboIndex = useStorageBuffers ? geometryUniformTracker.ssboIndex() : 0;
        uint32_t shadingSsboIndex = useStorageBuffers ? shadingUniformTracker.ssboIndex() : 0;
        skvx::uint2 ssboIndices = {geometrySsboIndex, shadingSsboIndex};
        renderStep.writeVertices(&drawWriter, draw.fDrawParams, ssboIndices);

        if (bufferMgr->hasMappingFailed()) {
            SKGPU_LOG_W("Failed to write necessary vertex/instance data for DrawPass, dropping!");
            return nullptr;
        }
    }
    // Finish recording draw calls for any collected data at the end of the loop
    drawWriter.flush();

    drawPass->fBounds = passBounds.roundOut().asSkIRect();

    drawPass->fPipelineDescs   = pipelineCache.detach();
    drawPass->fSamplerDescs    = textureBindingTracker.detachSamplers();
    drawPass->fSampledTextures = textureBindingTracker.detachTextures();

    TRACE_COUNTER1("skia.gpu", "# pipelines", drawPass->fPipelineDescs.size());
    TRACE_COUNTER1("skia.gpu", "# textures", drawPass->fSampledTextures.size());
    TRACE_COUNTER1("skia.gpu", "# commands", drawPass->fCommandList.count());

    return drawPass;
}

bool DrawPass::prepareResources(ResourceProvider* resourceProvider,
                                const RuntimeEffectDictionary* runtimeDict,
                                const RenderPassDesc& renderPassDesc) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);

    fFullPipelines.reserve(fFullPipelines.size() + fPipelineDescs.size());
    for (const GraphicsPipelineDesc& pipelineDesc : fPipelineDescs) {
        auto pipeline = resourceProvider->findOrCreateGraphicsPipeline(runtimeDict,
                                                                       pipelineDesc,
                                                                       renderPassDesc);
        if (!pipeline) {
            SKGPU_LOG_W("Failed to create GraphicsPipeline for draw in RenderPass. Dropping pass!");
            return false;
        }
        fFullPipelines.push_back(std::move(pipeline));
    }
    // The DrawPass may be long lived on a Recording and we no longer need the
    // GraphicsPipelineDescs once we've created pipelines, so we drop the storage for them here.
    fPipelineDescs.clear();

#if defined(SK_DEBUG)
    for (int i = 0; i < fSampledTextures.size(); ++i) {
        // It should not have been possible to draw an Image that has an invalid texture info
        SkASSERT(fSampledTextures[i]->textureInfo().isValid());
        // Tasks should have been ordered to instantiate any scratch textures already, or any
        // client-owned image will have been instantiated at creation.
        SkASSERTF(fSampledTextures[i]->isInstantiated() ||
                  fSampledTextures[i]->isLazy(),
                  "proxy label = %s", fSampledTextures[i]->label());
    }
#endif

    fSamplers.reserve(fSamplers.size() + fSamplerDescs.size());
    for (int i = 0; i < fSamplerDescs.size(); ++i) {
        sk_sp<Sampler> sampler = resourceProvider->findOrCreateCompatibleSampler(fSamplerDescs[i]);
        if (!sampler) {
            SKGPU_LOG_W("Failed to create sampler. Will not create renderpass!");
            return false;
        }
        fSamplers.push_back(std::move(sampler));
    }
    // The DrawPass may be long lived on a Recording and we no longer need the SamplerDescs
    // once we've created Samplers, so we drop the storage for them here.
    fSamplerDescs.clear();

    return true;
}

void DrawPass::addResourceRefs(CommandBuffer* commandBuffer) const {
    for (int i = 0; i < fFullPipelines.size(); ++i) {
        commandBuffer->trackResource(fFullPipelines[i]);
    }
    for (int i = 0; i < fSampledTextures.size(); ++i) {
        commandBuffer->trackCommandBufferResource(fSampledTextures[i]->refTexture());
    }
    for (int i = 0; i < fSamplers.size(); ++i) {
        commandBuffer->trackResource(fSamplers[i]);
    }
}

const Texture* DrawPass::getTexture(size_t index) const {
    SkASSERT(index < SkToSizeT(fSampledTextures.size()));
    SkASSERT(fSampledTextures[index]);
    SkASSERT(fSampledTextures[index]->texture());
    return fSampledTextures[index]->texture();
}
const Sampler* DrawPass::getSampler(size_t index) const {
    SkASSERT(index < SkToSizeT(fSamplers.size()));
    SkASSERT(fSamplers[index]);
    return fSamplers[index].get();
}

} // namespace skgpu::graphite
734