1 /*
2 * Copyright 2021 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "src/gpu/graphite/DrawPass.h"
9
10 #include "include/gpu/graphite/GraphiteTypes.h"
11 #include "include/gpu/graphite/Recorder.h"
12 #include "include/private/base/SkAlign.h"
13 #include "src/core/SkTraceEvent.h"
14 #include "src/gpu/graphite/Buffer.h"
15 #include "src/gpu/graphite/BufferManager.h"
16 #include "src/gpu/graphite/Caps.h"
17 #include "src/gpu/graphite/ContextPriv.h"
18 #include "src/gpu/graphite/ContextUtils.h"
19 #include "src/gpu/graphite/DrawContext.h"
20 #include "src/gpu/graphite/DrawList.h"
21 #include "src/gpu/graphite/DrawWriter.h"
22 #include "src/gpu/graphite/GlobalCache.h"
23 #include "src/gpu/graphite/GraphicsPipeline.h"
24 #include "src/gpu/graphite/GraphicsPipelineDesc.h"
25 #include "src/gpu/graphite/Log.h"
26 #include "src/gpu/graphite/PaintParamsKey.h"
27 #include "src/gpu/graphite/PipelineData.h"
28 #include "src/gpu/graphite/RecorderPriv.h"
29 #include "src/gpu/graphite/Renderer.h"
30 #include "src/gpu/graphite/ResourceProvider.h"
31 #include "src/gpu/graphite/Sampler.h"
32 #include "src/gpu/graphite/Texture.h"
33 #include "src/gpu/graphite/UniformManager.h"
34 #include "src/gpu/graphite/geom/BoundsManager.h"
35
36 #include "src/base/SkMathPriv.h"
37 #include "src/base/SkTBlockList.h"
38
39 #include <algorithm>
40
41 using namespace skia_private;
42
43 namespace skgpu::graphite {
44
45 namespace {
46
// Helper to manage packed fields within a uint64_t. A Bitfield<Bits, Offset> describes a field
// that is `Bits` wide and whose least-significant bit sits `Offset` bits above bit 0 of the
// packed value.
template <uint64_t Bits, uint64_t Offset>
struct Bitfield {
    // get() and set() move field values through a uint32_t, so a wider field would silently
    // truncate; a zero-width field could never hold anything.
    static_assert(Bits > 0 && Bits <= 32, "Bitfield must be between 1 and 32 bits wide");
    // A field extending past bit 63 would lose its high bits when shifted into place.
    static_assert(Offset <= 64 - Bits, "Bitfield must fit entirely within a uint64_t");

    // Mask for the field's value *before* it is shifted into position.
    static constexpr uint64_t kMask = (static_cast<uint64_t>(1) << Bits) - 1;
    static constexpr uint64_t kOffset = Offset;
    static constexpr uint64_t kBits = Bits;

    // Extracts this field's value from the packed word `v`.
    static constexpr uint32_t get(uint64_t v) {
        return static_cast<uint32_t>((v >> kOffset) & kMask);
    }

    // Returns `v` truncated to the field's width and shifted into the field's position, ready to
    // be OR'ed together with the other fields of the packed word.
    static constexpr uint64_t set(uint32_t v) {
        return (static_cast<uint64_t>(v) & kMask) << kOffset;
    }
};
57
58 // This class maps objects to a dense index which can then be used to look them up later
59 template <typename T, typename V = T, typename C = V>
60 class DenseBiMap {
61 public:
62 using Index = uint32_t;
63
64 // See note below in GeometryUniformField. This value can be round-tripped within the SortKey
65 // packing for all fields but will not be produced when recording actual draw data.
66 static constexpr Index kInvalidIndex{1 << SkNextLog2_portable(DrawList::kMaxRenderSteps)};
67
empty() const68 bool empty() const { return fIndexToData.empty(); }
size() const69 size_t size() const { return fIndexToData.size(); }
70
insert(const T & data)71 Index insert(const T& data) {
72 Index* index = fDataToIndex.find(data);
73 if (!index) {
74 SkASSERT(SkToU32(fIndexToData.size()) < kInvalidIndex);
75 index = fDataToIndex.set(data, (Index) fIndexToData.size());
76 fIndexToData.push_back(C{data});
77 }
78 return *index;
79 }
80
lookup(Index index)81 const V& lookup(Index index) {
82 SkASSERT(index < kInvalidIndex);
83 return fIndexToData[index];
84 }
85
data()86 SkSpan<V> data() { return {fIndexToData.data(), fIndexToData.size()}; }
87
detach()88 TArray<V>&& detach() { return std::move(fIndexToData); }
89
90 private:
91 THashMap<T, Index> fDataToIndex;
92 TArray<V> fIndexToData;
93 };
94
95 // NOTE: TextureBinding's use as a key type in DenseBiMap relies on the fact that the underlying
96 // data has been de-duplicated by a PipelineDataCache earlier, so that the bit identity of the data
97 // blocks (e.g. address+size) is equivalent to the content equality of the texture lists.
98
99 // Tracks the combination of textures from the paint and from the RenderStep to describe the full
100 // binding that needs to be in the command list.
101 struct TextureBinding {
102 TextureDataBlock fPaintTextures;
103 TextureDataBlock fStepTextures;
104
operator ==skgpu::graphite::__anon6929da090111::TextureBinding105 bool operator==(const TextureBinding& other) const {
106 return fPaintTextures == other.fPaintTextures &&
107 fStepTextures == other.fStepTextures;
108 }
operator !=skgpu::graphite::__anon6929da090111::TextureBinding109 bool operator!=(const TextureBinding& other) const { return !(*this == other); }
110
numTexturesskgpu::graphite::__anon6929da090111::TextureBinding111 int numTextures() const {
112 return (fPaintTextures ? fPaintTextures.numTextures() : 0) +
113 (fStepTextures ? fStepTextures.numTextures() : 0);
114 }
115 };
116
117 using TextureBindingCache = DenseBiMap<TextureBinding>;
118 using GraphicsPipelineCache = DenseBiMap<GraphicsPipelineDesc>;
119
120 // Writes uniform data either to uniform buffers or to shared storage buffers, and tracks when
121 // bindings need to change between draws.
122 class UniformTracker {
123 public:
UniformTracker(bool useStorageBuffers)124 UniformTracker(bool useStorageBuffers) : fUseStorageBuffers(useStorageBuffers) {}
125
writeUniforms(UniformDataCache & uniformCache,DrawBufferManager * bufferMgr,UniformDataCache::Index index)126 bool writeUniforms(UniformDataCache& uniformCache,
127 DrawBufferManager* bufferMgr,
128 UniformDataCache::Index index) {
129 if (index >= UniformDataCache::kInvalidIndex) {
130 return false;
131 }
132
133 if (index == fLastIndex) {
134 return false;
135 }
136 fLastIndex = index;
137
138 UniformDataCache::Entry& uniformData = uniformCache.lookup(index);
139 const size_t uniformDataSize = uniformData.fCpuData.size();
140
141 // Upload the uniform data if we haven't already.
142 // Alternatively, re-upload the uniform data to avoid a rebind if we're using storage
143 // buffers. This will result in more data uploaded, but the tradeoff seems worthwhile.
144 if (!uniformData.fBufferBinding.fBuffer ||
145 (fUseStorageBuffers && uniformData.fBufferBinding.fBuffer != fLastBinding.fBuffer)) {
146 UniformWriter writer;
147 std::tie(writer, uniformData.fBufferBinding) =
148 fUseStorageBuffers ? bufferMgr->getAlignedSsboWriter(1, uniformDataSize)
149 : bufferMgr->getUniformWriter(1, uniformDataSize);
150
151 // Early out if buffer mapping failed.
152 if (!writer) {
153 return {};
154 }
155
156 writer.write(uniformData.fCpuData.data(), uniformDataSize);
157
158 if (fUseStorageBuffers) {
159 // When using storage buffers, store the SSBO index in the binding's offset field
160 // and always use the entire buffer's size in the size field.
161 SkASSERT(uniformData.fBufferBinding.fOffset % uniformDataSize == 0);
162 uniformData.fBufferBinding.fOffset /= uniformDataSize;
163 uniformData.fBufferBinding.fSize = uniformData.fBufferBinding.fBuffer->size();
164 }
165 }
166
167 const bool needsRebind =
168 uniformData.fBufferBinding.fBuffer != fLastBinding.fBuffer ||
169 (!fUseStorageBuffers && uniformData.fBufferBinding.fOffset != fLastBinding.fOffset);
170
171 fLastBinding = uniformData.fBufferBinding;
172
173 return needsRebind;
174 }
175
bindUniforms(UniformSlot slot,DrawPassCommands::List * commandList)176 void bindUniforms(UniformSlot slot, DrawPassCommands::List* commandList) {
177 BindBufferInfo binding = fLastBinding;
178 if (fUseStorageBuffers) {
179 // Track the SSBO index in fLastBinding, but set offset = 0 in the actual used binding.
180 binding.fOffset = 0;
181 }
182 commandList->bindUniformBuffer(binding, slot);
183 }
184
ssboIndex() const185 uint32_t ssboIndex() const {
186 // The SSBO index for the last-bound storage buffer is stored in the binding's offset field.
187 return fLastBinding.fOffset;
188 }
189
190 private:
191 // Internally track the last binding returned, so that we know whether new uploads or rebindings
192 // are necessary. If we're using SSBOs, this is treated specially -- the fOffset field holds the
193 // index in the storage buffer of the last-written uniforms, and the offsets used for actual
194 // bindings are always zero.
195 BindBufferInfo fLastBinding;
196
197 // This keeps track of the last index used for writing uniforms from a provided uniform cache.
198 // If a provided index matches the last index, the uniforms are assumed to already be written
199 // and no additional uploading is performed. This assumes a UniformTracker will always be
200 // provided with the same uniform cache.
201 UniformDataCache::Index fLastIndex = UniformDataCache::kInvalidIndex;
202
203 const bool fUseStorageBuffers;
204 };
205
206 // Automatically merges and manages texture bindings and uniform bindings sourced from either the
207 // paint or the RenderStep. Tracks the bound state based on last-provided unique index to write
208 // Bind commands to a CommandList when necessary.
209 class TextureBindingTracker {
210 public:
trackTextures(TextureDataBlock paintTextures,TextureDataBlock stepTextures)211 TextureBindingCache::Index trackTextures(TextureDataBlock paintTextures,
212 TextureDataBlock stepTextures) {
213 if (!paintTextures && !stepTextures) {
214 return TextureBindingCache::kInvalidIndex;
215 }
216 return fBindingCache.insert({paintTextures, stepTextures});
217 }
218
setCurrentTextureBindings(TextureBindingCache::Index bindingIndex)219 bool setCurrentTextureBindings(TextureBindingCache::Index bindingIndex) {
220 if (bindingIndex < TextureBindingCache::kInvalidIndex && fLastIndex != bindingIndex) {
221 fLastIndex = bindingIndex;
222 return true;
223 }
224 // No binding change
225 return false;
226 }
227
bindTextures(DrawPassCommands::List * commandList)228 void bindTextures(DrawPassCommands::List* commandList) {
229 SkASSERT(fLastIndex < TextureBindingCache::kInvalidIndex);
230 const TextureBinding& binding = fBindingCache.lookup(fLastIndex);
231
232 auto [texIndices, samplerIndices] =
233 commandList->bindDeferredTexturesAndSamplers(binding.numTextures());
234
235 if (binding.fPaintTextures) {
236 for (int i = 0; i < binding.fPaintTextures.numTextures(); ++i) {
237 auto [tex, sampler] = binding.fPaintTextures.texture(i);
238 *texIndices++ = fProxyCache.insert(tex.get());
239 *samplerIndices++ = fSamplerCache.insert(sampler);
240 }
241 }
242 if (binding.fStepTextures) {
243 for (int i = 0; i < binding.fStepTextures.numTextures(); ++i) {
244 auto [tex, sampler] = binding.fStepTextures.texture(i);
245 *texIndices++ = fProxyCache.insert(tex.get());
246 *samplerIndices++ = fSamplerCache.insert(sampler);
247 }
248 }
249 }
250
detachTextures()251 TArray<sk_sp<TextureProxy>>&& detachTextures() { return fProxyCache.detach(); }
detachSamplers()252 TArray<SamplerDesc>&& detachSamplers() { return fSamplerCache.detach(); }
253
254 private:
255 struct ProxyRef {
256 const TextureProxy* fProxy;
operator sk_sp<TextureProxy>skgpu::graphite::__anon6929da090111::TextureBindingTracker::ProxyRef257 operator sk_sp<TextureProxy>() const { return sk_ref_sp(fProxy); }
258 };
259 using TextureProxyCache = DenseBiMap<const TextureProxy*, sk_sp<TextureProxy>, ProxyRef>;
260 using SamplerDescCache = DenseBiMap<SamplerDesc>;
261
262 TextureBindingCache fBindingCache;
263
264 TextureProxyCache fProxyCache;
265 SamplerDescCache fSamplerCache;
266
267 TextureBindingCache::Index fLastIndex = TextureBindingCache::kInvalidIndex;
268 };
269
270 class GradientBufferTracker {
271 public:
writeData(SkSpan<const float> gradData,DrawBufferManager * bufferMgr)272 bool writeData(SkSpan<const float> gradData, DrawBufferManager* bufferMgr) {
273 if (gradData.empty()) {
274 return true;
275 }
276
277 auto [writer, bufferInfo] = bufferMgr->getSsboWriter(gradData.size(), sizeof(float));
278
279 if (!writer) {
280 return false;
281 }
282
283 writer.write(gradData.data(), gradData.size_bytes());
284 fBufferInfo = bufferInfo;
285 fHasData = true;
286
287 return true;
288 }
289
bindIfNeeded(DrawPassCommands::List * commandList) const290 void bindIfNeeded(DrawPassCommands::List* commandList) const {
291 if (fHasData) {
292 commandList->bindUniformBuffer(fBufferInfo, UniformSlot::kGradient);
293 }
294 }
295
296 private:
297 BindBufferInfo fBufferInfo;
298 bool fHasData = false;
299 };
300
301 } // namespace
302
303 ///////////////////////////////////////////////////////////////////////////////////////////////////
304
305 /**
306 * Each Draw in a DrawList might be processed by multiple RenderSteps (determined by the Draw's
307 * Renderer), which can be sorted independently. Each (step, draw) pair produces its own SortKey.
308 *
309 * The goal of sorting draws for the DrawPass is to minimize pipeline transitions and dynamic binds
310 * within a pipeline, while still respecting the overall painter's order. This decreases the number
311 * of low-level draw commands in a command buffer and increases the size of those, allowing the GPU
312 * to operate more efficiently and have fewer bubbles within its own instruction stream.
313 *
314 * The Draw's CompresssedPaintersOrder and DisjointStencilINdex represent the most significant bits
315 * of the key, and are shared by all SortKeys produced by the same draw. Next, the pipeline
316 * description is encoded in two steps:
317 * 1. The index of the RenderStep packed in the high bits to ensure each step for a draw is
318 * ordered correctly.
319 * 2. An index into a cache of pipeline descriptions is used to encode the identity of the
320 * pipeline (SortKeys that differ in the bits from #1 necessarily would have different
321 * descriptions, but then the specific ordering of the RenderSteps isn't enforced).
322 * Last, the SortKey encodes an index into the set of uniform bindings accumulated for a DrawPass.
323 * This allows the SortKey to cluster draw steps that have both a compatible pipeline and do not
324 * require rebinding uniform data or other state (e.g. scissor). Since the uniform data index and
325 * the pipeline description index are packed into indices and not actual pointers, a given SortKey
326 * is only valid for the a specific DrawList->DrawPass conversion.
327 */
328 class DrawPass::SortKey {
329 public:
SortKey(const DrawList::Draw * draw,int renderStep,GraphicsPipelineCache::Index pipelineIndex,UniformDataCache::Index geomUniformIndex,UniformDataCache::Index shadingUniformIndex,TextureBindingCache::Index textureBindingIndex)330 SortKey(const DrawList::Draw* draw,
331 int renderStep,
332 GraphicsPipelineCache::Index pipelineIndex,
333 UniformDataCache::Index geomUniformIndex,
334 UniformDataCache::Index shadingUniformIndex,
335 TextureBindingCache::Index textureBindingIndex)
336 : fPipelineKey(ColorDepthOrderField::set(draw->fDrawParams.order().paintOrder().bits()) |
337 StencilIndexField::set(draw->fDrawParams.order().stencilIndex().bits()) |
338 RenderStepField::set(static_cast<uint32_t>(renderStep)) |
339 PipelineField::set(pipelineIndex))
340 , fUniformKey(GeometryUniformField::set(geomUniformIndex) |
341 ShadingUniformField::set(shadingUniformIndex) |
342 TextureBindingsField::set(textureBindingIndex))
343 , fDraw(draw) {
344 SkASSERT(pipelineIndex < GraphicsPipelineCache::kInvalidIndex);
345 SkASSERT(renderStep <= draw->fRenderer->numRenderSteps());
346 }
347
operator <(const SortKey & k) const348 bool operator<(const SortKey& k) const {
349 return fPipelineKey < k.fPipelineKey ||
350 (fPipelineKey == k.fPipelineKey && fUniformKey < k.fUniformKey);
351 }
352
renderStep() const353 const RenderStep& renderStep() const {
354 return fDraw->fRenderer->step(RenderStepField::get(fPipelineKey));
355 }
356
draw() const357 const DrawList::Draw& draw() const { return *fDraw; }
358
pipelineIndex() const359 GraphicsPipelineCache::Index pipelineIndex() const {
360 return PipelineField::get(fPipelineKey);
361 }
geometryUniformIndex() const362 UniformDataCache::Index geometryUniformIndex() const {
363 return GeometryUniformField::get(fUniformKey);
364 }
shadingUniformIndex() const365 UniformDataCache::Index shadingUniformIndex() const {
366 return ShadingUniformField::get(fUniformKey);
367 }
textureBindingIndex() const368 TextureBindingCache::Index textureBindingIndex() const {
369 return TextureBindingsField::get(fUniformKey);
370 }
371
372 private:
373 // Fields are ordered from most-significant to least when sorting by 128-bit value.
374 // NOTE: We don't use C++ bit fields because field ordering is implementation defined and we
375 // need to sort consistently.
376 using ColorDepthOrderField = Bitfield<16, 48>; // sizeof(CompressedPaintersOrder)
377 using StencilIndexField = Bitfield<16, 32>; // sizeof(DisjointStencilIndex)
378 using RenderStepField = Bitfield<2, 30>; // bits >= log2(Renderer::kMaxRenderSteps)
379 using PipelineField = Bitfield<30, 0>; // bits >= log2(max total steps in draw list)
380 uint64_t fPipelineKey;
381
382 // The uniform/texture index fields need 1 extra bit to encode "no-data". Values that are
383 // greater than or equal to 2^(bits-1) represent "no-data", while values between
384 // [0, 2^(bits-1)-1] can access data arrays without extra logic.
385 using GeometryUniformField = Bitfield<17, 47>; // bits >= 1+log2(max total steps)
386 using ShadingUniformField = Bitfield<17, 30>; // bits >= 1+log2(max total steps)
387 using TextureBindingsField = Bitfield<30, 0>; // bits >= 1+log2(max total steps)
388 uint64_t fUniformKey;
389
390 // Backpointer to the draw that produced the sort key
391 const DrawList::Draw* fDraw;
392
393 static_assert(ColorDepthOrderField::kBits >= sizeof(CompressedPaintersOrder));
394 static_assert(StencilIndexField::kBits >= sizeof(DisjointStencilIndex));
395 static_assert(RenderStepField::kBits >= SkNextLog2_portable(Renderer::kMaxRenderSteps));
396 static_assert(PipelineField::kBits >= SkNextLog2_portable(DrawList::kMaxRenderSteps));
397 static_assert(GeometryUniformField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
398 static_assert(ShadingUniformField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
399 static_assert(TextureBindingsField::kBits >= 1+SkNextLog2_portable(DrawList::kMaxRenderSteps));
400 };
401
402 ///////////////////////////////////////////////////////////////////////////////////////////////////
403
DrawPass(sk_sp<TextureProxy> target,std::pair<LoadOp,StoreOp> ops,std::array<float,4> clearColor)404 DrawPass::DrawPass(sk_sp<TextureProxy> target,
405 std::pair<LoadOp, StoreOp> ops,
406 std::array<float, 4> clearColor)
407 : fTarget(std::move(target))
408 , fBounds(SkIRect::MakeEmpty())
409 , fOps(ops)
410 , fClearColor(clearColor) {}
411
412 DrawPass::~DrawPass() = default;
413
Make(Recorder * recorder,std::unique_ptr<DrawList> draws,sk_sp<TextureProxy> target,const SkImageInfo & targetInfo,std::pair<LoadOp,StoreOp> ops,std::array<float,4> clearColor)414 std::unique_ptr<DrawPass> DrawPass::Make(Recorder* recorder,
415 std::unique_ptr<DrawList> draws,
416 sk_sp<TextureProxy> target,
417 const SkImageInfo& targetInfo,
418 std::pair<LoadOp, StoreOp> ops,
419 std::array<float, 4> clearColor) {
420 // NOTE: This assert is here to ensure SortKey is as tightly packed as possible. Any change to
421 // its size should be done with care and good reason. The performance of sorting the keys is
422 // heavily tied to the total size.
423 //
424 // At 24 bytes (current), sorting is about 30% slower than if SortKey could be packed into just
425 // 16 bytes. There are several ways this could be done if necessary:
426 // - Restricting the max draw count to 16k (14-bits) and only using a single index to refer to
427 // the uniform data => 8 bytes of key, 8 bytes of pointer.
428 // - Restrict the max draw count to 32k (15-bits), use a single uniform index, and steal the
429 // 4 low bits from the Draw* pointer since it's 16 byte aligned.
430 // - Compact the Draw* to an index into the original collection, although that has extra
431 // indirection and does not work as well with SkTBlockList.
432 // In pseudo tests, manipulating the pointer or having to mask out indices was about 15% slower
433 // than an 8 byte key and unmodified pointer.
434 static_assert(sizeof(DrawPass::SortKey) ==
435 SkAlignTo(16 + sizeof(void*), alignof(DrawPass::SortKey)));
436
437 TRACE_EVENT1("skia.gpu", TRACE_FUNC, "draw count", draws->fDraws.count());
438
439 // The DrawList is converted directly into the DrawPass' data structures, but once the DrawPass
440 // is returned from Make(), it is considered immutable.
441 std::unique_ptr<DrawPass> drawPass(new DrawPass(target, ops, clearColor));
442
443 Rect passBounds = Rect::InfiniteInverted();
444
445 UniformDataCache geometryUniformDataCache;
446 UniformDataCache shadingUniformDataCache;
447 TextureDataCache* textureDataCache = recorder->priv().textureDataCache();
448 DrawBufferManager* bufferMgr = recorder->priv().drawBufferManager();
449 if (bufferMgr->hasMappingFailed()) {
450 SKGPU_LOG_W("Buffer mapping has already failed; dropping draw pass!");
451 return nullptr;
452 }
453
454 GraphicsPipelineCache pipelineCache;
455
456 // Geometry uniforms are currently always UBO-backed.
457 const bool useStorageBuffers = recorder->priv().caps()->storageBufferSupport();
458 const ResourceBindingRequirements& bindingReqs =
459 recorder->priv().caps()->resourceBindingRequirements();
460 Layout uniformLayout =
461 useStorageBuffers ? bindingReqs.fStorageBufferLayout : bindingReqs.fUniformBufferLayout;
462
463 TextureBindingTracker textureBindingTracker;
464 GradientBufferTracker gradientBufferTracker;
465
466 ShaderCodeDictionary* dict = recorder->priv().shaderCodeDictionary();
467 PaintParamsKeyBuilder builder(dict);
468
469 // The initial layout we pass here is not important as it will be re-assigned when writing
470 // shading and geometry uniforms below.
471 PipelineDataGatherer gatherer(uniformLayout);
472
473 std::vector<SortKey> keys;
474 keys.reserve(draws->renderStepCount());
475
476 for (const DrawList::Draw& draw : draws->fDraws.items()) {
477 // If we have two different descriptors, such that the uniforms from the PaintParams can be
478 // bound independently of those used by the rest of the RenderStep, then we can upload now
479 // and remember the location for re-use on any RenderStep that does shading.
480 UniquePaintParamsID shaderID;
481 UniformDataCache::Index shadingUniformIndex = UniformDataCache::kInvalidIndex;
482 TextureDataBlock paintTextures;
483
484 if (draw.fPaintParams.has_value()) {
485 shaderID = ExtractPaintData(recorder,
486 &gatherer,
487 &builder,
488 uniformLayout,
489 draw.fDrawParams.transform(),
490 draw.fPaintParams.value(),
491 draw.fDrawParams.geometry(),
492 targetInfo.colorInfo());
493
494 if (shaderID.isValid()) {
495 if (gatherer.hasUniforms()) {
496 shadingUniformIndex =
497 shadingUniformDataCache.insert(gatherer.finishUniformDataBlock());
498 }
499 if (gatherer.hasTextures()) {
500 paintTextures = textureDataCache->insert(gatherer.textureDataBlock());
501 }
502 }
503 } // else depth-only
504
505 // Create a sort key for every render step in this draw, extracting out any
506 // RenderStep-specific data.
507 for (int stepIndex = 0; stepIndex < draw.fRenderer->numRenderSteps(); ++stepIndex) {
508 const RenderStep* const step = draw.fRenderer->steps()[stepIndex];
509 const bool performsShading = draw.fPaintParams.has_value() && step->performsShading();
510
511 GraphicsPipelineCache::Index pipelineIndex = pipelineCache.insert(
512 {step, performsShading ? shaderID : UniquePaintParamsID::InvalidID()});
513
514 gatherer.resetWithNewLayout(uniformLayout);
515 step->writeUniformsAndTextures(draw.fDrawParams, &gatherer);
516
517 UniformDataCache::Index geomUniformIndex =
518 gatherer.hasUniforms()
519 ? geometryUniformDataCache.insert(gatherer.finishUniformDataBlock())
520 : UniformDataCache::kInvalidIndex;
521
522 TextureDataBlock stepTextures =
523 gatherer.hasTextures() ? textureDataCache->insert(gatherer.textureDataBlock())
524 : TextureDataBlock();
525 TextureBindingCache::Index textureIndex = textureBindingTracker.trackTextures(
526 performsShading ? paintTextures : TextureDataBlock(), stepTextures);
527
528 keys.push_back({&draw, stepIndex, pipelineIndex,
529 geomUniformIndex, shadingUniformIndex, textureIndex});
530 }
531
532 passBounds.join(draw.fDrawParams.clip().drawBounds());
533 drawPass->fDepthStencilFlags |= draw.fRenderer->depthStencilFlags();
534 drawPass->fRequiresMSAA |= draw.fRenderer->requiresMSAA();
535 }
536
537 if (!gradientBufferTracker.writeData(gatherer.gradientBufferData(), bufferMgr)) {
538 // The necessary uniform data couldn't be written to the GPU, so the DrawPass is invalid.
539 // Early out now since the next Recording snap will fail.
540 return nullptr;
541 }
542
543 // TODO: Explore sorting algorithms; in all likelihood this will be mostly sorted already, so
544 // algorithms that approach O(n) in that condition may be favorable. Alternatively, could
545 // explore radix sort that is always O(n). Brief testing suggested std::sort was faster than
546 // std::stable_sort and SkTQSort on my [ml]'s Windows desktop. Also worth considering in-place
547 // vs. algorithms that require an extra O(n) storage.
548 // TODO: It's not strictly necessary, but would a stable sort be useful or just end up hiding
549 // bugs in the DrawOrder determination code?
550 std::sort(keys.begin(), keys.end());
551
552 // Used to record vertex/instance data, buffer binds, and draw calls
553 DrawWriter drawWriter(&drawPass->fCommandList, bufferMgr);
554 GraphicsPipelineCache::Index lastPipeline = GraphicsPipelineCache::kInvalidIndex;
555 SkIRect lastScissor = SkIRect::MakeSize(targetInfo.dimensions());
556
557 SkASSERT(drawPass->fTarget->isFullyLazy() ||
558 SkIRect::MakeSize(drawPass->fTarget->dimensions()).contains(lastScissor));
559 drawPass->fCommandList.setScissor(lastScissor);
560
561 // All large gradients pack their data into a single buffer throughout the draw pass,
562 // therefore the gradient buffer only needs to be bound once.
563 gradientBufferTracker.bindIfNeeded(&drawPass->fCommandList);
564
565 UniformTracker geometryUniformTracker(useStorageBuffers);
566 UniformTracker shadingUniformTracker(useStorageBuffers);
567
568 // TODO(b/372953722): Remove this forced binding command behavior once dst copies are always
569 // bound separately from the rest of the textures.
570 const bool rebindTexturesOnPipelineChange =
571 recorder->priv().caps()->getDstReadRequirement() == DstReadRequirement::kTextureCopy;
572
573 for (const SortKey& key : keys) {
574 const DrawList::Draw& draw = key.draw();
575 const RenderStep& renderStep = key.renderStep();
576
577 const bool pipelineChange = key.pipelineIndex() != lastPipeline;
578
579 const bool geomBindingChange = geometryUniformTracker.writeUniforms(
580 geometryUniformDataCache, bufferMgr, key.geometryUniformIndex());
581 const bool shadingBindingChange = shadingUniformTracker.writeUniforms(
582 shadingUniformDataCache, bufferMgr, key.shadingUniformIndex());
583
584 // TODO(b/372953722): The Dawn and Vulkan CommandBuffer implementations currently append any
585 // dst copy to the texture bind group/descriptor set automatically when processing a
586 // BindTexturesAndSamplers call because they use a single group to contain all textures.
587 // However, from the DrawPass POV, we can run into the scenario where two pipelines have the
588 // same textures+samplers except one requires a dst-copy and the other does not. In this
589 // case we wouldn't necessarily insert a new command when the pipeline changed and then
590 // end up with layout validation errors.
591 const bool textureBindingsChange = textureBindingTracker.setCurrentTextureBindings(
592 key.textureBindingIndex()) ||
593 (rebindTexturesOnPipelineChange && pipelineChange &&
594 key.textureBindingIndex() != TextureBindingCache::kInvalidIndex);
595 const SkIRect* newScissor = draw.fDrawParams.clip().scissor() != lastScissor ?
596 &draw.fDrawParams.clip().scissor() : nullptr;
597
598 const bool stateChange = geomBindingChange ||
599 shadingBindingChange ||
600 textureBindingsChange ||
601 SkToBool(newScissor);
602
603 // Update DrawWriter *before* we actually change any state so that accumulated draws from
604 // the previous state use the proper state.
605 if (pipelineChange) {
606 drawWriter.newPipelineState(renderStep.primitiveType(),
607 renderStep.vertexStride(),
608 renderStep.instanceStride());
609 } else if (stateChange) {
610 drawWriter.newDynamicState();
611 }
612
613 // Make state changes before accumulating new draw data
614 if (pipelineChange) {
615 drawPass->fCommandList.bindGraphicsPipeline(key.pipelineIndex());
616 lastPipeline = key.pipelineIndex();
617 }
618 if (stateChange) {
619 if (geomBindingChange) {
620 geometryUniformTracker.bindUniforms(UniformSlot::kRenderStep,
621 &drawPass->fCommandList);
622 }
623 if (shadingBindingChange) {
624 shadingUniformTracker.bindUniforms(UniformSlot::kPaint, &drawPass->fCommandList);
625 }
626 if (textureBindingsChange) {
627 textureBindingTracker.bindTextures(&drawPass->fCommandList);
628 }
629 if (newScissor) {
630 drawPass->fCommandList.setScissor(*newScissor);
631 lastScissor = *newScissor;
632 }
633 }
634
635 uint32_t geometrySsboIndex = useStorageBuffers ? geometryUniformTracker.ssboIndex() : 0;
636 uint32_t shadingSsboIndex = useStorageBuffers ? shadingUniformTracker.ssboIndex() : 0;
637 skvx::uint2 ssboIndices = {geometrySsboIndex, shadingSsboIndex};
638 renderStep.writeVertices(&drawWriter, draw.fDrawParams, ssboIndices);
639
640 if (bufferMgr->hasMappingFailed()) {
641 SKGPU_LOG_W("Failed to write necessary vertex/instance data for DrawPass, dropping!");
642 return nullptr;
643 }
644 }
645 // Finish recording draw calls for any collected data at the end of the loop
646 drawWriter.flush();
647
648 drawPass->fBounds = passBounds.roundOut().asSkIRect();
649
650 drawPass->fPipelineDescs = pipelineCache.detach();
651 drawPass->fSamplerDescs = textureBindingTracker.detachSamplers();
652 drawPass->fSampledTextures = textureBindingTracker.detachTextures();
653
654 TRACE_COUNTER1("skia.gpu", "# pipelines", drawPass->fPipelineDescs.size());
655 TRACE_COUNTER1("skia.gpu", "# textures", drawPass->fSampledTextures.size());
656 TRACE_COUNTER1("skia.gpu", "# commands", drawPass->fCommandList.count());
657
658 return drawPass;
659 }
660
prepareResources(ResourceProvider * resourceProvider,const RuntimeEffectDictionary * runtimeDict,const RenderPassDesc & renderPassDesc)661 bool DrawPass::prepareResources(ResourceProvider* resourceProvider,
662 const RuntimeEffectDictionary* runtimeDict,
663 const RenderPassDesc& renderPassDesc) {
664 TRACE_EVENT0("skia.gpu", TRACE_FUNC);
665
666 fFullPipelines.reserve(fFullPipelines.size() + fPipelineDescs.size());
667 for (const GraphicsPipelineDesc& pipelineDesc : fPipelineDescs) {
668 auto pipeline = resourceProvider->findOrCreateGraphicsPipeline(runtimeDict,
669 pipelineDesc,
670 renderPassDesc);
671 if (!pipeline) {
672 SKGPU_LOG_W("Failed to create GraphicsPipeline for draw in RenderPass. Dropping pass!");
673 return false;
674 }
675 fFullPipelines.push_back(std::move(pipeline));
676 }
677 // The DrawPass may be long lived on a Recording and we no longer need the GraphicPipelineDescs
678 // once we've created pipelines, so we drop the storage for them here.
679 fPipelineDescs.clear();
680
681 #if defined(SK_DEBUG)
682 for (int i = 0; i < fSampledTextures.size(); ++i) {
683 // It should not have been possible to draw an Image that has an invalid texture info
684 SkASSERT(fSampledTextures[i]->textureInfo().isValid());
685 // Tasks should have been ordered to instantiate any scratch textures already, or any
686 // client-owned image will have been instantiated at creation.
687 SkASSERTF(fSampledTextures[i]->isInstantiated() ||
688 fSampledTextures[i]->isLazy(),
689 "proxy label = %s", fSampledTextures[i]->label());
690 }
691 #endif
692
693 fSamplers.reserve(fSamplers.size() + fSamplerDescs.size());
694 for (int i = 0; i < fSamplerDescs.size(); ++i) {
695 sk_sp<Sampler> sampler = resourceProvider->findOrCreateCompatibleSampler(fSamplerDescs[i]);
696 if (!sampler) {
697 SKGPU_LOG_W("Failed to create sampler. Will not create renderpass!");
698 return false;
699 }
700 fSamplers.push_back(std::move(sampler));
701 }
702 // The DrawPass may be long lived on a Recording and we no longer need the SamplerDescs
703 // once we've created Samplers, so we drop the storage for them here.
704 fSamplerDescs.clear();
705
706 return true;
707 }
708
addResourceRefs(CommandBuffer * commandBuffer) const709 void DrawPass::addResourceRefs(CommandBuffer* commandBuffer) const {
710 for (int i = 0; i < fFullPipelines.size(); ++i) {
711 commandBuffer->trackResource(fFullPipelines[i]);
712 }
713 for (int i = 0; i < fSampledTextures.size(); ++i) {
714 commandBuffer->trackCommandBufferResource(fSampledTextures[i]->refTexture());
715 }
716 for (int i = 0; i < fSamplers.size(); ++i) {
717 commandBuffer->trackResource(fSamplers[i]);
718 }
719 }
720
getTexture(size_t index) const721 const Texture* DrawPass::getTexture(size_t index) const {
722 SkASSERT(index < SkToSizeT(fSampledTextures.size()));
723 SkASSERT(fSampledTextures[index]);
724 SkASSERT(fSampledTextures[index]->texture());
725 return fSampledTextures[index]->texture();
726 }
getSampler(size_t index) const727 const Sampler* DrawPass::getSampler(size_t index) const {
728 SkASSERT(index < SkToSizeT(fSamplers.size()));
729 SkASSERT(fSamplers[index]);
730 return fSamplers[index].get();
731 }
732
733 } // namespace skgpu::graphite
734