/*
 * Copyright 2023 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/graphite/compute/DispatchGroup.h"

#include "include/core/SkColorType.h"
#include "include/core/SkTypes.h"
#include "include/gpu/GpuTypes.h"
#include "include/gpu/graphite/Recorder.h"
#include "include/gpu/graphite/TextureInfo.h"
#include "include/private/base/SkSpan_impl.h"
#include "include/private/base/SkTo.h"
#include "src/gpu/BufferWriter.h"
#include "src/gpu/graphite/BufferManager.h"
#include "src/gpu/graphite/Caps.h"
#include "src/gpu/graphite/CommandBuffer.h"
#include "src/gpu/graphite/ComputePipeline.h"
#include "src/gpu/graphite/Log.h"
#include "src/gpu/graphite/RecorderPriv.h"
#include "src/gpu/graphite/Resource.h"
#include "src/gpu/graphite/ResourceProvider.h"
#include "src/gpu/graphite/Sampler.h"
#include "src/gpu/graphite/Texture.h"  // IWYU pragma: keep
#include "src/gpu/graphite/TextureProxy.h"
#include "src/gpu/graphite/UniformManager.h"
#include "src/gpu/graphite/task/ClearBuffersTask.h"

#include <utility>

namespace skgpu::graphite {

DispatchGroup::~DispatchGroup() = default;

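// Resolves every descriptor recorded by the Builder into a GPU resource: compute pipelines from
// fPipelineDescs, instantiated textures for fTextures, and samplers from fSamplerDescs. Returns
// false if any resolution fails, in which case the whole group should be dropped.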
bool DispatchGroup::prepareResources(ResourceProvider* resourceProvider) {
    fPipelines.reserve(fPipelines.size() + fPipelineDescs.size());
    for (const ComputePipelineDesc& desc : fPipelineDescs) {
        auto pipeline = resourceProvider->findOrCreateComputePipeline(desc);
        if (!pipeline) {
            SKGPU_LOG_W("Failed to create ComputePipeline for dispatch group. Dropping group!");
            return false;
        }
        fPipelines.push_back(std::move(pipeline));
    }

    for (int i = 0; i < fTextures.size(); ++i) {
        if (!fTextures[i]->textureInfo().isValid()) {
            SKGPU_LOG_W("Failed to validate bound texture. Dropping dispatch group!");
            return false;
        }
        if (!TextureProxy::InstantiateIfNotLazy(resourceProvider, fTextures[i].get())) {
            SKGPU_LOG_W("Failed to instantiate bound texture. Dropping dispatch group!");
            return false;
        }
    }

    for (const SamplerDesc& desc : fSamplerDescs) {
        sk_sp<Sampler> sampler = resourceProvider->findOrCreateCompatibleSampler(desc);
        if (!sampler) {
            SKGPU_LOG_W("Failed to create sampler. Dropping dispatch group!");
            return false;
        }
        fSamplers.push_back(std::move(sampler));
    }

    // The DispatchGroup may be long-lived on a Recording, and the descriptors are no longer
    // needed once the pipelines and samplers have been created.
    fPipelineDescs.clear();
    fSamplerDescs.clear();

    return true;
}

void DispatchGroup::addResourceRefs(CommandBuffer* commandBuffer) const {
    for (int i = 0; i < fPipelines.size(); ++i) {
        commandBuffer->trackResource(fPipelines[i]);
    }
    for (int i = 0; i < fTextures.size(); ++i) {
        commandBuffer->trackCommandBufferResource(fTextures[i]->refTexture());
    }
}

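// Returns a child task that clears any buffers requested to be cleared before this group's
// dispatches run (see assignSharedBuffer() and ResourcePolicy::kClear), or nullptr if none.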
sk_sp<Task> DispatchGroup::snapChildTask() {
    if (fClearList.empty()) {
        return nullptr;
    }
    return ClearBuffersTask::Make(std::move(fClearList));
}

const Texture* DispatchGroup::getTexture(size_t index) const {
    SkASSERT(index < SkToSizeT(fTextures.size()));
    SkASSERT(fTextures[index]);
    SkASSERT(fTextures[index]->texture());
    return fTextures[index]->texture();
}

const Sampler* DispatchGroup::getSampler(size_t index) const {
    SkASSERT(index < SkToSizeT(fSamplers.size()));
    SkASSERT(fSamplers[index]);
    return fSamplers[index].get();
}

using Builder = DispatchGroup::Builder;

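// A minimal usage sketch, assuming a hypothetical ComputeStep subclass `MyStep`:
//
//   DispatchGroup::Builder builder(recorder);
//   MyStep step;
//   if (!builder.appendStep(&step, /*globalSize=*/std::nullopt)) {
//       // Resource allocation failed; drop the work.
//   }
//   std::unique_ptr<DispatchGroup> group = builder.finalize();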
Builder::Builder(Recorder* recorder) : fObj(new DispatchGroup()), fRecorder(recorder) {
    SkASSERT(fRecorder);
}

bool Builder::appendStep(const ComputeStep* step, std::optional<WorkgroupSize> globalSize) {
    return this->appendStepInternal(step,
                                    globalSize ? *globalSize : step->calculateGlobalDispatchSize());
}

bool Builder::appendStepIndirect(const ComputeStep* step, BindBufferInfo indirectBuffer) {
    return this->appendStepInternal(step, indirectBuffer);
}

bool Builder::appendStepInternal(
        const ComputeStep* step,
        const std::variant<WorkgroupSize, BindBufferInfo>& globalSizeOrIndirect) {
    SkASSERT(fObj);
    SkASSERT(step);

    Dispatch dispatch;

    // Process the step's resources.
    auto resources = step->resources();
    dispatch.fBindings.reserve(resources.size());

    // `nextIndex` matches the declaration order of resources as specified by the ComputeStep.
    int nextIndex = 0;

    // We assign buffer, texture, and sampler indices from separate ranges. This is compatible
    // with how Graphite assigns indices on Metal, as these map directly to the
    // buffer/texture/sampler index ranges. On Dawn/Vulkan, buffers and textures/samplers are
    // allocated from separate bind groups/descriptor sets, but texture and sampler indices must
    // not overlap.
    const auto& bindingReqs = fRecorder->priv().caps()->resourceBindingRequirements();
    bool distinctRanges = bindingReqs.fDistinctIndexRanges;
    bool separateSampler = bindingReqs.fSeparateTextureAndSamplerBinding;
    int bufferOrGlobalIndex = 0;
    int texIndex = 0;
    // NOTE: SkSL Metal codegen always assigns the same binding index to a texture and its sampler.
    // TODO: This could cause sampler indices to not be tightly packed if the sampler2D declaration
    // comes after one or more storage texture declarations (which don't have samplers).
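    // For example, a step declaring [storage buffer, shared sampled texture, uniform buffer]
    // under Metal's distinct index ranges gets buffer indices 0 and 1 for the two buffers, while
    // the texture and its sampler both get index 0 of their respective ranges.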
    for (const ComputeStep::ResourceDesc& r : resources) {
        SkASSERT(r.fSlot == -1 || (r.fSlot >= 0 && r.fSlot < kMaxComputeDataFlowSlots));
        const int index = nextIndex++;

        DispatchResourceOptional maybeResource;

        using DataFlow = ComputeStep::DataFlow;
        using Type = ComputeStep::ResourceType;
        switch (r.fFlow) {
            case DataFlow::kPrivate:
                // A sampled or fetched-type readonly texture must either be assigned via
                // `assignSharedTexture()` or be internally allocated as a storage texture by a
                // preceding step. Such a texture always has a data slot.
                SkASSERT(r.fType != Type::kReadOnlyTexture);
                SkASSERT(r.fType != Type::kSampledTexture);
                maybeResource = this->allocateResource(step, r, index);
                break;
            case DataFlow::kShared: {
                SkASSERT(r.fSlot >= 0);
                // Allocate a new resource only if the shared slot is empty (except for a
                // SampledTexture, which needs its sampler to be allocated internally).
                DispatchResourceOptional* slot = &fOutputTable.fSharedSlots[r.fSlot];
                if (std::holds_alternative<std::monostate>(*slot)) {
                    SkASSERT(r.fType != Type::kReadOnlyTexture);
                    SkASSERT(r.fType != Type::kSampledTexture);
                    maybeResource = this->allocateResource(step, r, index);
                    *slot = maybeResource;
                } else {
                    SkASSERT(((r.fType == Type::kUniformBuffer ||
                               r.fType == Type::kStorageBuffer ||
                               r.fType == Type::kReadOnlyStorageBuffer ||
                               r.fType == Type::kIndirectBuffer) &&
                              std::holds_alternative<BindBufferInfo>(*slot)) ||
                             ((r.fType == Type::kReadOnlyTexture ||
                               r.fType == Type::kSampledTexture ||
                               r.fType == Type::kWriteOnlyStorageTexture) &&
                              std::holds_alternative<TextureIndex>(*slot)));
#ifdef SK_DEBUG
                    // Ensure that the texture has the right format if it was assigned via
                    // `assignSharedTexture()`.
                    const TextureIndex* texIdx = std::get_if<TextureIndex>(slot);
                    if (texIdx && r.fType == Type::kWriteOnlyStorageTexture) {
                        const TextureProxy* t = fObj->fTextures[texIdx->fValue].get();
                        SkASSERT(t);
                        auto [_, colorType] = step->calculateTextureParameters(index, r);
                        SkASSERT(t->textureInfo().isCompatible(
                                fRecorder->priv().caps()->getDefaultStorageTextureInfo(colorType)));
                    }
#endif  // SK_DEBUG

                    maybeResource = *slot;

                    if (r.fType == Type::kSampledTexture) {
                        // The shared slot holds the texture part of the sampled texture, but we
                        // still need to allocate the sampler.
                        SkASSERT(std::holds_alternative<TextureIndex>(*slot));
                        auto samplerResource = this->allocateResource(step, r, index);
                        const SamplerIndex* samplerIdx =
                                std::get_if<SamplerIndex>(&samplerResource);
                        SkASSERT(samplerIdx);
                        int bindingIndex = distinctRanges    ? texIndex
                                           : separateSampler ? bufferOrGlobalIndex++
                                                             : bufferOrGlobalIndex;
                        dispatch.fBindings.push_back(
                                {static_cast<BindingIndex>(bindingIndex), *samplerIdx});
                    }
                }
                break;
            }
        }

        int bindingIndex = 0;
        DispatchResource dispatchResource;
        if (const BindBufferInfo* buffer = std::get_if<BindBufferInfo>(&maybeResource)) {
            dispatchResource = *buffer;
            bindingIndex = bufferOrGlobalIndex++;
        } else if (const TextureIndex* texIdx = std::get_if<TextureIndex>(&maybeResource)) {
            dispatchResource = *texIdx;
            bindingIndex = distinctRanges ? texIndex++ : bufferOrGlobalIndex++;
        } else {
            SKGPU_LOG_W("Failed to allocate resource for compute dispatch");
            return false;
        }
        dispatch.fBindings.push_back({static_cast<BindingIndex>(bindingIndex), dispatchResource});
    }

    auto wgBufferDescs = step->workgroupBuffers();
    if (!wgBufferDescs.empty()) {
        dispatch.fWorkgroupBuffers.push_back_n(wgBufferDescs.size(), wgBufferDescs.data());
    }

    // We need to switch pipelines if this step uses a different pipeline from the previous step.
    if (fObj->fPipelineDescs.empty() ||
        fObj->fPipelineDescs.back().uniqueID() != step->uniqueID()) {
        fObj->fPipelineDescs.push_back(ComputePipelineDesc(step));
    }

    dispatch.fPipelineIndex = fObj->fPipelineDescs.size() - 1;
    dispatch.fLocalSize = step->localDispatchSize();
    dispatch.fGlobalSizeOrIndirect = globalSizeOrIndirect;

    fObj->fDispatchList.push_back(std::move(dispatch));

    return true;
}

void Builder::assignSharedBuffer(BindBufferInfo buffer, unsigned int slot, ClearBuffer cleared) {
    SkASSERT(fObj);
    SkASSERT(buffer);
    SkASSERT(buffer.fSize);

    fOutputTable.fSharedSlots[slot] = buffer;
    if (cleared == ClearBuffer::kYes) {
        fObj->fClearList.push_back(buffer);
    }
}

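// Seeds a shared data-flow slot with an externally created texture. For example, a texture
// populated by an upload or render pass can be assigned here so that a later ComputeStep
// declaring a kShared kReadOnlyTexture resource on the same slot binds it, instead of requiring
// an internal allocation (which allocateResource() forbids for readonly textures).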
void Builder::assignSharedTexture(sk_sp<TextureProxy> texture, unsigned int slot) {
    SkASSERT(fObj);
    SkASSERT(texture);

    fObj->fTextures.push_back(std::move(texture));
    fOutputTable.fSharedSlots[slot] = TextureIndex{fObj->fTextures.size() - 1u};
}

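// Hands the accumulated DispatchGroup to the caller and resets the shared-slot table. The
// Builder cannot append further steps afterward (appendStep* asserts on a null fObj), except
// after a test-only reset().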
std::unique_ptr<DispatchGroup> Builder::finalize() {
    auto obj = std::move(fObj);
    fOutputTable.reset();
    return obj;
}

#if defined(GPU_TEST_UTILS)
void Builder::reset() {
    fOutputTable.reset();
    fObj.reset(new DispatchGroup);
}
#endif

BindBufferInfo Builder::getSharedBufferResource(unsigned int slot) const {
    SkASSERT(fObj);

    BindBufferInfo info;
    if (const BindBufferInfo* slotValue =
                std::get_if<BindBufferInfo>(&fOutputTable.fSharedSlots[slot])) {
        info = *slotValue;
    }
    return info;
}

sk_sp<TextureProxy> Builder::getSharedTextureResource(unsigned int slot) const {
    SkASSERT(fObj);

    const TextureIndex* idx = std::get_if<TextureIndex>(&fOutputTable.fSharedSlots[slot]);
    if (!idx) {
        return nullptr;
    }

    SkASSERT(idx->fValue < SkToSizeT(fObj->fTextures.size()));
    return fObj->fTextures[idx->fValue];
}

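// Creates the backing resource for a single ComputeStep resource declaration: scratch storage or
// indirect buffers from the DrawBufferManager, a uniform buffer written through UniformManager,
// an internally allocated write-only storage texture, or a sampler descriptor. Returns an empty
// (monostate) value if the allocation fails.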
DispatchResourceOptional Builder::allocateResource(const ComputeStep* step,
                                                   const ComputeStep::ResourceDesc& resource,
                                                   int resourceIdx) {
    SkASSERT(step);
    SkASSERT(fObj);
    using Type = ComputeStep::ResourceType;
    using ResourcePolicy = ComputeStep::ResourcePolicy;

    DrawBufferManager* bufferMgr = fRecorder->priv().drawBufferManager();
    DispatchResourceOptional result;
    switch (resource.fType) {
        case Type::kReadOnlyStorageBuffer:
        case Type::kStorageBuffer: {
            size_t bufferSize = step->calculateBufferSize(resourceIdx, resource);
            SkASSERT(bufferSize);
            if (resource.fPolicy == ResourcePolicy::kMapped) {
                auto [ptr, bufInfo] = bufferMgr->getStoragePointer(bufferSize);
                if (ptr) {
                    step->prepareStorageBuffer(resourceIdx, resource, ptr, bufferSize);
                    result = bufInfo;
                }
            } else {
                auto bufInfo = bufferMgr->getStorage(bufferSize,
                                                     resource.fPolicy == ResourcePolicy::kClear
                                                             ? ClearBuffer::kYes
                                                             : ClearBuffer::kNo);
                if (bufInfo) {
                    result = bufInfo;
                }
            }
            break;
        }
        case Type::kIndirectBuffer: {
            SkASSERT(resource.fPolicy != ResourcePolicy::kMapped);

            size_t bufferSize = step->calculateBufferSize(resourceIdx, resource);
            SkASSERT(bufferSize);
            auto bufInfo = bufferMgr->getIndirectStorage(bufferSize,
                                                         resource.fPolicy == ResourcePolicy::kClear
                                                                 ? ClearBuffer::kYes
                                                                 : ClearBuffer::kNo);
            if (bufInfo) {
                result = bufInfo;
            }
            break;
        }
        case Type::kUniformBuffer: {
            SkASSERT(resource.fPolicy == ResourcePolicy::kMapped);

            const auto& resourceReqs = fRecorder->priv().caps()->resourceBindingRequirements();
            UniformManager uboMgr(resourceReqs.fUniformBufferLayout);
            step->prepareUniformBuffer(resourceIdx, resource, &uboMgr);

            auto dataBlock = uboMgr.finish();
            SkASSERT(!dataBlock.empty());

            auto [writer, bufInfo] = bufferMgr->getUniformWriter(/*count=*/1, dataBlock.size());
            if (bufInfo) {
                writer.write(dataBlock.data(), dataBlock.size());
                result = bufInfo;
            }
            break;
        }
        case Type::kWriteOnlyStorageTexture: {
            auto [size, colorType] = step->calculateTextureParameters(resourceIdx, resource);
            SkASSERT(!size.isEmpty());
            SkASSERT(colorType != kUnknown_SkColorType);

            auto textureInfo = fRecorder->priv().caps()->getDefaultStorageTextureInfo(colorType);
            sk_sp<TextureProxy> texture = TextureProxy::Make(
                    fRecorder->priv().caps(), fRecorder->priv().resourceProvider(),
                    size, textureInfo, "DispatchWriteOnlyStorageTexture", skgpu::Budgeted::kYes);
            if (texture) {
                fObj->fTextures.push_back(std::move(texture));
                result = TextureIndex{fObj->fTextures.size() - 1u};
            }
            break;
        }
        case Type::kReadOnlyTexture:
            // This resource type is meant to be populated externally (e.g. by an upload or a render
            // pass) and only read/sampled by a ComputeStep. It's not meaningful to allocate an
            // internal texture for a DispatchGroup if none of the ComputeSteps will write to it.
            //
            // Instead of using internal allocation, this texture must be assigned explicitly to a
            // slot by calling the Builder::assignSharedTexture() method.
            //
            // Note: A ComputeStep is allowed to read/sample from a storage texture that a previous
            // ComputeStep has written to.
            SK_ABORT("a readonly texture must be externally assigned to a ComputeStep");
            break;
        case Type::kSampledTexture: {
            fObj->fSamplerDescs.push_back(step->calculateSamplerParameters(resourceIdx, resource));
            result = SamplerIndex{fObj->fSamplerDescs.size() - 1u};
            break;
        }
    }
    return result;
}

}  // namespace skgpu::graphite