xref: /aosp_15_r20/external/skia/src/gpu/graphite/compute/ComputeStep.h (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1 /*
2  * Copyright 2023 Google LLC
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #ifndef skgpu_graphite_compute_ComputeStep_DEFINED
9 #define skgpu_graphite_compute_ComputeStep_DEFINED
10 
11 #include "include/core/SkSpan.h"
12 #include "include/private/base/SkTArray.h"
13 #include "include/private/base/SkTo.h"
14 #include "src/base/SkEnumBitMask.h"
15 #include "src/gpu/graphite/ComputeTypes.h"
16 #include "src/gpu/graphite/ResourceTypes.h"
17 
18 #include <cstddef>
19 #include <cstdint>
20 #include <string>
21 #include <string_view>
22 #include <tuple>
23 
// Forward declarations. Within this header these two types appear only in the return type of
// ComputeStep::calculateTextureParameters().
24 enum SkColorType : int;
25 struct SkISize;
26 
27 namespace skgpu::graphite {
28 
// Forward declaration. Within this header UniformManager is only used as a pointer parameter of
// ComputeStep::prepareUniformBuffer().
29 class UniformManager;
30 
31 /**
32  * A `ComputeStep` represents a compute pass within a wider draw operation. A `ComputeStep`
33  * implementation describes an invocation of a compute program and its data binding layout.
34  *
35  * A `ComputeStep` can perform arbitrary operations on the GPU over various types of data, including
36  * geometry and image processing. The data processed by a `ComputeStep` can be inputs (textures or
37  * buffers) populated on the CPU, data forwarded to and from other `ComputeStep` invocations (via
38  * "slots"), transient storage buffers/textures that are only used within an individual dispatch,
39  * geometry attribute (vertex/index/instance) and indirect draw parameters of a subsequent raster
40  * pipeline stage, as well as texture outputs.
41  *
42  * The data flow between sequential `ComputeStep` invocations within a DispatchGroup is achieved by
43  * operating over a shared "resource table". `ComputeStep`s can declare a resource with a slot
44  * number. Multiple `ComputeStep`s in a group that declare a resource with the same slot number will
45  * have access to the same backing resource object through that slot:
46  *
47  *      _______________                _______________
48  *     |               |              |               |
49  *     |                ---[Slot 0]---                |
50  *     |               |              |               |
51  *     |                ---[Slot 1]---                |
52  *     | ComputeStep 1 |              | ComputeStep 2 |
53  *     |                ---[Slot 2]   |               |
54  *     |               |              |               |
55  *     |               |   [Slot 3]---                |
56  *     |               |              |               |
57  *      ---------------                ---------------
58  *
59  * In the example above, slots 0 and 1 are accessed by both ComputeSteps, while slots 2 and 3 are
60  * exclusively accessed by ComputeStep 1 and 2 respectively. Alternately, slots 2 and 3 could be
61  * declared as "private" resources which are visible to a single ComputeStep.
62  *
63  * Similarly, raster stage geometry buffers that are specified as the output of a ComputeStep can be
64  * used to assign the draw buffers of a RenderStep.
65  *
66  * It is the responsibility of the owning entity (e.g. a RendererProvider) to ensure that a chain of
67  * ComputeStep and RenderStep invocations has a compatible resource and data-flow layout.
68  */
69 class ComputeStep {
70 public:
       // Describes whether a resource binding is private to one ComputeStep or shared between
       // several ComputeSteps through a slot number.
71     enum class DataFlow {
72         // A private binding is a resource that is only visible to a single ComputeStep invocation.
73         kPrivate,
74 
75         // Bindings with a slot number that can be used to forward data between a series of
76         // `ComputeStep`s. This DataFlow type is accompanied by a "slot number" that can be
77         // shared by multiple `ComputeStep`s in a group.
78         kShared,
79     };
80 
       // The kind of binding that a resource is declared as.
81     enum class ResourceType {
82         kUniformBuffer,
83         kStorageBuffer,
84         kReadOnlyStorageBuffer,
85 
86         // An indirect buffer is a storage buffer populated by this ComputeStep to determine the
87         // global dispatch size of a subsequent ComputeStep within the same DispatchGroup. The
88         // contents of the buffer must be laid out according to the `IndirectDispatchArgs` struct
89         // definition declared in ComputeTypes.h.
90         kIndirectBuffer,
91 
92         kWriteOnlyStorageTexture,
93         kReadOnlyTexture,
94         kSampledTexture,
95     };
96 
       // Describes how the memory of a resource gets initialized.
97     enum class ResourcePolicy {
98         kNone,
99 
100         // The memory of the resource will be initialized to 0
101         kClear,
102 
103         // The ComputeStep will be asked to initialize the memory on the CPU via
104         // `ComputeStep::prepareStorageBuffer` or `ComputeStep::prepareUniformBuffer` prior to
105         // pipeline execution. This may incur a transfer cost on platforms that do not allow buffers
106         // to be mapped in shared memory.
107         //
108         // If multiple ComputeSteps in a DispatchGroup declare a mapped resource with the same
109         // shared slot number, only the first ComputeStep in the group will receive a call to
110         // prepare the buffer.
111         //
112         // This only has meaning for buffer resources. A resource with the `kUniformBuffer` resource
113         // type must specify the `kMapped` resource policy.
114         kMapped,
115     };
116 
        // Describes one resource binding declared by a ComputeStep: its type, how it flows between
        // steps, how its memory is initialized, and (optionally) its slot and SkSL declaration.
117     struct ResourceDesc final {
118         ResourceType fType;
119         DataFlow fFlow;
120         ResourcePolicy fPolicy;
121 
122         // This field only has meaning (and must have a non-negative value) if `fFlow` is
123         // `DataFlow::kShared`.
124         int fSlot;
125 
126         // The SkSL variable declaration code excluding the layout and type definitions. This field
127         // is ignored for a ComputeStep that supports native shader source.
128         const char* fSkSL = "";
129 
            // Constructs a descriptor without SkSL: `fSkSL` keeps its default of "" and `slot`
            // defaults to -1 when omitted.
130         constexpr ResourceDesc(ResourceType type,
131                                DataFlow flow,
132                                ResourcePolicy policy,
133                                int slot = -1)
fTypefinal134                 : fType(type), fFlow(flow), fPolicy(policy), fSlot(slot) {}
135 
            // Constructs a descriptor with both an explicit slot number and an SkSL declaration.
ResourceDescfinal136         constexpr ResourceDesc(ResourceType type,
137                                DataFlow flow,
138                                ResourcePolicy policy,
139                                int slot,
140                                const char* sksl)
141                 : fType(type), fFlow(flow), fPolicy(policy), fSlot(slot), fSkSL(sksl) {}
142 
            // Constructs a descriptor with an SkSL declaration but no slot; `fSlot` is set to -1.
ResourceDescfinal143         constexpr ResourceDesc(ResourceType type,
144                                DataFlow flow,
145                                ResourcePolicy policy,
146                                const char* sksl)
147                 : fType(type), fFlow(flow), fPolicy(policy), fSlot(-1), fSkSL(sksl) {}
148     };
149 
150     // On platforms that support late bound workgroup shared resources (e.g. Metal) a ComputeStep
151     // can optionally provide a list of memory sizes and binding indices.
152     struct WorkgroupBufferDesc {
153         // The buffer size in bytes.
154         size_t size;
            // The binding index of the buffer.
155         size_t index;
156     };
157 
158     virtual ~ComputeStep() = default;
159 
160     // Returns a complete SkSL compute program. The returned SkSL must constitute a complete compute
161     // program and declare all resource bindings starting at `nextBindingIndex` in the order in
162     // which they are enumerated by `ComputeStep::resources()`.
        // NOTE(review): this method takes no `nextBindingIndex` parameter, so the reference above
        // looks stale — confirm which binding index the declarations are expected to start at.
163     //
164     // If this ComputeStep supports native shader source then it must override
165     // `nativeShaderSource()` instead.
166     virtual std::string computeSkSL() const;
167 
168     // A ComputeStep that supports native shader source must implement
169     // `nativeShaderSource()` and return the shader source in the requested format. This is intended
170     // to instantiate a compute pipeline from a pre-compiled shader module. The returned source must
171     // constitute a shader module that contains at least one compute entry-point function that
172     // matches the specified name.
173     enum class NativeShaderFormat {
174         kWGSL,
175         kMSL,
176     };
177     struct NativeShaderSource {
            // The shader module source. std::string_view does not own its characters, so the storage
            // it refers to has to outlive every use of this struct.
            // NOTE(review): confirm who owns that storage.
178         std::string_view fSource;
            // Presumably the name of the compute entry-point function inside `fSource` — confirm.
179         std::string fEntryPoint;
180     };
181     virtual NativeShaderSource nativeShaderSource(NativeShaderFormat) const;
182 
183     // This method will be called for buffer entries in the ComputeStep's resource list to
184     // determine the required allocation size. The ComputeStep must return a non-zero value.
185     //
186     // TODO(b/279955342): Provide a context object, e.g. a type associated with
187     // DispatchGroup::Builder, to aid the ComputeStep in its buffer size calculations.
188     virtual size_t calculateBufferSize(int resourceIndex, const ResourceDesc&) const;
189 
190     // This method will be called for storage texture entries in the ComputeStep's resource list to
191     // determine the required dimensions and color type. The ComputeStep must return a non-zero
192     // value for the size and a valid color type.
193     virtual std::tuple<SkISize, SkColorType> calculateTextureParameters(int resourceIndex,
194                                                                         const ResourceDesc&) const;
195 
196     // This method will be called for sampler entries in the ComputeStep's resource list to
197     // determine the sampling and tile mode options.
198     virtual SamplerDesc calculateSamplerParameters(int resourceIndex, const ResourceDesc&) const;
199 
200     // Return the global dispatch size (aka "workgroup count") for this step based on the draw
201     // parameters. The default value is a workgroup count of (1, 1, 1)
202     //
203     // TODO(b/279955342): Provide a context object, e.g. a type associated with
204     // DispatchGroup::Builder, to aid the ComputeStep in its buffer size calculations.
205     virtual WorkgroupSize calculateGlobalDispatchSize() const;
206 
207     // Populates a storage buffer resource which was specified as "mapped". This method will only be
208     // called once for a resource right after its allocation and before pipeline execution. For
209     // shared resources, only the first ComputeStep in a DispatchGroup will be asked to prepare the
210     // buffer.
211     //
212     // `resourceIndex` matches the order in which `resource` was enumerated by
213     // `ComputeStep::resources()`.
        //
        // NOTE(review): `buffer` and `bufferSize` presumably describe the CPU-visible memory to fill
        // and its size in bytes — confirm against the caller.
214     virtual void prepareStorageBuffer(int resourceIndex,
215                                       const ResourceDesc& resource,
216                                       void* buffer,
217                                       size_t bufferSize) const;
218 
219     // Populates a uniform buffer resource. This method will be called once for a resource right
220     // after its allocation and before pipeline execution. For shared resources, only the first
221     // ComputeStep in a DispatchGroup will be asked to prepare the buffer.
222     //
223     // `resourceIndex` matches the order in which `resource` was enumerated by
224     // `ComputeStep::resources()`.
225     //
226     // The implementation must use the provided `UniformManager` to populate the buffer. On debug
227     // builds, the implementation must validate the buffer layout by setting up an expectation, for
228     // example:
229     //
230     //     SkDEBUGCODE(mgr->setExpectedUniforms({{"foo", SkSLType::kFloat}}));
231     //
232     // TODO(b/279955342): Provide a context object, e.g. a type associated with
233     // DispatchGroup::Builder, to aid the ComputeStep in its buffer size calculations.
234     virtual void prepareUniformBuffer(int resourceIndex,
235                                       const ResourceDesc&,
236                                       UniformManager*) const;
237 
        // Read-only views over the resource descriptors and workgroup buffer descriptors held in
        // `fResources` and `fWorkgroupBuffers`.
resources()238     SkSpan<const ResourceDesc> resources() const { return SkSpan(fResources); }
workgroupBuffers()239     SkSpan<const WorkgroupBufferDesc> workgroupBuffers() const { return SkSpan(fWorkgroupBuffers); }
240 
241     // Identifier that can be used as part of a unique key for a compute pipeline state object
242     // associated with this `ComputeStep`.
uniqueID()243     uint32_t uniqueID() const { return fUniqueID; }
244 
245     // Returns a debug name for the subclass implementation.
name()246     const char* name() const { return fName.c_str(); }
247 
248     // The size of the workgroup for this ComputeStep's entry point function. This value is hardware
249     // dependent. On Metal, this value should be used when invoking the dispatch API call. On all
250     // other backends, this value will be baked into the pipeline.
localDispatchSize()251     WorkgroupSize localDispatchSize() const { return fLocalDispatchSize; }
252 
        // True when the `kSupportsNativeShader` bit is set in `fFlags`.
supportsNativeShader()253     bool supportsNativeShader() const { return SkToBool(fFlags & Flags::kSupportsNativeShader); }
254 
255 protected:
        // Bit flags describing optional capabilities of a ComputeStep.
        // NOTE(review): only bit 1 (0b00010) is assigned here; bit 0 is not used by any enumerator.
256     enum class Flags : uint8_t {
257         kNone                 = 0b00000,
258         kSupportsNativeShader = 0b00010,
259     };
        // Presumably befriends the bitmask operators for this nested enum (macro defined outside
        // this header — confirm).
260     SK_DECL_BITMASK_OPS_FRIENDS(Flags)
261 
        // Protected so that only subclasses can construct a ComputeStep. `workgroupBuffers` defaults
        // to an empty span and `baseFlags` to `Flags::kNone`.
        // NOTE(review): the definition is not in this header; presumably it copies `name`,
        // `resources` and `workgroupBuffers` into the corresponding members — confirm.
262     ComputeStep(std::string_view name,
263                 WorkgroupSize localDispatchSize,
264                 SkSpan<const ResourceDesc> resources,
265                 SkSpan<const WorkgroupBufferDesc> workgroupBuffers = {},
266                 Flags baseFlags = Flags::kNone);
267 
268 private:
269     // Disallow copy and move (declaring the move constructor also leaves the implicit
        // copy-assignment operator deleted).
270     ComputeStep(const ComputeStep&) = delete;
271     ComputeStep(ComputeStep&&)      = delete;
272 
        // Value returned by uniqueID().
273     uint32_t fUniqueID;
        // Flag bits queried by supportsNativeShader().
274     SkEnumBitMask<Flags> fFlags;
        // Backing storage for the string returned by name().
275     std::string fName;
        // Storage behind the spans returned by resources() and workgroupBuffers().
276     skia_private::TArray<ResourceDesc> fResources;
277     skia_private::TArray<WorkgroupBufferDesc> fWorkgroupBuffers;
278 
279     // TODO(b/240615224): Subclasses should simply specify the workgroup size that they need.
280     // The ComputeStep constructor should check and reduce that number based on the maximum
281     // supported workgroup size stored in Caps. In Metal, we'll pass this number directly to the
282     // dispatch API call. On other backends, we'll use this value to generate the right SkSL
283     // workgroup size declaration to avoid any validation failures.
284     WorkgroupSize fLocalDispatchSize;
285 };
// Presumably defines the bitwise operators for ComputeStep::Flags that
// ComputeStep::supportsNativeShader() relies on (`fFlags & Flags::kSupportsNativeShader`).
// NOTE(review): the macro is defined outside this header — confirm.
286 SK_MAKE_BITMASK_OPS(ComputeStep::Flags)
287 
288 }  // namespace skgpu::graphite
289 
290 #endif  // skgpu_graphite_compute_ComputeStep_DEFINED
291