1 /* 2 * Copyright 2023 Google LLC 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #ifndef skgpu_graphite_compute_ComputeStep_DEFINED 9 #define skgpu_graphite_compute_ComputeStep_DEFINED 10 11 #include "include/core/SkSpan.h" 12 #include "include/private/base/SkTArray.h" 13 #include "include/private/base/SkTo.h" 14 #include "src/base/SkEnumBitMask.h" 15 #include "src/gpu/graphite/ComputeTypes.h" 16 #include "src/gpu/graphite/ResourceTypes.h" 17 18 #include <cstddef> 19 #include <cstdint> 20 #include <string> 21 #include <string_view> 22 #include <tuple> 23 24 enum SkColorType : int; 25 struct SkISize; 26 27 namespace skgpu::graphite { 28 29 class UniformManager; 30 31 /** 32 * A `ComputeStep` represents a compute pass within a wider draw operation. A `ComputeStep` 33 * implementation describes an invocation of a compute program and its data binding layout. 34 * 35 * A `ComputeStep` can perform arbitrary operations on the GPU over various types of data, including 36 * geometry and image processing. The data processed by a `ComputeStep` can be inputs (textures or 37 * buffers) populated on the CPU, data forwarded to and from other `ComputeStep` invocations (via 38 * "slots"), transient storage buffers/textures that are only used within an individual dispatch, 39 * geometry attribute (vertex/index/instance) and indirect draw parameters of a subsequent raster 40 * pipeline stage, as well as texture outputs. 41 * 42 * The data flow between sequential `ComputeStep` invocations within a DispatchGroup is achieved by 43 * operating over a shared "resource table". `ComputeStep`s can declare a resource with a slot 44 * number. Multiple `ComputeStep`s in a group that declare a resource with the same slot number will 45 * have access to the same backing resource object through that slot: 46 * 47 * _______________ _______________ 48 * | | | | 49 * | ---[Slot 0]--- | 50 * | | | | 51 * | ---[Slot 1]--- | 52 * | ComputeStep 1 | | ComputeStep 2 | 53 * | ---[Slot 2] | | 54 * | | | | 55 * | | [Slot 3]--- | 56 * | | | | 57 * --------------- --------------- 58 * 59 * In the example above, slots 0 and 1 are accessed by both ComputeSteps, while slots 2 and 3 are 60 * exclusively accessed by ComputeStep 1 and 2 respectively. Alternately, slots 2 and 3 could be 61 * declared as "private" resources which are visible to a single ComputeStep. 62 * 63 * Similarly, raster stage geometry buffers that are specified as the output of a ComputeStep can be 64 * used to assign the draw buffers of a RenderStep. 65 * 66 * It is the responsibility of the owning entity (e.g. a RendererProvider) to ensure that a chain of 67 * ComputeStep and RenderStep invocations have a compatible resource and data-flow layout. 68 */ 69 class ComputeStep { 70 public: 71 enum class DataFlow { 72 // A private binding is a resource that is only visible to a single ComputeStep invocation. 73 kPrivate, 74 75 // Bindings with a slot number that can be used to forward data between a series of 76 // `ComputeStep`s. This DataFlow type is accompanied with a "slot number" that can be 77 // shared by multiple `ComputeStep`s in a group. 78 kShared, 79 }; 80 81 enum class ResourceType { 82 kUniformBuffer, 83 kStorageBuffer, 84 kReadOnlyStorageBuffer, 85 86 // An indirect buffer is a storage buffer populated by this ComputeStep to determine the 87 // global dispatch size of a subsequent ComputeStep within the same DispatchGroup. The 88 // contents of the buffer must be laid out according to the `IndirectDispatchArgs` struct 89 // definition declared in ComputeTypes.h. 90 kIndirectBuffer, 91 92 kWriteOnlyStorageTexture, 93 kReadOnlyTexture, 94 kSampledTexture, 95 }; 96 97 enum class ResourcePolicy { 98 kNone, 99 100 // The memory of the resource will be initialized to 0 101 kClear, 102 103 // The ComputeStep will be asked to initialize the memory on the CPU via 104 // `ComputeStep::prepareStorageBuffer` or `ComputeStep::prepareUniformBuffer` prior to 105 // pipeline execution. This may incur a transfer cost on platforms that do not allow buffers 106 // to be mapped in shared memory. 107 // 108 // If multiple ComputeSteps in a DispatchGroup declare a mapped resource with the same 109 // shared slot number, only the first ComputeStep in the group will receive a call to 110 // prepare the buffer. 111 // 112 // This only has meaning for buffer resources. A resource with the `kUniformBuffer` resource 113 // type must specify the `kMapped` resource policy. 114 kMapped, 115 }; 116 117 struct ResourceDesc final { 118 ResourceType fType; 119 DataFlow fFlow; 120 ResourcePolicy fPolicy; 121 122 // This field only has meaning (and must have a non-negative value) if `fFlow` is 123 // `DataFlow::kShared`. 124 int fSlot; 125 126 // The SkSL variable declaration code excluding the layout and type definitions. This field 127 // is ignored for a ComputeStep that supports native shader source. 128 const char* fSkSL = ""; 129 130 constexpr ResourceDesc(ResourceType type, 131 DataFlow flow, 132 ResourcePolicy policy, 133 int slot = -1) fTypefinal134 : fType(type), fFlow(flow), fPolicy(policy), fSlot(slot) {} 135 ResourceDescfinal136 constexpr ResourceDesc(ResourceType type, 137 DataFlow flow, 138 ResourcePolicy policy, 139 int slot, 140 const char* sksl) 141 : fType(type), fFlow(flow), fPolicy(policy), fSlot(slot), fSkSL(sksl) {} 142 ResourceDescfinal143 constexpr ResourceDesc(ResourceType type, 144 DataFlow flow, 145 ResourcePolicy policy, 146 const char* sksl) 147 : fType(type), fFlow(flow), fPolicy(policy), fSlot(-1), fSkSL(sksl) {} 148 }; 149 150 // On platforms that support late bound workgroup shared resources (e.g. Metal) a ComputeStep 151 // can optionally provide a list of memory sizes and binding indices. 152 struct WorkgroupBufferDesc { 153 // The buffer size in bytes. 154 size_t size; 155 size_t index; 156 }; 157 158 virtual ~ComputeStep() = default; 159 160 // Returns a complete SkSL compute program. The returned SkSL must constitute a complete compute 161 // program and declare all resource bindings starting at `nextBindingIndex` in the order in 162 // which they are enumerated by `ComputeStep::resources()`. 163 // 164 // If this ComputeStep supports native shader source then it must override 165 // `nativeShaderSource()` instead. 166 virtual std::string computeSkSL() const; 167 168 // A ComputeStep that supports native shader source then then it must implement 169 // `nativeShaderSource()` and return the shader source in the requested format. This is intended 170 // to instantiate a compute pipeline from a pre-compiled shader module. The returned source must 171 // constitute a shader module that contains at least one compute entry-point function that 172 // matches the specified name. 173 enum class NativeShaderFormat { 174 kWGSL, 175 kMSL, 176 }; 177 struct NativeShaderSource { 178 std::string_view fSource; 179 std::string fEntryPoint; 180 }; 181 virtual NativeShaderSource nativeShaderSource(NativeShaderFormat) const; 182 183 // This method will be called for buffer entries in the ComputeStep's resource list to 184 // determine the required allocation size. The ComputeStep must return a non-zero value. 185 // 186 // TODO(b/279955342): Provide a context object, e.g. a type a associated with 187 // DispatchGroup::Builder, to aid the ComputeStep in its buffer size calculations. 188 virtual size_t calculateBufferSize(int resourceIndex, const ResourceDesc&) const; 189 190 // This method will be called for storage texture entries in the ComputeStep's resource list to 191 // determine the required dimensions and color type. The ComputeStep must return a non-zero 192 // value for the size and a valid color type. 193 virtual std::tuple<SkISize, SkColorType> calculateTextureParameters(int resourceIndex, 194 const ResourceDesc&) const; 195 196 // This method will be called for sampler entries in the ComputeStep's resource list to 197 // determine the sampling and tile mode options. 198 virtual SamplerDesc calculateSamplerParameters(int resourceIndex, const ResourceDesc&) const; 199 200 // Return the global dispatch size (aka "workgroup count") for this step based on the draw 201 // parameters. The default value is a workgroup count of (1, 1, 1) 202 // 203 // TODO(b/279955342): Provide a context object, e.g. a type a associated with 204 // DispatchGroup::Builder, to aid the ComputeStep in its buffer size calculations. 205 virtual WorkgroupSize calculateGlobalDispatchSize() const; 206 207 // Populates a storage buffer resource which was specified as "mapped". This method will only be 208 // called once for a resource right after its allocation and before pipeline execution. For 209 // shared resources, only the first ComputeStep in a DispatchGroup will be asked to prepare the 210 // buffer. 211 // 212 // `resourceIndex` matches the order in which `resource` was enumerated by 213 // `ComputeStep::resources()`. 214 virtual void prepareStorageBuffer(int resourceIndex, 215 const ResourceDesc& resource, 216 void* buffer, 217 size_t bufferSize) const; 218 219 // Populates a uniform buffer resource. This method will be called once for a resource right 220 // after its allocation and before pipeline execution. For shared resources, only the first 221 // ComputeStep in a DispatchGroup will be asked to prepare the buffer. 222 // 223 // `resourceIndex` matches the order in which `resource` was enumerated by 224 // `ComputeStep::resources()`. 225 // 226 // The implementation must use the provided `UniformManager` to populate the buffer. On debug 227 // builds, the implementation must validate the buffer layout by setting up an expectation, for 228 // example: 229 // 230 // SkDEBUGCODE(mgr->setExpectedUniforms({{"foo", SkSLType::kFloat}})); 231 // 232 // TODO(b/279955342): Provide a context object, e.g. a type a associated with 233 // DispatchGroup::Builder, to aid the ComputeStep in its buffer size calculations. 234 virtual void prepareUniformBuffer(int resourceIndex, 235 const ResourceDesc&, 236 UniformManager*) const; 237 resources()238 SkSpan<const ResourceDesc> resources() const { return SkSpan(fResources); } workgroupBuffers()239 SkSpan<const WorkgroupBufferDesc> workgroupBuffers() const { return SkSpan(fWorkgroupBuffers); } 240 241 // Identifier that can be used as part of a unique key for a compute pipeline state object 242 // associated with this `ComputeStep`. uniqueID()243 uint32_t uniqueID() const { return fUniqueID; } 244 245 // Returns a debug name for the subclass implementation. name()246 const char* name() const { return fName.c_str(); } 247 248 // The size of the workgroup for this ComputeStep's entry point function. This value is hardware 249 // dependent. On Metal, this value should be used when invoking the dispatch API call. On all 250 // other backends, this value will be baked into the pipeline. localDispatchSize()251 WorkgroupSize localDispatchSize() const { return fLocalDispatchSize; } 252 supportsNativeShader()253 bool supportsNativeShader() const { return SkToBool(fFlags & Flags::kSupportsNativeShader); } 254 255 protected: 256 enum class Flags : uint8_t { 257 kNone = 0b00000, 258 kSupportsNativeShader = 0b00010, 259 }; 260 SK_DECL_BITMASK_OPS_FRIENDS(Flags) 261 262 ComputeStep(std::string_view name, 263 WorkgroupSize localDispatchSize, 264 SkSpan<const ResourceDesc> resources, 265 SkSpan<const WorkgroupBufferDesc> workgroupBuffers = {}, 266 Flags baseFlags = Flags::kNone); 267 268 private: 269 // Disallow copy and move 270 ComputeStep(const ComputeStep&) = delete; 271 ComputeStep(ComputeStep&&) = delete; 272 273 uint32_t fUniqueID; 274 SkEnumBitMask<Flags> fFlags; 275 std::string fName; 276 skia_private::TArray<ResourceDesc> fResources; 277 skia_private::TArray<WorkgroupBufferDesc> fWorkgroupBuffers; 278 279 // TODO(b/240615224): Subclasses should simply specify the workgroup size that they need. 280 // The ComputeStep constructor should check and reduce that number based on the maximum 281 // supported workgroup size stored in Caps. In Metal, we'll pass this number directly to the 282 // dispatch API call. On other backends, we'll use this value to generate the right SkSL 283 // workgroup size declaration to avoid any validation failures. 284 WorkgroupSize fLocalDispatchSize; 285 }; 286 SK_MAKE_BITMASK_OPS(ComputeStep::Flags) 287 288 } // namespace skgpu::graphite 289 290 #endif // skgpu_graphite_compute_ComputeStep_DEFINED 291