//
// Copyright 2018 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// vk_cache_utils.h:
//    Contains the classes for the Pipeline State Object cache as well as the RenderPass cache.
//    Also contains the structures for the packed descriptions for the RenderPass and Pipeline.
//

#ifndef LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
#define LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_

#include "common/Color.h"
#include "common/FixedVector.h"
#include "common/SimpleMutex.h"
#include "common/WorkerThread.h"
#include "libANGLE/Uniform.h"
#include "libANGLE/renderer/vulkan/ShaderInterfaceVariableInfoMap.h"
#include "libANGLE/renderer/vulkan/vk_resource.h"
#include "libANGLE/renderer/vulkan/vk_utils.h"

namespace gl
{
class ProgramExecutable;
}  // namespace gl

namespace rx
{
class ShaderInterfaceVariableInfoMap;
class UpdateDescriptorSetsBuilder;

// Some descriptor set and pipeline layout constants.
//
// The set/binding assignment is done as follows:
//
// - Set 0 contains uniform blocks created to encompass default uniforms. One binding is used per
//   pipeline stage. Additionally, transform feedback buffers are bound from binding 2 and up.
//   For internal shaders, set 0 is used for all the needed resources.
// - Set 1 contains all textures (including texture buffers).
// - Set 2 contains all other shader resources, such as uniform and storage blocks, atomic counter
//   buffers, images and image buffers.
// - Set 3 is reserved for OpenCL.

enum class DescriptorSetIndex : uint32_t
{
    Internal = 0,               // Internal shaders
    UniformsAndXfb = Internal,  // Uniforms set index
    Texture = 1,                // Textures set index
    ShaderResource = 2,         // Other shader resources set index

    // CL-specific naming for set indices
    LiteralSampler = 0,
    KernelArguments = 1,
    ModuleConstants = 2,
    Printf = 3,

    InvalidEnum = 4,
    EnumCount = InvalidEnum,
};

namespace vk
{
class BufferHelper;
class DynamicDescriptorPool;
class SamplerHelper;
enum class ImageLayout;
class PipelineCacheAccess;
class RenderPassCommandBufferHelper;
class PackedClearValuesArray;
class AttachmentOpsArray;

using PipelineLayoutPtr = AtomicSharedPtr<PipelineLayout>;
using DescriptorSetLayoutPtr = AtomicSharedPtr<DescriptorSetLayout>;

// Packed Vk resource descriptions.
// Most Vk types use many more bits than required to represent the underlying data.
// Since ANGLE wants to cache things like RenderPasses and Pipeline State Objects using
// hashing (and also needs to check equality), we can optimize these operations by
// using fewer bits. Hence the packed types.
//
// One implementation note: these types could potentially be improved by using even
// fewer bits. For example, boolean values could be represented by a single bit instead
// of a uint8_t. However, at the current time there are concerns about the portability
// of bitfield operators, and complexity issues with using bit mask operations. This is
// something we will likely want to investigate as the Vulkan implementation progresses.
//
// Second implementation note: the struct packing is also a bit fragile, and some of the
// packing requirements depend on using alignas and field ordering to get the result to
// pack nicely into the desired space. This is something we could also potentially fix
// with a redesign to use bitfields or bit mask operations.
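//
// Illustrative sketch (not part of the API below): because these packed descriptions are
// contiguous PODs, cache keys are cheap to compute and compare:
//
//     RenderPassDesc desc;
//     // ... pack attachments into |desc| ...
//     size_t key = desc.hash();      // hashes the packed bytes
//     bool same = (desc == other);   // cheap equality, effectively a memcmp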

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

enum class ResourceAccess
{
    Unused = 0x0,
    ReadOnly = 0x1,
    WriteOnly = 0x2,
    ReadWrite = ReadOnly | WriteOnly,
};

inline void UpdateAccess(ResourceAccess *oldAccess, ResourceAccess newAccess)
{
    *oldAccess = static_cast<ResourceAccess>(ToUnderlying(newAccess) | ToUnderlying(*oldAccess));
}
inline bool HasResourceWriteAccess(ResourceAccess access)
{
    return (ToUnderlying(access) & ToUnderlying(ResourceAccess::WriteOnly)) != 0;
}
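// Example usage (illustrative):
//
//     ResourceAccess access = ResourceAccess::ReadOnly;
//     UpdateAccess(&access, ResourceAccess::WriteOnly);
//     ASSERT(access == ResourceAccess::ReadWrite);
//     ASSERT(HasResourceWriteAccess(access));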

enum class RenderPassLoadOp
{
    Load = VK_ATTACHMENT_LOAD_OP_LOAD,
    Clear = VK_ATTACHMENT_LOAD_OP_CLEAR,
    DontCare = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
    None,
};
enum class RenderPassStoreOp
{
    Store = VK_ATTACHMENT_STORE_OP_STORE,
    DontCare = VK_ATTACHMENT_STORE_OP_DONT_CARE,
    None,
};

enum class FramebufferFetchMode
{
    None,
    Color,
    DepthStencil,
    ColorAndDepthStencil,
};
FramebufferFetchMode GetProgramFramebufferFetchMode(const gl::ProgramExecutable *executable);
ANGLE_INLINE bool FramebufferFetchModeHasColor(FramebufferFetchMode framebufferFetchMode)
{
    static_assert(ToUnderlying(FramebufferFetchMode::Color) == 0x1);
    static_assert(ToUnderlying(FramebufferFetchMode::ColorAndDepthStencil) == 0x3);
    return (ToUnderlying(framebufferFetchMode) & 0x1) != 0;
}
ANGLE_INLINE bool FramebufferFetchModeHasDepthStencil(FramebufferFetchMode framebufferFetchMode)
{
    static_assert(ToUnderlying(FramebufferFetchMode::DepthStencil) == 0x2);
    static_assert(ToUnderlying(FramebufferFetchMode::ColorAndDepthStencil) == 0x3);
    return (ToUnderlying(framebufferFetchMode) & 0x2) != 0;
}
ANGLE_INLINE FramebufferFetchMode FramebufferFetchModeMerge(FramebufferFetchMode mode1,
                                                            FramebufferFetchMode mode2)
{
    constexpr uint32_t kNone = ToUnderlying(FramebufferFetchMode::None);
    constexpr uint32_t kColor = ToUnderlying(FramebufferFetchMode::Color);
    constexpr uint32_t kDepthStencil = ToUnderlying(FramebufferFetchMode::DepthStencil);
    constexpr uint32_t kColorAndDepthStencil =
        ToUnderlying(FramebufferFetchMode::ColorAndDepthStencil);
    static_assert(kNone == 0);
    static_assert((kColor & kColorAndDepthStencil) == kColor);
    static_assert((kDepthStencil & kColorAndDepthStencil) == kDepthStencil);
    static_assert((kColor | kDepthStencil) == kColorAndDepthStencil);

    return static_cast<FramebufferFetchMode>(ToUnderlying(mode1) | ToUnderlying(mode2));
}
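// Example (illustrative): merging color fetch with depth/stencil fetch yields the combined mode:
//
//     FramebufferFetchMode merged = FramebufferFetchModeMerge(FramebufferFetchMode::Color,
//                                                             FramebufferFetchMode::DepthStencil);
//     ASSERT(merged == FramebufferFetchMode::ColorAndDepthStencil);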

// There can be a maximum of IMPLEMENTATION_MAX_DRAW_BUFFERS color and resolve attachments, plus:
// - one depth/stencil attachment
// - one depth/stencil resolve attachment
// - one fragment shading rate attachment
constexpr size_t kMaxFramebufferAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS * 2 + 3;
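// For instance, assuming gl::IMPLEMENTATION_MAX_DRAW_BUFFERS is 8 (consistent with the dither
// emulation static_assert further below), this is 8 color + 8 resolve + 3 = 19 attachments.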
template <typename T>
using FramebufferAttachmentArray = std::array<T, kMaxFramebufferAttachments>;
template <typename T>
using FramebufferAttachmentsVector = angle::FixedVector<T, kMaxFramebufferAttachments>;
using FramebufferAttachmentMask = angle::BitSet<kMaxFramebufferAttachments>;

constexpr size_t kMaxFramebufferNonResolveAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1;
template <typename T>
using FramebufferNonResolveAttachmentArray = std::array<T, kMaxFramebufferNonResolveAttachments>;
using FramebufferNonResolveAttachmentMask = angle::BitSet16<kMaxFramebufferNonResolveAttachments>;

class PackedAttachmentIndex;

class alignas(4) RenderPassDesc final
{
  public:
    RenderPassDesc();
    ~RenderPassDesc();
    RenderPassDesc(const RenderPassDesc &other);
    RenderPassDesc &operator=(const RenderPassDesc &other);

    // Set format for an enabled GL color attachment.
    void packColorAttachment(size_t colorIndexGL, angle::FormatID formatID);
    // Mark a GL color attachment index as disabled.
    void packColorAttachmentGap(size_t colorIndexGL);
    // The caller must pack the depth/stencil attachment last; it is packed right after the color
    // attachments (including gaps), i.e. with an index starting from |colorAttachmentRange()|.
    void packDepthStencilAttachment(angle::FormatID angleFormatID);
    void updateDepthStencilAccess(ResourceAccess access);
    // Indicate that a color attachment should have a corresponding resolve attachment.
    void packColorResolveAttachment(size_t colorIndexGL);
    // Indicate that a YUV texture is attached to the resolve attachment.
    void packYUVResolveAttachment(size_t colorIndexGL);
    // Remove the resolve attachment. Used when optimizing blit through resolve attachment to
    // temporarily pack a resolve attachment and then remove it.
    void removeColorResolveAttachment(size_t colorIndexGL);
    // Indicate that a color attachment should take its data from the resolve attachment initially.
    void packColorUnresolveAttachment(size_t colorIndexGL);
    void removeColorUnresolveAttachment(size_t colorIndexGL);
    // Indicate that a depth/stencil attachment should have a corresponding resolve attachment.
    void packDepthResolveAttachment();
    void packStencilResolveAttachment();
    // Indicate that a depth/stencil attachment should take its data from the resolve attachment
    // initially.
    void packDepthUnresolveAttachment();
    void packStencilUnresolveAttachment();
    void removeDepthStencilUnresolveAttachment();

    PackedAttachmentIndex getPackedColorAttachmentIndex(size_t colorIndexGL);

    void setWriteControlMode(gl::SrgbWriteControlMode mode);

    size_t hash() const;

    // Color attachments are in [0, colorAttachmentRange()), with possible gaps.
    size_t colorAttachmentRange() const { return mColorAttachmentRange; }
    size_t depthStencilAttachmentIndex() const { return colorAttachmentRange(); }

    bool isColorAttachmentEnabled(size_t colorIndexGL) const;
    bool hasYUVResolveAttachment() const { return mIsYUVResolve; }
    bool hasDepthStencilAttachment() const;
    gl::DrawBufferMask getColorResolveAttachmentMask() const { return mColorResolveAttachmentMask; }
    bool hasColorResolveAttachment(size_t colorIndexGL) const
    {
        return mColorResolveAttachmentMask.test(colorIndexGL);
    }
    gl::DrawBufferMask getColorUnresolveAttachmentMask() const
    {
        return mColorUnresolveAttachmentMask;
    }
    bool hasColorUnresolveAttachment(size_t colorIndexGL) const
    {
        return mColorUnresolveAttachmentMask.test(colorIndexGL);
    }
    bool hasDepthStencilResolveAttachment() const { return mResolveDepth || mResolveStencil; }
    bool hasDepthResolveAttachment() const { return mResolveDepth; }
    bool hasStencilResolveAttachment() const { return mResolveStencil; }
    bool hasDepthStencilUnresolveAttachment() const { return mUnresolveDepth || mUnresolveStencil; }
    bool hasDepthUnresolveAttachment() const { return mUnresolveDepth; }
    bool hasStencilUnresolveAttachment() const { return mUnresolveStencil; }
    gl::SrgbWriteControlMode getSRGBWriteControlMode() const
    {
        return static_cast<gl::SrgbWriteControlMode>(mSrgbWriteControl);
    }

    bool isLegacyDitherEnabled() const { return mLegacyDitherEnabled; }

    void setLegacyDither(bool enabled);

    // Get the number of clearable attachments in the Vulkan render pass, i.e. after removing
    // disabled color attachments.
    size_t clearableAttachmentCount() const;
    // Get the total number of attachments in the Vulkan render pass, i.e. after removing disabled
    // color attachments.
    size_t attachmentCount() const;

    void setSamples(GLint samples) { mSamples = static_cast<uint8_t>(samples); }
    uint8_t samples() const { return mSamples; }

    void setViewCount(GLsizei viewCount) { mViewCount = static_cast<uint8_t>(viewCount); }
    uint8_t viewCount() const { return mViewCount; }

    void setFramebufferFetchMode(FramebufferFetchMode framebufferFetchMode)
    {
        SetBitField(mFramebufferFetchMode, framebufferFetchMode);
    }
    FramebufferFetchMode framebufferFetchMode() const
    {
        return static_cast<FramebufferFetchMode>(mFramebufferFetchMode);
    }
    bool hasColorFramebufferFetch() const
    {
        return FramebufferFetchModeHasColor(framebufferFetchMode());
    }
    bool hasDepthStencilFramebufferFetch() const
    {
        return FramebufferFetchModeHasDepthStencil(framebufferFetchMode());
    }

    void updateRenderToTexture(bool isRenderToTexture) { mIsRenderToTexture = isRenderToTexture; }
    bool isRenderToTexture() const { return mIsRenderToTexture; }

    void setFragmentShadingAttachment(bool value) { mHasFragmentShadingAttachment = value; }
    bool hasFragmentShadingAttachment() const { return mHasFragmentShadingAttachment; }

    angle::FormatID operator[](size_t index) const
    {
        ASSERT(index < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1);
        return static_cast<angle::FormatID>(mAttachmentFormats[index]);
    }

    // Start a render pass with a render pass object.
    void beginRenderPass(Context *context,
                         PrimaryCommandBuffer *primary,
                         const RenderPass &renderPass,
                         VkFramebuffer framebuffer,
                         const gl::Rectangle &renderArea,
                         VkSubpassContents subpassContents,
                         PackedClearValuesArray &clearValues,
                         const VkRenderPassAttachmentBeginInfo *attachmentBeginInfo) const;

    // Start a render pass with dynamic rendering.
    void beginRendering(Context *context,
                        PrimaryCommandBuffer *primary,
                        const gl::Rectangle &renderArea,
                        VkSubpassContents subpassContents,
                        const FramebufferAttachmentsVector<VkImageView> &attachmentViews,
                        const AttachmentOpsArray &ops,
                        PackedClearValuesArray &clearValues,
                        uint32_t layerCount) const;

    void populateRenderingInheritanceInfo(
        Renderer *renderer,
        VkCommandBufferInheritanceRenderingInfo *infoOut,
        gl::DrawBuffersArray<VkFormat> *colorFormatStorageOut) const;

    // Calculate perf counters for a dynamic rendering render pass instance. For render pass
    // objects, the perf counters are updated when creating the render pass, where access to
    // ContextVk is available.
    void updatePerfCounters(Context *context,
                            const FramebufferAttachmentsVector<VkImageView> &attachmentViews,
                            const AttachmentOpsArray &ops,
                            angle::VulkanPerfCounters *countersOut);

  private:
    uint8_t mSamples;
    uint8_t mColorAttachmentRange;

    // Multiview
    uint8_t mViewCount;

    // sRGB
    uint8_t mSrgbWriteControl : 1;

    // Framebuffer fetch, one of FramebufferFetchMode values
    uint8_t mFramebufferFetchMode : 2;

    // Depth/stencil resolve
    uint8_t mResolveDepth : 1;
    uint8_t mResolveStencil : 1;

    // Multisampled render to texture
    uint8_t mIsRenderToTexture : 1;
    uint8_t mUnresolveDepth : 1;
    uint8_t mUnresolveStencil : 1;

    // Dithering state when using VK_EXT_legacy_dithering
    uint8_t mLegacyDitherEnabled : 1;

    // external_format_resolve
    uint8_t mIsYUVResolve : 1;

    // Foveated rendering
    uint8_t mHasFragmentShadingAttachment : 1;

    // Available space for expansion.
    uint8_t mPadding2 : 5;

    // Whether each color attachment has a corresponding resolve attachment. Color resolve
    // attachments can be used to optimize resolve through glBlitFramebuffer() as well as support
    // GL_EXT_multisampled_render_to_texture and GL_EXT_multisampled_render_to_texture2.
    gl::DrawBufferMask mColorResolveAttachmentMask;

    // Whether each color attachment with a corresponding resolve attachment should be initialized
    // with said resolve attachment in an initial subpass. This is an optimization to avoid
    // loadOp=LOAD on the implicit multisampled image used with multisampled-render-to-texture
    // render targets. This operation is referred to as "unresolve".
    //
    // Unused when VK_EXT_multisampled_render_to_single_sampled is available.
    gl::DrawBufferMask mColorUnresolveAttachmentMask;

    // Color attachment formats are stored with their GL attachment indices. The depth/stencil
    // attachment formats follow the last enabled color attachment. When creating a render pass,
    // the disabled attachments are removed and the resulting attachments are packed.
    //
    // The attachment indices provided as input to various functions in this file are thus GL
    // attachment indices. These indices are marked as such, e.g. colorIndexGL. The render pass
    // (and corresponding framebuffer object) lists the packed attachments, with the corresponding
    // indices marked with Vk, e.g. colorIndexVk. The subpass attachment references create the
    // link between the two index spaces. The subpass declares attachment references with GL
    // indices (which corresponds to the location decoration of shader outputs). The attachment
    // references then contain the Vulkan indices or VK_ATTACHMENT_UNUSED.
    //
    // For example, if GL uses color attachments 0 and 3, then there are two render pass
    // attachments (indexed 0 and 1) and 4 subpass attachments:
    //
    //  - Subpass attachment 0 -> Renderpass attachment 0
    //  - Subpass attachment 1 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 2 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 3 -> Renderpass attachment 1
    //
    // The resolve attachments are packed after the non-resolve attachments. They use the same
    // formats, so they are not specified in this array.
    FramebufferNonResolveAttachmentArray<uint8_t> mAttachmentFormats;
};

bool operator==(const RenderPassDesc &lhs, const RenderPassDesc &rhs);

constexpr size_t kRenderPassDescSize = sizeof(RenderPassDesc);
static_assert(kRenderPassDescSize == 16, "Size check failed");

enum class GraphicsPipelineSubset
{
    Complete,  // Including all subsets
    VertexInput,
    Shaders,
    FragmentOutput,
};

enum class CacheLookUpFeedback
{
    None,
    Hit,
    Miss,
    LinkedDrawHit,
    LinkedDrawMiss,
    WarmUpHit,
    WarmUpMiss,
    UtilsHit,
    UtilsMiss,
};

struct PackedAttachmentOpsDesc final
{
    // RenderPassLoadOp is in range [0, 3], and RenderPassStoreOp is in range [0, 2].
    uint16_t loadOp : 2;
    uint16_t storeOp : 2;
    uint16_t stencilLoadOp : 2;
    uint16_t stencilStoreOp : 2;
    // If a corresponding resolve attachment exists, storeOp may already be DONT_CARE, and it's
    // unclear whether the attachment was invalidated or not. This information is passed along here
    // so that the resolve attachment's storeOp can be set to DONT_CARE if the attachment is
    // invalidated, and if possible removed from the list of resolve attachments altogether. Note
    // that the latter may not be possible if the render pass has multiple subpasses due to Vulkan
    // render pass compatibility rules (not an issue with dynamic rendering).
    uint16_t isInvalidated : 1;
    uint16_t isStencilInvalidated : 1;
    uint16_t padding1 : 6;

    // Layouts take values from ImageLayout, so they are small. Layouts that are possible here are
    // placed at the beginning of that enum.
    uint16_t initialLayout : 5;
    uint16_t finalLayout : 5;
    uint16_t finalResolveLayout : 5;
    uint16_t padding2 : 1;
};

static_assert(sizeof(PackedAttachmentOpsDesc) == 4, "Size check failed");

class AttachmentOpsArray final
{
  public:
    AttachmentOpsArray();
    ~AttachmentOpsArray();
    AttachmentOpsArray(const AttachmentOpsArray &other);
    AttachmentOpsArray &operator=(const AttachmentOpsArray &other);

    const PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index) const;
    PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index);

    // Initialize an attachment op with all load and store operations.
    void initWithLoadStore(PackedAttachmentIndex index,
                           ImageLayout initialLayout,
                           ImageLayout finalLayout);

    void setLayouts(PackedAttachmentIndex index,
                    ImageLayout initialLayout,
                    ImageLayout finalLayout);
    void setOps(PackedAttachmentIndex index, RenderPassLoadOp loadOp, RenderPassStoreOp storeOp);
    void setStencilOps(PackedAttachmentIndex index,
                       RenderPassLoadOp loadOp,
                       RenderPassStoreOp storeOp);

    void setClearOp(PackedAttachmentIndex index);
    void setClearStencilOp(PackedAttachmentIndex index);

    size_t hash() const;

  private:
    gl::AttachmentArray<PackedAttachmentOpsDesc> mOps;
};

bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);

static_assert(sizeof(AttachmentOpsArray) == 40, "Size check failed");

struct PackedAttribDesc final
{
    uint8_t format;
    uint8_t divisor;
    uint16_t offset : kAttributeOffsetMaxBits;
    uint16_t compressed : 1;
};

constexpr size_t kPackedAttribDescSize = sizeof(PackedAttribDesc);
static_assert(kPackedAttribDescSize == 4, "Size mismatch");

struct PackedVertexInputAttributes final
{
    PackedAttribDesc attribs[gl::MAX_VERTEX_ATTRIBS];

    // Component type of the corresponding input in the program. Used to adjust the format if
    // necessary. Takes values from gl::ComponentType.
    uint32_t shaderAttribComponentType;

    // Although technically stride can be any value in ES 2.0, in practice supporting strides
    // greater than MAX_USHORT is not that helpful. Note that stride limits are introduced in
    // ES 3.1.
    // Dynamic in VK_EXT_extended_dynamic_state
    uint16_t strides[gl::MAX_VERTEX_ATTRIBS];
};

constexpr size_t kPackedVertexInputAttributesSize = sizeof(PackedVertexInputAttributes);
static_assert(kPackedVertexInputAttributesSize == 100, "Size mismatch");

struct PackedInputAssemblyState final
{
    struct
    {
        uint32_t topology : 4;

        // Dynamic in VK_EXT_extended_dynamic_state2
        uint32_t primitiveRestartEnable : 1;  // ds2

        // Whether dynamic state for vertex stride from VK_EXT_extended_dynamic_state can be used.
        // Used by GraphicsPipelineDesc::hash() to exclude |strides| from the hash.
        uint32_t useVertexInputBindingStrideDynamicState : 1;

        // Whether dynamic state for vertex input from VK_EXT_vertex_input_dynamic_state can be
        // used. Used by GraphicsPipelineDesc::hash() to exclude |PackedVertexInputAttributes|
        // from the hash.
        uint32_t useVertexInputDynamicState : 1;

        // Whether the pipeline is robust (vertex input copy)
        uint32_t isRobustContext : 1;
        // Whether the pipeline needs access to protected content (vertex input copy)
        uint32_t isProtectedContext : 1;

        // Which attributes are actually active in the program and should affect the pipeline.
        uint32_t programActiveAttributeLocations : gl::MAX_VERTEX_ATTRIBS;

        uint32_t padding : 23 - gl::MAX_VERTEX_ATTRIBS;
    } bits;
};

constexpr size_t kPackedInputAssemblyStateSize = sizeof(PackedInputAssemblyState);
static_assert(kPackedInputAssemblyStateSize == 4, "Size mismatch");

struct PackedStencilOpState final
{
    uint8_t fail : 4;
    uint8_t pass : 4;
    uint8_t depthFail : 4;
    uint8_t compare : 4;
};

constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(kPackedStencilOpSize == 2, "Size check failed");

struct PackedPreRasterizationAndFragmentStates final
{
    struct
    {
        // Affecting VkPipelineViewportStateCreateInfo
        uint32_t viewportNegativeOneToOne : 1;

        // Affecting VkPipelineRasterizationStateCreateInfo
        uint32_t depthClampEnable : 1;
        uint32_t polygonMode : 2;
        // Dynamic in VK_EXT_extended_dynamic_state
        uint32_t cullMode : 4;
        uint32_t frontFace : 4;
        // Dynamic in VK_EXT_extended_dynamic_state2
        uint32_t rasterizerDiscardEnable : 1;
        uint32_t depthBiasEnable : 1;

        // Affecting VkPipelineTessellationStateCreateInfo
        uint32_t patchVertices : 6;

        // Affecting VkPipelineDepthStencilStateCreateInfo
        uint32_t depthBoundsTest : 1;
        // Dynamic in VK_EXT_extended_dynamic_state
        uint32_t depthTest : 1;
        uint32_t depthWrite : 1;
        uint32_t stencilTest : 1;
        uint32_t nonZeroStencilWriteMaskWorkaround : 1;
        // Dynamic in VK_EXT_extended_dynamic_state2
        uint32_t depthCompareOp : 4;

        // Affecting specialization constants
        uint32_t surfaceRotation : 1;

        // Whether the pipeline is robust (shader stages copy)
        uint32_t isRobustContext : 1;
        // Whether the pipeline needs access to protected content (shader stages copy)
        uint32_t isProtectedContext : 1;
    } bits;

    // Affecting specialization constants
    static_assert(gl::IMPLEMENTATION_MAX_DRAW_BUFFERS <= 8,
                  "2 bits per draw buffer is needed for dither emulation");
    uint16_t emulatedDitherControl;
    uint16_t padding;

    // Affecting VkPipelineDepthStencilStateCreateInfo
    // Dynamic in VK_EXT_extended_dynamic_state
    PackedStencilOpState front;
    PackedStencilOpState back;
};

constexpr size_t kPackedPreRasterizationAndFragmentStatesSize =
    sizeof(PackedPreRasterizationAndFragmentStates);
static_assert(kPackedPreRasterizationAndFragmentStatesSize == 12, "Size check failed");

struct PackedMultisampleAndSubpassState final
{
    struct
    {
        // Affecting VkPipelineMultisampleStateCreateInfo
        // Note: Only up to 16xMSAA is supported in the Vulkan backend.
        uint16_t sampleMask;
        // Stored as minus one so sample count 16 can fit in 4 bits.
        uint16_t rasterizationSamplesMinusOne : 4;
        uint16_t sampleShadingEnable : 1;
        uint16_t alphaToCoverageEnable : 1;
        uint16_t alphaToOneEnable : 1;
        // The subpass index affects both the shader stages and the fragment output similarly to
        // multisampled state, so they are grouped together.
        // Note: Currently only 2 subpasses are possible.
        uint16_t subpass : 1;
        // 8-bit normalized instead of float to align the struct.
        uint16_t minSampleShading : 8;
    } bits;
};

constexpr size_t kPackedMultisampleAndSubpassStateSize = sizeof(PackedMultisampleAndSubpassState);
static_assert(kPackedMultisampleAndSubpassStateSize == 4, "Size check failed");

struct PackedColorBlendAttachmentState final
{
    uint16_t srcColorBlendFactor : 5;
    uint16_t dstColorBlendFactor : 5;
    uint16_t colorBlendOp : 6;
    uint16_t srcAlphaBlendFactor : 5;
    uint16_t dstAlphaBlendFactor : 5;
    uint16_t alphaBlendOp : 6;
};

constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");

struct PackedColorBlendState final
{
    uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
    PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
};

constexpr size_t kPackedColorBlendStateSize = sizeof(PackedColorBlendState);
static_assert(kPackedColorBlendStateSize == 36, "Size check failed");

struct PackedBlendMaskAndLogicOpState final
{
    struct
    {
        uint32_t blendEnableMask : 8;
        uint32_t logicOpEnable : 1;
        // Dynamic in VK_EXT_extended_dynamic_state2
        uint32_t logicOp : 4;

        // Whether the pipeline needs access to protected content (fragment output copy)
        uint32_t isProtectedContext : 1;

        // Outputs that are present in the framebuffer but are never written to in the shader.
        // Used by GL_ANGLE_robust_fragment_shader_output, which defines the behavior in this case
        // (which is to mask these outputs).
        uint32_t missingOutputsMask : gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;

        uint32_t padding : 18 - gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
    } bits;
};

constexpr size_t kPackedBlendMaskAndLogicOpStateSize = sizeof(PackedBlendMaskAndLogicOpState);
static_assert(kPackedBlendMaskAndLogicOpStateSize == 4, "Size check failed");

// The vertex input subset of the pipeline.
struct PipelineVertexInputState final
{
    PackedInputAssemblyState inputAssembly;
    PackedVertexInputAttributes vertex;
};

// The pre-rasterization and fragment shader subsets of the pipeline. This excludes the
// multisampled and render pass states, which are shared with fragment output.
struct PipelineShadersState final
{
    PackedPreRasterizationAndFragmentStates shaders;
};

// Multisampled and render pass states.
struct PipelineSharedNonVertexInputState final
{
    PackedMultisampleAndSubpassState multisample;
    RenderPassDesc renderPass;
};

// The fragment output subset of the pipeline. This excludes the multisampled and render pass
// states, which are shared with the shader subsets.
struct PipelineFragmentOutputState final
{
    PackedColorBlendState blend;
    PackedBlendMaskAndLogicOpState blendMaskAndLogic;
};

constexpr size_t kGraphicsPipelineVertexInputStateSize =
    kPackedVertexInputAttributesSize + kPackedInputAssemblyStateSize;
constexpr size_t kGraphicsPipelineShadersStateSize = kPackedPreRasterizationAndFragmentStatesSize;
constexpr size_t kGraphicsPipelineSharedNonVertexInputStateSize =
    kPackedMultisampleAndSubpassStateSize + kRenderPassDescSize;
constexpr size_t kGraphicsPipelineFragmentOutputStateSize =
    kPackedColorBlendStateSize + kPackedBlendMaskAndLogicOpStateSize;

constexpr size_t kGraphicsPipelineDescSumOfSizes =
    kGraphicsPipelineVertexInputStateSize + kGraphicsPipelineShadersStateSize +
    kGraphicsPipelineSharedNonVertexInputStateSize + kGraphicsPipelineFragmentOutputStateSize;

// Number of dirty bits in the dirty bit set.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
constexpr static size_t kNumGraphicsPipelineDirtyBits =
    kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");

// Set of dirty bits. Each bit represents kGraphicsPipelineDirtyBitBytes in the desc.
using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;
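// For example (illustrative): dirty bit i covers bytes [4*i, 4*i + 4) of the packed pipeline
// description; bit 0 covers the first four bytes, bit 1 the next four, and so on.
// GraphicsPipelineTransitionMatch() below compares exactly these 4-byte words for the set bits.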

GraphicsPipelineTransitionBits GetGraphicsPipelineTransitionBitsMask(GraphicsPipelineSubset subset);

// Disable padding warnings for a few helper structs that aggregate Vulkan state objects. These
// are not used as hash keys; they just simplify passing them around to functions.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

struct GraphicsPipelineVertexInputVulkanStructs
{
    VkPipelineVertexInputStateCreateInfo vertexInputState = {};
    VkPipelineInputAssemblyStateCreateInfo inputAssemblyState = {};
    VkPipelineVertexInputDivisorStateCreateInfoEXT divisorState = {};

    // Support storage
    gl::AttribArray<VkVertexInputBindingDescription> bindingDescs;
    gl::AttribArray<VkVertexInputAttributeDescription> attributeDescs;
    gl::AttribArray<VkVertexInputBindingDivisorDescriptionEXT> divisorDesc;
};

struct GraphicsPipelineShadersVulkanStructs
{
    VkPipelineViewportStateCreateInfo viewportState = {};
    VkPipelineRasterizationStateCreateInfo rasterState = {};
    VkPipelineDepthStencilStateCreateInfo depthStencilState = {};
    VkPipelineTessellationStateCreateInfo tessellationState = {};
    VkPipelineTessellationDomainOriginStateCreateInfo domainOriginState = {};
    VkPipelineViewportDepthClipControlCreateInfoEXT depthClipControl = {};
    VkPipelineRasterizationLineStateCreateInfoEXT rasterLineState = {};
    VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provokingVertexState = {};
    VkPipelineRasterizationStateStreamCreateInfoEXT rasterStreamState = {};
    VkSpecializationInfo specializationInfo = {};

    // Support storage
    angle::FixedVector<VkPipelineShaderStageCreateInfo, 5> shaderStages;
    SpecializationConstantMap<VkSpecializationMapEntry> specializationEntries;
};

struct GraphicsPipelineSharedNonVertexInputVulkanStructs
{
    VkPipelineMultisampleStateCreateInfo multisampleState = {};

    // Support storage
    uint32_t sampleMask;
};

struct GraphicsPipelineFragmentOutputVulkanStructs
{
    VkPipelineColorBlendStateCreateInfo blendState = {};

    // Support storage
    gl::DrawBuffersArray<VkPipelineColorBlendAttachmentState> blendAttachmentState;
};

ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

using GraphicsPipelineDynamicStateList = angle::FixedVector<VkDynamicState, 23>;

enum class PipelineRobustness
{
    NonRobust,
    Robust,
};

enum class PipelineProtectedAccess
{
    Unprotected,
    Protected,
};

// State changes are applied through the update methods. Each update method can also have a
// sibling method that applies the update without marking a state transition. The non-transition
// update methods are used for internal shader pipelines. Not every non-transition update method
// is implemented yet, as not every state is used in internal shaders.
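// For example (illustrative; |desc| and |transitionBits| are hypothetical variables),
// setTopology() writes the state without recording a transition, while updateTopology() also
// records which dirty bits changed:
//
//     desc.setTopology(gl::PrimitiveMode::Triangles);                       // internal pipelines
//     desc.updateTopology(&transitionBits, gl::PrimitiveMode::Triangles);   // tracked GL state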
class GraphicsPipelineDesc final
{
  public:
    // Use aligned allocation and free so we can use the alignas keyword.
    void *operator new(std::size_t size);
    void operator delete(void *ptr);

    GraphicsPipelineDesc();
    ~GraphicsPipelineDesc();
    GraphicsPipelineDesc(const GraphicsPipelineDesc &other);
    GraphicsPipelineDesc &operator=(const GraphicsPipelineDesc &other);

    size_t hash(GraphicsPipelineSubset subset) const;
    bool keyEqual(const GraphicsPipelineDesc &other, GraphicsPipelineSubset subset) const;

    void initDefaults(const Context *context,
                      GraphicsPipelineSubset subset,
                      PipelineRobustness contextRobustness,
                      PipelineProtectedAccess contextProtectedAccess);

    // For custom comparisons.
    template <typename T>
    const T *getPtr() const
    {
        return reinterpret_cast<const T *>(this);
    }

    VkResult initializePipeline(Context *context,
                                PipelineCacheAccess *pipelineCache,
                                GraphicsPipelineSubset subset,
                                const RenderPass &compatibleRenderPass,
                                const PipelineLayout &pipelineLayout,
                                const ShaderModuleMap &shaders,
                                const SpecializationConstants &specConsts,
                                Pipeline *pipelineOut,
                                CacheLookUpFeedback *feedbackOut) const;

    // Vertex input state. For ES 3.1 this should be separated into binding and attribute.
    void updateVertexInput(ContextVk *contextVk,
                           GraphicsPipelineTransitionBits *transition,
                           uint32_t attribIndex,
                           GLuint stride,
                           GLuint divisor,
                           angle::FormatID format,
                           bool compressed,
                           GLuint relativeOffset);
    void setVertexShaderComponentTypes(gl::AttributesMask activeAttribLocations,
                                       gl::ComponentTypeMask componentTypeMask);
    void updateVertexShaderComponentTypes(GraphicsPipelineTransitionBits *transition,
                                          gl::AttributesMask activeAttribLocations,
                                          gl::ComponentTypeMask componentTypeMask);

    // Input assembly info
    void setTopology(gl::PrimitiveMode drawMode);
    void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
    void updatePrimitiveRestartEnabled(GraphicsPipelineTransitionBits *transition,
                                       bool primitiveRestartEnabled);

    // Viewport states
    void updateDepthClipControl(GraphicsPipelineTransitionBits *transition, bool negativeOneToOne);

    // Raster states
    void updatePolygonMode(GraphicsPipelineTransitionBits *transition, gl::PolygonMode polygonMode);
    void updateCullMode(GraphicsPipelineTransitionBits *transition,
                        const gl::RasterizerState &rasterState);
    void updateFrontFace(GraphicsPipelineTransitionBits *transition,
                         const gl::RasterizerState &rasterState,
                         bool invertFrontFace);
    void updateRasterizerDiscardEnabled(GraphicsPipelineTransitionBits *transition,
                                        bool rasterizerDiscardEnabled);

    // Multisample states
    uint32_t getRasterizationSamples() const;
    void setRasterizationSamples(uint32_t rasterizationSamples);
    void updateRasterizationSamples(GraphicsPipelineTransitionBits *transition,
                                    uint32_t rasterizationSamples);
    void updateAlphaToCoverageEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateAlphaToOneEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateSampleMask(GraphicsPipelineTransitionBits *transition,
                          uint32_t maskNumber,
                          uint32_t mask);

    void updateSampleShading(GraphicsPipelineTransitionBits *transition, bool enable, float value);

    // RenderPass description.
    const RenderPassDesc &getRenderPassDesc() const { return mSharedNonVertexInput.renderPass; }

    void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
    void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
                              const angle::FeaturesVk &features,
                              const RenderPassDesc &renderPassDesc,
                              FramebufferFetchMode framebufferFetchMode);
    void setRenderPassSampleCount(GLint samples);
    void setRenderPassFramebufferFetchMode(FramebufferFetchMode framebufferFetchMode);
    bool getRenderPassColorFramebufferFetchMode() const
    {
        return mSharedNonVertexInput.renderPass.hasColorFramebufferFetch();
    }
    bool getRenderPassDepthStencilFramebufferFetchMode() const
    {
        return mSharedNonVertexInput.renderPass.hasDepthStencilFramebufferFetch();
    }

    void setRenderPassFoveation(bool isFoveated);
    bool getRenderPassFoveation() const
    {
        return mSharedNonVertexInput.renderPass.hasFragmentShadingAttachment();
    }
    void setRenderPassColorAttachmentFormat(size_t colorIndexGL, angle::FormatID formatID);

    // Blend states
    void setSingleBlend(uint32_t colorIndexGL,
                        bool enabled,
                        VkBlendOp op,
                        VkBlendFactor srcFactor,
                        VkBlendFactor dstFactor);
    void updateBlendEnabled(GraphicsPipelineTransitionBits *transition,
                            gl::DrawBufferMask blendEnabledMask);
    void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
                          const gl::BlendStateExt &blendStateExt,
                          gl::DrawBufferMask attachmentMask);
    void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
                              const gl::BlendStateExt &blendStateExt,
                              gl::DrawBufferMask attachmentMask);
    void resetBlendFuncsAndEquations(GraphicsPipelineTransitionBits *transition,
                                     const gl::BlendStateExt &blendStateExt,
                                     gl::DrawBufferMask previousAttachmentsMask,
                                     gl::DrawBufferMask newAttachmentsMask);
    void setColorWriteMasks(gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
                            const gl::DrawBufferMask &alphaMask,
                            const gl::DrawBufferMask &enabledDrawBuffers);
    void setSingleColorWriteMask(uint32_t colorIndexGL, VkColorComponentFlags colorComponentFlags);
    void updateColorWriteMasks(GraphicsPipelineTransitionBits *transition,
                               gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
                               const gl::DrawBufferMask &alphaMask,
                               const gl::DrawBufferMask &enabledDrawBuffers);
    void updateMissingOutputsMask(GraphicsPipelineTransitionBits *transition,
                                  gl::DrawBufferMask missingOutputsMask);

    // Logic op
    void updateLogicOpEnabled(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateLogicOp(GraphicsPipelineTransitionBits *transition, VkLogicOp logicOp);

    // Depth/stencil states.
    void setDepthTestEnabled(bool enabled);
    void setDepthWriteEnabled(bool enabled);
    void setDepthFunc(VkCompareOp op);
    void setDepthClampEnabled(bool enabled);
    void setStencilTestEnabled(bool enabled);
    void setStencilFrontFuncs(VkCompareOp compareOp);
    void setStencilBackFuncs(VkCompareOp compareOp);
    void setStencilFrontOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilBackOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilFrontWriteMask(uint8_t mask);
    void setStencilBackWriteMask(uint8_t mask);
    void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState,
                                const gl::Framebuffer *drawFramebuffer);
    void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
                         const gl::DepthStencilState &depthStencilState);
    void updateDepthClampEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
    void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState,
                                 const gl::Framebuffer *drawFramebuffer);
    void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
                                  const gl::DepthStencilState &depthStencilState,
                                  const gl::Framebuffer *drawFramebuffer);
    void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState);
    void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
                               const gl::DepthStencilState &depthStencilState);
    void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
                              const gl::DepthStencilState &depthStencilState);

    // Depth offset.
    void updatePolygonOffsetEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);

    // Tessellation
    void updatePatchVertices(GraphicsPipelineTransitionBits *transition, GLuint value);

    // Subpass
    void resetSubpass(GraphicsPipelineTransitionBits *transition);
    void nextSubpass(GraphicsPipelineTransitionBits *transition);
    void setSubpass(uint32_t subpass);
    uint32_t getSubpass() const;

    void updateSurfaceRotation(GraphicsPipelineTransitionBits *transition,
                               bool isRotatedAspectRatio);
    bool getSurfaceRotation() const { return mShaders.shaders.bits.surfaceRotation; }

    void updateEmulatedDitherControl(GraphicsPipelineTransitionBits *transition, uint16_t value);
    uint32_t getEmulatedDitherControl() const { return mShaders.shaders.emulatedDitherControl; }

    bool isLegacyDitherEnabled() const
    {
        return mSharedNonVertexInput.renderPass.isLegacyDitherEnabled();
    }

    void updateNonZeroStencilWriteMaskWorkaround(GraphicsPipelineTransitionBits *transition,
                                                 bool enabled);

    void setSupportsDynamicStateForTest(bool supports)
    {
        mVertexInput.inputAssembly.bits.useVertexInputBindingStrideDynamicState = supports;
        mShaders.shaders.bits.nonZeroStencilWriteMaskWorkaround = false;
    }

    static VkFormat getPipelineVertexInputStateFormat(Context *context,
                                                      angle::FormatID formatID,
                                                      bool compressed,
                                                      const gl::ComponentType programAttribType,
                                                      uint32_t attribIndex);

    // Helpers to dump the state
    const PipelineVertexInputState &getVertexInputStateForLog() const { return mVertexInput; }
    const PipelineShadersState &getShadersStateForLog() const { return mShaders; }
    const PipelineSharedNonVertexInputState &getSharedNonVertexInputStateForLog() const
    {
        return mSharedNonVertexInput;
    }
    const PipelineFragmentOutputState &getFragmentOutputStateForLog() const
    {
        return mFragmentOutput;
    }
  private:
    void updateSubpass(GraphicsPipelineTransitionBits *transition, uint32_t subpass);

    const void *getPipelineSubsetMemory(GraphicsPipelineSubset subset, size_t *sizeOut) const;

    void initializePipelineVertexInputState(
        Context *context,
        GraphicsPipelineVertexInputVulkanStructs *stateOut,
        GraphicsPipelineDynamicStateList *dynamicStateListOut) const;

    void initializePipelineShadersState(
        Context *context,
        const ShaderModuleMap &shaders,
        const SpecializationConstants &specConsts,
        GraphicsPipelineShadersVulkanStructs *stateOut,
        GraphicsPipelineDynamicStateList *dynamicStateListOut) const;

    void initializePipelineSharedNonVertexInputState(
        Context *context,
        GraphicsPipelineSharedNonVertexInputVulkanStructs *stateOut,
        GraphicsPipelineDynamicStateList *dynamicStateListOut) const;

    void initializePipelineFragmentOutputState(
        Context *context,
        GraphicsPipelineFragmentOutputVulkanStructs *stateOut,
        GraphicsPipelineDynamicStateList *dynamicStateListOut) const;

    PipelineShadersState mShaders;
    PipelineSharedNonVertexInputState mSharedNonVertexInput;
    PipelineFragmentOutputState mFragmentOutput;
    PipelineVertexInputState mVertexInput;
};

// Verify the packed pipeline description has no gaps in the packing.
// This is not guaranteed by the spec, but is validated by a compile-time check.
// No gaps or padding at the end ensures that hashing and memcmp checks will not run
// into uninitialized memory regions.
constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");

// Values are based on data recorded here -> https://anglebug.com/42267114#comment5
constexpr size_t kDefaultDescriptorSetLayoutBindingsCount = 8;
constexpr size_t kDefaultImmutableSamplerBindingsCount = 1;
using DescriptorSetLayoutBindingVector =
    angle::FastVector<VkDescriptorSetLayoutBinding, kDefaultDescriptorSetLayoutBindingsCount>;

// A packed description of a descriptor set layout. Used similarly to RenderPassDesc and
// GraphicsPipelineDesc. Currently we only need to differentiate layouts based on sampler and ubo
// usage. In the future we could generalize this.
class DescriptorSetLayoutDesc final
{
  public:
    DescriptorSetLayoutDesc();
    ~DescriptorSetLayoutDesc();
    DescriptorSetLayoutDesc(const DescriptorSetLayoutDesc &other);
    DescriptorSetLayoutDesc &operator=(const DescriptorSetLayoutDesc &other);

    size_t hash() const;
    bool operator==(const DescriptorSetLayoutDesc &other) const;

    void addBinding(uint32_t bindingIndex,
                    VkDescriptorType descriptorType,
                    uint32_t count,
                    VkShaderStageFlags stages,
                    const Sampler *immutableSampler);

    void unpackBindings(DescriptorSetLayoutBindingVector *bindings) const;

    bool empty() const { return mDescriptorSetLayoutBindings.empty(); }

  private:
    // There is a small risk of an issue: if the sampler cache is evicted but not the descriptor
    // cache, we would have an invalid handle here. Thus we propose follow-up work:
    // TODO: https://issuetracker.google.com/issues/159156775: Have immutable sampler use serial
    union PackedDescriptorSetBinding
    {
        static constexpr uint8_t kInvalidType = 255;

        struct
        {
            uint8_t type;    // Stores a packed VkDescriptorType descriptorType.
            uint8_t stages;  // Stores a packed VkShaderStageFlags.
            uint16_t count : 15;  // Stores a packed uint32_t descriptorCount.
            uint16_t hasImmutableSampler : 1;  // Whether this binding has an immutable sampler.
        };
        uint32_t value;

        bool operator==(const PackedDescriptorSetBinding &other) const
        {
            return value == other.value;
        }
    };

    // 1x 32bit
    static_assert(sizeof(PackedDescriptorSetBinding) == 4, "Unexpected size");

    angle::FastVector<VkSampler, kDefaultImmutableSamplerBindingsCount> mImmutableSamplers;
    angle::FastVector<PackedDescriptorSetBinding, kDefaultDescriptorSetLayoutBindingsCount>
        mDescriptorSetLayoutBindings;

#if !defined(ANGLE_IS_64_BIT_CPU)
    ANGLE_MAYBE_UNUSED_PRIVATE_FIELD uint32_t mPadding = 0;
#endif
};
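// Example usage (illustrative; the binding values are hypothetical):
//
//     DescriptorSetLayoutDesc desc;
//     desc.addBinding(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, VK_SHADER_STAGE_VERTEX_BIT,
//                     nullptr);
//     size_t cacheKey = desc.hash();  // used to look up or create a DescriptorSetLayout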

// The following are for caching descriptor set layouts. Limited to a maximum of four descriptor
// set layouts (one per DescriptorSetIndex). This can be extended in the future.
constexpr size_t kMaxDescriptorSetLayouts = ToUnderlying(DescriptorSetIndex::EnumCount);

union PackedPushConstantRange
{
    struct
    {
        uint8_t offset;
        uint8_t size;
        uint16_t stageMask;
    };
    uint32_t value;

    bool operator==(const PackedPushConstantRange &other) const { return value == other.value; }
};

static_assert(sizeof(PackedPushConstantRange) == sizeof(uint32_t), "Unexpected Size");

template <typename T>
using DescriptorSetArray = angle::PackedEnumMap<DescriptorSetIndex, T>;
using DescriptorSetLayoutPointerArray = DescriptorSetArray<DescriptorSetLayoutPtr>;

class PipelineLayoutDesc final
{
  public:
    PipelineLayoutDesc();
    ~PipelineLayoutDesc();
    PipelineLayoutDesc(const PipelineLayoutDesc &other);
    PipelineLayoutDesc &operator=(const PipelineLayoutDesc &rhs);

    size_t hash() const;
    bool operator==(const PipelineLayoutDesc &other) const;

    void updateDescriptorSetLayout(DescriptorSetIndex setIndex,
                                   const DescriptorSetLayoutDesc &desc);
    void updatePushConstantRange(VkShaderStageFlags stageMask, uint32_t offset, uint32_t size);

    const PackedPushConstantRange &getPushConstantRange() const { return mPushConstantRange; }

  private:
    DescriptorSetArray<DescriptorSetLayoutDesc> mDescriptorSetLayouts;
    PackedPushConstantRange mPushConstantRange;
    ANGLE_MAYBE_UNUSED_PRIVATE_FIELD uint32_t mPadding;

    // Verify the arrays are properly packed.
    static_assert(sizeof(decltype(mDescriptorSetLayouts)) ==
                      (sizeof(DescriptorSetLayoutDesc) * kMaxDescriptorSetLayouts),
                  "Unexpected size");
};

// Verify the structure is properly packed.
static_assert(sizeof(PipelineLayoutDesc) == sizeof(DescriptorSetArray<DescriptorSetLayoutDesc>) +
                                                sizeof(PackedPushConstantRange) + sizeof(uint32_t),
              "Unexpected Size");

enum class YcbcrLinearFilterSupport
{
    Unsupported,
    Supported,
};

class YcbcrConversionDesc final
{
  public:
    YcbcrConversionDesc();
    ~YcbcrConversionDesc();
    YcbcrConversionDesc(const YcbcrConversionDesc &other);
    YcbcrConversionDesc &operator=(const YcbcrConversionDesc &other);

    size_t hash() const;
    bool operator==(const YcbcrConversionDesc &other) const;

    bool valid() const { return mExternalOrVkFormat != 0; }
    void reset();
    void update(Renderer *renderer,
                uint64_t externalFormat,
                VkSamplerYcbcrModelConversion conversionModel,
                VkSamplerYcbcrRange colorRange,
                VkChromaLocation xChromaOffset,
                VkChromaLocation yChromaOffset,
                VkFilter chromaFilter,
                VkComponentMapping components,
                angle::FormatID intendedFormatID,
                YcbcrLinearFilterSupport linearFilterSupported);
    VkFilter getChromaFilter() const { return static_cast<VkFilter>(mChromaFilter); }
    bool updateChromaFilter(Renderer *renderer, VkFilter filter);
    void updateConversionModel(VkSamplerYcbcrModelConversion conversionModel);
    uint64_t getExternalFormat() const { return mIsExternalFormat ? mExternalOrVkFormat : 0; }

    angle::Result init(Context *context, SamplerYcbcrConversion *conversionOut) const;

  private:
    // If the sampler needs to convert the image content (e.g. from YUV to RGB), then
    // mExternalOrVkFormat will be non-zero. The value is either the external format
    // as returned by vkGetAndroidHardwareBufferPropertiesANDROID or a YUV VkFormat.
    // For VkSamplerYcbcrConversion, mExternalOrVkFormat along with mIsExternalFormat,
    // mConversionModel and mColorRange work as a Serial() used elsewhere in ANGLE.
    uint64_t mExternalOrVkFormat;
    // 1 bit to identify if an external format is used
    uint32_t mIsExternalFormat : 1;
    // 3 bits to identify the conversion model
    uint32_t mConversionModel : 3;
    // 1 bit to identify the color component range
    uint32_t mColorRange : 1;
    // 1 bit to identify the x chroma location
    uint32_t mXChromaOffset : 1;
    // 1 bit to identify the y chroma location
    uint32_t mYChromaOffset : 1;
    // 1 bit to identify chroma filtering
    uint32_t mChromaFilter : 1;
    // 3 bits to identify the R component swizzle
    uint32_t mRSwizzle : 3;
    // 3 bits to identify the G component swizzle
    uint32_t mGSwizzle : 3;
    // 3 bits to identify the B component swizzle
    uint32_t mBSwizzle : 3;
    // 3 bits to identify the A component swizzle
    uint32_t mASwizzle : 3;
    // 1 bit for whether linear filtering is supported (independent of whether currently enabled)
    uint32_t mLinearFilterSupported : 1;
    uint32_t mPadding : 11;
    uint32_t mReserved;
};

static_assert(sizeof(YcbcrConversionDesc) == 16, "Unexpected YcbcrConversionDesc size");

// Packed sampler description for the sampler cache.
class SamplerDesc final
{
  public:
    SamplerDesc();
    SamplerDesc(Context *context,
                const gl::SamplerState &samplerState,
                bool stencilMode,
                const YcbcrConversionDesc *ycbcrConversionDesc,
                angle::FormatID intendedFormatID);
    ~SamplerDesc();

    SamplerDesc(const SamplerDesc &other);
    SamplerDesc &operator=(const SamplerDesc &rhs);

    void update(Renderer *renderer,
                const gl::SamplerState &samplerState,
                bool stencilMode,
                const YcbcrConversionDesc *ycbcrConversionDesc,
                angle::FormatID intendedFormatID);
    void reset();
    angle::Result init(ContextVk *contextVk, Sampler *sampler) const;

    size_t hash() const;
    bool operator==(const SamplerDesc &other) const;

  private:
    // 32*4 bits for floating point data.
    // Note: anisotropy enabled is implicitly determined by maxAnisotropy and caps.
    float mMipLodBias;
    float mMaxAnisotropy;
    float mMinLod;
    float mMaxLod;

    // 16*8 bits to uniquely identify a YCbCr conversion sampler.
    YcbcrConversionDesc mYcbcrConversionDesc;

    // 32 bits for modes + states (17 bits used, 15 bits padding).
    // 1 bit per filter (only 2 possible values in GL: linear/nearest)
    uint16_t mMagFilter : 1;
    uint16_t mMinFilter : 1;
    uint16_t mMipmapMode : 1;

    // 3 bits per address mode (5 possible values)
    uint16_t mAddressModeU : 3;
    uint16_t mAddressModeV : 3;
    uint16_t mAddressModeW : 3;

    // 1 bit for compare enabled (2 possible values)
    uint16_t mCompareEnabled : 1;

    // 3 bits for compare op (8 possible values)
    uint16_t mCompareOp : 3;

    // Values from angle::ColorGeneric::Type. Float is 0 and others are 1.
    uint16_t mBorderColorType : 1;

    uint16_t mPadding : 15;

    // 16*8 bits for BorderColor
    angle::ColorF mBorderColor;

    // 32 bits reserved for future use.
    uint32_t mReserved;
};

static_assert(sizeof(SamplerDesc) == 56, "Unexpected SamplerDesc size");

// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

class PipelineHelper;

struct GraphicsPipelineTransition
{
    GraphicsPipelineTransition();
    GraphicsPipelineTransition(const GraphicsPipelineTransition &other);
    GraphicsPipelineTransition(GraphicsPipelineTransitionBits bits,
                               const GraphicsPipelineDesc *desc,
                               PipelineHelper *pipeline);

    GraphicsPipelineTransitionBits bits;
    const GraphicsPipelineDesc *desc;
    PipelineHelper *target;
};

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition() = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    const GraphicsPipelineTransition &other) = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    GraphicsPipelineTransitionBits bits,
    const GraphicsPipelineDesc *desc,
    PipelineHelper *pipeline)
    : bits(bits), desc(desc), target(pipeline)
{}

ANGLE_INLINE bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
                                                  GraphicsPipelineTransitionBits bitsB,
                                                  const GraphicsPipelineDesc &descA,
                                                  const GraphicsPipelineDesc &descB)
{
    if (bitsA != bitsB)
        return false;

    // We currently mask over 4 bytes of the pipeline description with each dirty bit.
    // We could consider using 8 bytes and a mask of 32 bits. This would make some parts
    // of the code faster. The for loop below would scan over twice as many bits per iteration.
    // But there may be more collisions between the same dirty bit masks leading to different
    // transitions. Thus there may be additional cost when applications use many transitions.
    // We should revisit this in the future and investigate using different bit widths.
    static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");

    const uint32_t *rawPtrA = descA.getPtr<uint32_t>();
    const uint32_t *rawPtrB = descB.getPtr<uint32_t>();

    for (size_t dirtyBit : bitsA)
    {
        if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit])
            return false;
    }

    return true;
}
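
// Worked example (illustrative only): if the only dirty bit set in |bitsA| is bit 3, the loop
// above compares only rawPtrA[3] against rawPtrB[3], i.e. bytes [12, 16) of the packed
// descriptions (kGraphicsPipelineDirtyBitBytes == 4 bytes per dirty bit). Descriptions that
// differ anywhere outside the dirty words still match, which is what lets a recorded transition
// be reused for state changes that touch the same words.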

// A class that encapsulates the vk::PipelineCache and associated mutex. The mutex may be nullptr
// if synchronization is not necessary.
class PipelineCacheAccess
{
  public:
    PipelineCacheAccess()  = default;
    ~PipelineCacheAccess() = default;

    void init(const vk::PipelineCache *pipelineCache, angle::SimpleMutex *mutex)
    {
        mPipelineCache = pipelineCache;
        mMutex         = mutex;
    }

    VkResult createGraphicsPipeline(vk::Context *context,
                                    const VkGraphicsPipelineCreateInfo &createInfo,
                                    vk::Pipeline *pipelineOut);
    VkResult createComputePipeline(vk::Context *context,
                                   const VkComputePipelineCreateInfo &createInfo,
                                   vk::Pipeline *pipelineOut);

    VkResult getCacheData(vk::Context *context, size_t *cacheSize, void *cacheData);

    void merge(Renderer *renderer, const vk::PipelineCache &pipelineCache);

    bool isThreadSafe() const { return mMutex != nullptr; }

  private:
    std::unique_lock<angle::SimpleMutex> getLock();

    const vk::PipelineCache *mPipelineCache = nullptr;
    // Initialized to nullptr so a default-constructed object consistently reports "not thread
    // safe" instead of reading an indeterminate pointer in isThreadSafe().
    angle::SimpleMutex *mMutex = nullptr;
};
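
// Usage sketch (illustrative only; the cache and mutex objects are assumed to be owned elsewhere):
//
//     PipelineCacheAccess access;
//     access.init(&pipelineCache, &pipelineCacheMutex);  // pass nullptr for single-threaded use
//     ASSERT(access.isThreadSafe());
//     VkResult result = access.createGraphicsPipeline(context, createInfo, &pipelineOut);
//
// Each create/merge call presumably takes getLock() internally, which would yield an unlocked
// guard when no mutex was provided.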

// Monolithic pipeline creation tasks are created as soon as a pipeline is created out of
// libraries. However, they are not immediately posted to the worker queue to allow pacing. On
// each use of a pipeline, an attempt is made to post the task.
class CreateMonolithicPipelineTask : public Context, public angle::Closure
{
  public:
    CreateMonolithicPipelineTask(Renderer *renderer,
                                 const PipelineCacheAccess &pipelineCache,
                                 const PipelineLayout &pipelineLayout,
                                 const ShaderModuleMap &shaders,
                                 const SpecializationConstants &specConsts,
                                 const GraphicsPipelineDesc &desc);

    // The compatible render pass is set only when the task is ready to run. This is because the
    // render pass cache may have been cleared since the task was created (e.g. to accommodate
    // framebuffer fetch). Such render pass cache clears ensure there are no active tasks, so it's
    // safe to hold on to this pointer for the brief period between task post and completion.
    //
    // Not applicable to dynamic rendering.
    const RenderPassDesc &getRenderPassDesc() const { return mDesc.getRenderPassDesc(); }
    void setCompatibleRenderPass(const RenderPass *compatibleRenderPass);

    void operator()() override;

    VkResult getResult() const { return mResult; }
    Pipeline &getPipeline() { return mPipeline; }
    CacheLookUpFeedback getFeedback() const { return mFeedback; }

    void handleError(VkResult result,
                     const char *file,
                     const char *function,
                     unsigned int line) override;

  private:
    // Input to pipeline creation
    PipelineCacheAccess mPipelineCache;
    const RenderPass *mCompatibleRenderPass;
    const PipelineLayout &mPipelineLayout;
    const ShaderModuleMap &mShaders;
    SpecializationConstants mSpecConsts;
    GraphicsPipelineDesc mDesc;

    // Results
    VkResult mResult;
    Pipeline mPipeline;
    CacheLookUpFeedback mFeedback;
};

class WaitableMonolithicPipelineCreationTask
{
  public:
    ~WaitableMonolithicPipelineCreationTask();

    void setTask(std::shared_ptr<CreateMonolithicPipelineTask> &&task) { mTask = std::move(task); }
    void setRenderPass(const RenderPass *compatibleRenderPass)
    {
        mTask->setCompatibleRenderPass(compatibleRenderPass);
    }
    void onSchedule(const std::shared_ptr<angle::WaitableEvent> &waitableEvent)
    {
        mWaitableEvent = waitableEvent;
    }
    void reset()
    {
        mWaitableEvent.reset();
        mTask.reset();
    }

    bool isValid() const { return mTask.get() != nullptr; }
    bool isPosted() const { return mWaitableEvent.get() != nullptr; }
    bool isReady() { return mWaitableEvent->isReady(); }
    void wait() { mWaitableEvent->wait(); }

    std::shared_ptr<CreateMonolithicPipelineTask> getTask() const { return mTask; }

  private:
    std::shared_ptr<angle::WaitableEvent> mWaitableEvent;
    std::shared_ptr<CreateMonolithicPipelineTask> mTask;
};

class PipelineHelper final : public Resource
{
  public:
    PipelineHelper();
    ~PipelineHelper() override;
    inline explicit PipelineHelper(Pipeline &&pipeline, CacheLookUpFeedback feedback);
    PipelineHelper &operator=(PipelineHelper &&other);

    void destroy(VkDevice device);
    void release(Context *context);

    bool valid() const { return mPipeline.valid(); }
    const Pipeline &getPipeline() const { return mPipeline; }

    // Get the pipeline. If a monolithic pipeline creation task is pending, an attempt is made to
    // schedule it. If that task has finished, the pipeline is replaced with its result and the
    // old pipeline is released.
    angle::Result getPreferredPipeline(ContextVk *contextVk, const Pipeline **pipelineOut);

    ANGLE_INLINE bool findTransition(GraphicsPipelineTransitionBits bits,
                                     const GraphicsPipelineDesc &desc,
                                     PipelineHelper **pipelineOut) const
    {
        // Search could be improved using sorting or hashing.
        for (const GraphicsPipelineTransition &transition : mTransitions)
        {
            if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
            {
                *pipelineOut = transition.target;
                return true;
            }
        }

        return false;
    }

    void addTransition(GraphicsPipelineTransitionBits bits,
                       const GraphicsPipelineDesc *desc,
                       PipelineHelper *pipeline);

    // Returned by const reference to avoid copying the transition list on every call.
    const std::vector<GraphicsPipelineTransition> &getTransitions() const { return mTransitions; }

    void setComputePipeline(Pipeline &&pipeline, CacheLookUpFeedback feedback)
    {
        ASSERT(!mPipeline.valid());
        mPipeline = std::move(pipeline);

        ASSERT(mCacheLookUpFeedback == CacheLookUpFeedback::None);
        mCacheLookUpFeedback = feedback;
    }
    CacheLookUpFeedback getCacheLookUpFeedback() const { return mCacheLookUpFeedback; }

    void setLinkedLibraryReferences(vk::PipelineHelper *shadersPipeline);

    void retainInRenderPass(RenderPassCommandBufferHelper *renderPassCommands);

    void setMonolithicPipelineCreationTask(std::shared_ptr<CreateMonolithicPipelineTask> &&task)
    {
        mMonolithicPipelineCreationTask.setTask(std::move(task));
    }

  private:
    void reset();

    std::vector<GraphicsPipelineTransition> mTransitions;
    Pipeline mPipeline;
    CacheLookUpFeedback mCacheLookUpFeedback           = CacheLookUpFeedback::None;
    CacheLookUpFeedback mMonolithicCacheLookUpFeedback = CacheLookUpFeedback::None;

    // The list of pipeline helpers that were referenced when creating a linked pipeline. These
    // pipelines must be kept alive, so their serial is updated at the same time as this object.
    // Not necessary for vertex input and fragment output as they stay alive until context's
    // destruction.
    PipelineHelper *mLinkedShaders = nullptr;

    // If pipeline libraries are used and monolithic pipelines are created in parallel, this is the
    // temporary library created (previously in |mPipeline|) that is now replaced by the monolithic
    // one. It is not immediately garbage collected when replaced, because there is currently a bug
    // with that. http://anglebug.com/42266335
    Pipeline mLinkedPipelineToRelease;

    // An async task to create a monolithic pipeline. Only used if the pipeline was originally
    // created as a linked library. The |getPreferredPipeline()| call will attempt to schedule this
    // task through the share group, which manages and paces these tasks. Once the task results are
    // ready, |mPipeline| is released and replaced by the result of this task.
    WaitableMonolithicPipelineCreationTask mMonolithicPipelineCreationTask;
};

class FramebufferHelper : public Resource
{
  public:
    FramebufferHelper();
    ~FramebufferHelper() override;

    FramebufferHelper(FramebufferHelper &&other);
    FramebufferHelper &operator=(FramebufferHelper &&other);

    angle::Result init(Context *context, const VkFramebufferCreateInfo &createInfo);
    void destroy(Renderer *renderer);
    void release(ContextVk *contextVk);

    bool valid() { return mFramebuffer.valid(); }

    const Framebuffer &getFramebuffer() const
    {
        ASSERT(mFramebuffer.valid());
        return mFramebuffer;
    }

    Framebuffer &getFramebuffer()
    {
        ASSERT(mFramebuffer.valid());
        return mFramebuffer;
    }

  private:
    // Vulkan object.
    Framebuffer mFramebuffer;
};

ANGLE_INLINE PipelineHelper::PipelineHelper(Pipeline &&pipeline, CacheLookUpFeedback feedback)
    : mPipeline(std::move(pipeline)), mCacheLookUpFeedback(feedback)
{}

ANGLE_INLINE PipelineHelper &PipelineHelper::operator=(PipelineHelper &&other)
{
    ASSERT(!mPipeline.valid());

    std::swap(mPipeline, other.mPipeline);
    mCacheLookUpFeedback = other.mCacheLookUpFeedback;

    return *this;
}

struct ImageSubresourceRange
{
    // GL max is 1000 (fits in 10 bits).
    uint32_t level : 10;
    // Max 31 levels (2 ** 5 - 1). Can store levelCount-1 if we need to save another bit.
    uint32_t levelCount : 5;
    // Implementation max is 2048 (11 bits).
    uint32_t layer : 12;
    // One of vk::LayerMode values. If 0, it means all layers. Otherwise it's the count of layers
    // which is usually 1, except for multiview in which case it can be up to
    // gl::IMPLEMENTATION_MAX_2D_ARRAY_TEXTURE_LAYERS.
    uint32_t layerMode : 3;
    // For reads: Values are either ImageViewColorspace::Linear or ImageViewColorspace::SRGB
    uint32_t readColorspace : 1;
    // For writes: Values are either ImageViewColorspace::Linear or ImageViewColorspace::SRGB
    uint32_t writeColorspace : 1;

    static_assert(gl::IMPLEMENTATION_MAX_TEXTURE_LEVELS < (1 << 5),
                  "Not enough bits for level count");
    static_assert(gl::IMPLEMENTATION_MAX_2D_ARRAY_TEXTURE_LAYERS <= (1 << 12),
                  "Not enough bits for layer index");
    static_assert(gl::IMPLEMENTATION_ANGLE_MULTIVIEW_MAX_VIEWS <= (1 << 3),
                  "Not enough bits for layer count");
};

static_assert(sizeof(ImageSubresourceRange) == sizeof(uint32_t), "Size mismatch");
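
// Bit budget check (illustrative): 10 (level) + 5 (levelCount) + 12 (layer) + 3 (layerMode) +
// 1 (readColorspace) + 1 (writeColorspace) = 32 bits, so the whole range packs into a single
// uint32_t. This is what allows the std::hash<ImageSubresourceRange> specialization at the end of
// this file to hash the struct by reading it back as one uint32_t.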

inline bool operator==(const ImageSubresourceRange &a, const ImageSubresourceRange &b)
{
    return a.level == b.level && a.levelCount == b.levelCount && a.layer == b.layer &&
           a.layerMode == b.layerMode && a.readColorspace == b.readColorspace &&
           a.writeColorspace == b.writeColorspace;
}

constexpr ImageSubresourceRange kInvalidImageSubresourceRange = {0, 0, 0, 0, 0, 0};

struct ImageOrBufferViewSubresourceSerial
{
    ImageOrBufferViewSerial viewSerial;
    ImageSubresourceRange subresource;
};

inline bool operator==(const ImageOrBufferViewSubresourceSerial &a,
                       const ImageOrBufferViewSubresourceSerial &b)
{
    return a.viewSerial == b.viewSerial && a.subresource == b.subresource;
}

constexpr ImageOrBufferViewSubresourceSerial kInvalidImageOrBufferViewSubresourceSerial = {
    kInvalidImageOrBufferViewSerial, kInvalidImageSubresourceRange};

// Always starts with array element zero, with descriptorCount descriptors.
struct WriteDescriptorDesc
{
    uint8_t binding;              // Redundant: determined by the containing WriteDesc array.
    uint8_t descriptorCount;      // Number of array elements in this descriptor write.
    uint8_t descriptorType;      // Packed VkDescriptorType.
    uint8_t descriptorInfoIndex;  // Base index into an array of DescriptorInfoDescs.
};

static_assert(sizeof(WriteDescriptorDesc) == 4, "Size mismatch");

struct DescriptorInfoDesc
{
    uint32_t samplerOrBufferSerial;
    uint32_t imageViewSerialOrOffset;
    uint32_t imageLayoutOrRange;  // Packed VkImageLayout
    uint32_t imageSubresourceRange;
};

static_assert(sizeof(DescriptorInfoDesc) == 16, "Size mismatch");

// Generic description of a descriptor set. Used as a key when indexing descriptor set caches. The
// key storage is an angle::FastVector. Beyond a certain fixed size we'll end up using heap memory
// to store keys. Currently we specialize the structure for three use cases: uniforms, textures,
// and other shader resources. Because of the way the specialization works we can't currently cache
// programs that use some types of resources.
static constexpr size_t kFastDescriptorSetDescLimit = 8;

struct DescriptorDescHandles
{
    VkBuffer buffer;
    VkSampler sampler;
    VkImageView imageView;
    VkBufferView bufferView;
};

class WriteDescriptorDescs
{
  public:
    void reset()
    {
        mDescs.clear();
        mDynamicDescriptorSetCount = 0;
        mCurrentInfoIndex          = 0;
    }

    void updateShaderBuffers(const ShaderInterfaceVariableInfoMap &variableInfoMap,
                             const std::vector<gl::InterfaceBlock> &blocks,
                             VkDescriptorType descriptorType);

    void updateAtomicCounters(const ShaderInterfaceVariableInfoMap &variableInfoMap,
                              const std::vector<gl::AtomicCounterBuffer> &atomicCounterBuffers);

    void updateImages(const gl::ProgramExecutable &executable,
                      const ShaderInterfaceVariableInfoMap &variableInfoMap);

    void updateInputAttachments(const gl::ProgramExecutable &executable,
                                const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                FramebufferVk *framebufferVk);

    void updateExecutableActiveTextures(const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                        const gl::ProgramExecutable &executable);

    void updateDefaultUniform(gl::ShaderBitSet shaderTypes,
                              const ShaderInterfaceVariableInfoMap &variableInfoMap,
                              const gl::ProgramExecutable &executable);

    void updateTransformFeedbackWrite(const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                      const gl::ProgramExecutable &executable);

    void updateDynamicDescriptorsCount();

    size_t size() const { return mDescs.size(); }
    bool empty() const { return mDescs.size() == 0; }

    const WriteDescriptorDesc &operator[](uint32_t bindingIndex) const
    {
        return mDescs[bindingIndex];
    }

    size_t getTotalDescriptorCount() const { return mCurrentInfoIndex; }
    size_t getDynamicDescriptorSetCount() const { return mDynamicDescriptorSetCount; }

  private:
    bool hasWriteDescAtIndex(uint32_t bindingIndex) const
    {
        return bindingIndex < mDescs.size() && mDescs[bindingIndex].descriptorCount > 0;
    }

    void incrementDescriptorCount(uint32_t bindingIndex, uint32_t count)
    {
        // Validate that a write descriptor has already been set up for this binding.
        ASSERT(hasWriteDescAtIndex(bindingIndex));
        mDescs[bindingIndex].descriptorCount += count;
    }

    void updateWriteDesc(uint32_t bindingIndex,
                         VkDescriptorType descriptorType,
                         uint32_t descriptorCount);

    void updateInputAttachment(uint32_t binding,
                               ImageLayout layout,
                               RenderTargetVk *renderTargetVk);

    // After a preliminary minimum size, use heap memory.
    angle::FastMap<WriteDescriptorDesc, kFastDescriptorSetDescLimit> mDescs;
    size_t mDynamicDescriptorSetCount = 0;
    uint32_t mCurrentInfoIndex        = 0;
};
std::ostream &operator<<(std::ostream &os, const WriteDescriptorDescs &desc);

class DescriptorSetDesc
{
  public:
    DescriptorSetDesc()  = default;
    ~DescriptorSetDesc() = default;

    DescriptorSetDesc(const DescriptorSetDesc &other) : mDescriptorInfos(other.mDescriptorInfos) {}

    DescriptorSetDesc &operator=(const DescriptorSetDesc &other)
    {
        mDescriptorInfos = other.mDescriptorInfos;
        return *this;
    }

    size_t hash() const;

    size_t size() const { return mDescriptorInfos.size(); }
    void resize(size_t count) { mDescriptorInfos.resize(count); }

    size_t getKeySizeBytes() const { return mDescriptorInfos.size() * sizeof(DescriptorInfoDesc); }

    bool operator==(const DescriptorSetDesc &other) const
    {
        return mDescriptorInfos.size() == other.mDescriptorInfos.size() &&
               memcmp(mDescriptorInfos.data(), other.mDescriptorInfos.data(),
                      mDescriptorInfos.size() * sizeof(DescriptorInfoDesc)) == 0;
    }

    DescriptorInfoDesc &getInfoDesc(uint32_t infoDescIndex)
    {
        return mDescriptorInfos[infoDescIndex];
    }

    const DescriptorInfoDesc &getInfoDesc(uint32_t infoDescIndex) const
    {
        return mDescriptorInfos[infoDescIndex];
    }

    void updateDescriptorSet(Renderer *renderer,
                             const WriteDescriptorDescs &writeDescriptorDescs,
                             UpdateDescriptorSetsBuilder *updateBuilder,
                             const DescriptorDescHandles *handles,
                             VkDescriptorSet descriptorSet) const;

  private:
    // After a preliminary minimum size, use heap memory.
    angle::FastVector<DescriptorInfoDesc, kFastDescriptorSetDescLimit> mDescriptorInfos;
};
std::ostream &operator<<(std::ostream &os, const DescriptorSetDesc &desc);

class DescriptorPoolHelper;

// SharedDescriptorSetCacheKey.
// Because a DescriptorSet must be associated with a pool, we need a structure that wraps both.
class DescriptorSetDescAndPool final
{
  public:
    DescriptorSetDescAndPool() : mPool(nullptr) {}
    DescriptorSetDescAndPool(const DescriptorSetDesc &desc, DynamicDescriptorPool *pool)
        : mDesc(desc), mPool(pool)
    {}
    DescriptorSetDescAndPool(DescriptorSetDescAndPool &&other)
        : mDesc(other.mDesc), mPool(other.mPool)
    {
        other.mPool = nullptr;
    }
    ~DescriptorSetDescAndPool() { ASSERT(!valid()); }
    void destroy(VkDevice /*device*/) { mPool = nullptr; }

    void destroyCachedObject(Renderer *renderer);
    void releaseCachedObject(ContextVk *contextVk) { UNREACHABLE(); }
    void releaseCachedObject(Renderer *renderer);
    bool valid() const { return mPool != nullptr; }
    const DescriptorSetDesc &getDesc() const
    {
        ASSERT(valid());
        return mDesc;
    }
    bool operator==(const DescriptorSetDescAndPool &other) const
    {
        return mDesc == other.mDesc && mPool == other.mPool;
    }

  private:
    DescriptorSetDesc mDesc;
    DynamicDescriptorPool *mPool;
};
using SharedDescriptorSetCacheKey = SharedPtr<DescriptorSetDescAndPool>;
ANGLE_INLINE const SharedDescriptorSetCacheKey
CreateSharedDescriptorSetCacheKey(const DescriptorSetDesc &desc, DynamicDescriptorPool *pool)
{
    return SharedDescriptorSetCacheKey::MakeShared(VK_NULL_HANDLE, desc, pool);
}

constexpr VkDescriptorType kStorageBufferDescriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;

// Manages a descriptor set desc with a few helper routines and also stores object handles.
class DescriptorSetDescBuilder final
{
  public:
    DescriptorSetDescBuilder();
    DescriptorSetDescBuilder(size_t descriptorCount);
    ~DescriptorSetDescBuilder();

    DescriptorSetDescBuilder(const DescriptorSetDescBuilder &other);
    DescriptorSetDescBuilder &operator=(const DescriptorSetDescBuilder &other);

    const DescriptorSetDesc &getDesc() const { return mDesc; }

    void resize(size_t descriptorCount)
    {
        mDesc.resize(descriptorCount);
        mHandles.resize(descriptorCount);
        mDynamicOffsets.resize(descriptorCount);
    }

    // Specific helpers for uniforms/xfb descriptors.
    void updateUniformBuffer(uint32_t shaderIndex,
                             const WriteDescriptorDescs &writeDescriptorDescs,
                             const BufferHelper &bufferHelper,
                             VkDeviceSize bufferRange);

    void updateTransformFeedbackBuffer(const Context *context,
                                       const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                       const WriteDescriptorDescs &writeDescriptorDescs,
                                       uint32_t xfbBufferIndex,
                                       const BufferHelper &bufferHelper,
                                       VkDeviceSize bufferOffset,
                                       VkDeviceSize bufferRange);

    void updateUniformsAndXfb(Context *context,
                              const gl::ProgramExecutable &executable,
                              const WriteDescriptorDescs &writeDescriptorDescs,
                              const BufferHelper *currentUniformBuffer,
                              const BufferHelper &emptyBuffer,
                              bool activeUnpaused,
                              TransformFeedbackVk *transformFeedbackVk);

    // Specific helpers for shader resource descriptors.
    template <typename CommandBufferT>
    void updateOneShaderBuffer(CommandBufferT *commandBufferHelper,
                               const ShaderInterfaceVariableInfoMap &variableInfoMap,
                               const gl::BufferVector &buffers,
                               const gl::InterfaceBlock &block,
                               uint32_t bufferIndex,
                               VkDescriptorType descriptorType,
                               VkDeviceSize maxBoundBufferRange,
                               const BufferHelper &emptyBuffer,
                               const WriteDescriptorDescs &writeDescriptorDescs,
                               const GLbitfield memoryBarrierBits);
    template <typename CommandBufferT>
    void updateShaderBuffers(CommandBufferT *commandBufferHelper,
                             const gl::ProgramExecutable &executable,
                             const ShaderInterfaceVariableInfoMap &variableInfoMap,
                             const gl::BufferVector &buffers,
                             const std::vector<gl::InterfaceBlock> &blocks,
                             VkDescriptorType descriptorType,
                             VkDeviceSize maxBoundBufferRange,
                             const BufferHelper &emptyBuffer,
                             const WriteDescriptorDescs &writeDescriptorDescs,
                             const GLbitfield memoryBarrierBits);
    template <typename CommandBufferT>
    void updateAtomicCounters(CommandBufferT *commandBufferHelper,
                              const gl::ProgramExecutable &executable,
                              const ShaderInterfaceVariableInfoMap &variableInfoMap,
                              const gl::BufferVector &buffers,
                              const std::vector<gl::AtomicCounterBuffer> &atomicCounterBuffers,
                              const VkDeviceSize requiredOffsetAlignment,
                              const BufferHelper &emptyBuffer,
                              const WriteDescriptorDescs &writeDescriptorDescs);
    angle::Result updateImages(Context *context,
                               const gl::ProgramExecutable &executable,
                               const ShaderInterfaceVariableInfoMap &variableInfoMap,
                               const gl::ActiveTextureArray<TextureVk *> &activeImages,
                               const std::vector<gl::ImageUnit> &imageUnits,
                               const WriteDescriptorDescs &writeDescriptorDescs);
    angle::Result updateInputAttachments(vk::Context *context,
                                         const gl::ProgramExecutable &executable,
                                         const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                         FramebufferVk *framebufferVk,
                                         const WriteDescriptorDescs &writeDescriptorDescs);

    // Specialized update for textures.
    void updatePreCacheActiveTextures(Context *context,
                                      const gl::ProgramExecutable &executable,
                                      const gl::ActiveTextureArray<TextureVk *> &textures,
                                      const gl::SamplerBindingVector &samplers);

    void updateDescriptorSet(Renderer *renderer,
                             const WriteDescriptorDescs &writeDescriptorDescs,
                             UpdateDescriptorSetsBuilder *updateBuilder,
                             VkDescriptorSet descriptorSet) const;

    const uint32_t *getDynamicOffsets() const { return mDynamicOffsets.data(); }
    size_t getDynamicOffsetsSize() const { return mDynamicOffsets.size(); }

  private:
    void updateInputAttachment(Context *context,
                               uint32_t binding,
                               ImageLayout layout,
                               const vk::ImageView *imageView,
                               ImageOrBufferViewSubresourceSerial serial,
                               const WriteDescriptorDescs &writeDescriptorDescs);

    void setEmptyBuffer(uint32_t infoDescIndex,
                        VkDescriptorType descriptorType,
                        const BufferHelper &emptyBuffer);

    DescriptorSetDesc mDesc;
    angle::FastVector<DescriptorDescHandles, kFastDescriptorSetDescLimit> mHandles;
    angle::FastVector<uint32_t, kFastDescriptorSetDescLimit> mDynamicOffsets;
};

// In the FramebufferDesc object:
// - Depth/stencil serial is at index 0
// - Color serials are at indices [1, gl::IMPLEMENTATION_MAX_DRAW_BUFFERS]
// - Depth/stencil resolve attachment is at index gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+1
// - Resolve attachments are at indices [gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+2,
//   gl::IMPLEMENTATION_MAX_DRAW_BUFFERS*2+1]
// - Fragment shading rate attachment serial is at index
//   (gl::IMPLEMENTATION_MAX_DRAW_BUFFERS*2+1)+1
constexpr size_t kFramebufferDescDepthStencilIndex = 0;
constexpr size_t kFramebufferDescColorIndexOffset  = kFramebufferDescDepthStencilIndex + 1;
constexpr size_t kFramebufferDescDepthStencilResolveIndexOffset =
    kFramebufferDescColorIndexOffset + gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
constexpr size_t kFramebufferDescColorResolveIndexOffset =
    kFramebufferDescDepthStencilResolveIndexOffset + 1;
constexpr size_t kFramebufferDescFragmentShadingRateAttachmentIndexOffset =
    kFramebufferDescColorResolveIndexOffset + gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
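
// Worked example (illustrative, assuming gl::IMPLEMENTATION_MAX_DRAW_BUFFERS == 8): depth/stencil
// lands at index 0, color serials at [1, 8], the depth/stencil resolve at 9, color resolves at
// [10, 17], and the fragment shading rate attachment at 18. That is 19 serial slots in total,
// which matches the 156-byte size assertion on FramebufferDesc below: 19 * 8-byte
// ImageOrBufferViewSubresourceSerial entries plus 4 bytes of packed bitfields.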

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

class FramebufferDesc
{
  public:
    FramebufferDesc();
    ~FramebufferDesc();

    FramebufferDesc(const FramebufferDesc &other);
    FramebufferDesc &operator=(const FramebufferDesc &other);

    void updateColor(uint32_t index, ImageOrBufferViewSubresourceSerial serial);
    void updateColorResolve(uint32_t index, ImageOrBufferViewSubresourceSerial serial);
    void updateUnresolveMask(FramebufferNonResolveAttachmentMask unresolveMask);
    void updateDepthStencil(ImageOrBufferViewSubresourceSerial serial);
    void updateDepthStencilResolve(ImageOrBufferViewSubresourceSerial serial);
    ANGLE_INLINE void setWriteControlMode(gl::SrgbWriteControlMode mode)
    {
        mSrgbWriteControlMode = static_cast<uint16_t>(mode);
    }
    void updateIsMultiview(bool isMultiview) { mIsMultiview = isMultiview; }
    size_t hash() const;

    bool operator==(const FramebufferDesc &other) const;

    uint32_t attachmentCount() const;

    ImageOrBufferViewSubresourceSerial getColorImageViewSerial(uint32_t index)
    {
        ASSERT(kFramebufferDescColorIndexOffset + index < mSerials.size());
        return mSerials[kFramebufferDescColorIndexOffset + index];
    }

    FramebufferNonResolveAttachmentMask getUnresolveAttachmentMask() const;
    ANGLE_INLINE gl::SrgbWriteControlMode getWriteControlMode() const
    {
        return (mSrgbWriteControlMode == 1) ? gl::SrgbWriteControlMode::Linear
                                            : gl::SrgbWriteControlMode::Default;
    }

    void updateLayerCount(uint32_t layerCount);
    uint32_t getLayerCount() const { return mLayerCount; }
    void setColorFramebufferFetchMode(bool hasColorFramebufferFetch);
    bool hasColorFramebufferFetch() const { return mHasColorFramebufferFetch; }

    bool isMultiview() const { return mIsMultiview; }

    void updateRenderToTexture(bool isRenderToTexture);

    void updateFragmentShadingRate(ImageOrBufferViewSubresourceSerial serial);
    bool hasFragmentShadingRateAttachment() const;

    // Used by SharedFramebufferCacheKey
    void destroy(VkDevice /*device*/) { SetBitField(mIsValid, 0); }
    void destroyCachedObject(Renderer *renderer);
    void releaseCachedObject(Renderer *renderer) { UNREACHABLE(); }
    void releaseCachedObject(ContextVk *contextVk);
    bool valid() const { return mIsValid; }

  private:
    void reset();
    void update(uint32_t index, ImageOrBufferViewSubresourceSerial serial);

    // Note: this is an exclusive index. If there is one index it will be "1".
    // Maximum value is 18
    uint16_t mMaxIndex : 5;

    // Whether the render pass has input attachments or not.
    // Note that depth/stencil framebuffer fetch is only implemented for dynamic rendering, and so
    // does not interact with this class.
    uint16_t mHasColorFramebufferFetch : 1;
    static_assert(gl::IMPLEMENTATION_MAX_FRAMEBUFFER_LAYERS < (1 << 9) - 1,
                  "Not enough bits for mLayerCount");

    uint16_t mLayerCount : 9;

    uint16_t mSrgbWriteControlMode : 1;

    // If the render pass contains an initial subpass to unresolve a number of attachments, the
    // subpass description is derived from the following mask, specifying which attachments need
    // to be unresolved. Includes both color and depth/stencil attachments.
    uint16_t mUnresolveAttachmentMask : kMaxFramebufferNonResolveAttachments;

    // Whether this is a multisampled-render-to-single-sampled framebuffer. Only used when using
    // VK_EXT_multisampled_render_to_single_sampled. Only one bit is used and the rest is padding.
    uint16_t mIsRenderToTexture : 14 - kMaxFramebufferNonResolveAttachments;

    uint16_t mIsMultiview : 1;
    // Used by SharedFramebufferCacheKey to indicate if this cache key is valid or not.
    uint16_t mIsValid : 1;

    FramebufferAttachmentArray<ImageOrBufferViewSubresourceSerial> mSerials;
};

constexpr size_t kFramebufferDescSize = sizeof(FramebufferDesc);
static_assert(kFramebufferDescSize == 156, "Size check failed");

// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

using SharedFramebufferCacheKey = SharedPtr<FramebufferDesc>;
ANGLE_INLINE const SharedFramebufferCacheKey
CreateSharedFramebufferCacheKey(const FramebufferDesc &desc)
{
    return SharedFramebufferCacheKey::MakeShared(VK_NULL_HANDLE, desc);
}

// The SamplerHelper allows a Sampler to be coupled with a serial.
// Must be included before we declare SamplerCache.
class SamplerHelper final : angle::NonCopyable
{
  public:
    SamplerHelper() = default;
    ~SamplerHelper() { ASSERT(!valid()); }

    explicit SamplerHelper(SamplerHelper &&samplerHelper);
    SamplerHelper &operator=(SamplerHelper &&rhs);

    angle::Result init(Context *context, const VkSamplerCreateInfo &createInfo);
    angle::Result init(ContextVk *contextVk, const SamplerDesc &desc);
    void destroy(VkDevice device) { mSampler.destroy(device); }
    void destroy() { ASSERT(!valid()); }
    bool valid() const { return mSampler.valid(); }
    const Sampler &get() const { return mSampler; }
    SamplerSerial getSamplerSerial() const { return mSamplerSerial; }

  private:
    Sampler mSampler;
    SamplerSerial mSamplerSerial;
};

using SharedSamplerPtr = SharedPtr<SamplerHelper>;

class RenderPassHelper final : angle::NonCopyable
{
  public:
    RenderPassHelper();
    ~RenderPassHelper();

    RenderPassHelper(RenderPassHelper &&other);
    RenderPassHelper &operator=(RenderPassHelper &&other);

    void destroy(VkDevice device);
    void release(ContextVk *contextVk);

    const RenderPass &getRenderPass() const;
    RenderPass &getRenderPass();

    const RenderPassPerfCounters &getPerfCounters() const;
    RenderPassPerfCounters &getPerfCounters();

  private:
    RenderPass mRenderPass;
    RenderPassPerfCounters mPerfCounters;
};

// Helper class that manages the lifetime of various cache objects so that a cache entry can be
// destroyed when one of its components becomes invalid.
template <class SharedCacheKeyT>
class SharedCacheKeyManager
{
  public:
    SharedCacheKeyManager() = default;
    ~SharedCacheKeyManager() { ASSERT(empty()); }
    // Stores the pointer to the cache key and retains it.
    void addKey(const SharedCacheKeyT &key);
    // Iterate over the descriptor array and release the descriptor and cache.
    void releaseKeys(ContextVk *contextVk);
    void releaseKeys(Renderer *renderer);
    // Iterate over the descriptor array and destroy the descriptor and cache.
    void destroyKeys(Renderer *renderer);
    void clear();

    // The following APIs are expected to be used for assertions only.
    bool containsKey(const SharedCacheKeyT &key) const;
    bool empty() const { return mSharedCacheKeys.empty(); }
    void assertAllEntriesDestroyed();

  private:
    size_t updateEmptySlotBits();

    // Tracks an array of cache keys with refcounting. Note this owns one refcount of each
    // SharedCacheKeyT object.
    std::deque<SharedCacheKeyT> mSharedCacheKeys;

    // To speed up the search for an available slot in mSharedCacheKeys, we use a bitset to track
    // available (i.e., empty) slots.
    static constexpr size_t kInvalidSlot  = -1;
    static constexpr size_t kSlotBitCount = 64;
    using SlotBitMask = angle::BitSet64<kSlotBitCount>;
    std::vector<SlotBitMask> mEmptySlotBits;
};
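
// Usage sketch (illustrative only; |getFramebufferCacheManager()| is a hypothetical accessor on
// an object that participates in cached framebuffers): when a FramebufferDesc is inserted into a
// cache, a shared key is handed to every object the entry depends on:
//
//     SharedFramebufferCacheKey key = CreateSharedFramebufferCacheKey(desc);
//     imageViewHelper.getFramebufferCacheManager().addKey(key);
//
// If the image view is later destroyed, destroyKeys()/releaseKeys() invalidates every cache entry
// that referenced it, so no cached framebuffer outlives its attachments.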

using FramebufferCacheManager   = SharedCacheKeyManager<SharedFramebufferCacheKey>;
using DescriptorSetCacheManager = SharedCacheKeyManager<SharedDescriptorSetCacheKey>;
}  // namespace vk
}  // namespace rx

// Introduce std::hash for the above classes.
namespace std
{
template <>
struct hash<rx::vk::RenderPassDesc>
{
    size_t operator()(const rx::vk::RenderPassDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::AttachmentOpsArray>
{
    size_t operator()(const rx::vk::AttachmentOpsArray &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::DescriptorSetLayoutDesc>
{
    size_t operator()(const rx::vk::DescriptorSetLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::PipelineLayoutDesc>
{
    size_t operator()(const rx::vk::PipelineLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::ImageSubresourceRange>
{
    size_t operator()(const rx::vk::ImageSubresourceRange &key) const
    {
        return *reinterpret_cast<const uint32_t *>(&key);
    }
};

template <>
struct hash<rx::vk::DescriptorSetDesc>
{
    size_t operator()(const rx::vk::DescriptorSetDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::FramebufferDesc>
{
    size_t operator()(const rx::vk::FramebufferDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::YcbcrConversionDesc>
{
    size_t operator()(const rx::vk::YcbcrConversionDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::SamplerDesc>
{
    size_t operator()(const rx::vk::SamplerDesc &key) const { return key.hash(); }
};

// See Resource Serial types defined in vk_utils.h.
#define ANGLE_HASH_VK_SERIAL(Type)                               \
    template <>                                                  \
    struct hash<rx::vk::Type##Serial>                            \
    {                                                            \
        size_t operator()(const rx::vk::Type##Serial &key) const \
        {                                                        \
            return key.getValue();                               \
        }                                                        \
    };

ANGLE_VK_SERIAL_OP(ANGLE_HASH_VK_SERIAL)

}  // namespace std

namespace rx
{
// Cache types for various Vulkan objects
enum class VulkanCacheType
{
    CompatibleRenderPass,
    RenderPassWithOps,
    GraphicsPipeline,
    PipelineLayout,
    Sampler,
    SamplerYcbcrConversion,
    DescriptorSetLayout,
    DriverUniformsDescriptors,
    TextureDescriptors,
    UniformsAndXfbDescriptors,
    ShaderResourcesDescriptors,
    Framebuffer,
    DescriptorMetaCache,
    EnumCount
};

// Tracks cache hit and miss counters, plus the current cache size. Caches either embed this
// directly or inherit it via the HasCacheStats mixin below.
class CacheStats final : angle::NonCopyable
{
  public:
    CacheStats() { reset(); }
    ~CacheStats() {}

    CacheStats(const CacheStats &rhs)
        : mHitCount(rhs.mHitCount), mMissCount(rhs.mMissCount), mSize(rhs.mSize)
    {}

    CacheStats &operator=(const CacheStats &rhs)
    {
        mHitCount  = rhs.mHitCount;
        mMissCount = rhs.mMissCount;
        mSize      = rhs.mSize;
        return *this;
    }

    ANGLE_INLINE void hit() { mHitCount++; }
    ANGLE_INLINE void miss() { mMissCount++; }
    ANGLE_INLINE void incrementSize() { mSize++; }
    ANGLE_INLINE void decrementSize() { mSize--; }
    ANGLE_INLINE void missAndIncrementSize()
    {
        mMissCount++;
        mSize++;
    }
    ANGLE_INLINE void accumulate(const CacheStats &stats)
    {
        mHitCount += stats.mHitCount;
        mMissCount += stats.mMissCount;
        mSize += stats.mSize;
    }

    uint32_t getHitCount() const { return mHitCount; }
    uint32_t getMissCount() const { return mMissCount; }

    ANGLE_INLINE double getHitRatio() const
    {
        if (mHitCount + mMissCount == 0)
        {
            return 0;
        }
        else
        {
            return static_cast<double>(mHitCount) / (mHitCount + mMissCount);
        }
    }

    ANGLE_INLINE uint32_t getSize() const { return mSize; }
    ANGLE_INLINE void setSize(uint32_t size) { mSize = size; }

    void reset()
    {
        mHitCount  = 0;
        mMissCount = 0;
        mSize      = 0;
    }

    void resetHitAndMissCount()
    {
        mHitCount  = 0;
        mMissCount = 0;
    }

    void accumulateCacheStats(VulkanCacheType cacheType, const CacheStats &cacheStats)
    {
        mHitCount += cacheStats.getHitCount();
        mMissCount += cacheStats.getMissCount();
    }

  private:
    uint32_t mHitCount;
    uint32_t mMissCount;
    uint32_t mSize;
};
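
// Usage sketch (illustrative only):
//
//     CacheStats stats;
//     stats.hit();                   // a lookup found an existing entry
//     stats.missAndIncrementSize();  // a lookup failed and a new entry was inserted
//     double ratio = stats.getHitRatio();  // 1 hit / (1 hit + 1 miss) == 0.5
//
// The caches below keep a CacheStats member and fold it into per-context totals through
// accumulateCacheStats().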

template <VulkanCacheType CacheType>
class HasCacheStats : angle::NonCopyable
{
  public:
    template <typename Accumulator>
    void accumulateCacheStats(Accumulator *accum)
    {
        accum->accumulateCacheStats(CacheType, mCacheStats);
        mCacheStats.reset();
    }

    void getCacheStats(CacheStats *accum) const { accum->accumulate(mCacheStats); }

  protected:
    HasCacheStats()          = default;
    virtual ~HasCacheStats() = default;

    CacheStats mCacheStats;
};

using VulkanCacheStats = angle::PackedEnumMap<VulkanCacheType, CacheStats>;

// FramebufferVk Cache
class FramebufferCache final : angle::NonCopyable
{
  public:
    FramebufferCache() = default;
    ~FramebufferCache() { ASSERT(mPayload.empty()); }

    void destroy(vk::Renderer *renderer);

    bool get(ContextVk *contextVk, const vk::FramebufferDesc &desc, vk::Framebuffer &framebuffer);
    void insert(ContextVk *contextVk,
                const vk::FramebufferDesc &desc,
                vk::FramebufferHelper &&framebufferHelper);
    void erase(ContextVk *contextVk, const vk::FramebufferDesc &desc);

    size_t getSize() const { return mPayload.size(); }
    bool empty() const { return mPayload.empty(); }

  private:
    angle::HashMap<vk::FramebufferDesc, vk::FramebufferHelper> mPayload;
    CacheStats mCacheStats;
};

// TODO(jmadill): Add cache trimming/eviction.
class RenderPassCache final : angle::NonCopyable
{
  public:
    RenderPassCache();
    ~RenderPassCache();

    void destroy(ContextVk *contextVk);
    void clear(ContextVk *contextVk);

    ANGLE_INLINE angle::Result getCompatibleRenderPass(ContextVk *contextVk,
                                                       const vk::RenderPassDesc &desc,
                                                       const vk::RenderPass **renderPassOut)
    {
        auto outerIt = mPayload.find(desc);
        if (outerIt != mPayload.end())
        {
            InnerCache &innerCache = outerIt->second;
            ASSERT(!innerCache.empty());

            // Find the first element and return it.
            *renderPassOut = &innerCache.begin()->second.getRenderPass();
            mCompatibleRenderPassCacheStats.hit();
            return angle::Result::Continue;
        }

        mCompatibleRenderPassCacheStats.missAndIncrementSize();
        return addCompatibleRenderPass(contextVk, desc, renderPassOut);
    }

    angle::Result getRenderPassWithOps(ContextVk *contextVk,
                                       const vk::RenderPassDesc &desc,
                                       const vk::AttachmentOpsArray &attachmentOps,
                                       const vk::RenderPass **renderPassOut);

    static void InitializeOpsForCompatibleRenderPass(const vk::RenderPassDesc &desc,
                                                     vk::AttachmentOpsArray *opsOut);
    static angle::Result MakeRenderPass(vk::Context *context,
                                        const vk::RenderPassDesc &desc,
                                        const vk::AttachmentOpsArray &ops,
                                        vk::RenderPass *renderPass,
                                        vk::RenderPassPerfCounters *renderPassCounters);

  private:
    angle::Result getRenderPassWithOpsImpl(ContextVk *contextVk,
                                           const vk::RenderPassDesc &desc,
                                           const vk::AttachmentOpsArray &attachmentOps,
                                           bool updatePerfCounters,
                                           const vk::RenderPass **renderPassOut);

    angle::Result addCompatibleRenderPass(ContextVk *contextVk,
                                          const vk::RenderPassDesc &desc,
                                          const vk::RenderPass **renderPassOut);

    // Use a two-layer caching scheme. The top level matches the "compatible" RenderPass elements.
    // The second layer caches the attachment load/store ops and initial/final layout.
    // Switch to `std::unordered_map` to retain pointer stability.
    using InnerCache = std::unordered_map<vk::AttachmentOpsArray, vk::RenderPassHelper>;
    using OuterCache = std::unordered_map<vk::RenderPassDesc, InnerCache>;

    OuterCache mPayload;
    CacheStats mCompatibleRenderPassCacheStats;
    CacheStats mRenderPassWithOpsCacheStats;
};
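
// Lookup sketch (illustrative only, mirroring the private implementation above): a "compatible"
// query consults only the outer map, while a query with specific ops drills into the inner map:
//
//     auto outerIt = mPayload.find(desc);             // keyed by RenderPassDesc
//     InnerCache &innerCache = outerIt->second;
//     auto innerIt = innerCache.find(attachmentOps);  // keyed by load/store ops + layouts
//
// Because both layers are std::unordered_map, the stored RenderPassHelper objects keep stable
// addresses across later insertions, so pointers to their render passes can be handed out.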

enum class PipelineSource
{
    // Pipeline created when warming up the program's pipeline cache
    WarmUp,
    // Monolithic pipeline created at draw time
    Draw,
    // Pipeline created at draw time by linking partial pipeline libraries
    DrawLinked,
    // Pipeline created for UtilsVk
    Utils,
};

struct GraphicsPipelineDescCompleteHash
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const
    {
        return key.hash(vk::GraphicsPipelineSubset::Complete);
    }
};
struct GraphicsPipelineDescVertexInputHash
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const
    {
        return key.hash(vk::GraphicsPipelineSubset::VertexInput);
    }
};
struct GraphicsPipelineDescShadersHash
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const
    {
        return key.hash(vk::GraphicsPipelineSubset::Shaders);
    }
};
struct GraphicsPipelineDescFragmentOutputHash
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const
    {
        return key.hash(vk::GraphicsPipelineSubset::FragmentOutput);
    }
};

struct GraphicsPipelineDescCompleteKeyEqual
{
    bool operator()(const rx::vk::GraphicsPipelineDesc &first,
                    const rx::vk::GraphicsPipelineDesc &second) const
    {
        return first.keyEqual(second, vk::GraphicsPipelineSubset::Complete);
    }
};
struct GraphicsPipelineDescVertexInputKeyEqual
{
    bool operator()(const rx::vk::GraphicsPipelineDesc &first,
                    const rx::vk::GraphicsPipelineDesc &second) const
    {
        return first.keyEqual(second, vk::GraphicsPipelineSubset::VertexInput);
    }
};
struct GraphicsPipelineDescShadersKeyEqual
{
    bool operator()(const rx::vk::GraphicsPipelineDesc &first,
                    const rx::vk::GraphicsPipelineDesc &second) const
    {
        return first.keyEqual(second, vk::GraphicsPipelineSubset::Shaders);
    }
};
struct GraphicsPipelineDescFragmentOutputKeyEqual
{
    bool operator()(const rx::vk::GraphicsPipelineDesc &first,
                    const rx::vk::GraphicsPipelineDesc &second) const
    {
        return first.keyEqual(second, vk::GraphicsPipelineSubset::FragmentOutput);
    }
};

// Derive the KeyEqual and GraphicsPipelineSubset enum from the Hash struct.
template <typename Hash>
struct GraphicsPipelineCacheTypeHelper
{
    using KeyEqual = GraphicsPipelineDescCompleteKeyEqual;
    static constexpr vk::GraphicsPipelineSubset kSubset = vk::GraphicsPipelineSubset::Complete;
};

template <>
struct GraphicsPipelineCacheTypeHelper<GraphicsPipelineDescVertexInputHash>
{
    using KeyEqual = GraphicsPipelineDescVertexInputKeyEqual;
    static constexpr vk::GraphicsPipelineSubset kSubset = vk::GraphicsPipelineSubset::VertexInput;
};
template <>
struct GraphicsPipelineCacheTypeHelper<GraphicsPipelineDescShadersHash>
{
    using KeyEqual = GraphicsPipelineDescShadersKeyEqual;
    static constexpr vk::GraphicsPipelineSubset kSubset = vk::GraphicsPipelineSubset::Shaders;
};
template <>
struct GraphicsPipelineCacheTypeHelper<GraphicsPipelineDescFragmentOutputHash>
{
    using KeyEqual = GraphicsPipelineDescFragmentOutputKeyEqual;
    static constexpr vk::GraphicsPipelineSubset kSubset =
        vk::GraphicsPipelineSubset::FragmentOutput;
};

// TODO(jmadill): Add cache trimming/eviction.
template <typename Hash>
class GraphicsPipelineCache final : public HasCacheStats<VulkanCacheType::GraphicsPipeline>
{
  public:
    GraphicsPipelineCache() = default;
    ~GraphicsPipelineCache() override { ASSERT(mPayload.empty()); }

    void destroy(vk::Context *context);
    void release(vk::Context *context);

    void populate(const vk::GraphicsPipelineDesc &desc,
                  vk::Pipeline &&pipeline,
                  vk::PipelineHelper **pipelineHelperOut);

    // Get a pipeline from the cache, if it exists.
    ANGLE_INLINE bool getPipeline(const vk::GraphicsPipelineDesc &desc,
                                  const vk::GraphicsPipelineDesc **descPtrOut,
                                  vk::PipelineHelper **pipelineOut)
    {
        auto item = mPayload.find(desc);
        if (item == mPayload.end())
        {
            return false;
        }

        *descPtrOut  = &item->first;
        *pipelineOut = &item->second;

        mCacheStats.hit();

        return true;
    }

    angle::Result createPipeline(vk::Context *context,
                                 vk::PipelineCacheAccess *pipelineCache,
                                 const vk::RenderPass &compatibleRenderPass,
                                 const vk::PipelineLayout &pipelineLayout,
                                 const vk::ShaderModuleMap &shaders,
                                 const vk::SpecializationConstants &specConsts,
                                 PipelineSource source,
                                 const vk::GraphicsPipelineDesc &desc,
                                 const vk::GraphicsPipelineDesc **descPtrOut,
                                 vk::PipelineHelper **pipelineOut);

    angle::Result linkLibraries(vk::Context *context,
                                vk::PipelineCacheAccess *pipelineCache,
                                const vk::GraphicsPipelineDesc &desc,
                                const vk::PipelineLayout &pipelineLayout,
                                vk::PipelineHelper *vertexInputPipeline,
                                vk::PipelineHelper *shadersPipeline,
                                vk::PipelineHelper *fragmentOutputPipeline,
                                const vk::GraphicsPipelineDesc **descPtrOut,
                                vk::PipelineHelper **pipelineOut);

    // Helper for VulkanPipelineCachePerf that resets the object without destroying any Vulkan
    // objects.
    void reset() { mPayload.clear(); }

  private:
    void addToCache(PipelineSource source,
                    const vk::GraphicsPipelineDesc &desc,
                    vk::Pipeline &&pipeline,
                    vk::CacheLookUpFeedback feedback,
                    const vk::GraphicsPipelineDesc **descPtrOut,
                    vk::PipelineHelper **pipelineOut);

    using KeyEqual = typename GraphicsPipelineCacheTypeHelper<Hash>::KeyEqual;
    std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper, Hash, KeyEqual> mPayload;
};

using CompleteGraphicsPipelineCache    = GraphicsPipelineCache<GraphicsPipelineDescCompleteHash>;
using VertexInputGraphicsPipelineCache = GraphicsPipelineCache<GraphicsPipelineDescVertexInputHash>;
using ShadersGraphicsPipelineCache     = GraphicsPipelineCache<GraphicsPipelineDescShadersHash>;
using FragmentOutputGraphicsPipelineCache =
    GraphicsPipelineCache<GraphicsPipelineDescFragmentOutputHash>;
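
// Illustrative example: the Hash parameter decides which subset of the packed description
// participates in hashing and equality. For instance, with
//
//     ShadersGraphicsPipelineCache shadersCache;  // uses GraphicsPipelineSubset::Shaders
//
// two GraphicsPipelineDescs that differ only in fragment output state (e.g. blend) intentionally
// map to the same entry, since the shaders-subset pipeline library they correspond to is
// identical.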

class DescriptorSetLayoutCache final : angle::NonCopyable
{
  public:
    DescriptorSetLayoutCache();
    ~DescriptorSetLayoutCache();

    void destroy(vk::Renderer *renderer);

    angle::Result getDescriptorSetLayout(vk::Context *context,
                                         const vk::DescriptorSetLayoutDesc &desc,
                                         vk::DescriptorSetLayoutPtr *descriptorSetLayoutOut);

    // Helpers for white box tests.
    size_t getCacheHitCount() const { return mCacheStats.getHitCount(); }
    size_t getCacheMissCount() const { return mCacheStats.getMissCount(); }

  private:
    mutable angle::SimpleMutex mMutex;
    std::unordered_map<vk::DescriptorSetLayoutDesc, vk::DescriptorSetLayoutPtr> mPayload;
    CacheStats mCacheStats;
};

class PipelineLayoutCache final : public HasCacheStats<VulkanCacheType::PipelineLayout>
{
  public:
    PipelineLayoutCache();
    ~PipelineLayoutCache() override;

    void destroy(vk::Renderer *renderer);

    angle::Result getPipelineLayout(vk::Context *context,
                                    const vk::PipelineLayoutDesc &desc,
                                    const vk::DescriptorSetLayoutPointerArray &descriptorSetLayouts,
                                    vk::PipelineLayoutPtr *pipelineLayoutOut);

  private:
    mutable angle::SimpleMutex mMutex;
    std::unordered_map<vk::PipelineLayoutDesc, vk::PipelineLayoutPtr> mPayload;
};

class SamplerCache final : public HasCacheStats<VulkanCacheType::Sampler>
{
  public:
    SamplerCache();
    ~SamplerCache() override;

    void destroy(vk::Renderer *renderer);

    angle::Result getSampler(ContextVk *contextVk,
                             const vk::SamplerDesc &desc,
                             vk::SharedSamplerPtr *samplerOut);

  private:
    std::unordered_map<vk::SamplerDesc, vk::SharedSamplerPtr> mPayload;
};

// YuvConversion Cache
class SamplerYcbcrConversionCache final
    : public HasCacheStats<VulkanCacheType::SamplerYcbcrConversion>
{
  public:
    SamplerYcbcrConversionCache();
    ~SamplerYcbcrConversionCache() override;

    void destroy(vk::Renderer *renderer);

    angle::Result getSamplerYcbcrConversion(vk::Context *context,
                                            const vk::YcbcrConversionDesc &ycbcrConversionDesc,
                                            VkSamplerYcbcrConversion *vkSamplerYcbcrConversionOut);

  private:
    using SamplerYcbcrConversionMap =
        std::unordered_map<vk::YcbcrConversionDesc, vk::SamplerYcbcrConversion>;
    SamplerYcbcrConversionMap mExternalFormatPayload;
    SamplerYcbcrConversionMap mVkFormatPayload;
};

// Descriptor Set Cache
template <typename T>
class DescriptorSetCache final : angle::NonCopyable
{
  public:
    DescriptorSetCache() = default;
    ~DescriptorSetCache() { ASSERT(mPayload.empty()); }

    DescriptorSetCache(DescriptorSetCache &&other) : DescriptorSetCache()
    {
        *this = std::move(other);
    }

    DescriptorSetCache &operator=(DescriptorSetCache &&other)
    {
        std::swap(mPayload, other.mPayload);
        return *this;
    }

    void clear() { mPayload.clear(); }

    bool getDescriptorSet(const vk::DescriptorSetDesc &desc, T *descriptorSetOut)
    {
        auto iter = mPayload.find(desc);
        if (iter != mPayload.end())
        {
            *descriptorSetOut = iter->second;
            return true;
        }
        return false;
    }

    void insertDescriptorSet(const vk::DescriptorSetDesc &desc, const T &descriptorSetHelper)
    {
        mPayload.emplace(desc, descriptorSetHelper);
    }

    bool eraseDescriptorSet(const vk::DescriptorSetDesc &desc, T *descriptorSetOut)
    {
        auto iter = mPayload.find(desc);
        if (iter != mPayload.end())
        {
            *descriptorSetOut = std::move(iter->second);
            mPayload.erase(iter);
            return true;
        }
        return false;
    }

    bool eraseDescriptorSet(const vk::DescriptorSetDesc &desc)
    {
        auto iter = mPayload.find(desc);
        if (iter != mPayload.end())
        {
            mPayload.erase(iter);
            return true;
        }
        return false;
    }

    size_t getTotalCacheSize() const { return mPayload.size(); }

    size_t getTotalCacheKeySizeBytes() const
    {
        size_t totalSize = 0;
        for (const auto &iter : mPayload)
        {
            const vk::DescriptorSetDesc &desc = iter.first;
            totalSize += desc.getKeySizeBytes();
        }
        return totalSize;
    }
    bool empty() const { return mPayload.empty(); }

  private:
    angle::HashMap<vk::DescriptorSetDesc, T> mPayload;
};

// There is 1 default uniform binding used per stage.
constexpr uint32_t kReservedPerStageDefaultUniformBindingCount = 1;

class UpdateDescriptorSetsBuilder final : angle::NonCopyable
{
  public:
    UpdateDescriptorSetsBuilder();
    ~UpdateDescriptorSetsBuilder();

    VkDescriptorBufferInfo *allocDescriptorBufferInfos(size_t count);
    VkDescriptorImageInfo *allocDescriptorImageInfos(size_t count);
    VkWriteDescriptorSet *allocWriteDescriptorSets(size_t count);
    VkBufferView *allocBufferViews(size_t count);

    VkDescriptorBufferInfo &allocDescriptorBufferInfo() { return *allocDescriptorBufferInfos(1); }
    VkDescriptorImageInfo &allocDescriptorImageInfo() { return *allocDescriptorImageInfos(1); }
    VkWriteDescriptorSet &allocWriteDescriptorSet() { return *allocWriteDescriptorSets(1); }
    VkBufferView &allocBufferView() { return *allocBufferViews(1); }

    // Returns the number of written descriptor sets.
    uint32_t flushDescriptorSetUpdates(VkDevice device);

  private:
    template <typename T, const T *VkWriteDescriptorSet::*pInfo>
    T *allocDescriptorInfos(std::vector<T> *descriptorVector, size_t count);
    template <typename T, const T *VkWriteDescriptorSet::*pInfo>
    void growDescriptorCapacity(std::vector<T> *descriptorVector, size_t newSize);

    std::vector<VkDescriptorBufferInfo> mDescriptorBufferInfos;
    std::vector<VkDescriptorImageInfo> mDescriptorImageInfos;
    std::vector<VkWriteDescriptorSet> mWriteDescriptorSets;
    std::vector<VkBufferView> mBufferViews;
};

}  // namespace rx

#endif  // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_