//
// Copyright 2024 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// RefCountedEvent:
//    Manages reference count of VkEvent and its associated functions.
//

#ifndef LIBANGLE_RENDERER_VULKAN_REFCOUNTED_EVENT_H_
#define LIBANGLE_RENDERER_VULKAN_REFCOUNTED_EVENT_H_

#include <atomic>
#include <deque>
#include <limits>
#include <queue>

#include "common/PackedEnums.h"
#include "common/SimpleMutex.h"
#include "common/debug.h"
#include "libANGLE/renderer/serial_utils.h"
#include "libANGLE/renderer/vulkan/vk_resource.h"
#include "libANGLE/renderer/vulkan/vk_utils.h"
#include "libANGLE/renderer/vulkan/vk_wrapper.h"

namespace rx
{
namespace vk
{
enum class ImageLayout;

// There are two ways to implement a barrier: using vkCmdPipelineBarrier or vkCmdWaitEvents. The
// BarrierType enum is passed around to indicate which barrier the caller wants to use.
enum class BarrierType
{
    Pipeline,
    Event,
};

constexpr VkPipelineStageFlags kPreFragmentStageFlags =
    VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
    VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT;

constexpr VkPipelineStageFlags kAllShadersPipelineStageFlags =
    kPreFragmentStageFlags | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;

constexpr VkPipelineStageFlags kAllDepthStencilPipelineStageFlags =
    VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;

constexpr VkPipelineStageFlags kFragmentAndAttachmentPipelineStageFlags =
    VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
    VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;

// We group VK_PIPELINE_STAGE_*_BITs into different groups. The expectation is that execution
// within the Fragment/PreFragment/Compute groups will not overlap. This information is used to
// optimize VkEvent usage: we avoid using an event when we know it will not provide benefits over
// a pipeline barrier.
enum class PipelineStageGroup : uint8_t
{
    Other,
    PreFragmentOnly,
    FragmentOnly,
    ComputeOnly,

    InvalidEnum,
    EnumCount = InvalidEnum,
};

class PipelineStageAccessHeuristic final
{
  public:
    constexpr PipelineStageAccessHeuristic() = default;
    constexpr PipelineStageAccessHeuristic(PipelineStageGroup pipelineStageGroup)
    {
        for (size_t i = 0; i < kHeuristicWindowSize; i++)
        {
            mHeuristicBits <<= kPipelineStageGroupBitShift;
            mHeuristicBits |= ToUnderlying(pipelineStageGroup);
        }
    }
    void onAccess(PipelineStageGroup pipelineStageGroup)
    {
        mHeuristicBits <<= kPipelineStageGroupBitShift;
        mHeuristicBits |= ToUnderlying(pipelineStageGroup);
    }
    constexpr bool operator==(const PipelineStageAccessHeuristic &other) const
    {
        return mHeuristicBits == other.mHeuristicBits;
    }

  private:
    static constexpr size_t kPipelineStageGroupBitShift = 2;
    static_assert(ToUnderlying(PipelineStageGroup::EnumCount) <=
                  (1 << kPipelineStageGroupBitShift));
    static constexpr size_t kHeuristicWindowSize = 8;
    angle::BitSet16<kHeuristicWindowSize * kPipelineStageGroupBitShift> mHeuristicBits;
};
static constexpr PipelineStageAccessHeuristic kPipelineStageAccessFragmentOnly =
    PipelineStageAccessHeuristic(PipelineStageGroup::FragmentOnly);
static constexpr PipelineStageAccessHeuristic kPipelineStageAccessComputeOnly =
    PipelineStageAccessHeuristic(PipelineStageGroup::ComputeOnly);
static constexpr PipelineStageAccessHeuristic kPipelineStageAccessPreFragmentOnly =
    PipelineStageAccessHeuristic(PipelineStageGroup::PreFragmentOnly);

// Enum for the predefined set of VkPipelineStageFlags that VkEvent is used with. Because VkEvent
// has the strict rule that waitEvent and setEvent must use matching VkPipelineStageFlags, it is
// desirable to keep one VkEvent per VkPipelineStageFlags combination. This enum lists all
// pipeline stage combinations that VkEvent is used with. The enum maps to VkPipelineStageFlags
// via the Renderer::getPipelineStageMask call.
enum class EventStage : uint32_t
{
    Transfer                                          = 0,
    VertexShader                                      = 1,
    FragmentShader                                    = 2,
    ComputeShader                                     = 3,
    AllShaders                                        = 4,
    PreFragmentShaders                                = 5,
    FragmentShadingRate                               = 6,
    ColorAttachmentOutput                             = 7,
    ColorAttachmentOutputAndFragmentShader            = 8,
    ColorAttachmentOutputAndFragmentShaderAndTransfer = 9,
    ColorAttachmentOutputAndAllShaders                = 10,
    AllFragmentTest                                   = 11,
    AllFragmentTestAndFragmentShader                  = 12,
    AllFragmentTestAndAllShaders                      = 13,
    TransferAndComputeShader                          = 14,
    InvalidEnum                                       = 15,
    EnumCount                                         = InvalidEnum,
};

// Initializes the EventStage to VkPipelineStageFlags mapping table.
void InitializeEventAndPipelineStagesMap(
    angle::PackedEnumMap<EventStage, VkPipelineStageFlags> *mapping,
    VkPipelineStageFlags supportedVulkanPipelineStageMask);
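// Illustrative sketch only (the call sites below are hypothetical, not part of this header): a
// resource tracker can record which pipeline-stage group touches a resource and compare the
// rolling history against the precomputed constants to decide whether a VkEvent is worthwhile.
//
//   PipelineStageAccessHeuristic heuristic;
//   heuristic.onAccess(PipelineStageGroup::FragmentOnly);  // shift the newest access into the window
//   ...
//   if (heuristic == kPipelineStageAccessFragmentOnly)
//   {
//       // The last kHeuristicWindowSize accesses were all fragment-only. Execution within that
//       // group does not overlap, so a pipeline barrier is likely as good as (and cheaper than)
//       // a VkEvent-based barrier.
//   }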
// VkCmdWaitEvents requires that srcStageMask be the bitwise OR of the stageMask parameters used
// in previous calls to vkCmdSetEvent (see VUID-vkCmdWaitEvents-srcStageMask-01158). This means we
// must keep a record of the stageMask that each event was used with in the vkCmdSetEvent call so
// that we can retrieve that information when we need to wait for the event. Instead of keeping
// just the stageMask here, we keep the EventStage for now, which gives us more information for
// debugging.
struct EventAndStage
{
    bool valid() const { return event.valid(); }
    Event event;
    EventStage eventStage;
};

// vkCmdSetEvent is called after vkCmdEndRenderPass, and all images that are used at the given
// pipeline stage (i.e., they have the same stageMask) will be tracked by the same event. This
// means there will be multiple objects pointing to the same event. Events are thus reference
// counted so that we do not destroy one while other objects are still referencing it.
class RefCountedEvent final
{
  public:
    RefCountedEvent() { mHandle = nullptr; }
    ~RefCountedEvent() { ASSERT(mHandle == nullptr); }

    // Move constructor moves the reference to the underlying object from other to this.
    RefCountedEvent(RefCountedEvent &&other)
    {
        mHandle       = other.mHandle;
        other.mHandle = nullptr;
    }

    // Copy constructor adds a reference to the underlying object.
    RefCountedEvent(const RefCountedEvent &other)
    {
        ASSERT(other.valid());
        mHandle = other.mHandle;
        mHandle->addRef();
    }

    // Move assignment moves the reference to the underlying object from other to this.
    RefCountedEvent &operator=(RefCountedEvent &&other)
    {
        ASSERT(!valid());
        ASSERT(other.valid());
        std::swap(mHandle, other.mHandle);
        return *this;
    }

    // Copy assignment adds a reference to the underlying object.
    RefCountedEvent &operator=(const RefCountedEvent &other)
    {
        ASSERT(!valid());
        ASSERT(other.valid());
        mHandle = other.mHandle;
        mHandle->addRef();
        return *this;
    }

    // Returns true if both point to the same underlying object.
    bool operator==(const RefCountedEvent &other) const { return mHandle == other.mHandle; }

    // Creates a VkEvent and associates it with the given event stage. Returns true on success and
    // false on failure.
    bool init(Context *context, EventStage eventStage);

    // Releases one reference count of the underlying Event object, and destroys or recycles the
    // handle to the renderer's recycler if this is the very last reference.
    void release(Renderer *renderer);

    // Releases one reference count of the underlying Event object, and destroys or recycles the
    // handle to the context share group's recycler if this is the very last reference.
    void release(Context *context);

    // Destroys the event and mHandle. The caller must ensure there is no outstanding reference to
    // mHandle.
    void destroy(VkDevice device);

    bool valid() const { return mHandle != nullptr; }

    // Only intended for assertions in the recycler.
    bool validAndNoReference() const { return mHandle != nullptr && !mHandle->isReferenced(); }

    // Returns the underlying Event object.
    const Event &getEvent() const
    {
        ASSERT(valid());
        return mHandle->get().event;
    }

    EventStage getEventStage() const
    {
        ASSERT(mHandle != nullptr);
        return mHandle->get().eventStage;
    }

  private:
    friend class RefCountedEventsGarbage;

    // Releases one reference count of the underlying Event object, and destroys or recycles the
    // handle to the provided recycler if this is the very last reference.
    template <typename RecyclerT>
    void releaseImpl(Renderer *renderer, RecyclerT *recycler);

    RefCounted<EventAndStage> *mHandle;
};
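// Illustrative sketch only (context/renderer below are hypothetical call-site variables): the
// reference-counting contract of RefCountedEvent. Copies add a reference, and every holder must
// call release() so that the last release destroys or recycles the underlying VkEvent.
//
//   RefCountedEvent event;
//   if (event.init(context, EventStage::FragmentShader))
//   {
//       RefCountedEvent trackerCopy = event;  // addRef: both objects share one VkEvent
//       ...
//       trackerCopy.release(renderer);        // drops one reference
//       event.release(renderer);              // last reference: destroy or recycle the VkEvent
//   }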
using RefCountedEventCollector = std::deque<RefCountedEvent>;

// Tracks a list of RefCountedEvents per EventStage.
struct EventMaps
{
    angle::PackedEnumMap<EventStage, RefCountedEvent> map;
    // The mask is used to accelerate looping over the map.
    angle::PackedEnumBitSet<EventStage, uint64_t> mask;
    // Only used by RenderPassCommandBufferHelper.
    angle::PackedEnumMap<EventStage, VkEvent> vkEvents;
};

class RefCountedEventsGarbageRecycler;

// This class tracks a vector of RefCountedEvent garbage. For performance reasons, instead of
// tracking each VkEvent garbage individually, we collect all events that are accessed in the
// CommandBufferHelper into this class. After we submit the command buffer, we treat this vector
// of events as one garbage object and add it to the renderer's garbage list. The garbage cleanup
// decrements the refCount and destroys the event only when the last refCount goes away.
// Essentially, all GPU usage shares one refCount, and that refCount ensures we never destroy the
// event until the GPU is finished.
class RefCountedEventsGarbage final
{
  public:
    RefCountedEventsGarbage() = default;
    ~RefCountedEventsGarbage() { ASSERT(mRefCountedEvents.empty()); }

    RefCountedEventsGarbage(const QueueSerial &queueSerial,
                            RefCountedEventCollector &&refCountedEvents)
        : mQueueSerial(queueSerial), mRefCountedEvents(std::move(refCountedEvents))
    {
        ASSERT(!mRefCountedEvents.empty());
    }

    void destroy(Renderer *renderer);

    // Checks the queue serial and releases the events to the recycler if the GPU has finished.
    bool releaseIfComplete(Renderer *renderer, RefCountedEventsGarbageRecycler *recycler);

    // Checks the queue serial and moves all events to releasedBucket if the GPU has finished.
    // This is only used by RefCountedEventRecycler.
    bool moveIfComplete(Renderer *renderer, std::deque<RefCountedEventCollector> *releasedBucket);

    bool empty() const { return mRefCountedEvents.empty(); }

    size_t size() const { return mRefCountedEvents.size(); }

  private:
    QueueSerial mQueueSerial;
    RefCountedEventCollector mRefCountedEvents;
};
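// Illustrative sketch only (the variables below are hypothetical): after submission, the events
// collected by a command buffer helper are wrapped as a single garbage entry keyed by the
// submission's QueueSerial instead of being tracked one VkEvent at a time.
//
//   RefCountedEventCollector collectedEvents;  // events referenced by the submitted commands
//   ...
//   RefCountedEventsGarbage garbage(submitQueueSerial, std::move(collectedEvents));
//   ...
//   // Later, during cleanup, the whole batch is released once the GPU has passed the serial:
//   if (garbage.releaseIfComplete(renderer, &shareGroupRecycler))
//   {
//       // Every event in the batch dropped its GPU reference and was recycled or destroyed.
//   }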
// Two-level RefCountedEvent recycling system: for performance reasons, we have two levels of
// event recyclers. The first level is per ShareGroupVk, which owns a
// RefCountedEventsGarbageRecycler. RefCountedEvent garbage is added to it without any lock. Once
// the GPU completes, the refCount is decremented. When the last refCount goes away, the event
// goes into mEventsToReset. Note that since ShareGroupVk access is already protected by the
// context share lock at the API level, no lock is taken and reference counting is not atomic. At
// RefCountedEventsGarbageRecycler::cleanup time, the entire mEventsToReset list is added to the
// renderer's list. The renderer owns the RefCountedEventRecycler, and all access to it is
// protected with a simple mutex lock. When any context calls
// OutsideRenderPassCommandBufferHelper::flushToPrimary, mEventsToReset is retrieved from the
// renderer and the reset commands are added to the command buffer. The events are then moved to
// the renderer's garbage list. They are checked along with the renderer's garbage cleanup, and if
// completed, they get moved to the renderer's mEventsToReuse list. When a RefCountedEvent is
// needed, we always dip into the ShareGroupVk's mEventsToReuse list first. If it is empty, we
// then dip into the renderer's mEventsToReuse, grab a collection of events, and try to reuse
// them. That way the traffic into the renderer is minimized, as most calls are contained within
// the ShareGroupVk.

// Thread-safe event recycler, protected by its own lock.
class RefCountedEventRecycler final
{
  public:
    RefCountedEventRecycler() {}
    ~RefCountedEventRecycler()
    {
        ASSERT(mEventsToReset.empty());
        ASSERT(mResettingQueue.empty());
        ASSERT(mEventsToReuse.empty());
    }

    void destroy(VkDevice device);

    // Adds a single event to the toReset list.
    void recycle(RefCountedEvent &&garbageObject)
    {
        ASSERT(garbageObject.validAndNoReference());
        std::lock_guard<angle::SimpleMutex> lock(mMutex);
        if (mEventsToReset.empty())
        {
            mEventsToReset.emplace_back();
        }
        mEventsToReset.back().emplace_back(std::move(garbageObject));
    }

    // Adds a list of events to the toReset list.
    void recycle(RefCountedEventCollector &&garbageObjects)
    {
        ASSERT(!garbageObjects.empty());
        for (const RefCountedEvent &event : garbageObjects)
        {
            ASSERT(event.validAndNoReference());
        }
        std::lock_guard<angle::SimpleMutex> lock(mMutex);
        mEventsToReset.emplace_back(std::move(garbageObjects));
    }

    // Resets all events in the toReset list and moves them to the resetting queue.
    void resetEvents(Context *context,
                     const QueueSerial queueSerial,
                     PrimaryCommandBuffer *commandbuffer);

    // Cleans up the resetting event list and moves completed events to the toReuse list. Returns
    // the number of events released.
    size_t cleanupResettingEvents(Renderer *renderer);

    // Fetches a list of events that are ready to be reused. Returns true if eventsToReuseOut is
    // populated.
    bool fetchEventsToReuse(RefCountedEventCollector *eventsToReuseOut);

  private:
    angle::SimpleMutex mMutex;
    // RefCountedEvent lists that have been released and need to be reset.
    std::deque<RefCountedEventCollector> mEventsToReset;
    // RefCountedEvent lists that are currently being reset.
    std::queue<RefCountedEventsGarbage> mResettingQueue;
    // RefCountedEvent lists that have already been reset and are ready to be reused.
    std::deque<RefCountedEventCollector> mEventsToReuse;
};
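// Illustrative sketch only (call sites are hypothetical): the renderer-level recycler's life
// cycle. Events arrive already released, the reset commands are recorded on a primary command
// buffer, and the events become reusable once that submission completes.
//
//   recycler.recycle(std::move(releasedEvents));                    // from a ShareGroupVk
//   recycler.resetEvents(context, queueSerial, &primaryCommands);   // record reset commands
//   ...                                                             // submit primaryCommands
//   recycler.cleanupResettingEvents(renderer);                      // completed resets -> toReuse
//   RefCountedEventCollector reusable;
//   if (recycler.fetchEventsToReuse(&reusable))
//   {
//       // A batch of reset VkEvents, ready to be handed back to a ShareGroupVk for reuse.
//   }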
// Event garbage collection and recycler that is not thread safe; the caller must ensure thread
// safety. It is intended to be used by ShareGroupVk, where all access is already protected by the
// share context lock.
class RefCountedEventsGarbageRecycler final
{
  public:
    RefCountedEventsGarbageRecycler() : mGarbageCount(0) {}
    ~RefCountedEventsGarbageRecycler();

    // Releases all garbage and frees events.
    void destroy(Renderer *renderer);

    // Walks the garbage list and moves completed garbage to the free list.
    void cleanup(Renderer *renderer);

    void collectGarbage(const QueueSerial &queueSerial,
                        RefCountedEventCollector &&refCountedEvents)
    {
        mGarbageCount += refCountedEvents.size();
        mGarbageQueue.emplace(queueSerial, std::move(refCountedEvents));
    }

    void recycle(RefCountedEvent &&garbageObject)
    {
        ASSERT(garbageObject.validAndNoReference());
        mEventsToReset.emplace_back(std::move(garbageObject));
    }

    bool fetch(Renderer *renderer, RefCountedEvent *outObject);

    size_t getGarbageCount() const { return mGarbageCount; }

  private:
    RefCountedEventCollector mEventsToReset;
    std::queue<RefCountedEventsGarbage> mGarbageQueue;
    Recycler<RefCountedEvent> mEventsToReuse;
    size_t mGarbageCount;
};

// This wraps the data and API for the vkCmdWaitEvents call.
class EventBarrier : angle::NonCopyable
{
  public:
    EventBarrier()
        : mSrcStageMask(0),
          mDstStageMask(0),
          mMemoryBarrierSrcAccess(0),
          mMemoryBarrierDstAccess(0),
          mImageMemoryBarrierCount(0),
          mEvent(VK_NULL_HANDLE)
    {}

    EventBarrier(VkPipelineStageFlags srcStageMask,
                 VkPipelineStageFlags dstStageMask,
                 VkAccessFlags srcAccess,
                 VkAccessFlags dstAccess,
                 const VkEvent &event)
        : mSrcStageMask(srcStageMask),
          mDstStageMask(dstStageMask),
          mMemoryBarrierSrcAccess(srcAccess),
          mMemoryBarrierDstAccess(dstAccess),
          mImageMemoryBarrierCount(0),
          mEvent(event)
    {
        ASSERT(mEvent != VK_NULL_HANDLE);
    }

    EventBarrier(VkPipelineStageFlags srcStageMask,
                 VkPipelineStageFlags dstStageMask,
                 const VkEvent &event,
                 const VkImageMemoryBarrier &imageMemoryBarrier)
        : mSrcStageMask(srcStageMask),
          mDstStageMask(dstStageMask),
          mMemoryBarrierSrcAccess(0),
          mMemoryBarrierDstAccess(0),
          mImageMemoryBarrierCount(1),
          mEvent(event),
          mImageMemoryBarrier(imageMemoryBarrier)
    {
        ASSERT(mEvent != VK_NULL_HANDLE);
        ASSERT(mImageMemoryBarrier.image != VK_NULL_HANDLE);
        ASSERT(mImageMemoryBarrier.pNext == nullptr);
    }

    EventBarrier(EventBarrier &&other)
    {
        mSrcStageMask            = other.mSrcStageMask;
        mDstStageMask            = other.mDstStageMask;
        mMemoryBarrierSrcAccess  = other.mMemoryBarrierSrcAccess;
        mMemoryBarrierDstAccess  = other.mMemoryBarrierDstAccess;
        mImageMemoryBarrierCount = other.mImageMemoryBarrierCount;
        std::swap(mEvent, other.mEvent);
        std::swap(mImageMemoryBarrier, other.mImageMemoryBarrier);
        other.mSrcStageMask            = 0;
        other.mDstStageMask            = 0;
        other.mMemoryBarrierSrcAccess  = 0;
        other.mMemoryBarrierDstAccess  = 0;
        other.mImageMemoryBarrierCount = 0;
    }

    ~EventBarrier() {}

    bool isEmpty() const { return mEvent == VK_NULL_HANDLE; }

    bool hasEvent(const VkEvent &event) const { return mEvent == event; }

    void addAdditionalStageAccess(VkPipelineStageFlags dstStageMask, VkAccessFlags dstAccess)
    {
        mDstStageMask |= dstStageMask;
        mMemoryBarrierDstAccess |= dstAccess;
    }

    void execute(PrimaryCommandBuffer *primary);

    void addDiagnosticsString(std::ostringstream &out) const;

  private:
    friend class EventBarrierArray;
    VkPipelineStageFlags mSrcStageMask;
    VkPipelineStageFlags mDstStageMask;
    VkAccessFlags mMemoryBarrierSrcAccess;
    VkAccessFlags mMemoryBarrierDstAccess;
    uint32_t mImageMemoryBarrierCount;
    VkEvent mEvent;
    VkImageMemoryBarrier mImageMemoryBarrier;
};
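// Illustrative sketch only (the stage/access values and variables are hypothetical): an
// EventBarrier pairs a previously set VkEvent with the destination stages and accesses that must
// wait on it; execute() then records the corresponding vkCmdWaitEvents call. Barriers are
// normally accumulated through the EventBarrierArray declared below.
//
//   EventBarrier barrier(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,  // matches setEvent stageMask
//                        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,          // stages that must wait
//                        VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
//                        VK_ACCESS_SHADER_READ_BIT,
//                        refCountedEvent.getEvent().getHandle());
//   barrier.execute(&primaryCommandBuffer);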
class EventBarrierArray final
{
  public:
    bool isEmpty() const { return mBarriers.empty(); }

    void execute(Renderer *renderer, PrimaryCommandBuffer *primary);

    // Adds the additional stageMask to the existing waitEvent.
    void addAdditionalStageAccess(const RefCountedEvent &waitEvent,
                                  VkPipelineStageFlags dstStageMask,
                                  VkAccessFlags dstAccess);

    void addMemoryEvent(Renderer *renderer,
                        const RefCountedEvent &waitEvent,
                        VkPipelineStageFlags dstStageMask,
                        VkAccessFlags dstAccess);

    void addImageEvent(Renderer *renderer,
                       const RefCountedEvent &waitEvent,
                       VkPipelineStageFlags dstStageMask,
                       const VkImageMemoryBarrier &imageMemoryBarrier);

    void reset() { ASSERT(mBarriers.empty()); }

    void addDiagnosticsString(std::ostringstream &out) const;

  private:
    std::deque<EventBarrier> mBarriers;
};
}  // namespace vk
}  // namespace rx
#endif  // LIBANGLE_RENDERER_VULKAN_REFCOUNTED_EVENT_H_