//
// Copyright 2024 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// RefCountedEvent:
//    Manages reference count of VkEvent and its associated functions.
//

#ifndef LIBANGLE_RENDERER_VULKAN_REFCOUNTED_EVENT_H_
#define LIBANGLE_RENDERER_VULKAN_REFCOUNTED_EVENT_H_

#include <atomic>
#include <limits>
#include <queue>

#include "common/PackedEnums.h"
#include "common/SimpleMutex.h"
#include "common/debug.h"
#include "libANGLE/renderer/serial_utils.h"
#include "libANGLE/renderer/vulkan/vk_resource.h"
#include "libANGLE/renderer/vulkan/vk_utils.h"
#include "libANGLE/renderer/vulkan/vk_wrapper.h"

namespace rx
{
namespace vk
{
enum class ImageLayout;

// There are two ways to implement a barrier: using vkCmdPipelineBarrier or vkCmdWaitEvents. The
// BarrierType enum will be passed around to indicate which barrier the caller wants to use.
enum class BarrierType
{
    Pipeline,
    Event,
};

constexpr VkPipelineStageFlags kPreFragmentStageFlags =
    VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
    VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT;

constexpr VkPipelineStageFlags kAllShadersPipelineStageFlags =
    kPreFragmentStageFlags | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;

constexpr VkPipelineStageFlags kAllDepthStencilPipelineStageFlags =
    VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;

constexpr VkPipelineStageFlags kFragmentAndAttachmentPipelineStageFlags =
    VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
    VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;

// We group VK_PIPELINE_STAGE_*_BITs into different groups. The expectation is that execution
// within Fragment/PreFragment/Compute will not overlap. This information is used to optimize the
// usage of VkEvent: we try not to use a VkEvent when we know it will not provide benefits over
// pipeline barriers.
enum class PipelineStageGroup : uint8_t
{
    Other,
    PreFragmentOnly,
    FragmentOnly,
    ComputeOnly,

    InvalidEnum,
    EnumCount = InvalidEnum,
};

class PipelineStageAccessHeuristic final
{
  public:
    constexpr PipelineStageAccessHeuristic() = default;
    constexpr PipelineStageAccessHeuristic(PipelineStageGroup pipelineStageGroup)
    {
        for (size_t i = 0; i < kHeuristicWindowSize; i++)
        {
            mHeuristicBits <<= kPipelineStageGroupBitShift;
            mHeuristicBits |= ToUnderlying(pipelineStageGroup);
        }
    }
    void onAccess(PipelineStageGroup pipelineStageGroup)
    {
        mHeuristicBits <<= kPipelineStageGroupBitShift;
        mHeuristicBits |= ToUnderlying(pipelineStageGroup);
    }
    constexpr bool operator==(const PipelineStageAccessHeuristic &other) const
    {
        return mHeuristicBits == other.mHeuristicBits;
    }

  private:
    static constexpr size_t kPipelineStageGroupBitShift = 2;
    static_assert(ToUnderlying(PipelineStageGroup::EnumCount) <=
                  (1 << kPipelineStageGroupBitShift));
    static constexpr size_t kHeuristicWindowSize = 8;
    angle::BitSet16<kHeuristicWindowSize * kPipelineStageGroupBitShift> mHeuristicBits;
};
static constexpr PipelineStageAccessHeuristic kPipelineStageAccessFragmentOnly =
    PipelineStageAccessHeuristic(PipelineStageGroup::FragmentOnly);
static constexpr PipelineStageAccessHeuristic kPipelineStageAccessComputeOnly =
    PipelineStageAccessHeuristic(PipelineStageGroup::ComputeOnly);
static constexpr PipelineStageAccessHeuristic kPipelineStageAccessPreFragmentOnly =
    PipelineStageAccessHeuristic(PipelineStageGroup::PreFragmentOnly);

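// Example (hypothetical usage sketch): the heuristic is a sliding window of the last
// kHeuristicWindowSize accesses. A caller feeds it the stage group of each access and can then
// compare the window against the constants above to decide whether a VkEvent is likely worth
// using.
//
//     PipelineStageAccessHeuristic heuristic;                  // window starts as all "Other"
//     for (/* ...each access to the resource... */)
//     {
//         heuristic.onAccess(PipelineStageGroup::FragmentOnly);
//     }
//     // Once the whole window is fragment-only, it compares equal to the constant:
//     if (heuristic == kPipelineStageAccessFragmentOnly)
//     {
//         // Prefer a VkEvent-based barrier; otherwise fall back to a pipeline barrier.
//     }
//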
// Enum for the predefined VkPipelineStageFlags sets that VkEvent will be used with. Because
// VkEvent has the strict rule that waitEvent and setEvent must have matching VkPipelineStageFlags,
// it is desirable to keep one VkEvent per VkPipelineStageFlags combination. This enum table
// enumerates all possible pipeline stage combinations that VkEvent is used with. The enum maps to
// VkPipelineStageFlags via the Renderer::getPipelineStageMask call.
enum class EventStage : uint32_t
{
    Transfer                                          = 0,
    VertexShader                                      = 1,
    FragmentShader                                    = 2,
    ComputeShader                                     = 3,
    AllShaders                                        = 4,
    PreFragmentShaders                                = 5,
    FragmentShadingRate                               = 6,
    ColorAttachmentOutput                             = 7,
    ColorAttachmentOutputAndFragmentShader            = 8,
    ColorAttachmentOutputAndFragmentShaderAndTransfer = 9,
    ColorAttachmentOutputAndAllShaders                = 10,
    AllFragmentTest                                   = 11,
    AllFragmentTestAndFragmentShader                  = 12,
    AllFragmentTestAndAllShaders                      = 13,
    TransferAndComputeShader                          = 14,
    InvalidEnum                                       = 15,
    EnumCount                                         = InvalidEnum,
};

// Initialize EventStage to VkPipelineStageFlags mapping table.
void InitializeEventAndPipelineStagesMap(
    angle::PackedEnumMap<EventStage, VkPipelineStageFlags> *mapping,
    VkPipelineStageFlags supportedVulkanPipelineStageMask);

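// Illustrative example (the authoritative mapping lives in the corresponding .cpp): after
// initialization, each entry is expected to hold the OR of the stage bits that the enum name
// describes, masked by what the device supports, e.g.:
//
//     angle::PackedEnumMap<EventStage, VkPipelineStageFlags> map;
//     InitializeEventAndPipelineStagesMap(&map, supportedVulkanPipelineStageMask);
//     // map[EventStage::ColorAttachmentOutputAndFragmentShader] ==
//     //     (VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
//     //      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) & supportedVulkanPipelineStageMask
//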
// vkCmdWaitEvents requires that srcStageMask must be the bitwise OR of the stageMask parameter
// used in previous calls to vkCmdSetEvent (see VUID-vkCmdWaitEvents-srcStageMask-01158). This
// means we must keep a record of what stageMask each event was used with in the vkCmdSetEvent
// call so that we can retrieve that information when we need to wait for the event. Instead of
// keeping just the stageMask here, we keep the EventStage, which gives us more information for
// debugging.
struct EventAndStage
{
    bool valid() const { return event.valid(); }
    Event event;
    EventStage eventStage;
};

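// Sketch of the rule above using the raw Vulkan API (illustrative only; commandBuffer, event,
// stageMask and dstStageMask are placeholder names): the stage mask recorded at set time must be
// replayed as srcStageMask at wait time.
//
//     vkCmdSetEvent(commandBuffer, event, stageMask);
//     ...
//     vkCmdWaitEvents(commandBuffer, 1, &event, stageMask /* must match the set */, dstStageMask,
//                     0, nullptr, 0, nullptr, 0, nullptr);
//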
// vkCmdSetEvent is called after vkCmdEndRenderPass, and all images used at the given pipeline
// stage (i.e., with the same stageMask) will be tracked by the same event. This means there will
// be multiple objects pointing to the same event. Events are thus reference counted so that we do
// not destroy an event while other objects are still referencing it.
class RefCountedEvent final
{
  public:
    RefCountedEvent() { mHandle = nullptr; }
    ~RefCountedEvent() { ASSERT(mHandle == nullptr); }

    // Move constructor moves the reference of the underlying object from other to this.
    RefCountedEvent(RefCountedEvent &&other)
    {
        mHandle       = other.mHandle;
        other.mHandle = nullptr;
    }

    // Copy constructor adds a reference to the underlying object.
    RefCountedEvent(const RefCountedEvent &other)
    {
        ASSERT(other.valid());
        mHandle = other.mHandle;
        mHandle->addRef();
    }

    // Move assignment moves the reference of the underlying object from other to this.
    RefCountedEvent &operator=(RefCountedEvent &&other)
    {
        ASSERT(!valid());
        ASSERT(other.valid());
        std::swap(mHandle, other.mHandle);
        return *this;
    }

    // Copy assignment adds a reference to the underlying object.
    RefCountedEvent &operator=(const RefCountedEvent &other)
    {
        ASSERT(!valid());
        ASSERT(other.valid());
        mHandle = other.mHandle;
        mHandle->addRef();
        return *this;
    }

    // Returns true if both point to the same underlying object.
    bool operator==(const RefCountedEvent &other) const { return mHandle == other.mHandle; }

    // Create the VkEvent and associate it with the given event stage. Returns true on success and
    // false otherwise.
    bool init(Context *context, EventStage eventStage);

    // Release one reference count to the underlying Event object and destroy or recycle the handle
    // to the renderer's recycler if this is the very last reference.
    void release(Renderer *renderer);

    // Release one reference count to the underlying Event object and destroy or recycle the handle
    // to the context share group's recycler if this is the very last reference.
    void release(Context *context);

    // Destroy the event and mHandle. The caller must ensure there is no outstanding reference to
    // mHandle.
    void destroy(VkDevice device);

    bool valid() const { return mHandle != nullptr; }

    // Only intended for assertions in recyclers.
    bool validAndNoReference() const { return mHandle != nullptr && !mHandle->isReferenced(); }

    // Returns the underlying Event object.
    const Event &getEvent() const
    {
        ASSERT(valid());
        return mHandle->get().event;
    }

    EventStage getEventStage() const
    {
        ASSERT(mHandle != nullptr);
        return mHandle->get().eventStage;
    }

  private:
    // Release one reference count to the underlying Event object and destroy or recycle the handle
    // to the provided recycler if this is the very last reference.
    friend class RefCountedEventsGarbage;
    template <typename RecyclerT>
    void releaseImpl(Renderer *renderer, RecyclerT *recycler);

    RefCounted<EventAndStage> *mHandle;
};
using RefCountedEventCollector = std::deque<RefCountedEvent>;

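// Example (hypothetical usage sketch, assuming a valid Context *context):
//
//     RefCountedEvent event;
//     if (event.init(context, EventStage::FragmentShader))
//     {
//         RefCountedEvent copy = event;  // addRef: both now point at the same VkEvent
//         ASSERT(copy == event);
//         copy.release(context);         // drops one reference
//         event.release(context);        // last reference: VkEvent is destroyed or recycled
//     }
//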
// Tracks a list of RefCountedEvents per EventStage.
struct EventMaps
{
    angle::PackedEnumMap<EventStage, RefCountedEvent> map;
    // The mask is used to accelerate looping over the map.
    angle::PackedEnumBitSet<EventStage, uint64_t> mask;
    // Only used by RenderPassCommandBufferHelper.
    angle::PackedEnumMap<EventStage, VkEvent> vkEvents;
};

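// Example (sketch; "eventMaps" is a placeholder name): the mask makes it cheap to visit only the
// stages that currently hold an event instead of scanning every EventStage entry.
//
//     for (EventStage stage : eventMaps.mask)
//     {
//         const RefCountedEvent &event = eventMaps.map[stage];
//         // ... use event ...
//     }
//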
// This class tracks a vector of RefCountedEvent garbage. For performance reasons, instead of
// individually tracking each VkEvent garbage, we collect all events that are accessed in the
// CommandBufferHelper into this class. After we submit the command buffer, we treat this vector of
// events as one garbage object and add it to the renderer's garbage list. The garbage cleanup will
// decrement the refCount and destroy the event only when the last refCount goes away. Basically
// all GPU usage shares one refCount, and that refCount ensures we never destroy the event until
// the GPU is finished.
class RefCountedEventsGarbage final
{
  public:
    RefCountedEventsGarbage() = default;
    ~RefCountedEventsGarbage() { ASSERT(mRefCountedEvents.empty()); }

    RefCountedEventsGarbage(const QueueSerial &queueSerial,
                            RefCountedEventCollector &&refCountedEvents)
        : mQueueSerial(queueSerial), mRefCountedEvents(std::move(refCountedEvents))
    {
        ASSERT(!mRefCountedEvents.empty());
    }

    void destroy(Renderer *renderer);

    // Check the queue serial and release the events to the recycler if the GPU has finished.
    bool releaseIfComplete(Renderer *renderer, RefCountedEventsGarbageRecycler *recycler);

    // Check the queue serial and move all events to releasedBucket if the GPU has finished. This
    // is only used by RefCountedEventRecycler.
    bool moveIfComplete(Renderer *renderer, std::deque<RefCountedEventCollector> *releasedBucket);

    bool empty() const { return mRefCountedEvents.empty(); }

    size_t size() const { return mRefCountedEvents.size(); }

  private:
    QueueSerial mQueueSerial;
    RefCountedEventCollector mRefCountedEvents;
};

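// Example (illustrative sketch; variable names are placeholders): after a submission, every event
// referenced by the command buffer is handed off as a single garbage object keyed by the queue
// serial of that submission.
//
//     RefCountedEventCollector eventsUsedBySubmission = /* collected by CommandBufferHelper */;
//     RefCountedEventsGarbage garbage(submitQueueSerial, std::move(eventsUsedBySubmission));
//     // Later, once the serial has finished on the GPU:
//     //     garbage.releaseIfComplete(renderer, &garbageRecycler);
//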
// Two-level RefCountedEvent recycling system: for performance reasons, we have two levels of event
// recyclers. The first level is per ShareGroupVk, which owns a RefCountedEventsGarbageRecycler.
// RefCountedEvent garbage is added to it without any lock. Once the GPU completes, the refCount is
// decremented. When the last refCount goes away, the event goes into mEventsToReset. Note that
// since ShareGroupVk access is already protected by the context share lock at the API level, no
// lock is taken and reference counting is not atomic. At RefCountedEventsGarbageRecycler::cleanup
// time, the entire mEventsToReset is added into the renderer's list. The renderer owns the
// RefCountedEventRecycler list, and all access to it is protected with a simple mutex lock. When
// any context calls OutsideRenderPassCommandBufferHelper::flushToPrimary, mEventsToReset is
// retrieved from the renderer and the reset commands are added to the command buffer. The events
// are then moved to the renderer's garbage list. They are checked along with the renderer's
// garbage cleanup, and if completed, they get moved to the renderer's mEventsToReuse list. When a
// RefCountedEvent is needed, we always dip into the ShareGroupVk's mEventsToReuse list first. If
// it is empty, we then dip into the renderer's mEventsToReuse and grab a collector of events to
// reuse. That way the traffic into the renderer is minimized, as most calls will be contained in
// the ShareGroupVk.

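// Assumed life cycle of a recycled event, pieced together from the description above and the two
// recycler APIs below (a sketch, not authoritative):
//
//     1. Submission: ShareGroupVk's RefCountedEventsGarbageRecycler::collectGarbage() receives
//        the events used by the command buffer, keyed by queue serial.
//     2. cleanup(): completed garbage drops its references; events whose last reference went away
//        land in the local mEventsToReset, and that batch is handed to the renderer's
//        RefCountedEventRecycler via recycle().
//     3. resetEvents(): reset commands are recorded for those events and they move to the
//        resetting queue; cleanupResettingEvents() later moves finished ones to mEventsToReuse.
//     4. Allocation: RefCountedEventsGarbageRecycler::fetch() serves from the local reuse list,
//        refilling it from the renderer via fetchEventsToReuse() when empty.
//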
// Thread-safe event recycler, protected by its own lock.
class RefCountedEventRecycler final
{
  public:
    RefCountedEventRecycler() {}
    ~RefCountedEventRecycler()
    {
        ASSERT(mEventsToReset.empty());
        ASSERT(mResettingQueue.empty());
        ASSERT(mEventsToReuse.empty());
    }

    void destroy(VkDevice device);

    // Add a single event to the toReset list.
    void recycle(RefCountedEvent &&garbageObject)
    {
        ASSERT(garbageObject.validAndNoReference());
        std::lock_guard<angle::SimpleMutex> lock(mMutex);
        if (mEventsToReset.empty())
        {
            mEventsToReset.emplace_back();
        }
        mEventsToReset.back().emplace_back(std::move(garbageObject));
    }

    // Add a list of events to the toReset list.
    void recycle(RefCountedEventCollector &&garbageObjects)
    {
        ASSERT(!garbageObjects.empty());
        for (const RefCountedEvent &event : garbageObjects)
        {
            ASSERT(event.validAndNoReference());
        }
        std::lock_guard<angle::SimpleMutex> lock(mMutex);
        mEventsToReset.emplace_back(std::move(garbageObjects));
    }

    // Reset all events in the toReset list and move them to the resetting queue.
    void resetEvents(Context *context,
                     const QueueSerial queueSerial,
                     PrimaryCommandBuffer *commandbuffer);

    // Clean up the resetting event list and move completed events to the toReuse list.
    // The number of events released is returned.
    size_t cleanupResettingEvents(Renderer *renderer);

    // Fetch a list of events that are ready to be reused. Returns true if eventsToReuseOut is
    // populated.
    bool fetchEventsToReuse(RefCountedEventCollector *eventsToReuseOut);

  private:
    angle::SimpleMutex mMutex;
    // RefCountedEvent lists that have been released and need to be reset.
    std::deque<RefCountedEventCollector> mEventsToReset;
    // RefCountedEvent lists that are currently being reset.
    std::queue<RefCountedEventsGarbage> mResettingQueue;
    // RefCountedEvent lists that have already been reset and are ready to be reused.
    std::deque<RefCountedEventCollector> mEventsToReuse;
};

// Not thread-safe event garbage collection and recycler. The caller must ensure thread safety. It
// is intended to be used by ShareGroupVk, where all access should already be protected by the
// share context lock.
class RefCountedEventsGarbageRecycler final
{
  public:
    RefCountedEventsGarbageRecycler() : mGarbageCount(0) {}
    ~RefCountedEventsGarbageRecycler();

    // Release all garbage and free events.
    void destroy(Renderer *renderer);

    // Walk the garbage list and move completed garbage to the free list.
    void cleanup(Renderer *renderer);

    void collectGarbage(const QueueSerial &queueSerial, RefCountedEventCollector &&refCountedEvents)
    {
        mGarbageCount += refCountedEvents.size();
        mGarbageQueue.emplace(queueSerial, std::move(refCountedEvents));
    }

    void recycle(RefCountedEvent &&garbageObject)
    {
        ASSERT(garbageObject.validAndNoReference());
        mEventsToReset.emplace_back(std::move(garbageObject));
    }

    bool fetch(Renderer *renderer, RefCountedEvent *outObject);

    size_t getGarbageCount() const { return mGarbageCount; }

  private:
    RefCountedEventCollector mEventsToReset;
    std::queue<RefCountedEventsGarbage> mGarbageQueue;
    Recycler<RefCountedEvent> mEventsToReuse;
    size_t mGarbageCount;
};

// This wraps the data and API for the vkCmdWaitEvents call.
class EventBarrier : angle::NonCopyable
{
  public:
    EventBarrier()
        : mSrcStageMask(0),
          mDstStageMask(0),
          mMemoryBarrierSrcAccess(0),
          mMemoryBarrierDstAccess(0),
          mImageMemoryBarrierCount(0),
          mEvent(VK_NULL_HANDLE)
    {}

    EventBarrier(VkPipelineStageFlags srcStageMask,
                 VkPipelineStageFlags dstStageMask,
                 VkAccessFlags srcAccess,
                 VkAccessFlags dstAccess,
                 const VkEvent &event)
        : mSrcStageMask(srcStageMask),
          mDstStageMask(dstStageMask),
          mMemoryBarrierSrcAccess(srcAccess),
          mMemoryBarrierDstAccess(dstAccess),
          mImageMemoryBarrierCount(0),
          mEvent(event)
    {
        ASSERT(mEvent != VK_NULL_HANDLE);
    }

    EventBarrier(VkPipelineStageFlags srcStageMask,
                 VkPipelineStageFlags dstStageMask,
                 const VkEvent &event,
                 const VkImageMemoryBarrier &imageMemoryBarrier)
        : mSrcStageMask(srcStageMask),
          mDstStageMask(dstStageMask),
          mMemoryBarrierSrcAccess(0),
          mMemoryBarrierDstAccess(0),
          mImageMemoryBarrierCount(1),
          mEvent(event),
          mImageMemoryBarrier(imageMemoryBarrier)
    {
        ASSERT(mEvent != VK_NULL_HANDLE);
        ASSERT(mImageMemoryBarrier.image != VK_NULL_HANDLE);
        ASSERT(mImageMemoryBarrier.pNext == nullptr);
    }

    EventBarrier(EventBarrier &&other)
    {
        mSrcStageMask            = other.mSrcStageMask;
        mDstStageMask            = other.mDstStageMask;
        mMemoryBarrierSrcAccess  = other.mMemoryBarrierSrcAccess;
        mMemoryBarrierDstAccess  = other.mMemoryBarrierDstAccess;
        mImageMemoryBarrierCount = other.mImageMemoryBarrierCount;
        std::swap(mEvent, other.mEvent);
        std::swap(mImageMemoryBarrier, other.mImageMemoryBarrier);
        other.mSrcStageMask            = 0;
        other.mDstStageMask            = 0;
        other.mMemoryBarrierSrcAccess  = 0;
        other.mMemoryBarrierDstAccess  = 0;
        other.mImageMemoryBarrierCount = 0;
    }

    ~EventBarrier() {}

    bool isEmpty() const { return mEvent == VK_NULL_HANDLE; }

    bool hasEvent(const VkEvent &event) const { return mEvent == event; }

    void addAdditionalStageAccess(VkPipelineStageFlags dstStageMask, VkAccessFlags dstAccess)
    {
        mDstStageMask |= dstStageMask;
        mMemoryBarrierDstAccess |= dstAccess;
    }

    void execute(PrimaryCommandBuffer *primary);

    void addDiagnosticsString(std::ostringstream &out) const;

  private:
    friend class EventBarrierArray;
    VkPipelineStageFlags mSrcStageMask;
    VkPipelineStageFlags mDstStageMask;
    VkAccessFlags mMemoryBarrierSrcAccess;
    VkAccessFlags mMemoryBarrierDstAccess;
    uint32_t mImageMemoryBarrierCount;
    VkEvent mEvent;
    VkImageMemoryBarrier mImageMemoryBarrier;
};

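// Conceptually, EventBarrier::execute() is expected to record something roughly equivalent to the
// following (a sketch; "commandBuffer" stands for the primary command buffer's VkCommandBuffer
// handle, and the real implementation may omit barrier arrays that are unused):
//
//     VkMemoryBarrier memoryBarrier = {};
//     memoryBarrier.sType           = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
//     memoryBarrier.srcAccessMask   = mMemoryBarrierSrcAccess;
//     memoryBarrier.dstAccessMask   = mMemoryBarrierDstAccess;
//     vkCmdWaitEvents(commandBuffer, 1, &mEvent, mSrcStageMask, mDstStageMask, 1, &memoryBarrier,
//                     0, nullptr, mImageMemoryBarrierCount, &mImageMemoryBarrier);
//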
class EventBarrierArray final
{
  public:
    bool isEmpty() const { return mBarriers.empty(); }

    void execute(Renderer *renderer, PrimaryCommandBuffer *primary);

    // Add the additional stageMask to the existing waitEvent.
    void addAdditionalStageAccess(const RefCountedEvent &waitEvent,
                                  VkPipelineStageFlags dstStageMask,
                                  VkAccessFlags dstAccess);

    void addMemoryEvent(Renderer *renderer,
                        const RefCountedEvent &waitEvent,
                        VkPipelineStageFlags dstStageMask,
                        VkAccessFlags dstAccess);

    void addImageEvent(Renderer *renderer,
                       const RefCountedEvent &waitEvent,
                       VkPipelineStageFlags dstStageMask,
                       const VkImageMemoryBarrier &imageMemoryBarrier);

    void reset() { ASSERT(mBarriers.empty()); }

    void addDiagnosticsString(std::ostringstream &out) const;

  private:
    std::deque<EventBarrier> mBarriers;
};
}  // namespace vk
}  // namespace rx
#endif  // LIBANGLE_RENDERER_VULKAN_REFCOUNTED_EVENT_H_