xref: /aosp_15_r20/external/intel-media-driver/media_driver/agnostic/common/cm/cm_queue_rt.h (revision ba62d9d3abf0e404f2022b4cd7a85e107f48596f)
1 /*
2 * Copyright (c) 2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file      cm_queue_rt.h
24 //! \brief     Contains CmQueueRT declarations.
25 //!
26 
27 #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUERT_H_
28 #define MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUERT_H_
29 
30 #include "cm_queue.h"
31 
32 #include <queue>
33 
34 #include "cm_array.h"
35 #include "cm_csync.h"
36 #include "cm_hal.h"
37 #include "cm_log.h"
38 
39 namespace CMRT_UMD
40 {
// Forward declarations of CM runtime types that this header only refers to
// through pointers or references (kept in alphabetical order).
class CmBuffer;
class CmDeviceRT;
class CmEventRT;
class CmKernel;
class CmKernelRT;
class CmSurface2D;
class CmSurface2DRT;
class CmTaskInternal;
class CmThreadGroupSpace;
class CmThreadSpaceRT;
class CmVebox;
52 
53 struct CM_GPUCOPY_KERNEL
54 {
55     CmKernel *kernel;
56     CM_GPUCOPY_KERNEL_ID kernelID;
57     bool locked;
58 };
59 
60 class ThreadSafeQueue
61 {
62 public:
Push(CmTaskInternal * element)63     bool Push(CmTaskInternal *element)
64     {
65         mCriticalSection.Acquire();
66         mQueue.push(element);
67         mCriticalSection.Release();
68         return true;
69     }
70 
Pop()71     CmTaskInternal *Pop()
72     {
73         CmTaskInternal *element = nullptr;
74         mCriticalSection.Acquire();
75         if (mQueue.empty())
76         {
77             CM_ASSERT(0);
78         }
79         else
80         {
81             element = mQueue.front();
82             mQueue.pop();
83         }
84         mCriticalSection.Release();
85         return element;
86     }
87 
Top()88     CmTaskInternal *Top()
89     {
90         CmTaskInternal *element = nullptr;
91         if (mQueue.empty())
92         {
93             CM_ASSERT(0);
94         }
95         else
96         {
97             element = mQueue.front();
98         }
99         return element;
100     }
101 
IsEmpty()102     bool IsEmpty() { return mQueue.empty(); }
103 
GetCount()104     int GetCount() { return mQueue.size(); }
105 
106 private:
107     std::queue<CmTaskInternal*> mQueue;
108     CSync mCriticalSection;
109 };
110 
111 //!
112 //! \brief    Class CmQueueRT definitions.
113 //!
114 class CmQueueRT: public CmQueue
115 {
116 public:
117     static int32_t Create(CmDeviceRT *device,
118                           CmQueueRT *&queue,
119                           CM_QUEUE_CREATE_OPTION queueCreateOption);
120 
121     static int32_t Destroy(CmQueueRT *&queue);
122 
123     CM_RT_API int32_t Enqueue(CmTask *task,
124                               CmEvent *&event,
125                               const CmThreadSpace *threadSpace = nullptr);
126 
127     CM_RT_API int32_t DestroyEvent(CmEvent *&event);
128 
129     CM_RT_API int32_t
130     EnqueueWithGroup(CmTask *task,
131                      CmEvent *&event,
132                      const CmThreadGroupSpace *threadGroupSpace = nullptr);
133 
134     CM_RT_API int32_t EnqueueVebox(CmVebox *vebox, CmEvent *&event);
135 
136     CM_RT_API int32_t EnqueueWithHints(CmTask *task,
137                                        CmEvent *&event,
138                                        uint32_t hints = 0);
139 
140     CM_RT_API int32_t EnqueueCopyCPUToGPU(CmSurface2D *surface,
141                                           const unsigned char *sysMem,
142                                           CmEvent *&event);
143 
144     CM_RT_API int32_t EnqueueCopyGPUToCPU(CmSurface2D *surface,
145                                           unsigned char *sysMem,
146                                           CmEvent *&event);
147 
148     CM_RT_API int32_t EnqueueInitSurface2D(CmSurface2D *surf2D,
149                                            const uint32_t initValue,
150                                            CmEvent *&event);
151 
152     CM_RT_API int32_t EnqueueCopyGPUToGPU(CmSurface2D *outputSurface,
153                                           CmSurface2D *inputSurface,
154                                           uint32_t option,
155                                           CmEvent *&event);
156 
157     CM_RT_API int32_t EnqueueCopyCPUToCPU(unsigned char *dstSysMem,
158                                           unsigned char *srcSysMem,
159                                           uint32_t size,
160                                           uint32_t option,
161                                           CmEvent *&event);
162 
163     CM_RT_API int32_t EnqueueCopyCPUToGPUFullStride(CmSurface2D *surface,
164                                                     const unsigned char *sysMem,
165                                                     const uint32_t widthStride,
166                                                     const uint32_t heightStride,
167                                                     const uint32_t option,
168                                                     CmEvent *&event);
169 
170     CM_RT_API int32_t EnqueueCopyGPUToCPUFullStride(CmSurface2D *surface,
171                                                     unsigned char *sysMem,
172                                                     const uint32_t widthStride,
173                                                     const uint32_t heightStride,
174                                                     const uint32_t option,
175                                                     CmEvent *&event);
176 
177     CM_RT_API int32_t EnqueueFast(CmTask *task,
178                               CmEvent *&event,
179                               const CmThreadSpace *threadSpace = nullptr);
180 
181     CM_RT_API int32_t DestroyEventFast(CmEvent *&event);
182 
183     CM_RT_API int32_t EnqueueWithGroupFast(CmTask *task,
184                                       CmEvent *&event,
185                                       const CmThreadGroupSpace *threadGroupSpace = nullptr);
186 
187     int32_t EnqueueCopyInternal_1Plane(CmSurface2DRT *surface,
188                                        unsigned char *sysMem,
189                                        CM_SURFACE_FORMAT format,
190                                        const uint32_t widthInPixel,
191                                        const uint32_t widthStride,
192                                        const uint32_t heightInRow,
193                                        const uint32_t heightStride,
194                                        const uint32_t sizePerPixel,
195                                        CM_GPUCOPY_DIRECTION direction,
196                                        const uint32_t option,
197                                        CmEvent *&event);
198 
199     int32_t EnqueueCopyInternal_2Planes(CmSurface2DRT *surface,
200                                         unsigned char *sysMem,
201                                         CM_SURFACE_FORMAT format,
202                                         const uint32_t widthInPixel,
203                                         const uint32_t widthStride,
204                                         const uint32_t heightInRow,
205                                         const uint32_t heightStride,
206                                         const uint32_t sizePerPixel,
207                                         CM_GPUCOPY_DIRECTION direction,
208                                         const uint32_t option,
209                                         CmEvent *&event);
210 
211     virtual int32_t EnqueueCopyInternal(CmSurface2DRT *surface,
212                                 unsigned char *sysMem,
213                                 const uint32_t widthStride,
214                                 const uint32_t heightStride,
215                                 CM_GPUCOPY_DIRECTION direction,
216                                 const uint32_t option,
217                                 CmEvent *&event);
218 
219     int32_t EnqueueUnalignedCopyInternal(CmSurface2DRT *surface,
220                                          unsigned char *sysMem,
221                                          const uint32_t widthStride,
222                                          const uint32_t heightStride,
223                                          CM_GPUCOPY_DIRECTION direction);
224 
225     int32_t FlushTaskWithoutSync(bool flushBlocked = false);
226 
227     int32_t GetTaskCount(uint32_t &numTasks);
228 
229     virtual int32_t TouchFlushedTasks();
230 
231     int32_t GetTaskHasThreadArg(CmKernelRT *kernelArray[],
232                                 uint32_t numKernels,
233                                 bool &threadArgExists);
234     virtual int32_t CleanQueue();
235 
236     virtual CM_QUEUE_CREATE_OPTION &GetQueueOption();
237 
238     int32_t GetOSSyncEventHandle(void *& hOSSyncEvent);
239 
GetFastTrackerIndex()240     uint32_t GetFastTrackerIndex() { return m_fastTrackerIndex; }
241 
StreamIndex()242     uint32_t StreamIndex() const { return m_streamIndex; }
243 
GpuContextHandle()244     GPU_CONTEXT_HANDLE GpuContextHandle() { return m_gpuContextHandle; };
245 
246     virtual int32_t EnqueueBufferCopy(  CmBuffer* buffer,
247                                 size_t   offset,
248                                 const unsigned char* sysMem,
249                                 uint64_t sysMemSize,
250                                 CM_GPUCOPY_DIRECTION dir,
251                                 CmEvent* wait_event,
252                                 CmEvent*& event,
253                                 uint32_t option);
254 
255 protected:
256     CmQueueRT(CmDeviceRT *device, CM_QUEUE_CREATE_OPTION queueCreateOption);
257 
258     ~CmQueueRT();
259 
260     int32_t Initialize();
261 
262     int32_t
263     Enqueue_RT(CmKernelRT *kernelArray[],
264                const uint32_t kernelCount,
265                const uint32_t totalThreadCount,
266                CmEventRT *&event,
267                const CmThreadSpaceRT *threadSpace = nullptr,
268                const uint64_t syncBitmap = 0,
269                PCM_POWER_OPTION powerOption = nullptr,
270                const uint64_t conditionalEndBitmap = 0,
271                PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo = nullptr,
272                CM_TASK_CONFIG *taskConfig = nullptr);
273 
274     int32_t Enqueue_RT(CmKernelRT *kernelArray[],
275                        const uint32_t kernelCount,
276                        const uint32_t totalThreadCount,
277                        CmEventRT *&event,
278                        const CmThreadGroupSpace *threadGroupSpace = nullptr,
279                        const uint64_t syncBitmap = 0,
280                        PCM_POWER_OPTION powerOption = nullptr,
281                        const uint64_t conditionalEndBitmap = 0,
282                        PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo = nullptr,
283                        CM_TASK_CONFIG *taskConfig = nullptr,
284                        const CM_EXECUTION_CONFIG* krnExecCfg = nullptr);
285 
286     int32_t Enqueue_RT(CmKernelRT *kernelArray[],
287                        CmEventRT *&event,
288                        uint32_t numTaskGenerated,
289                        bool isLastTask,
290                        uint32_t hints = 0,
291                        PCM_POWER_OPTION powerOption = nullptr);
292 
293     int32_t QueryFlushedTasks();
294 
295     //New sub functions for different task flush
296     int32_t FlushGeneralTask(CmTaskInternal *task);
297 
298     int32_t FlushGroupTask(CmTaskInternal *task);
299 
300     int32_t FlushVeboxTask(CmTaskInternal *task);
301 
302     int32_t FlushEnqueueWithHintsTask(CmTaskInternal *task);
303 
304     void PopTaskFromFlushedQueue();
305 
306     int32_t CreateEvent(CmTaskInternal *task,
307                         bool isVisible,
308                         int32_t &taskDriverId,
309                         CmEventRT *&event);
310 
311     int32_t AddGPUCopyKernel(CM_GPUCOPY_KERNEL* &kernelParam);
312 
313     int32_t GetGPUCopyKrnID(uint32_t widthInByte,
314                             uint32_t height,
315                             CM_SURFACE_FORMAT format,
316                             CM_GPUCOPY_DIRECTION copyDirection,
317                             CM_GPUCOPY_KERNEL_ID &kernelID);
318 
319     int32_t AllocateGPUCopyKernel(uint32_t widthInByte,
320                                   uint32_t height,
321                                   CM_SURFACE_FORMAT format,
322                                   CM_GPUCOPY_DIRECTION copyDirection,
323                                   CmKernel* &kernel);
324 
325     int32_t CreateGPUCopyKernel(uint32_t widthInByte,
326                                 uint32_t height,
327                                 CM_SURFACE_FORMAT format,
328                                 CM_GPUCOPY_DIRECTION copyDirection,
329                                 CM_GPUCOPY_KERNEL* &gpuCopyKernelParam);
330 
331     int32_t SearchGPUCopyKernel(uint32_t widthInByte,
332                                 uint32_t height,
333                                 CM_SURFACE_FORMAT format,
334                                 CM_GPUCOPY_DIRECTION copyDirection,
335                                 CM_GPUCOPY_KERNEL* &kernelParam);
336 
337     int32_t RegisterSyncEvent();
338 
339 
340     CmDeviceRT *m_device;
341     ThreadSafeQueue m_enqueuedTasks;
342     ThreadSafeQueue m_flushedTasks;
343 
344     CmDynamicArray m_eventArray;
345     CSync m_criticalSectionEvent;        // Protect m_eventArray
346     CSync m_criticalSectionHalExecute;   // Protect execution in HALCm, i.e HalCm_Execute
347     CSync m_criticalSectionFlushedTask;  // Protect QueryFlushedTask
348     CSync m_criticalSectionTaskInternal;
349 
350     uint32_t m_eventCount;
351     uint64_t m_CPUperformanceFrequency;
352 
353     CmDynamicArray m_copyKernelParamArray;
354     uint32_t m_copyKernelParamArrayCount;
355 
356     CSync m_criticalSectionGPUCopyKrn;
357 
358     CM_HAL_MAX_VALUES *m_halMaxValues;
359     CM_QUEUE_CREATE_OPTION m_queueOption;
360 
361     bool m_usingVirtualEngine;
362     MOS_VIRTUALENGINE_HINT_PARAMS m_mosVeHintParams;
363 
364     void  *m_osSyncEvent;   //KMD Notification
365 
366     uint32_t m_trackerIndex;
367     uint32_t m_fastTrackerIndex;
368 
369 private:
370     static const uint32_t INVALID_SYNC_BUFFER_HANDLE = 0xDEADBEEF;
371 
372     //--------------------------------------------------------------------------------
373     // Create a GPU context for this object.
374     //--------------------------------------------------------------------------------
375     MOS_STATUS CreateGpuContext(CM_HAL_STATE *halState,
376                                 MOS_GPU_CONTEXT gpuContextName,
377                                 MOS_GPU_NODE gpuNode,
378                                 MOS_GPUCTX_CREATOPTIONS *createOptions);
379 
380     //--------------------------------------------------------------------------------
381     // Destroy compute GPU context
382     //--------------------------------------------------------------------------------
383     MOS_STATUS DestroyComputeGpuContext();
384 
385     //--------------------------------------------------------------------------------
386     // Calls CM HAL API to submit a group task to command buffer.
387     //--------------------------------------------------------------------------------
388     MOS_STATUS ExecuteGroupTask(CM_HAL_STATE *halState,
389                                 CM_HAL_EXEC_TASK_GROUP_PARAM *taskParam,
390                                 MOS_GPU_CONTEXT gpuContextName);
391 
392     //--------------------------------------------------------------------------------
393     // Calls CM HAL API to submit a general task to command buffer.
394     //--------------------------------------------------------------------------------
395     MOS_STATUS ExecuteGeneralTask(CM_HAL_STATE *halState,
396                                   CM_HAL_EXEC_TASK_PARAM *taskParam,
397                                   MOS_GPU_CONTEXT gpuContextName);
398 
399     //--------------------------------------------------------------------------------
400     // Creates a buffer to synchronize all tasks in this queue.
401     // It's useful only on certain operating systems.
402     //--------------------------------------------------------------------------------
403     MOS_STATUS CreateSyncBuffer(CM_HAL_STATE *halState);
404 
405     //--------------------------------------------------------------------------------
406     // Selects sync buffer in this queue so CM HAL can add it to the command buffer.
407     // It's useful only on certain operating systems.
408     //--------------------------------------------------------------------------------
409     MOS_STATUS SelectSyncBuffer(CM_HAL_STATE *halState);
410 
411     //--------------------------------------------------------------------------------
412     // Releases sync buffer in this queue if it's created.
413     //--------------------------------------------------------------------------------
414     MOS_STATUS ReleaseSyncBuffer(CM_HAL_STATE *halState);
415 
416 #if CM_LOG_ON
417     CM_HAL_STATE* GetHalState();
418 #endif  // #if CM_LOG_ON
419 
420     uint32_t m_streamIndex;
421 
422     GPU_CONTEXT_HANDLE m_gpuContextHandle;
423 
424     // Handle of buffer resource for synchronizing tasks in this queue.
425     uint32_t m_syncBufferHandle;
426 
427 
428     CmQueueRT(const CmQueueRT& other);
429     CmQueueRT& operator=(const CmQueueRT& other);
430 };
431 };  //namespace
432 
433 #endif  // #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUERT_H_
434