xref: /aosp_15_r20/external/intel-media-driver/media_driver/agnostic/common/cm/cm_queue.h (revision ba62d9d3abf0e404f2022b4cd7a85e107f48596f)
1 /*
2 * Copyright (c) 2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file      cm_queue.h
24 //! \brief     Contains CmQueue declarations.
25 //!
26 
27 #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUE_H_
28 #define MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUE_H_
29 
30 #include "cm_def.h"
31 
//!
//! \brief      Option values for the "option" parameter of the CmQueue copy
//!             functions that take one (EnqueueCopyGPUToGPU, EnqueueCopyCPUToCPU,
//!             EnqueueCopyCPUToGPUFullStride and EnqueueCopyGPUToCPUFullStride).
//! \details    Those functions declare the parameter as a plain uint32_t rather
//!             than as this enum type.
//!             NOTE(review): 0x01 and 0x02 are distinct bits, so the values are
//!             presumably combinable with bitwise OR - confirm against the implementation.
//!
32 enum CM_FASTCOPY_OPTION
33 {
34     CM_FASTCOPY_OPTION_NONBLOCKING = 0x00,         //!< return immediately without waiting for GPU to start or finish
35     CM_FASTCOPY_OPTION_BLOCKING = 0x01,            //!< do not return until the copy is finished
36     CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST = 0x02  //!< MDF turbo boost is disabled
37 };
38 
//!
//! \brief      Identifiers for the direction of a copy between GPU and CPU memory.
//! \details    NOTE(review): no declaration in this header uses this type; the
//!             meaning given for each value below is inferred from its name
//!             only - confirm against the implementation.
//!
39 enum CM_GPUCOPY_DIRECTION
40 {
41     CM_FASTCOPY_GPU2CPU = 0,  //!< presumably GPU (video) memory to CPU (system) memory
42     CM_FASTCOPY_CPU2GPU = 1,  //!< presumably CPU (system) memory to GPU (video) memory
43     CM_FASTCOPY_GPU2GPU = 2,  //!< presumably GPU surface to GPU surface
44     CM_FASTCOPY_CPU2CPU = 3   //!< presumably CPU (system) memory to CPU (system) memory
45 };
46 
47 namespace CMRT_UMD
48 {
49 class CmTask;  // forward declarations: these types are only used by pointer or reference below
50 class CmEvent;
51 class CmThreadSpace;
52 class CmThreadGroupSpace;
53 class CmVebox;
54 class CmSurface2D;
55 class CmBuffer;  // NOTE(review): not referenced by any declaration in this header
56 
57 //!
58 //! \brief      CmQueue class for task queue management.
59 //! \details    The CmQueue object represents a CM task queue. Each task
60 //!             represented by a CmTask object has one or more kernels that
61 //!             can be run concurrently. Each kernel can run in multiple
62 //!             threads concurrently. The CmQueue is an in-order queue.
63 //!             Tasks get executed according to the order they get enqueued.
64 //!             The next task doesn't start execution until the current task
65 //!             finishes. Hence an output of one task can be assumed to be
66 //!             available for use as input for any subsequent task in the
67 //!             queue. Each Enqueue generates a CmEvent. A task with a thread
68 //!             space should call Enqueue with a valid thread space pointer,
69 //!             while a task with a group specification should call
70 //!             EnqueueWithGroup with a valid thread group space pointer.
71 //!
72 class CmQueue
73 {
74 public:  // NOTE(review): pure interface; no (virtual) destructor is declared in this class
75     //!
76     //! \brief   Enqueue a task for execution with per-task thread space.
77     //! \details This function enqueues a task represented by the CmTask object.
78     //!          The kernels in the CmTask object may be run concurrently.
79     //!          Tasks get executed according to the order they get enqueued.
80     //!          This is a non-blocking call. It returns immediately without waiting
81     //!          for GPU to start or finish execution. A CmEvent is generated each time
82     //!          a task is enqueued. The CmEvent can be used to check the status of the task.
83     //!          The generated event needs to be managed and released by the user.
84     //!          Since the event is not useful in some cases, the runtime provides the capability
85     //!          to avoid generating it.
86     //!          If the thread space is valid, the dependency defined by the thread space will be honored.
87     //! \param   [in] task
88     //!          pointer to task to submit
89     //! \param   [in,out] event
90     //!          reference to pointer of event generated. If it is set as CM_NO_EVENT,
91     //!          its value returned by runtime is NULL.
92     //! \param   [in] threadSpace
93     //!          pointer to thread space which can define the thread dependency within the task.
94     //!          This is a per task thread space. If this task has multiple kernels, each kernel
95     //!          will have the thread space of same dimension, same dependency etc. If it is nullptr,
96     //!          there is no thread dependency and the maximum thread space width will be assumed
97     //!          to calculate the coordinates for each thread. For each kernel, the per kernel thread space
98     //!          that is defined by calling CmKernel::AssociateThreadSpace() overwrites the per task thread space.
99     //! \retval  CM_SUCCESS if the task is successfully enqueued.
100     //! \retval  CM_OUT_OF_HOST_MEMORY if out of host memory
101     //! \retval  CM_FAILURE otherwise
102     //!
103     CM_RT_API virtual int32_t Enqueue(CmTask *task,
104                                       CmEvent *&event,
105                                       const CmThreadSpace *threadSpace = nullptr) = 0;
106 
107     //!
108     //! \brief    Destroy the CmEvent generated by Enqueue.
109     //! \details  Destroy the event object previously generated by Enqueue.
110     //!           The CmEvent object can be destroyed even before the corresponding task is flushed or finished.
111     //!           If this happens, there is no way the app can get the task status.
112     //! \param    [in] event
113     //!           reference to pointer to event
114     //! \retval   CM_SUCCESS if event destroyed successfully
115     //! \retval   CM_FAILURE otherwise
116     //!
117     CM_RT_API virtual int32_t DestroyEvent(CmEvent *&event) = 0;
118 
119     //!
120     //! \brief    Enqueue the task with thread group space.
121     //! \details  Counterpart of Enqueue for a task with a group specification. NOTE(review): the declaration defaults threadGroupSpace to nullptr although it is documented below as not allowed to be NULL.
122     //! \param    [in] task
123     //!           pointer to task to submit
124     //! \param    [in,out] event
125     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
126     //!           its value returned by runtime is NULL.
127     //! \param    [in] threadGroupSpace
128     //!           pointer to thread group space which defines the dimensions of the task.
129     //!           threadGroupSpace cannot be NULL.
130     //! \retval   CM_SUCCESS if the task is successfully enqueued.
131     //! \retval   CM_INVALID_ARG_VALUE if input task is not valid
132     //! \retval   CM_EXCEED_MAX_KERNEL_PER_ENQUEUE if the task's kernel number exceeds limitation.
133     //! \retval   CM_INVALID_THREAD_GROUP_SPACE if the thread group space specification is invalid.
134     //! \retval   CM_THREAD_ARG_NOT_ALLOWED if user has per thread arguments
135     //!
136     CM_RT_API virtual int32_t
137     EnqueueWithGroup(CmTask *task,
138                      CmEvent *&event,
139                      const CmThreadGroupSpace *threadGroupSpace = nullptr) = 0;
140 
141     //!
142     //! \brief    Enqueues the kernel to copy from system(CPU) memory to video(GPU) memory.
143     //! \details  This function enqueues a task, which contains a pre-defined kernel to copy from host
144     //!           system memory to video surface.
145     //!           This is a non-blocking call. A CmEvent is generated each time a task is enqueued.
146     //!           The CmEvent can be used to check the status.
147     //!           The host memory sysMem must be 16-Byte aligned and surface's width in bytes must be 16-Byte aligned as well.
148     //! \param    [in] surface
149     //!           surface as copy destination, surface's width in bytes must be 16-Byte aligned
150     //! \param    [in] sysMem
151     //!           host memory as copy source, must be 16-Byte aligned
152     //! \param    [in,out] event
153     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
154     //!           its value returned by runtime is NULL.
155     //! \retval   CM_SUCCESS if the task is successfully enqueued
156     //! \retval   CM_GPUCOPY_INVALID_WIDTH if surface's width in bytes is not 16-Byte aligned
157     //!           or more than CM_MAX_GPUCOPY_SURFACE_WIDTH_IN_BYTE.
158     //! \retval   CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned.
159     //! \retval   CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
160     //! \retval   CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
161     //! \retval   CM_FAILURE otherwise
162     //!
163     CM_RT_API virtual int32_t EnqueueCopyCPUToGPU(CmSurface2D *surface,
164                                                   const unsigned char *sysMem,
165                                                   CmEvent* &event) = 0;
166 
167     //!
168     //! \brief    Enqueues the kernel to copy from video(GPU) memory to system(CPU) memory.
169     //! \details  This function enqueues a task, which contains a pre-defined kernel to copy from surface to system memory.
170     //!           This is a non-blocking call. A CmEvent is generated each time a task is enqueued.
171     //!           The CmEvent can be used to check the status or other data regarding the task execution.
172     //!           The host memory sysMem must be 16-Byte aligned and surface's width in bytes must be 16-Byte aligned as well.
173     //! \param    [in] surface
174     //!           surface as copy source, surface's width in bytes must be 16-Byte aligned
175     //! \param    [in] sysMem
176     //!           host memory as copy destination, must be 16-Byte aligned
177     //! \param    [in,out] event
178     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
179     //!           its value returned by runtime is NULL.
180     //! \retval   CM_SUCCESS if the task is successfully enqueued
181     //! \retval   CM_GPUCOPY_INVALID_WIDTH if surface's width in bytes is not 16-Byte aligned
182     //!           or more than CM_MAX_GPUCOPY_SURFACE_WIDTH_IN_BYTE.
183     //! \retval   CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned.
184     //! \retval   CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
185     //! \retval   CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
186     //! \retval   CM_FAILURE otherwise
187     //!
188     CM_RT_API virtual int32_t EnqueueCopyGPUToCPU(CmSurface2D *surface,
189                                                   unsigned char *sysMem,
190                                                   CmEvent *&event) = 0;
191 
192     //!
193     //! \brief    Enqueues the kernel to initialize a 2D surface.
194     //! \details  This function enqueues a task, which contains a pre-defined kernel to initialize a 2D surface.
195     //!           This is a non-blocking call. A CmEvent is generated each time a task is enqueued.
196     //!           The CmEvent can be used to check the status or other data regarding the task execution.
197     //! \param    [in] surface
198     //!           surface to initialize
199     //! \param    [in] initValue
200     //!           value to fill the surface
201     //! \param    [in,out] event
202     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
203     //!           its value returned by runtime is NULL.
204     //! \retval   CM_SUCCESS if the task is successfully enqueued
205     //! \retval   CM_FAILURE otherwise
206     //!
207     CM_RT_API virtual int32_t EnqueueInitSurface2D(CmSurface2D *surface,
208                                                    const uint32_t initValue,
209                                                    CmEvent *&event) = 0;
210 
211     //!
212     //! \brief    Enqueue the kernel to copy memory between surfaces.
213     //! \details  This function enqueues a task, which contains a pre-defined kernel to copy memory between surfaces.
214     //!           This is a non-blocking call. A CmEvent is generated each time a task is enqueued.
215     //!           The CmEvent can be used to check the status or other data regarding the task execution.
216     //!           The input and output surfaces should have the same width, height and format.
217     //! \param    [in] outputSurface
218     //!           surface as copy destination
219     //! \param    [in] inputSurface
220     //!           surface as copy source
221     //! \param    [in] option
222     //!           If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n
223     //!           If it is "CM_FASTCOPY_OPTION_BLOCKING", this function will not return until the copy is finished.\n
224     //!           If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled.
225     //! \param    [in,out] event
226     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
227     //!           its value returned by runtime is NULL.
228     //! \retval   CM_SUCCESS if the task is successfully enqueued
229     //! \retval   CM_GPUCOPY_INVALID_SURFACES if the input and output surfaces differ in
230     //!           width, height or format.
231     //! \retval   CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
232     //! \retval   CM_FAILURE otherwise
233     //!
234     CM_RT_API virtual int32_t EnqueueCopyGPUToGPU(CmSurface2D *outputSurface,
235                                                   CmSurface2D *inputSurface,
236                                                   uint32_t option,
237                                                   CmEvent *&event) = 0;
238 
239     //!
240     //! \brief    Enqueues the kernel to copy memory between host memories.
241     //! \details  This function enqueues a task, which contains a pre-defined kernel to copy memory from src to dest memory.
242     //!           Both dstSysMem and srcSysMem need to be 16-Byte aligned.  The maximum size is determined by system's memory
243     //!           and it should be less than CM_MAX_1D_SURF_WIDTH bytes which is 1G bytes now. If the copy size is less than
244     //!           1K bytes, the event will not be generated and it is a blocking call.
245     //!           For the size larger than 1K bytes, this is a non-blocking call.
246     //!           A CmEvent is generated to check the status or other data regarding the task execution.
247     //!           To avoid generating event, user can set the event as CM_NO_EVENT and pass it to this function.
248     //! \param    [in] dstSysMem
249     //!           destination memory, must be 16-Byte aligned
250     //! \param    [in] srcSysMem
251     //!           source memory, must be 16-Byte aligned
252     //! \param    [in] size
253     //!           size of memory to copy in bytes
254     //! \param    [in] option
255     //!           If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n
256     //!           If it is "CM_FASTCOPY_OPTION_BLOCKING", this function will not return until the copy is finished.\n
257     //!           If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled.
258     //! \param    [in,out] event
259     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
260     //!           its value returned by runtime is NULL.
261     //! \retval   CM_SUCCESS if the task is successfully enqueued
262     //! \retval   CM_GPUCOPY_INVALID_SYSMEM if dstSysMem or srcSysMem is not 16-Byte aligned.
263     //! \retval   CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
264     //! \retval   CM_FAILURE otherwise
265     //!
266     CM_RT_API virtual int32_t EnqueueCopyCPUToCPU(unsigned char *dstSysMem,
267                                                   unsigned char *srcSysMem,
268                                                   uint32_t size,
269                                                   uint32_t option,
270                                                   CmEvent *&event) = 0;
271 
272     //!
273     //! \brief    Enqueue the kernel to copy memory from system memory to video memory with width and height stride.
274     //! \details  This function enqueues a task, which contains a pre-defined kernel to copy from system memory to a surface.
275     //!           Depending on user "option", this is a non-blocking or blocking call.
276     //!           A CmEvent is generated each time a task is enqueued. The CmEvent can be used to check the status or other data
277     //!           regarding the task execution. To avoid generating event, user can set the event as CM_NO_EVENT and pass it to
278     //!           this function. The host memory sysMem's width stride must be 16-Byte aligned, and height stride has no
279     //!           alignment restriction.
280     //! \param    [in] surface
281     //!           surface as copy destination
282     //! \param    [in] sysMem
283     //!           system memory as copy source, must be 16-Byte aligned
284     //! \param    [in] widthStride
285     //!           width stride of memory stored in host memory, in bytes, must be 16-Byte aligned
286     //! \param    [in] heightStride
287     //!           height stride of memory stored in host memory, in bytes.
288     //! \param    [in] option
289     //!           If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n
290     //!           If it is "CM_FASTCOPY_OPTION_BLOCKING", this function will not return until the copy is finished.\n
291     //!           If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled.
292     //! \param    [in,out] event
293     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
294     //!           its value returned by runtime is NULL.
295     //! \retval   CM_SUCCESS if the task is successfully enqueued
296     //! \retval   CM_GPUCOPY_INVALID_WIDTH if surface's width in bytes is not 16-Byte aligned
297     //!           or more than CM_MAX_GPUCOPY_SURFACE_WIDTH_IN_BYTE.
298     //! \retval   CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned.
299     //! \retval   CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
300     //! \retval   CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
301     //! \retval   CM_FAILURE otherwise
302     //!
303     CM_RT_API virtual int32_t EnqueueCopyCPUToGPUFullStride(CmSurface2D *surface,
304                                                             const unsigned char *sysMem,
305                                                             const uint32_t widthStride,
306                                                             const uint32_t heightStride,
307                                                             const uint32_t option,
308                                                             CmEvent *& event) = 0;
309 
310     //!
311     //! \brief    Enqueue the kernel to copy memory from video memory to system memory with width and height stride.
312     //! \details  This function enqueues a task, which contains a pre-defined kernel to copy from surface to system memory.
313     //!           Depending on user "option", this is a non-blocking or blocking call.
314     //!           A CmEvent is generated each time a task is enqueued. The CmEvent can be used to check the status or other data
315     //!           regarding the task execution. To avoid generating event, user can set the event as CM_NO_EVENT and pass it to
316     //!           this function. The host memory sysMem's width stride must be 16-Byte aligned, and height stride has no
317     //!           alignment restriction.
318     //! \param    [in] surface
319     //!           surface as copy source
320     //! \param    [in] sysMem
321     //!           system memory as copy destination, must be 16-Byte aligned
322     //! \param    [in] widthStride
323     //!           width stride of memory stored in host memory, in bytes, must be 16-Byte aligned
324     //! \param    [in] heightStride
325     //!           height stride of memory stored in host memory, in bytes.
326     //! \param    [in] option
327     //!           If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n
328     //!           If it is "CM_FASTCOPY_OPTION_BLOCKING", this function will not return until the copy is finished.\n
329     //!           If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled.
330     //! \param    [in,out] event
331     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
332     //!           its value returned by runtime is NULL.
333     //! \retval   CM_SUCCESS if the task is successfully enqueued
334     //! \retval   CM_GPUCOPY_INVALID_STRIDE if stride is not 16-Byte aligned or less than surface's width in bytes.
335     //! \retval   CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned.
336     //! \retval   CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
337     //! \retval   CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
338     //! \retval   CM_FAILURE otherwise
339     //!
340     CM_RT_API virtual int32_t EnqueueCopyGPUToCPUFullStride(CmSurface2D *surface,
341                                                             unsigned char *sysMem,
342                                                             const uint32_t widthStride,
343                                                             const uint32_t heightStride,
344                                                             const uint32_t option,
345                                                             CmEvent *& event) = 0;
346 
347     //!
348     //! \brief   Enqueue a task for execution with hints.
349     //! \details This API is designed to saturate the EUs when running a large dependency kernel.
350     //!          At least two kernels must exist in the task. The ideal case is at least one large dependency kernel
351     //!          running with smaller kernels. The idea is to get the smaller kernels for free during the time it already
352     //!          takes to execute the large dependency kernel. Each task can have up to CAP_KERNEL_COUNT_PER_TASK kernels.
353     //!          The 0th bit of the hints indicates to use media object or media walker. Currently, only media object is valid.
354     //!          The next bits indicate whether the next kernel is in the same or different kernel group.
355     //!          For example, if bit 1 is set then the second kernel is in a different kernel group from the first kernel,
356     //!          if it is not set it is in the same kernel group. The kernels are interleaved between different kernel groups
357     //!          and run concurrently. Within a kernel group, the kernels are dispatched in order. The kernel groups are dispatched
358     //!          to separate sub-slices. The assumption is made that the kernel groups are comparable in kernel execution time.
359     //!          There can be no dependency between different kernels; all kernels in the task should be independent of one another.
360     //!          Additionally, pKernel->AssociateThreadSpace(CmThreadSpace*& pTS) must be called for each kernel.
361     //!          A CmEvent is generated to check the status or other data regarding the task execution.
362     //!          To avoid generating event, user can set the event as CM_NO_EVENT and pass it to this function.
363     //! \param   [in] task
364     //!          pointer to task to submit
365     //! \param   [in,out] event
366     //!          reference to pointer of event generated. If it is set as CM_NO_EVENT,
367     //!          its value returned by runtime is NULL.
368     //! \param   [in] hints
369     //!          Hints about work load from host to driver.
370     //! \retval  CM_SUCCESS if the task is successfully enqueued.
371     //! \retval  CM_OUT_OF_HOST_MEMORY if out of host memory
372     //! \retval  CM_FAILURE otherwise
373     //!
374     CM_RT_API virtual int32_t EnqueueWithHints(CmTask *task,
375                                                CmEvent *&event,
376                                                uint32_t hints = 0) = 0;
377 
378     //!
379     //! \brief   Enqueue a vebox task to vebox engine.
380     //! \details This call submits a VEBOX task to VEBOX engine for execution.
381     //!          Before this function is called, the user needs to call CmDevice::CreateVebox() to create a CmVebox object,
382     //!          and call the APIs in CmVebox class to set up VEBOX state and surfaces.
383     //! \param   [in] vebox
384     //!          Pointer to a CmVebox object.
385     //! \param   [in,out] event
386     //!          reference to pointer of event generated. If it is set as CM_NO_EVENT,
387     //!          its value returned by runtime is NULL.
388     //! \retval  CM_SUCCESS if the task is successfully enqueued.
389     //! \retval  CM_OUT_OF_HOST_MEMORY if out of host memory
390     //! \retval  CM_INVALID_ARG_VALUE if input vebox is not valid
391     //! \retval  CM_FAILURE otherwise
392     //!
393     CM_RT_API virtual int32_t EnqueueVebox(CmVebox *vebox, CmEvent *&event) = 0;
394 
395     //!
396     //! \brief   Enqueue a task for execution with per-task thread space in a fast path.
397     //! \details This function enqueues a task represented by the CmTask object.
398     //!          The kernels in the CmTask object may be run concurrently.
399     //!          Tasks get executed according to the order they get enqueued.
400     //!          This is a non-blocking call. It returns immediately without waiting
401     //!          for GPU to start or finish execution. A CmEvent is generated each time
402     //!          a task is enqueued. The CmEvent can be used to check the status of the task.
403     //!          The generated event needs to be managed and released by the user.
404     //!          Since the event is not useful in some cases, the runtime provides the capability
405     //!          to avoid generating it.
406     //!          If the thread space is valid, the dependency defined by the thread space will be honored.
407     //! \param   [in] task
408     //!          pointer to task to submit
409     //! \param   [in,out] event
410     //!          reference to pointer of event generated. If it is set as CM_NO_EVENT,
411     //!          its value returned by runtime is NULL.
412     //! \param   [in] threadSpace
413     //!          pointer to thread space which can define the thread dependency within the task.
414     //!          This is a per task thread space. If this task has multiple kernels, each kernel
415     //!          will have the thread space of same dimension, same dependency etc. If it is nullptr,
416     //!          there is no thread dependency and the maximum thread space width will be assumed
417     //!          to calculate the coordinates for each thread. For each kernel, the per kernel thread space
418     //!          that is defined by calling CmKernel::AssociateThreadSpace() overwrites the per task thread space.
419     //! \retval  CM_SUCCESS if the task is successfully enqueued.
420     //! \retval  CM_OUT_OF_HOST_MEMORY if out of host memory
421     //! \retval  CM_FAILURE otherwise
422     //!
423     CM_RT_API virtual int32_t EnqueueFast(CmTask *task,
424                               CmEvent *&event,
425                               const CmThreadSpace *threadSpace = nullptr) = 0;
426     //!
427     //! \brief    Destroy the CmEvent generated by EnqueueFast.
428     //! \details  Destroy the event object previously generated by EnqueueFast.
429     //!           The CmEvent object can be destroyed even before the corresponding task is flushed or finished.
430     //!           If this happens, there is no way the app can get the task status.
431     //! \param    [in] event
432     //!           reference to pointer to event
433     //! \retval   CM_SUCCESS if event destroyed successfully
434     //! \retval   CM_FAILURE otherwise
435     //!
436     CM_RT_API virtual int32_t DestroyEventFast(CmEvent *&event) = 0;
437 
438     //!
439     //! \brief    Enqueue the task with thread group space in a fast path.
440     //! \details  Fast-path counterpart of EnqueueWithGroup. NOTE(review): the declaration defaults threadGroupSpace to nullptr although it is documented below as not allowed to be NULL.
441     //! \param    [in] task
442     //!           pointer to task to submit
443     //! \param    [in,out] event
444     //!           reference to pointer of event generated. If it is set as CM_NO_EVENT,
445     //!           its value returned by runtime is NULL.
446     //! \param    [in] threadGroupSpace
447     //!           pointer to thread group space which defines the dimensions of the task.
448     //!           threadGroupSpace cannot be NULL.
449     //! \retval   CM_SUCCESS if the task is successfully enqueued.
450     //! \retval   CM_INVALID_ARG_VALUE if input task is not valid
451     //! \retval   CM_EXCEED_MAX_KERNEL_PER_ENQUEUE if the task's kernel number exceeds limitation.
452     //! \retval   CM_INVALID_THREAD_GROUP_SPACE if the thread group space specification is invalid.
453     //! \retval   CM_THREAD_ARG_NOT_ALLOWED if user has per thread arguments
454     //!
455     CM_RT_API virtual int32_t EnqueueWithGroupFast(CmTask *task,
456                                   CmEvent *&event,
457                                   const CmThreadGroupSpace *threadGroupSpace = nullptr) = 0;
458 };
459 };// namespace CMRT_UMD
460 
461 #endif  // #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUE_H_
462