xref: /aosp_15_r20/external/intel-media-driver/media_driver/agnostic/common/cm/cm_kernel_rt.h (revision ba62d9d3abf0e404f2022b4cd7a85e107f48596f)
1 /*
2 * Copyright (c) 2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file      cm_kernel_rt.h
24 //! \brief     Contains CmKernelRT declarations.
25 //!
26 
27 #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMKERNELRT_H_
28 #define MEDIADRIVER_AGNOSTIC_COMMON_CM_CMKERNELRT_H_
29 
30 #include "cm_kernel.h"
31 #include "cm_hal.h"
32 #include "cm_log.h"
33 
//! Kind of surface recorded for a kernel argument (see CM_ARG::surfaceKind).
enum SURFACE_KIND
{
    DATA_PORT_SURF = 0,  // value a freshly constructed CM_ARG starts with
    SAMPLER_SURF   = 1,
};
39 
//! Per-element bookkeeping for an argument that is a surface array.
struct SURFACE_ARRAY_ARG
{
    // Arg kind of this element of the surface array.
    uint16_t argKindForArray;

    // Address control mode of this element; used for media sampler surfaces
    // in the surface array.
    uint32_t addressModeForArray;
};
45 
46 struct CM_ARG
47 {
48     uint16_t unitKind; // value is of type CM_ARG_KIND
49     uint16_t unitKindOrig; // used to restore unitKind when reset
50 
51     uint16_t index;
52     SURFACE_KIND surfaceKind;
53 
54     uint32_t unitCount; // 1 for for per kernel arg ; thread # for per thread arg
55 
56     uint16_t unitSize; // size of arg in byte
57     uint16_t unitSizeOrig; // used to restore unitSize when reset
58 
59     uint16_t unitOffsetInPayload; // offset relative to R0 in payload
60     uint16_t unitOffsetInPayloadOrig; // used to restore unitOffsetInPayload in adding move instruction for CURBE
61     bool isDirty;      // used to indicate if its value be changed
62     bool isSet;        // used to indicate if this argument is set correctly
63     uint32_t nCustomValue;  // CM defined value for special argument kind
64 
65     uint32_t aliasIndex;    // CmSurface2D alias index
66     bool aliasCreated; // whether or not alias was created for this argument
67 
68     bool isNull;       // used to indicate if this is a null surface
69 
70     uint32_t unitVmeArraySize; // number of Vme surfaces in surface array
71 
72     // pointer to the arg values. the size is unitCount * unitSize
73     union
74     {
75         uint8_t *value;
76         int32_t *intValue;
77         uint32_t *uintValue;
78         float  *floatValue;
79     };
80 
81     uint16_t *surfIndex;
82     SURFACE_ARRAY_ARG *surfArrayArg; // record each arg kind and address control mode for media sampler in surface array
83     bool isStatelessBuffer;
CM_ARGCM_ARG84     CM_ARG()
85     {
86         unitKind = 0;
87         unitCount = 0;
88         unitSize = 0;
89         unitOffsetInPayload = 0;
90         value = nullptr;
91         isDirty = false;
92         isNull = false;
93         unitVmeArraySize = 0;
94         surfIndex = nullptr;
95         aliasIndex = 0;
96         unitOffsetInPayloadOrig = 0;
97         isSet = false;
98         index = 0;
99         unitKindOrig = 0;
100         nCustomValue = 0;
101         surfaceKind = DATA_PORT_SURF;
102         unitSizeOrig = 0;
103         surfArrayArg = nullptr;
104         aliasCreated = false;
105         isStatelessBuffer = false;
106     }
107 };
108 
//! Selector passed as nArgType to CmKernelRT::SetArgsInternal() and
//! CmKernelRT::SetArgsVme(): per-kernel argument versus per-thread argument.
//! The "INTERNEL" spelling is part of the enumerator names and is kept as is.
enum CM_KERNEL_INTERNAL_ARG_TYPE
{
    CM_KERNEL_INTERNEL_ARG_PERKERNEL = 0,
    CM_KERNEL_INTERNEL_ARG_PERTHREAD,  // == 1
};
114 
// Forward declarations at global scope. In this header CM_KERNEL_INFO is used
// only through a pointer (CmKernelRT::m_kernelInfo) and CmExecutionAdv only in
// a friend declaration of CmKernelRT, so complete types are not needed here.
struct CM_KERNEL_INFO;
class CmExecutionAdv;
117 
118 namespace CMRT_UMD
119 {
// Forward declarations of CMRT_UMD types that the declarations below refer to
// only by pointer or reference.
class CmDeviceRT;
class CmKernelData;
class CmThreadSpaceRT;
class CmSurfaceVme;
class CmSurface;
class CmSurfaceManager;
class CmProgramRT;
class CmDynamicArray;
128 
129 class CmMovInstConstructor
130 {
131 public:
CmMovInstConstructor()132     CmMovInstConstructor() {}
~CmMovInstConstructor()133     virtual ~CmMovInstConstructor() {}
134 
SetInstDistanceConfig(uint32_t size,uint32_t renderGen)135     virtual CM_RETURN_CODE SetInstDistanceConfig(uint32_t size, uint32_t renderGen)
136     {
137         // not implemented in currect platforms
138         // reserved for future platforms
139         return CM_NOT_IMPLEMENTED;
140     }
141 
142     virtual uint32_t ConstructObjMovs(uint32_t dstOffset,
143                              uint32_t srcOffset,
144                              uint32_t size,
145                              CmDynamicArray &movInsts,
146                              uint32_t index,
147                              bool isBdw,
148                              bool isHwDebug);
149 
150 };
151 
152 //*-----------------------------------------------------------------------------
153 //! CM Kernel
154 //*-----------------------------------------------------------------------------
155 class CmKernelRT: public CmKernel
156 {
157 public:
158     static int32_t Create(CmDeviceRT *device,
159                           CmProgramRT *program,
160                           const char *kernelName,
161                           uint32_t kernelIndex,
162                           uint32_t kernelSeqNum,
163                           CmKernelRT *&kernel,
164                           const char *options);
165 
166     static int32_t Destroy(CmKernelRT *&kernel, CmProgramRT *&program);
167 
168     int32_t GetThreadCount(uint32_t &count);
169 
170     CM_RT_API int32_t SetThreadCount(uint32_t count);
171 
172     CM_RT_API int32_t SetKernelArg(uint32_t index,
173                                    size_t size,
174                                    const void *value);
175 
176     CM_RT_API virtual int32_t SetKernelArgPointer(uint32_t index,
177                                                   size_t size,
178                                                   const void *value);
179 
180     CM_RT_API int32_t SetThreadArg(uint32_t threadId,
181                                    uint32_t index,
182                                    size_t size,
183                                    const void *value);
184 
185     CM_RT_API int32_t SetStaticBuffer(uint32_t index, const void *value);
186 
187     CM_RT_API int32_t SetSurfaceBTI(SurfaceIndex *surface, uint32_t bti);
188 
189     CM_RT_API int32_t AssociateThreadSpace(CmThreadSpace *&threadSpace);
190 
191     CM_RT_API int32_t AssociateThreadGroupSpace(CmThreadGroupSpace *&threadGroupSpace);
192 
193     CM_RT_API int32_t SetSamplerBTI(SamplerIndex *sampler, uint32_t nIndex);
194 
195     CM_RT_API int32_t DeAssociateThreadSpace(CmThreadSpace *&threadSpace);
196 
197     CM_RT_API int32_t DeAssociateThreadGroupSpace(CmThreadGroupSpace *&threadGroupSpace);
198 
199     CM_RT_API int32_t QuerySpillSize(uint32_t &spillMemorySize);
200 
201     CMRT_UMD_API int32_t GetBinary(std::vector<char> &binary);
202 
203     CMRT_UMD_API int32_t ReplaceBinary(std::vector<char> &binary);
204 
205     CMRT_UMD_API int32_t ResetBinary();
206 
207     int32_t GetArgs(CM_ARG *&arg);
208 
209     int32_t GetArgCount(uint32_t &argCount);
210 
211     int32_t GetCurbeEnable(bool &b);
212 
213     int32_t SetCurbeEnable(bool b);
214 
215     int32_t GetSizeInCurbe(uint32_t &size);
216 
217     uint32_t GetAlignedCurbeSize(uint32_t value);
218 
219     virtual int32_t GetCmDevice(CmDeviceRT *&);
220 
221     virtual int32_t GetCmProgram(CmProgramRT *&);
222 
223     int32_t GetSizeInPayload(uint32_t &size);
224 
225     int32_t CreateKernelData(CmKernelData *&kernelData,
226                              uint32_t &kernelDataSize,
227                              const CmThreadSpaceRT *threadSpace);
228 
229     int32_t CreateKernelData(CmKernelData *&kernelData,
230                              uint32_t &kernelDataSize,
231                              const CmThreadGroupSpace *threadGroupSpace);
232 
233     virtual char *GetName();
234 
235     int32_t SetIndexInTask(uint32_t index);
236 
237     uint32_t GetIndexInTask();
238 
239     int32_t SetAssociatedToTSFlag(bool b);
240 
241     bool IsThreadArgExisted();
242 
243     virtual uint32_t GetKernelIndex();
244 
GetThreadSpace(CmThreadSpaceRT * & threadSpace)245     int32_t GetThreadSpace(CmThreadSpaceRT *&threadSpace)
246     {
247         threadSpace = m_threadSpace;
248         return CM_SUCCESS;
249     }
250 
GetThreadGroupSpace(CmThreadGroupSpace * & threadGroupSpace)251     int32_t GetThreadGroupSpace(CmThreadGroupSpace *&threadGroupSpace)
252     {
253         threadGroupSpace = m_threadGroupSpace;
254         return CM_SUCCESS;
255     }
256 
SetAdjustedYCoord(uint32_t value)257     int32_t SetAdjustedYCoord(uint32_t value)
258     {
259         m_adjustScoreboardY = value;
260         return CM_SUCCESS;
261     }
262 
GetAdjustedYCoord()263     int32_t GetAdjustedYCoord() { return m_adjustScoreboardY; }
264 
265     uint32_t GetSLMSize();
266 
267     uint32_t GetSpillMemUsed();
268 
269     int32_t Acquire();
270 
271     int32_t SafeRelease();
272 
273     int32_t CollectKernelSurface();
274 
275     int32_t GetKernelSurfaces(bool *&surfArray);
276 
277     int32_t ResetKernelSurfaces();
278 
279     int32_t CalculateKernelSurfacesNum(uint32_t &kernelSurfaceNum,
280                                        uint32_t &neededBTEntryNum);
281 
282     uint32_t GetKernelGenxBinarySize();
283 
284     int32_t ReleaseKernelData(CmKernelData *&kernelData);
285 
286     int32_t AcquireKernelData(CmKernelData *&kernelData);
287 
288     virtual int32_t CloneKernel(CmKernelRT*& kernelOut, uint32_t id);
289 
290     void SetAsClonedKernel(uint32_t cloneKernelID);
291 
292     bool GetCloneKernelID(uint32_t &cloneKernelID);
293 
294     void SetHasClones();
295 
GetMaxSurfaceIndexAllocated()296     uint32_t GetMaxSurfaceIndexAllocated()
297     { return m_maxSurfaceIndexAllocated; }
298 
299     int UpdateSamplerHeap(CmKernelData *kernelData);
300 
301 #if CM_LOG_ON
302     std::string Log();
303 
304     CM_HAL_STATE* GetHalState();
305 #endif
306 
307     void SurfaceDump(uint32_t kernelNumber, int32_t taskId);
308 
309 protected:
310     friend CmExecutionAdv;
311     CmKernelRT(CmDeviceRT *device,
312                CmProgramRT *program,
313                uint32_t kernelIndex,
314                uint32_t kernelSeqNum);
315 
316     virtual ~CmKernelRT();
317 
318     int32_t SetArgsInternal(CM_KERNEL_INTERNAL_ARG_TYPE nArgType,
319                             uint32_t index,
320                             size_t size,
321                             const void *value,
322                             uint32_t nThreadID = 0);
323 
324     virtual int32_t Initialize(const char *kernelName, const char *options);
325 
326     int32_t DestroyArgs();
327 
328     int32_t Reset();
329 
330     int32_t IsKernelDataReusable(CmThreadSpaceRT *threadSpace);
331 
332     int32_t CreateKernelArgDataGroup(uint8_t *&data, uint32_t value);
333 
334     int32_t CreateMovInstructions(uint32_t &movInstNum,
335                                   uint8_t *&codeDst,
336                                   CM_ARG *tempArgs,
337                                   uint32_t numArgs);
338 
339     int32_t CalcKernelDataSize(uint32_t movInstNum,
340                                uint32_t numArgs,
341                                uint32_t argSize,
342                                uint32_t &totalKernelDataSize);
343 
344     int32_t GetArgCountPlusSurfArray(uint32_t &argSize, uint32_t &argCountPlus);
345 
346     int32_t CreateKernelDataInternal(CmKernelData *&kernelData,
347                                      uint32_t &kernelDataSize,
348                                      const CmThreadSpaceRT *threadSpace);
349 
350     int32_t CreateKernelDataInternal(CmKernelData *&kernelData,
351                                      uint32_t &kernelDataSize,
352                                      const CmThreadGroupSpace *threadGroupSpace);
353 
354     int32_t UpdateKernelData(CmKernelData *kernelData,
355                              const CmThreadSpaceRT *threadSpace);
356 
357     int32_t UpdateKernelData(CmKernelData *kernelData,
358                              const CmThreadGroupSpace *threadGroupSpace);
359 
360     int32_t CreateThreadArgData(PCM_HAL_KERNEL_ARG_PARAM kernelArg,
361                                 uint32_t threadArgIndex,
362                                 CmThreadSpaceRT *threadSpace,
363                                 CM_ARG *cmArgs);
364 
365     int32_t UpdateLastKernelData(CmKernelData *&kernelData);
366 
367     int32_t CreateKernelIndirectData(
368         PCM_HAL_INDIRECT_DATA_PARAM halIndirectData);
369 
370     int32_t CreateThreadSpaceParam(
371         PCM_HAL_KERNEL_THREADSPACE_PARAM kernelThreadSpaceParam,
372         CmThreadSpaceRT *threadSpace);
373 
374     int32_t CreateTempArgs(uint32_t numArgs, CM_ARG *&tempArgs);
375 
376     int32_t SortThreadSpace(CmThreadSpaceRT *threadSpace);
377 
378     int32_t CleanArgDirtyFlag();
379 
380     bool IsBatchBufferReusable(CmThreadSpaceRT *taskThreadSpace);
381 
382     bool IsPrologueDirty();
383 
384     void DumpKernelData(CmKernelData *kernelData);
385 
386     int32_t
387     UpdateKernelDataGlobalSurfaceInfo(PCM_HAL_KERNEL_PARAM halKernelParam);
388 
389     CM_ARG_KIND SurfTypeToArgKind(CM_ENUM_CLASS_TYPE surfType);
390 
391     int32_t AcquireKernelProgram();
392 
393     int32_t SetArgsVme(CM_KERNEL_INTERNAL_ARG_TYPE nArgType,
394                        uint32_t argIndex,
395                        const void *value,
396                        uint32_t nThreadID);
397 
398     int32_t SetArgsSingleVme(CmSurfaceVme *vmeSurface,
399                              uint8_t *vmeArgValueArray,
400                              uint16_t *cmSufacesArray);
401 
402     int32_t GetVmeSurfaceIndex(uint32_t *vmeIndexArray,
403                                uint32_t *vmeCmIndexArray,
404                                uint32_t index,
405                                uint32_t *outputValue);
406 
407     CmSurface *GetSurfaceFromSurfaceArray(SurfaceIndex *value,
408                                           uint32_t indexSurfaceArray);
409 
410     void ArgLog(std::ostringstream &oss,
411                 uint32_t index,
412                 CM_ARG arg);
413 
414     int32_t CreateKernelImplicitArgDataGroup(uint8_t *&data, uint32_t size);
415 
416     int32_t SearchAvailableIndirectSurfInfoTableEntry(uint16_t kind,
417                                                       uint32_t surfaceIndex,
418                                                       uint32_t bti);
419 
420     int32_t SetSurfBTINumForIndirectData(CM_SURFACE_FORMAT format,
421                                          CM_ENUM_CLASS_TYPE surfaceType);
422 
423     int32_t SetArgsInternalSurfArray(int32_t offset,
424                                      uint32_t kernelArgIndex,
425                                      int32_t surfCount,
426                                      CmSurface *currentSurface,
427                                      uint32_t currentSurfIndex,
428                                      SurfaceIndex *value,
429                                      uint32_t surfValue[],
430                                      uint16_t origSurfIndex[]);
431 
432 #if USE_EXTENSION_CODE
433     int InitForGTPin(CmDeviceRT *device,
434                      CmProgramRT *program,
435                      CmKernelRT *kernel);
436 
437     int32_t
438     UpdateKernelDataGTPinSurfaceInfo(PCM_HAL_KERNEL_PARAM halKernelParam);
439 #endif
440 
441     CmDeviceRT *m_device;
442     CmSurfaceManager *m_surfaceMgr;
443     CmProgramRT *m_program;
444     char *m_options;
445     char *m_binary;
446     char *m_binaryOrig;
447     uint32_t m_binarySize;
448     uint32_t m_binarySizeOrig;
449 
450     uint32_t m_threadCount;
451     uint32_t m_lastThreadCount;
452     uint32_t m_sizeInCurbe;  //data size in CURBE
453     uint32_t m_sizeInPayload;  //data size of inline data in media object or media walker commands
454     uint32_t m_argCount;
455 
456     CM_ARG *m_args;
457     SurfaceIndex *m_globalSurfaces[CM_GLOBAL_SURFACE_NUMBER];
458     uint32_t m_globalCmIndex[CM_GLOBAL_SURFACE_NUMBER];
459     CM_KERNEL_INFO *m_kernelInfo;
460     uint32_t m_kernelIndexInProgram;
461 
462     bool m_curbeEnabled;
463     bool m_nonstallingScoreboardEnabled;
464 
465     uint64_t m_id;  // high 32bit is kernel id (highest 16 bits used for kernel binary re-use
466                     // in GSH), low 32bit is kernel data id
467 
468     uint32_t m_dirty;
469     CmKernelData *m_lastKernelData;
470     uint32_t m_lastKernelDataSize;
471 
472     uint32_t m_indexInTask;
473     bool m_threadSpaceAssociated;  // Indicates if this kernel is associated the task threadspace
474                             // (scoreboard)
475 
476     bool m_perThreadArgExists;  // Indicates if this kernel has thread arg.
477     bool m_perKernelArgExists;  // Indicates if the user call SetKernelArg() to set per-kernel arg
478 
479     CmThreadSpaceRT *m_threadSpace;  // Pointer to the kernel threadspace
480     uint32_t m_adjustScoreboardY;     // value to adjust Y coordinate read from r0.1 used for
481                                       // EnqueueWithHints
482     uint32_t m_lastAdjustScoreboardY;
483 
484     bool m_blCreatingGPUCopyKernel;  // Indicate if this is a predefined GPUCopy kernel
485     bool m_blhwDebugEnable;          // Indicate if the hw debug enabled
486 
487     uint16_t m_usKernelPayloadDataSize;  // Size of kernel indirect data (in byte)
488     uint8_t *m_kernelPayloadData;       // Pointer to the kernel indirect data memory
489 
490     uint16_t m_usKernelPayloadSurfaceCount;  //the surface count in kernel indirect data
491 
492     // the surface index list included in kernel indirect data
493     SurfaceIndex*
494     m_pKernelPayloadSurfaceArray[CM_MAX_STATIC_SURFACE_STATES_PER_BT];
495 
496     CM_INDIRECT_SURFACE_INFO
497     m_IndirectSurfaceInfoArray[CM_MAX_STATIC_SURFACE_STATES_PER_BT];  // information used by driver
498 
499     uint32_t m_samplerBtiCount;
500     CM_SAMPLER_BTI_ENTRY m_samplerBtiEntry[CM_MAX_SAMPLER_TABLE_SIZE];
501 
502     uint32_t m_refcount;  // reference count for kernel
503 
504     CM_HAL_MAX_VALUES *m_halMaxValues;
505     CM_HAL_MAX_VALUES_EX *m_halMaxValuesEx;
506     bool *m_surfaceArray;
507 
508     uint32_t m_kernelIndex;  // Kernel index in kernel array
509 
510     CmThreadGroupSpace *m_threadGroupSpace;  //should be exclusive with m_threadSpace
511 
512     uint32_t m_vmeSurfaceCount;  // to record how many VME surface are using in this kernel
513     uint32_t m_maxSurfaceIndexAllocated;  // to record the largest surface index used in the pool,
514                                           // static or reserved surfaces are not included
515                                           // the var should be inited in CollectKernelSurface
516 
517     uint32_t m_barrierMode;  // to record barrier mode for this kernel
518 
519     bool m_isClonedKernel;
520     uint32_t m_cloneKernelID;
521     bool m_hasClones;
522     CM_STATE_BUFFER_TYPE m_stateBufferBounded;
523 
524     CmMovInstConstructor *m_movInstConstructor;
525 
526     friend class CmThreadSpaceRT;
527 
528 private:
529     CmKernelRT(const CmKernelRT &other);
530     CmKernelRT &operator=(const CmKernelRT &other);
531 };
532 };  //namespace
533 
534 #endif  // #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMKERNELRT_H_
535