xref: /aosp_15_r20/external/intel-media-driver/media_driver/agnostic/common/cm/cm_execution_adv.cpp (revision ba62d9d3abf0e404f2022b4cd7a85e107f48596f)
1 /*
2 * Copyright (c) 2018, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file      cm_execution_adv.cpp
24 //! \brief     Contains Class CmExecutionAdv  definitions
25 //!
26 #include "cm_execution_adv.h"
27 #include "cm_debug.h"
28 #include "cm_extension_creator.h"
29 #include "cm_surface_state_manager.h"
30 #include "cm_kernel_ex.h"
31 #include "cm_ish.h"
32 #include "cm_media_state.h"
33 #include "cm_command_buffer.h"
34 #include "cm_kernel_ex.h"
35 #include "cm_ssh.h"
36 #include "cm_event_ex.h"
37 #include "cm_tracker.h"
38 #include "cm_dsh.h"
39 #include "cm_task_rt.h"
40 #include "cm_thread_space_rt.h"
41 #include "cm_surface_manager.h"
42 #include "cm_queue_rt.h"
43 #include "cm_scratch_space.h"
44 #if IGFX_GEN9_SUPPORTED
45 #include "cm_hal_g9.h"
46 #endif
47 #if IGFX_GEN11_SUPPORTED
48 #include "cm_hal_g11.h"
49 #endif
50 #if IGFX_GEN12_SUPPORTED
51 #include "cm_hal_g12.h"
52 #endif
53 
// File-local flag that gates the notifier callbacks in SubmitTask (the
// NotifyTaskFlushed call and the SetNotifier call on the new event).
// It starts as false and is not assigned anywhere in the visible part of this file.
54 static bool gGTPinInitialized = false;
55 
// Holds the value returned by registering CmExecutionAdv with CmExtensionCreator.
// The registration call is the initializer of this namespace-scope static, so it
// runs as part of this translation unit's static initialization.
56 static bool advRegistered = CmExtensionCreator<CmExecutionAdv>::RegisterClass<CmExecutionAdv>();
57 
58 using namespace CMRT_UMD;
59 
CmExecutionAdv()60 CmExecutionAdv::CmExecutionAdv():
61     m_cmhal(nullptr),
62     m_tracker (nullptr),
63     m_ish (nullptr),
64     m_dsh (nullptr)
65 {
66     MOS_ZeroMemory(&m_l3Values, sizeof(m_l3Values));
67 }
68 
Initialize(CM_HAL_STATE * state)69 MOS_STATUS CmExecutionAdv::Initialize(CM_HAL_STATE *state)
70 {
71     m_cmhal = state;
72     CM_CHK_NULL_RETURN_MOSERROR(m_cmhal);
73 
74     m_tracker = MOS_New(CmTracker, m_cmhal->osInterface);
75     CM_CHK_NULL_RETURN_MOSERROR(m_tracker);
76     CM_CHK_MOSSTATUS_RETURN(m_tracker->Initialize());
77     FrameTrackerProducer *trackerProducer = m_tracker->GetTrackerProducer();
78 
79     m_ish = MOS_New(CmISH);
80     CM_CHK_NULL_RETURN_MOSERROR(m_ish);
81     CM_CHK_MOSSTATUS_RETURN(m_ish->Initialize(m_cmhal, trackerProducer));
82 
83     m_dsh = MOS_New(CmDSH, m_cmhal);
84     CM_CHK_NULL_RETURN_MOSERROR(m_dsh);
85     CM_CHK_MOSSTATUS_RETURN(m_dsh->Initialize(trackerProducer));
86 
87     MOS_ZeroMemory(&m_l3Values, sizeof(m_l3Values));
88 
89     return MOS_STATUS_SUCCESS;
90 }
91 
~CmExecutionAdv()92 CmExecutionAdv::~CmExecutionAdv()
93 {
94     MOS_Delete(m_ish);
95     MOS_Delete(m_dsh);
96     MOS_Delete(m_tracker);
97 }
98 
Create2DStateMgr(MOS_RESOURCE * resource)99 CmSurfaceState2Dor3DMgr* CmExecutionAdv::Create2DStateMgr(MOS_RESOURCE *resource)
100 {
101     return MOS_New(CmSurfaceState2Dor3DMgr, m_cmhal, resource);
102 }
103 
Create3DStateMgr(MOS_RESOURCE * resource)104 CmSurfaceState2Dor3DMgr* CmExecutionAdv::Create3DStateMgr(MOS_RESOURCE *resource)
105 {
106     return MOS_New(CmSurfaceState3DMgr, m_cmhal, resource);
107 }
108 
Delete2Dor3DStateMgr(CmSurfaceState2Dor3DMgr * stateMgr)109 void CmExecutionAdv::Delete2Dor3DStateMgr(CmSurfaceState2Dor3DMgr *stateMgr)
110 {
111     MOS_Delete(stateMgr);
112 }
113 
CreateBufferStateMgr(MOS_RESOURCE * resource)114 CmSurfaceStateBufferMgr* CmExecutionAdv::CreateBufferStateMgr(MOS_RESOURCE *resource)
115 {
116     return MOS_New(CmSurfaceStateBufferMgr, m_cmhal, resource);
117 }
118 
DeleteBufferStateMgr(CmSurfaceStateBufferMgr * stateMgr)119 void CmExecutionAdv::DeleteBufferStateMgr(CmSurfaceStateBufferMgr *stateMgr)
120 {
121     MOS_Delete(stateMgr);
122 }
123 
DeleteSurfStateVme(CmSurfaceStateVME * state)124 void CmExecutionAdv::DeleteSurfStateVme(CmSurfaceStateVME *state)
125 {
126     MOS_Delete(state);
127 }
128 
SetBufferOrigSize(CmSurfaceStateBufferMgr * stateMgr,uint32_t size)129 void CmExecutionAdv::SetBufferOrigSize(CmSurfaceStateBufferMgr *stateMgr, uint32_t size)
130 {
131     if (stateMgr)
132     {
133         stateMgr->SetOrigSize(size);
134     }
135 }
136 
SetBufferMemoryObjectControl(CmSurfaceStateBufferMgr * stateMgr,uint16_t mocs)137 void CmExecutionAdv::SetBufferMemoryObjectControl(CmSurfaceStateBufferMgr *stateMgr, uint16_t mocs)
138 {
139     if (stateMgr)
140     {
141         stateMgr->SetMemoryObjectControl(mocs);
142     }
143 }
144 
Set2Dor3DOrigFormat(CmSurfaceState2Dor3DMgr * stateMgr,MOS_FORMAT format)145 void CmExecutionAdv::Set2Dor3DOrigFormat(CmSurfaceState2Dor3DMgr *stateMgr, MOS_FORMAT format)
146 {
147     if (stateMgr)
148     {
149         stateMgr->SetOrigFormat(format);
150     }
151 }
152 
Set2Dor3DOrigDimension(CmSurfaceState2Dor3DMgr * stateMgr,uint32_t width,uint32_t height,uint32_t depth)153 void CmExecutionAdv::Set2Dor3DOrigDimension(CmSurfaceState2Dor3DMgr *stateMgr, uint32_t width, uint32_t height, uint32_t depth)
154 {
155     if (stateMgr)
156     {
157         stateMgr->SetOrigDimension(width, height, depth);
158     }
159 }
160 
Set2DRenderTarget(CmSurfaceState2Dor3DMgr * stateMgr,bool renderTarget)161 void CmExecutionAdv::Set2DRenderTarget(CmSurfaceState2Dor3DMgr *stateMgr, bool renderTarget)
162 {
163     if (stateMgr)
164     {
165         stateMgr->SetRenderTarget(renderTarget);
166     }
167 }
168 
Set2Dor3DMemoryObjectControl(CmSurfaceState2Dor3DMgr * stateMgr,uint16_t mocs)169 void CmExecutionAdv::Set2Dor3DMemoryObjectControl(CmSurfaceState2Dor3DMgr *stateMgr, uint16_t mocs)
170 {
171     if (stateMgr)
172     {
173         stateMgr->SetMemoryObjectControl(mocs);
174     }
175 }
176 
Set2DFrameType(CmSurfaceState2Dor3DMgr * stateMgr,CM_FRAME_TYPE frameType)177 void CmExecutionAdv::Set2DFrameType(CmSurfaceState2Dor3DMgr *stateMgr, CM_FRAME_TYPE frameType)
178 {
179     if (stateMgr)
180     {
181         stateMgr->SetFrameType(frameType);
182     }
183 }
184 
SetRotationFlag(CmSurfaceState2Dor3DMgr * stateMgr,uint32_t rotation)185 void CmExecutionAdv::SetRotationFlag(CmSurfaceState2Dor3DMgr *stateMgr, uint32_t rotation)
186 {
187     if (stateMgr)
188     {
189         stateMgr->SetRotationFlag(rotation);
190     }
191 }
192 
SetChromaSitting(CmSurfaceState2Dor3DMgr * stateMgr,uint8_t chromaSitting)193 void CmExecutionAdv::SetChromaSitting(CmSurfaceState2Dor3DMgr *stateMgr, uint8_t chromaSitting)
194 {
195     if (stateMgr)
196     {
197         stateMgr->SetChromaSitting(chromaSitting);
198     }
199 }
200 
GetFastTrackerProducer()201 FrameTrackerProducer *CmExecutionAdv::GetFastTrackerProducer()
202 {
203     return m_tracker->GetTrackerProducer();
204 }
205 
CreateKernelRT(CmDeviceRT * device,CmProgramRT * program,uint32_t kernelIndex,uint32_t kernelSeqNum)206 CmKernelRT *CmExecutionAdv::CreateKernelRT(CmDeviceRT *device,
207                CmProgramRT *program,
208                uint32_t kernelIndex,
209                uint32_t kernelSeqNum)
210 {
211     return new (std::nothrow) CmKernelEx(device, program, kernelIndex, kernelSeqNum);
212 }
213 
RefreshSurfaces(CmDeviceRT * device)214 int CmExecutionAdv::RefreshSurfaces(CmDeviceRT *device)
215 {
216     CM_CHK_NULL_RETURN_CMERROR(device);
217 
218     CmSurfaceManager *surfaceMgr  = nullptr;
219     CSync *           surfaceLock = nullptr;
220 
221     device->GetSurfaceManager(surfaceMgr);
222     CM_CHK_NULL_RETURN_CMERROR(surfaceMgr);
223 
224     surfaceLock = device->GetSurfaceCreationLock();
225     CM_CHK_NULL_RETURN_CMERROR(surfaceLock);
226 
227     uint32_t freeSurfNum = 0;
228     surfaceLock->Acquire();
229     surfaceMgr->RefreshDelayDestroySurfaces(freeSurfNum);
230     surfaceLock->Release();
231 
232     return CM_SUCCESS;
233 }
234 
SubmitTask(CMRT_UMD::CmQueueRT * queue,CMRT_UMD::CmTask * task,CMRT_UMD::CmEvent * & event,const CMRT_UMD::CmThreadSpace * threadSpace,MOS_GPU_CONTEXT gpuContext)235 int CmExecutionAdv::SubmitTask(CMRT_UMD::CmQueueRT *queue,
236                 CMRT_UMD::CmTask *task,
237                 CMRT_UMD::CmEvent *&event,
238                 const CMRT_UMD::CmThreadSpace *threadSpace,
239                 MOS_GPU_CONTEXT gpuContext)
240 {
241     CM_NORMALMESSAGE("================ in fast path, media walker===================");
242 
243     CM_HAL_STATE * state = m_cmhal;
244     CM_CHK_NULL_RETURN_CMERROR(state->advExecutor);
245     CmTracker *cmTracker = state->advExecutor->GetTracker();
246     CmISH *cmish = state->advExecutor->GetISH();
247     CmDSH *cmdsh = state->advExecutor->GetDSH();
248     CM_CHK_NULL_RETURN_CMERROR(cmTracker);
249     CM_CHK_NULL_RETURN_CMERROR(cmish);
250     CM_CHK_NULL_RETURN_CMERROR(cmdsh);
251 
252     CLock Locker(m_criticalSection);
253 
254     bool isDummyEventCreated = false;
255 #if MDF_SURFACE_CONTENT_DUMP
256     if (state->dumpSurfaceContent && event == CM_NO_EVENT)
257     {
258         // if surface content dump is needed, the enqueueFast should be a blocking operation
259         // we need a dummy event here
260         isDummyEventCreated = true;
261         event = nullptr;
262     }
263 #endif
264 
265     state->osInterface->pfnResetOsStates(state->osInterface);
266     state->osInterface->pfnSetIndirectStateSize(state->osInterface, state->renderHal->dwIndirectHeapSize);
267 
268     CM_HAL_OSSYNC_PARAM syncParam;
269     syncParam.osSyncEvent = nullptr;
270 
271     // Call HAL layer to wait for Task finished with event-driven mechanism
272     CM_CHK_MOSSTATUS_RETURN(m_cmhal->pfnRegisterUMDNotifyEventHandle(m_cmhal, &syncParam));
273 
274     HANDLE osSyncEvent = syncParam.osSyncEvent;
275 
276     CmTaskRT *kernelArrayRT = static_cast<CmTaskRT *>(task);
277     uint32_t kernelCount = kernelArrayRT->GetKernelCount();
278     if (kernelCount == 0 || kernelCount > CM_MAX_KERNELS_PER_TASK)
279     {
280         return CM_FAILURE;
281     }
282 
283     // get an array of CmKernelEx
284     CmKernelEx *kernels[CM_MAX_KERNELS_PER_TASK];
285     MOS_ZeroMemory(kernels, sizeof(kernels));
286     for (uint32_t i = 0; i < kernelCount; i++)
287     {
288         kernels[i] = static_cast<CmKernelEx *>(kernelArrayRT->GetKernelPointer(i));
289         CM_CHK_NULL_RETURN_CMERROR(kernels[i]);
290         kernels[i]->AllocateCurbe();
291     }
292 
293     // get CmDeviceRT
294     CmDeviceRT *device = nullptr;
295     kernels[0]->GetCmDevice(device);
296     CM_CHK_NULL_RETURN_CMERROR(device);
297 
298     // set printf buffer if needed
299     if (device->IsPrintEnable())
300     {
301         SurfaceIndex *printBufferIndex = nullptr;
302         device->CreatePrintBuffer();
303         device->GetPrintBufferIndex(printBufferIndex);
304         CM_ASSERT(printBufferIndex);
305         for (uint32_t i = 0; i < kernelCount; i++)
306         {
307             kernels[i]->SetStaticBuffer(CM_PRINTF_STATIC_BUFFER_ID, printBufferIndex);
308         }
309     }
310 
311     const CmThreadSpaceRT *threadSpaceRTConst = static_cast<const CmThreadSpaceRT *>(threadSpace);
312     CmThreadSpaceRT *threadSpaceRT = const_cast<CmThreadSpaceRT *>(threadSpaceRTConst);
313     CmThreadSpaceRT *threadSpaces[CM_MAX_KERNELS_PER_TASK];
314     MOS_ZeroMemory(threadSpaces, sizeof(threadSpaces));
315     if (threadSpaceRT == nullptr)
316     {
317         for (uint32_t i = 0; i < kernelCount; i++)
318         {
319             threadSpaces[i] = kernels[i]->GetThreadSpaceEx();
320         }
321     }
322 
323     // if SWSB is used, update the SWSB arguments in kenrel
324     if (!state->cmHalInterface->IsScoreboardParamNeeded())
325     {
326         for (uint32_t i = 0; i < kernelCount; i++)
327         {
328             kernels[i]->UpdateSWSBArgs(threadSpaceRT);
329         }
330     }
331 
332     CmCommandBuffer cmdBufData(state);
333     CmCommandBuffer *cmdBuf = &cmdBufData;
334     CM_CHK_NULL_RETURN_CMERROR(cmdBuf);
335 
336     uint32_t tracker;
337     uint32_t taskId;
338     MOS_STATUS mret = cmTracker->AssignFrameTracker(queue->GetFastTrackerIndex(), &taskId, &tracker, event != CM_NO_EVENT);
339     bool taskAssigned = (mret == MOS_STATUS_SUCCESS);
340 
341     cmdBuf->Initialize();
342 
343     CmSSH *ssh = cmdBuf->GetSSH();
344     CM_CHK_NULL_RETURN_CMERROR(ssh);
345 
346     // Add kernels to ISH directly
347     cmish->LoadKernels(kernels, kernelCount);
348 
349     // initialize SSH
350     ssh->Initialize(kernels, kernelCount);
351 
352     // create new media state
353     CmMediaState *cmMediaState = cmdsh->CreateMediaState();
354     CM_CHK_NULL_RETURN_CMERROR(cmMediaState);
355     cmMediaState->Allocate(kernels, kernelCount, queue->GetFastTrackerIndex(), tracker);
356 
357     // generate curbe and load media id
358     for (uint32_t i = 0; i < kernelCount; i++)
359     {
360         ssh->AssignBindingTable();
361         kernels[i]->LoadReservedSamplers(cmMediaState, i);
362         kernels[i]->LoadReservedSurfaces(ssh);
363         kernels[i]->UpdateCurbe(ssh, cmMediaState, i);
364         kernels[i]->UpdateFastTracker(queue->GetFastTrackerIndex(), tracker);
365         cmMediaState->LoadCurbe(kernels[i], i);
366         cmMediaState->LoadMediaID(kernels[i], i, ssh->GetBindingTableOffset());
367     }
368 
369     // prepare cp resources
370     ssh->PrepareResourcesForCp();
371 
372     // get the position to write tracker
373     MOS_RESOURCE *trackerResource = nullptr;
374     uint32_t trackerOffset = 0;
375     cmTracker->GetLatestTrackerResource(queue->GetFastTrackerIndex(), &trackerResource, &trackerOffset);
376 
377     // call gtpin callback if needed
378     CmNotifierGroup *ng = nullptr;
379     if (gGTPinInitialized && taskAssigned)
380     {
381         ng = device->GetNotifiers();
382         ng->NotifyTaskFlushed(device, task, ssh, taskId);
383     }
384 
385     if (m_cmhal->platform.eRenderCoreFamily >= IGFX_GEN12_CORE)
386     {
387         cmdBuf->AddMMCProlog();
388     }
389     cmdBuf->AddFlushCacheAndSyncTask(false, false, nullptr);
390     cmdBuf->AddFlushCacheAndSyncTask(true, false, nullptr);
391 
392     cmdBuf->AddPowerOption(kernelArrayRT->GetPowerOption());
393 
394     cmdBuf->AddProtectedProlog();
395 
396     cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetStartOffset(taskId));
397     cmdBuf->AddUmdProfilerStart();
398 
399     cmdBuf->AddL3CacheConfig(&m_l3Values);
400 
401     cmdBuf->AddPreemptionConfig(false);
402 
403     cmdBuf->AddPipelineSelect(false);
404 
405     cmdBuf->AddStateBaseAddress(cmish, cmMediaState);
406 
407     CM_TASK_CONFIG taskConfig;
408     kernelArrayRT->GetProperty(taskConfig);
409     if (threadSpaceRT)
410     {
411         cmdBuf->AddMediaVFE(cmMediaState, taskConfig.fusedEuDispatchFlag == CM_FUSED_EU_ENABLE, &threadSpaceRT);  // global thread space
412     }
413     else
414     {
415         cmdBuf->AddMediaVFE(cmMediaState, taskConfig.fusedEuDispatchFlag == CM_FUSED_EU_ENABLE, threadSpaces, kernelCount);
416     }
417 
418     cmdBuf->AddCurbeLoad(cmMediaState);
419 
420     cmdBuf->AddMediaIDLoad(cmMediaState);
421 
422     CM_HAL_CONDITIONAL_BB_END_INFO *cbbInfos = kernelArrayRT->GetConditionalEndInfo();
423     uint64_t conditionalBitMap = kernelArrayRT->GetConditionalEndBitmap();
424     for (uint32_t i = 0; i < kernelCount; i ++)
425     {
426         CmThreadSpaceRT *ts = (threadSpaceRT != nullptr) ? threadSpaceRT: threadSpaces[i];
427 
428         // check whether need to insert a CBB
429         bool needCBB = conditionalBitMap & ((uint64_t)1 << i);
430         if (needCBB)
431         {
432             cmdBuf->AddFlushCacheAndSyncTask(false, true, nullptr);
433 
434             cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetEndOffset(taskId), true);
435 
436             cmdBuf->AddConditionalFrameTracker(trackerResource, trackerOffset, tracker, &cbbInfos[i]);
437 
438             cmdBuf->AddConditionalBatchBufferEnd(&cbbInfos[i]);
439         }
440 
441         if (i > 0)
442         {
443             // check whether the next kernel has a dependency pattern
444             uint32_t dcount = 0;
445             if (ts != nullptr)
446             {
447                 CM_HAL_DEPENDENCY *dependency;
448                 ts->GetDependency(dependency);
449                 dcount = dependency->count;
450             }
451 
452             bool syncFlag = false;
453             uint64_t syncBitMap = kernelArrayRT->GetSyncBitmap();
454             syncFlag = syncBitMap & ((uint64_t)1 << (i-1));
455             // add sync if necessary
456             if ((dcount != 0) || syncFlag)
457             {
458                 cmdBuf->AddSyncBetweenKernels();
459             }
460         }
461 
462         cmdBuf->AddMediaObjectWalker(ts, i);
463     }
464 
465     cmdBuf->AddFlushCacheAndSyncTask(false, true, nullptr);
466 
467     cmdBuf->AddUmdProfilerEnd();
468     cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetEndOffset(taskId), true);
469 
470     cmdBuf->AddFrameTracker(trackerResource, trackerOffset, tracker);
471 
472     cmdBuf->AddDummyVFE();
473 
474     cmdBuf->AddBatchBufferEnd();
475 
476     cmdBuf->ReturnUnusedBuffer();
477 
478 #if MDF_SURFACE_STATE_DUMP
479     if (m_cmhal->dumpSurfaceState)
480     {
481         ssh->DumpSSH();
482     }
483 #endif
484 
485 #if MDF_COMMAND_BUFFER_DUMP
486     if (m_cmhal->dumpCommandBuffer)
487     {
488         cmdBuf->Dump();
489     }
490 #endif
491 
492 #if MDF_CURBE_DATA_DUMP
493     if (m_cmhal->dumpCurbeData)
494     {
495         cmMediaState->Dump();
496     }
497 #endif
498 
499     cmdBuf->Submit();
500 
501     cmish->Submit(queue->GetFastTrackerIndex(), tracker);
502 
503     cmMediaState->Submit();
504     cmdsh->DestroyMediaState(cmMediaState);
505 
506     if (event != CM_NO_EVENT && taskAssigned)
507     {
508         CmEventEx *eventEx = MOS_New(CmEventEx, state, taskId, cmTracker);
509         CM_CHK_NULL_RETURN_CMERROR(eventEx);
510         cmTracker->AssociateEvent(eventEx);
511         eventEx->SetTaskOsData(cmdBuf->GetResource(), osSyncEvent);
512         event = static_cast<CmEventEx *>(eventEx);
513 
514         if (gGTPinInitialized)
515         {
516             eventEx->SetNotifier(ng);
517         }
518     }
519     else
520     {
521         event = nullptr;
522     }
523     cmTracker->Refresh();
524 
525     // refresh surfaces in surface manager
526     CM_CHK_CMSTATUS_RETURN(RefreshSurfaces(device));
527 
528 #if MDF_SURFACE_CONTENT_DUMP
529     if (state->dumpSurfaceContent && event != nullptr)
530     {
531         event->WaitForTaskFinished();
532         if (isDummyEventCreated)
533         {
534             DestoryEvent(queue, event);
535         }
536         for (uint32_t i = 0; i < kernelCount; i++)
537         {
538             kernels[i]->SurfaceDumpEx(i, taskId);
539         }
540     }
541 #endif
542 
543     return CM_SUCCESS;
544 }
545 
DestoryEvent(CMRT_UMD::CmQueueRT * queue,CMRT_UMD::CmEvent * & event)546 int CmExecutionAdv::DestoryEvent(CMRT_UMD::CmQueueRT *queue, CMRT_UMD::CmEvent *&event)
547 {
548     CmEventEx *eventEx = static_cast<CmEventEx *>(event);
549     MOS_Delete(eventEx);
550     event = nullptr;
551     return CM_SUCCESS;
552 }
553 
SubmitComputeTask(CMRT_UMD::CmQueueRT * queue,CMRT_UMD::CmTask * task,CMRT_UMD::CmEvent * & event,const CMRT_UMD::CmThreadGroupSpace * threadGroupSpace,MOS_GPU_CONTEXT gpuContext)554 int CmExecutionAdv::SubmitComputeTask(CMRT_UMD::CmQueueRT *queue,
555                 CMRT_UMD::CmTask *task,
556                 CMRT_UMD::CmEvent* &event,
557                 const CMRT_UMD::CmThreadGroupSpace* threadGroupSpace,
558                 MOS_GPU_CONTEXT gpuContext)
559 {
560     CM_ASSERTMESSAGE("Compute Tasks not support on this platform\n");
561     return CM_FAILURE;
562 }
563 
WaitForAllTasksFinished()564 int CmExecutionAdv::WaitForAllTasksFinished()
565 {
566     return m_tracker->WaitForAllTasksFinished();
567 }
568 
SetL3Config(const L3ConfigRegisterValues * l3Config)569 void CmExecutionAdv::SetL3Config(const L3ConfigRegisterValues *l3Config)
570 {
571     m_l3Values.config_register0 = l3Config->config_register0;
572     m_l3Values.config_register1 = l3Config->config_register1;
573     m_l3Values.config_register2 = l3Config->config_register2;
574     m_l3Values.config_register3 = l3Config->config_register3;
575 }
576 
SetSuggestedL3Config(L3_SUGGEST_CONFIG l3SuggestConfig)577 int CmExecutionAdv::SetSuggestedL3Config(L3_SUGGEST_CONFIG l3SuggestConfig)
578 {
579     const L3ConfigRegisterValues *table = nullptr;
580     uint32_t count = 0;
581 
582     switch(m_cmhal->platform.eRenderCoreFamily)
583     {
584 #if IGFX_GEN11_SUPPORTED
585         case IGFX_GEN11_CORE:
586             count = sizeof(ICL_L3_PLANE)/sizeof(L3ConfigRegisterValues);
587             table = (L3ConfigRegisterValues *)ICL_L3_PLANE;
588             break;
589 #endif
590         case IGFX_GEN12_CORE:
591             table = m_cmhal->cmHalInterface->m_l3Plane;
592             count = m_cmhal->cmHalInterface->m_l3ConfigCount;
593             break;
594 #if IGFX_GEN9_SUPPORTED
595         default:  // gen9
596             count = sizeof(SKL_L3_PLANE) / sizeof(L3ConfigRegisterValues);
597             table = (L3ConfigRegisterValues *)SKL_L3_PLANE;
598             break;
599 #else
600         default:
601             table = m_cmhal->cmHalInterface->m_l3Plane;
602             count = m_cmhal->cmHalInterface->m_l3ConfigCount;
603             break;
604 #endif
605     }
606     if (static_cast<size_t>(l3SuggestConfig) >= count)
607     {
608         return CM_INVALID_ARG_VALUE;
609     }
610     m_l3Values.config_register0 = table[l3SuggestConfig].config_register0;
611     m_l3Values.config_register1 = table[l3SuggestConfig].config_register1;
612     m_l3Values.config_register2 = table[l3SuggestConfig].config_register2;
613     m_l3Values.config_register3 = table[l3SuggestConfig].config_register3;
614 
615     return CM_SUCCESS;
616 }
617 
AssignNewTracker()618 int CmExecutionAdv::AssignNewTracker()
619 {
620     FrameTrackerProducer *trackerProducer = m_tracker->GetTrackerProducer();
621     return trackerProducer->AssignNewTracker();
622 }
623 
SubmitGpgpuTask(CMRT_UMD::CmQueueRT * queue,CMRT_UMD::CmTask * task,CMRT_UMD::CmEvent * & event,const CMRT_UMD::CmThreadGroupSpace * threadGroupSpace,MOS_GPU_CONTEXT gpuContext)624 int CmExecutionAdv::SubmitGpgpuTask(CMRT_UMD::CmQueueRT *queue,
625                 CMRT_UMD::CmTask *task,
626                 CMRT_UMD::CmEvent* &event,
627                 const CMRT_UMD::CmThreadGroupSpace* threadGroupSpace,
628                 MOS_GPU_CONTEXT gpuContext)
629 {
630     CM_NORMALMESSAGE("================ in fast path, gpgpu walker===================");
631 
632     CM_HAL_STATE * state = m_cmhal;
633     CM_CHK_NULL_RETURN_CMERROR(state->advExecutor);
634     CmTracker *cmTracker = state->advExecutor->GetTracker();
635     CmISH *cmish = state->advExecutor->GetISH();
636     CmDSH *cmdsh = state->advExecutor->GetDSH();
637     CM_CHK_NULL_RETURN_CMERROR(cmTracker);
638     CM_CHK_NULL_RETURN_CMERROR(cmish);
639     CM_CHK_NULL_RETURN_CMERROR(cmdsh);
640 
641     CLock Locker(m_criticalSection);
642 
643     bool isDummyEventCreated = false;
644 #if MDF_SURFACE_CONTENT_DUMP
645     if (state->dumpSurfaceContent && event == CM_NO_EVENT)
646     {
647         // if surface content dump is needed, the enqueueFast should be a blocking operation
648         // we need a dummy event here
649         isDummyEventCreated = true;
650         event = nullptr;
651     }
652 #endif
653 
654     state->osInterface->pfnSetGpuContext(state->osInterface, gpuContext);
655     state->osInterface->pfnResetOsStates(state->osInterface);
656     state->osInterface->pfnSetIndirectStateSize(state->osInterface, state->renderHal->dwIndirectHeapSize);
657 
658     CM_HAL_OSSYNC_PARAM syncParam;
659     syncParam.osSyncEvent = nullptr;
660 
661     // Call HAL layer to wait for Task finished with event-driven mechanism
662     CM_CHK_MOSSTATUS_RETURN(m_cmhal->pfnRegisterUMDNotifyEventHandle(m_cmhal, &syncParam));
663 
664     HANDLE osSyncEvent = syncParam.osSyncEvent;
665 
666     CmTaskRT *kernelArrayRT = static_cast<CmTaskRT *>(task);
667     uint32_t kernelCount = kernelArrayRT->GetKernelCount();
668     if (kernelCount == 0 || kernelCount > CM_MAX_KERNELS_PER_TASK)
669     {
670         return CM_FAILURE;
671     }
672 
673     // get an array of CmKernelEx
674     CmKernelEx *kernels[CM_MAX_KERNELS_PER_TASK];
675     MOS_ZeroMemory(kernels, sizeof(kernels));
676     for (uint32_t i = 0; i < kernelCount; i++)
677     {
678         kernels[i] = static_cast<CmKernelEx *>(kernelArrayRT->GetKernelPointer(i));
679         CM_CHK_NULL_RETURN_CMERROR(kernels[i]);
680         kernels[i]->AllocateCurbeAndFillImplicitArgs(const_cast<CmThreadGroupSpace *>(threadGroupSpace));
681     }
682 
683     // get CmDeviceRT
684     CmDeviceRT *device = nullptr;
685     kernels[0]->GetCmDevice(device);
686     CM_CHK_NULL_RETURN_CMERROR(device);
687 
688     // set printf buffer if needed
689     if (device->IsPrintEnable())
690     {
691         SurfaceIndex *printBufferIndex = nullptr;
692         device->CreatePrintBuffer();
693         device->GetPrintBufferIndex(printBufferIndex);
694         CM_ASSERT(printBufferIndex);
695         for (uint32_t i = 0; i < kernelCount; i++)
696         {
697             kernels[i]->SetStaticBuffer(CM_PRINTF_STATIC_BUFFER_ID, printBufferIndex);
698         }
699     }
700 
701     CmThreadGroupSpace *threadGroupSpaces[CM_MAX_KERNELS_PER_TASK];
702     MOS_ZeroMemory(threadGroupSpaces, sizeof(threadGroupSpaces));
703     if (threadGroupSpace == nullptr)
704     {
705         for (uint32_t i = 0; i < kernelCount; i++)
706         {
707             threadGroupSpaces[i] = kernels[i]->GetThreadGroupSpaceEx();
708         }
709     }
710 
711     CmCommandBuffer cmdBufData(state);
712     CmCommandBuffer *cmdBuf = &cmdBufData;
713     CM_CHK_NULL_RETURN_CMERROR(cmdBuf);
714 
715     uint32_t tracker;
716     uint32_t taskId;
717     MOS_STATUS mret = cmTracker->AssignFrameTracker(queue->GetFastTrackerIndex(), &taskId, &tracker, event != CM_NO_EVENT);
718     bool taskAssigned = (mret == MOS_STATUS_SUCCESS);
719 
720     cmdBuf->Initialize();
721 
722     CmSSH *ssh = cmdBuf->GetSSH();
723     CM_CHK_NULL_RETURN_CMERROR(ssh);
724 
725     // Add kernels to ISH directly
726     cmish->LoadKernels(kernels, kernelCount);
727 
728     // initialize SSH
729     ssh->Initialize(kernels, kernelCount);
730 
731     // create new media state
732     CmMediaState *cmMediaState = cmdsh->CreateMediaState();
733     CM_CHK_NULL_RETURN_CMERROR(cmMediaState);
734     cmMediaState->Allocate(kernels, kernelCount, 0, tracker);
735 
736     // generate curbe and load media id
737     for (uint32_t i = 0; i < kernelCount; i++)
738     {
739         ssh->AssignBindingTable();
740         kernels[i]->LoadReservedSamplers(cmMediaState, i);
741         kernels[i]->LoadReservedSurfaces(ssh);
742         kernels[i]->UpdateCurbe(ssh, cmMediaState, i);
743         kernels[i]->UpdateFastTracker(queue->GetFastTrackerIndex(), tracker);
744         cmMediaState->LoadCurbe(kernels[i], i);
745         CmThreadGroupSpace *tgs = (threadGroupSpace != nullptr) ?
746                                   const_cast<CmThreadGroupSpace *>(threadGroupSpace)
747                                   : threadGroupSpaces[i];
748         cmMediaState->LoadMediaID(kernels[i], i, ssh->GetBindingTableOffset(), tgs);
749     }
750 
751     // prepare cp resources
752     ssh->PrepareResourcesForCp();
753 
754     // get the position to write tracker
755     MOS_RESOURCE *trackerResource = nullptr;
756     uint32_t trackerOffset = 0;
757     cmTracker->GetLatestTrackerResource(queue->GetFastTrackerIndex(), &trackerResource, &trackerOffset);
758 
759     if (m_cmhal->platform.eRenderCoreFamily >= IGFX_GEN12_CORE)
760     {
761         cmdBuf->AddMMCProlog();
762     }
763     cmdBuf->AddFlushCacheAndSyncTask(false, false, nullptr);
764     cmdBuf->AddFlushCacheAndSyncTask(true, false, nullptr);
765 
766     cmdBuf->AddPowerOption(kernelArrayRT->GetPowerOption());
767 
768     cmdBuf->AddProtectedProlog();
769 
770     cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetStartOffset(taskId));
771     cmdBuf->AddUmdProfilerStart();
772 
773     cmdBuf->AddL3CacheConfig(&m_l3Values);
774 
775     cmdBuf->AddPreemptionConfig(true);
776 
777     cmdBuf->AddPipelineSelect(true);
778 
779     cmdBuf->AddStateBaseAddress(cmish, cmMediaState);
780 
781     cmdBuf->AddSipState(cmish->GetSipKernelOffset());
782 
783     MOS_STATUS eStatus = m_cmhal->osInterface->pfnRegisterResource(
784         m_cmhal->osInterface,
785         &m_cmhal->csrResource,
786         true,
787         true);
788 
789     if (eStatus != MOS_STATUS_SUCCESS)
790     {
791         cmdsh->DestroyMediaState(cmMediaState);
792         return eStatus;
793     }
794 
795     cmdBuf->AddCsrBaseAddress(&m_cmhal->csrResource);
796 
797     CM_TASK_CONFIG taskConfig;
798     kernelArrayRT->GetProperty(taskConfig);
799     cmdBuf->AddMediaVFE(cmMediaState, taskConfig.fusedEuDispatchFlag == CM_FUSED_EU_ENABLE);
800 
801     cmdBuf->AddCurbeLoad(cmMediaState);
802 
803     cmdBuf->AddMediaIDLoad(cmMediaState);
804 
805     const CM_EXECUTION_CONFIG *exeConfig = kernelArrayRT->GetKernelExecuteConfig();
806     CM_HAL_CONDITIONAL_BB_END_INFO *cbbInfos = kernelArrayRT->GetConditionalEndInfo();
807     uint64_t conditionalBitMap = kernelArrayRT->GetConditionalEndBitmap();
808     for (uint32_t i = 0; i < kernelCount; i ++)
809     {
810         CmThreadGroupSpace *tgs = (threadGroupSpace != nullptr) ? const_cast<CmThreadGroupSpace *>(threadGroupSpace) : threadGroupSpaces[i];
811 
812         // check whether need to insert a CBB
813         bool needCBB = conditionalBitMap & ((uint64_t)1 << i);
814         if (needCBB)
815         {
816             cmdBuf->AddFlushCacheAndSyncTask(false, true, nullptr);
817 
818             cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetEndOffset(taskId), true);
819 
820             cmdBuf->AddConditionalFrameTracker(trackerResource, trackerOffset, tracker, &cbbInfos[i]);
821 
822             cmdBuf->AddConditionalBatchBufferEnd(&cbbInfos[i]);
823         }
824 
825         if (i > 0)
826         {
827             bool syncFlag = false;
828             uint64_t syncBitMap = kernelArrayRT->GetSyncBitmap();
829             syncFlag = syncBitMap & ((uint64_t)1 << (i-1));
830             // add sync if necessary
831             if (syncFlag)
832             {
833                 cmdBuf->AddSyncBetweenKernels();
834             }
835         }
836 
837         cmdBuf->AddGpgpuWalker(tgs, kernels[i], i);
838     }
839 
840     cmdBuf->AddFlushCacheAndSyncTask(false, true, nullptr);
841 
842     cmdBuf->AddUmdProfilerEnd();
843     cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetEndOffset(taskId), true);
844 
845     cmdBuf->AddFrameTracker(trackerResource, trackerOffset, tracker);
846 
847     cmdBuf->AddDummyVFE();
848 
849     cmdBuf->AddBatchBufferEnd();
850 
851     cmdBuf->ReturnUnusedBuffer();
852 
853     cmdBuf->Submit();
854 
855     cmish->Submit(queue->GetFastTrackerIndex(), tracker);
856 
857 #if MDF_SURFACE_STATE_DUMP
858     if (m_cmhal->dumpSurfaceState)
859     {
860         ssh->DumpSSH();
861     }
862 #endif
863 
864 #if MDF_COMMAND_BUFFER_DUMP
865     if (m_cmhal->dumpCommandBuffer)
866     {
867         cmdBuf->Dump();
868     }
869 #endif
870 
871 #if MDF_CURBE_DATA_DUMP
872     if (m_cmhal->dumpCurbeData)
873     {
874         cmMediaState->Dump();
875     }
876 #endif
877 
878     cmMediaState->Submit();
879     cmdsh->DestroyMediaState(cmMediaState);
880 
881     if (event != CM_NO_EVENT && taskAssigned)
882     {
883         CmEventEx *eventEx = MOS_New(CmEventEx, state, taskId, cmTracker);
884         eventEx->SetTaskOsData(cmdBuf->GetResource(), osSyncEvent);
885         event = static_cast<CmEventEx *>(eventEx);
886     }
887     else
888     {
889         event = nullptr;
890     }
891     cmTracker->Refresh();
892 
893     // refresh surfaces in surface manager
894     CM_CHK_CMSTATUS_RETURN(RefreshSurfaces(device));
895 
896 #if MDF_SURFACE_CONTENT_DUMP
897     if (state->dumpSurfaceContent && event != nullptr)
898     {
899         event->WaitForTaskFinished();
900         if (isDummyEventCreated)
901         {
902             DestoryEvent(queue, event);
903         }
904         for (uint32_t i = 0; i < kernelCount; i++)
905         {
906             kernels[i]->SurfaceDumpEx(i, taskId);
907         }
908     }
909 #endif
910 
911     return CM_SUCCESS;
912 }
913 
SwitchToFastPath(CmTask * task)914 bool CmExecutionAdv::SwitchToFastPath(CmTask *task)
915 {
916     CmTaskRT *kernelArrayRT = static_cast<CmTaskRT *>(task);
917     uint32_t kernelCount = kernelArrayRT->GetKernelCount();
918     for (uint32_t i = 0; i < kernelCount; i++)
919     {
920         CmKernelEx *kernel = static_cast<CmKernelEx *>(kernelArrayRT->GetKernelPointer(i));
921         if (kernel == nullptr)
922         {
923             return false;
924         }
925         if (kernel->IsFastPathSupported() == false)
926         {
927             return false;
928         }
929     }
930     return true;
931 }
932 
933