1 /*
2 * Copyright (c) 2018, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file cm_execution_adv.cpp
24 //! \brief Contains Class CmExecutionAdv definitions
25 //!
26 #include "cm_execution_adv.h"
27 #include "cm_debug.h"
28 #include "cm_extension_creator.h"
29 #include "cm_surface_state_manager.h"
30 #include "cm_kernel_ex.h"
31 #include "cm_ish.h"
32 #include "cm_media_state.h"
33 #include "cm_command_buffer.h"
34 #include "cm_kernel_ex.h"
35 #include "cm_ssh.h"
36 #include "cm_event_ex.h"
37 #include "cm_tracker.h"
38 #include "cm_dsh.h"
39 #include "cm_task_rt.h"
40 #include "cm_thread_space_rt.h"
41 #include "cm_surface_manager.h"
42 #include "cm_queue_rt.h"
43 #include "cm_scratch_space.h"
44 #if IGFX_GEN9_SUPPORTED
45 #include "cm_hal_g9.h"
46 #endif
47 #if IGFX_GEN11_SUPPORTED
48 #include "cm_hal_g11.h"
49 #endif
50 #if IGFX_GEN12_SUPPORTED
51 #include "cm_hal_g12.h"
52 #endif
53
54 static bool gGTPinInitialized = false;
55
56 static bool advRegistered = CmExtensionCreator<CmExecutionAdv>::RegisterClass<CmExecutionAdv>();
57
58 using namespace CMRT_UMD;
59
CmExecutionAdv()60 CmExecutionAdv::CmExecutionAdv():
61 m_cmhal(nullptr),
62 m_tracker (nullptr),
63 m_ish (nullptr),
64 m_dsh (nullptr)
65 {
66 MOS_ZeroMemory(&m_l3Values, sizeof(m_l3Values));
67 }
68
Initialize(CM_HAL_STATE * state)69 MOS_STATUS CmExecutionAdv::Initialize(CM_HAL_STATE *state)
70 {
71 m_cmhal = state;
72 CM_CHK_NULL_RETURN_MOSERROR(m_cmhal);
73
74 m_tracker = MOS_New(CmTracker, m_cmhal->osInterface);
75 CM_CHK_NULL_RETURN_MOSERROR(m_tracker);
76 CM_CHK_MOSSTATUS_RETURN(m_tracker->Initialize());
77 FrameTrackerProducer *trackerProducer = m_tracker->GetTrackerProducer();
78
79 m_ish = MOS_New(CmISH);
80 CM_CHK_NULL_RETURN_MOSERROR(m_ish);
81 CM_CHK_MOSSTATUS_RETURN(m_ish->Initialize(m_cmhal, trackerProducer));
82
83 m_dsh = MOS_New(CmDSH, m_cmhal);
84 CM_CHK_NULL_RETURN_MOSERROR(m_dsh);
85 CM_CHK_MOSSTATUS_RETURN(m_dsh->Initialize(trackerProducer));
86
87 MOS_ZeroMemory(&m_l3Values, sizeof(m_l3Values));
88
89 return MOS_STATUS_SUCCESS;
90 }
91
~CmExecutionAdv()92 CmExecutionAdv::~CmExecutionAdv()
93 {
94 MOS_Delete(m_ish);
95 MOS_Delete(m_dsh);
96 MOS_Delete(m_tracker);
97 }
98
Create2DStateMgr(MOS_RESOURCE * resource)99 CmSurfaceState2Dor3DMgr* CmExecutionAdv::Create2DStateMgr(MOS_RESOURCE *resource)
100 {
101 return MOS_New(CmSurfaceState2Dor3DMgr, m_cmhal, resource);
102 }
103
Create3DStateMgr(MOS_RESOURCE * resource)104 CmSurfaceState2Dor3DMgr* CmExecutionAdv::Create3DStateMgr(MOS_RESOURCE *resource)
105 {
106 return MOS_New(CmSurfaceState3DMgr, m_cmhal, resource);
107 }
108
Delete2Dor3DStateMgr(CmSurfaceState2Dor3DMgr * stateMgr)109 void CmExecutionAdv::Delete2Dor3DStateMgr(CmSurfaceState2Dor3DMgr *stateMgr)
110 {
111 MOS_Delete(stateMgr);
112 }
113
CreateBufferStateMgr(MOS_RESOURCE * resource)114 CmSurfaceStateBufferMgr* CmExecutionAdv::CreateBufferStateMgr(MOS_RESOURCE *resource)
115 {
116 return MOS_New(CmSurfaceStateBufferMgr, m_cmhal, resource);
117 }
118
DeleteBufferStateMgr(CmSurfaceStateBufferMgr * stateMgr)119 void CmExecutionAdv::DeleteBufferStateMgr(CmSurfaceStateBufferMgr *stateMgr)
120 {
121 MOS_Delete(stateMgr);
122 }
123
DeleteSurfStateVme(CmSurfaceStateVME * state)124 void CmExecutionAdv::DeleteSurfStateVme(CmSurfaceStateVME *state)
125 {
126 MOS_Delete(state);
127 }
128
SetBufferOrigSize(CmSurfaceStateBufferMgr * stateMgr,uint32_t size)129 void CmExecutionAdv::SetBufferOrigSize(CmSurfaceStateBufferMgr *stateMgr, uint32_t size)
130 {
131 if (stateMgr)
132 {
133 stateMgr->SetOrigSize(size);
134 }
135 }
136
SetBufferMemoryObjectControl(CmSurfaceStateBufferMgr * stateMgr,uint16_t mocs)137 void CmExecutionAdv::SetBufferMemoryObjectControl(CmSurfaceStateBufferMgr *stateMgr, uint16_t mocs)
138 {
139 if (stateMgr)
140 {
141 stateMgr->SetMemoryObjectControl(mocs);
142 }
143 }
144
Set2Dor3DOrigFormat(CmSurfaceState2Dor3DMgr * stateMgr,MOS_FORMAT format)145 void CmExecutionAdv::Set2Dor3DOrigFormat(CmSurfaceState2Dor3DMgr *stateMgr, MOS_FORMAT format)
146 {
147 if (stateMgr)
148 {
149 stateMgr->SetOrigFormat(format);
150 }
151 }
152
Set2Dor3DOrigDimension(CmSurfaceState2Dor3DMgr * stateMgr,uint32_t width,uint32_t height,uint32_t depth)153 void CmExecutionAdv::Set2Dor3DOrigDimension(CmSurfaceState2Dor3DMgr *stateMgr, uint32_t width, uint32_t height, uint32_t depth)
154 {
155 if (stateMgr)
156 {
157 stateMgr->SetOrigDimension(width, height, depth);
158 }
159 }
160
Set2DRenderTarget(CmSurfaceState2Dor3DMgr * stateMgr,bool renderTarget)161 void CmExecutionAdv::Set2DRenderTarget(CmSurfaceState2Dor3DMgr *stateMgr, bool renderTarget)
162 {
163 if (stateMgr)
164 {
165 stateMgr->SetRenderTarget(renderTarget);
166 }
167 }
168
Set2Dor3DMemoryObjectControl(CmSurfaceState2Dor3DMgr * stateMgr,uint16_t mocs)169 void CmExecutionAdv::Set2Dor3DMemoryObjectControl(CmSurfaceState2Dor3DMgr *stateMgr, uint16_t mocs)
170 {
171 if (stateMgr)
172 {
173 stateMgr->SetMemoryObjectControl(mocs);
174 }
175 }
176
Set2DFrameType(CmSurfaceState2Dor3DMgr * stateMgr,CM_FRAME_TYPE frameType)177 void CmExecutionAdv::Set2DFrameType(CmSurfaceState2Dor3DMgr *stateMgr, CM_FRAME_TYPE frameType)
178 {
179 if (stateMgr)
180 {
181 stateMgr->SetFrameType(frameType);
182 }
183 }
184
SetRotationFlag(CmSurfaceState2Dor3DMgr * stateMgr,uint32_t rotation)185 void CmExecutionAdv::SetRotationFlag(CmSurfaceState2Dor3DMgr *stateMgr, uint32_t rotation)
186 {
187 if (stateMgr)
188 {
189 stateMgr->SetRotationFlag(rotation);
190 }
191 }
192
SetChromaSitting(CmSurfaceState2Dor3DMgr * stateMgr,uint8_t chromaSitting)193 void CmExecutionAdv::SetChromaSitting(CmSurfaceState2Dor3DMgr *stateMgr, uint8_t chromaSitting)
194 {
195 if (stateMgr)
196 {
197 stateMgr->SetChromaSitting(chromaSitting);
198 }
199 }
200
GetFastTrackerProducer()201 FrameTrackerProducer *CmExecutionAdv::GetFastTrackerProducer()
202 {
203 return m_tracker->GetTrackerProducer();
204 }
205
CreateKernelRT(CmDeviceRT * device,CmProgramRT * program,uint32_t kernelIndex,uint32_t kernelSeqNum)206 CmKernelRT *CmExecutionAdv::CreateKernelRT(CmDeviceRT *device,
207 CmProgramRT *program,
208 uint32_t kernelIndex,
209 uint32_t kernelSeqNum)
210 {
211 return new (std::nothrow) CmKernelEx(device, program, kernelIndex, kernelSeqNum);
212 }
213
RefreshSurfaces(CmDeviceRT * device)214 int CmExecutionAdv::RefreshSurfaces(CmDeviceRT *device)
215 {
216 CM_CHK_NULL_RETURN_CMERROR(device);
217
218 CmSurfaceManager *surfaceMgr = nullptr;
219 CSync * surfaceLock = nullptr;
220
221 device->GetSurfaceManager(surfaceMgr);
222 CM_CHK_NULL_RETURN_CMERROR(surfaceMgr);
223
224 surfaceLock = device->GetSurfaceCreationLock();
225 CM_CHK_NULL_RETURN_CMERROR(surfaceLock);
226
227 uint32_t freeSurfNum = 0;
228 surfaceLock->Acquire();
229 surfaceMgr->RefreshDelayDestroySurfaces(freeSurfNum);
230 surfaceLock->Release();
231
232 return CM_SUCCESS;
233 }
234
SubmitTask(CMRT_UMD::CmQueueRT * queue,CMRT_UMD::CmTask * task,CMRT_UMD::CmEvent * & event,const CMRT_UMD::CmThreadSpace * threadSpace,MOS_GPU_CONTEXT gpuContext)235 int CmExecutionAdv::SubmitTask(CMRT_UMD::CmQueueRT *queue,
236 CMRT_UMD::CmTask *task,
237 CMRT_UMD::CmEvent *&event,
238 const CMRT_UMD::CmThreadSpace *threadSpace,
239 MOS_GPU_CONTEXT gpuContext)
240 {
241 CM_NORMALMESSAGE("================ in fast path, media walker===================");
242
243 CM_HAL_STATE * state = m_cmhal;
244 CM_CHK_NULL_RETURN_CMERROR(state->advExecutor);
245 CmTracker *cmTracker = state->advExecutor->GetTracker();
246 CmISH *cmish = state->advExecutor->GetISH();
247 CmDSH *cmdsh = state->advExecutor->GetDSH();
248 CM_CHK_NULL_RETURN_CMERROR(cmTracker);
249 CM_CHK_NULL_RETURN_CMERROR(cmish);
250 CM_CHK_NULL_RETURN_CMERROR(cmdsh);
251
252 CLock Locker(m_criticalSection);
253
254 bool isDummyEventCreated = false;
255 #if MDF_SURFACE_CONTENT_DUMP
256 if (state->dumpSurfaceContent && event == CM_NO_EVENT)
257 {
258 // if surface content dump is needed, the enqueueFast should be a blocking operation
259 // we need a dummy event here
260 isDummyEventCreated = true;
261 event = nullptr;
262 }
263 #endif
264
265 state->osInterface->pfnResetOsStates(state->osInterface);
266 state->osInterface->pfnSetIndirectStateSize(state->osInterface, state->renderHal->dwIndirectHeapSize);
267
268 CM_HAL_OSSYNC_PARAM syncParam;
269 syncParam.osSyncEvent = nullptr;
270
271 // Call HAL layer to wait for Task finished with event-driven mechanism
272 CM_CHK_MOSSTATUS_RETURN(m_cmhal->pfnRegisterUMDNotifyEventHandle(m_cmhal, &syncParam));
273
274 HANDLE osSyncEvent = syncParam.osSyncEvent;
275
276 CmTaskRT *kernelArrayRT = static_cast<CmTaskRT *>(task);
277 uint32_t kernelCount = kernelArrayRT->GetKernelCount();
278 if (kernelCount == 0 || kernelCount > CM_MAX_KERNELS_PER_TASK)
279 {
280 return CM_FAILURE;
281 }
282
283 // get an array of CmKernelEx
284 CmKernelEx *kernels[CM_MAX_KERNELS_PER_TASK];
285 MOS_ZeroMemory(kernels, sizeof(kernels));
286 for (uint32_t i = 0; i < kernelCount; i++)
287 {
288 kernels[i] = static_cast<CmKernelEx *>(kernelArrayRT->GetKernelPointer(i));
289 CM_CHK_NULL_RETURN_CMERROR(kernels[i]);
290 kernels[i]->AllocateCurbe();
291 }
292
293 // get CmDeviceRT
294 CmDeviceRT *device = nullptr;
295 kernels[0]->GetCmDevice(device);
296 CM_CHK_NULL_RETURN_CMERROR(device);
297
298 // set printf buffer if needed
299 if (device->IsPrintEnable())
300 {
301 SurfaceIndex *printBufferIndex = nullptr;
302 device->CreatePrintBuffer();
303 device->GetPrintBufferIndex(printBufferIndex);
304 CM_ASSERT(printBufferIndex);
305 for (uint32_t i = 0; i < kernelCount; i++)
306 {
307 kernels[i]->SetStaticBuffer(CM_PRINTF_STATIC_BUFFER_ID, printBufferIndex);
308 }
309 }
310
311 const CmThreadSpaceRT *threadSpaceRTConst = static_cast<const CmThreadSpaceRT *>(threadSpace);
312 CmThreadSpaceRT *threadSpaceRT = const_cast<CmThreadSpaceRT *>(threadSpaceRTConst);
313 CmThreadSpaceRT *threadSpaces[CM_MAX_KERNELS_PER_TASK];
314 MOS_ZeroMemory(threadSpaces, sizeof(threadSpaces));
315 if (threadSpaceRT == nullptr)
316 {
317 for (uint32_t i = 0; i < kernelCount; i++)
318 {
319 threadSpaces[i] = kernels[i]->GetThreadSpaceEx();
320 }
321 }
322
323 // if SWSB is used, update the SWSB arguments in kenrel
324 if (!state->cmHalInterface->IsScoreboardParamNeeded())
325 {
326 for (uint32_t i = 0; i < kernelCount; i++)
327 {
328 kernels[i]->UpdateSWSBArgs(threadSpaceRT);
329 }
330 }
331
332 CmCommandBuffer cmdBufData(state);
333 CmCommandBuffer *cmdBuf = &cmdBufData;
334 CM_CHK_NULL_RETURN_CMERROR(cmdBuf);
335
336 uint32_t tracker;
337 uint32_t taskId;
338 MOS_STATUS mret = cmTracker->AssignFrameTracker(queue->GetFastTrackerIndex(), &taskId, &tracker, event != CM_NO_EVENT);
339 bool taskAssigned = (mret == MOS_STATUS_SUCCESS);
340
341 cmdBuf->Initialize();
342
343 CmSSH *ssh = cmdBuf->GetSSH();
344 CM_CHK_NULL_RETURN_CMERROR(ssh);
345
346 // Add kernels to ISH directly
347 cmish->LoadKernels(kernels, kernelCount);
348
349 // initialize SSH
350 ssh->Initialize(kernels, kernelCount);
351
352 // create new media state
353 CmMediaState *cmMediaState = cmdsh->CreateMediaState();
354 CM_CHK_NULL_RETURN_CMERROR(cmMediaState);
355 cmMediaState->Allocate(kernels, kernelCount, queue->GetFastTrackerIndex(), tracker);
356
357 // generate curbe and load media id
358 for (uint32_t i = 0; i < kernelCount; i++)
359 {
360 ssh->AssignBindingTable();
361 kernels[i]->LoadReservedSamplers(cmMediaState, i);
362 kernels[i]->LoadReservedSurfaces(ssh);
363 kernels[i]->UpdateCurbe(ssh, cmMediaState, i);
364 kernels[i]->UpdateFastTracker(queue->GetFastTrackerIndex(), tracker);
365 cmMediaState->LoadCurbe(kernels[i], i);
366 cmMediaState->LoadMediaID(kernels[i], i, ssh->GetBindingTableOffset());
367 }
368
369 // prepare cp resources
370 ssh->PrepareResourcesForCp();
371
372 // get the position to write tracker
373 MOS_RESOURCE *trackerResource = nullptr;
374 uint32_t trackerOffset = 0;
375 cmTracker->GetLatestTrackerResource(queue->GetFastTrackerIndex(), &trackerResource, &trackerOffset);
376
377 // call gtpin callback if needed
378 CmNotifierGroup *ng = nullptr;
379 if (gGTPinInitialized && taskAssigned)
380 {
381 ng = device->GetNotifiers();
382 ng->NotifyTaskFlushed(device, task, ssh, taskId);
383 }
384
385 if (m_cmhal->platform.eRenderCoreFamily >= IGFX_GEN12_CORE)
386 {
387 cmdBuf->AddMMCProlog();
388 }
389 cmdBuf->AddFlushCacheAndSyncTask(false, false, nullptr);
390 cmdBuf->AddFlushCacheAndSyncTask(true, false, nullptr);
391
392 cmdBuf->AddPowerOption(kernelArrayRT->GetPowerOption());
393
394 cmdBuf->AddProtectedProlog();
395
396 cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetStartOffset(taskId));
397 cmdBuf->AddUmdProfilerStart();
398
399 cmdBuf->AddL3CacheConfig(&m_l3Values);
400
401 cmdBuf->AddPreemptionConfig(false);
402
403 cmdBuf->AddPipelineSelect(false);
404
405 cmdBuf->AddStateBaseAddress(cmish, cmMediaState);
406
407 CM_TASK_CONFIG taskConfig;
408 kernelArrayRT->GetProperty(taskConfig);
409 if (threadSpaceRT)
410 {
411 cmdBuf->AddMediaVFE(cmMediaState, taskConfig.fusedEuDispatchFlag == CM_FUSED_EU_ENABLE, &threadSpaceRT); // global thread space
412 }
413 else
414 {
415 cmdBuf->AddMediaVFE(cmMediaState, taskConfig.fusedEuDispatchFlag == CM_FUSED_EU_ENABLE, threadSpaces, kernelCount);
416 }
417
418 cmdBuf->AddCurbeLoad(cmMediaState);
419
420 cmdBuf->AddMediaIDLoad(cmMediaState);
421
422 CM_HAL_CONDITIONAL_BB_END_INFO *cbbInfos = kernelArrayRT->GetConditionalEndInfo();
423 uint64_t conditionalBitMap = kernelArrayRT->GetConditionalEndBitmap();
424 for (uint32_t i = 0; i < kernelCount; i ++)
425 {
426 CmThreadSpaceRT *ts = (threadSpaceRT != nullptr) ? threadSpaceRT: threadSpaces[i];
427
428 // check whether need to insert a CBB
429 bool needCBB = conditionalBitMap & ((uint64_t)1 << i);
430 if (needCBB)
431 {
432 cmdBuf->AddFlushCacheAndSyncTask(false, true, nullptr);
433
434 cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetEndOffset(taskId), true);
435
436 cmdBuf->AddConditionalFrameTracker(trackerResource, trackerOffset, tracker, &cbbInfos[i]);
437
438 cmdBuf->AddConditionalBatchBufferEnd(&cbbInfos[i]);
439 }
440
441 if (i > 0)
442 {
443 // check whether the next kernel has a dependency pattern
444 uint32_t dcount = 0;
445 if (ts != nullptr)
446 {
447 CM_HAL_DEPENDENCY *dependency;
448 ts->GetDependency(dependency);
449 dcount = dependency->count;
450 }
451
452 bool syncFlag = false;
453 uint64_t syncBitMap = kernelArrayRT->GetSyncBitmap();
454 syncFlag = syncBitMap & ((uint64_t)1 << (i-1));
455 // add sync if necessary
456 if ((dcount != 0) || syncFlag)
457 {
458 cmdBuf->AddSyncBetweenKernels();
459 }
460 }
461
462 cmdBuf->AddMediaObjectWalker(ts, i);
463 }
464
465 cmdBuf->AddFlushCacheAndSyncTask(false, true, nullptr);
466
467 cmdBuf->AddUmdProfilerEnd();
468 cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetEndOffset(taskId), true);
469
470 cmdBuf->AddFrameTracker(trackerResource, trackerOffset, tracker);
471
472 cmdBuf->AddDummyVFE();
473
474 cmdBuf->AddBatchBufferEnd();
475
476 cmdBuf->ReturnUnusedBuffer();
477
478 #if MDF_SURFACE_STATE_DUMP
479 if (m_cmhal->dumpSurfaceState)
480 {
481 ssh->DumpSSH();
482 }
483 #endif
484
485 #if MDF_COMMAND_BUFFER_DUMP
486 if (m_cmhal->dumpCommandBuffer)
487 {
488 cmdBuf->Dump();
489 }
490 #endif
491
492 #if MDF_CURBE_DATA_DUMP
493 if (m_cmhal->dumpCurbeData)
494 {
495 cmMediaState->Dump();
496 }
497 #endif
498
499 cmdBuf->Submit();
500
501 cmish->Submit(queue->GetFastTrackerIndex(), tracker);
502
503 cmMediaState->Submit();
504 cmdsh->DestroyMediaState(cmMediaState);
505
506 if (event != CM_NO_EVENT && taskAssigned)
507 {
508 CmEventEx *eventEx = MOS_New(CmEventEx, state, taskId, cmTracker);
509 CM_CHK_NULL_RETURN_CMERROR(eventEx);
510 cmTracker->AssociateEvent(eventEx);
511 eventEx->SetTaskOsData(cmdBuf->GetResource(), osSyncEvent);
512 event = static_cast<CmEventEx *>(eventEx);
513
514 if (gGTPinInitialized)
515 {
516 eventEx->SetNotifier(ng);
517 }
518 }
519 else
520 {
521 event = nullptr;
522 }
523 cmTracker->Refresh();
524
525 // refresh surfaces in surface manager
526 CM_CHK_CMSTATUS_RETURN(RefreshSurfaces(device));
527
528 #if MDF_SURFACE_CONTENT_DUMP
529 if (state->dumpSurfaceContent && event != nullptr)
530 {
531 event->WaitForTaskFinished();
532 if (isDummyEventCreated)
533 {
534 DestoryEvent(queue, event);
535 }
536 for (uint32_t i = 0; i < kernelCount; i++)
537 {
538 kernels[i]->SurfaceDumpEx(i, taskId);
539 }
540 }
541 #endif
542
543 return CM_SUCCESS;
544 }
545
DestoryEvent(CMRT_UMD::CmQueueRT * queue,CMRT_UMD::CmEvent * & event)546 int CmExecutionAdv::DestoryEvent(CMRT_UMD::CmQueueRT *queue, CMRT_UMD::CmEvent *&event)
547 {
548 CmEventEx *eventEx = static_cast<CmEventEx *>(event);
549 MOS_Delete(eventEx);
550 event = nullptr;
551 return CM_SUCCESS;
552 }
553
SubmitComputeTask(CMRT_UMD::CmQueueRT * queue,CMRT_UMD::CmTask * task,CMRT_UMD::CmEvent * & event,const CMRT_UMD::CmThreadGroupSpace * threadGroupSpace,MOS_GPU_CONTEXT gpuContext)554 int CmExecutionAdv::SubmitComputeTask(CMRT_UMD::CmQueueRT *queue,
555 CMRT_UMD::CmTask *task,
556 CMRT_UMD::CmEvent* &event,
557 const CMRT_UMD::CmThreadGroupSpace* threadGroupSpace,
558 MOS_GPU_CONTEXT gpuContext)
559 {
560 CM_ASSERTMESSAGE("Compute Tasks not support on this platform\n");
561 return CM_FAILURE;
562 }
563
WaitForAllTasksFinished()564 int CmExecutionAdv::WaitForAllTasksFinished()
565 {
566 return m_tracker->WaitForAllTasksFinished();
567 }
568
SetL3Config(const L3ConfigRegisterValues * l3Config)569 void CmExecutionAdv::SetL3Config(const L3ConfigRegisterValues *l3Config)
570 {
571 m_l3Values.config_register0 = l3Config->config_register0;
572 m_l3Values.config_register1 = l3Config->config_register1;
573 m_l3Values.config_register2 = l3Config->config_register2;
574 m_l3Values.config_register3 = l3Config->config_register3;
575 }
576
SetSuggestedL3Config(L3_SUGGEST_CONFIG l3SuggestConfig)577 int CmExecutionAdv::SetSuggestedL3Config(L3_SUGGEST_CONFIG l3SuggestConfig)
578 {
579 const L3ConfigRegisterValues *table = nullptr;
580 uint32_t count = 0;
581
582 switch(m_cmhal->platform.eRenderCoreFamily)
583 {
584 #if IGFX_GEN11_SUPPORTED
585 case IGFX_GEN11_CORE:
586 count = sizeof(ICL_L3_PLANE)/sizeof(L3ConfigRegisterValues);
587 table = (L3ConfigRegisterValues *)ICL_L3_PLANE;
588 break;
589 #endif
590 case IGFX_GEN12_CORE:
591 table = m_cmhal->cmHalInterface->m_l3Plane;
592 count = m_cmhal->cmHalInterface->m_l3ConfigCount;
593 break;
594 #if IGFX_GEN9_SUPPORTED
595 default: // gen9
596 count = sizeof(SKL_L3_PLANE) / sizeof(L3ConfigRegisterValues);
597 table = (L3ConfigRegisterValues *)SKL_L3_PLANE;
598 break;
599 #else
600 default:
601 table = m_cmhal->cmHalInterface->m_l3Plane;
602 count = m_cmhal->cmHalInterface->m_l3ConfigCount;
603 break;
604 #endif
605 }
606 if (static_cast<size_t>(l3SuggestConfig) >= count)
607 {
608 return CM_INVALID_ARG_VALUE;
609 }
610 m_l3Values.config_register0 = table[l3SuggestConfig].config_register0;
611 m_l3Values.config_register1 = table[l3SuggestConfig].config_register1;
612 m_l3Values.config_register2 = table[l3SuggestConfig].config_register2;
613 m_l3Values.config_register3 = table[l3SuggestConfig].config_register3;
614
615 return CM_SUCCESS;
616 }
617
AssignNewTracker()618 int CmExecutionAdv::AssignNewTracker()
619 {
620 FrameTrackerProducer *trackerProducer = m_tracker->GetTrackerProducer();
621 return trackerProducer->AssignNewTracker();
622 }
623
SubmitGpgpuTask(CMRT_UMD::CmQueueRT * queue,CMRT_UMD::CmTask * task,CMRT_UMD::CmEvent * & event,const CMRT_UMD::CmThreadGroupSpace * threadGroupSpace,MOS_GPU_CONTEXT gpuContext)624 int CmExecutionAdv::SubmitGpgpuTask(CMRT_UMD::CmQueueRT *queue,
625 CMRT_UMD::CmTask *task,
626 CMRT_UMD::CmEvent* &event,
627 const CMRT_UMD::CmThreadGroupSpace* threadGroupSpace,
628 MOS_GPU_CONTEXT gpuContext)
629 {
630 CM_NORMALMESSAGE("================ in fast path, gpgpu walker===================");
631
632 CM_HAL_STATE * state = m_cmhal;
633 CM_CHK_NULL_RETURN_CMERROR(state->advExecutor);
634 CmTracker *cmTracker = state->advExecutor->GetTracker();
635 CmISH *cmish = state->advExecutor->GetISH();
636 CmDSH *cmdsh = state->advExecutor->GetDSH();
637 CM_CHK_NULL_RETURN_CMERROR(cmTracker);
638 CM_CHK_NULL_RETURN_CMERROR(cmish);
639 CM_CHK_NULL_RETURN_CMERROR(cmdsh);
640
641 CLock Locker(m_criticalSection);
642
643 bool isDummyEventCreated = false;
644 #if MDF_SURFACE_CONTENT_DUMP
645 if (state->dumpSurfaceContent && event == CM_NO_EVENT)
646 {
647 // if surface content dump is needed, the enqueueFast should be a blocking operation
648 // we need a dummy event here
649 isDummyEventCreated = true;
650 event = nullptr;
651 }
652 #endif
653
654 state->osInterface->pfnSetGpuContext(state->osInterface, gpuContext);
655 state->osInterface->pfnResetOsStates(state->osInterface);
656 state->osInterface->pfnSetIndirectStateSize(state->osInterface, state->renderHal->dwIndirectHeapSize);
657
658 CM_HAL_OSSYNC_PARAM syncParam;
659 syncParam.osSyncEvent = nullptr;
660
661 // Call HAL layer to wait for Task finished with event-driven mechanism
662 CM_CHK_MOSSTATUS_RETURN(m_cmhal->pfnRegisterUMDNotifyEventHandle(m_cmhal, &syncParam));
663
664 HANDLE osSyncEvent = syncParam.osSyncEvent;
665
666 CmTaskRT *kernelArrayRT = static_cast<CmTaskRT *>(task);
667 uint32_t kernelCount = kernelArrayRT->GetKernelCount();
668 if (kernelCount == 0 || kernelCount > CM_MAX_KERNELS_PER_TASK)
669 {
670 return CM_FAILURE;
671 }
672
673 // get an array of CmKernelEx
674 CmKernelEx *kernels[CM_MAX_KERNELS_PER_TASK];
675 MOS_ZeroMemory(kernels, sizeof(kernels));
676 for (uint32_t i = 0; i < kernelCount; i++)
677 {
678 kernels[i] = static_cast<CmKernelEx *>(kernelArrayRT->GetKernelPointer(i));
679 CM_CHK_NULL_RETURN_CMERROR(kernels[i]);
680 kernels[i]->AllocateCurbeAndFillImplicitArgs(const_cast<CmThreadGroupSpace *>(threadGroupSpace));
681 }
682
683 // get CmDeviceRT
684 CmDeviceRT *device = nullptr;
685 kernels[0]->GetCmDevice(device);
686 CM_CHK_NULL_RETURN_CMERROR(device);
687
688 // set printf buffer if needed
689 if (device->IsPrintEnable())
690 {
691 SurfaceIndex *printBufferIndex = nullptr;
692 device->CreatePrintBuffer();
693 device->GetPrintBufferIndex(printBufferIndex);
694 CM_ASSERT(printBufferIndex);
695 for (uint32_t i = 0; i < kernelCount; i++)
696 {
697 kernels[i]->SetStaticBuffer(CM_PRINTF_STATIC_BUFFER_ID, printBufferIndex);
698 }
699 }
700
701 CmThreadGroupSpace *threadGroupSpaces[CM_MAX_KERNELS_PER_TASK];
702 MOS_ZeroMemory(threadGroupSpaces, sizeof(threadGroupSpaces));
703 if (threadGroupSpace == nullptr)
704 {
705 for (uint32_t i = 0; i < kernelCount; i++)
706 {
707 threadGroupSpaces[i] = kernels[i]->GetThreadGroupSpaceEx();
708 }
709 }
710
711 CmCommandBuffer cmdBufData(state);
712 CmCommandBuffer *cmdBuf = &cmdBufData;
713 CM_CHK_NULL_RETURN_CMERROR(cmdBuf);
714
715 uint32_t tracker;
716 uint32_t taskId;
717 MOS_STATUS mret = cmTracker->AssignFrameTracker(queue->GetFastTrackerIndex(), &taskId, &tracker, event != CM_NO_EVENT);
718 bool taskAssigned = (mret == MOS_STATUS_SUCCESS);
719
720 cmdBuf->Initialize();
721
722 CmSSH *ssh = cmdBuf->GetSSH();
723 CM_CHK_NULL_RETURN_CMERROR(ssh);
724
725 // Add kernels to ISH directly
726 cmish->LoadKernels(kernels, kernelCount);
727
728 // initialize SSH
729 ssh->Initialize(kernels, kernelCount);
730
731 // create new media state
732 CmMediaState *cmMediaState = cmdsh->CreateMediaState();
733 CM_CHK_NULL_RETURN_CMERROR(cmMediaState);
734 cmMediaState->Allocate(kernels, kernelCount, 0, tracker);
735
736 // generate curbe and load media id
737 for (uint32_t i = 0; i < kernelCount; i++)
738 {
739 ssh->AssignBindingTable();
740 kernels[i]->LoadReservedSamplers(cmMediaState, i);
741 kernels[i]->LoadReservedSurfaces(ssh);
742 kernels[i]->UpdateCurbe(ssh, cmMediaState, i);
743 kernels[i]->UpdateFastTracker(queue->GetFastTrackerIndex(), tracker);
744 cmMediaState->LoadCurbe(kernels[i], i);
745 CmThreadGroupSpace *tgs = (threadGroupSpace != nullptr) ?
746 const_cast<CmThreadGroupSpace *>(threadGroupSpace)
747 : threadGroupSpaces[i];
748 cmMediaState->LoadMediaID(kernels[i], i, ssh->GetBindingTableOffset(), tgs);
749 }
750
751 // prepare cp resources
752 ssh->PrepareResourcesForCp();
753
754 // get the position to write tracker
755 MOS_RESOURCE *trackerResource = nullptr;
756 uint32_t trackerOffset = 0;
757 cmTracker->GetLatestTrackerResource(queue->GetFastTrackerIndex(), &trackerResource, &trackerOffset);
758
759 if (m_cmhal->platform.eRenderCoreFamily >= IGFX_GEN12_CORE)
760 {
761 cmdBuf->AddMMCProlog();
762 }
763 cmdBuf->AddFlushCacheAndSyncTask(false, false, nullptr);
764 cmdBuf->AddFlushCacheAndSyncTask(true, false, nullptr);
765
766 cmdBuf->AddPowerOption(kernelArrayRT->GetPowerOption());
767
768 cmdBuf->AddProtectedProlog();
769
770 cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetStartOffset(taskId));
771 cmdBuf->AddUmdProfilerStart();
772
773 cmdBuf->AddL3CacheConfig(&m_l3Values);
774
775 cmdBuf->AddPreemptionConfig(true);
776
777 cmdBuf->AddPipelineSelect(true);
778
779 cmdBuf->AddStateBaseAddress(cmish, cmMediaState);
780
781 cmdBuf->AddSipState(cmish->GetSipKernelOffset());
782
783 MOS_STATUS eStatus = m_cmhal->osInterface->pfnRegisterResource(
784 m_cmhal->osInterface,
785 &m_cmhal->csrResource,
786 true,
787 true);
788
789 if (eStatus != MOS_STATUS_SUCCESS)
790 {
791 cmdsh->DestroyMediaState(cmMediaState);
792 return eStatus;
793 }
794
795 cmdBuf->AddCsrBaseAddress(&m_cmhal->csrResource);
796
797 CM_TASK_CONFIG taskConfig;
798 kernelArrayRT->GetProperty(taskConfig);
799 cmdBuf->AddMediaVFE(cmMediaState, taskConfig.fusedEuDispatchFlag == CM_FUSED_EU_ENABLE);
800
801 cmdBuf->AddCurbeLoad(cmMediaState);
802
803 cmdBuf->AddMediaIDLoad(cmMediaState);
804
805 const CM_EXECUTION_CONFIG *exeConfig = kernelArrayRT->GetKernelExecuteConfig();
806 CM_HAL_CONDITIONAL_BB_END_INFO *cbbInfos = kernelArrayRT->GetConditionalEndInfo();
807 uint64_t conditionalBitMap = kernelArrayRT->GetConditionalEndBitmap();
808 for (uint32_t i = 0; i < kernelCount; i ++)
809 {
810 CmThreadGroupSpace *tgs = (threadGroupSpace != nullptr) ? const_cast<CmThreadGroupSpace *>(threadGroupSpace) : threadGroupSpaces[i];
811
812 // check whether need to insert a CBB
813 bool needCBB = conditionalBitMap & ((uint64_t)1 << i);
814 if (needCBB)
815 {
816 cmdBuf->AddFlushCacheAndSyncTask(false, true, nullptr);
817
818 cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetEndOffset(taskId), true);
819
820 cmdBuf->AddConditionalFrameTracker(trackerResource, trackerOffset, tracker, &cbbInfos[i]);
821
822 cmdBuf->AddConditionalBatchBufferEnd(&cbbInfos[i]);
823 }
824
825 if (i > 0)
826 {
827 bool syncFlag = false;
828 uint64_t syncBitMap = kernelArrayRT->GetSyncBitmap();
829 syncFlag = syncBitMap & ((uint64_t)1 << (i-1));
830 // add sync if necessary
831 if (syncFlag)
832 {
833 cmdBuf->AddSyncBetweenKernels();
834 }
835 }
836
837 cmdBuf->AddGpgpuWalker(tgs, kernels[i], i);
838 }
839
840 cmdBuf->AddFlushCacheAndSyncTask(false, true, nullptr);
841
842 cmdBuf->AddUmdProfilerEnd();
843 cmdBuf->AddReadTimeStamp(cmTracker->GetResource(), cmTracker->GetEndOffset(taskId), true);
844
845 cmdBuf->AddFrameTracker(trackerResource, trackerOffset, tracker);
846
847 cmdBuf->AddDummyVFE();
848
849 cmdBuf->AddBatchBufferEnd();
850
851 cmdBuf->ReturnUnusedBuffer();
852
853 cmdBuf->Submit();
854
855 cmish->Submit(queue->GetFastTrackerIndex(), tracker);
856
857 #if MDF_SURFACE_STATE_DUMP
858 if (m_cmhal->dumpSurfaceState)
859 {
860 ssh->DumpSSH();
861 }
862 #endif
863
864 #if MDF_COMMAND_BUFFER_DUMP
865 if (m_cmhal->dumpCommandBuffer)
866 {
867 cmdBuf->Dump();
868 }
869 #endif
870
871 #if MDF_CURBE_DATA_DUMP
872 if (m_cmhal->dumpCurbeData)
873 {
874 cmMediaState->Dump();
875 }
876 #endif
877
878 cmMediaState->Submit();
879 cmdsh->DestroyMediaState(cmMediaState);
880
881 if (event != CM_NO_EVENT && taskAssigned)
882 {
883 CmEventEx *eventEx = MOS_New(CmEventEx, state, taskId, cmTracker);
884 eventEx->SetTaskOsData(cmdBuf->GetResource(), osSyncEvent);
885 event = static_cast<CmEventEx *>(eventEx);
886 }
887 else
888 {
889 event = nullptr;
890 }
891 cmTracker->Refresh();
892
893 // refresh surfaces in surface manager
894 CM_CHK_CMSTATUS_RETURN(RefreshSurfaces(device));
895
896 #if MDF_SURFACE_CONTENT_DUMP
897 if (state->dumpSurfaceContent && event != nullptr)
898 {
899 event->WaitForTaskFinished();
900 if (isDummyEventCreated)
901 {
902 DestoryEvent(queue, event);
903 }
904 for (uint32_t i = 0; i < kernelCount; i++)
905 {
906 kernels[i]->SurfaceDumpEx(i, taskId);
907 }
908 }
909 #endif
910
911 return CM_SUCCESS;
912 }
913
SwitchToFastPath(CmTask * task)914 bool CmExecutionAdv::SwitchToFastPath(CmTask *task)
915 {
916 CmTaskRT *kernelArrayRT = static_cast<CmTaskRT *>(task);
917 uint32_t kernelCount = kernelArrayRT->GetKernelCount();
918 for (uint32_t i = 0; i < kernelCount; i++)
919 {
920 CmKernelEx *kernel = static_cast<CmKernelEx *>(kernelArrayRT->GetKernelPointer(i));
921 if (kernel == nullptr)
922 {
923 return false;
924 }
925 if (kernel->IsFastPathSupported() == false)
926 {
927 return false;
928 }
929 }
930 return true;
931 }
932
933