1 /*
2 * Copyright (c) 2019-2024, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file mos_gpucontext_specific_next.cpp
24 //! \brief Container class for the Linux specific gpu context
25 //!
26
27 #include <unistd.h>
28 #include "mos_gpucontext_specific_next.h"
29 #include "mos_context_specific_next.h"
30 #include "mos_graphicsresource_specific_next.h"
31 #include "mos_commandbuffer_specific_next.h"
32 #include "mos_util_devult_specific_next.h"
33 #include "mos_cmdbufmgr_next.h"
34 #include "mos_os_virtualengine_next.h"
35 #include "mos_interface.h"
36 #include "mos_os_cp_interface_specific.h"
37 #ifdef ENABLE_XE_KMD
38 #include "mos_gpucontext_specific_next_xe.h"
39 #endif
40
41 #define MI_BATCHBUFFER_END 0x05000000
42 static pthread_mutex_t command_dump_mutex = PTHREAD_MUTEX_INITIALIZER;
43
StoreCreateOptions(PMOS_GPUCTX_CREATOPTIONS createoption)44 void GpuContextSpecificNext::StoreCreateOptions(PMOS_GPUCTX_CREATOPTIONS createoption)
45 {
46 if (typeid(*createoption) == typeid(MOS_GPUCTX_CREATOPTIONS_ENHANCED))
47 {
48 m_bEnhancedUsed = true;
49 MosUtilities::MosSecureMemcpy(&m_createOptionEnhanced, sizeof(MOS_GPUCTX_CREATOPTIONS_ENHANCED), createoption, sizeof(MOS_GPUCTX_CREATOPTIONS_ENHANCED));
50 }
51 else
52 {
53 MosUtilities::MosSecureMemcpy(&m_createOption, sizeof(MOS_GPUCTX_CREATOPTIONS), createoption, sizeof(MOS_GPUCTX_CREATOPTIONS));
54 }
55 }
56
GpuContextSpecificNext(const MOS_GPU_NODE gpuNode,CmdBufMgrNext * cmdBufMgr,GpuContextNext * reusedContext)57 GpuContextSpecificNext::GpuContextSpecificNext(
58 const MOS_GPU_NODE gpuNode,
59 CmdBufMgrNext *cmdBufMgr,
60 GpuContextNext *reusedContext)
61 {
62 MOS_OS_FUNCTION_ENTER;
63
64 m_nodeOrdinal = gpuNode;
65 m_cmdBufMgr = cmdBufMgr;
66 m_statusBufferResource = nullptr;
67 m_maxPatchLocationsize = PATCHLOCATIONLIST_SIZE;
68
69 if (reusedContext)
70 {
71 MOS_OS_NORMALMESSAGE("gpucontex reusing not enabled on Linux.");
72 }
73
74 #if (_DEBUG || _RELEASE_INTERNAL)
75 // get user engine instance setting from environment variable
76 char *engineInstances = getenv("INTEL_ENGINE_INSTANCE");
77 if (engineInstances != nullptr)
78 {
79 errno = 0;
80 long int instance = strtol(engineInstances, nullptr, 16);
81 /* Check for various possible errors. */
82 if ((errno == ERANGE && instance == LONG_MAX) || (instance < 0))
83 {
84 MOS_OS_NORMALMESSAGE("Invalid INTEL_ENGINE_INSTANCE setting.(%s)\n", engineInstances);
85 m_engineInstanceSelect = 0x0;
86 }
87 else
88 {
89 m_engineInstanceSelect = (uint32_t)instance;
90 }
91 }
92 #endif
93 }
94
//!
//! \brief    Destructor — releases every per-context resource via Clear()
//!           (status buffer, command buffer pool, allocation/patch lists,
//!           and the i915 contexts).
//!
GpuContextSpecificNext::~GpuContextSpecificNext()
{
    MOS_OS_FUNCTION_ENTER;

    Clear();
}
101
Create(const MOS_GPU_NODE gpuNode,CmdBufMgrNext * cmdBufMgr,GpuContextNext * reusedContext)102 GpuContextNext *GpuContextSpecificNext::Create(
103 const MOS_GPU_NODE gpuNode,
104 CmdBufMgrNext *cmdBufMgr,
105 GpuContextNext *reusedContext)
106 {
107 MOS_OS_FUNCTION_ENTER;
108 if (nullptr == cmdBufMgr)
109 {
110 return nullptr;
111 }
112 OsContextSpecificNext *osDeviceContext = dynamic_cast<OsContextSpecificNext*>(cmdBufMgr->m_osContext);
113 if (nullptr == osDeviceContext)
114 {
115 return nullptr;
116 }
117 int type = osDeviceContext->GetDeviceType();
118 if (DEVICE_TYPE_I915 == type)
119 {
120 return MOS_New(GpuContextSpecificNext, gpuNode, cmdBufMgr, reusedContext);
121 }
122 #ifdef ENABLE_XE_KMD
123 else if (DEVICE_TYPE_XE == type)
124 {
125 return MOS_New(GpuContextSpecificNextXe, gpuNode, cmdBufMgr, reusedContext);
126 }
127 #endif
128 return nullptr;
129 }
130
RecreateContext(bool bIsProtected,MOS_STREAM_HANDLE streamState)131 MOS_STATUS GpuContextSpecificNext::RecreateContext(bool bIsProtected, MOS_STREAM_HANDLE streamState)
132 {
133 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
134 // clear existing context
135 Clear();
136 m_bProtectedContext = bIsProtected;
137 PMOS_GPUCTX_CREATOPTIONS createOption;
138 if (m_bEnhancedUsed)
139 {
140 createOption = &m_createOptionEnhanced;
141 }
142 else
143 {
144 createOption = &m_createOption;
145 }
146 eStatus = Init(m_osContext, streamState, createOption);
147 return eStatus;
148 }
149
//!
//! \brief    Keep this context's protection mode in sync with the CP state.
//! \details  With CP enabled and context-based scheduling: a clear context is
//!           recreated as protected, and an already-protected context is
//!           verified against the CP interface and recreated if stale. With CP
//!           disabled: a protected context (context-based scheduling only) is
//!           recreated as clear.
//! \param    [in] streamState
//!           Stream state carrying osCpInterface and the scheduling mode.
//! \return   MOS_STATUS_SUCCESS, or the status of any recreation attempt.
//!
MOS_STATUS GpuContextSpecificNext::PatchGPUContextProtection(MOS_STREAM_HANDLE streamState)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
    MOS_OS_CHK_NULL_RETURN(streamState);
    auto osParameters = (PMOS_CONTEXT)streamState->perStreamParameters;
    MOS_OS_CHK_NULL_RETURN(osParameters);

    // clean up clear gem context and create protected gem context if CP is enabled
    if (streamState->osCpInterface &&
        streamState->osCpInterface->IsCpEnabled())  // Check if CP is enabled as protected GEM context is only needed when CP is enabled
    {
        if (streamState->ctxBasedScheduling)
        {
            if (m_bProtectedContext == false)  // Check if GEM context is already protected or not
            {
                // Context is not protected, recreate it as protected
                eStatus = RecreateContext(true, streamState);
                if (eStatus == MOS_STATUS_SUCCESS)
                {
                    // Register Protected Context
                    streamState->osCpInterface->RegisterAndCheckProtectedGemCtx(true, (void *)this, nullptr);
                }
            }
            // If m_bProtectedContext == true then check if is stale context or not.
            // If it is stale protected context then recreate another one
            else
            {
                bool bIsContextStale = false;
                // Check protected context: the CP interface reports staleness
                // through the out-parameter when the first argument is false.
                streamState->osCpInterface->RegisterAndCheckProtectedGemCtx(false, (void *)this, &bIsContextStale);

                // Recreate protected context
                if (bIsContextStale)
                {
                    eStatus = RecreateContext(true, streamState);
                    if (eStatus == MOS_STATUS_SUCCESS)
                    {
                        // Register Protected Context
                        streamState->osCpInterface->RegisterAndCheckProtectedGemCtx(true, (void *)this, nullptr);
                    }
                }
            }
        }
        else
        {
            if (osParameters->m_protectedGEMContext == false)
            {
                // for non context based scheduling protected context is always created as protected during Initialization if needed
                // If it is not created during Initialization then do nothing and add a comment for Debug purposes
                MOS_OS_CRITICALMESSAGE("Using Clear GEM context when protected Context is needed");
                eStatus = MOS_STATUS_SUCCESS;
            }
        }
    }

    // clean up protected gem context and recreate clear gem context if CP is disabled
    if (streamState->osCpInterface &&
        !streamState->osCpInterface->IsCpEnabled() &&
        streamState->ctxBasedScheduling &&
        m_bProtectedContext == true)  // Check if GEM context is protected or not
    {
        // Context is protected, recreate it as clear
        eStatus = RecreateContext(false, streamState);
    }

    return eStatus;
}
217
//!
//! \brief    Create and configure the i915 context for the 3D (render) node.
//! \details  Creates a shared context, queries the render-class engine
//!           instances into engine_map, enables load balancing across them,
//!           and optionally trims the SSEU subslice mask when the create
//!           options request a smaller subslice count.
//! \param    [in] osParameters
//!           Per-stream OS context (bufmgr, parent intel_context).
//! \param    [in] createOption
//!           Create options; SSEUValue != 0 requests SSEU reconfiguration.
//! \param    [in,out] nengine
//!           In: engine count used to size the context (total count queried by
//!           the caller); out: number of render-class engines found.
//! \param    [out] engine_map
//!           Caller-allocated array receiving the queried engine instances.
//! \return   MOS_STATUS_SUCCESS, or MOS_STATUS_UNKNOWN on any KMD failure.
//!
MOS_STATUS GpuContextSpecificNext::Init3DCtx(PMOS_CONTEXT osParameters,
    PMOS_GPUCTX_CREATOPTIONS createOption,
    unsigned int *nengine,
    void *engine_map)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    // NOTE(review): the context is created sized by the incoming engine count;
    // the render-class query below then narrows *nengine to the render engines.
    m_i915Context[0] = mos_context_create_shared(osParameters->bufmgr,
        osParameters->intel_context,
        I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
        m_bProtectedContext,
        engine_map,
        1,
        *nengine,
        0);
    if (m_i915Context[0] == nullptr)
    {
        MOS_OS_ASSERTMESSAGE("Failed to create context.\n");
        return MOS_STATUS_UNKNOWN;
    }
    m_i915Context[0]->pOsContext = osParameters;

    __u16 engine_class = I915_ENGINE_CLASS_RENDER;
    __u64 caps         = 0;

    if (mos_query_engines(osParameters->bufmgr, engine_class, caps, nengine, engine_map))
    {
        MOS_OS_ASSERTMESSAGE("Failed to query engines.\n");
        return MOS_STATUS_UNKNOWN;
    }

    // Balance submissions across all queried render engine instances.
    if (mos_set_context_param_load_balance(m_i915Context[0], (struct i915_engine_class_instance *)engine_map, *nengine))
    {
        MOS_OS_ASSERTMESSAGE("Failed to set balancer extension.\n");
        return MOS_STATUS_UNKNOWN;
    }

    if (createOption->SSEUValue != 0)
    {
        struct drm_i915_gem_context_param_sseu sseu;
        MosUtilities::MosZeroMemory(&sseu, sizeof(sseu));
        sseu.flags                  = I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX;
        sseu.engine.engine_instance = m_i915ExecFlag;

        if (mos_get_context_param_sseu(m_i915Context[0], &sseu))
        {
            MOS_OS_ASSERTMESSAGE("Failed to get sseu configuration.");
            return MOS_STATUS_UNKNOWN;
        }

        // If the hardware exposes more subslices than requested, switch off
        // the excess bits in the mask before applying it.
        if (mos_hweight8(m_i915Context[0], sseu.subslice_mask) > createOption->packed.SubSliceCount)
        {
            sseu.subslice_mask = mos_switch_off_n_bits(m_i915Context[0], sseu.subslice_mask,
                mos_hweight8(m_i915Context[0], sseu.subslice_mask) - createOption->packed.SubSliceCount);
        }

        if (mos_set_context_param_sseu(m_i915Context[0], sseu))
        {
            MOS_OS_ASSERTMESSAGE("Failed to set sseu configuration.");
            return MOS_STATUS_UNKNOWN;
        }
    }

    return eStatus;
}
283
//!
//! \brief    Create and configure the i915 context for the compute node.
//! \details  Creates a shared context, queries compute-class engines into
//!           engine_map, optionally filters them by the user's engine-instance
//!           selection (debug/internal builds), and enables load balancing.
//! \param    [in] osParameters
//!           Per-stream OS context (bufmgr, parent intel_context).
//! \param    [in,out] nengine
//!           In: engine count used to size the context; out: number of compute
//!           engines found (possibly reduced by user selection).
//! \param    [out] engine_map
//!           Caller-allocated array receiving the queried engine instances.
//! \param    [in] gpuNode
//!           GPU node, forwarded to the user engine-selection helper.
//! \param    [out] isEngineSelectEnable
//!           Set when the user's engine-instance mask filtered the engines
//!           (debug/internal builds only).
//! \return   MOS_STATUS_SUCCESS, or MOS_STATUS_UNKNOWN on any KMD failure.
//!
MOS_STATUS GpuContextSpecificNext::InitComputeCtx(PMOS_CONTEXT osParameters,
    unsigned int *nengine,
    void *engine_map,
    MOS_GPU_NODE gpuNode,
    bool *isEngineSelectEnable)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    m_i915Context[0] = mos_context_create_shared(osParameters->bufmgr,
        osParameters->intel_context,
        I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
        m_bProtectedContext,
        engine_map,
        1,
        *nengine,
        0);
    if (m_i915Context[0] == nullptr)
    {
        MOS_OS_ASSERTMESSAGE("Failed to create context.\n");
        return MOS_STATUS_UNKNOWN;
    }
    m_i915Context[0]->pOsContext = osParameters;

    __u16 engine_class = 4;  // To change later when linux define the name
    __u64 caps         = 0;

    if (mos_query_engines(osParameters->bufmgr, engine_class, caps, nengine, engine_map))
    {
        MOS_OS_ASSERTMESSAGE("Failed to query engines.\n");
        return MOS_STATUS_UNKNOWN;
    }

#if (_DEBUG || _RELEASE_INTERNAL)
    // Allow INTEL_ENGINE_INSTANCE (captured in the constructor) to narrow the
    // engine list before load balancing is configured.
    *isEngineSelectEnable = SelectEngineInstanceByUser(engine_map, nengine, m_engineInstanceSelect, gpuNode);
#endif
    if (mos_set_context_param_load_balance(m_i915Context[0], (struct i915_engine_class_instance *)engine_map, *nengine))
    {
        MOS_OS_ASSERTMESSAGE("Failed to set balancer extension.\n");
        return MOS_STATUS_UNKNOWN;
    }

    return eStatus;
}
327
InitVdVeCtx(PMOS_CONTEXT osParameters,MOS_STREAM_HANDLE streamState,PMOS_GPUCTX_CREATOPTIONS createOption,unsigned int * nengine,void * engine_map,MOS_GPU_NODE gpuNode,bool * isEngineSelectEnable)328 MOS_STATUS GpuContextSpecificNext::InitVdVeCtx(PMOS_CONTEXT osParameters,
329 MOS_STREAM_HANDLE streamState,
330 PMOS_GPUCTX_CREATOPTIONS createOption,
331 unsigned int *nengine,
332 void *engine_map,
333 MOS_GPU_NODE gpuNode,
334 bool *isEngineSelectEnable)
335 {
336 MOS_OS_FUNCTION_ENTER;
337
338 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
339
340 m_i915Context[0] = mos_context_create_shared(osParameters->bufmgr,
341 osParameters->intel_context,
342 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
343 m_bProtectedContext,
344 engine_map,
345 1,
346 *nengine,
347 0);
348 if (m_i915Context[0] == nullptr)
349 {
350 MOS_OS_ASSERTMESSAGE("Failed to create context.\n");
351 return MOS_STATUS_UNKNOWN;
352 }
353 struct i915_engine_class_instance *_engine_map = (struct i915_engine_class_instance *)engine_map;
354 m_i915Context[0]->pOsContext = osParameters;
355
356 __u16 engine_class = (gpuNode == MOS_GPU_NODE_VE)? I915_ENGINE_CLASS_VIDEO_ENHANCE : I915_ENGINE_CLASS_VIDEO;
357 __u64 caps = 0;
358
359 SetEngineQueryFlags(createOption, caps);
360
361 if (mos_query_engines(osParameters->bufmgr, engine_class, caps, nengine, (void *)_engine_map))
362 {
363 MOS_OS_ASSERTMESSAGE("Failed to query engines.\n");
364 return MOS_STATUS_UNKNOWN;
365 }
366
367 #if (_DEBUG || _RELEASE_INTERNAL)
368 *isEngineSelectEnable = SelectEngineInstanceByUser((void *)_engine_map, nengine, m_engineInstanceSelect, gpuNode);
369 #endif
370 if (mos_set_context_param_load_balance(m_i915Context[0], _engine_map, *nengine))
371 {
372 MOS_OS_ASSERTMESSAGE("Failed to set balancer extension.\n");
373 return MOS_STATUS_UNKNOWN;
374 }
375
376 if (*nengine >= 2 && *nengine <= MAX_ENGINE_INSTANCE_NUM)
377 {
378 int i;
379 //master queue
380 m_i915Context[1] = mos_context_create_shared(osParameters->bufmgr,
381 osParameters->intel_context,
382 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
383 m_bProtectedContext,
384 (void *)_engine_map,
385 1,
386 1,
387 0);
388 if (m_i915Context[1] == nullptr)
389 {
390 MOS_OS_ASSERTMESSAGE("Failed to create master context.\n");
391 return MOS_STATUS_UNKNOWN;
392 }
393 m_i915Context[1]->pOsContext = osParameters;
394
395 if (mos_set_context_param_load_balance(m_i915Context[1], _engine_map, 1))
396 {
397 MOS_OS_ASSERTMESSAGE("Failed to set master context bond extension.\n");
398 return MOS_STATUS_UNKNOWN;
399 }
400
401 //slave queue
402 for (i=1; i < *nengine; i++)
403 {
404 m_i915Context[i+1] = mos_context_create_shared(osParameters->bufmgr,
405 osParameters->intel_context,
406 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
407 m_bProtectedContext,
408 (void *)_engine_map,
409 1,
410 1,
411 0);
412 if (m_i915Context[i+1] == nullptr)
413 {
414 MOS_OS_ASSERTMESSAGE("Failed to create slave context.\n");
415 return MOS_STATUS_UNKNOWN;
416 }
417 m_i915Context[i+1]->pOsContext = osParameters;
418
419 if (mos_set_context_param_bond(m_i915Context[i+1], _engine_map[0], &_engine_map[i], 1) != S_SUCCESS)
420 {
421 int err = errno;
422 if (err == ENODEV)
423 {
424 mos_context_destroy(m_i915Context[1]);
425 mos_context_destroy(m_i915Context[i+1]);
426 m_i915Context[i+1] = nullptr;
427 break;
428 }
429 else
430 {
431 MOS_OS_ASSERTMESSAGE("Failed to set slave context bond extension. errno=%d\n",err);
432 return MOS_STATUS_UNKNOWN;
433 }
434 }
435 }
436 if (i == *nengine)
437 {
438 streamState->bParallelSubmission = false;
439 }
440 else
441 {
442 streamState->bParallelSubmission = true;
443 //create context with different width
444 for(i = 1; i < *nengine; i++)
445 {
446 unsigned int ctxWidth = i + 1;
447 m_i915Context[i] = mos_context_create_shared(osParameters->bufmgr,
448 osParameters->intel_context,
449 0, // I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE not allowed for parallel submission
450 m_bProtectedContext,
451 (void *)_engine_map,
452 ctxWidth,
453 1,
454 0);
455 if (mos_set_context_param_parallel(m_i915Context[i], _engine_map, ctxWidth) != S_SUCCESS)
456 {
457 MOS_OS_ASSERTMESSAGE("Failed to set parallel extension since discontinuous logical engine.\n");
458 mos_context_destroy(m_i915Context[i]);
459 m_i915Context[i] = nullptr;
460 break;
461 }
462 }
463 }
464 }
465
466 return eStatus;
467 }
468
//!
//! \brief    Create and configure the i915 context for the blitter (copy) node.
//! \details  Creates a shared context, queries copy-class engines into
//!           engine_map, and enables load balancing across them.
//! \param    [in] osParameters
//!           Per-stream OS context (bufmgr, parent intel_context).
//! \param    [in,out] nengine
//!           In: engine count used to size the context; out: number of copy
//!           engines found.
//! \param    [out] engine_map
//!           Caller-allocated array receiving the queried engine instances.
//! \return   MOS_STATUS_SUCCESS, or MOS_STATUS_UNKNOWN on any KMD failure.
//!
MOS_STATUS GpuContextSpecificNext::InitBltCtx(PMOS_CONTEXT osParameters,
    unsigned int *nengine,
    void *engine_map)
{
    MOS_OS_FUNCTION_ENTER;

    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    m_i915Context[0] = mos_context_create_shared(osParameters->bufmgr,
        osParameters->intel_context,
        I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
        m_bProtectedContext,
        engine_map,
        1,
        *nengine,
        0);
    if (m_i915Context[0] == nullptr)
    {
        MOS_OS_ASSERTMESSAGE("Failed to create context.\n");
        return MOS_STATUS_UNKNOWN;
    }
    m_i915Context[0]->pOsContext = osParameters;

    __u16 engine_class = I915_ENGINE_CLASS_COPY;
    __u64 caps         = 0;

    if (mos_query_engines(osParameters->bufmgr, engine_class, caps, nengine, engine_map))
    {
        MOS_OS_ASSERTMESSAGE("Failed to query engines.\n");
        return MOS_STATUS_UNKNOWN;
    }

    if (mos_set_context_param_load_balance(m_i915Context[0], (struct i915_engine_class_instance *)engine_map, *nengine))
    {
        MOS_OS_ASSERTMESSAGE("Failed to set balancer extension.\n");
        return MOS_STATUS_UNKNOWN;
    }

    return eStatus;
}
509
510
Init(OsContextNext * osContext,MOS_STREAM_HANDLE streamState,PMOS_GPUCTX_CREATOPTIONS createOption)511 MOS_STATUS GpuContextSpecificNext::Init(OsContextNext *osContext,
512 MOS_STREAM_HANDLE streamState,
513 PMOS_GPUCTX_CREATOPTIONS createOption)
514 {
515 MOS_OS_FUNCTION_ENTER;
516
517 MOS_OS_CHK_NULL_RETURN(osContext);
518 MOS_OS_CHK_NULL_RETURN(streamState);
519 MOS_OS_CHK_NULL_RETURN(createOption);
520
521 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
522
523 auto osParameters = (PMOS_CONTEXT)streamState->perStreamParameters;
524 MOS_OS_CHK_NULL_RETURN(osParameters);
525
526 m_osParameters = osParameters;
527
528 MOS_GPU_NODE gpuNode = MOS_GPU_NODE_3D;
529 gpuNode = static_cast<MOS_GPU_NODE>(createOption->gpuNode);
530
531 if (m_cmdBufPoolMutex == nullptr)
532 {
533 m_cmdBufPoolMutex = MosUtilities::MosCreateMutex();
534 }
535
536 MOS_OS_CHK_NULL_RETURN(m_cmdBufPoolMutex);
537
538 MosUtilities::MosLockMutex(m_cmdBufPoolMutex);
539
540 m_cmdBufPool.clear();
541
542 MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
543
544 m_ocaLogSectionSupported = osContext->m_ocaLogSectionSupported;
545 if (m_ocaLogSectionSupported)
546 {
547 // increase size for oca log section
548 m_commandBufferSize = MosOcaInterfaceSpecific::IncreaseSize(COMMAND_BUFFER_SIZE);
549 }
550 else
551 {
552 m_commandBufferSize = COMMAND_BUFFER_SIZE;
553 }
554
555 m_nextFetchIndex = 0;
556
557 m_cmdBufFlushed = true;
558
559 m_osContext = osContext;
560
561 MOS_OS_CHK_STATUS_RETURN(AllocateGPUStatusBuf());
562
563 m_commandBuffer = (PMOS_COMMAND_BUFFER)MOS_AllocAndZeroMemory(sizeof(MOS_COMMAND_BUFFER));
564
565 MOS_OS_CHK_NULL_RETURN(m_commandBuffer);
566
567 m_IndirectHeapSize = 0;
568
569 // each thread has its own GPU context, so do not need any lock as guarder here
570 m_allocationList = (ALLOCATION_LIST *)MOS_AllocAndZeroMemory(sizeof(ALLOCATION_LIST) * ALLOCATIONLIST_SIZE);
571 MOS_OS_CHK_NULL_RETURN(m_allocationList);
572 m_maxNumAllocations = ALLOCATIONLIST_SIZE;
573
574 m_patchLocationList = (PATCHLOCATIONLIST *)MOS_AllocAndZeroMemory(sizeof(PATCHLOCATIONLIST) * PATCHLOCATIONLIST_SIZE);
575 MOS_OS_CHK_NULL_RETURN(m_patchLocationList);
576 m_maxPatchLocationsize = PATCHLOCATIONLIST_SIZE;
577
578 m_attachedResources = (PMOS_RESOURCE)MOS_AllocAndZeroMemory(sizeof(MOS_RESOURCE) * ALLOCATIONLIST_SIZE);
579 MOS_OS_CHK_NULL_RETURN(m_attachedResources);
580
581 m_writeModeList = (bool *)MOS_AllocAndZeroMemory(sizeof(bool) * ALLOCATIONLIST_SIZE);
582 MOS_OS_CHK_NULL_RETURN(m_writeModeList);
583
584 m_GPUStatusTag = 1;
585
586 StoreCreateOptions(createOption);
587
588 for (int i=0; i<MAX_ENGINE_INSTANCE_NUM+1; i++)
589 {
590 m_i915Context[i] = nullptr;
591 }
592
593 if (streamState->ctxBasedScheduling)
594 {
595 bool isEngineSelectEnable = false;
596 unsigned int nengine = 0;
597 size_t engine_class_size = 0;
598 void *engine_map = nullptr;
599
600 MOS_TraceEventExt(EVENT_GPU_CONTEXT_CREATE, EVENT_TYPE_START,
601 &gpuNode, sizeof(gpuNode), nullptr, 0);
602
603 m_i915ExecFlag = I915_EXEC_DEFAULT;
604
605 if (mos_query_engines_count(osParameters->bufmgr, &nengine))
606 {
607 MOS_OS_ASSERTMESSAGE("Failed to query engines count.\n");
608 return MOS_STATUS_UNKNOWN;
609 }
610 engine_class_size = mos_get_engine_class_size(osParameters->bufmgr);
611 if (!engine_class_size)
612 {
613 MOS_OS_ASSERTMESSAGE("Failed to get engine class instance size.\n");
614 return MOS_STATUS_UNKNOWN;
615 }
616 engine_map = MOS_AllocAndZeroMemory(nengine * engine_class_size);
617 MOS_OS_CHK_NULL_RETURN(engine_map);
618
619 if (gpuNode == MOS_GPU_NODE_3D)
620 {
621 eStatus = Init3DCtx(osParameters, createOption, &nengine, engine_map);
622 }
623 else if (gpuNode == MOS_GPU_NODE_COMPUTE)
624 {
625 eStatus = InitComputeCtx(osParameters, &nengine, engine_map, gpuNode, &isEngineSelectEnable);
626 }
627 else if (gpuNode == MOS_GPU_NODE_VIDEO || gpuNode == MOS_GPU_NODE_VIDEO2
628 || gpuNode == MOS_GPU_NODE_VE)
629 {
630 eStatus = InitVdVeCtx(osParameters, streamState, createOption, &nengine, engine_map, gpuNode, &isEngineSelectEnable);
631 }
632 else if (gpuNode == MOS_GPU_NODE_BLT)
633 {
634 eStatus = InitBltCtx(osParameters, &nengine, engine_map);
635 }
636 else
637 {
638 MOS_OS_ASSERTMESSAGE("Unknown engine class.\n");
639 MOS_SafeFreeMemory(engine_map);
640 return MOS_STATUS_UNKNOWN;
641 }
642
643 if (eStatus == MOS_STATUS_SUCCESS)
644 {
645 MOS_OS_CHK_STATUS_RETURN(ReportEngineInfo(engine_map, nengine, isEngineSelectEnable));
646 }
647 MOS_SafeFreeMemory(engine_map);
648 MOS_TraceEventExt(EVENT_GPU_CONTEXT_CREATE, EVENT_TYPE_END,
649 m_i915Context, sizeof(void *),
650 &nengine, sizeof(nengine));
651 }
652
653 return eStatus;
654 }
655
Clear()656 void GpuContextSpecificNext::Clear()
657 {
658 MOS_OS_FUNCTION_ENTER;
659
660 MOS_TraceEventExt(EVENT_GPU_CONTEXT_DESTROY, EVENT_TYPE_START,
661 m_i915Context, sizeof(void *), nullptr, 0);
662 // hanlde the status buf bundled w/ the specified gpucontext
663 if (m_statusBufferResource && m_statusBufferResource->pGfxResourceNext)
664 {
665 if (m_statusBufferResource->pGfxResourceNext->Unlock(m_osContext) != MOS_STATUS_SUCCESS)
666 {
667 MOS_OS_ASSERTMESSAGE("failed to unlock the status buf bundled w/ the specified gpucontext");
668 }
669 m_statusBufferResource->pGfxResourceNext->Free(m_osContext, 0);
670 MOS_Delete(m_statusBufferResource->pGfxResourceNext);
671 }
672 MOS_FreeMemAndSetNull(m_statusBufferResource);
673
674 MosUtilities::MosLockMutex(m_cmdBufPoolMutex);
675
676 if (m_cmdBufMgr)
677 {
678 for (auto& curCommandBuffer : m_cmdBufPool)
679 {
680 auto curCommandBufferSpecific = static_cast<CommandBufferSpecificNext *>(curCommandBuffer);
681 if (curCommandBufferSpecific == nullptr)
682 continue;
683 curCommandBufferSpecific->waitReady(); // wait ready and return to comamnd buffer manager.
684 m_cmdBufMgr->ReleaseCmdBuf(curCommandBuffer);
685 }
686 }
687
688 m_cmdBufPool.clear();
689
690 MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
691 MosUtilities::MosDestroyMutex(m_cmdBufPoolMutex);
692 m_cmdBufPoolMutex = nullptr;
693 MOS_SafeFreeMemory(m_commandBuffer);
694 m_commandBuffer = nullptr;
695 MOS_SafeFreeMemory(m_allocationList);
696 m_allocationList = nullptr;
697 MOS_SafeFreeMemory(m_patchLocationList);
698 m_patchLocationList = nullptr;
699 MOS_SafeFreeMemory(m_attachedResources);
700 m_attachedResources = nullptr;
701 MOS_SafeFreeMemory(m_writeModeList);
702 m_writeModeList = nullptr;
703
704 for (int i=0; i<MAX_ENGINE_INSTANCE_NUM; i++)
705 {
706 if (m_i915Context[i])
707 {
708 mos_context_destroy(m_i915Context[i]);
709 m_i915Context[i] = nullptr;
710 }
711 }
712 MOS_TraceEventExt(EVENT_GPU_CONTEXT_DESTROY, EVENT_TYPE_END,
713 nullptr, 0, nullptr, 0);
714 }
715
RegisterResource(PMOS_RESOURCE osResource,bool writeFlag)716 MOS_STATUS GpuContextSpecificNext::RegisterResource(
717 PMOS_RESOURCE osResource,
718 bool writeFlag)
719 {
720 MOS_OS_FUNCTION_ENTER;
721
722 MOS_OS_CHK_NULL_RETURN(osResource);
723
724 MOS_OS_CHK_NULL_RETURN(m_attachedResources);
725
726 PMOS_RESOURCE registeredResources = m_attachedResources;
727 uint32_t allocationIndex = 0;
728
729 for ( allocationIndex = 0; allocationIndex < m_resCount; allocationIndex++, registeredResources++)
730 {
731 if (osResource->bo == registeredResources->bo)
732 {
733 break;
734 }
735 }
736
737 // Allocation list to be updated
738 if (allocationIndex < m_maxNumAllocations)
739 {
740 // New buffer
741 if (allocationIndex == m_resCount)
742 {
743 m_resCount++;
744 }
745
746 // Set allocation
747 if (m_gpuContext >= MOS_GPU_CONTEXT_MAX)
748 {
749 MOS_OS_ASSERTMESSAGE("Gpu context exceeds max.");
750 return MOS_STATUS_UNKNOWN;
751 }
752
753 osResource->iAllocationIndex[m_gpuContext] = (allocationIndex);
754 m_attachedResources[allocationIndex] = *osResource;
755 m_writeModeList[allocationIndex] |= writeFlag;
756 m_allocationList[allocationIndex].hAllocation = &m_attachedResources[allocationIndex];
757 m_allocationList[allocationIndex].WriteOperation |= writeFlag;
758 m_numAllocations = m_resCount;
759 }
760 else
761 {
762 MOS_OS_ASSERTMESSAGE("Reached max # registrations.");
763 return MOS_STATUS_UNKNOWN;
764 }
765
766 return MOS_STATUS_SUCCESS;
767 }
768
SetPatchEntry(MOS_STREAM_HANDLE streamState,PMOS_PATCH_ENTRY_PARAMS params)769 MOS_STATUS GpuContextSpecificNext::SetPatchEntry(
770 MOS_STREAM_HANDLE streamState,
771 PMOS_PATCH_ENTRY_PARAMS params)
772 {
773 MOS_OS_FUNCTION_ENTER;
774
775 MOS_OS_CHK_NULL_RETURN(m_patchLocationList);
776 MOS_OS_CHK_NULL_RETURN(streamState);
777 MOS_OS_CHK_NULL_RETURN(params);
778
779 m_patchLocationList[m_currentNumPatchLocations].AllocationIndex = params->uiAllocationIndex;
780 m_patchLocationList[m_currentNumPatchLocations].AllocationOffset = params->uiResourceOffset;
781 m_patchLocationList[m_currentNumPatchLocations].PatchOffset = params->uiPatchOffset;
782 m_patchLocationList[m_currentNumPatchLocations].uiWriteOperation = params->bWrite ? true: false;
783 m_patchLocationList[m_currentNumPatchLocations].cmdBo =
784 params->cmdBuffer != nullptr ? params->cmdBuffer->OsResource.bo : nullptr;
785
786 if (streamState->osCpInterface &&
787 streamState->osCpInterface->IsHMEnabled())
788 {
789 if (MOS_STATUS_SUCCESS != streamState->osCpInterface->RegisterPatchForHM(
790 (uint32_t *)(params->cmdBufBase + params->uiPatchOffset),
791 params->bWrite,
792 params->HwCommandType,
793 params->forceDwordOffset,
794 params->presResource,
795 &m_patchLocationList[m_currentNumPatchLocations]))
796 {
797 MOS_OS_ASSERTMESSAGE("Failed to RegisterPatchForHM.");
798 }
799 }
800
801 m_currentNumPatchLocations++;
802
803 return MOS_STATUS_SUCCESS;
804 }
805
//!
//! \brief    Hand a command buffer to the caller.
//! \details  flags == 0 selects the primary command buffer; any other value is
//!           an index into the secondary-buffer map. If the active buffer was
//!           already flushed (primary) or the index is unseen (secondary), a
//!           buffer is fetched from the pool — growing the pool up to
//!           MAX_CMD_BUF_NUM, then recycling entries round-robin via
//!           m_nextFetchIndex. Otherwise the stored copy is returned.
//! \param    [out] comamndBuffer
//!           Receives the command buffer description (base, ptr, size, ...).
//! \param    [in] flags
//!           0 for the primary buffer, otherwise the secondary-buffer index.
//! \return   MOS_STATUS_SUCCESS or an error status.
//!
MOS_STATUS GpuContextSpecificNext::GetCommandBuffer(
    PMOS_COMMAND_BUFFER comamndBuffer,
    uint32_t            flags)
{
    MOS_OS_FUNCTION_ENTER;

    MOS_OS_CHK_NULL_RETURN(comamndBuffer);
    MOS_OS_CHK_NULL_RETURN(m_cmdBufMgr);
    MOS_OS_CHK_NULL_RETURN(m_commandBuffer);

    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
    CommandBufferNext *cmdBuf = nullptr;

    uint32_t secondaryIdx = flags;
    bool isPrimaryCmdBuffer = (secondaryIdx == 0);
    bool hasSecondaryCmdBuffer = (!isPrimaryCmdBuffer &&
                                  (m_secondaryCmdBufs.count(secondaryIdx) != 0));

    // A new buffer is needed when the primary was flushed, or when this
    // secondary index has not been seen before.
    bool needToAlloc = ((isPrimaryCmdBuffer && m_cmdBufFlushed) ||
                        (!isPrimaryCmdBuffer && !hasSecondaryCmdBuffer));

    if (needToAlloc)
    {
        MosUtilities::MosLockMutex(m_cmdBufPoolMutex);
        if (m_cmdBufPool.size() < MAX_CMD_BUF_NUM)
        {
            // Pool not yet full: pick up a fresh buffer and grow the pool.
            cmdBuf = m_cmdBufMgr->PickupOneCmdBuf(m_commandBufferSize);
            if (cmdBuf == nullptr)
            {
                MOS_OS_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
                MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
                return MOS_STATUS_NULL_POINTER;
            }
            if ((eStatus = cmdBuf->BindToGpuContext(this)) != MOS_STATUS_SUCCESS)
            {
                MOS_OS_ASSERTMESSAGE("Invalid status of BindToGpuContext.");
                MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
                return eStatus;
            }
            m_cmdBufPool.push_back(cmdBuf);
        }
        else if (m_cmdBufPool.size() == MAX_CMD_BUF_NUM && m_nextFetchIndex < m_cmdBufPool.size())
        {
            // Pool full: recycle the slot at m_nextFetchIndex. Wait until the
            // old buffer's submission completed before releasing it.
            auto cmdBufOld = m_cmdBufPool[m_nextFetchIndex];
            auto cmdBufSpecificOld = static_cast<CommandBufferSpecificNext *>(cmdBufOld);
            if (cmdBufSpecificOld == nullptr)
            {
                MOS_OS_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
                MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
                return MOS_STATUS_NULL_POINTER;
            }
            cmdBufSpecificOld->waitReady();
            cmdBufSpecificOld->UnBindToGpuContext();
            m_cmdBufMgr->ReleaseCmdBuf(cmdBufOld);  // here just return old command buffer to available pool

            // pick up new comamnd buffer
            cmdBuf = m_cmdBufMgr->PickupOneCmdBuf(m_commandBufferSize);
            if (cmdBuf == nullptr)
            {
                MOS_OS_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
                MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
                return MOS_STATUS_NULL_POINTER;
            }
            if ((eStatus = cmdBuf->BindToGpuContext(this)) != MOS_STATUS_SUCCESS)
            {
                MOS_OS_ASSERTMESSAGE("Invalid status of BindToGpuContext.");
                MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
                return eStatus;
            }
            m_cmdBufPool[m_nextFetchIndex] = cmdBuf;
        }
        else
        {
            MOS_OS_ASSERTMESSAGE("Command buffer bool size exceed max.");
            MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
            return MOS_STATUS_UNKNOWN;
        }
        MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);

        // util now, we got new command buffer from CmdBufMgr, next step to fill in the input command buffer
        MOS_OS_CHK_STATUS_RETURN(cmdBuf->GetResource()->ConvertToMosResource(&comamndBuffer->OsResource));
        comamndBuffer->pCmdBase   = (uint32_t *)cmdBuf->GetLockAddr();
        comamndBuffer->pCmdPtr    = (uint32_t *)cmdBuf->GetLockAddr();
        comamndBuffer->iOffset    = 0;
        comamndBuffer->iRemaining = cmdBuf->GetCmdBufSize();
        comamndBuffer->iCmdIndex  = m_nextFetchIndex;
        comamndBuffer->iVdboxNodeIndex = MOS_VDBOX_NODE_INVALID;
        comamndBuffer->iVeboxNodeIndex = MOS_VEBOX_NODE_INVALID;
        comamndBuffer->Attributes.pAttriVe = nullptr;
        comamndBuffer->is1stLvlBB = true;

        // zero comamnd buffer
        MosUtilities::MosZeroMemory(comamndBuffer->pCmdBase, comamndBuffer->iRemaining);
        comamndBuffer->iSubmissionType = SUBMISSION_TYPE_SINGLE_PIPE;
        MosUtilities::MosZeroMemory(&comamndBuffer->Attributes, sizeof(comamndBuffer->Attributes));

        if (isPrimaryCmdBuffer)
        {
            // update command buffer relared filed in GPU context
            m_cmdBufFlushed = false;

            // keep a copy in GPU context
            MosUtilities::MosSecureMemcpy(m_commandBuffer, sizeof(MOS_COMMAND_BUFFER), comamndBuffer, sizeof(MOS_COMMAND_BUFFER));
        }
        else
        {
            // Secondary buffers keep their shadow copy in the map, allocated on
            // first use for this index.
            PMOS_COMMAND_BUFFER tempCmdBuf = (PMOS_COMMAND_BUFFER)MOS_AllocAndZeroMemory(sizeof(MOS_COMMAND_BUFFER));
            MOS_OS_CHK_NULL_RETURN(tempCmdBuf);
            m_secondaryCmdBufs[secondaryIdx] = tempCmdBuf;
            MosUtilities::MosSecureMemcpy(tempCmdBuf, sizeof(MOS_COMMAND_BUFFER), comamndBuffer, sizeof(MOS_COMMAND_BUFFER));
        }

        if (m_ocaLogSectionSupported)
        {
            MOS_LINUX_BO *boTemp = ((GraphicsResourceSpecificNext *)cmdBuf->GetResource())->GetBufferObject();
            MosOcaInterfaceSpecific::InitOcaLogSection(boTemp);
        }

        // Command buffers are treated as cyclical buffers, the CB after the just submitted one
        // has the minimal fence value that we should wait
        m_nextFetchIndex++;
        if (m_nextFetchIndex >= MAX_CMD_BUF_NUM)
        {
            m_nextFetchIndex = 0;
        }
    }
    else
    {
        // current command buffer still active, directly copy to comamndBuffer
        if (isPrimaryCmdBuffer)
        {
            MosUtilities::MosSecureMemcpy(comamndBuffer, sizeof(MOS_COMMAND_BUFFER), m_commandBuffer, sizeof(MOS_COMMAND_BUFFER));
        }
        else
        {
            MosUtilities::MosSecureMemcpy(comamndBuffer, sizeof(MOS_COMMAND_BUFFER), m_secondaryCmdBufs[secondaryIdx], sizeof(MOS_COMMAND_BUFFER));
        }
    }

    if (isPrimaryCmdBuffer)
    {
        MOS_OS_CHK_STATUS_RETURN(RegisterResource(&m_commandBuffer->OsResource, false));
    }
    else
    {
        MOS_OS_CHK_STATUS_RETURN(RegisterResource(&m_secondaryCmdBufs[secondaryIdx]->OsResource, false));
    }

    return MOS_STATUS_SUCCESS;
}
956
ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer,uint32_t flags)957 void GpuContextSpecificNext::ReturnCommandBuffer(
958 PMOS_COMMAND_BUFFER cmdBuffer,
959 uint32_t flags)
960 {
961 MOS_OS_FUNCTION_ENTER;
962
963 MOS_OS_ASSERT(cmdBuffer);
964 MOS_OS_ASSERT(m_commandBuffer);
965
966 bool isPrimaryCmdBuf = (flags == 0);
967
968 if (isPrimaryCmdBuf)
969 {
970 m_commandBuffer->iOffset = cmdBuffer->iOffset;
971 m_commandBuffer->iRemaining = cmdBuffer->iRemaining;
972 m_commandBuffer->pCmdPtr = cmdBuffer->pCmdPtr;
973 m_commandBuffer->iVdboxNodeIndex = cmdBuffer->iVdboxNodeIndex;
974 m_commandBuffer->iVeboxNodeIndex = cmdBuffer->iVeboxNodeIndex;
975 }
976 else
977 {
978 uint32_t secondaryIdx = flags;
979 MOS_OS_ASSERT(m_secondaryCmdBufs.count(secondaryIdx));
980
981 MosUtilities::MosSecureMemcpy(m_secondaryCmdBufs[secondaryIdx], sizeof(MOS_COMMAND_BUFFER), cmdBuffer, sizeof(MOS_COMMAND_BUFFER));
982 }
983 }
984
ResetCommandBuffer()985 MOS_STATUS GpuContextSpecificNext::ResetCommandBuffer()
986 {
987 m_cmdBufFlushed = true;
988 auto it = m_secondaryCmdBufs.begin();
989 while(it != m_secondaryCmdBufs.end())
990 {
991 MOS_FreeMemory(it->second);
992 it++;
993 }
994 m_secondaryCmdBufs.clear();
995 return MOS_STATUS_SUCCESS;
996 }
997
SetIndirectStateSize(const uint32_t size)998 MOS_STATUS GpuContextSpecificNext::SetIndirectStateSize(const uint32_t size)
999 {
1000 if (m_ocaLogSectionSupported)
1001 {
1002 if(MosOcaInterfaceSpecific::IncreaseSize(size) < m_commandBufferSize)
1003 {
1004 m_IndirectHeapSize = size;
1005 return MOS_STATUS_SUCCESS;
1006 }
1007 else
1008 {
1009 MOS_OS_ASSERTMESSAGE("Indirect State Size if out of boundry!");
1010 return MOS_STATUS_UNKNOWN;
1011 }
1012 }
1013 else
1014 {
1015 if(size < m_commandBufferSize)
1016 {
1017 m_IndirectHeapSize = size;
1018 return MOS_STATUS_SUCCESS;
1019 }
1020 else
1021 {
1022 MOS_OS_ASSERTMESSAGE("Indirect State Size if out of boundry!");
1023 return MOS_STATUS_UNKNOWN;
1024 }
1025 }
1026 }
1027
GetIndirectState(uint32_t & offset,uint32_t & size)1028 MOS_STATUS GpuContextSpecificNext::GetIndirectState(
1029 uint32_t &offset,
1030 uint32_t &size)
1031 {
1032 MOS_OS_FUNCTION_ENTER;
1033
1034 if (m_ocaLogSectionSupported)
1035 {
1036 offset = m_commandBufferSize - m_IndirectHeapSize - OCA_LOG_SECTION_SIZE_MAX;
1037 }
1038 else
1039 {
1040 offset = m_commandBufferSize - m_IndirectHeapSize;
1041 }
1042 size = m_IndirectHeapSize;
1043
1044 return MOS_STATUS_SUCCESS;
1045 }
1046
GetIndirectStatePointer(uint8_t ** indirectState)1047 MOS_STATUS GpuContextSpecificNext::GetIndirectStatePointer(
1048 uint8_t **indirectState)
1049 {
1050 MOS_OS_FUNCTION_ENTER;
1051
1052 MOS_OS_CHK_NULL_RETURN(indirectState);
1053
1054 if (m_ocaLogSectionSupported)
1055 {
1056 *indirectState = (uint8_t *)m_commandBuffer->pCmdBase + m_commandBufferSize - m_IndirectHeapSize - OCA_LOG_SECTION_SIZE_MAX;
1057 }
1058 else
1059 {
1060 *indirectState = (uint8_t *)m_commandBuffer->pCmdBase + m_commandBufferSize - m_IndirectHeapSize;
1061 }
1062
1063 return MOS_STATUS_SUCCESS;
1064 }
1065
ResizeCommandBufferAndPatchList(uint32_t requestedCommandBufferSize,uint32_t requestedPatchListSize,uint32_t flags)1066 MOS_STATUS GpuContextSpecificNext::ResizeCommandBufferAndPatchList(
1067 uint32_t requestedCommandBufferSize,
1068 uint32_t requestedPatchListSize,
1069 uint32_t flags)
1070 {
1071 MOS_OS_FUNCTION_ENTER;
1072
1073 // m_commandBufferSize is used for allocate command buffer and submit command buffer, in this moment, command buffer has not allocated yet.
1074 // Linux KMD requires command buffer size align to 8 bytes, or it will not execute the commands.
1075 if (m_ocaLogSectionSupported /*&& !m_ocaSizeIncreaseDone*/)
1076 {
1077 m_commandBufferSize = MOS_ALIGN_CEIL(MosOcaInterfaceSpecific::IncreaseSize(requestedCommandBufferSize), 8);
1078 // m_ocaSizeIncreaseDone = true;
1079 }
1080 else
1081 {
1082 m_commandBufferSize = MOS_ALIGN_CEIL(requestedCommandBufferSize, 8);
1083 }
1084
1085 if (requestedPatchListSize > m_maxPatchLocationsize)
1086 {
1087 PPATCHLOCATIONLIST newPatchList = (PPATCHLOCATIONLIST)MOS_ReallocMemory(m_patchLocationList, sizeof(PATCHLOCATIONLIST) * requestedPatchListSize);
1088 MOS_OS_CHK_NULL_RETURN(newPatchList);
1089
1090 m_patchLocationList = newPatchList;
1091
1092 // now zero the extended portion
1093 MosUtilities::MosZeroMemory((m_patchLocationList + m_maxPatchLocationsize), sizeof(PATCHLOCATIONLIST) * (requestedPatchListSize - m_maxPatchLocationsize));
1094 m_maxPatchLocationsize = requestedPatchListSize;
1095 }
1096
1097 return MOS_STATUS_SUCCESS;
1098 }
1099
ResizeCommandBuffer(uint32_t requestedSize)1100 MOS_STATUS GpuContextSpecificNext::ResizeCommandBuffer(uint32_t requestedSize)
1101 {
1102 MOS_OS_FUNCTION_ENTER;
1103
1104 m_commandBufferSize = requestedSize;
1105
1106 return MOS_STATUS_SUCCESS;
1107 }
1108
GetVdboxNodeId(PMOS_COMMAND_BUFFER cmdBuffer)1109 MOS_VDBOX_NODE_IND GpuContextSpecificNext::GetVdboxNodeId(
1110 PMOS_COMMAND_BUFFER cmdBuffer)
1111 {
1112 MOS_VDBOX_NODE_IND idx = MOS_VDBOX_NODE_INVALID;
1113
1114 if (cmdBuffer == nullptr)
1115 {
1116 MOS_OS_ASSERTMESSAGE("No cmd buffer provided in GetVdboxNodeId!");
1117 return idx;
1118 }
1119
1120 // If we have assigned vdbox index for the given cmdbuf, return it immediately
1121 if (MOS_VDBOX_NODE_INVALID != cmdBuffer->iVdboxNodeIndex) {
1122 idx = cmdBuffer->iVdboxNodeIndex;
1123 return idx;
1124 }
1125
1126 return idx;
1127 }
1128
GetVcsExecFlag(PMOS_COMMAND_BUFFER cmdBuffer,MOS_GPU_NODE gpuNode)1129 uint32_t GpuContextSpecificNext::GetVcsExecFlag(
1130 PMOS_COMMAND_BUFFER cmdBuffer,
1131 MOS_GPU_NODE gpuNode)
1132 {
1133 if (cmdBuffer == 0)
1134 {
1135 MOS_OS_ASSERTMESSAGE("Input invalid(null) parameter.");
1136 return I915_EXEC_DEFAULT;
1137 }
1138
1139 uint32_t vcsExecFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING1;
1140
1141 if (MOS_VDBOX_NODE_INVALID == cmdBuffer->iVdboxNodeIndex)
1142 {
1143 // That's those case when BB did not have any VDBOX# specific commands.
1144 // Thus, we need to select VDBOX# here. Alternatively we can rely on KMD
1145 // to make balancing for us, i.e. rely on Virtual Engine support.
1146 cmdBuffer->iVdboxNodeIndex = GetVdboxNodeId(cmdBuffer);
1147 if (MOS_VDBOX_NODE_INVALID == cmdBuffer->iVdboxNodeIndex)
1148 {
1149 cmdBuffer->iVdboxNodeIndex = (gpuNode == MOS_GPU_NODE_VIDEO)?
1150 MOS_VDBOX_NODE_1: MOS_VDBOX_NODE_2;
1151 }
1152 }
1153
1154 if (MOS_VDBOX_NODE_1 == cmdBuffer->iVdboxNodeIndex)
1155 {
1156 vcsExecFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING1;
1157 }
1158 else if (MOS_VDBOX_NODE_2 == cmdBuffer->iVdboxNodeIndex)
1159 {
1160 vcsExecFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING2;
1161 }
1162
1163 return vcsExecFlag;
1164 }
1165
#if (_DEBUG || _RELEASE_INTERNAL)
//!
//! \brief    Allocate a tiny batch buffer containing only a batch-buffer-end
//!           command, used to submit a no-op in null-rendering mode.
//! \param    [in] streamState
//!           Stream whose per-stream parameters provide the buffer manager.
//! \return   The allocated buffer object, or nullptr on any failure.
//!
MOS_LINUX_BO* GpuContextSpecificNext::GetNopCommandBuffer(
    MOS_STREAM_HANDLE streamState)
{
    if (streamState == nullptr || streamState->perStreamParameters == nullptr)
    {
        return nullptr;
    }

    auto perStreamParameters = (PMOS_CONTEXT)streamState->perStreamParameters;

    struct mos_drm_bo_alloc alloc;
    alloc.name         = "NOP_CMD_BO";
    alloc.size         = 4096;
    alloc.alignment    = 4096;
    alloc.ext.mem_type = MOS_MEMPOOL_VIDEOMEMORY;
    MOS_LINUX_BO *bo   = mos_bo_alloc(perStreamParameters->bufmgr, &alloc);
    if (bo == nullptr)
    {
        return nullptr;
    }

    mos_bo_map(bo, 1);
    uint32_t *buf = (uint32_t *)bo->virt;
    if (buf == nullptr)
    {
        mos_bo_unreference(bo);
        return nullptr;
    }

    // Fix: use the MI_BATCHBUFFER_END macro defined at the top of this file
    // instead of repeating the raw 0x05000000 opcode literal.
    buf[0] = MI_BATCHBUFFER_END;

    mos_bo_unmap(bo);

    return bo;
}
#endif  // _DEBUG || _RELEASE_INTERNAL
1208
1209
MapResourcesToAuxTable(mos_linux_bo * cmd_bo)1210 MOS_STATUS GpuContextSpecificNext::MapResourcesToAuxTable(mos_linux_bo *cmd_bo)
1211 {
1212 MOS_OS_CHK_NULL_RETURN(cmd_bo);
1213
1214 OsContextSpecificNext *osCtx = static_cast<OsContextSpecificNext*>(m_osContext);
1215 MOS_OS_CHK_NULL_RETURN(osCtx);
1216
1217 AuxTableMgr *auxTableMgr = osCtx->GetAuxTableMgr();
1218 if (auxTableMgr)
1219 {
1220 // Map compress allocations to aux table if it is not mapped.
1221 for (uint32_t i = 0; i < m_numAllocations; i++)
1222 {
1223 auto res = (PMOS_RESOURCE)m_allocationList[i].hAllocation;
1224 MOS_OS_CHK_NULL_RETURN(res);
1225 MOS_OS_CHK_STATUS_RETURN(auxTableMgr->MapResource(res->pGmmResInfo, res->bo));
1226 }
1227 MOS_OS_CHK_STATUS_RETURN(auxTableMgr->EmitAuxTableBOList(cmd_bo));
1228 }
1229 return MOS_STATUS_SUCCESS;
1230 }
1231
//!
//! \brief    Patch, finalize and submit the current command buffer(s) to the KMD.
//! \details  Applies every entry of the patch location list (softpin target or
//!           relocation), appends MI_BATCH_BUFFER_END, unmaps the buffer(s) and
//!           executes them on the engine selected for this GPU context. Three
//!           submission shapes are handled: plain single buffer, serialized
//!           multi-pipe (scalability), and parallel multi-pipe submission.
//! \param    [in] streamState
//!           OS stream handle; perStreamParameters must be a valid MOS_CONTEXT.
//! \param    [in] cmdBuffer
//!           Primary command buffer to submit.
//! \param    [in] nullRendering
//!           True to submit a NOP batch buffer instead of the real workload
//!           (debug / release-internal builds only).
//! \return   MOS_STATUS_SUCCESS when all execbuf calls succeeded.
//!
MOS_STATUS GpuContextSpecificNext::SubmitCommandBuffer(
    MOS_STREAM_HANDLE   streamState,
    PMOS_COMMAND_BUFFER cmdBuffer,
    bool                nullRendering)
{
    MOS_OS_FUNCTION_ENTER;

    MOS_TraceEventExt(EVENT_MOS_BATCH_SUBMIT, EVENT_TYPE_START, nullptr, 0, nullptr, 0);

    MOS_OS_CHK_NULL_RETURN(streamState);
    auto perStreamParameters = (PMOS_CONTEXT)streamState->perStreamParameters;
    MOS_OS_CHK_NULL_RETURN(perStreamParameters);
    MOS_OS_CHK_NULL_RETURN(cmdBuffer);
    MOS_OS_CHK_NULL_RETURN(m_patchLocationList);

    MOS_GPU_NODE gpuNode      = OSKMGetGpuNode(m_gpuContext);
    uint32_t     execFlag     = gpuNode;
    MOS_STATUS   eStatus      = MOS_STATUS_SUCCESS;
    int32_t      ret          = 0;
    bool         scalaEnabled = false;
    auto         it           = m_secondaryCmdBufs.begin();

    // Command buffer object DRM pointer
    m_cmdBufFlushed = true;
    auto cmd_bo     = cmdBuffer->OsResource.bo;

    // Map Resource to Aux if needed
    // NOTE(review): the return status of MapResourcesToAuxTable is ignored on
    // both calls below — confirm whether a failure here should abort submission.
    MapResourcesToAuxTable(cmd_bo);
    for(auto it : m_secondaryCmdBufs)  // local loop variable shadows the outer iterator
    {
        MapResourcesToAuxTable(it.second->OsResource.bo);
    }

    // Two or more secondary buffers means scalability (multi-pipe) submission;
    // the primary buffer then acts as the multi-pipe master.
    if (m_secondaryCmdBufs.size() >= 2)
    {
        scalaEnabled = true;
        cmdBuffer->iSubmissionType = SUBMISSION_TYPE_MULTI_PIPE_MASTER;
    }

    std::vector<PMOS_RESOURCE> mappedResList;   // nested BBs locked here, unlocked after patching
    std::vector<MOS_LINUX_BO *> skipSyncBoList; // BOs slave pipes must not implicitly sync on

    // Now, the patching will be done, based on the patch list.
    for (uint32_t patchIndex = 0; patchIndex < m_currentNumPatchLocations; patchIndex++)
    {
        auto currentPatch = &m_patchLocationList[patchIndex];
        MOS_OS_CHK_NULL_RETURN(currentPatch);

        // A null cmdBo in the patch entry means "patch the primary buffer".
        auto tempCmdBo = currentPatch->cmdBo == nullptr ? cmd_bo : currentPatch->cmdBo;

        // Following are for Nested BB buffer, if it's nested BB, we need to ensure it's locked.
        if (tempCmdBo != cmd_bo)
        {
            bool isSecondaryCmdBuf = false;
            it = m_secondaryCmdBufs.begin();
            while(it != m_secondaryCmdBufs.end())
            {
                if (it->second->OsResource.bo == tempCmdBo)
                {
                    isSecondaryCmdBuf = true;
                    break;
                }
                it++;
            }

            // Not a secondary buffer: find the nested BB in the allocation
            // list and lock it for CPU write so it can be patched below.
            for(auto allocIdx = 0; allocIdx < m_numAllocations && (!isSecondaryCmdBuf); allocIdx++)
            {
                auto tempRes = (PMOS_RESOURCE)m_allocationList[allocIdx].hAllocation;
                if (tempCmdBo == tempRes->bo)
                {
                    GraphicsResourceNext::LockParams param;
                    param.m_writeRequest = true;
                    tempRes->pGfxResourceNext->Lock(m_osContext, param);
                    mappedResList.push_back(tempRes);
                    break;
                }
            }
        }

        // This is the resource for which patching will be done
        auto resource = (PMOS_RESOURCE)m_allocationList[currentPatch->AllocationIndex].hAllocation;
        MOS_OS_CHK_NULL_RETURN(resource);

        // For now, we'll assume the system memory's DRM bo pointer
        // is NULL. If nullptr is detected, then the resource has been
        // placed inside the command buffer's indirect state area.
        // We'll simply set alloc_bo to the command buffer's bo pointer.
        MOS_OS_ASSERT(resource->bo);

        auto alloc_bo = (resource->bo) ? resource->bo : tempCmdBo;

        MOS_OS_CHK_STATUS_RETURN(streamState->osCpInterface->PermeatePatchForHM(
            tempCmdBo->virt,
            currentPatch,
            resource));

        // For non-softpinned BOs, prefer the per-context offset recorded for
        // this (intel_context, bo) pair over the BO's global offset64.
        uint64_t boOffset = alloc_bo->offset64;
        if (!mos_bo_is_softpin(alloc_bo))
        {
            if (alloc_bo != tempCmdBo)
            {
                auto item_ctx = perStreamParameters->contextOffsetList.begin();
                for (; item_ctx != perStreamParameters->contextOffsetList.end(); item_ctx++)
                {
                    if (item_ctx->intel_context == perStreamParameters->intel_context && item_ctx->target_bo == alloc_bo)
                    {
                        boOffset = item_ctx->offset64;
                        break;
                    }
                }
            }
        }

        // Write the patched GPU address into the mapped command buffer,
        // 64-bit or 32-bit depending on the platform relocation width.
        MOS_OS_CHK_NULL_RETURN(tempCmdBo->virt);
        if (perStreamParameters->bUse64BitRelocs)
        {
            *((uint64_t *)((uint8_t *)tempCmdBo->virt + currentPatch->PatchOffset)) =
                boOffset + currentPatch->AllocationOffset;
        }
        else
        {
            *((uint32_t *)((uint8_t *)tempCmdBo->virt + currentPatch->PatchOffset)) =
                boOffset + currentPatch->AllocationOffset;
        }

        // Collect BOs referenced by slave pipes that are not already marked
        // async, so implicit sync can be skipped for them at exec time.
        if (scalaEnabled)
        {
            it = m_secondaryCmdBufs.begin();
            while(it != m_secondaryCmdBufs.end())
            {
                if (it->second->OsResource.bo == tempCmdBo &&
                    it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE &&
                    !mos_bo_is_exec_object_async(alloc_bo))
                {
                    skipSyncBoList.push_back(alloc_bo);
                    break;
                }
                it++;
            }
        }
        else if (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE &&
                 !mos_bo_is_exec_object_async(alloc_bo))
        {
            skipSyncBoList.push_back(alloc_bo);
        }

#if (_DEBUG || _RELEASE_INTERNAL)
        {
            uint32_t evtData[] = {alloc_bo->handle, currentPatch->uiWriteOperation, currentPatch->AllocationOffset};
            MOS_TraceEventExt(EVENT_MOS_BATCH_SUBMIT, EVENT_TYPE_INFO,
                evtData, sizeof(evtData),
                &boOffset, sizeof(boOffset));
        }
#endif

        // Softpinned BOs only need to be registered as exec targets; all
        // others go through the classic relocation path.
        if(mos_bo_is_softpin(alloc_bo))
        {
            if (alloc_bo != tempCmdBo)
            {
                ret = mos_bo_add_softpin_target(tempCmdBo, alloc_bo, currentPatch->uiWriteOperation);
            }
        }
        else
        {
            // This call will patch the command buffer with the offsets of the indirect state region of the command buffer
            ret = mos_bo_emit_reloc(
                tempCmdBo,                                                       // Command buffer
                currentPatch->PatchOffset,                                       // Offset in the command buffer
                alloc_bo,                                                        // Allocation object for which the patch will be made.
                currentPatch->AllocationOffset,                                  // Offset to the indirect state
                I915_GEM_DOMAIN_RENDER,                                          // Read domain
                (currentPatch->uiWriteOperation) ? I915_GEM_DOMAIN_RENDER : 0x0, // Write domain
                boOffset);
        }

        if (ret != 0)
        {
            MOS_OS_ASSERTMESSAGE("Error patching alloc_bo = 0x%x, cmd_bo = 0x%x.",
                (uintptr_t)alloc_bo,
                (uintptr_t)tempCmdBo);
            return MOS_STATUS_UNKNOWN;
        }
    }

    // Unlock nested BBs that were mapped for patching above.
    for(auto res: mappedResList)
    {
        res->pGfxResourceNext->Unlock(m_osContext);
    }
    mappedResList.clear();

    // Terminate every buffer being submitted with MI_BATCH_BUFFER_END.
    if (scalaEnabled)
    {
        it = m_secondaryCmdBufs.begin();
        while(it != m_secondaryCmdBufs.end())
        {
            //Add Batch buffer End Command
            uint32_t batchBufferEndCmd = MI_BATCHBUFFER_END;
            if (MOS_FAILED(Mos_AddCommand(
                    it->second,
                    &batchBufferEndCmd,
                    sizeof(uint32_t))))
            {
                MOS_OS_ASSERTMESSAGE("Inserting BB_END failed!");
                return MOS_STATUS_UNKNOWN;
            }
            it++;
        }
    }
    else
    {
        //Add Batch buffer End Command
        uint32_t batchBufferEndCmd = MI_BATCHBUFFER_END;
        if (MOS_FAILED(Mos_AddCommand(
                cmdBuffer,
                &batchBufferEndCmd,
                sizeof(uint32_t))))
        {
            MOS_OS_ASSERTMESSAGE("Inserting BB_END failed!");
            return MOS_STATUS_UNKNOWN;
        }
    }
    // dump before cmd buffer unmap
    MOS_TraceDumpExt("CmdBuffer", m_gpuContext, cmdBuffer->pCmdBase, cmdBuffer->iOffset);

    // Now, we can unmap the video command buffer, since we don't need CPU access anymore.
    MOS_OS_CHK_NULL_RETURN(cmdBuffer->OsResource.pGfxResourceNext);

    cmdBuffer->OsResource.pGfxResourceNext->Unlock(m_osContext);

    it = m_secondaryCmdBufs.begin();
    while(it != m_secondaryCmdBufs.end())
    {
        MOS_OS_CHK_NULL_RETURN(it->second->OsResource.pGfxResourceNext);
        it->second->OsResource.pGfxResourceNext->Unlock(m_osContext);

        it++;
    }

    // Performance tag handed to the KMD as DR4 (only when perf tagging is on).
    int32_t perfData;
    if (perStreamParameters->pPerfData != nullptr)
    {
        perfData = *(int32_t *)(perStreamParameters->pPerfData);
    }
    else
    {
        perfData = 0;
    }

    drm_clip_rect_t *cliprects = nullptr;
    int32_t num_cliprects = 0;
    int32_t DR4 = perStreamParameters->uEnablePerfTag ? perfData : 0;

    //Since CB2 command is not supported, remove it and set cliprects to nullprt as default.
    // Select the BSD ring for single-pipe video workloads, either per command
    // buffer (balancing) or statically by GPU node.
    if ((gpuNode == MOS_GPU_NODE_VIDEO || gpuNode == MOS_GPU_NODE_VIDEO2) &&
        (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_SINGLE_PIPE_MASK))
    {
        if (perStreamParameters->bKMDHasVCS2)
        {
            if (perStreamParameters->bPerCmdBufferBalancing)
            {
                execFlag = GetVcsExecFlag(cmdBuffer, gpuNode);
            }
            else if (gpuNode == MOS_GPU_NODE_VIDEO)
            {
                execFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING1;
            }
            else if (gpuNode == MOS_GPU_NODE_VIDEO2)
            {
                execFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING2;
            }
        }
        else
        {
            execFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING1;
        }
    }

#if (_DEBUG || _RELEASE_INTERNAL)

    MOS_LINUX_BO *nop_cmd_bo = nullptr;

    // Null rendering: execute a NOP batch instead of the real workload.
    if (nullRendering == true)
    {
        nop_cmd_bo = GetNopCommandBuffer(streamState);

        if (nop_cmd_bo)
        {
            ret = mos_bo_mrb_exec(nop_cmd_bo,
                4096,
                nullptr,
                0,
                0,
                execFlag);
        }
        else
        {
            MOS_OS_ASSERTMESSAGE("Mos_GetNopCommandBuffer_Linux failed!");
        }
    }

#endif //(_DEBUG || _RELEASE_INTERNAL)

    // NOTE(review): gpuNode (MOS_GPU_NODE) is compared against I915_EXEC_RENDER
    // (an i915 exec flag) below — the two enums happen to overlap numerically;
    // confirm this is intentional.
    if (gpuNode != I915_EXEC_RENDER &&
        streamState->osCpInterface->IsTearDownHappen())
    {
        // skip PAK command when CP tear down happen to avoid of GPU hang
        // conditonal batch buffer start PoC is in progress
    }
    else if (nullRendering == false)
    {
        UnlockPendingOcaBuffers(cmdBuffer, perStreamParameters);
        if (streamState->ctxBasedScheduling && m_i915Context[0] != nullptr)
        {
            if (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASK)
            {
                // Serialized scalability: submit each secondary buffer in turn,
                // tagging slave pipes with their index for queue selection.
                if (scalaEnabled && !streamState->bParallelSubmission)
                {
                    uint32_t secondaryIndex = 0;
                    it = m_secondaryCmdBufs.begin();
                    while(it != m_secondaryCmdBufs.end())
                    {
                        if (it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE)
                        {
                            if(execFlag == MOS_GPU_NODE_VE)
                            {
                                // decode excluded since init in other place
                                it->second->iSubmissionType |= (secondaryIndex << SUBMISSION_TYPE_MULTI_PIPE_SLAVE_INDEX_SHIFT);
                                secondaryIndex++;
                            }
                        }
                        ret = SubmitPipeCommands(it->second,
                            it->second->OsResource.bo,
                            perStreamParameters,
                            skipSyncBoList,
                            execFlag,
                            DR4);
                        it++;
                    }
                }
                // Parallel scalability: hand all secondary buffers to the KMD at once.
                else if(scalaEnabled && streamState->bParallelSubmission)
                {
                    ret = ParallelSubmitCommands(m_secondaryCmdBufs,
                        perStreamParameters,
                        execFlag,
                        DR4);
                }
                else
                {
                    ret = SubmitPipeCommands(cmdBuffer,
                        cmd_bo,
                        perStreamParameters,
                        skipSyncBoList,
                        execFlag,
                        DR4);
                }
            }
            else
            {
                // Single-pipe submission on the context-based scheduling queue.
                ret = mos_bo_context_exec2(cmd_bo,
                    m_commandBufferSize,
                    m_i915Context[0],
                    cliprects,
                    num_cliprects,
                    DR4,
                    m_i915ExecFlag,
                    nullptr);
            }
        }
        else
        {
            // Legacy path: submit on the stream's intel_context.
            ret = mos_bo_context_exec2(cmd_bo,
                m_commandBufferSize,
                perStreamParameters->intel_context,
                cliprects,
                num_cliprects,
                DR4,
                execFlag,
                nullptr);
        }
        if (ret != 0)
        {
            eStatus = MOS_STATUS_UNKNOWN;
        }
    }

    if (eStatus != MOS_STATUS_SUCCESS)
    {
        MOS_OS_ASSERTMESSAGE("Command buffer submission failed!");
    }

    MosUtilDevUltSpecific::MOS_DEVULT_FuncCall(pfnUltGetCmdBuf, cmdBuffer);

#if MOS_COMMAND_BUFFER_DUMP_SUPPORTED
    // Dump under a global mutex; buffers must be re-mapped for CPU read.
    pthread_mutex_lock(&command_dump_mutex);
    if (streamState->dumpCommandBuffer)
    {
        if (scalaEnabled)
        {
            it = m_secondaryCmdBufs.begin();
            while(it != m_secondaryCmdBufs.end())
            {
                mos_bo_map(it->second->OsResource.bo, 0);
                MosInterface::DumpCommandBuffer(streamState, it->second);
                mos_bo_unmap(it->second->OsResource.bo);
                it++;
            }
        }
        else
        {
            mos_bo_map(cmd_bo, 0);
            MosInterface::DumpCommandBuffer(streamState, cmdBuffer);
            mos_bo_unmap(cmd_bo);
        }
    }
    pthread_mutex_unlock(&command_dump_mutex);
#endif  // MOS_COMMAND_BUFFER_DUMP_SUPPORTED

#if (_DEBUG || _RELEASE_INTERNAL)
    if (nop_cmd_bo)
    {
        mos_bo_unreference(nop_cmd_bo);
    }
#endif //(_DEBUG || _RELEASE_INTERNAL)

    //clear command buffer relocations to fix memory leak issue
    for (uint32_t patchIndex = 0; patchIndex < m_currentNumPatchLocations; patchIndex++)
    {
        auto currentPatch = &m_patchLocationList[patchIndex];
        MOS_OS_CHK_NULL_RETURN(currentPatch);

        if(currentPatch->cmdBo)
            mos_bo_clear_relocs(currentPatch->cmdBo, 0);
    }

    // Secondary command buffer structures are consumed by this submission.
    it = m_secondaryCmdBufs.begin();
    while(it != m_secondaryCmdBufs.end())
    {
        MOS_FreeMemory(it->second);
        it++;
    }
    m_secondaryCmdBufs.clear();

    skipSyncBoList.clear();

    // Reset resource allocation
    m_numAllocations = 0;
    MosUtilities::MosZeroMemory(m_allocationList, sizeof(ALLOCATION_LIST) * m_maxNumAllocations);
    m_currentNumPatchLocations = 0;
    MosUtilities::MosZeroMemory(m_patchLocationList, sizeof(PATCHLOCATIONLIST) * m_maxNumAllocations);
    m_resCount = 0;

    MosUtilities::MosZeroMemory(m_writeModeList, sizeof(bool) * m_maxNumAllocations);
// NOTE(review): no goto targets this label anymore; kept for compatibility.
finish:
    MOS_TraceEventExt(EVENT_MOS_BATCH_SUBMIT, EVENT_TYPE_END, &eStatus, sizeof(eStatus), nullptr, 0);
    return eStatus;
}
1688
UnlockPendingOcaBuffers(PMOS_COMMAND_BUFFER cmdBuffer,PMOS_CONTEXT mosContext)1689 void GpuContextSpecificNext::UnlockPendingOcaBuffers(PMOS_COMMAND_BUFFER cmdBuffer, PMOS_CONTEXT mosContext)
1690 {
1691 MOS_OS_CHK_NULL_NO_STATUS_RETURN(cmdBuffer);
1692 MOS_OS_CHK_NULL_NO_STATUS_RETURN(mosContext);
1693 MosOcaInterface *pOcaInterface = &MosOcaInterfaceSpecific::GetInstance();
1694 if (nullptr == pOcaInterface || !((MosOcaInterfaceSpecific*)pOcaInterface)->IsOcaEnabled())
1695 {
1696 // Will come here for UMD_OCA not being enabled case.
1697 return;
1698 }
1699
1700 int count = 0;
1701 struct MOS_OCA_EXEC_LIST_INFO *info = nullptr;
1702 if ((cmdBuffer->iSubmissionType & SUBMISSION_TYPE_SINGLE_PIPE_MASK) && ((MosOcaInterfaceSpecific*)pOcaInterface)->IsOcaDumpExecListInfoEnabled())
1703 {
1704 info = mos_bo_get_softpin_targets_info(cmdBuffer->OsResource.bo, &count);
1705 }
1706
1707 pOcaInterface->UnlockPendingOcaBuffers(mosContext, info, count);
1708
1709 if(info)
1710 {
1711 free(info);
1712 }
1713 }
1714
SubmitPipeCommands(MOS_COMMAND_BUFFER * cmdBuffer,MOS_LINUX_BO * cmdBo,PMOS_CONTEXT osContext,const std::vector<MOS_LINUX_BO * > & skipSyncBoList,uint32_t execFlag,int32_t dr4)1715 int32_t GpuContextSpecificNext::SubmitPipeCommands(
1716 MOS_COMMAND_BUFFER *cmdBuffer,
1717 MOS_LINUX_BO *cmdBo,
1718 PMOS_CONTEXT osContext,
1719 const std::vector<MOS_LINUX_BO *> &skipSyncBoList,
1720 uint32_t execFlag,
1721 int32_t dr4)
1722 {
1723 int32_t ret = 0;
1724 int fence = -1;
1725 unsigned int fence_flag = 0;
1726
1727 MOS_LINUX_CONTEXT *queue = m_i915Context[0];
1728 bool isVeboxSubmission = false;
1729
1730 if (execFlag == MOS_GPU_NODE_VIDEO || execFlag == MOS_GPU_NODE_VIDEO2)
1731 {
1732 execFlag = I915_EXEC_DEFAULT;
1733 }
1734 if (execFlag == MOS_GPU_NODE_VE)
1735 {
1736 execFlag = I915_EXEC_DEFAULT;
1737 isVeboxSubmission = true;
1738 }
1739
1740 if(cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE)
1741 {
1742 fence = osContext->submit_fence;
1743 fence_flag = I915_EXEC_FENCE_SUBMIT;
1744 int slave_index = (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE_INDEX_MASK) >> SUBMISSION_TYPE_MULTI_PIPE_SLAVE_INDEX_SHIFT;
1745 if(slave_index < 7)
1746 {
1747 queue = m_i915Context[2 + slave_index]; //0 is for single pipe, 1 is for master, slave starts from 2
1748 }
1749 else
1750 {
1751 MOS_OS_ASSERTMESSAGE("slave_index value: %s is invalid!", slave_index);
1752 return -1;
1753 }
1754
1755 if (isVeboxSubmission)
1756 {
1757 queue = m_i915Context[cmdBuffer->iVeboxNodeIndex + 1];
1758 }
1759
1760 for(auto bo: skipSyncBoList)
1761 {
1762 mos_bo_set_exec_object_async(cmdBo, bo);
1763 }
1764 }
1765
1766 if(cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASTER)
1767 {
1768 //Only master pipe needs fence out flag
1769 fence_flag = I915_EXEC_FENCE_OUT;
1770 queue = m_i915Context[1];
1771 }
1772
1773 ret = mos_bo_context_exec2(cmdBo,
1774 cmdBo->size,
1775 queue,
1776 nullptr,
1777 0,
1778 dr4,
1779 execFlag | fence_flag,
1780 &fence);
1781
1782 if(cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASTER)
1783 {
1784 osContext->submit_fence = fence;
1785 }
1786
1787 if(cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE)
1788 {
1789 close(fence);
1790 }
1791
1792 return ret;
1793 }
1794
//!
//! \brief    Submit multi-pipe secondary command buffers in parallel mode:
//!           standalone (FE) buffers are executed individually with a fence
//!           out, while master/slave buffers are batched and handed to the
//!           KMD in one mos_bo_context_exec3 call on the last pipe.
//! \param    [in] secondaryCmdBufs
//!           NOTE(review): this by-value map parameter is never referenced —
//!           the loop below walks the member m_secondaryCmdBufs instead.
//!           Confirm the parameter can be removed (or at least passed by
//!           const reference) together with its header declaration.
//! \param    [in] osContext
//!           Per-stream context carrying the shared submit fence.
//! \param    [in] execFlag
//!           Engine selector (MOS_GPU_NODE_* mapped to i915 exec flags here).
//! \param    [in] dr4
//!           Performance-tag DWORD passed through to the KMD.
//! \return   0 on success; the result of the last exec call otherwise.
//!
int32_t GpuContextSpecificNext::ParallelSubmitCommands(
    std::map<uint32_t, PMOS_COMMAND_BUFFER> secondaryCmdBufs,
    PMOS_CONTEXT osContext,
    uint32_t execFlag,
    int32_t dr4)
{
    int32_t ret = 0;
    int fence = -1;
    unsigned int fenceFlag = 0;
    auto it = m_secondaryCmdBufs.begin();
    MOS_LINUX_BO *cmdBos[MAX_PARALLEN_CMD_BO_NUM];
    int numBos = 0; // exclude FE bo

    MOS_LINUX_CONTEXT *queue = m_i915Context[0];
    bool isVeboxSubmission = false;

    // Map MOS GPU nodes onto the default exec flag of the per-node contexts.
    if (execFlag == MOS_GPU_NODE_VIDEO || execFlag == MOS_GPU_NODE_VIDEO2)
    {
        execFlag = I915_EXEC_DEFAULT;
    }
    if (execFlag == MOS_GPU_NODE_VE)
    {
        execFlag = I915_EXEC_DEFAULT;
        isVeboxSubmission = true;
    }

    while(it != m_secondaryCmdBufs.end())
    {
        // Standalone (e.g. front-end) buffer: execute on the single-pipe queue
        // with a fence out, and publish the fence for the batched pipes.
        if(it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_ALONE)
        {
            fenceFlag = I915_EXEC_FENCE_OUT;
            queue = m_i915Context[0];

            ret = mos_bo_context_exec2(it->second->OsResource.bo,
                it->second->OsResource.bo->size,
                queue,
                nullptr,
                0,
                dr4,
                execFlag | fenceFlag,
                &fence);

            osContext->submit_fence = fence;
        }

        // Master/slave buffers are accumulated and submitted together when
        // the last pipe of the group is reached.
        if((it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASTER)
            || (it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE))
        {
            cmdBos[numBos++] = it->second->OsResource.bo;

            if(it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE)
            {
                // Queue index is derived from the number of pipes in the group.
                queue = m_i915Context[numBos - 1];
                MOS_OS_CHK_NULL_RETURN(queue);
                if(-1 != fence)
                {
                    // Wait on the standalone buffer's fence before executing.
                    fenceFlag = I915_EXEC_FENCE_IN;
                }

                ret = mos_bo_context_exec3(cmdBos,
                    numBos,
                    queue,
                    nullptr,
                    0,
                    dr4,
                    execFlag | fenceFlag,
                    &fence);

                // Reset the batch for a possible next group in the map.
                for(int i = 0; i < numBos; i++)
                {
                    cmdBos[i] = nullptr;
                }
                numBos = 0;

                if(-1 != fence)
                {
                    // Fence fd is no longer needed once consumed; avoid a leak.
                    close(fence);
                }
            }
        }

        it++;
    }

    return ret;
}
1881
//!
//! \brief    Advance the GPU status tag with wraparound.
//! \details  m_GPUStatusTag % UINT_MAX yields 0..UINT_MAX-1, so after the +1
//!           the tag is always in 1..UINT_MAX; 0 is never produced.
//!
void GpuContextSpecificNext::IncrementGpuStatusTag()
{
    m_GPUStatusTag = m_GPUStatusTag % UINT_MAX + 1;
    // Defensive guard: unreachable given the modulo above, but keeps 0
    // explicitly excluded as a valid tag value.
    if (m_GPUStatusTag == 0)
    {
        m_GPUStatusTag = 1;
    }
}
1890
UpdatePriority(int32_t priority)1891 void GpuContextSpecificNext::UpdatePriority(int32_t priority)
1892 {
1893 if(m_currCtxPriority == priority)
1894 {
1895 return;
1896 }
1897
1898 for (int32_t i=0; i<MAX_ENGINE_INSTANCE_NUM+1; i++)
1899 {
1900 if (m_i915Context[i] != nullptr)
1901 {
1902 int32_t ret = mos_set_context_param(m_i915Context[i], 0, I915_CONTEXT_PARAM_PRIORITY,(uint64_t)priority);
1903 if (ret != 0)
1904 {
1905 MOS_OS_ASSERTMESSAGE("failed to set the gpu priority, errno is %d", ret);
1906 break;
1907 }
1908 }
1909 }
1910 m_currCtxPriority = priority;
1911 }
1912
ResetGpuContextStatus()1913 void GpuContextSpecificNext::ResetGpuContextStatus()
1914 {
1915 MosUtilities::MosZeroMemory(m_allocationList, sizeof(ALLOCATION_LIST) * ALLOCATIONLIST_SIZE);
1916 m_numAllocations = 0;
1917 MosUtilities::MosZeroMemory(m_patchLocationList, sizeof(PATCHLOCATIONLIST) * PATCHLOCATIONLIST_SIZE);
1918 m_currentNumPatchLocations = 0;
1919
1920 MosUtilities::MosZeroMemory(m_attachedResources, sizeof(MOS_RESOURCE) * ALLOCATIONLIST_SIZE);
1921 m_resCount = 0;
1922
1923 MosUtilities::MosZeroMemory(m_writeModeList, sizeof(bool) * ALLOCATIONLIST_SIZE);
1924
1925 if ((m_cmdBufFlushed == true) && m_commandBuffer->OsResource.bo)
1926 {
1927 m_commandBuffer->OsResource.bo = nullptr;
1928 }
1929 }
1930
AllocateGPUStatusBuf()1931 MOS_STATUS GpuContextSpecificNext::AllocateGPUStatusBuf()
1932 {
1933 MOS_OS_FUNCTION_ENTER;
1934
1935 m_statusBufferResource = (PMOS_RESOURCE)MOS_AllocAndZeroMemory(sizeof(MOS_RESOURCE));
1936 MOS_OS_CHK_NULL_RETURN(m_statusBufferResource);
1937
1938 GraphicsResourceNext::CreateParams params;
1939 params.m_tileType = MOS_TILE_LINEAR;
1940 params.m_type = MOS_GFXRES_BUFFER;
1941 params.m_format = Format_Buffer;
1942 params.m_width = sizeof(MOS_GPU_STATUS_DATA);
1943 params.m_height = 1;
1944 params.m_depth = 1;
1945 params.m_arraySize = 1;
1946 params.m_name = "GPU Status Buffer";
1947
1948 GraphicsResourceNext *graphicsResource = GraphicsResourceNext::CreateGraphicResource(GraphicsResourceNext::osSpecificResource);
1949 MOS_OS_CHK_NULL_RETURN(graphicsResource);
1950
1951 MOS_OS_CHK_STATUS_RETURN(graphicsResource->Allocate(m_osContext, params));
1952
1953 GraphicsResourceNext::LockParams lockParams;
1954 lockParams.m_writeRequest = true;
1955 auto gpuStatusData = (MOS_GPU_STATUS_DATA *)graphicsResource->Lock(m_osContext, lockParams);
1956 if (gpuStatusData == nullptr)
1957 {
1958 MOS_OS_ASSERTMESSAGE("Unable to lock gpu eStatus buffer for read.");
1959 graphicsResource->Free(m_osContext);
1960 MOS_Delete(graphicsResource);
1961 return MOS_STATUS_UNKNOWN;
1962 }
1963
1964 MOS_STATUS eStatus = graphicsResource->ConvertToMosResource(m_statusBufferResource);
1965 MOS_OS_CHK_STATUS_RETURN(eStatus);
1966
1967 return MOS_STATUS_SUCCESS;
1968 }
1969
GetOcaRTLogResource(PMOS_RESOURCE globalInst)1970 PMOS_RESOURCE GpuContextSpecificNext::GetOcaRTLogResource(PMOS_RESOURCE globalInst)
1971 {
1972 // OcaRTLogResources are shared w/ different video processors.
1973 // iAllocationIndex array in MOS_RESOURCE indexed by gpu_context type. When resource being accessed
1974 // in GpuContextSpecificNext::RegisterResource and Mos_Specific_GetResourceAllocationIndex w/ more
1975 // than 2 video processors, the value may be overwritten and wrong allocation Index in array may be used.
1976 // To avoid this, use duplicate MOS_RESOURCE instance in GPU Context to ensure differnt iAllocationIndex
1977 // array of OcaRTLogResources being used for different GPU Context.
1978 if (!m_ocaRtLogResInited && globalInst)
1979 {
1980 m_ocaRtLogResource = *globalInst;
1981 m_ocaRtLogResInited = true;
1982 }
1983 return &m_ocaRtLogResource;
1984 }
1985
#if (_DEBUG || _RELEASE_INTERNAL)
//!
//! \brief    Restrict the engine map to the instance(s) selected by the user
//!           override (debug / release-internal builds only).
//! \param    [in] engine_map
//!           Engine map to filter in place via mos_select_fixed_engine.
//! \param    [in,out] engineNum
//!           Number of engines in the map; may be reduced by the selection.
//! \param    [in] userEngineInstance
//!           Packed per-node user selection bits; 0 disables the override.
//! \param    [in] gpuNode
//!           GPU node deciding which bit field of the override applies.
//! \return   Non-zero (true) when a non-empty instance mask was selected.
//!
bool GpuContextSpecificNext::SelectEngineInstanceByUser(void *engine_map,
    uint32_t *engineNum, uint32_t userEngineInstance, MOS_GPU_NODE gpuNode)
{
    uint32_t selectedMask = 0x0;

    if (userEngineInstance && m_osParameters)
    {
        // Only the low *engineNum bits of the per-node selection are valid.
        const uint32_t validBits = ENGINE_INSTANCE_SELECT_ENABLE_MASK >> (MAX_ENGINE_INSTANCE_NUM - *engineNum);

        switch (gpuNode)
        {
        case MOS_GPU_NODE_COMPUTE:
            selectedMask = (userEngineInstance >> ENGINE_INSTANCE_SELECT_COMPUTE_INSTANCE_SHIFT) & validBits;
            break;
        case MOS_GPU_NODE_VE:
            selectedMask = (userEngineInstance >> ENGINE_INSTANCE_SELECT_VEBOX_INSTANCE_SHIFT) & validBits;
            break;
        case MOS_GPU_NODE_VIDEO:
        case MOS_GPU_NODE_VIDEO2:
            selectedMask = (userEngineInstance >> ENGINE_INSTANCE_SELECT_VDBOX_INSTANCE_SHIFT) & validBits;
            break;
        default:
            MOS_OS_NORMALMESSAGE("Invalid gpu node in use.");
            break;
        }

        mos_select_fixed_engine(m_osParameters->bufmgr, engine_map, engineNum, selectedMask);
    }

    // Implicitly converts to bool: true iff at least one instance was selected.
    return selectedMask;
}
#endif
2020