1 /*
2 * Copyright (c) 2019-2024, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file    mos_gpucontext_specific_next.cpp
24 //! \brief   Container class for the Linux specific gpu context
25 //!
26 
27 #include <unistd.h>
28 #include "mos_gpucontext_specific_next.h"
29 #include "mos_context_specific_next.h"
30 #include "mos_graphicsresource_specific_next.h"
31 #include "mos_commandbuffer_specific_next.h"
32 #include "mos_util_devult_specific_next.h"
33 #include "mos_cmdbufmgr_next.h"
34 #include "mos_os_virtualengine_next.h"
35 #include "mos_interface.h"
36 #include "mos_os_cp_interface_specific.h"
37 #ifdef ENABLE_XE_KMD
38 #include "mos_gpucontext_specific_next_xe.h"
39 #endif
40 
41 #define MI_BATCHBUFFER_END 0x05000000
42 static pthread_mutex_t command_dump_mutex = PTHREAD_MUTEX_INITIALIZER;
43 
StoreCreateOptions(PMOS_GPUCTX_CREATOPTIONS createoption)44 void GpuContextSpecificNext::StoreCreateOptions(PMOS_GPUCTX_CREATOPTIONS createoption)
45 {
46     if (typeid(*createoption) == typeid(MOS_GPUCTX_CREATOPTIONS_ENHANCED))
47     {
48         m_bEnhancedUsed = true;
49         MosUtilities::MosSecureMemcpy(&m_createOptionEnhanced, sizeof(MOS_GPUCTX_CREATOPTIONS_ENHANCED), createoption, sizeof(MOS_GPUCTX_CREATOPTIONS_ENHANCED));
50     }
51     else
52     {
53         MosUtilities::MosSecureMemcpy(&m_createOption, sizeof(MOS_GPUCTX_CREATOPTIONS), createoption, sizeof(MOS_GPUCTX_CREATOPTIONS));
54     }
55 }
56 
GpuContextSpecificNext(const MOS_GPU_NODE gpuNode,CmdBufMgrNext * cmdBufMgr,GpuContextNext * reusedContext)57 GpuContextSpecificNext::GpuContextSpecificNext(
58     const MOS_GPU_NODE gpuNode,
59     CmdBufMgrNext         *cmdBufMgr,
60     GpuContextNext        *reusedContext)
61 {
62     MOS_OS_FUNCTION_ENTER;
63 
64     m_nodeOrdinal          = gpuNode;
65     m_cmdBufMgr            = cmdBufMgr;
66     m_statusBufferResource = nullptr;
67     m_maxPatchLocationsize = PATCHLOCATIONLIST_SIZE;
68 
69     if (reusedContext)
70     {
71         MOS_OS_NORMALMESSAGE("gpucontex reusing not enabled on Linux.");
72     }
73 
74 #if (_DEBUG || _RELEASE_INTERNAL)
75     // get user engine instance setting from environment variable
76     char *engineInstances = getenv("INTEL_ENGINE_INSTANCE");
77     if (engineInstances != nullptr)
78     {
79         errno             = 0;
80         long int instance = strtol(engineInstances, nullptr, 16);
81         /* Check for various possible errors. */
82         if ((errno == ERANGE && instance == LONG_MAX) || (instance < 0))
83         {
84             MOS_OS_NORMALMESSAGE("Invalid INTEL_ENGINE_INSTANCE setting.(%s)\n", engineInstances);
85             m_engineInstanceSelect = 0x0;
86         }
87         else
88         {
89             m_engineInstanceSelect = (uint32_t)instance;
90         }
91     }
92 #endif
93 }
94 
//!
//! \brief    Destructor
//! \details  Delegates all teardown to Clear() so destruction and explicit
//!           re-initialization (RecreateContext) share a single cleanup path.
//!
GpuContextSpecificNext::~GpuContextSpecificNext()
{
    MOS_OS_FUNCTION_ENTER;

    Clear();
}
101 
Create(const MOS_GPU_NODE gpuNode,CmdBufMgrNext * cmdBufMgr,GpuContextNext * reusedContext)102 GpuContextNext *GpuContextSpecificNext::Create(
103     const MOS_GPU_NODE    gpuNode,
104     CmdBufMgrNext         *cmdBufMgr,
105     GpuContextNext        *reusedContext)
106 {
107     MOS_OS_FUNCTION_ENTER;
108     if (nullptr == cmdBufMgr)
109     {
110         return nullptr;
111     }
112     OsContextSpecificNext *osDeviceContext = dynamic_cast<OsContextSpecificNext*>(cmdBufMgr->m_osContext);
113     if (nullptr == osDeviceContext)
114     {
115         return nullptr;
116     }
117     int type = osDeviceContext->GetDeviceType();
118     if (DEVICE_TYPE_I915 == type)
119     {
120         return MOS_New(GpuContextSpecificNext, gpuNode, cmdBufMgr, reusedContext);
121     }
122 #ifdef ENABLE_XE_KMD
123     else if (DEVICE_TYPE_XE == type)
124     {
125         return MOS_New(GpuContextSpecificNextXe, gpuNode, cmdBufMgr, reusedContext);
126     }
127 #endif
128     return nullptr;
129 }
130 
RecreateContext(bool bIsProtected,MOS_STREAM_HANDLE streamState)131 MOS_STATUS GpuContextSpecificNext::RecreateContext(bool bIsProtected, MOS_STREAM_HANDLE streamState)
132 {
133     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
134     // clear existing context
135     Clear();
136     m_bProtectedContext = bIsProtected;
137     PMOS_GPUCTX_CREATOPTIONS createOption;
138     if (m_bEnhancedUsed)
139     {
140         createOption = &m_createOptionEnhanced;
141     }
142     else
143     {
144         createOption = &m_createOption;
145     }
146     eStatus = Init(m_osContext, streamState, createOption);
147     return eStatus;
148 }
149 
PatchGPUContextProtection(MOS_STREAM_HANDLE streamState)150 MOS_STATUS GpuContextSpecificNext::PatchGPUContextProtection(MOS_STREAM_HANDLE streamState)
151 {
152     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
153     MOS_OS_CHK_NULL_RETURN(streamState);
154     auto osParameters = (PMOS_CONTEXT)streamState->perStreamParameters;
155     MOS_OS_CHK_NULL_RETURN(osParameters);
156 
157     // clean up clear gem context and create protected gem context if CP is enabled
158     if (streamState->osCpInterface &&
159         streamState->osCpInterface->IsCpEnabled())    // Check if CP is enabled as protected GEM context is only needed when CP is enabled
160         {
161             if (streamState->ctxBasedScheduling)
162             {
163                 if (m_bProtectedContext == false)    // Check if GEM context is already protected or not
164                 {
165                     // Context is not protected, recreate it as protected
166                     eStatus = RecreateContext(true, streamState);
167                     if (eStatus == MOS_STATUS_SUCCESS)
168                     {
169                         //Register Protected Context
170                         streamState->osCpInterface->RegisterAndCheckProtectedGemCtx(true, (void*)this, nullptr);
171                     }
172                 }
173                 //If m_bProtectedContext == true then check if is stale context or not.
174                 //If it is stale protected context then recreate another one
175                 else
176                 {
177                     bool bIsContextStale = false;
178                     //Check protected context
179                     streamState->osCpInterface->RegisterAndCheckProtectedGemCtx(false, (void*)this, &bIsContextStale);
180 
181                     //Recreate protected context
182                     if (bIsContextStale)
183                     {
184                         eStatus = RecreateContext(true, streamState);
185                         if (eStatus == MOS_STATUS_SUCCESS)
186                         {
187                             //Register Protected Context
188                             streamState->osCpInterface->RegisterAndCheckProtectedGemCtx(true, (void*)this, nullptr);
189                         }
190                     }
191                 }
192             }
193             else
194             {
195                 if (osParameters->m_protectedGEMContext == false)
196                 {
197                     // for non context based scheduling protected context is always created as protected during Initialization if needed
198                     // If it is not created during Initialization then do nothing and add a comment for Debug purposes
199                     MOS_OS_CRITICALMESSAGE("Using Clear GEM context when protected Context is needed");
200                     eStatus = MOS_STATUS_SUCCESS;
201                 }
202             }
203         }
204 
205     // clean up protected gem context and recreate clear gem context if CP is disabled
206     if (streamState->osCpInterface &&
207         !streamState->osCpInterface->IsCpEnabled() &&
208         streamState->ctxBasedScheduling &&
209         m_bProtectedContext == true)    // Check if GEM context is protected or not
210         {
211             // Context is protected, recreate it as clear
212             eStatus = RecreateContext(false, streamState);
213         }
214 
215     return eStatus;
216 }
217 
//!
//! \brief    Create and configure the i915 GEM context for the 3D (render) node
//! \details  Creates a shared single-timeline context, queries the render-class
//!           engines, attaches the kernel load balancer across them, and
//!           optionally trims the subslice mask when the create options request
//!           a specific SSEU (power gating) configuration.
//! \param    [in] osParameters   Per-stream OS context (bufmgr, parent intel_context)
//! \param    [in] createOption   Create options; SSEUValue/packed.SubSliceCount drive SSEU setup
//! \param    [in,out] nengine    In: engine count the map was sized for; out: engines found for the render class
//! \param    [in,out] engine_map Caller-allocated buffer filled with the queried engine instances
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS on success, MOS_STATUS_UNKNOWN on any KMD failure
//!
MOS_STATUS GpuContextSpecificNext::Init3DCtx(PMOS_CONTEXT osParameters,
                PMOS_GPUCTX_CREATOPTIONS createOption,
                unsigned int *nengine,
                void *engine_map)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    // Create the primary context first; engine binding is applied below.
    m_i915Context[0] = mos_context_create_shared(osParameters->bufmgr,
                                             osParameters->intel_context,
                                             I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
                                             m_bProtectedContext,
                                             engine_map,
                                             1,
                                             *nengine,
                                             0);
    if (m_i915Context[0] == nullptr)
    {
        MOS_OS_ASSERTMESSAGE("Failed to create context.\n");
        return MOS_STATUS_UNKNOWN;
    }
    m_i915Context[0]->pOsContext = osParameters;

    __u16 engine_class = I915_ENGINE_CLASS_RENDER;
    __u64 caps = 0;

    // Query render engines; nengine is updated with the number found and
    // engine_map receives the engine class/instance entries.
    if (mos_query_engines(osParameters->bufmgr, engine_class, caps, nengine, engine_map))
    {
        MOS_OS_ASSERTMESSAGE("Failed to query engines.\n");
        return MOS_STATUS_UNKNOWN;
    }

    // Let the KMD load-balance submissions across all queried render engines.
    if (mos_set_context_param_load_balance(m_i915Context[0], (struct i915_engine_class_instance *)engine_map, *nengine))
    {
        MOS_OS_ASSERTMESSAGE("Failed to set balancer extension.\n");
        return MOS_STATUS_UNKNOWN;
    }

    if (createOption->SSEUValue != 0)
    {
        // SSEU requested: read the current per-engine configuration, then
        // shrink the subslice mask down to the requested subslice count.
        struct drm_i915_gem_context_param_sseu sseu;
        MosUtilities::MosZeroMemory(&sseu, sizeof(sseu));
        sseu.flags = I915_CONTEXT_SSEU_FLAG_ENGINE_INDEX;
        sseu.engine.engine_instance = m_i915ExecFlag;

        if (mos_get_context_param_sseu(m_i915Context[0], &sseu))
        {
            MOS_OS_ASSERTMESSAGE("Failed to get sseu configuration.");
            return MOS_STATUS_UNKNOWN;
        }

        // Switch off the excess subslices so only SubSliceCount remain set.
        if (mos_hweight8(m_i915Context[0], sseu.subslice_mask) > createOption->packed.SubSliceCount)
        {
            sseu.subslice_mask = mos_switch_off_n_bits(m_i915Context[0], sseu.subslice_mask,
                    mos_hweight8(m_i915Context[0], sseu.subslice_mask)-createOption->packed.SubSliceCount);
        }

        if (mos_set_context_param_sseu(m_i915Context[0], sseu))
        {
            MOS_OS_ASSERTMESSAGE("Failed to set sseu configuration.");
            return MOS_STATUS_UNKNOWN;
        }
    }

    return eStatus;
}
283 
//!
//! \brief    Create and configure the i915 GEM context for the compute node
//! \details  Creates a shared single-timeline context, queries compute-class
//!           engines, optionally narrows them via the debug engine-instance
//!           override, and attaches the kernel load balancer.
//! \param    [in] osParameters          Per-stream OS context (bufmgr, parent intel_context)
//! \param    [in,out] nengine           In: engine count the map was sized for; out: engines found (possibly narrowed by user selection)
//! \param    [in,out] engine_map        Caller-allocated buffer filled with the queried engine instances
//! \param    [in] gpuNode               GPU node, forwarded to the user engine selection
//! \param    [out] isEngineSelectEnable Set when the debug user selection filtered the engines (debug/internal builds only)
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS on success, MOS_STATUS_UNKNOWN on any KMD failure
//!
MOS_STATUS GpuContextSpecificNext::InitComputeCtx(PMOS_CONTEXT osParameters,
                unsigned int *nengine,
                void *engine_map,
                MOS_GPU_NODE gpuNode,
                bool *isEngineSelectEnable)
{
    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    m_i915Context[0] = mos_context_create_shared(osParameters->bufmgr,
                                             osParameters->intel_context,
                                             I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
                                             m_bProtectedContext,
                                             engine_map,
                                             1,
                                             *nengine,
                                             0);
    if (m_i915Context[0] == nullptr)
    {
        MOS_OS_ASSERTMESSAGE("Failed to create context.\n");
        return MOS_STATUS_UNKNOWN;
    }
    m_i915Context[0]->pOsContext = osParameters;

    // 4 == compute engine class; a named I915_ENGINE_CLASS_* constant was not
    // yet available when this was written (see comment below).
    __u16 engine_class = 4; //To change later when linux define the name
    __u64 caps = 0;

    if (mos_query_engines(osParameters->bufmgr, engine_class, caps, nengine, engine_map))
    {
        MOS_OS_ASSERTMESSAGE("Failed to query engines.\n");
        return MOS_STATUS_UNKNOWN;
    }

#if (_DEBUG || _RELEASE_INTERNAL)
    // Debug-only: filter the engine list by the INTEL_ENGINE_INSTANCE mask.
    *isEngineSelectEnable = SelectEngineInstanceByUser(engine_map, nengine, m_engineInstanceSelect, gpuNode);
#endif
    if (mos_set_context_param_load_balance(m_i915Context[0], (struct i915_engine_class_instance *)engine_map, *nengine))
    {
        MOS_OS_ASSERTMESSAGE("Failed to set balancer extension.\n");
        return MOS_STATUS_UNKNOWN;
    }

    return eStatus;
}
327 
InitVdVeCtx(PMOS_CONTEXT osParameters,MOS_STREAM_HANDLE streamState,PMOS_GPUCTX_CREATOPTIONS createOption,unsigned int * nengine,void * engine_map,MOS_GPU_NODE gpuNode,bool * isEngineSelectEnable)328 MOS_STATUS GpuContextSpecificNext::InitVdVeCtx(PMOS_CONTEXT osParameters,
329                 MOS_STREAM_HANDLE streamState,
330                 PMOS_GPUCTX_CREATOPTIONS createOption,
331                 unsigned int *nengine,
332                 void *engine_map,
333                 MOS_GPU_NODE gpuNode,
334                 bool *isEngineSelectEnable)
335 {
336     MOS_OS_FUNCTION_ENTER;
337 
338     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
339 
340     m_i915Context[0] = mos_context_create_shared(osParameters->bufmgr,
341                                              osParameters->intel_context,
342                                              I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
343                                              m_bProtectedContext,
344                                              engine_map,
345                                              1,
346                                              *nengine,
347                                              0);
348     if (m_i915Context[0] == nullptr)
349     {
350         MOS_OS_ASSERTMESSAGE("Failed to create context.\n");
351         return MOS_STATUS_UNKNOWN;
352     }
353     struct i915_engine_class_instance *_engine_map = (struct i915_engine_class_instance *)engine_map;
354     m_i915Context[0]->pOsContext = osParameters;
355 
356     __u16 engine_class = (gpuNode == MOS_GPU_NODE_VE)? I915_ENGINE_CLASS_VIDEO_ENHANCE : I915_ENGINE_CLASS_VIDEO;
357     __u64 caps = 0;
358 
359     SetEngineQueryFlags(createOption, caps);
360 
361     if (mos_query_engines(osParameters->bufmgr, engine_class, caps, nengine, (void *)_engine_map))
362     {
363         MOS_OS_ASSERTMESSAGE("Failed to query engines.\n");
364         return MOS_STATUS_UNKNOWN;
365     }
366 
367 #if (_DEBUG || _RELEASE_INTERNAL)
368     *isEngineSelectEnable = SelectEngineInstanceByUser((void *)_engine_map, nengine, m_engineInstanceSelect, gpuNode);
369 #endif
370     if (mos_set_context_param_load_balance(m_i915Context[0], _engine_map, *nengine))
371     {
372         MOS_OS_ASSERTMESSAGE("Failed to set balancer extension.\n");
373         return MOS_STATUS_UNKNOWN;
374     }
375 
376     if (*nengine >= 2 && *nengine <= MAX_ENGINE_INSTANCE_NUM)
377     {
378         int i;
379         //master queue
380         m_i915Context[1] = mos_context_create_shared(osParameters->bufmgr,
381                                                             osParameters->intel_context,
382                                                             I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
383                                                             m_bProtectedContext,
384                                                             (void *)_engine_map,
385                                                             1,
386                                                             1,
387                                                             0);
388         if (m_i915Context[1] == nullptr)
389         {
390             MOS_OS_ASSERTMESSAGE("Failed to create master context.\n");
391             return MOS_STATUS_UNKNOWN;
392         }
393         m_i915Context[1]->pOsContext = osParameters;
394 
395         if (mos_set_context_param_load_balance(m_i915Context[1], _engine_map, 1))
396         {
397             MOS_OS_ASSERTMESSAGE("Failed to set master context bond extension.\n");
398             return MOS_STATUS_UNKNOWN;
399         }
400 
401         //slave queue
402         for (i=1; i < *nengine; i++)
403         {
404             m_i915Context[i+1] = mos_context_create_shared(osParameters->bufmgr,
405                                                                 osParameters->intel_context,
406                                                                 I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
407                                                                 m_bProtectedContext,
408                                                                 (void *)_engine_map,
409                                                                 1,
410                                                                 1,
411                                                                 0);
412             if (m_i915Context[i+1] == nullptr)
413             {
414                 MOS_OS_ASSERTMESSAGE("Failed to create slave context.\n");
415                 return MOS_STATUS_UNKNOWN;
416             }
417             m_i915Context[i+1]->pOsContext = osParameters;
418 
419             if (mos_set_context_param_bond(m_i915Context[i+1], _engine_map[0], &_engine_map[i], 1) != S_SUCCESS)
420             {
421                 int err = errno;
422                 if (err == ENODEV)
423                 {
424                     mos_context_destroy(m_i915Context[1]);
425                     mos_context_destroy(m_i915Context[i+1]);
426                     m_i915Context[i+1] = nullptr;
427                     break;
428                 }
429                 else
430                 {
431                     MOS_OS_ASSERTMESSAGE("Failed to set slave context bond extension. errno=%d\n",err);
432                     return MOS_STATUS_UNKNOWN;
433                 }
434             }
435         }
436         if (i == *nengine)
437         {
438             streamState->bParallelSubmission = false;
439         }
440         else
441         {
442             streamState->bParallelSubmission = true;
443             //create context with different width
444             for(i = 1; i < *nengine; i++)
445             {
446                 unsigned int ctxWidth = i + 1;
447                 m_i915Context[i] = mos_context_create_shared(osParameters->bufmgr,
448                                                              osParameters->intel_context,
449                                                              0, // I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE not allowed for parallel submission
450                                                              m_bProtectedContext,
451                                                              (void *)_engine_map,
452                                                              ctxWidth,
453                                                              1,
454                                                              0);
455                 if (mos_set_context_param_parallel(m_i915Context[i], _engine_map, ctxWidth) != S_SUCCESS)
456                 {
457                     MOS_OS_ASSERTMESSAGE("Failed to set parallel extension since discontinuous logical engine.\n");
458                     mos_context_destroy(m_i915Context[i]);
459                     m_i915Context[i] = nullptr;
460                     break;
461                 }
462             }
463         }
464     }
465 
466     return eStatus;
467 }
468 
//!
//! \brief    Create and configure the i915 GEM context for the blitter node
//! \details  Creates a shared single-timeline context, queries copy-class
//!           engines and attaches the kernel load balancer across them.
//! \param    [in] osParameters   Per-stream OS context (bufmgr, parent intel_context)
//! \param    [in,out] nengine    In: engine count the map was sized for; out: copy engines found
//! \param    [in,out] engine_map Caller-allocated buffer filled with the queried engine instances
//! \return   MOS_STATUS
//!           MOS_STATUS_SUCCESS on success, MOS_STATUS_UNKNOWN on any KMD failure
//!
MOS_STATUS GpuContextSpecificNext::InitBltCtx(PMOS_CONTEXT osParameters,
                unsigned int *nengine,
                void *engine_map)
{
    MOS_OS_FUNCTION_ENTER;

    MOS_STATUS eStatus = MOS_STATUS_SUCCESS;

    m_i915Context[0] = mos_context_create_shared(osParameters->bufmgr,
                                             osParameters->intel_context,
                                             I915_CONTEXT_CREATE_FLAGS_SINGLE_TIMELINE,
                                             m_bProtectedContext,
                                             engine_map,
                                             1,
                                             *nengine,
                                             0);
    if (m_i915Context[0] == nullptr)
    {
        MOS_OS_ASSERTMESSAGE("Failed to create context.\n");
        return MOS_STATUS_UNKNOWN;
    }
    m_i915Context[0]->pOsContext = osParameters;

    __u16 engine_class = I915_ENGINE_CLASS_COPY;
    __u64 caps = 0;

    if (mos_query_engines(osParameters->bufmgr, engine_class, caps, nengine, engine_map))
    {
        MOS_OS_ASSERTMESSAGE("Failed to query engines.\n");
        return MOS_STATUS_UNKNOWN;
    }

    if (mos_set_context_param_load_balance(m_i915Context[0], (struct i915_engine_class_instance *)engine_map, *nengine))
    {
        MOS_OS_ASSERTMESSAGE("Failed to set balancer extension.\n");
        return MOS_STATUS_UNKNOWN;
    }

    return eStatus;
}
509 
510 
Init(OsContextNext * osContext,MOS_STREAM_HANDLE streamState,PMOS_GPUCTX_CREATOPTIONS createOption)511 MOS_STATUS GpuContextSpecificNext::Init(OsContextNext *osContext,
512                     MOS_STREAM_HANDLE streamState,
513                     PMOS_GPUCTX_CREATOPTIONS createOption)
514 {
515     MOS_OS_FUNCTION_ENTER;
516 
517     MOS_OS_CHK_NULL_RETURN(osContext);
518     MOS_OS_CHK_NULL_RETURN(streamState);
519     MOS_OS_CHK_NULL_RETURN(createOption);
520 
521     MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
522 
523     auto osParameters = (PMOS_CONTEXT)streamState->perStreamParameters;
524     MOS_OS_CHK_NULL_RETURN(osParameters);
525 
526     m_osParameters       = osParameters;
527 
528     MOS_GPU_NODE gpuNode = MOS_GPU_NODE_3D;
529     gpuNode = static_cast<MOS_GPU_NODE>(createOption->gpuNode);
530 
531     if (m_cmdBufPoolMutex == nullptr)
532     {
533         m_cmdBufPoolMutex = MosUtilities::MosCreateMutex();
534     }
535 
536     MOS_OS_CHK_NULL_RETURN(m_cmdBufPoolMutex);
537 
538     MosUtilities::MosLockMutex(m_cmdBufPoolMutex);
539 
540     m_cmdBufPool.clear();
541 
542     MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
543 
544     m_ocaLogSectionSupported = osContext->m_ocaLogSectionSupported;
545     if (m_ocaLogSectionSupported)
546     {
547         // increase size for oca log section
548         m_commandBufferSize = MosOcaInterfaceSpecific::IncreaseSize(COMMAND_BUFFER_SIZE);
549     }
550     else
551     {
552         m_commandBufferSize = COMMAND_BUFFER_SIZE;
553     }
554 
555     m_nextFetchIndex = 0;
556 
557     m_cmdBufFlushed = true;
558 
559     m_osContext = osContext;
560 
561     MOS_OS_CHK_STATUS_RETURN(AllocateGPUStatusBuf());
562 
563     m_commandBuffer = (PMOS_COMMAND_BUFFER)MOS_AllocAndZeroMemory(sizeof(MOS_COMMAND_BUFFER));
564 
565     MOS_OS_CHK_NULL_RETURN(m_commandBuffer);
566 
567     m_IndirectHeapSize = 0;
568 
569     // each thread has its own GPU context, so do not need any lock as guarder here
570     m_allocationList = (ALLOCATION_LIST *)MOS_AllocAndZeroMemory(sizeof(ALLOCATION_LIST) * ALLOCATIONLIST_SIZE);
571     MOS_OS_CHK_NULL_RETURN(m_allocationList);
572     m_maxNumAllocations = ALLOCATIONLIST_SIZE;
573 
574     m_patchLocationList = (PATCHLOCATIONLIST *)MOS_AllocAndZeroMemory(sizeof(PATCHLOCATIONLIST) * PATCHLOCATIONLIST_SIZE);
575     MOS_OS_CHK_NULL_RETURN(m_patchLocationList);
576     m_maxPatchLocationsize = PATCHLOCATIONLIST_SIZE;
577 
578     m_attachedResources = (PMOS_RESOURCE)MOS_AllocAndZeroMemory(sizeof(MOS_RESOURCE) * ALLOCATIONLIST_SIZE);
579     MOS_OS_CHK_NULL_RETURN(m_attachedResources);
580 
581     m_writeModeList = (bool *)MOS_AllocAndZeroMemory(sizeof(bool) * ALLOCATIONLIST_SIZE);
582     MOS_OS_CHK_NULL_RETURN(m_writeModeList);
583 
584     m_GPUStatusTag = 1;
585 
586     StoreCreateOptions(createOption);
587 
588     for (int i=0; i<MAX_ENGINE_INSTANCE_NUM+1; i++)
589     {
590         m_i915Context[i] = nullptr;
591     }
592 
593     if (streamState->ctxBasedScheduling)
594     {
595         bool         isEngineSelectEnable = false;
596         unsigned int nengine              = 0;
597         size_t       engine_class_size    = 0;
598         void         *engine_map          = nullptr;
599 
600         MOS_TraceEventExt(EVENT_GPU_CONTEXT_CREATE, EVENT_TYPE_START,
601                           &gpuNode, sizeof(gpuNode), nullptr, 0);
602 
603         m_i915ExecFlag = I915_EXEC_DEFAULT;
604 
605         if (mos_query_engines_count(osParameters->bufmgr, &nengine))
606         {
607             MOS_OS_ASSERTMESSAGE("Failed to query engines count.\n");
608             return MOS_STATUS_UNKNOWN;
609         }
610         engine_class_size = mos_get_engine_class_size(osParameters->bufmgr);
611         if (!engine_class_size)
612         {
613             MOS_OS_ASSERTMESSAGE("Failed to get engine class instance size.\n");
614             return MOS_STATUS_UNKNOWN;
615         }
616         engine_map = MOS_AllocAndZeroMemory(nengine * engine_class_size);
617         MOS_OS_CHK_NULL_RETURN(engine_map);
618 
619         if (gpuNode == MOS_GPU_NODE_3D)
620         {
621             eStatus = Init3DCtx(osParameters, createOption, &nengine, engine_map);
622         }
623         else if (gpuNode == MOS_GPU_NODE_COMPUTE)
624         {
625             eStatus = InitComputeCtx(osParameters, &nengine, engine_map, gpuNode, &isEngineSelectEnable);
626         }
627         else if (gpuNode == MOS_GPU_NODE_VIDEO || gpuNode == MOS_GPU_NODE_VIDEO2
628                 || gpuNode == MOS_GPU_NODE_VE)
629         {
630             eStatus = InitVdVeCtx(osParameters, streamState, createOption, &nengine, engine_map, gpuNode, &isEngineSelectEnable);
631         }
632         else if (gpuNode == MOS_GPU_NODE_BLT)
633         {
634             eStatus = InitBltCtx(osParameters, &nengine, engine_map);
635         }
636         else
637         {
638             MOS_OS_ASSERTMESSAGE("Unknown engine class.\n");
639             MOS_SafeFreeMemory(engine_map);
640             return MOS_STATUS_UNKNOWN;
641         }
642 
643         if (eStatus == MOS_STATUS_SUCCESS)
644         {
645             MOS_OS_CHK_STATUS_RETURN(ReportEngineInfo(engine_map, nengine, isEngineSelectEnable));
646         }
647         MOS_SafeFreeMemory(engine_map);
648         MOS_TraceEventExt(EVENT_GPU_CONTEXT_CREATE, EVENT_TYPE_END,
649                           m_i915Context, sizeof(void *),
650                           &nengine, sizeof(nengine));
651     }
652 
653     return eStatus;
654 }
655 
Clear()656 void GpuContextSpecificNext::Clear()
657 {
658     MOS_OS_FUNCTION_ENTER;
659 
660     MOS_TraceEventExt(EVENT_GPU_CONTEXT_DESTROY, EVENT_TYPE_START,
661                       m_i915Context, sizeof(void *), nullptr, 0);
662     // hanlde the status buf bundled w/ the specified gpucontext
663     if (m_statusBufferResource && m_statusBufferResource->pGfxResourceNext)
664     {
665         if (m_statusBufferResource->pGfxResourceNext->Unlock(m_osContext) != MOS_STATUS_SUCCESS)
666         {
667             MOS_OS_ASSERTMESSAGE("failed to unlock the status buf bundled w/ the specified gpucontext");
668         }
669         m_statusBufferResource->pGfxResourceNext->Free(m_osContext, 0);
670         MOS_Delete(m_statusBufferResource->pGfxResourceNext);
671     }
672     MOS_FreeMemAndSetNull(m_statusBufferResource);
673 
674     MosUtilities::MosLockMutex(m_cmdBufPoolMutex);
675 
676     if (m_cmdBufMgr)
677     {
678         for (auto& curCommandBuffer : m_cmdBufPool)
679         {
680             auto curCommandBufferSpecific = static_cast<CommandBufferSpecificNext *>(curCommandBuffer);
681             if (curCommandBufferSpecific == nullptr)
682                 continue;
683             curCommandBufferSpecific->waitReady(); // wait ready and return to comamnd buffer manager.
684             m_cmdBufMgr->ReleaseCmdBuf(curCommandBuffer);
685         }
686     }
687 
688     m_cmdBufPool.clear();
689 
690     MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
691     MosUtilities::MosDestroyMutex(m_cmdBufPoolMutex);
692     m_cmdBufPoolMutex = nullptr;
693     MOS_SafeFreeMemory(m_commandBuffer);
694     m_commandBuffer = nullptr;
695     MOS_SafeFreeMemory(m_allocationList);
696     m_allocationList = nullptr;
697     MOS_SafeFreeMemory(m_patchLocationList);
698     m_patchLocationList = nullptr;
699     MOS_SafeFreeMemory(m_attachedResources);
700     m_attachedResources = nullptr;
701     MOS_SafeFreeMemory(m_writeModeList);
702     m_writeModeList = nullptr;
703 
704     for (int i=0; i<MAX_ENGINE_INSTANCE_NUM; i++)
705     {
706         if (m_i915Context[i])
707         {
708             mos_context_destroy(m_i915Context[i]);
709             m_i915Context[i] = nullptr;
710         }
711     }
712     MOS_TraceEventExt(EVENT_GPU_CONTEXT_DESTROY, EVENT_TYPE_END,
713                       nullptr, 0, nullptr, 0);
714 }
715 
RegisterResource(PMOS_RESOURCE osResource,bool writeFlag)716 MOS_STATUS GpuContextSpecificNext::RegisterResource(
717     PMOS_RESOURCE osResource,
718     bool          writeFlag)
719 {
720     MOS_OS_FUNCTION_ENTER;
721 
722     MOS_OS_CHK_NULL_RETURN(osResource);
723 
724     MOS_OS_CHK_NULL_RETURN(m_attachedResources);
725 
726     PMOS_RESOURCE registeredResources = m_attachedResources;
727     uint32_t      allocationIndex     = 0;
728 
729     for ( allocationIndex = 0; allocationIndex < m_resCount; allocationIndex++, registeredResources++)
730     {
731         if (osResource->bo == registeredResources->bo)
732         {
733             break;
734         }
735     }
736 
737     // Allocation list to be updated
738     if (allocationIndex < m_maxNumAllocations)
739     {
740         // New buffer
741         if (allocationIndex == m_resCount)
742         {
743             m_resCount++;
744         }
745 
746         // Set allocation
747         if (m_gpuContext >= MOS_GPU_CONTEXT_MAX)
748         {
749             MOS_OS_ASSERTMESSAGE("Gpu context exceeds max.");
750             return MOS_STATUS_UNKNOWN;
751         }
752 
753         osResource->iAllocationIndex[m_gpuContext] = (allocationIndex);
754         m_attachedResources[allocationIndex]           = *osResource;
755         m_writeModeList[allocationIndex] |= writeFlag;
756         m_allocationList[allocationIndex].hAllocation = &m_attachedResources[allocationIndex];
757         m_allocationList[allocationIndex].WriteOperation |= writeFlag;
758         m_numAllocations = m_resCount;
759     }
760     else
761     {
762         MOS_OS_ASSERTMESSAGE("Reached max # registrations.");
763         return MOS_STATUS_UNKNOWN;
764     }
765 
766     return MOS_STATUS_SUCCESS;
767 }
768 
SetPatchEntry(MOS_STREAM_HANDLE streamState,PMOS_PATCH_ENTRY_PARAMS params)769 MOS_STATUS GpuContextSpecificNext::SetPatchEntry(
770     MOS_STREAM_HANDLE streamState,
771     PMOS_PATCH_ENTRY_PARAMS params)
772 {
773     MOS_OS_FUNCTION_ENTER;
774 
775     MOS_OS_CHK_NULL_RETURN(m_patchLocationList);
776     MOS_OS_CHK_NULL_RETURN(streamState);
777     MOS_OS_CHK_NULL_RETURN(params);
778 
779     m_patchLocationList[m_currentNumPatchLocations].AllocationIndex  = params->uiAllocationIndex;
780     m_patchLocationList[m_currentNumPatchLocations].AllocationOffset = params->uiResourceOffset;
781     m_patchLocationList[m_currentNumPatchLocations].PatchOffset      = params->uiPatchOffset;
782     m_patchLocationList[m_currentNumPatchLocations].uiWriteOperation = params->bWrite ? true: false;
783     m_patchLocationList[m_currentNumPatchLocations].cmdBo            =
784                 params->cmdBuffer != nullptr ? params->cmdBuffer->OsResource.bo : nullptr;
785 
786     if (streamState->osCpInterface &&
787         streamState->osCpInterface->IsHMEnabled())
788     {
789         if (MOS_STATUS_SUCCESS != streamState->osCpInterface->RegisterPatchForHM(
790             (uint32_t *)(params->cmdBufBase + params->uiPatchOffset),
791             params->bWrite,
792             params->HwCommandType,
793             params->forceDwordOffset,
794             params->presResource,
795             &m_patchLocationList[m_currentNumPatchLocations]))
796         {
797             MOS_OS_ASSERTMESSAGE("Failed to RegisterPatchForHM.");
798         }
799     }
800 
801     m_currentNumPatchLocations++;
802 
803     return MOS_STATUS_SUCCESS;
804 }
805 
//!
//! \brief    Hand out a command buffer for this GPU context.
//! \details  flags == 0 requests the primary command buffer; any non-zero
//!           value selects/creates the secondary command buffer keyed by that
//!           value in m_secondaryCmdBufs. A fresh buffer is picked from the
//!           command buffer manager only when the primary was flushed or the
//!           requested secondary does not exist yet; otherwise the cached
//!           copy is returned.
//! \param    [out] comamndBuffer
//!           Caller-provided struct filled with the buffer's base/ptr/size.
//! \param    [in] flags
//!           0 = primary buffer, non-zero = secondary buffer index.
//! \return   MOS_STATUS_SUCCESS on success, error status otherwise.
//!
MOS_STATUS GpuContextSpecificNext::GetCommandBuffer(
    PMOS_COMMAND_BUFFER comamndBuffer,
    uint32_t            flags)
{
    MOS_OS_FUNCTION_ENTER;

    MOS_OS_CHK_NULL_RETURN(comamndBuffer);
    MOS_OS_CHK_NULL_RETURN(m_cmdBufMgr);
    MOS_OS_CHK_NULL_RETURN(m_commandBuffer);

    MOS_STATUS      eStatus = MOS_STATUS_SUCCESS;
    CommandBufferNext* cmdBuf = nullptr;

    // flags doubles as the secondary-buffer key; 0 means "primary".
    uint32_t secondaryIdx = flags;
    bool isPrimaryCmdBuffer = (secondaryIdx == 0);
    bool hasSecondaryCmdBuffer = (!isPrimaryCmdBuffer &&
                               (m_secondaryCmdBufs.count(secondaryIdx) != 0));

    // Allocate only when the primary was already flushed (submitted) or the
    // requested secondary buffer has not been created yet.
    bool needToAlloc = ((isPrimaryCmdBuffer && m_cmdBufFlushed) ||
                        (!isPrimaryCmdBuffer && !hasSecondaryCmdBuffer));

    if (needToAlloc)
    {
        // The pool is shared mutable state; every early return below must
        // release this mutex first.
        MosUtilities::MosLockMutex(m_cmdBufPoolMutex);
        if (m_cmdBufPool.size() < MAX_CMD_BUF_NUM)
        {
            // Pool not full yet: take a new buffer from the manager and grow
            // the pool.
            cmdBuf = m_cmdBufMgr->PickupOneCmdBuf(m_commandBufferSize);
            if (cmdBuf == nullptr)
            {
                MOS_OS_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
                MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
                return MOS_STATUS_NULL_POINTER;
            }
            if ((eStatus = cmdBuf->BindToGpuContext(this)) != MOS_STATUS_SUCCESS)
            {
                MOS_OS_ASSERTMESSAGE("Invalid status of BindToGpuContext.");
                MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
                return eStatus;
            }
            m_cmdBufPool.push_back(cmdBuf);
        }
        else if (m_cmdBufPool.size() == MAX_CMD_BUF_NUM && m_nextFetchIndex < m_cmdBufPool.size())
        {
            // Pool full: recycle the oldest slot (m_nextFetchIndex). Wait for
            // the GPU to finish with it, unbind it, return it to the manager,
            // then fetch a fresh buffer into the same slot.
            auto cmdBufOld = m_cmdBufPool[m_nextFetchIndex];
            auto cmdBufSpecificOld = static_cast<CommandBufferSpecificNext *>(cmdBufOld);
            if (cmdBufSpecificOld == nullptr)
            {
                MOS_OS_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
                MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
                return MOS_STATUS_NULL_POINTER;
            }
            cmdBufSpecificOld->waitReady();
            cmdBufSpecificOld->UnBindToGpuContext();
            m_cmdBufMgr->ReleaseCmdBuf(cmdBufOld);  // here just return old command buffer to available pool

            //pick up new comamnd buffer
            cmdBuf = m_cmdBufMgr->PickupOneCmdBuf(m_commandBufferSize);
            if (cmdBuf == nullptr)
            {
                MOS_OS_ASSERTMESSAGE("Invalid (nullptr) Pointer.");
                MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
                return MOS_STATUS_NULL_POINTER;
            }
            if ((eStatus = cmdBuf->BindToGpuContext(this)) != MOS_STATUS_SUCCESS)
            {
                MOS_OS_ASSERTMESSAGE("Invalid status of BindToGpuContext.");
                MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
                return eStatus;
            }
            m_cmdBufPool[m_nextFetchIndex] = cmdBuf;
        }
        else
        {
            MOS_OS_ASSERTMESSAGE("Command buffer bool size exceed max.");
            MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);
            return MOS_STATUS_UNKNOWN;
        }
        MosUtilities::MosUnlockMutex(m_cmdBufPoolMutex);

        // util now, we got new command buffer from CmdBufMgr, next step to fill in the input command buffer
        MOS_OS_CHK_STATUS_RETURN(cmdBuf->GetResource()->ConvertToMosResource(&comamndBuffer->OsResource));
        comamndBuffer->pCmdBase   = (uint32_t *)cmdBuf->GetLockAddr();
        comamndBuffer->pCmdPtr    = (uint32_t *)cmdBuf->GetLockAddr();
        comamndBuffer->iOffset    = 0;
        comamndBuffer->iRemaining = cmdBuf->GetCmdBufSize();
        comamndBuffer->iCmdIndex  = m_nextFetchIndex;
        comamndBuffer->iVdboxNodeIndex = MOS_VDBOX_NODE_INVALID;
        comamndBuffer->iVeboxNodeIndex = MOS_VEBOX_NODE_INVALID;
        comamndBuffer->Attributes.pAttriVe = nullptr;
        comamndBuffer->is1stLvlBB = true;

        // zero comamnd buffer
        MosUtilities::MosZeroMemory(comamndBuffer->pCmdBase, comamndBuffer->iRemaining);
        comamndBuffer->iSubmissionType = SUBMISSION_TYPE_SINGLE_PIPE;
        MosUtilities::MosZeroMemory(&comamndBuffer->Attributes,sizeof(comamndBuffer->Attributes));

        if (isPrimaryCmdBuffer)
        {
            // update command buffer relared filed in GPU context
            m_cmdBufFlushed = false;

            // keep a copy in GPU context
            MosUtilities::MosSecureMemcpy(m_commandBuffer, sizeof(MOS_COMMAND_BUFFER), comamndBuffer, sizeof(MOS_COMMAND_BUFFER));
        }
        else
        {
            // Secondary buffers are cached in m_secondaryCmdBufs; the copies
            // allocated here are freed in ResetCommandBuffer().
            PMOS_COMMAND_BUFFER tempCmdBuf = (PMOS_COMMAND_BUFFER)MOS_AllocAndZeroMemory(sizeof(MOS_COMMAND_BUFFER));
            MOS_OS_CHK_NULL_RETURN(tempCmdBuf);
            m_secondaryCmdBufs[secondaryIdx] = tempCmdBuf;
            MosUtilities::MosSecureMemcpy(tempCmdBuf, sizeof(MOS_COMMAND_BUFFER), comamndBuffer, sizeof(MOS_COMMAND_BUFFER));
        }

        if (m_ocaLogSectionSupported)
        {
            MOS_LINUX_BO *boTemp = ((GraphicsResourceSpecificNext *)cmdBuf->GetResource())->GetBufferObject();
            MosOcaInterfaceSpecific::InitOcaLogSection(boTemp);
        }

        // Command buffers are treated as cyclical buffers, the CB after the just submitted one
        // has the minimal fence value that we should wait
        m_nextFetchIndex++;
        if (m_nextFetchIndex >= MAX_CMD_BUF_NUM)
        {
            m_nextFetchIndex = 0;
        }
    }
    else
    {
        // current command buffer still active, directly copy to comamndBuffer
        if (isPrimaryCmdBuffer)
        {
            MosUtilities::MosSecureMemcpy(comamndBuffer, sizeof(MOS_COMMAND_BUFFER), m_commandBuffer, sizeof(MOS_COMMAND_BUFFER));
        }
        else
        {
            MosUtilities::MosSecureMemcpy(comamndBuffer, sizeof(MOS_COMMAND_BUFFER), m_secondaryCmdBufs[secondaryIdx], sizeof(MOS_COMMAND_BUFFER));
        }
    }

    // Register the command buffer resource itself (read-only) so it takes
    // part in residency/patching bookkeeping at submission time.
    if (isPrimaryCmdBuffer)
    {
        MOS_OS_CHK_STATUS_RETURN(RegisterResource(&m_commandBuffer->OsResource, false));
    }
    else
    {
        MOS_OS_CHK_STATUS_RETURN(RegisterResource(&m_secondaryCmdBufs[secondaryIdx]->OsResource, false));
    }

    return MOS_STATUS_SUCCESS;
}
956 
ReturnCommandBuffer(PMOS_COMMAND_BUFFER cmdBuffer,uint32_t flags)957 void GpuContextSpecificNext::ReturnCommandBuffer(
958     PMOS_COMMAND_BUFFER cmdBuffer,
959     uint32_t            flags)
960 {
961     MOS_OS_FUNCTION_ENTER;
962 
963     MOS_OS_ASSERT(cmdBuffer);
964     MOS_OS_ASSERT(m_commandBuffer);
965 
966     bool isPrimaryCmdBuf = (flags == 0);
967 
968     if (isPrimaryCmdBuf)
969     {
970         m_commandBuffer->iOffset    = cmdBuffer->iOffset;
971         m_commandBuffer->iRemaining = cmdBuffer->iRemaining;
972         m_commandBuffer->pCmdPtr    = cmdBuffer->pCmdPtr;
973         m_commandBuffer->iVdboxNodeIndex = cmdBuffer->iVdboxNodeIndex;
974         m_commandBuffer->iVeboxNodeIndex = cmdBuffer->iVeboxNodeIndex;
975     }
976     else
977     {
978         uint32_t secondaryIdx = flags;
979         MOS_OS_ASSERT(m_secondaryCmdBufs.count(secondaryIdx));
980 
981         MosUtilities::MosSecureMemcpy(m_secondaryCmdBufs[secondaryIdx], sizeof(MOS_COMMAND_BUFFER), cmdBuffer, sizeof(MOS_COMMAND_BUFFER));
982     }
983 }
984 
ResetCommandBuffer()985 MOS_STATUS GpuContextSpecificNext::ResetCommandBuffer()
986 {
987     m_cmdBufFlushed = true;
988     auto it = m_secondaryCmdBufs.begin();
989     while(it != m_secondaryCmdBufs.end())
990     {
991         MOS_FreeMemory(it->second);
992         it++;
993     }
994     m_secondaryCmdBufs.clear();
995     return MOS_STATUS_SUCCESS;
996 }
997 
SetIndirectStateSize(const uint32_t size)998 MOS_STATUS GpuContextSpecificNext::SetIndirectStateSize(const uint32_t size)
999 {
1000     if (m_ocaLogSectionSupported)
1001     {
1002         if(MosOcaInterfaceSpecific::IncreaseSize(size) < m_commandBufferSize)
1003         {
1004             m_IndirectHeapSize = size;
1005             return MOS_STATUS_SUCCESS;
1006         }
1007         else
1008         {
1009             MOS_OS_ASSERTMESSAGE("Indirect State Size if out of boundry!");
1010             return MOS_STATUS_UNKNOWN;
1011         }
1012     }
1013     else
1014     {
1015         if(size < m_commandBufferSize)
1016         {
1017             m_IndirectHeapSize = size;
1018             return MOS_STATUS_SUCCESS;
1019         }
1020         else
1021         {
1022             MOS_OS_ASSERTMESSAGE("Indirect State Size if out of boundry!");
1023             return MOS_STATUS_UNKNOWN;
1024         }
1025     }
1026 }
1027 
GetIndirectState(uint32_t & offset,uint32_t & size)1028 MOS_STATUS GpuContextSpecificNext::GetIndirectState(
1029     uint32_t &offset,
1030     uint32_t &size)
1031 {
1032     MOS_OS_FUNCTION_ENTER;
1033 
1034     if (m_ocaLogSectionSupported)
1035     {
1036         offset = m_commandBufferSize - m_IndirectHeapSize - OCA_LOG_SECTION_SIZE_MAX;
1037     }
1038     else
1039     {
1040         offset = m_commandBufferSize - m_IndirectHeapSize;
1041     }
1042     size = m_IndirectHeapSize;
1043 
1044     return MOS_STATUS_SUCCESS;
1045 }
1046 
GetIndirectStatePointer(uint8_t ** indirectState)1047 MOS_STATUS GpuContextSpecificNext::GetIndirectStatePointer(
1048     uint8_t **indirectState)
1049 {
1050     MOS_OS_FUNCTION_ENTER;
1051 
1052     MOS_OS_CHK_NULL_RETURN(indirectState);
1053 
1054     if (m_ocaLogSectionSupported)
1055     {
1056         *indirectState = (uint8_t *)m_commandBuffer->pCmdBase + m_commandBufferSize - m_IndirectHeapSize - OCA_LOG_SECTION_SIZE_MAX;
1057     }
1058     else
1059     {
1060         *indirectState = (uint8_t *)m_commandBuffer->pCmdBase + m_commandBufferSize - m_IndirectHeapSize;
1061     }
1062 
1063     return MOS_STATUS_SUCCESS;
1064 }
1065 
ResizeCommandBufferAndPatchList(uint32_t requestedCommandBufferSize,uint32_t requestedPatchListSize,uint32_t flags)1066 MOS_STATUS GpuContextSpecificNext::ResizeCommandBufferAndPatchList(
1067     uint32_t requestedCommandBufferSize,
1068     uint32_t requestedPatchListSize,
1069     uint32_t flags)
1070 {
1071     MOS_OS_FUNCTION_ENTER;
1072 
1073     // m_commandBufferSize is used for allocate command buffer and submit command buffer, in this moment, command buffer has not allocated yet.
1074     // Linux KMD requires command buffer size align to 8 bytes, or it will not execute the commands.
1075     if (m_ocaLogSectionSupported /*&& !m_ocaSizeIncreaseDone*/)
1076     {
1077         m_commandBufferSize = MOS_ALIGN_CEIL(MosOcaInterfaceSpecific::IncreaseSize(requestedCommandBufferSize), 8);
1078         // m_ocaSizeIncreaseDone = true;
1079     }
1080     else
1081     {
1082         m_commandBufferSize = MOS_ALIGN_CEIL(requestedCommandBufferSize, 8);
1083     }
1084 
1085     if (requestedPatchListSize > m_maxPatchLocationsize)
1086     {
1087         PPATCHLOCATIONLIST newPatchList = (PPATCHLOCATIONLIST)MOS_ReallocMemory(m_patchLocationList, sizeof(PATCHLOCATIONLIST) * requestedPatchListSize);
1088         MOS_OS_CHK_NULL_RETURN(newPatchList);
1089 
1090         m_patchLocationList = newPatchList;
1091 
1092         // now zero the extended portion
1093         MosUtilities::MosZeroMemory((m_patchLocationList + m_maxPatchLocationsize), sizeof(PATCHLOCATIONLIST) * (requestedPatchListSize - m_maxPatchLocationsize));
1094         m_maxPatchLocationsize = requestedPatchListSize;
1095     }
1096 
1097     return MOS_STATUS_SUCCESS;
1098 }
1099 
//!
//! \brief    Record a new command buffer size for this GPU context.
//! \details  Only stores the value; the buffer itself is (re)allocated later
//!           in GetCommandBuffer(). Note: unlike
//!           ResizeCommandBufferAndPatchList(), no 8-byte alignment or OCA
//!           log-section adjustment is applied here — callers are expected
//!           to pass an already-suitable size. TODO confirm.
//! \param    [in] requestedSize
//!           Requested command buffer size in bytes.
//! \return   Always MOS_STATUS_SUCCESS.
//!
MOS_STATUS GpuContextSpecificNext::ResizeCommandBuffer(uint32_t requestedSize)
{
    MOS_OS_FUNCTION_ENTER;

    m_commandBufferSize = requestedSize;

    return MOS_STATUS_SUCCESS;
}
1108 
GetVdboxNodeId(PMOS_COMMAND_BUFFER cmdBuffer)1109 MOS_VDBOX_NODE_IND GpuContextSpecificNext::GetVdboxNodeId(
1110     PMOS_COMMAND_BUFFER cmdBuffer)
1111 {
1112     MOS_VDBOX_NODE_IND idx = MOS_VDBOX_NODE_INVALID;
1113 
1114     if (cmdBuffer == nullptr)
1115     {
1116         MOS_OS_ASSERTMESSAGE("No cmd buffer provided in GetVdboxNodeId!");
1117         return idx;
1118     }
1119 
1120     // If we have assigned vdbox index for the given cmdbuf, return it immediately
1121     if (MOS_VDBOX_NODE_INVALID != cmdBuffer->iVdboxNodeIndex) {
1122         idx = cmdBuffer->iVdboxNodeIndex;
1123         return idx;
1124     }
1125 
1126     return idx;
1127 }
1128 
GetVcsExecFlag(PMOS_COMMAND_BUFFER cmdBuffer,MOS_GPU_NODE gpuNode)1129 uint32_t GpuContextSpecificNext::GetVcsExecFlag(
1130     PMOS_COMMAND_BUFFER cmdBuffer,
1131     MOS_GPU_NODE gpuNode)
1132 {
1133     if (cmdBuffer == 0)
1134     {
1135         MOS_OS_ASSERTMESSAGE("Input invalid(null) parameter.");
1136         return I915_EXEC_DEFAULT;
1137     }
1138 
1139     uint32_t vcsExecFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING1;
1140 
1141     if (MOS_VDBOX_NODE_INVALID == cmdBuffer->iVdboxNodeIndex)
1142     {
1143        // That's those case when BB did not have any VDBOX# specific commands.
1144        // Thus, we need to select VDBOX# here. Alternatively we can rely on KMD
1145        // to make balancing for us, i.e. rely on Virtual Engine support.
1146        cmdBuffer->iVdboxNodeIndex = GetVdboxNodeId(cmdBuffer);
1147        if (MOS_VDBOX_NODE_INVALID == cmdBuffer->iVdboxNodeIndex)
1148        {
1149            cmdBuffer->iVdboxNodeIndex = (gpuNode == MOS_GPU_NODE_VIDEO)?
1150                MOS_VDBOX_NODE_1: MOS_VDBOX_NODE_2;
1151        }
1152      }
1153 
1154      if (MOS_VDBOX_NODE_1 == cmdBuffer->iVdboxNodeIndex)
1155      {
1156          vcsExecFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING1;
1157      }
1158      else if (MOS_VDBOX_NODE_2 == cmdBuffer->iVdboxNodeIndex)
1159      {
1160          vcsExecFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING2;
1161      }
1162 
1163      return vcsExecFlag;
1164 }
1165 
1166 #if (_DEBUG || _RELEASE_INTERNAL)
GetNopCommandBuffer(MOS_STREAM_HANDLE streamState)1167 MOS_LINUX_BO* GpuContextSpecificNext::GetNopCommandBuffer(
1168     MOS_STREAM_HANDLE streamState)
1169 {
1170     int j;
1171     uint32_t *buf = nullptr;
1172     MOS_LINUX_BO* bo = nullptr;
1173 
1174     j = 0;
1175 
1176     if(streamState == nullptr || streamState->perStreamParameters == nullptr)
1177     {
1178         return nullptr;
1179     }
1180 
1181     auto perStreamParameters = (PMOS_CONTEXT)streamState->perStreamParameters;
1182     struct mos_drm_bo_alloc alloc;
1183     alloc.name = "NOP_CMD_BO";
1184     alloc.size = 4096;
1185     alloc.alignment = 4096;
1186     alloc.ext.mem_type = MOS_MEMPOOL_VIDEOMEMORY;
1187     bo = mos_bo_alloc(perStreamParameters->bufmgr, &alloc);
1188     if(bo == nullptr)
1189     {
1190         return nullptr;
1191     }
1192 
1193     mos_bo_map(bo, 1);
1194     buf = (uint32_t*)bo->virt;
1195     if(buf == nullptr)
1196     {
1197         mos_bo_unreference(bo);
1198         return nullptr;
1199     }
1200 
1201     buf[j++] = 0x05000000; // MI_BATCH_BUFFER_END
1202 
1203     mos_bo_unmap(bo);
1204 
1205     return bo;
1206 }
1207 #endif // _DEBUG || _RELEASE_INTERNAL
1208 
1209 
MapResourcesToAuxTable(mos_linux_bo * cmd_bo)1210 MOS_STATUS GpuContextSpecificNext::MapResourcesToAuxTable(mos_linux_bo *cmd_bo)
1211 {
1212     MOS_OS_CHK_NULL_RETURN(cmd_bo);
1213 
1214     OsContextSpecificNext *osCtx = static_cast<OsContextSpecificNext*>(m_osContext);
1215     MOS_OS_CHK_NULL_RETURN(osCtx);
1216 
1217     AuxTableMgr *auxTableMgr = osCtx->GetAuxTableMgr();
1218     if (auxTableMgr)
1219     {
1220         // Map compress allocations to aux table if it is not mapped.
1221         for (uint32_t i = 0; i < m_numAllocations; i++)
1222         {
1223             auto res = (PMOS_RESOURCE)m_allocationList[i].hAllocation;
1224             MOS_OS_CHK_NULL_RETURN(res);
1225             MOS_OS_CHK_STATUS_RETURN(auxTableMgr->MapResource(res->pGmmResInfo, res->bo));
1226         }
1227         MOS_OS_CHK_STATUS_RETURN(auxTableMgr->EmitAuxTableBOList(cmd_bo));
1228     }
1229     return MOS_STATUS_SUCCESS;
1230 }
1231 
SubmitCommandBuffer(MOS_STREAM_HANDLE streamState,PMOS_COMMAND_BUFFER cmdBuffer,bool nullRendering)1232 MOS_STATUS GpuContextSpecificNext::SubmitCommandBuffer(
1233     MOS_STREAM_HANDLE   streamState,
1234     PMOS_COMMAND_BUFFER cmdBuffer,
1235     bool                nullRendering)
1236 {
1237     MOS_OS_FUNCTION_ENTER;
1238 
1239     MOS_TraceEventExt(EVENT_MOS_BATCH_SUBMIT, EVENT_TYPE_START, nullptr, 0, nullptr, 0);
1240 
1241     MOS_OS_CHK_NULL_RETURN(streamState);
1242     auto perStreamParameters = (PMOS_CONTEXT)streamState->perStreamParameters;
1243     MOS_OS_CHK_NULL_RETURN(perStreamParameters);
1244     MOS_OS_CHK_NULL_RETURN(cmdBuffer);
1245     MOS_OS_CHK_NULL_RETURN(m_patchLocationList);
1246 
1247     MOS_GPU_NODE gpuNode  = OSKMGetGpuNode(m_gpuContext);
1248     uint32_t     execFlag = gpuNode;
1249     MOS_STATUS   eStatus  = MOS_STATUS_SUCCESS;
1250     int32_t      ret      = 0;
1251     bool         scalaEnabled = false;
1252     auto         it           = m_secondaryCmdBufs.begin();
1253 
1254     // Command buffer object DRM pointer
1255     m_cmdBufFlushed = true;
1256     auto cmd_bo     = cmdBuffer->OsResource.bo;
1257 
1258     // Map Resource to Aux if needed
1259     MapResourcesToAuxTable(cmd_bo);
1260     for(auto it : m_secondaryCmdBufs)
1261     {
1262         MapResourcesToAuxTable(it.second->OsResource.bo);
1263     }
1264 
1265     if (m_secondaryCmdBufs.size() >= 2)
1266     {
1267         scalaEnabled = true;
1268         cmdBuffer->iSubmissionType = SUBMISSION_TYPE_MULTI_PIPE_MASTER;
1269     }
1270 
1271     std::vector<PMOS_RESOURCE> mappedResList;
1272     std::vector<MOS_LINUX_BO *> skipSyncBoList;
1273 
1274     // Now, the patching will be done, based on the patch list.
1275     for (uint32_t patchIndex = 0; patchIndex < m_currentNumPatchLocations; patchIndex++)
1276     {
1277         auto currentPatch = &m_patchLocationList[patchIndex];
1278         MOS_OS_CHK_NULL_RETURN(currentPatch);
1279 
1280         auto tempCmdBo = currentPatch->cmdBo == nullptr ? cmd_bo : currentPatch->cmdBo;
1281 
1282         // Following are for Nested BB buffer, if it's nested BB, we need to ensure it's locked.
1283         if (tempCmdBo != cmd_bo)
1284         {
1285             bool isSecondaryCmdBuf = false;
1286             it = m_secondaryCmdBufs.begin();
1287             while(it != m_secondaryCmdBufs.end())
1288             {
1289                 if (it->second->OsResource.bo == tempCmdBo)
1290                 {
1291                     isSecondaryCmdBuf = true;
1292                     break;
1293                 }
1294                 it++;
1295             }
1296 
1297             for(auto allocIdx = 0; allocIdx < m_numAllocations && (!isSecondaryCmdBuf); allocIdx++)
1298             {
1299                 auto tempRes = (PMOS_RESOURCE)m_allocationList[allocIdx].hAllocation;
1300                 if (tempCmdBo == tempRes->bo)
1301                 {
1302                     GraphicsResourceNext::LockParams param;
1303                     param.m_writeRequest = true;
1304                     tempRes->pGfxResourceNext->Lock(m_osContext, param);
1305                     mappedResList.push_back(tempRes);
1306                     break;
1307                 }
1308             }
1309         }
1310 
1311         // This is the resource for which patching will be done
1312         auto resource = (PMOS_RESOURCE)m_allocationList[currentPatch->AllocationIndex].hAllocation;
1313         MOS_OS_CHK_NULL_RETURN(resource);
1314 
1315         // For now, we'll assume the system memory's DRM bo pointer
1316         // is NULL.  If nullptr is detected, then the resource has been
1317         // placed inside the command buffer's indirect state area.
1318         // We'll simply set alloc_bo to the command buffer's bo pointer.
1319         MOS_OS_ASSERT(resource->bo);
1320 
1321         auto alloc_bo = (resource->bo) ? resource->bo : tempCmdBo;
1322 
1323         MOS_OS_CHK_STATUS_RETURN(streamState->osCpInterface->PermeatePatchForHM(
1324             tempCmdBo->virt,
1325             currentPatch,
1326             resource));
1327 
1328         uint64_t boOffset = alloc_bo->offset64;
1329         if (!mos_bo_is_softpin(alloc_bo))
1330         {
1331             if (alloc_bo != tempCmdBo)
1332             {
1333                 auto item_ctx = perStreamParameters->contextOffsetList.begin();
1334                 for (; item_ctx != perStreamParameters->contextOffsetList.end(); item_ctx++)
1335                 {
1336                     if (item_ctx->intel_context == perStreamParameters->intel_context && item_ctx->target_bo == alloc_bo)
1337                     {
1338                         boOffset = item_ctx->offset64;
1339                         break;
1340                     }
1341                 }
1342             }
1343         }
1344 
1345         MOS_OS_CHK_NULL_RETURN(tempCmdBo->virt);
1346         if (perStreamParameters->bUse64BitRelocs)
1347         {
1348             *((uint64_t *)((uint8_t *)tempCmdBo->virt + currentPatch->PatchOffset)) =
1349                     boOffset + currentPatch->AllocationOffset;
1350         }
1351         else
1352         {
1353             *((uint32_t *)((uint8_t *)tempCmdBo->virt + currentPatch->PatchOffset)) =
1354                     boOffset + currentPatch->AllocationOffset;
1355         }
1356 
1357         if (scalaEnabled)
1358         {
1359             it = m_secondaryCmdBufs.begin();
1360             while(it != m_secondaryCmdBufs.end())
1361             {
1362                 if (it->second->OsResource.bo == tempCmdBo &&
1363                     it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE &&
1364                     !mos_bo_is_exec_object_async(alloc_bo))
1365                 {
1366                     skipSyncBoList.push_back(alloc_bo);
1367                     break;
1368                 }
1369                 it++;
1370             }
1371         }
1372         else if (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE &&
1373                  !mos_bo_is_exec_object_async(alloc_bo))
1374         {
1375             skipSyncBoList.push_back(alloc_bo);
1376         }
1377 
1378 #if (_DEBUG || _RELEASE_INTERNAL)
1379         {
1380             uint32_t evtData[] = {alloc_bo->handle, currentPatch->uiWriteOperation, currentPatch->AllocationOffset};
1381             MOS_TraceEventExt(EVENT_MOS_BATCH_SUBMIT, EVENT_TYPE_INFO,
1382                               evtData, sizeof(evtData),
1383                               &boOffset, sizeof(boOffset));
1384         }
1385 #endif
1386 
1387         if(mos_bo_is_softpin(alloc_bo))
1388         {
1389             if (alloc_bo != tempCmdBo)
1390             {
1391                 ret = mos_bo_add_softpin_target(tempCmdBo, alloc_bo, currentPatch->uiWriteOperation);
1392             }
1393         }
1394         else
1395         {
1396             // This call will patch the command buffer with the offsets of the indirect state region of the command buffer
1397             ret = mos_bo_emit_reloc(
1398                 tempCmdBo,                                                         // Command buffer
1399                 currentPatch->PatchOffset,                                         // Offset in the command buffer
1400                 alloc_bo,                                                          // Allocation object for which the patch will be made.
1401                 currentPatch->AllocationOffset,                                    // Offset to the indirect state
1402                 I915_GEM_DOMAIN_RENDER,                                            // Read domain
1403                 (currentPatch->uiWriteOperation) ? I915_GEM_DOMAIN_RENDER : 0x0,   // Write domain
1404                 boOffset);
1405         }
1406 
1407         if (ret != 0)
1408         {
1409             MOS_OS_ASSERTMESSAGE("Error patching alloc_bo = 0x%x, cmd_bo = 0x%x.",
1410                 (uintptr_t)alloc_bo,
1411                 (uintptr_t)tempCmdBo);
1412             return MOS_STATUS_UNKNOWN;
1413         }
1414     }
1415 
1416     for(auto res: mappedResList)
1417     {
1418         res->pGfxResourceNext->Unlock(m_osContext);
1419     }
1420     mappedResList.clear();
1421 
1422     if (scalaEnabled)
1423     {
1424          it = m_secondaryCmdBufs.begin();
1425          while(it != m_secondaryCmdBufs.end())
1426          {
1427              //Add Batch buffer End Command
1428              uint32_t batchBufferEndCmd = MI_BATCHBUFFER_END;
1429              if (MOS_FAILED(Mos_AddCommand(
1430                      it->second,
1431                      &batchBufferEndCmd,
1432                      sizeof(uint32_t))))
1433              {
1434                  MOS_OS_ASSERTMESSAGE("Inserting BB_END failed!");
1435                  return MOS_STATUS_UNKNOWN;
1436              }
1437              it++;
1438          }
1439     }
1440     else
1441     {
1442         //Add Batch buffer End Command
1443         uint32_t batchBufferEndCmd = MI_BATCHBUFFER_END;
1444         if (MOS_FAILED(Mos_AddCommand(
1445                 cmdBuffer,
1446                 &batchBufferEndCmd,
1447                 sizeof(uint32_t))))
1448         {
1449             MOS_OS_ASSERTMESSAGE("Inserting BB_END failed!");
1450             return MOS_STATUS_UNKNOWN;
1451         }
1452     }
1453     // dump before cmd buffer unmap
1454     MOS_TraceDumpExt("CmdBuffer", m_gpuContext, cmdBuffer->pCmdBase, cmdBuffer->iOffset);
1455 
1456     // Now, we can unmap the video command buffer, since we don't need CPU access anymore.
1457     MOS_OS_CHK_NULL_RETURN(cmdBuffer->OsResource.pGfxResourceNext);
1458 
1459     cmdBuffer->OsResource.pGfxResourceNext->Unlock(m_osContext);
1460 
1461     it = m_secondaryCmdBufs.begin();
1462     while(it != m_secondaryCmdBufs.end())
1463     {
1464         MOS_OS_CHK_NULL_RETURN(it->second->OsResource.pGfxResourceNext);
1465         it->second->OsResource.pGfxResourceNext->Unlock(m_osContext);
1466 
1467         it++;
1468     }
1469 
1470     int32_t perfData;
1471     if (perStreamParameters->pPerfData != nullptr)
1472     {
1473         perfData = *(int32_t *)(perStreamParameters->pPerfData);
1474     }
1475     else
1476     {
1477         perfData = 0;
1478     }
1479 
1480     drm_clip_rect_t *cliprects     = nullptr;
1481     int32_t          num_cliprects = 0;
1482     int32_t          DR4           = perStreamParameters->uEnablePerfTag ? perfData : 0;
1483 
1484     //Since CB2 command is not supported, remove it and set cliprects to nullprt as default.
1485     if ((gpuNode == MOS_GPU_NODE_VIDEO || gpuNode == MOS_GPU_NODE_VIDEO2) &&
1486         (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_SINGLE_PIPE_MASK))
1487     {
1488         if (perStreamParameters->bKMDHasVCS2)
1489         {
1490             if (perStreamParameters->bPerCmdBufferBalancing)
1491             {
1492                 execFlag = GetVcsExecFlag(cmdBuffer, gpuNode);
1493             }
1494             else if (gpuNode == MOS_GPU_NODE_VIDEO)
1495             {
1496                 execFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING1;
1497             }
1498             else if (gpuNode == MOS_GPU_NODE_VIDEO2)
1499             {
1500                 execFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING2;
1501             }
1502         }
1503         else
1504         {
1505             execFlag = I915_EXEC_BSD | I915_EXEC_BSD_RING1;
1506         }
1507     }
1508 
1509 #if (_DEBUG || _RELEASE_INTERNAL)
1510 
1511     MOS_LINUX_BO *nop_cmd_bo = nullptr;
1512 
1513     if (nullRendering == true)
1514     {
1515         nop_cmd_bo = GetNopCommandBuffer(streamState);
1516 
1517         if (nop_cmd_bo)
1518         {
1519             ret = mos_bo_mrb_exec(nop_cmd_bo,
1520                 4096,
1521                 nullptr,
1522                 0,
1523                 0,
1524                 execFlag);
1525         }
1526         else
1527         {
1528             MOS_OS_ASSERTMESSAGE("Mos_GetNopCommandBuffer_Linux failed!");
1529         }
1530     }
1531 
1532 #endif  //(_DEBUG || _RELEASE_INTERNAL)
1533 
1534     if (gpuNode != I915_EXEC_RENDER &&
1535         streamState->osCpInterface->IsTearDownHappen())
1536     {
1537         // skip PAK command when CP tear down happen to avoid of GPU hang
1538         // conditonal batch buffer start PoC is in progress
1539     }
1540     else if (nullRendering == false)
1541     {
1542         UnlockPendingOcaBuffers(cmdBuffer, perStreamParameters);
1543         if (streamState->ctxBasedScheduling && m_i915Context[0] != nullptr)
1544         {
1545             if (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASK)
1546             {
1547                 if (scalaEnabled && !streamState->bParallelSubmission)
1548                 {
1549                     uint32_t secondaryIndex = 0;
1550                     it = m_secondaryCmdBufs.begin();
1551                     while(it != m_secondaryCmdBufs.end())
1552                     {
1553                         if (it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE)
1554                         {
1555                             if(execFlag == MOS_GPU_NODE_VE)
1556                             {
1557                                 // decode excluded since init in other place
1558                                 it->second->iSubmissionType |= (secondaryIndex << SUBMISSION_TYPE_MULTI_PIPE_SLAVE_INDEX_SHIFT);
1559                                 secondaryIndex++;
1560                             }
1561                         }
1562                         ret = SubmitPipeCommands(it->second,
1563                                                  it->second->OsResource.bo,
1564                                                  perStreamParameters,
1565                                                  skipSyncBoList,
1566                                                  execFlag,
1567                                                  DR4);
1568                         it++;
1569                     }
1570                 }
1571                 else if(scalaEnabled && streamState->bParallelSubmission)
1572                 {
1573                     ret = ParallelSubmitCommands(m_secondaryCmdBufs,
1574                                          perStreamParameters,
1575                                          execFlag,
1576                                          DR4);
1577                 }
1578                 else
1579                 {
1580                     ret = SubmitPipeCommands(cmdBuffer,
1581                                              cmd_bo,
1582                                              perStreamParameters,
1583                                              skipSyncBoList,
1584                                              execFlag,
1585                                              DR4);
1586                 }
1587             }
1588             else
1589             {
1590                 ret = mos_bo_context_exec2(cmd_bo,
1591                     m_commandBufferSize,
1592                     m_i915Context[0],
1593                     cliprects,
1594                     num_cliprects,
1595                     DR4,
1596                     m_i915ExecFlag,
1597                     nullptr);
1598             }
1599         }
1600         else
1601         {
1602             ret = mos_bo_context_exec2(cmd_bo,
1603                 m_commandBufferSize,
1604                 perStreamParameters->intel_context,
1605                 cliprects,
1606                 num_cliprects,
1607                 DR4,
1608                 execFlag,
1609                 nullptr);
1610         }
1611         if (ret != 0)
1612         {
1613             eStatus = MOS_STATUS_UNKNOWN;
1614         }
1615     }
1616 
1617     if (eStatus != MOS_STATUS_SUCCESS)
1618     {
1619         MOS_OS_ASSERTMESSAGE("Command buffer submission failed!");
1620     }
1621 
1622     MosUtilDevUltSpecific::MOS_DEVULT_FuncCall(pfnUltGetCmdBuf, cmdBuffer);
1623 
1624 #if MOS_COMMAND_BUFFER_DUMP_SUPPORTED
1625 pthread_mutex_lock(&command_dump_mutex);
1626 if (streamState->dumpCommandBuffer)
1627     {
1628         if (scalaEnabled)
1629         {
1630             it = m_secondaryCmdBufs.begin();
1631             while(it != m_secondaryCmdBufs.end())
1632             {
1633                 mos_bo_map(it->second->OsResource.bo, 0);
1634                 MosInterface::DumpCommandBuffer(streamState, it->second);
1635                 mos_bo_unmap(it->second->OsResource.bo);
1636                 it++;
1637             }
1638         }
1639         else
1640         {
1641             mos_bo_map(cmd_bo, 0);
1642             MosInterface::DumpCommandBuffer(streamState, cmdBuffer);
1643             mos_bo_unmap(cmd_bo);
1644         }
1645     }
1646     pthread_mutex_unlock(&command_dump_mutex);
1647 #endif  // MOS_COMMAND_BUFFER_DUMP_SUPPORTED
1648 
1649 #if (_DEBUG || _RELEASE_INTERNAL)
1650     if (nop_cmd_bo)
1651     {
1652         mos_bo_unreference(nop_cmd_bo);
1653     }
1654 #endif  //(_DEBUG || _RELEASE_INTERNAL)
1655 
1656     //clear command buffer relocations to fix memory leak issue
1657     for (uint32_t patchIndex = 0; patchIndex < m_currentNumPatchLocations; patchIndex++)
1658     {
1659         auto currentPatch = &m_patchLocationList[patchIndex];
1660         MOS_OS_CHK_NULL_RETURN(currentPatch);
1661 
1662         if(currentPatch->cmdBo)
1663             mos_bo_clear_relocs(currentPatch->cmdBo, 0);
1664     }
1665 
1666     it = m_secondaryCmdBufs.begin();
1667     while(it != m_secondaryCmdBufs.end())
1668     {
1669         MOS_FreeMemory(it->second);
1670         it++;
1671     }
1672     m_secondaryCmdBufs.clear();
1673 
1674     skipSyncBoList.clear();
1675 
1676     // Reset resource allocation
1677     m_numAllocations = 0;
1678     MosUtilities::MosZeroMemory(m_allocationList, sizeof(ALLOCATION_LIST) * m_maxNumAllocations);
1679     m_currentNumPatchLocations = 0;
1680     MosUtilities::MosZeroMemory(m_patchLocationList, sizeof(PATCHLOCATIONLIST) * m_maxNumAllocations);
1681     m_resCount = 0;
1682 
1683     MosUtilities::MosZeroMemory(m_writeModeList, sizeof(bool) * m_maxNumAllocations);
1684 finish:
1685     MOS_TraceEventExt(EVENT_MOS_BATCH_SUBMIT, EVENT_TYPE_END, &eStatus, sizeof(eStatus), nullptr, 0);
1686     return eStatus;
1687 }
1688 
UnlockPendingOcaBuffers(PMOS_COMMAND_BUFFER cmdBuffer,PMOS_CONTEXT mosContext)1689 void GpuContextSpecificNext::UnlockPendingOcaBuffers(PMOS_COMMAND_BUFFER cmdBuffer, PMOS_CONTEXT mosContext)
1690 {
1691     MOS_OS_CHK_NULL_NO_STATUS_RETURN(cmdBuffer);
1692     MOS_OS_CHK_NULL_NO_STATUS_RETURN(mosContext);
1693     MosOcaInterface *pOcaInterface         = &MosOcaInterfaceSpecific::GetInstance();
1694     if (nullptr == pOcaInterface || !((MosOcaInterfaceSpecific*)pOcaInterface)->IsOcaEnabled())
1695     {
1696         // Will come here for UMD_OCA not being enabled case.
1697         return;
1698     }
1699 
1700     int count = 0;
1701     struct MOS_OCA_EXEC_LIST_INFO *info = nullptr;
1702     if ((cmdBuffer->iSubmissionType & SUBMISSION_TYPE_SINGLE_PIPE_MASK) && ((MosOcaInterfaceSpecific*)pOcaInterface)->IsOcaDumpExecListInfoEnabled())
1703     {
1704         info = mos_bo_get_softpin_targets_info(cmdBuffer->OsResource.bo, &count);
1705     }
1706 
1707     pOcaInterface->UnlockPendingOcaBuffers(mosContext, info, count);
1708 
1709     if(info)
1710     {
1711         free(info);
1712     }
1713 }
1714 
SubmitPipeCommands(MOS_COMMAND_BUFFER * cmdBuffer,MOS_LINUX_BO * cmdBo,PMOS_CONTEXT osContext,const std::vector<MOS_LINUX_BO * > & skipSyncBoList,uint32_t execFlag,int32_t dr4)1715 int32_t GpuContextSpecificNext::SubmitPipeCommands(
1716     MOS_COMMAND_BUFFER *cmdBuffer,
1717     MOS_LINUX_BO *cmdBo,
1718     PMOS_CONTEXT osContext,
1719     const std::vector<MOS_LINUX_BO *> &skipSyncBoList,
1720     uint32_t execFlag,
1721     int32_t dr4)
1722 {
1723     int32_t      ret        = 0;
1724     int          fence      = -1;
1725     unsigned int fence_flag = 0;
1726 
1727     MOS_LINUX_CONTEXT *queue = m_i915Context[0];
1728     bool isVeboxSubmission   = false;
1729 
1730     if (execFlag == MOS_GPU_NODE_VIDEO || execFlag == MOS_GPU_NODE_VIDEO2)
1731     {
1732         execFlag = I915_EXEC_DEFAULT;
1733     }
1734     if (execFlag == MOS_GPU_NODE_VE)
1735     {
1736         execFlag = I915_EXEC_DEFAULT;
1737         isVeboxSubmission = true;
1738     }
1739 
1740     if(cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE)
1741     {
1742         fence = osContext->submit_fence;
1743         fence_flag = I915_EXEC_FENCE_SUBMIT;
1744         int slave_index = (cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE_INDEX_MASK) >> SUBMISSION_TYPE_MULTI_PIPE_SLAVE_INDEX_SHIFT;
1745         if(slave_index < 7)
1746         {
1747             queue = m_i915Context[2 + slave_index]; //0 is for single pipe, 1 is for master, slave starts from 2
1748         }
1749         else
1750         {
1751             MOS_OS_ASSERTMESSAGE("slave_index value: %s is invalid!", slave_index);
1752             return -1;
1753         }
1754 
1755         if (isVeboxSubmission)
1756         {
1757             queue = m_i915Context[cmdBuffer->iVeboxNodeIndex + 1];
1758         }
1759 
1760         for(auto bo: skipSyncBoList)
1761         {
1762             mos_bo_set_exec_object_async(cmdBo, bo);
1763         }
1764     }
1765 
1766     if(cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASTER)
1767     {
1768         //Only master pipe needs fence out flag
1769         fence_flag = I915_EXEC_FENCE_OUT;
1770         queue = m_i915Context[1];
1771     }
1772 
1773     ret = mos_bo_context_exec2(cmdBo,
1774                                   cmdBo->size,
1775                                   queue,
1776                                   nullptr,
1777                                   0,
1778                                   dr4,
1779                                   execFlag | fence_flag,
1780                                   &fence);
1781 
1782     if(cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASTER)
1783     {
1784         osContext->submit_fence = fence;
1785     }
1786 
1787     if(cmdBuffer->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE)
1788     {
1789         close(fence);
1790     }
1791 
1792     return ret;
1793 }
1794 
ParallelSubmitCommands(std::map<uint32_t,PMOS_COMMAND_BUFFER> secondaryCmdBufs,PMOS_CONTEXT osContext,uint32_t execFlag,int32_t dr4)1795 int32_t GpuContextSpecificNext::ParallelSubmitCommands(
1796     std::map<uint32_t, PMOS_COMMAND_BUFFER> secondaryCmdBufs,
1797     PMOS_CONTEXT osContext,
1798     uint32_t execFlag,
1799     int32_t dr4)
1800 {
1801     int32_t      ret        = 0;
1802     int          fence      = -1;
1803     unsigned int fenceFlag  = 0;
1804     auto         it         = m_secondaryCmdBufs.begin();
1805     MOS_LINUX_BO *cmdBos[MAX_PARALLEN_CMD_BO_NUM];
1806     int          numBos     = 0; // exclude FE bo
1807 
1808     MOS_LINUX_CONTEXT *queue = m_i915Context[0];
1809     bool isVeboxSubmission   = false;
1810 
1811     if (execFlag == MOS_GPU_NODE_VIDEO || execFlag == MOS_GPU_NODE_VIDEO2)
1812     {
1813         execFlag = I915_EXEC_DEFAULT;
1814     }
1815     if (execFlag == MOS_GPU_NODE_VE)
1816     {
1817         execFlag = I915_EXEC_DEFAULT;
1818         isVeboxSubmission = true;
1819     }
1820 
1821     while(it != m_secondaryCmdBufs.end())
1822     {
1823         if(it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_ALONE)
1824         {
1825             fenceFlag = I915_EXEC_FENCE_OUT;
1826             queue = m_i915Context[0];
1827 
1828             ret = mos_bo_context_exec2(it->second->OsResource.bo,
1829                                   it->second->OsResource.bo->size,
1830                                   queue,
1831                                   nullptr,
1832                                   0,
1833                                   dr4,
1834                                   execFlag | fenceFlag,
1835                                   &fence);
1836 
1837             osContext->submit_fence = fence;
1838         }
1839 
1840         if((it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_MASTER)
1841             || (it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_SLAVE))
1842         {
1843             cmdBos[numBos++] = it->second->OsResource.bo;
1844 
1845             if(it->second->iSubmissionType & SUBMISSION_TYPE_MULTI_PIPE_FLAGS_LAST_PIPE)
1846             {
1847                 queue = m_i915Context[numBos - 1];
1848                 MOS_OS_CHK_NULL_RETURN(queue);
1849                 if(-1 != fence)
1850                 {
1851                     fenceFlag = I915_EXEC_FENCE_IN;
1852                 }
1853 
1854                 ret = mos_bo_context_exec3(cmdBos,
1855                                               numBos,
1856                                               queue,
1857                                               nullptr,
1858                                               0,
1859                                               dr4,
1860                                               execFlag | fenceFlag,
1861                                               &fence);
1862 
1863                 for(int i = 0; i < numBos; i++)
1864                 {
1865                     cmdBos[i] = nullptr;
1866                 }
1867                 numBos = 0;
1868 
1869                 if(-1 != fence)
1870                 {
1871                     close(fence);
1872                 }
1873             }
1874         }
1875 
1876         it++;
1877     }
1878 
1879     return ret;
1880 }
1881 
IncrementGpuStatusTag()1882 void GpuContextSpecificNext::IncrementGpuStatusTag()
1883 {
1884     m_GPUStatusTag = m_GPUStatusTag % UINT_MAX + 1;
1885     if (m_GPUStatusTag == 0)
1886     {
1887         m_GPUStatusTag = 1;
1888     }
1889 }
1890 
UpdatePriority(int32_t priority)1891 void GpuContextSpecificNext::UpdatePriority(int32_t priority)
1892 {
1893     if(m_currCtxPriority == priority)
1894     {
1895         return;
1896     }
1897 
1898     for (int32_t i=0; i<MAX_ENGINE_INSTANCE_NUM+1; i++)
1899     {
1900         if (m_i915Context[i] != nullptr)
1901         {
1902             int32_t ret = mos_set_context_param(m_i915Context[i], 0, I915_CONTEXT_PARAM_PRIORITY,(uint64_t)priority);
1903             if (ret != 0)
1904             {
1905                 MOS_OS_ASSERTMESSAGE("failed to set the gpu priority, errno is %d", ret);
1906                 break;
1907             }
1908         }
1909     }
1910     m_currCtxPriority = priority;
1911 }
1912 
//!
//! \brief    Reset all per-submission bookkeeping of this GPU context.
//! \details  Zeroes the allocation, patch-location, attached-resource and
//!           write-mode arrays and their counters, and drops the stale command
//!           buffer BO pointer if the buffer was already flushed.
//!
void GpuContextSpecificNext::ResetGpuContextStatus()
{
    // NOTE(review): these use the compile-time sizes ALLOCATIONLIST_SIZE /
    // PATCHLOCATIONLIST_SIZE, while the submit path zeroes with
    // m_maxNumAllocations — confirm the arrays are always allocated with the
    // compile-time sizes so this does not over- or under-clear.
    MosUtilities::MosZeroMemory(m_allocationList, sizeof(ALLOCATION_LIST) * ALLOCATIONLIST_SIZE);
    m_numAllocations = 0;
    MosUtilities::MosZeroMemory(m_patchLocationList, sizeof(PATCHLOCATIONLIST) * PATCHLOCATIONLIST_SIZE);
    m_currentNumPatchLocations = 0;

    MosUtilities::MosZeroMemory(m_attachedResources, sizeof(MOS_RESOURCE) * ALLOCATIONLIST_SIZE);
    m_resCount = 0;

    MosUtilities::MosZeroMemory(m_writeModeList, sizeof(bool) * ALLOCATIONLIST_SIZE);

    // After a flush the BO is owned/recycled elsewhere; only the stale pointer
    // is cleared here (no unreference).
    if ((m_cmdBufFlushed == true) && m_commandBuffer->OsResource.bo)
    {
        m_commandBuffer->OsResource.bo = nullptr;
    }
}
1930 
AllocateGPUStatusBuf()1931 MOS_STATUS GpuContextSpecificNext::AllocateGPUStatusBuf()
1932 {
1933     MOS_OS_FUNCTION_ENTER;
1934 
1935     m_statusBufferResource = (PMOS_RESOURCE)MOS_AllocAndZeroMemory(sizeof(MOS_RESOURCE));
1936     MOS_OS_CHK_NULL_RETURN(m_statusBufferResource);
1937 
1938     GraphicsResourceNext::CreateParams params;
1939     params.m_tileType  = MOS_TILE_LINEAR;
1940     params.m_type      = MOS_GFXRES_BUFFER;
1941     params.m_format    = Format_Buffer;
1942     params.m_width     = sizeof(MOS_GPU_STATUS_DATA);
1943     params.m_height    = 1;
1944     params.m_depth     = 1;
1945     params.m_arraySize = 1;
1946     params.m_name      = "GPU Status Buffer";
1947 
1948     GraphicsResourceNext *graphicsResource = GraphicsResourceNext::CreateGraphicResource(GraphicsResourceNext::osSpecificResource);
1949     MOS_OS_CHK_NULL_RETURN(graphicsResource);
1950 
1951     MOS_OS_CHK_STATUS_RETURN(graphicsResource->Allocate(m_osContext, params));
1952 
1953     GraphicsResourceNext::LockParams lockParams;
1954     lockParams.m_writeRequest = true;
1955     auto gpuStatusData       = (MOS_GPU_STATUS_DATA *)graphicsResource->Lock(m_osContext, lockParams);
1956     if (gpuStatusData == nullptr)
1957     {
1958         MOS_OS_ASSERTMESSAGE("Unable to lock gpu eStatus buffer for read.");
1959         graphicsResource->Free(m_osContext);
1960         MOS_Delete(graphicsResource);
1961         return MOS_STATUS_UNKNOWN;
1962     }
1963 
1964     MOS_STATUS eStatus = graphicsResource->ConvertToMosResource(m_statusBufferResource);
1965     MOS_OS_CHK_STATUS_RETURN(eStatus);
1966 
1967     return MOS_STATUS_SUCCESS;
1968 }
1969 
GetOcaRTLogResource(PMOS_RESOURCE globalInst)1970 PMOS_RESOURCE GpuContextSpecificNext::GetOcaRTLogResource(PMOS_RESOURCE globalInst)
1971 {
1972     // OcaRTLogResources are shared w/ different video processors.
1973     // iAllocationIndex array in MOS_RESOURCE indexed by gpu_context type. When resource being accessed
1974     // in GpuContextSpecificNext::RegisterResource and Mos_Specific_GetResourceAllocationIndex w/ more
1975     // than 2 video processors, the value may be overwritten and wrong allocation Index in array may be used.
1976     // To avoid this, use duplicate MOS_RESOURCE instance in GPU Context to ensure differnt iAllocationIndex
1977     // array of OcaRTLogResources being used for different GPU Context.
1978     if (!m_ocaRtLogResInited && globalInst)
1979     {
1980         m_ocaRtLogResource = *globalInst;
1981         m_ocaRtLogResInited = true;
1982     }
1983     return &m_ocaRtLogResource;
1984 }
1985 
1986 #if (_DEBUG || _RELEASE_INTERNAL)
SelectEngineInstanceByUser(void * engine_map,uint32_t * engineNum,uint32_t userEngineInstance,MOS_GPU_NODE gpuNode)1987 bool GpuContextSpecificNext::SelectEngineInstanceByUser(void *engine_map,
1988         uint32_t *engineNum, uint32_t userEngineInstance, MOS_GPU_NODE gpuNode)
1989 {
1990     uint32_t engineInstance     = 0x0;
1991 
1992     if (userEngineInstance && m_osParameters)
1993     {
1994         if(gpuNode == MOS_GPU_NODE_COMPUTE)
1995         {
1996             engineInstance  = (userEngineInstance >> ENGINE_INSTANCE_SELECT_COMPUTE_INSTANCE_SHIFT)
1997                 & (ENGINE_INSTANCE_SELECT_ENABLE_MASK >> (MAX_ENGINE_INSTANCE_NUM - *engineNum));
1998         }
1999         else if(gpuNode == MOS_GPU_NODE_VE)
2000         {
2001             engineInstance  = (userEngineInstance >> ENGINE_INSTANCE_SELECT_VEBOX_INSTANCE_SHIFT)
2002                 & (ENGINE_INSTANCE_SELECT_ENABLE_MASK >> (MAX_ENGINE_INSTANCE_NUM - *engineNum));
2003         }
2004         else if(gpuNode == MOS_GPU_NODE_VIDEO || gpuNode == MOS_GPU_NODE_VIDEO2)
2005         {
2006             engineInstance  = (userEngineInstance >> ENGINE_INSTANCE_SELECT_VDBOX_INSTANCE_SHIFT)
2007                 & (ENGINE_INSTANCE_SELECT_ENABLE_MASK >> (MAX_ENGINE_INSTANCE_NUM - *engineNum));
2008         }
2009         else
2010         {
2011             MOS_OS_NORMALMESSAGE("Invalid gpu node in use.");
2012         }
2013 
2014         mos_select_fixed_engine(m_osParameters->bufmgr, engine_map, engineNum, engineInstance);
2015     }
2016 
2017     return engineInstance;
2018 }
2019 #endif
2020