xref: /aosp_15_r20/external/intel-media-driver/media_driver/agnostic/common/cm/cm_task_internal.cpp (revision ba62d9d3abf0e404f2022b4cd7a85e107f48596f)
1 /*
2 * Copyright (c) 2017-2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file      cm_task_internal.cpp
24 //! \brief     Contains Class CmTaskInternal  definitions
25 //!
26 
27 #include "cm_task_internal.h"
28 
29 #include "cm_kernel_rt.h"
30 #include "cm_mem.h"
31 #include "cm_event_rt.h"
32 #include "cm_device_rt.h"
33 #include "cm_kernel_data.h"
34 #include "cm_thread_space_rt.h"
35 #include "cm_group_space.h"
36 #include "cm_vebox_rt.h"
37 #include "cm_vebox_data.h"
38 #include "cm_queue_rt.h"
39 #include "cm_surface_manager.h"
40 #include "cm_buffer_rt.h"
41 #include "cm_surface_2d_rt.h"
42 #include "cm_surface_2d_up_rt.h"
43 #include "cm_surface_3d_rt.h"
44 #include "cm_surface_vme.h"
45 #include "cm_surface_sampler.h"
46 #include "cm_surface_sampler8x8.h"
47 #include "mos_os_cp_interface_specific.h"
48 
49 namespace CMRT_UMD
50 {
51 //*-----------------------------------------------------------------------------
52 //| Purpose:    Create Task internal
53 //| Returns:    Result of the operation.
54 //*-----------------------------------------------------------------------------
Create(const uint32_t kernelCount,const uint32_t totalThreadCount,CmKernelRT * kernelArray[],const CmThreadSpaceRT * threadSpace,CmDeviceRT * device,const uint64_t syncBitmap,CmTaskInternal * & task,const uint64_t conditionalEndBitmap,PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo)55 int32_t CmTaskInternal::Create(const uint32_t kernelCount, const uint32_t totalThreadCount,
56                                CmKernelRT* kernelArray[], const CmThreadSpaceRT* threadSpace,
57                                CmDeviceRT* device, const uint64_t syncBitmap, CmTaskInternal*& task,
58                                const uint64_t conditionalEndBitmap,
59                                PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo)
60 {
61     int32_t result = CM_SUCCESS;
62     task = new (std::nothrow) CmTaskInternal(kernelCount, totalThreadCount, kernelArray, device,
63                                              syncBitmap, conditionalEndBitmap, conditionalEndInfo,
64                                              nullptr);
65     if( task )
66     {
67         result = task->Initialize(threadSpace, false);
68         if( result != CM_SUCCESS )
69         {
70             CmTaskInternal::Destroy( task);
71         }
72     }
73     else
74     {
75         CM_ASSERTMESSAGE("Error: Failed to create CmTaskInternal due to out of system memory.");
76         result = CM_OUT_OF_HOST_MEMORY;
77     }
78     return result;
79 }
80 
81 //*-----------------------------------------------------------------------------
82 //| Purpose:    Create Task internal with Thread Group Space
83 //| Returns:    Result of the operation.
84 //*-----------------------------------------------------------------------------
Create(const uint32_t kernelCount,const uint32_t totalThreadCount,CmKernelRT * kernelArray[],const CmThreadGroupSpace * threadGroupSpace,CmDeviceRT * device,const uint64_t syncBitmap,CmTaskInternal * & task,const uint64_t conditionalEndBitmap,PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo,const CM_EXECUTION_CONFIG * krnExecCfg)85 int32_t CmTaskInternal::Create( const uint32_t kernelCount, const uint32_t totalThreadCount,
86                                CmKernelRT* kernelArray[], const CmThreadGroupSpace* threadGroupSpace,
87                                CmDeviceRT* device, const uint64_t syncBitmap, CmTaskInternal*& task,
88                                const uint64_t conditionalEndBitmap,
89                                PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo,
90                                const CM_EXECUTION_CONFIG* krnExecCfg)
91 {
92     int32_t result = CM_SUCCESS;
93     task = new (std::nothrow) CmTaskInternal(kernelCount, totalThreadCount, kernelArray, device,
94                                              syncBitmap, conditionalEndBitmap, conditionalEndInfo,
95                                              krnExecCfg);
96 
97     if( task )
98     {
99         result = task->Initialize(threadGroupSpace);
100         if( result != CM_SUCCESS )
101         {
102             CmTaskInternal::Destroy( task);
103         }
104     }
105     else
106     {
107         CM_ASSERTMESSAGE("Error: Failed to create CmTaskInternal due to out of system memory.");
108         result = CM_OUT_OF_HOST_MEMORY;
109     }
110     return result;
111 }
112 
Create(CmDeviceRT * device,CmVeboxRT * vebox,CmTaskInternal * & task)113 int32_t CmTaskInternal::Create( CmDeviceRT* device, CmVeboxRT* vebox, CmTaskInternal*& task )
114 {
115     int32_t result = CM_SUCCESS;
116     task = new (std::nothrow) CmTaskInternal(0, 0, nullptr, device, CM_NO_KERNEL_SYNC,
117                                              CM_NO_CONDITIONAL_END, nullptr, nullptr);
118     if( task )
119     {
120         result = task->Initialize(vebox);
121         if( result != CM_SUCCESS )
122         {
123             CmTaskInternal::Destroy( task);
124         }
125     }
126     else
127     {
128         CM_ASSERTMESSAGE("Error: Failed to create CmTaskInternal due to out of system memory.");
129         result = CM_OUT_OF_HOST_MEMORY;
130     }
131     return result;
132 }
133 
134 //*-----------------------------------------------------------------------------
135 //| Purpose:    Create Task internal with hints
136 //| Returns:    Result of the operation.
137 //*-----------------------------------------------------------------------------
Create(const uint32_t kernelCount,const uint32_t totalThreadCount,CmKernelRT * kernelArray[],CmTaskInternal * & task,uint32_t numGeneratedTasks,bool isLastTask,uint32_t hints,CmDeviceRT * device)138 int32_t CmTaskInternal::Create(const uint32_t kernelCount, const uint32_t totalThreadCount,
139                                CmKernelRT* kernelArray[], CmTaskInternal*& task,
140                                uint32_t numGeneratedTasks, bool isLastTask, uint32_t hints,
141                                CmDeviceRT* device)
142 {
143     int32_t result = CM_SUCCESS;
144     task = new (std::nothrow) CmTaskInternal(kernelCount, totalThreadCount, kernelArray, device,
145                                              CM_NO_KERNEL_SYNC, CM_NO_CONDITIONAL_END, nullptr, nullptr);
146     if ( task )
147     {
148         result = task->Initialize(hints, numGeneratedTasks, isLastTask);
149         if ( result != CM_SUCCESS )
150         {
151             CmTaskInternal::Destroy( task );
152         }
153     }
154     else
155     {
156         CM_ASSERTMESSAGE("Error: Failed to create CmTaskInternal due to out of system memory.");
157         result = CM_OUT_OF_HOST_MEMORY;
158     }
159     return result;
160 }
161 
162 //*-----------------------------------------------------------------------------
163 //| Purpose:    Destroy Task internal
164 //| Returns:    CM_SUCCESS.
165 //*-----------------------------------------------------------------------------
Destroy(CmTaskInternal * & task)166 int32_t CmTaskInternal::Destroy( CmTaskInternal* &task )
167 {
168     CmSafeDelete( task );
169     return CM_SUCCESS;
170 }
171 
172 //*-----------------------------------------------------------------------------
173 //| Purpose:    Constructor of  CmTaskInternal
174 //| Returns:    None.
175 //*-----------------------------------------------------------------------------
CmTaskInternal(const uint32_t kernelCount,const uint32_t totalThreadCount,CmKernelRT * kernelArray[],CmDeviceRT * device,const uint64_t syncBitmap,const uint64_t conditionalEndBitmap,PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo,const CM_EXECUTION_CONFIG * krnExecCfg)176 CmTaskInternal::CmTaskInternal(const uint32_t kernelCount, const uint32_t totalThreadCount,
177                                CmKernelRT* kernelArray[], CmDeviceRT* device,
178                                const uint64_t syncBitmap, const uint64_t conditionalEndBitmap,
179                                PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo,
180                                const CM_EXECUTION_CONFIG* krnExecCfg) :
181     m_kernels( kernelCount ),
182     m_kernelData( kernelCount ),
183     m_kernelCount( kernelCount ),
184     m_totalThreadCount(totalThreadCount),
185     m_taskEvent( nullptr ),
186     m_isThreadSpaceCreated(false),
187     m_isThreadCoordinatesExisted(false),
188     m_threadSpaceWidth(0),
189     m_threadSpaceHeight(0),
190     m_threadSpaceDepth(0),
191     m_threadCoordinates(nullptr),
192     m_dependencyPattern(CM_NONE_DEPENDENCY),
193     m_walkingPattern(CM_WALK_DEFAULT),
194     m_mediaWalkerParamsSet( false ),
195     m_dependencyVectorsSet( false ),
196     m_dependencyMasks( nullptr ),
197     m_mediaWalkerGroupSelect(CM_MW_GROUP_NONE),
198     m_isThreadGroupSpaceCreated(false),
199     m_groupSpaceWidth(0),
200     m_groupSpaceHeight(0),
201     m_groupSpaceDepth(0),
202     m_slmSize(0),
203     m_spillMemUsed(0),
204     m_colorCountMinusOne( 0 ),
205     m_hints(0),
206     m_numTasksGenerated( 0 ),
207     m_isLastTask( false ),
208     m_ui64SyncBitmap (syncBitmap ),
209     m_ui64ConditionalEndBitmap(conditionalEndBitmap),
210     m_cmDevice( device ),
211     m_surfaceArray (nullptr),
212     m_isSurfaceUpdateDone(false),
213     m_taskType(CM_TASK_TYPE_DEFAULT),
214     m_mediaStatePtr( nullptr )
215 {
216     m_kernelSurfInfo.kernelNum = 0;
217     m_kernelSurfInfo.surfEntryInfosArray = nullptr;
218     m_kernelCurbeOffsetArray = MOS_NewArray(uint32_t, kernelCount);
219     CM_ASSERT(m_kernelCurbeOffsetArray != nullptr);
220 
221     for( uint32_t i = 0 ; i < kernelCount; i ++ )
222     {
223         m_kernels.SetElement( i, kernelArray[ i ] );
224         m_kernelData.SetElement( i, nullptr );
225     }
226 
227     CmSafeMemSet( &m_walkingParameters, 0, sizeof(m_walkingParameters));
228     CmSafeMemSet( &m_dependencyVectors, 0, sizeof(m_dependencyVectors));
229     CmSafeMemSet( &m_taskConfig, 0, sizeof(m_taskConfig));
230     if ( m_kernelCurbeOffsetArray != nullptr )
231     {
232         CmSafeMemSet( m_kernelCurbeOffsetArray, 0, sizeof(uint32_t) * kernelCount );
233     }
234 
235     CmSafeMemSet(&m_taskProfilingInfo, 0, sizeof(m_taskProfilingInfo));
236 
237     if (conditionalEndInfo != nullptr)
238     {
239         CmSafeMemCopy(&m_conditionalEndInfo, conditionalEndInfo, sizeof(m_conditionalEndInfo));
240     }
241     else
242     {
243         CmSafeMemSet(&m_conditionalEndInfo, 0, sizeof(m_conditionalEndInfo));
244     }
245 
246     CmSafeMemSet(&m_veboxParam, 0, sizeof(m_veboxParam));
247     CmSafeMemSet(&m_veboxState, 0, sizeof(m_veboxState));
248     CmSafeMemSet(&m_veboxSurfaceData, 0, sizeof(m_veboxSurfaceData));
249     CmSafeMemSet(&m_powerOption, 0, sizeof(m_powerOption));
250 
251     if (krnExecCfg != nullptr)
252     {
253         CmSafeMemCopy(&m_krnExecCfg, krnExecCfg, sizeof(m_krnExecCfg));
254     }
255 }
256 
257 //*-----------------------------------------------------------------------------
258 //| Purpose:    Destructor of  CmTaskInternal
259 //| Returns:    None.
260 //*-----------------------------------------------------------------------------
~CmTaskInternal(void)261 CmTaskInternal::~CmTaskInternal( void )
262 {
263 
264     //Write Event Infos
265     VtuneWriteEventInfo();
266 
267     //Release Profiling Info
268     VtuneReleaseProfilingInfo();
269 
270     for( uint32_t i = 0; i < m_kernelCount; i ++ )
271     {
272         CmKernelRT *kernel = (CmKernelRT*)m_kernels.GetElement(i);
273         CmKernelData* kernelData = (CmKernelData*)m_kernelData.GetElement( i );
274         if(kernel && kernelData)
275         {
276            kernel->ReleaseKernelData(kernelData);
277            CmKernel *kernelBase = kernel;
278            m_cmDevice->DestroyKernel(kernelBase);
279         }
280     }
281     m_kernelData.Delete();
282     m_kernels.Delete();
283 
284     MosSafeDeleteArray(m_kernelCurbeOffsetArray);
285 
286     if( m_taskEvent )
287     {
288         CmEvent *eventBase = m_taskEvent;
289         CmQueueRT *cmQueue = nullptr;
290         m_taskEvent->GetQueue(cmQueue);
291         if (cmQueue)
292         {
293             cmQueue->DestroyEvent(eventBase); // need to update the m_EventArray
294         }
295     }
296 
297     if(m_threadCoordinates){
298         for (uint32_t i=0; i<m_kernelCount; i++)
299         {
300             if (m_threadCoordinates[i])
301             {
302                 MosSafeDeleteArray(m_threadCoordinates[i]);
303             }
304         }
305         MosSafeDeleteArray( m_threadCoordinates );
306     }
307 
308     if( m_dependencyMasks )
309     {
310         for( uint32_t i = 0; i < m_kernelCount; ++i )
311         {
312             MosSafeDeleteArray(m_dependencyMasks[i]);
313         }
314         MosSafeDeleteArray( m_dependencyMasks );
315     }
316 
317     if((m_kernelSurfInfo.kernelNum != 0)&&(m_kernelSurfInfo.surfEntryInfosArray != nullptr))
318     {
319         ClearKernelSurfInfo();
320     }
321 
322     MosSafeDeleteArray(m_surfaceArray);
323 
324 }
325 
326 //*-----------------------------------------------------------------------------
327 //| Purpose:    Initialize Class  CmTaskInternal
328 //| Returns:    Result of the operation.
329 //*-----------------------------------------------------------------------------
Initialize(const CmThreadSpaceRT * threadSpace,bool isWithHints)330 int32_t CmTaskInternal::Initialize(const CmThreadSpaceRT* threadSpace, bool isWithHints)
331 {
332     uint32_t totalCurbeSize             = 0;
333     uint32_t surfacePoolSize            = 0;
334     uint32_t totalKernelBinarySize      = 0;
335     uint32_t kernelCurbeSize            = 0;
336     uint32_t kernelPayloadSize          = 0;
337     CmSurfaceManager* surfaceMgr = nullptr;
338     int32_t result              = CM_SUCCESS;
339     CM_HAL_MAX_VALUES* halMaxValues = nullptr;
340     CM_HAL_MAX_VALUES_EX* halMaxValuesEx = nullptr;
341     m_cmDevice->GetHalMaxValues( halMaxValues, halMaxValuesEx );
342 
343     if (m_cmDevice->IsPrintEnable())
344     {
345         SurfaceIndex *printBufferIndex = nullptr;
346         m_cmDevice->GetPrintBufferIndex(printBufferIndex);
347         CM_ASSERT(printBufferIndex);
348         for (uint32_t i = 0; i < m_kernelCount; i++)
349         {
350             CmKernelRT* kernel = (CmKernelRT*)m_kernels.GetElement(i);
351             if(kernel == nullptr)
352             {
353                 CM_ASSERTMESSAGE("Error: Invalid kernel pointer.");
354                 return CM_FAILURE;
355             }
356             if(FAILED(kernel->SetStaticBuffer(CM_PRINTF_STATIC_BUFFER_ID, printBufferIndex)))
357             {
358                 CM_ASSERTMESSAGE("Error: Failed to set static buffer.");
359                 return CM_FAILURE;
360             }
361         }
362     }
363 
364     m_cmDevice->GetSurfaceManager( surfaceMgr );
365     CM_CHK_NULL_RETURN_CMERROR(surfaceMgr);
366     surfacePoolSize = surfaceMgr->GetSurfacePoolSize();
367 
368     m_surfaceArray = MOS_NewArray(bool, surfacePoolSize);
369     if (!m_surfaceArray)
370     {
371         CM_ASSERTMESSAGE("Error: Out of system memory.");
372         return CM_FAILURE;
373     }
374     CmSafeMemSet( m_surfaceArray, 0, surfacePoolSize * sizeof( bool ) );
375 
376     for( uint32_t i = 0; i < m_kernelCount; i ++ )
377     {
378 
379         CmKernelRT* kernel = (CmKernelRT*)m_kernels.GetElement( i );
380         if(kernel == nullptr)
381         {
382             CM_ASSERTMESSAGE("Error: Invalid kernel pointer.");
383             return CM_FAILURE;
384         }
385 
386         uint32_t totalSize =  0;
387         CmKernelData* kernelData = nullptr;
388 
389         if ( isWithHints )
390         {
391             CmThreadSpaceRT* kernelThreadSpace = nullptr;
392             kernel->GetThreadSpace(kernelThreadSpace);
393             if( kernelThreadSpace )
394             {
395                 for(uint32_t j = i; j > 0; --j)
396                 {
397                     uint32_t width, height, myAdjY;
398                     CmKernelRT* tmpKernel = (CmKernelRT*)m_kernels.GetElement( j-1 );
399                     if( !tmpKernel )
400                     {
401                         CM_ASSERTMESSAGE("Error: Invalid kernel pointer.");
402                         return CM_FAILURE;
403                     }
404                     tmpKernel->GetThreadSpace(kernelThreadSpace);
405                     kernelThreadSpace->GetThreadSpaceSize(width, height);
406                     myAdjY = kernel->GetAdjustedYCoord();
407                     kernel->SetAdjustedYCoord(myAdjY + height);
408                 }
409             }
410         }
411 
412         if (threadSpace == nullptr)
413         {
414             CmThreadSpaceRT* kernelThreadSpace = nullptr;
415             kernel->GetThreadSpace(kernelThreadSpace);
416             if (kernelThreadSpace)
417             {
418                 kernelThreadSpace->SetDependencyArgToKernel(kernel);
419             }
420         }
421 
422         if (threadSpace != nullptr)
423         {
424             threadSpace->SetDependencyArgToKernel(kernel);
425         }
426 
427         kernel->CollectKernelSurface();
428         result = kernel->CreateKernelData( kernelData, totalSize, threadSpace );
429         if( (kernelData == nullptr) || (result != CM_SUCCESS))
430         {
431             CM_ASSERTMESSAGE("Error: Failed to create kernel data.");
432             CmKernelData::Destroy( kernelData );
433             return result;
434         }
435 
436         kernel->GetSizeInPayload( kernelPayloadSize );
437         kernel->GetSizeInCurbe( kernelCurbeSize );
438 
439         if ( ( kernelCurbeSize + kernelPayloadSize ) > halMaxValues->maxArgByteSizePerKernel )
440         {   //Failed, exceed the maximum of inline data
441             CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
442             return CM_EXCEED_KERNEL_ARG_SIZE_IN_BYTE;
443         }
444         else
445         {
446             kernelCurbeSize = kernel->GetAlignedCurbeSize( kernelCurbeSize );
447             totalCurbeSize += kernelCurbeSize;
448         }
449         m_kernelCurbeOffsetArray[ i ] = totalCurbeSize - kernelCurbeSize;
450 
451         m_kernelData.SetElement( i, kernelData );
452 
453         totalKernelBinarySize += kernel->GetKernelGenxBinarySize();
454         totalKernelBinarySize += CM_KERNEL_BINARY_PADDING_SIZE;  //Padding is necessary after kernel binary to avoid page fault issue
455 
456         bool *surfArray = nullptr;
457         kernel->GetKernelSurfaces(surfArray);
458         for (uint32_t j = 0; j < surfacePoolSize; j ++)
459         {
460             m_surfaceArray[j] |= surfArray[j];
461         }
462         kernel->ResetKernelSurfaces();
463 
464         PCM_CONTEXT_DATA cmData = ( PCM_CONTEXT_DATA )m_cmDevice->GetAccelData();
465         PCM_HAL_STATE state = cmData->cmHalState;
466         PRENDERHAL_MEDIA_STATE mediaStatePtr = state->pfnGetMediaStatePtrForKernel( state, kernel );
467 
468         if ( ( mediaStatePtr != nullptr ) && ( m_mediaStatePtr == nullptr ) )
469         {
470             m_mediaStatePtr = mediaStatePtr;
471         }
472         else if ( ( mediaStatePtr != nullptr ) && ( m_mediaStatePtr != nullptr ) )
473         {
474             CM_ASSERTMESSAGE( "Error: More than one media state heap are used in one task! User-provided state heap error.\n" );
475             return CM_INVALID_ARG_VALUE;
476         }
477     }
478 
479     if (totalKernelBinarySize > halMaxValues->maxKernelBinarySize * halMaxValues->maxKernelsPerTask)
480     {
481         CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
482         return CM_EXCEED_MAX_KERNEL_SIZE_IN_BYTE;
483     }
484 
485     if (threadSpace)
486     {
487         if(FAILED(this->CreateThreadSpaceData(threadSpace)))
488         {
489             CM_ASSERTMESSAGE("Error: Failed to create thread space data.");
490             return CM_FAILURE;
491         }
492         m_isThreadSpaceCreated = true;
493     }
494 
495     UpdateSurfaceStateOnTaskCreation();
496 
497     m_taskType = CM_INTERNAL_TASK_WITH_THREADSPACE;
498 
499     if ( m_cmDevice->CheckGTPinEnabled())
500     {
501         AllocateKernelSurfInfo();
502     }
503 
504     this->VtuneInitProfilingInfo(threadSpace);
505 
506     return CM_SUCCESS;
507 }
508 
509 //*-----------------------------------------------------------------------------
510 //| Purpose:    Initialize Class  CmTaskInternal with thread group space
511 //| Returns:    Result of the operation.
512 //*-----------------------------------------------------------------------------
Initialize(const CmThreadGroupSpace * threadGroupSpace)513 int32_t CmTaskInternal::Initialize(const CmThreadGroupSpace* threadGroupSpace)
514 {
515     uint32_t totalCurbeSize         = 0;
516     uint32_t surfacePoolSize        = 0;
517     uint32_t totalKernelBinarySize  = 0;
518     uint32_t kernelCurbeSize        = 0;
519     uint32_t kernelPayloadSize      = 0;
520 
521     CmSurfaceManager* surfaceMgr = nullptr;
522     CM_HAL_MAX_VALUES* halMaxValues = nullptr;
523     CM_HAL_MAX_VALUES_EX* halMaxValuesEx = nullptr;
524     m_cmDevice->GetHalMaxValues( halMaxValues, halMaxValuesEx );
525 
526     m_cmDevice->GetSurfaceManager( surfaceMgr );
527     CM_CHK_NULL_RETURN_CMERROR( surfaceMgr );
528     surfacePoolSize = surfaceMgr->GetSurfacePoolSize();
529     m_surfaceArray = MOS_NewArray(bool, surfacePoolSize);
530     if (!m_surfaceArray)
531     {
532         CM_ASSERTMESSAGE("Error: Out of system memory.");
533         return CM_OUT_OF_HOST_MEMORY;
534     }
535     CmSafeMemSet( m_surfaceArray, 0, surfacePoolSize * sizeof( bool ) );
536 
537     if (m_cmDevice->IsPrintEnable())
538     {
539         SurfaceIndex *printBufferIndex = nullptr;
540         m_cmDevice->GetPrintBufferIndex(printBufferIndex);
541         CM_ASSERT(printBufferIndex);
542         for (uint32_t i = 0; i < m_kernelCount; i++)
543         {
544             CmKernelRT* kernel = (CmKernelRT*)m_kernels.GetElement(i);
545             if(kernel == nullptr)
546             {
547                 CM_ASSERTMESSAGE("Error: Invalid kernel pointer.");
548                 return CM_FAILURE;
549             }
550             if(FAILED(kernel->SetStaticBuffer(CM_PRINTF_STATIC_BUFFER_ID, printBufferIndex)))
551             {
552                 CM_ASSERTMESSAGE("Error: Failed to set static buffer.");
553                 return CM_FAILURE;
554             }
555         }
556     }
557 
558     for( uint32_t i = 0; i < m_kernelCount; i ++ )
559     {
560         CmKernelRT* kernel = (CmKernelRT*)m_kernels.GetElement( i );
561         if(kernel == nullptr)
562         {
563             CM_ASSERTMESSAGE("Error: Invalid kernel pointer.");
564             return CM_FAILURE;
565         }
566 
567         kernel->CollectKernelSurface();
568 
569         uint32_t totalSize =  0;
570         CmKernelData* kernelData = nullptr;
571 
572         int32_t result = kernel->CreateKernelData( kernelData, totalSize, threadGroupSpace );
573         if(result != CM_SUCCESS)
574         {
575             CM_ASSERTMESSAGE("Error: Failed to create kernel data.");
576             CmKernelData::Destroy( kernelData );
577             return result;
578         }
579 
580         kernelData->SetKernelDataSize(totalSize);
581 
582         kernel->GetSizeInPayload(kernelPayloadSize);
583 
584         PCM_HAL_KERNEL_PARAM  halKernelParam = kernelData->GetHalCmKernelData();
585         if (halKernelParam->crossThreadConstDataLen + halKernelParam->curbeSizePerThread + kernelPayloadSize
586             > halMaxValues->maxArgByteSizePerKernel)
587         {   //Failed, exceed the maximum of inline data
588             CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
589             return CM_EXCEED_KERNEL_ARG_SIZE_IN_BYTE;
590         }
591         else
592         {
593             kernel->GetSizeInCurbe(kernelCurbeSize);
594             kernelCurbeSize = kernel->GetAlignedCurbeSize(kernelCurbeSize);
595             totalCurbeSize += kernelCurbeSize;
596         }
597 
598         m_kernelCurbeOffsetArray[ i ] = totalCurbeSize - kernelCurbeSize;
599 
600         m_kernelData.SetElement( i, kernelData );
601 
602         m_slmSize = kernel->GetSLMSize();
603 
604         m_spillMemUsed = kernel->GetSpillMemUsed();
605 
606         totalKernelBinarySize += kernel->GetKernelGenxBinarySize();
607         totalKernelBinarySize += CM_KERNEL_BINARY_PADDING_SIZE;
608 
609         bool *surfArray = nullptr;
610         kernel->GetKernelSurfaces(surfArray);
611         for (uint32_t j = 0; j < surfacePoolSize; j ++)
612         {
613             m_surfaceArray[j] |= surfArray[j];
614         }
615         kernel->ResetKernelSurfaces();
616 
617         PCM_CONTEXT_DATA cmData = ( PCM_CONTEXT_DATA )m_cmDevice->GetAccelData();
618         PCM_HAL_STATE state = cmData->cmHalState;
619         PRENDERHAL_MEDIA_STATE mediaStatePtr = state->pfnGetMediaStatePtrForKernel( state, kernel );
620 
621         if ( ( mediaStatePtr != nullptr ) && ( m_mediaStatePtr == nullptr ) )
622         {
623             m_mediaStatePtr = mediaStatePtr;
624         }
625         else if ( ( mediaStatePtr != nullptr ) && ( m_mediaStatePtr != nullptr ) )
626         {
627             CM_ASSERTMESSAGE("Error: More than one media state heap are used in one task! User-provided state heap error.\n" );
628             return CM_INVALID_ARG_VALUE;
629         }
630     }
631 
632     if( totalKernelBinarySize > halMaxValues->maxKernelBinarySize * halMaxValues->maxKernelsPerTask)
633     {
634         CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
635         return CM_EXCEED_MAX_KERNEL_SIZE_IN_BYTE;
636     }
637 
638     UpdateSurfaceStateOnTaskCreation();
639 
640     m_taskType = CM_INTERNAL_TASK_WITH_THREADGROUPSPACE;
641 
642     if (threadGroupSpace)
643     {
644         threadGroupSpace->GetThreadGroupSpaceSize(m_threadSpaceWidth, m_threadSpaceHeight,
645                                                   m_threadSpaceDepth, m_groupSpaceWidth,
646                                                   m_groupSpaceHeight, m_groupSpaceDepth);
647         m_isThreadGroupSpaceCreated = true;
648     }
649 
650     if ( m_cmDevice->CheckGTPinEnabled())
651     {
652         AllocateKernelSurfInfo();
653     }
654 
655     this->VtuneInitProfilingInfo(threadGroupSpace);
656 
657     return CM_SUCCESS;
658 }
659 
660 //*-----------------------------------------------------------------------------
661 //| Purpose:    Initialize Class  CmTaskInternal
662 //| Returns:    Result of the operation.
663 //*-----------------------------------------------------------------------------
Initialize(CmVeboxRT * vebox)664 int32_t CmTaskInternal::Initialize(CmVeboxRT* vebox)
665 {
666     int32_t result = CM_SUCCESS;
667     CmSurfaceManager* surfaceMgr = nullptr;
668     uint32_t surfacePoolSize = 0;
669 
670     m_cmDevice->GetSurfaceManager( surfaceMgr );
671     CM_CHK_NULL_RETURN_CMERROR( surfaceMgr );
672     surfacePoolSize = surfaceMgr->GetSurfacePoolSize();
673     m_surfaceArray = MOS_NewArray(bool, surfacePoolSize);
674     if (!m_surfaceArray)
675     {
676         CM_ASSERTMESSAGE("Error: Out of system memory.");
677         return CM_FAILURE;
678     }
679     CmSafeMemSet( m_surfaceArray, 0, surfacePoolSize * sizeof( bool ) );
680 
681     CmBufferUP *paramBuffer = nullptr;
682 
683     paramBuffer = vebox->GetParam();
684     m_veboxState = vebox->GetState();
685 
686     m_veboxParam = paramBuffer;
687     m_taskType = CM_INTERNAL_TASK_VEBOX;
688 
689     //Update used surfaces
690     for (int i = 0; i < VEBOX_SURFACE_NUMBER; i++)
691     {
692         CmSurface2DRT* surf = nullptr;
693         vebox->GetSurface(i, surf);
694         if (surf)
695         {
696             uint32_t surfaceHandle = 0;
697             SurfaceIndex* surfIndex = nullptr;
698             surf->GetIndex(surfIndex);
699             surf->GetHandle(surfaceHandle);
700             m_surfaceArray[surfIndex->get_data()] = true;
701             m_veboxSurfaceData.surfaceEntry[i].surfaceIndex = (uint16_t)surfaceHandle;
702             m_veboxSurfaceData.surfaceEntry[i].surfaceCtrlBits = vebox->GetSurfaceControlBits(i);
703         }
704         else
705         {
706             m_veboxSurfaceData.surfaceEntry[i].surfaceIndex = CM_INVALID_INDEX;
707             m_veboxSurfaceData.surfaceEntry[i].surfaceCtrlBits = CM_INVALID_INDEX;
708         }
709     }
710 
711     UpdateSurfaceStateOnTaskCreation();
712 
713     return result;
714 }
715 
716 //*-----------------------------------------------------------------------------
717 //| Purpose:    Initialize Class  CmTaskInternal with hints
718 //| Returns:    Result of the operation
719 //*-----------------------------------------------------------------------------
Initialize(uint32_t hints,uint32_t numTasksGenerated,bool isLastTask)720 int32_t CmTaskInternal::Initialize(uint32_t hints, uint32_t numTasksGenerated, bool isLastTask)
721 {
722     CmThreadSpaceRT* threadSpace = nullptr;
723     int32_t result = CM_SUCCESS;
724 
725     // use ThreadSpace Initialize function to create kernel data
726     result = this->Initialize(threadSpace, true);
727 
728     // set hints in task
729     m_hints = hints;
730 
731     m_numTasksGenerated = numTasksGenerated;
732     m_isLastTask = isLastTask;
733 
734     // set task type to be EnqueueWithHints
735     m_taskType = CM_INTERNAL_TASK_ENQUEUEWITHHINTS;
736 
737     return result;
738 }
739 
740 //*-----------------------------------------------------------------------------
741 //| Purpose:    Get Kernel Count
742 //| Returns:    CM_SUCCESS.
743 //*-----------------------------------------------------------------------------
GetKernelCount(uint32_t & count)744 int32_t CmTaskInternal::GetKernelCount( uint32_t& count )
745 {
746     count = m_kernelCount;
747     return CM_SUCCESS;
748 }
749 
GetTaskSurfaces(bool * & surfArray)750 int32_t CmTaskInternal::GetTaskSurfaces( bool  *&surfArray )
751 {
752     surfArray = m_surfaceArray;
753     return CM_SUCCESS;
754 }
755 
756 //*-----------------------------------------------------------------------------
757 //| Purpose:    Get Kernel from the Kernel array
758 //| Returns:    Result of operation.
759 //*-----------------------------------------------------------------------------
GetKernel(const uint32_t index,CmKernelRT * & kernel)760 int32_t CmTaskInternal::GetKernel( const uint32_t index, CmKernelRT* & kernel )
761 {
762     kernel = nullptr;
763     if( index < m_kernels.GetSize() )
764     {
765         kernel = (CmKernelRT*)m_kernels.GetElement( index );
766         return CM_SUCCESS;
767     }
768     else
769     {
770         return CM_FAILURE;
771     }
772 }
773 
774 //*-----------------------------------------------------------------------------
775 //| Purpose:    Geth Kernel data by kernel's index
776 //| Returns:    Result of operation.
777 //*-----------------------------------------------------------------------------
GetKernelData(const uint32_t index,CmKernelData * & kernelData)778 int32_t CmTaskInternal::GetKernelData( const uint32_t index, CmKernelData* & kernelData )
779 {
780     kernelData = nullptr;
781     if( index < m_kernelData.GetSize() )
782     {
783         kernelData = (CmKernelData*)m_kernelData.GetElement( index );
784         return CM_SUCCESS;
785     }
786     else
787     {
788         return CM_FAILURE;
789     }
790 }
791 
792 //*-----------------------------------------------------------------------------
793 //| Purpose:    Geth Kernel data size by kernel's index
794 //| Returns:    Result of operation.
795 //*-----------------------------------------------------------------------------
GetKernelDataSize(const uint32_t index,uint32_t & size)796 int32_t CmTaskInternal::GetKernelDataSize( const uint32_t index, uint32_t & size )
797 {
798     size = 0;
799     CmKernelData*  kernelData = nullptr;
800     if( index < m_kernelData.GetSize() )
801     {
802         kernelData = (CmKernelData*)m_kernelData.GetElement( index );
803         if (kernelData == nullptr)
804         {
805             CM_ASSERTMESSAGE("Error: Invalid kernel data.");
806             return CM_FAILURE;
807         }
808         size = kernelData->GetKernelDataSize();
809         return CM_SUCCESS;
810     }
811     else
812     {
813         return CM_FAILURE;
814     }
815 }
816 
817 //*-----------------------------------------------------------------------------
818 //| Purpose:    Get kernel's curbe offset
819 //| Returns:    Result of operation.
820 //*-----------------------------------------------------------------------------
GetKernelCurbeOffset(const uint32_t index)821 uint32_t CmTaskInternal::GetKernelCurbeOffset( const uint32_t index )
822 {
823     return ( uint32_t ) m_kernelCurbeOffsetArray[ index ];
824 }
825 
826 //*-----------------------------------------------------------------------------
827 //| Purpose:    Set task event, need add refcount hehe.
828 //| Returns:    Result of operation.
829 //*-----------------------------------------------------------------------------
SetTaskEvent(CmEventRT * event)830 int32_t CmTaskInternal::SetTaskEvent( CmEventRT* event )
831 {
832     m_taskEvent = event;
833     // add refCount
834      m_taskEvent->Acquire();
835     return CM_SUCCESS;
836 }
837 
838 //*-----------------------------------------------------------------------------
839 //| Purpose:    Get the task event
840 //| Returns:    Result of operation.
841 //*-----------------------------------------------------------------------------
GetTaskEvent(CmEventRT * & event)842 int32_t CmTaskInternal::GetTaskEvent( CmEventRT* & event )
843 {
844     event = m_taskEvent;
845     return CM_SUCCESS;
846 }
847 
848 //*-----------------------------------------------------------------------------
849 //| Purpose:    Get the task's status
850 //| Returns:    Result of operation.
851 //*-----------------------------------------------------------------------------
GetTaskStatus(CM_STATUS & taskStatus)852 int32_t CmTaskInternal::GetTaskStatus(CM_STATUS & taskStatus)
853 {
854     if(m_taskEvent == nullptr)
855     {
856         return CM_FAILURE;
857     }
858 
859     return m_taskEvent->GetStatusNoFlush(taskStatus);
860 }
861 
862 //*-----------------------------------------------------------------------------
863 //| Purpose:    Record CPU ticks for Flush Time
864 //| Returns:    Result of operation.
865 //*-----------------------------------------------------------------------------
VtuneSetFlushTime()866 int32_t CmTaskInternal::VtuneSetFlushTime()
867 {
868     if(!m_cmDevice->IsVtuneLogOn())
869     {   // return directly if ETW log is off
870         return CM_SUCCESS;
871     }
872 
873     MosUtilities::MosQueryPerformanceCounter((uint64_t*)&m_taskProfilingInfo.flushTime.QuadPart);
874     return CM_SUCCESS;
875 }
876 
877 //*-----------------------------------------------------------------------------
878 //| Purpose:    Initialize Profiling Information for Media Pipeline
879 //| Returns:    Result of operation.
880 //*-----------------------------------------------------------------------------
VtuneInitProfilingInfo(const CmThreadSpaceRT * perTaskThreadSpace)881 int32_t CmTaskInternal::VtuneInitProfilingInfo(const CmThreadSpaceRT *perTaskThreadSpace)
882 {
883     CmKernelRT    *cmKernel = nullptr;
884     CmThreadSpaceRT *perKernelThreadSpace = nullptr;
885     uint32_t    threadSpaceWidth = 0;
886     uint32_t    threadSpaceHeight = 0;
887 
888     int32_t     hr = CM_SUCCESS;
889 
890     if(!m_cmDevice->IsVtuneLogOn())
891     {   // return directly if ETW log is off
892         return CM_SUCCESS;
893     }
894 
895     CmSafeMemSet(&m_taskProfilingInfo, 0, sizeof(m_taskProfilingInfo));
896     m_taskProfilingInfo.kernelCount = m_kernelCount;
897     m_taskProfilingInfo.threadID    = CmGetCurThreadId(); // Get Thread ID
898 
899     MosUtilities::MosQueryPerformanceCounter((uint64_t*)&m_taskProfilingInfo.enqueueTime.QuadPart); // Get Enqueue Time
900 
901     //  Currently, the Kernel/ThreadSpace/ThreadGroupSpace could not be deleted before task finished.
902     m_taskProfilingInfo.kernelNames = MOS_NewArray(char, (CM_MAX_KERNEL_NAME_SIZE_IN_BYTE * m_kernelCount));
903     CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.kernelNames);
904 
905     m_taskProfilingInfo.localWorkWidth = MOS_NewArray(uint32_t, m_kernelCount);
906     CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.localWorkWidth);
907 
908     m_taskProfilingInfo.localWorkHeight = MOS_NewArray(uint32_t, m_kernelCount);
909     CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.localWorkHeight);
910 
911     m_taskProfilingInfo.globalWorkWidth = MOS_NewArray(uint32_t, m_kernelCount);
912     CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.globalWorkWidth);
913 
914     m_taskProfilingInfo.globalWorkHeight = MOS_NewArray(uint32_t, m_kernelCount);
915     CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.globalWorkHeight);
916 
917     for (uint32_t i = 0; i < m_kernelCount; i++)
918     {
919         CM_CHK_CMSTATUS_GOTOFINISH(GetKernel(i, cmKernel));
920         CM_CHK_NULL_GOTOFINISH_CMERROR(cmKernel);
921 
922         //Copy Kernel Name
923         MOS_SecureStrcpy(m_taskProfilingInfo.kernelNames + m_taskProfilingInfo.kernelNameLen,
924                  CM_MAX_KERNEL_NAME_SIZE_IN_BYTE, cmKernel->GetName());
925 
926         //Add Kernel Name Length
927         m_taskProfilingInfo.kernelNameLen += strlen(cmKernel->GetName()) + 1;
928 
929         CM_CHK_CMSTATUS_GOTOFINISH(cmKernel->GetThreadSpace(perKernelThreadSpace));
930 
931         if (perTaskThreadSpace)
932         {
933             //Per Task Thread Space Exists
934             m_taskProfilingInfo.localWorkWidth[i] = m_threadSpaceWidth;
935             m_taskProfilingInfo.localWorkHeight[i] = m_threadSpaceHeight;
936             m_taskProfilingInfo.globalWorkWidth[i] = m_threadSpaceWidth;
937             m_taskProfilingInfo.globalWorkHeight[i] = m_threadSpaceHeight;
938         }
939         else if (perKernelThreadSpace)
940         {
941             //Fill each threads Space's info
942             perKernelThreadSpace->GetThreadSpaceSize(threadSpaceWidth, threadSpaceHeight);
943             m_taskProfilingInfo.localWorkWidth[i] = threadSpaceWidth;
944             m_taskProfilingInfo.localWorkHeight[i] = threadSpaceHeight;
945             m_taskProfilingInfo.globalWorkWidth[i] = threadSpaceWidth;
946             m_taskProfilingInfo.globalWorkHeight[i] = threadSpaceHeight;
947         }
948         else
949         {
950             //Fill the thread count
951             uint32_t threadCount = 0;
952             cmKernel->GetThreadCount(threadCount);
953             m_taskProfilingInfo.localWorkWidth[i] = threadCount;
954             m_taskProfilingInfo.localWorkHeight[i] = 1;
955             m_taskProfilingInfo.globalWorkWidth[i] = threadCount;
956             m_taskProfilingInfo.globalWorkHeight[i] = 1;
957         }
958 
959     }
960 
961 finish:
962     if (hr != CM_SUCCESS)
963     {
964         MosSafeDeleteArray(m_taskProfilingInfo.kernelNames);
965         MosSafeDeleteArray(m_taskProfilingInfo.localWorkWidth);
966         MosSafeDeleteArray(m_taskProfilingInfo.localWorkHeight);
967         MosSafeDeleteArray(m_taskProfilingInfo.globalWorkWidth);
968         MosSafeDeleteArray(m_taskProfilingInfo.globalWorkHeight);
969     }
970     return hr;
971 
972 }
973 
974 //*-----------------------------------------------------------------------------
975 //| Purpose:    Initialize Profiling Information
976 //| Returns:    Result of operation.
977 //*-----------------------------------------------------------------------------
VtuneInitProfilingInfo(const CmThreadGroupSpace * perTaskThreadGroupSpace)978 int32_t CmTaskInternal::VtuneInitProfilingInfo(const CmThreadGroupSpace *perTaskThreadGroupSpace)
979 {
980     CmKernelRT    *cmKernel = nullptr;
981     CmThreadGroupSpace *perKernelGroupSpace = nullptr;
982     uint32_t    threadSpaceWidth = 0;
983     uint32_t    threadSpaceHeight = 0;
984     uint32_t    threadSpaceDepth = 0;
985     uint32_t    threadGroupSpaceWidth = 0;
986     uint32_t    threadGroupSpaceHeight = 0;
987     uint32_t    threadGroupSpaceDepth = 0;
988     int32_t     hr = CM_SUCCESS;
989 
990     if(!m_cmDevice->IsVtuneLogOn())
991     {   // return directly if ETW log is off
992         return CM_SUCCESS;
993     }
994 
995     CmSafeMemSet(&m_taskProfilingInfo, 0, sizeof(m_taskProfilingInfo));
996     m_taskProfilingInfo.kernelCount = m_kernelCount;
997 
998     m_taskProfilingInfo.threadID    = CmGetCurThreadId(); // Get Thread ID
999 
1000     MosUtilities::MosQueryPerformanceCounter((uint64_t*)&m_taskProfilingInfo.enqueueTime.QuadPart); // Get Enqueue Time
1001 
1002     m_taskProfilingInfo.kernelNames = MOS_NewArray(char, (CM_MAX_KERNEL_NAME_SIZE_IN_BYTE * m_kernelCount));
1003     CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.kernelNames);
1004 
1005     m_taskProfilingInfo.localWorkWidth = MOS_NewArray(uint32_t, m_kernelCount);
1006     CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.localWorkWidth);
1007 
1008     m_taskProfilingInfo.localWorkHeight = MOS_NewArray(uint32_t, m_kernelCount);
1009     CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.localWorkHeight);
1010 
1011     m_taskProfilingInfo.globalWorkWidth = MOS_NewArray(uint32_t, m_kernelCount);
1012     CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.globalWorkWidth);
1013 
1014     m_taskProfilingInfo.globalWorkHeight = MOS_NewArray(uint32_t, m_kernelCount);
1015     CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.globalWorkHeight);
1016 
1017     for (uint32_t i = 0; i < m_kernelCount; i++)
1018     {
1019         CM_CHK_CMSTATUS_GOTOFINISH(GetKernel(i, cmKernel));
1020         CM_CHK_NULL_GOTOFINISH_CMERROR(cmKernel);
1021 
1022         //Copy Kernel Name
1023         MOS_SecureStrcpy(m_taskProfilingInfo.kernelNames + m_taskProfilingInfo.kernelNameLen,
1024                  CM_MAX_KERNEL_NAME_SIZE_IN_BYTE, cmKernel->GetName());
1025 
1026         //Add Kernel Name Length
1027         m_taskProfilingInfo.kernelNameLen += strlen(cmKernel->GetName()) + 1;
1028 
1029         CM_CHK_CMSTATUS_GOTOFINISH(cmKernel->GetThreadGroupSpace(perKernelGroupSpace));
1030 
1031         if (perTaskThreadGroupSpace)
1032         {  // Per Thread Group Space
1033             perTaskThreadGroupSpace->GetThreadGroupSpaceSize(threadSpaceWidth, threadSpaceHeight,
1034                                                              threadSpaceDepth, threadGroupSpaceWidth,
1035                                                              threadGroupSpaceHeight, threadGroupSpaceDepth);
1036             m_taskProfilingInfo.localWorkWidth[i] = threadSpaceWidth;
1037             m_taskProfilingInfo.localWorkHeight[i] = threadSpaceHeight;
1038             m_taskProfilingInfo.globalWorkWidth[i] = threadSpaceWidth*threadGroupSpaceWidth;
1039             m_taskProfilingInfo.globalWorkHeight[i] = threadSpaceHeight*threadGroupSpaceHeight;
1040 
1041         }
1042         else if (perKernelGroupSpace)
1043         {
1044             //Fill each threads group space's info
1045             perKernelGroupSpace->GetThreadGroupSpaceSize(threadSpaceWidth, threadSpaceHeight,
1046                                                          threadSpaceDepth, threadGroupSpaceWidth,
1047                                                          threadGroupSpaceHeight, threadGroupSpaceDepth);
1048             m_taskProfilingInfo.localWorkWidth[i] = threadSpaceWidth;
1049             m_taskProfilingInfo.localWorkHeight[i] = threadSpaceHeight;
1050             m_taskProfilingInfo.globalWorkWidth[i] = threadSpaceWidth*threadGroupSpaceWidth;
1051             m_taskProfilingInfo.globalWorkHeight[i] = threadSpaceHeight*threadGroupSpaceHeight;  //Yi need to rethink
1052         }
1053 
1054     }
1055 
1056 finish:
1057     if (hr != CM_SUCCESS)
1058     {
1059         MosSafeDeleteArray(m_taskProfilingInfo.kernelNames);
1060         MosSafeDeleteArray(m_taskProfilingInfo.localWorkWidth);
1061         MosSafeDeleteArray(m_taskProfilingInfo.localWorkHeight);
1062         MosSafeDeleteArray(m_taskProfilingInfo.globalWorkWidth);
1063         MosSafeDeleteArray(m_taskProfilingInfo.globalWorkHeight);
1064     }
1065     return hr;
1066 }
1067 
1068 //*-----------------------------------------------------------------------------
1069 //| Purpose:    Release Profiling information
1070 //| Returns:    Result of operation.
1071 //*-----------------------------------------------------------------------------
VtuneReleaseProfilingInfo()1072 int32_t CmTaskInternal::VtuneReleaseProfilingInfo()
1073 {
1074     if(!m_cmDevice->IsVtuneLogOn())
1075     {   // return directly if ETW log is off
1076         return CM_SUCCESS;
1077     }
1078 
1079     MosSafeDeleteArray(m_taskProfilingInfo.kernelNames);
1080     MosSafeDeleteArray(m_taskProfilingInfo.localWorkWidth);
1081     MosSafeDeleteArray(m_taskProfilingInfo.localWorkHeight);
1082     MosSafeDeleteArray(m_taskProfilingInfo.globalWorkWidth);
1083     MosSafeDeleteArray(m_taskProfilingInfo.globalWorkHeight);
1084 
1085     return CM_SUCCESS;
1086 }
1087 
1088 //*-----------------------------------------------------------------------------
1089 //| Purpose:    Reset KernelData status from IN_USE to IDLE.
1090 //              It is called immediately after the task being flushed.
1091 //| Returns:    Result of operation.
1092 //*-----------------------------------------------------------------------------
ResetKernelDataStatus()1093 int32_t CmTaskInternal::ResetKernelDataStatus()
1094 {
1095     int32_t     hr          = CM_SUCCESS;
1096 
1097     for(uint32_t krnDataIndex =0 ; krnDataIndex < m_kernelCount; krnDataIndex++ )
1098     {
1099         CmKernelData    *kernelData;
1100         CM_CHK_CMSTATUS_GOTOFINISH(GetKernelData(krnDataIndex, kernelData));
1101         CM_CHK_NULL_GOTOFINISH_CMERROR(kernelData);
1102         CM_CHK_CMSTATUS_GOTOFINISH(kernelData->ResetStatus());
1103     }
1104 
1105 finish:
1106     return hr;
1107 }
1108 
1109 //*-----------------------------------------------------------------------------
1110 //| Purpose:    Create thread space data
1111 //| Returns:    Result of operation.
1112 //*-----------------------------------------------------------------------------
CreateThreadSpaceData(const CmThreadSpaceRT * threadSpace)1113 int32_t CmTaskInternal::CreateThreadSpaceData(const CmThreadSpaceRT* threadSpace)
1114 {
1115     uint32_t i;
1116     uint32_t width, height;
1117     uint32_t *kernelCoordinateIndex = nullptr;
1118     int hr = CM_SUCCESS;
1119     CmThreadSpaceRT *threadSpaceRT = const_cast<CmThreadSpaceRT*>(threadSpace);
1120     CmKernelRT* kernelInThreadSpace = nullptr;
1121     CmKernelRT* kernelInTask = nullptr;
1122     CM_CHK_NULL_GOTOFINISH(threadSpaceRT, CM_NULL_POINTER);
1123 
1124     threadSpaceRT->GetThreadSpaceSize(m_threadSpaceWidth, m_threadSpaceHeight);
1125 
1126     if (threadSpaceRT->IsThreadAssociated())
1127     {
1128         m_threadCoordinates = MOS_NewArray(PCM_HAL_SCOREBOARD, m_kernelCount);
1129         CM_CHK_NULL_GOTOFINISH(m_threadCoordinates, CM_FAILURE);
1130         CmSafeMemSet(m_threadCoordinates, 0, m_kernelCount*sizeof(PCM_HAL_SCOREBOARD));
1131 
1132         m_dependencyMasks = MOS_NewArray(PCM_HAL_MASK_AND_RESET, m_kernelCount);
1133         CM_CHK_NULL_GOTOFINISH(m_dependencyMasks, CM_FAILURE);
1134         CmSafeMemSet(m_dependencyMasks, 0, m_kernelCount*sizeof(PCM_HAL_MASK_AND_RESET));
1135 
1136         kernelCoordinateIndex = MOS_NewArray(uint32_t, m_kernelCount);
1137         if(m_threadCoordinates && kernelCoordinateIndex && m_dependencyMasks)
1138         {
1139             CmSafeMemSet(kernelCoordinateIndex, 0, m_kernelCount*sizeof(uint32_t));
1140             for (i = 0; i< m_kernelCount; i++)
1141             {
1142                 kernelCoordinateIndex[i] = 0;
1143                 uint32_t threadCount;
1144                 this->GetKernel(i, kernelInTask);
1145 
1146                 if(kernelInTask == nullptr)
1147                 {
1148                     CM_ASSERTMESSAGE("Error: Invalid kernel pointer in task.");
1149                     hr = CM_NULL_POINTER;
1150                     goto finish;
1151                 }
1152 
1153                 kernelInTask->GetThreadCount(threadCount);
1154                 if (threadCount == 0)
1155                 {
1156                     threadCount = m_threadSpaceWidth*m_threadSpaceHeight;
1157                 }
1158                 m_threadCoordinates[i] = MOS_NewArray(CM_HAL_SCOREBOARD, threadCount);
1159                 if (m_threadCoordinates[i])
1160                 {
1161                     CmSafeMemSet(m_threadCoordinates[i], 0, sizeof(CM_HAL_SCOREBOARD)* threadCount);
1162                 }
1163                 else
1164                 {
1165                     CM_ASSERTMESSAGE("Error: Pointer to thread coordinates is null.");
1166                     hr = CM_NULL_POINTER;
1167                     goto finish;
1168                 }
1169 
1170                 m_dependencyMasks[i] = MOS_NewArray(CM_HAL_MASK_AND_RESET, threadCount);
1171                 if( m_dependencyMasks[i] )
1172                 {
1173                     CmSafeMemSet(m_dependencyMasks[i], 0, sizeof(CM_HAL_MASK_AND_RESET) * threadCount);
1174                 }
1175                 else
1176                 {
1177                     CM_ASSERTMESSAGE("Error: Pointer to dependency masks is null.");
1178                     hr = CM_NULL_POINTER;
1179                     goto finish;
1180                 }
1181             }
1182 
1183             CM_THREAD_SPACE_UNIT *threadSpaceUnit = nullptr;
1184             threadSpaceRT->GetThreadSpaceSize(width, height);
1185             threadSpaceRT->GetThreadSpaceUnit(threadSpaceUnit);
1186 
1187             uint32_t *boardOrder = nullptr;
1188             threadSpaceRT->GetBoardOrder(boardOrder);
1189             for (uint32_t tIndex=0; tIndex < height*width; tIndex ++)
1190             {
1191                 kernelInThreadSpace = static_cast<CmKernelRT *>(threadSpaceUnit[boardOrder[tIndex]].kernel);
1192                 if (kernelInThreadSpace == nullptr)
1193                 {
1194                     if (threadSpaceRT->GetNeedSetKernelPointer())
1195                     {
1196                         kernelInThreadSpace = threadSpaceRT->GetKernelPointer();
1197                     }
1198                     if (kernelInThreadSpace == nullptr)
1199                     {
1200                         CM_ASSERTMESSAGE("Error: Invalid kernel pointer in task.");
1201                         hr = CM_NULL_POINTER;
1202                         goto finish;
1203                     }
1204                 }
1205                 uint32_t kIndex = kernelInThreadSpace->GetIndexInTask();
1206 
1207                 m_threadCoordinates[kIndex][kernelCoordinateIndex[kIndex]].x
1208                   = threadSpaceUnit[boardOrder[tIndex]].scoreboardCoordinates.x;
1209                 m_threadCoordinates[kIndex][kernelCoordinateIndex[kIndex]].y
1210                   = threadSpaceUnit[boardOrder[tIndex]].scoreboardCoordinates.y;
1211                 m_threadCoordinates[kIndex][kernelCoordinateIndex[kIndex]].mask
1212                   = threadSpaceUnit[boardOrder[tIndex]].dependencyMask;
1213                 m_threadCoordinates[kIndex][kernelCoordinateIndex[kIndex]].resetMask
1214                   = threadSpaceUnit[boardOrder[tIndex]].reset;
1215                 m_threadCoordinates[kIndex][kernelCoordinateIndex[kIndex]].color
1216                   = threadSpaceUnit[boardOrder[tIndex]].scoreboardColor;
1217                 m_threadCoordinates[kIndex][kernelCoordinateIndex[kIndex]].sliceSelect
1218                   = threadSpaceUnit[boardOrder[tIndex]].sliceDestinationSelect;
1219                 m_threadCoordinates[kIndex][kernelCoordinateIndex[kIndex]].subSliceSelect
1220                   = threadSpaceUnit[boardOrder[tIndex]].subSliceDestinationSelect;
1221                 m_dependencyMasks[kIndex][kernelCoordinateIndex[kIndex]].mask
1222                   = threadSpaceUnit[boardOrder[tIndex]].dependencyMask;
1223                 m_dependencyMasks[kIndex][kernelCoordinateIndex[kIndex]].resetMask
1224                   = threadSpaceUnit[boardOrder[tIndex]].reset;
1225                 kernelCoordinateIndex[kIndex] ++;
1226             }
1227 
1228             MosSafeDeleteArray(kernelCoordinateIndex);
1229         }
1230         else
1231         {
1232             CM_ASSERTMESSAGE("Error: Failed to create thread space data.");
1233             hr = CM_FAILURE;
1234             goto finish;
1235         }
1236 
1237         m_isThreadCoordinatesExisted = true;
1238     }
1239     else
1240     {
1241         m_threadCoordinates = nullptr;
1242         m_dependencyMasks = nullptr;
1243         m_isThreadCoordinatesExisted = false;
1244     }
1245 
1246     if (threadSpaceRT->IsDependencySet())
1247     {
1248         threadSpaceRT->GetDependencyPatternType(m_dependencyPattern);
1249     }
1250 
1251     threadSpaceRT->GetColorCountMinusOne(m_colorCountMinusOne);
1252     threadSpaceRT->GetMediaWalkerGroupSelect(m_mediaWalkerGroupSelect);
1253 
1254     threadSpaceRT->GetWalkingPattern(m_walkingPattern);
1255 
1256     m_mediaWalkerParamsSet = threadSpaceRT->CheckWalkingParametersSet();
1257     if( m_mediaWalkerParamsSet )
1258     {
1259         CM_WALKING_PARAMETERS tmpMWParams;
1260         CM_CHK_CMSTATUS_GOTOFINISH(threadSpaceRT->GetWalkingParameters(tmpMWParams));
1261         CmSafeMemCopy(&m_walkingParameters, &tmpMWParams, sizeof(tmpMWParams));
1262     }
1263 
1264     m_dependencyVectorsSet = threadSpaceRT->CheckDependencyVectorsSet();
1265     if( m_dependencyVectorsSet )
1266     {
1267         CM_HAL_DEPENDENCY tmpDepVectors;
1268         CM_CHK_CMSTATUS_GOTOFINISH(threadSpaceRT->GetDependencyVectors(tmpDepVectors));
1269         CmSafeMemCopy(&m_dependencyVectors, &tmpDepVectors, sizeof(tmpDepVectors));
1270     }
1271 
1272 finish:
1273     if(hr != CM_SUCCESS)
1274     {
1275         if(m_threadCoordinates )
1276         {
1277             for (i = 0; i< m_kernelCount; i++)
1278             {
1279                 MosSafeDeleteArray(m_threadCoordinates[i]);
1280             }
1281         }
1282 
1283         if(m_dependencyMasks)
1284         {
1285             for (i = 0; i< m_kernelCount; i++)
1286             {
1287                 MosSafeDeleteArray(m_dependencyMasks[i]);
1288             }
1289         }
1290         MosSafeDeleteArray(m_threadCoordinates);
1291         MosSafeDeleteArray(m_dependencyMasks);
1292         MosSafeDeleteArray(kernelCoordinateIndex);
1293     }
1294     return hr;
1295 }
1296 
1297 //*-----------------------------------------------------------------------------
1298 //| Purpose:    Get thread space's coordinates
1299 //| Returns:    CM_SUCCESS.
1300 //*-----------------------------------------------------------------------------
GetKernelCoordinates(const uint32_t index,void * & kernelCoordinates)1301 int32_t CmTaskInternal::GetKernelCoordinates(const uint32_t index, void  *&kernelCoordinates)
1302 {
1303     if (m_threadCoordinates != nullptr)
1304     {
1305         kernelCoordinates = (void *)m_threadCoordinates[index];
1306     }
1307     else
1308     {
1309         kernelCoordinates = nullptr;
1310     }
1311 
1312     return CM_SUCCESS;
1313 }
1314 
1315 //*-----------------------------------------------------------------------------
1316 //| Purpose:    Get thread space's dependency masks
1317 //| Returns:    CM_SUCCESS.
1318 //*-----------------------------------------------------------------------------
GetKernelDependencyMasks(const uint32_t index,void * & kernelDependencyMasks)1319 int32_t CmTaskInternal::GetKernelDependencyMasks(const uint32_t index, void  *&kernelDependencyMasks)
1320 {
1321     if (m_dependencyMasks != nullptr)
1322     {
1323         kernelDependencyMasks = (void *)m_dependencyMasks[index];
1324     }
1325     else
1326     {
1327         kernelDependencyMasks = nullptr;
1328     }
1329 
1330     return CM_SUCCESS;
1331 }
1332 
1333 //*-----------------------------------------------------------------------------
1334 //| Purpose:    Get dependency pattern
1335 //| Returns:    CM_SUCCESS.
1336 //*-----------------------------------------------------------------------------
GetDependencyPattern(CM_DEPENDENCY_PATTERN & dependencyPattern)1337 int32_t CmTaskInternal::GetDependencyPattern(CM_DEPENDENCY_PATTERN &dependencyPattern)
1338 {
1339     dependencyPattern = m_dependencyPattern;
1340     return CM_SUCCESS;
1341 }
1342 
1343 //*-----------------------------------------------------------------------------
1344 //| Purpose:    Get media walking pattern
1345 //| Returns:    CM_SUCCESS.
1346 //*-----------------------------------------------------------------------------
GetWalkingPattern(CM_WALKING_PATTERN & walkingPattern)1347 int32_t CmTaskInternal::GetWalkingPattern(CM_WALKING_PATTERN &walkingPattern)
1348 {
1349     walkingPattern = m_walkingPattern;
1350     return CM_SUCCESS;
1351 }
1352 
1353 //*-----------------------------------------------------------------------------
1354 //| Purpose:    Get media walking parameters
1355 //| Returns:    CM_FAILURE if dest ptr is nullptr, CM_SUCCESS otherwise
1356 //*-----------------------------------------------------------------------------
GetWalkingParameters(CM_WALKING_PARAMETERS & walkingParameters)1357 int32_t CmTaskInternal::GetWalkingParameters(CM_WALKING_PARAMETERS &walkingParameters)
1358 {
1359     CmSafeMemCopy(&walkingParameters, &m_walkingParameters, sizeof(m_walkingParameters));
1360     return CM_SUCCESS;
1361 }
1362 
1363 //*-----------------------------------------------------------------------------
1364 //| Purpose:    Check to see if media walking parameters have been set
1365 //| Returns:    true if media walking parameters set, false otherwise
1366 //*-----------------------------------------------------------------------------
CheckWalkingParametersSet()1367 bool CmTaskInternal::CheckWalkingParametersSet( )
1368 {
1369     return m_mediaWalkerParamsSet;
1370 }
1371 
1372 //*-----------------------------------------------------------------------------
1373 //| Purpose:    Get dependency vectors
1374 //| Returns:    CM_FAILURE if dest ptr is nullptr, CM_SUCCESS otherwise
1375 //*-----------------------------------------------------------------------------
GetDependencyVectors(CM_HAL_DEPENDENCY & dependencyVectors)1376 int32_t CmTaskInternal::GetDependencyVectors(CM_HAL_DEPENDENCY &dependencyVectors)
1377 {
1378     CmSafeMemCopy(&dependencyVectors, &m_dependencyVectors, sizeof(m_dependencyVectors));
1379     return CM_SUCCESS;
1380 }
1381 
1382 //*-----------------------------------------------------------------------------
1383 //| Purpose:    Check to see if dependency vectors have been set
1384 //| Returns:    true if dependency vectors are set, false otherwise
1385 //*-----------------------------------------------------------------------------
CheckDependencyVectorsSet()1386 bool CmTaskInternal::CheckDependencyVectorsSet( )
1387 {
1388     return m_dependencyVectorsSet;
1389 }
1390 
1391 //*-----------------------------------------------------------------------------
1392 //| Purpose:    Get the total thread count
1393 //| Returns:    CM_SUCCESS.
1394 //*-----------------------------------------------------------------------------
GetTotalThreadCount(uint32_t & totalThreadCount)1395 int32_t CmTaskInternal::GetTotalThreadCount( uint32_t& totalThreadCount )
1396 {
1397     totalThreadCount = m_totalThreadCount;
1398 
1399     return CM_SUCCESS;
1400 }
1401 
1402 //*-----------------------------------------------------------------------------
1403 //| Purpose:    Get the width,height of thread space
1404 //| Returns:    CM_SUCCESS.
1405 //*-----------------------------------------------------------------------------
1406 
GetThreadSpaceSize(uint32_t & width,uint32_t & height)1407 int32_t CmTaskInternal::GetThreadSpaceSize(uint32_t& width, uint32_t& height )
1408 {
1409     width = m_threadSpaceWidth;
1410     height = m_threadSpaceHeight;
1411 
1412     return CM_SUCCESS;
1413 }
1414 
1415 //*-----------------------------------------------------------------------------
1416 //| Purpose:    Get the color count minus one of the thread space
1417 //|             Used to dispatch multiple sets of dependency threads
1418 //|             for media walker
1419 //| Returns:    CM_SUCCESS.
1420 //*-----------------------------------------------------------------------------
1421 
GetColorCountMinusOne(uint32_t & colorCount)1422 int32_t CmTaskInternal::GetColorCountMinusOne( uint32_t& colorCount )
1423 {
1424     colorCount = m_colorCountMinusOne;
1425 
1426     return CM_SUCCESS;
1427 }
1428 
1429 //*-----------------------------------------------------------------------------
1430 //| Purpose:    Whether thread space is created
1431 //| Returns:    Boolean.
1432 //*-----------------------------------------------------------------------------
1433 
IsThreadSpaceCreated(void)1434 bool CmTaskInternal::IsThreadSpaceCreated(void )
1435 {
1436     return m_isThreadSpaceCreated;
1437 }
1438 
1439 //*-----------------------------------------------------------------------------
1440 //| Purpose:    Whether thread coordinates are existed
1441 //| Returns:    Boolean.
1442 //*-----------------------------------------------------------------------------
IsThreadCoordinatesExisted(void)1443 bool CmTaskInternal::IsThreadCoordinatesExisted(void)
1444 {
1445     return m_isThreadCoordinatesExisted;
1446 }
1447 
1448 //*-----------------------------------------------------------------------------
1449 //| Purpose:    Whether thread coordinates are existed
1450 //| Returns:    Result of operation.
1451 //*-----------------------------------------------------------------------------
1452 
GetThreadGroupSpaceSize(uint32_t & threadSpaceWidth,uint32_t & threadSpaceHeight,uint32_t & threadSpaceDepth,uint32_t & groupSpaceWidth,uint32_t & groupSpaceHeight,uint32_t & groupSpaceDepth)1453 int32_t CmTaskInternal::GetThreadGroupSpaceSize(uint32_t& threadSpaceWidth, uint32_t& threadSpaceHeight,
1454                                                 uint32_t& threadSpaceDepth, uint32_t& groupSpaceWidth,
1455                                                 uint32_t& groupSpaceHeight, uint32_t& groupSpaceDepth)
1456 {
1457     threadSpaceWidth = m_threadSpaceWidth;
1458     threadSpaceHeight = m_threadSpaceHeight;
1459     threadSpaceDepth  = m_threadSpaceDepth;
1460     groupSpaceWidth = m_groupSpaceWidth;
1461     groupSpaceHeight = m_groupSpaceHeight;
1462     groupSpaceDepth = m_groupSpaceDepth;
1463     return CM_SUCCESS;
1464 }
1465 
1466 //*-----------------------------------------------------------------------------
1467 //| Purpose:    Get the size of sharedlocalmemory
1468 //| Returns:    CM_SUCCESS.
1469 //*-----------------------------------------------------------------------------
GetSLMSize(uint32_t & slmSize)1470 int32_t CmTaskInternal::GetSLMSize(uint32_t& slmSize)
1471 {
1472     slmSize = m_slmSize;
1473     return CM_SUCCESS;
1474 }
1475 
1476 //*-----------------------------------------------------------------------------
1477 //| Purpose:    Get the size of spill memory used
1478 //| Returns:    CM_SUCCESS.
1479 //*-----------------------------------------------------------------------------
GetSpillMemUsed(uint32_t & spillMemUsed)1480 int32_t CmTaskInternal::GetSpillMemUsed(uint32_t& spillMemUsed)
1481 {
1482     spillMemUsed = m_spillMemUsed;
1483     return CM_SUCCESS;
1484 }
1485 
1486 //*-----------------------------------------------------------------------------
1487 //| Purpose:    Get the hints for EnqueueWithHints
1488 //| Returns:    CM_SUCCESS.
1489 //*-----------------------------------------------------------------------------
GetHints(uint32_t & hints)1490 int32_t CmTaskInternal::GetHints(uint32_t& hints)
1491 {
1492     hints = m_hints;
1493     return CM_SUCCESS;
1494 }
1495 
1496 //*-----------------------------------------------------------------------------
1497 //| Purpose:    Gets the number of tasks generated for EnqueueWithHints
1498 //|             Used when splitting large task to smaller tasks
1499 //| Returns:    CM_SUCCESS.
1500 //*-----------------------------------------------------------------------------
GetNumTasksGenerated(uint32_t & numTasksGenerated)1501 int32_t CmTaskInternal::GetNumTasksGenerated(uint32_t& numTasksGenerated)
1502 {
1503     numTasksGenerated = m_numTasksGenerated;
1504     return CM_SUCCESS;
1505 }
1506 
1507 //*-----------------------------------------------------------------------------
1508 //| Purpose:    Gets whether or not this task is the last task for EnqueueWithHints
1509 //|             Used to identify last smaller task when splitting large task
1510 //| Returns:    CM_SUCCESS.
1511 //*-----------------------------------------------------------------------------
GetLastTask(bool & isLastTask)1512 int32_t CmTaskInternal::GetLastTask(bool& isLastTask)
1513 {
1514     isLastTask = m_isLastTask;
1515     return CM_SUCCESS;
1516 }
1517 
1518 //*-----------------------------------------------------------------------------
1519 //| Purpose:    Whether thread group space is created
1520 //| Returns:    Value.
1521 //*-----------------------------------------------------------------------------
IsThreadGroupSpaceCreated(void)1522 bool CmTaskInternal::IsThreadGroupSpaceCreated(void)
1523 {
1524     return m_isThreadGroupSpaceCreated;
1525 }
1526 
1527 //*-----------------------------------------------------------------------------
1528 //| Purpose:    Allocate Space to record kernel surface's information
1529 //| Returns:    result of operation.
1530 //*-----------------------------------------------------------------------------
AllocateKernelSurfInfo()1531 int32_t CmTaskInternal::AllocateKernelSurfInfo()
1532 {
1533     //Allocate Surf info array
1534     m_kernelSurfInfo.kernelNum = m_kernelCount;
1535     m_kernelSurfInfo.surfEntryInfosArray
1536       = (CM_HAL_SURFACE_ENTRY_INFO_ARRAY*)MOS_AllocAndZeroMemory(m_kernelCount *
1537                                                                  sizeof(CM_HAL_SURFACE_ENTRY_INFO_ARRAY));
1538     if(m_kernelSurfInfo.surfEntryInfosArray == nullptr)
1539     {
1540 
1541         CM_ASSERTMESSAGE("Error: Mem allocation fail.");
1542         return CM_OUT_OF_HOST_MEMORY;
1543     }
1544 
1545     for( uint32_t i = 0; i < m_kernelCount; i ++ )
1546     {
1547         CmKernelRT * tempCmKernel = nullptr;
1548         this->GetKernel(i, tempCmKernel);
1549         if(tempCmKernel == nullptr)
1550         {
1551             CM_ASSERTMESSAGE("Error: Invalid kernel pointer.");
1552             return CM_FAILURE;
1553         }
1554 
1555         CM_ARG* arg=NULL;
1556         tempCmKernel->GetArgs( arg );
1557 
1558         uint32_t argCount = 0;
1559         tempCmKernel->GetArgCount( argCount);
1560         //allocate memory for non_static buffer&2D&3D
1561         uint32_t surfEntryNum = 0;
1562         for( uint32_t j = 0; j < argCount; j ++ )
1563         {
1564             switch(arg[ j ].unitKind)
1565             {
1566                 case    ARG_KIND_SURFACE_1D:
1567                         surfEntryNum = surfEntryNum + arg[ j ].unitCount * arg[j].unitSize/sizeof(int);
1568                         break;
1569 
1570                 case    ARG_KIND_SURFACE_2D:
1571                 case    ARG_KIND_SURFACE_2D_UP:
1572                 case    ARG_KIND_SURFACE_3D:
1573                 case    ARG_KIND_SURFACE_SAMPLER8X8_AVS:
1574                 case    ARG_KIND_SURFACE_SAMPLER8X8_VA:
1575                         surfEntryNum = surfEntryNum + 3 * arg[ j ].unitCount * arg[j].unitSize/sizeof(int);//one 2D or 3D can have upto 3 planes
1576                         break;
1577 
1578                 case    ARG_KIND_SURFACE_VME:
1579                         surfEntryNum = surfEntryNum + 24 * arg[ j ].unitCount;//surfaceVME will use upto 8 surfaces, each one can have upto 3 planes
1580                         break;
1581 
1582                 default:
1583                     break;
1584             }
1585         }
1586         CM_HAL_SURFACE_ENTRY_INFO_ARRAY* tempArray =  m_kernelSurfInfo.surfEntryInfosArray;
1587         if(surfEntryNum>0)
1588         {
1589             tempArray[i].maxEntryNum = surfEntryNum;
1590             tempArray[i].surfEntryInfos = (CM_SURFACE_DETAILS*)MOS_AllocAndZeroMemory(surfEntryNum*sizeof(CM_SURFACE_DETAILS));
1591 
1592             if(tempArray[i].surfEntryInfos == nullptr)
1593             {
1594                 CM_ASSERTMESSAGE("Error: Mem allocation fail.");
1595                 return CM_OUT_OF_HOST_MEMORY;
1596             }
1597 
1598         }
1599 
1600         //allocate memory for those 7 static buffers
1601         uint32_t globalBufNum=CM_GLOBAL_SURFACE_NUMBER + CM_GTPIN_BUFFER_NUM;
1602         tempArray[i].globalSurfNum=globalBufNum;
1603         tempArray[i].globalSurfInfos = (CM_SURFACE_DETAILS*)MOS_AllocAndZeroMemory(
1604                                 globalBufNum*sizeof(CM_SURFACE_DETAILS));
1605         if(tempArray[i].globalSurfInfos == nullptr)
1606         {
1607             CM_ASSERTMESSAGE("Mem allocation fail.");
1608             return CM_OUT_OF_HOST_MEMORY;
1609         }
1610     }
1611     return CM_SUCCESS;
1612 }
1613 
GetKernelSurfInfo(CM_HAL_SURFACE_ENTRY_INFO_ARRAYS & surfEntryInfoArray)1614 int32_t CmTaskInternal::GetKernelSurfInfo(CM_HAL_SURFACE_ENTRY_INFO_ARRAYS & surfEntryInfoArray)
1615 {
1616     surfEntryInfoArray = m_kernelSurfInfo;
1617     return CM_SUCCESS;
1618 }
1619 
ClearKernelSurfInfo()1620 int32_t CmTaskInternal::ClearKernelSurfInfo()
1621 {
1622     if (m_kernelSurfInfo.surfEntryInfosArray == nullptr)
1623     { // if surfEntryInfosArray is empty, return directly
1624         return CM_SUCCESS;
1625     }
1626 
1627     //free memory
1628     for( uint32_t i = 0; i < m_kernelCount; i ++ )
1629     {
1630         if (m_kernelSurfInfo.surfEntryInfosArray[i].surfEntryInfos != nullptr)
1631         {
1632             MosSafeDelete(m_kernelSurfInfo.surfEntryInfosArray[i].surfEntryInfos);
1633         }
1634         if (m_kernelSurfInfo.surfEntryInfosArray[i].globalSurfInfos!= nullptr)
1635         {
1636             MosSafeDelete(m_kernelSurfInfo.surfEntryInfosArray[i].globalSurfInfos);
1637         }
1638     }
1639 
1640     MosSafeDelete(m_kernelSurfInfo.surfEntryInfosArray);
1641 
1642     m_kernelSurfInfo.kernelNum = 0 ;
1643     m_kernelSurfInfo.surfEntryInfosArray = nullptr;
1644 
1645     return CM_SUCCESS;
1646 }
1647 
GetTaskType(uint32_t & taskType)1648 int32_t CmTaskInternal::GetTaskType(uint32_t& taskType)
1649 {
1650     taskType = m_taskType;
1651 
1652     return CM_SUCCESS;
1653 }
1654 
1655 //*-----------------------------------------------------------------------------
1656 //| Purpose:    Get vebox state
1657 //| Returns:    Result of operation.
1658 //*-----------------------------------------------------------------------------
GetVeboxState(CM_VEBOX_STATE & veboxState)1659 int32_t CmTaskInternal::GetVeboxState(CM_VEBOX_STATE &veboxState)
1660 {
1661     veboxState = m_veboxState;
1662 
1663     return CM_SUCCESS;
1664 }
1665 
GetVeboxParam(CmBufferUP * & veboxParam)1666 int32_t CmTaskInternal::GetVeboxParam(CmBufferUP * &veboxParam)
1667 {
1668     veboxParam = m_veboxParam;
1669 
1670     return CM_SUCCESS;
1671 }
1672 
GetVeboxSurfaceData(CM_VEBOX_SURFACE_DATA & veboxSurfaceData)1673 int32_t CmTaskInternal::GetVeboxSurfaceData(CM_VEBOX_SURFACE_DATA &veboxSurfaceData)
1674 {
1675     veboxSurfaceData = m_veboxSurfaceData;
1676     return CM_SUCCESS;
1677 }
1678 
GetSyncBitmap()1679 uint64_t CmTaskInternal::GetSyncBitmap()
1680 {
1681     return m_ui64SyncBitmap;
1682 }
1683 
GetConditionalEndBitmap()1684 uint64_t CmTaskInternal::GetConditionalEndBitmap()
1685 {
1686     return m_ui64ConditionalEndBitmap;
1687 }
1688 
GetConditionalEndInfo()1689 CM_HAL_CONDITIONAL_BB_END_INFO* CmTaskInternal::GetConditionalEndInfo()
1690 {
1691     return m_conditionalEndInfo;
1692 }
1693 
1694 //*-----------------------------------------------------------------------------
1695 //| Purpose:    Set power option for this task
1696 //| Returns:    Result of operation.
1697 //*-----------------------------------------------------------------------------
SetPowerOption(PCM_POWER_OPTION powerOption)1698 int32_t CmTaskInternal::SetPowerOption( PCM_POWER_OPTION powerOption )
1699 {
1700     if (powerOption == nullptr)
1701     {
1702         CM_ASSERTMESSAGE("Error: Pointer to power option is null.");
1703         return CM_NULL_POINTER;
1704     }
1705     CmSafeMemCopy( &m_powerOption, powerOption, sizeof( m_powerOption ) );
1706     return CM_SUCCESS;
1707 }
1708 
1709 //*-----------------------------------------------------------------------------
1710 //| Purpose:    Get power option for this task
1711 //| Returns:    Pointer to power option.
1712 //*-----------------------------------------------------------------------------
GetPowerOption()1713 PCM_POWER_OPTION CmTaskInternal::GetPowerOption()
1714 {
1715     return &m_powerOption;
1716 }
1717 
1718 #if _DEBUG
// Printable names for dependency patterns; debug builds only.
// DisplayThreadSpaceData() indexes this table with m_dependencyPattern after
// checking it is <= CM_WAVEFRONT26, so the entry order must follow the
// dependency pattern values up to CM_WAVEFRONT26.
const char *gDependencyPatternString[] =
{
    "DEPENDENCY_NONE",          // [0]
    "DEPENDENCY_WAVEFRONT45",   // [1]
    "DEPENDENCY_WAVEFRONT26"    // [2]
};
1725 
1726 //Only for debugging
DisplayThreadSpaceData(uint32_t width,uint32_t height)1727 int32_t CmTaskInternal::DisplayThreadSpaceData(uint32_t width, uint32_t height)
1728 {
1729     if (m_threadCoordinates != nullptr)
1730     {
1731         CM_NORMALMESSAGE("Score board[Kernel x: (x1, y1), (x2, y2)...]:");
1732         for (uint32_t i = 0; i < m_kernelCount; i ++)
1733         {
1734             CmKernelRT *kernelRT = nullptr;
1735             GetKernel(i, kernelRT);
1736             if(nullptr == kernelRT)
1737             {
1738                 return CM_FAILURE;
1739             }
1740 
1741             uint32_t threadCount;
1742             kernelRT->GetThreadCount(threadCount);
1743             if (threadCount == 0)
1744             {
1745                 threadCount = m_threadSpaceWidth*m_threadSpaceHeight;
1746             }
1747             CM_NORMALMESSAGE("Kernel %d: ", i);
1748             for (uint32_t j=0; j<threadCount; j++)
1749             {
1750                 CM_NORMALMESSAGE("(%d, %d) ", m_threadCoordinates[i][j].x, m_threadCoordinates[i][j].y);
1751             }
1752         }
1753     }
1754     else
1755     {
1756         CM_NORMALMESSAGE("Score Board is NULL.");
1757     }
1758 
1759     if (m_dependencyPattern <= CM_WAVEFRONT26)
1760     {
1761         CM_NORMALMESSAGE("Dependency Pattern: %s.", gDependencyPatternString[m_dependencyPattern]);
1762     }
1763     else
1764     {
1765         CM_NORMALMESSAGE("Dependency Pattern: UNASSIGNED.");
1766     }
1767 
1768     return CM_SUCCESS;
1769 }
1770 #endif
1771 
GetMediaWalkerGroupSelect(CM_MW_GROUP_SELECT & groupSelect)1772 int32_t CmTaskInternal::GetMediaWalkerGroupSelect(CM_MW_GROUP_SELECT& groupSelect)
1773 {
1774     groupSelect = m_mediaWalkerGroupSelect;
1775     return CM_SUCCESS;
1776 }
1777 
1778 //*-----------------------------------------------------------------------------
1779 //| Purpose:    Update surface state on task destroy stage
1780 //*-----------------------------------------------------------------------------
UpdateSurfaceStateOnTaskCreation()1781 int32_t CmTaskInternal::UpdateSurfaceStateOnTaskCreation()
1782 {
1783     CmSurfaceManager*   surfaceMgr = nullptr;
1784     int32_t             *surfState = nullptr;
1785 
1786     m_cmDevice->GetSurfaceManager(surfaceMgr);
1787     if (surfaceMgr == nullptr)
1788     {
1789         CM_ASSERTMESSAGE("Error: Pointer to surface manager is null.");
1790         return CM_NULL_POINTER;
1791     }
1792 
1793     uint32_t poolSize = surfaceMgr->GetSurfacePoolSize();
1794     uint32_t handle = 0;
1795     uint32_t curTaskSurfCnt = 0;
1796     void **  curTaskSurfResArray = nullptr;
1797     uint32_t  refSurfCnt = 0;
1798     uint32_t *refSurfHandleArray = nullptr;
1799     CM_RETURN_CODE hr = CM_SUCCESS;
1800 
1801     curTaskSurfResArray = (void **)MOS_AllocAndZeroMemory(sizeof(void *)*poolSize);
1802     CM_CHK_NULL_RETURN_CMERROR(curTaskSurfResArray);
1803 
1804     CSync* surfaceLock = m_cmDevice->GetSurfaceCreationLock();
1805 
1806     if (surfaceLock == nullptr)
1807     {
1808         CM_ASSERTMESSAGE("Error: Pointer to surface creation lock is null.");
1809         if (curTaskSurfResArray)
1810         {
1811             MOS_FreeMemory(curTaskSurfResArray);
1812             curTaskSurfResArray = nullptr;
1813         }
1814         return CM_NULL_POINTER;
1815     }
1816 
1817     surfaceLock->Acquire();
1818 
1819     // get the last tracker
1820     PCM_CONTEXT_DATA cmData = ( PCM_CONTEXT_DATA )m_cmDevice->GetAccelData();
1821     PCM_HAL_STATE state = nullptr;
1822     CM_CHK_NULL_GOTOFINISH_CMERROR(cmData);
1823     state = cmData->cmHalState;
1824     CM_CHK_NULL_GOTOFINISH_CMERROR(state);
1825 
1826     if (!m_isSurfaceUpdateDone)
1827     {
1828         for (uint32_t i = 0; i < poolSize; i++)
1829         {
1830             if (m_surfaceArray[i])
1831             {
1832                 CmSurface *surface = NULL;
1833                 CM_CHK_CMSTATUS_GOTOFINISH(surfaceMgr->GetSurface(i, surface));
1834                 if (surface == nullptr) // surface destroyed but not updated in kernel
1835                 {
1836                     continue;
1837                 }
1838                 if (m_taskType == CM_INTERNAL_TASK_VEBOX)
1839                 {
1840                     surface->SetVeboxTracker(state->renderHal->veBoxTrackerRes.currentTrackerId);
1841                 }
1842                 else
1843                 {
1844                     surface->SetRenderTracker(state->renderHal->currentTrackerIndex,
1845                                state->renderHal->trackerProducer.GetNextTracker(state->renderHal->currentTrackerIndex));
1846                 }
1847 
1848                 // Push this surface's resource into array for CP check.
1849                 switch (surface->Type())
1850                 {
1851                     case CM_ENUM_CLASS_TYPE_CMBUFFER_RT :
1852                         static_cast< CmBuffer_RT* >( surface )->GetHandle(handle);
1853                         curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->bufferTable[handle].osResource;
1854                         break;
1855 
1856                     case CM_ENUM_CLASS_TYPE_CMSURFACE2D :
1857                         static_cast< CmSurface2DRT* >( surface )->GetHandle(handle);
1858                         curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->umdSurf2DTable[handle].osResource;
1859                         break;
1860 
1861                     case CM_ENUM_CLASS_TYPE_CMSURFACE2DUP:
1862                         static_cast< CmSurface2DUPRT* >( surface )->GetHandle(handle);
1863                         curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->surf2DUPTable[handle].osResource;
1864                         break;
1865 
1866                     case CM_ENUM_CLASS_TYPE_CMSURFACE3D :
1867                         static_cast< CmSurface3DRT* >( surface )->GetHandle(handle);
1868                         curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->surf3DTable[handle].osResource;
1869                         break;
1870 
1871                     case CM_ENUM_CLASS_TYPE_CMSURFACEVME:
1872                         static_cast< CmSurfaceVme* >( surface )->GetIndexCurrent(handle);
1873                         curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->umdSurf2DTable[handle].osResource; // current surface
1874                         static_cast< CmSurfaceVme* >( surface )->GetIndexForwardCount(refSurfCnt);
1875                         static_cast< CmSurfaceVme* >( surface )->GetIndexForwardArray(refSurfHandleArray);
1876                         for(i = 0; i < refSurfCnt; i++)
1877                         {
1878                             curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->umdSurf2DTable[refSurfHandleArray[i]].osResource; // forward surfaces
1879                         }
1880                         static_cast< CmSurfaceVme* >( surface )->GetIndexForwardCount(refSurfCnt);
1881                         static_cast< CmSurfaceVme* >( surface )->GetIndexForwardArray(refSurfHandleArray);
1882                         for(i = 0; i < refSurfCnt; i++)
1883                         {
1884                             curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->umdSurf2DTable[refSurfHandleArray[i]].osResource; // backward surfaces
1885                         }
1886                         break;
1887 
1888                     case CM_ENUM_CLASS_TYPE_CMSURFACESAMPLER8X8:
1889                         static_cast< CmSurfaceSampler8x8* >( surface )->GetIndexCurrent(handle);
1890                         curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->umdSurf2DTable[handle].osResource;
1891                         break;
1892 
1893                     case CM_ENUM_CLASS_TYPE_CMSURFACESAMPLER:
1894                         static_cast< CmSurfaceSampler* >( surface )->GetHandle(handle);
1895                         SAMPLER_SURFACE_TYPE type;
1896                         static_cast< CmSurfaceSampler* >( surface )->GetSurfaceType(type);
1897                         if (type == SAMPLER_SURFACE_TYPE_2D)
1898                         {
1899                             curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->umdSurf2DTable[handle].osResource;
1900                         }
1901                         else if (type == SAMPLER_SURFACE_TYPE_2DUP)
1902                         {
1903                             curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->surf2DUPTable[handle].osResource;
1904                         }
1905                         else if (type == SAMPLER_SURFACE_TYPE_3D)
1906                         {
1907                             curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->surf3DTable[handle].osResource;
1908                         }
1909                         else
1910                         {
1911                             hr = CM_INVALID_ARG_INDEX;
1912                             goto finish;
1913                         }
1914                         break;
1915 
1916                     default:
1917                         break;
1918                 }
1919             }
1920         }
1921 
1922         m_isSurfaceUpdateDone = true;
1923     }
1924 
1925     // Check if there is any secure surface.
1926     if (curTaskSurfCnt > 0 && state->osInterface && state->osInterface->osCpInterface)
1927     {
1928         state->osInterface->osCpInterface->PrepareResources(curTaskSurfResArray, curTaskSurfCnt, nullptr, 0);
1929     }
1930 
1931 finish:
1932     surfaceLock->Release();
1933     if (curTaskSurfResArray)
1934     {
1935         MOS_FreeMemory(curTaskSurfResArray);
1936         curTaskSurfResArray = nullptr;
1937     }
1938 
1939     return hr;
1940 }
1941 
1942 #if CM_LOG_ON
Log()1943 std::string CmTaskInternal::Log()
1944 {
1945     std::ostringstream  oss;
1946 
1947     oss << "Enqueue Task Type:" << m_taskType
1948         << " Kernel Count:" << m_kernelCount
1949         << " Total Thread Count:" << m_totalThreadCount
1950         << " Sync Bit:"<<m_ui64SyncBitmap
1951         << " Conditional End Bit:" << m_ui64ConditionalEndBitmap
1952         << std::endl;
1953 
1954     switch(m_taskType)
1955     {
1956         case CM_INTERNAL_TASK_WITH_THREADSPACE:
1957             if ( m_isThreadSpaceCreated )
1958             {
1959                 oss << "Thread Space Width :" << m_threadSpaceWidth << " Height :" << m_threadSpaceHeight
1960                     << "Walker Patten :" << (int)m_walkingPattern << std::endl;
1961             }
1962             break;
1963 
1964         case CM_INTERNAL_TASK_WITH_THREADGROUPSPACE:
1965             if(m_isThreadGroupSpaceCreated)
1966             {
1967                 oss << "Thread Group Space Width:" << m_groupSpaceWidth << " Height:" << m_groupSpaceHeight
1968                     << "SLM Size:" <<m_slmSize << std::endl;
1969             }
1970             break;
1971 
1972         case CM_INTERNAL_TASK_VEBOX:
1973             break;
1974 
1975         case CM_INTERNAL_TASK_ENQUEUEWITHHINTS:
1976             oss << " Hints :" << m_hints
1977                 << " Thread Space Width :" << m_threadSpaceWidth
1978                 << " Height :" << m_threadSpaceHeight
1979                 << " Walker Patten :" << (int)m_walkingPattern
1980                 << std::endl;
1981             break;
1982 
1983         default:    // by default, assume the task is considered as general task: CM_INTERNAL_TASK_WITH_THREADSPACE
1984             break;
1985     }
1986 
1987     for (uint32_t i=0 ; i< m_kernelCount; i++)
1988     {
1989         CmKernelRT* kernel = (CmKernelRT*)m_kernels.GetElement( i );
1990 
1991         oss << kernel->Log(); // log each kernel
1992     }
1993 
1994     return oss.str();
1995 }
1996 
GetHalState()1997 CM_HAL_STATE* CmTaskInternal::GetHalState() { return m_cmDevice->GetHalState(); }
1998 
1999 #endif  // #if CM_LOG_ON
2000 
SurfaceDump(int32_t taskId)2001 void CmTaskInternal::SurfaceDump(int32_t taskId)
2002 {
2003 #if MDF_SURFACE_CONTENT_DUMP
2004     for (uint32_t i=0 ; i< m_kernelCount; i++)
2005     {
2006         CmKernelRT* kernel = (CmKernelRT*)m_kernels.GetElement( i );
2007         kernel->SurfaceDump(i, taskId);
2008     }
2009 #endif
2010 }
2011 
SetProperty(CM_TASK_CONFIG * taskConfig)2012 int32_t CmTaskInternal::SetProperty(CM_TASK_CONFIG * taskConfig)
2013 {
2014     if (taskConfig == nullptr)
2015     {
2016         CM_ASSERTMESSAGE("Error: Pointer to task config is null.");
2017         return CM_NULL_POINTER;
2018     }
2019     CmSafeMemCopy(&m_taskConfig, taskConfig, sizeof(m_taskConfig));
2020     return CM_SUCCESS;
2021 }
2022 
GetProperty(CM_TASK_CONFIG & taskConfig)2023 int32_t CmTaskInternal::GetProperty(CM_TASK_CONFIG &taskConfig)
2024 {
2025     taskConfig = m_taskConfig;
2026     return CM_SUCCESS;
2027 }
2028 
GetMediaStatePtr()2029 void  *CMRT_UMD::CmTaskInternal::GetMediaStatePtr()
2030 {
2031     return m_mediaStatePtr;
2032 }
2033 }  // namespace
2034