1 /*
2 * Copyright (c) 2017-2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file cm_task_internal.cpp
24 //! \brief Contains Class CmTaskInternal definitions
25 //!
26
27 #include "cm_task_internal.h"
28
29 #include "cm_kernel_rt.h"
30 #include "cm_mem.h"
31 #include "cm_event_rt.h"
32 #include "cm_device_rt.h"
33 #include "cm_kernel_data.h"
34 #include "cm_thread_space_rt.h"
35 #include "cm_group_space.h"
36 #include "cm_vebox_rt.h"
37 #include "cm_vebox_data.h"
38 #include "cm_queue_rt.h"
39 #include "cm_surface_manager.h"
40 #include "cm_buffer_rt.h"
41 #include "cm_surface_2d_rt.h"
42 #include "cm_surface_2d_up_rt.h"
43 #include "cm_surface_3d_rt.h"
44 #include "cm_surface_vme.h"
45 #include "cm_surface_sampler.h"
46 #include "cm_surface_sampler8x8.h"
47 #include "mos_os_cp_interface_specific.h"
48
49 namespace CMRT_UMD
50 {
51 //*-----------------------------------------------------------------------------
52 //| Purpose: Create Task internal
53 //| Returns: Result of the operation.
54 //*-----------------------------------------------------------------------------
Create(const uint32_t kernelCount,const uint32_t totalThreadCount,CmKernelRT * kernelArray[],const CmThreadSpaceRT * threadSpace,CmDeviceRT * device,const uint64_t syncBitmap,CmTaskInternal * & task,const uint64_t conditionalEndBitmap,PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo)55 int32_t CmTaskInternal::Create(const uint32_t kernelCount, const uint32_t totalThreadCount,
56 CmKernelRT* kernelArray[], const CmThreadSpaceRT* threadSpace,
57 CmDeviceRT* device, const uint64_t syncBitmap, CmTaskInternal*& task,
58 const uint64_t conditionalEndBitmap,
59 PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo)
60 {
61 int32_t result = CM_SUCCESS;
62 task = new (std::nothrow) CmTaskInternal(kernelCount, totalThreadCount, kernelArray, device,
63 syncBitmap, conditionalEndBitmap, conditionalEndInfo,
64 nullptr);
65 if( task )
66 {
67 result = task->Initialize(threadSpace, false);
68 if( result != CM_SUCCESS )
69 {
70 CmTaskInternal::Destroy( task);
71 }
72 }
73 else
74 {
75 CM_ASSERTMESSAGE("Error: Failed to create CmTaskInternal due to out of system memory.");
76 result = CM_OUT_OF_HOST_MEMORY;
77 }
78 return result;
79 }
80
81 //*-----------------------------------------------------------------------------
82 //| Purpose: Create Task internal with Thread Group Space
83 //| Returns: Result of the operation.
84 //*-----------------------------------------------------------------------------
Create(const uint32_t kernelCount,const uint32_t totalThreadCount,CmKernelRT * kernelArray[],const CmThreadGroupSpace * threadGroupSpace,CmDeviceRT * device,const uint64_t syncBitmap,CmTaskInternal * & task,const uint64_t conditionalEndBitmap,PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo,const CM_EXECUTION_CONFIG * krnExecCfg)85 int32_t CmTaskInternal::Create( const uint32_t kernelCount, const uint32_t totalThreadCount,
86 CmKernelRT* kernelArray[], const CmThreadGroupSpace* threadGroupSpace,
87 CmDeviceRT* device, const uint64_t syncBitmap, CmTaskInternal*& task,
88 const uint64_t conditionalEndBitmap,
89 PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo,
90 const CM_EXECUTION_CONFIG* krnExecCfg)
91 {
92 int32_t result = CM_SUCCESS;
93 task = new (std::nothrow) CmTaskInternal(kernelCount, totalThreadCount, kernelArray, device,
94 syncBitmap, conditionalEndBitmap, conditionalEndInfo,
95 krnExecCfg);
96
97 if( task )
98 {
99 result = task->Initialize(threadGroupSpace);
100 if( result != CM_SUCCESS )
101 {
102 CmTaskInternal::Destroy( task);
103 }
104 }
105 else
106 {
107 CM_ASSERTMESSAGE("Error: Failed to create CmTaskInternal due to out of system memory.");
108 result = CM_OUT_OF_HOST_MEMORY;
109 }
110 return result;
111 }
112
Create(CmDeviceRT * device,CmVeboxRT * vebox,CmTaskInternal * & task)113 int32_t CmTaskInternal::Create( CmDeviceRT* device, CmVeboxRT* vebox, CmTaskInternal*& task )
114 {
115 int32_t result = CM_SUCCESS;
116 task = new (std::nothrow) CmTaskInternal(0, 0, nullptr, device, CM_NO_KERNEL_SYNC,
117 CM_NO_CONDITIONAL_END, nullptr, nullptr);
118 if( task )
119 {
120 result = task->Initialize(vebox);
121 if( result != CM_SUCCESS )
122 {
123 CmTaskInternal::Destroy( task);
124 }
125 }
126 else
127 {
128 CM_ASSERTMESSAGE("Error: Failed to create CmTaskInternal due to out of system memory.");
129 result = CM_OUT_OF_HOST_MEMORY;
130 }
131 return result;
132 }
133
134 //*-----------------------------------------------------------------------------
135 //| Purpose: Create Task internal with hints
136 //| Returns: Result of the operation.
137 //*-----------------------------------------------------------------------------
Create(const uint32_t kernelCount,const uint32_t totalThreadCount,CmKernelRT * kernelArray[],CmTaskInternal * & task,uint32_t numGeneratedTasks,bool isLastTask,uint32_t hints,CmDeviceRT * device)138 int32_t CmTaskInternal::Create(const uint32_t kernelCount, const uint32_t totalThreadCount,
139 CmKernelRT* kernelArray[], CmTaskInternal*& task,
140 uint32_t numGeneratedTasks, bool isLastTask, uint32_t hints,
141 CmDeviceRT* device)
142 {
143 int32_t result = CM_SUCCESS;
144 task = new (std::nothrow) CmTaskInternal(kernelCount, totalThreadCount, kernelArray, device,
145 CM_NO_KERNEL_SYNC, CM_NO_CONDITIONAL_END, nullptr, nullptr);
146 if ( task )
147 {
148 result = task->Initialize(hints, numGeneratedTasks, isLastTask);
149 if ( result != CM_SUCCESS )
150 {
151 CmTaskInternal::Destroy( task );
152 }
153 }
154 else
155 {
156 CM_ASSERTMESSAGE("Error: Failed to create CmTaskInternal due to out of system memory.");
157 result = CM_OUT_OF_HOST_MEMORY;
158 }
159 return result;
160 }
161
162 //*-----------------------------------------------------------------------------
163 //| Purpose: Destroy Task internal
164 //| Returns: None.
165 //*-----------------------------------------------------------------------------
Destroy(CmTaskInternal * & task)166 int32_t CmTaskInternal::Destroy( CmTaskInternal* &task )
167 {
168 CmSafeDelete( task );
169 return CM_SUCCESS;
170 }
171
172 //*-----------------------------------------------------------------------------
173 //| Purpose: Constructor of CmTaskInternal
174 //| Returns: None.
175 //*-----------------------------------------------------------------------------
CmTaskInternal(const uint32_t kernelCount,const uint32_t totalThreadCount,CmKernelRT * kernelArray[],CmDeviceRT * device,const uint64_t syncBitmap,const uint64_t conditionalEndBitmap,PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo,const CM_EXECUTION_CONFIG * krnExecCfg)176 CmTaskInternal::CmTaskInternal(const uint32_t kernelCount, const uint32_t totalThreadCount,
177 CmKernelRT* kernelArray[], CmDeviceRT* device,
178 const uint64_t syncBitmap, const uint64_t conditionalEndBitmap,
179 PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo,
180 const CM_EXECUTION_CONFIG* krnExecCfg) :
181 m_kernels( kernelCount ),
182 m_kernelData( kernelCount ),
183 m_kernelCount( kernelCount ),
184 m_totalThreadCount(totalThreadCount),
185 m_taskEvent( nullptr ),
186 m_isThreadSpaceCreated(false),
187 m_isThreadCoordinatesExisted(false),
188 m_threadSpaceWidth(0),
189 m_threadSpaceHeight(0),
190 m_threadSpaceDepth(0),
191 m_threadCoordinates(nullptr),
192 m_dependencyPattern(CM_NONE_DEPENDENCY),
193 m_walkingPattern(CM_WALK_DEFAULT),
194 m_mediaWalkerParamsSet( false ),
195 m_dependencyVectorsSet( false ),
196 m_dependencyMasks( nullptr ),
197 m_mediaWalkerGroupSelect(CM_MW_GROUP_NONE),
198 m_isThreadGroupSpaceCreated(false),
199 m_groupSpaceWidth(0),
200 m_groupSpaceHeight(0),
201 m_groupSpaceDepth(0),
202 m_slmSize(0),
203 m_spillMemUsed(0),
204 m_colorCountMinusOne( 0 ),
205 m_hints(0),
206 m_numTasksGenerated( 0 ),
207 m_isLastTask( false ),
208 m_ui64SyncBitmap (syncBitmap ),
209 m_ui64ConditionalEndBitmap(conditionalEndBitmap),
210 m_cmDevice( device ),
211 m_surfaceArray (nullptr),
212 m_isSurfaceUpdateDone(false),
213 m_taskType(CM_TASK_TYPE_DEFAULT),
214 m_mediaStatePtr( nullptr )
215 {
216 m_kernelSurfInfo.kernelNum = 0;
217 m_kernelSurfInfo.surfEntryInfosArray = nullptr;
218 m_kernelCurbeOffsetArray = MOS_NewArray(uint32_t, kernelCount);
219 CM_ASSERT(m_kernelCurbeOffsetArray != nullptr);
220
221 for( uint32_t i = 0 ; i < kernelCount; i ++ )
222 {
223 m_kernels.SetElement( i, kernelArray[ i ] );
224 m_kernelData.SetElement( i, nullptr );
225 }
226
227 CmSafeMemSet( &m_walkingParameters, 0, sizeof(m_walkingParameters));
228 CmSafeMemSet( &m_dependencyVectors, 0, sizeof(m_dependencyVectors));
229 CmSafeMemSet( &m_taskConfig, 0, sizeof(m_taskConfig));
230 if ( m_kernelCurbeOffsetArray != nullptr )
231 {
232 CmSafeMemSet( m_kernelCurbeOffsetArray, 0, sizeof(uint32_t) * kernelCount );
233 }
234
235 CmSafeMemSet(&m_taskProfilingInfo, 0, sizeof(m_taskProfilingInfo));
236
237 if (conditionalEndInfo != nullptr)
238 {
239 CmSafeMemCopy(&m_conditionalEndInfo, conditionalEndInfo, sizeof(m_conditionalEndInfo));
240 }
241 else
242 {
243 CmSafeMemSet(&m_conditionalEndInfo, 0, sizeof(m_conditionalEndInfo));
244 }
245
246 CmSafeMemSet(&m_veboxParam, 0, sizeof(m_veboxParam));
247 CmSafeMemSet(&m_veboxState, 0, sizeof(m_veboxState));
248 CmSafeMemSet(&m_veboxSurfaceData, 0, sizeof(m_veboxSurfaceData));
249 CmSafeMemSet(&m_powerOption, 0, sizeof(m_powerOption));
250
251 if (krnExecCfg != nullptr)
252 {
253 CmSafeMemCopy(&m_krnExecCfg, krnExecCfg, sizeof(m_krnExecCfg));
254 }
255 }
256
257 //*-----------------------------------------------------------------------------
258 //| Purpose: Destructor of CmTaskInternal
259 //| Returns: None.
260 //*-----------------------------------------------------------------------------
~CmTaskInternal(void)261 CmTaskInternal::~CmTaskInternal( void )
262 {
263
264 //Write Event Infos
265 VtuneWriteEventInfo();
266
267 //Release Profiling Info
268 VtuneReleaseProfilingInfo();
269
270 for( uint32_t i = 0; i < m_kernelCount; i ++ )
271 {
272 CmKernelRT *kernel = (CmKernelRT*)m_kernels.GetElement(i);
273 CmKernelData* kernelData = (CmKernelData*)m_kernelData.GetElement( i );
274 if(kernel && kernelData)
275 {
276 kernel->ReleaseKernelData(kernelData);
277 CmKernel *kernelBase = kernel;
278 m_cmDevice->DestroyKernel(kernelBase);
279 }
280 }
281 m_kernelData.Delete();
282 m_kernels.Delete();
283
284 MosSafeDeleteArray(m_kernelCurbeOffsetArray);
285
286 if( m_taskEvent )
287 {
288 CmEvent *eventBase = m_taskEvent;
289 CmQueueRT *cmQueue = nullptr;
290 m_taskEvent->GetQueue(cmQueue);
291 if (cmQueue)
292 {
293 cmQueue->DestroyEvent(eventBase); // need to update the m_EventArray
294 }
295 }
296
297 if(m_threadCoordinates){
298 for (uint32_t i=0; i<m_kernelCount; i++)
299 {
300 if (m_threadCoordinates[i])
301 {
302 MosSafeDeleteArray(m_threadCoordinates[i]);
303 }
304 }
305 MosSafeDeleteArray( m_threadCoordinates );
306 }
307
308 if( m_dependencyMasks )
309 {
310 for( uint32_t i = 0; i < m_kernelCount; ++i )
311 {
312 MosSafeDeleteArray(m_dependencyMasks[i]);
313 }
314 MosSafeDeleteArray( m_dependencyMasks );
315 }
316
317 if((m_kernelSurfInfo.kernelNum != 0)&&(m_kernelSurfInfo.surfEntryInfosArray != nullptr))
318 {
319 ClearKernelSurfInfo();
320 }
321
322 MosSafeDeleteArray(m_surfaceArray);
323
324 }
325
326 //*-----------------------------------------------------------------------------
327 //| Purpose: Initialize Class CmTaskInternal
328 //| Returns: None.
329 //*-----------------------------------------------------------------------------
Initialize(const CmThreadSpaceRT * threadSpace,bool isWithHints)330 int32_t CmTaskInternal::Initialize(const CmThreadSpaceRT* threadSpace, bool isWithHints)
331 {
332 uint32_t totalCurbeSize = 0;
333 uint32_t surfacePoolSize = 0;
334 uint32_t totalKernelBinarySize = 0;
335 uint32_t kernelCurbeSize = 0;
336 uint32_t kernelPayloadSize = 0;
337 CmSurfaceManager* surfaceMgr = nullptr;
338 int32_t result = CM_SUCCESS;
339 CM_HAL_MAX_VALUES* halMaxValues = nullptr;
340 CM_HAL_MAX_VALUES_EX* halMaxValuesEx = nullptr;
341 m_cmDevice->GetHalMaxValues( halMaxValues, halMaxValuesEx );
342
343 if (m_cmDevice->IsPrintEnable())
344 {
345 SurfaceIndex *printBufferIndex = nullptr;
346 m_cmDevice->GetPrintBufferIndex(printBufferIndex);
347 CM_ASSERT(printBufferIndex);
348 for (uint32_t i = 0; i < m_kernelCount; i++)
349 {
350 CmKernelRT* kernel = (CmKernelRT*)m_kernels.GetElement(i);
351 if(kernel == nullptr)
352 {
353 CM_ASSERTMESSAGE("Error: Invalid kernel pointer.");
354 return CM_FAILURE;
355 }
356 if(FAILED(kernel->SetStaticBuffer(CM_PRINTF_STATIC_BUFFER_ID, printBufferIndex)))
357 {
358 CM_ASSERTMESSAGE("Error: Failed to set static buffer.");
359 return CM_FAILURE;
360 }
361 }
362 }
363
364 m_cmDevice->GetSurfaceManager( surfaceMgr );
365 CM_CHK_NULL_RETURN_CMERROR(surfaceMgr);
366 surfacePoolSize = surfaceMgr->GetSurfacePoolSize();
367
368 m_surfaceArray = MOS_NewArray(bool, surfacePoolSize);
369 if (!m_surfaceArray)
370 {
371 CM_ASSERTMESSAGE("Error: Out of system memory.");
372 return CM_FAILURE;
373 }
374 CmSafeMemSet( m_surfaceArray, 0, surfacePoolSize * sizeof( bool ) );
375
376 for( uint32_t i = 0; i < m_kernelCount; i ++ )
377 {
378
379 CmKernelRT* kernel = (CmKernelRT*)m_kernels.GetElement( i );
380 if(kernel == nullptr)
381 {
382 CM_ASSERTMESSAGE("Error: Invalid kernel pointer.");
383 return CM_FAILURE;
384 }
385
386 uint32_t totalSize = 0;
387 CmKernelData* kernelData = nullptr;
388
389 if ( isWithHints )
390 {
391 CmThreadSpaceRT* kernelThreadSpace = nullptr;
392 kernel->GetThreadSpace(kernelThreadSpace);
393 if( kernelThreadSpace )
394 {
395 for(uint32_t j = i; j > 0; --j)
396 {
397 uint32_t width, height, myAdjY;
398 CmKernelRT* tmpKernel = (CmKernelRT*)m_kernels.GetElement( j-1 );
399 if( !tmpKernel )
400 {
401 CM_ASSERTMESSAGE("Error: Invalid kernel pointer.");
402 return CM_FAILURE;
403 }
404 tmpKernel->GetThreadSpace(kernelThreadSpace);
405 kernelThreadSpace->GetThreadSpaceSize(width, height);
406 myAdjY = kernel->GetAdjustedYCoord();
407 kernel->SetAdjustedYCoord(myAdjY + height);
408 }
409 }
410 }
411
412 if (threadSpace == nullptr)
413 {
414 CmThreadSpaceRT* kernelThreadSpace = nullptr;
415 kernel->GetThreadSpace(kernelThreadSpace);
416 if (kernelThreadSpace)
417 {
418 kernelThreadSpace->SetDependencyArgToKernel(kernel);
419 }
420 }
421
422 if (threadSpace != nullptr)
423 {
424 threadSpace->SetDependencyArgToKernel(kernel);
425 }
426
427 kernel->CollectKernelSurface();
428 result = kernel->CreateKernelData( kernelData, totalSize, threadSpace );
429 if( (kernelData == nullptr) || (result != CM_SUCCESS))
430 {
431 CM_ASSERTMESSAGE("Error: Failed to create kernel data.");
432 CmKernelData::Destroy( kernelData );
433 return result;
434 }
435
436 kernel->GetSizeInPayload( kernelPayloadSize );
437 kernel->GetSizeInCurbe( kernelCurbeSize );
438
439 if ( ( kernelCurbeSize + kernelPayloadSize ) > halMaxValues->maxArgByteSizePerKernel )
440 { //Failed, exceed the maximum of inline data
441 CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
442 return CM_EXCEED_KERNEL_ARG_SIZE_IN_BYTE;
443 }
444 else
445 {
446 kernelCurbeSize = kernel->GetAlignedCurbeSize( kernelCurbeSize );
447 totalCurbeSize += kernelCurbeSize;
448 }
449 m_kernelCurbeOffsetArray[ i ] = totalCurbeSize - kernelCurbeSize;
450
451 m_kernelData.SetElement( i, kernelData );
452
453 totalKernelBinarySize += kernel->GetKernelGenxBinarySize();
454 totalKernelBinarySize += CM_KERNEL_BINARY_PADDING_SIZE; //Padding is necessary after kernel binary to avoid page fault issue
455
456 bool *surfArray = nullptr;
457 kernel->GetKernelSurfaces(surfArray);
458 for (uint32_t j = 0; j < surfacePoolSize; j ++)
459 {
460 m_surfaceArray[j] |= surfArray[j];
461 }
462 kernel->ResetKernelSurfaces();
463
464 PCM_CONTEXT_DATA cmData = ( PCM_CONTEXT_DATA )m_cmDevice->GetAccelData();
465 PCM_HAL_STATE state = cmData->cmHalState;
466 PRENDERHAL_MEDIA_STATE mediaStatePtr = state->pfnGetMediaStatePtrForKernel( state, kernel );
467
468 if ( ( mediaStatePtr != nullptr ) && ( m_mediaStatePtr == nullptr ) )
469 {
470 m_mediaStatePtr = mediaStatePtr;
471 }
472 else if ( ( mediaStatePtr != nullptr ) && ( m_mediaStatePtr != nullptr ) )
473 {
474 CM_ASSERTMESSAGE( "Error: More than one media state heap are used in one task! User-provided state heap error.\n" );
475 return CM_INVALID_ARG_VALUE;
476 }
477 }
478
479 if (totalKernelBinarySize > halMaxValues->maxKernelBinarySize * halMaxValues->maxKernelsPerTask)
480 {
481 CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
482 return CM_EXCEED_MAX_KERNEL_SIZE_IN_BYTE;
483 }
484
485 if (threadSpace)
486 {
487 if(FAILED(this->CreateThreadSpaceData(threadSpace)))
488 {
489 CM_ASSERTMESSAGE("Error: Failed to create thread space data.");
490 return CM_FAILURE;
491 }
492 m_isThreadSpaceCreated = true;
493 }
494
495 UpdateSurfaceStateOnTaskCreation();
496
497 m_taskType = CM_INTERNAL_TASK_WITH_THREADSPACE;
498
499 if ( m_cmDevice->CheckGTPinEnabled())
500 {
501 AllocateKernelSurfInfo();
502 }
503
504 this->VtuneInitProfilingInfo(threadSpace);
505
506 return CM_SUCCESS;
507 }
508
509 //*-----------------------------------------------------------------------------
510 //| Purpose: Initialize Class CmTaskInternal with thread group space
511 //| Returns: None.
512 //*-----------------------------------------------------------------------------
Initialize(const CmThreadGroupSpace * threadGroupSpace)513 int32_t CmTaskInternal::Initialize(const CmThreadGroupSpace* threadGroupSpace)
514 {
515 uint32_t totalCurbeSize = 0;
516 uint32_t surfacePoolSize = 0;
517 uint32_t totalKernelBinarySize = 0;
518 uint32_t kernelCurbeSize = 0;
519 uint32_t kernelPayloadSize = 0;
520
521 CmSurfaceManager* surfaceMgr = nullptr;
522 CM_HAL_MAX_VALUES* halMaxValues = nullptr;
523 CM_HAL_MAX_VALUES_EX* halMaxValuesEx = nullptr;
524 m_cmDevice->GetHalMaxValues( halMaxValues, halMaxValuesEx );
525
526 m_cmDevice->GetSurfaceManager( surfaceMgr );
527 CM_CHK_NULL_RETURN_CMERROR( surfaceMgr );
528 surfacePoolSize = surfaceMgr->GetSurfacePoolSize();
529 m_surfaceArray = MOS_NewArray(bool, surfacePoolSize);
530 if (!m_surfaceArray)
531 {
532 CM_ASSERTMESSAGE("Error: Out of system memory.");
533 return CM_OUT_OF_HOST_MEMORY;
534 }
535 CmSafeMemSet( m_surfaceArray, 0, surfacePoolSize * sizeof( bool ) );
536
537 if (m_cmDevice->IsPrintEnable())
538 {
539 SurfaceIndex *printBufferIndex = nullptr;
540 m_cmDevice->GetPrintBufferIndex(printBufferIndex);
541 CM_ASSERT(printBufferIndex);
542 for (uint32_t i = 0; i < m_kernelCount; i++)
543 {
544 CmKernelRT* kernel = (CmKernelRT*)m_kernels.GetElement(i);
545 if(kernel == nullptr)
546 {
547 CM_ASSERTMESSAGE("Error: Invalid kernel pointer.");
548 return CM_FAILURE;
549 }
550 if(FAILED(kernel->SetStaticBuffer(CM_PRINTF_STATIC_BUFFER_ID, printBufferIndex)))
551 {
552 CM_ASSERTMESSAGE("Error: Failed to set static buffer.");
553 return CM_FAILURE;
554 }
555 }
556 }
557
558 for( uint32_t i = 0; i < m_kernelCount; i ++ )
559 {
560 CmKernelRT* kernel = (CmKernelRT*)m_kernels.GetElement( i );
561 if(kernel == nullptr)
562 {
563 CM_ASSERTMESSAGE("Error: Invalid kernel pointer.");
564 return CM_FAILURE;
565 }
566
567 kernel->CollectKernelSurface();
568
569 uint32_t totalSize = 0;
570 CmKernelData* kernelData = nullptr;
571
572 int32_t result = kernel->CreateKernelData( kernelData, totalSize, threadGroupSpace );
573 if(result != CM_SUCCESS)
574 {
575 CM_ASSERTMESSAGE("Error: Failed to create kernel data.");
576 CmKernelData::Destroy( kernelData );
577 return result;
578 }
579
580 kernelData->SetKernelDataSize(totalSize);
581
582 kernel->GetSizeInPayload(kernelPayloadSize);
583
584 PCM_HAL_KERNEL_PARAM halKernelParam = kernelData->GetHalCmKernelData();
585 if (halKernelParam->crossThreadConstDataLen + halKernelParam->curbeSizePerThread + kernelPayloadSize
586 > halMaxValues->maxArgByteSizePerKernel)
587 { //Failed, exceed the maximum of inline data
588 CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
589 return CM_EXCEED_KERNEL_ARG_SIZE_IN_BYTE;
590 }
591 else
592 {
593 kernel->GetSizeInCurbe(kernelCurbeSize);
594 kernelCurbeSize = kernel->GetAlignedCurbeSize(kernelCurbeSize);
595 totalCurbeSize += kernelCurbeSize;
596 }
597
598 m_kernelCurbeOffsetArray[ i ] = totalCurbeSize - kernelCurbeSize;
599
600 m_kernelData.SetElement( i, kernelData );
601
602 m_slmSize = kernel->GetSLMSize();
603
604 m_spillMemUsed = kernel->GetSpillMemUsed();
605
606 totalKernelBinarySize += kernel->GetKernelGenxBinarySize();
607 totalKernelBinarySize += CM_KERNEL_BINARY_PADDING_SIZE;
608
609 bool *surfArray = nullptr;
610 kernel->GetKernelSurfaces(surfArray);
611 for (uint32_t j = 0; j < surfacePoolSize; j ++)
612 {
613 m_surfaceArray[j] |= surfArray[j];
614 }
615 kernel->ResetKernelSurfaces();
616
617 PCM_CONTEXT_DATA cmData = ( PCM_CONTEXT_DATA )m_cmDevice->GetAccelData();
618 PCM_HAL_STATE state = cmData->cmHalState;
619 PRENDERHAL_MEDIA_STATE mediaStatePtr = state->pfnGetMediaStatePtrForKernel( state, kernel );
620
621 if ( ( mediaStatePtr != nullptr ) && ( m_mediaStatePtr == nullptr ) )
622 {
623 m_mediaStatePtr = mediaStatePtr;
624 }
625 else if ( ( mediaStatePtr != nullptr ) && ( m_mediaStatePtr != nullptr ) )
626 {
627 CM_ASSERTMESSAGE("Error: More than one media state heap are used in one task! User-provided state heap error.\n" );
628 return CM_INVALID_ARG_VALUE;
629 }
630 }
631
632 if( totalKernelBinarySize > halMaxValues->maxKernelBinarySize * halMaxValues->maxKernelsPerTask)
633 {
634 CM_ASSERTMESSAGE("Error: Invalid kernel arg size.");
635 return CM_EXCEED_MAX_KERNEL_SIZE_IN_BYTE;
636 }
637
638 UpdateSurfaceStateOnTaskCreation();
639
640 m_taskType = CM_INTERNAL_TASK_WITH_THREADGROUPSPACE;
641
642 if (threadGroupSpace)
643 {
644 threadGroupSpace->GetThreadGroupSpaceSize(m_threadSpaceWidth, m_threadSpaceHeight,
645 m_threadSpaceDepth, m_groupSpaceWidth,
646 m_groupSpaceHeight, m_groupSpaceDepth);
647 m_isThreadGroupSpaceCreated = true;
648 }
649
650 if ( m_cmDevice->CheckGTPinEnabled())
651 {
652 AllocateKernelSurfInfo();
653 }
654
655 this->VtuneInitProfilingInfo(threadGroupSpace);
656
657 return CM_SUCCESS;
658 }
659
660 //*-----------------------------------------------------------------------------
661 //| Purpose: Initialize Class CmTaskInternal
662 //| Returns: None.
663 //*-----------------------------------------------------------------------------
Initialize(CmVeboxRT * vebox)664 int32_t CmTaskInternal::Initialize(CmVeboxRT* vebox)
665 {
666 int32_t result = CM_SUCCESS;
667 CmSurfaceManager* surfaceMgr = nullptr;
668 uint32_t surfacePoolSize = 0;
669
670 m_cmDevice->GetSurfaceManager( surfaceMgr );
671 CM_CHK_NULL_RETURN_CMERROR( surfaceMgr );
672 surfacePoolSize = surfaceMgr->GetSurfacePoolSize();
673 m_surfaceArray = MOS_NewArray(bool, surfacePoolSize);
674 if (!m_surfaceArray)
675 {
676 CM_ASSERTMESSAGE("Error: Out of system memory.");
677 return CM_FAILURE;
678 }
679 CmSafeMemSet( m_surfaceArray, 0, surfacePoolSize * sizeof( bool ) );
680
681 CmBufferUP *paramBuffer = nullptr;
682
683 paramBuffer = vebox->GetParam();
684 m_veboxState = vebox->GetState();
685
686 m_veboxParam = paramBuffer;
687 m_taskType = CM_INTERNAL_TASK_VEBOX;
688
689 //Update used surfaces
690 for (int i = 0; i < VEBOX_SURFACE_NUMBER; i++)
691 {
692 CmSurface2DRT* surf = nullptr;
693 vebox->GetSurface(i, surf);
694 if (surf)
695 {
696 uint32_t surfaceHandle = 0;
697 SurfaceIndex* surfIndex = nullptr;
698 surf->GetIndex(surfIndex);
699 surf->GetHandle(surfaceHandle);
700 m_surfaceArray[surfIndex->get_data()] = true;
701 m_veboxSurfaceData.surfaceEntry[i].surfaceIndex = (uint16_t)surfaceHandle;
702 m_veboxSurfaceData.surfaceEntry[i].surfaceCtrlBits = vebox->GetSurfaceControlBits(i);
703 }
704 else
705 {
706 m_veboxSurfaceData.surfaceEntry[i].surfaceIndex = CM_INVALID_INDEX;
707 m_veboxSurfaceData.surfaceEntry[i].surfaceCtrlBits = CM_INVALID_INDEX;
708 }
709 }
710
711 UpdateSurfaceStateOnTaskCreation();
712
713 return result;
714 }
715
716 //*-----------------------------------------------------------------------------
717 //| Purpose: Initialize Class CmTaskInternal with hints
718 //| Returns: Result of the operation
719 //*-----------------------------------------------------------------------------
Initialize(uint32_t hints,uint32_t numTasksGenerated,bool isLastTask)720 int32_t CmTaskInternal::Initialize(uint32_t hints, uint32_t numTasksGenerated, bool isLastTask)
721 {
722 CmThreadSpaceRT* threadSpace = nullptr;
723 int32_t result = CM_SUCCESS;
724
725 // use ThreadSpace Initialize function to create kernel data
726 result = this->Initialize(threadSpace, true);
727
728 // set hints in task
729 m_hints = hints;
730
731 m_numTasksGenerated = numTasksGenerated;
732 m_isLastTask = isLastTask;
733
734 // set task type to be EnqueueWithHints
735 m_taskType = CM_INTERNAL_TASK_ENQUEUEWITHHINTS;
736
737 return result;
738 }
739
740 //*-----------------------------------------------------------------------------
741 //| Purpose: Get Kernel Count
742 //| Returns: CM_SUCCESS.
743 //*-----------------------------------------------------------------------------
GetKernelCount(uint32_t & count)744 int32_t CmTaskInternal::GetKernelCount( uint32_t& count )
745 {
746 count = m_kernelCount;
747 return CM_SUCCESS;
748 }
749
GetTaskSurfaces(bool * & surfArray)750 int32_t CmTaskInternal::GetTaskSurfaces( bool *&surfArray )
751 {
752 surfArray = m_surfaceArray;
753 return CM_SUCCESS;
754 }
755
756 //*-----------------------------------------------------------------------------
757 //| Purpose: Geth Kernel from the Kernel array
758 //| Returns: Result of operation.
759 //*-----------------------------------------------------------------------------
GetKernel(const uint32_t index,CmKernelRT * & kernel)760 int32_t CmTaskInternal::GetKernel( const uint32_t index, CmKernelRT* & kernel )
761 {
762 kernel = nullptr;
763 if( index < m_kernels.GetSize() )
764 {
765 kernel = (CmKernelRT*)m_kernels.GetElement( index );
766 return CM_SUCCESS;
767 }
768 else
769 {
770 return CM_FAILURE;
771 }
772 }
773
774 //*-----------------------------------------------------------------------------
775 //| Purpose: Geth Kernel data by kernel's index
776 //| Returns: Result of operation.
777 //*-----------------------------------------------------------------------------
GetKernelData(const uint32_t index,CmKernelData * & kernelData)778 int32_t CmTaskInternal::GetKernelData( const uint32_t index, CmKernelData* & kernelData )
779 {
780 kernelData = nullptr;
781 if( index < m_kernelData.GetSize() )
782 {
783 kernelData = (CmKernelData*)m_kernelData.GetElement( index );
784 return CM_SUCCESS;
785 }
786 else
787 {
788 return CM_FAILURE;
789 }
790 }
791
792 //*-----------------------------------------------------------------------------
793 //| Purpose: Geth Kernel data size by kernel's index
794 //| Returns: Result of operation.
795 //*-----------------------------------------------------------------------------
GetKernelDataSize(const uint32_t index,uint32_t & size)796 int32_t CmTaskInternal::GetKernelDataSize( const uint32_t index, uint32_t & size )
797 {
798 size = 0;
799 CmKernelData* kernelData = nullptr;
800 if( index < m_kernelData.GetSize() )
801 {
802 kernelData = (CmKernelData*)m_kernelData.GetElement( index );
803 if (kernelData == nullptr)
804 {
805 CM_ASSERTMESSAGE("Error: Invalid kernel data.");
806 return CM_FAILURE;
807 }
808 size = kernelData->GetKernelDataSize();
809 return CM_SUCCESS;
810 }
811 else
812 {
813 return CM_FAILURE;
814 }
815 }
816
817 //*-----------------------------------------------------------------------------
818 //| Purpose: Get kernel's curbe offset
819 //| Returns: Result of operation.
820 //*-----------------------------------------------------------------------------
GetKernelCurbeOffset(const uint32_t index)821 uint32_t CmTaskInternal::GetKernelCurbeOffset( const uint32_t index )
822 {
823 return ( uint32_t ) m_kernelCurbeOffsetArray[ index ];
824 }
825
826 //*-----------------------------------------------------------------------------
827 //| Purpose: Set task event, need add refcount hehe.
828 //| Returns: Result of operation.
829 //*-----------------------------------------------------------------------------
SetTaskEvent(CmEventRT * event)830 int32_t CmTaskInternal::SetTaskEvent( CmEventRT* event )
831 {
832 m_taskEvent = event;
833 // add refCount
834 m_taskEvent->Acquire();
835 return CM_SUCCESS;
836 }
837
838 //*-----------------------------------------------------------------------------
839 //| Purpose: Get the task event
840 //| Returns: Result of operation.
841 //*-----------------------------------------------------------------------------
GetTaskEvent(CmEventRT * & event)842 int32_t CmTaskInternal::GetTaskEvent( CmEventRT* & event )
843 {
844 event = m_taskEvent;
845 return CM_SUCCESS;
846 }
847
848 //*-----------------------------------------------------------------------------
849 //| Purpose: Get the task's status
850 //| Returns: Result of operation.
851 //*-----------------------------------------------------------------------------
GetTaskStatus(CM_STATUS & taskStatus)852 int32_t CmTaskInternal::GetTaskStatus(CM_STATUS & taskStatus)
853 {
854 if(m_taskEvent == nullptr)
855 {
856 return CM_FAILURE;
857 }
858
859 return m_taskEvent->GetStatusNoFlush(taskStatus);
860 }
861
862 //*-----------------------------------------------------------------------------
863 //| Purpose: Record CPU ticks for Flush Time
864 //| Returns: Result of operation.
865 //*-----------------------------------------------------------------------------
VtuneSetFlushTime()866 int32_t CmTaskInternal::VtuneSetFlushTime()
867 {
868 if(!m_cmDevice->IsVtuneLogOn())
869 { // return directly if ETW log is off
870 return CM_SUCCESS;
871 }
872
873 MosUtilities::MosQueryPerformanceCounter((uint64_t*)&m_taskProfilingInfo.flushTime.QuadPart);
874 return CM_SUCCESS;
875 }
876
877 //*-----------------------------------------------------------------------------
878 //| Purpose: Initialize Profiling Information for Media Pipeline
879 //| Returns: Result of operation.
880 //*-----------------------------------------------------------------------------
VtuneInitProfilingInfo(const CmThreadSpaceRT * perTaskThreadSpace)881 int32_t CmTaskInternal::VtuneInitProfilingInfo(const CmThreadSpaceRT *perTaskThreadSpace)
882 {
883 CmKernelRT *cmKernel = nullptr;
884 CmThreadSpaceRT *perKernelThreadSpace = nullptr;
885 uint32_t threadSpaceWidth = 0;
886 uint32_t threadSpaceHeight = 0;
887
888 int32_t hr = CM_SUCCESS;
889
890 if(!m_cmDevice->IsVtuneLogOn())
891 { // return directly if ETW log is off
892 return CM_SUCCESS;
893 }
894
895 CmSafeMemSet(&m_taskProfilingInfo, 0, sizeof(m_taskProfilingInfo));
896 m_taskProfilingInfo.kernelCount = m_kernelCount;
897 m_taskProfilingInfo.threadID = CmGetCurThreadId(); // Get Thread ID
898
899 MosUtilities::MosQueryPerformanceCounter((uint64_t*)&m_taskProfilingInfo.enqueueTime.QuadPart); // Get Enqueue Time
900
901 // Currently, the Kernel/ThreadSpace/ThreadGroupSpace could not be deleted before task finished.
902 m_taskProfilingInfo.kernelNames = MOS_NewArray(char, (CM_MAX_KERNEL_NAME_SIZE_IN_BYTE * m_kernelCount));
903 CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.kernelNames);
904
905 m_taskProfilingInfo.localWorkWidth = MOS_NewArray(uint32_t, m_kernelCount);
906 CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.localWorkWidth);
907
908 m_taskProfilingInfo.localWorkHeight = MOS_NewArray(uint32_t, m_kernelCount);
909 CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.localWorkHeight);
910
911 m_taskProfilingInfo.globalWorkWidth = MOS_NewArray(uint32_t, m_kernelCount);
912 CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.globalWorkWidth);
913
914 m_taskProfilingInfo.globalWorkHeight = MOS_NewArray(uint32_t, m_kernelCount);
915 CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.globalWorkHeight);
916
917 for (uint32_t i = 0; i < m_kernelCount; i++)
918 {
919 CM_CHK_CMSTATUS_GOTOFINISH(GetKernel(i, cmKernel));
920 CM_CHK_NULL_GOTOFINISH_CMERROR(cmKernel);
921
922 //Copy Kernel Name
923 MOS_SecureStrcpy(m_taskProfilingInfo.kernelNames + m_taskProfilingInfo.kernelNameLen,
924 CM_MAX_KERNEL_NAME_SIZE_IN_BYTE, cmKernel->GetName());
925
926 //Add Kernel Name Length
927 m_taskProfilingInfo.kernelNameLen += strlen(cmKernel->GetName()) + 1;
928
929 CM_CHK_CMSTATUS_GOTOFINISH(cmKernel->GetThreadSpace(perKernelThreadSpace));
930
931 if (perTaskThreadSpace)
932 {
933 //Per Task Thread Space Exists
934 m_taskProfilingInfo.localWorkWidth[i] = m_threadSpaceWidth;
935 m_taskProfilingInfo.localWorkHeight[i] = m_threadSpaceHeight;
936 m_taskProfilingInfo.globalWorkWidth[i] = m_threadSpaceWidth;
937 m_taskProfilingInfo.globalWorkHeight[i] = m_threadSpaceHeight;
938 }
939 else if (perKernelThreadSpace)
940 {
941 //Fill each threads Space's info
942 perKernelThreadSpace->GetThreadSpaceSize(threadSpaceWidth, threadSpaceHeight);
943 m_taskProfilingInfo.localWorkWidth[i] = threadSpaceWidth;
944 m_taskProfilingInfo.localWorkHeight[i] = threadSpaceHeight;
945 m_taskProfilingInfo.globalWorkWidth[i] = threadSpaceWidth;
946 m_taskProfilingInfo.globalWorkHeight[i] = threadSpaceHeight;
947 }
948 else
949 {
950 //Fill the thread count
951 uint32_t threadCount = 0;
952 cmKernel->GetThreadCount(threadCount);
953 m_taskProfilingInfo.localWorkWidth[i] = threadCount;
954 m_taskProfilingInfo.localWorkHeight[i] = 1;
955 m_taskProfilingInfo.globalWorkWidth[i] = threadCount;
956 m_taskProfilingInfo.globalWorkHeight[i] = 1;
957 }
958
959 }
960
961 finish:
962 if (hr != CM_SUCCESS)
963 {
964 MosSafeDeleteArray(m_taskProfilingInfo.kernelNames);
965 MosSafeDeleteArray(m_taskProfilingInfo.localWorkWidth);
966 MosSafeDeleteArray(m_taskProfilingInfo.localWorkHeight);
967 MosSafeDeleteArray(m_taskProfilingInfo.globalWorkWidth);
968 MosSafeDeleteArray(m_taskProfilingInfo.globalWorkHeight);
969 }
970 return hr;
971
972 }
973
974 //*-----------------------------------------------------------------------------
975 //| Purpose: Initialize Profiling Information
976 //| Returns: Result of operation.
977 //*-----------------------------------------------------------------------------
VtuneInitProfilingInfo(const CmThreadGroupSpace * perTaskThreadGroupSpace)978 int32_t CmTaskInternal::VtuneInitProfilingInfo(const CmThreadGroupSpace *perTaskThreadGroupSpace)
979 {
980 CmKernelRT *cmKernel = nullptr;
981 CmThreadGroupSpace *perKernelGroupSpace = nullptr;
982 uint32_t threadSpaceWidth = 0;
983 uint32_t threadSpaceHeight = 0;
984 uint32_t threadSpaceDepth = 0;
985 uint32_t threadGroupSpaceWidth = 0;
986 uint32_t threadGroupSpaceHeight = 0;
987 uint32_t threadGroupSpaceDepth = 0;
988 int32_t hr = CM_SUCCESS;
989
990 if(!m_cmDevice->IsVtuneLogOn())
991 { // return directly if ETW log is off
992 return CM_SUCCESS;
993 }
994
995 CmSafeMemSet(&m_taskProfilingInfo, 0, sizeof(m_taskProfilingInfo));
996 m_taskProfilingInfo.kernelCount = m_kernelCount;
997
998 m_taskProfilingInfo.threadID = CmGetCurThreadId(); // Get Thread ID
999
1000 MosUtilities::MosQueryPerformanceCounter((uint64_t*)&m_taskProfilingInfo.enqueueTime.QuadPart); // Get Enqueue Time
1001
1002 m_taskProfilingInfo.kernelNames = MOS_NewArray(char, (CM_MAX_KERNEL_NAME_SIZE_IN_BYTE * m_kernelCount));
1003 CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.kernelNames);
1004
1005 m_taskProfilingInfo.localWorkWidth = MOS_NewArray(uint32_t, m_kernelCount);
1006 CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.localWorkWidth);
1007
1008 m_taskProfilingInfo.localWorkHeight = MOS_NewArray(uint32_t, m_kernelCount);
1009 CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.localWorkHeight);
1010
1011 m_taskProfilingInfo.globalWorkWidth = MOS_NewArray(uint32_t, m_kernelCount);
1012 CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.globalWorkWidth);
1013
1014 m_taskProfilingInfo.globalWorkHeight = MOS_NewArray(uint32_t, m_kernelCount);
1015 CM_CHK_NULL_GOTOFINISH_CMERROR(m_taskProfilingInfo.globalWorkHeight);
1016
1017 for (uint32_t i = 0; i < m_kernelCount; i++)
1018 {
1019 CM_CHK_CMSTATUS_GOTOFINISH(GetKernel(i, cmKernel));
1020 CM_CHK_NULL_GOTOFINISH_CMERROR(cmKernel);
1021
1022 //Copy Kernel Name
1023 MOS_SecureStrcpy(m_taskProfilingInfo.kernelNames + m_taskProfilingInfo.kernelNameLen,
1024 CM_MAX_KERNEL_NAME_SIZE_IN_BYTE, cmKernel->GetName());
1025
1026 //Add Kernel Name Length
1027 m_taskProfilingInfo.kernelNameLen += strlen(cmKernel->GetName()) + 1;
1028
1029 CM_CHK_CMSTATUS_GOTOFINISH(cmKernel->GetThreadGroupSpace(perKernelGroupSpace));
1030
1031 if (perTaskThreadGroupSpace)
1032 { // Per Thread Group Space
1033 perTaskThreadGroupSpace->GetThreadGroupSpaceSize(threadSpaceWidth, threadSpaceHeight,
1034 threadSpaceDepth, threadGroupSpaceWidth,
1035 threadGroupSpaceHeight, threadGroupSpaceDepth);
1036 m_taskProfilingInfo.localWorkWidth[i] = threadSpaceWidth;
1037 m_taskProfilingInfo.localWorkHeight[i] = threadSpaceHeight;
1038 m_taskProfilingInfo.globalWorkWidth[i] = threadSpaceWidth*threadGroupSpaceWidth;
1039 m_taskProfilingInfo.globalWorkHeight[i] = threadSpaceHeight*threadGroupSpaceHeight;
1040
1041 }
1042 else if (perKernelGroupSpace)
1043 {
1044 //Fill each threads group space's info
1045 perKernelGroupSpace->GetThreadGroupSpaceSize(threadSpaceWidth, threadSpaceHeight,
1046 threadSpaceDepth, threadGroupSpaceWidth,
1047 threadGroupSpaceHeight, threadGroupSpaceDepth);
1048 m_taskProfilingInfo.localWorkWidth[i] = threadSpaceWidth;
1049 m_taskProfilingInfo.localWorkHeight[i] = threadSpaceHeight;
1050 m_taskProfilingInfo.globalWorkWidth[i] = threadSpaceWidth*threadGroupSpaceWidth;
1051 m_taskProfilingInfo.globalWorkHeight[i] = threadSpaceHeight*threadGroupSpaceHeight; //Yi need to rethink
1052 }
1053
1054 }
1055
1056 finish:
1057 if (hr != CM_SUCCESS)
1058 {
1059 MosSafeDeleteArray(m_taskProfilingInfo.kernelNames);
1060 MosSafeDeleteArray(m_taskProfilingInfo.localWorkWidth);
1061 MosSafeDeleteArray(m_taskProfilingInfo.localWorkHeight);
1062 MosSafeDeleteArray(m_taskProfilingInfo.globalWorkWidth);
1063 MosSafeDeleteArray(m_taskProfilingInfo.globalWorkHeight);
1064 }
1065 return hr;
1066 }
1067
1068 //*-----------------------------------------------------------------------------
1069 //| Purpose: Release Profiling information
1070 //| Returns: Result of operation.
1071 //*-----------------------------------------------------------------------------
VtuneReleaseProfilingInfo()1072 int32_t CmTaskInternal::VtuneReleaseProfilingInfo()
1073 {
1074 if(!m_cmDevice->IsVtuneLogOn())
1075 { // return directly if ETW log is off
1076 return CM_SUCCESS;
1077 }
1078
1079 MosSafeDeleteArray(m_taskProfilingInfo.kernelNames);
1080 MosSafeDeleteArray(m_taskProfilingInfo.localWorkWidth);
1081 MosSafeDeleteArray(m_taskProfilingInfo.localWorkHeight);
1082 MosSafeDeleteArray(m_taskProfilingInfo.globalWorkWidth);
1083 MosSafeDeleteArray(m_taskProfilingInfo.globalWorkHeight);
1084
1085 return CM_SUCCESS;
1086 }
1087
1088 //*-----------------------------------------------------------------------------
1089 //| Purpose: Reset KernelData status from IN_USE to IDLE.
1090 // It is called immediately after the task being flushed.
1091 //| Returns: Result of operation.
1092 //*-----------------------------------------------------------------------------
ResetKernelDataStatus()1093 int32_t CmTaskInternal::ResetKernelDataStatus()
1094 {
1095 int32_t hr = CM_SUCCESS;
1096
1097 for(uint32_t krnDataIndex =0 ; krnDataIndex < m_kernelCount; krnDataIndex++ )
1098 {
1099 CmKernelData *kernelData;
1100 CM_CHK_CMSTATUS_GOTOFINISH(GetKernelData(krnDataIndex, kernelData));
1101 CM_CHK_NULL_GOTOFINISH_CMERROR(kernelData);
1102 CM_CHK_CMSTATUS_GOTOFINISH(kernelData->ResetStatus());
1103 }
1104
1105 finish:
1106 return hr;
1107 }
1108
1109 //*-----------------------------------------------------------------------------
1110 //| Purpose: Create thread space data
1111 //| Returns: Result of operation.
1112 //*-----------------------------------------------------------------------------
CreateThreadSpaceData(const CmThreadSpaceRT * threadSpace)1113 int32_t CmTaskInternal::CreateThreadSpaceData(const CmThreadSpaceRT* threadSpace)
1114 {
1115 uint32_t i;
1116 uint32_t width, height;
1117 uint32_t *kernelCoordinateIndex = nullptr;
1118 int hr = CM_SUCCESS;
1119 CmThreadSpaceRT *threadSpaceRT = const_cast<CmThreadSpaceRT*>(threadSpace);
1120 CmKernelRT* kernelInThreadSpace = nullptr;
1121 CmKernelRT* kernelInTask = nullptr;
1122 CM_CHK_NULL_GOTOFINISH(threadSpaceRT, CM_NULL_POINTER);
1123
1124 threadSpaceRT->GetThreadSpaceSize(m_threadSpaceWidth, m_threadSpaceHeight);
1125
1126 if (threadSpaceRT->IsThreadAssociated())
1127 {
1128 m_threadCoordinates = MOS_NewArray(PCM_HAL_SCOREBOARD, m_kernelCount);
1129 CM_CHK_NULL_GOTOFINISH(m_threadCoordinates, CM_FAILURE);
1130 CmSafeMemSet(m_threadCoordinates, 0, m_kernelCount*sizeof(PCM_HAL_SCOREBOARD));
1131
1132 m_dependencyMasks = MOS_NewArray(PCM_HAL_MASK_AND_RESET, m_kernelCount);
1133 CM_CHK_NULL_GOTOFINISH(m_dependencyMasks, CM_FAILURE);
1134 CmSafeMemSet(m_dependencyMasks, 0, m_kernelCount*sizeof(PCM_HAL_MASK_AND_RESET));
1135
1136 kernelCoordinateIndex = MOS_NewArray(uint32_t, m_kernelCount);
1137 if(m_threadCoordinates && kernelCoordinateIndex && m_dependencyMasks)
1138 {
1139 CmSafeMemSet(kernelCoordinateIndex, 0, m_kernelCount*sizeof(uint32_t));
1140 for (i = 0; i< m_kernelCount; i++)
1141 {
1142 kernelCoordinateIndex[i] = 0;
1143 uint32_t threadCount;
1144 this->GetKernel(i, kernelInTask);
1145
1146 if(kernelInTask == nullptr)
1147 {
1148 CM_ASSERTMESSAGE("Error: Invalid kernel pointer in task.");
1149 hr = CM_NULL_POINTER;
1150 goto finish;
1151 }
1152
1153 kernelInTask->GetThreadCount(threadCount);
1154 if (threadCount == 0)
1155 {
1156 threadCount = m_threadSpaceWidth*m_threadSpaceHeight;
1157 }
1158 m_threadCoordinates[i] = MOS_NewArray(CM_HAL_SCOREBOARD, threadCount);
1159 if (m_threadCoordinates[i])
1160 {
1161 CmSafeMemSet(m_threadCoordinates[i], 0, sizeof(CM_HAL_SCOREBOARD)* threadCount);
1162 }
1163 else
1164 {
1165 CM_ASSERTMESSAGE("Error: Pointer to thread coordinates is null.");
1166 hr = CM_NULL_POINTER;
1167 goto finish;
1168 }
1169
1170 m_dependencyMasks[i] = MOS_NewArray(CM_HAL_MASK_AND_RESET, threadCount);
1171 if( m_dependencyMasks[i] )
1172 {
1173 CmSafeMemSet(m_dependencyMasks[i], 0, sizeof(CM_HAL_MASK_AND_RESET) * threadCount);
1174 }
1175 else
1176 {
1177 CM_ASSERTMESSAGE("Error: Pointer to dependency masks is null.");
1178 hr = CM_NULL_POINTER;
1179 goto finish;
1180 }
1181 }
1182
1183 CM_THREAD_SPACE_UNIT *threadSpaceUnit = nullptr;
1184 threadSpaceRT->GetThreadSpaceSize(width, height);
1185 threadSpaceRT->GetThreadSpaceUnit(threadSpaceUnit);
1186
1187 uint32_t *boardOrder = nullptr;
1188 threadSpaceRT->GetBoardOrder(boardOrder);
1189 for (uint32_t tIndex=0; tIndex < height*width; tIndex ++)
1190 {
1191 kernelInThreadSpace = static_cast<CmKernelRT *>(threadSpaceUnit[boardOrder[tIndex]].kernel);
1192 if (kernelInThreadSpace == nullptr)
1193 {
1194 if (threadSpaceRT->GetNeedSetKernelPointer())
1195 {
1196 kernelInThreadSpace = threadSpaceRT->GetKernelPointer();
1197 }
1198 if (kernelInThreadSpace == nullptr)
1199 {
1200 CM_ASSERTMESSAGE("Error: Invalid kernel pointer in task.");
1201 hr = CM_NULL_POINTER;
1202 goto finish;
1203 }
1204 }
1205 uint32_t kIndex = kernelInThreadSpace->GetIndexInTask();
1206
1207 m_threadCoordinates[kIndex][kernelCoordinateIndex[kIndex]].x
1208 = threadSpaceUnit[boardOrder[tIndex]].scoreboardCoordinates.x;
1209 m_threadCoordinates[kIndex][kernelCoordinateIndex[kIndex]].y
1210 = threadSpaceUnit[boardOrder[tIndex]].scoreboardCoordinates.y;
1211 m_threadCoordinates[kIndex][kernelCoordinateIndex[kIndex]].mask
1212 = threadSpaceUnit[boardOrder[tIndex]].dependencyMask;
1213 m_threadCoordinates[kIndex][kernelCoordinateIndex[kIndex]].resetMask
1214 = threadSpaceUnit[boardOrder[tIndex]].reset;
1215 m_threadCoordinates[kIndex][kernelCoordinateIndex[kIndex]].color
1216 = threadSpaceUnit[boardOrder[tIndex]].scoreboardColor;
1217 m_threadCoordinates[kIndex][kernelCoordinateIndex[kIndex]].sliceSelect
1218 = threadSpaceUnit[boardOrder[tIndex]].sliceDestinationSelect;
1219 m_threadCoordinates[kIndex][kernelCoordinateIndex[kIndex]].subSliceSelect
1220 = threadSpaceUnit[boardOrder[tIndex]].subSliceDestinationSelect;
1221 m_dependencyMasks[kIndex][kernelCoordinateIndex[kIndex]].mask
1222 = threadSpaceUnit[boardOrder[tIndex]].dependencyMask;
1223 m_dependencyMasks[kIndex][kernelCoordinateIndex[kIndex]].resetMask
1224 = threadSpaceUnit[boardOrder[tIndex]].reset;
1225 kernelCoordinateIndex[kIndex] ++;
1226 }
1227
1228 MosSafeDeleteArray(kernelCoordinateIndex);
1229 }
1230 else
1231 {
1232 CM_ASSERTMESSAGE("Error: Failed to create thread space data.");
1233 hr = CM_FAILURE;
1234 goto finish;
1235 }
1236
1237 m_isThreadCoordinatesExisted = true;
1238 }
1239 else
1240 {
1241 m_threadCoordinates = nullptr;
1242 m_dependencyMasks = nullptr;
1243 m_isThreadCoordinatesExisted = false;
1244 }
1245
1246 if (threadSpaceRT->IsDependencySet())
1247 {
1248 threadSpaceRT->GetDependencyPatternType(m_dependencyPattern);
1249 }
1250
1251 threadSpaceRT->GetColorCountMinusOne(m_colorCountMinusOne);
1252 threadSpaceRT->GetMediaWalkerGroupSelect(m_mediaWalkerGroupSelect);
1253
1254 threadSpaceRT->GetWalkingPattern(m_walkingPattern);
1255
1256 m_mediaWalkerParamsSet = threadSpaceRT->CheckWalkingParametersSet();
1257 if( m_mediaWalkerParamsSet )
1258 {
1259 CM_WALKING_PARAMETERS tmpMWParams;
1260 CM_CHK_CMSTATUS_GOTOFINISH(threadSpaceRT->GetWalkingParameters(tmpMWParams));
1261 CmSafeMemCopy(&m_walkingParameters, &tmpMWParams, sizeof(tmpMWParams));
1262 }
1263
1264 m_dependencyVectorsSet = threadSpaceRT->CheckDependencyVectorsSet();
1265 if( m_dependencyVectorsSet )
1266 {
1267 CM_HAL_DEPENDENCY tmpDepVectors;
1268 CM_CHK_CMSTATUS_GOTOFINISH(threadSpaceRT->GetDependencyVectors(tmpDepVectors));
1269 CmSafeMemCopy(&m_dependencyVectors, &tmpDepVectors, sizeof(tmpDepVectors));
1270 }
1271
1272 finish:
1273 if(hr != CM_SUCCESS)
1274 {
1275 if(m_threadCoordinates )
1276 {
1277 for (i = 0; i< m_kernelCount; i++)
1278 {
1279 MosSafeDeleteArray(m_threadCoordinates[i]);
1280 }
1281 }
1282
1283 if(m_dependencyMasks)
1284 {
1285 for (i = 0; i< m_kernelCount; i++)
1286 {
1287 MosSafeDeleteArray(m_dependencyMasks[i]);
1288 }
1289 }
1290 MosSafeDeleteArray(m_threadCoordinates);
1291 MosSafeDeleteArray(m_dependencyMasks);
1292 MosSafeDeleteArray(kernelCoordinateIndex);
1293 }
1294 return hr;
1295 }
1296
1297 //*-----------------------------------------------------------------------------
1298 //| Purpose: Get thread space's coordinates
1299 //| Returns: CM_SUCCESS.
1300 //*-----------------------------------------------------------------------------
GetKernelCoordinates(const uint32_t index,void * & kernelCoordinates)1301 int32_t CmTaskInternal::GetKernelCoordinates(const uint32_t index, void *&kernelCoordinates)
1302 {
1303 if (m_threadCoordinates != nullptr)
1304 {
1305 kernelCoordinates = (void *)m_threadCoordinates[index];
1306 }
1307 else
1308 {
1309 kernelCoordinates = nullptr;
1310 }
1311
1312 return CM_SUCCESS;
1313 }
1314
1315 //*-----------------------------------------------------------------------------
1316 //| Purpose: Get thread space's dependency masks
1317 //| Returns: CM_SUCCESS.
1318 //*-----------------------------------------------------------------------------
GetKernelDependencyMasks(const uint32_t index,void * & kernelDependencyMasks)1319 int32_t CmTaskInternal::GetKernelDependencyMasks(const uint32_t index, void *&kernelDependencyMasks)
1320 {
1321 if (m_dependencyMasks != nullptr)
1322 {
1323 kernelDependencyMasks = (void *)m_dependencyMasks[index];
1324 }
1325 else
1326 {
1327 kernelDependencyMasks = nullptr;
1328 }
1329
1330 return CM_SUCCESS;
1331 }
1332
1333 //*-----------------------------------------------------------------------------
1334 //| Purpose: Get dependency pattern
1335 //| Returns: CM_SUCCESS.
1336 //*-----------------------------------------------------------------------------
GetDependencyPattern(CM_DEPENDENCY_PATTERN & dependencyPattern)1337 int32_t CmTaskInternal::GetDependencyPattern(CM_DEPENDENCY_PATTERN &dependencyPattern)
1338 {
1339 dependencyPattern = m_dependencyPattern;
1340 return CM_SUCCESS;
1341 }
1342
1343 //*-----------------------------------------------------------------------------
1344 //| Purpose: Get media walking pattern
1345 //| Returns: CM_SUCCESS.
1346 //*-----------------------------------------------------------------------------
GetWalkingPattern(CM_WALKING_PATTERN & walkingPattern)1347 int32_t CmTaskInternal::GetWalkingPattern(CM_WALKING_PATTERN &walkingPattern)
1348 {
1349 walkingPattern = m_walkingPattern;
1350 return CM_SUCCESS;
1351 }
1352
1353 //*-----------------------------------------------------------------------------
1354 //| Purpose: Get media walking parameters
1355 //| Returns: CM_FAILURE if dest ptr is nullptr, CM_SUCCESS otherwise
1356 //*-----------------------------------------------------------------------------
GetWalkingParameters(CM_WALKING_PARAMETERS & walkingParameters)1357 int32_t CmTaskInternal::GetWalkingParameters(CM_WALKING_PARAMETERS &walkingParameters)
1358 {
1359 CmSafeMemCopy(&walkingParameters, &m_walkingParameters, sizeof(m_walkingParameters));
1360 return CM_SUCCESS;
1361 }
1362
1363 //*-----------------------------------------------------------------------------
1364 //| Purpose: Check to see if media walking parameters have been set
1365 //| Returns: true if media walking parameters set, false otherwise
1366 //*-----------------------------------------------------------------------------
CheckWalkingParametersSet()1367 bool CmTaskInternal::CheckWalkingParametersSet( )
1368 {
1369 return m_mediaWalkerParamsSet;
1370 }
1371
1372 //*-----------------------------------------------------------------------------
1373 //| Purpose: Get dependency vectors
1374 //| Returns: CM_FAILURE if dest ptr is nullptr, CM_SUCCESS otherwise
1375 //*-----------------------------------------------------------------------------
GetDependencyVectors(CM_HAL_DEPENDENCY & dependencyVectors)1376 int32_t CmTaskInternal::GetDependencyVectors(CM_HAL_DEPENDENCY &dependencyVectors)
1377 {
1378 CmSafeMemCopy(&dependencyVectors, &m_dependencyVectors, sizeof(m_dependencyVectors));
1379 return CM_SUCCESS;
1380 }
1381
1382 //*-----------------------------------------------------------------------------
1383 //| Purpose: Check to see if dependency vectors have been set
1384 //| Returns: true if dependency vectors are set, false otherwise
1385 //*-----------------------------------------------------------------------------
CheckDependencyVectorsSet()1386 bool CmTaskInternal::CheckDependencyVectorsSet( )
1387 {
1388 return m_dependencyVectorsSet;
1389 }
1390
1391 //*-----------------------------------------------------------------------------
1392 //| Purpose: Get the total thread count
1393 //| Returns: CM_SUCCESS.
1394 //*-----------------------------------------------------------------------------
GetTotalThreadCount(uint32_t & totalThreadCount)1395 int32_t CmTaskInternal::GetTotalThreadCount( uint32_t& totalThreadCount )
1396 {
1397 totalThreadCount = m_totalThreadCount;
1398
1399 return CM_SUCCESS;
1400 }
1401
1402 //*-----------------------------------------------------------------------------
1403 //| Purpose: Get the width,height of thread space
1404 //| Returns: CM_SUCCESS.
1405 //*-----------------------------------------------------------------------------
1406
GetThreadSpaceSize(uint32_t & width,uint32_t & height)1407 int32_t CmTaskInternal::GetThreadSpaceSize(uint32_t& width, uint32_t& height )
1408 {
1409 width = m_threadSpaceWidth;
1410 height = m_threadSpaceHeight;
1411
1412 return CM_SUCCESS;
1413 }
1414
1415 //*-----------------------------------------------------------------------------
1416 //| Purpose: Get the color count minus one of the thread space
1417 //| Used to dispatch multiple sets of dependency threads
1418 //| for media walker
1419 //| Returns: CM_SUCCESS.
1420 //*-----------------------------------------------------------------------------
1421
GetColorCountMinusOne(uint32_t & colorCount)1422 int32_t CmTaskInternal::GetColorCountMinusOne( uint32_t& colorCount )
1423 {
1424 colorCount = m_colorCountMinusOne;
1425
1426 return CM_SUCCESS;
1427 }
1428
1429 //*-----------------------------------------------------------------------------
1430 //| Purpose: Whether thread space is created
1431 //| Returns: Boolean.
1432 //*-----------------------------------------------------------------------------
1433
IsThreadSpaceCreated(void)1434 bool CmTaskInternal::IsThreadSpaceCreated(void )
1435 {
1436 return m_isThreadSpaceCreated;
1437 }
1438
1439 //*-----------------------------------------------------------------------------
1440 //| Purpose: Whether thread coordinates are existed
1441 //| Returns: Boolean.
1442 //*-----------------------------------------------------------------------------
IsThreadCoordinatesExisted(void)1443 bool CmTaskInternal::IsThreadCoordinatesExisted(void)
1444 {
1445 return m_isThreadCoordinatesExisted;
1446 }
1447
1448 //*-----------------------------------------------------------------------------
1449 //| Purpose: Whether thread coordinates are existed
1450 //| Returns: Result of operation.
1451 //*-----------------------------------------------------------------------------
1452
GetThreadGroupSpaceSize(uint32_t & threadSpaceWidth,uint32_t & threadSpaceHeight,uint32_t & threadSpaceDepth,uint32_t & groupSpaceWidth,uint32_t & groupSpaceHeight,uint32_t & groupSpaceDepth)1453 int32_t CmTaskInternal::GetThreadGroupSpaceSize(uint32_t& threadSpaceWidth, uint32_t& threadSpaceHeight,
1454 uint32_t& threadSpaceDepth, uint32_t& groupSpaceWidth,
1455 uint32_t& groupSpaceHeight, uint32_t& groupSpaceDepth)
1456 {
1457 threadSpaceWidth = m_threadSpaceWidth;
1458 threadSpaceHeight = m_threadSpaceHeight;
1459 threadSpaceDepth = m_threadSpaceDepth;
1460 groupSpaceWidth = m_groupSpaceWidth;
1461 groupSpaceHeight = m_groupSpaceHeight;
1462 groupSpaceDepth = m_groupSpaceDepth;
1463 return CM_SUCCESS;
1464 }
1465
1466 //*-----------------------------------------------------------------------------
1467 //| Purpose: Get the size of sharedlocalmemory
1468 //| Returns: CM_SUCCESS.
1469 //*-----------------------------------------------------------------------------
GetSLMSize(uint32_t & slmSize)1470 int32_t CmTaskInternal::GetSLMSize(uint32_t& slmSize)
1471 {
1472 slmSize = m_slmSize;
1473 return CM_SUCCESS;
1474 }
1475
1476 //*-----------------------------------------------------------------------------
1477 //| Purpose: Get the size of spill memory used
1478 //| Returns: CM_SUCCESS.
1479 //*-----------------------------------------------------------------------------
GetSpillMemUsed(uint32_t & spillMemUsed)1480 int32_t CmTaskInternal::GetSpillMemUsed(uint32_t& spillMemUsed)
1481 {
1482 spillMemUsed = m_spillMemUsed;
1483 return CM_SUCCESS;
1484 }
1485
1486 //*-----------------------------------------------------------------------------
1487 //| Purpose: Get the hints for EnqueueWithHints
1488 //| Returns: CM_SUCCESS.
1489 //*-----------------------------------------------------------------------------
GetHints(uint32_t & hints)1490 int32_t CmTaskInternal::GetHints(uint32_t& hints)
1491 {
1492 hints = m_hints;
1493 return CM_SUCCESS;
1494 }
1495
1496 //*-----------------------------------------------------------------------------
1497 //| Purpose: Gets the number of tasks generated for EnqueueWithHints
1498 //| Used when splitting large task to smaller tasks
1499 //| Returns: CM_SUCCESS.
1500 //*-----------------------------------------------------------------------------
GetNumTasksGenerated(uint32_t & numTasksGenerated)1501 int32_t CmTaskInternal::GetNumTasksGenerated(uint32_t& numTasksGenerated)
1502 {
1503 numTasksGenerated = m_numTasksGenerated;
1504 return CM_SUCCESS;
1505 }
1506
1507 //*-----------------------------------------------------------------------------
1508 //| Purpose: Gets whether or not this task is the last task for EnqueueWithHints
1509 //| Used to identify last smaller task when splitting large task
1510 //| Returns: CM_SUCCESS.
1511 //*-----------------------------------------------------------------------------
GetLastTask(bool & isLastTask)1512 int32_t CmTaskInternal::GetLastTask(bool& isLastTask)
1513 {
1514 isLastTask = m_isLastTask;
1515 return CM_SUCCESS;
1516 }
1517
1518 //*-----------------------------------------------------------------------------
1519 //| Purpose: Whether thread group space is created
1520 //| Returns: Value.
1521 //*-----------------------------------------------------------------------------
IsThreadGroupSpaceCreated(void)1522 bool CmTaskInternal::IsThreadGroupSpaceCreated(void)
1523 {
1524 return m_isThreadGroupSpaceCreated;
1525 }
1526
1527 //*-----------------------------------------------------------------------------
1528 //| Purpose: Allocate Space to record kernel surface's information
1529 //| Returns: result of operation.
1530 //*-----------------------------------------------------------------------------
AllocateKernelSurfInfo()1531 int32_t CmTaskInternal::AllocateKernelSurfInfo()
1532 {
1533 //Allocate Surf info array
1534 m_kernelSurfInfo.kernelNum = m_kernelCount;
1535 m_kernelSurfInfo.surfEntryInfosArray
1536 = (CM_HAL_SURFACE_ENTRY_INFO_ARRAY*)MOS_AllocAndZeroMemory(m_kernelCount *
1537 sizeof(CM_HAL_SURFACE_ENTRY_INFO_ARRAY));
1538 if(m_kernelSurfInfo.surfEntryInfosArray == nullptr)
1539 {
1540
1541 CM_ASSERTMESSAGE("Error: Mem allocation fail.");
1542 return CM_OUT_OF_HOST_MEMORY;
1543 }
1544
1545 for( uint32_t i = 0; i < m_kernelCount; i ++ )
1546 {
1547 CmKernelRT * tempCmKernel = nullptr;
1548 this->GetKernel(i, tempCmKernel);
1549 if(tempCmKernel == nullptr)
1550 {
1551 CM_ASSERTMESSAGE("Error: Invalid kernel pointer.");
1552 return CM_FAILURE;
1553 }
1554
1555 CM_ARG* arg=NULL;
1556 tempCmKernel->GetArgs( arg );
1557
1558 uint32_t argCount = 0;
1559 tempCmKernel->GetArgCount( argCount);
1560 //allocate memory for non_static buffer&2D&3D
1561 uint32_t surfEntryNum = 0;
1562 for( uint32_t j = 0; j < argCount; j ++ )
1563 {
1564 switch(arg[ j ].unitKind)
1565 {
1566 case ARG_KIND_SURFACE_1D:
1567 surfEntryNum = surfEntryNum + arg[ j ].unitCount * arg[j].unitSize/sizeof(int);
1568 break;
1569
1570 case ARG_KIND_SURFACE_2D:
1571 case ARG_KIND_SURFACE_2D_UP:
1572 case ARG_KIND_SURFACE_3D:
1573 case ARG_KIND_SURFACE_SAMPLER8X8_AVS:
1574 case ARG_KIND_SURFACE_SAMPLER8X8_VA:
1575 surfEntryNum = surfEntryNum + 3 * arg[ j ].unitCount * arg[j].unitSize/sizeof(int);//one 2D or 3D can have upto 3 planes
1576 break;
1577
1578 case ARG_KIND_SURFACE_VME:
1579 surfEntryNum = surfEntryNum + 24 * arg[ j ].unitCount;//surfaceVME will use upto 8 surfaces, each one can have upto 3 planes
1580 break;
1581
1582 default:
1583 break;
1584 }
1585 }
1586 CM_HAL_SURFACE_ENTRY_INFO_ARRAY* tempArray = m_kernelSurfInfo.surfEntryInfosArray;
1587 if(surfEntryNum>0)
1588 {
1589 tempArray[i].maxEntryNum = surfEntryNum;
1590 tempArray[i].surfEntryInfos = (CM_SURFACE_DETAILS*)MOS_AllocAndZeroMemory(surfEntryNum*sizeof(CM_SURFACE_DETAILS));
1591
1592 if(tempArray[i].surfEntryInfos == nullptr)
1593 {
1594 CM_ASSERTMESSAGE("Error: Mem allocation fail.");
1595 return CM_OUT_OF_HOST_MEMORY;
1596 }
1597
1598 }
1599
1600 //allocate memory for those 7 static buffers
1601 uint32_t globalBufNum=CM_GLOBAL_SURFACE_NUMBER + CM_GTPIN_BUFFER_NUM;
1602 tempArray[i].globalSurfNum=globalBufNum;
1603 tempArray[i].globalSurfInfos = (CM_SURFACE_DETAILS*)MOS_AllocAndZeroMemory(
1604 globalBufNum*sizeof(CM_SURFACE_DETAILS));
1605 if(tempArray[i].globalSurfInfos == nullptr)
1606 {
1607 CM_ASSERTMESSAGE("Mem allocation fail.");
1608 return CM_OUT_OF_HOST_MEMORY;
1609 }
1610 }
1611 return CM_SUCCESS;
1612 }
1613
GetKernelSurfInfo(CM_HAL_SURFACE_ENTRY_INFO_ARRAYS & surfEntryInfoArray)1614 int32_t CmTaskInternal::GetKernelSurfInfo(CM_HAL_SURFACE_ENTRY_INFO_ARRAYS & surfEntryInfoArray)
1615 {
1616 surfEntryInfoArray = m_kernelSurfInfo;
1617 return CM_SUCCESS;
1618 }
1619
ClearKernelSurfInfo()1620 int32_t CmTaskInternal::ClearKernelSurfInfo()
1621 {
1622 if (m_kernelSurfInfo.surfEntryInfosArray == nullptr)
1623 { // if surfEntryInfosArray is empty, return directly
1624 return CM_SUCCESS;
1625 }
1626
1627 //free memory
1628 for( uint32_t i = 0; i < m_kernelCount; i ++ )
1629 {
1630 if (m_kernelSurfInfo.surfEntryInfosArray[i].surfEntryInfos != nullptr)
1631 {
1632 MosSafeDelete(m_kernelSurfInfo.surfEntryInfosArray[i].surfEntryInfos);
1633 }
1634 if (m_kernelSurfInfo.surfEntryInfosArray[i].globalSurfInfos!= nullptr)
1635 {
1636 MosSafeDelete(m_kernelSurfInfo.surfEntryInfosArray[i].globalSurfInfos);
1637 }
1638 }
1639
1640 MosSafeDelete(m_kernelSurfInfo.surfEntryInfosArray);
1641
1642 m_kernelSurfInfo.kernelNum = 0 ;
1643 m_kernelSurfInfo.surfEntryInfosArray = nullptr;
1644
1645 return CM_SUCCESS;
1646 }
1647
GetTaskType(uint32_t & taskType)1648 int32_t CmTaskInternal::GetTaskType(uint32_t& taskType)
1649 {
1650 taskType = m_taskType;
1651
1652 return CM_SUCCESS;
1653 }
1654
1655 //*-----------------------------------------------------------------------------
1656 //| Purpose: Get vebox state
1657 //| Returns: Result of operation.
1658 //*-----------------------------------------------------------------------------
GetVeboxState(CM_VEBOX_STATE & veboxState)1659 int32_t CmTaskInternal::GetVeboxState(CM_VEBOX_STATE &veboxState)
1660 {
1661 veboxState = m_veboxState;
1662
1663 return CM_SUCCESS;
1664 }
1665
GetVeboxParam(CmBufferUP * & veboxParam)1666 int32_t CmTaskInternal::GetVeboxParam(CmBufferUP * &veboxParam)
1667 {
1668 veboxParam = m_veboxParam;
1669
1670 return CM_SUCCESS;
1671 }
1672
GetVeboxSurfaceData(CM_VEBOX_SURFACE_DATA & veboxSurfaceData)1673 int32_t CmTaskInternal::GetVeboxSurfaceData(CM_VEBOX_SURFACE_DATA &veboxSurfaceData)
1674 {
1675 veboxSurfaceData = m_veboxSurfaceData;
1676 return CM_SUCCESS;
1677 }
1678
GetSyncBitmap()1679 uint64_t CmTaskInternal::GetSyncBitmap()
1680 {
1681 return m_ui64SyncBitmap;
1682 }
1683
GetConditionalEndBitmap()1684 uint64_t CmTaskInternal::GetConditionalEndBitmap()
1685 {
1686 return m_ui64ConditionalEndBitmap;
1687 }
1688
GetConditionalEndInfo()1689 CM_HAL_CONDITIONAL_BB_END_INFO* CmTaskInternal::GetConditionalEndInfo()
1690 {
1691 return m_conditionalEndInfo;
1692 }
1693
1694 //*-----------------------------------------------------------------------------
1695 //| Purpose: Set power option for this task
1696 //| Returns: Result of operation.
1697 //*-----------------------------------------------------------------------------
SetPowerOption(PCM_POWER_OPTION powerOption)1698 int32_t CmTaskInternal::SetPowerOption( PCM_POWER_OPTION powerOption )
1699 {
1700 if (powerOption == nullptr)
1701 {
1702 CM_ASSERTMESSAGE("Error: Pointer to power option is null.");
1703 return CM_NULL_POINTER;
1704 }
1705 CmSafeMemCopy( &m_powerOption, powerOption, sizeof( m_powerOption ) );
1706 return CM_SUCCESS;
1707 }
1708
1709 //*-----------------------------------------------------------------------------
1710 //| Purpose: Get power option for this task
1711 //| Returns: Pointer to power option.
1712 //*-----------------------------------------------------------------------------
GetPowerOption()1713 PCM_POWER_OPTION CmTaskInternal::GetPowerOption()
1714 {
1715 return &m_powerOption;
1716 }
1717
1718 #if _DEBUG
1719 const char *gDependencyPatternString[] =
1720 {
1721 "DEPENDENCY_NONE",
1722 "DEPENDENCY_WAVEFRONT45",
1723 "DEPENDENCY_WAVEFRONT26"
1724 };
1725
1726 //Only for debugging
DisplayThreadSpaceData(uint32_t width,uint32_t height)1727 int32_t CmTaskInternal::DisplayThreadSpaceData(uint32_t width, uint32_t height)
1728 {
1729 if (m_threadCoordinates != nullptr)
1730 {
1731 CM_NORMALMESSAGE("Score board[Kernel x: (x1, y1), (x2, y2)...]:");
1732 for (uint32_t i = 0; i < m_kernelCount; i ++)
1733 {
1734 CmKernelRT *kernelRT = nullptr;
1735 GetKernel(i, kernelRT);
1736 if(nullptr == kernelRT)
1737 {
1738 return CM_FAILURE;
1739 }
1740
1741 uint32_t threadCount;
1742 kernelRT->GetThreadCount(threadCount);
1743 if (threadCount == 0)
1744 {
1745 threadCount = m_threadSpaceWidth*m_threadSpaceHeight;
1746 }
1747 CM_NORMALMESSAGE("Kernel %d: ", i);
1748 for (uint32_t j=0; j<threadCount; j++)
1749 {
1750 CM_NORMALMESSAGE("(%d, %d) ", m_threadCoordinates[i][j].x, m_threadCoordinates[i][j].y);
1751 }
1752 }
1753 }
1754 else
1755 {
1756 CM_NORMALMESSAGE("Score Board is NULL.");
1757 }
1758
1759 if (m_dependencyPattern <= CM_WAVEFRONT26)
1760 {
1761 CM_NORMALMESSAGE("Dependency Pattern: %s.", gDependencyPatternString[m_dependencyPattern]);
1762 }
1763 else
1764 {
1765 CM_NORMALMESSAGE("Dependency Pattern: UNASSIGNED.");
1766 }
1767
1768 return CM_SUCCESS;
1769 }
1770 #endif
1771
GetMediaWalkerGroupSelect(CM_MW_GROUP_SELECT & groupSelect)1772 int32_t CmTaskInternal::GetMediaWalkerGroupSelect(CM_MW_GROUP_SELECT& groupSelect)
1773 {
1774 groupSelect = m_mediaWalkerGroupSelect;
1775 return CM_SUCCESS;
1776 }
1777
1778 //*-----------------------------------------------------------------------------
1779 //| Purpose: Update surface state on task destroy stage
1780 //*-----------------------------------------------------------------------------
UpdateSurfaceStateOnTaskCreation()1781 int32_t CmTaskInternal::UpdateSurfaceStateOnTaskCreation()
1782 {
1783 CmSurfaceManager* surfaceMgr = nullptr;
1784 int32_t *surfState = nullptr;
1785
1786 m_cmDevice->GetSurfaceManager(surfaceMgr);
1787 if (surfaceMgr == nullptr)
1788 {
1789 CM_ASSERTMESSAGE("Error: Pointer to surface manager is null.");
1790 return CM_NULL_POINTER;
1791 }
1792
1793 uint32_t poolSize = surfaceMgr->GetSurfacePoolSize();
1794 uint32_t handle = 0;
1795 uint32_t curTaskSurfCnt = 0;
1796 void ** curTaskSurfResArray = nullptr;
1797 uint32_t refSurfCnt = 0;
1798 uint32_t *refSurfHandleArray = nullptr;
1799 CM_RETURN_CODE hr = CM_SUCCESS;
1800
1801 curTaskSurfResArray = (void **)MOS_AllocAndZeroMemory(sizeof(void *)*poolSize);
1802 CM_CHK_NULL_RETURN_CMERROR(curTaskSurfResArray);
1803
1804 CSync* surfaceLock = m_cmDevice->GetSurfaceCreationLock();
1805
1806 if (surfaceLock == nullptr)
1807 {
1808 CM_ASSERTMESSAGE("Error: Pointer to surface creation lock is null.");
1809 if (curTaskSurfResArray)
1810 {
1811 MOS_FreeMemory(curTaskSurfResArray);
1812 curTaskSurfResArray = nullptr;
1813 }
1814 return CM_NULL_POINTER;
1815 }
1816
1817 surfaceLock->Acquire();
1818
1819 // get the last tracker
1820 PCM_CONTEXT_DATA cmData = ( PCM_CONTEXT_DATA )m_cmDevice->GetAccelData();
1821 PCM_HAL_STATE state = nullptr;
1822 CM_CHK_NULL_GOTOFINISH_CMERROR(cmData);
1823 state = cmData->cmHalState;
1824 CM_CHK_NULL_GOTOFINISH_CMERROR(state);
1825
1826 if (!m_isSurfaceUpdateDone)
1827 {
1828 for (uint32_t i = 0; i < poolSize; i++)
1829 {
1830 if (m_surfaceArray[i])
1831 {
1832 CmSurface *surface = NULL;
1833 CM_CHK_CMSTATUS_GOTOFINISH(surfaceMgr->GetSurface(i, surface));
1834 if (surface == nullptr) // surface destroyed but not updated in kernel
1835 {
1836 continue;
1837 }
1838 if (m_taskType == CM_INTERNAL_TASK_VEBOX)
1839 {
1840 surface->SetVeboxTracker(state->renderHal->veBoxTrackerRes.currentTrackerId);
1841 }
1842 else
1843 {
1844 surface->SetRenderTracker(state->renderHal->currentTrackerIndex,
1845 state->renderHal->trackerProducer.GetNextTracker(state->renderHal->currentTrackerIndex));
1846 }
1847
1848 // Push this surface's resource into array for CP check.
1849 switch (surface->Type())
1850 {
1851 case CM_ENUM_CLASS_TYPE_CMBUFFER_RT :
1852 static_cast< CmBuffer_RT* >( surface )->GetHandle(handle);
1853 curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->bufferTable[handle].osResource;
1854 break;
1855
1856 case CM_ENUM_CLASS_TYPE_CMSURFACE2D :
1857 static_cast< CmSurface2DRT* >( surface )->GetHandle(handle);
1858 curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->umdSurf2DTable[handle].osResource;
1859 break;
1860
1861 case CM_ENUM_CLASS_TYPE_CMSURFACE2DUP:
1862 static_cast< CmSurface2DUPRT* >( surface )->GetHandle(handle);
1863 curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->surf2DUPTable[handle].osResource;
1864 break;
1865
1866 case CM_ENUM_CLASS_TYPE_CMSURFACE3D :
1867 static_cast< CmSurface3DRT* >( surface )->GetHandle(handle);
1868 curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->surf3DTable[handle].osResource;
1869 break;
1870
1871 case CM_ENUM_CLASS_TYPE_CMSURFACEVME:
1872 static_cast< CmSurfaceVme* >( surface )->GetIndexCurrent(handle);
1873 curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->umdSurf2DTable[handle].osResource; // current surface
1874 static_cast< CmSurfaceVme* >( surface )->GetIndexForwardCount(refSurfCnt);
1875 static_cast< CmSurfaceVme* >( surface )->GetIndexForwardArray(refSurfHandleArray);
1876 for(i = 0; i < refSurfCnt; i++)
1877 {
1878 curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->umdSurf2DTable[refSurfHandleArray[i]].osResource; // forward surfaces
1879 }
1880 static_cast< CmSurfaceVme* >( surface )->GetIndexForwardCount(refSurfCnt);
1881 static_cast< CmSurfaceVme* >( surface )->GetIndexForwardArray(refSurfHandleArray);
1882 for(i = 0; i < refSurfCnt; i++)
1883 {
1884 curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->umdSurf2DTable[refSurfHandleArray[i]].osResource; // backward surfaces
1885 }
1886 break;
1887
1888 case CM_ENUM_CLASS_TYPE_CMSURFACESAMPLER8X8:
1889 static_cast< CmSurfaceSampler8x8* >( surface )->GetIndexCurrent(handle);
1890 curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->umdSurf2DTable[handle].osResource;
1891 break;
1892
1893 case CM_ENUM_CLASS_TYPE_CMSURFACESAMPLER:
1894 static_cast< CmSurfaceSampler* >( surface )->GetHandle(handle);
1895 SAMPLER_SURFACE_TYPE type;
1896 static_cast< CmSurfaceSampler* >( surface )->GetSurfaceType(type);
1897 if (type == SAMPLER_SURFACE_TYPE_2D)
1898 {
1899 curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->umdSurf2DTable[handle].osResource;
1900 }
1901 else if (type == SAMPLER_SURFACE_TYPE_2DUP)
1902 {
1903 curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->surf2DUPTable[handle].osResource;
1904 }
1905 else if (type == SAMPLER_SURFACE_TYPE_3D)
1906 {
1907 curTaskSurfResArray[curTaskSurfCnt++] = (void *)&state->surf3DTable[handle].osResource;
1908 }
1909 else
1910 {
1911 hr = CM_INVALID_ARG_INDEX;
1912 goto finish;
1913 }
1914 break;
1915
1916 default:
1917 break;
1918 }
1919 }
1920 }
1921
1922 m_isSurfaceUpdateDone = true;
1923 }
1924
1925 // Check if there is any secure surface.
1926 if (curTaskSurfCnt > 0 && state->osInterface && state->osInterface->osCpInterface)
1927 {
1928 state->osInterface->osCpInterface->PrepareResources(curTaskSurfResArray, curTaskSurfCnt, nullptr, 0);
1929 }
1930
1931 finish:
1932 surfaceLock->Release();
1933 if (curTaskSurfResArray)
1934 {
1935 MOS_FreeMemory(curTaskSurfResArray);
1936 curTaskSurfResArray = nullptr;
1937 }
1938
1939 return hr;
1940 }
1941
1942 #if CM_LOG_ON
Log()1943 std::string CmTaskInternal::Log()
1944 {
1945 std::ostringstream oss;
1946
1947 oss << "Enqueue Task Type:" << m_taskType
1948 << " Kernel Count:" << m_kernelCount
1949 << " Total Thread Count:" << m_totalThreadCount
1950 << " Sync Bit:"<<m_ui64SyncBitmap
1951 << " Conditional End Bit:" << m_ui64ConditionalEndBitmap
1952 << std::endl;
1953
1954 switch(m_taskType)
1955 {
1956 case CM_INTERNAL_TASK_WITH_THREADSPACE:
1957 if ( m_isThreadSpaceCreated )
1958 {
1959 oss << "Thread Space Width :" << m_threadSpaceWidth << " Height :" << m_threadSpaceHeight
1960 << "Walker Patten :" << (int)m_walkingPattern << std::endl;
1961 }
1962 break;
1963
1964 case CM_INTERNAL_TASK_WITH_THREADGROUPSPACE:
1965 if(m_isThreadGroupSpaceCreated)
1966 {
1967 oss << "Thread Group Space Width:" << m_groupSpaceWidth << " Height:" << m_groupSpaceHeight
1968 << "SLM Size:" <<m_slmSize << std::endl;
1969 }
1970 break;
1971
1972 case CM_INTERNAL_TASK_VEBOX:
1973 break;
1974
1975 case CM_INTERNAL_TASK_ENQUEUEWITHHINTS:
1976 oss << " Hints :" << m_hints
1977 << " Thread Space Width :" << m_threadSpaceWidth
1978 << " Height :" << m_threadSpaceHeight
1979 << " Walker Patten :" << (int)m_walkingPattern
1980 << std::endl;
1981 break;
1982
1983 default: // by default, assume the task is considered as general task: CM_INTERNAL_TASK_WITH_THREADSPACE
1984 break;
1985 }
1986
1987 for (uint32_t i=0 ; i< m_kernelCount; i++)
1988 {
1989 CmKernelRT* kernel = (CmKernelRT*)m_kernels.GetElement( i );
1990
1991 oss << kernel->Log(); // log each kernel
1992 }
1993
1994 return oss.str();
1995 }
1996
GetHalState()1997 CM_HAL_STATE* CmTaskInternal::GetHalState() { return m_cmDevice->GetHalState(); }
1998
1999 #endif // #if CM_LOG_ON
2000
SurfaceDump(int32_t taskId)2001 void CmTaskInternal::SurfaceDump(int32_t taskId)
2002 {
2003 #if MDF_SURFACE_CONTENT_DUMP
2004 for (uint32_t i=0 ; i< m_kernelCount; i++)
2005 {
2006 CmKernelRT* kernel = (CmKernelRT*)m_kernels.GetElement( i );
2007 kernel->SurfaceDump(i, taskId);
2008 }
2009 #endif
2010 }
2011
SetProperty(CM_TASK_CONFIG * taskConfig)2012 int32_t CmTaskInternal::SetProperty(CM_TASK_CONFIG * taskConfig)
2013 {
2014 if (taskConfig == nullptr)
2015 {
2016 CM_ASSERTMESSAGE("Error: Pointer to task config is null.");
2017 return CM_NULL_POINTER;
2018 }
2019 CmSafeMemCopy(&m_taskConfig, taskConfig, sizeof(m_taskConfig));
2020 return CM_SUCCESS;
2021 }
2022
GetProperty(CM_TASK_CONFIG & taskConfig)2023 int32_t CmTaskInternal::GetProperty(CM_TASK_CONFIG &taskConfig)
2024 {
2025 taskConfig = m_taskConfig;
2026 return CM_SUCCESS;
2027 }
2028
GetMediaStatePtr()2029 void *CMRT_UMD::CmTaskInternal::GetMediaStatePtr()
2030 {
2031 return m_mediaStatePtr;
2032 }
2033 } // namespace
2034