1 /*
2 * Copyright (c) 2017-2021, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file cm_hal.cpp
24 //! \brief HAL Layer for CM Component
25 //!
26 #include "mos_os.h"
27 #include "cm_hal.h"
28 #include "media_interfaces_cmhal.h"
29 #include "media_interfaces_mhw.h"
30 #include "cm_common.h"
31 #include "cm_hal_vebox.h"
32 #include "cm_mem.h"
33 #include "renderhal_platform_interface.h"
34 #include "cm_execution_adv.h"
35 #include "cm_extension_creator.h"
36 #include "mos_interface.h"
37
// Number of 'base'-sized units needed to hold (index * elemperIndex) elements,
// i.e. the product divided by 'base', rounded up.
// The remainder test is grouped on its own so that the conditional applies to
// the remainder only; without that grouping '+' binds tighter than '?:' and
// the whole expression collapses to 0 or 1.
// Every argument is parenthesized so compound expressions can be passed.
// Arguments are evaluated more than once: do not pass expressions with side
// effects. 'base' must be non-zero.
#define INDEX_ALIGN(index, elemperIndex, base) \
    ((((index) * (elemperIndex)) / (base)) + (((((index) * (elemperIndex)) % (base)) != 0) ? 1 : 0))
39
//----------------------------------
//| CM scoreboard XY
//| Pair of signed 32-bit x / y values.
//| PCM_HAL_SCOREBOARD_XY is the matching pointer type.
//----------------------------------
typedef struct CM_HAL_SCOREBOARD_XY
{
    int32_t x;
    int32_t y;
} *PCM_HAL_SCOREBOARD_XY;
49
//---------------------------------------
//| CM scoreboard XY with mask
//| Signed 32-bit x / y values plus two 8-bit fields (mask, resetMask).
//| PCM_HAL_SCOREBOARD_XY_MASK is the matching pointer type.
//---------------------------------------
typedef struct CM_HAL_SCOREBOARD_XY_MASK
{
    int32_t x;
    int32_t y;
    uint8_t mask;
    uint8_t resetMask;
} *PCM_HAL_SCOREBOARD_XY_MASK;
61
//------------------------------------------------------------------------------
//| CM kernel slice and subslice being assigned to (for EnqueueWithHints)
//| Two unsigned 32-bit values: slice and subSlice.
//| PCM_HAL_KERNEL_SLICE_SUBSLICE is the matching pointer type.
//------------------------------------------------------------------------------
typedef struct CM_HAL_KERNEL_SLICE_SUBSLICE
{
    uint32_t slice;
    uint32_t subSlice;
} *PCM_HAL_KERNEL_SLICE_SUBSLICE;
71
72 //------------------------------------------------------------------------------
73 //| CM kernel information for EnqueueWithHints to assign subslice
74 //------------------------------------------------------------------------------
75 struct CM_HAL_KERNEL_SUBSLICE_INFO
76 {
77 uint32_t numSubSlices;
78 uint32_t counter;
79 PCM_HAL_KERNEL_SLICE_SUBSLICE destination;
80 };
81 typedef CM_HAL_KERNEL_SUBSLICE_INFO *PCM_HAL_KERNEL_SUBSLICE_INFO;
82
83 // forward declaration
84 int32_t HalCm_InsertCloneKernel(
85 PCM_HAL_STATE state,
86 PCM_HAL_KERNEL_PARAM kernelParam,
87 PRENDERHAL_KRN_ALLOCATION &kernelAllocation);
88
89 #if MDF_COMMAND_BUFFER_DUMP
90 extern int32_t HalCm_InitDumpCommandBuffer(PCM_HAL_STATE state);
91
92 extern int32_t HalCm_DumpCommadBuffer(PCM_HAL_STATE state, PMOS_COMMAND_BUFFER cmdBuffer,
93 int offsetSurfaceState, size_t sizeOfSurfaceState);
94 #endif
95
96 #if MDF_CURBE_DATA_DUMP
97 extern int32_t HalCm_InitDumpCurbeData(PCM_HAL_STATE state);
98
99 extern int32_t HalCm_DumpCurbeData(PCM_HAL_STATE state);
100 #endif
101
102 #if MDF_SURFACE_CONTENT_DUMP
103 extern int32_t HalCm_InitSurfaceDump(PCM_HAL_STATE state);
104
105 #endif
106
107 #if MDF_SURFACE_STATE_DUMP
108 extern int32_t HalCm_InitDumpSurfaceState(PCM_HAL_STATE state);
109 extern int32_t HalCm_DumpSurfaceState(PCM_HAL_STATE state, int offsetSurfaceState, size_t sizeOfSurfaceState);
110 #endif
111
112 #if MDF_INTERFACE_DESCRIPTOR_DATA_DUMP
113 extern int32_t HalCm_InitDumpInterfaceDescriporData(PCM_HAL_STATE state);
114 extern int32_t HalCm_DumpInterfaceDescriptorData(PCM_HAL_STATE state);
115 #endif
116
117 extern uint64_t HalCm_GetTsFrequency(PMOS_INTERFACE pOsInterface);
118
119 //===============<Private Functions>============================================
120 //*-----------------------------------------------------------------------------
121 //| Purpose: Align to the next power of 2
122 //| Returns: Aligned data
123 //| Reference: http://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2
124 //*-----------------------------------------------------------------------------
HalCm_GetPow2Aligned(uint32_t d)125 __inline uint32_t HalCm_GetPow2Aligned(uint32_t d)
126 {
127 CM_ASSERT(d > 0);
128
129 // subtract the number first
130 --d;
131
132 d |= d >> 1;
133 d |= d >> 2;
134 d |= d >> 4;
135 d |= d >> 8;
136 d |= d >> 16;
137
138 return ++d;
139 }
140
141 //*-----------------------------------------------------------------------------
142 //| Purpose: Checks if Task has any thread arguments
143 //| Returns: True if task has any thread arguments, false otherwise
144 //*-----------------------------------------------------------------------------
HalCm_GetTaskHasThreadArg(PCM_HAL_KERNEL_PARAM * kernels,uint32_t numKernels)145 bool HalCm_GetTaskHasThreadArg(PCM_HAL_KERNEL_PARAM *kernels, uint32_t numKernels)
146 {
147 PCM_HAL_KERNEL_PARAM kernelParam;
148 PCM_HAL_KERNEL_ARG_PARAM argParam;
149 bool threadArgExists = false;
150
151 for( uint32_t krn = 0; krn < numKernels; krn++)
152 {
153 kernelParam = kernels[krn];
154 for(uint32_t argIndex = 0; argIndex < kernelParam->numArgs; argIndex++)
155 {
156 argParam = &kernelParam->argParams[argIndex];
157 if( argParam->perThread )
158 {
159 threadArgExists = true;
160 break;
161 }
162 }
163
164 if( threadArgExists )
165 break;
166 }
167
168 return threadArgExists;
169 }
170
171 //*-----------------------------------------------------------------------------
172 //| Purpose: Allocate Timestamp Resource
173 //| Returns: Result of the operation
174 //*-----------------------------------------------------------------------------
HalCm_AllocateTsResource(PCM_HAL_STATE state)175 MOS_STATUS HalCm_AllocateTsResource(
176 PCM_HAL_STATE state) // [in] Pointer to CM HAL State
177 {
178 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
179 uint32_t size;
180 PMOS_INTERFACE osInterface;
181 MOS_ALLOC_GFXRES_PARAMS allocParams;
182 MOS_LOCK_PARAMS lockFlags;
183
184 osInterface = state->osInterface;
185 CM_CHK_NULL_GOTOFINISH_MOSERROR(osInterface);
186
187 size = state->cmHalInterface->GetTimeStampResourceSize() * state->cmDeviceParam.maxTasks;
188 // allocate render engine Ts Resource
189 MOS_ZeroMemory(&allocParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
190 allocParams.Type = MOS_GFXRES_BUFFER;
191 allocParams.dwBytes = size;
192 allocParams.Format = Format_Buffer; //used in RenderHal_OsAllocateResource_Linux
193 allocParams.TileType= MOS_TILE_LINEAR;
194 allocParams.pBufName = "TsResource";
195
196 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(
197 osInterface->pfnAllocateResource(osInterface,
198 &allocParams,
199 &state->renderTimeStampResource.osResource));
200
201 // RegisterResource will be called in AddResourceToHWCmd. It is not allowed to be called by hal explicitly
202 if (!osInterface->apoMosEnabled)
203 {
204 CM_CHK_MOSSTATUS_GOTOFINISH(
205 osInterface->pfnRegisterResource(osInterface,
206 &state->renderTimeStampResource.osResource,
207 true,
208 true));
209 }
210
211 osInterface->pfnSkipResourceSync(&state->renderTimeStampResource.osResource);
212
213 // Lock the Resource
214 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
215
216 lockFlags.ReadOnly = 1;
217 lockFlags.ForceCached = true;
218
219 state->renderTimeStampResource.data = (uint8_t*)osInterface->pfnLockResource(
220 osInterface,
221 &state->renderTimeStampResource.osResource,
222 &lockFlags);
223
224 CM_CHK_NULL_GOTOFINISH_MOSERROR(state->renderTimeStampResource.data);
225
226 state->renderTimeStampResource.locked = true;
227
228 //allocated for vebox TS resource
229
230 MOS_ZeroMemory(&allocParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
231 allocParams.Type = MOS_GFXRES_BUFFER;
232 allocParams.dwBytes = size;
233 allocParams.Format = Format_Buffer; //used in RenderHal_OsAllocateResource_Linux
234 allocParams.TileType = MOS_TILE_LINEAR;
235 allocParams.pBufName = "TsResource";
236
237 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
238 osInterface,
239 &allocParams,
240 &state->veboxTimeStampResource.osResource));
241
242 // Lock the Resource
243 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
244
245 lockFlags.ReadOnly = 1;
246 lockFlags.ForceCached = true;
247
248 state->veboxTimeStampResource.data = (uint8_t*)osInterface->pfnLockResource(
249 osInterface,
250 &state->veboxTimeStampResource.osResource,
251 &lockFlags);
252
253 CM_CHK_NULL_GOTOFINISH_MOSERROR(state->veboxTimeStampResource.data);
254
255 state->veboxTimeStampResource.locked = true;
256
257 finish:
258 return eStatus;
259 }
260
261 //! \brief Allocate tracker resource
262 //! \param [in] state
263 //! Pointer to CM_HAL_STATE structure
264 //! \return MOS_STATUS
HalCm_AllocateTrackerResource(PCM_HAL_STATE state)265 MOS_STATUS HalCm_AllocateTrackerResource(
266 PCM_HAL_STATE state)
267 {
268 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
269 MOS_ALLOC_GFXRES_PARAMS allocParamsLinearBuffer;
270 MOS_LOCK_PARAMS lockFlags;
271 PMOS_INTERFACE osInterface;
272 PRENDERHAL_INTERFACE renderHal;
273
274 osInterface = state->osInterface;
275 renderHal = state->renderHal;
276
277 CM_CHK_NULL_GOTOFINISH_MOSERROR(osInterface);
278 // Tracker producer for RENDER engine
279 renderHal->trackerProducer.Initialize(osInterface);
280
281 // Tracker resource for VeBox engine
282 osInterface->pfnResetResource(&renderHal->veBoxTrackerRes.osResource);
283
284 MOS_ZeroMemory(&allocParamsLinearBuffer, sizeof(MOS_ALLOC_GFXRES_PARAMS));
285 allocParamsLinearBuffer.Type = MOS_GFXRES_BUFFER;
286 allocParamsLinearBuffer.TileType = MOS_TILE_LINEAR;
287 allocParamsLinearBuffer.Format = Format_Buffer;
288 allocParamsLinearBuffer.dwBytes = MHW_CACHELINE_SIZE;
289 allocParamsLinearBuffer.pBufName = "VeboxTrackerRes";
290
291 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
292 osInterface,
293 &allocParamsLinearBuffer,
294 &renderHal->veBoxTrackerRes.osResource));
295
296 // Lock the Resource
297 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
298
299 lockFlags.ReadOnly = 1;
300 lockFlags.ForceCached = true;
301
302 renderHal->veBoxTrackerRes.data = (uint32_t*)osInterface->pfnLockResource(
303 osInterface,
304 &renderHal->veBoxTrackerRes.osResource,
305 &lockFlags);
306
307 CM_CHK_NULL_GOTOFINISH_MOSERROR(renderHal->veBoxTrackerRes.data);
308
309 *(renderHal->veBoxTrackerRes.data) = MemoryBlock::m_invalidTrackerId;
310
311 renderHal->veBoxTrackerRes.currentTrackerId = 1;
312
313 renderHal->veBoxTrackerRes.locked = true;
314
315 finish:
316 return eStatus;
317 }
318
319 //! \brief Initialize dynamic state heap
320 //! \param [in] state
321 //! Pointer to CM_HAL_STATE structure
322 //! \param [in] heapParam
323 //! Pointer to CM_HAL_HEAP_PARAM structure
324 //! \return MOS_STATUS
HalCm_InitializeDynamicStateHeaps(PCM_HAL_STATE state,CM_HAL_HEAP_PARAM * heapParam)325 MOS_STATUS HalCm_InitializeDynamicStateHeaps(
326 PCM_HAL_STATE state,
327 CM_HAL_HEAP_PARAM *heapParam)
328 {
329 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
330 HeapManager* dgsHeap = state->renderHal->dgsheapManager;
331
332 CM_CHK_NULL_GOTOFINISH_MOSERROR(heapParam);
333
334 dgsHeap = MOS_New(HeapManager);
335 CM_CHK_NULL_GOTOFINISH_MOSERROR(dgsHeap);
336 CM_CHK_MOSSTATUS_GOTOFINISH(dgsHeap->RegisterOsInterface(state->osInterface));
337
338 dgsHeap->SetDefaultBehavior(heapParam->behaviorGSH);
339 CM_CHK_MOSSTATUS_GOTOFINISH(dgsHeap->SetInitialHeapSize(heapParam->initialSizeGSH));
340 CM_CHK_MOSSTATUS_GOTOFINISH(dgsHeap->SetExtendHeapSize(heapParam->extendSizeGSH));
341 CM_CHK_MOSSTATUS_GOTOFINISH(dgsHeap->RegisterTrackerProducer(heapParam->trackerProducer));
342 // lock the heap in the beginning, so cpu doesn't need to wait gpu finishing occupying it to lock it again
343 CM_CHK_MOSSTATUS_GOTOFINISH(dgsHeap->LockHeapsOnAllocate());
344
345 state->renderHal->dgsheapManager = dgsHeap;
346
347 finish:
348 return eStatus;
349 }
350
351
352 //*-----------------------------------------------------------------------------
353 //| Purpose: Free Timestamp Resource
354 //| Returns: Result of the operation
355 //*-----------------------------------------------------------------------------
HalCm_FreeTsResource(PCM_HAL_STATE state)356 __inline void HalCm_FreeTsResource(
357 PCM_HAL_STATE state) // [in] Pointer to CM HAL State
358 {
359 PMOS_INTERFACE osInterface;
360 MOS_STATUS hr;
361 MOS_GFXRES_FREE_FLAGS resFreeFlags = {0};
362
363 resFreeFlags.AssumeNotInUse = 1;
364 osInterface = state->osInterface;
365
366 if (!Mos_ResourceIsNull(&state->renderTimeStampResource.osResource))
367 {
368 if (state->renderTimeStampResource.locked)
369 {
370 hr = (MOS_STATUS)osInterface->pfnUnlockResource(
371 osInterface,
372 &state->renderTimeStampResource.osResource);
373
374 CM_ASSERT(hr == MOS_STATUS_SUCCESS);
375 }
376
377 osInterface->pfnFreeResourceWithFlag(
378 osInterface,
379 &state->renderTimeStampResource.osResource,
380 resFreeFlags.Value);
381 }
382
383 //free vebox TS resource
384
385 if (!Mos_ResourceIsNull(&state->veboxTimeStampResource.osResource))
386 {
387 if (state->veboxTimeStampResource.locked)
388 {
389 hr = (MOS_STATUS)osInterface->pfnUnlockResource(
390 osInterface,
391 &state->veboxTimeStampResource.osResource);
392
393 CM_ASSERT(hr == MOS_STATUS_SUCCESS);
394 }
395
396 osInterface->pfnFreeResourceWithFlag(
397 osInterface,
398 &state->veboxTimeStampResource.osResource,
399 resFreeFlags.Value);
400 }
401 }
402
403 //! \brief Free tracker resource
404 //! \param PCM_HAL_STATE state
405 //! [in] Pointer to CM_HAL_STATE structure
406 //! \return void
HalCm_FreeTrackerResources(PCM_HAL_STATE state)407 __inline void HalCm_FreeTrackerResources(
408 PCM_HAL_STATE state) // [in] Pointer to CM HAL State
409 {
410 PMOS_INTERFACE osInterface;
411 MOS_STATUS hr;
412 MOS_GFXRES_FREE_FLAGS resFreeFlags = {0};
413
414 resFreeFlags.AssumeNotInUse = 1;
415 osInterface = state->osInterface;
416
417 if (!Mos_ResourceIsNull(&state->renderHal->veBoxTrackerRes.osResource))
418 {
419 if (state->renderHal->veBoxTrackerRes.locked)
420 {
421 hr = (MOS_STATUS)osInterface->pfnUnlockResource(
422 osInterface,
423 &state->renderHal->veBoxTrackerRes.osResource);
424
425 CM_ASSERT(hr == MOS_STATUS_SUCCESS);
426 }
427
428 osInterface->pfnFreeResourceWithFlag(
429 osInterface,
430 &state->renderHal->veBoxTrackerRes.osResource,
431 resFreeFlags.Value);
432 }
433 }
434
435 //*-----------------------------------------------------------------------------
436 //| Purpose: Allocate CSR Resource
437 //| Returns: Result of the operation
438 //*-----------------------------------------------------------------------------
HalCm_AllocateCSRResource(PCM_HAL_STATE state)439 MOS_STATUS HalCm_AllocateCSRResource(
440 PCM_HAL_STATE state) // [in] Pointer to CM HAL State
441 {
442 PMOS_INTERFACE osInterface = state->osInterface;
443 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
444 uint32_t size;
445 MOS_ALLOC_GFXRES_PARAMS allocParams;
446
447 //Enable Mid-thread
448 state->renderHal->pfnEnableGpgpuMiddleThreadPreemption(state->renderHal);
449
450 size = CM_CSR_SURFACE_SIZE;
451
452 MOS_ZeroMemory(&allocParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
453 allocParams.Type = MOS_GFXRES_BUFFER;
454 allocParams.dwBytes = size;
455 allocParams.Format = Format_RAW; //used in VpHal_OsAllocateResource_Linux
456 allocParams.TileType = MOS_TILE_LINEAR;
457 allocParams.pBufName = "CSRResource";
458
459 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
460 osInterface,
461 &allocParams,
462 &state->csrResource));
463
464 osInterface->pfnSkipResourceSync(&state->csrResource);
465
466 finish:
467 return eStatus;
468 }
469
470 //*-----------------------------------------------------------------------------
471 //| Purpose: Allocate Sip Resource
472 //| Returns: Result of the operation
473 //*-----------------------------------------------------------------------------
HalCm_AllocateSipResource(PCM_HAL_STATE state)474 MOS_STATUS HalCm_AllocateSipResource(
475 PCM_HAL_STATE state) // [in] Pointer to CM HAL State
476 {
477 PMOS_INTERFACE osInterface = state->osInterface;
478 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
479 uint32_t size;
480 MOS_ALLOC_GFXRES_PARAMS allocParams;
481 MOS_LOCK_PARAMS lockFlags;
482
483 size = CM_DEBUG_SURFACE_SIZE;
484
485 MOS_ZeroMemory(&allocParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
486 allocParams.Type = MOS_GFXRES_BUFFER;
487 allocParams.dwBytes = size;
488 allocParams.Format = Format_Buffer; //used in RenderHal_OsAllocateResource_Linux
489 allocParams.TileType = MOS_TILE_LINEAR;
490 allocParams.pBufName = "SipResource";
491
492 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
493 osInterface,
494 &allocParams,
495 &state->sipResource.osResource));
496
497 // Lock the Resource
498 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
499
500 lockFlags.ReadOnly = 1;
501 lockFlags.ForceCached = true;
502
503 state->sipResource.data = (uint8_t*)osInterface->pfnLockResource(
504 osInterface,
505 &state->sipResource.osResource,
506 &lockFlags);
507 CM_CHK_NULL_GOTOFINISH_MOSERROR(state->sipResource.data);
508
509 state->sipResource.locked = true;
510
511 finish:
512 return eStatus;
513 }
514
515 //*-----------------------------------------------------------------------------
516 //| Purpose: Free CSR Resource
517 //| Returns: Result of the operation
518 //*-----------------------------------------------------------------------------
HalCm_FreeCsrResource(PCM_HAL_STATE state)519 __inline void HalCm_FreeCsrResource(
520 PCM_HAL_STATE state) // [in] Pointer to CM HAL State
521 {
522 PMOS_INTERFACE osInterface = state->osInterface;
523 MOS_GFXRES_FREE_FLAGS resFreeFlags = {0};
524
525 resFreeFlags.AssumeNotInUse = 1;
526
527 if (!Mos_ResourceIsNull(&state->csrResource))
528 {
529 osInterface->pfnFreeResourceWithFlag(
530 osInterface,
531 &state->csrResource,
532 resFreeFlags.Value);
533 }
534 }
535
536 //*-----------------------------------------------------------------------------
537 //| Purpose: Free Sip Resource
538 //| Returns: Result of the operation
539 //*-----------------------------------------------------------------------------
HalCm_FreeSipResource(PCM_HAL_STATE state)540 __inline void HalCm_FreeSipResource(
541 PCM_HAL_STATE state) // [in] Pointer to CM HAL State
542 {
543 PMOS_INTERFACE osInterface = state->osInterface;
544 MOS_STATUS hr = MOS_STATUS_SUCCESS;
545 MOS_GFXRES_FREE_FLAGS resFreeFlags = {0};
546
547 resFreeFlags.AssumeNotInUse = 1;
548
549 if (!Mos_ResourceIsNull(&state->sipResource.osResource))
550 {
551 if (state->sipResource.locked)
552 {
553 hr = (MOS_STATUS)osInterface->pfnUnlockResource(
554 osInterface,
555 &state->sipResource.osResource);
556
557 CM_ASSERT(hr == MOS_STATUS_SUCCESS);
558 }
559
560 osInterface->pfnFreeResourceWithFlag(
561 osInterface,
562 &state->sipResource.osResource,
563 resFreeFlags.Value);
564 }
565 }
566
567 //*-----------------------------------------------------------------------------
568 //| Purpose: Sets Arg data in the buffer
569 //| Returns: Result of the operation
570 //*-----------------------------------------------------------------------------
HalCm_SetArgData(PCM_HAL_KERNEL_ARG_PARAM argParam,uint32_t threadIndex,uint8_t * buffer)571 __inline void HalCm_SetArgData(
572 PCM_HAL_KERNEL_ARG_PARAM argParam,
573 uint32_t threadIndex,
574 uint8_t *buffer)
575 {
576 uint8_t *dst;
577 uint8_t *src;
578
579 dst = buffer + argParam->payloadOffset;
580 src = argParam->firstValue + (threadIndex * argParam->unitSize);
581
582 MOS_SecureMemcpy(dst, argParam->unitSize, src, argParam->unitSize);
583 }
584
585 //*-----------------------------------------------------------------------------
586 //| Purpose: Get the Buffer Entry
587 //| Returns: Result of the operation.
588 //*-----------------------------------------------------------------------------
HalCm_GetResourceUPEntry(PCM_HAL_STATE state,uint32_t handle,PCM_HAL_SURFACE2D_UP_ENTRY * entryOut)589 __inline MOS_STATUS HalCm_GetResourceUPEntry(
590 PCM_HAL_STATE state, // [in] Pointer to CM State
591 uint32_t handle, // [in] Handle
592 PCM_HAL_SURFACE2D_UP_ENTRY *entryOut) // [out] Buffer Entry
593 {
594 MOS_STATUS eStatus;
595 PCM_HAL_SURFACE2D_UP_ENTRY entry;
596
597 eStatus = MOS_STATUS_SUCCESS;
598
599 if (handle >= state->cmDeviceParam.max2DSurfaceUPTableSize)
600 {
601 eStatus = MOS_STATUS_INVALID_HANDLE;
602 CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
603 goto finish;
604 }
605
606 entry = &state->surf2DUPTable[handle];
607 if (entry->width == 0)
608 {
609 eStatus = MOS_STATUS_INVALID_HANDLE;
610 CM_ASSERTMESSAGE("handle '%d' is not set", handle);
611 goto finish;
612 }
613
614 *entryOut = entry;
615
616 finish:
617 return eStatus;
618 }
619
620 //*-----------------------------------------------------------------------------
621 //| Purpose: Get the Buffer Entry
622 //| Returns: Result of the operation.
623 //*-----------------------------------------------------------------------------
HalCm_GetBufferEntry(PCM_HAL_STATE state,uint32_t handle,PCM_HAL_BUFFER_ENTRY * entryOut)624 __inline MOS_STATUS HalCm_GetBufferEntry(
625 PCM_HAL_STATE state, // [in] Pointer to CM State
626 uint32_t handle, // [in] Handle
627 PCM_HAL_BUFFER_ENTRY *entryOut) // [out] Buffer Entry
628 {
629 MOS_STATUS eStatus;
630 PCM_HAL_BUFFER_ENTRY entry;
631
632 eStatus = MOS_STATUS_SUCCESS;
633
634 if (handle >= state->cmDeviceParam.maxBufferTableSize)
635 {
636 eStatus = MOS_STATUS_INVALID_HANDLE;
637 CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
638 goto finish;
639 }
640
641 entry = &state->bufferTable[handle];
642 if (entry->size == 0)
643 {
644 eStatus = MOS_STATUS_INVALID_HANDLE;
645 CM_ASSERTMESSAGE("handle '%d' is not set", handle);
646 goto finish;
647 }
648
649 *entryOut = entry;
650
651 finish:
652 return eStatus;
653 }
654
655 //*-----------------------------------------------------------------------------
656 //| Purpose: Get the Surface2D Entry
657 //| Returns: Result of the operation.
658 //*-----------------------------------------------------------------------------
HalCm_GetSurface2DEntry(PCM_HAL_STATE state,uint32_t handle,PCM_HAL_SURFACE2D_ENTRY * entryOut)659 __inline MOS_STATUS HalCm_GetSurface2DEntry(
660 PCM_HAL_STATE state, // [in] Pointer to CM State
661 uint32_t handle, // [in] Handle
662 PCM_HAL_SURFACE2D_ENTRY *entryOut) // [out] Buffer Entry
663 {
664 MOS_STATUS eStatus;
665 PCM_HAL_SURFACE2D_ENTRY entry;
666
667 eStatus = MOS_STATUS_SUCCESS;
668
669 if (handle >= state->cmDeviceParam.max2DSurfaceTableSize)
670 {
671 eStatus = MOS_STATUS_INVALID_HANDLE;
672 CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
673 goto finish;
674 }
675
676 entry = &state->umdSurf2DTable[handle];
677 if ((entry->width == 0)||(entry->height == 0))
678 {
679 eStatus = MOS_STATUS_INVALID_HANDLE;
680 CM_ASSERTMESSAGE("handle '%d' is not set", handle);
681 goto finish;
682 }
683
684 *entryOut = entry;
685
686 finish:
687 return eStatus;
688 }
689
690 //*-----------------------------------------------------------------------------
691 //| Purpose: Get the 3D Entry
692 //| Returns: Result of the operation.
693 //*-----------------------------------------------------------------------------
HalCm_Get3DResourceEntry(PCM_HAL_STATE state,uint32_t handle,PCM_HAL_3DRESOURCE_ENTRY * entryOut)694 __inline MOS_STATUS HalCm_Get3DResourceEntry(
695 PCM_HAL_STATE state, // [in] Pointer to CM State
696 uint32_t handle, // [in] Handle
697 PCM_HAL_3DRESOURCE_ENTRY *entryOut) // [out] Buffer Entry
698 {
699 MOS_STATUS eStatus;
700 PCM_HAL_3DRESOURCE_ENTRY entry;
701
702 eStatus = MOS_STATUS_SUCCESS;
703
704 if (handle >= state->cmDeviceParam.max3DSurfaceTableSize)
705 {
706 eStatus = MOS_STATUS_INVALID_HANDLE;
707 CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
708 goto finish;
709 }
710
711 entry = &state->surf3DTable[handle];
712 if (Mos_ResourceIsNull(&entry->osResource))
713 {
714 eStatus = MOS_STATUS_INVALID_HANDLE;
715 CM_ASSERTMESSAGE("3D handle '%d' is not set", handle);
716 goto finish;
717 }
718
719 *entryOut = entry;
720
721 finish:
722 return eStatus;
723 }
724
725 //*-----------------------------------------------------------------------------
726 //| Purpose: Allocates and sets up Task Param memory structure
727 //| Return: true if enabled
728 //| Note: A single layer of memory is allocated to avoid fragmentation
729 //*-----------------------------------------------------------------------------
HalCm_AllocateTables(PCM_HAL_STATE state)730 MOS_STATUS HalCm_AllocateTables(
731 PCM_HAL_STATE state) // [in] Pointer to HAL CM state
732 {
733 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
734 PCM_HAL_DEVICE_PARAM deviceParam;
735 uint8_t *pb;
736
737 deviceParam = &state->cmDeviceParam;
738
739 uint32_t lookUpTableSize = deviceParam->max2DSurfaceTableSize *
740 sizeof(CMLOOKUP_ENTRY);
741 uint32_t i2DSURFTableSize = deviceParam->max2DSurfaceTableSize *
742 sizeof(CM_HAL_SURFACE2D_ENTRY);
743 uint32_t bufferTableSize = deviceParam->maxBufferTableSize *
744 sizeof(CM_HAL_BUFFER_ENTRY);
745 uint32_t i2DSURFUPTableSize = deviceParam->max2DSurfaceUPTableSize *
746 sizeof(CM_HAL_SURFACE2D_UP_ENTRY);
747 uint32_t i3DSurfTableSize = deviceParam->max3DSurfaceTableSize *
748 sizeof(CM_HAL_3DRESOURCE_ENTRY);
749 uint32_t samplerTableSize = deviceParam->maxSamplerTableSize *
750 sizeof(MHW_SAMPLER_STATE_PARAM);
751 uint32_t sampler8x8TableSize = deviceParam->maxSampler8x8TableSize *
752 sizeof(CM_HAL_SAMPLER_8X8_ENTRY);
753 uint32_t taskStatusTableSize = deviceParam->maxTasks * sizeof(char);
754 uint32_t bt2DIndexTableSize = deviceParam->max2DSurfaceTableSize * sizeof(CM_HAL_MULTI_USE_BTI_ENTRY);
755 uint32_t bt2DUPIndexTableSize = deviceParam->max2DSurfaceUPTableSize * sizeof(CM_HAL_MULTI_USE_BTI_ENTRY);
756 uint32_t bt3DIndexTableSize = deviceParam->max3DSurfaceTableSize * sizeof(CM_HAL_MULTI_USE_BTI_ENTRY);
757 uint32_t btbufferIndexTableSize = deviceParam->maxBufferTableSize * sizeof(CM_HAL_MULTI_USE_BTI_ENTRY);
758 uint32_t samplerIndexTableSize = deviceParam->maxSamplerTableSize * sizeof(char);
759 uint32_t sampler8x8IndexTableSize = deviceParam->maxSampler8x8TableSize * sizeof(char);
760
761 uint32_t size = lookUpTableSize +
762 i2DSURFTableSize +
763 bufferTableSize +
764 i2DSURFUPTableSize +
765 i3DSurfTableSize +
766 samplerTableSize +
767 sampler8x8TableSize +
768 taskStatusTableSize +
769 bt2DIndexTableSize +
770 bt2DUPIndexTableSize +
771 bt3DIndexTableSize +
772 btbufferIndexTableSize +
773 samplerIndexTableSize +
774 sampler8x8IndexTableSize;
775
776 state->tableMemories = MOS_AllocAndZeroMemory(size);
777 CM_CHK_NULL_GOTOFINISH_MOSERROR(state->tableMemories);
778 pb = (uint8_t*)state->tableMemories;
779
780 state->surf2DTable = (PCMLOOKUP_ENTRY)pb;
781 pb += lookUpTableSize;
782
783 state->umdSurf2DTable = (PCM_HAL_SURFACE2D_ENTRY)pb;
784 pb += i2DSURFTableSize;
785
786 state->bufferTable = (PCM_HAL_BUFFER_ENTRY)pb;
787 pb += bufferTableSize;
788
789 state->surf2DUPTable = (PCM_HAL_SURFACE2D_UP_ENTRY)pb;
790 pb += i2DSURFUPTableSize;
791
792 state->surf3DTable = (PCM_HAL_3DRESOURCE_ENTRY)pb;
793 pb += i3DSurfTableSize;
794
795 state->samplerTable = (PMHW_SAMPLER_STATE_PARAM)pb;
796 pb += samplerTableSize;
797
798 state->sampler8x8Table = (PCM_HAL_SAMPLER_8X8_ENTRY)pb;
799 pb += sampler8x8TableSize;
800
801 state->taskStatusTable = (char *)pb;
802 pb += taskStatusTableSize;
803
804 state->bti2DIndexTable = (PCM_HAL_MULTI_USE_BTI_ENTRY)pb;
805 pb += bt2DIndexTableSize;
806
807 state->bti2DUPIndexTable = (PCM_HAL_MULTI_USE_BTI_ENTRY)pb;
808 pb += bt2DUPIndexTableSize;
809
810 state->bti3DIndexTable = (PCM_HAL_MULTI_USE_BTI_ENTRY)pb;
811 pb += bt3DIndexTableSize;
812
813 state->btiBufferIndexTable = (PCM_HAL_MULTI_USE_BTI_ENTRY)pb;
814 pb += btbufferIndexTableSize;
815
816 state->samplerIndexTable = (char *)pb;
817 pb += samplerIndexTableSize;
818
819 state->sampler8x8IndexTable = (char *)pb;
820 pb += sampler8x8IndexTableSize;
821
822 finish:
823 return MOS_STATUS_SUCCESS;
824 }
825
826 //*-----------------------------------------------------------------------------
827 //| Purpose: Adds a tag to distinguish between same kernel ID
828 //| Used for batch buffer re-use when splitting large task into
829 //| smaller pieces for EnqueueWithHints
830 //| Using bits [48:42] from kernel ID for extra tag
831 //| Returns: Result of the operation
832 //*-----------------------------------------------------------------------------
HalCm_AddKernelIDTag(PCM_HAL_KERNEL_PARAM * pKernels,uint32_t numKernels,uint32_t numTasks,uint32_t numCurrentTask)833 MOS_STATUS HalCm_AddKernelIDTag(
834 PCM_HAL_KERNEL_PARAM *pKernels,
835 uint32_t numKernels,
836 uint32_t numTasks,
837 uint32_t numCurrentTask)
838 {
839 uint32_t i;
840 uint64_t tmpNumTasks;
841 uint64_t tmpNumCurrentTask;
842 uint64_t tmpNumTasksMask;
843 uint64_t tmpNumCurrentTaskMask;
844
845 tmpNumTasks = numTasks;
846 tmpNumCurrentTask = numCurrentTask;
847 tmpNumTasksMask = tmpNumTasks << 45;
848 tmpNumCurrentTaskMask = tmpNumCurrentTask << 42;
849
850 for( i = 0; i < numKernels; ++i )
851 {
852 pKernels[i]->kernelId |= tmpNumTasksMask;
853 pKernels[i]->kernelId |= tmpNumCurrentTaskMask;
854 }
855
856 return MOS_STATUS_SUCCESS;
857 }
858
859 //*-----------------------------------------------------------------------------
860 //| Purpose: Gets the Batch Buffer for rendering. If needed, de-allocate /
861 //| allocate the memory for BB
862 //| Returns: Result of the operation
863 //*-----------------------------------------------------------------------------
HalCm_GetBatchBuffer(PCM_HAL_STATE state,uint32_t numKernels,PCM_HAL_KERNEL_PARAM * kernels,PMHW_BATCH_BUFFER * batchBufferOut)864 MOS_STATUS HalCm_GetBatchBuffer(
865 PCM_HAL_STATE state, // [in] Pointer to CM State
866 uint32_t numKernels, // [in] Number of Kernels
867 PCM_HAL_KERNEL_PARAM *kernels, // [in] Array for kernel data
868 PMHW_BATCH_BUFFER *batchBufferOut) // [out] Batch Buffer Out
869 {
870 MOS_STATUS eStatus;
871 PMHW_BATCH_BUFFER batchBuffer = nullptr;
872 PRENDERHAL_INTERFACE renderHal;
873 int32_t size;
874 uint32_t i;
875 uint32_t j;
876 uint32_t k;
877 int32_t freeIdx;
878 uint64_t kernelIds[CM_MAX_KERNELS_PER_TASK];
879 uint64_t kernelParamsIds[CM_MAX_KERNELS_PER_TASK];
880 CM_HAL_BB_DIRTY_STATUS bbDirtyStatus;
881 PCM_HAL_BB_ARGS bbcmArgs;
882
883 eStatus = MOS_STATUS_SUCCESS;
884 renderHal = state->renderHal;
885 freeIdx = CM_INVALID_INDEX;
886 bbDirtyStatus = CM_HAL_BB_CLEAN;
887
888 // Align the Batch Buffer size to power of 2
889 size = HalCm_GetPow2Aligned(state->taskParam->batchBufferSize);
890
891 MOS_ZeroMemory(&kernelIds, CM_MAX_KERNELS_PER_TASK * sizeof(uint64_t));
892 MOS_ZeroMemory(&kernelParamsIds, CM_MAX_KERNELS_PER_TASK * sizeof(uint64_t));
893
894 //Sanity check for batch buffer
895 if (size > CM_MAX_BB_SIZE)
896 {
897 eStatus = MOS_STATUS_EXCEED_MAX_BB_SIZE;
898 CM_ASSERTMESSAGE("Batch Buffer Size exeeceds Max '%d'", size);
899 goto finish;
900 }
901
902 for( i = 0; i < numKernels; ++i )
903 {
904 // remove upper 16 bits used for kernel binary re-use in GSH
905 kernelParamsIds[i] = ((kernels[i])->kernelId << 16 ) >> 16;
906 }
907
908 #if CM_BATCH_BUFFER_REUSE_ENABLE
909
910 bbDirtyStatus = CM_HAL_BB_CLEAN;
911 for (k = 0; k < numKernels; ++k)
912 {
913 if (kernels[k]->kernelThreadSpaceParam.bbDirtyStatus == CM_HAL_BB_DIRTY)
914 {
915 bbDirtyStatus = CM_HAL_BB_DIRTY;
916 break;
917 }
918 }
919
920 for (i = 0; i < (uint32_t)state->numBatchBuffers; i++)
921 {
922 batchBuffer = &state->batchBuffers[i];
923 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
924 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
925
926 //if (!Mos_ResourceIsNull(&batchBuffer->OsResource) && (!batchBuffer->bBusy))
927 if (!Mos_ResourceIsNull(&batchBuffer->OsResource))
928 {
929 MOS_FillMemory(kernelIds, sizeof(uint64_t)*CM_MAX_KERNELS_PER_TASK, 0);
930 for (j = 0; j < numKernels; j ++)
931 {
932 kernelIds[j] = kernelParamsIds[j];
933 }
934
935 bbcmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
936 if (RtlEqualMemory(kernelIds, bbcmArgs->kernelIds, sizeof(uint64_t)*CM_MAX_KERNELS_PER_TASK))
937 {
938 if( batchBuffer->bBusy && bbDirtyStatus == CM_HAL_BB_DIRTY )
939 {
940 bbcmArgs->latest = false;
941 }
942 else if( bbcmArgs->latest == true )
943 {
944 break;
945 }
946 }
947 }
948 }
949 if (i < (uint32_t)state->numBatchBuffers)
950 {
951 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
952 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
953 bbcmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
954
955 bbcmArgs->refCount ++;
956 batchBuffer->iCurrent = 0;
957 batchBuffer->dwSyncTag = 0;
958 batchBuffer->iRemaining = batchBuffer->iSize;
959 *batchBufferOut = batchBuffer;
960 eStatus = MOS_STATUS_SUCCESS;
961 goto finish;
962 }
963 #endif
964
965 for (i = 0; i < (uint32_t)state->numBatchBuffers; i++)
966 {
967 batchBuffer = &state->batchBuffers[i];
968 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
969 // No holes in the array of batch buffers
970 if (Mos_ResourceIsNull(&batchBuffer->OsResource))
971 {
972 freeIdx = i;
973 break;
974 }
975 }
976 if (freeIdx == CM_INVALID_INDEX)
977 {
978 for (i = 0; i < (uint32_t)state->numBatchBuffers; i++)
979 {
980 batchBuffer = &state->batchBuffers[i];
981 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
982 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
983 bbcmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
984 if (!batchBuffer->bBusy)
985 {
986 if (batchBuffer->iSize >= size)
987 {
988 batchBuffer->iCurrent = 0;
989 batchBuffer->iRemaining = batchBuffer->iSize;
990 batchBuffer->dwSyncTag = 0;
991
992 bbcmArgs->refCount = 1;
993 for (i = 0; i <numKernels; i ++)
994 {
995 bbcmArgs->kernelIds[i] = kernelParamsIds[i];
996 }
997
998 bbcmArgs->latest = true;
999
1000 *batchBufferOut = batchBuffer;
1001 eStatus = MOS_STATUS_SUCCESS;
1002 goto finish;
1003 }
1004
1005 if (freeIdx == CM_INVALID_INDEX)
1006 {
1007 freeIdx = i;
1008 }
1009 }
1010 }
1011 }
1012 if (freeIdx == CM_INVALID_INDEX)
1013 {
1014 eStatus = MOS_STATUS_INVALID_PARAMETER;
1015 CM_ASSERTMESSAGE("No batch buffer available");
1016 goto finish;
1017 }
1018
1019 batchBuffer = &state->batchBuffers[freeIdx];
1020 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
1021 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
1022 bbcmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
1023 bbcmArgs->refCount = 1;
1024 for (i = 0; i <numKernels; i ++)
1025 {
1026 bbcmArgs->kernelIds[i] = kernelParamsIds[i];
1027 }
1028
1029 bbcmArgs->latest = true;
1030
1031 if (!Mos_ResourceIsNull(&batchBuffer->OsResource))
1032 {
1033 // Deallocate Batch Buffer
1034 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnFreeBB(renderHal, batchBuffer));
1035 }
1036
1037 // Allocate Batch Buffer
1038 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAllocateBB(renderHal, batchBuffer, size));
1039 *batchBufferOut = batchBuffer;
1040
1041 finish:
1042 return eStatus;
1043 }
1044
1045 //*-----------------------------------------------------------------------------
1046 //| Purpose: Parse the Kernel and populate the Task Param structure
1047 //| Return: Result of the operation
1048 //*-----------------------------------------------------------------------------
HalCm_ParseTask(PCM_HAL_STATE state,PCM_HAL_EXEC_TASK_PARAM execParam)1049 MOS_STATUS HalCm_ParseTask(
1050 PCM_HAL_STATE state, // [in] Pointer to HAL CM state
1051 PCM_HAL_EXEC_TASK_PARAM execParam) // [in] Pointer to Exec Task Param
1052 {
1053 MOS_STATUS eStatus;
1054 PCM_HAL_TASK_PARAM taskParam;
1055 PCM_HAL_KERNEL_PARAM kernelParam;
1056 uint32_t hdrSize;
1057 uint32_t totalThreads;
1058 uint32_t krn;
1059 uint32_t curbeOffset;
1060 PMHW_VFE_SCOREBOARD scoreboardParams;
1061 uint32_t hasThreadArg;
1062 bool nonstallingScoreboardEnable;
1063 CM_HAL_DEPENDENCY vfeDependencyInfo;
1064 PCM_HAL_KERNEL_THREADSPACE_PARAM kernelTSParam;
1065 uint32_t i, j, k;
1066 uint8_t reuseBBUpdateMask;
1067 bool bitIsSet;
1068 PCM_HAL_MASK_AND_RESET dependencyMask;
1069 uint32_t uSurfaceNumber;
1070 uint32_t uSurfaceIndex;
1071 bool threadArgExists;
1072
1073 eStatus = MOS_STATUS_SUCCESS;
1074 curbeOffset = 0;
1075 totalThreads = 0;
1076 taskParam = state->taskParam;
1077 taskParam->batchBufferSize = 0;
1078 hasThreadArg = 0;
1079 nonstallingScoreboardEnable = true;
1080 reuseBBUpdateMask = 0;
1081 bitIsSet = false;
1082 threadArgExists = false;
1083 hdrSize = state->renderHal->pHwSizes->dwSizeMediaObjectHeaderCmd;
1084 taskParam->dependencyPattern = execParam->dependencyPattern;
1085 taskParam->threadSpaceWidth = execParam->threadSpaceWidth;
1086 taskParam->threadSpaceHeight = execParam->threadSpaceHeight;
1087 taskParam->walkingPattern = execParam->walkingPattern;
1088 taskParam->walkingParamsValid = execParam->walkingParamsValid;
1089 taskParam->dependencyVectorsValid = execParam->dependencyVectorsValid;
1090 if( taskParam->walkingParamsValid )
1091 {
1092 taskParam->walkingParams = execParam->walkingParams;
1093 }
1094 if( taskParam->dependencyVectorsValid )
1095 {
1096 taskParam->dependencyVectors = execParam->dependencyVectors;
1097 }
1098 taskParam->kernelDebugEnabled = (uint32_t)execParam->kernelDebugEnabled;
1099 //GT-PIN
1100 taskParam->surfEntryInfoArrays = execParam->surfEntryInfoArrays;
1101
1102 taskParam->surfacePerBT = 0;
1103
1104 taskParam->colorCountMinusOne = execParam->colorCountMinusOne;
1105 taskParam->mediaWalkerGroupSelect = execParam->mediaWalkerGroupSelect;
1106
1107 if (execParam->threadCoordinates)
1108 {
1109 taskParam->threadCoordinates = execParam->threadCoordinates;
1110 }
1111
1112 taskParam->dependencyMasks = execParam->dependencyMasks;
1113 taskParam->syncBitmap = execParam->syncBitmap;
1114 taskParam->conditionalEndBitmap = execParam->conditionalEndBitmap;
1115 MOS_SecureMemcpy(taskParam->conditionalEndInfo, sizeof(taskParam->conditionalEndInfo), execParam->conditionalEndInfo, sizeof(execParam->conditionalEndInfo));
1116
1117 taskParam->numKernels = execParam->numKernels;
1118 taskParam->taskConfig = execParam->taskConfig;
1119 state->walkerParams.CmWalkerEnable = true;
1120 state->renderHal->IsMDFLoad = (taskParam->taskConfig.turboBoostFlag == CM_TURBO_BOOST_ENABLE);
1121
1122 for (krn = 0; krn < execParam->numKernels; krn++)
1123 {
1124 if ((execParam->kernels[krn] == nullptr) ||
1125 (execParam->kernelSizes[krn] == 0))
1126 {
1127 eStatus = MOS_STATUS_INVALID_PARAMETER;
1128 CM_ASSERTMESSAGE("Invalid Kernel data");
1129 goto finish;
1130 }
1131
1132 kernelParam = (PCM_HAL_KERNEL_PARAM)execParam->kernels[krn];
1133 PCM_INDIRECT_SURFACE_INFO indirectSurfaceInfo = kernelParam->indirectDataParam.surfaceInfo;
1134 uSurfaceNumber = 0;
1135 if (kernelParam->indirectDataParam.surfaceCount)
1136 {
1137 uSurfaceIndex = 0;
1138 for (i = 0; i < kernelParam->indirectDataParam.surfaceCount; i++)
1139 {
1140 uSurfaceIndex = (indirectSurfaceInfo + i)->bindingTableIndex > uSurfaceIndex ? ((indirectSurfaceInfo + i)->bindingTableIndex + (indirectSurfaceInfo + i)->numBTIPerSurf - 1) : uSurfaceIndex;
1141 uSurfaceNumber = uSurfaceNumber + (indirectSurfaceInfo + i)->numBTIPerSurf;
1142 }
1143 taskParam->surfacePerBT = taskParam->surfacePerBT > uSurfaceIndex ? taskParam->surfacePerBT : uSurfaceIndex;
1144 }
1145
1146 uSurfaceNumber += kernelParam->numSurfaces;
1147 taskParam->surfacePerBT = taskParam->surfacePerBT < uSurfaceNumber ?
1148 uSurfaceNumber : taskParam->surfacePerBT;
1149
1150 // 26Z must be media object because by default it uses thread dependency mask
1151 // if there is no thread payload and dependency is not WAVEFRONT26Z, check if walker can be used
1152 if ( kernelParam->payloadSize == 0)
1153 {
1154 //per-kernel thread space is avaiable, and check it at first
1155 if((kernelParam->kernelThreadSpaceParam.threadSpaceWidth != 0) &&
1156 (kernelParam->kernelThreadSpaceParam.patternType != CM_WAVEFRONT26Z) &&
1157 (kernelParam->kernelThreadSpaceParam.patternType != CM_WAVEFRONT26ZI) &&
1158 (kernelParam->kernelThreadSpaceParam.threadCoordinates == nullptr))
1159 {
1160 kernelParam->walkerParams.cmWalkerEnable = true;
1161 kernelParam->walkerParams.groupIdLoopSelect = execParam->mediaWalkerGroupSelect;
1162 }
1163 else if (kernelParam->kernelThreadSpaceParam.threadSpaceWidth == 0)
1164 {
1165 //Check per-task thread space setting
1166 if (state->taskParam->threadCoordinates)
1167 {
1168 if (state->taskParam->threadCoordinates[krn] == nullptr)
1169 {
1170 kernelParam->walkerParams.cmWalkerEnable = true;
1171 kernelParam->walkerParams.groupIdLoopSelect = execParam->mediaWalkerGroupSelect;
1172 }
1173 }
1174 else
1175 {
1176 kernelParam->walkerParams.cmWalkerEnable = true;
1177 kernelParam->walkerParams.groupIdLoopSelect = execParam->mediaWalkerGroupSelect;
1178 }
1179 }
1180 }
1181
1182 //Media walker mode will be disabled if any kernel need use media object, we don't support mixed working modes
1183 state->walkerParams.CmWalkerEnable &= kernelParam->walkerParams.cmWalkerEnable;
1184
1185 if (!state->walkerParams.CmWalkerEnable)
1186 {
1187 taskParam->batchBufferSize +=
1188 kernelParam->numThreads * (hdrSize + MOS_MAX(kernelParam->payloadSize, 4));
1189 }
1190
1191 totalThreads += kernelParam->numThreads;
1192
1193 }
1194
1195 taskParam->batchBufferSize += CM_EXTRA_BB_SPACE;
1196
1197 if (state->cmHalInterface->IsScoreboardParamNeeded())
1198 {
1199 scoreboardParams = &state->scoreboardParams;
1200 scoreboardParams->ScoreboardMask = 0;
1201 scoreboardParams->ScoreboardType = nonstallingScoreboardEnable;
1202
1203 // set VFE scoreboarding information from union of kernel dependency vectors
1204 MOS_ZeroMemory(&vfeDependencyInfo, sizeof(CM_HAL_DEPENDENCY));
1205 for (krn = 0; krn < execParam->numKernels; krn++)
1206 {
1207 kernelParam = execParam->kernels[krn];
1208 kernelTSParam = &kernelParam->kernelThreadSpaceParam;
1209
1210 // calculate union dependency vector of all kernels with dependency
1211 if (kernelTSParam->dependencyInfo.count || kernelTSParam->dependencyVectorsValid)
1212 {
1213 if (vfeDependencyInfo.count == 0)
1214 {
1215 if (kernelTSParam->dependencyInfo.count)
1216 {
1217 MOS_SecureMemcpy(&vfeDependencyInfo, sizeof(CM_HAL_DEPENDENCY), &kernelTSParam->dependencyInfo, sizeof(CM_HAL_DEPENDENCY));
1218 }
1219 else if (kernelTSParam->dependencyVectorsValid)
1220 {
1221 MOS_SecureMemcpy(&vfeDependencyInfo, sizeof(CM_HAL_DEPENDENCY), &kernelTSParam->dependencyVectors, sizeof(CM_HAL_DEPENDENCY));
1222 }
1223 kernelTSParam->globalDependencyMask = (1 << vfeDependencyInfo.count) - 1;
1224 }
1225 else
1226 {
1227 uint32_t count = 0;
1228 CM_HAL_DEPENDENCY dependencyInfo;
1229 if (kernelTSParam->dependencyVectorsValid)
1230 {
1231 count = kernelTSParam->dependencyVectors.count;
1232 MOS_SecureMemcpy(&dependencyInfo.deltaX, sizeof(int32_t) * count, &kernelTSParam->dependencyVectors.deltaX, sizeof(int32_t) * count);
1233 MOS_SecureMemcpy(&dependencyInfo.deltaY, sizeof(int32_t) * count, &kernelTSParam->dependencyVectors.deltaY, sizeof(int32_t) * count);
1234 }
1235 else
1236 {
1237 count = kernelTSParam->dependencyInfo.count;
1238 MOS_SecureMemcpy(&dependencyInfo.deltaX, sizeof(int32_t) * count, &kernelTSParam->dependencyInfo.deltaX, sizeof(int32_t) * count);
1239 MOS_SecureMemcpy(&dependencyInfo.deltaY, sizeof(int32_t) * count, &kernelTSParam->dependencyInfo.deltaY, sizeof(int32_t) * count);
1240 }
1241
1242 for (j = 0; j < count; ++j)
1243 {
1244 for (k = 0; k < vfeDependencyInfo.count; ++k)
1245 {
1246 if ((dependencyInfo.deltaX[j] == vfeDependencyInfo.deltaX[k]) &&
1247 (dependencyInfo.deltaY[j] == vfeDependencyInfo.deltaY[k]))
1248 {
1249 CM_HAL_SETBIT(kernelTSParam->globalDependencyMask, k);
1250 break;
1251 }
1252 }
1253 if (k == vfeDependencyInfo.count)
1254 {
1255 vfeDependencyInfo.deltaX[vfeDependencyInfo.count] = dependencyInfo.deltaX[j];
1256 vfeDependencyInfo.deltaY[vfeDependencyInfo.count] = dependencyInfo.deltaY[j];
1257 CM_HAL_SETBIT(kernelTSParam->globalDependencyMask, vfeDependencyInfo.count);
1258 vfeDependencyInfo.count++;
1259 }
1260 }
1261 }
1262 }
1263
1264 reuseBBUpdateMask |= kernelTSParam->reuseBBUpdateMask;
1265 }
1266
1267 if (vfeDependencyInfo.count > CM_HAL_MAX_DEPENDENCY_COUNT)
1268 {
1269 eStatus = MOS_STATUS_INVALID_PARAMETER;
1270 CM_ASSERTMESSAGE("Union of kernel dependencies exceeds max dependency count (8)");
1271 goto finish;
1272 }
1273
1274 scoreboardParams->ScoreboardMask = (uint8_t)vfeDependencyInfo.count;
1275 for (i = 0; i < scoreboardParams->ScoreboardMask; ++i)
1276 {
1277 scoreboardParams->ScoreboardDelta[i].x = vfeDependencyInfo.deltaX[i];
1278 scoreboardParams->ScoreboardDelta[i].y = vfeDependencyInfo.deltaY[i];
1279 }
1280
1281 //If no dependency defined in kernel data, then check per-task thread space setting
1282 if (scoreboardParams->ScoreboardMask == 0)
1283 {
1284 if (taskParam->dependencyVectorsValid)
1285 {
1286 scoreboardParams->ScoreboardMask = (uint8_t)taskParam->dependencyVectors.count;
1287 for (uint32_t i = 0; i < scoreboardParams->ScoreboardMask; ++i)
1288 {
1289 scoreboardParams->ScoreboardDelta[i].x = taskParam->dependencyVectors.deltaX[i];
1290 scoreboardParams->ScoreboardDelta[i].y = taskParam->dependencyVectors.deltaY[i];
1291 }
1292 }
1293 else
1294 {
1295 switch (taskParam->dependencyPattern)
1296 {
1297 case CM_NONE_DEPENDENCY:
1298 break;
1299
1300 case CM_VERTICAL_WAVE:
1301 scoreboardParams->ScoreboardMask = 1;
1302 scoreboardParams->ScoreboardDelta[0].x = 0xF; // -1 in uint8_t:4
1303 scoreboardParams->ScoreboardDelta[0].y = 0;
1304 break;
1305
1306 case CM_HORIZONTAL_WAVE:
1307 scoreboardParams->ScoreboardMask = 1;
1308 scoreboardParams->ScoreboardDelta[0].x = 0;
1309 scoreboardParams->ScoreboardDelta[0].y = 0xF; // -1 in uint8_t:4
1310 break;
1311
1312 case CM_WAVEFRONT:
1313 scoreboardParams->ScoreboardMask = 3;
1314 scoreboardParams->ScoreboardDelta[0].x = 0xF; // -1 in uint8_t:4
1315 scoreboardParams->ScoreboardDelta[0].y = 0;
1316 scoreboardParams->ScoreboardDelta[1].x = 0xF; // -1 in uint8_t:4
1317 scoreboardParams->ScoreboardDelta[1].y = 0xF; // -1 in uint8_t:4
1318 scoreboardParams->ScoreboardDelta[2].x = 0;
1319 scoreboardParams->ScoreboardDelta[2].y = 0xF; // -1 in uint8_t:4
1320 break;
1321
1322 case CM_WAVEFRONT26:
1323 scoreboardParams->ScoreboardMask = 4;
1324 scoreboardParams->ScoreboardDelta[0].x = 0xF; // -1 in uint8_t:4
1325 scoreboardParams->ScoreboardDelta[0].y = 0;
1326 scoreboardParams->ScoreboardDelta[1].x = 0xF; // -1 in uint8_t:4
1327 scoreboardParams->ScoreboardDelta[1].y = 0xF; // -1 in uint8_t:4
1328 scoreboardParams->ScoreboardDelta[2].x = 0;
1329 scoreboardParams->ScoreboardDelta[2].y = 0xF; // -1 in uint8_t:4
1330 scoreboardParams->ScoreboardDelta[3].x = 1;
1331 scoreboardParams->ScoreboardDelta[3].y = 0xF; // -1 in uint8_t:4
1332 break;
1333
1334 case CM_WAVEFRONT26Z:
1335 case CM_WAVEFRONT26ZIG:
1336 scoreboardParams->ScoreboardMask = 5;
1337 scoreboardParams->ScoreboardDelta[0].x = 0xF; // -1 in uint8_t:4
1338 scoreboardParams->ScoreboardDelta[0].y = 1;
1339 scoreboardParams->ScoreboardDelta[1].x = 0xF; // -1 in uint8_t:4
1340 scoreboardParams->ScoreboardDelta[1].y = 0;
1341 scoreboardParams->ScoreboardDelta[2].x = 0xF; // -1 in uint8_t:4
1342 scoreboardParams->ScoreboardDelta[2].y = 0xF; // -1 in uint8_t:4
1343 scoreboardParams->ScoreboardDelta[3].x = 0;
1344 scoreboardParams->ScoreboardDelta[3].y = 0xF; // -1 in uint8_t:4
1345 scoreboardParams->ScoreboardDelta[4].x = 1;
1346 scoreboardParams->ScoreboardDelta[4].y = 0xF; // -1 in uint8_t:4
1347 break;
1348
1349 case CM_WAVEFRONT26ZI:
1350 scoreboardParams->ScoreboardMask = 7;
1351 scoreboardParams->ScoreboardDelta[0].x = 0xF; // -1 in uint8_t:4
1352 scoreboardParams->ScoreboardDelta[0].y = 1;
1353 scoreboardParams->ScoreboardDelta[1].x = 0xE; // -2
1354 scoreboardParams->ScoreboardDelta[1].y = 0;
1355 scoreboardParams->ScoreboardDelta[2].x = 0xF; // -1 in uint8_t:4
1356 scoreboardParams->ScoreboardDelta[2].y = 0;
1357 scoreboardParams->ScoreboardDelta[3].x = 0xF; // -1 in uint8_t:4
1358 scoreboardParams->ScoreboardDelta[3].y = 0xF; // -1 in uint8_t:4
1359 scoreboardParams->ScoreboardDelta[4].x = 0;
1360 scoreboardParams->ScoreboardDelta[4].y = 0xF; // -1 in uint8_t:4
1361 scoreboardParams->ScoreboardDelta[5].x = 1;
1362 scoreboardParams->ScoreboardDelta[5].y = 0xF; // -1 in uint8_t:4
1363 scoreboardParams->ScoreboardDelta[6].x = 1;
1364 scoreboardParams->ScoreboardDelta[6].y = 0;
1365 break;
1366
1367 case CM_WAVEFRONT26X:
1368 scoreboardParams->ScoreboardMask = 7;
1369 scoreboardParams->ScoreboardDelta[0].x = 0xF;
1370 scoreboardParams->ScoreboardDelta[0].y = 3;
1371 scoreboardParams->ScoreboardDelta[1].x = 0xF;
1372 scoreboardParams->ScoreboardDelta[1].y = 1;
1373 scoreboardParams->ScoreboardDelta[2].x = 0xF;
1374 scoreboardParams->ScoreboardDelta[2].y = 0xF;
1375 scoreboardParams->ScoreboardDelta[3].x = 0;
1376 scoreboardParams->ScoreboardDelta[3].y = 0xF;
1377 scoreboardParams->ScoreboardDelta[4].x = 0;
1378 scoreboardParams->ScoreboardDelta[4].y = 0xE;
1379 scoreboardParams->ScoreboardDelta[5].x = 0;
1380 scoreboardParams->ScoreboardDelta[5].y = 0xD;
1381 scoreboardParams->ScoreboardDelta[6].x = 1;
1382 scoreboardParams->ScoreboardDelta[6].y = 0xD;
1383 break;
1384
1385 default:
1386 taskParam->dependencyPattern = CM_NONE_DEPENDENCY;
1387 break;
1388
1389 }
1390 }
1391 }
1392 }
1393 //Set size of surface binding table size
1394 CM_SURFACE_BTI_INFO surfBTIInfo;
1395 state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
1396
1397 taskParam->surfacePerBT += surfBTIInfo.normalSurfaceStart ;
1398
1399 // add one if kernel debugger is enabled
1400 if (execParam->kernelDebugEnabled)
1401 {
1402 taskParam->surfacePerBT += CM_RESERVED_SURFACE_NUMBER_FOR_KERNEL_DEBUG;
1403 }
1404
1405 //If global surface is used and current surface bt size less than the max index of reserved surfaces
1406 //use set it as max bti size
1407 if ((execParam->globalSurfaceUsed) && (taskParam->surfacePerBT < surfBTIInfo.reservedSurfaceEnd))
1408 {
1409 taskParam->surfacePerBT = CM_MAX_STATIC_SURFACE_STATES_PER_BT;
1410 }
1411
1412 //Make sure surfacePerBT do not exceed CM_MAX_STATIC_SURFACE_STATES_PER_BT
1413 taskParam->surfacePerBT = MOS_MIN(CM_MAX_STATIC_SURFACE_STATES_PER_BT, taskParam->surfacePerBT);
1414
1415 if( taskParam->dependencyMasks )
1416 {
1417 for (krn = 0; krn < execParam->numKernels; krn++)
1418 {
1419 kernelParam = execParam->kernels[krn];
1420 dependencyMask = taskParam->dependencyMasks[krn];
1421 if( dependencyMask )
1422 {
1423 for( i = 0; i < kernelParam->numThreads; ++i )
1424 {
1425 reuseBBUpdateMask |= dependencyMask[i].resetMask;
1426 }
1427 }
1428 }
1429 }
1430
1431 CM_HAL_CHECKBIT_IS_SET(bitIsSet, reuseBBUpdateMask, CM_NO_BATCH_BUFFER_REUSE_BIT_POS);
1432 if( bitIsSet || reuseBBUpdateMask == 0 )
1433 {
1434 taskParam->reuseBBUpdateMask = 0;
1435 }
1436 else
1437 {
1438 taskParam->reuseBBUpdateMask = 1;
1439 }
1440
1441 threadArgExists = HalCm_GetTaskHasThreadArg(execParam->kernels, execParam->numKernels);
1442
1443 // For media object with thread arg, only support up to CM_MAX_USER_THREADS (512*512) threads
1444 // otherwise can support up to 262144 media object commands in batch buffer
1445 if (!state->walkerParams.CmWalkerEnable) {
1446 if (!threadArgExists)
1447 {
1448 if(totalThreads > CM_MAX_USER_THREADS_NO_THREADARG)
1449 {
1450 eStatus = MOS_STATUS_INVALID_PARAMETER;
1451 CM_ASSERTMESSAGE(
1452 "Total task threads '%d' exceeds max allowed threads '%d'",
1453 totalThreads,
1454 CM_MAX_USER_THREADS_NO_THREADARG);
1455 goto finish;
1456 }
1457 }
1458 else
1459 {
1460 if (totalThreads > CM_MAX_USER_THREADS)
1461 {
1462 eStatus = MOS_STATUS_INVALID_PARAMETER;
1463 CM_ASSERTMESSAGE(
1464 "Total task threads '%d' exceeds max allowed threads '%d'",
1465 totalThreads,
1466 CM_MAX_USER_THREADS);
1467 goto finish;
1468 }
1469 }
1470 }
1471
1472 taskParam->queueOption = execParam->queueOption;
1473
1474 finish:
1475 return eStatus;
1476 }
1477
1478 //*-----------------------------------------------------------------------------
1479 //| Purpose: Parse the Kernel and populate the Task Param structure
1480 //| Return: Result of the operation
1481 //*-----------------------------------------------------------------------------
HalCm_ParseGroupTask(PCM_HAL_STATE state,PCM_HAL_EXEC_GROUP_TASK_PARAM execGroupParam)1482 MOS_STATUS HalCm_ParseGroupTask(
1483 PCM_HAL_STATE state, // [in] Pointer to HAL CM state
1484 PCM_HAL_EXEC_GROUP_TASK_PARAM execGroupParam) // [in] Pointer to Exec Task Param
1485 {
1486 PCM_HAL_TASK_PARAM taskParam = state->taskParam;
1487 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
1488 PCM_HAL_KERNEL_PARAM kernelParam = nullptr;
1489 uint32_t uSurfaceIndex;
1490
1491 taskParam->surfEntryInfoArrays = execGroupParam->surEntryInfoArrays; //GT-PIN
1492 taskParam->batchBufferSize = 0;
1493 taskParam->kernelDebugEnabled = (uint32_t)execGroupParam->kernelDebugEnabled;
1494
1495 taskParam->numKernels = execGroupParam->numKernels;
1496 taskParam->syncBitmap = execGroupParam->syncBitmap;
1497 taskParam->conditionalEndBitmap = execGroupParam->conditionalEndBitmap;
1498 MOS_SecureMemcpy(taskParam->conditionalEndInfo, sizeof(taskParam->conditionalEndInfo),
1499 execGroupParam->conditionalEndInfo, sizeof(execGroupParam->conditionalEndInfo));
1500
1501 taskParam->taskConfig = execGroupParam->taskConfig;
1502
1503 MOS_SecureMemcpy(taskParam->krnExecCfg, sizeof(taskParam->krnExecCfg),
1504 execGroupParam->krnExecCfg, sizeof(execGroupParam->krnExecCfg));
1505
1506 for (uint32_t krn = 0; krn < execGroupParam->numKernels; krn ++)
1507 {
1508 kernelParam = execGroupParam->kernels[krn];
1509 PCM_INDIRECT_SURFACE_INFO indirectSurfaceInfo = kernelParam->indirectDataParam.surfaceInfo;
1510 uint32_t uSurfaceNumber = 0;
1511 if (kernelParam->indirectDataParam.surfaceCount)
1512 {
1513 uSurfaceIndex = 0;
1514 for (uint32_t i = 0; i < kernelParam->indirectDataParam.surfaceCount; i++)
1515 {
1516 uSurfaceIndex = (indirectSurfaceInfo + i)->bindingTableIndex > uSurfaceIndex ? (indirectSurfaceInfo + i)->bindingTableIndex : uSurfaceIndex;
1517 uSurfaceNumber++;
1518 }
1519 taskParam->surfacePerBT = taskParam->surfacePerBT > uSurfaceIndex ? taskParam->surfacePerBT : uSurfaceIndex;
1520 }
1521
1522 uSurfaceNumber += kernelParam->numSurfaces;
1523
1524 taskParam->surfacePerBT = taskParam->surfacePerBT < uSurfaceNumber ?
1525 uSurfaceNumber : taskParam->surfacePerBT;
1526 }
1527
1528 CM_SURFACE_BTI_INFO surfBTIInfo;
1529 state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
1530
1531 taskParam->surfacePerBT += surfBTIInfo.normalSurfaceStart ;
1532
1533 // add one if kernel debugger is enabled
1534 if (execGroupParam->kernelDebugEnabled)
1535 {
1536 taskParam->surfacePerBT += CM_RESERVED_SURFACE_NUMBER_FOR_KERNEL_DEBUG;
1537 }
1538
1539 //If global surface is used and current surface bt size less than the max index of reserved surfaces
1540 //use set it as max bti size
1541 if ((execGroupParam->globalSurfaceUsed) &&
1542 (taskParam->surfacePerBT < surfBTIInfo.reservedSurfaceEnd))
1543 {
1544 taskParam->surfacePerBT = CM_MAX_STATIC_SURFACE_STATES_PER_BT;
1545 }
1546
1547 //Make sure surfacePerBT do not exceed CM_MAX_STATIC_SURFACE_STATES_PER_BT
1548 taskParam->surfacePerBT = MOS_MIN(CM_MAX_STATIC_SURFACE_STATES_PER_BT, taskParam->surfacePerBT);
1549
1550 taskParam->queueOption = execGroupParam->queueOption;
1551 taskParam->mosVeHintParams = execGroupParam->mosVeHintParams;
1552
1553 return eStatus;
1554 }
1555
1556 //*-----------------------------------------------------------------------------
1557 //| Purpose: Parse the Kernel and populate the Hints Task Param structure
1558 //| Return: Result of the operation
1559 //*-----------------------------------------------------------------------------
HalCm_ParseHintsTask(PCM_HAL_STATE state,PCM_HAL_EXEC_HINTS_TASK_PARAM execHintsParam)1560 MOS_STATUS HalCm_ParseHintsTask(
1561 PCM_HAL_STATE state, // [in] Pointer to HAL CM state
1562 PCM_HAL_EXEC_HINTS_TASK_PARAM execHintsParam)
1563 {
1564 MOS_STATUS eStatus;
1565 PCM_HAL_TASK_PARAM taskParam;
1566 PCM_HAL_KERNEL_PARAM kernelParam;
1567 uint32_t hdrSize;
1568 uint32_t totalThreads;
1569 uint32_t krn;
1570 uint32_t curbeOffset;
1571 PMHW_VFE_SCOREBOARD scoreboardParams;
1572 uint32_t hasThreadArg;
1573 bool nonstallingScoreboardEnable;
1574 bool bitIsSet;
1575 uint8_t reuseBBUpdateMask;
1576 bool threadArgExists;
1577
1578 eStatus = MOS_STATUS_SUCCESS;
1579 krn = 0;
1580 taskParam = state->taskParam;
1581 nonstallingScoreboardEnable = true;
1582 bitIsSet = false;
1583 curbeOffset = 0;
1584 hasThreadArg = 0;
1585 totalThreads = 0;
1586 reuseBBUpdateMask = 0;
1587 threadArgExists = false;
1588
1589 hdrSize = state->renderHal->pHwSizes->dwSizeMediaObjectHeaderCmd;
1590 scoreboardParams = &state->scoreboardParams;
1591
1592 for( krn = 0; krn < execHintsParam->numKernels; ++krn )
1593 {
1594 if ((execHintsParam->kernels[krn] == nullptr) ||
1595 (execHintsParam->kernelSizes[krn] == 0))
1596 {
1597 eStatus = MOS_STATUS_INVALID_PARAMETER;
1598 CM_ASSERTMESSAGE("Invalid Kernel data");
1599 goto finish;
1600 }
1601
1602 // Parse the kernel Param
1603 kernelParam = execHintsParam->kernels[krn];
1604
1605 // if any kernel disables non-stalling, the non-stalling will be disabled
1606 nonstallingScoreboardEnable &= (kernelParam->cmFlags & CM_KERNEL_FLAGS_NONSTALLING_SCOREBOARD) ? true : false;
1607
1608 if (!state->walkerParams.CmWalkerEnable)
1609 {
1610 taskParam->batchBufferSize +=
1611 kernelParam->numThreads * (hdrSize + MOS_MAX(kernelParam->payloadSize, 4));
1612 }
1613
1614 totalThreads += kernelParam->numThreads;
1615
1616 reuseBBUpdateMask |= kernelParam->kernelThreadSpaceParam.reuseBBUpdateMask;
1617 }
1618
1619 CM_HAL_CHECKBIT_IS_SET(bitIsSet, reuseBBUpdateMask, CM_NO_BATCH_BUFFER_REUSE_BIT_POS);
1620 if( bitIsSet || reuseBBUpdateMask == 0 )
1621 {
1622 taskParam->reuseBBUpdateMask = 0;
1623 }
1624 else
1625 {
1626 taskParam->reuseBBUpdateMask = 1;
1627 }
1628
1629 taskParam->batchBufferSize += CM_EXTRA_BB_SPACE;
1630
1631 scoreboardParams->ScoreboardType = nonstallingScoreboardEnable;
1632
1633 threadArgExists = HalCm_GetTaskHasThreadArg(execHintsParam->kernels, execHintsParam->numKernels);
1634
1635 if (!state->walkerParams.CmWalkerEnable) {
1636 if (!threadArgExists)
1637 {
1638 if(totalThreads > CM_MAX_USER_THREADS_NO_THREADARG)
1639 {
1640 eStatus = MOS_STATUS_INVALID_PARAMETER;
1641 CM_ASSERTMESSAGE(
1642 "Total task threads '%d' exceeds max allowed threads '%d'",
1643 totalThreads,
1644 CM_MAX_USER_THREADS_NO_THREADARG);
1645 goto finish;
1646 }
1647 }
1648 else
1649 {
1650 if (totalThreads > CM_MAX_USER_THREADS)
1651 {
1652 eStatus = MOS_STATUS_INVALID_PARAMETER;
1653 CM_ASSERTMESSAGE(
1654 "Total task threads '%d' exceeds max allowed threads '%d'",
1655 totalThreads,
1656 CM_MAX_USER_THREADS);
1657 goto finish;
1658 }
1659 }
1660 }
1661
1662 taskParam->queueOption = execHintsParam->queueOption;
1663
1664 finish:
1665 return eStatus;
1666 }
1667
1668 /*
1669 ** check to see if kernel entry is flaged as free or it is null
1670 ** used for combining
1671 */
bIsFree(PRENDERHAL_KRN_ALLOCATION kAlloc)1672 bool bIsFree( PRENDERHAL_KRN_ALLOCATION kAlloc )
1673 {
1674 if (kAlloc== nullptr)
1675 {
1676 return false;
1677 }
1678 else
1679 {
1680 if (kAlloc->dwFlags != RENDERHAL_KERNEL_ALLOCATION_FREE)
1681 {
1682 return false;
1683 }
1684 }
1685
1686 return true;
1687 }
1688
1689 /*
1690 ** local used supporting function
1691 ** setup correct values according to input and copy kernelBinary as needed
1692 */
CmLoadKernel(PCM_HAL_STATE state,PRENDERHAL_STATE_HEAP stateHeap,PRENDERHAL_KRN_ALLOCATION kernelAllocation,uint32_t sync,uint32_t count,PRENDERHAL_KERNEL_PARAM parameters,PCM_HAL_KERNEL_PARAM kernelParam,MHW_KERNEL_PARAM * mhwKernelParam,bool isCloneEntry)1693 void CmLoadKernel(PCM_HAL_STATE state,
1694 PRENDERHAL_STATE_HEAP stateHeap,
1695 PRENDERHAL_KRN_ALLOCATION kernelAllocation,
1696 uint32_t sync,
1697 uint32_t count,
1698 PRENDERHAL_KERNEL_PARAM parameters,
1699 PCM_HAL_KERNEL_PARAM kernelParam,
1700 MHW_KERNEL_PARAM *mhwKernelParam,
1701 bool isCloneEntry)
1702 {
1703 UNUSED(state);
1704 if (mhwKernelParam)
1705 {
1706 kernelAllocation->iKID = -1;
1707 kernelAllocation->iKUID = mhwKernelParam->iKUID;
1708 kernelAllocation->iKCID = mhwKernelParam->iKCID;
1709 kernelAllocation->dwSync = sync;
1710 kernelAllocation->dwCount = count & 0xFFFFFFFF; // 28 bits
1711 kernelAllocation->dwFlags = RENDERHAL_KERNEL_ALLOCATION_USED;
1712 kernelAllocation->Params = *parameters;
1713 kernelAllocation->pMhwKernelParam = mhwKernelParam;
1714
1715 if (!isCloneEntry)
1716 {
1717 // Copy kernel data
1718 // Copy MovInstruction First
1719 MOS_SecureMemcpy(stateHeap->pIshBuffer + kernelAllocation->dwOffset,
1720 kernelParam->movInsDataSize,
1721 kernelParam->movInsData,
1722 kernelParam->movInsDataSize);
1723
1724 // Copy Cm Kernel Binary
1725 MOS_SecureMemcpy(stateHeap->pIshBuffer + kernelAllocation->dwOffset + kernelParam->movInsDataSize,
1726 kernelParam->kernelBinarySize - kernelParam->movInsDataSize,
1727 kernelParam->kernelBinary,
1728 kernelParam->kernelBinarySize - kernelParam->movInsDataSize);
1729
1730 // Padding bytes dummy instructions after kernel binary to resolve page fault issue
1731 MOS_ZeroMemory(stateHeap->pIshBuffer + kernelAllocation->dwOffset + kernelParam->kernelBinarySize, CM_KERNEL_BINARY_PADDING_SIZE);
1732 }
1733 }
1734 else
1735 {
1736 kernelAllocation->iKID = -1;
1737 kernelAllocation->iKUID = -1;
1738 kernelAllocation->iKCID = -1;
1739 kernelAllocation->dwSync = 0;
1740 FrameTrackerTokenFlat_Clear(&kernelAllocation->trackerToken);
1741 kernelAllocation->dwCount = 0;
1742 kernelAllocation->dwFlags = RENDERHAL_KERNEL_ALLOCATION_FREE;
1743 kernelAllocation->pMhwKernelParam = nullptr;
1744 kernelAllocation->cloneKernelParams.cloneKernelID = -1;
1745 kernelAllocation->cloneKernelParams.isClone = false;
1746 kernelAllocation->cloneKernelParams.isHeadKernel = false;
1747 kernelAllocation->cloneKernelParams.kernelBinaryAllocID = -1;
1748 kernelAllocation->cloneKernelParams.referenceCount = 0;
1749 }
1750 }
1751
1752 /*
1753 ** local used supporting function
1754 ** Try to find free entry which is big enough to load kernel binary
1755 ** If we cannot find one, then return fail, so we will delete more entries
1756 */
CmSearchFreeSlotSize(PCM_HAL_STATE state,MHW_KERNEL_PARAM * mhwKernelParam,bool isCloneEntry)1757 int32_t CmSearchFreeSlotSize(PCM_HAL_STATE state, MHW_KERNEL_PARAM *mhwKernelParam, bool isCloneEntry)
1758 {
1759 PRENDERHAL_STATE_HEAP stateHeap;
1760 PRENDERHAL_KRN_ALLOCATION kernelAllocation;
1761 int32_t kernelAllocationID;
1762 int32_t returnVal = -1;
1763 int32_t neededSize;
1764
1765 stateHeap = state->renderHal->pStateHeap;
1766 kernelAllocation = stateHeap->pKernelAllocation;
1767
1768 if (isCloneEntry)
1769 {
1770 neededSize = CM_64BYTE;
1771 }
1772 else
1773 {
1774 neededSize = mhwKernelParam->iSize;
1775 }
1776
1777 for (kernelAllocationID = 0;
1778 kernelAllocationID < state->kernelNumInGsh;
1779 kernelAllocationID++, kernelAllocation++)
1780 {
1781 if(kernelAllocation->dwFlags == RENDERHAL_KERNEL_ALLOCATION_FREE)
1782 {
1783 if(state->totalKernelSize[kernelAllocationID] >= neededSize)
1784 {
1785 // found free slot which is big enough
1786 return kernelAllocationID;
1787 }
1788 }
1789 }
1790
1791 // not found
1792 return returnVal;
1793 }
1794
1795 //*-----------------------------------------------------------------------------
1796 //| Purpose: Updates the clone entries' head kernel binary allocation IDs
1797 //| Function is called after kernel allocations are shifted due to combining neighboring free entries
1798 //| Return: Result of the operation
1799 //*-----------------------------------------------------------------------------
HalCm_UpdateCloneKernel(PCM_HAL_STATE state,uint32_t shiftPoint,CM_SHIFT_DIRECTION shiftDirection,uint32_t shiftFactor)1800 void HalCm_UpdateCloneKernel(PCM_HAL_STATE state,
1801 uint32_t shiftPoint,
1802 CM_SHIFT_DIRECTION shiftDirection,
1803 uint32_t shiftFactor)
1804 {
1805 PRENDERHAL_STATE_HEAP stateHeap;
1806 PRENDERHAL_KRN_ALLOCATION kernelAllocation;
1807 int32_t allocationID;
1808
1809 stateHeap = state->renderHal->pStateHeap;
1810 kernelAllocation = stateHeap->pKernelAllocation;
1811
1812 for (allocationID = 0; allocationID < state->kernelNumInGsh; allocationID++, kernelAllocation++)
1813 {
1814 kernelAllocation = &(stateHeap->pKernelAllocation[allocationID]);
1815 if (kernelAllocation->cloneKernelParams.isClone && ((kernelAllocation->cloneKernelParams.kernelBinaryAllocID) > (int32_t)shiftPoint))
1816 {
1817 if (shiftDirection == CM_SHIFT_LEFT)
1818 {
1819 kernelAllocation->cloneKernelParams.kernelBinaryAllocID = kernelAllocation->cloneKernelParams.kernelBinaryAllocID + shiftFactor;
1820 }
1821 else
1822 {
1823 kernelAllocation->cloneKernelParams.kernelBinaryAllocID = kernelAllocation->cloneKernelParams.kernelBinaryAllocID - shiftFactor;
1824 }
1825 }
1826 }
1827 }
1828
1829 /*
1830 ** local used supporting function
1831 ** We found free slot and load kernel to this slot. There are 3 cases (see code)
1832 */
CmAddCurrentKernelToFreeSlot(PCM_HAL_STATE state,int32_t slot,PRENDERHAL_KERNEL_PARAM parameters,PCM_HAL_KERNEL_PARAM kernelParam,MHW_KERNEL_PARAM * mhwKernelParam,CM_CLONE_TYPE cloneType,int32_t headKernelAllocationID)1833 int32_t CmAddCurrentKernelToFreeSlot(PCM_HAL_STATE state,
1834 int32_t slot,
1835 PRENDERHAL_KERNEL_PARAM parameters,
1836 PCM_HAL_KERNEL_PARAM kernelParam,
1837 MHW_KERNEL_PARAM *mhwKernelParam,
1838 CM_CLONE_TYPE cloneType,
1839 int32_t headKernelAllocationID)
1840 {
1841 PRENDERHAL_STATE_HEAP stateHeap;
1842 PRENDERHAL_KRN_ALLOCATION kernelAllocation, pKernelAllocationN;
1843
1844 int32_t hr = CM_SUCCESS;
1845 int32_t i;
1846 int32_t totalSize, tmpSize, dwOffset, neededSize;
1847 bool adjust, isCloneEntry, isHeadKernel, isCloneAsHead, adjustHeadKernelID;
1848 uint32_t tag;
1849
1850 stateHeap = state->renderHal->pStateHeap;
1851 kernelAllocation = stateHeap->pKernelAllocation;
1852 adjustHeadKernelID = false;
1853
1854 switch (cloneType)
1855 {
1856 case CM_CLONE_ENTRY:
1857 {
1858 neededSize = CM_64BYTE;
1859 isCloneEntry = true;
1860 isHeadKernel = false;
1861 isCloneAsHead = false;
1862 }
1863 break;
1864 case CM_HEAD_KERNEL:
1865 {
1866 neededSize = mhwKernelParam->iSize;
1867 isHeadKernel = true;
1868 isCloneEntry = false;
1869 isCloneAsHead = false;
1870 }
1871 break;
1872 case CM_CLONE_AS_HEAD_KERNEL:
1873 {
1874 neededSize = mhwKernelParam->iSize;
1875 isHeadKernel = true;
1876 isCloneEntry = false;
1877 isCloneAsHead = true;
1878 }
1879 break;
1880 case CM_NO_CLONE:
1881 {
1882 neededSize = mhwKernelParam->iSize;
1883 isCloneEntry = false;
1884 isHeadKernel = false;
1885 isCloneAsHead = false;
1886 }
1887 break;
1888 default:
1889 {
1890 hr = CM_FAILURE;
1891 goto finish;
1892 }
1893 }
1894
1895 // to check if we have perfect size match
1896 if(stateHeap->pKernelAllocation[slot].iSize == neededSize)
1897 {
1898 adjust = false;
1899 }
1900 else
1901 {
1902 adjust = true;
1903 }
1904
1905 if ((state->kernelNumInGsh < state->cmDeviceParam.maxGshKernelEntries) && adjust)
1906 {
1907 // we have extra entry to add
1908 // add new entry and pump index down below
1909 int32_t lastKernel = state->kernelNumInGsh - 1;
1910 for(i = lastKernel; i>slot; i--)
1911 {
1912 kernelAllocation = &stateHeap->pKernelAllocation[i];
1913 pKernelAllocationN = &stateHeap->pKernelAllocation[i+1];
1914 *pKernelAllocationN = *kernelAllocation;
1915 state->totalKernelSize[i+1] = state->totalKernelSize[i];
1916 }
1917
1918 if (lastKernel > slot)
1919 {
1920 // update the headKernelAllocationID if it was shifted
1921 if (headKernelAllocationID > slot)
1922 {
1923 headKernelAllocationID++;
1924 adjustHeadKernelID = true;
1925 }
1926 }
1927
1928 totalSize = state->totalKernelSize[slot];
1929 tmpSize = neededSize;
1930
1931 dwOffset = stateHeap->pKernelAllocation[slot].dwOffset;
1932
1933 // now add new one
1934 kernelAllocation = &stateHeap->pKernelAllocation[slot];
1935 if(state->cbbEnabled)
1936 {
1937 tag = state->osInterface->pfnGetGpuStatusTag(state->osInterface,
1938 state->osInterface->CurrentGpuContextOrdinal);
1939 }
1940 else
1941 {
1942 tag = stateHeap->dwNextTag;
1943 }
1944
1945 CmLoadKernel(state, stateHeap, kernelAllocation, tag, stateHeap->dwAccessCounter, parameters, kernelParam, mhwKernelParam, isCloneEntry);
1946 stateHeap->dwAccessCounter++;
1947
1948 kernelAllocation->iSize = tmpSize;
1949 state->totalKernelSize[slot] = MOS_ALIGN_CEIL(tmpSize, 64);
1950
1951 // insert a new slot which is free with rest
1952 tmpSize = MOS_ALIGN_CEIL(tmpSize, 64); // HW required 64 byte align
1953 kernelAllocation = &stateHeap->pKernelAllocation[slot+1];
1954 CmLoadKernel(state, stateHeap, kernelAllocation, 0, 0, parameters, kernelParam, nullptr, isCloneEntry);
1955 kernelAllocation->dwOffset = dwOffset+tmpSize;
1956 kernelAllocation->iSize = 0;
1957 state->totalKernelSize[slot+1] = totalSize - tmpSize;
1958
1959 // added one more entry
1960 state->kernelNumInGsh++;
1961
1962 kernelAllocation = &stateHeap->pKernelAllocation[slot];
1963 if (isCloneEntry)
1964 {
1965 if (!stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.isHeadKernel)
1966 {
1967 // ERROR thought kernel with allocation ID, headKernelAllocationID, was a head kernel, but it's not
1968 hr = CM_FAILURE;
1969 goto finish;
1970 }
1971
1972 kernelAllocation->cloneKernelParams.dwOffsetForAllocID = dwOffset;
1973 kernelAllocation->dwOffset = stateHeap->pKernelAllocation[headKernelAllocationID].dwOffset;
1974 kernelAllocation->cloneKernelParams.isClone = true;
1975 kernelAllocation->cloneKernelParams.kernelBinaryAllocID = headKernelAllocationID;
1976 kernelAllocation->cloneKernelParams.cloneKernelID = stateHeap->pKernelAllocation[headKernelAllocationID].iKUID;
1977
1978 stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount = stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount + 1;
1979
1980 // update head kernel's count after updating the clone entry's count so that clone will be selected for deletion first
1981 stateHeap->pKernelAllocation[headKernelAllocationID].dwCount = stateHeap->dwAccessCounter++;
1982
1983 }
1984 else
1985 {
1986 kernelAllocation->dwOffset = dwOffset;
1987
1988 if (isHeadKernel)
1989 {
1990 kernelAllocation->cloneKernelParams.isHeadKernel = true;
1991 if (isCloneAsHead)
1992 {
1993 kernelAllocation->cloneKernelParams.cloneKernelID = kernelParam->clonedKernelParam.kernelID;
1994 }
1995 }
1996 }
1997
1998 if (lastKernel > slot)
1999 {
2000 HalCm_UpdateCloneKernel(state, slot, CM_SHIFT_LEFT, 1);
2001 if (isCloneEntry && adjustHeadKernelID)
2002 {
2003 // if clone entry and already adjusted head kernel ID, then adjusted again in HalCm_UpdateCloneKernel, need to do only once
2004 kernelAllocation->cloneKernelParams.kernelBinaryAllocID = kernelAllocation->cloneKernelParams.kernelBinaryAllocID - 1;
2005 }
2006 }
2007 }
2008 else if (state->kernelNumInGsh < state->cmDeviceParam.maxGshKernelEntries)
2009 {
2010 // no need to create a new entry since we have the same size
2011 kernelAllocation = &stateHeap->pKernelAllocation[slot];
2012
2013 if(state->cbbEnabled)
2014 {
2015 tag = state->osInterface->pfnGetGpuStatusTag(state->osInterface,
2016 state->osInterface->CurrentGpuContextOrdinal);
2017 }
2018 else
2019 {
2020 tag = stateHeap->dwNextTag;
2021 }
2022
2023 CmLoadKernel(state, stateHeap, kernelAllocation, tag, stateHeap->dwAccessCounter, parameters, kernelParam, mhwKernelParam, isCloneEntry);
2024 stateHeap->dwAccessCounter++;
2025 // no change for kernelAllocation->dwOffset
2026 kernelAllocation->iSize = neededSize;
2027 state->totalKernelSize[slot] = MOS_ALIGN_CEIL(mhwKernelParam->iSize, 64);
2028 if (isCloneEntry)
2029 {
2030 if (!stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.isHeadKernel)
2031 {
2032 // ERROR thought kernel with allocation ID, headKernelAllocationID, was a head kernel, but it's not
2033 hr = CM_FAILURE;
2034 goto finish;
2035 }
2036
2037 kernelAllocation->cloneKernelParams.dwOffsetForAllocID = kernelAllocation->dwOffset;
2038 kernelAllocation->dwOffset = stateHeap->pKernelAllocation[headKernelAllocationID].dwOffset;
2039 kernelAllocation->cloneKernelParams.isClone = true;
2040 kernelAllocation->cloneKernelParams.kernelBinaryAllocID = headKernelAllocationID;
2041 kernelAllocation->cloneKernelParams.cloneKernelID = stateHeap->pKernelAllocation[headKernelAllocationID].iKUID;
2042
2043 stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount = stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount + 1;
2044
2045 // update head kernel's count after updating the clone entry's count so that clone will be selected for deletion first
2046 stateHeap->pKernelAllocation[headKernelAllocationID].dwCount = stateHeap->dwAccessCounter++;
2047 }
2048 else if (isHeadKernel)
2049 {
2050 kernelAllocation->cloneKernelParams.isHeadKernel = true;
2051 if (isCloneAsHead)
2052 {
2053 kernelAllocation->cloneKernelParams.cloneKernelID = kernelParam->clonedKernelParam.kernelID;
2054 }
2055 }
2056 }
2057 else
2058 {
2059 // all slots are used, but we have one free which is big enough
2060 // we may have fragmentation, but code is the same as above case
2061 kernelAllocation = &stateHeap->pKernelAllocation[slot];
2062
2063 if(state->cbbEnabled)
2064 {
2065 tag = state->osInterface->pfnGetGpuStatusTag(state->osInterface, state->osInterface->CurrentGpuContextOrdinal);
2066 }
2067 else
2068 {
2069 tag = stateHeap->dwNextTag;
2070 }
2071
2072 CmLoadKernel(state, stateHeap, kernelAllocation, tag, stateHeap->dwAccessCounter, parameters, kernelParam, mhwKernelParam, isCloneEntry);
2073 stateHeap->dwAccessCounter++;
2074 // kernelAllocation->iTotalSize is not changed, but we have smaller actual size
2075 // no change for kernelAllocation->dwOffset
2076 kernelAllocation->iSize = neededSize;
2077
2078 if (isCloneEntry)
2079 {
2080 if (!stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.isHeadKernel)
2081 {
2082 // ERROR thought kernel with allocation ID, headKernelAllocationID, was a head kernel, but it's not
2083 hr = CM_FAILURE;
2084 goto finish;
2085 }
2086
2087 kernelAllocation->cloneKernelParams.dwOffsetForAllocID = kernelAllocation->dwOffset;
2088 kernelAllocation->dwOffset = stateHeap->pKernelAllocation[headKernelAllocationID].dwOffset;
2089 kernelAllocation->cloneKernelParams.isClone = true;
2090 kernelAllocation->cloneKernelParams.kernelBinaryAllocID = headKernelAllocationID;
2091 kernelAllocation->cloneKernelParams.cloneKernelID = stateHeap->pKernelAllocation[headKernelAllocationID].iKUID;
2092
2093 stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount = stateHeap->pKernelAllocation[headKernelAllocationID].cloneKernelParams.referenceCount + 1;
2094
2095 // update head kernel's count after updating the clone entry's count so that clone will be selected for deletion first
2096 stateHeap->pKernelAllocation[headKernelAllocationID].dwCount = stateHeap->dwAccessCounter++;
2097 }
2098 else if (isHeadKernel)
2099 {
2100 kernelAllocation->cloneKernelParams.isHeadKernel = true;
2101 if (isCloneAsHead)
2102 {
2103 kernelAllocation->cloneKernelParams.cloneKernelID = kernelParam->clonedKernelParam.kernelID;
2104 }
2105 }
2106 }
2107
2108 finish:
2109 return hr;
2110 }
2111
2112 /*----------------------------------------------------------------------------
2113 | Name : HalCm_UnLoadKernel ( Replace RenderHal_UnloadKernel)
2114 \---------------------------------------------------------------------------*/
HalCm_UnloadKernel(PCM_HAL_STATE state,PRENDERHAL_KRN_ALLOCATION kernelAllocation)2115 int32_t HalCm_UnloadKernel(
2116 PCM_HAL_STATE state,
2117 PRENDERHAL_KRN_ALLOCATION kernelAllocation)
2118 {
2119 PRENDERHAL_INTERFACE renderHal = state->renderHal;
2120 PRENDERHAL_STATE_HEAP stateHeap;
2121 int32_t hr;
2122
2123 //---------------------------------------
2124 CM_CHK_NULL_GOTOFINISH_CMERROR(renderHal);
2125 CM_CHK_NULL_GOTOFINISH_CMERROR(renderHal->pStateHeap);
2126 CM_CHK_NULL_GOTOFINISH_CMERROR(kernelAllocation);
2127 //---------------------------------------
2128
2129 hr = CM_FAILURE;
2130 stateHeap = renderHal->pStateHeap;
2131
2132 if (kernelAllocation->dwFlags == RENDERHAL_KERNEL_ALLOCATION_FREE)
2133 {
2134 goto finish;
2135 }
2136
2137 CM_CHK_CMSTATUS_GOTOFINISH(state->pfnSyncKernel(state, kernelAllocation->dwSync));
2138
2139 // Unload kernel
2140 if (kernelAllocation->pMhwKernelParam)
2141 {
2142 kernelAllocation->pMhwKernelParam->bLoaded = 0;
2143 }
2144
2145 if (kernelAllocation->cloneKernelParams.isClone)
2146 {
2147 if (stateHeap->pKernelAllocation[kernelAllocation->cloneKernelParams.kernelBinaryAllocID].cloneKernelParams.isHeadKernel)
2148 {
2149 if ((stateHeap->pKernelAllocation[kernelAllocation->cloneKernelParams.kernelBinaryAllocID].cloneKernelParams.referenceCount) <= 0)
2150 {
2151 // ERROR
2152 hr = CM_FAILURE;
2153 goto finish;
2154 }
2155 }
2156 else
2157 {
2158 // ERROR
2159 hr = CM_FAILURE;
2160 goto finish;
2161 }
2162
2163 stateHeap->pKernelAllocation[kernelAllocation->cloneKernelParams.kernelBinaryAllocID].cloneKernelParams.referenceCount =
2164 stateHeap->pKernelAllocation[kernelAllocation->cloneKernelParams.kernelBinaryAllocID].cloneKernelParams.referenceCount - 1;
2165
2166 // restore the dwOffset for this allocationID
2167 kernelAllocation->dwOffset = kernelAllocation->cloneKernelParams.dwOffsetForAllocID;
2168 }
2169 else if (kernelAllocation->cloneKernelParams.isHeadKernel && kernelAllocation->cloneKernelParams.referenceCount != 0)
2170 {
2171 // ERROR, cloned kernel entries should have been selected for deletion before head kernel entry
2172 hr = CM_FAILURE;
2173 goto finish;
2174 }
2175
2176 // Release kernel entry (Offset/size may be used for reallocation)
2177 kernelAllocation->iKID = -1;
2178 kernelAllocation->iKUID = -1;
2179 kernelAllocation->iKCID = -1;
2180 kernelAllocation->dwSync = 0;
2181 FrameTrackerTokenFlat_Clear(&kernelAllocation->trackerToken);
2182 kernelAllocation->dwFlags = RENDERHAL_KERNEL_ALLOCATION_FREE;
2183 kernelAllocation->dwCount = 0;
2184 kernelAllocation->pMhwKernelParam = nullptr;
2185 kernelAllocation->cloneKernelParams.cloneKernelID = -1;
2186 kernelAllocation->cloneKernelParams.isClone = false;
2187 kernelAllocation->cloneKernelParams.isHeadKernel = false;
2188 kernelAllocation->cloneKernelParams.kernelBinaryAllocID = -1;
2189 kernelAllocation->cloneKernelParams.referenceCount = 0;
2190
2191 hr = CM_SUCCESS;
2192
2193 finish:
2194 return hr;
2195 }
2196
2197 /*----------------------------------------------------------------------------
2198 | Name : HalCmw_TouchKernel ( Replace RenderHal_TouchKernel)
2199 \---------------------------------------------------------------------------*/
HalCm_TouchKernel(PCM_HAL_STATE state,int32_t kernelAllocationID)2200 int32_t HalCm_TouchKernel(
2201 PCM_HAL_STATE state,
2202 int32_t kernelAllocationID)
2203 {
2204 int32_t hr = CM_SUCCESS;
2205 PRENDERHAL_STATE_HEAP stateHeap;
2206 PRENDERHAL_KRN_ALLOCATION kernelAllocation;
2207 PRENDERHAL_KRN_ALLOCATION headKernelAllocation;
2208 uint32_t tag;
2209
2210 PRENDERHAL_INTERFACE renderHal = state->renderHal;
2211 PMOS_INTERFACE osInterface = state->osInterface;
2212
2213 stateHeap = (renderHal) ? renderHal->pStateHeap : nullptr;
2214 if (stateHeap == nullptr ||
2215 stateHeap->pKernelAllocation == nullptr ||
2216 kernelAllocationID < 0 ||
2217 kernelAllocationID >= renderHal->StateHeapSettings.iKernelCount)
2218 {
2219 hr = CM_FAILURE;
2220 goto finish;
2221 }
2222
2223 // Update usage
2224 kernelAllocation = &(stateHeap->pKernelAllocation[kernelAllocationID]);
2225 if (kernelAllocation->dwFlags != RENDERHAL_KERNEL_ALLOCATION_FREE &&
2226 kernelAllocation->dwFlags != RENDERHAL_KERNEL_ALLOCATION_LOCKED)
2227 {
2228 kernelAllocation->dwCount = stateHeap->dwAccessCounter++;
2229 }
2230
2231 // Set sync tag, for deallocation control
2232 if(state->cbbEnabled)
2233 {
2234 tag = osInterface->pfnGetGpuStatusTag(osInterface, osInterface->CurrentGpuContextOrdinal);
2235 }
2236 else
2237 {
2238 tag = stateHeap->dwNextTag;
2239 }
2240
2241 kernelAllocation->dwSync = tag;
2242
2243 // if this kernel allocation is a cloned kernel, update the orig kernel sync tag and access counter
2244 if (kernelAllocation->cloneKernelParams.isClone)
2245 {
2246 headKernelAllocation = &(stateHeap->pKernelAllocation[kernelAllocation->cloneKernelParams.kernelBinaryAllocID]);
2247
2248 if (headKernelAllocation->cloneKernelParams.referenceCount <= 0)
2249 {
2250 // ERROR
2251 hr = CM_FAILURE;
2252 goto finish;
2253 }
2254
2255 headKernelAllocation->dwSync = tag;
2256 headKernelAllocation->dwCount = stateHeap->dwAccessCounter++;
2257
2258 }
2259
2260 finish:
2261 return hr;
2262 }
2263
2264 /*
2265 ** Supporting function
2266 ** Delete oldest entry from table to free more space
2267 ** According to different cases, we will combine space with previous or next slot to get max space
2268 */
CmDeleteOldestKernel(PCM_HAL_STATE state,MHW_KERNEL_PARAM * mhwKernelParam)2269 int32_t CmDeleteOldestKernel(PCM_HAL_STATE state, MHW_KERNEL_PARAM *mhwKernelParam)
2270 {
2271 PRENDERHAL_KRN_ALLOCATION kernelAllocation;
2272 PRENDERHAL_INTERFACE renderHal = state->renderHal;;
2273 PRENDERHAL_STATE_HEAP stateHeap = renderHal->pStateHeap;
2274 UNUSED(state);
2275 UNUSED(mhwKernelParam);
2276
2277 uint32_t oldest = 0;
2278 uint32_t lastUsed;
2279 int32_t kernelAllocationID, searchIndex = -1, index = -1;
2280 int32_t alignedSize, shiftOffset;
2281 int32_t hr = CM_SUCCESS;
2282
2283 kernelAllocation = stateHeap->pKernelAllocation;
2284
2285 // Search and deallocate oldest kernel (most likely this is optimal scheduling algorithm)
2286 kernelAllocation = stateHeap->pKernelAllocation;
2287 for (kernelAllocationID = 0;
2288 kernelAllocationID < state->kernelNumInGsh;
2289 kernelAllocationID++, kernelAllocation++)
2290 {
2291 // Skip unused entries
2292 // Skip kernels flagged as locked (cannot be automatically deallocated)
2293 if (kernelAllocation->dwFlags == RENDERHAL_KERNEL_ALLOCATION_FREE ||
2294 kernelAllocation->dwFlags == RENDERHAL_KERNEL_ALLOCATION_LOCKED)
2295 {
2296 continue;
2297 }
2298
2299 // Find kernel not used for the greater amount of time (measured in number of operations)
2300 // Must not unload recently allocated kernels
2301 lastUsed = (uint32_t)(stateHeap->dwAccessCounter - kernelAllocation->dwCount);
2302 if (lastUsed > oldest)
2303 {
2304 searchIndex = kernelAllocationID;
2305 oldest = lastUsed;
2306 }
2307 }
2308
2309 // Did not found any entry for deallocation, we get into a strange case!
2310 if (searchIndex < 0)
2311 {
2312 CM_ASSERTMESSAGE("Failed to delete any slot from GSH. It is impossible.");
2313 return CM_FAILURE;
2314 }
2315
2316 if (stateHeap->pKernelAllocation[searchIndex].cloneKernelParams.isHeadKernel &&
2317 (stateHeap->pKernelAllocation[searchIndex].cloneKernelParams.referenceCount != 0))
2318 {
2319 // ERROR, chose a head kernel for deletion but it still has clones pointing to it
2320 return CM_FAILURE;
2321 }
2322
2323 // Free kernel entry and states associated with the kernel (if any)
2324 kernelAllocation = &stateHeap->pKernelAllocation[searchIndex];
2325 if (HalCm_UnloadKernel(state, kernelAllocation) != CM_SUCCESS)
2326 {
2327 CM_ASSERTMESSAGE("Failed to load kernel - no space available in GSH.");
2328 return CM_FAILURE;
2329 }
2330
2331 // Let's check if we can merge searchIndex-1, searchIndex, searchIndex+1
2332 index = searchIndex;
2333 PRENDERHAL_KRN_ALLOCATION kAlloc0, kAlloc1, kAlloc2;
2334 kAlloc0 = (index == 0)? nullptr : &stateHeap->pKernelAllocation[index-1];
2335 kAlloc1 = &stateHeap->pKernelAllocation[index]; // free one
2336 kAlloc2 = (index == state->cmDeviceParam.maxGshKernelEntries - 1) ? nullptr : &stateHeap->pKernelAllocation[index + 1];
2337
2338 if (bIsFree(kAlloc0) && bIsFree(kAlloc2))
2339 {
2340 // merge 3 into 1 slot and bump index after
2341 stateHeap->pKernelAllocation[index-1].dwFlags = RENDERHAL_KERNEL_ALLOCATION_FREE;
2342 state->totalKernelSize[index-1] += state->totalKernelSize[index] + state->totalKernelSize[index+1];
2343 stateHeap->pKernelAllocation[index-1].iSize = 0;
2344 // no change for stateHeap->pKernelAllocation[index-1].dwOffset
2345
2346 // copy the rest
2347 for (int32_t i = index + 2; i<state->kernelNumInGsh; i++)
2348 {
2349 stateHeap->pKernelAllocation[i-2] = stateHeap->pKernelAllocation[i];
2350 state->totalKernelSize[i-2] = state->totalKernelSize[i];
2351 }
2352
2353 state->kernelNumInGsh -= 2;
2354
2355 if ( index == 0 )
2356 HalCm_UpdateCloneKernel(state, 0, CM_SHIFT_RIGHT, 2);
2357 else
2358 HalCm_UpdateCloneKernel(state, index - 1, CM_SHIFT_RIGHT, 2);
2359 }
2360 else if (bIsFree(kAlloc0))
2361 {
2362 // merge before and current into 1 slot
2363 stateHeap->pKernelAllocation[index-1].dwFlags = RENDERHAL_KERNEL_ALLOCATION_FREE;
2364 state->totalKernelSize[index-1] += state->totalKernelSize[index];
2365 stateHeap->pKernelAllocation[index-1].iSize = 0;
2366 // no change for stateHeap->pKernelAllocation[index-1].dwOffset
2367
2368 for (int32_t i = index + 1; i<state->kernelNumInGsh; i++)
2369 {
2370 stateHeap->pKernelAllocation[i-1] = stateHeap->pKernelAllocation[i];
2371 state->totalKernelSize[i-1] = state->totalKernelSize[i];
2372 }
2373
2374 state->kernelNumInGsh -= 1;
2375
2376 if ( index == 0 )
2377 HalCm_UpdateCloneKernel(state, 0, CM_SHIFT_RIGHT, 1);
2378 else
2379 HalCm_UpdateCloneKernel(state, index - 1, CM_SHIFT_RIGHT, 1);
2380
2381 }
2382 else if (bIsFree(kAlloc2))
2383 {
2384 // kAlloc0 is not free, but it can be nullptr
2385 // merge after and current into 1 slot
2386 stateHeap->pKernelAllocation[index].dwFlags = RENDERHAL_KERNEL_ALLOCATION_FREE;
2387 state->totalKernelSize[index] += state->totalKernelSize[index+1];
2388 stateHeap->pKernelAllocation[index].iSize = 0;
2389 if (kAlloc0)
2390 {
2391 // get free space starting point
2392 alignedSize = MOS_ALIGN_CEIL(kAlloc0->iSize, 64);
2393 shiftOffset = state->totalKernelSize[index-1] - alignedSize;
2394
2395 state->totalKernelSize[index-1] -= shiftOffset;
2396 // no change for stateHeap->pKernelAllocation[index-1].iSize -= 0;
2397 state->totalKernelSize[index] += shiftOffset;
2398 stateHeap->pKernelAllocation[index].dwOffset -= shiftOffset;
2399 }
2400
2401 for (int32_t i = index + 1; i<state->kernelNumInGsh; i++)
2402 {
2403 stateHeap->pKernelAllocation[i] = stateHeap->pKernelAllocation[i+1];
2404 state->totalKernelSize[i] = state->totalKernelSize[i+1];
2405 }
2406
2407 state->kernelNumInGsh -= 1;
2408
2409 if ( index == 0 )
2410 HalCm_UpdateCloneKernel(state, 0, CM_SHIFT_RIGHT, 1);
2411 else
2412 HalCm_UpdateCloneKernel(state, index - 1, CM_SHIFT_RIGHT, 1);
2413 }
2414 else
2415 {
2416 // no merge
2417 stateHeap->pKernelAllocation[index].dwFlags = RENDERHAL_KERNEL_ALLOCATION_FREE;
2418 // no change for stateHeap->pKernelAllocation[index].iTotalSize;
2419 stateHeap->pKernelAllocation[index].iSize = 0;
2420 if(kAlloc0)
2421 {
2422 // get free space starting point
2423 alignedSize = MOS_ALIGN_CEIL(kAlloc0->iSize, 64);
2424 shiftOffset = state->totalKernelSize[index-1] - alignedSize;
2425 state->totalKernelSize[index-1] -= shiftOffset;
2426 // no change for stateHeap->pKernelAllocation[index-1].iSize -= 0;
2427 state->totalKernelSize[index] += shiftOffset;
2428 stateHeap->pKernelAllocation[index].dwOffset -= shiftOffset;
2429 }
2430 // no change for stateHeap->iNumKernels;
2431 }
2432
2433 return hr;
2434 }
2435
2436 /*----------------------------------------------------------------------------
2437 | Name : HalCm_LoadKernel ( Replace RenderHal_LoadKernel)
2438 \---------------------------------------------------------------------------*/
HalCm_LoadKernel(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM kernelParam,int32_t samplerCount,PRENDERHAL_KRN_ALLOCATION & kernelAllocation)2439 int32_t HalCm_LoadKernel(
2440 PCM_HAL_STATE state,
2441 PCM_HAL_KERNEL_PARAM kernelParam,
2442 int32_t samplerCount,
2443 PRENDERHAL_KRN_ALLOCATION &kernelAllocation)
2444 {
2445 PRENDERHAL_STATE_HEAP stateHeap;
2446 PRENDERHAL_INTERFACE renderHal;
2447 int32_t hr;
2448 PRENDERHAL_KERNEL_PARAM parameters;
2449 PMHW_KERNEL_PARAM mhwKernelParam;
2450
2451 int32_t kernelAllocationID; // Kernel allocation ID in GSH
2452 int32_t kernelCacheID; // Kernel cache ID
2453 int32_t kernelUniqueID; // Kernel unique ID
2454 void *kernelPtr;
2455 int32_t kernelSize;
2456 int32_t searchIndex;
2457 int32_t freeSlot;
2458 bool isClonedKernel;
2459 bool hasClones;
2460
2461 hr = CM_SUCCESS;
2462 renderHal = state->renderHal;
2463 stateHeap = (renderHal) ? renderHal->pStateHeap : nullptr;
2464 kernelAllocationID = RENDERHAL_KERNEL_LOAD_FAIL;
2465 mhwKernelParam = &(state->kernelParamsMhw);
2466 parameters = &(state->kernelParamsRenderHal.Params);
2467
2468 // Validate parameters
2469 if (stateHeap == nullptr ||
2470 stateHeap->bIshLocked == false ||
2471 stateHeap->pKernelAllocation == nullptr ||
2472 kernelParam->kernelBinarySize == 0 ||
2473 state->kernelNumInGsh > state->cmDeviceParam.maxGshKernelEntries)
2474 {
2475 CM_ASSERTMESSAGE("Failed to load kernel - invalid parameters.");
2476 return CM_FAILURE;
2477 }
2478
2479 isClonedKernel = kernelParam->clonedKernelParam.isClonedKernel;
2480 hasClones = kernelParam->clonedKernelParam.hasClones;
2481
2482 parameters->Sampler_Count = samplerCount;
2483 mhwKernelParam->iKUID = static_cast<int>( (kernelParam->kernelId >> 32) );
2484 mhwKernelParam->iKCID = -1;
2485 mhwKernelParam->pBinary = kernelParam->kernelBinary;
2486 mhwKernelParam->iSize = kernelParam->kernelBinarySize + CM_KERNEL_BINARY_PADDING_SIZE;
2487
2488 // Kernel parameters
2489 kernelPtr = mhwKernelParam->pBinary;
2490 kernelSize = mhwKernelParam->iSize;
2491 kernelUniqueID = mhwKernelParam->iKUID;
2492 kernelCacheID = mhwKernelParam->iKCID;
2493
2494 // Check if kernel is already loaded; Search free allocation index
2495 searchIndex = -1;
2496 kernelAllocation = stateHeap->pKernelAllocation;
2497 for (kernelAllocationID = 0;
2498 kernelAllocationID < state->kernelNumInGsh;
2499 kernelAllocationID++, kernelAllocation++)
2500 {
2501 if (kernelAllocation->iKUID == kernelUniqueID &&
2502 kernelAllocation->iKCID == kernelCacheID)
2503 {
2504 // found match and Update kernel usage
2505 hr = HalCm_TouchKernel(state, kernelAllocationID);
2506 if (hr == CM_FAILURE)
2507 {
2508 goto finish;
2509 }
2510 // Increment reference counter
2511 mhwKernelParam->bLoaded = 1;
2512 // Record kernel allocation
2513 kernelAllocation = &stateHeap->pKernelAllocation[kernelAllocationID];
2514
2515 goto finish;
2516 }
2517 }
2518
2519 if (isClonedKernel || hasClones)
2520 {
2521 hr = HalCm_InsertCloneKernel(state, kernelParam, kernelAllocation);
2522 goto finish;
2523 }
2524
2525 // here is the algorithm
2526 // 1) search for free slot which is big enough to load current kerenel
2527 // 2) if found slot, then add current kerenel
2528 // 3) if we cannot find slot, we need to delete some entry (delete oldest first), after delete oldest entry
2529 // we will loop over to step 1 until we get enough space.
2530 // The algorithm won't fail except we load 1 kernel which is larger than 2MB
2531 do
2532 {
2533 freeSlot = CmSearchFreeSlotSize(state, mhwKernelParam, false);
2534 if (freeSlot >= 0)
2535 {
2536 // found free slot which is big enough to hold kernel
2537 hr = CmAddCurrentKernelToFreeSlot(state, freeSlot, parameters, kernelParam, mhwKernelParam, CM_NO_CLONE, -1);
2538 // update GSH states stateHeap->numKernels inside add function
2539 break;
2540 }
2541 else
2542 {
2543 if (CmDeleteOldestKernel(state, mhwKernelParam) != CM_SUCCESS)
2544 {
2545 return CM_FAILURE;
2546 }
2547 }
2548 } while(1);
2549
2550 mhwKernelParam->bLoaded = 1; // Increment reference counter
2551 kernelAllocation = &stateHeap->pKernelAllocation[freeSlot]; // Record kernel allocation
2552
2553 finish:
2554
2555 return hr;
2556 }
2557
2558 //*-----------------------------------------------------------------------------
2559 //| Purpose: Loads cloned kernel entries and kernels with clones into free slot
2560 //| Return: Result of the operation
2561 //*-----------------------------------------------------------------------------
HalCm_InsertCloneKernel(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM kernelParam,PRENDERHAL_KRN_ALLOCATION & kernelAllocation)2562 int32_t HalCm_InsertCloneKernel(
2563 PCM_HAL_STATE state,
2564 PCM_HAL_KERNEL_PARAM kernelParam,
2565 PRENDERHAL_KRN_ALLOCATION &kernelAllocation)
2566 {
2567 int32_t hr = CM_SUCCESS;
2568 int32_t kernelAllocationID; // Kernel allocation ID in GSH
2569 uint32_t tag;
2570 PMOS_INTERFACE osInterface = state->osInterface;
2571 PMHW_KERNEL_PARAM mhwKernelParam = &(state->kernelParamsMhw);
2572 int32_t freeSlot = -1;
2573 PRENDERHAL_STATE_HEAP stateHeap = state->renderHal->pStateHeap;
2574
2575 kernelAllocation = state->renderHal->pStateHeap->pKernelAllocation;
2576
2577 for (kernelAllocationID = 0; kernelAllocationID < state->kernelNumInGsh;
2578 kernelAllocationID++, kernelAllocation++)
2579 {
2580 if (kernelAllocation->cloneKernelParams.isHeadKernel)
2581 {
2582 if ((kernelAllocation->iKUID == kernelParam->clonedKernelParam.kernelID) || // original kernel that cloned from is already loaded as head
2583 (kernelAllocation->cloneKernelParams.cloneKernelID == kernelParam->clonedKernelParam.kernelID) || // another clone from same original kernel is serving as the head
2584 (kernelAllocation->cloneKernelParams.cloneKernelID == static_cast<int>(kernelParam->kernelId >> 32))) // clone is serving as the head and this is the original kernel
2585 {
2586 // found match, insert 64B dummy entry and set piKAID
2587 do
2588 {
2589 // Before getting a free slot, update head kernel sync tag and count so head will not be selected for deletion
2590 // then update head kernel count after inserting clone
2591 // so that clone will be selected first for deletion (this is done in CmAddCurrentKernelToFreeSlot)
2592
2593 // update head kernel sync tag
2594 if(state->cbbEnabled)
2595 {
2596 tag = osInterface->pfnGetGpuStatusTag(osInterface, osInterface->CurrentGpuContextOrdinal);
2597 }
2598 else
2599 {
2600 tag = state->renderHal->pStateHeap->dwNextTag;
2601 }
2602 kernelAllocation->dwSync = tag;
2603
2604 // update the head kernel count so it will not be selected for deletion
2605 kernelAllocation->dwCount = state->renderHal->pStateHeap->dwAccessCounter++;
2606
2607 freeSlot = CmSearchFreeSlotSize(state, mhwKernelParam, true);
2608 if (freeSlot >= 0)
2609 {
2610 // found free slot
2611 hr = CmAddCurrentKernelToFreeSlot(state, freeSlot, &(state->kernelParamsRenderHal.Params),
2612 kernelParam, &(state->kernelParamsMhw), CM_CLONE_ENTRY, kernelAllocationID);
2613
2614 goto finish;
2615
2616 }
2617 else
2618 {
2619 if (CmDeleteOldestKernel(state, mhwKernelParam) != CM_SUCCESS)
2620 {
2621 hr = CM_FAILURE;
2622 goto finish;
2623 }
2624 }
2625 } while (1);
2626 }
2627 }
2628 }
2629
2630 // didn't find a match, insert this kernel as the head kernel
2631 do
2632 {
2633 freeSlot = CmSearchFreeSlotSize(state, mhwKernelParam, false);
2634 if (freeSlot >= 0)
2635 {
2636 if (kernelParam->clonedKernelParam.isClonedKernel)
2637 {
2638 hr = CmAddCurrentKernelToFreeSlot(state, freeSlot, &(state->kernelParamsRenderHal.Params),
2639 kernelParam, &(state->kernelParamsMhw), CM_CLONE_AS_HEAD_KERNEL, -1);
2640 }
2641 else
2642 {
2643 hr = CmAddCurrentKernelToFreeSlot(state, freeSlot, &(state->kernelParamsRenderHal.Params),
2644 kernelParam, &(state->kernelParamsMhw), CM_HEAD_KERNEL, -1);
2645 }
2646 break;
2647 }
2648 else
2649 {
2650 if (CmDeleteOldestKernel(state, mhwKernelParam) != CM_SUCCESS)
2651 {
2652 hr = CM_FAILURE;
2653 goto finish;
2654 }
2655 }
2656 } while (1);
2657
2658 finish:
2659
2660 if (hr == CM_SUCCESS)
2661 {
2662 mhwKernelParam->bLoaded = 1;
2663 kernelAllocation = &stateHeap->pKernelAllocation[freeSlot];
2664 }
2665
2666 return hr;
2667 }
2668
2669 //!
2670 //! \brief Get offset to sampler state
2671 //! \details Get offset to sampler state in General State Heap,
2672 //! (Cm customized version of the RenderHal function which calculates
2673 //! the sampler offset by MDF owned parameters).
2674 //! \param PCM_HAL_STATE state
2675 //! [in] Pointer to CM_HAL_STATE structure
2676 //! \param PRENDERHAL_INTERFACE renderHal
2677 //! [in] Pointer to RenderHal Interface
2678 //! \param int mediaID
2679 //! [in] Media ID associated with sampler
2680 //! \param int samplerOffset
2681 //! [in] sampler offset from the base of current kernel's sampler heap
2682 //! \param int samplerBTI
2683 //! [in] sampler BTI
2684 //! \param unsigned long *pdwSamplerOffset
2685 //! [out] optional; offset of sampler state from GSH base
2686 //! \return MOS_STATUS
2687 //!
HalCm_GetSamplerOffset(PCM_HAL_STATE state,PRENDERHAL_INTERFACE renderHal,int mediaID,unsigned int samplerOffset,unsigned int samplerBTI,PMHW_SAMPLER_STATE_PARAM samplerParam,uint32_t * pdwSamplerOffset)2688 MOS_STATUS HalCm_GetSamplerOffset(
2689 PCM_HAL_STATE state,
2690 PRENDERHAL_INTERFACE renderHal,
2691 int mediaID,
2692 unsigned int samplerOffset,
2693 unsigned int samplerBTI,
2694 PMHW_SAMPLER_STATE_PARAM samplerParam,
2695 uint32_t *pdwSamplerOffset)
2696 {
2697 PRENDERHAL_MEDIA_STATE_LEGACY pCurMediaStateLegacy = (PRENDERHAL_MEDIA_STATE_LEGACY)renderHal->pStateHeap->pCurMediaState;
2698 unsigned int tmpSamplerOffset = pCurMediaStateLegacy->pDynamicState->Sampler3D.dwOffset +
2699 state->taskParam->samplerOffsetsByKernel[mediaID] +
2700 samplerOffset;
2701
2702 if (pdwSamplerOffset != nullptr)
2703 {
2704 *pdwSamplerOffset = tmpSamplerOffset;
2705 }
2706
2707 if (samplerParam->SamplerType == MHW_SAMPLER_TYPE_3D)
2708 {
2709 samplerParam->Unorm.IndirectStateOffset = MOS_ALIGN_CEIL( pCurMediaStateLegacy->pDynamicState->Sampler3D.dwOffset +
2710 state->taskParam->samplerIndirectOffsetsByKernel[mediaID] +
2711 samplerBTI * renderHal->pHwSizes->dwSizeSamplerIndirectState,
2712 1 << MHW_SAMPLER_INDIRECT_SHIFT);
2713 }
2714 return MOS_STATUS_SUCCESS;
2715 }
2716
2717 //!
2718 //! \brief Setup Interface Descriptor
2719 //! \details Set interface descriptor, (overriding RenderHal function),
2720 //! (Cm customized version of the RenderHal function which set
2721 //! dwSamplerOffset and dwSamplerCount by MDF owned parameters).
2722 //! \param PCM_HAL_STATE state
2723 //! [in] Pointer to CM_HAL_STATE structure
2724 //! \param PRENDERHAL_INTERFACE renderHal
2725 //! [in] Pointer to HW interface
2726 //! \param PRENDERHAL_MEDIA_STATE mediaState
2727 //! [in] Pointer to media state
2728 //! \param PRENDERHAL_KRN_ALLOCATION kernelAllocation
2729 //! [in] Pointer to kernel allocation
2730 //! \param PRENDERHAL_INTERFACE_DESCRIPTOR_PARAMS interfaceDescriptorParams
2731 //! [in] Pointer to interface descriptor parameters
2732 //! \param PMHW_GPGPU_WALKER_PARAMS pGpGpuWalkerParams
2733 //! [in] Pointer to gpgpu walker parameters
2734 //! \return MOS_STATUS
2735 //!
HalCm_SetupInterfaceDescriptor(PCM_HAL_STATE state,PRENDERHAL_INTERFACE renderHal,PRENDERHAL_MEDIA_STATE mediaState,PRENDERHAL_KRN_ALLOCATION kernelAllocation,PRENDERHAL_INTERFACE_DESCRIPTOR_PARAMS interfaceDescriptorParams)2736 MOS_STATUS HalCm_SetupInterfaceDescriptor(
2737 PCM_HAL_STATE state,
2738 PRENDERHAL_INTERFACE renderHal,
2739 PRENDERHAL_MEDIA_STATE mediaState,
2740 PRENDERHAL_KRN_ALLOCATION kernelAllocation,
2741 PRENDERHAL_INTERFACE_DESCRIPTOR_PARAMS interfaceDescriptorParams)
2742 {
2743 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2744 MHW_ID_ENTRY_PARAMS params;
2745 PRENDERHAL_STATE_HEAP stateHeap;
2746 PRENDERHAL_DYNAMIC_STATE dynamicState;
2747 unsigned long mediaStateOffset;
2748
2749 PRENDERHAL_MEDIA_STATE_LEGACY mediaStateLegacy = (PRENDERHAL_MEDIA_STATE_LEGACY)mediaState;
2750 //-----------------------------------------
2751 MHW_RENDERHAL_CHK_NULL(renderHal);
2752 MHW_RENDERHAL_CHK_NULL(renderHal->pMhwStateHeap);
2753 MHW_RENDERHAL_CHK_NULL(mediaStateLegacy);
2754 MHW_RENDERHAL_CHK_NULL(mediaStateLegacy->pDynamicState);
2755 MHW_RENDERHAL_CHK_NULL(interfaceDescriptorParams);
2756 //-----------------------------------------
2757
2758 // Get states, params
2759 stateHeap = renderHal->pStateHeap;
2760 dynamicState = mediaStateLegacy->pDynamicState;
2761 mediaStateOffset = dynamicState->memoryBlock.GetOffset();
2762
2763 params.dwMediaIdOffset = mediaStateOffset + dynamicState->MediaID.dwOffset;
2764 params.iMediaId = interfaceDescriptorParams->iMediaID;
2765 params.dwKernelOffset = kernelAllocation->dwOffset;
2766 params.dwSamplerOffset = mediaStateOffset + dynamicState->Sampler3D.dwOffset + state->taskParam->samplerOffsetsByKernel[params.iMediaId];
2767 params.dwSamplerCount = ( state->taskParam->samplerCountsByKernel[params.iMediaId] + 3 ) / 4;
2768 params.dwSamplerCount = (params.dwSamplerCount > 4) ? 4 : params.dwSamplerCount;
2769 params.dwBindingTableOffset = interfaceDescriptorParams->iBindingTableID * stateHeap->iBindingTableSize;
2770 params.iCurbeOffset = interfaceDescriptorParams->iCurbeOffset;
2771 params.iCurbeLength = interfaceDescriptorParams->iCurbeLength;
2772
2773 params.bBarrierEnable = interfaceDescriptorParams->blBarrierEnable;
2774 params.bGlobalBarrierEnable = interfaceDescriptorParams->blGlobalBarrierEnable; //It's only applied for BDW+
2775 params.dwNumberofThreadsInGPGPUGroup = interfaceDescriptorParams->iNumberThreadsInGroup;
2776 params.dwSharedLocalMemorySize = renderHal->pfnEncodeSLMSize(renderHal, interfaceDescriptorParams->iSLMSize);
2777 params.iCrsThdConDataRdLn = interfaceDescriptorParams->iCrsThrdConstDataLn;
2778 params.memoryBlock = &dynamicState->memoryBlock;
2779
2780 MHW_RENDERHAL_CHK_STATUS(renderHal->pMhwStateHeap->AddInterfaceDescriptorData(¶ms));
2781 dynamicState->MediaID.iCurrent++;
2782
2783 finish:
2784 return eStatus;
2785 }
2786
2787 /*----------------------------------------------------------------------------
2788 | Name : HalCm_AllocateMediaID replace old RenderHal_AllocateMediaID
2789 | Don't need touch kernel since we handle this a loadKernel time
2790 |
2791 | Purpose : Allocates an setup Interface Descriptor for Media Pipeline
2792 |
2793 | Arguments : [in] renderHal - Pointer to RenderHal interface structure
2794 | [in] kernelParam - Pointer to Kernel parameters
2795 | [in] pKernelAllocationID - Pointer to Kernel allocation
2796 | [in] bindingTableID - Binding table ID
2797 | [in] curbeOffset - Curbe offset (from CURBE base)
2798 |
2799 | Returns : Media Interface descriptor ID
2800 | -1 if invalid parameters
2801 | no Interface Descriptor entry available in GSH
2802 |
2803 | Comments : Kernel must be preloaded
2804 | Curbe must be allocated using pfnAllocateCurbe
2805 | Binding Table must be allocated using pfnAllocateBindingTable
2806 \---------------------------------------------------------------------------*/
2807 //!
2808 //! \brief
2809 //! \details
2810 //! \param PRENDERHAL_INTERFACE renderHal
2811 //| \param PCM_HAL_KERNEL_PARAM kernelParam
2812 //| \param PRENDERHAL_KRN_ALLOCATION kernelAllocation
2813 //| \param int32_t bindingTableID
2814 //| \param int32_t curbeOffset
2815 //! \return int32_t
2816 //!
HalCm_AllocateMediaID(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM kernelParam,PRENDERHAL_KRN_ALLOCATION kernelAllocation,int32_t bindingTableID,int32_t curbeOffset)2817 int32_t HalCm_AllocateMediaID(
2818 PCM_HAL_STATE state,
2819 PCM_HAL_KERNEL_PARAM kernelParam,
2820 PRENDERHAL_KRN_ALLOCATION kernelAllocation,
2821 int32_t bindingTableID,
2822 int32_t curbeOffset)
2823 {
2824 PRENDERHAL_INTERFACE renderHal = state->renderHal;
2825 PRENDERHAL_MEDIA_STATE_LEGACY curMediaState;
2826 int32_t curbeSize, iCurbeCurrent;
2827 int32_t interfaceDescriptor;
2828 RENDERHAL_INTERFACE_DESCRIPTOR_PARAMS interfaceDescriptorParams;
2829
2830 interfaceDescriptor = -1;
2831
2832 // Obtain pointer and validate current media state
2833 curMediaState = (PRENDERHAL_MEDIA_STATE_LEGACY)renderHal->pStateHeap->pCurMediaState;
2834
2835 if (state->dshEnabled)
2836 {
2837 if (curMediaState == nullptr || (state->dshEnabled && (curMediaState->pDynamicState == nullptr)))
2838 {
2839 CM_ASSERTMESSAGE("Invalid Media State.");
2840 goto finish;
2841 }
2842 }
2843 else
2844 {
2845 if (curMediaState == nullptr)
2846 {
2847 CM_ASSERTMESSAGE("Invalid Media State.");
2848 goto finish;
2849 }
2850 }
2851
2852 // Validate kernel allocation (kernel must be pre-loaded into GSH)
2853 if (!kernelAllocation ||
2854 kernelAllocation->dwFlags == RENDERHAL_KERNEL_ALLOCATION_FREE ||
2855 kernelAllocation->iSize == 0)
2856 {
2857 CM_ASSERTMESSAGE("Error: Invalid Kernel Allocation.");
2858 goto finish;
2859 }
2860
2861 // Check Curbe allocation (CURBE_Lenght is in 256-bit count -> convert to bytes)
2862 curbeSize = kernelParam->curbeSizePerThread;
2863
2864 if (state->dshEnabled)
2865 {
2866 iCurbeCurrent = curMediaState->pDynamicState->Curbe.iCurrent;
2867 }
2868 else
2869 {
2870 iCurbeCurrent = curMediaState->iCurbeOffset;
2871 }
2872
2873 if (curbeSize <= 0)
2874 {
2875 // Curbe is not used by the kernel
2876 curbeSize = curbeOffset = 0;
2877 }
2878 // Validate Curbe Offset (curbe must be pre-allocated)
2879 else if ( curbeOffset < 0 || // Not allocated
2880 (curbeOffset & 0x1F) != 0 || // Invalid alignment
2881 (curbeOffset + curbeSize) > iCurbeCurrent) // Invalid size
2882 {
2883 CM_ASSERTMESSAGE("Error: Invalid Curbe Allocation.");
2884 goto finish;
2885 }
2886
2887 // Try to reuse interface descriptor (for 2nd level buffer optimizations)
2888 // Check if ID already in use by another kernel - must use a different ID
2889 interfaceDescriptor = renderHal->pfnGetMediaID(renderHal, curMediaState, kernelAllocation);
2890 if (interfaceDescriptor < 0)
2891 {
2892 CM_ASSERTMESSAGE("Error: No Interface Descriptor available.");
2893 goto finish;
2894 }
2895
2896 interfaceDescriptorParams.iMediaID = interfaceDescriptor;
2897 interfaceDescriptorParams.iBindingTableID = bindingTableID;
2898
2899 //CURBE size and offset setting
2900 //Media w/o group: only per-thread CURBE is used, CrossThread CURBE is not used.
2901 //Media w/ group: should follow GPGPU walker setting, there is per-thread CURBE and cross-thread CURBE. But per-thread CURBE should be ZERO, and all should be cross-thread CURBE
2902 //GPGPU: both per-thread CURBE and cross-thread CURBE need be set.
2903 interfaceDescriptorParams.iCurbeOffset = curbeOffset;
2904 if ((!kernelParam->gpgpuWalkerParams.gpgpuEnabled) && (kernelParam->kernelThreadSpaceParam.groupSelect == CM_MW_GROUP_NONE) && (state->taskParam->mediaWalkerGroupSelect == CM_MW_GROUP_NONE))
2905 { //Media pipe without group
2906 interfaceDescriptorParams.iCurbeLength = kernelParam->curbeSizePerThread;
2907 interfaceDescriptorParams.iCrsThrdConstDataLn = kernelParam->crossThreadConstDataLen; //should always be 0 in this case
2908 interfaceDescriptorParams.iNumberThreadsInGroup = (kernelParam->numberThreadsInGroup > 0) ? kernelParam->numberThreadsInGroup : 1; // This field should not be set to 0 even if the barrier is disabled, since an accurate value is needed for proper pre-emption.
2909 interfaceDescriptorParams.blGlobalBarrierEnable = false;
2910 interfaceDescriptorParams.blBarrierEnable = false;
2911 interfaceDescriptorParams.iSLMSize = 0;
2912 }
2913 else if ((!kernelParam->gpgpuWalkerParams.gpgpuEnabled) && ((kernelParam->kernelThreadSpaceParam.groupSelect != CM_MW_GROUP_NONE) || (state->taskParam->mediaWalkerGroupSelect != CM_MW_GROUP_NONE)))
2914 { //Media w/ group
2915 interfaceDescriptorParams.iCurbeLength = 0; //No using per-thread CURBE
2916 interfaceDescriptorParams.iCrsThrdConstDataLn = kernelParam->curbeSizePerThread; //treat all CURBE as cross-thread CURBE
2917 interfaceDescriptorParams.iNumberThreadsInGroup = (kernelParam->numberThreadsInGroup > 0) ? kernelParam->numberThreadsInGroup : 1; // This field should not be set to 0 even if the barrier is disabled, since an accurate value is needed for proper pre-emption.
2918 interfaceDescriptorParams.blBarrierEnable = (kernelParam->barrierMode != CM_NO_BARRIER) ? true : false;
2919 interfaceDescriptorParams.blGlobalBarrierEnable = (kernelParam->barrierMode == CM_GLOBAL_BARRIER) ? true : false;
2920 interfaceDescriptorParams.iSLMSize = kernelParam->slmSize;
2921 }
2922 else
2923 { //GPGPU pipe
2924 interfaceDescriptorParams.iCurbeLength = kernelParam->curbeSizePerThread;
2925 interfaceDescriptorParams.iCrsThrdConstDataLn = kernelParam->crossThreadConstDataLen;
2926 interfaceDescriptorParams.iNumberThreadsInGroup = (kernelParam->numberThreadsInGroup > 0) ? kernelParam->numberThreadsInGroup : 1;
2927 interfaceDescriptorParams.blBarrierEnable = (kernelParam->barrierMode != CM_NO_BARRIER) ? true : false;
2928 interfaceDescriptorParams.blGlobalBarrierEnable = (kernelParam->barrierMode == CM_GLOBAL_BARRIER) ? true : false;
2929 interfaceDescriptorParams.iSLMSize = kernelParam->slmSize;
2930 }
2931 if (state->useNewSamplerHeap == true)
2932 {
2933 HalCm_SetupInterfaceDescriptor(state, renderHal, curMediaState, kernelAllocation, &interfaceDescriptorParams);
2934 }
2935 else
2936 {
2937 // Setup Media ID entry - this call could be HW dependent
2938 renderHal->pfnSetupInterfaceDescriptor(
2939 renderHal,
2940 curMediaState,
2941 kernelAllocation,
2942 &interfaceDescriptorParams);
2943 }
2944
2945 finish:
2946 return interfaceDescriptor;
2947 }
2948
isRenderTarget(PCM_HAL_STATE state,uint32_t index)2949 bool isRenderTarget(PCM_HAL_STATE state, uint32_t index)
2950 {
2951 bool readSync = false;
2952
2953 readSync = state->umdSurf2DTable[index].readSyncs[state->osInterface->CurrentGpuContextOrdinal];
2954
2955 if (readSync)
2956 return false;
2957 else
2958 return true;
2959 }
2960
HalCm_DSH_LoadKernelArray(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM * kernelArray,int32_t kernelCount,PRENDERHAL_KRN_ALLOCATION * krnAllocation)2961 int32_t HalCm_DSH_LoadKernelArray(
2962 PCM_HAL_STATE state,
2963 PCM_HAL_KERNEL_PARAM *kernelArray,
2964 int32_t kernelCount,
2965 PRENDERHAL_KRN_ALLOCATION *krnAllocation)
2966 {
2967 PRENDERHAL_INTERFACE renderHal;
2968 PCM_HAL_KERNEL_PARAM kernel;
2969 PMHW_STATE_HEAP_MEMORY_BLOCK memoryBlock; // Kernel memory block
2970 int32_t totalSize; // Total size
2971 uint32_t blockSize[CM_MAX_KERNELS_PER_TASK]; // Size of kernels to load
2972 int32_t blockCount; // Number of kernels to load
2973 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
2974 int32_t hr = CM_FAILURE;
2975
2976 renderHal = state->renderHal;
2977 state->criticalSectionDSH->Acquire();
2978 do
2979 {
2980 blockCount = 0;
2981 totalSize = 0;
2982
2983 // Obtain list of kernels already loaded, discard kernels loaded in older heaps.
2984 // Calculate total size of kernels to be loaded, and get size of largest kernel.
2985 for (int i = 0; i < kernelCount; i++)
2986 {
2987 // Find out if kernel is already allocated and loaded in ISH
2988 kernel = kernelArray[i];
2989 krnAllocation[i] = (PRENDERHAL_KRN_ALLOCATION)renderHal->pfnSearchDynamicKernel(renderHal, static_cast<int>((kernel->kernelId >> 32)), -1);
2990
2991 // Kernel is allocated - check if kernel is in current ISH
2992 if (krnAllocation[i])
2993 {
2994 // Check if kernel is loaded
2995 memoryBlock = krnAllocation[i]->pMemoryBlock;
2996
2997 if (memoryBlock)
2998 {
2999 // Kernel needs to be reloaded in current heap
3000 if (memoryBlock->pStateHeap != renderHal->pMhwStateHeap->GetISHPointer() || state->forceKernelReload) //pInstructionStateHeaps
3001 {
3002 renderHal->pMhwStateHeap->FreeDynamicBlockDyn(MHW_ISH_TYPE, memoryBlock);
3003 krnAllocation[i]->pMemoryBlock = nullptr;
3004 }
3005 else
3006 {
3007 // Increment kernel usage count, used in kernel caching architecture
3008 state->dshKernelCacheHit++;
3009 krnAllocation[i]->dwCount++;
3010
3011 // Lock kernel to avoid removal while loading other kernels
3012 krnAllocation[i]->dwFlags = RENDERHAL_KERNEL_ALLOCATION_LOCKED;
3013 }
3014 }
3015 else if (krnAllocation[i]->dwFlags == RENDERHAL_KERNEL_ALLOCATION_REMOVED)
3016 {
3017 // This is a kernel that was unloaded and now needs to be reloaded
3018 // Track how many times this "cache miss" happens to determine if the
3019 // ISH is under pressure and needs to be expanded
3020 state->dshKernelCacheMiss++;
3021 }
3022 }
3023 else
3024 {
3025 // Assign kernel allocation for this kernel
3026 krnAllocation[i] = renderHal->pfnAllocateDynamicKernel(renderHal, static_cast<int>((kernel->kernelId >> 32)), -1);
3027 CM_CHK_NULL_GOTOFINISH_MOSERROR(krnAllocation[i]);
3028 }
3029
3030 // Kernel is not loaded -> add to list of kernels to be loaded
3031 if (krnAllocation[i]->pMemoryBlock == nullptr &&
3032 krnAllocation[i]->dwFlags != RENDERHAL_KERNEL_ALLOCATION_LOADING)
3033 {
3034 // Increment amount of data that needs to be loaded in ISH (kernel already registered but unloaded)
3035 blockSize[blockCount++] = kernel->kernelBinarySize + CM_KERNEL_BINARY_PADDING_SIZE;
3036 totalSize += kernel->kernelBinarySize + CM_KERNEL_BINARY_PADDING_SIZE;
3037
3038 // Flag this kernel as loading - one single kernel instance is needed, not multiple!
3039 // If the same kernel is used multiple times, avoid multiple reservations/loads
3040 krnAllocation[i]->dwFlags = RENDERHAL_KERNEL_ALLOCATION_LOADING;
3041 }
3042 }
3043
3044 // Use Hit/Miss ratio to ignore eventual cache misses
3045 // This code prevents ISH reallocation in case of eventual cache misses
3046 while (state->dshKernelCacheHit >= HAL_CM_KERNEL_CACHE_HIT_TO_MISS_RATIO)
3047 {
3048 if (state->dshKernelCacheMiss > 0) state->dshKernelCacheMiss--;
3049 state->dshKernelCacheHit -= HAL_CM_KERNEL_CACHE_HIT_TO_MISS_RATIO;
3050 }
3051
3052 // Grow the kernel heap if too many kernels are being reloaded or there isn't enough room to load all kernels
3053 if (state->dshKernelCacheMiss > HAL_CM_KERNEL_CACHE_MISS_THRESHOLD ||
3054 renderHal->pfnRefreshDynamicKernels(renderHal, totalSize, blockSize, blockCount) != MOS_STATUS_SUCCESS)
3055 {
3056 renderHal->pfnExpandKernelStateHeap(renderHal, (uint32_t)totalSize);
3057 state->dshKernelCacheHit = 0;
3058 state->dshKernelCacheMiss = 0;
3059 continue;
3060 }
3061
3062 // blockSize/blockCount define a list of blocks that must be loaded in current ISH for the
3063 // kernels not yet present. Pre-existing kernels are marked as bStatic to avoid being unloaded here
3064 if (blockCount > 0)
3065 {
3066 // Allocate array of kernels
3067 MHW_STATE_HEAP_DYNAMIC_ALLOC_PARAMS params;
3068 params.piSizes = (int32_t*)blockSize;
3069 params.iCount = blockCount;
3070 params.dwAlignment = RENDERHAL_KERNEL_BLOCK_ALIGN;
3071 params.bHeapAffinity = true; // heap affinity - load all kernels in the same heap
3072 params.pHeapAffinity = renderHal->pMhwStateHeap->GetISHPointer(); // Select the active instruction heap
3073 params.dwScratchSpace = 0;
3074 params.bZeroAssignedMem = true;
3075 params.bStatic = true;
3076 params.bGrow = false;
3077
3078 // Try to allocate array of blocks; if it fails, we may need to clear some space or grow the heap!
3079 memoryBlock = renderHal->pMhwStateHeap->AllocateDynamicBlockDyn(MHW_ISH_TYPE, ¶ms);
3080 if (!memoryBlock)
3081 {
3082 // Reset flags
3083 for (int i = 0; i < kernelCount; i++)
3084 {
3085 if (krnAllocation[i] && krnAllocation[i]->dwFlags == RENDERHAL_KERNEL_ALLOCATION_LOADING)
3086 {
3087 krnAllocation[i]->dwFlags = RENDERHAL_KERNEL_ALLOCATION_STALE;
3088 }
3089 }
3090
3091 if (renderHal->pfnRefreshDynamicKernels(renderHal, totalSize, blockSize, blockCount) != MOS_STATUS_SUCCESS)
3092 {
3093 renderHal->pfnExpandKernelStateHeap(renderHal, (uint32_t)totalSize);
3094 }
3095 continue;
3096 }
3097
3098 // All blocks are allocated in ISH
3099 // Setup kernel allocations, load kernel binaries
3100 for (int32_t i = 0; i < kernelCount; i++)
3101 {
3102 // Load kernels in ISH
3103 if (!krnAllocation[i]->pMemoryBlock)
3104 {
3105 PCM_HAL_KERNEL_PARAM kernelParam = kernelArray[i];
3106 PRENDERHAL_KRN_ALLOCATION allocation = krnAllocation[i];
3107 if (memoryBlock)
3108 {
3109 allocation->iKID = -1;
3110 allocation->iKUID = static_cast<int>((kernelArray[i]->kernelId >> 32));
3111 allocation->iKCID = -1;
3112 FrameTrackerTokenFlat_SetProducer(&allocation->trackerToken, &renderHal->trackerProducer);
3113 FrameTrackerTokenFlat_Merge(&allocation->trackerToken,
3114 renderHal->currentTrackerIndex,
3115 renderHal->trackerProducer.GetNextTracker(renderHal->currentTrackerIndex));
3116 allocation->dwOffset = memoryBlock->dwDataOffset;
3117 allocation->iSize = kernelArray[i]->kernelBinarySize + CM_KERNEL_BINARY_PADDING_SIZE;
3118 allocation->dwCount = 0;
3119 allocation->dwFlags = RENDERHAL_KERNEL_ALLOCATION_USED;
3120 allocation->Params = state->kernelParamsRenderHal.Params;
3121 allocation->pMhwKernelParam = &state->kernelParamsMhw;
3122 allocation->pMemoryBlock = memoryBlock;
3123
3124 // Copy kernel data
3125 // Copy MovInstruction First
3126 if (allocation->pMemoryBlock &&
3127 allocation->pMemoryBlock->dwDataSize >= kernelParam->kernelBinarySize)
3128 {
3129 MOS_SecureMemcpy(allocation->pMemoryBlock->pDataPtr,
3130 kernelParam->movInsDataSize,
3131 kernelParam->movInsData,
3132 kernelParam->movInsDataSize);
3133
3134 // Copy Cm Kernel Binary
3135 MOS_SecureMemcpy(allocation->pMemoryBlock->pDataPtr + kernelParam->movInsDataSize,
3136 kernelParam->kernelBinarySize - kernelParam->movInsDataSize,
3137 kernelParam->kernelBinary,
3138 kernelParam->kernelBinarySize - kernelParam->movInsDataSize);
3139
3140 // Padding bytes dummy instructions after kernel binary to resolve page fault issue
3141 MOS_ZeroMemory(allocation->pMemoryBlock->pDataPtr + kernelParam->kernelBinarySize, CM_KERNEL_BINARY_PADDING_SIZE);
3142 }
3143
3144 // Get next memory block returned as part of the array
3145 memoryBlock = memoryBlock->pNext;
3146 }
3147 }
3148 }
3149 }
3150
3151 // Kernel load was successfull, or nothing else to load -
3152 // Quit the kernel load loop
3153 hr = CM_SUCCESS;
3154 eStatus = MOS_STATUS_SUCCESS;
3155 break;
3156
3157 } while (1);
3158
3159 finish:
3160 if (eStatus == MOS_STATUS_SUCCESS)
3161 {
3162 for (int32_t i = 0; i < kernelCount; i++)
3163 {
3164 renderHal->pfnTouchDynamicKernel(renderHal, krnAllocation[i]);
3165 }
3166 }
3167 state->criticalSectionDSH->Release();
3168 return hr;
3169 }
3170
HalCm_DSH_GetDynamicStateConfiguration(PCM_HAL_STATE state,PRENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS params,uint32_t numKernels,PCM_HAL_KERNEL_PARAM * kernels,uint32_t * piCurbeOffsets)3171 MOS_STATUS HalCm_DSH_GetDynamicStateConfiguration(
3172 PCM_HAL_STATE state,
3173 PRENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS params,
3174 uint32_t numKernels,
3175 PCM_HAL_KERNEL_PARAM *kernels,
3176 uint32_t *piCurbeOffsets)
3177 {
3178 PCM_HAL_KERNEL_PARAM cmKernel;
3179
3180 PRENDERHAL_INTERFACE renderHal = state->renderHal;
3181 PRENDERHAL_KRN_ALLOCATION krnAllocation;
3182
3183 MOS_ZeroMemory(params, sizeof(RENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS));
3184
3185 params->iMaxMediaIDs = numKernels;
3186
3187 for (uint32_t i = 0; i < numKernels; i++)
3188 {
3189 cmKernel = kernels[i];
3190
3191 // get max curbe size
3192 int32_t curbeSize = MOS_ALIGN_CEIL(cmKernel->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
3193 int32_t curbeOffset = piCurbeOffsets[i] + curbeSize;
3194 params->iMaxCurbeOffset = MOS_MAX(params->iMaxCurbeOffset, curbeOffset);
3195 params->iMaxCurbeSize += curbeSize;
3196
3197 // get max spill size
3198 params->iMaxSpillSize = MOS_MAX(params->iMaxSpillSize, (int32_t)cmKernel->spillSize);
3199
3200 // check if kernel already used - increase Max Media ID to allow BB reuse logic
3201 krnAllocation = renderHal->pfnSearchDynamicKernel(renderHal, static_cast<int>((cmKernel->kernelId >> 32)), -1);
3202 if (krnAllocation)
3203 {
3204 params->iMaxMediaIDs = MOS_MAX(params->iMaxMediaIDs, krnAllocation->iKID + 1);
3205 }
3206 }
3207
3208 if (state->useNewSamplerHeap == true)
3209 {
3210 // Update offset to the base of first kernel and update count
3211 // for 3D sampler, update indirect state information
3212 unsigned int heapOffset = 0;
3213 unsigned int sampler3DCount = 0;
3214 MHW_SAMPLER_STATE_PARAM samplerParamMhw = {};
3215 SamplerParam samplerParam = {};
3216 samplerParamMhw.SamplerType = MHW_SAMPLER_TYPE_3D;
3217 state->cmHalInterface->GetSamplerParamInfoForSamplerType(&samplerParamMhw, samplerParam);
3218 for (unsigned int i = 0; i < numKernels; i++)
3219 {
3220 cmKernel = kernels[i];
3221 std::list<SamplerParam> *sampler_heap = cmKernel->samplerHeap;
3222 std::list<SamplerParam>::iterator iter;
3223
3224 heapOffset = MOS_ALIGN_CEIL(heapOffset, MHW_SAMPLER_STATE_ALIGN);
3225 state->taskParam->samplerOffsetsByKernel[i] = heapOffset;
3226 state->taskParam->samplerCountsByKernel[i] = sampler_heap->size();
3227
3228 if (sampler_heap->size() > 0)
3229 {
3230 heapOffset = heapOffset + sampler_heap->back().heapOffset + sampler_heap->back().size;
3231
3232 // 3D sampler needs indirect sampler heap, so calculates the required size
3233 // and offset for indirect sampler heap.
3234 unsigned int max3DCount = 0;
3235 for (iter = sampler_heap->begin(); iter != sampler_heap->end(); ++iter)
3236 {
3237 if (iter->elementType == samplerParam.elementType)
3238 {
3239 if (iter->userDefinedBti == true)
3240 {
3241 max3DCount = iter->bti + 1;
3242 }
3243 else
3244 {
3245 max3DCount += 1;
3246 }
3247 }
3248 }
3249 heapOffset = MOS_ALIGN_CEIL(heapOffset, MHW_SAMPLER_STATE_ALIGN);
3250 state->taskParam->samplerIndirectOffsetsByKernel[i] = heapOffset;
3251 heapOffset += max3DCount * state->renderHal->pHwSizes->dwSizeSamplerIndirectState;
3252 sampler3DCount += max3DCount;
3253 }
3254 }
3255
3256 // Temporary solution for DSH sampler heap assginment:
3257 // Adjust sampler space for DSH, because the DSH use sampler count to
3258 // allocate the space. However the mechanism is not correct. The sampler
3259 // heap size is actually calculated by the maximum offset of the largest
3260 // sampler type.
3261 // So the offset of largest element plus the size of all of the largest
3262 // element samplers should be equal to the maximum size. However we cannot
3263 // do this because of the DSH's mechanism.
3264 // To resolve this, we first let DSH allocate enough 3D samplers
3265 // (because 3D samplers has indirect state), then just convert the rest of
3266 // the heap to AVS. Here we only care about the size, not the correct
3267 // number because we are going to calculate the offset by ourself.
3268 // Since DSH allocation has some alignments inside, the actually size of the
3269 // heap should be slightly larger, which should be OK.
3270
3271 samplerParamMhw.SamplerType = MHW_SAMPLER_TYPE_AVS;
3272 state->cmHalInterface->GetSamplerParamInfoForSamplerType(&samplerParamMhw, samplerParam);
3273 params->iMaxSamplerIndex3D = (sampler3DCount + numKernels - 1) / numKernels;
3274 params->iMaxSamplerIndexAVS = ((heapOffset - sampler3DCount * (state->renderHal->pHwSizes->dwSizeSamplerState + state->renderHal->pHwSizes->dwSizeSamplerIndirectState)) + samplerParam.btiMultiplier * numKernels - 1) / (samplerParam.btiMultiplier * numKernels);
3275 }
3276 else
3277 {
3278 // Get total sampler count
3279
3280 // Initialize pointers to samplers and reset sampler index table
3281 MOS_FillMemory(state->samplerIndexTable, state->cmDeviceParam.maxSamplerTableSize, CM_INVALID_INDEX);
3282
3283 params->iMaxSamplerIndex3D = CM_MAX_3D_SAMPLER_SIZE;
3284 params->iMaxSamplerIndexAVS = CM_MAX_AVS_SAMPLER_SIZE;
3285 params->iMaxSamplerIndexConv = 0;
3286 params->iMaxSamplerIndexMisc = 0;
3287 params->iMax8x8Tables = CM_MAX_AVS_SAMPLER_SIZE;
3288 }
3289 return MOS_STATUS_SUCCESS;
3290 }
3291
HalCm_DSH_UnregisterKernel(PCM_HAL_STATE state,uint64_t kernelId)3292 MOS_STATUS HalCm_DSH_UnregisterKernel(
3293 PCM_HAL_STATE state,
3294 uint64_t kernelId)
3295 {
3296 PRENDERHAL_INTERFACE renderHal = state->renderHal;
3297 PRENDERHAL_KRN_ALLOCATION krnAllocation = renderHal->pfnSearchDynamicKernel(renderHal, static_cast<int>((kernelId >> 32)), -1);
3298 if (krnAllocation)
3299 {
3300 state->criticalSectionDSH->Acquire();
3301 renderHal->pfnUnregisterKernel(renderHal, krnAllocation);
3302 state->criticalSectionDSH->Release();
3303 }
3304 return MOS_STATUS_SUCCESS;
3305 }
3306
3307 //*-----------------------------------------------------------------------------
3308 //| Purpose: Setup Sampler State
3309 //| Returns: Result of the operation
3310 //*-----------------------------------------------------------------------------
HalCm_SetupSamplerState(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM kernelParam,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t mediaID,uint32_t threadIndex,uint8_t * buffer)3311 MOS_STATUS HalCm_SetupSamplerState(
3312 PCM_HAL_STATE state,
3313 PCM_HAL_KERNEL_PARAM kernelParam,
3314 PCM_HAL_KERNEL_ARG_PARAM argParam,
3315 PCM_HAL_INDEX_PARAM indexParam,
3316 int32_t mediaID,
3317 uint32_t threadIndex,
3318 uint8_t *buffer)
3319 {
3320 MOS_STATUS eStatus;
3321 PRENDERHAL_INTERFACE renderHal;
3322 PMHW_SAMPLER_STATE_PARAM samplerParam;
3323 uint8_t *src;
3324 uint8_t *dst;
3325 uint32_t index;
3326 uint32_t samplerIndex = 0;
3327 void *sampler = nullptr;
3328 uint32_t samplerOffset = 0;
3329
3330 eStatus = MOS_STATUS_SUCCESS;
3331
3332 CM_CHK_NULL_GOTOFINISH_MOSERROR(state);
3333
3334 renderHal = state->renderHal;
3335
3336 if (indexParam->samplerIndexCount >= (uint32_t)renderHal->StateHeapSettings.iSamplers)
3337 {
3338 eStatus = MOS_STATUS_INVALID_PARAMETER;
3339 CM_ASSERTMESSAGE(
3340 "Exceeded Max samplers '%d'",
3341 indexParam->samplerIndexCount);
3342 goto finish;
3343 }
3344
3345 // Get the Index to sampler array from the kernel data
3346 //----------------------------------
3347 CM_ASSERT(argParam->unitSize == sizeof(index));
3348 //----------------------------------
3349
3350 src = argParam->firstValue + (threadIndex * argParam->unitSize);
3351 index = *((uint32_t*)src);
3352
3353 // check to see if the data present for the sampler in the array
3354 if (index >= state->cmDeviceParam.maxSamplerTableSize ||
3355 !state->samplerTable[index].bInUse)
3356 {
3357 eStatus = MOS_STATUS_INVALID_PARAMETER;
3358 CM_ASSERTMESSAGE(
3359 "Invalid Sampler array index '%d'", index);
3360 goto finish;
3361 }
3362 // Setup samplers
3363 samplerParam = &state->samplerTable[index];
3364
3365 if (state->useNewSamplerHeap == true)
3366 {
3367 std::list<SamplerParam>::iterator iter;
3368 for (iter = kernelParam->samplerHeap->begin(); iter != kernelParam->samplerHeap->end(); ++iter)
3369 {
3370 if ((iter->samplerTableIndex == index)&&(iter->regularBti == true))
3371 {
3372 break;
3373 }
3374 }
3375 if (iter != kernelParam->samplerHeap->end())
3376 {
3377 samplerIndex = iter->bti;
3378 }
3379 else
3380 {
3381 // There must be incorrect internal logic
3382 CM_ASSERTMESSAGE( "BTI calculation error in cm_hal\n");
3383 return MOS_STATUS_UNKNOWN;
3384 }
3385 HalCm_GetSamplerOffset(state, renderHal, mediaID, iter->heapOffset, iter->bti, samplerParam, &samplerOffset);
3386 }
3387 else
3388 {
3389 // Check to see if sampler is already assigned
3390 samplerIndex = state->samplerIndexTable[index];
3391 if ((int)samplerIndex == CM_INVALID_INDEX)
3392 {
3393
3394 switch (state->samplerTable[index].ElementType)
3395 {
3396
3397 case MHW_Sampler2Elements:
3398 {
3399 unsigned int index = 0;
3400 index = state->samplerStatistics.samplerIndexBase[MHW_Sampler2Elements];
3401 while (state->samplerIndexTable[index] != CM_INVALID_INDEX)
3402 {
3403 index++;
3404 }
3405 samplerIndex = index;
3406 state->samplerStatistics.samplerIndexBase[MHW_Sampler2Elements] = (index + 1);
3407 break;
3408 }
3409 case MHW_Sampler4Elements:
3410 {
3411 unsigned int index = 0;
3412 index = state->samplerStatistics.samplerIndexBase[MHW_Sampler4Elements];
3413 while (state->samplerIndexTable[index] != CM_INVALID_INDEX)
3414 {
3415 index++;
3416 }
3417 samplerIndex = index;
3418 state->samplerStatistics.samplerIndexBase[MHW_Sampler4Elements] = (index + 1);
3419 break;
3420 }
3421 case MHW_Sampler8Elements:
3422 {
3423 unsigned int index = 0;
3424 index = state->samplerStatistics.samplerIndexBase[MHW_Sampler8Elements];
3425 while (state->samplerIndexTable[index] != CM_INVALID_INDEX)
3426 {
3427 index++;
3428 }
3429 samplerIndex = index;
3430 state->samplerStatistics.samplerIndexBase[MHW_Sampler8Elements] = (index + 1);
3431 break;
3432 }
3433 case MHW_Sampler64Elements:
3434 {
3435 unsigned int index = 0;
3436 index = state->samplerStatistics.samplerIndexBase[MHW_Sampler64Elements];
3437 while (state->samplerIndexTable[index] != CM_INVALID_INDEX)
3438 {
3439 index += index + 2;
3440 }
3441 samplerIndex = index;
3442 state->samplerStatistics.samplerIndexBase[MHW_Sampler64Elements] = (index + 2);
3443
3444 break;
3445 }
3446 case MHW_Sampler128Elements:
3447 {
3448 unsigned int index = 0;
3449 index = state->samplerStatistics.samplerIndexBase[MHW_Sampler128Elements];
3450 while (state->samplerIndexTable[index] != CM_INVALID_INDEX)
3451 {
3452 index++;
3453 }
3454 samplerIndex = index;
3455 state->samplerStatistics.samplerIndexBase[MHW_Sampler128Elements] = (index + 1);
3456
3457 break;
3458 }
3459 default:
3460 CM_ASSERTMESSAGE("Invalid sampler type '%d'.", state->samplerTable[index].SamplerType);
3461 break;
3462 }
3463 }
3464
3465 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnGetSamplerOffsetAndPtr(
3466 renderHal,
3467 mediaID,
3468 samplerIndex,
3469 samplerParam,
3470 &samplerOffset,
3471 &sampler));
3472 }
3473 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pMhwStateHeap->AddSamplerStateData(
3474 samplerOffset,
3475 &(((PRENDERHAL_MEDIA_STATE_LEGACY)renderHal->pStateHeap->pCurMediaState)->pDynamicState->memoryBlock),
3476 samplerParam));
3477
3478 state->samplerIndexTable[index] = (unsigned char)samplerIndex;
3479
3480 // Update the Batch Buffer
3481 if (buffer)
3482 {
3483 dst = buffer + argParam->payloadOffset;
3484 *((uint32_t*)dst) = samplerIndex;
3485 }
3486
3487 finish:
3488 return eStatus;
3489 }
3490
3491 //*-----------------------------------------------------------------------------
3492 //| Purpose: Setup Sampler State
3493 //| Returns: Result of the operation
3494 //*-----------------------------------------------------------------------------
HalCm_SetupSamplerStateWithBTIndex(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM kernelParam,PCM_HAL_SAMPLER_BTI_ENTRY samplerBTIEntry,uint32_t samplerCount,int32_t mediaID)3495 MOS_STATUS HalCm_SetupSamplerStateWithBTIndex(
3496 PCM_HAL_STATE state,
3497 PCM_HAL_KERNEL_PARAM kernelParam,
3498 PCM_HAL_SAMPLER_BTI_ENTRY samplerBTIEntry,
3499 uint32_t samplerCount,
3500 int32_t mediaID )
3501 {
3502 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3503 PRENDERHAL_INTERFACE renderHal;
3504 PMHW_SAMPLER_STATE_PARAM samplerParam;
3505 uint32_t index;
3506 uint32_t samplerIndex;
3507 void *sampler = nullptr;
3508 uint32_t samplerOffset = 0;
3509
3510 renderHal = state->renderHal;
3511
3512 if (state->useNewSamplerHeap != true)
3513 {
3514 if (samplerCount >= (uint32_t)renderHal->StateHeapSettings.iSamplers)
3515 {
3516 eStatus = MOS_STATUS_INVALID_PARAMETER;
3517 CM_ASSERTMESSAGE(
3518 "Exceeded Max samplers '%d'",
3519 samplerCount);
3520 goto finish;
3521 }
3522 }
3523
3524 index = samplerBTIEntry[ samplerCount ].samplerIndex;
3525
3526 // check to see if the data present for the sampler in the array
3527 if ( index >= state->cmDeviceParam.maxSamplerTableSize ||
3528 !state->samplerTable[ index ].bInUse )
3529 {
3530 eStatus = MOS_STATUS_INVALID_PARAMETER;
3531 CM_ASSERTMESSAGE(
3532 "Invalid Sampler array index '%d'", index );
3533 goto finish;
3534 }
3535
3536 samplerIndex = samplerBTIEntry[ samplerCount ].samplerBTI;
3537 // Setup samplers
3538 samplerParam = &state->samplerTable[ index ];
3539
3540 if (state->useNewSamplerHeap == true)
3541 {
3542 std::list<SamplerParam>::iterator iter;
3543 for (iter = kernelParam->samplerHeap->begin(); iter != kernelParam->samplerHeap->end(); ++iter)
3544 {
3545 if ((iter->samplerTableIndex == index) && (iter->bti == samplerIndex) && (iter->userDefinedBti == true))
3546 {
3547 break;
3548 }
3549 }
3550 if (iter == kernelParam->samplerHeap->end())
3551 {
3552 // There must be incorrect internal logic
3553 CM_ASSERTMESSAGE("BTI calculation error in cm_hal\n");
3554 return MOS_STATUS_UNKNOWN;
3555 }
3556 HalCm_GetSamplerOffset(state, renderHal, mediaID, iter->heapOffset, iter->bti, samplerParam, &samplerOffset);
3557 }
3558 else
3559 {
3560 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnGetSamplerOffsetAndPtr(renderHal, mediaID, samplerIndex, samplerParam, &samplerOffset, &sampler));
3561 }
3562
3563 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pMhwStateHeap->AddSamplerStateData(
3564 samplerOffset,
3565 &(((PRENDERHAL_MEDIA_STATE_LEGACY)renderHal->pStateHeap->pCurMediaState)->pDynamicState->memoryBlock),
3566 samplerParam));
3567
3568 finish:
3569 return eStatus;
3570 }
3571
3572 //*-----------------------------------------------------------------------------
3573 //| Purpose: Setup Buffer surface State
3574 //| Returns: Result of the operation
3575 //*-----------------------------------------------------------------------------
HalCm_SetupBufferSurfaceState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,int16_t globalSurface,uint32_t threadIndex,uint8_t * buffer)3576 MOS_STATUS HalCm_SetupBufferSurfaceState(
3577 PCM_HAL_STATE state,
3578 PCM_HAL_KERNEL_ARG_PARAM argParam,
3579 PCM_HAL_INDEX_PARAM indexParam,
3580 int32_t bindingTable,
3581 int16_t globalSurface,
3582 uint32_t threadIndex,
3583 uint8_t *buffer)
3584 {
3585 MOS_STATUS eStatus;
3586 RENDERHAL_SURFACE surface;
3587 PMOS_SURFACE mosSurface;
3588 RENDERHAL_SURFACE_STATE_PARAMS surfaceParam;
3589 PRENDERHAL_INTERFACE renderHal;
3590 PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntry;
3591 uint8_t *src;
3592 uint8_t *dst;
3593 uint32_t index;
3594 uint32_t btIndex;
3595 uint16_t memObjCtl;
3596 uint32_t offsetSrc;
3597 PRENDERHAL_STATE_HEAP stateHeap;
3598 CM_SURFACE_BTI_INFO surfBTIInfo;
3599
3600 eStatus = MOS_STATUS_UNKNOWN;
3601 renderHal = state->renderHal;
3602 //GT-PIN
3603 PCM_HAL_TASK_PARAM taskParam = state->taskParam;
3604
3605 // Get the Index to Buffer array from the kernel data
3606 CM_ASSERT(argParam->unitSize == sizeof(index));
3607
3608 //Init surfBTIInfo
3609 state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
3610
3611 src = argParam->firstValue + (threadIndex * argParam->unitSize);
3612 index = *((uint32_t*)src) & CM_SURFACE_MASK;
3613 if (index == CM_NULL_SURFACE)
3614 {
3615 if (buffer)
3616 {
3617 dst = buffer + argParam->payloadOffset;
3618 *((uint32_t*)dst) = CM_NULL_SURFACE_BINDING_INDEX;
3619 }
3620
3621 eStatus = MOS_STATUS_SUCCESS;
3622 goto finish;
3623 }
3624
3625 memObjCtl = state->bufferTable[index].memObjCtl;
3626 if (!memObjCtl)
3627 {
3628 memObjCtl = CM_DEFAULT_CACHE_TYPE;
3629 }
3630
3631 // check to see if index is valid
3632 if (index >= state->cmDeviceParam.maxBufferTableSize ||
3633 (state->bufferTable[index].size == 0))
3634 {
3635 eStatus = MOS_STATUS_INVALID_PARAMETER;
3636 CM_ASSERTMESSAGE(
3637 "Invalid Buffer surface array index '%d'", index);
3638 goto finish;
3639 }
3640
3641 // Check to see if buffer is already assigned
3642 btIndex = state->btiBufferIndexTable[index].BTI.regularSurfIndex;
3643 if (btIndex == ( unsigned char )CM_INVALID_INDEX || argParam->aliasCreated == true)
3644 {
3645 if (globalSurface < 0)
3646 {
3647 btIndex = HalCm_GetFreeBindingIndex(state, indexParam, 1);
3648 }
3649 else
3650 {
3651 btIndex = globalSurface + surfBTIInfo.reservedSurfaceStart; //CM_BINDING_START_INDEX_OF_GLOBAL_SURFACE(state);
3652 if ( btIndex >= (surfBTIInfo.reservedSurfaceStart + CM_MAX_GLOBAL_SURFACE_NUMBER) ) {
3653 eStatus = MOS_STATUS_INVALID_PARAMETER;
3654 CM_ASSERTMESSAGE("Exceeded Max Global Surfaces '%d'", btIndex);
3655 goto finish;
3656 }
3657 }
3658 // Get Details of Buffer surface and fill the surface
3659 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACEBUFFER, index, 0));
3660
3661 MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
3662
3663 // override the buffer offset and size if alias is used
3664 mosSurface = &(surface.OsSurface);
3665 if (state->bufferTable[index].surfaceStateEntry[argParam->aliasIndex / state->surfaceArraySize].surfaceStateSize)
3666 {
3667 mosSurface->dwWidth = state->bufferTable[index].surfaceStateEntry[argParam->aliasIndex / state->surfaceArraySize].surfaceStateSize;
3668 mosSurface->dwOffset = state->bufferTable[index].surfaceStateEntry[argParam->aliasIndex / state->surfaceArraySize].surfaceStateOffset;
3669 surface.rcSrc.right = mosSurface->dwWidth;
3670 surface.rcDst.right = mosSurface->dwWidth;
3671 }
3672 // override the mocs value if it is set
3673 if (state->bufferTable[index].surfaceStateEntry[argParam->aliasIndex / state->surfaceArraySize].surfaceStateMOCS)
3674 {
3675 memObjCtl = state->bufferTable[index].surfaceStateEntry[argParam->aliasIndex / state->surfaceArraySize].surfaceStateMOCS;
3676 }
3677
3678 //Cache configurations
3679 state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
3680
3681 // Set the isOutput by default
3682 surfaceParam.isOutput = true;
3683
3684 // Setup Buffer surface
3685 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupBufferSurfaceState(
3686 renderHal,
3687 &surface,
3688 &surfaceParam,
3689 &surfaceEntry));
3690
3691 // Bind the surface State
3692 CM_ASSERT(((int32_t)btIndex) < renderHal->StateHeapSettings.iSurfacesPerBT + surfBTIInfo.normalSurfaceStart);
3693 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
3694 renderHal,
3695 bindingTable,
3696 btIndex,
3697 surfaceEntry));
3698
3699 if ((taskParam->surfEntryInfoArrays.kernelNum != 0) &&
3700 (taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr))
3701 {
3702 //GT-Pin
3703 uint32_t dummy = 0;
3704 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceDetails(
3705 state,
3706 indexParam,
3707 btIndex,
3708 surface.OsSurface,
3709 globalSurface,
3710 nullptr,
3711 dummy,
3712 surfaceParam,
3713 CM_ARGUMENT_SURFACEBUFFER));
3714 }
3715
3716 // Update index to table
3717 state->btiBufferIndexTable[ index ].BTI.regularSurfIndex = btIndex;
3718 state->btiBufferIndexTable[ index ].nPlaneNumber = 1;
3719
3720 stateHeap = renderHal->pStateHeap;
3721 offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
3722 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
3723 ( bindingTable * stateHeap->iBindingTableSize ) + // Moves the pointer to a Particular Binding Table
3724 ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
3725
3726 state->btiBufferIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
3727 }
3728 else
3729 {
3730 stateHeap = renderHal->pStateHeap;
3731
3732 // Get Offset to Current Binding Table
3733 uint32_t offsetCurrentBTStart = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
3734 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
3735 ( bindingTable * stateHeap->iBindingTableSize ); // Moves the pointer to a Particular Binding Table
3736
3737 uint32_t *currentBTStart = ( uint32_t *)( stateHeap->pSshBuffer + offsetCurrentBTStart );
3738
3739 int nEntryIndex = (int) ((uint32_t*)( state->btiBufferIndexTable[ index ].BTITableEntry.regularBtiEntryPosition ) - currentBTStart);
3740
3741 if ( ( nEntryIndex < 0 ) || ( nEntryIndex >= renderHal->StateHeapSettings.iSurfacesPerBT ) )
3742 {
3743 uint32_t surfaceEntries = state->btiBufferIndexTable[ index ].nPlaneNumber;
3744 if ( globalSurface < 0 )
3745 {
3746 btIndex = HalCm_GetFreeBindingIndex( state, indexParam, surfaceEntries );
3747 }
3748 else
3749 {
3750 btIndex = globalSurface + surfBTIInfo.reservedSurfaceStart;
3751 if ( btIndex >= (surfBTIInfo.reservedSurfaceStart + CM_MAX_GLOBAL_SURFACE_NUMBER ) )
3752 {
3753 eStatus = MOS_STATUS_INVALID_PARAMETER;
3754 CM_ASSERTMESSAGE( "Exceeded Max Global Surfaces '%d'", btIndex );
3755 goto finish;
3756 }
3757 }
3758
3759 // Bind the surface State
3760 CM_ASSERT( ( ( int32_t )btIndex ) < renderHal->StateHeapSettings.iSurfacesPerBT + surfBTIInfo.normalSurfaceStart);
3761
3762 // Get Offset to Current Binding Table
3763 uint32_t offsetDst = offsetCurrentBTStart + ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
3764
3765 uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
3766 MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * surfaceEntries, state->btiBufferIndexTable[ index ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * surfaceEntries );
3767
3768 // Update index to table
3769 state->btiBufferIndexTable[ index ].BTI.regularSurfIndex = btIndex;
3770 state->btiBufferIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = bindingTableEntry;
3771 }
3772 }
3773
3774 // Update the Batch Buffer
3775 if (buffer)
3776 {
3777 dst = buffer + argParam->payloadOffset;
3778 *((uint32_t*)dst) = btIndex;
3779 }
3780 eStatus = MOS_STATUS_SUCCESS;
3781
3782 finish:
3783 return eStatus;
3784 }
3785
3786 //*-----------------------------------------------------------------------------
3787 //| Purpose: Setup 3D surface State
3788 //| Returns: Result of the operation
3789 //*-----------------------------------------------------------------------------
HalCm_Setup3DSurfaceState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer)3790 MOS_STATUS HalCm_Setup3DSurfaceState(
3791 PCM_HAL_STATE state,
3792 PCM_HAL_KERNEL_ARG_PARAM argParam,
3793 PCM_HAL_INDEX_PARAM indexParam,
3794 int32_t bindingTable,
3795 uint32_t threadIndex,
3796 uint8_t *buffer)
3797 {
3798 MOS_STATUS eStatus;
3799 PRENDERHAL_INTERFACE renderHal;
3800 RENDERHAL_SURFACE surface;
3801 RENDERHAL_SURFACE_STATE_PARAMS surfaceParam;
3802 PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntries[MHW_MAX_SURFACE_PLANES];
3803 RENDERHAL_GET_SURFACE_INFO info;
3804 uint8_t *src;
3805 uint8_t *dst;
3806 int32_t nSurfaceEntries;
3807 uint32_t index;
3808 uint32_t btIndex;
3809 uint16_t memObjCtl;
3810 uint32_t i;
3811 uint32_t offsetSrc;
3812 PRENDERHAL_STATE_HEAP stateHeap;
3813 CM_SURFACE_BTI_INFO surfBTIInfo;
3814
3815 eStatus = MOS_STATUS_UNKNOWN;
3816 renderHal = state->renderHal;
3817 //GT-PIN
3818 PCM_HAL_TASK_PARAM taskParam = state->taskParam;
3819
3820 state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
3821
3822 // Get the Index to 3dsurface array from the kernel data
3823 CM_ASSERT(argParam->unitSize == sizeof(index));
3824 src = argParam->firstValue + (threadIndex * argParam->unitSize);
3825 index = *((uint32_t*)src) & CM_SURFACE_MASK;
3826 if (index == CM_NULL_SURFACE)
3827 {
3828 if (buffer)
3829 {
3830 dst = buffer + argParam->payloadOffset;
3831 *((uint32_t*)dst) = CM_NULL_SURFACE_BINDING_INDEX;
3832 }
3833
3834 eStatus = MOS_STATUS_SUCCESS;
3835 goto finish;
3836 }
3837
3838 memObjCtl = state->surf3DTable[index].memObjCtl;
3839 if (!memObjCtl)
3840 {
3841 memObjCtl = CM_DEFAULT_CACHE_TYPE;
3842 }
3843
3844 // check to see if the data present for the 3d surface in the array
3845 if ((index >= state->cmDeviceParam.max3DSurfaceTableSize) ||
3846 Mos_ResourceIsNull(&state->surf3DTable[index].osResource))
3847 {
3848 eStatus = MOS_STATUS_INVALID_PARAMETER;
3849 CM_ASSERTMESSAGE(
3850 "Invalid 2D surface array index '%d'", index);
3851 goto finish;
3852 }
3853
3854 // Check to see if surface is already assigned
3855 btIndex = state->bti3DIndexTable[index].BTI.regularSurfIndex;
3856 if ( btIndex == ( unsigned char )CM_INVALID_INDEX )
3857 {
3858 uint32_t tempPlaneIndex = 0;
3859 nSurfaceEntries = 0;
3860
3861 // Get Details of 3D surface and fill the surface
3862 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACE3D, index, 0));
3863
3864 // Setup 3D surface
3865 MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
3866 surfaceParam.Type = renderHal->SurfaceTypeDefault;
3867 surfaceParam.Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL;
3868 surfaceParam.isOutput = true;
3869
3870 //Cache configurations
3871 state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
3872
3873 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
3874 renderHal,
3875 &surface,
3876 &surfaceParam,
3877 &nSurfaceEntries,
3878 surfaceEntries,
3879 nullptr));
3880
3881 MOS_ZeroMemory(&info, sizeof(RENDERHAL_GET_SURFACE_INFO));
3882
3883 CM_CHK_MOSSTATUS_GOTOFINISH(RenderHal_GetSurfaceInfo(
3884 state->osInterface,
3885 &info,
3886 &surface.OsSurface));
3887
3888 btIndex = HalCm_GetFreeBindingIndex(state, indexParam, nSurfaceEntries);
3889 for (i = 0; i < (uint32_t)nSurfaceEntries; i++)
3890 {
3891 *(surfaceEntries[i]->pSurface) = surface.OsSurface;
3892
3893 // Bind the surface State
3894 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
3895 renderHal,
3896 bindingTable,
3897 btIndex + i,
3898 surfaceEntries[i]));
3899
3900 if ((taskParam->surfEntryInfoArrays.kernelNum != 0) &&
3901 (taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr))
3902 {
3903 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceDetails(
3904 state,
3905 indexParam,
3906 btIndex + i,
3907 surface.OsSurface,
3908 0,
3909 surfaceEntries[i],
3910 tempPlaneIndex,
3911 surfaceParam,
3912 CM_ARGUMENT_SURFACE3D));
3913 }
3914 }
3915 // Update index to table
3916 state->bti3DIndexTable[ index ].BTI.regularSurfIndex = btIndex;
3917 state->bti3DIndexTable[ index ].nPlaneNumber = nSurfaceEntries;
3918
3919 stateHeap = renderHal->pStateHeap;
3920 offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
3921 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
3922 ( bindingTable * stateHeap->iBindingTableSize ) + // Moves the pointer to a Particular Binding Table
3923 ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
3924
3925 state->bti3DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
3926 }
3927 else
3928 {
3929 stateHeap = renderHal->pStateHeap;
3930
3931 // Get Offset to Current Binding Table
3932 uint32_t offsetCurrentBTStart = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
3933 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
3934 ( bindingTable * stateHeap->iBindingTableSize ); // Moves the pointer to a Particular Binding Table
3935
3936 uint32_t *currentBTStart = ( uint32_t *)( stateHeap->pSshBuffer + offsetCurrentBTStart );
3937
3938 int nEntryIndex = (int)((uint32_t*)( state->bti3DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition ) - currentBTStart);
3939
3940 if ( ( nEntryIndex < 0 ) || ( nEntryIndex >= renderHal->StateHeapSettings.iSurfacesPerBT ) )
3941 {
3942 nSurfaceEntries = state->bti3DIndexTable[ index ].nPlaneNumber;
3943 btIndex = HalCm_GetFreeBindingIndex( state, indexParam, nSurfaceEntries );
3944
3945 // Bind the surface State
3946 CM_ASSERT( ( ( int32_t )btIndex ) < renderHal->StateHeapSettings.iSurfacesPerBT + surfBTIInfo.normalSurfaceStart);
3947
3948 // Get Offset to Current Binding Table
3949 uint32_t offsetDst = offsetCurrentBTStart + ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
3950
3951 uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
3952 MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti3DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
3953
3954 // Update index to table
3955 state->bti3DIndexTable[ index ].BTI.regularSurfIndex = btIndex;
3956 state->bti3DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = bindingTableEntry;
3957 }
3958 }
3959
3960 // Update the Batch Buffer
3961 if (buffer)
3962 {
3963 dst = buffer + argParam->payloadOffset;
3964 *((uint32_t*)dst) = btIndex;
3965 }
3966
3967 eStatus = MOS_STATUS_SUCCESS;
3968
3969 finish:
3970 return eStatus;
3971 }
3972
/*----------------------------------------------------------------------------
| Purpose   : Sets the surface state interlaced (frame/field) settings
| Returns   : MOS_STATUS_SUCCESS, or MOS_STATUS_UNKNOWN for an unrecognized frame type
\---------------------------------------------------------------------------*/
HalCm_HwSetSurfaceProperty(PCM_HAL_STATE state,CM_FRAME_TYPE frameType,PRENDERHAL_SURFACE_STATE_PARAMS params)3977 MOS_STATUS HalCm_HwSetSurfaceProperty(
3978 PCM_HAL_STATE state,
3979 CM_FRAME_TYPE frameType,
3980 PRENDERHAL_SURFACE_STATE_PARAMS params)
3981 {
3982 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
3983
3984 switch (frameType)
3985 {
3986 case CM_FRAME:
3987 params->bVertStride = 0;
3988 params->bVertStrideOffs = 0;
3989 break;
3990 case CM_TOP_FIELD:
3991 params->bVertStride = 1;
3992 params->bVertStrideOffs = 0;
3993 break;
3994 case CM_BOTTOM_FIELD:
3995 params->bVertStride = 1;
3996 params->bVertStrideOffs = 1;
3997 break;
3998 default:
3999 eStatus = MOS_STATUS_UNKNOWN;
4000 }
4001
4002 return eStatus;
4003 }
4004
4005 // A special treatment of NV12 format. Offset of the UV plane in an NV12 surface is adjusted, so
4006 // this plane can be accessed as a separate R8G8 surface in kernels.
UpdateSurfaceAliasPlaneOffset(CM_HAL_SURFACE2D_SURFACE_STATE_PARAM * surfaceStateParam,MOS_SURFACE * mosSurface)4007 static bool UpdateSurfaceAliasPlaneOffset(
4008 CM_HAL_SURFACE2D_SURFACE_STATE_PARAM *surfaceStateParam,
4009 MOS_SURFACE *mosSurface)
4010 {
4011 if (Format_R8G8UN != surfaceStateParam->format
4012 || Format_NV12 != mosSurface->Format)
4013 {
4014 mosSurface->Format
4015 = static_cast<MOS_FORMAT>(surfaceStateParam->format);
4016 return false; // No need to update offset.
4017 }
4018 mosSurface->dwOffset = mosSurface->UPlaneOffset.iSurfaceOffset;
4019 mosSurface->Format = Format_R8G8UN;
4020 return false;
4021 }
4022
4023 //*-----------------------------------------------------------------------------
4024 //| Purpose: Setup 2D surface State
4025 //| Returns: Result of the operation
4026 //*-----------------------------------------------------------------------------
HalCm_Setup2DSurfaceStateBasic(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,bool pixelPitch,uint8_t * buffer,bool multipleBinding)4027 MOS_STATUS HalCm_Setup2DSurfaceStateBasic(
4028 PCM_HAL_STATE state,
4029 PCM_HAL_KERNEL_ARG_PARAM argParam,
4030 PCM_HAL_INDEX_PARAM indexParam,
4031 int32_t bindingTable,
4032 uint32_t threadIndex,
4033 bool pixelPitch,
4034 uint8_t *buffer,
4035 bool multipleBinding )
4036 {
4037 MOS_STATUS eStatus;
4038 RENDERHAL_SURFACE renderHalSurface;
4039 PMOS_SURFACE surface;
4040 RENDERHAL_SURFACE_STATE_PARAMS surfaceParam;
4041 PRENDERHAL_INTERFACE renderHal;
4042 PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntries[ MHW_MAX_SURFACE_PLANES ];
4043 uint8_t *src;
4044 uint8_t *dst;
4045 int32_t nSurfaceEntries = 0;
4046 uint32_t index;
4047 uint32_t btIndex;
4048 uint16_t memObjCtl;
4049 uint32_t i;
4050 uint32_t tempPlaneIndex = 0;
4051 uint32_t offsetSrc;
4052 PRENDERHAL_STATE_HEAP stateHeap;
4053 PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM surfStateParam = nullptr;
4054 UNUSED(multipleBinding);
4055
4056 eStatus = MOS_STATUS_UNKNOWN;
4057 renderHal = state->renderHal;
4058 MOS_ZeroMemory(&renderHalSurface, sizeof(renderHalSurface));
4059 surface = &renderHalSurface.OsSurface;
4060 nSurfaceEntries = 0;
4061
4062 //GT-PIN
4063 PCM_HAL_TASK_PARAM taskParam = state->taskParam;
4064
4065 // Get the Index to 2dsurface array from the kernel data
4066 CM_ASSERT( argParam->unitSize == sizeof( index ) );
4067 src = argParam->firstValue + ( threadIndex * argParam->unitSize );
4068 index = *( ( uint32_t *)src ) & CM_SURFACE_MASK;
4069 if ( index == CM_NULL_SURFACE )
4070 {
4071 if ( buffer )
4072 {
4073 dst = buffer + argParam->payloadOffset;
4074 *( ( uint32_t *)dst ) = CM_NULL_SURFACE_BINDING_INDEX;
4075 }
4076
4077 eStatus = MOS_STATUS_SUCCESS;
4078 goto finish;
4079 }
4080
4081 memObjCtl = state->umdSurf2DTable[index].memObjCtl;
4082 if ( !memObjCtl )
4083 {
4084 memObjCtl = CM_DEFAULT_CACHE_TYPE;
4085 }
4086
4087 // check to see if the data present for the 2d surface in the array
4088 if ( index >= state->cmDeviceParam.max2DSurfaceTableSize ||
4089 Mos_ResourceIsNull( &state->umdSurf2DTable[ index ].osResource ) )
4090 {
4091 eStatus = MOS_STATUS_INVALID_PARAMETER;
4092 CM_ASSERTMESSAGE(
4093 "Invalid 2D surface array index '%d'", index );
4094 goto finish;
4095 }
4096
4097 // Check to see if surface is already assigned
4098 unsigned char nBTIRegularSurf, nBTISamplerSurf;
4099 nBTIRegularSurf = state->bti2DIndexTable[ index ].BTI.regularSurfIndex;
4100 nBTISamplerSurf = state->bti2DIndexTable[ index ].BTI.samplerSurfIndex;
4101
4102 if (((!pixelPitch && (nBTIRegularSurf != (unsigned char)CM_INVALID_INDEX)) || (pixelPitch && (nBTISamplerSurf != (unsigned char)CM_INVALID_INDEX))) && argParam->aliasCreated == false )
4103 {
4104 if ( pixelPitch )
4105 {
4106 btIndex = nBTISamplerSurf;
4107 }
4108 else
4109 {
4110 btIndex = nBTIRegularSurf;
4111 }
4112
4113 stateHeap = renderHal->pStateHeap;
4114
4115 // Get Offset to Current Binding Table
4116 uint32_t offsetCurrentBTStart = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
4117 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
4118 ( bindingTable * stateHeap->iBindingTableSize ); // Moves the pointer to a Particular Binding Table
4119
4120 uint32_t *currentBTStart = ( uint32_t *)( stateHeap->pSshBuffer + offsetCurrentBTStart );
4121
4122 int nEntryIndex = 0;
4123
4124 if ( pixelPitch )
4125 {
4126 nEntryIndex = (int)((uint32_t*)( state->bti2DIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition ) - currentBTStart);
4127 }
4128 else
4129 {
4130 nEntryIndex = (int)((uint32_t*)( state->bti2DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition ) - currentBTStart);
4131 }
4132
4133 if ( ( nEntryIndex < 0 ) || ( nEntryIndex >= renderHal->StateHeapSettings.iSurfacesPerBT ) )
4134 {
4135 nSurfaceEntries = state->bti2DIndexTable[ index ].nPlaneNumber;
4136
4137 btIndex = HalCm_GetFreeBindingIndex( state, indexParam, nSurfaceEntries );
4138
4139 // Get Offset to Current Binding Table
4140 uint32_t offsetDst = offsetCurrentBTStart + ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
4141
4142 uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
4143
4144 if ( pixelPitch )
4145 {
4146 MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti2DIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
4147 }
4148 else
4149 {
4150 MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti2DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
4151 }
4152
4153 // update index to table
4154 if ( pixelPitch )
4155 {
4156 state->bti2DIndexTable[ index ].BTI.samplerSurfIndex = btIndex;
4157 state->bti2DIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition = bindingTableEntry;
4158 }
4159 else
4160 {
4161 state->bti2DIndexTable[ index ].BTI.regularSurfIndex = btIndex;
4162 state->bti2DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = bindingTableEntry;
4163 }
4164 }
4165
4166 // Update the Batch Buffer
4167 if ( buffer )
4168 {
4169 dst = buffer + argParam->payloadOffset;
4170 *( ( uint32_t *)dst ) = btIndex;
4171 }
4172
4173 eStatus = MOS_STATUS_SUCCESS;
4174 goto finish;
4175 }
4176
4177 CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceAndRegister( state, &renderHalSurface, CM_ARGUMENT_SURFACE2D, index, pixelPitch ) );
4178
4179 // Setup 2D surface
4180 MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
4181 surfaceParam.Type = renderHal->SurfaceTypeDefault;
4182 surfaceParam.Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL;
4183 surfaceParam.bVertStride = 0;
4184 surfaceParam.bVertStrideOffs = 0;
4185 if (!pixelPitch) {
4186 surfaceParam.bWidthInDword_UV = true;
4187 surfaceParam.bWidthInDword_Y = true;
4188 }
4189
4190 surfaceParam.isOutput = isRenderTarget(state, index);
4191 surfStateParam = &(state->umdSurf2DTable[index].surfaceStateParam[argParam->aliasIndex / state->surfaceArraySize]);
4192 if (surfStateParam->width)
4193 {
4194 surface->dwWidth = surfStateParam->width;
4195 }
4196 if (surfStateParam->height)
4197 {
4198 surface->dwHeight = surfStateParam->height;
4199 }
4200 if (surfStateParam->depth)
4201 {
4202 surface->dwDepth = surfStateParam->depth;
4203 }
4204 if (surfStateParam->pitch)
4205 {
4206 surface->dwPitch= surfStateParam->pitch;
4207 }
4208 if (surfStateParam->format)
4209 {
4210 UpdateSurfaceAliasPlaneOffset(surfStateParam, surface);
4211 }
4212 if (surfStateParam->surfaceXOffset)
4213 {
4214 surface->YPlaneOffset.iXOffset = surfStateParam->surfaceXOffset;
4215 if (surface->Format == Format_NV12)
4216 {
4217 surface->UPlaneOffset.iXOffset += surfStateParam->surfaceXOffset;
4218 }
4219 }
4220 if (surfStateParam->surfaceYOffset)
4221 {
4222 surface->YPlaneOffset.iYOffset = surfStateParam->surfaceYOffset;
4223 if (surface->Format == Format_NV12)
4224 {
4225 surface->UPlaneOffset.iYOffset += surfStateParam->surfaceYOffset/2;
4226 }
4227 }
4228 if (surfStateParam->memoryObjectControl)
4229 {
4230 memObjCtl = surfStateParam->memoryObjectControl;
4231 }
4232
4233 if(pixelPitch)
4234 renderHalSurface.Rotation = state->umdSurf2DTable[index].rotationFlag;
4235
4236 //Cache configurations
4237 state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
4238
4239 // interlace setting
4240 HalCm_HwSetSurfaceProperty(state,
4241 state->umdSurf2DTable[index].frameType,
4242 &surfaceParam);
4243
4244 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
4245 renderHal,
4246 &renderHalSurface,
4247 &surfaceParam,
4248 &nSurfaceEntries,
4249 surfaceEntries,
4250 nullptr));
4251
4252 nSurfaceEntries = MOS_MIN( nSurfaceEntries, MHW_MAX_SURFACE_PLANES );
4253
4254 btIndex = HalCm_GetFreeBindingIndex(state, indexParam, nSurfaceEntries);
4255 for (i = 0; i < (uint32_t)nSurfaceEntries; i++)
4256 {
4257 // Bind the surface State
4258 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
4259 renderHal,
4260 bindingTable,
4261 btIndex + i,
4262 surfaceEntries[i]));
4263 if ((taskParam->surfEntryInfoArrays.kernelNum !=0) &&
4264 (taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr))
4265 {
4266 //GT-Pin
4267 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceDetails(
4268 state,
4269 indexParam,
4270 btIndex + i,
4271 *surface,
4272 0,
4273 surfaceEntries[i],
4274 tempPlaneIndex,
4275 surfaceParam,
4276 CM_ARGUMENT_SURFACE2D));
4277 }
4278 surfaceEntries[i]->pSurface->dwWidth = state->umdSurf2DTable[index].width;
4279 surfaceEntries[i]->pSurface->dwHeight = state->umdSurf2DTable[index].height;
4280 }
4281
4282 // only update the reuse table for non-aliased surface
4283 if ( argParam->aliasCreated == false )
4284 {
4285 state->bti2DIndexTable[ index ].nPlaneNumber = nSurfaceEntries;
4286 // Get Offset to Current Binding Table
4287 stateHeap = renderHal->pStateHeap;
4288 offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
4289 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
4290 ( bindingTable * stateHeap->iBindingTableSize ) + // Moves the pointer to a Particular Binding Table
4291 ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
4292
4293 if ( pixelPitch )
4294 {
4295 state->bti2DIndexTable[ index ].BTI.samplerSurfIndex = btIndex;
4296 state->bti2DIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
4297 }
4298 else
4299 {
4300 state->bti2DIndexTable[ index ].BTI.regularSurfIndex = btIndex;
4301 state->bti2DIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
4302 }
4303 }
4304
4305 // Update the Batch Buffer
4306 if (buffer)
4307 {
4308 dst = buffer + argParam->payloadOffset;
4309 *((uint32_t*)dst) = btIndex;
4310 }
4311
4312 // reset surface height and width
4313 surface->dwWidth = state->umdSurf2DTable[index].width;
4314 surface->dwHeight = state->umdSurf2DTable[index].height;
4315
4316 eStatus = MOS_STATUS_SUCCESS;
4317
4318 finish:
4319 return eStatus;
4320 }
4321
4322
HalCm_Setup2DSurfaceState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer)4323 MOS_STATUS HalCm_Setup2DSurfaceState(
4324 PCM_HAL_STATE state,
4325 PCM_HAL_KERNEL_ARG_PARAM argParam,
4326 PCM_HAL_INDEX_PARAM indexParam,
4327 int32_t bindingTable,
4328 uint32_t threadIndex,
4329 uint8_t *buffer)
4330 {
4331 MOS_STATUS eStatus;
4332
4333 if (state->cmHalInterface->GetDecompressFlag())
4334 {
4335 state->pfnDecompressSurface(state, argParam, threadIndex);
4336 }
4337
4338 //Binding surface based at the unit of dword
4339 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceStateBasic(
4340 state, argParam, indexParam, bindingTable, threadIndex, false, buffer, false));
4341 eStatus = MOS_STATUS_SUCCESS;
4342
4343 finish:
4344 return eStatus;
4345 }
4346
HalCm_Setup2DSurfaceSamplerState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer)4347 MOS_STATUS HalCm_Setup2DSurfaceSamplerState(
4348 PCM_HAL_STATE state,
4349 PCM_HAL_KERNEL_ARG_PARAM argParam,
4350 PCM_HAL_INDEX_PARAM indexParam,
4351 int32_t bindingTable,
4352 uint32_t threadIndex,
4353 uint8_t *buffer)
4354 {
4355 MOS_STATUS eStatus;
4356
4357 //Binding surface based at the unit of dword
4358 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceStateBasic(
4359 state, argParam, indexParam, bindingTable, threadIndex, true, buffer, false));
4360 eStatus = MOS_STATUS_SUCCESS;
4361
4362 finish:
4363 return eStatus;
4364 }
4365
4366 //*-----------------------------------------------------------------------------
4367 //| Purpose: Setup 2D surface State
4368 //| Returns: Result of the operation
4369 //*-----------------------------------------------------------------------------
HalCm_Setup2DSurfaceUPStateBasic(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer,bool pixelPitch)4370 MOS_STATUS HalCm_Setup2DSurfaceUPStateBasic(
4371 PCM_HAL_STATE state,
4372 PCM_HAL_KERNEL_ARG_PARAM argParam,
4373 PCM_HAL_INDEX_PARAM indexParam,
4374 int32_t bindingTable,
4375 uint32_t threadIndex,
4376 uint8_t *buffer,
4377 bool pixelPitch)
4378 {
4379 MOS_STATUS eStatus;
4380 RENDERHAL_SURFACE surface;
4381 RENDERHAL_SURFACE_STATE_PARAMS surfaceParam;
4382 PRENDERHAL_INTERFACE renderHal;
4383 PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntries[MHW_MAX_SURFACE_PLANES];
4384 uint8_t *src;
4385 uint8_t *dst;
4386 int32_t nSurfaceEntries;
4387 uint32_t index;
4388 uint32_t btIndex;
4389 uint16_t memObjCtl;
4390 uint32_t i;
4391 uint32_t offsetSrc;
4392 PRENDERHAL_STATE_HEAP stateHeap;
4393
4394 eStatus = MOS_STATUS_UNKNOWN;
4395 renderHal = state->renderHal;
4396 //GT-PIN
4397 PCM_HAL_TASK_PARAM taskParam = state->taskParam;
4398
4399 // Get the Index to sampler array from the kernel data
4400 CM_ASSERT(argParam->unitSize == sizeof(index));
4401 src = argParam->firstValue + (threadIndex * argParam->unitSize);
4402 index = *((uint32_t*)src) & CM_SURFACE_MASK;
4403 if (index == CM_NULL_SURFACE)
4404 {
4405 if (buffer)
4406 {
4407 dst = buffer + argParam->payloadOffset;
4408 *((uint32_t*)dst) = CM_NULL_SURFACE_BINDING_INDEX;
4409 }
4410
4411 eStatus = MOS_STATUS_SUCCESS;
4412 goto finish;
4413 }
4414
4415 memObjCtl = state->surf2DUPTable[index].memObjCtl;
4416 if (!memObjCtl)
4417 {
4418 memObjCtl = CM_DEFAULT_CACHE_TYPE;
4419 }
4420
4421 // check to see if the data present for the sampler in the array
4422 if (index >= state->cmDeviceParam.max2DSurfaceUPTableSize ||
4423 (state->surf2DUPTable[index].width == 0))
4424 {
4425 eStatus = MOS_STATUS_INVALID_PARAMETER;
4426 CM_ASSERTMESSAGE(
4427 "Invalid 2D SurfaceUP array index '%d'", index);
4428 goto finish;
4429 }
4430
4431 // Check to see if surface is already assigned
4432 if ( pixelPitch )
4433 {
4434 btIndex = state->bti2DUPIndexTable[ index ].BTI.samplerSurfIndex;
4435 }
4436 else
4437 {
4438 btIndex = state->bti2DUPIndexTable[ index ].BTI.regularSurfIndex;
4439 }
4440
4441 if ( btIndex == ( unsigned char )CM_INVALID_INDEX )
4442 {
4443 uint32_t tempPlaneIndex = 0;
4444
4445 // Get Details of 2DUP surface and fill the surface
4446 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACE2D_UP, index, pixelPitch));
4447
4448 // Setup 2D surface
4449 MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
4450 surfaceParam.Type = renderHal->SurfaceTypeDefault;
4451 surfaceParam.Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL;
4452
4453 if (!pixelPitch) {
4454 surfaceParam.bWidthInDword_UV = true;
4455 surfaceParam.bWidthInDword_Y = true;
4456 }
4457
4458 surfaceParam.isOutput = true;
4459
4460 //Cache configurations
4461 state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
4462
4463 // interlace setting
4464 HalCm_HwSetSurfaceProperty(state,
4465 state->umdSurf2DTable[index].frameType,
4466 &surfaceParam);
4467
4468 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
4469 renderHal,
4470 &surface,
4471 &surfaceParam,
4472 &nSurfaceEntries,
4473 surfaceEntries,
4474 nullptr));
4475
4476 //GT-PIN
4477 btIndex = HalCm_GetFreeBindingIndex(state, indexParam, nSurfaceEntries);
4478 for (i = 0; i < (uint32_t)nSurfaceEntries; i++)
4479 {
4480 // Bind the surface State
4481 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
4482 renderHal,
4483 bindingTable,
4484 btIndex + i,
4485 surfaceEntries[i]));
4486 //GT-Pin
4487 if ((taskParam->surfEntryInfoArrays.kernelNum != 0) &&
4488 (taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr))
4489 {
4490 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceDetails(
4491 state,
4492 indexParam,
4493 btIndex + i,
4494 surface.OsSurface,
4495 0,
4496 surfaceEntries[i],
4497 tempPlaneIndex,
4498 surfaceParam,
4499 CM_ARGUMENT_SURFACE2D_UP));
4500 }
4501 }
4502 state->bti2DUPIndexTable[ index ].nPlaneNumber = nSurfaceEntries;
4503
4504 stateHeap = renderHal->pStateHeap;
4505 offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
4506 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
4507 ( bindingTable * stateHeap->iBindingTableSize ) + // Moves the pointer to a Particular Binding Table
4508 ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
4509
4510 if ( pixelPitch )
4511 {
4512 state->bti2DUPIndexTable[ index ].BTI.samplerSurfIndex = btIndex;
4513 state->bti2DUPIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
4514 }
4515 else
4516 {
4517 state->bti2DUPIndexTable[ index ].BTI.regularSurfIndex = btIndex;
4518 state->bti2DUPIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
4519 }
4520 }
4521 else
4522 {
4523 stateHeap = renderHal->pStateHeap;
4524
4525 // Get Offset to Current Binding Table
4526 uint32_t offsetCurrentBTStart = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
4527 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
4528 ( bindingTable * stateHeap->iBindingTableSize ); // Moves the pointer to a Particular Binding Table
4529
4530 uint32_t *currentBTStart = ( uint32_t *)( stateHeap->pSshBuffer + offsetCurrentBTStart );
4531
4532 int nEntryIndex = 0;
4533
4534 if ( pixelPitch )
4535 {
4536 nEntryIndex = (int) ((uint32_t*)( state->bti2DUPIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition ) - currentBTStart);
4537 }
4538 else
4539 {
4540 nEntryIndex = (int) ((uint32_t*)( state->bti2DUPIndexTable[ index ].BTITableEntry.regularBtiEntryPosition ) - currentBTStart);
4541 }
4542
4543 if ( ( nEntryIndex < 0 ) || ( nEntryIndex >= renderHal->StateHeapSettings.iSurfacesPerBT ) )
4544 {
4545 uint32_t tmpSurfaceEntries = state->bti2DUPIndexTable[ index ].nPlaneNumber;
4546
4547 btIndex = HalCm_GetFreeBindingIndex( state, indexParam, tmpSurfaceEntries );
4548
4549 // Get Offset to Current Binding Table
4550 uint32_t offsetDst = offsetCurrentBTStart + ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
4551
4552 uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
4553 if ( pixelPitch )
4554 {
4555 MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * tmpSurfaceEntries, state->bti2DUPIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition, sizeof( uint32_t ) * tmpSurfaceEntries );
4556 }
4557 else
4558 {
4559 MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * tmpSurfaceEntries, state->bti2DUPIndexTable[ index ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * tmpSurfaceEntries );
4560 }
4561
4562 // update index to table
4563 if ( pixelPitch )
4564 {
4565 state->bti2DUPIndexTable[ index ].BTI.samplerSurfIndex = btIndex;
4566 state->bti2DUPIndexTable[ index ].BTITableEntry.samplerBtiEntryPosition = bindingTableEntry;
4567 }
4568 else
4569 {
4570 state->bti2DUPIndexTable[ index ].BTI.regularSurfIndex = btIndex;
4571 state->bti2DUPIndexTable[ index ].BTITableEntry.regularBtiEntryPosition = bindingTableEntry;
4572 }
4573 }
4574 }
4575
4576 // Update the Batch Buffer
4577 if (buffer)
4578 {
4579 dst = buffer + argParam->payloadOffset;
4580 *((uint32_t*)dst) = btIndex;
4581 }
4582
4583 eStatus = MOS_STATUS_SUCCESS;
4584
4585 finish:
4586 return eStatus;
4587 }
4588
HalCm_Setup2DSurfaceUPState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer)4589 MOS_STATUS HalCm_Setup2DSurfaceUPState(
4590 PCM_HAL_STATE state,
4591 PCM_HAL_KERNEL_ARG_PARAM argParam,
4592 PCM_HAL_INDEX_PARAM indexParam,
4593 int32_t bindingTable,
4594 uint32_t threadIndex,
4595 uint8_t *buffer)
4596 {
4597 MOS_STATUS eStatus;
4598
4599 //Binding surface based at the unit of dword
4600 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPStateBasic(
4601 state, argParam, indexParam, bindingTable, threadIndex, buffer, false));
4602 eStatus = MOS_STATUS_SUCCESS;
4603
4604 finish:
4605 return eStatus;
4606 }
4607
HalCm_Setup2DSurfaceUPSamplerState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer)4608 MOS_STATUS HalCm_Setup2DSurfaceUPSamplerState(
4609 PCM_HAL_STATE state,
4610 PCM_HAL_KERNEL_ARG_PARAM argParam,
4611 PCM_HAL_INDEX_PARAM indexParam,
4612 int32_t bindingTable,
4613 uint32_t threadIndex,
4614 uint8_t *buffer)
4615 {
4616 MOS_STATUS eStatus;
4617
4618 //Binding surface based at the unit of pixel
4619 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPStateBasic(
4620 state, argParam, indexParam, bindingTable, threadIndex, buffer, true));
4621 eStatus = MOS_STATUS_SUCCESS;
4622
4623 finish:
4624 return eStatus;
4625 }
4626
HalCm_SetupSpecificVmeSurfaceState(PCM_HAL_STATE state,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t surfIndex,uint32_t btIndex,uint16_t memObjCtl,uint32_t surfaceStateWidth,uint32_t surfaceStateHeight)4627 MOS_STATUS HalCm_SetupSpecificVmeSurfaceState(
4628 PCM_HAL_STATE state,
4629 PCM_HAL_INDEX_PARAM indexParam,
4630 int32_t bindingTable,
4631 uint32_t surfIndex,
4632 uint32_t btIndex,
4633 uint16_t memObjCtl,
4634 uint32_t surfaceStateWidth,
4635 uint32_t surfaceStateHeight)
4636 {
4637 MOS_STATUS eStatus;
4638 RENDERHAL_SURFACE surface;
4639 int32_t nSurfaceEntries = 0;
4640 RENDERHAL_SURFACE_STATE_PARAMS surfaceParam;
4641 PRENDERHAL_INTERFACE renderHal;
4642 PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntries[MHW_MAX_SURFACE_PLANES];
4643 uint32_t tempPlaneIndex = 0;
4644 PMOS_SURFACE mosSurface = nullptr;
4645
4646 eStatus = MOS_STATUS_UNKNOWN;
4647 renderHal = state->renderHal;
4648 nSurfaceEntries = 0;
4649
4650 PCM_HAL_TASK_PARAM taskParam = state->taskParam;
4651
4652 // Get Details of VME surface and fill the surface
4653 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_VME_STATE, surfIndex, 0));
4654
4655 // Setup 2D surface
4656 MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
4657 surfaceParam.Type = renderHal->SurfaceTypeAdvanced;
4658 surfaceParam.isOutput = true;
4659 surfaceParam.bWidthInDword_Y = false;
4660 surfaceParam.bWidthInDword_UV = false;
4661 surfaceParam.Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL;
4662 surfaceParam.bVmeUse = true;
4663
4664 // Overwrite the width and height if specified
4665 if (surfaceStateWidth && surfaceStateHeight)
4666 {
4667 mosSurface = &surface.OsSurface;
4668 if (surfaceStateWidth > mosSurface->dwWidth || surfaceStateHeight > mosSurface->dwHeight)
4669 {
4670 CM_ASSERTMESSAGE("Error: VME surface state's resolution is larger than the original surface.");
4671 eStatus = MOS_STATUS_INVALID_PARAMETER;
4672 goto finish;
4673 }
4674 mosSurface->dwWidth = surfaceStateWidth;
4675 mosSurface->dwHeight = surfaceStateHeight;
4676 }
4677
4678 //Cache configurations
4679 state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
4680 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
4681 renderHal,
4682 &surface,
4683 &surfaceParam,
4684 &nSurfaceEntries,
4685 surfaceEntries,
4686 nullptr));
4687
4688 CM_ASSERT(nSurfaceEntries == 1);
4689
4690 {
4691 // Bind the surface State
4692 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
4693 renderHal,
4694 bindingTable,
4695 btIndex,
4696 surfaceEntries[0]));
4697
4698 if ((taskParam->surfEntryInfoArrays.kernelNum != 0) &&
4699 (taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr))
4700 {
4701 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceDetails(
4702 state,
4703 indexParam,
4704 btIndex,
4705 surface.OsSurface,
4706 0,
4707 surfaceEntries[0],
4708 tempPlaneIndex,
4709 surfaceParam,
4710 CM_ARGUMENT_SURFACE2D));
4711 }
4712 }
4713 state->bti2DIndexTable[ surfIndex ].BTI.vmeSurfIndex = btIndex;
4714
4715 eStatus = MOS_STATUS_SUCCESS;
4716
4717 finish:
4718 return eStatus;
4719
4720 }
4721
4722 //*-----------------------------------------------------------------------------
4723 //| Purpose: Setup VME surface State
4724 //| Returns: Result of the operation
4725 //*-----------------------------------------------------------------------------
HalCm_SetupVmeSurfaceState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer)4726 MOS_STATUS HalCm_SetupVmeSurfaceState(
4727 PCM_HAL_STATE state,
4728 PCM_HAL_KERNEL_ARG_PARAM argParam,
4729 PCM_HAL_INDEX_PARAM indexParam,
4730 int32_t bindingTable,
4731 uint32_t threadIndex,
4732 uint8_t *buffer)
4733 {
4734 MOS_STATUS eStatus;
4735 PRENDERHAL_INTERFACE renderHal;
4736 PCM_HAL_VME_ARG_VALUE vmeSrc;
4737 uint8_t *dst;
4738 uint32_t index[CM_MAX_VME_BINDING_INDEX_1];
4739 uint16_t memObjCtl[CM_MAX_VME_BINDING_INDEX_1];
4740 uint32_t fwSurfCount = 0;
4741 uint32_t bwSurfCount = 0;
4742 bool alreadyBind = true;
4743 uint32_t surfPairNum;
4744 uint32_t idx;
4745 uint32_t curBTIndex;
4746 uint32_t btIndex;
4747 uint32_t surfaceStateWidth = 0;
4748 uint32_t surfaceStateHeight = 0;
4749 uint32_t *fPtr = nullptr;
4750 uint32_t *bPtr = nullptr;
4751 uint32_t *refSurfaces = nullptr;
4752
4753 eStatus = MOS_STATUS_UNKNOWN;
4754 renderHal = state->renderHal;
4755 btIndex = 0;
4756
4757 MOS_ZeroMemory(memObjCtl, CM_MAX_VME_BINDING_INDEX_1*sizeof(uint16_t));
4758 MOS_ZeroMemory(index, CM_MAX_VME_BINDING_INDEX_1*sizeof(uint32_t));
4759
4760 CM_ASSERT(argParam->unitSize <= sizeof(uint32_t)*(CM_MAX_VME_BINDING_INDEX_1 + 2));
4761 CM_ASSERT(threadIndex == 0); // VME surface is not allowed in thread arg
4762
4763 vmeSrc = (PCM_HAL_VME_ARG_VALUE)argParam->firstValue;
4764 fwSurfCount = vmeSrc->fwRefNum;
4765 bwSurfCount = vmeSrc->bwRefNum;
4766 refSurfaces = findRefInVmeArg(vmeSrc);
4767
4768 index[0] = vmeSrc->curSurface & CM_SURFACE_MASK;
4769 // check to see if index[0] is valid
4770 if (index[0] == CM_NULL_SURFACE)
4771 {
4772 if (buffer)
4773 {
4774 dst = buffer + argParam->payloadOffset;
4775 *((uint32_t*)dst) = CM_NULL_SURFACE_BINDING_INDEX;
4776 }
4777
4778 eStatus = MOS_STATUS_SUCCESS;
4779 goto finish;
4780 }
4781
4782 if (index[0] >= state->cmDeviceParam.max2DSurfaceTableSize ||
4783 Mos_ResourceIsNull(&state->umdSurf2DTable[index[0]].osResource))
4784 {
4785 eStatus = MOS_STATUS_INVALID_PARAMETER;
4786 CM_ASSERTMESSAGE(
4787 "Invalid 2D surface array index '%d'", index[0]);
4788 goto finish;
4789 }
4790
4791 memObjCtl[0] = state->umdSurf2DTable[index[0]].memObjCtl;
4792 if (!memObjCtl[0])
4793 {
4794 memObjCtl[0] = CM_DEFAULT_CACHE_TYPE;
4795 }
4796 for (idx = 0; idx < (vmeSrc->fwRefNum + vmeSrc->bwRefNum); idx++)
4797 {
4798 index[idx + 1] = refSurfaces[idx] & CM_SURFACE_MASK;
4799 memObjCtl[idx + 1] = state->umdSurf2DTable[index[idx + 1]].memObjCtl;
4800 if (!memObjCtl[idx + 1])
4801 {
4802 memObjCtl[idx + 1] = CM_DEFAULT_CACHE_TYPE;
4803 }
4804 }
4805
4806 surfaceStateWidth = vmeSrc->surfStateParam.surfaceStateWidth;
4807 surfaceStateHeight = vmeSrc->surfStateParam.surfaceStateHeight;
4808
4809 fPtr = index + 1;
4810 bPtr = index + 1 + fwSurfCount;
4811
4812 //Max surface pair number
4813 surfPairNum = fwSurfCount > bwSurfCount ? fwSurfCount : bwSurfCount;
4814
4815 btIndex = curBTIndex = HalCm_GetFreeBindingIndex(state, indexParam, surfPairNum*2 + 1);
4816
4817 HalCm_SetupSpecificVmeSurfaceState(state, indexParam, bindingTable, index[0], curBTIndex, memObjCtl[0], surfaceStateWidth, surfaceStateHeight);
4818 curBTIndex++;
4819
4820 //Setup surface states interleavely for backward and forward surfaces pairs.
4821 for (idx = 0; idx < surfPairNum; idx++)
4822 {
4823 if (idx < fwSurfCount)
4824 {
4825 HalCm_SetupSpecificVmeSurfaceState(state, indexParam, bindingTable, fPtr[idx], curBTIndex, memObjCtl[idx + 1], surfaceStateWidth, surfaceStateHeight);
4826 }
4827 curBTIndex++;
4828
4829 if (idx < bwSurfCount)
4830 {
4831 HalCm_SetupSpecificVmeSurfaceState(state, indexParam, bindingTable, bPtr[idx], curBTIndex, memObjCtl[idx+ 1 + fwSurfCount], surfaceStateWidth, surfaceStateHeight);
4832 }
4833 curBTIndex++;
4834 }
4835
4836 // Update the Batch Buffer
4837 if (buffer)
4838 {
4839 dst = buffer + argParam->payloadOffset;
4840 *((uint32_t*)dst) = btIndex;
4841 }
4842
4843 eStatus = MOS_STATUS_SUCCESS;
4844
4845 finish:
4846 return eStatus;
4847 }
4848
4849 static bool
UpdateMosSurfaceFromAliasState(CM_HAL_STATE * state,CM_HAL_KERNEL_ARG_PARAM * argParam,uint32_t surface_index,MOS_SURFACE * surface)4850 UpdateMosSurfaceFromAliasState(CM_HAL_STATE *state,
4851 CM_HAL_KERNEL_ARG_PARAM *argParam,
4852 uint32_t surface_index,
4853 MOS_SURFACE *surface)
4854 {
4855 uint32_t surface_state_index = argParam->aliasIndex/state->surfaceArraySize;
4856 const CM_HAL_SURFACE2D_SURFACE_STATE_PARAM &surface_state_param
4857 = state->umdSurf2DTable[surface_index].surfaceStateParam[
4858 surface_state_index];
4859 if (surface_state_param.width)
4860 {
4861 surface->dwWidth = surface_state_param.width;
4862 }
4863 if (surface_state_param.height)
4864 {
4865 surface->dwHeight = surface_state_param.height;
4866 }
4867 if (surface_state_param.depth)
4868 {
4869 surface->dwDepth = surface_state_param.depth;
4870 }
4871 if (surface_state_param.pitch)
4872 {
4873 surface->dwPitch= surface_state_param.pitch;
4874 }
4875 if (surface_state_param.format)
4876 {
4877 surface->Format
4878 = static_cast<MOS_FORMAT>(surface_state_param.format);
4879 }
4880 if (surface_state_param.surfaceXOffset)
4881 {
4882 surface->YPlaneOffset.iXOffset = surface_state_param.surfaceXOffset;
4883 }
4884 if (surface_state_param.surfaceYOffset)
4885 {
4886 surface->YPlaneOffset.iYOffset = surface_state_param.surfaceYOffset;
4887 }
4888 if (surface_state_param.surfaceOffset)
4889 {
4890 surface->dwOffset = surface_state_param.surfaceOffset;
4891 }
4892
4893 return true;
4894 }
4895
4896 //*-----------------------------------------------------------------------------
4897 //| Purpose: Setup VME surface State
4898 //| Returns: Result of the operation
4899 //*-----------------------------------------------------------------------------
HalCm_SetupSampler8x8SurfaceState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer)4900 MOS_STATUS HalCm_SetupSampler8x8SurfaceState(
4901 PCM_HAL_STATE state,
4902 PCM_HAL_KERNEL_ARG_PARAM argParam,
4903 PCM_HAL_INDEX_PARAM indexParam,
4904 int32_t bindingTable,
4905 uint32_t threadIndex,
4906 uint8_t *buffer)
4907 {
4908 MOS_STATUS eStatus;
4909 RENDERHAL_SURFACE surface;
4910 RENDERHAL_SURFACE_STATE_PARAMS surfaceParam;
4911 PRENDERHAL_INTERFACE renderHal;
4912 PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntries[MHW_MAX_SURFACE_PLANES];
4913 uint8_t *src;
4914 uint8_t *dst;
4915 int32_t nSurfaceEntries;
4916 uint32_t index;
4917 uint16_t memObjCtl;
4918 int32_t i;
4919 uint32_t btIndex;
4920 uint32_t tempPlaneIndex = 0;
4921 uint32_t offsetSrc;
4922 PRENDERHAL_STATE_HEAP stateHeap;
4923
4924 eStatus = MOS_STATUS_UNKNOWN;
4925 renderHal = state->renderHal;
4926
4927 PCM_HAL_TASK_PARAM taskParam = state->taskParam;
4928
4929 nSurfaceEntries = 0;
4930
4931 CM_ASSERT(argParam->unitSize == sizeof(uint32_t));
4932
4933 src = argParam->firstValue + (threadIndex * argParam->unitSize);
4934 index = *((uint32_t*)src) & CM_SURFACE_MASK;
4935 if (index == CM_NULL_SURFACE)
4936 {
4937 if (buffer)
4938 {
4939 dst = buffer + argParam->payloadOffset;
4940 *((uint32_t*)dst) = CM_NULL_SURFACE_BINDING_INDEX;
4941 }
4942
4943 eStatus = MOS_STATUS_SUCCESS;
4944 goto finish;
4945 }
4946
4947 memObjCtl = state->umdSurf2DTable[index].memObjCtl;
4948 if (!memObjCtl)
4949 {
4950 memObjCtl = CM_DEFAULT_CACHE_TYPE;
4951 }
4952
4953 // check to see if index is valid
4954 if (index >= state->cmDeviceParam.max2DSurfaceTableSize ||
4955 Mos_ResourceIsNull(&state->umdSurf2DTable[index].osResource))
4956 {
4957 eStatus = MOS_STATUS_INVALID_PARAMETER;
4958 CM_ASSERTMESSAGE(
4959 "Invalid 2D surface array index '%d'", index);
4960 goto finish;
4961 }
4962
4963 renderHal->bEnableP010SinglePass = state->cmHalInterface->IsP010SinglePassSupported();
4964
4965 btIndex = state->bti2DIndexTable[ index ].BTI.sampler8x8SurfIndex;
4966 if (btIndex == ( unsigned char )CM_INVALID_INDEX || argParam->aliasCreated)
4967 {
4968 // Get Details of Sampler8x8 surface and fill the surface
4969 CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceAndRegister( state, &surface, argParam->kind, index, 0 ) );
4970
4971 // Setup surface
4972 MOS_ZeroMemory( &surfaceParam, sizeof( surfaceParam ) );
4973 surfaceParam.Type = renderHal->SurfaceTypeAdvanced;
4974 surfaceParam.isOutput = true;
4975 surfaceParam.bWidthInDword_Y = false;
4976 surfaceParam.bWidthInDword_UV = false;
4977 surfaceParam.Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL;
4978 surfaceParam.bVASurface = ( argParam->kind == CM_ARGUMENT_SURFACE_SAMPLER8X8_VA ) ? 1 : 0;
4979 surfaceParam.AddressControl = argParam->nCustomValue;
4980
4981 UpdateMosSurfaceFromAliasState(state, argParam, index,
4982 &surface.OsSurface);
4983
4984 //Set memory object control
4985 state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
4986
4987 surface.Rotation = state->umdSurf2DTable[index].rotationFlag;
4988 surface.ChromaSiting = state->umdSurf2DTable[index].chromaSiting;
4989 surface.ScalingMode = RENDERHAL_SCALING_AVS;
4990 nSurfaceEntries = 0;
4991
4992 // interlace setting
4993 HalCm_HwSetSurfaceProperty(state,
4994 state->umdSurf2DTable[index].frameType,
4995 &surfaceParam);
4996
4997 CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSetupSurfaceState(
4998 renderHal,
4999 &surface,
5000 &surfaceParam,
5001 &nSurfaceEntries,
5002 surfaceEntries,
5003 nullptr ) );
5004
5005 CM_ASSERT( nSurfaceEntries == 1 );
5006
5007 btIndex = HalCm_GetFreeBindingIndex( state, indexParam, nSurfaceEntries );
5008
5009 for ( i = 0; i < nSurfaceEntries; i++ )
5010 {
5011 // Bind the surface State
5012 CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnBindSurfaceState(
5013 renderHal,
5014 bindingTable,
5015 btIndex + i,
5016 surfaceEntries[ i ] ) );
5017
5018 if ( ( taskParam->surfEntryInfoArrays.kernelNum != 0 ) &&
5019 ( taskParam->surfEntryInfoArrays.surfEntryInfosArray != nullptr ) )
5020 {
5021 CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceDetails(
5022 state,
5023 indexParam,
5024 btIndex + i,
5025 surface.OsSurface,
5026 0,
5027 surfaceEntries[ i ],
5028 tempPlaneIndex,
5029 surfaceParam,
5030 CM_ARGUMENT_SURFACE2D ) );
5031 }
5032 }
5033
5034 stateHeap = renderHal->pStateHeap;
5035 offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
5036 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
5037 ( bindingTable * stateHeap->iBindingTableSize ) + // Moves the pointer to a Particular Binding Table
5038 ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
5039
5040 state->bti2DIndexTable[ index ].nPlaneNumber = nSurfaceEntries;
5041 state->bti2DIndexTable[ index ].BTITableEntry.sampler8x8BtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
5042 state->bti2DIndexTable[ index ].BTI.sampler8x8SurfIndex = btIndex;
5043 }
5044 else
5045 {
5046 stateHeap = renderHal->pStateHeap;
5047
5048 // Get Offset to Current Binding Table
5049 uint32_t offsetCurrentBTStart = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
5050 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
5051 ( bindingTable * stateHeap->iBindingTableSize ); // Moves the pointer to a Particular Binding Table
5052
5053 uint32_t *currentBTStart = ( uint32_t *)( stateHeap->pSshBuffer + offsetCurrentBTStart );
5054
5055 int nEntryIndex = 0;
5056
5057 nEntryIndex = ( int )( ( uint32_t *)( state->bti2DIndexTable[ index ].BTITableEntry.sampler8x8BtiEntryPosition ) - currentBTStart );
5058
5059 if ( ( nEntryIndex < 0 ) || ( nEntryIndex >= renderHal->StateHeapSettings.iSurfacesPerBT ) )
5060 {
5061 uint32_t tmpSurfaceEntries = state->bti2DIndexTable[ index ].nPlaneNumber;
5062
5063 btIndex = HalCm_GetFreeBindingIndex( state, indexParam, tmpSurfaceEntries );
5064
5065 // Get Offset to Current Binding Table
5066 uint32_t offsetDst = offsetCurrentBTStart + ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
5067
5068 uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
5069 MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * tmpSurfaceEntries, state->bti2DIndexTable[ index ].BTITableEntry.sampler8x8BtiEntryPosition, sizeof( uint32_t ) * tmpSurfaceEntries );
5070
5071 // update index to table
5072 state->bti2DIndexTable[ index ].BTI.sampler8x8SurfIndex = btIndex;
5073 state->bti2DIndexTable[ index ].BTITableEntry.sampler8x8BtiEntryPosition = bindingTableEntry;
5074 }
5075 }
5076 // Update the Batch Buffer
5077 if ( buffer )
5078 {
5079 dst = buffer + argParam->payloadOffset;
5080 *( ( uint32_t *)dst ) = state->bti2DIndexTable[ index ].BTI.sampler8x8SurfIndex;
5081 }
5082
5083 eStatus = MOS_STATUS_SUCCESS;
5084
5085 finish:
5086 renderHal->bEnableP010SinglePass = false;
5087 return eStatus;
5088 }
5089
5090 //*-----------------------------------------------------------------------------
5091 //| Purpose: Setup State Buffer surface State
5092 //| Returns: Result of the operation
5093 //*-----------------------------------------------------------------------------
HalCm_SetupStateBufferSurfaceState(PCM_HAL_STATE state,PCM_HAL_KERNEL_ARG_PARAM argParam,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,uint32_t threadIndex,uint8_t * buffer)5094 MOS_STATUS HalCm_SetupStateBufferSurfaceState(
5095 PCM_HAL_STATE state,
5096 PCM_HAL_KERNEL_ARG_PARAM argParam,
5097 PCM_HAL_INDEX_PARAM indexParam,
5098 int32_t bindingTable,
5099 uint32_t threadIndex,
5100 uint8_t *buffer )
5101 {
5102 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5103 PRENDERHAL_INTERFACE renderHal;
5104 RENDERHAL_SURFACE_STATE_PARAMS surfaceParam;
5105 RENDERHAL_SURFACE renderhalSurface;
5106 PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntry;
5107 uint32_t btIndex;
5108 CM_SURFACE_BTI_INFO surfBTIInfo;
5109 uint16_t memObjCtl;
5110
5111 state->cmHalInterface->GetHwSurfaceBTIInfo( &surfBTIInfo );
5112 uint32_t surfIndex = reinterpret_cast< uint32_t *>( argParam->firstValue )[ 0 ];
5113
5114 surfIndex = surfIndex & CM_SURFACE_MASK;
5115 memObjCtl = state->bufferTable[ surfIndex ].memObjCtl;
5116
5117 btIndex = HalCm_GetFreeBindingIndex( state, indexParam, 1 );
5118
5119 renderHal = state->renderHal;
5120 MOS_ZeroMemory( &renderhalSurface, sizeof( renderhalSurface ) );
5121
5122 // Get Details of Sampler8x8 surface and fill the surface
5123 CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceAndRegister( state, &renderhalSurface, argParam->kind, surfIndex, 0 ) );
5124
5125 MOS_ZeroMemory( &surfaceParam, sizeof( surfaceParam ) );
5126
5127 // Set the isOutput by default
5128 surfaceParam.isOutput = true;
5129
5130 //Cache configurations default
5131 state->cmHalInterface->HwSetSurfaceMemoryObjectControl( memObjCtl, &surfaceParam );
5132
5133 // Setup Buffer surface
5134 CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSetupBufferSurfaceState(
5135 renderHal,
5136 &renderhalSurface,
5137 &surfaceParam,
5138 &surfaceEntry ) );
5139
5140 // Bind the surface State
5141 CM_ASSERT( ( ( int32_t )btIndex ) < renderHal->StateHeapSettings.iSurfacesPerBT + surfBTIInfo.normalSurfaceStart );
5142 CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnBindSurfaceState(
5143 renderHal,
5144 bindingTable,
5145 btIndex,
5146 surfaceEntry ) );
5147
5148 if ( buffer )
5149 {
5150 *( ( uint32_t *)( buffer + argParam->payloadOffset ) ) = btIndex;
5151 }
5152
5153 finish:
5154 return eStatus;
5155 }
5156
5157 //------------------------------------------------------------------------------
5158 //| Purpose: Get usr defined threadcount / threadgroup
5159 //| Returns: Result of the operation
5160 //------------------------------------------------------------------------------
HalCm_GetMaxThreadCountPerThreadGroup(PCM_HAL_STATE state,uint32_t * threadsPerThreadGroup)5161 MOS_STATUS HalCm_GetMaxThreadCountPerThreadGroup(
5162 PCM_HAL_STATE state, // [in] Pointer to CM State
5163 uint32_t *threadsPerThreadGroup) // [out] Pointer to threadsPerThreadGroup
5164 {
5165 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5166
5167 CM_PLATFORM_INFO platformInfo;
5168 MOS_ZeroMemory(&platformInfo, sizeof(CM_PLATFORM_INFO));
5169 CM_CHK_MOSSTATUS_GOTOFINISH( state->pfnGetPlatformInfo( state, &platformInfo, false) );
5170
5171 if (platformInfo.numMaxEUsPerPool)
5172 {
5173 *threadsPerThreadGroup = (platformInfo.numHWThreadsPerEU) * (platformInfo.numMaxEUsPerPool);
5174 }
5175 else
5176 {
5177 *threadsPerThreadGroup = (platformInfo.numHWThreadsPerEU) * (platformInfo.numEUsPerSubSlice);
5178 }
5179
5180 finish:
5181 return eStatus;
5182 }
5183
5184 //*-----------------------------------------------------------------------------
5185 //| Purpose: Decodes hints to get number and size of kernel groups
5186 //| Returns: Result of the operation
5187 //*-----------------------------------------------------------------------------
HalCm_GetNumKernelsPerGroup(uint8_t hintsBits,uint32_t numKernels,uint32_t * numKernelsPerGroup,uint32_t * numKernelGroups,uint32_t * remapKernelToGroup,uint32_t * remapGroupToKernel)5188 MOS_STATUS HalCm_GetNumKernelsPerGroup(
5189 uint8_t hintsBits,
5190 uint32_t numKernels,
5191 uint32_t *numKernelsPerGroup,
5192 uint32_t *numKernelGroups,
5193 uint32_t *remapKernelToGroup,
5194 uint32_t *remapGroupToKernel
5195 )
5196 {
5197 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5198 uint32_t currGrp = 0;
5199 uint32_t i = 0;
5200
5201 // first group at least has one kernel
5202 numKernelsPerGroup[currGrp]++;
5203 remapGroupToKernel[currGrp] = 0;
5204
5205 for( i = 0; i < numKernels - 1; ++i )
5206 {
5207 if( (hintsBits & CM_HINTS_LEASTBIT_MASK) == CM_HINTS_LEASTBIT_MASK )
5208 {
5209 currGrp++;
5210 *numKernelGroups = *numKernelGroups + 1;
5211
5212 remapGroupToKernel[currGrp] = i + 1;
5213 }
5214 numKernelsPerGroup[currGrp]++;
5215 hintsBits >>= 1;
5216 remapKernelToGroup[i+1] = currGrp;
5217 }
5218
5219 return eStatus;
5220 }
5221
5222 //*-----------------------------------------------------------------------------
5223 //| Purpose: Gets information about max parallelism graphs
5224 //| numThreadsOnSides based on formula to sum 1 to n: (n(n+1))/2
5225 //| Returns: Result of the operation
5226 //*-----------------------------------------------------------------------------
HalCm_GetParallelGraphInfo(uint32_t maximum,uint32_t numThreads,uint32_t width,uint32_t height,PCM_HAL_PARALLELISM_GRAPH_INFO graphInfo,CM_DEPENDENCY_PATTERN pattern,bool noDependencyCase)5227 MOS_STATUS HalCm_GetParallelGraphInfo(
5228 uint32_t maximum,
5229 uint32_t numThreads,
5230 uint32_t width,
5231 uint32_t height,
5232 PCM_HAL_PARALLELISM_GRAPH_INFO graphInfo,
5233 CM_DEPENDENCY_PATTERN pattern,
5234 bool noDependencyCase)
5235 {
5236 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5237 uint32_t numThreadsOnSides = 0;
5238 uint32_t numMaxRepeat = 0;
5239 uint32_t numSteps = 0;
5240
5241 switch( pattern )
5242 {
5243 case CM_NONE_DEPENDENCY:
5244 if (noDependencyCase)
5245 {
5246 maximum = 1;
5247 numMaxRepeat = width * height;
5248 numSteps = width * height;
5249 }
5250 // do nothing will depend on other kernels
5251 break;
5252
5253 case CM_VERTICAL_WAVE:
5254 numMaxRepeat = width;
5255 numSteps = width;
5256 break;
5257
5258 case CM_HORIZONTAL_WAVE:
5259 numMaxRepeat = height;
5260 numSteps = height;
5261 break;
5262
5263 case CM_WAVEFRONT:
5264 numThreadsOnSides = ( maximum - 1 ) * maximum;
5265 numMaxRepeat = (numThreads - numThreadsOnSides ) / maximum;
5266 numSteps = ( maximum - 1) * 2 + numMaxRepeat;
5267 break;
5268
5269 case CM_WAVEFRONT26:
5270 numThreadsOnSides = ( maximum - 1 ) * maximum * 2;
5271 numMaxRepeat = (numThreads - numThreadsOnSides ) / maximum;
5272 numSteps = ( (maximum - 1) * 2 ) * 2 + numMaxRepeat;
5273 break;
5274
5275 case CM_WAVEFRONT26Z:
5276 // do nothing already set outside of this function
5277 break;
5278
5279 default:
5280 eStatus = MOS_STATUS_INVALID_PARAMETER;
5281 CM_ASSERTMESSAGE("Unsupported dependency pattern for EnqueueWithHints");
5282 goto finish;
5283 }
5284
5285 graphInfo->maxParallelism = maximum;
5286 graphInfo->numMaxRepeat = numMaxRepeat;
5287 graphInfo->numSteps = numSteps;
5288
5289 finish:
5290 return eStatus;
5291 }
5292
5293 //*-----------------------------------------------------------------------------
5294 //| Purpose: Sets dispatch pattern based on max parallelism for media objects
5295 //| Returns: Result of the operation
5296 //*-----------------------------------------------------------------------------
HalCm_SetDispatchPattern(CM_HAL_PARALLELISM_GRAPH_INFO graphInfo,CM_DEPENDENCY_PATTERN pattern,uint32_t * dispatchFreq)5297 MOS_STATUS HalCm_SetDispatchPattern(
5298 CM_HAL_PARALLELISM_GRAPH_INFO graphInfo,
5299 CM_DEPENDENCY_PATTERN pattern,
5300 uint32_t *dispatchFreq
5301 )
5302 {
5303 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5304 uint32_t i = 0;
5305 uint32_t j = 0;
5306 uint32_t k = 0;
5307
5308 switch( pattern )
5309 {
5310 case CM_NONE_DEPENDENCY:
5311 break;
5312 case CM_HORIZONTAL_WAVE:
5313 case CM_VERTICAL_WAVE:
5314 for( i = 0; i < graphInfo.numSteps; ++i )
5315 {
5316 dispatchFreq[i] = graphInfo.maxParallelism;
5317 }
5318 break;
5319 case CM_WAVEFRONT:
5320 for( i = 1; i < graphInfo.maxParallelism; ++i )
5321 {
5322 dispatchFreq[i-1] = i;
5323 }
5324 for( j = 0; j < graphInfo.numMaxRepeat; ++i, ++j )
5325 {
5326 dispatchFreq[i-1] = graphInfo.maxParallelism;
5327 }
5328 for( j = graphInfo.maxParallelism - 1; i <= graphInfo.numSteps; ++i, --j )
5329 {
5330 dispatchFreq[i-1] = j;
5331 }
5332 break;
5333 case CM_WAVEFRONT26:
5334 for( i = 1, j = 0; i < graphInfo.maxParallelism; ++i, j +=2 )
5335 {
5336 dispatchFreq[j] = i;
5337 dispatchFreq[j+1] = i;
5338 }
5339 for( k = 0; k < graphInfo.numMaxRepeat; ++k, ++j)
5340 {
5341 dispatchFreq[j] = graphInfo.maxParallelism;
5342 }
5343 for( i = graphInfo.maxParallelism - 1; j < graphInfo.numSteps; j +=2, --i )
5344 {
5345 dispatchFreq[j] = i;
5346 dispatchFreq[j+1] = i;
5347 }
5348 break;
5349 case CM_WAVEFRONT26Z:
5350 break;
5351 default:
5352 eStatus = MOS_STATUS_INVALID_PARAMETER;
5353 CM_ASSERTMESSAGE("Unsupported dependency pattern for EnqueueWithHints");
5354 goto finish;
5355 }
5356
5357 finish:
5358 return eStatus;
5359 }
5360
5361 //*-----------------------------------------------------------------------------
5362 //| Purpose: Sets dispatch frequency for kernel group based on number of steps
5363 //| Returns: Result of the operation
5364 //*-----------------------------------------------------------------------------
HalCm_SetKernelGrpFreqDispatch(PCM_HAL_PARALLELISM_GRAPH_INFO graphInfo,PCM_HAL_KERNEL_GROUP_INFO groupInfo,uint32_t numKernelGroups,uint32_t * minSteps)5365 MOS_STATUS HalCm_SetKernelGrpFreqDispatch(
5366 PCM_HAL_PARALLELISM_GRAPH_INFO graphInfo,
5367 PCM_HAL_KERNEL_GROUP_INFO groupInfo,
5368 uint32_t numKernelGroups,
5369 uint32_t *minSteps)
5370 {
5371 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5372 uint32_t i = 0;
5373 uint32_t j = 0;
5374 uint32_t tmpSteps = 0;
5375 uint32_t kerIndex = 0;
5376
5377 for( i = 0; i < numKernelGroups; ++i)
5378 {
5379 for( j = 0; j < groupInfo[i].numKernelsInGroup; ++j )
5380 {
5381 tmpSteps += graphInfo[kerIndex].numSteps;
5382 kerIndex++;
5383 }
5384
5385 if ( tmpSteps )
5386 {
5387 *minSteps = MOS_MIN(*minSteps, tmpSteps);
5388 groupInfo[i].numStepsInGrp = tmpSteps;
5389 }
5390
5391 tmpSteps = 0;
5392 }
5393
5394 for( i = 0; i < numKernelGroups; ++i )
5395 {
5396 groupInfo[i].freqDispatch = (uint32_t)ceil( (groupInfo[i].numStepsInGrp / (double)*minSteps) );
5397 }
5398
5399 return eStatus;
5400 }
5401
5402 //*-----------------------------------------------------------------------------
5403 //| Purpose: Sets dispatch pattern for kernel with no dependency based on
5404 //| the minimum number of steps calculated from kernels with dependency
5405 //| Returns: Result of the operation
5406 //*-----------------------------------------------------------------------------
HalCm_SetNoDependKernelDispatchPattern(uint32_t numThreads,uint32_t minSteps,uint32_t * dispatchFreq)5407 MOS_STATUS HalCm_SetNoDependKernelDispatchPattern(
5408 uint32_t numThreads,
5409 uint32_t minSteps,
5410 uint32_t *dispatchFreq)
5411 {
5412 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5413 uint32_t i = 0;
5414 uint32_t numEachStep = 0;
5415 uint32_t total = 0;
5416
5417 numEachStep = numThreads / minSteps;
5418 for( i = 0; i < minSteps; ++i )
5419 {
5420 dispatchFreq[i] = numEachStep;
5421 total += numEachStep;
5422 }
5423
5424 while( total != numThreads )
5425 {
5426 // dispatch more at beginning
5427 i = 0;
5428 dispatchFreq[i]++;
5429 total++;
5430 i++;
5431 }
5432
5433 return eStatus;
5434 }
5435
HalCm_FinishStatesForKernel(PCM_HAL_STATE state,PRENDERHAL_MEDIA_STATE mediaState,PMHW_BATCH_BUFFER batchBuffer,int32_t taskId,PCM_HAL_KERNEL_PARAM kernelParam,int32_t kernelIndex,PCM_HAL_INDEX_PARAM indexParam,int32_t bindingTable,int32_t mediaID,PRENDERHAL_KRN_ALLOCATION krnAllocation)5436 MOS_STATUS HalCm_FinishStatesForKernel(
5437 PCM_HAL_STATE state, // [in] Pointer to CM State
5438 PRENDERHAL_MEDIA_STATE mediaState,
5439 PMHW_BATCH_BUFFER batchBuffer, // [in] Pointer to Batch Buffer
5440 int32_t taskId, // [in] Task ID
5441 PCM_HAL_KERNEL_PARAM kernelParam,
5442 int32_t kernelIndex,
5443 PCM_HAL_INDEX_PARAM indexParam,
5444 int32_t bindingTable,
5445 int32_t mediaID,
5446 PRENDERHAL_KRN_ALLOCATION krnAllocation
5447 )
5448 {
5449 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5450 PCM_HAL_TASK_PARAM taskParam = state->taskParam;
5451 PRENDERHAL_INTERFACE renderHal = state->renderHal;
5452 PCM_HAL_WALKER_PARAMS mediaWalkerParams = &kernelParam->walkerParams;
5453 PCM_GPGPU_WALKER_PARAMS perKernelGpGpuWalkerParams = &kernelParam->gpgpuWalkerParams;
5454 PCM_HAL_SCOREBOARD threadCoordinates = nullptr;
5455 PCM_HAL_MASK_AND_RESET dependencyMask = nullptr;
5456 bool enableThreadSpace = false;
5457 bool enableKernelThreadSpace = false;
5458 PCM_HAL_SCOREBOARD kernelThreadCoordinates = nullptr;
5459 UNUSED(taskId);
5460
5461 MHW_MEDIA_OBJECT_PARAMS mediaObjectParam;
5462 PCM_HAL_KERNEL_ARG_PARAM argParam;
5463 MHW_PIPE_CONTROL_PARAMS pipeControlParam;
5464 uint32_t i;
5465 uint32_t hdrSize;
5466 uint32_t aIndex;
5467 uint32_t tIndex;
5468 uint32_t index;
5469
5470 //GT-PIN
5471 taskParam->curKernelIndex = kernelIndex;
5472
5473 CmSafeMemSet(&mediaObjectParam, 0, sizeof(MHW_MEDIA_OBJECT_PARAMS));
5474
5475 if (perKernelGpGpuWalkerParams->gpgpuEnabled)
5476 {
5477 // GPGPU_WALKER, just update ID here. other fields are already filled.
5478 perKernelGpGpuWalkerParams->interfaceDescriptorOffset = mediaID;// mediaObjectParam.dwInterfaceDescriptorOffset;
5479 }
5480 else if (mediaWalkerParams->cmWalkerEnable)
5481 {
5482 // Media walker, just update ID here. other fields are already filled.
5483 mediaWalkerParams->interfaceDescriptorOffset = mediaID;
5484 }
5485 else
5486 {
5487 // MEDIA_OBJECT
5488 mediaObjectParam.dwInterfaceDescriptorOffset = mediaID;
5489 hdrSize = renderHal->pHwSizes->dwSizeMediaObjectHeaderCmd;
5490
5491 if (kernelParam->indirectDataParam.indirectDataSize)
5492 {
5493 mediaObjectParam.dwInlineDataSize = 0;
5494 }
5495 else
5496 {
5497 mediaObjectParam.dwInlineDataSize = MOS_MAX(kernelParam->payloadSize, 4);
5498 }
5499
5500 if (taskParam->threadCoordinates)
5501 {
5502 threadCoordinates = taskParam->threadCoordinates[kernelIndex];
5503 if (threadCoordinates)
5504 {
5505 enableThreadSpace = true;
5506 }
5507 }
5508 else if (kernelParam->kernelThreadSpaceParam.threadCoordinates)
5509 {
5510 kernelThreadCoordinates = kernelParam->kernelThreadSpaceParam.threadCoordinates;
5511 if (kernelThreadCoordinates)
5512 {
5513 enableKernelThreadSpace = true;
5514 }
5515 }
5516
5517 if (taskParam->dependencyMasks)
5518 {
5519 dependencyMask = taskParam->dependencyMasks[kernelIndex];
5520 }
5521
5522 CM_CHK_NULL_GOTOFINISH_MOSERROR( batchBuffer );
5523
5524 uint8_t inlineData[CM_MAX_THREAD_PAYLOAD_SIZE];
5525 uint8_t *cmdInline = inlineData;
5526 uint32_t cmdSize = mediaObjectParam.dwInlineDataSize + hdrSize;
5527
5528 // Setup states for arguments and threads
5529 if (((PCM_HAL_BB_ARGS)batchBuffer->pPrivateData)->refCount > 1)
5530 {
5531 uint8_t *bBuffer = batchBuffer->pData + batchBuffer->iCurrent;
5532 for (aIndex = 0; aIndex < kernelParam->numArgs; aIndex++)
5533 {
5534 argParam = &kernelParam->argParams[aIndex];
5535
5536 if ((kernelParam->cmFlags & CM_KERNEL_FLAGS_CURBE) && !argParam->perThread)
5537 {
5538 continue;
5539 }
5540
5541 for (tIndex = 0; tIndex < kernelParam->numThreads; tIndex++)
5542 {
5543 index = tIndex * argParam->perThread;
5544
5545 //-----------------------------------------------------
5546 CM_ASSERT(argParam->payloadOffset < kernelParam->payloadSize);
5547 //-----------------------------------------------------
5548
5549 switch(argParam->kind)
5550 {
5551 case CM_ARGUMENT_GENERAL:
5552 break;
5553
5554 case CM_ARGUMENT_SAMPLER:
5555 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSamplerState(
5556 state, kernelParam, argParam, indexParam, mediaID, index, nullptr));
5557 break;
5558
5559 case CM_ARGUMENT_SURFACEBUFFER:
5560 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
5561 state, argParam, indexParam, bindingTable, -1, index, nullptr));
5562 break;
5563
5564 case CM_ARGUMENT_SURFACE2D_UP:
5565 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPState(
5566 state, argParam, indexParam, bindingTable, index, nullptr));
5567 break;
5568
5569 case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
5570 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPSamplerState(
5571 state, argParam, indexParam, bindingTable, index, nullptr));
5572 break;
5573
5574 case CM_ARGUMENT_SURFACE2D_SAMPLER:
5575 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceSamplerState(
5576 state, argParam, indexParam, bindingTable, 0, nullptr));
5577 break;
5578
5579 case CM_ARGUMENT_SURFACE2D:
5580 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
5581 state, argParam, indexParam, bindingTable, index, nullptr));
5582 break;
5583
5584 case CM_ARGUMENT_SURFACE3D:
5585 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceState(
5586 state, argParam, indexParam, bindingTable, index, nullptr));
5587 break;
5588
5589 case CM_ARGUMENT_SURFACE_VME:
5590 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupVmeSurfaceState(
5591 state, argParam, indexParam, bindingTable, 0, nullptr));
5592 break;
5593
5594 case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
5595 case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
5596 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceState(
5597 state, argParam, indexParam, bindingTable, 0, nullptr));
5598 break;
5599
5600 default:
5601 eStatus = MOS_STATUS_INVALID_PARAMETER;
5602 CM_ASSERTMESSAGE(
5603 "Argument kind '%d' is not supported", argParam->kind);
5604 goto finish;
5605 }
5606 }
5607
5608 if( dependencyMask )
5609 {
5610 if( dependencyMask[tIndex].resetMask == CM_RESET_DEPENDENCY_MASK )
5611 {
5612 MOS_SecureMemcpy(bBuffer + (CM_SCOREBOARD_MASK_POS_IN_MEDIA_OBJECT_CMD*sizeof(uint32_t)),
5613 sizeof(uint8_t), &dependencyMask[tIndex].mask, sizeof(uint8_t));
5614 }
5615 }
5616 batchBuffer->iCurrent += cmdSize;
5617 bBuffer += cmdSize;
5618 }
5619 }
5620 else
5621 {
5622 //Insert synchronization if needed (PIPE_CONTROL)
5623 // 1. synchronization is set
5624 // 2. the next kernel has dependency pattern
5625 if((kernelIndex > 0) && ((taskParam->syncBitmap & ((uint64_t)1 << (kernelIndex-1))) || (kernelParam->kernelThreadSpaceParam.patternType != CM_NONE_DEPENDENCY)))
5626 {
5627 pipeControlParam = g_cRenderHal_InitPipeControlParams;
5628 pipeControlParam.presDest = nullptr;
5629 pipeControlParam.dwFlushMode = MHW_FLUSH_CUSTOM; // Use custom flags
5630 pipeControlParam.dwPostSyncOp = MHW_FLUSH_NOWRITE;
5631 pipeControlParam.bDisableCSStall = false;
5632 pipeControlParam.bTlbInvalidate = false;
5633 pipeControlParam.bFlushRenderTargetCache = true;
5634 pipeControlParam.bInvalidateTextureCache = true;
5635 CM_CHK_MOSSTATUS_RETURN(renderHal->pMhwMiInterface->AddPipeControl(nullptr, batchBuffer, &pipeControlParam));
5636 }
5637
5638 uint8_t *bBuffer = batchBuffer->pData + batchBuffer->iCurrent;
5639 for (tIndex = 0; tIndex < kernelParam->numThreads; tIndex++)
5640 {
5641 if (enableThreadSpace)
5642 {
5643 mediaObjectParam.VfeScoreboard.ScoreboardEnable = (state->scoreboardParams.ScoreboardMask==0) ? 0:1;
5644 mediaObjectParam.VfeScoreboard.Value[0] = threadCoordinates[tIndex].x;
5645 mediaObjectParam.VfeScoreboard.Value[1] = threadCoordinates[tIndex].y;
5646 mediaObjectParam.VfeScoreboard.ScoreboardColor = threadCoordinates[tIndex].color;
5647 mediaObjectParam.dwSliceDestinationSelect = threadCoordinates[tIndex].sliceSelect;
5648 mediaObjectParam.dwHalfSliceDestinationSelect = threadCoordinates[tIndex].subSliceSelect;
5649 if( !dependencyMask )
5650 {
5651 mediaObjectParam.VfeScoreboard.ScoreboardMask = (1 << state->scoreboardParams.ScoreboardMask)-1;
5652 }
5653 else
5654 {
5655 mediaObjectParam.VfeScoreboard.ScoreboardMask = dependencyMask[tIndex].mask;
5656 }
5657 }
5658 else if (enableKernelThreadSpace)
5659 {
5660 mediaObjectParam.VfeScoreboard.ScoreboardEnable = (state->scoreboardParams.ScoreboardMask == 0) ? 0 : 1;
5661 mediaObjectParam.VfeScoreboard.Value[0] = kernelThreadCoordinates[tIndex].x;
5662 mediaObjectParam.VfeScoreboard.Value[1] = kernelThreadCoordinates[tIndex].y;
5663 mediaObjectParam.VfeScoreboard.ScoreboardColor = kernelThreadCoordinates[tIndex].color;
5664 mediaObjectParam.dwSliceDestinationSelect = kernelThreadCoordinates[tIndex].sliceSelect;
5665 mediaObjectParam.dwHalfSliceDestinationSelect = kernelThreadCoordinates[tIndex].subSliceSelect;
5666 if (!dependencyMask)
5667 {
5668 mediaObjectParam.VfeScoreboard.ScoreboardMask = (1 << state->scoreboardParams.ScoreboardMask) - 1;
5669 }
5670 else
5671 {
5672 mediaObjectParam.VfeScoreboard.ScoreboardMask = dependencyMask[tIndex].mask;
5673 }
5674 }
5675 else
5676 {
5677 mediaObjectParam.VfeScoreboard.Value[0] = tIndex % taskParam->threadSpaceWidth;
5678 mediaObjectParam.VfeScoreboard.Value[1] = tIndex / taskParam->threadSpaceWidth;
5679 }
5680
5681 for (aIndex = 0; aIndex < kernelParam->numArgs; aIndex++)
5682 {
5683 argParam = &kernelParam->argParams[aIndex];
5684 index = tIndex * argParam->perThread;
5685
5686 if ((kernelParam->cmFlags & CM_KERNEL_FLAGS_CURBE) && !argParam->perThread)
5687 {
5688 continue;
5689 }
5690
5691 //-----------------------------------------------------
5692 CM_ASSERT(argParam->payloadOffset < kernelParam->payloadSize);
5693 //-----------------------------------------------------
5694
5695 switch(argParam->kind)
5696 {
5697 case CM_ARGUMENT_GENERAL:
5698 MOS_SecureMemcpy(
5699 cmdInline + argParam->payloadOffset,
5700 argParam->unitSize,
5701 argParam->firstValue + index * argParam->unitSize,
5702 argParam->unitSize);
5703 break;
5704
5705 case CM_ARGUMENT_SAMPLER:
5706 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSamplerState(
5707 state, kernelParam, argParam, indexParam, mediaID, index, cmdInline));
5708 break;
5709
5710 case CM_ARGUMENT_SURFACEBUFFER:
5711 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
5712 state, argParam, indexParam, bindingTable, -1, index, cmdInline));
5713 break;
5714
5715 case CM_ARGUMENT_SURFACE2D_UP:
5716 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPState(
5717 state, argParam, indexParam, bindingTable, index, cmdInline));
5718 break;
5719
5720 case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
5721 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPSamplerState(
5722 state, argParam, indexParam, bindingTable, index, cmdInline));
5723 break;
5724
5725 case CM_ARGUMENT_SURFACE2D_SAMPLER:
5726 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceSamplerState(
5727 state, argParam, indexParam, bindingTable, index, cmdInline));
5728 break;
5729
5730 case CM_ARGUMENT_SURFACE2D:
5731 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
5732 state, argParam, indexParam, bindingTable, index, cmdInline));
5733 break;
5734
5735 case CM_ARGUMENT_SURFACE3D:
5736 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceState(
5737 state, argParam, indexParam, bindingTable, index, cmdInline));
5738 break;
5739
5740 case CM_ARGUMENT_SURFACE_VME:
5741 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupVmeSurfaceState(
5742 state, argParam, indexParam, bindingTable, 0, cmdInline));
5743 break;
5744
5745 case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
5746 case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
5747 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceState(
5748 state, argParam, indexParam, bindingTable, 0, cmdInline));
5749 break;
5750
5751 default:
5752 eStatus = MOS_STATUS_INVALID_PARAMETER;
5753 CM_ASSERTMESSAGE(
5754 "Argument kind '%d' is not supported", argParam->kind);
5755 goto finish;
5756 }
5757 }
5758
5759 mediaObjectParam.pInlineData = inlineData;
5760 state->renderHal->pMhwRenderInterface->AddMediaObject(nullptr, batchBuffer, &mediaObjectParam);
5761 }
5762 }
5763 }
5764
5765 for (i = 0; i < CM_MAX_GLOBAL_SURFACE_NUMBER; i++) {
5766 if ((kernelParam->globalSurface[i] & CM_SURFACE_MASK) != CM_NULL_SURFACE)
5767 {
5768 CM_HAL_KERNEL_ARG_PARAM tempArgParam;
5769 argParam = &tempArgParam;
5770
5771 tempArgParam.kind = CM_ARGUMENT_SURFACEBUFFER;
5772 tempArgParam.payloadOffset = 0;
5773 tempArgParam.unitCount = 1;
5774 tempArgParam.unitSize = sizeof(uint32_t);
5775 tempArgParam.perThread = false;
5776 tempArgParam.firstValue = (uint8_t*)&kernelParam->globalSurface[i];
5777 tempArgParam.aliasIndex = 0;
5778 tempArgParam.aliasCreated = false;
5779
5780 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
5781 state, argParam, indexParam, bindingTable, (int16_t)i, 0, nullptr));
5782 }
5783 }
5784
5785 // set number of samplers
5786 krnAllocation->Params.Sampler_Count = indexParam->samplerIndexCount;
5787
5788 // add SIP surface
5789 if (kernelParam->kernelDebugEnabled)
5790 {
5791 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSipSurfaceState(state, indexParam, bindingTable));
5792 }
5793
5794 finish:
5795 return eStatus;
5796 }
5797
5798 //*-----------------------------------------------------------------------------
5799 //| Purpose: Finishes setting up HW states for the kernel
5800 //| Used by EnqueueWithHints
5801 //| Returns: Result of the operation
5802 //*-----------------------------------------------------------------------------
HalCm_FinishStatesForKernelMix(PCM_HAL_STATE state,PMHW_BATCH_BUFFER batchBuffer,int32_t taskId,PCM_HAL_KERNEL_PARAM * cmExecKernels,PCM_HAL_INDEX_PARAM indexParams,int32_t * bindingTableEntries,int32_t * mediaIds,PRENDERHAL_KRN_ALLOCATION * krnAllocations,uint32_t numKernels,uint32_t hints,bool lastTask)5803 MOS_STATUS HalCm_FinishStatesForKernelMix(
5804 PCM_HAL_STATE state,
5805 PMHW_BATCH_BUFFER batchBuffer,
5806 int32_t taskId,
5807 PCM_HAL_KERNEL_PARAM* cmExecKernels,
5808 PCM_HAL_INDEX_PARAM indexParams,
5809 int32_t *bindingTableEntries,
5810 int32_t *mediaIds,
5811 PRENDERHAL_KRN_ALLOCATION *krnAllocations,
5812 uint32_t numKernels,
5813 uint32_t hints,
5814 bool lastTask)
5815 {
5816 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
5817 PRENDERHAL_INTERFACE renderHal = state->renderHal;
5818 PMHW_MEDIA_OBJECT_PARAMS mediaObjectParams = nullptr;
5819 PCM_HAL_KERNEL_PARAM* kernelParams = nullptr;
5820 PCM_HAL_KERNEL_ARG_PARAM* argParams = nullptr;
5821 PCM_HAL_BB_ARGS bbCmArgs = nullptr;
5822 PMHW_VFE_SCOREBOARD scoreboardParams = nullptr;
5823 PCM_HAL_PARALLELISM_GRAPH_INFO parallelGraphInfo = nullptr;
5824 PCM_HAL_KERNEL_ARG_PARAM argParam = nullptr;
5825 PCM_HAL_KERNEL_SUBSLICE_INFO kernelsSliceInfo = nullptr;
5826 PCM_HAL_KERNEL_THREADSPACE_PARAM kernelTSParam = nullptr;
5827 PCM_HAL_KERNEL_GROUP_INFO groupInfo = nullptr;
5828 CM_HAL_DEPENDENCY vfeDependencyInfo ;
5829 CM_PLATFORM_INFO platformInfo ;
5830 CM_GT_SYSTEM_INFO systemInfo ;
5831 CM_HAL_SCOREBOARD_XY_MASK threadCoordinates ;
5832 uint32_t **dependRemap = nullptr;
5833 uint32_t **dispatchFreq = nullptr;
5834 uint8_t **cmdInline = nullptr;
5835 uint32_t *cmdSizes = nullptr;
5836 uint32_t *remapKrnToGrp = nullptr;
5837 uint32_t *remapGrpToKrn = nullptr;
5838 uint32_t *numKernelsPerGrp = nullptr;
5839 uint8_t *kernelScoreboardMask = nullptr;
5840 uint8_t hintsBits = 0;
5841 uint8_t tmpThreadScoreboardMask = 0;
5842 uint8_t scoreboardMask = 0;
5843 bool singleSubSlice = false;
5844 bool enableThreadSpace = false;
5845 bool kernelFound = false;
5846 bool updateCurrKernel = false;
5847 bool noDependencyCase = false;
5848 bool sufficientSliceInfo = true;
5849 uint32_t adjustedYCoord = 0;
5850 uint32_t numKernelGroups = CM_HINTS_DEFAULT_NUM_KERNEL_GRP;
5851 uint32_t totalNumThreads = 0;
5852 uint32_t hdrSize = 0;
5853 uint32_t i = 0;
5854 uint32_t j = 0;
5855 uint32_t k = 0;
5856 uint32_t tmp = 0;
5857 uint32_t tmp1 = 0;
5858 uint32_t loopCount = 0;
5859 uint32_t aIndex = 0;
5860 uint32_t index = 0;
5861 uint32_t totalReqSubSlices = 0;
5862 uint32_t difference = 0;
5863 uint32_t curKernel = 0;
5864 uint32_t numSet = 0;
5865 uint32_t numSubSlicesEnabled = 0;
5866 uint32_t sliceIndex = 0;
5867 uint32_t tmpNumSubSlice = 0;
5868 uint32_t tmpNumKernelsPerGrp = 0;
5869 uint32_t maximum = 0;
5870 uint32_t count = 0;
5871 uint32_t numDispatched = 0;
5872 uint32_t tmpIndex = 0;
5873 uint32_t numStepsDispatched = 0;
5874 uint32_t minSteps = UINT_MAX;
5875 uint32_t grpId = 0;
5876 uint32_t allocSize = 0;
5877 uint32_t currentKernel = 0;
5878 uint32_t roundRobinCount = 0;
5879 uint32_t numTasks = 0;
5880 uint32_t extraSWThreads = 0;
5881 UNUSED(taskId);
5882
5883 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
5884
5885 MOS_ZeroMemory(&threadCoordinates, sizeof(CM_HAL_SCOREBOARD_XY_MASK));
5886 MOS_ZeroMemory(&vfeDependencyInfo, sizeof(CM_HAL_DEPENDENCY));
5887 MOS_ZeroMemory(&platformInfo, sizeof(CM_PLATFORM_INFO));
5888 MOS_ZeroMemory(&systemInfo, sizeof(CM_GT_SYSTEM_INFO));
5889
5890 mediaObjectParams = (PMHW_MEDIA_OBJECT_PARAMS)MOS_AllocAndZeroMemory(sizeof(MHW_MEDIA_OBJECT_PARAMS)*numKernels);
5891 kernelParams = (PCM_HAL_KERNEL_PARAM*)MOS_AllocAndZeroMemory(sizeof(PCM_HAL_KERNEL_PARAM)*numKernels);
5892 argParams = (PCM_HAL_KERNEL_ARG_PARAM*)MOS_AllocAndZeroMemory(sizeof(PCM_HAL_KERNEL_ARG_PARAM)*numKernels);
5893 cmdInline = (uint8_t**)MOS_AllocAndZeroMemory(sizeof(uint8_t*)*numKernels);
5894 cmdSizes = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*numKernels);
5895 remapKrnToGrp = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*numKernels);
5896 remapGrpToKrn = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*numKernels);
5897 kernelScoreboardMask = (uint8_t*)MOS_AllocAndZeroMemory(sizeof(uint8_t)*numKernels);
5898 dependRemap = (uint32_t**)MOS_AllocAndZeroMemory(sizeof(uint32_t*)*numKernels);
5899 parallelGraphInfo = (PCM_HAL_PARALLELISM_GRAPH_INFO)MOS_AllocAndZeroMemory(sizeof(CM_HAL_PARALLELISM_GRAPH_INFO)*numKernels);
5900 dispatchFreq = (uint32_t**)MOS_AllocAndZeroMemory(sizeof(uint32_t*)*numKernels);
5901 numKernelsPerGrp = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*numKernels);
5902
5903 if( !mediaObjectParams || !kernelParams || !argParams ||
5904 !cmdInline || !cmdSizes ||
5905 !remapKrnToGrp || !remapGrpToKrn || !kernelScoreboardMask || !dependRemap ||
5906 !parallelGraphInfo || !dispatchFreq || !numKernelsPerGrp )
5907 {
5908 eStatus = MOS_STATUS_INVALID_PARAMETER;
5909 CM_ASSERTMESSAGE("Memory allocation failed in EnqueueWithHints");
5910 goto finish;
5911 }
5912
5913 state->euSaturationEnabled = true;
5914
5915 hintsBits = (hints & CM_HINTS_MASK_KERNEL_GROUPS) >> CM_HINTS_NUM_BITS_WALK_OBJ;
5916 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetNumKernelsPerGroup(hintsBits, numKernels, numKernelsPerGrp,
5917 &numKernelGroups, remapKrnToGrp, remapGrpToKrn));
5918
5919 kernelsSliceInfo = (PCM_HAL_KERNEL_SUBSLICE_INFO)MOS_AllocAndZeroMemory(sizeof(CM_HAL_KERNEL_SUBSLICE_INFO)*numKernelGroups);
5920 groupInfo = (PCM_HAL_KERNEL_GROUP_INFO)MOS_AllocAndZeroMemory(sizeof(CM_HAL_KERNEL_GROUP_INFO)*numKernelGroups);
5921 if( !kernelsSliceInfo || !groupInfo )
5922 {
5923 eStatus = MOS_STATUS_INVALID_PARAMETER;
5924 CM_ASSERTMESSAGE("Memory allocation failed in EnqueueWithHints");
5925 goto finish;
5926 }
5927
5928 for( i = 0; i < numKernelGroups; ++i)
5929 {
5930 groupInfo[i].numKernelsInGroup = numKernelsPerGrp[i];
5931 }
5932
5933 hdrSize = renderHal->pHwSizes->dwSizeMediaObjectHeaderCmd;
5934
5935 for ( i = 0; i < numKernels; ++i )
5936 {
5937 kernelParams[i] = cmExecKernels[i];
5938
5939 mediaObjectParams[i].dwInterfaceDescriptorOffset = mediaIds[i];
5940 mediaObjectParams[i].dwInlineDataSize = MOS_MAX(kernelParams[i]->payloadSize, 4);
5941
5942 cmdInline[i] = (uint8_t*)MOS_AllocAndZeroMemory(sizeof(uint8_t) * 1024);
5943 cmdSizes[i] = mediaObjectParams[i].dwInlineDataSize + hdrSize;
5944
5945 totalNumThreads += kernelParams[i]->numThreads;
5946 }
5947
5948 numTasks = ( hints & CM_HINTS_MASK_NUM_TASKS ) >> CM_HINTS_NUM_BITS_TASK_POS;
5949 if( numTasks > 1 )
5950 {
5951 if( lastTask )
5952 {
5953 extraSWThreads = totalNumThreads % numTasks;
5954 }
5955
5956 totalNumThreads = (totalNumThreads / numTasks) + extraSWThreads;
5957 }
5958
5959 for( i = 0; i < numKernels; ++i )
5960 {
5961 dependRemap[i] = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t) * CM_HAL_MAX_DEPENDENCY_COUNT);
5962 for( k = 0; k < CM_HAL_MAX_DEPENDENCY_COUNT; ++k )
5963 {
5964 // initialize each index to map to itself
5965 dependRemap[i][k] = k;
5966 }
5967 }
5968
5969 for( i = 0; i < numKernels; ++i )
5970 {
5971 kernelTSParam = &kernelParams[i]->kernelThreadSpaceParam;
5972
5973 // calculate union dependency vector of all kernels with dependency
5974 if( kernelTSParam->dependencyInfo.count )
5975 {
5976 if( vfeDependencyInfo.count == 0 )
5977 {
5978 MOS_SecureMemcpy(&vfeDependencyInfo, sizeof(CM_HAL_DEPENDENCY), &kernelTSParam->dependencyInfo, sizeof(CM_HAL_DEPENDENCY));
5979 kernelScoreboardMask[i] = ( 1 << vfeDependencyInfo.count ) - 1;
5980 }
5981 else
5982 {
5983 for( j = 0; j < kernelTSParam->dependencyInfo.count; ++j )
5984 {
5985 for( k = 0; k < vfeDependencyInfo.count; ++k )
5986 {
5987 if( (kernelTSParam->dependencyInfo.deltaX[j] == vfeDependencyInfo.deltaX[k]) &&
5988 (kernelTSParam->dependencyInfo.deltaY[j] == vfeDependencyInfo.deltaY[k]) )
5989 {
5990 CM_HAL_SETBIT(kernelScoreboardMask[i], k);
5991 dependRemap[i][j] = k;
5992 break;
5993 }
5994 }
5995 if ( k == vfeDependencyInfo.count )
5996 {
5997 vfeDependencyInfo.deltaX[vfeDependencyInfo.count] = kernelTSParam->dependencyInfo.deltaX[j];
5998 vfeDependencyInfo.deltaY[vfeDependencyInfo.count] = kernelTSParam->dependencyInfo.deltaY[j];
5999 CM_HAL_SETBIT(kernelScoreboardMask[i], vfeDependencyInfo.count);
6000 vfeDependencyInfo.count++;
6001 dependRemap[i][j] = k;
6002 }
6003 }
6004 }
6005 }
6006 } // for num kernels
6007
6008 if( vfeDependencyInfo.count > CM_HAL_MAX_DEPENDENCY_COUNT )
6009 {
6010 eStatus = MOS_STATUS_INVALID_PARAMETER;
6011 CM_ASSERTMESSAGE("Union of kernel dependencies exceeds max dependency count (8)");
6012 goto finish;
6013 }
6014
6015 // set VFE scoreboarding information from union of kernel dependency vectors
6016 scoreboardParams = &state->scoreboardParams;
6017 scoreboardParams->ScoreboardMask = (uint8_t)vfeDependencyInfo.count;
6018 for( i = 0; i < scoreboardParams->ScoreboardMask; ++i )
6019 {
6020 scoreboardParams->ScoreboardDelta[i].x = vfeDependencyInfo.deltaX[i];
6021 scoreboardParams->ScoreboardDelta[i].y = vfeDependencyInfo.deltaY[i];
6022 }
6023
6024 if (vfeDependencyInfo.count == 0)
6025 {
6026 noDependencyCase = true;
6027 }
6028
6029 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetPlatformInfo(state, &platformInfo, true));
6030 singleSubSlice = (platformInfo.numSubSlices == 1) ? true : false;
6031
6032 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGTSystemInfo(state, &systemInfo));
6033
6034 if( !singleSubSlice )
6035 {
6036 for( i = 0; i < numKernelGroups; ++i )
6037 {
6038 tmpNumKernelsPerGrp = numKernelsPerGrp[i];
6039
6040 for( j = 0; j < tmpNumKernelsPerGrp; ++j )
6041 {
6042 kernelTSParam = &kernelParams[count]->kernelThreadSpaceParam;
6043
6044 switch( kernelTSParam->patternType )
6045 {
6046 case CM_NONE_DEPENDENCY:
6047 maximum = kernelParams[count]->numThreads;
6048 break;
6049 case CM_WAVEFRONT:
6050 maximum = MOS_MIN(kernelTSParam->threadSpaceWidth, kernelTSParam->threadSpaceHeight);
6051 break;
6052 case CM_WAVEFRONT26:
6053 maximum = MOS_MIN( ((kernelTSParam->threadSpaceWidth + 1) >> 1), kernelTSParam->threadSpaceHeight);
6054 break;
6055 case CM_VERTICAL_WAVE:
6056 maximum = kernelTSParam->threadSpaceHeight;
6057 break;
6058 case CM_HORIZONTAL_WAVE:
6059 maximum = kernelTSParam->threadSpaceWidth;
6060 break;
6061 case CM_WAVEFRONT26Z:
6062 maximum = MOS_MIN( ((kernelTSParam->threadSpaceWidth - 1) >> 1), kernelTSParam->threadSpaceHeight);
6063 break;
6064 default:
6065 eStatus = MOS_STATUS_INVALID_PARAMETER;
6066 CM_ASSERTMESSAGE("Unsupported dependency pattern for EnqueueWithHints");
6067 goto finish;
6068 }
6069
6070 if( kernelTSParam->patternType != CM_WAVEFRONT26Z )
6071 {
6072 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetParallelGraphInfo(maximum, kernelParams[count]->numThreads,
6073 kernelTSParam->threadSpaceWidth, kernelTSParam->threadSpaceHeight,
6074 ¶llelGraphInfo[count], kernelTSParam->patternType, noDependencyCase));
6075 }
6076 else
6077 {
6078 parallelGraphInfo[count].numSteps = kernelTSParam->dispatchInfo.numWaves;
6079 }
6080
6081 if( kernelTSParam->patternType != CM_NONE_DEPENDENCY )
6082 {
6083 dispatchFreq[count] = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*parallelGraphInfo[count].numSteps);
6084 if( !dispatchFreq[count] )
6085 {
6086 eStatus = MOS_STATUS_INVALID_PARAMETER;
6087 CM_ASSERTMESSAGE("Memory allocation failed for EnqueueWithHints");
6088 goto finish;
6089 }
6090
6091 if( kernelTSParam->patternType != CM_WAVEFRONT26Z )
6092 {
6093 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetDispatchPattern(parallelGraphInfo[count], kernelTSParam->patternType, dispatchFreq[count]));
6094 }
6095 else
6096 {
6097 MOS_SecureMemcpy(dispatchFreq[count], sizeof(uint32_t)*parallelGraphInfo[count].numSteps,
6098 kernelTSParam->dispatchInfo.numThreadsInWave, sizeof(uint32_t)*parallelGraphInfo[count].numSteps);
6099 }
6100 }
6101
6102 if (!noDependencyCase)
6103 {
6104 tmpNumSubSlice =
6105 (maximum / (platformInfo.numEUsPerSubSlice * platformInfo.numHWThreadsPerEU)) + 1;
6106
6107 if (tmpNumSubSlice > platformInfo.numSubSlices)
6108 {
6109 tmpNumSubSlice = platformInfo.numSubSlices - 1;
6110 }
6111
6112 if (tmpNumSubSlice > kernelsSliceInfo[i].numSubSlices)
6113 {
6114 kernelsSliceInfo[i].numSubSlices = tmpNumSubSlice;
6115 }
6116 }
6117 else
6118 {
6119 kernelsSliceInfo[i].numSubSlices = platformInfo.numSubSlices;
6120 }
6121
6122 count++;
6123 }
6124 }
6125
6126 if (!noDependencyCase)
6127 {
6128 for (i = 0; i < numKernelGroups; ++i)
6129 {
6130 totalReqSubSlices += kernelsSliceInfo[i].numSubSlices;
6131 }
6132
6133 // adjust if fewer or more sub-slices were requested than the platform has
6134 if (totalReqSubSlices < platformInfo.numSubSlices)
6135 {
6136 // want to add subslices starting from K0
6137 difference = platformInfo.numSubSlices - totalReqSubSlices;
6138 tmp = tmp1 = 0;
6139 for (i = 0; i < difference; ++i)
6140 {
6141 tmp = tmp1 % numKernelGroups;
6142 kernelsSliceInfo[tmp].numSubSlices++;
6143 totalReqSubSlices++;
6144 tmp1++;
6145 }
6146 }
6147 else if (totalReqSubSlices > platformInfo.numSubSlices)
6148 {
6149 // want to subtract subslices starting from last kernel
6150 difference = totalReqSubSlices - platformInfo.numSubSlices;
6151 tmp = 0;
6152 tmp1 = numKernelGroups - 1;
6153 for (i = numKernelGroups - 1, j = 0; j < difference; --i, ++j)
6154 {
6155 tmp = tmp1 % numKernelGroups;
6156 kernelsSliceInfo[tmp].numSubSlices--;
6157 totalReqSubSlices--;
6158 tmp1 += numKernelGroups - 1;
6159 }
6160 }
6161
6162 if (totalReqSubSlices != platformInfo.numSubSlices)
6163 {
6164 eStatus = MOS_STATUS_INVALID_PARAMETER;
6165 CM_ASSERTMESSAGE("Total requested sub-slices does not match platform's number of sub-slices");
6166 goto finish;
6167 }
6168 }
6169
6170 for(i = 0; i < numKernelGroups; ++i)
6171 {
6172 kernelsSliceInfo[i].destination = (PCM_HAL_KERNEL_SLICE_SUBSLICE)MOS_AllocAndZeroMemory(sizeof(CM_HAL_KERNEL_SLICE_SUBSLICE)*kernelsSliceInfo[i].numSubSlices);
6173 if( !kernelsSliceInfo[i].destination )
6174 {
6175 eStatus = MOS_STATUS_INVALID_PARAMETER;
6176 CM_ASSERTMESSAGE("Memory allocation failed in EnqueueWithHints");
6177 goto finish;
6178 }
6179 }
6180
6181 // set slice, subslice for each kernel group
6182 if (systemInfo.isSliceInfoValid)
6183 {
6184 for (i = 0; i < systemInfo.numMaxSlicesSupported; ++i)
6185 {
6186 for (j = 0; j < (systemInfo.numMaxSubSlicesSupported / systemInfo.numMaxSlicesSupported); ++j)
6187 {
6188 if (systemInfo.sliceInfo[i].SubSliceInfo[j].Enabled && systemInfo.sliceInfo[i].Enabled)
6189 {
6190 if (curKernel < numKernelGroups)
6191 {
6192 if (kernelsSliceInfo[curKernel].numSubSlices == numSet)
6193 {
6194 curKernel++;
6195 numSet = 0;
6196 }
6197 }
6198
6199 if (curKernel < numKernelGroups)
6200 {
6201 kernelsSliceInfo[curKernel].destination[numSet].slice = i;
6202 kernelsSliceInfo[curKernel].destination[numSet].subSlice = j;
6203
6204 numSet++;
6205 }
6206
6207 numSubSlicesEnabled++;
6208 }
6209 }
6210 }
6211
6212
6213 if (numSubSlicesEnabled != platformInfo.numSubSlices)
6214 {
6215 // not enough slice information, do not assign sub-slice destination
6216 sufficientSliceInfo = false;
6217 }
6218 }
6219
6220 // set freq dispatch ratio for each group
6221 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetKernelGrpFreqDispatch(parallelGraphInfo, groupInfo, numKernelGroups, &minSteps));
6222
6223 // set dispatch pattern for kernel with no dependency
6224 for( i = 0; i < numKernels; ++i )
6225 {
6226 if( kernelParams[i]->kernelThreadSpaceParam.patternType == CM_NONE_DEPENDENCY )
6227 {
6228 grpId = remapKrnToGrp[i];
6229 allocSize = 0;
6230
6231 if( groupInfo[grpId].freqDispatch == 0 )
6232 {
6233 allocSize = minSteps;
6234 groupInfo[grpId].freqDispatch = 1;
6235 }
6236 else
6237 {
6238 allocSize = minSteps * groupInfo[grpId].freqDispatch;
6239 groupInfo[grpId].freqDispatch = groupInfo[grpId].freqDispatch * 2;
6240 }
6241
6242 dispatchFreq[i] = (uint32_t*)MOS_AllocAndZeroMemory(sizeof(uint32_t)*allocSize);
6243 if( !dispatchFreq[i] )
6244 {
6245 eStatus = MOS_STATUS_INVALID_PARAMETER;
6246 CM_ASSERTMESSAGE("Memory allocation failed in EnqueueWithHints");
6247 goto finish;
6248 }
6249
6250 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetNoDependKernelDispatchPattern(kernelParams[i]->numThreads,
6251 allocSize, dispatchFreq[i]));
6252 }
6253 }
6254 }
6255
6256 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
6257 bbCmArgs = (PCM_HAL_BB_ARGS) batchBuffer->pPrivateData;
6258 if( bbCmArgs->refCount > 1 )
6259 {
6260
6261 uint8_t *bBuffer = batchBuffer->pData + batchBuffer->iCurrent;
6262 updateCurrKernel = false;
6263 for( i = 0; i < totalNumThreads; ++i )
6264 {
6265 if( !singleSubSlice )
6266 {
6267 if( (dispatchFreq[currentKernel][state->hintIndexes.dispatchIndexes[currentKernel]] == numDispatched) ||
6268 (state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads) )
6269 {
6270 numDispatched = 0;
6271 numStepsDispatched++;
6272 state->hintIndexes.dispatchIndexes[currentKernel]++;
6273
6274 if( state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads )
6275 {
6276 updateCurrKernel = true;
6277 groupInfo[remapKrnToGrp[currentKernel]].numKernelsFinished++;
6278 if( groupInfo[remapKrnToGrp[currentKernel]].numKernelsFinished ==
6279 groupInfo[remapKrnToGrp[currentKernel]].numKernelsInGroup )
6280 {
6281 groupInfo[remapKrnToGrp[currentKernel]].groupFinished = 1;
6282 }
6283 else
6284 {
6285 remapGrpToKrn[tmpIndex]++;
6286 }
6287 }
6288
6289 if( (groupInfo[remapKrnToGrp[currentKernel]].freqDispatch == numStepsDispatched) ||
6290 updateCurrKernel )
6291 {
6292 numStepsDispatched = 0;
6293 roundRobinCount++;
6294
6295 tmpIndex = roundRobinCount % numKernelGroups;
6296
6297 if( groupInfo[tmpIndex].groupFinished )
6298 {
6299 loopCount = 0;
6300 while( (loopCount < numKernelGroups) && (!kernelFound) )
6301 {
6302 roundRobinCount++;
6303 tmpIndex = roundRobinCount % numKernelGroups;
6304 if( state->hintIndexes.kernelIndexes[remapGrpToKrn[tmpIndex]] < kernelParams[remapGrpToKrn[tmpIndex]]->numThreads )
6305 {
6306 kernelFound = true;
6307 }
6308 loopCount++;
6309 }
6310 if( !kernelFound )
6311 {
6312 // Should be unreachable:
6313 // while still inside the totalNumThreads loop, at least one kernel must have threads left
6314 eStatus = MOS_STATUS_UNKNOWN;
6315 CM_ASSERTMESSAGE("Couldn't find kernel with threads left for EnqueueWithHints");
6316 goto finish;
6317 }
6318 }
6319 currentKernel = remapGrpToKrn[tmpIndex];
6320 }
6321 }
6322 }
6323 else
6324 {
6325 if( state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads )
6326 {
6327 currentKernel++;
6328 }
6329 }
6330
6331 if( kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates )
6332 {
6333 threadCoordinates.y = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].y;
6334 threadCoordinates.mask = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].mask;
6335 enableThreadSpace = true;
6336 threadCoordinates.resetMask = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].resetMask;
6337 }
6338
6339 if( enableThreadSpace )
6340 {
6341 if( threadCoordinates.mask != CM_DEFAULT_THREAD_DEPENDENCY_MASK )
6342 {
6343 tmpThreadScoreboardMask = kernelScoreboardMask[currentKernel];
6344 // do the remapping
6345 for( k = 0; k < kernelParams[currentKernel]->kernelThreadSpaceParam.dependencyInfo.count; ++k )
6346 {
6347 if( (threadCoordinates.mask & CM_HINTS_LEASTBIT_MASK) == 0 )
6348 {
6349 CM_HAL_UNSETBIT(tmpThreadScoreboardMask, dependRemap[currentKernel][k]);
6350 }
6351
6352 threadCoordinates.mask = threadCoordinates.mask >> 1;
6353 }
6354 scoreboardMask = tmpThreadScoreboardMask;
6355 }
6356 else
6357 {
6358 scoreboardMask = kernelScoreboardMask[currentKernel];
6359 }
6360 }
6361 else
6362 {
6363 threadCoordinates.y = state->hintIndexes.kernelIndexes[currentKernel] / kernelParams[currentKernel]->kernelThreadSpaceParam.threadSpaceWidth;
6364 scoreboardMask = kernelScoreboardMask[currentKernel];
6365 }
6366
6367 adjustedYCoord = 0;
6368 if( currentKernel > 0 )
6369 {
6370 // if not first kernel, and has dependency,
6371 // and along scoreboard border top need to mask out dependencies with y < 0
6372 if( kernelScoreboardMask[currentKernel] )
6373 {
6374 if( threadCoordinates.y == 0 )
6375 {
6376 for( k = 0; k < vfeDependencyInfo.count; ++k )
6377 {
6378 if( vfeDependencyInfo.deltaY[k] < 0 )
6379 {
6380 CM_HAL_UNSETBIT(scoreboardMask, k);
6381 }
6382 }
6383 }
6384 }
6385 }
6386
6387 if( currentKernel < numKernels - 1 )
6388 {
6389 // if not last kernel, and has dependency,
6390 // along scoreboard border bottom need to mask out dependencies with y > 0
6391 if( kernelScoreboardMask[currentKernel] )
6392 {
6393 if( threadCoordinates.y == (kernelParams[currentKernel]->kernelThreadSpaceParam.threadSpaceHeight - 1))
6394 {
6395 for( k = 0; k < vfeDependencyInfo.count; ++k)
6396 {
6397 if( vfeDependencyInfo.deltaY[k] > 0 )
6398 {
6399 CM_HAL_UNSETBIT(scoreboardMask, k);
6400 }
6401 }
6402 }
6403 }
6404 }
6405
6406 for( aIndex = 0; aIndex < kernelParams[currentKernel]->numArgs; aIndex++ )
6407 {
6408 argParams[currentKernel] = &kernelParams[currentKernel]->argParams[aIndex];
6409 index = state->hintIndexes.kernelIndexes[currentKernel] * argParams[currentKernel]->perThread;
6410
6411 if( (kernelParams[currentKernel]->cmFlags & CM_KERNEL_FLAGS_CURBE) && !argParams[currentKernel]->perThread )
6412 {
6413 continue;
6414 }
6415
6416 CM_ASSERT(argParams[currentKernel]->payloadOffset < kernelParams[currentKernel]->payloadSize);
6417
6418 switch(argParams[currentKernel]->kind)
6419 {
6420 case CM_ARGUMENT_GENERAL:
6421 break;
6422
6423 case CM_ARGUMENT_SAMPLER:
6424 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSamplerState(
6425 state, kernelParams[currentKernel], argParams[currentKernel], &indexParams[currentKernel],
6426 mediaIds[currentKernel], index, nullptr));
6427 break;
6428
6429 case CM_ARGUMENT_SURFACEBUFFER:
6430 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
6431 state, argParams[currentKernel], &indexParams[currentKernel],
6432 bindingTableEntries[currentKernel], -1, index, nullptr));
6433 break;
6434
6435 case CM_ARGUMENT_SURFACE2D_UP:
6436 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPState(
6437 state, argParams[currentKernel], &indexParams[currentKernel],
6438 bindingTableEntries[currentKernel], index, nullptr));
6439 break;
6440
6441 case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
6442 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPSamplerState(
6443 state, argParams[currentKernel], &indexParams[currentKernel],
6444 bindingTableEntries[currentKernel], index, nullptr));
6445 break;
6446
6447 case CM_ARGUMENT_SURFACE2D_SAMPLER:
6448 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceSamplerState(
6449 state, argParams[currentKernel], &indexParams[currentKernel],
6450 bindingTableEntries[currentKernel], 0, nullptr));
6451 break;
6452
6453 case CM_ARGUMENT_SURFACE2D:
6454 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
6455 state, argParams[currentKernel], &indexParams[currentKernel],
6456 bindingTableEntries[currentKernel], index, nullptr));
6457 break;
6458
6459 case CM_ARGUMENT_SURFACE3D:
6460 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceState(
6461 state, argParams[currentKernel], &indexParams[currentKernel],
6462 bindingTableEntries[currentKernel], index, nullptr));
6463 break;
6464
6465 case CM_ARGUMENT_SURFACE_VME:
6466 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupVmeSurfaceState(
6467 state, argParams[currentKernel], &indexParams[currentKernel],
6468 bindingTableEntries[currentKernel], 0, nullptr));
6469 break;
6470
6471 case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
6472 case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
6473 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceState(
6474 state, argParams[currentKernel], &indexParams[currentKernel],
6475 bindingTableEntries[currentKernel], 0, nullptr));
6476 break;
6477
6478 default:
6479 eStatus = MOS_STATUS_INVALID_PARAMETER;
6480 CM_ASSERTMESSAGE(
6481 "Argument kind '%d' is not supported", argParams[currentKernel]->kind);
6482 goto finish;
6483
6484 } // switch argKind
6485 } // for numArgs
6486
6487 if( threadCoordinates.resetMask == CM_RESET_DEPENDENCY_MASK )
6488 {
6489 MOS_SecureMemcpy(bBuffer + (CM_SCOREBOARD_MASK_POS_IN_MEDIA_OBJECT_CMD*sizeof(uint32_t)),
6490 sizeof(uint8_t), &scoreboardMask, sizeof(uint8_t));
6491 }
6492
6493 batchBuffer->iCurrent += cmdSizes[currentKernel];
6494 bBuffer += cmdSizes[currentKernel];
6495
6496 state->hintIndexes.kernelIndexes[currentKernel]++;
6497 enableThreadSpace = false;
6498 kernelFound = false;
6499 updateCurrKernel = false;
6500 numDispatched++;
6501 } // for totalNumThreads
6502 } // if uiRefCount > 1
6503 else
6504 {
6505 uint8_t *bBuffer = batchBuffer->pData + batchBuffer->iCurrent;
6506 updateCurrKernel = false;
6507
6508 for( i = 0; i < totalNumThreads; ++i)
6509 {
6510 if( !singleSubSlice )
6511 {
6512 if( (dispatchFreq[currentKernel][state->hintIndexes.dispatchIndexes[currentKernel]] == numDispatched) ||
6513 (state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads) )
6514 {
6515 numDispatched = 0;
6516 numStepsDispatched++;
6517 state->hintIndexes.dispatchIndexes[currentKernel]++;
6518
6519 if( state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads )
6520 {
6521 updateCurrKernel = true;
6522 groupInfo[remapKrnToGrp[currentKernel]].numKernelsFinished++;
6523 if( groupInfo[remapKrnToGrp[currentKernel]].numKernelsFinished ==
6524 groupInfo[remapKrnToGrp[currentKernel]].numKernelsInGroup )
6525 {
6526 groupInfo[remapKrnToGrp[currentKernel]].groupFinished = 1;
6527 }
6528 else
6529 {
6530 remapGrpToKrn[tmpIndex]++;
6531 }
6532 }
6533
6534 if( (groupInfo[remapKrnToGrp[currentKernel]].freqDispatch == numStepsDispatched) ||
6535 updateCurrKernel )
6536 {
6537 numStepsDispatched = 0;
6538 roundRobinCount++;
6539
6540 tmpIndex = roundRobinCount % numKernelGroups;
6541
6542 if( groupInfo[tmpIndex].groupFinished )
6543 {
6544 loopCount = 0;
6545 while( (loopCount < numKernelGroups) && (!kernelFound) )
6546 {
6547 roundRobinCount++;
6548 tmpIndex = roundRobinCount % numKernelGroups;
6549 if( state->hintIndexes.kernelIndexes[remapGrpToKrn[tmpIndex]] < kernelParams[remapGrpToKrn[tmpIndex]]->numThreads )
6550 {
6551 kernelFound = true;
6552 }
6553 loopCount++;
6554 }
6555 if( !kernelFound )
6556 {
6557 // Should be unreachable:
6558 // while still inside the totalNumThreads loop, at least one kernel must have threads left
6559 eStatus = MOS_STATUS_UNKNOWN;
6560 CM_ASSERTMESSAGE("Couldn't find kernel with threads left for EnqueueWithHints");
6561 goto finish;
6562 }
6563 }
6564
6565 currentKernel = remapGrpToKrn[tmpIndex];
6566 }
6567 }
6568 }
6569 else
6570 {
6571 if( state->hintIndexes.kernelIndexes[currentKernel] >= kernelParams[currentKernel]->numThreads )
6572 {
6573 currentKernel++;
6574 }
6575 }
6576
6577 if( kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates )
6578 {
6579 threadCoordinates.x = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].x;
6580 threadCoordinates.y = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].y;
6581 threadCoordinates.mask = kernelParams[currentKernel]->kernelThreadSpaceParam.threadCoordinates[state->hintIndexes.kernelIndexes[currentKernel]].mask;
6582 enableThreadSpace = true;
6583 }
6584
6585 mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardEnable =
6586 (kernelParams[currentKernel]->kernelThreadSpaceParam.dependencyInfo.count == 0) ? 0:1;
6587
6588 if( !singleSubSlice && systemInfo.isSliceInfoValid && sufficientSliceInfo )
6589 {
6590 sliceIndex = kernelsSliceInfo[remapKrnToGrp[currentKernel]].counter % kernelsSliceInfo[remapKrnToGrp[currentKernel]].numSubSlices;
6591 mediaObjectParams[currentKernel].dwSliceDestinationSelect = kernelsSliceInfo[remapKrnToGrp[currentKernel]].destination[sliceIndex].slice;
6592 mediaObjectParams[currentKernel].dwHalfSliceDestinationSelect = kernelsSliceInfo[remapKrnToGrp[currentKernel]].destination[sliceIndex].subSlice;
6593 mediaObjectParams[currentKernel].bForceDestination = true;
6594
6595 kernelsSliceInfo[remapKrnToGrp[currentKernel]].counter++;
6596 }
6597
6598 if( enableThreadSpace )
6599 {
6600 mediaObjectParams[currentKernel].VfeScoreboard.Value[0] = threadCoordinates.x;
6601 mediaObjectParams[currentKernel].VfeScoreboard.Value[1] = threadCoordinates.y;
6602 if( threadCoordinates.mask != CM_DEFAULT_THREAD_DEPENDENCY_MASK )
6603 {
6604 tmpThreadScoreboardMask = kernelScoreboardMask[currentKernel];
6605 // do the remapping
6606 for( k = 0; k < kernelParams[currentKernel]->kernelThreadSpaceParam.dependencyInfo.count; ++k )
6607 {
6608 if( (threadCoordinates.mask & CM_HINTS_LEASTBIT_MASK) == 0 )
6609 {
6610 CM_HAL_UNSETBIT(tmpThreadScoreboardMask, dependRemap[currentKernel][k]);
6611 }
6612
6613 threadCoordinates.mask = threadCoordinates.mask >> 1;
6614 }
6615
6616 mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardMask = tmpThreadScoreboardMask;
6617 }
6618 else
6619 {
6620 mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardMask = kernelScoreboardMask[currentKernel];
6621 }
6622 }
6623 else
6624 {
6625 mediaObjectParams[currentKernel].VfeScoreboard.Value[0] = state->hintIndexes.kernelIndexes[currentKernel] %
6626 kernelParams[currentKernel]->kernelThreadSpaceParam.threadSpaceWidth;
6627 mediaObjectParams[currentKernel].VfeScoreboard.Value[1] = state->hintIndexes.kernelIndexes[currentKernel] /
6628 kernelParams[currentKernel]->kernelThreadSpaceParam.threadSpaceWidth;
6629 mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardMask = kernelScoreboardMask[currentKernel];
6630 }
6631
6632 adjustedYCoord = 0;
6633 // adjust y coordinate for kernels after the first one
6634 if( currentKernel > 0 )
6635 {
6636 // if not first kernel, and has dependency,
6637 // and along scoreboard border need to mask out dependencies with y < 0
6638 if( kernelScoreboardMask[currentKernel] )
6639 {
6640 if (mediaObjectParams[currentKernel].VfeScoreboard.Value[1] == 0)
6641 {
6642 for( k = 0; k < vfeDependencyInfo.count; ++k )
6643 {
6644 if( vfeDependencyInfo.deltaY[k] < 0 )
6645 {
6646 CM_HAL_UNSETBIT(mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardMask, k);
6647 }
6648 }
6649 }
6650 }
6651
6652 for( j = currentKernel; j > 0; --j )
6653 {
6654 adjustedYCoord += kernelParams[j-1]->kernelThreadSpaceParam.threadSpaceHeight;
6655 }
6656 }
6657
6658 if( currentKernel < numKernels - 1 )
6659 {
6660 // if not last kernel, and has dependency,
6661 // along scoreboard border bottom need to mask out dependencies with y > 0
6662 if( kernelScoreboardMask[currentKernel] )
6663 {
6664 if (mediaObjectParams[currentKernel].VfeScoreboard.Value[1] ==
6665 (kernelParams[currentKernel]->kernelThreadSpaceParam.threadSpaceHeight - 1))
6666 {
6667 for( k = 0; k < vfeDependencyInfo.count; ++k )
6668 {
6669 if( vfeDependencyInfo.deltaY[k] > 0 )
6670 {
6671 CM_HAL_UNSETBIT(mediaObjectParams[currentKernel].VfeScoreboard.ScoreboardMask, k);
6672 }
6673 }
6674 }
6675 }
6676 }
6677
6678 mediaObjectParams[currentKernel].VfeScoreboard.Value[1] =
6679 mediaObjectParams[currentKernel].VfeScoreboard.Value[1] + adjustedYCoord;
6680
6681 for( aIndex = 0; aIndex < kernelParams[currentKernel]->numArgs; aIndex++ )
6682 {
6683 argParams[currentKernel] = &kernelParams[currentKernel]->argParams[aIndex];
6684 index = state->hintIndexes.kernelIndexes[currentKernel] * argParams[currentKernel]->perThread;
6685
6686 if( (kernelParams[currentKernel]->cmFlags & CM_KERNEL_FLAGS_CURBE) && !argParams[currentKernel]->perThread )
6687 {
6688 continue;
6689 }
6690
6691 CM_ASSERT(argParams[currentKernel]->payloadOffset < kernelParams[currentKernel]->payloadSize);
6692
6693 switch(argParams[currentKernel]->kind)
6694 {
6695 case CM_ARGUMENT_GENERAL:
6696 MOS_SecureMemcpy(
6697 cmdInline[currentKernel] + argParams[currentKernel]->payloadOffset,
6698 argParams[currentKernel]->unitSize,
6699 argParams[currentKernel]->firstValue + index * argParams[currentKernel]->unitSize,
6700 argParams[currentKernel]->unitSize);
6701 break;
6702
6703 case CM_ARGUMENT_SAMPLER:
6704 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSamplerState(
6705 state, kernelParams[currentKernel], argParams[currentKernel], &indexParams[currentKernel],
6706 mediaIds[currentKernel], index, cmdInline[currentKernel]));
6707 break;
6708
6709 case CM_ARGUMENT_SURFACEBUFFER:
6710 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
6711 state, argParams[currentKernel], &indexParams[currentKernel],
6712 bindingTableEntries[currentKernel], -1, index, cmdInline[currentKernel]));
6713 break;
6714
6715 case CM_ARGUMENT_SURFACE2D_UP:
6716 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPState(
6717 state, argParams[currentKernel], &indexParams[currentKernel],
6718 bindingTableEntries[currentKernel], index, cmdInline[currentKernel]));
6719 break;
6720
6721 case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
6722 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPSamplerState(
6723 state, argParams[currentKernel], &indexParams[currentKernel],
6724 bindingTableEntries[currentKernel], index, cmdInline[currentKernel]));
6725 break;
6726
6727 case CM_ARGUMENT_SURFACE2D_SAMPLER:
6728 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceSamplerState(
6729 state, argParams[currentKernel], &indexParams[currentKernel],
6730 bindingTableEntries[currentKernel], index, cmdInline[currentKernel]));
6731 break;
6732
6733 case CM_ARGUMENT_SURFACE2D:
6734 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
6735 state, argParams[currentKernel], &indexParams[currentKernel],
6736 bindingTableEntries[currentKernel], index, cmdInline[currentKernel]));
6737 break;
6738
6739 case CM_ARGUMENT_SURFACE3D:
6740 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceState(
6741 state, argParams[currentKernel], &indexParams[currentKernel],
6742 bindingTableEntries[currentKernel], index, cmdInline[currentKernel]));
6743 break;
6744
6745 case CM_ARGUMENT_SURFACE_VME:
6746 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupVmeSurfaceState(
6747 state, argParams[currentKernel], &indexParams[currentKernel],
6748 bindingTableEntries[currentKernel], 0, cmdInline[currentKernel]));
6749 break;
6750
6751 case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
6752 case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
6753 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceState(
6754 state, argParams[currentKernel], &indexParams[currentKernel],
6755 bindingTableEntries[currentKernel], 0, cmdInline[currentKernel]));
6756 break;
6757
6758 default:
6759 eStatus = MOS_STATUS_INVALID_PARAMETER;
6760 CM_ASSERTMESSAGE(
6761 "Argument kind '%d' is not supported", argParams[currentKernel]->kind);
6762 goto finish;
6763 }
6764 }
6765
6766 mediaObjectParams[currentKernel].pInlineData = cmdInline[currentKernel];
6767 state->renderHal->pMhwRenderInterface->AddMediaObject(nullptr, batchBuffer, &mediaObjectParams[currentKernel]);
6768
6769 state->hintIndexes.kernelIndexes[currentKernel]++;
6770 enableThreadSpace = false;
6771 kernelFound = false;
6772 updateCurrKernel = false;
6773 numDispatched++;
6774 } // for totalNumThreads
6775 } // else refCount <= 1
6776
6777 // setup global surfaces
6778 for( j = 0; j < numKernels; ++j )
6779 {
6780 for( i = 0; i < CM_MAX_GLOBAL_SURFACE_NUMBER; ++i )
6781 {
6782 if(( kernelParams[j]->globalSurface[i] & CM_SURFACE_MASK) != CM_NULL_SURFACE)
6783 {
6784 CM_HAL_KERNEL_ARG_PARAM tmpArgParam;
6785 argParam = &tmpArgParam;
6786
6787 tmpArgParam.kind = CM_ARGUMENT_SURFACEBUFFER;
6788 tmpArgParam.payloadOffset = 0;
6789 tmpArgParam.unitCount = 1;
6790 tmpArgParam.unitSize = sizeof(uint32_t);
6791 tmpArgParam.perThread = false;
6792 tmpArgParam.firstValue = (uint8_t*)&kernelParams[j]->globalSurface[i];
6793
6794 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
6795 state, argParam, &indexParams[j], bindingTableEntries[j],
6796 (int16_t)i, 0, nullptr));
6797 }
6798 }
6799
6800 // set number of samplers
6801 krnAllocations[j]->Params.Sampler_Count = indexParams[j].samplerIndexCount;
6802 }
6803
6804 // check to make sure we did all threads for all kernels
6805 if (numTasks <= 1 || lastTask )
6806 {
6807 for( i = 0; i < numKernels; ++i )
6808 {
6809 if( state->hintIndexes.kernelIndexes[i] < kernelParams[i]->numThreads )
6810 {
6811 eStatus = MOS_STATUS_INVALID_PARAMETER;
6812 CM_ASSERTMESSAGE("Not all threads for all kernels were put into batch buffer");
6813 goto finish;
6814 }
6815 }
6816 }
6817
6818 if ( lastTask )
6819 {
6820 MOS_ZeroMemory(&state->hintIndexes.kernelIndexes, sizeof(uint32_t) * CM_MAX_TASKS_EU_SATURATION);
6821 MOS_ZeroMemory(&state->hintIndexes.dispatchIndexes, sizeof(uint32_t) * CM_MAX_TASKS_EU_SATURATION);
6822 }
6823
6824 finish:
6825 // free memory
6826 if( mediaObjectParams ) MOS_FreeMemory(mediaObjectParams);
6827 if( kernelParams ) MOS_FreeMemory(kernelParams);
6828 if( argParams ) MOS_FreeMemory(argParams);
6829 if( cmdSizes ) MOS_FreeMemory(cmdSizes);
6830 if( remapKrnToGrp ) MOS_FreeMemory(remapKrnToGrp);
6831 if( remapGrpToKrn ) MOS_FreeMemory(remapGrpToKrn);
6832 if( kernelScoreboardMask ) MOS_FreeMemory(kernelScoreboardMask);
6833 if( parallelGraphInfo ) MOS_FreeMemory(parallelGraphInfo);
6834 if( numKernelsPerGrp ) MOS_FreeMemory(numKernelsPerGrp);
6835 if( groupInfo ) MOS_FreeMemory(groupInfo);
6836
6837 if( cmdInline )
6838 {
6839 for( i = 0; i < numKernels; ++i )
6840 {
6841 if( cmdInline[i] )
6842 MOS_FreeMemory(cmdInline[i]);
6843 }
6844 MOS_FreeMemory(cmdInline);
6845 }
6846
6847 if( kernelsSliceInfo )
6848 {
6849 for( i = 0; i < numKernelGroups; ++i )
6850 {
6851 if( kernelsSliceInfo[i].destination )
6852 MOS_FreeMemory(kernelsSliceInfo[i].destination);
6853 }
6854 MOS_FreeMemory(kernelsSliceInfo);
6855 }
6856
6857 if( dependRemap )
6858 {
6859 for( i = 0; i < numKernels; ++i )
6860 {
6861 if( dependRemap[i] )
6862 MOS_FreeMemory(dependRemap[i]);
6863 }
6864 MOS_FreeMemory(dependRemap);
6865 }
6866
6867 if( dispatchFreq )
6868 {
6869 for( i = 0; i < numKernels; ++i )
6870 {
6871 if( dispatchFreq[i] )
6872 MOS_FreeMemory(dispatchFreq[i]);
6873 }
6874 MOS_FreeMemory(dispatchFreq);
6875 }
6876
6877 return eStatus;
6878 }
6879
HalCm_ThreadsNumberPerGroup_MW(PCM_HAL_WALKER_PARAMS walkerParams)6880 uint32_t HalCm_ThreadsNumberPerGroup_MW(PCM_HAL_WALKER_PARAMS walkerParams)
6881 {
6882 int localInnerCount = 0, localMidCount = 0, localOuterCount = 0, globalInnerCount = 0, globalOuterCount = 0;
6883 int localInnerCountMax = 0, localMidCountMax = 0, localOuterCountMax = 0, globalInnerCountMax = 0;
6884 int midX = 0, midY = 0, midStep = 0;
6885 int outerX = 0, outerY = 0;
6886 int localInnerX = 0, localInnerY = 0;
6887 int blockSizeX = 0, blockSizeY = 0;
6888 //int x, y;
6889
6890 int localLoopExecCount = walkerParams->localLoopExecCount;
6891 int globalLoopExecCount = walkerParams->globalLoopExecCount;
6892 int globalresX = walkerParams->globalResolution.x, globalresY = walkerParams->globalResolution.y;
6893 int globalOuterX = walkerParams->globalStart.x, globalOuterY = walkerParams->globalStart.y;
6894 int globalOuterStepX = walkerParams->globalOutlerLoopStride.x, globalOuterStepY = walkerParams->globalOutlerLoopStride.y;
6895 int globalInnerStepX = walkerParams->globalInnerLoopUnit.x, globalInnerStepY = walkerParams->globalInnerLoopUnit.y;
6896 int middleStepX = walkerParams->midLoopUnitX, middleStepY = walkerParams->midLoopUnitY, extraSteps = walkerParams->middleLoopExtraSteps;
6897 int localblockresX = walkerParams->blockResolution.x, localblockresY = walkerParams->blockResolution.y;
6898 int localStartX = walkerParams->localStart.x, localStartY = walkerParams->localStart.y;
6899 int localOuterStepX = walkerParams->localOutLoopStride.x, localOuterStepY = walkerParams->localOutLoopStride.y;
6900 int localInnerStepX = walkerParams->localInnerLoopUnit.x, localInnerStepY = walkerParams->localInnerLoopUnit.y;
6901
6902 uint32_t threadsNumberPergroup = 0;
6903
6904 //do global_outer_looper initialization
6905 while (((globalOuterX >= globalresX) && (globalInnerStepX < 0)) ||
6906 (((globalOuterX + localblockresX) < 0) && (globalInnerStepX > 0)) ||
6907 ((globalOuterY >= globalresY) && (globalInnerStepY < 0)) ||
6908 (((globalOuterX + localblockresY) < 0) && (globalInnerStepY > 0)))
6909 {
6910 globalOuterX += globalInnerStepX;
6911 globalOuterY += globalInnerStepY;
6912 }
6913
6914 //global_outer_loop_in_bounds()
6915 while ((globalOuterX < globalresX) &&
6916 (globalOuterY < globalresY) &&
6917 (globalOuterX + localblockresX > 0) &&
6918 (globalOuterY + localblockresY > 0) &&
6919 (globalOuterCount <= globalLoopExecCount))
6920 {
6921 int globalInnerX = globalOuterX;
6922 int globalInnerY = globalOuterY;
6923
6924 if (globalInnerCountMax < globalInnerCount)
6925 {
6926 globalInnerCountMax = globalInnerCount;
6927 }
6928 globalInnerCount = 0;
6929
6930 //global_inner_loop_in_bounds()
6931 while ((globalInnerX < globalresX) &&
6932 (globalInnerY < globalresY) &&
6933 (globalInnerX + localblockresX > 0) &&
6934 (globalInnerY + localblockresY > 0))
6935 {
6936 int globalInnerXCopy = globalInnerX;
6937 int globalInnerYCopy = globalInnerY;
6938 if (globalInnerX < 0)
6939 globalInnerXCopy = 0;
6940 if (globalInnerY < 0)
6941 globalInnerYCopy = 0;
6942
6943 if (globalInnerX < 0)
6944 blockSizeX = localblockresX + globalInnerX;
6945 else if ((globalresX - globalInnerX) < localblockresX)
6946 blockSizeX = globalresX - globalInnerX;
6947 else
6948 blockSizeX = localblockresX;
6949 if (globalInnerY < 0)
6950 blockSizeY = localblockresY + globalInnerY;
6951 else if ((globalresY - globalInnerY) < localblockresY)
6952 blockSizeY = globalresY - globalInnerY;
6953 else
6954 blockSizeY = localblockresY;
6955
6956 outerX = localStartX;
6957 outerY = localStartY;
6958
6959 if (localOuterCountMax < localOuterCount)
6960 {
6961 localOuterCountMax = localOuterCount;
6962 }
6963 localOuterCount = 0;
6964
6965 while ((outerX >= blockSizeX && localInnerStepX < 0) ||
6966 (outerX < 0 && localInnerStepX > 0) ||
6967 (outerY >= blockSizeY && localInnerStepY < 0) ||
6968 (outerY < 0 && localInnerStepY > 0))
6969 {
6970 outerX += localInnerStepX;
6971 outerY += localInnerStepY;
6972 }
6973
6974 //local_outer_loop_in_bounds()
6975 while ((outerX < blockSizeX) &&
6976 (outerY < blockSizeY) &&
6977 (outerX >= 0) &&
6978 (outerY >= 0) &&
6979 (localOuterCount <= localLoopExecCount))
6980 {
6981 midX = outerX;
6982 midY = outerY;
6983 midStep = 0;
6984
6985 if (localMidCountMax < localMidCount)
6986 {
6987 localMidCountMax = localMidCount;
6988 }
6989 localMidCount = 0;
6990
6991 //local_middle_steps_remaining()
6992 while ((midStep <= extraSteps) &&
6993 (midX < blockSizeX) &&
6994 (midY < blockSizeY) &&
6995 (midX >= 0) &&
6996 (midY >= 0))
6997 {
6998 localInnerX = midX;
6999 localInnerY = midY;
7000
7001 if (localInnerCountMax < localInnerCount)
7002 {
7003 localInnerCountMax = localInnerCount;
7004 }
7005 localInnerCount = 0;
7006
7007 //local_inner_loop_shrinking()
7008 while ((localInnerX < blockSizeX) &&
7009 (localInnerY < blockSizeY) &&
7010 (localInnerX >= 0) &&
7011 (localInnerY >= 0))
7012 {
7013 //x = localInnerX + globalInnerXCopy;
7014 //y = localInnerY + globalInnerYCopy;
7015 localInnerCount ++;
7016
7017 localInnerX += localInnerStepX;
7018 localInnerY += localInnerStepY;
7019 }
7020 localMidCount++;
7021 midStep++;
7022 midX += middleStepX;
7023 midY += middleStepY;
7024 }
7025 localOuterCount += 1;
7026 outerX += localOuterStepX;
7027 outerY += localOuterStepY;
7028 while ((outerX >= blockSizeX && localInnerStepX < 0) ||
7029 (outerX <0 && localInnerStepX > 0) ||
7030 (outerY >= blockSizeY && localInnerStepY < 0) ||
7031 (outerY <0 && localInnerStepY > 0))
7032 {
7033 outerX += localInnerStepX;
7034 outerY += localInnerStepY;
7035 }
7036 }
7037 globalInnerCount++;
7038 globalInnerX += globalInnerStepX;
7039 globalInnerY += globalInnerStepY;
7040 }
7041 globalOuterCount += 1;
7042 globalOuterX += globalOuterStepX;
7043 globalOuterY += globalOuterStepY;
7044 while (((globalOuterX >= globalresX) && (globalInnerStepX < 0)) ||
7045 (((globalOuterX + localblockresX) < 0) && (globalInnerStepX > 0)) ||
7046 ((globalOuterY >= globalresY) && (globalInnerStepY < 0)) ||
7047 (((globalOuterX + localblockresY) < 0) && (globalInnerStepY > 0)))
7048 {
7049 globalOuterX += globalInnerStepX;
7050 globalOuterY += globalInnerStepY;
7051 }
7052 }
7053
7054 switch (walkerParams->groupIdLoopSelect)
7055 {
7056 case CM_MW_GROUP_COLORLOOP:
7057 threadsNumberPergroup = walkerParams->colorCountMinusOne + 1;
7058 break;
7059 case CM_MW_GROUP_INNERLOCAL:
7060 threadsNumberPergroup = localInnerCount * (walkerParams->colorCountMinusOne + 1);
7061 break;
7062 case CM_MW_GROUP_MIDLOCAL:
7063 threadsNumberPergroup = localMidCount * localInnerCount * (walkerParams->colorCountMinusOne + 1);
7064 break;
7065 case CM_MW_GROUP_OUTERLOCAL:
7066 threadsNumberPergroup = localOuterCount * localMidCount * localInnerCount * (walkerParams->colorCountMinusOne + 1);
7067 break;
7068 case CM_MW_GROUP_INNERGLOBAL:
7069 threadsNumberPergroup = globalInnerCount * localOuterCount * localMidCount * localInnerCount * (walkerParams->colorCountMinusOne + 1);
7070 break;
7071 default:
7072 threadsNumberPergroup = globalOuterCount * globalInnerCount * localOuterCount * localMidCount * localInnerCount * (walkerParams->colorCountMinusOne + 1);
7073 break;
7074 }
7075
7076 return threadsNumberPergroup;
7077 }
7078
HalCm_SetupMediaWalkerParams(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM kernelParam)7079 MOS_STATUS HalCm_SetupMediaWalkerParams(
7080 PCM_HAL_STATE state,
7081 PCM_HAL_KERNEL_PARAM kernelParam)
7082 {
7083 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7084 PCM_HAL_TASK_PARAM taskParam = state->taskParam;
7085 PCM_HAL_WALKER_PARAMS walkerParams = &kernelParam->walkerParams;
7086
7087 //Using global walker enable flag
7088 walkerParams->cmWalkerEnable = state->walkerParams.CmWalkerEnable;
7089 if (walkerParams->cmWalkerEnable)
7090 {
7091 // MEDIA_WALKER
7092 CM_HAL_KERNEL_THREADSPACE_PARAM kernelThreadSpace;
7093 if (kernelParam->kernelThreadSpaceParam.threadSpaceWidth)
7094 {
7095 kernelThreadSpace.threadSpaceWidth = kernelParam->kernelThreadSpaceParam.threadSpaceWidth;
7096 kernelThreadSpace.threadSpaceHeight = kernelParam->kernelThreadSpaceParam.threadSpaceHeight;
7097 kernelThreadSpace.patternType = kernelParam->kernelThreadSpaceParam.patternType;
7098 kernelThreadSpace.walkingPattern = kernelParam->kernelThreadSpaceParam.walkingPattern;
7099 kernelThreadSpace.groupSelect = kernelParam->kernelThreadSpaceParam.groupSelect;
7100 kernelThreadSpace.colorCountMinusOne = kernelParam->kernelThreadSpaceParam.colorCountMinusOne;
7101 }
7102 else
7103 {
7104 kernelThreadSpace.threadSpaceWidth = (uint16_t)taskParam->threadSpaceWidth;
7105 kernelThreadSpace.threadSpaceHeight = (uint16_t)taskParam->threadSpaceHeight;
7106 kernelThreadSpace.patternType = taskParam->dependencyPattern;
7107 kernelThreadSpace.walkingPattern = taskParam->walkingPattern;
7108 kernelThreadSpace.groupSelect = taskParam->mediaWalkerGroupSelect;
7109 kernelThreadSpace.colorCountMinusOne = taskParam->colorCountMinusOne;
7110 }
7111
7112 // check for valid thread space width and height here since different from media object
7113 if (kernelThreadSpace.threadSpaceWidth > state->cmHalInterface->GetMediaWalkerMaxThreadWidth())
7114 {
7115 CM_ASSERTMESSAGE("Error: Exceeds the maximum thread space width.");
7116 eStatus = MOS_STATUS_INVALID_PARAMETER;
7117 goto finish;
7118 }
7119 if (kernelThreadSpace.threadSpaceHeight > state->cmHalInterface->GetMediaWalkerMaxThreadHeight())
7120 {
7121 CM_ASSERTMESSAGE("Error: Exceeds the maximum thread space height.");
7122 eStatus = MOS_STATUS_INVALID_PARAMETER;
7123 goto finish;
7124 }
7125
7126 //walkerParams->InterfaceDescriptorOffset = mediaID;// mediaObjectParam.dwInterfaceDescriptorOffset;
7127 walkerParams->inlineDataLength = MOS_ALIGN_CEIL(kernelParam->indirectDataParam.indirectDataSize, 4);
7128 walkerParams->inlineData = kernelParam->indirectDataParam.indirectData;
7129
7130 walkerParams->colorCountMinusOne = kernelThreadSpace.colorCountMinusOne;// taskParam->ColorCountMinusOne;
7131 walkerParams->groupIdLoopSelect = (uint32_t)kernelThreadSpace.groupSelect;
7132
7133 CM_WALKING_PATTERN walkPattern = kernelThreadSpace.walkingPattern;
7134 switch (kernelThreadSpace.patternType)
7135 {
7136 case CM_NONE_DEPENDENCY:
7137 break;
7138 case CM_HORIZONTAL_WAVE:
7139 walkPattern = CM_WALK_HORIZONTAL;
7140 break;
7141 case CM_VERTICAL_WAVE:
7142 walkPattern = CM_WALK_VERTICAL;
7143 break;
7144 case CM_WAVEFRONT:
7145 walkPattern = CM_WALK_WAVEFRONT;
7146 break;
7147 case CM_WAVEFRONT26:
7148 walkPattern = CM_WALK_WAVEFRONT26;
7149 break;
7150 case CM_WAVEFRONT26X:
7151 if (kernelThreadSpace.threadSpaceWidth > 1)
7152 {
7153 walkPattern = CM_WALK_WAVEFRONT26X;
7154 }
7155 else
7156 {
7157 walkPattern = CM_WALK_DEFAULT;
7158 }
7159 break;
7160 case CM_WAVEFRONT26ZIG:
7161 if (kernelThreadSpace.threadSpaceWidth > 2)
7162 {
7163 walkPattern = CM_WALK_WAVEFRONT26ZIG;
7164 }
7165 else
7166 {
7167 walkPattern = CM_WALK_DEFAULT;
7168 }
7169 break;
7170 default:
7171 CM_ASSERTMESSAGE("Error: Invalid walking pattern.");
7172 walkPattern = CM_WALK_DEFAULT;
7173 break;
7174 }
7175 if (taskParam->walkingParamsValid)
7176 {
7177 CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SetMediaWalkerParams
7178 (taskParam->walkingParams, walkerParams));
7179
7180 if (walkPattern == CM_WALK_HORIZONTAL || walkPattern == CM_WALK_DEFAULT)
7181 {
7182 walkerParams->localEnd.x = walkerParams->blockResolution.x - 1;
7183 }
7184 else if (walkPattern == CM_WALK_VERTICAL)
7185 {
7186 walkerParams->localEnd.y = walkerParams->blockResolution.y - 1;
7187 }
7188 }
7189 else if (kernelParam->kernelThreadSpaceParam.walkingParamsValid)
7190 {
7191 CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SetMediaWalkerParams(
7192 kernelParam->kernelThreadSpaceParam.walkingParams, walkerParams));
7193
7194 if (walkPattern == CM_WALK_HORIZONTAL || walkPattern == CM_WALK_DEFAULT)
7195 {
7196 walkerParams->localEnd.x = walkerParams->blockResolution.x - 1;
7197 }
7198 else if (walkPattern == CM_WALK_VERTICAL)
7199 {
7200 walkerParams->localEnd.y = walkerParams->blockResolution.y - 1;
7201 }
7202
7203 }
7204 else
7205 {
7206 //Local loop parameters
7207 walkerParams->blockResolution.x = kernelThreadSpace.threadSpaceWidth;
7208 walkerParams->blockResolution.y = kernelThreadSpace.threadSpaceHeight;
7209
7210 walkerParams->localStart.x = 0;
7211 walkerParams->localStart.y = 0;
7212 walkerParams->localEnd.x = 0;
7213 walkerParams->localEnd.y = 0;
7214
7215 walkerParams->globalLoopExecCount = 1;
7216 walkerParams->midLoopUnitX = 0;
7217 walkerParams->midLoopUnitY = 0;
7218 walkerParams->middleLoopExtraSteps = 0;
7219
7220 // account for odd Height/Width for 26x and 26Zig
7221 uint16_t adjHeight = ((kernelThreadSpace.threadSpaceHeight + 1) >> 1) << 1;
7222 uint16_t adjWidth = ((kernelThreadSpace.threadSpaceWidth + 1) >> 1) << 1;
7223
7224 uint32_t maxThreadWidth = state->cmHalInterface->GetMediaWalkerMaxThreadWidth();
7225 switch (walkPattern)
7226 {
7227 case CM_WALK_DEFAULT:
7228 case CM_WALK_HORIZONTAL:
7229 if (kernelThreadSpace.threadSpaceWidth == kernelParam->numThreads &&
7230 kernelThreadSpace.threadSpaceHeight == 1)
7231 {
7232 walkerParams->blockResolution.x = MOS_MIN(kernelParam->numThreads, maxThreadWidth);
7233 walkerParams->blockResolution.y = 1 + kernelParam->numThreads / maxThreadWidth;
7234 }
7235 walkerParams->localLoopExecCount = walkerParams->blockResolution.y - 1;
7236
7237 walkerParams->localOutLoopStride.x = 0;
7238 walkerParams->localOutLoopStride.y = 1;
7239 walkerParams->localInnerLoopUnit.x = 1;
7240 walkerParams->localInnerLoopUnit.y = 0;
7241
7242 walkerParams->localEnd.x = walkerParams->blockResolution.x - 1;
7243
7244 break;
7245
7246 case CM_WALK_WAVEFRONT:
7247 walkerParams->localLoopExecCount = kernelThreadSpace.threadSpaceWidth + (kernelThreadSpace.threadSpaceHeight - 1) * 1 - 1;
7248
7249 walkerParams->localOutLoopStride.x = 1;
7250 walkerParams->localOutLoopStride.y = 0;
7251 walkerParams->localInnerLoopUnit.x = 0xFFFF; // -1 in uint32_t:16
7252 walkerParams->localInnerLoopUnit.y = 1;
7253 break;
7254
7255 case CM_WALK_WAVEFRONT26:
7256 walkerParams->globalResolution.x = kernelThreadSpace.threadSpaceWidth;
7257 walkerParams->globalResolution.y = kernelThreadSpace.threadSpaceHeight;
7258 walkerParams->localOutLoopStride.x = 1;
7259 walkerParams->localOutLoopStride.y = 0;
7260 walkerParams->localInnerLoopUnit.x = 0xFFFE; // -2 in uint32_t:16
7261 walkerParams->localInnerLoopUnit.y = 1;
7262 walkerParams->localLoopExecCount = kernelThreadSpace.threadSpaceWidth +
7263 (kernelThreadSpace.threadSpaceHeight - 1) * 2 - 1;
7264
7265 //localLoopExecCount has limitation, it should be less than 2^12
7266 while (walkerParams->localLoopExecCount >= 0xFFF)
7267 {
7268 //separate to multiple global levels
7269 if (walkerParams->blockResolution.x > (walkerParams->blockResolution.y * 2))
7270 {
7271 walkerParams->blockResolution.x = (walkerParams->blockResolution.x+1) >> 1;
7272 walkerParams->globalLoopExecCount = (walkerParams->globalResolution.x +
7273 walkerParams->blockResolution.x - 1) / walkerParams->blockResolution.x;
7274 }
7275 else
7276 {
7277 walkerParams->blockResolution.y = (walkerParams->blockResolution.y + 1) >> 1;
7278 }
7279 walkerParams->localLoopExecCount = walkerParams->blockResolution.x +
7280 (walkerParams->blockResolution.y - 1) * 2 - 1;
7281 }
7282 walkerParams->globalOutlerLoopStride.x = walkerParams->blockResolution.x;
7283 walkerParams->globalOutlerLoopStride.y = 0;
7284 walkerParams->globalInnerLoopUnit.x = 0;
7285 walkerParams->globalInnerLoopUnit.y = walkerParams->blockResolution.y;
7286 break;
7287
7288 case CM_WALK_WAVEFRONT26X:
7289 case CM_WALK_WAVEFRONT26XALT:
7290 walkerParams->localLoopExecCount = 0x7ff;
7291 walkerParams->globalLoopExecCount = 0;
7292
7293 walkerParams->localOutLoopStride.x = 1;
7294 walkerParams->localOutLoopStride.y = 0;
7295 walkerParams->localInnerLoopUnit.x = 0xFFFE; // -2 in uint32_t:16
7296 walkerParams->localInnerLoopUnit.y = 2;
7297
7298 walkerParams->middleLoopExtraSteps = 1;
7299 walkerParams->midLoopUnitX = 0;
7300 walkerParams->midLoopUnitY = 1;
7301 break;
7302
7303 case CM_WALK_WAVEFRONT26ZIG:
7304 walkerParams->localLoopExecCount = 1;
7305 walkerParams->globalLoopExecCount = (adjHeight / 2 - 1) * 2 + (adjWidth / 2) - 1;
7306
7307 walkerParams->localOutLoopStride.x = 0;
7308 walkerParams->localOutLoopStride.y = 1;
7309 walkerParams->localInnerLoopUnit.x = 1;
7310 walkerParams->localInnerLoopUnit.y = 0;
7311
7312 walkerParams->blockResolution.x = 2;
7313 walkerParams->blockResolution.y = 2;
7314
7315 walkerParams->localEnd.x = walkerParams->blockResolution.x - 1;
7316 break;
7317
7318 case CM_WALK_VERTICAL:
7319 walkerParams->localLoopExecCount = walkerParams->blockResolution.x - 1;
7320
7321 walkerParams->localOutLoopStride.x = 1;
7322 walkerParams->localOutLoopStride.y = 0;
7323 walkerParams->localInnerLoopUnit.x = 0;
7324 walkerParams->localInnerLoopUnit.y = 1;
7325
7326 walkerParams->localEnd.y = walkerParams->blockResolution.y - 1;
7327
7328 break;
7329
7330 case CM_WALK_WAVEFRONT45D:
7331 walkerParams->localLoopExecCount = 0x7ff;
7332 walkerParams->globalLoopExecCount = 0x7ff;
7333
7334 walkerParams->localStart.x = kernelThreadSpace.threadSpaceWidth;
7335 walkerParams->localOutLoopStride.x = 1;
7336 walkerParams->localOutLoopStride.y = 0;
7337 walkerParams->localInnerLoopUnit.x = 0xFFFF; // -1 in uint32_t:16
7338 walkerParams->localInnerLoopUnit.y = 1;
7339 break;
7340
7341 case CM_WALK_WAVEFRONT45XD_2:
7342 walkerParams->localLoopExecCount = 0x7ff;
7343 walkerParams->globalLoopExecCount = 0x7ff;
7344
7345 // Local
7346 walkerParams->localStart.x = kernelThreadSpace.threadSpaceWidth;
7347 walkerParams->localOutLoopStride.x = 1;
7348 walkerParams->localOutLoopStride.y = 0;
7349 walkerParams->localInnerLoopUnit.x = 0xFFFF; // -1 in uint32_t:16
7350 walkerParams->localInnerLoopUnit.y = 2;
7351
7352 // Mid
7353 walkerParams->middleLoopExtraSteps = 1;
7354 walkerParams->midLoopUnitX = 0;
7355 walkerParams->midLoopUnitY = 1;
7356
7357 break;
7358
7359 case CM_WALK_WAVEFRONT26D:
7360 walkerParams->localLoopExecCount = 0x7ff;
7361 walkerParams->globalLoopExecCount = 0x7ff;
7362
7363 walkerParams->localStart.x = kernelThreadSpace.threadSpaceWidth;
7364 walkerParams->localOutLoopStride.x = 1;
7365 walkerParams->localOutLoopStride.y = 0;
7366 walkerParams->localInnerLoopUnit.x = 0xFFFE; // -2 in uint32_t:16
7367 walkerParams->localInnerLoopUnit.y = 1;
7368 break;
7369
7370 case CM_WALK_WAVEFRONT26XD:
7371 walkerParams->localLoopExecCount = 0x7ff;
7372 walkerParams->globalLoopExecCount = 0x7ff;
7373
7374 // Local
7375 walkerParams->localStart.x = kernelThreadSpace.threadSpaceWidth;
7376 walkerParams->localOutLoopStride.x = 1;
7377 walkerParams->localOutLoopStride.y = 0;
7378 walkerParams->localInnerLoopUnit.x = 0xFFFE; // -2 in uint32_t:16
7379 walkerParams->localInnerLoopUnit.y = 2;
7380
7381 // Mid
7382 walkerParams->middleLoopExtraSteps = 1;
7383 walkerParams->midLoopUnitX = 0;
7384 walkerParams->midLoopUnitY = 1;
7385 break;
7386
7387 default:
7388 walkerParams->localLoopExecCount = MOS_MIN(kernelParam->numThreads, 0x3FF);
7389
7390 walkerParams->localOutLoopStride.x = 0;
7391 walkerParams->localOutLoopStride.y = 1;
7392 walkerParams->localInnerLoopUnit.x = 1;
7393 walkerParams->localInnerLoopUnit.y = 0;
7394 break;
7395 }
7396
7397 //Global loop parameters: execution count, resolution and strides
7398 //Since no global loop, global resolution equals block resolution.
7399 walkerParams->globalStart.x = 0;
7400 walkerParams->globalStart.y = 0;
7401 walkerParams->globalOutlerLoopStride.y = 0;
7402
7403 if (walkPattern == CM_WALK_WAVEFRONT26ZIG)
7404 {
7405 walkerParams->globalResolution.x = kernelThreadSpace.threadSpaceWidth;
7406 walkerParams->globalResolution.y = kernelThreadSpace.threadSpaceHeight;
7407 walkerParams->globalOutlerLoopStride.x = 2;
7408 walkerParams->globalInnerLoopUnit.x = 0xFFFC;
7409 walkerParams->globalInnerLoopUnit.y = 2;
7410 }
7411 else if(walkPattern != CM_WALK_WAVEFRONT26)
7412 {
7413 walkerParams->globalResolution.x = walkerParams->blockResolution.x;
7414 walkerParams->globalResolution.y = walkerParams->blockResolution.y;
7415 walkerParams->globalOutlerLoopStride.x = walkerParams->globalResolution.x;
7416 walkerParams->globalInnerLoopUnit.x = 0;
7417 walkerParams->globalInnerLoopUnit.y = walkerParams->globalResolution.y;
7418 }
7419 }
7420
7421 //Need calculate number threads per group for media walker, the minimum value is 1
7422 if (kernelThreadSpace.groupSelect > CM_MW_GROUP_NONE)
7423 {
7424 kernelParam->numberThreadsInGroup = HalCm_ThreadsNumberPerGroup_MW(walkerParams);
7425 }
7426 else
7427 {
7428 kernelParam->numberThreadsInGroup = 1;
7429 }
7430 }
7431
7432 finish:
7433 return eStatus;
7434 }
7435
HalCm_AcquireSamplerStatistics(PCM_HAL_STATE state)7436 MOS_STATUS HalCm_AcquireSamplerStatistics(PCM_HAL_STATE state)
7437 {
7438 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7439 uint32_t i = 0;
7440
7441 unsigned int maxBTIindex[MAX_ELEMENT_TYPE_COUNT] = {0}; //tempoary variable, it will hold the max BTI index in each element type
7442
7443 /* enumerate through the samplerTable for the one in use, then count and analyze */
7444 for (i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++) { //state->CmDeviceParam.iMaxSamplerTableSize;
7445
7446 if (state->samplerTable[i].bInUse) {
7447 uint32_t samplerIndex = state->samplerIndexTable[i];
7448 if (samplerIndex != CM_INVALID_INDEX) {
7449 MHW_SAMPLER_ELEMENT_TYPE elementType = state->samplerTable[i].ElementType;
7450 maxBTIindex[elementType] = (maxBTIindex[elementType] > samplerIndex) ? maxBTIindex[elementType] : samplerIndex;
7451 }
7452 else
7453 state->samplerStatistics.samplerCount[state->samplerTable[i].ElementType]++;
7454 }
7455
7456 }
7457
7458 int tempbase=0;
7459 state->samplerStatistics.samplerIndexBase[MHW_Sampler2Elements]
7460 = (state->samplerStatistics.samplerCount[MHW_Sampler2Elements]) ? 0 : -1;
7461 tempbase
7462 = state->samplerStatistics.samplerIndexBase[MHW_Sampler2Elements];
7463 state->samplerStatistics.samplerIndexBase[MHW_Sampler4Elements]
7464 = (state->samplerStatistics.samplerCount[MHW_Sampler4Elements]) ?
7465 ((tempbase == -1) ? 0 : INDEX_ALIGN(state->samplerStatistics.samplerCount[MHW_Sampler2Elements], 2, 4))
7466 : tempbase;
7467 tempbase
7468 = state->samplerStatistics.samplerIndexBase[MHW_Sampler4Elements];
7469 state->samplerStatistics.samplerIndexBase[MHW_Sampler8Elements]
7470 = (state->samplerStatistics.samplerCount[MHW_Sampler8Elements]) ?
7471 ((tempbase == -1) ? 0 : INDEX_ALIGN(state->samplerStatistics.samplerCount[MHW_Sampler4Elements], 4, 8))
7472 : tempbase;
7473 tempbase
7474 = state->samplerStatistics.samplerIndexBase[MHW_Sampler8Elements];
7475 state->samplerStatistics.samplerIndexBase[MHW_Sampler64Elements]
7476 = (state->samplerStatistics.samplerCount[MHW_Sampler64Elements]) ?
7477 ((tempbase == -1) ? 0 : INDEX_ALIGN(state->samplerStatistics.samplerCount[MHW_Sampler8Elements], 8, 64))
7478 : tempbase;
7479 tempbase
7480 = state->samplerStatistics.samplerIndexBase[MHW_Sampler64Elements];
7481 state->samplerStatistics.samplerIndexBase[MHW_Sampler128Elements]
7482 = (state->samplerStatistics.samplerCount[MHW_Sampler128Elements]) ?
7483 ((tempbase == -1) ? 0 : INDEX_ALIGN(state->samplerStatistics.samplerCount[MHW_Sampler64Elements], 64, 128))
7484 : tempbase;
7485
7486 /* There are Sampler BTI, next step needs to consider it during calculate the base */
7487 for (int k = MHW_Sampler2Elements; k < MHW_Sampler128Elements; k++) {
7488 if (state->samplerStatistics.samplerIndexBase[k + 1] < maxBTIindex[k])
7489 state->samplerStatistics.samplerIndexBase[k + 1] = maxBTIindex[k];
7490 }
7491 return eStatus;
7492 }
7493
7494 //*-----------------------------------------------------------------------------
7495 //| Purpose: Initial setup of HW states for the kernel
7496 //| Returns: Result of the operation
7497 //*-----------------------------------------------------------------------------
HalCm_SetupStatesForKernelInitial(PCM_HAL_STATE state,PRENDERHAL_MEDIA_STATE mediaState,PMHW_BATCH_BUFFER batchBuffer,int32_t taskId,PCM_HAL_KERNEL_PARAM kernelParam,PCM_HAL_INDEX_PARAM indexParam,uint32_t kernelCurbeOffset,int32_t & bindingTable,int32_t & mediaID,PRENDERHAL_KRN_ALLOCATION & krnAllocation)7498 MOS_STATUS HalCm_SetupStatesForKernelInitial(
7499 PCM_HAL_STATE state,
7500 PRENDERHAL_MEDIA_STATE mediaState,
7501 PMHW_BATCH_BUFFER batchBuffer,
7502 int32_t taskId,
7503 PCM_HAL_KERNEL_PARAM kernelParam,
7504 PCM_HAL_INDEX_PARAM indexParam,
7505 uint32_t kernelCurbeOffset,
7506 int32_t& bindingTable,
7507 int32_t& mediaID,
7508 PRENDERHAL_KRN_ALLOCATION &krnAllocation)
7509 {
7510 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
7511 PRENDERHAL_INTERFACE renderHal = state->renderHal;
7512 PRENDERHAL_STATE_HEAP stateHeap = renderHal->pStateHeap;
7513 PCM_INDIRECT_SURFACE_INFO indirectSurfaceInfo = kernelParam->indirectDataParam.surfaceInfo;
7514 PCM_GPGPU_WALKER_PARAMS perKernelGpGpuWalkerParames = &kernelParam->gpgpuWalkerParams;
7515 UNUSED(batchBuffer);
7516 UNUSED(taskId);
7517
7518 MHW_MEDIA_OBJECT_PARAMS mediaObjectParam;
7519 PCM_HAL_KERNEL_ARG_PARAM argParam;
7520 uint32_t hdrSize;
7521 uint32_t index;
7522 uint32_t value;
7523 uint32_t btIndex;
7524 uint32_t surfIndex;
7525 uint32_t aIndex;
7526 uint32_t idZ;
7527 uint32_t idY;
7528 uint32_t idX;
7529 uint32_t localIdIndex;
7530 CM_SURFACE_BTI_INFO surfBTIInfo;
7531
7532 bool vmeUsed = false;
7533 CM_PLATFORM_INFO platformInfo;
7534 PRENDERHAL_MEDIA_STATE_LEGACY mediaStateLegacy = (PRENDERHAL_MEDIA_STATE_LEGACY)mediaState;
7535
7536 localIdIndex = kernelParam->localIdIndex;
7537
7538 state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
7539
7540 HalCm_PreSetBindingIndex(indexParam, CM_NULL_SURFACE_BINDING_INDEX, CM_NULL_SURFACE_BINDING_INDEX);
7541
7542 HalCm_PreSetBindingIndex(indexParam, surfBTIInfo.reservedSurfaceStart,
7543 surfBTIInfo.reservedSurfaceStart + CM_MAX_GLOBAL_SURFACE_NUMBER - 1);
7544
7545 if (kernelParam->indirectDataParam.surfaceCount)
7546 {
7547 for (index = 0; index < kernelParam->indirectDataParam.surfaceCount; index++)
7548 {
7549 value = (indirectSurfaceInfo + index)->bindingTableIndex;
7550 HalCm_PreSetBindingIndex(indexParam, value, value);
7551 }
7552 }
7553
7554 // Get the binding table for this kernel
7555 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignBindingTable(renderHal, &bindingTable));
7556
7557 if (state->dshEnabled)
7558 {
7559 // Kernels are already pre-loaded in GSH
7560 // krnAllocation is the head of a linked list
7561 if (!krnAllocation)
7562 {
7563 CM_ASSERTMESSAGE("Error: Invalid kernel allocation.");
7564 goto finish;
7565 }
7566 }
7567 else
7568 {
7569 // Load the Kernel to GSH
7570 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_LoadKernel(
7571 state,
7572 kernelParam,
7573 0,
7574 krnAllocation));
7575 }
7576
7577 // initialize curbe buffer
7578 if (kernelParam->totalCurbeSize > 0)
7579 {
7580 // Update Curbe offset after curbe load command
7581 if (state->dshEnabled)
7582 {
7583 mediaStateLegacy->pDynamicState->Curbe.iCurrent += MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
7584 }
7585 else
7586 {
7587 mediaStateLegacy->iCurbeOffset += MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
7588 }
7589 }
7590
7591 //Setup media walker parameters if it is
7592 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupMediaWalkerParams(state, kernelParam));
7593
7594 // Allocate Interface Descriptor
7595 mediaID = HalCm_AllocateMediaID(
7596 state,
7597 kernelParam,
7598 krnAllocation,
7599 bindingTable,
7600 kernelCurbeOffset);
7601
7602 if (mediaID < 0)
7603 {
7604 eStatus = MOS_STATUS_INVALID_PARAMETER;
7605 CM_ASSERTMESSAGE("Unable to get Media ID");
7606 goto finish;
7607 }
7608
7609 // Setup the Media object
7610 hdrSize = renderHal->pHwSizes->dwSizeMediaObjectHeaderCmd;
7611 mediaObjectParam.dwInterfaceDescriptorOffset = mediaID;
7612 if (kernelParam->indirectDataParam.indirectDataSize)
7613 {
7614 mediaObjectParam.dwInlineDataSize = 0;
7615 }
7616 else
7617 {
7618 mediaObjectParam.dwInlineDataSize = MOS_MAX(kernelParam->payloadSize, 4);
7619 }
7620
7621 // set surface state and binding table
7622 if (kernelParam->indirectDataParam.surfaceCount)
7623 {
7624 for (index = 0; index < kernelParam->indirectDataParam.surfaceCount; index++)
7625 {
7626 btIndex = (indirectSurfaceInfo + index)->bindingTableIndex;
7627 surfIndex = (indirectSurfaceInfo + index)->surfaceIndex;
7628 switch ((indirectSurfaceInfo + index)->kind)
7629 {
7630 case CM_ARGUMENT_SURFACEBUFFER:
7631 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceStateWithBTIndex(
7632 state, bindingTable, surfIndex, btIndex, 0));
7633 break;
7634
7635 case CM_ARGUMENT_SURFACE2D:
7636 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceStateWithBTIndex(
7637 state, bindingTable, surfIndex, btIndex, 0));
7638 break;
7639
7640 case CM_ARGUMENT_SURFACE2D_UP:
7641 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPStateWithBTIndex(
7642 state, bindingTable, surfIndex, btIndex, 0));
7643 break;
7644
7645 case CM_ARGUMENT_SURFACE2D_SAMPLER:
7646 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceStateWithBTIndex(
7647 state, bindingTable, surfIndex, btIndex, 1));
7648 break;
7649 case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
7650 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPStateWithBTIndex(
7651 state, bindingTable, surfIndex, btIndex, 1));
7652 break;
7653 case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
7654 case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
7655 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceStateWithBTIndex(
7656 state, bindingTable, surfIndex, btIndex, 0, (CM_HAL_KERNEL_ARG_KIND)(indirectSurfaceInfo + index)->kind, 0));
7657 break;
7658 case CM_ARGUMENT_SURFACE3D:
7659 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceStateWithBTIndex(
7660 state, bindingTable, surfIndex, btIndex));
7661 break;
7662 default:
7663 eStatus = MOS_STATUS_INVALID_PARAMETER;
7664 CM_ASSERTMESSAGE("Indirect Data surface kind is not supported");
7665 goto finish;
7666 }
7667 }
7668 }
7669
7670 // set sampler bti
7671 if (kernelParam->samplerBTIParam.samplerCount > 0)
7672 {
7673 for (uint32_t i = 0; i < kernelParam->samplerBTIParam.samplerCount; i++)
7674 {
7675 HalCm_SetupSamplerStateWithBTIndex(state, kernelParam, &kernelParam->samplerBTIParam.samplerInfo[0], i, mediaID);
7676 }
7677 }
7678
7679 if ( ( kernelParam->curbeSizePerThread > 0 ) && ( kernelParam->stateBufferType == CM_STATE_BUFFER_NONE ) )
7680 {
7681 uint8_t data[CM_MAX_THREAD_PAYLOAD_SIZE + 32];
7682 uint8_t curbe[CM_MAX_CURBE_SIZE_PER_TASK + 32];
7683
7684 MOS_ZeroMemory(data, sizeof(data));
7685 MOS_ZeroMemory(curbe, sizeof(curbe));
7686 for (aIndex = 0; aIndex < kernelParam->numArgs; aIndex++)
7687 {
7688 argParam = &kernelParam->argParams[aIndex];
7689
7690 if (argParam->perThread || argParam->isNull)
7691 {
7692 continue;
7693 }
7694
7695 switch (argParam->kind)
7696 {
7697 case CM_ARGUMENT_GENERAL:
7698 case CM_ARGUMENT_IMPLICT_GROUPSIZE:
7699 case CM_ARGUMENT_IMPLICT_LOCALSIZE:
7700 case CM_ARGUMENT_IMPLICIT_LOCALID:
7701 case CM_ARGUMENT_GENERAL_DEPVEC:
7702 HalCm_SetArgData(argParam, 0, data);
7703 break;
7704
7705 case CM_ARGUMENT_SAMPLER:
7706 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSamplerState(
7707 state, kernelParam, argParam, indexParam, mediaID, 0, data));
7708 break;
7709
7710 case CM_ARGUMENT_SURFACEBUFFER:
7711 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupBufferSurfaceState(
7712 state, argParam, indexParam, bindingTable, -1, 0, data));
7713 break;
7714
7715 case CM_ARGUMENT_SURFACE2D_UP:
7716 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPState(
7717 state, argParam, indexParam, bindingTable, 0, data));
7718 break;
7719
7720 case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
7721 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceUPSamplerState(
7722 state, argParam, indexParam, bindingTable, 0, data));
7723 break;
7724
7725 case CM_ARGUMENT_SURFACE2D_SAMPLER:
7726 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceSamplerState(
7727 state, argParam, indexParam, bindingTable, 0, data));
7728 break;
7729
7730 case CM_ARGUMENT_SURFACE2D:
7731 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
7732 state, argParam, indexParam, bindingTable, 0, data));
7733 break;
7734
7735 case CM_ARGUMENT_SURFACE3D:
7736 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup3DSurfaceState(
7737 state, argParam, indexParam, bindingTable, 0, data));
7738 break;
7739
7740 case CM_ARGUMENT_SURFACE_VME: // 3 surface indices
7741 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupVmeSurfaceState(
7742 state, argParam, indexParam, bindingTable, 0, data));
7743 vmeUsed = true;
7744 break;
7745
7746 case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS: // sampler 8x8 surface
7747 case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA: // sampler 8x8 surface
7748 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupSampler8x8SurfaceState(
7749 state, argParam, indexParam, bindingTable, 0, data));
7750 break;
7751
7752 case CM_ARGUMENT_STATE_BUFFER:
7753 CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_SetupStateBufferSurfaceState(
7754 state, argParam, indexParam, bindingTable, 0, data ) );
7755 break;
7756
7757 case CM_ARGUMENT_SURFACE:
7758 // Allow null surface
7759 break;
7760 case CM_ARGUMENT_SURFACE2D_SCOREBOARD:
7761 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Setup2DSurfaceState(
7762 state, argParam, indexParam, bindingTable, 0, data));
7763 break;
7764
7765 default:
7766 eStatus = MOS_STATUS_INVALID_PARAMETER;
7767 CM_ASSERTMESSAGE("Argument kind '%d' is not supported", argParam->kind);
7768 goto finish;
7769 }
7770 }
7771
7772 if (perKernelGpGpuWalkerParames->gpgpuEnabled)
7773 {
7774 uint32_t offset = 0;
7775
7776 uint32_t localIdXOffset = kernelParam->argParams[localIdIndex].payloadOffset;
7777 uint32_t localIdYOffset = localIdXOffset + 4;
7778 uint32_t localIdZOffset = localIdXOffset + 8;
7779
7780 //totalCurbeSize aligned when parsing task
7781 int32_t crossThreadSize = kernelParam->crossThreadConstDataLen;
7782
7783 //Cross thread constant data
7784 MOS_SecureMemcpy(curbe + offset, crossThreadSize, data, crossThreadSize);
7785 offset += crossThreadSize;
7786
7787 //Per-thread data
7788 for (idZ = 0; idZ < perKernelGpGpuWalkerParames->threadDepth; idZ++)
7789 {
7790 for (idY = 0; idY < perKernelGpGpuWalkerParames->threadHeight; idY++)
7791 {
7792 for (idX = 0; idX < perKernelGpGpuWalkerParames->threadWidth; idX++)
7793 {
7794 *((uint32_t *)(data + localIdXOffset)) = idX;
7795 *((uint32_t *)(data + localIdYOffset)) = idY;
7796 *((uint32_t *)(data + localIdZOffset)) = idZ;
7797 MOS_SecureMemcpy(curbe + offset, kernelParam->curbeSizePerThread, data + crossThreadSize, kernelParam->curbeSizePerThread);
7798 offset += kernelParam->curbeSizePerThread;
7799 }
7800 }
7801 }
7802
7803 // tell pfnLoadCurbeData the current curbe offset
7804 if (state->dshEnabled)
7805 {
7806 PRENDERHAL_MEDIA_STATE_LEGACY pCurMediaStateLegacy = (PRENDERHAL_MEDIA_STATE_LEGACY)stateHeap->pCurMediaState;
7807 PRENDERHAL_DYNAMIC_STATE dynamicState = pCurMediaStateLegacy->pDynamicState;
7808 dynamicState->Curbe.iCurrent -= MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
7809 kernelParam->curbeOffset = dynamicState->Curbe.iCurrent;
7810 }
7811 else
7812 {
7813 stateHeap->pCurMediaState->iCurbeOffset -= MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
7814 kernelParam->curbeOffset = stateHeap->pCurMediaState->iCurbeOffset;
7815 }
7816 // update curbe with data.
7817 renderHal->pfnLoadCurbeData(renderHal,
7818 stateHeap->pCurMediaState,
7819 curbe,
7820 kernelParam->totalCurbeSize);
7821 }
7822 else
7823 {
7824 CM_ASSERT(kernelParam->totalCurbeSize == kernelParam->curbeSizePerThread);
7825
7826 // tell pfnLoadCurbeData the current curbe offset
7827 if (state->dshEnabled)
7828 {
7829 PRENDERHAL_MEDIA_STATE_LEGACY pCurMediaStateLegacy = (PRENDERHAL_MEDIA_STATE_LEGACY)stateHeap->pCurMediaState;
7830 PRENDERHAL_DYNAMIC_STATE dynamicState = pCurMediaStateLegacy->pDynamicState;
7831 dynamicState->Curbe.iCurrent -= MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
7832 kernelParam->curbeOffset = dynamicState->Curbe.iCurrent;
7833 }
7834 else
7835 {
7836 stateHeap->pCurMediaState->iCurbeOffset -= MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
7837 kernelParam->curbeOffset = stateHeap->pCurMediaState->iCurbeOffset;
7838 }
7839 // update curbe with data.
7840 renderHal->pfnLoadCurbeData(renderHal,
7841 stateHeap->pCurMediaState,
7842 data,
7843 kernelParam->totalCurbeSize);
7844 }
7845
7846 if (state->cmHalInterface->IsOverridePowerOptionPerGpuContext() == false) // false means override per Batch.
7847 {
7848 if ((vmeUsed == true) && state->cmHalInterface->IsRequestShutdownSubslicesForVmeUsage())
7849 {
7850 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetPlatformInfo(state, &platformInfo, true));
7851 CM_POWER_OPTION cmPower;
7852 cmPower.nSlice = 1;
7853 cmPower.nSubSlice = platformInfo.numSubSlices / 2;
7854 cmPower.nEU = (uint16_t)platformInfo.numEUsPerSubSlice;
7855 state->pfnSetPowerOption(state, &cmPower);
7856 }
7857 }
7858 }
7859
7860 #if MDF_CURBE_DATA_DUMP
7861 if (state->dumpCurbeData)
7862 {
7863 HalCm_DumpCurbeData(state);
7864 }
7865
7866 #endif
7867
7868 #if MDF_INTERFACE_DESCRIPTOR_DATA_DUMP
7869 if (state->dumpIDData)
7870 {
7871 HalCm_DumpInterfaceDescriptorData(state);
7872 }
7873 #endif
7874
7875 finish:
7876 return eStatus;
7877 }
7878
HalCm_SetConditionalEndInfo(PCM_HAL_STATE state,PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo,PMHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS conditionalBBEndParams,uint32_t index)7879 MOS_STATUS HalCm_SetConditionalEndInfo(
7880 PCM_HAL_STATE state,
7881 PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo,
7882 PMHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS conditionalBBEndParams,
7883 uint32_t index
7884 )
7885 {
7886 if (index >= CM_MAX_CONDITIONAL_END_CMDS)
7887 {
7888 return MOS_STATUS_INVALID_PARAMETER;
7889 }
7890
7891 MOS_ZeroMemory(&conditionalBBEndParams[index], sizeof(MHW_MI_CONDITIONAL_BATCH_BUFFER_END_PARAMS));
7892
7893 conditionalBBEndParams[index].presSemaphoreBuffer = &(state->bufferTable[conditionalEndInfo[index].bufferTableIndex].osResource);
7894 conditionalBBEndParams[index].dwValue = conditionalEndInfo[index].compareValue;
7895 conditionalBBEndParams[index].bDisableCompareMask = conditionalEndInfo[index].disableCompareMask;
7896 conditionalBBEndParams[index].dwOffset = conditionalEndInfo[index].offset;
7897
7898 return MOS_STATUS_SUCCESS;
7899 }
7900
7901 //===============<Interface Functions>==========================================
7902
7903 //*-----------------------------------------------------------------------------
7904 //| Purpose: Allocate Structures required for HW Rendering
7905 //| Returns: Result of the operation
7906 //*-----------------------------------------------------------------------------
HalCm_Allocate(PCM_HAL_STATE state)7907 MOS_STATUS HalCm_Allocate(
7908 PCM_HAL_STATE state) // [in] Pointer to CM State
7909 {
7910 MOS_STATUS eStatus;
7911 PCM_HAL_DEVICE_PARAM deviceParam;
7912 PRENDERHAL_INTERFACE renderHal;
7913 PRENDERHAL_STATE_HEAP_SETTINGS stateHeapSettings;
7914 uint32_t i;
7915 MOS_NULL_RENDERING_FLAGS nullHWAccelerationEnable;
7916 RENDERHAL_SETTINGS_LEGACY renderHalSettings;
7917 uint32_t maxTasks;
7918
7919 PMHW_BATCH_BUFFER batchBuffer = nullptr;
7920
7921 //------------------------------------
7922 CM_ASSERT(state);
7923 //------------------------------------
7924
7925 eStatus = MOS_STATUS_UNKNOWN;
7926 deviceParam = &state->cmDeviceParam;
7927 renderHal = state->renderHal;
7928 stateHeapSettings = &renderHal->StateHeapSettings;
7929
7930 stateHeapSettings->iCurbeSize = CM_MAX_CURBE_SIZE_PER_TASK;
7931 stateHeapSettings->iMediaStateHeaps = deviceParam->maxTasks + 1; // + 1 to handle sync issues with current RenderHal impl (we can remove this once we insert sync value in 2nd level BB)
7932 stateHeapSettings->iMediaIDs = deviceParam->maxKernelsPerTask; // Number of Media IDs = Number of Kernels/Task
7933
7934 stateHeapSettings->iKernelCount = deviceParam->maxGshKernelEntries;
7935 stateHeapSettings->iKernelBlockSize = deviceParam->maxKernelBinarySize; // The kernel occupied memory need be this block size aligned 256K for IVB/HSW
7936 stateHeapSettings->iKernelHeapSize = deviceParam->maxGshKernelEntries * CM_32K; // CM_MAX_GSH_KERNEL_ENTRIES * 32*1024;
7937 state->totalKernelSize = (int32_t*)MOS_AllocAndZeroMemory(sizeof(int32_t) * deviceParam->maxGshKernelEntries);
7938 if(!state->totalKernelSize)
7939 {
7940 CM_ASSERTMESSAGE("Could not allocate enough memory for state->totalKernelSize\n");
7941 eStatus = MOS_STATUS_NO_SPACE;
7942 goto finish;
7943 }
7944
7945 stateHeapSettings->iPerThreadScratchSize = deviceParam->maxPerThreadScratchSpaceSize;
7946 stateHeapSettings->iSipSize = CM_MAX_SIP_SIZE;
7947 stateHeapSettings->iBindingTables = deviceParam->maxKernelsPerTask; // Number of Binding tables = Number of Kernels/Task
7948 stateHeapSettings->iSurfacesPerBT = CM_MAX_SURFACE_STATES_PER_BT; // Allocate Max Binding Table indices per binding table
7949 stateHeapSettings->iSurfaceStates = CM_MAX_SURFACE_STATES; // Allocate Max Surfaces that can be indexed
7950 stateHeapSettings->iSamplersAVS = deviceParam->maxAvsSamplers; // Allocate Max AVS samplers
7951
7952 // Initialize RenderHal Interface
7953 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnInitialize(renderHal, nullptr));
7954
7955 // Initialize Vebox Interface
7956 if (state->veboxInterface)
7957 {
7958 CM_CHK_MOSSTATUS_GOTOFINISH(state->veboxInterface->CreateHeap());
7959 }
7960
7961 // Initialize the table only in Static Mode (DSH doesn't use this table at all)
7962 if (!state->dshEnabled)
7963 {
7964 // Init the data in kernel entries for Dynamic GSH
7965 for (int32_t kernelID = 0; kernelID < stateHeapSettings->iKernelCount; ++kernelID)
7966 {
7967 if (kernelID > 0)
7968 {
7969 state->totalKernelSize[kernelID] = 0;
7970 }
7971 else
7972 {
7973 state->totalKernelSize[kernelID] = stateHeapSettings->iKernelHeapSize;
7974 }
7975 }
7976 state->kernelNumInGsh = 1;
7977 }
7978
7979 // Allocate BB (one for each media-state heap)
7980 state->numBatchBuffers = stateHeapSettings->iMediaStateHeaps;
7981 state->batchBuffers = (PMHW_BATCH_BUFFER)MOS_AllocAndZeroMemory(
7982 state->numBatchBuffers *
7983 sizeof(MHW_BATCH_BUFFER));
7984
7985 CM_CHK_NULL_GOTOFINISH_MOSERROR(state->batchBuffers);
7986
7987 batchBuffer = state->batchBuffers;
7988 for (i = 0; i < (uint32_t)state->numBatchBuffers; i ++, batchBuffer ++)
7989 {
7990 batchBuffer->dwSyncTag = 0;
7991 batchBuffer->bMatch = false;
7992 batchBuffer->iPrivateType = RENDERHAL_BB_TYPE_CM;
7993 batchBuffer->iPrivateSize = sizeof(CM_HAL_BB_ARGS);
7994 batchBuffer->pPrivateData = (PCM_HAL_BB_ARGS)MOS_AllocAndZeroMemory(sizeof(CM_HAL_BB_ARGS));
7995 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
7996 ((PCM_HAL_BB_ARGS)batchBuffer->pPrivateData)->refCount = 1;
7997 }
7998
7999 // Allocate TimeStamp Buffer
8000 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_AllocateTsResource(state));
8001
8002 // Allocate tracker resources
8003 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_AllocateTrackerResource(state));
8004
8005 // Initialize dynamic general state heap
8006 CM_HAL_HEAP_PARAM heapParams;
8007 heapParams.behaviorGSH = HeapManager::Behavior::destructiveExtend;
8008 heapParams.initialSizeGSH = 0x0080000;
8009 heapParams.extendSizeGSH = 0x0080000;
8010 heapParams.trackerProducer = &state->renderHal->trackerProducer;
8011 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_InitializeDynamicStateHeaps(state, &heapParams));
8012
8013 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_AllocateTables(state));
8014
8015 // Allocate Task Param to hold max tasks
8016 state->taskParam = (PCM_HAL_TASK_PARAM)MOS_AllocAndZeroMemory(sizeof(CM_HAL_TASK_PARAM));
8017 CM_CHK_NULL_GOTOFINISH_MOSERROR(state->taskParam);
8018 state->currentTaskEntry = 0;
8019
8020 // Allocate Task TimeStamp to hold time stamps
8021 state->taskTimeStamp = (PCM_HAL_TASK_TIMESTAMP)MOS_AllocAndZeroMemory(sizeof(CM_HAL_TASK_TIMESTAMP));
8022 CM_CHK_NULL_GOTOFINISH_MOSERROR(state->taskTimeStamp);
8023
8024 // Setup Registration table entries
8025 state->surfaceRegTable.count = state->cmDeviceParam.max2DSurfaceTableSize;
8026 state->surfaceRegTable.entries = state->surf2DTable;
8027
8028 maxTasks = state->cmDeviceParam.maxTasks;
8029 // Initialize the task status table
8030 MOS_FillMemory(state->taskStatusTable, (size_t)maxTasks, CM_INVALID_INDEX);
8031
8032 // Init the null render flag
8033 nullHWAccelerationEnable = state->osInterface->pfnGetNullHWRenderFlags(state->osInterface);
8034 state->nullHwRenderCm = nullHWAccelerationEnable.Cm || nullHWAccelerationEnable.VPGobal;
8035
8036 //during initialization stage to allocate sip resource and Get sip binary.
8037 if ((state->midThreadPreemptionDisabled == false)
8038 || (state->kernelDebugEnabled == true))
8039 {
8040 CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->AllocateSIPCSRResource());
8041 state->pfnGetSipBinary(state);
8042 }
8043
8044 //Init flag for conditional batch buffer
8045 state->cbbEnabled = HalCm_IsCbbEnabled(state);
8046
8047 //Turn Turbo boost on
8048 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnEnableTurboBoost(state));
8049 CM_CHK_NULL_GOTOFINISH_MOSERROR(state->osInterface);
8050 state->tsFrequency = state->osInterface->pfnGetTsFrequency(state->osInterface);
8051
8052 if (state->refactor)
8053 {
8054 state->advExecutor = CmExtensionCreator<CmExecutionAdv>::CreateClass();
8055 if (state->advExecutor == nullptr)
8056 {
8057 CM_ASSERTMESSAGE("Could not allocate enough memory for state->advExecutor\n");
8058 eStatus = MOS_STATUS_NO_SPACE;
8059 goto finish;
8060 }
8061 state->advExecutor->Initialize(state);
8062 }
8063 else
8064 {
8065 state->advExecutor = nullptr;
8066 }
8067
8068 eStatus = MOS_STATUS_SUCCESS;
8069
8070 finish:
8071 return eStatus;
8072 }
8073
HalCm_GetKernelPerfTag(PCM_HAL_STATE cmState,PCM_HAL_KERNEL_PARAM * kernelParams,uint32_t numKernels)8074 uint16_t HalCm_GetKernelPerfTag(
8075 PCM_HAL_STATE cmState,
8076 PCM_HAL_KERNEL_PARAM *kernelParams,
8077 uint32_t numKernels)
8078 {
8079 using namespace std;
8080
8081 CM_ASSERT(cmState);
8082 CM_ASSERT(kernelParams);
8083
8084 int perfTagKernelNum = numKernels - 1;
8085 if (numKernels > MAX_COMBINE_NUM_IN_PERFTAG)
8086 {
8087 perfTagKernelNum = MAX_COMBINE_NUM_IN_PERFTAG - 1;
8088 }
8089
8090 // get a combined kernel name
8091 uint32_t len = numKernels * CM_MAX_KERNEL_NAME_SIZE_IN_BYTE;
8092 char *combinedName = MOS_NewArray(char, len);
8093 if (combinedName == nullptr)
8094 { // Not need to abort the process as this is only for pnp profiling
8095 CM_ASSERTMESSAGE("Error: Memory allocation error in getPertTag.");
8096 return 0; // return the default perftag
8097 }
8098 CmSafeMemSet(combinedName, 0, len);
8099
8100 MOS_SecureStrcat(combinedName, len, kernelParams[0]->kernelName);
8101 for (uint32_t i = 1; i < numKernels; i++)
8102 {
8103 MOS_SecureStrcat(combinedName, len, ";");
8104 MOS_SecureStrcat(combinedName, len, kernelParams[i]->kernelName);
8105 }
8106
8107 // get perftag index
8108 int perfTagIndex = 0;
8109 map<string, int>::iterator ite = cmState->perfTagIndexMap[perfTagKernelNum]->find(combinedName);
8110 if (ite == cmState->perfTagIndexMap[perfTagKernelNum]->end())
8111 {
8112 if (cmState->currentPerfTagIndex[perfTagKernelNum] <= MAX_CUSTOMIZED_PERFTAG_INDEX)
8113 {
8114 cmState->perfTagIndexMap[perfTagKernelNum]->insert(pair<string, int>(combinedName, cmState->currentPerfTagIndex[perfTagKernelNum]));
8115 perfTagIndex = cmState->currentPerfTagIndex[perfTagKernelNum] ++;
8116 }
8117 }
8118 else
8119 {
8120 perfTagIndex = ite->second;
8121 }
8122
8123 perfTagIndex = (perfTagIndex &0xFF) | (perfTagKernelNum << 8);
8124 MosSafeDeleteArray(combinedName);
8125 return (uint16_t)perfTagIndex;
8126 }
8127
8128 //*-----------------------------------------------------------------------------
8129 //| Purpose: Executes the CM Task
8130 //| Returns: Result of the operation
8131 //*-----------------------------------------------------------------------------
HalCm_ExecuteTask(PCM_HAL_STATE state,PCM_HAL_EXEC_TASK_PARAM execParam)8132 MOS_STATUS HalCm_ExecuteTask(
8133 PCM_HAL_STATE state, // [in] Pointer to CM State
8134 PCM_HAL_EXEC_TASK_PARAM execParam) // [in] Pointer to Task Param
8135 {
8136 MOS_STATUS eStatus;
8137 PRENDERHAL_INTERFACE renderHal;
8138 PRENDERHAL_MEDIA_STATE mediaState;
8139 PMHW_BATCH_BUFFER batchBuffer;
8140 PCM_HAL_BB_ARGS bbCmArgs;
8141 PCM_HAL_KERNEL_PARAM kernelParam;
8142 int32_t taskId;
8143 int32_t remBindingTables;
8144 int32_t bindingTable;
8145 int32_t bti;
8146 int32_t mediaID;
8147 PRENDERHAL_KRN_ALLOCATION krnAllocations[CM_MAX_KERNELS_PER_TASK];
8148 uint32_t vfeCurbeSize;
8149 uint32_t maxInlineDataSize, maxIndirectDataSize;
8150 uint32_t i;
8151 void *cmdBuffer = nullptr;
8152 PCM_HAL_TASK_PARAM taskParam = state->taskParam;
8153 uint32_t btsizePower2;
8154 PMOS_INTERFACE osInterface = nullptr;
8155
8156 //-----------------------------------
8157 CM_ASSERT(state);
8158 CM_ASSERT(execParam);
8159 //-----------------------------------
8160
8161 eStatus = MOS_STATUS_SUCCESS;
8162 renderHal = state->renderHal;
8163 mediaState = nullptr;
8164 batchBuffer = nullptr;
8165
8166 if (execParam->numKernels > state->cmDeviceParam.maxKernelsPerTask)
8167 {
8168 eStatus = MOS_STATUS_INVALID_PARAMETER;
8169 CM_ASSERTMESSAGE("Number of Kernels per task exceeds maximum");
8170 goto finish;
8171 }
8172
8173 // Reset states before execute
8174 // (clear allocations, get GSH allocation index + any additional housekeeping)
8175 state->osInterface->pfnResetOsStates(state->osInterface);
8176 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnReset(renderHal));
8177
8178 MOS_ZeroMemory(state->taskParam, sizeof(CM_HAL_TASK_PARAM));
8179
8180 MOS_FillMemory(
8181 state->bti2DIndexTable,
8182 state->cmDeviceParam.max2DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8183 CM_INVALID_INDEX );
8184
8185 MOS_FillMemory(
8186 state->bti2DUPIndexTable,
8187 state->cmDeviceParam.max2DSurfaceUPTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8188 CM_INVALID_INDEX );
8189
8190 MOS_FillMemory(
8191 state->bti3DIndexTable,
8192 state->cmDeviceParam.max3DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8193 CM_INVALID_INDEX );
8194
8195 MOS_FillMemory(
8196 state->btiBufferIndexTable,
8197 state->cmDeviceParam.maxBufferTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8198 CM_INVALID_INDEX );
8199
8200 MOS_FillMemory(
8201 state->samplerIndexTable,
8202 state->cmDeviceParam.maxSamplerTableSize,
8203 CM_INVALID_INDEX);
8204
8205 MOS_FillMemory(
8206 state->sampler8x8IndexTable,
8207 state->cmDeviceParam.maxSampler8x8TableSize,
8208 CM_INVALID_INDEX);
8209
8210 state->walkerParams.CmWalkerEnable = 0;
8211
8212 vfeCurbeSize = 0;
8213 maxInlineDataSize = 0;
8214 maxIndirectDataSize = 0;
8215
8216 // Get the Task Id
8217 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetNewTaskId(state, &taskId));
8218
8219 // Parse the task
8220 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_ParseTask(state, execParam));
8221
8222 // Reset the SSH configuration according to the property of the task
8223 renderHal->pStateHeap->iBindingTableSize = MOS_ALIGN_CEIL(taskParam->surfacePerBT * // Reconfigure the binding table size
8224 renderHal->pRenderHalPltInterface->GetBTStateCmdSize(), renderHal->StateHeapSettings.iBTAlignment);
8225
8226 taskParam->surfacePerBT = renderHal->pStateHeap->iBindingTableSize/renderHal->pRenderHalPltInterface->GetBTStateCmdSize();
8227
8228 renderHal->StateHeapSettings.iBindingTables = renderHal->StateHeapSettings.iBindingTables * // Reconfigure the binding table number
8229 renderHal->StateHeapSettings.iSurfacesPerBT / taskParam->surfacePerBT;
8230
8231 renderHal->StateHeapSettings.iSurfacesPerBT = taskParam->surfacePerBT; // Reconfigure the surface per BT
8232
8233 if (execParam->numKernels > (uint32_t)renderHal->StateHeapSettings.iBindingTables)
8234 {
8235 eStatus = MOS_STATUS_INVALID_PARAMETER;
8236 CM_ASSERTMESSAGE("Number of Kernels per task exceeds the number can be hold by binding table");
8237 goto finish;
8238 }
8239
8240 if (execParam->kernelDebugEnabled && Mos_ResourceIsNull(&state->sipResource.osResource))
8241 {
8242 HalCm_AllocateSipResource( state); // create sip resource if it does not exist
8243 }
8244
8245 // Assign a MediaState from the MediaStateHeap
8246 // !!!! THIS MUST BE BEFORE Getting the BATCH_BUFFER !!!
8247 // since this method syncs the batch buffer and media state.
8248 if (state->dshEnabled)
8249 {
8250 if ( execParam->userDefinedMediaState != nullptr )
8251 {
8252 // use exsiting media state as current state
8253 mediaState = static_cast< PRENDERHAL_MEDIA_STATE >( execParam->userDefinedMediaState );
8254
8255 // update current state to dsh
8256 renderHal->pStateHeap->pCurMediaState = mediaState;
8257 // Refresh sync tag for all media states in submitted queue
8258 state->criticalSectionDSH->Acquire();
8259 renderHal->pfnRefreshSync( renderHal );
8260 state->criticalSectionDSH->Release();
8261 }
8262 else
8263 {
8264 // Obtain media state configuration - Curbe, Samplers (3d/AVS/VA), 8x8 sampler table, Media IDs, Kernel Spill area
8265 RENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS params;
8266 state->criticalSectionDSH->Acquire();
8267 HalCm_DSH_GetDynamicStateConfiguration( state, ¶ms, execParam->numKernels, execParam->kernels, execParam->kernelCurbeOffset );
8268
8269 // Prepare Media States to accommodate all parameters - Curbe, Samplers (3d/AVS/VA), 8x8 sampler table, Media IDs
8270 mediaState = renderHal->pfnAssignDynamicState( renderHal, ¶ms, RENDERHAL_COMPONENT_CM );
8271 state->criticalSectionDSH->Release();
8272 }
8273 }
8274 else
8275 {
8276 mediaState = renderHal->pfnAssignMediaState(renderHal, RENDERHAL_COMPONENT_CM);
8277 }
8278 CM_CHK_NULL_GOTOFINISH_MOSERROR(mediaState);
8279
8280 // Assign/Reset SSH instance
8281 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignSshInstance(renderHal));
8282
8283 // Dynamic Batch Buffer allocation
8284
8285 if (!state->walkerParams.CmWalkerEnable)
8286 {
8287 // Get the Batch buffer
8288 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetBatchBuffer(state, execParam->numKernels, execParam->kernels, &batchBuffer));
8289 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
8290 bbCmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
8291
8292 // Lock the batch buffer
8293 if ( (bbCmArgs->refCount == 1) ||
8294 (state->taskParam->reuseBBUpdateMask == 1) )
8295 {
8296 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnLockBB(renderHal, batchBuffer));
8297 }
8298 }
8299
8300 if (state->useNewSamplerHeap == false)
8301 {
8302 HalCm_AcquireSamplerStatistics(state);
8303 }
8304
8305 // Load all kernels in the same state heap - expand ISH if necessary BEFORE programming media states.
8306 // This is better than having to expand ISH in the middle of loading, when part of MediaIDs are
8307 // already programmed - not a problem in the old implementation where it would simply remove old
8308 // kernels out of the way.
8309 if (state->dshEnabled)
8310 {
8311 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_DSH_LoadKernelArray(state, execParam->kernels, execParam->numKernels, krnAllocations));
8312 }
8313
8314 for (i = 0; i < execParam->numKernels; i++)
8315 {
8316 CM_HAL_INDEX_PARAM indexParam;
8317 MOS_ZeroMemory(&indexParam, sizeof(CM_HAL_INDEX_PARAM));
8318 kernelParam = execParam->kernels[i];
8319
8320 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupStatesForKernelInitial(state, mediaState, batchBuffer, taskId, kernelParam, &indexParam,
8321 execParam->kernelCurbeOffset[i], bti, mediaID, krnAllocations[i]));
8322
8323 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_FinishStatesForKernel(state, mediaState, batchBuffer, taskId, kernelParam, i, &indexParam,
8324 bti, mediaID, krnAllocations[i]));
8325
8326 vfeCurbeSize += MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
8327 if (kernelParam->payloadSize > maxInlineDataSize)
8328 {
8329 maxInlineDataSize = kernelParam->payloadSize;
8330 }
8331 if (kernelParam->indirectDataParam.indirectDataSize > maxIndirectDataSize)
8332 {
8333 maxIndirectDataSize = kernelParam->indirectDataParam.indirectDataSize;
8334 }
8335
8336 if (execParam->conditionalEndBitmap & (uint64_t)1 << i)
8337 {
8338 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetConditionalEndInfo(state, taskParam->conditionalEndInfo, taskParam->conditionalBBEndParams, i));
8339 }
8340 }
8341
8342 // Store the Max Payload Sizes in the Task params
8343 state->taskParam->vfeCurbeSize = vfeCurbeSize;
8344 if (maxIndirectDataSize)
8345 {
8346 state->taskParam->urbEntrySize = maxIndirectDataSize;
8347 }
8348 else
8349 {
8350 state->taskParam->urbEntrySize = maxInlineDataSize;
8351 }
8352
8353 // We may have to send additional Binding table commands in command buffer.
8354 // This is needed because the surface offset (from the base on SSH)
8355 // calculation takes into account the max binding tables allocated in the
8356 // SSH.
8357 remBindingTables = renderHal->StateHeapSettings.iBindingTables - execParam->numKernels;
8358
8359 if (remBindingTables > 0)
8360 {
8361 for (i = 0; i < (uint32_t)remBindingTables; i++)
8362 {
8363 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignBindingTable(
8364 renderHal,
8365 &bindingTable));
8366 }
8367 }
8368
8369 // until now, we know binding table index for debug surface
8370 // let's get system thread
8371 osInterface = state->osInterface;
8372 osInterface->pfnResetPerfBufferID(osInterface);
8373 if (osInterface->pfnIsPerfTagSet(osInterface) == false)
8374 {
8375 osInterface->pfnIncPerfFrameID(osInterface);
8376 uint16_t perfTag = HalCm_GetKernelPerfTag(state, execParam->kernels, execParam->numKernels);
8377 osInterface->pfnSetPerfTag(osInterface, perfTag);
8378 }
8379 #if (_RELEASE_INTERNAL || _DEBUG)
8380 #if defined(CM_DIRECT_GUC_SUPPORT)
8381 // Update the task ID table
8382 state->taskStatusTable[taskId] = (char)taskId;
8383
8384 //for GuC direct submission, need to send out dummy command buffer to make sure PDP table got binded
8385 CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SubmitDummyCommands(
8386 batchBuffer, taskId, execParam->kernels, &cmdBuffer));
8387
8388 /* make sure Dummy submission is done */
8389
8390 CM_HAL_QUERY_TASK_PARAM queryParam;
8391
8392 queryParam.taskId = taskId;
8393 queryParam.status = CM_TASK_IN_PROGRESS;
8394
8395 do {
8396 state->pfnQueryTask(state, &queryParam);
8397 } while (queryParam.status != CM_TASK_FINISHED);
8398
8399 #endif
8400 #endif
8401
8402 // Submit HW commands and states
8403 CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SubmitCommands(
8404 batchBuffer, taskId, execParam->kernels, &cmdBuffer));
8405
8406 // Set the Task ID
8407 execParam->taskIdOut = taskId;
8408
8409 // Set OS data
8410 if(cmdBuffer)
8411 {
8412 execParam->osData = cmdBuffer;
8413 }
8414
8415 // Update the task ID table
8416 state->taskStatusTable[taskId] = (char)taskId;
8417
8418 finish:
8419
8420 if (state->dshEnabled)
8421 {
8422 state->criticalSectionDSH->Acquire();
8423 if (mediaState && eStatus != MOS_STATUS_SUCCESS)
8424 {
8425 // Failed, release media state and heap resources
8426 renderHal->pfnReleaseDynamicState(renderHal, mediaState);
8427 }
8428 else
8429 {
8430 renderHal->pfnSubmitDynamicState(renderHal, mediaState);
8431 }
8432 state->criticalSectionDSH->Release();
8433 }
8434
8435 if (batchBuffer) // for Media Walker, batchBuffer is empty
8436 {
8437 if (batchBuffer->bLocked)
8438 {
8439 // Only happens in Error cases
8440 CM_CHK_NULL_RETURN_MOSERROR(batchBuffer->pPrivateData);
8441 if (((PCM_HAL_BB_ARGS)batchBuffer->pPrivateData)->refCount == 1)
8442 {
8443 renderHal->pfnUnlockBB(renderHal, batchBuffer);
8444 }
8445 }
8446 }
8447
8448 return eStatus;
8449 }
8450
8451 //*-----------------------------------------------------------------------------
8452 //| Purpose: Executes the CM Group Task
8453 //| Returns: Result of the operation
8454 //*-----------------------------------------------------------------------------
HalCm_ExecuteGroupTask(PCM_HAL_STATE state,PCM_HAL_EXEC_GROUP_TASK_PARAM execGroupParam)8455 MOS_STATUS HalCm_ExecuteGroupTask(
8456 PCM_HAL_STATE state, // [in] Pointer to CM State
8457 PCM_HAL_EXEC_GROUP_TASK_PARAM execGroupParam) // [in] Pointer to Task Param
8458 {
8459 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
8460 PRENDERHAL_INTERFACE renderHal = state->renderHal;
8461 CM_HAL_INDEX_PARAM indexParam;
8462 int32_t taskId;
8463 uint32_t remBindingTables;
8464 int32_t bindingTable;
8465 int32_t bti;
8466 int32_t mediaID;
8467 PRENDERHAL_MEDIA_STATE mediaState = nullptr;
8468 uint32_t i;
8469 void *cmdBuffer = nullptr;
8470 PCM_HAL_KERNEL_PARAM kernelParam = nullptr;
8471 PCM_HAL_TASK_PARAM taskParam = state->taskParam;
8472 uint32_t btsizePower2;
8473 uint32_t vfeCurbeSize = 0;
8474 PRENDERHAL_KRN_ALLOCATION krnAllocations[CM_MAX_KERNELS_PER_TASK];
8475 PMOS_INTERFACE osInterface = nullptr;
8476
8477 //-----------------------------------
8478 CM_ASSERT(state);
8479 CM_ASSERT(execGroupParam);
8480 //-----------------------------------
8481
8482 MOS_ZeroMemory(state->taskParam, sizeof(CM_HAL_TASK_PARAM));
8483 MOS_ZeroMemory(&indexParam, sizeof(CM_HAL_INDEX_PARAM));
8484
8485 MOS_FillMemory(
8486 state->bti2DIndexTable,
8487 state->cmDeviceParam.max2DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8488 CM_INVALID_INDEX );
8489
8490 MOS_FillMemory(
8491 state->bti2DUPIndexTable,
8492 state->cmDeviceParam.max2DSurfaceUPTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8493 CM_INVALID_INDEX );
8494
8495 MOS_FillMemory(
8496 state->bti3DIndexTable,
8497 state->cmDeviceParam.max3DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8498 CM_INVALID_INDEX );
8499
8500 MOS_FillMemory(
8501 state->btiBufferIndexTable,
8502 state->cmDeviceParam.maxBufferTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8503 CM_INVALID_INDEX );
8504 MOS_FillMemory(
8505 state->samplerIndexTable,
8506 state->cmDeviceParam.maxSamplerTableSize,
8507 CM_INVALID_INDEX);
8508 MOS_FillMemory(
8509 state->sampler8x8IndexTable,
8510 state->cmDeviceParam.maxSampler8x8TableSize,
8511 CM_INVALID_INDEX);
8512
8513 // Reset states before execute
8514 // (clear allocations, get GSH allocation index + any additional housekeeping)
8515 state->osInterface->pfnResetOsStates(state->osInterface);
8516 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnReset(renderHal));
8517
8518 state->walkerParams.CmWalkerEnable = 0;
8519 state->taskParam->blGpGpuWalkerEnabled = true;
8520
8521 // Get the Task Id
8522 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetNewTaskId(state, &taskId));
8523
8524 // Parse the task
8525 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_ParseGroupTask(state, execGroupParam));
8526
8527 // Reset the SSH configuration according to the property of the task
8528 renderHal->pStateHeap->iBindingTableSize = MOS_ALIGN_CEIL(taskParam->surfacePerBT * // Reconfigure the binding table size
8529 renderHal->pRenderHalPltInterface->GetBTStateCmdSize(),
8530 renderHal->StateHeapSettings.iBTAlignment);
8531
8532 taskParam->surfacePerBT = renderHal->pStateHeap->iBindingTableSize / renderHal->pRenderHalPltInterface->GetBTStateCmdSize();
8533
8534 renderHal->StateHeapSettings.iBindingTables = renderHal->StateHeapSettings.iBindingTables * // Reconfigure the binding table number
8535 renderHal->StateHeapSettings.iSurfacesPerBT / taskParam->surfacePerBT;
8536 renderHal->StateHeapSettings.iSurfacesPerBT = taskParam->surfacePerBT; // Reconfigure the surface per BT
8537
8538 if (execGroupParam->numKernels > (uint32_t)renderHal->StateHeapSettings.iBindingTables)
8539 {
8540 eStatus = MOS_STATUS_INVALID_PARAMETER;
8541 CM_ASSERTMESSAGE("Number of Kernels per task exceeds the number can be hold by binding table");
8542 goto finish;
8543 }
8544
8545 if (execGroupParam->kernelDebugEnabled && Mos_ResourceIsNull(&state->sipResource.osResource))
8546 {
8547 HalCm_AllocateSipResource( state); // create sip resource if it does not exist
8548 }
8549
8550 // Assign a MediaState from the MediaStateHeap
8551 // !!!! THIS MUST BE BEFORE Getting the BATCH_BUFFER !!!
8552 // since this method syncs the batch buffer and media state.
8553 if (state->dshEnabled)
8554 {
8555 if ( execGroupParam->userDefinedMediaState != nullptr )
8556 {
8557 // Preload all kernels
8558 CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_DSH_LoadKernelArray( state, execGroupParam->kernels, execGroupParam->numKernels, krnAllocations ) );
8559
8560 // use exsiting media state as current state
8561 mediaState = static_cast< PRENDERHAL_MEDIA_STATE >( execGroupParam->userDefinedMediaState );
8562
8563 // update current state to dsh
8564 renderHal->pStateHeap->pCurMediaState = mediaState;
8565 state->criticalSectionDSH->Acquire();
8566 // Refresh sync tag for all media states in submitted queue
8567 renderHal->pfnRefreshSync( renderHal );
8568 state->criticalSectionDSH->Release();
8569 }
8570 else
8571 {
8572 // Preload all kernels
8573 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_DSH_LoadKernelArray(state, execGroupParam->kernels, execGroupParam->numKernels, krnAllocations));
8574
8575 // Obtain media state configuration - Curbe, Samplers (3d/AVS/VA), 8x8 sampler table, Media IDs, Kernel Spill area
8576 RENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS params;
8577
8578 state->criticalSectionDSH->Acquire();
8579 HalCm_DSH_GetDynamicStateConfiguration(state, ¶ms, execGroupParam->numKernels, execGroupParam->kernels, execGroupParam->kernelCurbeOffset);
8580 // Prepare Media States to accommodate all parameters
8581 mediaState = renderHal->pfnAssignDynamicState(renderHal, ¶ms, RENDERHAL_COMPONENT_CM);
8582 state->criticalSectionDSH->Release();
8583 }
8584 }
8585 else
8586 {
8587 // Assign a MediaState from the MediaStateHeap
8588 // !!!! THIS MUST BE BEFORE Getting the BATCH_BUFFER !!!
8589 // since this method syncs the batch buffer and media state.
8590 mediaState = renderHal->pfnAssignMediaState(renderHal, RENDERHAL_COMPONENT_CM);
8591 }
8592 CM_CHK_NULL_GOTOFINISH_MOSERROR(mediaState);
8593
8594 // Assign/Reset SSH instance
8595 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignSshInstance(renderHal));
8596
8597 if (state->useNewSamplerHeap == false)
8598 {
8599 HalCm_AcquireSamplerStatistics(state);
8600 }
8601
8602 for (i = 0; i < execGroupParam->numKernels; i++)
8603 {
8604 CM_HAL_INDEX_PARAM indexParam;
8605 MOS_ZeroMemory(&indexParam, sizeof(CM_HAL_INDEX_PARAM));
8606 kernelParam = execGroupParam->kernels[i];
8607
8608 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupStatesForKernelInitial(state, mediaState, nullptr, taskId, kernelParam, &indexParam,
8609 execGroupParam->kernelCurbeOffset[i], bti, mediaID, krnAllocations[i]));
8610
8611 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_FinishStatesForKernel(state, mediaState, nullptr, taskId, kernelParam, i, &indexParam,
8612 bti, mediaID, krnAllocations[i]));
8613
8614 vfeCurbeSize += MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
8615
8616 if (execGroupParam->conditionalEndBitmap & (uint64_t)1 << i)
8617 {
8618 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetConditionalEndInfo(state, taskParam->conditionalEndInfo, taskParam->conditionalBBEndParams, i));
8619 }
8620 }
8621
8622 // Store the Max Payload Sizes in the Task params
8623 state->taskParam->vfeCurbeSize = vfeCurbeSize;
8624 state->taskParam->urbEntrySize = 0;
8625
8626 // We may have to send additional Binding table commands in command buffer.
8627 // This is needed because the surface offset (from the base on SSH)
8628 // calculation takes into account the max binding tables allocated in the
8629 // SSH.
8630 remBindingTables = renderHal->StateHeapSettings.iBindingTables - execGroupParam->numKernels;
8631
8632 if (remBindingTables > 0)
8633 {
8634 for (i = 0; i < remBindingTables; i++)
8635 {
8636 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignBindingTable(
8637 renderHal,
8638 &bindingTable));
8639 }
8640 }
8641
8642 // until now, we know binding table index for debug surface
8643 // let's get system thread
8644 if (execGroupParam->kernelDebugEnabled)
8645 {
8646 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetSipBinary(state));
8647 }
8648
8649 osInterface = state->osInterface;
8650 osInterface->pfnResetPerfBufferID(osInterface);
8651 if (osInterface->pfnIsPerfTagSet(osInterface) == false)
8652 {
8653 osInterface->pfnIncPerfFrameID(osInterface);
8654 int perfTag = HalCm_GetKernelPerfTag(state, execGroupParam->kernels, execGroupParam->numKernels);
8655 osInterface->pfnSetPerfTag(osInterface, (uint16_t)perfTag);
8656 }
8657
8658 // Submit HW commands and states
8659 CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SubmitCommands(
8660 nullptr, taskId, execGroupParam->kernels, &cmdBuffer));
8661
8662 // Set the Task ID
8663 execGroupParam->taskIdOut = taskId;
8664
8665 // Set OS data
8666 if(cmdBuffer)
8667 {
8668 execGroupParam->osData = cmdBuffer;
8669 }
8670
8671 // Update the task ID table
8672 state->taskStatusTable[taskId] = (char)taskId;
8673
8674 finish:
8675
8676 if (state->dshEnabled)
8677 {
8678 state->criticalSectionDSH->Acquire();
8679 if (mediaState && eStatus != MOS_STATUS_SUCCESS)
8680 {
8681 // Failed, release media state and heap resources
8682 renderHal->pfnReleaseDynamicState(renderHal, mediaState);
8683 }
8684 else
8685 {
8686 renderHal->pfnSubmitDynamicState(renderHal, mediaState);
8687 }
8688 state->criticalSectionDSH->Release();
8689 }
8690
8691 return eStatus;
8692 }
8693
HalCm_ExecuteHintsTask(PCM_HAL_STATE state,PCM_HAL_EXEC_HINTS_TASK_PARAM execHintsParam)8694 MOS_STATUS HalCm_ExecuteHintsTask(
8695 PCM_HAL_STATE state, // [in] Pointer to CM State
8696 PCM_HAL_EXEC_HINTS_TASK_PARAM execHintsParam) // [in] Pointer to Task Param
8697 {
8698 MOS_STATUS eStatus;
8699 PRENDERHAL_INTERFACE renderHal;
8700 PRENDERHAL_MEDIA_STATE mediaState;
8701 PMHW_BATCH_BUFFER batchBuffer;
8702 PCM_HAL_BB_ARGS bbCmArgs;
8703 PCM_HAL_KERNEL_PARAM kernelParam;
8704 uint32_t i;
8705 uint32_t numTasks;
8706 uint64_t origKernelIds[CM_MAX_KERNELS_PER_TASK];
8707 int32_t taskId;
8708 int32_t remBindingTables;
8709 int32_t bindingTable;
8710 uint32_t vfeCurbeSize;
8711 uint32_t maxInlineDataSize;
8712 uint32_t maxIndirectDataSize;
8713 int32_t *bindingTableEntries;
8714 int32_t *mediaIds;
8715 PRENDERHAL_KRN_ALLOCATION *krnAllocations;
8716 PCM_HAL_INDEX_PARAM indexParams;
8717 bool useMediaObjects;
8718 void *cmdBuffer;
8719 bool splitTask;
8720 bool lastTask;
8721 PMOS_INTERFACE osInterface = nullptr;
8722
8723 //------------------------------------
8724 CM_ASSERT(state);
8725 CM_ASSERT(execHintsParam);
8726 //------------------------------------
8727
8728 eStatus = MOS_STATUS_SUCCESS;
8729 renderHal = state->renderHal;
8730 mediaState = nullptr;
8731 batchBuffer = nullptr;
8732 bindingTableEntries = nullptr;
8733 mediaIds = nullptr;
8734 krnAllocations = nullptr;
8735 indexParams = nullptr;
8736 useMediaObjects = false;
8737 cmdBuffer = nullptr;
8738 splitTask = false;
8739 lastTask = false;
8740
8741 if (execHintsParam->numKernels > state->cmDeviceParam.maxKernelsPerTask)
8742 {
8743 eStatus = MOS_STATUS_INVALID_PARAMETER;
8744 CM_ASSERTMESSAGE("Number of Kernels per task exceeds maximum");
8745 goto finish;
8746 }
8747
8748 bindingTableEntries = (int*)MOS_AllocAndZeroMemory(sizeof(int)*execHintsParam->numKernels);
8749 mediaIds = (int*)MOS_AllocAndZeroMemory(sizeof(int)* execHintsParam->numKernels);
8750 krnAllocations = (PRENDERHAL_KRN_ALLOCATION *)MOS_AllocAndZeroMemory(sizeof(void *)* execHintsParam->numKernels);
8751 indexParams = (PCM_HAL_INDEX_PARAM)MOS_AllocAndZeroMemory(sizeof(CM_HAL_INDEX_PARAM)* execHintsParam->numKernels);
8752 if (!bindingTableEntries || !mediaIds || !krnAllocations || !indexParams)
8753 {
8754 eStatus = MOS_STATUS_INVALID_PARAMETER;
8755 CM_ASSERTMESSAGE("Memory allocation failed in ExecuteHints Task");
8756 goto finish;
8757 }
8758
8759 // check hints to see if need to split into multiple tasks
8760 numTasks = ( execHintsParam->hints & CM_HINTS_MASK_NUM_TASKS ) >> CM_HINTS_NUM_BITS_TASK_POS;
8761 if( numTasks > 1 )
8762 {
8763 splitTask = true;
8764 }
8765
8766 MOS_FillMemory(bindingTableEntries, sizeof(int) * execHintsParam->numKernels, CM_INVALID_INDEX);
8767 MOS_FillMemory(mediaIds, sizeof(int) * execHintsParam->numKernels, CM_INVALID_INDEX);
8768 MOS_FillMemory(krnAllocations, sizeof(void *)* execHintsParam->numKernels, 0);
8769
8770 // Reset states before execute
8771 // (clear allocations, get GSH allocation index + any additional housekeeping)
8772 state->osInterface->pfnResetOsStates(state->osInterface);
8773 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnReset(renderHal));
8774
8775 MOS_ZeroMemory(state->taskParam, sizeof(CM_HAL_TASK_PARAM));
8776
8777 MOS_FillMemory(
8778 state->bti2DIndexTable,
8779 state->cmDeviceParam.max2DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8780 CM_INVALID_INDEX );
8781
8782 MOS_FillMemory(
8783 state->bti2DUPIndexTable,
8784 state->cmDeviceParam.max2DSurfaceUPTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8785 CM_INVALID_INDEX );
8786
8787 MOS_FillMemory(
8788 state->bti3DIndexTable,
8789 state->cmDeviceParam.max3DSurfaceTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8790 CM_INVALID_INDEX );
8791
8792 MOS_FillMemory(
8793 state->btiBufferIndexTable,
8794 state->cmDeviceParam.maxBufferTableSize * sizeof( CM_HAL_MULTI_USE_BTI_ENTRY ),
8795 CM_INVALID_INDEX );
8796
8797 MOS_FillMemory(
8798 state->samplerIndexTable,
8799 state->cmDeviceParam.maxSamplerTableSize,
8800 CM_INVALID_INDEX);
8801
8802 MOS_FillMemory(
8803 state->sampler8x8IndexTable,
8804 state->cmDeviceParam.maxSampler8x8TableSize,
8805 CM_INVALID_INDEX);
8806
8807 state->walkerParams.CmWalkerEnable = 0;
8808
8809 vfeCurbeSize = 0;
8810 maxInlineDataSize = 0;
8811 maxIndirectDataSize = 0;
8812
8813 MOS_ZeroMemory(&origKernelIds, CM_MAX_KERNELS_PER_TASK * sizeof(uint64_t));
8814
8815 // Get the Task Id
8816 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetNewTaskId(state, &taskId));
8817
8818 // Parse the task
8819 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_ParseHintsTask(state, execHintsParam));
8820
8821 // Assign a MediaState from the MediaStateHeap
8822 // !!!! THIS MUST BE BEFORE Getting the BATCH_BUFFER !!!
8823 // since this method syncs the batch buffer and media state.
8824 if (state->dshEnabled)
8825 {
8826 if ( execHintsParam->userDefinedMediaState != nullptr )
8827 {
8828 // use exsiting media state as current state
8829 mediaState = static_cast< PRENDERHAL_MEDIA_STATE >( execHintsParam->userDefinedMediaState );
8830
8831 // update current state to dsh
8832 renderHal->pStateHeap->pCurMediaState = mediaState;
8833 // Refresh sync tag for all media states in submitted queue
8834 state->criticalSectionDSH->Acquire();
8835 renderHal->pfnRefreshSync( renderHal );
8836 state->criticalSectionDSH->Release();
8837 }
8838 else
8839 {
8840 // Obtain media state configuration - Curbe, Samplers (3d/AVS/VA), 8x8 sampler table, Media IDs, Kernel Spill area
8841 RENDERHAL_DYNAMIC_MEDIA_STATE_PARAMS params;
8842 state->criticalSectionDSH->Acquire();
8843 HalCm_DSH_GetDynamicStateConfiguration(state, ¶ms, execHintsParam->numKernels, execHintsParam->kernels, execHintsParam->kernelCurbeOffset);
8844
8845 // Prepare Media States to accommodate all parameters - Curbe, Samplers (3d/AVS/VA), 8x8 sampler table, Media IDs
8846 mediaState = renderHal->pfnAssignDynamicState(renderHal, ¶ms, RENDERHAL_COMPONENT_CM);
8847 state->criticalSectionDSH->Release();
8848 }
8849 }
8850 else
8851 {
8852 mediaState = renderHal->pfnAssignMediaState(renderHal, RENDERHAL_COMPONENT_CM);
8853 }
8854 CM_CHK_NULL_GOTOFINISH_MOSERROR(mediaState);
8855
8856 if (state->useNewSamplerHeap == false)
8857 {
8858 HalCm_AcquireSamplerStatistics(state);
8859 }
8860
8861 // Assign/Reset SSH instance
8862 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignSshInstance(renderHal));
8863
8864 if (!state->walkerParams.CmWalkerEnable)
8865 {
8866 if( splitTask )
8867 {
8868 // save original kernel IDs for kernel binary re-use in GSH
8869 for( i = 0; i < execHintsParam->numKernels; ++i )
8870 {
8871 origKernelIds[i] = execHintsParam->kernels[i]->kernelId;
8872 }
8873
8874 // need to add tag to kernel IDs to distinguish batch buffer
8875 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_AddKernelIDTag(execHintsParam->kernels, execHintsParam->numKernels, numTasks, execHintsParam->numTasksGenerated));
8876 }
8877
8878 // Get the Batch buffer
8879 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetBatchBuffer(state, execHintsParam->numKernels, execHintsParam->kernels, &batchBuffer));
8880
8881 if( splitTask )
8882 {
8883 // restore kernel IDs for kernel binary re-use in GSH
8884 for( i = 0; i < execHintsParam->numKernels; ++i )
8885 {
8886 execHintsParam->kernels[i]->kernelId = origKernelIds[i];
8887 }
8888 }
8889
8890 // Lock the batch buffer
8891 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer->pPrivateData);
8892 bbCmArgs = (PCM_HAL_BB_ARGS)batchBuffer->pPrivateData;
8893 if ( (bbCmArgs->refCount == 1) ||
8894 ( state->taskParam->reuseBBUpdateMask == 1) )
8895 {
8896 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnLockBB(renderHal, batchBuffer));
8897 }
8898 }
8899
8900 // Load all kernels in the same state heap - expand ISH if necessary BEFORE programming media states.
8901 // This is better than having to expand ISH in the middle of loading, when part of MediaIDs are
8902 // already programmed - not a problem in the old implementation where it would simply remove old
8903 // kernels out of the way.
8904 if (state->dshEnabled)
8905 {
8906 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_DSH_LoadKernelArray(state, execHintsParam->kernels, execHintsParam->numKernels, krnAllocations));
8907 }
8908
8909 // 0: media walker
8910 // 1: media object
8911 if( (execHintsParam->hints & CM_HINTS_MASK_MEDIAOBJECT) == CM_HINTS_MASK_MEDIAOBJECT )
8912 {
8913 for (i = 0; i < execHintsParam->numKernels; ++i)
8914 {
8915 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_SetupStatesForKernelInitial(state, mediaState, batchBuffer, taskId, execHintsParam->kernels[i], &indexParams[i],
8916 execHintsParam->kernelCurbeOffset[i], bindingTableEntries[i], mediaIds[i], krnAllocations[i]));
8917 }
8918
8919 CM_CHK_NULL_GOTOFINISH_MOSERROR(batchBuffer);
8920
8921 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_FinishStatesForKernelMix(state, batchBuffer, taskId, execHintsParam->kernels,
8922 indexParams, bindingTableEntries, mediaIds, krnAllocations, execHintsParam->numKernels, execHintsParam->hints, execHintsParam->isLastTask));
8923
8924 for( i = 0; i < execHintsParam->numKernels; ++i)
8925 {
8926 kernelParam = execHintsParam->kernels[i];
8927 vfeCurbeSize += MOS_ALIGN_CEIL(kernelParam->totalCurbeSize, state->renderHal->dwCurbeBlockAlign);
8928 if( kernelParam->payloadSize > maxInlineDataSize)
8929 {
8930 maxInlineDataSize = kernelParam->payloadSize;
8931 }
8932 if( kernelParam->indirectDataParam.indirectDataSize > maxIndirectDataSize )
8933 {
8934 maxIndirectDataSize = kernelParam->indirectDataParam.indirectDataSize;
8935 }
8936 }
8937
8938 // Store the Max Payload Sizes in the Task Param
8939 state->taskParam->vfeCurbeSize = vfeCurbeSize;
8940 if( maxIndirectDataSize)
8941 {
8942 state->taskParam->vfeCurbeSize = maxIndirectDataSize;
8943 }
8944 else
8945 {
8946 state->taskParam->urbEntrySize = maxInlineDataSize;
8947 }
8948
8949 // We may have to send additional Binding table commands in command buffer.
8950 // This is needed because the surface offset (from the base on SSH)
8951 // calculation takes into account the max binding tables allocated in the
8952 // SSH.
8953 remBindingTables = state->cmDeviceParam.maxKernelsPerTask -
8954 execHintsParam->numKernels;
8955
8956 if( remBindingTables > 0)
8957 {
8958 for( i = 0; i < (uint32_t)remBindingTables; ++i)
8959 {
8960 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnAssignBindingTable(
8961 renderHal,
8962 &bindingTable));
8963 }
8964 }
8965
8966 osInterface = state->osInterface;
8967 osInterface->pfnResetPerfBufferID(osInterface);
8968 if (osInterface->pfnIsPerfTagSet(osInterface) == false)
8969 {
8970 osInterface->pfnIncPerfFrameID(osInterface);
8971 int perfTag = HalCm_GetKernelPerfTag(state, execHintsParam->kernels, execHintsParam->numKernels);
8972 osInterface->pfnSetPerfTag(osInterface, (uint16_t)perfTag);
8973 }
8974
8975 // Submit HW commands and states
8976 CM_CHK_MOSSTATUS_GOTOFINISH(state->cmHalInterface->SubmitCommands(
8977 batchBuffer, taskId, execHintsParam->kernels, &cmdBuffer));
8978
8979 // Set the Task ID
8980 execHintsParam->taskIdOut = taskId;
8981
8982 // Set OS data
8983 if( cmdBuffer )
8984 {
8985 execHintsParam->osData = cmdBuffer;
8986 }
8987
8988 // Update the task ID table
8989 state->taskStatusTable[taskId] = (char)taskId;
8990 }
8991 else
8992 {
8993 // use media walker
8994 // unimplemented for now
8995 CM_ASSERTMESSAGE("Error: Media walker is not supported.");
8996 eStatus = MOS_STATUS_UNKNOWN;
8997 }
8998
8999 finish:
9000
9001 if (state->dshEnabled)
9002 {
9003 state->criticalSectionDSH->Acquire();
9004 if (mediaState && eStatus != MOS_STATUS_SUCCESS)
9005 {
9006 // Failed, release media state and heap resources
9007 renderHal->pfnReleaseDynamicState(renderHal, mediaState);
9008 }
9009 else
9010 {
9011 renderHal->pfnSubmitDynamicState(renderHal, mediaState);
9012 }
9013 state->criticalSectionDSH->Release();
9014 }
9015
9016 if (batchBuffer) // for MediaWalker, batchBuffer is empty
9017 {
9018 if (batchBuffer->bLocked)
9019 {
9020 // Only happens in Error cases
9021 if (batchBuffer->pPrivateData && ((PCM_HAL_BB_ARGS)batchBuffer->pPrivateData)->refCount == 1)
9022 {
9023 renderHal->pfnUnlockBB(renderHal, batchBuffer);
9024 }
9025 else if (batchBuffer->pPrivateData == nullptr)
9026 {
9027 eStatus = MOS_STATUS_NULL_POINTER;
9028 }
9029 }
9030 }
9031
9032 // free memory
9033 if( bindingTableEntries ) MOS_FreeMemory(bindingTableEntries);
9034 if( mediaIds ) MOS_FreeMemory(mediaIds);
9035 if( krnAllocations ) MOS_FreeMemory(krnAllocations);
9036 if( indexParams ) MOS_FreeMemory( indexParams );
9037
9038 return eStatus;
9039 }
9040
9041 //*-----------------------------------------------------------------------------
9042 //| Purpose: Send Commands to HW
9043 //| Returns: Get the HAL Max values
9044 //*-----------------------------------------------------------------------------
HalCm_GetMaxValues(PCM_HAL_STATE state,PCM_HAL_MAX_VALUES maxValues)9045 MOS_STATUS HalCm_GetMaxValues(
9046 PCM_HAL_STATE state, // [in] Pointer to CM State
9047 PCM_HAL_MAX_VALUES maxValues) // [out] Pointer to Max values
9048 {
9049 PRENDERHAL_INTERFACE renderHal;
9050
9051 renderHal = state->renderHal;
9052
9053 maxValues->maxTasks = state->cmDeviceParam.maxTasks;
9054 maxValues->maxKernelsPerTask = CM_MAX_KERNELS_PER_TASK;
9055 maxValues->maxKernelBinarySize = state->cmDeviceParam.maxKernelBinarySize;
9056 maxValues->maxSpillSizePerHwThread = state->cmDeviceParam.maxPerThreadScratchSpaceSize;
9057 maxValues->maxSamplerTableSize = CM_MAX_SAMPLER_TABLE_SIZE;
9058 maxValues->maxBufferTableSize = CM_MAX_BUFFER_SURFACE_TABLE_SIZE;
9059 maxValues->max2DSurfaceTableSize = CM_MAX_2D_SURFACE_TABLE_SIZE;
9060 maxValues->max3DSurfaceTableSize = CM_MAX_3D_SURFACE_TABLE_SIZE;
9061 maxValues->maxArgsPerKernel = CM_MAX_ARGS_PER_KERNEL;
9062 maxValues->maxUserThreadsPerTask = CM_MAX_USER_THREADS;
9063 maxValues->maxUserThreadsPerTaskNoThreadArg = CM_MAX_USER_THREADS_NO_THREADARG;
9064 maxValues->maxArgByteSizePerKernel = CM_MAX_ARG_BYTE_PER_KERNEL;
9065 maxValues->maxSurfacesPerKernel = renderHal->pHwCaps->dwMaxBTIndex;
9066 maxValues->maxSamplersPerKernel = renderHal->pHwCaps->dwMaxUnormSamplers;
9067 maxValues->maxHwThreads = renderHal->pHwCaps->dwMaxThreads;
9068
9069 return MOS_STATUS_SUCCESS;
9070 }
9071
9072 //*-----------------------------------------------------------------------------
9073 //| Purpose: Get the HAL Max extended values
9074 //| Returns: Get the HAL Max extended values
9075 //*-----------------------------------------------------------------------------
HalCm_GetMaxValuesEx(PCM_HAL_STATE state,PCM_HAL_MAX_VALUES_EX maxValuesEx)9076 MOS_STATUS HalCm_GetMaxValuesEx(
9077 PCM_HAL_STATE state, // [in] Pointer to CM State
9078 PCM_HAL_MAX_VALUES_EX maxValuesEx) // [out] Pointer to extended Max values
9079 {
9080 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9081 maxValuesEx->max2DUPSurfaceTableSize = CM_MAX_2D_SURFACE_UP_TABLE_SIZE;
9082 maxValuesEx->maxSampler8x8TableSize = CM_MAX_SAMPLER_8X8_TABLE_SIZE;
9083 maxValuesEx->maxCURBESizePerKernel = CM_MAX_CURBE_SIZE_PER_KERNEL;
9084 maxValuesEx->maxCURBESizePerTask = CM_MAX_CURBE_SIZE_PER_TASK;
9085 maxValuesEx->maxIndirectDataSizePerKernel = CM_MAX_INDIRECT_DATA_SIZE_PER_KERNEL;
9086
9087 //MaxThreadWidth x MaxThreadHeight x ColorCount
9088 maxValuesEx->maxUserThreadsPerMediaWalker = \
9089 state->cmHalInterface->GetMediaWalkerMaxThreadWidth()* \
9090 state->cmHalInterface->GetMediaWalkerMaxThreadHeight() * \
9091 CM_THREADSPACE_MAX_COLOR_COUNT;
9092
9093 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetMaxThreadCountPerThreadGroup( state, &maxValuesEx->maxUserThreadsPerThreadGroup ) );
9094
9095 finish:
9096 return eStatus;
9097 }
9098
9099 //*-----------------------------------------------------------------------------
9100 //| Purpose: Register Sampler
9101 //| Returns: Result of the operation.
9102 //*-----------------------------------------------------------------------------
HalCm_RegisterSampler(PCM_HAL_STATE state,PCM_HAL_SAMPLER_PARAM param)9103 MOS_STATUS HalCm_RegisterSampler(
9104 PCM_HAL_STATE state, // [in] Pointer to CM State
9105 PCM_HAL_SAMPLER_PARAM param) // [in] Pointer to Sampler Param
9106 {
9107 MOS_STATUS eStatus;
9108 PMHW_SAMPLER_STATE_PARAM entry;
9109 uint32_t i;
9110
9111 eStatus = MOS_STATUS_SUCCESS;
9112 entry = nullptr;
9113
9114 // Find a free slot
9115 for (i = 0; i < state->cmDeviceParam.maxSamplerTableSize; i++)
9116 {
9117 if (!state->samplerTable[i].bInUse)
9118 {
9119 entry = &state->samplerTable[i];
9120 param->handle = (uint32_t)i;
9121 break;
9122 }
9123 }
9124
9125 if (!entry)
9126 {
9127 eStatus = MOS_STATUS_INVALID_PARAMETER;
9128 CM_ASSERTMESSAGE("Sampler table is full");
9129 goto finish;
9130 }
9131
9132 entry->SamplerType = MHW_SAMPLER_TYPE_3D;
9133 if (state->useNewSamplerHeap == true)
9134 {
9135 entry->ElementType = MHW_Sampler1Element;
9136 }
9137 else
9138 {
9139 entry->ElementType = MHW_Sampler4Elements;
9140 }
9141 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGfxMapFilter(param->minFilter, &entry->Unorm.MinFilter));
9142 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGfxMapFilter(param->magFilter, &entry->Unorm.MagFilter));
9143 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGfxTextAddress(param->addressU, &entry->Unorm.AddressU));
9144 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGfxTextAddress(param->addressV, &entry->Unorm.AddressV));
9145 CM_CHK_MOSSTATUS_GOTOFINISH(state->pfnGetGfxTextAddress(param->addressW, &entry->Unorm.AddressW));
9146
9147 entry->Unorm.SurfaceFormat = (MHW_SAMPLER_SURFACE_PIXEL_TYPE)param->surfaceFormat;
9148 switch (entry->Unorm.SurfaceFormat)
9149 {
9150 case MHW_SAMPLER_SURFACE_PIXEL_UINT:
9151 entry->Unorm.BorderColorRedU = param->borderColorRedU;
9152 entry->Unorm.BorderColorGreenU = param->borderColorGreenU;
9153 entry->Unorm.BorderColorBlueU = param->borderColorBlueU;
9154 entry->Unorm.BorderColorAlphaU = param->borderColorAlphaU;
9155 break;
9156 case MHW_SAMPLER_SURFACE_PIXEL_SINT:
9157 entry->Unorm.BorderColorRedS = param->borderColorRedS;
9158 entry->Unorm.BorderColorGreenS = param->borderColorGreenS;
9159 entry->Unorm.BorderColorBlueS = param->borderColorBlueS;
9160 entry->Unorm.BorderColorAlphaS = param->borderColorAlphaS;
9161 break;
9162 default:
9163 entry->Unorm.BorderColorRedF = param->borderColorRedF;
9164 entry->Unorm.BorderColorGreenF = param->borderColorGreenF;
9165 entry->Unorm.BorderColorBlueF = param->borderColorBlueF;
9166 entry->Unorm.BorderColorAlphaF = param->borderColorAlphaF;
9167 }
9168 entry->Unorm.bBorderColorIsValid = true;
9169
9170 entry->bInUse = true;
9171
9172 finish:
9173 return eStatus;
9174 }
9175
9176 //*-----------------------------------------------------------------------------
9177 //| Purpose: UnRegister Sampler
9178 //| Returns: Result of the operation.
9179 //*-----------------------------------------------------------------------------
HalCm_UnRegisterSampler(PCM_HAL_STATE state,uint32_t handle)9180 MOS_STATUS HalCm_UnRegisterSampler(
9181 PCM_HAL_STATE state, // [in] Pointer to CM State
9182 uint32_t handle) // [in] Pointer to Sampler Param
9183 {
9184 MOS_STATUS eStatus;
9185 PMHW_SAMPLER_STATE_PARAM entry;
9186
9187 eStatus = MOS_STATUS_SUCCESS;
9188
9189 if (handle >= state->cmDeviceParam.maxSamplerTableSize)
9190 {
9191 eStatus = MOS_STATUS_INVALID_HANDLE;
9192 CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
9193 goto finish;
9194 }
9195
9196 entry = &state->samplerTable[handle];
9197
9198 // need to clear the state entirely instead of just setting bInUse to false
9199 MOS_ZeroMemory(entry, sizeof(MHW_SAMPLER_STATE_PARAM));
9200
9201 finish:
9202 return eStatus;
9203 }
9204
9205 //*-----------------------------------------------------------------------------
9206 //| Purpose: Register Sampler8x8
9207 //| Returns: Result of the operation.
9208 //*-----------------------------------------------------------------------------
HalCm_RegisterSampler8x8(PCM_HAL_STATE state,PCM_HAL_SAMPLER_8X8_PARAM param)9209 MOS_STATUS HalCm_RegisterSampler8x8(
9210 PCM_HAL_STATE state,
9211 PCM_HAL_SAMPLER_8X8_PARAM param)
9212 {
9213 return state->cmHalInterface->RegisterSampler8x8(param);
9214 }
9215
9216 //*-----------------------------------------------------------------------------
9217 //| Purpose: UnRegister Sampler
9218 //| Returns: Result of the operation.
9219 //*-----------------------------------------------------------------------------
HalCm_UnRegisterSampler8x8(PCM_HAL_STATE state,uint32_t handle)9220 MOS_STATUS HalCm_UnRegisterSampler8x8(
9221 PCM_HAL_STATE state, // [in] Pointer to CM State
9222 uint32_t handle) // [in] Pointer to Sampler8x8 Param
9223 {
9224 MOS_STATUS eStatus;
9225 uint32_t index8x8;
9226 PMHW_SAMPLER_STATE_PARAM entry;
9227 PCM_HAL_SAMPLER_8X8_ENTRY sampler8x8Entry;
9228
9229 eStatus = MOS_STATUS_SUCCESS;
9230
9231 if (handle >= state->cmDeviceParam.maxSamplerTableSize) {
9232 eStatus = MOS_STATUS_INVALID_HANDLE;
9233 CM_ASSERTMESSAGE("Invalid handle '%d'", handle);
9234 goto finish;
9235 }
9236
9237 entry = &state->samplerTable[handle];
9238 entry->bInUse = false;
9239
9240 if ( entry->SamplerType == MHW_SAMPLER_TYPE_AVS )
9241 {
9242 index8x8 = entry->Avs.stateID;
9243 if ( index8x8 >= state->cmDeviceParam.maxSampler8x8TableSize )
9244 {
9245 eStatus = MOS_STATUS_INVALID_HANDLE;
9246 CM_ASSERTMESSAGE( "Invalid 8x8 handle '%d'", handle );
9247 goto finish;
9248 }
9249
9250 sampler8x8Entry = &state->sampler8x8Table[ index8x8 ];
9251 sampler8x8Entry->inUse = false;
9252 }
9253
9254 // need to clear the state entirely instead of just setting bInUse to false
9255 MOS_ZeroMemory(entry, sizeof(MHW_SAMPLER_STATE_PARAM));
9256 finish:
9257 return eStatus;
9258 }
9259
9260 //*-----------------------------------------------------------------------------
9261 //| Purpose: Frees the buffer and removes from the table
9262 //| Returns: Result of the operation.
9263 //*-----------------------------------------------------------------------------
HalCm_FreeBuffer(PCM_HAL_STATE state,uint32_t handle)9264 MOS_STATUS HalCm_FreeBuffer(
9265 PCM_HAL_STATE state, // [in] Pointer to CM State
9266 uint32_t handle) // [in] Pointer to Buffer Param
9267 {
9268 MOS_STATUS eStatus;
9269 PCM_HAL_BUFFER_ENTRY entry;
9270 PMOS_INTERFACE osInterface;
9271 MOS_GFXRES_FREE_FLAGS resFreeFlags = {0};
9272
9273 resFreeFlags.AssumeNotInUse = 1;
9274 eStatus = MOS_STATUS_SUCCESS;
9275 osInterface = state->osInterface;
9276
9277 // Get the Buffer Entry
9278 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetBufferEntry(state, handle, &entry));
9279
9280 if (state->advExecutor)
9281 {
9282 state->advExecutor->DeleteBufferStateMgr(entry->surfStateMgr);
9283 }
9284 if (entry->isAllocatedbyCmrtUmd)
9285 {
9286 osInterface->pfnFreeResourceWithFlag(osInterface, &entry->osResource, resFreeFlags.Value);
9287 }
9288 else
9289 {
9290 HalCm_OsResource_Unreference(&entry->osResource);
9291 }
9292 osInterface->pfnResetResourceAllocationIndex(osInterface, &entry->osResource);
9293 entry->size = 0;
9294 entry->address = nullptr;
9295
9296 finish:
9297 return eStatus;
9298 }
9299
9300 //*-----------------------------------------------------------------------------
9301 //| Purpose: Set surface read flag used in on demand sync
9302 //| Returns: Result of the operation.
9303 //*-----------------------------------------------------------------------------
HalCm_SetSurfaceReadFlag(PCM_HAL_STATE state,uint32_t handle,bool readSync,MOS_GPU_CONTEXT gpuContext)9304 MOS_STATUS HalCm_SetSurfaceReadFlag(
9305 PCM_HAL_STATE state, // [in] Pointer to CM State
9306 uint32_t handle, // [in] index of surface 2d
9307 bool readSync,
9308 MOS_GPU_CONTEXT gpuContext)
9309 {
9310 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9311 PCM_HAL_SURFACE2D_ENTRY entry;
9312
9313 // Get the Buffer Entry
9314 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurface2DEntry(state, handle, &entry));
9315
9316 if (HalCm_IsValidGpuContext(gpuContext))
9317 {
9318 entry->readSyncs[gpuContext] = readSync;
9319 if (state->advExecutor)
9320 {
9321 state->advExecutor->Set2DRenderTarget(entry->surfStateMgr, !readSync);
9322 }
9323 }
9324 else
9325 {
9326 return MOS_STATUS_UNKNOWN;
9327 }
9328
9329 finish:
9330 return eStatus;
9331 }
9332
9333 //*-----------------------------------------------------------------------------
9334 //| Purpose: Read the data from buffer and return
9335 //| Returns: Result of the operation.
9336 //*-----------------------------------------------------------------------------
HalCm_LockBuffer(PCM_HAL_STATE state,PCM_HAL_BUFFER_PARAM param)9337 MOS_STATUS HalCm_LockBuffer(
9338 PCM_HAL_STATE state, // [in] Pointer to CM State
9339 PCM_HAL_BUFFER_PARAM param) // [in] Pointer to Buffer Param
9340 {
9341 MOS_STATUS eStatus;
9342 PCM_HAL_BUFFER_ENTRY entry;
9343 PMOS_INTERFACE osInterface;
9344 MOS_LOCK_PARAMS lockFlags;
9345 eStatus = MOS_STATUS_SUCCESS;
9346 osInterface = state->osInterface;
9347
9348 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetBufferEntry(state, param->handle, &entry));
9349 if ((param->lockFlag != CM_HAL_LOCKFLAG_READONLY) && (param->lockFlag != CM_HAL_LOCKFLAG_WRITEONLY) )
9350 {
9351 eStatus = MOS_STATUS_INVALID_HANDLE;
9352 CM_ASSERTMESSAGE("Invalid lock flag!");
9353 eStatus = MOS_STATUS_UNKNOWN;
9354 goto finish;
9355 }
9356
9357 // RegisterResource will be called in AddResourceToHWCmd. It is not allowed to be called by hal explicitly
9358 if (!osInterface->apoMosEnabled)
9359 {
9360 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(
9361 osInterface->pfnRegisterResource(osInterface, &entry->osResource, true, true));
9362 }
9363
9364 // Lock the resource
9365 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
9366
9367 if (param->lockFlag == CM_HAL_LOCKFLAG_READONLY)
9368 {
9369 lockFlags.ReadOnly = true;
9370 }
9371 else
9372 {
9373 lockFlags.WriteOnly = true;
9374 }
9375
9376 lockFlags.ForceCached = true;
9377 param->data = osInterface->pfnLockResource(
9378 osInterface,
9379 &entry->osResource,
9380 &lockFlags);
9381 CM_CHK_NULL_GOTOFINISH_MOSERROR(param->data);
9382
9383 finish:
9384 return eStatus;
9385 }
9386
9387 //*-----------------------------------------------------------------------------
9388 //| Purpose: Writes the data to buffer
9389 //| Returns: Result of the operation.
9390 //*-----------------------------------------------------------------------------
HalCm_UnlockBuffer(PCM_HAL_STATE state,PCM_HAL_BUFFER_PARAM param)9391 MOS_STATUS HalCm_UnlockBuffer(
9392 PCM_HAL_STATE state, // [in] Pointer to CM State
9393 PCM_HAL_BUFFER_PARAM param) // [in] Pointer to Buffer Param
9394 {
9395 MOS_STATUS eStatus;
9396 PCM_HAL_BUFFER_ENTRY entry;
9397 PMOS_INTERFACE osInterface;
9398
9399 eStatus = MOS_STATUS_SUCCESS;
9400 osInterface = state->osInterface;
9401
9402 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetBufferEntry(state, param->handle, &entry));
9403
9404 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnUnlockResource(osInterface, &entry->osResource));
9405
9406 finish:
9407 return eStatus;
9408 }
9409
9410 //*-----------------------------------------------------------------------------
9411 //| Purpose: Frees the buffer and removes from the table
9412 //| Returns: Result of the operation.
9413 //*-----------------------------------------------------------------------------
HalCm_FreeSurface2DUP(PCM_HAL_STATE state,uint32_t handle)9414 MOS_STATUS HalCm_FreeSurface2DUP(
9415 PCM_HAL_STATE state, // [in] Pointer to CM State
9416 uint32_t handle) // [in] Pointer to Buffer Param
9417 {
9418 MOS_STATUS eStatus;
9419 PCM_HAL_SURFACE2D_UP_ENTRY entry;
9420 PMOS_INTERFACE osInterface;
9421 MOS_GFXRES_FREE_FLAGS resFreeFlags = {0};
9422
9423 resFreeFlags.AssumeNotInUse = 1;
9424 eStatus = MOS_STATUS_SUCCESS;
9425 osInterface = state->osInterface;
9426
9427 // Get the Buffer Entry
9428 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetResourceUPEntry(state, handle, &entry));
9429
9430 if (state->advExecutor)
9431 {
9432 state->advExecutor->Delete2Dor3DStateMgr(entry->surfStateMgr);
9433 }
9434
9435 osInterface->pfnFreeResourceWithFlag(osInterface, &entry->osResource, resFreeFlags.Value);
9436
9437 osInterface->pfnResetResourceAllocationIndex(osInterface, &entry->osResource);
9438 entry->width = 0;
9439
9440 finish:
9441 return eStatus;
9442 }
9443
9444 //*-----------------------------------------------------------------------------
9445 //| Purpose: Get 2D surface pitch and physical size
9446 //| Returns: Result of the operation.
9447 //*-----------------------------------------------------------------------------
HalCm_GetSurface2DTileYPitch(PCM_HAL_STATE state,PCM_HAL_SURFACE2D_PARAM param)9448 MOS_STATUS HalCm_GetSurface2DTileYPitch(
9449 PCM_HAL_STATE state, // [in] Pointer to CM State
9450 PCM_HAL_SURFACE2D_PARAM param) // [in] Pointer to Buffer Param
9451 {
9452 MOS_STATUS eStatus;
9453 MOS_SURFACE surface;
9454 PRENDERHAL_INTERFACE renderHal;
9455 uint32_t index;
9456 RENDERHAL_GET_SURFACE_INFO info;
9457
9458 //-----------------------------------------------
9459 CM_ASSERT(state);
9460 //-----------------------------------------------
9461
9462 eStatus = MOS_STATUS_UNKNOWN;
9463 renderHal = state->renderHal;
9464 index = param->handle;
9465
9466 // Get Details of 2D surface and fill the surface
9467 MOS_ZeroMemory(&surface, sizeof(surface));
9468
9469 surface.OsResource = state->umdSurf2DTable[index].osResource;
9470 surface.dwWidth = state->umdSurf2DTable[index].width;
9471 surface.dwHeight = state->umdSurf2DTable[index].height;
9472 surface.Format = state->umdSurf2DTable[index].format;
9473 surface.dwDepth = 1;
9474
9475 MOS_ZeroMemory(&info, sizeof(RENDERHAL_GET_SURFACE_INFO));
9476
9477 CM_CHK_MOSSTATUS_GOTOFINISH(RenderHal_GetSurfaceInfo(
9478 state->osInterface,
9479 &info,
9480 &surface));
9481
9482 param->pitch = surface.dwPitch;
9483
9484 finish:
9485 return eStatus;
9486 }
9487
9488 //*-----------------------------------------------------------------------------
9489 //| Purpose: Sets width and height values for 2D surface state
9490 //| Returns: Result of the operation.
9491 //*-----------------------------------------------------------------------------
HalCm_Set2DSurfaceStateParam(PCM_HAL_STATE state,PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM param,uint32_t aliasIndex,uint32_t handle)9492 MOS_STATUS HalCm_Set2DSurfaceStateParam(
9493 PCM_HAL_STATE state,
9494 PCM_HAL_SURFACE2D_SURFACE_STATE_PARAM param,
9495 uint32_t aliasIndex,
9496 uint32_t handle)
9497 {
9498 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9499
9500 CM_CHK_NULL_GOTOFINISH_MOSERROR(state);
9501 CM_CHK_NULL_GOTOFINISH_MOSERROR(param);
9502
9503 if (aliasIndex < state->surfaceArraySize)
9504 {
9505 state->umdSurf2DTable[handle].surfStateSet = true;
9506 }
9507 state->umdSurf2DTable[handle].surfaceStateParam[
9508 aliasIndex / state->surfaceArraySize] = *param;
9509
9510 finish:
9511 return eStatus;
9512 }
9513
9514 //*-----------------------------------------------------------------------------
9515 //| Purpose: Sets width and height values for 2D surface state
9516 //| Returns: Result of the operation.
9517 //*-----------------------------------------------------------------------------
HalCm_SetBufferSurfaceStateParameters(PCM_HAL_STATE state,PCM_HAL_BUFFER_SURFACE_STATE_PARAM param)9518 MOS_STATUS HalCm_SetBufferSurfaceStateParameters(
9519 PCM_HAL_STATE state,
9520 PCM_HAL_BUFFER_SURFACE_STATE_PARAM param)
9521 {
9522 MOS_STATUS eStatus;
9523 uint32_t size;
9524 uint32_t offset;
9525 uint32_t index;
9526 uint32_t aliasIndex;
9527
9528 CM_CHK_NULL_GOTOFINISH_MOSERROR(state);
9529 CM_CHK_NULL_GOTOFINISH_MOSERROR(param);
9530
9531 eStatus = MOS_STATUS_SUCCESS;
9532 index = param->handle;
9533 aliasIndex = param->aliasIndex;
9534
9535 if (aliasIndex < state->surfaceArraySize)
9536 state->bufferTable[index].surfStateSet = true;
9537
9538 state->bufferTable[index].surfaceStateEntry[aliasIndex / state->surfaceArraySize].surfaceStateSize = param->size;
9539 state->bufferTable[index].surfaceStateEntry[aliasIndex / state->surfaceArraySize].surfaceStateOffset = param->offset;
9540 state->bufferTable[index].surfaceStateEntry[aliasIndex / state->surfaceArraySize].surfaceStateMOCS = param->mocs;
9541
9542 finish:
9543 return eStatus;
9544 }
9545
9546 //*-----------------------------------------------------------------------------
9547 //| Purpose: Sets mocs value for surface
9548 //| Returns: Result of the operation.
9549 //*-----------------------------------------------------------------------------
HalCm_SetSurfaceMOCS(PCM_HAL_STATE state,uint32_t handle,uint16_t mocs,uint32_t argKind)9550 MOS_STATUS HalCm_SetSurfaceMOCS(
9551 PCM_HAL_STATE state,
9552 uint32_t handle,
9553 uint16_t mocs,
9554 uint32_t argKind)
9555 {
9556 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9557
9558 switch (argKind)
9559 {
9560 case CM_ARGUMENT_SURFACEBUFFER:
9561 state->bufferTable[handle].memObjCtl = mocs;
9562 if (state->advExecutor)
9563 {
9564 state->advExecutor->SetBufferMemoryObjectControl(state->bufferTable[handle].surfStateMgr, mocs);
9565 }
9566 break;
9567 case CM_ARGUMENT_SURFACE2D:
9568 case CM_ARGUMENT_SURFACE2D_SAMPLER:
9569 case CM_ARGUMENT_SURFACE_SAMPLER8X8_AVS:
9570 case CM_ARGUMENT_SURFACE_SAMPLER8X8_VA:
9571 state->umdSurf2DTable[handle].memObjCtl = mocs;
9572 if (state->advExecutor)
9573 {
9574 state->advExecutor->Set2Dor3DMemoryObjectControl(state->umdSurf2DTable[handle].surfStateMgr, mocs);
9575 }
9576 break;
9577 case CM_ARGUMENT_SURFACE2D_UP:
9578 case CM_ARGUMENT_SURFACE2DUP_SAMPLER:
9579 state->surf2DUPTable[handle].memObjCtl = mocs;
9580 if (state->advExecutor)
9581 {
9582 state->advExecutor->Set2Dor3DMemoryObjectControl(state->surf2DUPTable[handle].surfStateMgr, mocs);
9583 }
9584 break;
9585 case CM_ARGUMENT_SURFACE3D:
9586 state->surf3DTable[handle].memObjCtl = mocs;
9587 if (state->advExecutor)
9588 {
9589 state->advExecutor->Set2Dor3DMemoryObjectControl(state->surf3DTable[handle].surfStateMgr, mocs);
9590 }
9591 break;
9592 default:
9593 eStatus = MOS_STATUS_INVALID_PARAMETER;
9594 CM_ASSERTMESSAGE("Invalid argument type in MOCS settings");
9595 goto finish;
9596 }
9597
9598 finish:
9599 return eStatus;
9600 }
9601
9602 //*-----------------------------------------------------------------------------
9603 //| Purpose: Allocate surface 2D
9604 //| Returns: Result of the operation.
9605 //*-----------------------------------------------------------------------------
HalCm_AllocateSurface2D(PCM_HAL_STATE state,PCM_HAL_SURFACE2D_PARAM param)9606 MOS_STATUS HalCm_AllocateSurface2D(
9607 PCM_HAL_STATE state, // [in] Pointer to CM State
9608 PCM_HAL_SURFACE2D_PARAM param) // [in] Pointer to surface 2D Param
9609 {
9610 MOS_STATUS eStatus;
9611 PMOS_INTERFACE osInterface;
9612 PCM_HAL_SURFACE2D_ENTRY entry = nullptr;
9613 MOS_ALLOC_GFXRES_PARAMS allocParams;
9614 uint32_t i;
9615
9616 //-----------------------------------------------
9617 CM_ASSERT(param->width > 0);
9618 //-----------------------------------------------
9619
9620 eStatus = MOS_STATUS_SUCCESS;
9621 osInterface = state->osInterface;
9622
9623 // Find a free slot
9624 for (i = 0; i < state->cmDeviceParam.max2DSurfaceTableSize; i++)
9625 {
9626 if(Mos_ResourceIsNull(&state->umdSurf2DTable[i].osResource))
9627 {
9628 entry = &state->umdSurf2DTable[i];
9629 param->handle = (uint32_t)i;
9630 break;
9631 }
9632 }
9633
9634 if (!entry)
9635 {
9636 eStatus = MOS_STATUS_INVALID_PARAMETER;
9637 CM_ASSERTMESSAGE("Surface2D table is full");
9638 goto finish;
9639 }
9640
9641 if(param->isAllocatedbyCmrtUmd)
9642 {
9643 MOS_ZeroMemory(&allocParams, sizeof(MOS_ALLOC_GFXRES_PARAMS));
9644 allocParams.Type = MOS_GFXRES_2D;
9645 allocParams.dwWidth = param->width;
9646 allocParams.dwHeight = param->height;
9647 allocParams.pSystemMemory = param->data;
9648 allocParams.Format = param->format;
9649 allocParams.TileType = MOS_TILE_Y;
9650 allocParams.pBufName = "CmSurface2D";
9651
9652 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
9653 osInterface,
9654 &allocParams,
9655 &entry->osResource));
9656
9657 entry->width = param->width;
9658 entry->height = param->height;
9659 entry->format = param->format;
9660 entry->isAllocatedbyCmrtUmd = param->isAllocatedbyCmrtUmd;
9661 }
9662 else
9663 {
9664 entry->width = param->width;
9665 entry->height = param->height;
9666 entry->format = param->format;
9667 entry->isAllocatedbyCmrtUmd = false;
9668 entry->osResource = *param->mosResource;
9669 HalCm_OsResource_Reference(&entry->osResource);
9670 }
9671 // set default CM MOS usage
9672 entry->memObjCtl = (state->cmHalInterface->GetDefaultMOCS()) << 8;
9673
9674 if (state->advExecutor)
9675 {
9676 entry->surfStateMgr = state->advExecutor->Create2DStateMgr(&entry->osResource);
9677 state->advExecutor->Set2Dor3DOrigFormat(entry->surfStateMgr, entry->format);
9678 state->advExecutor->Set2Dor3DOrigDimension(entry->surfStateMgr,
9679 entry->width,
9680 entry->height,
9681 0); // no need to change depth in 2D surface
9682 }
9683
9684 for (int i = 0; i < CM_HAL_GPU_CONTEXT_COUNT; i++)
9685 {
9686 entry->readSyncs[i] = false;
9687 }
9688
9689 finish:
9690 return eStatus;
9691 }
9692
9693 //*-----------------------------------------------------------------------------
9694 //| Purpose: Allocate surface 2D
9695 //| Returns: Result of the operation.
9696 //*-----------------------------------------------------------------------------
HalCm_UpdateSurface2D(PCM_HAL_STATE state,PCM_HAL_SURFACE2D_PARAM param)9697 MOS_STATUS HalCm_UpdateSurface2D(
9698 PCM_HAL_STATE state, // [in] Pointer to CM State
9699 PCM_HAL_SURFACE2D_PARAM param) // [in] Pointer to surface 2D Param
9700 {
9701 MOS_STATUS hr;
9702 PMOS_INTERFACE osInterface;
9703 PCM_HAL_SURFACE2D_ENTRY entry = nullptr;
9704 MOS_ALLOC_GFXRES_PARAMS allocParams;
9705 uint32_t i = param->handle;
9706
9707 //-----------------------------------------------
9708 CM_ASSERT(param->width > 0);
9709 //-----------------------------------------------
9710
9711 hr = MOS_STATUS_SUCCESS;
9712 osInterface = state->osInterface;
9713
9714 entry = &state->umdSurf2DTable[i];
9715
9716 HalCm_OsResource_Unreference(&entry->osResource);
9717
9718 entry->width = param->width;
9719 entry->height = param->height;
9720 entry->format = param->format;
9721 entry->isAllocatedbyCmrtUmd = false;
9722 entry->osResource = *param->mosResource;
9723
9724 HalCm_OsResource_Reference(&entry->osResource);
9725
9726 if (state->advExecutor)
9727 {
9728 state->advExecutor->Delete2Dor3DStateMgr(entry->surfStateMgr);
9729 entry->surfStateMgr = state->advExecutor->Create2DStateMgr(&entry->osResource);
9730 state->advExecutor->Set2Dor3DOrigFormat(entry->surfStateMgr, entry->format);
9731 state->advExecutor->Set2Dor3DOrigDimension(entry->surfStateMgr,
9732 entry->width,
9733 entry->height,
9734 0); // no need to change depth in 2D surface
9735 }
9736
9737 for (int i = 0; i < CM_HAL_GPU_CONTEXT_COUNT; i++)
9738 {
9739 entry->readSyncs[i] = false;
9740 }
9741
9742 return hr;
9743 }
9744
9745 //*-----------------------------------------------------------------------------
9746 //| Purpose: Allocate Linear Buffer or BufferUP
9747 //| Returns: Result of the operation.
9748 //*-----------------------------------------------------------------------------
HalCm_UpdateBuffer(PCM_HAL_STATE state,PCM_HAL_BUFFER_PARAM param)9749 MOS_STATUS HalCm_UpdateBuffer(
9750 PCM_HAL_STATE state, // [in] Pointer to CM State
9751 PCM_HAL_BUFFER_PARAM param) // [in] Pointer to Buffer Param
9752 {
9753 MOS_STATUS hr;
9754 PMOS_INTERFACE osInterface;
9755 PCM_HAL_BUFFER_ENTRY entry = nullptr;
9756 MOS_ALLOC_GFXRES_PARAMS allocParams;
9757 uint32_t i = param->handle;
9758 PMOS_RESOURCE osResource;
9759
9760 //-----------------------------------------------
9761 CM_ASSERT(param->size > 0);
9762 //-----------------------------------------------
9763
9764 hr = MOS_STATUS_SUCCESS;
9765 osInterface = state->renderHal->pOsInterface;
9766
9767 entry = &state->bufferTable[i];
9768
9769 HalCm_OsResource_Unreference(&entry->osResource);
9770 entry->osResource = *param->mosResource;
9771 HalCm_OsResource_Reference(&entry->osResource);
9772
9773 entry->size = param->size;
9774 entry->isAllocatedbyCmrtUmd = false;
9775 entry->surfaceStateEntry[0].surfaceStateSize = entry->size;
9776 entry->surfaceStateEntry[0].surfaceStateOffset = 0;
9777 entry->surfaceStateEntry[0].surfaceStateMOCS = 0;
9778
9779 if (state->advExecutor)
9780 {
9781 state->advExecutor->DeleteBufferStateMgr(entry->surfStateMgr);
9782 entry->surfStateMgr = state->advExecutor->CreateBufferStateMgr(&entry->osResource);
9783 state->advExecutor->SetBufferOrigSize(entry->surfStateMgr, entry->size);
9784 }
9785
9786 return hr;
9787 }
9788
9789 //*-----------------------------------------------------------------------------
9790 //| Purpose: Frees the surface 2D and removes from the table
9791 //| Returns: Result of the operation.
9792 //*-----------------------------------------------------------------------------
HalCm_FreeSurface2D(PCM_HAL_STATE state,uint32_t handle)9793 MOS_STATUS HalCm_FreeSurface2D(
9794 PCM_HAL_STATE state, // [in] Pointer to CM State
9795 uint32_t handle) // [in] Pointer to Buffer Param
9796 {
9797 MOS_STATUS eStatus;
9798 PCM_HAL_SURFACE2D_ENTRY entry;
9799 PMOS_INTERFACE osInterface;
9800 MOS_GFXRES_FREE_FLAGS resFreeFlags = {0};
9801
9802 resFreeFlags.AssumeNotInUse = 1;
9803 eStatus = MOS_STATUS_SUCCESS;
9804 osInterface = state->osInterface;
9805
9806 // Get the Buffer Entry
9807 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurface2DEntry(state, handle, &entry));
9808 if (state->advExecutor)
9809 {
9810 state->advExecutor->Delete2Dor3DStateMgr(entry->surfStateMgr);
9811 }
9812
9813 if(entry->isAllocatedbyCmrtUmd)
9814 {
9815 osInterface->pfnFreeResourceWithFlag(osInterface, &entry->osResource, resFreeFlags.Value);
9816 }
9817 else
9818 {
9819 HalCm_OsResource_Unreference(&entry->osResource);
9820 }
9821
9822 MOS_ZeroMemory(&entry->osResource, sizeof(entry->osResource));
9823
9824 entry->width = 0;
9825 entry->height = 0;
9826 entry->frameType = CM_FRAME;
9827
9828 for (int i = 0; i < CM_HAL_GPU_CONTEXT_COUNT; i++)
9829 {
9830 entry->readSyncs[i] = false;
9831 }
9832
9833 finish:
9834 return eStatus;
9835 }
9836
9837 //*-----------------------------------------------------------------------------
9838 //| Purpose: Allocate 3D resource
9839 //| Returns: Result of the operation.
9840 //*-----------------------------------------------------------------------------
HalCm_AllocateSurface3D(CM_HAL_STATE * state,CM_HAL_3DRESOURCE_PARAM * param)9841 MOS_STATUS HalCm_AllocateSurface3D(CM_HAL_STATE *state, // [in] Pointer to CM State
9842 CM_HAL_3DRESOURCE_PARAM *param) // [in] Pointer to Buffer Param)
9843 {
9844 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9845
9846 //-----------------------------------------------
9847 CM_ASSERT(state);
9848 CM_ASSERT(param->depth > 1);
9849 CM_ASSERT(param->width > 0);
9850 CM_ASSERT(param->height > 0);
9851 //-----------------------------------------------
9852
9853 MOS_INTERFACE *osInterface = state->osInterface;
9854 // Finds a free slot.
9855 CM_HAL_3DRESOURCE_ENTRY *entry = nullptr;
9856 for (uint32_t i = 0; i < state->cmDeviceParam.max3DSurfaceTableSize; i++)
9857 {
9858 if (Mos_ResourceIsNull(&state->surf3DTable[i].osResource))
9859 {
9860 entry = &state->surf3DTable[i];
9861 param->handle = (uint32_t)i;
9862 break;
9863 }
9864 }
9865 if (!entry)
9866 {
9867 eStatus = MOS_STATUS_INVALID_PARAMETER;
9868 CM_ASSERTMESSAGE("3D surface table is full");
9869 return eStatus;
9870 }
9871 CM_CHK_NULL_GOTOFINISH_MOSERROR(osInterface);
9872 osInterface->pfnResetResource(&entry->osResource); // Resets the Resource
9873
9874 MOS_ALLOC_GFXRES_PARAMS alloc_params;
9875 MOS_ZeroMemory(&alloc_params, sizeof(alloc_params));
9876 alloc_params.Type = MOS_GFXRES_VOLUME;
9877 alloc_params.TileType = MOS_TILE_Y;
9878 alloc_params.dwWidth = param->width;
9879 alloc_params.dwHeight = param->height;
9880 alloc_params.dwDepth = param->depth;
9881 alloc_params.pSystemMemory = param->data;
9882 alloc_params.Format = param->format;
9883 alloc_params.pBufName = "CmSurface3D";
9884
9885 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnAllocateResource(
9886 osInterface,
9887 &alloc_params,
9888 &entry->osResource));
9889 entry->width = param->width;
9890 entry->height = param->height;
9891 entry->depth = param->depth;
9892 entry->format = param->format;
9893
9894 if (state->advExecutor)
9895 {
9896 entry->surfStateMgr = state->advExecutor->Create3DStateMgr(&entry->osResource);
9897 state->advExecutor->Set2Dor3DOrigDimension(entry->surfStateMgr,
9898 entry->width,
9899 entry->height,
9900 entry->depth);
9901 }
9902 finish:
9903 return eStatus;
9904 }
9905
9906 //*-----------------------------------------------------------------------------
9907 //| Purpose: Frees the resource and removes from the table
9908 //| Returns: Result of the operation.
9909 //*-----------------------------------------------------------------------------
HalCm_Free3DResource(PCM_HAL_STATE state,uint32_t handle)9910 MOS_STATUS HalCm_Free3DResource(
9911 PCM_HAL_STATE state, // [in] Pointer to CM State
9912 uint32_t handle) // [in] Pointer to Buffer Param
9913 {
9914 MOS_STATUS eStatus;
9915 PCM_HAL_3DRESOURCE_ENTRY entry;
9916 PMOS_INTERFACE osInterface;
9917 MOS_GFXRES_FREE_FLAGS resFreeFlags = {0};
9918
9919 resFreeFlags.AssumeNotInUse = 1;
9920 eStatus = MOS_STATUS_SUCCESS;
9921 osInterface = state->osInterface;
9922
9923 // Get the Buffer Entry
9924 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Get3DResourceEntry(state, handle, &entry));
9925 if (state->advExecutor)
9926 {
9927 state->advExecutor->Delete2Dor3DStateMgr(entry->surfStateMgr);
9928 }
9929
9930 osInterface->pfnFreeResourceWithFlag(osInterface, &entry->osResource, resFreeFlags.Value);
9931
9932 osInterface->pfnResetResourceAllocationIndex(osInterface, &entry->osResource);
9933
9934 finish:
9935 return eStatus;
9936 }
9937
9938 //*-----------------------------------------------------------------------------
9939 //| Purpose: Lock the resource and return
9940 //| Returns: Result of the operation.
9941 //*-----------------------------------------------------------------------------
HalCm_Lock3DResource(PCM_HAL_STATE state,PCM_HAL_3DRESOURCE_PARAM param)9942 MOS_STATUS HalCm_Lock3DResource(
9943 PCM_HAL_STATE state, // [in] Pointer to CM State
9944 PCM_HAL_3DRESOURCE_PARAM param) // [in] Pointer to 3D Param
9945 {
9946 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
9947 PCM_HAL_3DRESOURCE_ENTRY entry;
9948 MOS_LOCK_PARAMS lockFlags;
9949 RENDERHAL_GET_SURFACE_INFO info;
9950 PMOS_INTERFACE osInterface = nullptr;
9951 MOS_SURFACE surface;
9952
9953 // Get the 3D Resource Entry
9954 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Get3DResourceEntry(state, param->handle, &entry));
9955 if ((param->lockFlag != CM_HAL_LOCKFLAG_READONLY) && (param->lockFlag != CM_HAL_LOCKFLAG_WRITEONLY) )
9956 {
9957 CM_ASSERTMESSAGE("Invalid lock flag!");
9958 eStatus = MOS_STATUS_UNKNOWN;
9959 goto finish;
9960 }
9961
9962 // Get resource information
9963 MOS_ZeroMemory(&surface, sizeof(surface));
9964 surface.OsResource = entry->osResource;
9965 surface.Format = Format_Invalid;
9966 osInterface = state->osInterface;
9967
9968 MOS_ZeroMemory(&info, sizeof(RENDERHAL_GET_SURFACE_INFO));
9969
9970 CM_CHK_MOSSTATUS_GOTOFINISH(RenderHal_GetSurfaceInfo(
9971 osInterface,
9972 &info,
9973 &surface));
9974
9975 param->pitch = surface.dwPitch;
9976 param->qpitch = surface.dwQPitch;
9977 param->qpitchEnabled = state->cmHalInterface->IsSurf3DQpitchSupportedbyHw();
9978
9979 // Lock the resource
9980 MOS_ZeroMemory(&lockFlags, sizeof(MOS_LOCK_PARAMS));
9981
9982 if (param->lockFlag == CM_HAL_LOCKFLAG_READONLY)
9983 {
9984 lockFlags.ReadOnly = true;
9985 }
9986 else
9987 {
9988 lockFlags.WriteOnly = true;
9989 }
9990
9991 lockFlags.ForceCached = true;
9992 param->data = osInterface->pfnLockResource(
9993 osInterface,
9994 &entry->osResource,
9995 &lockFlags);
9996 CM_CHK_NULL_GOTOFINISH_MOSERROR(param->data);
9997
9998 finish:
9999 return eStatus;
10000 }
10001
10002 //*-----------------------------------------------------------------------------
10003 //| Purpose: Unlock the resource and return
10004 //| Returns: Result of the operation.
10005 //*-----------------------------------------------------------------------------
HalCm_Unlock3DResource(PCM_HAL_STATE state,PCM_HAL_3DRESOURCE_PARAM param)10006 MOS_STATUS HalCm_Unlock3DResource(
10007 PCM_HAL_STATE state, // [in] Pointer to CM State
10008 PCM_HAL_3DRESOURCE_PARAM param) // [in] Pointer to 3D Param
10009 {
10010 MOS_STATUS eStatus;
10011 PCM_HAL_3DRESOURCE_ENTRY entry;
10012 PMOS_INTERFACE osInterface;
10013
10014 eStatus = MOS_STATUS_SUCCESS;
10015 osInterface = state->osInterface;
10016
10017 // Get the 3D Resource Entry
10018 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_Get3DResourceEntry(state, param->handle, &entry));
10019
10020 // Lock the resource
10021 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(osInterface->pfnUnlockResource(osInterface, &entry->osResource));
10022
10023 finish:
10024 return eStatus;
10025 }
10026
HalCm_SetCompressionMode(PCM_HAL_STATE state,CM_HAL_SURFACE2D_COMPRESSIOM_PARAM mmcParam)10027 MOS_STATUS HalCm_SetCompressionMode(
10028 PCM_HAL_STATE state,
10029 CM_HAL_SURFACE2D_COMPRESSIOM_PARAM mmcParam)
10030 {
10031 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10032 PMOS_INTERFACE osInterface = state->osInterface;
10033 PCM_HAL_SURFACE2D_ENTRY entry;
10034
10035 // Get the 2D Resource Entry
10036 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurface2DEntry(state, mmcParam.handle, &entry));
10037
10038 //set compression bit passed down
10039 CM_CHK_MOSSTATUS_GOTOFINISH(osInterface->pfnSetMemoryCompressionMode(osInterface, &(entry->osResource), (MOS_MEMCOMP_STATE)mmcParam.mmcMode));
10040
10041 finish:
10042 return eStatus;
10043 }
10044
HalCm_SetL3Cache(const L3ConfigRegisterValues * l3Values,PCmHalL3Settings cmHalL3Cache)10045 MOS_STATUS HalCm_SetL3Cache(
10046 const L3ConfigRegisterValues *l3Values,
10047 PCmHalL3Settings cmHalL3Cache )
10048 {
10049 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10050
10051 // in legacy platforms, we map:
10052 // ConfigRegister0->SqcReg1
10053 // ConfigRegister1->CntlReg2
10054 // ConfigRegister2->CntlReg3
10055 // ConfigRegister3->CntlReg
10056 CM_CHK_NULL_GOTOFINISH_MOSERROR( cmHalL3Cache );
10057 CM_CHK_NULL_GOTOFINISH_MOSERROR(l3Values);
10058
10059 cmHalL3Cache->overrideSettings =
10060 (l3Values->config_register0 || l3Values->config_register1 ||
10061 l3Values->config_register2 || l3Values->config_register3 );
10062 cmHalL3Cache->cntlRegOverride = (l3Values->config_register3 != 0);
10063 cmHalL3Cache->cntlReg2Override = (l3Values->config_register1 != 0);
10064 cmHalL3Cache->cntlReg3Override = (l3Values->config_register2 != 0);
10065 cmHalL3Cache->sqcReg1Override = (l3Values->config_register0 != 0);
10066 cmHalL3Cache->cntlReg = l3Values->config_register3;
10067 cmHalL3Cache->cntlReg2 = l3Values->config_register1;
10068 cmHalL3Cache->cntlReg3 = l3Values->config_register2;
10069 cmHalL3Cache->sqcReg1 = l3Values->config_register0;
10070
10071 finish:
10072 return MOS_STATUS_SUCCESS;
10073 }
10074
10075 //*-----------------------------------------------------------------------------
10076 //| Purpose: Set Cap values
10077 //| Returns: Result of the operation.
10078 //*-----------------------------------------------------------------------------
HalCm_SetCaps(PCM_HAL_STATE state,PCM_HAL_MAX_SET_CAPS_PARAM setCapsParam)10079 MOS_STATUS HalCm_SetCaps(
10080 PCM_HAL_STATE state,
10081 PCM_HAL_MAX_SET_CAPS_PARAM setCapsParam)
10082 {
10083 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10084
10085 CM_CHK_NULL_GOTOFINISH_MOSERROR(state);
10086 CM_CHK_NULL_GOTOFINISH_MOSERROR(setCapsParam);
10087 CM_CHK_NULL_GOTOFINISH_MOSERROR(state->renderHal);
10088 CM_CHK_NULL_GOTOFINISH_MOSERROR(state->renderHal->pHwCaps)
10089
10090 switch (setCapsParam->type)
10091 {
10092 case CM_SET_MAX_HW_THREADS:
10093 if( setCapsParam->maxValue <= 0 ||
10094 setCapsParam->maxValue > state->renderHal->pHwCaps->dwMaxThreads )
10095 {
10096 eStatus = MOS_STATUS_UNKNOWN;
10097 goto finish;
10098 }
10099 else
10100 {
10101 state->maxHWThreadValues.apiValue = (setCapsParam->maxValue == 0) ? 0:
10102 MOS_MAX(setCapsParam->maxValue, state->cmHalInterface->GetSmallestMaxThreadNum());
10103 }
10104 break;
10105
10106 case CM_SET_HW_L3_CONFIG:
10107 eStatus = state->cmHalInterface->SetL3CacheConfig( &setCapsParam->l3CacheValues,
10108 &state->l3Settings );
10109 break;
10110
10111 default:
10112 eStatus = MOS_STATUS_UNKNOWN;
10113 goto finish;
10114 }
10115
10116 finish:
10117 return eStatus;
10118 }
10119
10120 //*-----------------------------------------------------------------------------
10121 //| Purpose: Task sets the power option which will be used by this task
10122 //| Returns: Result of the operation.
10123 //*-----------------------------------------------------------------------------
HalCm_SetPowerOption(PCM_HAL_STATE state,PCM_POWER_OPTION powerOption)10124 MOS_STATUS HalCm_SetPowerOption(
10125 PCM_HAL_STATE state,
10126 PCM_POWER_OPTION powerOption )
10127 {
10128 if (state->cmHalInterface->IsOverridePowerOptionPerGpuContext())
10129 {
10130 CM_NORMALMESSAGE("WARNING: Deprecated function due to per context SSEU overriding is enabled.\n");
10131 return MOS_STATUS_SUCCESS;
10132 }
10133
10134 MOS_SecureMemcpy( &state->powerOption, sizeof( state->powerOption ), powerOption, sizeof( state->powerOption ) );
10135 return MOS_STATUS_SUCCESS;
10136 }
10137
10138 //*-----------------------------------------------------------------------------
10139 // Purpose: Get the time in ns from QueryPerformanceCounter
10140 // Returns: Result of the operation
10141 //*-----------------------------------------------------------------------------
HalCm_GetGlobalTime(LARGE_INTEGER * globalTime)10142 MOS_STATUS HalCm_GetGlobalTime(LARGE_INTEGER *globalTime)
10143 {
10144 if(globalTime == nullptr)
10145 {
10146 return MOS_STATUS_NULL_POINTER;
10147 }
10148
10149 if (MosUtilities::MosQueryPerformanceCounter((uint64_t *)&(globalTime->QuadPart)) == false)
10150 {
10151 return MOS_STATUS_UNKNOWN;
10152 }
10153
10154 return MOS_STATUS_SUCCESS;
10155 }
10156
10157 //*-----------------------------------------------------------------------------
10158 // Purpose: Convert time from nanosecond to QPC time
10159 // Returns: Result of the operation
10160 //*-----------------------------------------------------------------------------
HalCm_ConvertToQPCTime(uint64_t nanoseconds,LARGE_INTEGER * qpcTime)10161 MOS_STATUS HalCm_ConvertToQPCTime(uint64_t nanoseconds, LARGE_INTEGER *qpcTime)
10162 {
10163 LARGE_INTEGER perfFreq;
10164
10165 if(qpcTime == nullptr)
10166 {
10167 return MOS_STATUS_NULL_POINTER;
10168 }
10169
10170 if (MosUtilities::MosQueryPerformanceFrequency((uint64_t*)&perfFreq.QuadPart) == false)
10171 {
10172 return MOS_STATUS_UNKNOWN;
10173 }
10174
10175 qpcTime->QuadPart = (uint64_t)(nanoseconds * perfFreq.QuadPart / 1000000000.0);
10176
10177 return MOS_STATUS_SUCCESS;
10178 }
10179
10180 //------------------------------------------------------------------------------
10181 //| Purpose: Halcm updates power state to hw state
10182 //| Returns:
10183 //------------------------------------------------------------------------------
HalCm_UpdatePowerOption(PCM_HAL_STATE state,PCM_POWER_OPTION powerOption)10184 MOS_STATUS HalCm_UpdatePowerOption(
10185 PCM_HAL_STATE state,
10186 PCM_POWER_OPTION powerOption )
10187 {
10188 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10189
10190 if (state->cmHalInterface->IsOverridePowerOptionPerGpuContext())
10191 {
10192 CM_NORMALMESSAGE("WARNING: Deprecated function due to per context SSEU overriding is enabled.\n");
10193 return MOS_STATUS_SUCCESS;
10194 }
10195
10196 PRENDERHAL_INTERFACE renderHal = state->renderHal;
10197
10198 RENDERHAL_POWEROPTION renderPowerOption;
10199 renderPowerOption.nSlice = (uint8_t)powerOption->nSlice;
10200 renderPowerOption.nSubSlice = (uint8_t)powerOption->nSubSlice;
10201 renderPowerOption.nEU = (uint8_t)powerOption->nEU;
10202
10203 // option set in CM create device to use slice shutdown for life of CM device ( override previous value if necessary )
10204 if ( state->requestSingleSlice == true )
10205 {
10206 renderPowerOption.nSlice = 1;
10207 }
10208
10209 renderHal->pfnSetPowerOptionMode( renderHal, &renderPowerOption );
10210
10211 return eStatus;
10212 }
10213
HalCm_InitPerfTagIndexMap(PCM_HAL_STATE cmState)10214 MOS_STATUS HalCm_InitPerfTagIndexMap(PCM_HAL_STATE cmState)
10215 {
10216 using namespace std;
10217 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10218 CM_ASSERT(cmState);
10219 for (int i = 0; i < MAX_COMBINE_NUM_IN_PERFTAG; i++)
10220 {
10221 cmState->currentPerfTagIndex[i] = 1;
10222 #if MOS_MESSAGES_ENABLED
10223 cmState->perfTagIndexMap[i] = MosUtilities::MosNewUtil<map<string, int> >(__FUNCTION__, __FILE__, __LINE__);
10224 #else
10225 cmState->perfTagIndexMap[i] = MosUtilities::MosNewUtil<map<string, int> >();
10226 #endif
10227
10228 CM_CHK_NULL_GOTOFINISH_MOSERROR(cmState->perfTagIndexMap[i]);
10229 }
10230
10231 cmState->perfTagIndexMap[0]->insert(pair<string, int>("surfaceCopy_read_NV12_32x32", GPUCOPY_READ_PERFTAG_INDEX));
10232 cmState->perfTagIndexMap[0]->insert(pair<string, int>("surfaceCopy_read_NV12_aligned_32x32", GPUCOPY_READ_PERFTAG_INDEX));
10233 cmState->perfTagIndexMap[0]->insert(pair<string, int>("surfaceCopy_read_32x32", GPUCOPY_READ_PERFTAG_INDEX));
10234 cmState->perfTagIndexMap[0]->insert(pair<string, int>("surfaceCopy_read_aligned_32x32", GPUCOPY_READ_PERFTAG_INDEX));
10235
10236 cmState->perfTagIndexMap[0]->insert(pair<string, int>("surfaceCopy_write_NV12_32x32", GPUCOPY_WRITE_PERFTAG_INDEX));
10237 cmState->perfTagIndexMap[0]->insert(pair<string, int>("surfaceCopy_write_32x32", GPUCOPY_WRITE_PERFTAG_INDEX));
10238
10239 cmState->perfTagIndexMap[0]->insert(pair<string, int>("SurfaceCopy_2DTo2D_NV12_32x32", GPUCOPY_G2G_PERFTAG_INDEX));
10240 cmState->perfTagIndexMap[0]->insert(pair<string, int>("SurfaceCopy_2DTo2D_32x32", GPUCOPY_G2G_PERFTAG_INDEX));
10241
10242 cmState->perfTagIndexMap[0]->insert(pair<string, int>("SurfaceCopy_BufferToBuffer_4k", GPUCOPY_C2C_PERFTAG_INDEX));
10243 cmState->perfTagIndexMap[0]->insert(pair<string, int>("SurfaceCopy_BufferToBuffer_4k", GPUCOPY_C2C_PERFTAG_INDEX));
10244
10245 cmState->perfTagIndexMap[0]->insert(pair<string, int>("surfaceCopy_set_NV12", GPUINIT_PERFTAG_INDEX));
10246 cmState->perfTagIndexMap[0]->insert(pair<string, int>("surfaceCopy_set", GPUINIT_PERFTAG_INDEX));
10247
10248 finish:
10249 return eStatus;
10250 }
10251
HalCm_DeleteFromStateBufferList(PCM_HAL_STATE state,void * kernelPtr)10252 MOS_STATUS HalCm_DeleteFromStateBufferList(
10253 PCM_HAL_STATE state,
10254 void *kernelPtr )
10255 {
10256 MOS_STATUS result = MOS_STATUS_SUCCESS;
10257
10258 state->state_buffer_list_ptr->erase( kernelPtr );
10259
10260 return result;
10261 }
10262
HalCm_GetMediaStatePtrForKernel(PCM_HAL_STATE state,void * kernelPtr)10263 PRENDERHAL_MEDIA_STATE HalCm_GetMediaStatePtrForKernel(
10264 PCM_HAL_STATE state,
10265 void *kernelPtr )
10266 {
10267 if ( state->state_buffer_list_ptr->find( kernelPtr ) != state->state_buffer_list_ptr->end() )
10268 {
10269 return ( *state->state_buffer_list_ptr )[ kernelPtr ].mediaStatePtr;
10270 }
10271 else
10272 {
10273 return nullptr;
10274 }
10275 }
10276
HalCm_GetStateBufferVAPtrForSurfaceIndex(PCM_HAL_STATE state,uint32_t surfIndex)10277 uint64_t HalCm_GetStateBufferVAPtrForSurfaceIndex(
10278 PCM_HAL_STATE state,
10279 uint32_t surfIndex )
10280 {
10281 for ( auto listItem = state->state_buffer_list_ptr->begin(); listItem != state->state_buffer_list_ptr->end(); listItem++ )
10282 {
10283 if ( listItem->second.stateBufferIndex == surfIndex )
10284 {
10285 return listItem->second.stateBufferVaPtr;
10286 }
10287 }
10288 return 0;
10289 }
10290
HalCm_GetMediaStatePtrForSurfaceIndex(PCM_HAL_STATE state,uint32_t surfIndex)10291 PRENDERHAL_MEDIA_STATE HalCm_GetMediaStatePtrForSurfaceIndex(
10292 PCM_HAL_STATE state,
10293 uint32_t surfIndex )
10294 {
10295 for ( auto listItem = state->state_buffer_list_ptr->begin(); listItem != state->state_buffer_list_ptr->end(); listItem++ )
10296 {
10297 if ( listItem->second.stateBufferIndex == surfIndex )
10298 {
10299 return listItem->second.mediaStatePtr;
10300 }
10301 }
10302 return nullptr;
10303 }
10304
HalCm_GetStateBufferVAPtrForMediaStatePtr(PCM_HAL_STATE state,PRENDERHAL_MEDIA_STATE mediaStatePtr)10305 uint64_t HalCm_GetStateBufferVAPtrForMediaStatePtr(
10306 PCM_HAL_STATE state,
10307 PRENDERHAL_MEDIA_STATE mediaStatePtr )
10308 {
10309 for ( auto listItem = state->state_buffer_list_ptr->begin(); listItem != state->state_buffer_list_ptr->end(); listItem++ )
10310 {
10311 if ( listItem->second.mediaStatePtr == mediaStatePtr )
10312 {
10313 return listItem->second.stateBufferVaPtr;
10314 }
10315 }
10316 return 0;
10317 }
10318
HalCm_GetStateBufferSizeForKernel(PCM_HAL_STATE state,void * kernelPtr)10319 uint32_t HalCm_GetStateBufferSizeForKernel(
10320 PCM_HAL_STATE state,
10321 void *kernelPtr )
10322 {
10323 if ( state->state_buffer_list_ptr->find( kernelPtr ) != state->state_buffer_list_ptr->end() )
10324 {
10325 return ( *state->state_buffer_list_ptr )[ kernelPtr ].stateBufferSize;
10326 }
10327 else
10328 {
10329 return 0;
10330 }
10331 }
10332
HalCm_GetStateBufferTypeForKernel(PCM_HAL_STATE state,void * kernelPtr)10333 CM_STATE_BUFFER_TYPE HalCm_GetStateBufferTypeForKernel(
10334 PCM_HAL_STATE state,
10335 void *kernelPtr )
10336 {
10337 if ( state->state_buffer_list_ptr->find( kernelPtr ) != state->state_buffer_list_ptr->end() )
10338 {
10339 return ( *state->state_buffer_list_ptr )[ kernelPtr ].stateBufferType;
10340 }
10341 else
10342 {
10343 return CM_STATE_BUFFER_NONE;
10344 }
10345 }
10346
LoadUserFeatures(CM_HAL_STATE * halState,MOS_GPUCTX_CREATOPTIONS * createOptions)10347 static void LoadUserFeatures(CM_HAL_STATE *halState,
10348 MOS_GPUCTX_CREATOPTIONS *createOptions)
10349 {
10350 #if (_DEBUG || _RELEASE_INTERNAL)
10351 MOS_USER_FEATURE_VALUE_DATA user_feature_data;
10352 MOS_ZeroMemory(&user_feature_data, sizeof(user_feature_data));
10353 MOS_STATUS result
10354 = MOS_UserFeature_ReadValue_ID(
10355 nullptr, __MEDIA_USER_FEATURE_VALUE_MDF_FORCE_RAMODE,
10356 &user_feature_data, halState->osInterface->pOsContext);
10357 if (MOS_STATUS_SUCCESS == result && user_feature_data.i32Data == 1)
10358 {
10359 createOptions->RAMode = 1;
10360 }
10361
10362 MOS_USER_FEATURE_VALUE_WRITE_DATA userFeatureWriteData;
10363 userFeatureWriteData = __NULL_USER_FEATURE_VALUE_WRITE_DATA__;
10364 userFeatureWriteData.Value.i32Data = createOptions->RAMode;
10365 userFeatureWriteData.ValueID = __MEDIA_USER_FEATURE_VALUE_MDF_FORCE_RAMODE;
10366 MOS_UserFeature_WriteValues_ID(nullptr, &userFeatureWriteData, 1, halState->osInterface->pOsContext);
10367
10368 #endif
10369 return;
10370 }
10371
HalCm_CreateGPUContext(PCM_HAL_STATE state,MOS_GPU_CONTEXT gpuContext,MOS_GPU_NODE gpuNode,PMOS_GPUCTX_CREATOPTIONS pMosGpuContextCreateOption)10372 MOS_STATUS HalCm_CreateGPUContext(
10373 PCM_HAL_STATE state,
10374 MOS_GPU_CONTEXT gpuContext,
10375 MOS_GPU_NODE gpuNode,
10376 PMOS_GPUCTX_CREATOPTIONS pMosGpuContextCreateOption)
10377 {
10378 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
10379
10380 LoadUserFeatures(state, pMosGpuContextCreateOption);
10381
10382 // Create Compute Context on Compute Node
10383 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(state->osInterface->pfnCreateGpuContext(
10384 state->osInterface,
10385 gpuContext,
10386 gpuNode,
10387 pMosGpuContextCreateOption));
10388
10389 // Register Compute Context with the Batch Buffer completion event
10390 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(state->osInterface->pfnRegisterBBCompleteNotifyEvent(
10391 state->osInterface,
10392 gpuContext));
10393
10394 finish:
10395 return eStatus;
10396 }
10397
10398 GPU_CONTEXT_HANDLE
HalCm_CreateGpuComputeContext(CM_HAL_STATE * state,MOS_GPUCTX_CREATOPTIONS * createOptions)10399 HalCm_CreateGpuComputeContext(CM_HAL_STATE *state,
10400 MOS_GPUCTX_CREATOPTIONS *createOptions)
10401 {
10402 LoadUserFeatures(state, createOptions);
10403
10404 GPU_CONTEXT_HANDLE context_handle
10405 = state->osInterface->pfnCreateGpuComputeContext(
10406 state->osInterface, MOS_GPU_CONTEXT_CM_COMPUTE, createOptions);
10407 if (MOS_GPU_CONTEXT_INVALID_HANDLE != context_handle)
10408 {
10409 state->osInterface->pfnRegisterBBCompleteNotifyEvent(
10410 state->osInterface, MOS_GPU_CONTEXT_CM_COMPUTE);
10411 }
10412 return context_handle;
10413 }
10414
HalCm_SetGpuContext(CM_HAL_STATE * halState,MOS_GPU_CONTEXT contextName,uint32_t streamIndex,GPU_CONTEXT_HANDLE contextHandle)10415 uint32_t HalCm_SetGpuContext(CM_HAL_STATE *halState,
10416 MOS_GPU_CONTEXT contextName,
10417 uint32_t streamIndex,
10418 GPU_CONTEXT_HANDLE contextHandle)
10419 {
10420 uint32_t old_stream_idx = halState->osInterface->streamIndex;
10421 halState->osInterface->streamIndex = streamIndex;
10422 MOS_STATUS result = MOS_STATUS_SUCCESS;
10423
10424 if (MOS_GPU_CONTEXT_INVALID_HANDLE == contextHandle)
10425 {
10426 result = halState->osInterface->pfnSetGpuContext(halState->osInterface,
10427 contextName);
10428 }
10429 else
10430 {
10431 result = halState->osInterface->pfnSetGpuContextFromHandle(
10432 halState->osInterface, contextName, contextHandle);
10433 }
10434
10435 if (MOS_STATUS_SUCCESS != result)
10436 {
10437 halState->osInterface->streamIndex = old_stream_idx;
10438 return INVALID_STREAM_INDEX;
10439 }
10440 return old_stream_idx;
10441 }
10442
HalCm_SelectSyncBuffer(CM_HAL_STATE * halState,uint32_t bufferIdx)10443 MOS_STATUS HalCm_SelectSyncBuffer(CM_HAL_STATE *halState, uint32_t bufferIdx)
10444 {
10445 if (bufferIdx >= halState->cmDeviceParam.maxBufferTableSize)
10446 {
10447 halState->syncBuffer = nullptr;
10448 return MOS_STATUS_SUCCESS;
10449 }
10450 CM_HAL_BUFFER_ENTRY *entry = halState->bufferTable + bufferIdx;
10451 halState->syncBuffer = &entry->osResource;
10452 MOS_INTERFACE *os_interface = halState->osInterface;
10453 return os_interface->pfnRegisterResource(os_interface, halState->syncBuffer,
10454 true, true);
10455 }
10456
10457 //*-----------------------------------------------------------------------------
10458 //| Purpose: Creates instance of HAL CM State
10459 //| Returns: Result of the operation
10460 //| Note: Caller must call pfnAllocate to allocate all HalCm/Mhw states and objects.
10461 //| Caller MUST call HalCm_Destroy to destroy the instance
10462 //*-----------------------------------------------------------------------------
HalCm_Create(PMOS_CONTEXT osDriverContext,PCM_HAL_CREATE_PARAM param,PCM_HAL_STATE * cmState)10463 MOS_STATUS HalCm_Create(
10464 PMOS_CONTEXT osDriverContext, // [in] OS Driver Context
10465 PCM_HAL_CREATE_PARAM param, // [in] Create Param
10466 PCM_HAL_STATE *cmState) // [out] double pointer to CM State
10467 {
10468 MOS_STATUS eStatus;
10469 PCM_HAL_STATE state = nullptr;
10470 uint32_t numCmdBuffers = 0;
10471 MhwInterfaces *mhwInterfaces = nullptr;
10472 MhwInterfaces::CreateParams params;
10473 MOS_GPUCTX_CREATOPTIONS createOption;
10474
10475 //-----------------------------------------
10476 CM_ASSERT(osDriverContext);
10477 CM_ASSERT(param);
10478 CM_ASSERT(cmState);
10479 //-----------------------------------------
10480
10481 eStatus = MOS_STATUS_SUCCESS;
10482
10483 // Allocate State structure
10484 state = (PCM_HAL_STATE)MOS_AllocAndZeroMemory(sizeof(CM_HAL_STATE));
10485 CM_CHK_NULL_GOTOFINISH_MOSERROR(state);
10486
10487 // Allocate/Initialize OS Interface
10488 state->osInterface = (PMOS_INTERFACE)
10489 MOS_AllocAndZeroMemory(sizeof(MOS_INTERFACE));
10490 CM_CHK_NULL_GOTOFINISH_MOSERROR(state->osInterface);
10491 state->osInterface->bDeallocateOnExit = true;
10492 CM_CHK_HRESULT_GOTOFINISH_MOSERROR(Mos_InitInterface(state->osInterface, osDriverContext, COMPONENT_CM));
10493 #if (_RELEASE_INTERNAL || _DEBUG)
10494 #if defined(CM_DIRECT_GUC_SUPPORT)
10495 state->osInterface->m_pWorkQueueMngr = new CMRTWorkQueueMngr();
10496 #endif
10497 #endif
10498
10499 state->osInterface->pfnGetPlatform(state->osInterface, &state->platform);
10500 state->skuTable = state->osInterface->pfnGetSkuTable(state->osInterface);
10501 state->waTable = state->osInterface->pfnGetWaTable (state->osInterface);
10502
10503 // Create VEBOX Context
10504 createOption.CmdBufferNumScale = MOS_GPU_CONTEXT_CREATE_DEFAULT;
10505 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_CreateGPUContext(
10506 state,
10507 MOS_GPU_CONTEXT_VEBOX,
10508 MOS_GPU_NODE_VE,
10509 &createOption));
10510
10511 // Allocate/Initialize CM Rendering Interface
10512 state->renderHal = (PRENDERHAL_INTERFACE_LEGACY)
10513 MOS_AllocAndZeroMemory(sizeof(RENDERHAL_INTERFACE_LEGACY));
10514 CM_CHK_NULL_GOTOFINISH_MOSERROR(state->renderHal);
10515
10516 state->dshEnabled = param->dynamicStateHeap;
10517 state->renderHal->bDynamicStateHeap = state->dshEnabled;
10518
10519 if (state->dshEnabled)
10520 {
10521 CM_CHK_MOSSTATUS_GOTOFINISH(RenderHal_InitInterface_Dynamic(state->renderHal, &state->cpInterface, state->osInterface));
10522 }
10523 else
10524 {
10525 CM_CHK_MOSSTATUS_GOTOFINISH(RenderHal_InitInterface_Legacy(state->renderHal, &state->cpInterface, state->osInterface));
10526 }
10527
10528 // Allocate/Initialize VEBOX Interface
10529 if (!param->disableVebox)
10530 {
10531 CmSafeMemSet(¶ms, 0, sizeof(params));
10532 params.Flags.m_vebox = 1;
10533 mhwInterfaces = MhwInterfaces::CreateFactory(params, state->osInterface);
10534 if (mhwInterfaces)
10535 {
10536 state->veboxInterface = mhwInterfaces->m_veboxInterface;
10537
10538 // MhwInterfaces always create CP and MI interfaces, so we have to delete those we don't need.
10539 MOS_Delete(mhwInterfaces->m_miInterface);
10540 state->osInterface->pfnDeleteMhwCpInterface(mhwInterfaces->m_cpInterface);
10541 mhwInterfaces->m_cpInterface = nullptr;
10542 MOS_Delete(mhwInterfaces);
10543 CM_CHK_NULL_GOTOFINISH_MOSERROR(state->veboxInterface);
10544 }
10545 else
10546 {
10547 CM_ASSERTMESSAGE("Allocate MhwInterfaces failed");
10548 HalCm_Destroy(state);
10549 *cmState = nullptr;
10550 return MOS_STATUS_NO_SPACE;
10551 }
10552 }
10553 else
10554 {
10555 state->veboxInterface = nullptr;
10556 }
10557
10558 // set IsMDFLoad to distinguish MDF context from other Media Contexts
10559 state->renderHal->IsMDFLoad = true;
10560
10561 // disable YV12SinglePass as CMRT & compiler don't support it
10562 state->renderHal->bEnableYV12SinglePass = false;
10563
10564 state->cmDeviceParam.maxKernelBinarySize = CM_KERNEL_BINARY_BLOCK_SIZE;
10565
10566 // set if the new sampler heap management is used or not
10567 // currently new sampler heap management depends on DSH
10568 if (state->dshEnabled)
10569 {
10570 state->useNewSamplerHeap = true;
10571 }
10572 else
10573 {
10574 state->useNewSamplerHeap = false;
10575 }
10576
10577 //Get Max Scratch Space Size
10578 if( param->disableScratchSpace)
10579 {
10580 state->cmDeviceParam.maxPerThreadScratchSpaceSize = 0;
10581 }
10582 else
10583 {
10584 //Gen7_5 + : (MaxScratchSpaceSize + 1) *16k
10585 if(param->scratchSpaceSize == CM_DEVICE_CONFIG_SCRATCH_SPACE_SIZE_DEFAULT)
10586 { //By default, 128K for HSW
10587 state->cmDeviceParam.maxPerThreadScratchSpaceSize = 8 * CM_DEVICE_CONFIG_SCRATCH_SPACE_SIZE_16K_STEP;
10588 }
10589 else
10590 {
10591 state->cmDeviceParam.maxPerThreadScratchSpaceSize = (param->scratchSpaceSize)*
10592 CM_DEVICE_CONFIG_SCRATCH_SPACE_SIZE_16K_STEP;
10593 }
10594 }
10595
10596 // Initialize kernel parameters
10597 state->kernelParamsRenderHal.pMhwKernelParam = &state->kernelParamsMhw;
10598
10599 // Enable SLM in L3 Cache
10600 state->l3Settings.enableSlm = true;
10601
10602 // Slice shutdown
10603 state->requestSingleSlice = param->requestSliceShutdown;
10604
10605 //mid thread preemption on/off and SIP debug control
10606 state->midThreadPreemptionDisabled = param->disabledMidThreadPreemption;
10607 state->kernelDebugEnabled = param->enabledKernelDebug;
10608
10609 // init mapping for the state buffer
10610 #if MOS_MESSAGES_ENABLED
10611 state->state_buffer_list_ptr = MosUtilities::MosNewUtil<std::map< void *, CM_HAL_STATE_BUFFER_ENTRY> >(__FUNCTION__, __FILE__, __LINE__);
10612 #else
10613 state->state_buffer_list_ptr = MosUtilities::MosNewUtil<std::map< void *, CM_HAL_STATE_BUFFER_ENTRY> >();
10614 #endif
10615
10616 CM_CHK_NULL_GOTOFINISH_MOSERROR( state->state_buffer_list_ptr );
10617
10618 MOS_ZeroMemory(&state->hintIndexes.kernelIndexes, sizeof(uint32_t) * CM_MAX_TASKS_EU_SATURATION);
10619 MOS_ZeroMemory(&state->hintIndexes.dispatchIndexes, sizeof(uint32_t) * CM_MAX_TASKS_EU_SATURATION);
10620
10621 // get the global media profiler
10622 state->perfProfiler = MediaPerfProfiler::Instance();
10623 CM_CHK_NULL_GOTOFINISH_MOSERROR(state->perfProfiler);
10624 CM_CHK_MOSSTATUS_GOTOFINISH(state->perfProfiler->Initialize((void*)state, state->osInterface));
10625
10626 state->criticalSectionDSH = MOS_New(CMRT_UMD::CSync);
10627 CM_CHK_NULL_GOTOFINISH_MOSERROR(state->criticalSectionDSH);
10628
10629 state->cmDeviceParam.maxKernelsPerTask = CM_MAX_KERNELS_PER_TASK;
10630 state->cmDeviceParam.maxSamplerTableSize = CM_MAX_SAMPLER_TABLE_SIZE;
10631 state->cmDeviceParam.maxSampler8x8TableSize = state->renderHal->pHwSizes->dwSizeSampler8x8Table;
10632 state->cmDeviceParam.maxBufferTableSize = CM_MAX_BUFFER_SURFACE_TABLE_SIZE;
10633 state->cmDeviceParam.max2DSurfaceUPTableSize = CM_MAX_2D_SURFACE_UP_TABLE_SIZE;
10634 state->cmDeviceParam.max2DSurfaceTableSize = CM_MAX_2D_SURFACE_TABLE_SIZE;
10635 state->cmDeviceParam.max3DSurfaceTableSize = CM_MAX_3D_SURFACE_TABLE_SIZE;
10636 state->cmDeviceParam.maxTasks = param->maxTaskNumber;
10637 state->cmDeviceParam.maxAvsSamplers = CM_MAX_AVS_SAMPLER_SIZE;
10638 state->cmDeviceParam.maxGshKernelEntries = param->kernelBinarySizeinGSH / (CM_32K);
10639
10640 if (state->dshEnabled)
10641 {
10642 // Initialize Kernel Cache Hit/Miss counters
10643 state->dshKernelCacheMiss = 0;
10644 state->dshKernelCacheHit = 0;
10645 }
10646
10647 // Setup Function pointers
10648 state->pfnCmAllocate = HalCm_Allocate;
10649 state->pfnGetMaxValues = HalCm_GetMaxValues;
10650 state->pfnGetMaxValuesEx = HalCm_GetMaxValuesEx;
10651 state->pfnExecuteTask = HalCm_ExecuteTask;
10652 state->pfnExecuteGroupTask = HalCm_ExecuteGroupTask;
10653 state->pfnExecuteHintsTask = HalCm_ExecuteHintsTask;
10654 state->pfnRegisterSampler = HalCm_RegisterSampler;
10655 state->pfnUnRegisterSampler = HalCm_UnRegisterSampler;
10656 state->pfnRegisterSampler8x8 = HalCm_RegisterSampler8x8;
10657 state->pfnUnRegisterSampler8x8 = HalCm_UnRegisterSampler8x8;
10658 state->pfnFreeBuffer = HalCm_FreeBuffer;
10659 state->pfnLockBuffer = HalCm_LockBuffer;
10660 state->pfnUnlockBuffer = HalCm_UnlockBuffer;
10661 state->pfnFreeSurface2DUP = HalCm_FreeSurface2DUP;
10662 state->pfnGetSurface2DTileYPitch = HalCm_GetSurface2DTileYPitch;
10663 state->pfnSet2DSurfaceStateParam = HalCm_Set2DSurfaceStateParam;
10664 state->pfnSetBufferSurfaceStatePara = HalCm_SetBufferSurfaceStateParameters;
10665 state->pfnSetSurfaceMOCS = HalCm_SetSurfaceMOCS;
10666 /************************************************************/
10667 state->pfnAllocateSurface2D = HalCm_AllocateSurface2D;
10668 state->pfnAllocate3DResource = HalCm_AllocateSurface3D;
10669 state->pfnFreeSurface2D = HalCm_FreeSurface2D;
10670 state->pfnLock2DResource = HalCm_Lock2DResource;
10671 state->pfnUnlock2DResource = HalCm_Unlock2DResource;
10672 state->pfnSetCompressionMode = HalCm_SetCompressionMode;
10673 /************************************************************/
10674 state->pfnFree3DResource = HalCm_Free3DResource;
10675 state->pfnLock3DResource = HalCm_Lock3DResource;
10676 state->pfnUnlock3DResource = HalCm_Unlock3DResource;
10677 state->pfnSetCaps = HalCm_SetCaps;
10678 state->pfnSetPowerOption = HalCm_SetPowerOption;
10679 state->pfnUpdatePowerOption = HalCm_UpdatePowerOption;
10680
10681 state->pfnSendMediaWalkerState = HalCm_SendMediaWalkerState;
10682 state->pfnSendGpGpuWalkerState = HalCm_SendGpGpuWalkerState;
10683 state->pfnSetSurfaceReadFlag = HalCm_SetSurfaceReadFlag;
10684 state->pfnSetVtuneProfilingFlag = HalCm_SetVtuneProfilingFlag;
10685 state->pfnExecuteVeboxTask = HalCm_ExecuteVeboxTask;
10686 state->pfnGetTaskSyncLocation = HalCm_GetTaskSyncLocation;
10687
10688 state->pfnGetGlobalTime = HalCm_GetGlobalTime;
10689 state->pfnConvertToQPCTime = HalCm_ConvertToQPCTime;
10690
10691 state->pfnSyncOnResource = HalCm_SyncOnResource;
10692
10693 state->pfnDeleteFromStateBufferList = HalCm_DeleteFromStateBufferList;
10694 state->pfnGetMediaStatePtrForKernel = HalCm_GetMediaStatePtrForKernel;
10695 state->pfnGetStateBufferVAPtrForSurfaceIndex = HalCm_GetStateBufferVAPtrForSurfaceIndex;
10696 state->pfnGetMediaStatePtrForSurfaceIndex = HalCm_GetMediaStatePtrForSurfaceIndex;
10697 state->pfnGetStateBufferVAPtrForMediaStatePtr = HalCm_GetStateBufferVAPtrForMediaStatePtr;
10698 state->pfnGetStateBufferSizeForKernel = HalCm_GetStateBufferSizeForKernel;
10699 state->pfnGetStateBufferTypeForKernel = HalCm_GetStateBufferTypeForKernel;
10700 state->pfnCreateGPUContext = HalCm_CreateGPUContext;
10701 state->pfnCreateGpuComputeContext = HalCm_CreateGpuComputeContext;
10702 state->pfnSetGpuContext = HalCm_SetGpuContext;
10703 state->pfnSelectSyncBuffer = HalCm_SelectSyncBuffer;
10704 state->pfnDSHUnregisterKernel = HalCm_DSH_UnregisterKernel;
10705
10706 state->pfnUpdateBuffer = HalCm_UpdateBuffer;
10707 state->pfnUpdateSurface2D = HalCm_UpdateSurface2D;
10708
10709 //==========<Initialize 5 OS-dependent DDI functions: pfnAllocate3DResource, pfnAllocateSurface2DUP====
10710 // pfnAllocateBuffer,pfnRegisterKMDNotifyEventHandle, pfnGetSurface2DPitchAndSize >====
10711 HalCm_OsInitInterface(state);
10712
10713 state->osInterface->pfnInitCmInterface(state);
10714
10715 HalCm_InitPerfTagIndexMap(state);
10716
10717 state->maxHWThreadValues.userFeatureValue = 0;
10718 state->maxHWThreadValues.apiValue = 0;
10719
10720 HalCm_GetUserFeatureSettings(state);
10721
10722 #if MDF_COMMAND_BUFFER_DUMP
10723 HalCm_InitDumpCommandBuffer(state);
10724 state->pfnInitDumpCommandBuffer = HalCm_InitDumpCommandBuffer;
10725 state->pfnDumpCommadBuffer = HalCm_DumpCommadBuffer;
10726 #endif //MDF_COMMAND_BUFFER_DUMP
10727
10728 #if MDF_CURBE_DATA_DUMP
10729 HalCm_InitDumpCurbeData(state);
10730 #endif
10731
10732 #if MDF_SURFACE_CONTENT_DUMP
10733 HalCm_InitSurfaceDump(state);
10734 #endif
10735
10736 #if MDF_SURFACE_STATE_DUMP
10737 HalCm_InitDumpSurfaceState(state);
10738 state->pfnInitDumpSurfaceState = HalCm_InitDumpSurfaceState;
10739 state->pfnDumpSurfaceState = HalCm_DumpSurfaceState;
10740 #endif
10741
10742 #if MDF_INTERFACE_DESCRIPTOR_DATA_DUMP
10743 HalCm_InitDumpInterfaceDescriporData(state);
10744 #endif
10745
10746 state->cmHalInterface = CMHalDevice::CreateFactory(state);
10747 CM_CHK_NULL_GOTOFINISH_MOSERROR(state->cmHalInterface);
10748
10749 if (param->refactor)
10750 {
10751 state->refactor = true;
10752 }
10753 else
10754 {
10755 state->refactor = false;
10756 }
10757
10758 state->requestCustomGpuContext = param->requestCustomGpuContext;
10759
10760 #if (_DEBUG || _RELEASE_INTERNAL)
10761 {
10762 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
10763
10764 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
10765 MOS_UserFeature_ReadValue_ID(
10766 nullptr,
10767 __MEDIA_USER_FEATURE_VALUE_MDF_FORCE_EXECUTION_PATH_ID,
10768 &userFeatureData,
10769 state->osInterface->pOsContext);
10770
10771 if (userFeatureData.i32Data == 1)
10772 {
10773 state->refactor = false;
10774 }
10775 else if (userFeatureData.i32Data == 2)
10776 {
10777 state->refactor = true;
10778 state->cmHalInterface->SetFastPathByDefault(true);
10779 }
10780
10781 FILE *fp1 = nullptr;
10782 MosUtilities::MosSecureFileOpen(&fp1, "refactor.key", "r");
10783 if (fp1 != nullptr)
10784 {
10785 state->refactor = true;
10786 state->cmHalInterface->SetFastPathByDefault(true);
10787 fclose(fp1);
10788 }
10789
10790 FILE *fp2 = nullptr;
10791 MosUtilities::MosSecureFileOpen(&fp2, "origin.key", "r");
10792 if (fp2 != nullptr)
10793 {
10794 state->refactor = false;
10795 fclose(fp2);
10796 }
10797 }
10798 #endif
10799
10800 if (state->refactor)
10801 {
10802 CM_NORMALMESSAGE("Info: Fast path is enabled!\n");
10803 }
10804 else
10805 {
10806 CM_NORMALMESSAGE("Info: Fast path is disabled!\n");
10807 }
10808
10809 finish:
10810 if (eStatus != MOS_STATUS_SUCCESS)
10811 {
10812 HalCm_Destroy(state);
10813 *cmState = nullptr;
10814 }
10815 else
10816 {
10817 *cmState = state;
10818 }
10819
10820 return eStatus;
10821 }
10822
10823 //*-----------------------------------------------------------------------------
10824 //| Purpose: Destroys instance of HAL CM State
10825 //| Returns: N/A
10826 //*-----------------------------------------------------------------------------
HalCm_Destroy(PCM_HAL_STATE state)10827 void HalCm_Destroy(
10828 PCM_HAL_STATE state) // [in] Pointer to CM State
10829 {
10830 MOS_STATUS eStatus;
10831 int32_t i;
10832
10833 if (state)
10834 {
10835 //Delete CmHal Interface
10836 MosSafeDelete(state->cmHalInterface);
10837 if (state->osInterface)
10838 {
10839 state->osInterface->pfnDeleteMhwCpInterface(state->cpInterface);
10840 state->cpInterface = nullptr;
10841 }
10842 else
10843 {
10844 CM_ASSERTMESSAGE("Failed to destroy cpInterface.");
10845 }
10846 MosSafeDelete(state->state_buffer_list_ptr);
10847 MosSafeDelete(state->criticalSectionDSH);
10848
10849 // Delete the unified media profiler
10850 if (state->perfProfiler)
10851 {
10852 MediaPerfProfiler::Destroy(state->perfProfiler, (void*)state, state->osInterface);
10853 state->perfProfiler = nullptr;
10854 }
10855
10856 // Delete Batch Buffers
10857 if (state->batchBuffers)
10858 {
10859 for (i=0; i < state->numBatchBuffers; i++)
10860 {
10861 if (!Mos_ResourceIsNull(&state->batchBuffers[i].OsResource))
10862 {
10863 eStatus = (MOS_STATUS)state->renderHal->pfnFreeBB(
10864 state->renderHal,
10865 &state->batchBuffers[i]);
10866
10867 CM_ASSERT(eStatus == MOS_STATUS_SUCCESS);
10868 }
10869
10870 MOS_FreeMemory(state->batchBuffers[i].pPrivateData);
10871 }
10872
10873 MOS_FreeMemory(state->batchBuffers);
10874 state->batchBuffers = nullptr;
10875 }
10876
10877 // Delete TimeStamp Buffer
10878 HalCm_FreeTsResource(state);
10879 if ((state->midThreadPreemptionDisabled == false) || (state->kernelDebugEnabled == true)) {
10880 // Delete CSR surface
10881 HalCm_FreeCsrResource(state);
10882
10883 // Delete sip surface
10884 HalCm_FreeSipResource(state);
10885 }
10886
10887 // Delete tracker resource
10888 HalCm_FreeTrackerResources(state);
10889
10890 // Delete advance executor
10891 MOS_Delete(state->advExecutor);
10892
10893 // Delete heap manager
10894 if (state->renderHal)
10895 {
10896 MOS_Delete(state->renderHal->dgsheapManager);
10897 }
10898
10899 if (state->hLibModule)
10900 {
10901 MosUtilities::MosFreeLibrary(state->hLibModule);
10902 state->hLibModule = nullptr;
10903 }
10904
10905 // Delete RenderHal Interface
10906 if (state->renderHal)
10907 {
10908 if (state->renderHal->pfnDestroy)
10909 {
10910 state->renderHal->pfnDestroy(state->renderHal);
10911 }
10912 MOS_FreeMemory(state->renderHal);
10913 state->renderHal = nullptr;
10914 }
10915
10916 // Delete VEBOX Interface
10917 if (state->veboxInterface
10918 && state->veboxInterface->m_veboxHeap)
10919 {
10920 state->veboxInterface->DestroyHeap( );
10921 MOS_Delete(state->veboxInterface);
10922 state->veboxInterface = nullptr;
10923 }
10924
10925 // Delete OS Interface
10926 if (state->osInterface)
10927 {
10928 if (state->osInterface->pfnDestroy)
10929 {
10930 state->osInterface->pfnDestroy(state->osInterface, true);
10931 }
10932 if (state->osInterface->bDeallocateOnExit)
10933 {
10934 MOS_FreeMemory(state->osInterface);
10935 state->osInterface = nullptr;
10936 }
10937 }
10938
10939 // Delete the TaskParam
10940 MOS_FreeMemory(state->taskParam);
10941
10942 // Delete the TaskTimeStamp
10943 MOS_FreeMemory(state->taskTimeStamp);
10944
10945 // Delete Tables
10946 MOS_FreeMemory(state->tableMemories);
10947
10948 // Delete the pTotalKernelSize table for GSH
10949 MOS_FreeMemory(state->totalKernelSize);
10950
10951 // Delete the perfTag Map
10952 for (int i = 0; i < MAX_COMBINE_NUM_IN_PERFTAG; i++)
10953 {
10954 MosSafeDelete(state->perfTagIndexMap[i]);
10955 }
10956
10957 // Delete the state
10958 MOS_FreeMemory(state);
10959 }
10960 }
10961
HalCm_GetUserFeatureSettings(PCM_HAL_STATE cmState)10962 void HalCm_GetUserFeatureSettings(
10963 PCM_HAL_STATE cmState
10964 )
10965 {
10966 #if (_DEBUG || _RELEASE_INTERNAL)
10967 PMOS_INTERFACE osInterface = cmState->osInterface;
10968
10969 MOS_USER_FEATURE_VALUE_DATA userFeatureData;
10970
10971 MOS_ZeroMemory(&userFeatureData, sizeof(userFeatureData));
10972 MOS_UserFeature_ReadValue_ID(
10973 nullptr,
10974 __MEDIA_USER_FEATURE_VALUE_MDF_MAX_THREAD_NUM_ID,
10975 &userFeatureData,
10976 cmState->osInterface->pOsContext);
10977
10978 if (userFeatureData.i32Data != 0)
10979 {
10980 uint32_t data = userFeatureData.i32Data;
10981 if ((data > 0) && (data <= cmState->renderHal->pHwCaps->dwMaxThreads))
10982 {
10983 cmState->maxHWThreadValues.userFeatureValue = data;
10984 }
10985 }
10986
10987 #else
10988 UNUSED(cmState);
10989 #endif // _DEBUG || _RELEASE_INTERNAL
10990 }
10991
10992 //*-----------------------------------------------------------------------------
10993 //| Purpose: Gathers information about the surface - used by GT-Pin
10994 //| Returns: MOS_STATUS_SUCCESS if surface type recognized, S_FAIL otherwise
10995 //*-----------------------------------------------------------------------------
HalCm_GetSurfaceDetails(PCM_HAL_STATE cmState,PCM_HAL_INDEX_PARAM indexParam,uint32_t btIndex,MOS_SURFACE & surface,int16_t globalSurface,PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntry,uint32_t tempPlaneIndex,RENDERHAL_SURFACE_STATE_PARAMS surfaceParam,CM_HAL_KERNEL_ARG_KIND argKind)10996 MOS_STATUS HalCm_GetSurfaceDetails(
10997 PCM_HAL_STATE cmState,
10998 PCM_HAL_INDEX_PARAM indexParam,
10999 uint32_t btIndex,
11000 MOS_SURFACE& surface,
11001 int16_t globalSurface,
11002 PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntry,
11003 uint32_t tempPlaneIndex,
11004 RENDERHAL_SURFACE_STATE_PARAMS surfaceParam,
11005 CM_HAL_KERNEL_ARG_KIND argKind
11006 )
11007 {
11008 MOS_STATUS eStatus = MOS_STATUS_UNKNOWN;
11009 PCM_SURFACE_DETAILS surfaceInfos = nullptr;
11010 PCM_SURFACE_DETAILS pgSurfaceInfos = nullptr;
11011 PCM_HAL_TASK_PARAM taskParam = cmState->taskParam;
11012 uint32_t curKernelIndex = taskParam->curKernelIndex;
11013 PMOS_PLANE_OFFSET planeOffset = 0;
11014 uint32_t maxEntryNum = 0;
11015 MOS_OS_FORMAT tempOsFormat ;
11016
11017 CM_SURFACE_BTI_INFO surfBTIInfo;
11018 CM_CHK_NULL_GOTOFINISH_MOSERROR(cmState);
11019 CM_CHK_NULL_GOTOFINISH_MOSERROR(cmState->cmHalInterface);
11020 CM_CHK_NULL_GOTOFINISH_MOSERROR(cmState->osInterface);
11021 cmState->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
11022
11023 UNUSED(indexParam);
11024
11025 if(curKernelIndex+1>taskParam->surfEntryInfoArrays.kernelNum)
11026 {
11027 eStatus = MOS_STATUS_INVALID_PARAMETER;
11028 CM_ASSERTMESSAGE(
11029 "Mismatched kernel index: curKernelIndex '%d' vs krnNum '%d'",
11030 curKernelIndex,taskParam->surfEntryInfoArrays.kernelNum);
11031 goto finish;
11032 }
11033
11034 surfaceInfos = taskParam->surfEntryInfoArrays.surfEntryInfosArray[curKernelIndex].surfEntryInfos;
11035 pgSurfaceInfos = taskParam->surfEntryInfoArrays.surfEntryInfosArray[curKernelIndex].globalSurfInfos;
11036
11037 tempOsFormat = (MOS_OS_FORMAT)cmState->osInterface->pfnMosFmtToOsFmt(surface.Format);
11038
11039 switch (argKind)
11040 {
11041 case CM_ARGUMENT_SURFACEBUFFER:
11042
11043 if((btIndex >= surfBTIInfo.reservedSurfaceStart) &&
11044 (btIndex < surfBTIInfo.reservedSurfaceStart + CM_MAX_GLOBAL_SURFACE_NUMBER))
11045 {
11046 btIndex = btIndex - surfBTIInfo.reservedSurfaceStart;
11047
11048 maxEntryNum = taskParam->surfEntryInfoArrays.surfEntryInfosArray->globalSurfNum;
11049 if ( btIndex >= maxEntryNum )
11050 {
11051 eStatus = MOS_STATUS_INVALID_PARAMETER;
11052 CM_ASSERTMESSAGE(
11053 "Array for surface details is full: Max number of entries '%d' and trying to add index '%d'",
11054 maxEntryNum, btIndex);
11055 goto finish;
11056 }
11057
11058 MOS_ZeroMemory(&pgSurfaceInfos[btIndex], sizeof(CM_SURFACE_DETAILS));
11059 pgSurfaceInfos[btIndex].width = surface.dwWidth;
11060 pgSurfaceInfos[btIndex].format = DDI_FORMAT_UNKNOWN;
11061 }
11062 else
11063 {
11064 btIndex = btIndex - surfBTIInfo.reservedSurfaceStart - CM_MAX_GLOBAL_SURFACE_NUMBER;
11065 maxEntryNum = taskParam->surfEntryInfoArrays.surfEntryInfosArray->maxEntryNum;
11066 if ( btIndex >= maxEntryNum )
11067 {
11068 eStatus = MOS_STATUS_INVALID_PARAMETER;
11069 CM_ASSERTMESSAGE(
11070 "Array for surface details is full: Max number of entries '%d' and trying to add index '%d'",
11071 maxEntryNum, btIndex);
11072 goto finish;
11073 }
11074
11075 MOS_ZeroMemory(&surfaceInfos[btIndex], sizeof(CM_SURFACE_DETAILS));
11076 surfaceInfos[btIndex].width = surface.dwWidth;
11077 surfaceInfos[btIndex].format = DDI_FORMAT_UNKNOWN;
11078 }
11079
11080 if (globalSurface < 0)
11081 {
11082 ++taskParam->surfEntryInfoArrays.surfEntryInfosArray[curKernelIndex].usedIndex;
11083 }
11084
11085 eStatus = MOS_STATUS_SUCCESS;
11086 break;
11087
11088 case CM_ARGUMENT_SURFACE2D_UP:
11089 case CM_ARGUMENT_SURFACE2D:
11090 // VME surface and sampler8x8 called with CM_ARGUMENT_SURFACE2D
11091 btIndex = btIndex - surfBTIInfo.reservedSurfaceStart - CM_MAX_GLOBAL_SURFACE_NUMBER;
11092 maxEntryNum = taskParam->surfEntryInfoArrays.surfEntryInfosArray->maxEntryNum;
11093
11094 if ( btIndex >= maxEntryNum )
11095 {
11096 eStatus = MOS_STATUS_INVALID_PARAMETER;
11097 CM_ASSERTMESSAGE(
11098 "Array for surface details is full: Max number of entries '%d' and trying to add index '%d'",
11099 maxEntryNum, btIndex);
11100 goto finish;
11101 }
11102
11103 surfaceInfos[btIndex].width = surfaceEntry->dwWidth;
11104 surfaceInfos[btIndex].height = surfaceEntry->dwHeight;
11105 surfaceInfos[btIndex].depth = 0;
11106 surfaceInfos[btIndex].format = (DdiSurfaceFormat)tempOsFormat;
11107 surfaceInfos[btIndex].planeIndex = tempPlaneIndex;
11108 surfaceInfos[btIndex].pitch = surfaceEntry->dwPitch;
11109 surfaceInfos[btIndex].slicePitch = 0;
11110 surfaceInfos[btIndex].surfaceBaseAddress = 0;
11111 surfaceInfos[btIndex].tileWalk = surfaceEntry->bTileWalk;
11112 surfaceInfos[btIndex].tiledSurface = surfaceEntry->bTiledSurface;
11113
11114 if (surfaceEntry->YUVPlane == MHW_U_PLANE ||
11115 surfaceEntry->YUVPlane == MHW_V_PLANE)
11116 {
11117 planeOffset = (surfaceEntry->YUVPlane == MHW_U_PLANE)
11118 ? &surface.UPlaneOffset
11119 : &surface.VPlaneOffset;
11120
11121 surfaceInfos[btIndex].yOffset = planeOffset->iYOffset >> 1;
11122
11123 if ( argKind == CM_ARGUMENT_SURFACE2D_UP )
11124 {
11125 surfaceInfos[btIndex].xOffset = (planeOffset->iXOffset/(uint32_t)sizeof(uint32_t)) >> 2;
11126 }
11127 else
11128 {
11129 uint32_t pixelsPerSampleUV = 0;
11130 //Get Pixels Per Sample if we use dataport read
11131 if(surfaceParam.bWidthInDword_UV)
11132 {
11133 RenderHal_GetPixelsPerSample(surface.Format, &pixelsPerSampleUV);
11134 }
11135 else
11136 {
11137 // If the kernel uses sampler - do not change width (it affects coordinates)
11138 pixelsPerSampleUV = 1;
11139 }
11140
11141 if(pixelsPerSampleUV == 1)
11142 {
11143 surfaceInfos[btIndex].xOffset = planeOffset->iXOffset >> 2;
11144 }
11145 else
11146 {
11147 surfaceInfos[btIndex].xOffset = (planeOffset->iXOffset/(uint32_t)sizeof(uint32_t)) >> 2;
11148 }
11149 }
11150 }
11151 else
11152 {
11153 surfaceInfos[btIndex].xOffset = (surface.YPlaneOffset.iXOffset/(uint32_t)sizeof(uint32_t)) >> 2;
11154 surfaceInfos[btIndex].yOffset = surface.YPlaneOffset.iYOffset >> 1;
11155 }
11156
11157 ++taskParam->surfEntryInfoArrays.surfEntryInfosArray[curKernelIndex].usedIndex;
11158 ++tempPlaneIndex;
11159
11160 eStatus = MOS_STATUS_SUCCESS;
11161 break;
11162
11163 case CM_ARGUMENT_SURFACE3D:
11164
11165 btIndex = btIndex - surfBTIInfo.normalSurfaceStart - CM_MAX_GLOBAL_SURFACE_NUMBER;
11166 maxEntryNum = taskParam->surfEntryInfoArrays.surfEntryInfosArray->maxEntryNum;
11167
11168 if ( btIndex >= maxEntryNum )
11169 {
11170 eStatus = MOS_STATUS_INVALID_PARAMETER;
11171 CM_ASSERTMESSAGE(
11172 "Array for surface details is full: Max number of entries '%d' and trying to add index '%d'",
11173 maxEntryNum, btIndex);
11174 goto finish;
11175 }
11176
11177 surfaceInfos[btIndex].width = surfaceEntry->dwWidth;
11178 surfaceInfos[btIndex].height = surfaceEntry->dwHeight;
11179 surfaceInfos[btIndex].depth = surface.dwDepth;
11180 surfaceInfos[btIndex].format = (DdiSurfaceFormat)tempOsFormat;
11181 surfaceInfos[btIndex].pitch = surfaceEntry->dwPitch;
11182 surfaceInfos[btIndex].planeIndex = tempPlaneIndex;
11183 surfaceInfos[btIndex].slicePitch = surface.dwSlicePitch;
11184 surfaceInfos[btIndex].surfaceBaseAddress = 0;
11185 surfaceInfos[btIndex].tileWalk = surfaceEntry->bTileWalk;
11186 surfaceInfos[btIndex].tiledSurface = surfaceEntry->bTiledSurface;
11187
11188 if (surfaceEntry->YUVPlane == MHW_U_PLANE ||
11189 surfaceEntry->YUVPlane == MHW_V_PLANE)
11190 {
11191 planeOffset = (surfaceEntry->YUVPlane == MHW_U_PLANE)
11192 ? &surface.UPlaneOffset
11193 : &surface.VPlaneOffset;
11194
11195 surfaceInfos[btIndex].yOffset = planeOffset->iYOffset >> 1;
11196 surfaceInfos[btIndex].xOffset = (planeOffset->iXOffset/(uint32_t)sizeof(uint32_t)) >> 2;
11197 }
11198 else
11199 {
11200 surfaceInfos[btIndex].xOffset = (surface.YPlaneOffset.iXOffset/(uint32_t)sizeof(uint32_t)) >> 2;
11201 surfaceInfos[btIndex].yOffset = surface.YPlaneOffset.iYOffset >> 1;
11202 }
11203
11204 ++tempPlaneIndex;
11205 ++taskParam->surfEntryInfoArrays.surfEntryInfosArray[curKernelIndex].usedIndex;
11206
11207 eStatus = MOS_STATUS_SUCCESS;
11208 break;
11209
11210 default:
11211 break;
11212 }
11213
11214 finish:
11215 return eStatus;
11216 }
11217
HalCm_GetFreeBindingIndex(PCM_HAL_STATE state,PCM_HAL_INDEX_PARAM indexParam,uint32_t total)11218 uint32_t HalCm_GetFreeBindingIndex(
11219 PCM_HAL_STATE state,
11220 PCM_HAL_INDEX_PARAM indexParam,
11221 uint32_t total)
11222 {
11223 CM_SURFACE_BTI_INFO surfBTIInfo;
11224 state->cmHalInterface->GetHwSurfaceBTIInfo(&surfBTIInfo);
11225
11226 uint32_t btIndex = surfBTIInfo.normalSurfaceStart;
11227 uint32_t unAllocated = total;
11228
11229 while (btIndex < 256 && unAllocated > 0)
11230 {
11231 uint32_t arrayIndex = btIndex >> 5;
11232 uint32_t bitMask = (uint32_t)0x1 << (btIndex % 32);
11233 if (indexParam->btArray[arrayIndex] & bitMask)
11234 {
11235 // oops, occupied
11236 if (unAllocated != total)
11237 {
11238 // clear previous allocation
11239 uint32_t allocated = total - unAllocated;
11240 uint32_t tmpIndex = btIndex - 1;
11241 while (allocated > 0)
11242 {
11243 uint32_t arrayIndex = tmpIndex >> 5;
11244 uint32_t bitMask = 1 << (tmpIndex % 32);
11245 indexParam->btArray[arrayIndex] &= ~bitMask;
11246 allocated--;
11247 tmpIndex--;
11248 }
11249 // reset
11250 unAllocated = total;
11251 }
11252 }
11253 else
11254 {
11255 indexParam->btArray[arrayIndex] |= bitMask;
11256 unAllocated--;
11257 }
11258 btIndex++;
11259 }
11260
11261 if (unAllocated == 0)
11262 {
11263 // found slot
11264 return btIndex - total;
11265 }
11266
11267 // no slot
11268 return 0;
11269 }
11270
HalCm_PreSetBindingIndex(PCM_HAL_INDEX_PARAM indexParam,uint32_t start,uint32_t end)11271 void HalCm_PreSetBindingIndex(
11272 PCM_HAL_INDEX_PARAM indexParam,
11273 uint32_t start,
11274 uint32_t end)
11275 {
11276 uint32_t btIndex;
11277 for ( btIndex = start; btIndex <= end ; btIndex++)
11278 {
11279 uint32_t arrayIndex = btIndex >> 5;
11280 uint32_t bitMask = 1 << (btIndex % 32);
11281 indexParam->btArray[arrayIndex] |= bitMask;
11282 }
11283 }
11284
11285 //*-----------------------------------------------------------------------------
11286 //| Purpose: Setup surface State with BTIndex
11287 //| Returns: Result of the operation
11288 //*-----------------------------------------------------------------------------
HalCm_Setup2DSurfaceStateWithBTIndex(PCM_HAL_STATE state,int32_t bindingTable,uint32_t surfIndex,uint32_t btIndex,bool pixelPitch)11289 MOS_STATUS HalCm_Setup2DSurfaceStateWithBTIndex(
11290 PCM_HAL_STATE state,
11291 int32_t bindingTable,
11292 uint32_t surfIndex,
11293 uint32_t btIndex,
11294 bool pixelPitch)
11295 {
11296 PRENDERHAL_INTERFACE renderHal = state->renderHal;
11297 MOS_STATUS eStatus;
11298 RENDERHAL_SURFACE surface;
11299 RENDERHAL_SURFACE_STATE_PARAMS surfaceParam;
11300 PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntries[MHW_MAX_SURFACE_PLANES];
11301 int32_t nSurfaceEntries, i;
11302 uint16_t memObjCtl;
11303 uint32_t offsetSrc;
11304 PRENDERHAL_STATE_HEAP stateHeap;
11305
11306 eStatus = MOS_STATUS_UNKNOWN;
11307 nSurfaceEntries = 0;
11308
11309 if (surfIndex == CM_NULL_SURFACE)
11310 {
11311 return MOS_STATUS_SUCCESS;
11312 }
11313
11314 memObjCtl = CM_DEFAULT_CACHE_TYPE;
11315
11316 // check the surfIndex
11317 if (surfIndex >= state->cmDeviceParam.max2DSurfaceTableSize ||
11318 Mos_ResourceIsNull(&state->umdSurf2DTable[surfIndex].osResource) )
11319 {
11320 CM_ASSERTMESSAGE(
11321 "Invalid 2D surface array index '%d'", surfIndex);
11322 return MOS_STATUS_UNKNOWN;
11323 }
11324
11325 // Check to see if surface is already assigned
11326 uint32_t nBTInTable = ( unsigned char )CM_INVALID_INDEX;
11327 if ( pixelPitch )
11328 {
11329 nBTInTable = state->bti2DIndexTable[ surfIndex ].BTI.samplerSurfIndex;
11330 }
11331 else
11332 {
11333 nBTInTable = state->bti2DIndexTable[ surfIndex ].BTI.regularSurfIndex;
11334 }
11335
11336 if ( btIndex == nBTInTable )
11337 {
11338 nSurfaceEntries = state->bti2DIndexTable[ surfIndex ].nPlaneNumber;
11339
11340 stateHeap = renderHal->pStateHeap;
11341
11342 // Get Offset to Current Binding Table
11343 uint32_t offsetDst = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
11344 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
11345 ( bindingTable * stateHeap->iBindingTableSize ) + // Moves the pointer to a Particular Binding Table
11346 ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
11347
11348 uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
11349
11350 if ( pixelPitch )
11351 {
11352 MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti2DIndexTable[ surfIndex ].BTITableEntry.samplerBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
11353 }
11354 else
11355 {
11356 MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti2DIndexTable[ surfIndex ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
11357 }
11358
11359 return MOS_STATUS_SUCCESS;
11360 }
11361
11362 // Get Details of 2D surface and fill the surface
11363 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACE2D, surfIndex, pixelPitch));
11364
11365 // Setup 2D surface
11366 MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
11367 surfaceParam.Type = renderHal->SurfaceTypeDefault;
11368 surfaceParam.Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL;
11369 if (!pixelPitch) {
11370 surfaceParam.bWidthInDword_UV = true;
11371 surfaceParam.bWidthInDword_Y = true;
11372 }
11373
11374 surfaceParam.isOutput = isRenderTarget(state, surfIndex);
11375
11376 //Cache configurations
11377 state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
11378
11379 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
11380 renderHal,
11381 &surface,
11382 &surfaceParam,
11383 &nSurfaceEntries,
11384 surfaceEntries,
11385 nullptr));
11386
11387 for (i = 0; i < nSurfaceEntries; i++)
11388 {
11389 // Bind the surface State
11390 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
11391 renderHal,
11392 bindingTable,
11393 btIndex + i,
11394 surfaceEntries[i]));
11395 }
11396
11397 state->bti2DIndexTable[ surfIndex ].nPlaneNumber = nSurfaceEntries;
11398 // Get Offset to Current Binding Table
11399 stateHeap = renderHal->pStateHeap;
11400 offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
11401 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
11402 ( bindingTable * stateHeap->iBindingTableSize ) + // Moves the pointer to a Particular Binding Table
11403 ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
11404
11405 if ( pixelPitch )
11406 {
11407 state->bti2DIndexTable[ surfIndex ].BTI.samplerSurfIndex = btIndex;
11408 state->bti2DIndexTable[ surfIndex ].BTITableEntry.samplerBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
11409 }
11410 else
11411 {
11412 state->bti2DIndexTable[ surfIndex ].BTI.regularSurfIndex = btIndex;
11413 state->bti2DIndexTable[ surfIndex ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
11414 }
11415
11416 eStatus = MOS_STATUS_SUCCESS;
11417
11418 finish:
11419 return eStatus;
11420 }
11421
11422 //*-----------------------------------------------------------------------------
11423 //| Purpose: Setup Buffer surface State with BTIndex
11424 //| Returns: Result of the operation
11425 //*-----------------------------------------------------------------------------
HalCm_SetupBufferSurfaceStateWithBTIndex(PCM_HAL_STATE state,int32_t bindingTable,uint32_t surfIndex,uint32_t btIndex,bool pixelPitch)11426 MOS_STATUS HalCm_SetupBufferSurfaceStateWithBTIndex(
11427 PCM_HAL_STATE state,
11428 int32_t bindingTable,
11429 uint32_t surfIndex,
11430 uint32_t btIndex,
11431 bool pixelPitch)
11432 {
11433 PRENDERHAL_INTERFACE renderHal = state ? state->renderHal : nullptr;
11434 MOS_STATUS eStatus;
11435 RENDERHAL_SURFACE surface;
11436 RENDERHAL_SURFACE_STATE_PARAMS surfaceParam;
11437 PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntry;
11438 uint16_t memObjCtl;
11439 uint32_t offsetSrc;
11440 PRENDERHAL_STATE_HEAP stateHeap;
11441 UNUSED(pixelPitch);
11442
11443 eStatus = MOS_STATUS_UNKNOWN;
11444
11445 CM_CHK_NULL_RETURN_MOSERROR(state);
11446 CM_CHK_NULL_RETURN_MOSERROR(renderHal);
11447
11448 if (surfIndex == CM_NULL_SURFACE)
11449 {
11450 return MOS_STATUS_SUCCESS;
11451 }
11452
11453 memObjCtl = CM_DEFAULT_CACHE_TYPE;
11454
11455 // Check to see if surface is already assigned
11456 if ( btIndex == ( uint32_t )state->btiBufferIndexTable[ surfIndex ].BTI.regularSurfIndex )
11457 {
11458 uint32_t nSurfaceEntries = state->btiBufferIndexTable[ surfIndex ].nPlaneNumber;
11459
11460 stateHeap = renderHal->pStateHeap;
11461
11462 // Get Offset to Current Binding Table
11463 uint32_t offsetDst = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
11464 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
11465 ( bindingTable * stateHeap->iBindingTableSize ) + // Moves the pointer to a Particular Binding Table
11466 ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
11467
11468 uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
11469 MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->btiBufferIndexTable[ surfIndex ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
11470
11471 return MOS_STATUS_SUCCESS;
11472 }
11473
11474 // Get Details of Buffer surface and fill the surface
11475 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACEBUFFER, surfIndex, 0));
11476
11477 // set up buffer surface
11478 MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
11479
11480 // Set isOutput by default
11481 surfaceParam.isOutput = true;
11482
11483 //Cache configurations default
11484 state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
11485
11486 // Setup Buffer surface
11487 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupBufferSurfaceState(
11488 renderHal,
11489 &surface,
11490 &surfaceParam,
11491 &surfaceEntry));
11492
11493 //Cache configurations
11494 state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
11495
11496 // Bind the surface State
11497 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
11498 renderHal,
11499 bindingTable,
11500 btIndex,
11501 surfaceEntry));
11502
11503 state->btiBufferIndexTable[ surfIndex ].BTI.regularSurfIndex = btIndex;
11504 state->btiBufferIndexTable[ surfIndex ].nPlaneNumber = 1;
11505
11506 stateHeap = renderHal->pStateHeap;
11507 offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
11508 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
11509 ( bindingTable * stateHeap->iBindingTableSize ) + // Moves the pointer to a Particular Binding Table
11510 ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
11511
11512 state->btiBufferIndexTable[ surfIndex ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
11513
11514 eStatus = MOS_STATUS_SUCCESS;
11515
11516 finish:
11517 return eStatus;
11518 }
11519
HalCm_Setup2DSurfaceUPStateWithBTIndex(PCM_HAL_STATE state,int32_t bindingTable,uint32_t surfIndex,uint32_t btIndex,bool pixelPitch)11520 MOS_STATUS HalCm_Setup2DSurfaceUPStateWithBTIndex(
11521 PCM_HAL_STATE state,
11522 int32_t bindingTable,
11523 uint32_t surfIndex,
11524 uint32_t btIndex,
11525 bool pixelPitch)
11526 {
11527 MOS_STATUS eStatus;
11528 RENDERHAL_SURFACE surface;
11529 RENDERHAL_SURFACE_STATE_PARAMS surfaceParam;
11530 PRENDERHAL_INTERFACE renderHal;
11531 PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntries[MHW_MAX_SURFACE_PLANES];
11532 int32_t nSurfaceEntries, i;
11533 uint16_t memObjCtl;
11534 uint32_t offsetSrc;
11535 PRENDERHAL_STATE_HEAP stateHeap;
11536
11537 eStatus = MOS_STATUS_UNKNOWN;
11538 renderHal = state->renderHal;
11539
11540 if (surfIndex == CM_NULL_SURFACE)
11541 {
11542 return MOS_STATUS_SUCCESS;
11543 }
11544
11545 memObjCtl = CM_DEFAULT_CACHE_TYPE;
11546
11547 // Check to see if surface is already assigned
11548 uint32_t nBTInTable = ( unsigned char )CM_INVALID_INDEX;
11549 if ( pixelPitch )
11550 {
11551 nBTInTable = state->bti2DUPIndexTable[ surfIndex ].BTI.samplerSurfIndex;
11552 }
11553 else
11554 {
11555 nBTInTable = state->bti2DUPIndexTable[ surfIndex ].BTI.regularSurfIndex;
11556 }
11557
11558 if ( btIndex == nBTInTable )
11559 {
11560 uint32_t nSurfaceEntries = state->bti2DUPIndexTable[ surfIndex ].nPlaneNumber;
11561
11562 stateHeap = renderHal->pStateHeap;
11563
11564 // Get Offset to Current Binding Table
11565 uint32_t offsetDst = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
11566 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
11567 ( bindingTable * stateHeap->iBindingTableSize ) + // Moves the pointer to a Particular Binding Table
11568 ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
11569
11570 uint32_t *bindingTableEntry = ( uint32_t *)( stateHeap->pSshBuffer + offsetDst );
11571 if ( pixelPitch )
11572 {
11573 MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti2DUPIndexTable[ surfIndex ].BTITableEntry.samplerBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
11574 }
11575 else
11576 {
11577 MOS_SecureMemcpy( bindingTableEntry, sizeof( uint32_t ) * nSurfaceEntries, state->bti2DUPIndexTable[ surfIndex ].BTITableEntry.regularBtiEntryPosition, sizeof( uint32_t ) * nSurfaceEntries );
11578 }
11579
11580 return MOS_STATUS_SUCCESS;
11581 }
11582
11583 // Get Details of 2DUP surface and fill the surface
11584 CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceAndRegister( state, &surface, CM_ARGUMENT_SURFACE2D_UP, surfIndex, pixelPitch ) );
11585
11586 // Setup 2D surface
11587 MOS_ZeroMemory( &surfaceParam, sizeof( surfaceParam ) );
11588 surfaceParam.Type = renderHal->SurfaceTypeDefault;
11589 surfaceParam.Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL;
11590
11591 if ( !pixelPitch )
11592 {
11593 surfaceParam.bWidthInDword_UV = true;
11594 surfaceParam.bWidthInDword_Y = true;
11595 }
11596
11597 surfaceParam.isOutput = true;
11598
11599 //Cache configurations
11600 state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
11601
11602 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
11603 renderHal,
11604 &surface,
11605 &surfaceParam,
11606 &nSurfaceEntries,
11607 surfaceEntries,
11608 nullptr));
11609
11610 for (i = 0; i < nSurfaceEntries; i++)
11611 {
11612 // Bind the surface State
11613 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
11614 renderHal,
11615 bindingTable,
11616 btIndex + i,
11617 surfaceEntries[i]));
11618 }
11619
11620 state->bti2DUPIndexTable[ surfIndex ].nPlaneNumber = nSurfaceEntries;
11621
11622 stateHeap = renderHal->pStateHeap;
11623 offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
11624 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
11625 ( bindingTable * stateHeap->iBindingTableSize ) + // Moves the pointer to a Particular Binding Table
11626 ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
11627
11628 if ( pixelPitch )
11629 {
11630 state->bti2DUPIndexTable[ surfIndex ].BTI.samplerSurfIndex = btIndex;
11631 state->bti2DUPIndexTable[ surfIndex ].BTITableEntry.samplerBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
11632 }
11633 else
11634 {
11635 state->bti2DUPIndexTable[ surfIndex ].BTI.regularSurfIndex = btIndex;
11636 state->bti2DUPIndexTable[ surfIndex ].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
11637 }
11638
11639 eStatus = MOS_STATUS_SUCCESS;
11640
11641 finish:
11642 return eStatus;
11643 }
11644
HalCm_SetupSampler8x8SurfaceStateWithBTIndex(PCM_HAL_STATE state,int32_t bindingTable,uint32_t surfIndex,uint32_t btIndex,bool pixelPitch,CM_HAL_KERNEL_ARG_KIND kind,uint32_t addressControl)11645 MOS_STATUS HalCm_SetupSampler8x8SurfaceStateWithBTIndex(
11646 PCM_HAL_STATE state,
11647 int32_t bindingTable,
11648 uint32_t surfIndex,
11649 uint32_t btIndex,
11650 bool pixelPitch,
11651 CM_HAL_KERNEL_ARG_KIND kind,
11652 uint32_t addressControl )
11653 {
11654 MOS_STATUS eStatus;
11655 RENDERHAL_SURFACE surface;
11656 RENDERHAL_SURFACE_STATE_PARAMS surfaceParam;
11657 PRENDERHAL_INTERFACE renderHal;
11658 PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntries[ MHW_MAX_SURFACE_PLANES ];
11659 int32_t nSurfaceEntries;
11660 uint16_t memObjCtl;
11661 int32_t i;
11662 uint32_t offsetSrc;
11663 PRENDERHAL_STATE_HEAP stateHeap;
11664 UNUSED(pixelPitch);
11665
11666 eStatus = MOS_STATUS_UNKNOWN;
11667 renderHal = state->renderHal;
11668
11669 if ( surfIndex == CM_NULL_SURFACE )
11670 {
11671 eStatus = MOS_STATUS_SUCCESS;
11672 goto finish;
11673 }
11674
11675 memObjCtl = CM_DEFAULT_CACHE_TYPE;
11676
11677 // check to see if index is valid
11678 if ( surfIndex >= state->cmDeviceParam.max2DSurfaceTableSize ||
11679 Mos_ResourceIsNull( &state->umdSurf2DTable[ surfIndex ].osResource ) )
11680 {
11681 eStatus = MOS_STATUS_INVALID_PARAMETER;
11682 CM_ASSERTMESSAGE(
11683 "Invalid 2D surface array index '%d'", surfIndex );
11684 goto finish;
11685 }
11686
11687 // Get Details of Sampler8x8 surface and fill the surface
11688 CM_CHK_MOSSTATUS_GOTOFINISH( HalCm_GetSurfaceAndRegister( state, &surface, kind, surfIndex, 0 ) );
11689
11690 // Setup surface
11691 MOS_ZeroMemory( &surfaceParam, sizeof( surfaceParam ) );
11692 surfaceParam.Type = renderHal->SurfaceTypeAdvanced;
11693 surfaceParam.isOutput = true;
11694 surfaceParam.bWidthInDword_Y = false;
11695 surfaceParam.bWidthInDword_UV = false;
11696 surfaceParam.Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL;
11697 surfaceParam.bVASurface = ( kind == CM_ARGUMENT_SURFACE_SAMPLER8X8_VA ) ? 1 : 0;
11698 surfaceParam.AddressControl = addressControl;
11699 state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam );
11700 renderHal->bEnableP010SinglePass = state->cmHalInterface->IsP010SinglePassSupported();
11701 nSurfaceEntries = 0;
11702 CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnSetupSurfaceState(
11703 renderHal,
11704 &surface,
11705 &surfaceParam,
11706 &nSurfaceEntries,
11707 surfaceEntries,
11708 nullptr ) );
11709
11710 CM_ASSERT( nSurfaceEntries == 1 );
11711
11712 for ( i = 0; i < nSurfaceEntries; i++ )
11713 {
11714 // Bind the surface State
11715 CM_CHK_MOSSTATUS_GOTOFINISH( renderHal->pfnBindSurfaceState(
11716 renderHal,
11717 bindingTable,
11718 btIndex + i,
11719 surfaceEntries[ i ] ) );
11720 }
11721
11722 stateHeap = renderHal->pStateHeap;
11723 offsetSrc = ( stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize ) + // Points to the Base of Current SSH Buffer Instance
11724 ( stateHeap->iBindingTableOffset ) + // Moves the pointer to Base of Array of Binding Tables
11725 ( bindingTable * stateHeap->iBindingTableSize ) + // Moves the pointer to a Particular Binding Table
11726 ( btIndex * sizeof( uint32_t ) ); // Move the pointer to correct entry
11727
11728 state->bti2DIndexTable[ surfIndex ].nPlaneNumber = nSurfaceEntries;
11729 state->bti2DIndexTable[ surfIndex ].BTITableEntry.sampler8x8BtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
11730 state->bti2DIndexTable[ surfIndex ].BTI.sampler8x8SurfIndex = btIndex;
11731
11732 eStatus = MOS_STATUS_SUCCESS;
11733
11734 finish:
11735 renderHal->bEnableP010SinglePass = false;
11736 return eStatus;
11737 }
11738
11739 //*-----------------------------------------------------------------------------
11740 //| Purpose: Setup 3D surface State with BTIndex
11741 //| Returns: Result of the operation
11742 //*-----------------------------------------------------------------------------
HalCm_Setup3DSurfaceStateWithBTIndex(PCM_HAL_STATE state,int32_t bindingTable,uint32_t surfIndex,uint32_t btIndex)11743 MOS_STATUS HalCm_Setup3DSurfaceStateWithBTIndex(
11744 PCM_HAL_STATE state,
11745 int32_t bindingTable,
11746 uint32_t surfIndex,
11747 uint32_t btIndex)
11748 {
11749 PRENDERHAL_INTERFACE renderHal = state->renderHal;
11750 MOS_STATUS eStatus;
11751 RENDERHAL_SURFACE surface;
11752 RENDERHAL_SURFACE_STATE_PARAMS surfaceParam;
11753 PRENDERHAL_SURFACE_STATE_ENTRY surfaceEntries[MHW_MAX_SURFACE_PLANES];
11754 int32_t nSurfaceEntries, i;
11755 uint16_t memObjCtl;
11756 uint32_t offsetSrc;
11757 PRENDERHAL_STATE_HEAP stateHeap;
11758
11759 eStatus = MOS_STATUS_UNKNOWN;
11760 nSurfaceEntries = 0;
11761
11762 if (surfIndex == CM_NULL_SURFACE)
11763 {
11764 return MOS_STATUS_SUCCESS;
11765 }
11766
11767 memObjCtl = CM_DEFAULT_CACHE_TYPE;
11768
11769 // check the surfIndex
11770 if (surfIndex >= state->cmDeviceParam.max3DSurfaceTableSize ||
11771 Mos_ResourceIsNull(&state->surf3DTable[surfIndex].osResource))
11772 {
11773 eStatus = MOS_STATUS_INVALID_PARAMETER;
11774 CM_ASSERTMESSAGE(
11775 "Invalid 3D surface array index '%d'", surfIndex);
11776 return MOS_STATUS_UNKNOWN;
11777 }
11778
11779 // Check to see if surface is already assigned
11780 uint32_t nBTInTable = (unsigned char)CM_INVALID_INDEX;
11781 nBTInTable = state->bti3DIndexTable[surfIndex].BTI.regularSurfIndex;
11782
11783 if (btIndex == nBTInTable)
11784 {
11785 nSurfaceEntries = state->bti3DIndexTable[surfIndex].nPlaneNumber;
11786
11787 stateHeap = renderHal->pStateHeap;
11788
11789 // Get Offset to Current Binding Table
11790 uint32_t offsetDst = (stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize) + // Points to the Base of Current SSH Buffer Instance
11791 (stateHeap->iBindingTableOffset) + // Moves the pointer to Base of Array of Binding Tables
11792 (bindingTable * stateHeap->iBindingTableSize) + // Moves the pointer to a Particular Binding Table
11793 (btIndex * sizeof(uint32_t)); // Move the pointer to correct entry
11794
11795 uint32_t *bindingTableEntry = (uint32_t*)(stateHeap->pSshBuffer + offsetDst);
11796
11797 MOS_SecureMemcpy(bindingTableEntry, sizeof(uint32_t)* nSurfaceEntries, state->bti3DIndexTable[surfIndex].BTITableEntry.regularBtiEntryPosition, sizeof(uint32_t)* nSurfaceEntries);
11798
11799 return MOS_STATUS_SUCCESS;
11800 }
11801
11802 // Get Details of 3D surface and fill the surface
11803 CM_CHK_MOSSTATUS_GOTOFINISH(HalCm_GetSurfaceAndRegister(state, &surface, CM_ARGUMENT_SURFACE3D, surfIndex, false));
11804
11805 // Setup 3D surface
11806 MOS_ZeroMemory(&surfaceParam, sizeof(surfaceParam));
11807 surfaceParam.Type = renderHal->SurfaceTypeDefault;
11808 surfaceParam.Boundary = RENDERHAL_SS_BOUNDARY_ORIGINAL;
11809
11810 //Cache configurations
11811 state->cmHalInterface->HwSetSurfaceMemoryObjectControl(memObjCtl, &surfaceParam);
11812
11813 //Set isOutput by default
11814 surfaceParam.isOutput = true;
11815
11816 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnSetupSurfaceState(
11817 renderHal,
11818 &surface,
11819 &surfaceParam,
11820 &nSurfaceEntries,
11821 surfaceEntries,
11822 nullptr));
11823
11824 for (i = 0; i < nSurfaceEntries; i++)
11825 {
11826 // Bind the surface State
11827 CM_CHK_MOSSTATUS_GOTOFINISH(renderHal->pfnBindSurfaceState(
11828 renderHal,
11829 bindingTable,
11830 btIndex + i,
11831 surfaceEntries[i]));
11832 }
11833 state->bti3DIndexTable[surfIndex].BTI.regularSurfIndex = btIndex;
11834 state->bti3DIndexTable[surfIndex].nPlaneNumber = nSurfaceEntries;
11835 // Get Offset to Current Binding Table
11836 stateHeap = renderHal->pStateHeap;
11837 offsetSrc = (stateHeap->iCurSshBufferIndex * stateHeap->dwSshIntanceSize) + // Points to the Base of Current SSH Buffer Instance
11838 (stateHeap->iBindingTableOffset) + // Moves the pointer to Base of Array of Binding Tables
11839 (bindingTable * stateHeap->iBindingTableSize) + // Moves the pointer to a Particular Binding Table
11840 (btIndex * sizeof(uint32_t)); // Move the pointer to correct entry
11841
11842 state->bti3DIndexTable[surfIndex].BTI.regularSurfIndex = btIndex;
11843 state->bti3DIndexTable[surfIndex].BTITableEntry.regularBtiEntryPosition = stateHeap->pSshBuffer + offsetSrc;
11844
11845 eStatus = MOS_STATUS_SUCCESS;
11846
11847 finish:
11848 return eStatus;
11849 }
11850
11851 //|-----------------------------------------------------------------------------
11852 //| Purpose : Tag-based Synchronization on Resource
11853 //| Input : state - Hal CM State
11854 //| surface surface
11855 //| isWrite - Write or Read
11856 //| Returns : Result of the operation
11857 //|-----------------------------------------------------------------------------
HalCm_SyncOnResource(PCM_HAL_STATE state,PMOS_SURFACE surface,bool isWrite)11858 MOS_STATUS HalCm_SyncOnResource(
11859 PCM_HAL_STATE state,
11860 PMOS_SURFACE surface,
11861 bool isWrite)
11862 {
11863 MOS_STATUS eStatus;
11864 PMOS_INTERFACE osInterface;
11865
11866 eStatus = MOS_STATUS_SUCCESS;
11867 osInterface = state->osInterface;
11868
11869 if (surface == nullptr || Mos_ResourceIsNull(&surface->OsResource))
11870 {
11871 CM_ASSERTMESSAGE("Input resource is not valid.");
11872 eStatus = MOS_STATUS_UNKNOWN;
11873 return eStatus;
11874 }
11875
11876 osInterface->pfnSyncOnResource(
11877 osInterface,
11878 &(surface->OsResource),
11879 state->osInterface->CurrentGpuContextOrdinal, //state->GpuContext,
11880 isWrite);
11881
11882 // Sync Render Target with Overlay Context
11883 if (surface->bOverlay)
11884 {
11885 osInterface->pfnSyncOnOverlayResource(
11886 osInterface,
11887 &(surface->OsResource),
11888 state->osInterface->CurrentGpuContextOrdinal);
11889 }
11890
11891 return eStatus;
11892 }
11893
11894 //!
11895 //! \brief Send Media Walker State
11896 //! \details Send MEDIA_OBJECT_WALKER command
11897 //! \param PCM_HAL_STATE state
11898 //! [in] Pointer to CM_HAL_STATE Structure
11899 //! \param PRENDERHAL_INTERFACE renderHal
11900 //! [in] Pointer to Hardware Interface Structure
11901 //! \param PMOS_COMMAND_BUFFER cmdBuffer
11902 //! [in] Pointer to Command Buffer
11903 //! \return MOS_STATUS
11904 //!
HalCm_SendMediaWalkerState(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM kernelParam,PMOS_COMMAND_BUFFER cmdBuffer)11905 MOS_STATUS HalCm_SendMediaWalkerState(
11906 PCM_HAL_STATE state,
11907 PCM_HAL_KERNEL_PARAM kernelParam,
11908 PMOS_COMMAND_BUFFER cmdBuffer)
11909 {
11910 PRENDERHAL_INTERFACE_LEGACY renderHal;
11911 MHW_WALKER_PARAMS mediaWalkerParams;
11912 MOS_STATUS eStatus;
11913
11914 eStatus = MOS_STATUS_SUCCESS;
11915 renderHal = state->renderHal;
11916
11917 MOS_SecureMemcpy(&mediaWalkerParams, sizeof(MHW_WALKER_PARAMS), &kernelParam->walkerParams, sizeof(CM_HAL_WALKER_PARAMS));
11918
11919 if (kernelParam->kernelThreadSpaceParam.threadSpaceWidth)
11920 {
11921 //per-kernel thread space is set, need use its own dependency mask
11922 mediaWalkerParams.UseScoreboard = renderHal->VfeScoreboard.ScoreboardEnable;
11923 mediaWalkerParams.ScoreboardMask = kernelParam->kernelThreadSpaceParam.globalDependencyMask;
11924 }
11925 else
11926 {
11927 //No per-kernel thread space setting, need use per-task depedency mask
11928 mediaWalkerParams.UseScoreboard = renderHal->VfeScoreboard.ScoreboardEnable;
11929 mediaWalkerParams.ScoreboardMask = renderHal->VfeScoreboard.ScoreboardMask;
11930 }
11931
11932 eStatus = renderHal->pMhwRenderInterface->AddMediaObjectWalkerCmd(
11933 cmdBuffer, &mediaWalkerParams);
11934
11935 return eStatus;
11936 }
11937
11938 //!
11939 //! \brief Send GpGpu Walker State
11940 //! \details Send GPGPU_WALKER state
11941 //! \param PCM_HAL_STATE state
11942 //! [in] Pointer to CM_HAL_STATE Structure
11943 //! \param PRENDERHAL_INTERFACE renderHal
11944 //! [in] Pointer to Hardware Interface Structure
11945 //! \param PMOS_COMMAND_BUFFER cmdBuffer
11946 //! [in] Pointer to Command Buffer
11947 //! \return MOS_STATUS
11948 //!
HalCm_SendGpGpuWalkerState(PCM_HAL_STATE state,PCM_HAL_KERNEL_PARAM kernelParam,PMOS_COMMAND_BUFFER cmdBuffer)11949 MOS_STATUS HalCm_SendGpGpuWalkerState(
11950 PCM_HAL_STATE state,
11951 PCM_HAL_KERNEL_PARAM kernelParam,
11952 PMOS_COMMAND_BUFFER cmdBuffer)
11953 {
11954 MhwRenderInterface *mhwRender;
11955 MHW_GPGPU_WALKER_PARAMS gpGpuWalkerParams;
11956 MOS_STATUS eStatus;
11957
11958 eStatus = MOS_STATUS_SUCCESS;
11959 mhwRender = state->renderHal->pMhwRenderInterface;
11960
11961 gpGpuWalkerParams.InterfaceDescriptorOffset = kernelParam->gpgpuWalkerParams.interfaceDescriptorOffset;
11962 gpGpuWalkerParams.GpGpuEnable = kernelParam->gpgpuWalkerParams.gpgpuEnabled;
11963 gpGpuWalkerParams.GroupWidth = kernelParam->gpgpuWalkerParams.groupWidth;
11964 gpGpuWalkerParams.GroupHeight = kernelParam->gpgpuWalkerParams.groupHeight;
11965 gpGpuWalkerParams.GroupDepth = kernelParam->gpgpuWalkerParams.groupDepth;
11966 gpGpuWalkerParams.ThreadWidth = kernelParam->gpgpuWalkerParams.threadWidth;
11967 gpGpuWalkerParams.ThreadHeight = kernelParam->gpgpuWalkerParams.threadHeight;
11968 gpGpuWalkerParams.ThreadDepth = kernelParam->gpgpuWalkerParams.threadDepth;
11969 gpGpuWalkerParams.SLMSize = kernelParam->slmSize;
11970
11971 eStatus = mhwRender->AddGpGpuWalkerStateCmd(cmdBuffer, &gpGpuWalkerParams);
11972
11973 return eStatus;
11974 }
11975
11976 //!
11977 //! \brief surface Format Convert
11978 //! \details Convert RENDERHAL_SURFACE to MHW_VEBOX_SURFACE
11979 //! \param PRENDERHAL_SURFACE renderHalSurface
11980 //! [in] Pointer to RENDERHAL_SURFACE Structure
11981 //! \param PMHW_VEBOX_SURFACE_PARAMS mhwVeboxSurface
11982 //! [in] Pointer to PMHW_VEBOX_SURFACE_PARAMS
11983 //! \return MOS_STATUS
11984 //!
HalCm_Convert_RENDERHAL_SURFACE_To_MHW_VEBOX_SURFACE(PRENDERHAL_SURFACE renderHalSurface,PMHW_VEBOX_SURFACE_PARAMS mhwVeboxSurface)11985 MOS_STATUS HalCm_Convert_RENDERHAL_SURFACE_To_MHW_VEBOX_SURFACE(
11986 PRENDERHAL_SURFACE renderHalSurface,
11987 PMHW_VEBOX_SURFACE_PARAMS mhwVeboxSurface)
11988 {
11989 PMOS_SURFACE surface;
11990 MOS_STATUS eStatus = MOS_STATUS_SUCCESS;
11991
11992 CM_CHK_NULL_GOTOFINISH_MOSERROR(renderHalSurface);
11993 CM_CHK_NULL_GOTOFINISH_MOSERROR(mhwVeboxSurface);
11994
11995 surface = &renderHalSurface->OsSurface;
11996 mhwVeboxSurface->Format = surface->Format;
11997 mhwVeboxSurface->dwWidth = surface->dwWidth;
11998 mhwVeboxSurface->dwHeight = surface->dwHeight;
11999 mhwVeboxSurface->dwPitch = surface->dwPitch;
12000 if (surface->dwPitch > 0)
12001 {
12002 mhwVeboxSurface->dwUYoffset = ((surface->UPlaneOffset.iSurfaceOffset - surface->YPlaneOffset.iSurfaceOffset) / surface->dwPitch)
12003 + surface->UPlaneOffset.iYOffset;
12004 }
12005 mhwVeboxSurface->TileType = surface->TileType;
12006 mhwVeboxSurface->TileModeGMM = surface->TileModeGMM;
12007 mhwVeboxSurface->bGMMTileEnabled = surface->bGMMTileEnabled;
12008 mhwVeboxSurface->rcMaxSrc = renderHalSurface->rcMaxSrc;
12009 mhwVeboxSurface->pOsResource = &surface->OsResource;
12010
12011 finish:
12012 return eStatus;
12013 }
12014
12015 //!
12016 //! \brief Set Vtune Profiling Flag
12017 //! \details Trun Vtune Profiling Flag On or off
12018 //! \param PCM_HAL_STATE state
12019 //! [in] Pointer to CM_HAL_STATE Structure
12020 //! \return MOS_STATUS_SUCCESS
12021 //!
HalCm_SetVtuneProfilingFlag(PCM_HAL_STATE state,bool vtuneOn)12022 MOS_STATUS HalCm_SetVtuneProfilingFlag(
12023 PCM_HAL_STATE state,
12024 bool vtuneOn)
12025 {
12026
12027 state->vtuneProfilerOn = vtuneOn;
12028
12029 return MOS_STATUS_SUCCESS;
12030 }
12031
12032 //*-----------------------------------------------------------------------------
12033 //| Purpose: Get the offset for the Task Sync Location given the task ID
12034 //| Returns: Sync Location
12035 //*-----------------------------------------------------------------------------
HalCm_GetTaskSyncLocation(PCM_HAL_STATE state,int32_t taskId)12036 int32_t HalCm_GetTaskSyncLocation(
12037 PCM_HAL_STATE state,
12038 int32_t taskId) // [in] Task ID
12039 {
12040 return (taskId * state->cmHalInterface->GetTimeStampResourceSize());
12041 }
12042
HalCm_GetLegacyRenderHalL3Setting(CmHalL3Settings * l3SettingsPtr,RENDERHAL_L3_CACHE_SETTINGS * l3SettingsLegacyPtr)12043 void HalCm_GetLegacyRenderHalL3Setting( CmHalL3Settings *l3SettingsPtr, RENDERHAL_L3_CACHE_SETTINGS *l3SettingsLegacyPtr )
12044 {
12045 *l3SettingsLegacyPtr = {};
12046 l3SettingsLegacyPtr->bOverride = l3SettingsPtr->overrideSettings;
12047 l3SettingsLegacyPtr->bEnableSLM = l3SettingsPtr->enableSlm;
12048 l3SettingsLegacyPtr->bL3CachingEnabled = l3SettingsPtr->l3CachingEnabled;
12049 l3SettingsLegacyPtr->bCntlRegOverride = l3SettingsPtr->cntlRegOverride;
12050 l3SettingsLegacyPtr->bCntlReg2Override = l3SettingsPtr->cntlReg2Override;
12051 l3SettingsLegacyPtr->bCntlReg3Override = l3SettingsPtr->cntlReg3Override;
12052 l3SettingsLegacyPtr->bSqcReg1Override = l3SettingsPtr->sqcReg1Override;
12053 l3SettingsLegacyPtr->bSqcReg4Override = l3SettingsPtr->sqcReg4Override;
12054 l3SettingsLegacyPtr->bLra1RegOverride = l3SettingsPtr->lra1RegOverride;
12055 l3SettingsLegacyPtr->dwCntlReg = l3SettingsPtr->cntlReg;
12056 l3SettingsLegacyPtr->dwCntlReg2 = l3SettingsPtr->cntlReg2;
12057 l3SettingsLegacyPtr->dwCntlReg3 = l3SettingsPtr->cntlReg3;
12058 l3SettingsLegacyPtr->dwSqcReg1 = l3SettingsPtr->sqcReg1;
12059 l3SettingsLegacyPtr->dwSqcReg4 = l3SettingsPtr->sqcReg4;
12060 l3SettingsLegacyPtr->dwLra1Reg = l3SettingsPtr->lra1Reg;
12061
12062 return;
12063 }
12064
HalCm_ConvertTicksToNanoSeconds(PCM_HAL_STATE state,uint64_t ticks)12065 uint64_t HalCm_ConvertTicksToNanoSeconds(
12066 PCM_HAL_STATE state,
12067 uint64_t ticks)
12068 {
12069 if (state->tsFrequency == 0)
12070 {
12071 // if KMD doesn't report an valid value, fall back to default configs
12072 return state->cmHalInterface->ConverTicksToNanoSecondsDefault(ticks);
12073 }
12074 return (ticks * 1000000000) / (state->tsFrequency);
12075 }
12076
12077 //!
12078 //! \brief Check GPU context
12079 //! \details Check if the GPU context is valid for CM layer
12080 //! \param MOS_GPU_CONTEXT gpuContext
12081 //! [in] GPU Context ordinal
12082 //! \return true/false
12083 //!
HalCm_IsValidGpuContext(MOS_GPU_CONTEXT gpuContext)12084 bool HalCm_IsValidGpuContext(
12085 MOS_GPU_CONTEXT gpuContext)
12086 {
12087 if( gpuContext == MOS_GPU_CONTEXT_RENDER3
12088 || gpuContext == MOS_GPU_CONTEXT_RENDER4
12089 || gpuContext == MOS_GPU_CONTEXT_CM_COMPUTE
12090 || gpuContext == MOS_GPU_CONTEXT_VEBOX)
12091 {
12092 return true;
12093 }
12094 else
12095 {
12096 CM_ASSERTMESSAGE("Invalid GPU context for CM.");
12097 return false;
12098 }
12099 }
12100